summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/.gitignore5
-rw-r--r--src/Makefile514
-rw-r--r--src/acl.c3150
-rw-r--r--src/adlist.c417
-rw-r--r--src/adlist.h100
-rw-r--r--src/ae.c512
-rw-r--r--src/ae.h136
-rw-r--r--src/ae_epoll.c139
-rw-r--r--src/ae_evport.c321
-rw-r--r--src/ae_kqueue.c190
-rw-r--r--src/ae_select.c110
-rw-r--r--src/anet.c706
-rw-r--r--src/anet.h75
-rw-r--r--src/aof.c2742
-rw-r--r--src/asciilogo.h47
-rw-r--r--src/atomicvar.h158
-rw-r--r--src/bio.c345
-rw-r--r--src/bio.h54
-rw-r--r--src/bitops.c1267
-rw-r--r--src/blocked.c763
-rw-r--r--src/call_reply.c560
-rw-r--r--src/call_reply.h60
-rw-r--r--src/childinfo.c183
-rw-r--r--src/cli_commands.c13
-rw-r--r--src/cli_commands.h46
-rw-r--r--src/cli_common.c408
-rw-r--r--src/cli_common.h54
-rw-r--r--src/cluster.c7717
-rw-r--r--src/cluster.h447
-rw-r--r--src/commands.c13
-rw-r--r--src/commands.def10899
-rw-r--r--src/commands.h40
-rw-r--r--src/commands/acl-cat.json42
-rw-r--r--src/commands/acl-deluser.json33
-rw-r--r--src/commands/acl-dryrun.json47
-rw-r--r--src/commands/acl-genpass.json28
-rw-r--r--src/commands/acl-getuser.json91
-rw-r--r--src/commands/acl-help.json23
-rw-r--r--src/commands/acl-list.json25
-rw-r--r--src/commands/acl-load.json21
-rw-r--r--src/commands/acl-log.json90
-rw-r--r--src/commands/acl-save.json25
-rw-r--r--src/commands/acl-setuser.json47
-rw-r--r--src/commands/acl-users.json25
-rw-r--r--src/commands/acl-whoami.json21
-rw-r--r--src/commands/acl.json12
-rw-r--r--src/commands/append.json53
-rw-r--r--src/commands/asking.json19
-rw-r--r--src/commands/auth.json43
-rw-r--r--src/commands/bgrewriteaof.json19
-rw-r--r--src/commands/bgsave.json40
-rw-r--r--src/commands/bitcount.json87
-rw-r--r--src/commands/bitfield.json159
-rw-r--r--src/commands/bitfield_ro.json69
-rw-r--r--src/commands/bitop.json99
-rw-r--r--src/commands/bitpos.json106
-rw-r--r--src/commands/blmove.json117
-rw-r--r--src/commands/blmpop.json105
-rw-r--r--src/commands/blpop.json80
-rw-r--r--src/commands/brpop.json79
-rw-r--r--src/commands/brpoplpush.json96
-rw-r--r--src/commands/bzmpop.json117
-rw-r--r--src/commands/bzpopmax.json85
-rw-r--r--src/commands/bzpopmin.json85
-rw-r--r--src/commands/client-caching.json41
-rw-r--r--src/commands/client-getname.json32
-rw-r--r--src/commands/client-getredir.json37
-rw-r--r--src/commands/client-help.json26
-rw-r--r--src/commands/client-id.json24
-rw-r--r--src/commands/client-info.json27
-rw-r--r--src/commands/client-kill.json159
-rw-r--r--src/commands/client-list.json93
-rw-r--r--src/commands/client-no-evict.json42
-rw-r--r--src/commands/client-no-touch.json40
-rw-r--r--src/commands/client-pause.json54
-rw-r--r--src/commands/client-reply.json47
-rw-r--r--src/commands/client-setinfo.json45
-rw-r--r--src/commands/client-setname.json33
-rw-r--r--src/commands/client-tracking.json80
-rw-r--r--src/commands/client-trackinginfo.json80
-rw-r--r--src/commands/client-unblock.json56
-rw-r--r--src/commands/client-unpause.json24
-rw-r--r--src/commands/client.json12
-rw-r--r--src/commands/cluster-addslots.json26
-rw-r--r--src/commands/cluster-addslotsrange.json36
-rw-r--r--src/commands/cluster-bumpepoch.json33
-rw-r--r--src/commands/cluster-count-failure-reports.json29
-rw-r--r--src/commands/cluster-countkeysinslot.json25
-rw-r--r--src/commands/cluster-delslots.json26
-rw-r--r--src/commands/cluster-delslotsrange.json36
-rw-r--r--src/commands/cluster-failover.json38
-rw-r--r--src/commands/cluster-flushslots.json19
-rw-r--r--src/commands/cluster-forget.json25
-rw-r--r--src/commands/cluster-getkeysinslot.json35
-rw-r--r--src/commands/cluster-help.json22
-rw-r--r--src/commands/cluster-info.json21
-rw-r--r--src/commands/cluster-keyslot.json25
-rw-r--r--src/commands/cluster-links.json60
-rw-r--r--src/commands/cluster-meet.json41
-rw-r--r--src/commands/cluster-myid.json18
-rw-r--r--src/commands/cluster-myshardid.json22
-rw-r--r--src/commands/cluster-nodes.json21
-rw-r--r--src/commands/cluster-replicas.json32
-rw-r--r--src/commands/cluster-replicate.json25
-rw-r--r--src/commands/cluster-reset.json38
-rw-r--r--src/commands/cluster-saveconfig.json19
-rw-r--r--src/commands/cluster-set-config-epoch.json25
-rw-r--r--src/commands/cluster-setslot.json54
-rw-r--r--src/commands/cluster-shards.json90
-rw-r--r--src/commands/cluster-slaves.json37
-rw-r--r--src/commands/cluster-slots.json136
-rw-r--r--src/commands/cluster.json9
-rw-r--r--src/commands/command-count.json23
-rw-r--r--src/commands/command-docs.json211
-rw-r--r--src/commands/command-getkeys.json39
-rw-r--r--src/commands/command-getkeysandflags.json55
-rw-r--r--src/commands/command-help.json26
-rw-r--r--src/commands/command-info.json213
-rw-r--r--src/commands/command-list.json55
-rw-r--r--src/commands/command.json21
-rw-r--r--src/commands/config-get.json36
-rw-r--r--src/commands/config-help.json22
-rw-r--r--src/commands/config-resetstat.json24
-rw-r--r--src/commands/config-rewrite.json24
-rw-r--r--src/commands/config-set.json47
-rw-r--r--src/commands/config.json9
-rw-r--r--src/commands/copy.json91
-rw-r--r--src/commands/dbsize.json25
-rw-r--r--src/commands/debug.json20
-rw-r--r--src/commands/decr.json50
-rw-r--r--src/commands/decrby.json54
-rw-r--r--src/commands/del.json53
-rw-r--r--src/commands/discard.json23
-rw-r--r--src/commands/dump.json58
-rw-r--r--src/commands/echo.json28
-rw-r--r--src/commands/eval.json69
-rw-r--r--src/commands/eval_ro.json68
-rw-r--r--src/commands/evalsha.json68
-rw-r--r--src/commands/evalsha_ro.json67
-rw-r--r--src/commands/exec.json31
-rw-r--r--src/commands/exists.json58
-rw-r--r--src/commands/expire.json94
-rw-r--r--src/commands/expireat.json94
-rw-r--r--src/commands/expiretime.json61
-rw-r--r--src/commands/failover.json54
-rw-r--r--src/commands/fcall.json69
-rw-r--r--src/commands/fcall_ro.json68
-rw-r--r--src/commands/flushall.json55
-rw-r--r--src/commands/flushdb.json55
-rw-r--r--src/commands/function-delete.json31
-rw-r--r--src/commands/function-dump.json21
-rw-r--r--src/commands/function-flush.json44
-rw-r--r--src/commands/function-help.json25
-rw-r--r--src/commands/function-kill.json25
-rw-r--r--src/commands/function-list.json87
-rw-r--r--src/commands/function-load.json39
-rw-r--r--src/commands/function-restore.json54
-rw-r--r--src/commands/function-stats.json81
-rw-r--r--src/commands/function.json9
-rw-r--r--src/commands/geoadd.json98
-rw-r--r--src/commands/geodist.json91
-rw-r--r--src/commands/geohash.json56
-rw-r--r--src/commands/geopos.json76
-rw-r--r--src/commands/georadius.json270
-rw-r--r--src/commands/georadius_ro.json201
-rw-r--r--src/commands/georadiusbymember.json261
-rw-r--r--src/commands/georadiusbymember_ro.json190
-rw-r--r--src/commands/geosearch.json267
-rw-r--r--src/commands/geosearchstore.json228
-rw-r--r--src/commands/get.json56
-rw-r--r--src/commands/getbit.json59
-rw-r--r--src/commands/getdel.json57
-rw-r--r--src/commands/getex.json90
-rw-r--r--src/commands/getrange.json55
-rw-r--r--src/commands/getset.json67
-rw-r--r--src/commands/hdel.json59
-rw-r--r--src/commands/hello.json111
-rw-r--r--src/commands/hexists.json59
-rw-r--r--src/commands/hget.json60
-rw-r--r--src/commands/hgetall.json53
-rw-r--r--src/commands/hincrby.json58
-rw-r--r--src/commands/hincrbyfloat.json58
-rw-r--r--src/commands/hkeys.json54
-rw-r--r--src/commands/hlen.json47
-rw-r--r--src/commands/hmget.json64
-rw-r--r--src/commands/hmset.json68
-rw-r--r--src/commands/hrandfield.json101
-rw-r--r--src/commands/hscan.json81
-rw-r--r--src/commands/hset.json70
-rw-r--r--src/commands/hsetnx.json65
-rw-r--r--src/commands/hstrlen.json52
-rw-r--r--src/commands/hvals.json53
-rw-r--r--src/commands/incr.json50
-rw-r--r--src/commands/incrby.json54
-rw-r--r--src/commands/incrbyfloat.json54
-rw-r--r--src/commands/info.json41
-rw-r--r--src/commands/keys.json34
-rw-r--r--src/commands/lastsave.json26
-rw-r--r--src/commands/latency-doctor.json26
-rw-r--r--src/commands/latency-graph.json32
-rw-r--r--src/commands/latency-help.json22
-rw-r--r--src/commands/latency-histogram.json54
-rw-r--r--src/commands/latency-history.json49
-rw-r--r--src/commands/latency-latest.json49
-rw-r--r--src/commands/latency-reset.json33
-rw-r--r--src/commands/latency.json9
-rw-r--r--src/commands/lcs.json127
-rw-r--r--src/commands/lindex.json59
-rw-r--r--src/commands/linsert.json85
-rw-r--r--src/commands/llen.json48
-rw-r--r--src/commands/lmove.json104
-rw-r--r--src/commands/lmpop.json100
-rw-r--r--src/commands/lolwut.json25
-rw-r--r--src/commands/lpop.json77
-rw-r--r--src/commands/lpos.json85
-rw-r--r--src/commands/lpush.json60
-rw-r--r--src/commands/lpushx.json61
-rw-r--r--src/commands/lrange.json58
-rw-r--r--src/commands/lrem.json56
-rw-r--r--src/commands/lset.json55
-rw-r--r--src/commands/ltrim.json54
-rw-r--r--src/commands/memory-doctor.json20
-rw-r--r--src/commands/memory-help.json22
-rw-r--r--src/commands/memory-malloc-stats.json20
-rw-r--r--src/commands/memory-purge.json18
-rw-r--r--src/commands/memory-stats.json121
-rw-r--r--src/commands/memory-usage.json58
-rw-r--r--src/commands/memory.json9
-rw-r--r--src/commands/mget.json63
-rw-r--r--src/commands/migrate.json181
-rw-r--r--src/commands/module-help.json22
-rw-r--r--src/commands/module-list.json47
-rw-r--r--src/commands/module-load.json32
-rw-r--r--src/commands/module-loadex.json51
-rw-r--r--src/commands/module-unload.json26
-rw-r--r--src/commands/module.json9
-rw-r--r--src/commands/monitor.json16
-rw-r--r--src/commands/move.json61
-rw-r--r--src/commands/mset.json62
-rw-r--r--src/commands/msetnx.json67
-rw-r--r--src/commands/multi.json23
-rw-r--r--src/commands/object-encoding.json58
-rw-r--r--src/commands/object-freq.json50
-rw-r--r--src/commands/object-help.json25
-rw-r--r--src/commands/object-idletime.json50
-rw-r--r--src/commands/object-refcount.json50
-rw-r--r--src/commands/object.json9
-rw-r--r--src/commands/persist.json56
-rw-r--r--src/commands/pexpire.json94
-rw-r--r--src/commands/pexpireat.json94
-rw-r--r--src/commands/pexpiretime.json61
-rw-r--r--src/commands/pfadd.json63
-rw-r--r--src/commands/pfcount.json50
-rw-r--r--src/commands/pfdebug.json52
-rw-r--r--src/commands/pfmerge.json73
-rw-r--r--src/commands/pfselftest.json22
-rw-r--r--src/commands/ping.json40
-rw-r--r--src/commands/psetex.json60
-rw-r--r--src/commands/psubscribe.json24
-rw-r--r--src/commands/psync.json25
-rw-r--r--src/commands/pttl.json70
-rw-r--r--src/commands/publish.json33
-rw-r--r--src/commands/pubsub-channels.json31
-rw-r--r--src/commands/pubsub-help.json22
-rw-r--r--src/commands/pubsub-numpat.json21
-rw-r--r--src/commands/pubsub-numsub.json28
-rw-r--r--src/commands/pubsub-shardchannels.json31
-rw-r--r--src/commands/pubsub-shardnumsub.json28
-rw-r--r--src/commands/pubsub.json9
-rw-r--r--src/commands/punsubscribe.json25
-rw-r--r--src/commands/quit.json29
-rw-r--r--src/commands/randomkey.json34
-rw-r--r--src/commands/readonly.json21
-rw-r--r--src/commands/readwrite.json21
-rw-r--r--src/commands/rename.json72
-rw-r--r--src/commands/renamenx.json86
-rw-r--r--src/commands/replconf.json23
-rw-r--r--src/commands/replicaof.json59
-rw-r--r--src/commands/reset.json24
-rw-r--r--src/commands/restore-asking.json102
-rw-r--r--src/commands/restore.json98
-rw-r--r--src/commands/role.json134
-rw-r--r--src/commands/rpop.json76
-rw-r--r--src/commands/rpoplpush.json85
-rw-r--r--src/commands/rpush.json61
-rw-r--r--src/commands/rpushx.json61
-rw-r--r--src/commands/sadd.json60
-rw-r--r--src/commands/save.json19
-rw-r--r--src/commands/scan.json72
-rw-r--r--src/commands/scard.json48
-rw-r--r--src/commands/script-debug.json43
-rw-r--r--src/commands/script-exists.json44
-rw-r--r--src/commands/script-flush.json50
-rw-r--r--src/commands/script-help.json25
-rw-r--r--src/commands/script-kill.json25
-rw-r--r--src/commands/script-load.json32
-rw-r--r--src/commands/script.json9
-rw-r--r--src/commands/sdiff.json55
-rw-r--r--src/commands/sdiffstore.json73
-rw-r--r--src/commands/select.json27
-rw-r--r--src/commands/sentinel-ckquorum.json26
-rw-r--r--src/commands/sentinel-config.json121
-rw-r--r--src/commands/sentinel-debug.json49
-rw-r--r--src/commands/sentinel-failover.json25
-rw-r--r--src/commands/sentinel-flushconfig.json20
-rw-r--r--src/commands/sentinel-get-master-addr-by-name.json38
-rw-r--r--src/commands/sentinel-help.json24
-rw-r--r--src/commands/sentinel-info-cache.json64
-rw-r--r--src/commands/sentinel-is-master-down-by-addr.json61
-rw-r--r--src/commands/sentinel-master.json29
-rw-r--r--src/commands/sentinel-masters.json26
-rw-r--r--src/commands/sentinel-monitor.json37
-rw-r--r--src/commands/sentinel-myid.json20
-rw-r--r--src/commands/sentinel-pending-scripts.json52
-rw-r--r--src/commands/sentinel-remove.json25
-rw-r--r--src/commands/sentinel-replicas.json32
-rw-r--r--src/commands/sentinel-reset.json26
-rw-r--r--src/commands/sentinel-sentinels.json32
-rw-r--r--src/commands/sentinel-set.json40
-rw-r--r--src/commands/sentinel-simulate-failure.json52
-rw-r--r--src/commands/sentinel-slaves.json37
-rw-r--r--src/commands/sentinel.json14
-rw-r--r--src/commands/set.json152
-rw-r--r--src/commands/setbit.json64
-rw-r--r--src/commands/setex.json60
-rw-r--r--src/commands/setnx.json66
-rw-r--r--src/commands/setrange.json57
-rw-r--r--src/commands/shutdown.json69
-rw-r--r--src/commands/sinter.json55
-rw-r--r--src/commands/sintercard.json60
-rw-r--r--src/commands/sinterstore.json73
-rw-r--r--src/commands/sismember.json59
-rw-r--r--src/commands/slaveof.json64
-rw-r--r--src/commands/slowlog-get.json74
-rw-r--r--src/commands/slowlog-help.json22
-rw-r--r--src/commands/slowlog-len.json26
-rw-r--r--src/commands/slowlog-reset.json23
-rw-r--r--src/commands/slowlog.json9
-rw-r--r--src/commands/smembers.json54
-rw-r--r--src/commands/smismember.json66
-rw-r--r--src/commands/smove.json84
-rw-r--r--src/commands/sort.json162
-rw-r--r--src/commands/sort_ro.json132
-rw-r--r--src/commands/spop.json80
-rw-r--r--src/commands/spublish.json51
-rw-r--r--src/commands/srandmember.json83
-rw-r--r--src/commands/srem.json60
-rw-r--r--src/commands/sscan.json81
-rw-r--r--src/commands/ssubscribe.json42
-rw-r--r--src/commands/strlen.json48
-rw-r--r--src/commands/subscribe.json25
-rw-r--r--src/commands/substr.json60
-rw-r--r--src/commands/sunion.json55
-rw-r--r--src/commands/sunionstore.json73
-rw-r--r--src/commands/sunsubscribe.json43
-rw-r--r--src/commands/swapdb.json31
-rw-r--r--src/commands/sync.json15
-rw-r--r--src/commands/time.json28
-rw-r--r--src/commands/touch.json53
-rw-r--r--src/commands/ttl.json70
-rw-r--r--src/commands/type.json55
-rw-r--r--src/commands/unlink.json54
-rw-r--r--src/commands/unsubscribe.json25
-rw-r--r--src/commands/unwatch.json23
-rw-r--r--src/commands/wait.json34
-rw-r--r--src/commands/waitaof.json52
-rw-r--r--src/commands/watch.json50
-rw-r--r--src/commands/xack.json58
-rw-r--r--src/commands/xadd.json161
-rw-r--r--src/commands/xautoclaim.json158
-rw-r--r--src/commands/xclaim.json138
-rw-r--r--src/commands/xdel.json54
-rw-r--r--src/commands/xgroup-create.json85
-rw-r--r--src/commands/xgroup-createconsumer.json64
-rw-r--r--src/commands/xgroup-delconsumer.json57
-rw-r--r--src/commands/xgroup-destroy.json59
-rw-r--r--src/commands/xgroup-help.json25
-rw-r--r--src/commands/xgroup-setid.json79
-rw-r--r--src/commands/xgroup.json9
-rw-r--r--src/commands/xinfo-consumers.json80
-rw-r--r--src/commands/xinfo-groups.json92
-rw-r--r--src/commands/xinfo-help.json25
-rw-r--r--src/commands/xinfo-stream.json361
-rw-r--r--src/commands/xinfo.json9
-rw-r--r--src/commands/xlen.json48
-rw-r--r--src/commands/xpending.json160
-rw-r--r--src/commands/xrange.json87
-rw-r--r--src/commands/xread.json108
-rw-r--r--src/commands/xreadgroup.json134
-rw-r--r--src/commands/xrevrange.json86
-rw-r--r--src/commands/xsetid.json72
-rw-r--r--src/commands/xtrim.json108
-rw-r--r--src/commands/zadd.json144
-rw-r--r--src/commands/zcard.json47
-rw-r--r--src/commands/zcount.json56
-rw-r--r--src/commands/zdiff.json85
-rw-r--r--src/commands/zdiffstore.json77
-rw-r--r--src/commands/zincrby.json58
-rw-r--r--src/commands/zinter.json115
-rw-r--r--src/commands/zintercard.json60
-rw-r--r--src/commands/zinterstore.json108
-rw-r--r--src/commands/zlexcount.json57
-rw-r--r--src/commands/zmpop.json111
-rw-r--r--src/commands/zmscore.json65
-rw-r--r--src/commands/zpopmax.json89
-rw-r--r--src/commands/zpopmin.json89
-rw-r--r--src/commands/zrandmember.json101
-rw-r--r--src/commands/zrange.json137
-rw-r--r--src/commands/zrangebylex.json80
-rw-r--r--src/commands/zrangebyscore.json119
-rw-r--r--src/commands/zrangestore.json118
-rw-r--r--src/commands/zrank.json86
-rw-r--r--src/commands/zrem.json60
-rw-r--r--src/commands/zremrangebylex.json55
-rw-r--r--src/commands/zremrangebyrank.json55
-rw-r--r--src/commands/zremrangebyscore.json55
-rw-r--r--src/commands/zrevrange.json94
-rw-r--r--src/commands/zrevrangebylex.json80
-rw-r--r--src/commands/zrevrangebyscore.json118
-rw-r--r--src/commands/zrevrank.json86
-rw-r--r--src/commands/zscan.json81
-rw-r--r--src/commands/zscore.json60
-rw-r--r--src/commands/zunion.json115
-rw-r--r--src/commands/zunionstore.json107
-rw-r--r--src/config.c3413
-rw-r--r--src/config.h321
-rw-r--r--src/connection.c208
-rw-r--r--src/connection.h454
-rw-r--r--src/connhelpers.h88
-rw-r--r--src/crc16.c88
-rw-r--r--src/crc16_slottable.h835
-rw-r--r--src/crc64.c161
-rw-r--r--src/crc64.h13
-rw-r--r--src/crcspeed.c282
-rw-r--r--src/crcspeed.h60
-rw-r--r--src/db.c2558
-rw-r--r--src/debug.c2322
-rw-r--r--src/debugmacro.h46
-rw-r--r--src/defrag.c1079
-rw-r--r--src/dict.c1749
-rw-r--r--src/dict.h231
-rw-r--r--src/endianconv.c129
-rw-r--r--src/endianconv.h78
-rw-r--r--src/eval.c1667
-rw-r--r--src/evict.c757
-rw-r--r--src/expire.c754
-rw-r--r--src/fmacros.h76
-rw-r--r--src/function_lua.c506
-rw-r--r--src/functions.c1139
-rw-r--r--src/functions.h136
-rw-r--r--src/geo.c1005
-rw-r--r--src/geo.h22
-rw-r--r--src/geohash.c299
-rw-r--r--src/geohash.h135
-rw-r--r--src/geohash_helper.c280
-rw-r--r--src/geohash_helper.h65
-rw-r--r--src/hyperloglog.c1618
-rw-r--r--src/intset.c560
-rw-r--r--src/intset.h57
-rw-r--r--src/latency.c739
-rw-r--r--src/latency.h108
-rw-r--r--src/lazyfree.c227
-rw-r--r--src/listpack.c2660
-rw-r--r--src/listpack.h106
-rw-r--r--src/listpack_malloc.h49
-rw-r--r--src/localtime.c123
-rw-r--r--src/logreqres.c315
-rw-r--r--src/lolwut.c188
-rw-r--r--src/lolwut.h55
-rw-r--r--src/lolwut5.c177
-rw-r--r--src/lolwut6.c201
-rw-r--r--src/lzf.h100
-rw-r--r--src/lzfP.h190
-rw-r--r--src/lzf_c.c302
-rw-r--r--src/lzf_d.c191
-rw-r--r--src/memtest.c377
-rwxr-xr-xsrc/mkreleasehdr.sh16
-rw-r--r--src/module.c13846
-rw-r--r--src/modules/.gitignore2
-rw-r--r--src/modules/Makefile69
-rw-r--r--src/modules/helloacl.c190
-rw-r--r--src/modules/helloblock.c218
-rw-r--r--src/modules/hellocluster.c118
-rw-r--r--src/modules/hellodict.c131
-rw-r--r--src/modules/hellohook.c92
-rw-r--r--src/modules/hellotimer.c75
-rw-r--r--src/modules/hellotype.c362
-rw-r--r--src/modules/helloworld.c621
-rw-r--r--src/monotonic.c180
-rw-r--r--src/monotonic.h61
-rw-r--r--src/mt19937-64.c187
-rw-r--r--src/mt19937-64.h87
-rw-r--r--src/multi.c500
-rw-r--r--src/networking.c4578
-rw-r--r--src/notify.c145
-rw-r--r--src/object.c1688
-rw-r--r--src/pqsort.c185
-rw-r--r--src/pqsort.h40
-rw-r--r--src/pubsub.c754
-rw-r--r--src/quicklist.c3257
-rw-r--r--src/quicklist.h214
-rw-r--r--src/rand.c93
-rw-r--r--src/rand.h38
-rw-r--r--src/rax.c1927
-rw-r--r--src/rax.h216
-rw-r--r--src/rax_malloc.h44
-rw-r--r--src/rdb.c3722
-rw-r--r--src/rdb.h184
-rw-r--r--src/redis-benchmark.c2060
-rw-r--r--src/redis-check-aof.c566
-rw-r--r--src/redis-check-rdb.c447
-rw-r--r--src/redis-cli.c9983
-rwxr-xr-xsrc/redis-trib.rb129
-rw-r--r--src/redisassert.c53
-rw-r--r--src/redisassert.h49
-rw-r--r--src/redismodule.h1685
-rw-r--r--src/release.c69
-rw-r--r--src/replication.c4241
-rw-r--r--src/resp_parser.c228
-rw-r--r--src/resp_parser.h94
-rw-r--r--src/rio.c520
-rw-r--r--src/rio.h185
-rw-r--r--src/script.c578
-rw-r--r--src/script.h111
-rw-r--r--src/script_lua.c1722
-rw-r--r--src/script_lua.h87
-rw-r--r--src/sds.c1496
-rw-r--r--src/sds.h287
-rw-r--r--src/sdsalloc.h54
-rw-r--r--src/sentinel.c5484
-rw-r--r--src/server.c7365
-rw-r--r--src/server.h3744
-rw-r--r--src/setcpuaffinity.c155
-rw-r--r--src/setproctitle.c331
-rw-r--r--src/sha1.c239
-rw-r--r--src/sha1.h27
-rw-r--r--src/sha256.c163
-rw-r--r--src/sha256.h35
-rw-r--r--src/siphash.c373
-rw-r--r--src/slowlog.c206
-rw-r--r--src/slowlog.h51
-rw-r--r--src/socket.c471
-rw-r--r--src/solarisfixes.h54
-rw-r--r--src/sort.c619
-rw-r--r--src/sparkline.c179
-rw-r--r--src/sparkline.h56
-rw-r--r--src/stream.h147
-rw-r--r--src/strl.c86
-rw-r--r--src/syncio.c145
-rw-r--r--src/syscheck.c375
-rw-r--r--src/syscheck.h46
-rw-r--r--src/t_hash.c1163
-rw-r--r--src/t_list.c1388
-rw-r--r--src/t_set.c1680
-rw-r--r--src/t_stream.c4038
-rw-r--r--src/t_string.c951
-rw-r--r--src/t_zset.c4460
-rw-r--r--src/testhelp.h62
-rw-r--r--src/timeout.c202
-rw-r--r--src/tls.c1204
-rw-r--r--src/tracking.c660
-rw-r--r--src/unix.c207
-rw-r--r--src/util.c1431
-rw-r--r--src/util.h99
-rw-r--r--src/valgrind.sup26
-rw-r--r--src/version.h2
-rw-r--r--src/ziplist.c2666
-rw-r--r--src/ziplist.h74
-rw-r--r--src/zipmap.c542
-rw-r--r--src/zipmap.h54
-rw-r--r--src/zmalloc.c851
-rw-r--r--src/zmalloc.h167
572 files changed, 185878 insertions, 0 deletions
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000..aee7aac
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,5 @@
+*.gcda
+*.gcno
+*.gcov
+redis.info
+lcov-html
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..ecbd275
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,514 @@
+# Redis Makefile
+# Copyright (C) 2009 Salvatore Sanfilippo <antirez at gmail dot com>
+# This file is released under the BSD license, see the COPYING file
+#
+# The Makefile composes the final FINAL_CFLAGS and FINAL_LDFLAGS using
+# what is needed for Redis plus the standard CFLAGS and LDFLAGS passed.
+# However when building the dependencies (Jemalloc, Lua, Hiredis, ...)
+# CFLAGS and LDFLAGS are propagated to the dependencies, so to pass
+# flags only to be used when compiling / linking Redis itself REDIS_CFLAGS
+# and REDIS_LDFLAGS are used instead (this is the case of 'make gcov').
+#
+# Dependencies are stored in the Makefile.dep file. To rebuild this file
+# Just use 'make dep', but this is only needed by developers.
+
+release_hdr := $(shell sh -c './mkreleasehdr.sh')
+uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
+CLANG := $(findstring clang,$(shell sh -c '$(CC) --version | head -1'))
+OPTIMIZATION?=-O3
+ifeq ($(OPTIMIZATION),-O3)
+ ifeq (clang,$(CLANG))
+ REDIS_CFLAGS+=-flto
+ else
+ REDIS_CFLAGS+=-flto=auto
+ endif
+ REDIS_LDFLAGS+=-O3 -flto
+endif
+DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv
+NODEPS:=clean distclean
+
+# Default settings
+STD=-pedantic -DREDIS_STATIC=''
+
+# Use -Wno-c11-extensions on clang, either where explicitly used or on
+# platforms we can assume it's being used.
+ifeq (clang,$(CLANG))
+ STD+=-Wno-c11-extensions
+else
+ifneq (,$(findstring FreeBSD,$(uname_S)))
+ STD+=-Wno-c11-extensions
+endif
+endif
+WARN=-Wall -W -Wno-missing-field-initializers -Werror=deprecated-declarations -Wstrict-prototypes
+OPT=$(OPTIMIZATION)
+
+# Detect if the compiler supports C11 _Atomic.
+# NUMBER_SIGN_CHAR is a workaround to support both GNU Make 4.3 and older versions.
+NUMBER_SIGN_CHAR := \#
+C11_ATOMIC := $(shell sh -c 'echo "$(NUMBER_SIGN_CHAR)include <stdatomic.h>" > foo.c; \
+ $(CC) -std=gnu11 -c foo.c -o foo.o > /dev/null 2>&1; \
+ if [ -f foo.o ]; then echo "yes"; rm foo.o; fi; rm foo.c')
+ifeq ($(C11_ATOMIC),yes)
+ STD+=-std=gnu11
+else
+ STD+=-std=c99
+endif
+
+PREFIX?=/usr/local
+INSTALL_BIN=$(PREFIX)/bin
+INSTALL=install
+PKG_CONFIG?=pkg-config
+
+ifndef PYTHON
+PYTHON := $(shell which python3 || which python)
+endif
+
+# Default allocator defaults to Jemalloc on Linux and libc otherwise
+MALLOC=libc
+ifeq ($(uname_S),Linux)
+ MALLOC=jemalloc
+endif
+
+# To get ARM stack traces if Redis crashes we need a special C flag.
+ifneq (,$(filter aarch64 armv%,$(uname_M)))
+ CFLAGS+=-funwind-tables
+endif
+
+# Backwards compatibility for selecting an allocator
+ifeq ($(USE_TCMALLOC),yes)
+ MALLOC=tcmalloc
+endif
+
+ifeq ($(USE_TCMALLOC_MINIMAL),yes)
+ MALLOC=tcmalloc_minimal
+endif
+
+ifeq ($(USE_JEMALLOC),yes)
+ MALLOC=jemalloc
+endif
+
+ifeq ($(USE_JEMALLOC),no)
+ MALLOC=libc
+endif
+
+ifdef SANITIZER
+ifeq ($(SANITIZER),address)
+ MALLOC=libc
+ CFLAGS+=-fsanitize=address -fno-sanitize-recover=all -fno-omit-frame-pointer
+ LDFLAGS+=-fsanitize=address
+else
+ifeq ($(SANITIZER),undefined)
+ MALLOC=libc
+ CFLAGS+=-fsanitize=undefined -fno-sanitize-recover=all -fno-omit-frame-pointer
+ LDFLAGS+=-fsanitize=undefined
+else
+ifeq ($(SANITIZER),thread)
+ CFLAGS+=-fsanitize=thread -fno-sanitize-recover=all -fno-omit-frame-pointer
+ LDFLAGS+=-fsanitize=thread
+else
+ $(error "unknown sanitizer=${SANITIZER}")
+endif
+endif
+endif
+endif
+
+# Override default settings if possible
+-include .make-settings
+
+FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(REDIS_CFLAGS)
+FINAL_LDFLAGS=$(LDFLAGS) $(REDIS_LDFLAGS) $(DEBUG)
+FINAL_LIBS=-lm
+DEBUG=-g -ggdb
+
+# Linux ARM32 needs -latomic at linking time
+ifneq (,$(findstring armv,$(uname_M)))
+ FINAL_LIBS+=-latomic
+endif
+
+ifeq ($(uname_S),SunOS)
+ # SunOS
+ ifeq ($(findstring -m32,$(FINAL_CFLAGS)),)
+ CFLAGS+=-m64
+ endif
+ ifeq ($(findstring -m32,$(FINAL_LDFLAGS)),)
+ LDFLAGS+=-m64
+ endif
+ DEBUG=-g
+ DEBUG_FLAGS=-g
+ export CFLAGS LDFLAGS DEBUG DEBUG_FLAGS
+ INSTALL=cp -pf
+ FINAL_CFLAGS+= -D__EXTENSIONS__ -D_XPG6
+ FINAL_LIBS+= -ldl -lnsl -lsocket -lresolv -lpthread -lrt
+ ifeq ($(USE_BACKTRACE),yes)
+ FINAL_CFLAGS+= -DUSE_BACKTRACE
+ endif
+else
+ifeq ($(uname_S),Darwin)
+ # Darwin
+ FINAL_LIBS+= -ldl
+ # Homebrew's OpenSSL is not linked to /usr/local to avoid
+ # conflicts with the system's LibreSSL installation so it
+ # must be referenced explicitly during build.
+ifeq ($(uname_M),arm64)
+ # Homebrew arm64 uses /opt/homebrew as HOMEBREW_PREFIX
+ OPENSSL_PREFIX?=/opt/homebrew/opt/openssl
+else
+ # Homebrew x86/ppc uses /usr/local as HOMEBREW_PREFIX
+ OPENSSL_PREFIX?=/usr/local/opt/openssl
+endif
+else
+ifeq ($(uname_S),AIX)
+ # AIX
+ FINAL_LDFLAGS+= -Wl,-bexpall
+ FINAL_LIBS+=-ldl -pthread -lcrypt -lbsd
+else
+ifeq ($(uname_S),OpenBSD)
+ # OpenBSD
+ FINAL_LIBS+= -lpthread
+ ifeq ($(USE_BACKTRACE),yes)
+ FINAL_CFLAGS+= -DUSE_BACKTRACE -I/usr/local/include
+ FINAL_LDFLAGS+= -L/usr/local/lib
+ FINAL_LIBS+= -lexecinfo
+ endif
+
+else
+ifeq ($(uname_S),NetBSD)
+ # NetBSD
+ FINAL_LIBS+= -lpthread
+ ifeq ($(USE_BACKTRACE),yes)
+ FINAL_CFLAGS+= -DUSE_BACKTRACE -I/usr/pkg/include
+ FINAL_LDFLAGS+= -L/usr/pkg/lib
+ FINAL_LIBS+= -lexecinfo
+ endif
+else
+ifeq ($(uname_S),FreeBSD)
+ # FreeBSD
+ FINAL_LIBS+= -lpthread -lexecinfo
+else
+ifeq ($(uname_S),DragonFly)
+ # DragonFly
+ FINAL_LIBS+= -lpthread -lexecinfo
+else
+ifeq ($(uname_S),OpenBSD)
+ # OpenBSD
+ FINAL_LIBS+= -lpthread -lexecinfo
+else
+ifeq ($(uname_S),NetBSD)
+ # NetBSD
+ FINAL_LIBS+= -lpthread -lexecinfo
+else
+ifeq ($(uname_S),Haiku)
+ # Haiku
+ FINAL_CFLAGS+= -DBSD_SOURCE
+ FINAL_LDFLAGS+= -lbsd -lnetwork
+ FINAL_LIBS+= -lpthread
+else
+ # All the other OSes (notably Linux)
+ FINAL_LDFLAGS+= -rdynamic
+ FINAL_LIBS+=-ldl -pthread -lrt
+endif
+endif
+endif
+endif
+endif
+endif
+endif
+endif
+endif
+endif
+
+ifdef OPENSSL_PREFIX
+ OPENSSL_CFLAGS=-I$(OPENSSL_PREFIX)/include
+ OPENSSL_LDFLAGS=-L$(OPENSSL_PREFIX)/lib
+ # Also export OPENSSL_PREFIX so it ends up in deps sub-Makefiles
+ export OPENSSL_PREFIX
+endif
+
+# Include paths to dependencies
+FINAL_CFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src -I../deps/hdr_histogram -I../deps/fpconv
+
+# Determine systemd support and/or build preference (defaulting to auto-detection)
+BUILD_WITH_SYSTEMD=no
+LIBSYSTEMD_LIBS=-lsystemd
+
+# If 'USE_SYSTEMD' in the environment is neither "no" nor "yes", try to
+# auto-detect libsystemd's presence and link accordingly.
+ifneq ($(USE_SYSTEMD),no)
+ LIBSYSTEMD_PKGCONFIG := $(shell $(PKG_CONFIG) --exists libsystemd && echo $$?)
+# If libsystemd cannot be detected, continue building without support for it
+# (unless a later check tells us otherwise)
+ifeq ($(LIBSYSTEMD_PKGCONFIG),0)
+ BUILD_WITH_SYSTEMD=yes
+ LIBSYSTEMD_LIBS=$(shell $(PKG_CONFIG) --libs libsystemd)
+endif
+endif
+
+# If 'USE_SYSTEMD' is set to "yes" use pkg-config if available or fall back to
+# default -lsystemd.
+ifeq ($(USE_SYSTEMD),yes)
+ BUILD_WITH_SYSTEMD=yes
+endif
+
+ifeq ($(BUILD_WITH_SYSTEMD),yes)
+ FINAL_LIBS+=$(LIBSYSTEMD_LIBS)
+ FINAL_CFLAGS+= -DHAVE_LIBSYSTEMD
+endif
+
+ifeq ($(MALLOC),tcmalloc)
+ FINAL_CFLAGS+= -DUSE_TCMALLOC
+ FINAL_LIBS+= -ltcmalloc
+endif
+
+ifeq ($(MALLOC),tcmalloc_minimal)
+ FINAL_CFLAGS+= -DUSE_TCMALLOC
+ FINAL_LIBS+= -ltcmalloc_minimal
+endif
+
+ifeq ($(MALLOC),jemalloc)
+ DEPENDENCY_TARGETS+= jemalloc
+ FINAL_CFLAGS+= -DUSE_JEMALLOC -I../deps/jemalloc/include
+ FINAL_LIBS := ../deps/jemalloc/lib/libjemalloc.a $(FINAL_LIBS)
+endif
+
+# LIBSSL & LIBCRYPTO
+LIBSSL_LIBS=
+LIBSSL_PKGCONFIG := $(shell $(PKG_CONFIG) --exists libssl && echo $$?)
+ifeq ($(LIBSSL_PKGCONFIG),0)
+ LIBSSL_LIBS=$(shell $(PKG_CONFIG) --libs libssl)
+else
+ LIBSSL_LIBS=-lssl
+endif
+LIBCRYPTO_LIBS=
+LIBCRYPTO_PKGCONFIG := $(shell $(PKG_CONFIG) --exists libcrypto && echo $$?)
+ifeq ($(LIBCRYPTO_PKGCONFIG),0)
+ LIBCRYPTO_LIBS=$(shell $(PKG_CONFIG) --libs libcrypto)
+else
+ LIBCRYPTO_LIBS=-lcrypto
+endif
+
+BUILD_NO:=0
+BUILD_YES:=1
+BUILD_MODULE:=2
+ifeq ($(BUILD_TLS),yes)
+ FINAL_CFLAGS+=-DUSE_OPENSSL=$(BUILD_YES) $(OPENSSL_CFLAGS) -DBUILD_TLS_MODULE=$(BUILD_NO)
+ FINAL_LDFLAGS+=$(OPENSSL_LDFLAGS)
+ FINAL_LIBS += ../deps/hiredis/libhiredis_ssl.a $(LIBSSL_LIBS) $(LIBCRYPTO_LIBS)
+endif
+
+TLS_MODULE=
+TLS_MODULE_NAME:=redis-tls$(PROG_SUFFIX).so
+TLS_MODULE_CFLAGS:=$(FINAL_CFLAGS)
+ifeq ($(BUILD_TLS),module)
+ FINAL_CFLAGS+=-DUSE_OPENSSL=$(BUILD_MODULE) $(OPENSSL_CFLAGS)
+ TLS_CLIENT_LIBS = ../deps/hiredis/libhiredis_ssl.a $(LIBSSL_LIBS) $(LIBCRYPTO_LIBS)
+ TLS_MODULE=$(TLS_MODULE_NAME)
+ TLS_MODULE_CFLAGS+=-DUSE_OPENSSL=$(BUILD_MODULE) $(OPENSSL_CFLAGS) -DBUILD_TLS_MODULE=$(BUILD_MODULE)
+endif
+
+ifndef V
+ define MAKE_INSTALL
+ @printf ' %b %b\n' $(LINKCOLOR)INSTALL$(ENDCOLOR) $(BINCOLOR)$(1)$(ENDCOLOR) 1>&2
+ @$(INSTALL) $(1) $(2)
+ endef
+else
+ define MAKE_INSTALL
+ $(INSTALL) $(1) $(2)
+ endef
+endif
+
+REDIS_CC=$(QUIET_CC)$(CC) $(FINAL_CFLAGS)
+REDIS_LD=$(QUIET_LINK)$(CC) $(FINAL_LDFLAGS)
+REDIS_INSTALL=$(QUIET_INSTALL)$(INSTALL)
+
+CCCOLOR="\033[34m"
+LINKCOLOR="\033[34;1m"
+SRCCOLOR="\033[33m"
+BINCOLOR="\033[37;1m"
+MAKECOLOR="\033[32;1m"
+ENDCOLOR="\033[0m"
+
+ifndef V
+QUIET_CC = @printf ' %b %b\n' $(CCCOLOR)CC$(ENDCOLOR) $(SRCCOLOR)$@$(ENDCOLOR) 1>&2;
+QUIET_GEN = @printf ' %b %b\n' $(CCCOLOR)GEN$(ENDCOLOR) $(SRCCOLOR)$@$(ENDCOLOR) 1>&2;
+QUIET_LINK = @printf ' %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) 1>&2;
+QUIET_INSTALL = @printf ' %b %b\n' $(LINKCOLOR)INSTALL$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) 1>&2;
+endif
+
+ifneq (, $(findstring LOG_REQ_RES, $(REDIS_CFLAGS)))
+ COMMANDS_DEF_FILENAME=commands_with_reply_schema
+ GEN_COMMANDS_FLAGS=--with-reply-schema
+else
+ COMMANDS_DEF_FILENAME=commands
+ GEN_COMMANDS_FLAGS=
+endif
+
+REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX)
+REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX)
+REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
+REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX)
+REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
+REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX)
+REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o redisassert.o release.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o
+REDIS_CHECK_RDB_NAME=redis-check-rdb$(PROG_SUFFIX)
+REDIS_CHECK_AOF_NAME=redis-check-aof$(PROG_SUFFIX)
+ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(REDIS_SERVER_OBJ) $(REDIS_CLI_OBJ) $(REDIS_BENCHMARK_OBJ)))
+
+# Default target: build every binary. Sentinel and the check-rdb/check-aof
+# tools are copies of redis-server (behavior is selected at runtime by argv[0]).
+all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) $(TLS_MODULE)
	@echo ""
	@echo "Hint: It's a good idea to run 'make test' ;)"
	@echo ""
+
+# Generate header dependency information for all sources. The leading '-'
+# and trailing '|| true' make this best-effort so it never fails the build.
+Makefile.dep:
	-$(REDIS_CC) -MM $(ALL_SOURCES) > Makefile.dep 2> /dev/null || true
+
+# Include the dependency file only when the requested goal is not listed in
+# NODEPS (e.g. clean/distclean), avoiding a pointless regeneration.
+ifeq (0, $(words $(findstring $(MAKECMDGOALS), $(NODEPS))))
+-include Makefile.dep
+endif
+
+.PHONY: all
+
+# Persist the current build settings into .make-settings so the next
+# invocation can detect a flags change (see the PREV_FINAL_* checks below)
+# and force a full rebuild; also rebuilds the bundled dependencies.
+persist-settings: distclean
	echo STD=$(STD) >> .make-settings
	echo WARN=$(WARN) >> .make-settings
	echo OPT=$(OPT) >> .make-settings
	echo MALLOC=$(MALLOC) >> .make-settings
	echo BUILD_TLS=$(BUILD_TLS) >> .make-settings
	echo USE_SYSTEMD=$(USE_SYSTEMD) >> .make-settings
	echo CFLAGS=$(CFLAGS) >> .make-settings
	echo LDFLAGS=$(LDFLAGS) >> .make-settings
	echo REDIS_CFLAGS=$(REDIS_CFLAGS) >> .make-settings
	echo REDIS_LDFLAGS=$(REDIS_LDFLAGS) >> .make-settings
	echo PREV_FINAL_CFLAGS=$(FINAL_CFLAGS) >> .make-settings
	echo PREV_FINAL_LDFLAGS=$(FINAL_LDFLAGS) >> .make-settings
	-(cd ../deps && $(MAKE) $(DEPENDENCY_TARGETS))
+
+.PHONY: persist-settings
+
+# Prerequisites target: a timestamp file every object depends on.
+.make-prerequisites:
	@touch $@
+
+# Clean everything, persist settings and build dependencies if anything changed
+ifneq ($(strip $(PREV_FINAL_CFLAGS)), $(strip $(FINAL_CFLAGS)))
+.make-prerequisites: persist-settings
+endif
+
+ifneq ($(strip $(PREV_FINAL_LDFLAGS)), $(strip $(FINAL_LDFLAGS)))
+.make-prerequisites: persist-settings
+endif
+
+# redis-server: link against the bundled hiredis/lua/hdr_histogram/fpconv.
+$(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a $(FINAL_LIBS)
+
+# redis-sentinel: a copy of the server binary (mode chosen at runtime).
+$(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME)
	$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME)
+
+# redis-check-rdb: also a copy of the server binary.
+$(REDIS_CHECK_RDB_NAME): $(REDIS_SERVER_NAME)
	$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_RDB_NAME)
+
+# redis-check-aof: also a copy of the server binary.
+$(REDIS_CHECK_AOF_NAME): $(REDIS_SERVER_NAME)
	$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_AOF_NAME)
+
+# redis-tls.so: TLS support built as a loadable module (BUILD_TLS=module).
+$(TLS_MODULE_NAME): $(REDIS_SERVER_NAME)
	$(QUIET_CC)$(CC) -o $@ tls.c -shared -fPIC $(TLS_MODULE_CFLAGS) $(TLS_CLIENT_LIBS)
+
+# redis-cli
+$(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o $(FINAL_LIBS) $(TLS_CLIENT_LIBS)
+
+# redis-benchmark
+$(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ)
	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/hdr_histogram/libhdrhistogram.a $(FINAL_LIBS) $(TLS_CLIENT_LIBS)
+
+# Per-object dependency files emitted by the -MMD flag in the %.o rule below.
+DEP = $(REDIS_SERVER_OBJ:%.o=%.d) $(REDIS_CLI_OBJ:%.o=%.d) $(REDIS_BENCHMARK_OBJ:%.o=%.d)
+-include $(DEP)
+
+# Because the jemalloc.h header is generated as a part of the jemalloc build,
+# building it should complete before building any other object. Instead of
+# depending on a single artifact, build all dependencies first.
+%.o: %.c .make-prerequisites
	$(REDIS_CC) -MMD -o $@ -c $<
+
+# The file commands.def is checked in and doesn't normally need to be rebuilt. It
+# is built only if python is available and its prereqs are modified.
+ifneq (,$(PYTHON))
+$(COMMANDS_DEF_FILENAME).def: commands/*.json ../utils/generate-command-code.py
	$(QUIET_GEN)$(PYTHON) ../utils/generate-command-code.py $(GEN_COMMANDS_FLAGS)
+endif
+
+# commands.c #includes the generated .def file, so rebuild it when it changes.
+commands.c: $(COMMANDS_DEF_FILENAME).def
+
+# Remove build artifacts for this directory only (deps are left alone).
+clean:
	rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html Makefile.dep *.so
	rm -f $(DEP)
+
+.PHONY: clean
+
+# Clean this directory plus the bundled deps, module tests and settings file.
+distclean: clean
	-(cd ../deps && $(MAKE) distclean)
	-(cd modules && $(MAKE) clean)
	-(cd ../tests/modules && $(MAKE) clean)
	-(rm -f .make-*)
+
+.PHONY: distclean
+
+# Test runners live in the repository root; each target builds what it needs.
+test: $(REDIS_SERVER_NAME) $(REDIS_CHECK_AOF_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME)
	@(cd ..; ./runtest)
+
+test-modules: $(REDIS_SERVER_NAME)
	@(cd ..; ./runtest-moduleapi)
+
+test-sentinel: $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME)
	@(cd ..; ./runtest-sentinel)
+
+test-cluster: $(REDIS_SERVER_NAME) $(REDIS_CLI_NAME)
	@(cd ..; ./runtest-cluster)
+
+check: test
+
+# Coverage report: rebuild with gcov flags, run the tests, render lcov HTML.
+lcov:
	$(MAKE) gcov
	@(set -e; cd ..; ./runtest --clients 1)
	@geninfo -o redis.info .
	@genhtml --legend -o lcov-html redis.info
+
+.PHONY: lcov
+
+bench: $(REDIS_BENCHMARK_NAME)
	./$(REDIS_BENCHMARK_NAME)
+
+32bit:
	@echo ""
	@echo "WARNING: if it fails under Linux you probably need to install libc6-dev-i386"
	@echo ""
	$(MAKE) CFLAGS="-m32" LDFLAGS="-m32"
+
+gcov:
	$(MAKE) REDIS_CFLAGS="-fprofile-arcs -ftest-coverage -DCOVERAGE_TEST" REDIS_LDFLAGS="-fprofile-arcs -ftest-coverage"
+
+noopt:
	$(MAKE) OPTIMIZATION="-O0"
+
+valgrind:
	$(MAKE) OPTIMIZATION="-O0" MALLOC="libc"
+
+helgrind:
	$(MAKE) OPTIMIZATION="-O0" MALLOC="libc" CFLAGS="-D__ATOMIC_VAR_FORCE_SYNC_MACROS" REDIS_CFLAGS="-I/usr/local/include" REDIS_LDFLAGS="-L/usr/local/lib"
+
+install: all
	@mkdir -p $(INSTALL_BIN)
	$(call MAKE_INSTALL,$(REDIS_SERVER_NAME),$(INSTALL_BIN))
	$(call MAKE_INSTALL,$(REDIS_BENCHMARK_NAME),$(INSTALL_BIN))
	$(call MAKE_INSTALL,$(REDIS_CLI_NAME),$(INSTALL_BIN))
	@ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_CHECK_RDB_NAME)
	@ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_CHECK_AOF_NAME)
	@ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_SENTINEL_NAME)
+
+# NOTE(review): the {a,b,c} form below is bash brace expansion; make runs
+# recipes with /bin/sh, which on some systems (e.g. dash) does not expand
+# braces, leaving files behind. Consider listing the paths explicitly.
+uninstall:
	rm -f $(INSTALL_BIN)/{$(REDIS_SERVER_NAME),$(REDIS_BENCHMARK_NAME),$(REDIS_CLI_NAME),$(REDIS_CHECK_RDB_NAME),$(REDIS_CHECK_AOF_NAME),$(REDIS_SENTINEL_NAME)}
diff --git a/src/acl.c b/src/acl.c
new file mode 100644
index 0000000..5fd956d
--- /dev/null
+++ b/src/acl.c
@@ -0,0 +1,3150 @@
+/*
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "sha256.h"
+#include <fcntl.h>
+#include <ctype.h>
+
+/* =============================================================================
+ * Global state for ACLs
+ * ==========================================================================*/
+
+rax *Users; /* Table mapping usernames to user structures. */
+
+user *DefaultUser; /* Global reference to the default user.
+ Every new connection is associated to it, if no
+ AUTH or HELLO is used to authenticate with a
+ different user. */
+
+list *UsersToLoad; /* This is a list of users found in the configuration file
+ that we'll need to load in the final stage of Redis
+ initialization, after all the modules are already
+ loaded. Every list element is a NULL terminated
+ array of SDS pointers: the first is the user name,
+ all the remaining pointers are ACL rules in the same
+ format as ACLSetUser(). */
+list *ACLLog; /* Our security log, the user is able to inspect that
+ using the ACL LOG command .*/
+
+long long ACLLogEntryCount = 0; /* Number of ACL log entries created */
+
+static rax *commandId = NULL; /* Command name to id mapping */
+
+static unsigned long nextid = 0; /* Next command id that has not been assigned */
+
+struct ACLCategoryItem {
+ const char *name;
+ uint64_t flag;
+} ACLCommandCategories[] = { /* See redis.conf for details on each category. */
+ {"keyspace", ACL_CATEGORY_KEYSPACE},
+ {"read", ACL_CATEGORY_READ},
+ {"write", ACL_CATEGORY_WRITE},
+ {"set", ACL_CATEGORY_SET},
+ {"sortedset", ACL_CATEGORY_SORTEDSET},
+ {"list", ACL_CATEGORY_LIST},
+ {"hash", ACL_CATEGORY_HASH},
+ {"string", ACL_CATEGORY_STRING},
+ {"bitmap", ACL_CATEGORY_BITMAP},
+ {"hyperloglog", ACL_CATEGORY_HYPERLOGLOG},
+ {"geo", ACL_CATEGORY_GEO},
+ {"stream", ACL_CATEGORY_STREAM},
+ {"pubsub", ACL_CATEGORY_PUBSUB},
+ {"admin", ACL_CATEGORY_ADMIN},
+ {"fast", ACL_CATEGORY_FAST},
+ {"slow", ACL_CATEGORY_SLOW},
+ {"blocking", ACL_CATEGORY_BLOCKING},
+ {"dangerous", ACL_CATEGORY_DANGEROUS},
+ {"connection", ACL_CATEGORY_CONNECTION},
+ {"transaction", ACL_CATEGORY_TRANSACTION},
+ {"scripting", ACL_CATEGORY_SCRIPTING},
+ {NULL,0} /* Terminator. */
+};
+
+struct ACLUserFlag {
+ const char *name;
+ uint64_t flag;
+} ACLUserFlags[] = {
+ /* Note: the order here dictates the emitted order at ACLDescribeUser */
+ {"on", USER_FLAG_ENABLED},
+ {"off", USER_FLAG_DISABLED},
+ {"nopass", USER_FLAG_NOPASS},
+ {"skip-sanitize-payload", USER_FLAG_SANITIZE_PAYLOAD_SKIP},
+ {"sanitize-payload", USER_FLAG_SANITIZE_PAYLOAD},
+ {NULL,0} /* Terminator. */
+};
+
+struct ACLSelectorFlags {
+ const char *name;
+ uint64_t flag;
+} ACLSelectorFlags[] = {
+ /* Note: the order here dictates the emitted order at ACLDescribeUser */
+ {"allkeys", SELECTOR_FLAG_ALLKEYS},
+ {"allchannels", SELECTOR_FLAG_ALLCHANNELS},
+ {"allcommands", SELECTOR_FLAG_ALLCOMMANDS},
+ {NULL,0} /* Terminator. */
+};
+
+/* ACL selectors are private and not exposed outside of acl.c. */
+typedef struct {
+    uint32_t flags; /* See SELECTOR_FLAG_* */
+    /* The bit in allowed_commands is set if this user has the right to
+     * execute this command.
+     *
+     * If the bit for a given command is NOT set and the command has
+     * allowed first-args, Redis will also check allowed_firstargs in order to
+     * understand if the command can be executed. */
+    uint64_t allowed_commands[USER_COMMAND_BITS_COUNT/64];
+    /* allowed_firstargs is used by ACL rules to block access to a command unless a
+     * specific argv[1] is given.
+     *
+     * For each command ID (corresponding to the command bit set in allowed_commands),
+     * this array points to an array of SDS strings, terminated by a NULL pointer,
+     * with all the first-args that are allowed for this command. When no first-arg
+     * matching is used, the field is just set to NULL to avoid allocating
+     * USER_COMMAND_BITS_COUNT pointers. */
+    sds **allowed_firstargs;
+    list *patterns; /* A list of allowed key patterns. If this field is NULL
+                       the user cannot mention any key in a command, unless
+                       the flag ALLKEYS is set in the user. */
+    list *channels; /* A list of allowed Pub/Sub channel patterns. If this
+                       field is NULL the user cannot mention any channel in a
+                       `PUBLISH` or [P][UNSUBSCRIBE] command, unless the flag
+                       ALLCHANNELS is set in the user. */
+    sds command_rules; /* A string representation of the ordered categories and commands, this
+                        * is used to regenerate the original ACL string for display. */
+} aclSelector;
+
+/* Forward declarations for helpers implemented later in this file. */
+void ACLResetFirstArgsForCommand(aclSelector *selector, unsigned long id);
+void ACLResetFirstArgs(aclSelector *selector);
+void ACLAddAllowedFirstArg(aclSelector *selector, unsigned long id, const char *sub);
+void ACLFreeLogEntry(void *le);
+int ACLSetSelector(aclSelector *selector, const char *op, size_t oplen);
+
+/* The length of the string representation of a hashed password:
+ * two hex characters per SHA256 byte. */
+#define HASH_PASSWORD_LEN (SHA256_BLOCK_SIZE*2)
+
+/* =============================================================================
+ * Helper functions for the rest of the ACL implementation
+ * ==========================================================================*/
+
+/* Return zero if strings are the same, non-zero if they are not.
+ * The comparison is performed in a way that prevents an attacker from
+ * obtaining information about the nature of the strings just by monitoring
+ * the execution time of the function. Note: The two strings must be the
+ * same length. */
+int time_independent_strcmp(char *a, char *b, int len) {
+    int diff = 0;
+    /* Branch-free: XOR every byte pair and OR the results, so the loop
+     * always runs 'len' iterations regardless of where a mismatch is. */
+    for (int j = 0; j < len; j++) {
+        diff |= (a[j] ^ b[j]);
+    }
+    return diff; /* If zero strings are the same. */
+}
+
+/* Given an SDS string, returns the SHA256 hex representation as a
+ * new SDS string. The caller owns (and must free) the returned sds. */
+sds ACLHashPassword(unsigned char *cleartext, size_t len) {
+    SHA256_CTX ctx;
+    unsigned char hash[SHA256_BLOCK_SIZE];
+    char hex[HASH_PASSWORD_LEN];
+    char *cset = "0123456789abcdef";
+
+    sha256_init(&ctx);
+    sha256_update(&ctx,(unsigned char*)cleartext,len);
+    sha256_final(&ctx,hash);
+
+    /* Expand each hash byte into two lowercase hex digits. Note that 'hex'
+     * is deliberately not NUL-terminated: sdsnewlen() takes an explicit
+     * length below. */
+    for (int j = 0; j < SHA256_BLOCK_SIZE; j++) {
+        hex[j*2] = cset[((hash[j]&0xF0)>>4)];
+        hex[j*2+1] = cset[(hash[j]&0xF)];
+    }
+    return sdsnewlen(hex,HASH_PASSWORD_LEN);
+}
+
+/* Given a hash and the hash length, returns C_OK if it is a valid password
+ * hash, or C_ERR otherwise. */
+int ACLCheckPasswordHash(unsigned char *hash, int hashlen) {
+    if (hashlen != HASH_PASSWORD_LEN) {
+        return C_ERR;
+    }
+
+    /* Password hashes can only be characters that represent
+     * hexadecimal values, which are numbers and lowercase
+     * characters 'a' through 'f'. Uppercase hex is rejected on purpose
+     * (ACLHashPassword only ever emits lowercase). */
+    for(int i = 0; i < HASH_PASSWORD_LEN; i++) {
+        char c = hash[i];
+        if ((c < 'a' || c > 'f') && (c < '0' || c > '9')) {
+            return C_ERR;
+        }
+    }
+    return C_OK;
+}
+
+/* =============================================================================
+ * Low level ACL API
+ * ==========================================================================*/
+
+/* Return 1 if the specified string contains spaces or null characters.
+ * We do this for usernames and key patterns for simpler rewriting of
+ * ACL rules, presentation on ACL list, and to avoid subtle security bugs
+ * that may arise from parsing the rules in presence of escapes.
+ * The function returns 0 if the string has no spaces. */
+int ACLStringHasSpaces(const char *s, size_t len) {
+    for (size_t i = 0; i < len; i++) {
+        /* Cast to unsigned char before calling isspace(): passing a plain
+         * char holding a negative value (possible for bytes >= 0x80 when
+         * char is signed) is undefined behavior per the C standard
+         * (CERT STR37-C). */
+        if (isspace((unsigned char)s[i]) || s[i] == 0) return 1;
+    }
+    return 0;
+}
+
+/* Given the category name the command returns the corresponding flag, or
+ * zero if there is no match. The lookup is case-insensitive and scans the
+ * ACLCommandCategories table up to its NULL terminator. */
+uint64_t ACLGetCommandCategoryFlagByName(const char *name) {
+    for (int j = 0; ACLCommandCategories[j].flag != 0; j++) {
+        if (!strcasecmp(name,ACLCommandCategories[j].name)) {
+            return ACLCommandCategories[j].flag;
+        }
+    }
+    return 0; /* No match. */
+}
+
+/* Method for searching for a user within a list of user definitions. The
+ * list contains an array of user arguments, and we are only
+ * searching the first argument, the username, for a match. */
+int ACLListMatchLoadedUser(void *definition, void *user) {
+    sds *user_definition = definition;
+    return sdscmp(user_definition[0], user) == 0;
+}
+
+/* Method for passwords/pattern comparison used for the user->passwords list
+ * so that we can search for items with listSearchKey(). */
+int ACLListMatchSds(void *a, void *b) {
+    return sdscmp(a,b) == 0;
+}
+
+/* Method to free list elements from ACL users password/patterns lists. */
+void ACLListFreeSds(void *item) {
+    sdsfree(item);
+}
+
+/* Method to duplicate list elements from ACL users password/patterns lists. */
+void *ACLListDupSds(void *item) {
+    return sdsdup(item);
+}
+
+/* Structure used for handling key patterns with different key
+ * based permissions. */
+typedef struct {
+    int flags; /* The CMD_KEYS_* flags for this key pattern */
+    sds pattern; /* The pattern to match keys against */
+} keyPattern;
+
+/* Create a new key pattern. Takes ownership of 'pattern': the sds is stored
+ * directly (not duplicated) and is released by ACLKeyPatternFree(). */
+keyPattern *ACLKeyPatternCreate(sds pattern, int flags) {
+    keyPattern *new = (keyPattern *) zmalloc(sizeof(keyPattern));
+    new->pattern = pattern;
+    new->flags = flags;
+    return new;
+}
+
+/* Free a key pattern and internal structures. */
+void ACLKeyPatternFree(keyPattern *pattern) {
+    sdsfree(pattern->pattern);
+    zfree(pattern);
+}
+
+/* Method for key pattern comparison used for the selector->patterns list
+ * so that we can search for items with listSearchKey(). Two patterns match
+ * when their pattern strings are equal (flags are not compared). */
+int ACLListMatchKeyPattern(void *a, void *b) {
+    return sdscmp(((keyPattern *) a)->pattern,((keyPattern *) b)->pattern) == 0;
+}
+
+/* Method to free list elements from the selector->patterns list. */
+void ACLListFreeKeyPattern(void *item) {
+    ACLKeyPatternFree(item);
+}
+
+/* Method to duplicate list elements from the selector->patterns list. */
+void *ACLListDupKeyPattern(void *item) {
+    keyPattern *old = (keyPattern *) item;
+    return ACLKeyPatternCreate(sdsdup(old->pattern), old->flags);
+}
+
+/* Append the string representation of a key pattern onto the
+ * provided base string: "~", "%R~" or "%W~" followed by the pattern,
+ * depending on the permission flags. */
+sds sdsCatPatternString(sds base, keyPattern *pat) {
+    if (pat->flags == ACL_ALL_PERMISSION) {
+        base = sdscatlen(base,"~",1);
+    } else if (pat->flags == ACL_READ_PERMISSION) {
+        base = sdscatlen(base,"%R~",3);
+    } else if (pat->flags == ACL_WRITE_PERMISSION) {
+        base = sdscatlen(base,"%W~",3);
+    } else {
+        serverPanic("Invalid key pattern flag detected");
+    }
+    return sdscatsds(base, pat->pattern);
+}
+
+/* Create an empty selector with the provided set of initial
+ * flags. The selector will by default have no permissions. */
+aclSelector *ACLCreateSelector(int flags) {
+    aclSelector *selector = zmalloc(sizeof(aclSelector));
+    /* Merge in the configured Pub/Sub default (e.g. allchannels). */
+    selector->flags = flags | server.acl_pubsub_default;
+    selector->patterns = listCreate();
+    selector->channels = listCreate();
+    selector->allowed_firstargs = NULL;
+    selector->command_rules = sdsempty();
+
+    listSetMatchMethod(selector->patterns,ACLListMatchKeyPattern);
+    listSetFreeMethod(selector->patterns,ACLListFreeKeyPattern);
+    listSetDupMethod(selector->patterns,ACLListDupKeyPattern);
+    listSetMatchMethod(selector->channels,ACLListMatchSds);
+    listSetFreeMethod(selector->channels,ACLListFreeSds);
+    listSetDupMethod(selector->channels,ACLListDupSds);
+    memset(selector->allowed_commands,0,sizeof(selector->allowed_commands));
+
+    return selector;
+}
+
+/* Cleanup the provided selector, including all interior structures. */
+void ACLFreeSelector(aclSelector *selector) {
+    listRelease(selector->patterns);
+    listRelease(selector->channels);
+    sdsfree(selector->command_rules);
+    ACLResetFirstArgs(selector);
+    zfree(selector);
+}
+
+/* Create an exact copy of the provided selector. */
+aclSelector *ACLCopySelector(aclSelector *src) {
+    aclSelector *dst = zmalloc(sizeof(aclSelector));
+    dst->flags = src->flags;
+    dst->patterns = listDup(src->patterns);
+    dst->channels = listDup(src->channels);
+    dst->command_rules = sdsdup(src->command_rules);
+    memcpy(dst->allowed_commands,src->allowed_commands,
+           sizeof(dst->allowed_commands));
+    dst->allowed_firstargs = NULL;
+    /* Copy the allowed first-args array of array of SDS strings.
+     * Each per-command array is NULL terminated, and only commands that
+     * actually use first-arg matching have a non-NULL entry. */
+    if (src->allowed_firstargs) {
+        for (int j = 0; j < USER_COMMAND_BITS_COUNT; j++) {
+            if (!(src->allowed_firstargs[j])) continue;
+            for (int i = 0; src->allowed_firstargs[j][i]; i++) {
+                ACLAddAllowedFirstArg(dst, j, src->allowed_firstargs[j][i]);
+            }
+        }
+    }
+    return dst;
+}
+
+/* List method for freeing a selector */
+void ACLListFreeSelector(void *a) {
+    ACLFreeSelector((aclSelector *) a);
+}
+
+/* List method for duplicating a selector */
+void *ACLListDuplicateSelector(void *src) {
+    return ACLCopySelector((aclSelector *)src);
+}
+
+/* All users have an implicit root selector which
+ * provides backwards compatibility to the old ACL
+ * permissions. It is always the first element of u->selectors. */
+aclSelector *ACLUserGetRootSelector(user *u) {
+    serverAssert(listLength(u->selectors));
+    aclSelector *s = (aclSelector *) listNodeValue(listFirst(u->selectors));
+    serverAssert(s->flags & SELECTOR_FLAG_ROOT);
+    return s;
+}
+
+/* Create a new user with the specified name, store it in the list
+ * of users (the Users global radix tree), and returns a reference to
+ * the structure representing the user.
+ *
+ * If the user with such name already exists NULL is returned. */
+user *ACLCreateUser(const char *name, size_t namelen) {
+    if (raxFind(Users,(unsigned char*)name,namelen) != raxNotFound) return NULL;
+    user *u = zmalloc(sizeof(*u));
+    u->name = sdsnewlen(name,namelen);
+    /* New users start disabled and with payload sanitization enabled. */
+    u->flags = USER_FLAG_DISABLED;
+    u->flags |= USER_FLAG_SANITIZE_PAYLOAD;
+    u->passwords = listCreate();
+    u->acl_string = NULL;
+    listSetMatchMethod(u->passwords,ACLListMatchSds);
+    listSetFreeMethod(u->passwords,ACLListFreeSds);
+    listSetDupMethod(u->passwords,ACLListDupSds);
+
+    u->selectors = listCreate();
+    listSetFreeMethod(u->selectors,ACLListFreeSelector);
+    listSetDupMethod(u->selectors,ACLListDuplicateSelector);
+
+    /* Add the initial root selector */
+    aclSelector *s = ACLCreateSelector(SELECTOR_FLAG_ROOT);
+    listAddNodeHead(u->selectors, s);
+
+    raxInsert(Users,(unsigned char*)name,namelen,u,NULL);
+    return u;
+}
+
+/* This function should be called when we need an unlinked "fake" user
+ * we can use in order to validate ACL rules or for other similar reasons.
+ * The user will not get linked to the Users radix tree. The returned
+ * user should be released with ACLFreeUser() as usually. */
+user *ACLCreateUnlinkedUser(void) {
+    char username[64];
+    /* Retry with increasing counters until a name not already present in
+     * the Users tree is found (ACLCreateUser returns NULL on collision). */
+    for (int j = 0; ; j++) {
+        snprintf(username,sizeof(username),"__fakeuser:%d__",j);
+        user *fakeuser = ACLCreateUser(username,strlen(username));
+        if (fakeuser == NULL) continue;
+        /* Immediately unlink it: the caller wants a detached user. */
+        int retval = raxRemove(Users,(unsigned char*) username,
+                               strlen(username),NULL);
+        serverAssert(retval != 0);
+        return fakeuser;
+    }
+}
+
+/* Release the memory used by the user structure. Note that this function
+ * will not remove the user from the Users global radix tree. */
+void ACLFreeUser(user *u) {
+    sdsfree(u->name);
+    if (u->acl_string) {
+        decrRefCount(u->acl_string);
+        u->acl_string = NULL;
+    }
+    /* The lists free their elements via the free methods installed at
+     * creation time (sds / selector destructors). */
+    listRelease(u->passwords);
+    listRelease(u->selectors);
+    zfree(u);
+}
+
+/* When a user is deleted we need to cycle the active
+ * connections in order to kill all the pending ones that
+ * are authenticated with such user. */
+void ACLFreeUserAndKillClients(user *u) {
+    listIter li;
+    listNode *ln;
+    listRewind(server.clients,&li);
+    while ((ln = listNext(&li)) != NULL) {
+        client *c = listNodeValue(ln);
+        if (c->user == u) {
+            /* We'll free the connection asynchronously, so
+             * in theory to set a different user is not needed.
+             * However if there are bugs in Redis, soon or later
+             * this may result in some security hole: it's much
+             * more defensive to set the default user and put
+             * it in non authenticated mode. */
+            c->user = DefaultUser;
+            c->authenticated = 0;
+            /* We will write replies to this client later, so we can't
+             * close it directly even if async. */
+            if (c == server.current_client) {
+                c->flags |= CLIENT_CLOSE_AFTER_COMMAND;
+            } else {
+                freeClientAsync(c);
+            }
+        }
+    }
+    ACLFreeUser(u);
+}
+
+/* Copy the user ACL rules from the source user 'src' to the destination
+ * user 'dst' so that at the end of the process they'll have exactly the
+ * same rules (but the names will continue to be the original ones). */
+void ACLCopyUser(user *dst, user *src) {
+    listRelease(dst->passwords);
+    listRelease(dst->selectors);
+    dst->passwords = listDup(src->passwords);
+    dst->selectors = listDup(src->selectors);
+    dst->flags = src->flags;
+    /* The cached ACL string is a shared, refcounted object: drop our old
+     * reference (if any) and share the source's. */
+    if (dst->acl_string) {
+        decrRefCount(dst->acl_string);
+    }
+    dst->acl_string = src->acl_string;
+    if (dst->acl_string) {
+        /* if src is NULL, we set it to NULL, if not, need to increment reference count */
+        incrRefCount(dst->acl_string);
+    }
+}
+
+/* Free all the users registered in the radix tree 'users' and free the
+ * radix tree itself. Each user's clients are also killed, see
+ * ACLFreeUserAndKillClients(). */
+void ACLFreeUsersSet(rax *users) {
+    raxFreeWithCallback(users,(void(*)(void*))ACLFreeUserAndKillClients);
+}
+
+/* Given a command ID, this function set by reference 'word' and 'bit'
+ * so that user->allowed_commands[word] will address the right word
+ * where the corresponding bit for the provided ID is stored, and
+ * so that user->allowed_commands[word]&bit will identify that specific
+ * bit. The function returns C_ERR in case the specified ID overflows
+ * the bitmap in the user representation. */
+int ACLGetCommandBitCoordinates(uint64_t id, uint64_t *word, uint64_t *bit) {
+    if (id >= USER_COMMAND_BITS_COUNT) return C_ERR;
+    /* 64 bits per word: word index is id/64, bit mask is 1 << (id%64). */
+    *word = id / sizeof(uint64_t) / 8;
+    *bit = 1ULL << (id % (sizeof(uint64_t) * 8));
+    return C_OK;
+}
+
+/* Check if the specified command bit is set for the specified user.
+ * The function returns 1 if the bit is set or 0 if it is not.
+ * Note that this function does not check the ALLCOMMANDS flag of the user
+ * but just the lowlevel bitmask.
+ *
+ * If the bit overflows the user internal representation, zero is returned
+ * in order to disallow the execution of the command in such edge case. */
+int ACLGetSelectorCommandBit(const aclSelector *selector, unsigned long id) {
+    uint64_t word, bit;
+    if (ACLGetCommandBitCoordinates(id,&word,&bit) == C_ERR) return 0;
+    return (selector->allowed_commands[word] & bit) != 0;
+}
+
+/* When +@all or allcommands is given, we set a reserved bit as well that we
+ * can later test, to see if the user has the right to execute "future commands",
+ * that is, commands loaded later via modules. */
+int ACLSelectorCanExecuteFutureCommands(aclSelector *selector) {
+    return ACLGetSelectorCommandBit(selector,USER_COMMAND_BITS_COUNT-1);
+}
+
+/* Set the specified command bit for the specified user to 'value' (0 or 1).
+ * If the bit overflows the user internal representation, no operation
+ * is performed. As a side effect of calling this function with a value of
+ * zero, the user flag ALLCOMMANDS is cleared since it is no longer possible
+ * to skip the command bit explicit test. */
+void ACLSetSelectorCommandBit(aclSelector *selector, unsigned long id, int value) {
+    uint64_t word, bit;
+    if (ACLGetCommandBitCoordinates(id,&word,&bit) == C_ERR) return;
+    if (value) {
+        selector->allowed_commands[word] |= bit;
+    } else {
+        selector->allowed_commands[word] &= ~bit;
+        selector->flags &= ~SELECTOR_FLAG_ALLCOMMANDS;
+    }
+}
+
+/* Remove a rule from the retained command rules. Always match rules
+ * verbatim, but also remove subcommand rules if we are adding or removing the
+ * entire command. Rules are stored as a single space-separated sds string,
+ * each token being "+name" or "-name" (possibly "name|subcommand"). */
+void ACLSelectorRemoveCommandRule(aclSelector *selector, sds new_rule) {
+    size_t new_len = sdslen(new_rule);
+    char *existing_rule = selector->command_rules;
+
+    /* Loop over the existing rules, trying to find a rule that "matches"
+     * the new rule. If we find a match, then remove the command from the string by
+     * copying the later rules over it. */
+    while(existing_rule[0]) {
+        /* The first character of the rule is +/-, which we don't need to compare. */
+        char *copy_position = existing_rule;
+        existing_rule += 1;
+
+        /* Assume a trailing space after a command is part of the command, like '+get ', so trim it
+         * as well if the command is removed. */
+        char *rule_end = strchr(existing_rule, ' ');
+        if (!rule_end) {
+            /* This is the last rule, so move it to the end of the string. */
+            rule_end = existing_rule + strlen(existing_rule);
+
+            /* This approach can leave a trailing space if the last rule is removed,
+             * but only if it's not the first rule, so handle that case. */
+            if (copy_position != selector->command_rules) copy_position -= 1;
+        }
+        char *copy_end = rule_end;
+        if (*copy_end == ' ') copy_end++;
+
+        /* Exact match or the rule we are comparing is a subcommand denoted by '|' */
+        size_t existing_len = rule_end - existing_rule;
+        if (!memcmp(existing_rule, new_rule, min(existing_len, new_len))) {
+            if ((existing_len == new_len) || (existing_len > new_len && (existing_rule[new_len]) == '|')) {
+                /* Copy the remaining rules starting at the next rule to replace the rule to be
+                 * deleted, including the terminating NULL character. */
+                memmove(copy_position, copy_end, strlen(copy_end) + 1);
+                /* Re-scan from the copy position: the byte now at
+                 * copy_position has not been examined yet. */
+                existing_rule = copy_position;
+                continue;
+            }
+        }
+        existing_rule = copy_end;
+    }
+
+    /* There is now extra padding at the end of the rules, so clean that up. */
+    sdsupdatelen(selector->command_rules);
+}
+
+/* This function is responsible for updating the command_rules struct so that relative ordering of
+ * commands and categories is maintained and can be reproduced without loss. */
+void ACLUpdateCommandRules(aclSelector *selector, const char *rule, int allow) {
+    sds new_rule = sdsnew(rule);
+    sdstolower(new_rule);
+
+    /* Drop any previous occurrence of the rule, then append the new one. */
+    ACLSelectorRemoveCommandRule(selector, new_rule);
+    if (sdslen(selector->command_rules)) selector->command_rules = sdscat(selector->command_rules, " ");
+    selector->command_rules = sdscatfmt(selector->command_rules, allow ? "+%S" : "-%S", new_rule);
+    sdsfree(new_rule);
+}
+
+/* This function is used to allow/block a specific command.
+ * Allowing/blocking a container command also applies for its subcommands.
+ * Any first-arg restrictions previously set for the command are cleared. */
+void ACLChangeSelectorPerm(aclSelector *selector, struct redisCommand *cmd, int allow) {
+    unsigned long id = cmd->id;
+    ACLSetSelectorCommandBit(selector,id,allow);
+    ACLResetFirstArgsForCommand(selector,id);
+    if (cmd->subcommands_dict) {
+        dictEntry *de;
+        dictIterator *di = dictGetSafeIterator(cmd->subcommands_dict);
+        while((de = dictNext(di)) != NULL) {
+            struct redisCommand *sub = (struct redisCommand *)dictGetVal(de);
+            ACLSetSelectorCommandBit(selector,sub->id,allow);
+        }
+        dictReleaseIterator(di);
+    }
+}
+
+/* This is like ACLSetSelectorCommandBit(), but instead of setting the specified
+ * ID, it will check all the commands in the category specified as argument,
+ * and will set all the bits corresponding to such commands to the specified
+ * value. Commands are matched recursively, so the flags of subcommands are
+ * also considered. Note: the caller is expected to have validated the
+ * category flag already (this function has no return value). */
+void ACLSetSelectorCommandBitsForCategory(dict *commands, aclSelector *selector, uint64_t cflag, int value) {
+    dictIterator *di = dictGetIterator(commands);
+    dictEntry *de;
+    while ((de = dictNext(di)) != NULL) {
+        struct redisCommand *cmd = dictGetVal(de);
+        if (cmd->acl_categories & cflag) {
+            ACLChangeSelectorPerm(selector,cmd,value);
+        }
+        if (cmd->subcommands_dict) {
+            /* Recurse into subcommands: they may carry category flags of
+             * their own even when the container command does not. */
+            ACLSetSelectorCommandBitsForCategory(cmd->subcommands_dict, selector, cflag, value);
+        }
+    }
+    dictReleaseIterator(di);
+}
+
/* This function is responsible for recomputing the command bits for all selectors
 * of the existing users. It uses the 'command_rules', a string representation of
 * the ordered categories and commands, to recompute the command bits.
 * Typically needed when the command table changes (e.g. modules loading or
 * unloading commands) so the per-selector bitmaps match the textual rules. */
void ACLRecomputeCommandBitsFromCommandRulesAllUsers(void) {
    raxIterator ri;
    raxStart(&ri,Users);
    raxSeek(&ri,"^",NULL,0);
    while(raxNext(&ri)) {
        user *u = ri.data;
        listIter li;
        listNode *ln;
        listRewind(u->selectors,&li);
        while((ln = listNext(&li))) {
            aclSelector *selector = (aclSelector *) listNodeValue(ln);
            int argc = 0;
            /* Split the stored rules BEFORE resetting the selector below:
             * applying "+@all"/"-@all" clears selector->command_rules, so the
             * copy held by argv is what we replay afterwards. */
            sds *argv = sdssplitargs(selector->command_rules, &argc);
            serverAssert(argv != NULL);
            /* Checking selector's permissions for all commands to start with a clean state. */
            if (ACLSelectorCanExecuteFutureCommands(selector)) {
                int res = ACLSetSelector(selector,"+@all",-1);
                serverAssert(res == C_OK);
            } else {
                int res = ACLSetSelector(selector,"-@all",-1);
                serverAssert(res == C_OK);
            }

            /* Apply all of the commands and categories to this selector. */
            for(int i = 0; i < argc; i++) {
                int res = ACLSetSelector(selector, argv[i], sdslen(argv[i]));
                serverAssert(res == C_OK);
            }
            sdsfreesplitres(argv, argc);
        }
    }
    raxStop(&ri);

}
+
/* Set or clear (according to 'allow') the command bits of 'selector' for every
 * command belonging to the named category. 'category' includes the leading
 * '@' character (hence the +1 skip when resolving the flag). The textual rule
 * is also recorded in the selector's command_rules string so the bitmap can
 * be recomputed later. Returns C_ERR if the category name does not exist,
 * C_OK otherwise. */
int ACLSetSelectorCategory(aclSelector *selector, const char *category, int allow) {
    uint64_t cflag = ACLGetCommandCategoryFlagByName(category + 1);
    if (!cflag) return C_ERR;

    ACLUpdateCommandRules(selector, category, allow);

    /* Set the actual command bits on the selector. */
    ACLSetSelectorCommandBitsForCategory(server.orig_commands, selector, cflag, allow);
    return C_OK;
}
+
+void ACLCountCategoryBitsForCommands(dict *commands, aclSelector *selector, unsigned long *on, unsigned long *off, uint64_t cflag) {
+ dictIterator *di = dictGetIterator(commands);
+ dictEntry *de;
+ while ((de = dictNext(di)) != NULL) {
+ struct redisCommand *cmd = dictGetVal(de);
+ if (cmd->acl_categories & cflag) {
+ if (ACLGetSelectorCommandBit(selector,cmd->id))
+ (*on)++;
+ else
+ (*off)++;
+ }
+ if (cmd->subcommands_dict) {
+ ACLCountCategoryBitsForCommands(cmd->subcommands_dict, selector, on, off, cflag);
+ }
+ }
+ dictReleaseIterator(di);
+}
+
+/* Return the number of commands allowed (on) and denied (off) for the user 'u'
+ * in the subset of commands flagged with the specified category name.
+ * If the category name is not valid, C_ERR is returned, otherwise C_OK is
+ * returned and on and off are populated by reference. */
+int ACLCountCategoryBitsForSelector(aclSelector *selector, unsigned long *on, unsigned long *off,
+ const char *category)
+{
+ uint64_t cflag = ACLGetCommandCategoryFlagByName(category);
+ if (!cflag) return C_ERR;
+
+ *on = *off = 0;
+ ACLCountCategoryBitsForCommands(server.orig_commands, selector, on, off, cflag);
+ return C_OK;
+}
+
/* This function returns an SDS string representing the specified selector ACL
 * rules related to command execution, in the same format you could set them
 * back using ACL SETUSER. The function will return just the set of rules needed
 * to recreate the user commands bitmap, without including other user flags such
 * as on/off, passwords and so forth. The returned string always starts with
 * the +@all or -@all rule, depending on the user bitmap, and is followed, if
 * needed, by the other rules needed to narrow or extend what the user can do.
 *
 * The caller takes ownership of the returned SDS string. */
sds ACLDescribeSelectorCommandRules(aclSelector *selector) {
    sds rules = sdsempty();

    /* We use this fake selector as a "sanity" check to make sure the rules
     * we generate have the same bitmap as those on the current selector. */
    aclSelector *fake_selector = ACLCreateSelector(0);

    /* Here we want to understand if we should start with +@all or -@all.
     * Note that when starting with +@all and subtracting, the user
     * will be able to execute future commands, while -@all and adding will just
     * allow the user the run the selected commands and/or categories.
     * How do we test for that? We use the trick of a reserved command ID bit
     * that is set only by +@all (and its alias "allcommands"). */
    if (ACLSelectorCanExecuteFutureCommands(selector)) {
        rules = sdscat(rules,"+@all ");
        ACLSetSelector(fake_selector,"+@all",-1);
    } else {
        rules = sdscat(rules,"-@all ");
        ACLSetSelector(fake_selector,"-@all",-1);
    }

    /* Apply all of the commands and categories to the fake selector. */
    int argc = 0;
    sds *argv = sdssplitargs(selector->command_rules, &argc);
    serverAssert(argv != NULL);

    for(int i = 0; i < argc; i++) {
        int res = ACLSetSelector(fake_selector, argv[i], -1);
        serverAssert(res == C_OK);
    }
    /* The stored rules are already in the right textual form: append them
     * verbatim after the leading +@all/-@all. */
    if (sdslen(selector->command_rules)) {
        rules = sdscatfmt(rules, "%S ", selector->command_rules);
    }
    sdsfreesplitres(argv, argc);

    /* Trim the final useless space. */
    sdsrange(rules,0,-2);

    /* This is technically not needed, but we want to verify that now the
     * predicted bitmap is exactly the same as the user bitmap, and abort
     * otherwise, because aborting is better than a security risk in this
     * code path. */
    if (memcmp(fake_selector->allowed_commands,
               selector->allowed_commands,
               sizeof(selector->allowed_commands)) != 0)
    {
        serverLog(LL_WARNING,
            "CRITICAL ERROR: User ACLs don't match final bitmap: '%s'",
            rules);
        serverPanic("No bitmap match in ACLDescribeSelectorCommandRules()");
    }
    ACLFreeSelector(fake_selector);
    return rules;
}
+
+sds ACLDescribeSelector(aclSelector *selector) {
+ listIter li;
+ listNode *ln;
+ sds res = sdsempty();
+ /* Key patterns. */
+ if (selector->flags & SELECTOR_FLAG_ALLKEYS) {
+ res = sdscatlen(res,"~* ",3);
+ } else {
+ listRewind(selector->patterns,&li);
+ while((ln = listNext(&li))) {
+ keyPattern *thispat = (keyPattern *)listNodeValue(ln);
+ res = sdsCatPatternString(res, thispat);
+ res = sdscatlen(res," ",1);
+ }
+ }
+
+ /* Pub/sub channel patterns. */
+ if (selector->flags & SELECTOR_FLAG_ALLCHANNELS) {
+ res = sdscatlen(res,"&* ",3);
+ } else {
+ res = sdscatlen(res,"resetchannels ",14);
+ listRewind(selector->channels,&li);
+ while((ln = listNext(&li))) {
+ sds thispat = listNodeValue(ln);
+ res = sdscatlen(res,"&",1);
+ res = sdscatsds(res,thispat);
+ res = sdscatlen(res," ",1);
+ }
+ }
+
+ /* Command rules. */
+ sds rules = ACLDescribeSelectorCommandRules(selector);
+ res = sdscatsds(res,rules);
+ sdsfree(rules);
+ return res;
+}
+
/* This is similar to ACLDescribeSelectorCommandRules(), however instead of
 * describing just the user command rules, everything is described: user
 * flags, keys, passwords and finally the command rules obtained via
 * the ACLDescribeSelectorCommandRules() function. This is the function we call
 * when we want to rewrite the configuration files describing ACLs and
 * in order to show users with ACL LIST.
 *
 * The result is cached in u->acl_string and invalidated by ACLSetUser();
 * the returned object has its refcount incremented, so the caller must
 * decrRefCount() it when done. */
robj *ACLDescribeUser(user *u) {
    /* Return the cached description if it is still valid. */
    if (u->acl_string) {
        incrRefCount(u->acl_string);
        return u->acl_string;
    }

    sds res = sdsempty();

    /* Flags. */
    for (int j = 0; ACLUserFlags[j].flag; j++) {
        if (u->flags & ACLUserFlags[j].flag) {
            res = sdscat(res,ACLUserFlags[j].name);
            res = sdscatlen(res," ",1);
        }
    }

    /* Passwords. */
    listIter li;
    listNode *ln;
    listRewind(u->passwords,&li);
    while((ln = listNext(&li))) {
        sds thispass = listNodeValue(ln);
        res = sdscatlen(res,"#",1);
        res = sdscatsds(res,thispass);
        res = sdscatlen(res," ",1);
    }

    /* Selectors (Commands and keys) */
    listRewind(u->selectors,&li);
    while((ln = listNext(&li))) {
        aclSelector *selector = (aclSelector *) listNodeValue(ln);
        sds default_perm = ACLDescribeSelector(selector);
        /* The root selector is printed inline; additional selectors are
         * wrapped in parentheses, matching the ACL SETUSER syntax. */
        if (selector->flags & SELECTOR_FLAG_ROOT) {
            res = sdscatfmt(res, "%s", default_perm);
        } else {
            res = sdscatfmt(res, " (%s)", default_perm);
        }
        sdsfree(default_perm);
    }

    u->acl_string = createObject(OBJ_STRING, res);
    /* because we are returning it, have to increase count */
    incrRefCount(u->acl_string);

    return u->acl_string;
}
+
+/* Get a command from the original command table, that is not affected
+ * by the command renaming operations: we base all the ACL work from that
+ * table, so that ACLs are valid regardless of command renaming. */
+struct redisCommand *ACLLookupCommand(const char *name) {
+ struct redisCommand *cmd;
+ sds sdsname = sdsnew(name);
+ cmd = lookupCommandBySdsLogic(server.orig_commands,sdsname);
+ sdsfree(sdsname);
+ return cmd;
+}
+
+/* Flush the array of allowed first-args for the specified user
+ * and command ID. */
+void ACLResetFirstArgsForCommand(aclSelector *selector, unsigned long id) {
+ if (selector->allowed_firstargs && selector->allowed_firstargs[id]) {
+ for (int i = 0; selector->allowed_firstargs[id][i]; i++)
+ sdsfree(selector->allowed_firstargs[id][i]);
+ zfree(selector->allowed_firstargs[id]);
+ selector->allowed_firstargs[id] = NULL;
+ }
+}
+
+/* Flush the entire table of first-args. This is useful on +@all, -@all
+ * or similar to return back to the minimal memory usage (and checks to do)
+ * for the user. */
+void ACLResetFirstArgs(aclSelector *selector) {
+ if (selector->allowed_firstargs == NULL) return;
+ for (int j = 0; j < USER_COMMAND_BITS_COUNT; j++) {
+ if (selector->allowed_firstargs[j]) {
+ for (int i = 0; selector->allowed_firstargs[j][i]; i++)
+ sdsfree(selector->allowed_firstargs[j][i]);
+ zfree(selector->allowed_firstargs[j]);
+ }
+ }
+ zfree(selector->allowed_firstargs);
+ selector->allowed_firstargs = NULL;
+}
+
/* Add a first-arg to the list of allowed first arguments for the given
 * selector and command ID. Duplicates (case-insensitive) are ignored. */
void ACLAddAllowedFirstArg(aclSelector *selector, unsigned long id, const char *sub) {
    /* If this is the first first-arg to be configured for
     * this user, we have to allocate the first-args array. */
    if (selector->allowed_firstargs == NULL) {
        selector->allowed_firstargs = zcalloc(USER_COMMAND_BITS_COUNT * sizeof(sds*));
    }

    /* We also need to enlarge the allocation pointing to the
     * null terminated SDS array, to make space for this one.
     * To start check the current size, and while we are here
     * make sure the first-arg is not already specified inside. */
    long items = 0;
    if (selector->allowed_firstargs[id]) {
        while(selector->allowed_firstargs[id][items]) {
            /* If it's already here do not add it again. */
            if (!strcasecmp(selector->allowed_firstargs[id][items],sub))
                return;
            items++;
        }
    }

    /* Now we can make space for the new item (and the null term).
     * Note: zrealloc() on a NULL pointer behaves like malloc, covering the
     * first insertion for this command ID. */
    items += 2;
    selector->allowed_firstargs[id] = zrealloc(selector->allowed_firstargs[id], sizeof(sds)*items);
    selector->allowed_firstargs[id][items-2] = sdsnew(sub);
    selector->allowed_firstargs[id][items-1] = NULL;
}
+
+/* Create an ACL selector from the given ACL operations, which should be
+ * a list of space separate ACL operations that starts and ends
+ * with parentheses.
+ *
+ * If any of the operations are invalid, NULL will be returned instead
+ * and errno will be set corresponding to the interior error. */
+aclSelector *aclCreateSelectorFromOpSet(const char *opset, size_t opsetlen) {
+ serverAssert(opset[0] == '(' && opset[opsetlen - 1] == ')');
+ aclSelector *s = ACLCreateSelector(0);
+
+ int argc = 0;
+ sds trimmed = sdsnewlen(opset + 1, opsetlen - 2);
+ sds *argv = sdssplitargs(trimmed, &argc);
+ for (int i = 0; i < argc; i++) {
+ if (ACLSetSelector(s, argv[i], sdslen(argv[i])) == C_ERR) {
+ ACLFreeSelector(s);
+ s = NULL;
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ sdsfreesplitres(argv, argc);
+ sdsfree(trimmed);
+ return s;
+}
+
+/* Set a selector's properties with the provided 'op'.
+ *
+ * +<command> Allow the execution of that command.
+ * May be used with `|` for allowing subcommands (e.g "+config|get")
+ * -<command> Disallow the execution of that command.
+ * May be used with `|` for blocking subcommands (e.g "-config|set")
+ * +@<category> Allow the execution of all the commands in such category
+ * with valid categories are like @admin, @set, @sortedset, ...
+ * and so forth, see the full list in the server.c file where
+ * the Redis command table is described and defined.
+ * The special category @all means all the commands, but currently
+ * present in the server, and that will be loaded in the future
+ * via modules.
+ * +<command>|first-arg Allow a specific first argument of an otherwise
+ * disabled command. Note that this form is not
+ * allowed as negative like -SELECT|1, but
+ * only additive starting with "+".
+ * allcommands Alias for +@all. Note that it implies the ability to execute
+ * all the future commands loaded via the modules system.
+ * nocommands Alias for -@all.
+ * ~<pattern> Add a pattern of keys that can be mentioned as part of
+ * commands. For instance ~* allows all the keys. The pattern
+ * is a glob-style pattern like the one of KEYS.
+ * It is possible to specify multiple patterns.
+ * %R~<pattern> Add key read pattern that specifies which keys can be read
+ * from.
+ * %W~<pattern> Add key write pattern that specifies which keys can be
+ * written to.
+ * allkeys Alias for ~*
+ * resetkeys Flush the list of allowed keys patterns.
+ * &<pattern> Add a pattern of channels that can be mentioned as part of
+ * Pub/Sub commands. For instance &* allows all the channels. The
+ * pattern is a glob-style pattern like the one of PSUBSCRIBE.
+ * It is possible to specify multiple patterns.
+ * allchannels Alias for &*
+ * resetchannels Flush the list of allowed channel patterns.
+ */
+int ACLSetSelector(aclSelector *selector, const char* op, size_t oplen) {
+ if (!strcasecmp(op,"allkeys") ||
+ !strcasecmp(op,"~*"))
+ {
+ selector->flags |= SELECTOR_FLAG_ALLKEYS;
+ listEmpty(selector->patterns);
+ } else if (!strcasecmp(op,"resetkeys")) {
+ selector->flags &= ~SELECTOR_FLAG_ALLKEYS;
+ listEmpty(selector->patterns);
+ } else if (!strcasecmp(op,"allchannels") ||
+ !strcasecmp(op,"&*"))
+ {
+ selector->flags |= SELECTOR_FLAG_ALLCHANNELS;
+ listEmpty(selector->channels);
+ } else if (!strcasecmp(op,"resetchannels")) {
+ selector->flags &= ~SELECTOR_FLAG_ALLCHANNELS;
+ listEmpty(selector->channels);
+ } else if (!strcasecmp(op,"allcommands") ||
+ !strcasecmp(op,"+@all"))
+ {
+ memset(selector->allowed_commands,255,sizeof(selector->allowed_commands));
+ selector->flags |= SELECTOR_FLAG_ALLCOMMANDS;
+ sdsclear(selector->command_rules);
+ ACLResetFirstArgs(selector);
+ } else if (!strcasecmp(op,"nocommands") ||
+ !strcasecmp(op,"-@all"))
+ {
+ memset(selector->allowed_commands,0,sizeof(selector->allowed_commands));
+ selector->flags &= ~SELECTOR_FLAG_ALLCOMMANDS;
+ sdsclear(selector->command_rules);
+ ACLResetFirstArgs(selector);
+ } else if (op[0] == '~' || op[0] == '%') {
+ if (selector->flags & SELECTOR_FLAG_ALLKEYS) {
+ errno = EEXIST;
+ return C_ERR;
+ }
+ int flags = 0;
+ size_t offset = 1;
+ if (op[0] == '%') {
+ for (; offset < oplen; offset++) {
+ if (toupper(op[offset]) == 'R' && !(flags & ACL_READ_PERMISSION)) {
+ flags |= ACL_READ_PERMISSION;
+ } else if (toupper(op[offset]) == 'W' && !(flags & ACL_WRITE_PERMISSION)) {
+ flags |= ACL_WRITE_PERMISSION;
+ } else if (op[offset] == '~') {
+ offset++;
+ break;
+ } else {
+ errno = EINVAL;
+ return C_ERR;
+ }
+ }
+ } else {
+ flags = ACL_ALL_PERMISSION;
+ }
+
+ if (ACLStringHasSpaces(op+offset,oplen-offset)) {
+ errno = EINVAL;
+ return C_ERR;
+ }
+ keyPattern *newpat = ACLKeyPatternCreate(sdsnewlen(op+offset,oplen-offset), flags);
+ listNode *ln = listSearchKey(selector->patterns,newpat);
+ /* Avoid re-adding the same key pattern multiple times. */
+ if (ln == NULL) {
+ listAddNodeTail(selector->patterns,newpat);
+ } else {
+ ((keyPattern *)listNodeValue(ln))->flags |= flags;
+ ACLKeyPatternFree(newpat);
+ }
+ selector->flags &= ~SELECTOR_FLAG_ALLKEYS;
+ } else if (op[0] == '&') {
+ if (selector->flags & SELECTOR_FLAG_ALLCHANNELS) {
+ errno = EISDIR;
+ return C_ERR;
+ }
+ if (ACLStringHasSpaces(op+1,oplen-1)) {
+ errno = EINVAL;
+ return C_ERR;
+ }
+ sds newpat = sdsnewlen(op+1,oplen-1);
+ listNode *ln = listSearchKey(selector->channels,newpat);
+ /* Avoid re-adding the same channel pattern multiple times. */
+ if (ln == NULL)
+ listAddNodeTail(selector->channels,newpat);
+ else
+ sdsfree(newpat);
+ selector->flags &= ~SELECTOR_FLAG_ALLCHANNELS;
+ } else if (op[0] == '+' && op[1] != '@') {
+ if (strrchr(op,'|') == NULL) {
+ struct redisCommand *cmd = ACLLookupCommand(op+1);
+ if (cmd == NULL) {
+ errno = ENOENT;
+ return C_ERR;
+ }
+ ACLChangeSelectorPerm(selector,cmd,1);
+ ACLUpdateCommandRules(selector,cmd->fullname,1);
+ } else {
+ /* Split the command and subcommand parts. */
+ char *copy = zstrdup(op+1);
+ char *sub = strrchr(copy,'|');
+ sub[0] = '\0';
+ sub++;
+
+ struct redisCommand *cmd = ACLLookupCommand(copy);
+
+ /* Check if the command exists. We can't check the
+ * first-arg to see if it is valid. */
+ if (cmd == NULL) {
+ zfree(copy);
+ errno = ENOENT;
+ return C_ERR;
+ }
+
+ /* We do not support allowing first-arg of a subcommand */
+ if (cmd->parent) {
+ zfree(copy);
+ errno = ECHILD;
+ return C_ERR;
+ }
+
+ /* The subcommand cannot be empty, so things like DEBUG|
+ * are syntax errors of course. */
+ if (strlen(sub) == 0) {
+ zfree(copy);
+ errno = EINVAL;
+ return C_ERR;
+ }
+
+ if (cmd->subcommands_dict) {
+ /* If user is trying to allow a valid subcommand we can just add its unique ID */
+ cmd = ACLLookupCommand(op+1);
+ if (cmd == NULL) {
+ zfree(copy);
+ errno = ENOENT;
+ return C_ERR;
+ }
+ ACLChangeSelectorPerm(selector,cmd,1);
+ } else {
+ /* If user is trying to use the ACL mech to block SELECT except SELECT 0 or
+ * block DEBUG except DEBUG OBJECT (DEBUG subcommands are not considered
+ * subcommands for now) we use the allowed_firstargs mechanism. */
+
+ /* Add the first-arg to the list of valid ones. */
+ serverLog(LL_WARNING, "Deprecation warning: Allowing a first arg of an otherwise "
+ "blocked command is a misuse of ACL and may get disabled "
+ "in the future (offender: +%s)", op+1);
+ ACLAddAllowedFirstArg(selector,cmd->id,sub);
+ }
+ ACLUpdateCommandRules(selector,op+1,1);
+ zfree(copy);
+ }
+ } else if (op[0] == '-' && op[1] != '@') {
+ struct redisCommand *cmd = ACLLookupCommand(op+1);
+ if (cmd == NULL) {
+ errno = ENOENT;
+ return C_ERR;
+ }
+ ACLChangeSelectorPerm(selector,cmd,0);
+ ACLUpdateCommandRules(selector,cmd->fullname,0);
+ } else if ((op[0] == '+' || op[0] == '-') && op[1] == '@') {
+ int bitval = op[0] == '+' ? 1 : 0;
+ if (ACLSetSelectorCategory(selector,op+1,bitval) == C_ERR) {
+ errno = ENOENT;
+ return C_ERR;
+ }
+ } else {
+ errno = EINVAL;
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+/* Set user properties according to the string "op". The following
+ * is a description of what different strings will do:
+ *
+ * on Enable the user: it is possible to authenticate as this user.
+ * off Disable the user: it's no longer possible to authenticate
+ * with this user, however the already authenticated connections
+ * will still work.
+ * skip-sanitize-payload RESTORE dump-payload sanitization is skipped.
+ * sanitize-payload RESTORE dump-payload is sanitized (default).
+ * ><password> Add this password to the list of valid password for the user.
+ * For example >mypass will add "mypass" to the list.
+ * This directive clears the "nopass" flag (see later).
+ * #<hash> Add this password hash to the list of valid hashes for
+ * the user. This is useful if you have previously computed
+ * the hash, and don't want to store it in plaintext.
+ * This directive clears the "nopass" flag (see later).
+ * <<password> Remove this password from the list of valid passwords.
+ * !<hash> Remove this hashed password from the list of valid passwords.
+ * This is useful when you want to remove a password just by
+ * hash without knowing its plaintext version at all.
+ * nopass All the set passwords of the user are removed, and the user
+ * is flagged as requiring no password: it means that every
+ * password will work against this user. If this directive is
+ * used for the default user, every new connection will be
+ * immediately authenticated with the default user without
+ * any explicit AUTH command required. Note that the "resetpass"
+ * directive will clear this condition.
+ * resetpass Flush the list of allowed passwords. Moreover removes the
+ * "nopass" status. After "resetpass" the user has no associated
+ * passwords and there is no way to authenticate without adding
+ * some password (or setting it as "nopass" later).
+ * reset Performs the following actions: resetpass, resetkeys, resetchannels,
+ * allchannels (if acl-pubsub-default is set), off, clearselectors, -@all.
+ * The user returns to the same state it has immediately after its creation.
+ * (<options>) Create a new selector with the options specified within the
+ * parentheses and attach it to the user. Each option should be
+ * space separated. The first character must be ( and the last
+ * character must be ).
+ * clearselectors Remove all of the currently attached selectors.
+ * Note this does not change the "root" user permissions,
+ * which are the permissions directly applied onto the
+ * user (outside the parentheses).
+ *
+ * Selector options can also be specified by this function, in which case
+ * they update the root selector for the user.
+ *
+ * The 'op' string must be null terminated. The 'oplen' argument should
+ * specify the length of the 'op' string in case the caller requires to pass
+ * binary data (for instance the >password form may use a binary password).
+ * Otherwise the field can be set to -1 and the function will use strlen()
+ * to determine the length.
+ *
+ * The function returns C_OK if the action to perform was understood because
+ * the 'op' string made sense. Otherwise C_ERR is returned if the operation
+ * is unknown or has some syntax error.
+ *
+ * When an error is returned, errno is set to the following values:
+ *
+ * EINVAL: The specified opcode is not understood or the key/channel pattern is
+ * invalid (contains non allowed characters).
+ * ENOENT: The command name or command category provided with + or - is not
+ * known.
+ * EEXIST: You are adding a key pattern after "*" was already added. This is
+ * almost surely an error on the user side.
+ * EISDIR: You are adding a channel pattern after "*" was already added. This is
+ * almost surely an error on the user side.
+ * ENODEV: The password you are trying to remove from the user does not exist.
+ * EBADMSG: The hash you are trying to add is not a valid hash.
+ * ECHILD: Attempt to allow a specific first argument of a subcommand
+ */
/* Apply a single ACL operation 'op' (see the comment above for the full
 * grammar) to user 'u'. Returns C_OK on success, C_ERR with errno set on
 * failure. Unrecognized selector-level operations are forwarded to the
 * user's root selector via ACLSetSelector(). */
int ACLSetUser(user *u, const char *op, ssize_t oplen) {
    /* as we are changing the ACL, the old generated string is now invalid */
    if (u->acl_string) {
        decrRefCount(u->acl_string);
        u->acl_string = NULL;
    }

    if (oplen == -1) oplen = strlen(op);
    if (oplen == 0) return C_OK; /* Empty string is a no-operation. */
    if (!strcasecmp(op,"on")) {
        u->flags |= USER_FLAG_ENABLED;
        u->flags &= ~USER_FLAG_DISABLED;
    } else if (!strcasecmp(op,"off")) {
        u->flags |= USER_FLAG_DISABLED;
        u->flags &= ~USER_FLAG_ENABLED;
    } else if (!strcasecmp(op,"skip-sanitize-payload")) {
        u->flags |= USER_FLAG_SANITIZE_PAYLOAD_SKIP;
        u->flags &= ~USER_FLAG_SANITIZE_PAYLOAD;
    } else if (!strcasecmp(op,"sanitize-payload")) {
        u->flags &= ~USER_FLAG_SANITIZE_PAYLOAD_SKIP;
        u->flags |= USER_FLAG_SANITIZE_PAYLOAD;
    } else if (!strcasecmp(op,"nopass")) {
        u->flags |= USER_FLAG_NOPASS;
        listEmpty(u->passwords);
    } else if (!strcasecmp(op,"resetpass")) {
        u->flags &= ~USER_FLAG_NOPASS;
        listEmpty(u->passwords);
    } else if (op[0] == '>' || op[0] == '#') {
        /* '>' adds a plaintext password (hashed before storing); '#' adds a
         * pre-computed hash, which must be validated first. */
        sds newpass;
        if (op[0] == '>') {
            newpass = ACLHashPassword((unsigned char*)op+1,oplen-1);
        } else {
            if (ACLCheckPasswordHash((unsigned char*)op+1,oplen-1) == C_ERR) {
                errno = EBADMSG;
                return C_ERR;
            }
            newpass = sdsnewlen(op+1,oplen-1);
        }

        listNode *ln = listSearchKey(u->passwords,newpass);
        /* Avoid re-adding the same password multiple times. */
        if (ln == NULL)
            listAddNodeTail(u->passwords,newpass);
        else
            sdsfree(newpass);
        u->flags &= ~USER_FLAG_NOPASS;
    } else if (op[0] == '<' || op[0] == '!') {
        /* '<' removes by plaintext (hashed for lookup); '!' removes by hash. */
        sds delpass;
        if (op[0] == '<') {
            delpass = ACLHashPassword((unsigned char*)op+1,oplen-1);
        } else {
            if (ACLCheckPasswordHash((unsigned char*)op+1,oplen-1) == C_ERR) {
                errno = EBADMSG;
                return C_ERR;
            }
            delpass = sdsnewlen(op+1,oplen-1);
        }
        listNode *ln = listSearchKey(u->passwords,delpass);
        sdsfree(delpass);
        if (ln) {
            listDelNode(u->passwords,ln);
        } else {
            errno = ENODEV;
            return C_ERR;
        }
    } else if (op[0] == '(' && op[oplen - 1] == ')') {
        aclSelector *selector = aclCreateSelectorFromOpSet(op, oplen);
        if (!selector) {
            /* No errno set, propagate it from interior error. */
            return C_ERR;
        }
        listAddNodeTail(u->selectors, selector);
        return C_OK;
    } else if (!strcasecmp(op,"clearselectors")) {
        listIter li;
        listNode *ln;
        listRewind(u->selectors,&li);
        /* There has to be a root selector */
        serverAssert(listNext(&li));
        /* Delete every selector after the root one, which is preserved. */
        while((ln = listNext(&li))) {
            listDelNode(u->selectors, ln);
        }
        return C_OK;
    } else if (!strcasecmp(op,"reset")) {
        serverAssert(ACLSetUser(u,"resetpass",-1) == C_OK);
        serverAssert(ACLSetUser(u,"resetkeys",-1) == C_OK);
        serverAssert(ACLSetUser(u,"resetchannels",-1) == C_OK);
        if (server.acl_pubsub_default & SELECTOR_FLAG_ALLCHANNELS)
            serverAssert(ACLSetUser(u,"allchannels",-1) == C_OK);
        serverAssert(ACLSetUser(u,"off",-1) == C_OK);
        serverAssert(ACLSetUser(u,"sanitize-payload",-1) == C_OK);
        serverAssert(ACLSetUser(u,"clearselectors",-1) == C_OK);
        serverAssert(ACLSetUser(u,"-@all",-1) == C_OK);
    } else {
        /* Not a user-level operation: apply it to the root selector. */
        aclSelector *selector = ACLUserGetRootSelector(u);
        if (ACLSetSelector(selector, op, oplen) == C_ERR) {
            return C_ERR;
        }
    }
    return C_OK;
}
+
/* Return a human readable description of the error that occurred in
 * ACLSetUser(), according to the errno value left by that function. */
const char *ACLSetUserStringError(void) {
    switch (errno) {
    case ENOENT:
        return "Unknown command or category name in ACL";
    case EINVAL:
        return "Syntax error";
    case EEXIST:
        return "Adding a pattern after the * pattern (or the "
               "'allkeys' flag) is not valid and does not have any "
               "effect. Try 'resetkeys' to start with an empty "
               "list of patterns";
    case EISDIR:
        return "Adding a pattern after the * pattern (or the "
               "'allchannels' flag) is not valid and does not have any "
               "effect. Try 'resetchannels' to start with an empty "
               "list of channels";
    case ENODEV:
        return "The password you are trying to remove from the user does "
               "not exist";
    case EBADMSG:
        return "The password hash must be exactly 64 characters and contain "
               "only lowercase hexadecimal characters";
    case EALREADY:
        return "Duplicate user found. A user can only be defined once in "
               "config files";
    case ECHILD:
        return "Allowing first-arg of a subcommand is not supported";
    default:
        return "Wrong format";
    }
}
+
+/* Create the default user, this has special permissions. */
+user *ACLCreateDefaultUser(void) {
+ user *new = ACLCreateUser("default",7);
+ ACLSetUser(new,"+@all",-1);
+ ACLSetUser(new,"~*",-1);
+ ACLSetUser(new,"&*",-1);
+ ACLSetUser(new,"on",-1);
+ ACLSetUser(new,"nopass",-1);
+ return new;
+}
+
/* Initialization of the ACL subsystem: allocate the global radix tree of
 * users, the list of users pending load from configuration / ACL file,
 * the in-memory ACL security event log, and finally the "default" user. */
void ACLInit(void) {
    Users = raxNew();
    UsersToLoad = listCreate();
    listSetMatchMethod(UsersToLoad, ACLListMatchLoadedUser);
    ACLLog = listCreate();
    DefaultUser = ACLCreateDefaultUser();
}
+
/* Check the username and password pair and return C_OK if they are valid,
 * otherwise C_ERR is returned and errno is set to:
 *
 * EINVAL: if the username-password do not match.
 * ENOENT: if the specified user does not exist at all.
 */
int ACLCheckUserCredentials(robj *username, robj *password) {
    user *u = ACLGetUserByName(username->ptr,sdslen(username->ptr));
    if (u == NULL) {
        errno = ENOENT;
        return C_ERR;
    }

    /* Disabled users can't login. */
    if (u->flags & USER_FLAG_DISABLED) {
        errno = EINVAL;
        return C_ERR;
    }

    /* If the user is configured to don't require any password, we
     * are already fine here. */
    if (u->flags & USER_FLAG_NOPASS) return C_OK;

    /* Check all the user passwords for at least one to match.
     * The comparison is performed in constant time to avoid leaking
     * information about the stored hashes via timing side channels. */
    listIter li;
    listNode *ln;
    listRewind(u->passwords,&li);
    sds hashed = ACLHashPassword(password->ptr,sdslen(password->ptr));
    while((ln = listNext(&li))) {
        sds thispass = listNodeValue(ln);
        if (!time_independent_strcmp(hashed, thispass, HASH_PASSWORD_LEN)) {
            sdsfree(hashed);
            return C_OK;
        }
    }
    sdsfree(hashed);

    /* If we reached this point, no password matched. */
    errno = EINVAL;
    return C_ERR;
}
+
+/* If `err` is provided, this is added as an error reply to the client.
+ * Otherwise, the standard Auth error is added as a reply. */
+void addAuthErrReply(client *c, robj *err) {
+ if (clientHasPendingReplies(c)) return;
+ if (!err) {
+ addReplyError(c, "-WRONGPASS invalid username-password pair or user is disabled.");
+ return;
+ }
+ addReplyError(c, err->ptr);
+}
+
+/* This is like ACLCheckUserCredentials(), however if the user/pass
+ * are correct, the connection is put in authenticated state and the
+ * connection user reference is populated.
+ *
+ * The return value is AUTH_OK on success (valid username / password pair) & AUTH_ERR otherwise. */
+int checkPasswordBasedAuth(client *c, robj *username, robj *password) {
+ if (ACLCheckUserCredentials(username,password) == C_OK) {
+ c->authenticated = 1;
+ c->user = ACLGetUserByName(username->ptr,sdslen(username->ptr));
+ moduleNotifyUserChanged(c);
+ return AUTH_OK;
+ } else {
+ addACLLogEntry(c,ACL_DENIED_AUTH,(c->flags & CLIENT_MULTI) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL,0,username->ptr,NULL);
+ return AUTH_ERR;
+ }
+}
+
+/* Attempt authenticating the user - first through module based authentication,
+ * and then, if needed, with normal password based authentication.
+ * Returns one of the following codes:
+ * AUTH_OK - Indicates that authentication succeeded.
+ * AUTH_ERR - Indicates that authentication failed.
+ * AUTH_BLOCKED - Indicates module authentication is in progress through a blocking implementation.
+ */
+int ACLAuthenticateUser(client *c, robj *username, robj *password, robj **err) {
+ int result = checkModuleAuthentication(c, username, password, err);
+ /* If authentication was not handled by any Module, attempt normal password based auth. */
+ if (result == AUTH_NOT_HANDLED) {
+ result = checkPasswordBasedAuth(c, username, password);
+ }
+ return result;
+}
+
+/* For ACL purposes, every user has a bitmap with the commands that such
+ * user is allowed to execute. In order to populate the bitmap, every command
+ * should have an assigned ID (that is used to index the bitmap). This function
+ * creates such an ID: it uses sequential IDs, reusing the same ID for the same
+ * command name, so that a command retains the same ID in case of modules that
+ * are unloaded and later reloaded.
+ *
+ * The function does not take ownership of the 'cmdname' SDS string.
+ * */
+unsigned long ACLGetCommandID(sds cmdname) {
+ sds lowername = sdsdup(cmdname);
+ sdstolower(lowername);
+ if (commandId == NULL) commandId = raxNew();
+ void *id = raxFind(commandId,(unsigned char*)lowername,sdslen(lowername));
+ if (id != raxNotFound) {
+ sdsfree(lowername);
+ return (unsigned long)id;
+ }
+ raxInsert(commandId,(unsigned char*)lowername,strlen(lowername),
+ (void*)nextid,NULL);
+ sdsfree(lowername);
+ unsigned long thisid = nextid;
+ nextid++;
+
+ /* We never assign the last bit in the user commands bitmap structure,
+ * this way we can later check if this bit is set, understanding if the
+ * current ACL for the user was created starting with a +@all to add all
+ * the possible commands and just subtracting other single commands or
+ * categories, or if, instead, the ACL was created just adding commands
+ * and command categories from scratch, not allowing future commands by
+ * default (loaded via modules). This is useful when rewriting the ACLs
+ * with ACL SAVE. */
+ if (nextid == USER_COMMAND_BITS_COUNT-1) nextid++;
+ return thisid;
+}
+
+/* Clear command id table and reset nextid to 0. */
+void ACLClearCommandID(void) {
+ if (commandId) raxFree(commandId);
+ commandId = NULL;
+ nextid = 0;
+}
+
+/* Return an username by its name, or NULL if the user does not exist. */
+user *ACLGetUserByName(const char *name, size_t namelen) {
+ void *myuser = raxFind(Users,(unsigned char*)name,namelen);
+ if (myuser == raxNotFound) return NULL;
+ return myuser;
+}
+
+/* =============================================================================
+ * ACL permission checks
+ * ==========================================================================*/
+
+/* Check if the key can be accessed by the selector.
+ *
+ * If the selector can access the key, ACL_OK is returned, otherwise
+ * ACL_DENIED_KEY is returned. */
+static int ACLSelectorCheckKey(aclSelector *selector, const char *key, int keylen, int keyspec_flags) {
+ /* The selector can access any key */
+ if (selector->flags & SELECTOR_FLAG_ALLKEYS) return ACL_OK;
+
+ listIter li;
+ listNode *ln;
+ listRewind(selector->patterns,&li);
+
+ int key_flags = 0;
+ if (keyspec_flags & CMD_KEY_ACCESS) key_flags |= ACL_READ_PERMISSION;
+ if (keyspec_flags & CMD_KEY_INSERT) key_flags |= ACL_WRITE_PERMISSION;
+ if (keyspec_flags & CMD_KEY_DELETE) key_flags |= ACL_WRITE_PERMISSION;
+ if (keyspec_flags & CMD_KEY_UPDATE) key_flags |= ACL_WRITE_PERMISSION;
+
+ /* Test this key against every pattern. */
+ while((ln = listNext(&li))) {
+ keyPattern *pattern = listNodeValue(ln);
+ if ((pattern->flags & key_flags) != key_flags)
+ continue;
+ size_t plen = sdslen(pattern->pattern);
+ if (stringmatchlen(pattern->pattern,plen,key,keylen,0))
+ return ACL_OK;
+ }
+ return ACL_DENIED_KEY;
+}
+
+/* Checks if the provided selector selector has access specified in flags
+ * to all keys in the keyspace. For example, CMD_KEY_READ access requires either
+ * '%R~*', '~*', or allkeys to be granted to the selector. Returns 1 if all
+ * the access flags are satisfied with this selector or 0 otherwise.
+ */
+static int ACLSelectorHasUnrestrictedKeyAccess(aclSelector *selector, int flags) {
+ /* The selector can access any key */
+ if (selector->flags & SELECTOR_FLAG_ALLKEYS) return 1;
+
+ listIter li;
+ listNode *ln;
+ listRewind(selector->patterns,&li);
+
+ int access_flags = 0;
+ if (flags & CMD_KEY_ACCESS) access_flags |= ACL_READ_PERMISSION;
+ if (flags & CMD_KEY_INSERT) access_flags |= ACL_WRITE_PERMISSION;
+ if (flags & CMD_KEY_DELETE) access_flags |= ACL_WRITE_PERMISSION;
+ if (flags & CMD_KEY_UPDATE) access_flags |= ACL_WRITE_PERMISSION;
+
+ /* Test this key against every pattern. */
+ while((ln = listNext(&li))) {
+ keyPattern *pattern = listNodeValue(ln);
+ if ((pattern->flags & access_flags) != access_flags)
+ continue;
+ if (!strcmp(pattern->pattern,"*")) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Checks a channel against a provided list of channels. The is_pattern
+ * argument should only be used when subscribing (not when publishing)
+ * and controls whether the input channel is evaluated as a channel pattern
+ * (like in PSUBSCRIBE) or a plain channel name (like in SUBSCRIBE).
+ *
+ * Note that a plain channel name like in PUBLISH or SUBSCRIBE can be
+ * matched against ACL channel patterns, but the pattern provided in PSUBSCRIBE
+ * can only be matched as a literal against an ACL pattern (using plain string compare). */
+static int ACLCheckChannelAgainstList(list *reference, const char *channel, int channellen, int is_pattern) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(reference, &li);
+ while((ln = listNext(&li))) {
+ sds pattern = listNodeValue(ln);
+ size_t plen = sdslen(pattern);
+ /* Channel patterns are matched literally against the channels in
+ * the list. Regular channels perform pattern matching. */
+ if ((is_pattern && !strcmp(pattern,channel)) ||
+ (!is_pattern && stringmatchlen(pattern,plen,channel,channellen,0)))
+ {
+ return ACL_OK;
+ }
+ }
+ return ACL_DENIED_CHANNEL;
+}
+
/* To prevent duplicate calls to getKeysResult, a cache is maintained
 * in between calls to the various selectors. */
typedef struct {
    int keys_init;          /* 1 once 'keys' has been populated (and must
                             * eventually be freed), 0 otherwise. */
    getKeysResult keys;     /* Cached key extraction result, valid only
                             * when keys_init is 1. */
} aclKeyResultCache;

/* Initialize the cache to the empty state: no key lookup performed yet. */
void initACLKeyResultCache(aclKeyResultCache *cache) {
    cache->keys_init = 0;
}

/* Release the cached key result, if one was ever populated. */
void cleanupACLKeyResultCache(aclKeyResultCache *cache) {
    if (cache->keys_init) getKeysFreeResult(&(cache->keys));
}
+
/* Check if the command is ready to be executed according to the
 * ACLs associated with the specified selector.
 *
 * If the selector can execute the command ACL_OK is returned, otherwise
 * ACL_DENIED_CMD, ACL_DENIED_KEY, or ACL_DENIED_CHANNEL is returned: the first in case the
 * command cannot be executed because the selector is not allowed to run such
 * command, the second and third if the command is denied because the selector is trying
 * to access a key or channel that are not among the specified patterns.
 *
 * 'keyidxptr', if not NULL, is set to the argv index of the offending key
 * or channel on ACL_DENIED_KEY / ACL_DENIED_CHANNEL. 'cache' memoizes the
 * key extraction across repeated calls for different selectors of the
 * same user (see aclKeyResultCache). */
static int ACLSelectorCheckCmd(aclSelector *selector, struct redisCommand *cmd, robj **argv, int argc, int *keyidxptr, aclKeyResultCache *cache) {
    uint64_t id = cmd->id;
    int ret;
    /* Commands flagged CMD_NO_AUTH bypass the command permission check
     * entirely, as does the allcommands selector flag. */
    if (!(selector->flags & SELECTOR_FLAG_ALLCOMMANDS) && !(cmd->flags & CMD_NO_AUTH)) {
        /* If the bit is not set we have to check further, in case the
         * command is allowed just with that specific first argument. */
        if (ACLGetSelectorCommandBit(selector,id) == 0) {
            /* Check if the first argument matches. */
            if (argc < 2 ||
                selector->allowed_firstargs == NULL ||
                selector->allowed_firstargs[id] == NULL)
            {
                return ACL_DENIED_CMD;
            }

            /* Scan the NULL-terminated list of allowed first arguments
             * for this command ID. */
            long subid = 0;
            while (1) {
                if (selector->allowed_firstargs[id][subid] == NULL)
                    return ACL_DENIED_CMD;
                /* For subcommands (cmd->parent set) the "first argument"
                 * is argv[2]; for top-level commands it is argv[1]. */
                int idx = cmd->parent ? 2 : 1;
                if (!strcasecmp(argv[idx]->ptr,selector->allowed_firstargs[id][subid]))
                    break; /* First argument match found. Stop here. */
                subid++;
            }
        }
    }

    /* Check if the user can execute commands explicitly touching the keys
     * mentioned in the command arguments. */
    if (!(selector->flags & SELECTOR_FLAG_ALLKEYS) && doesCommandHaveKeys(cmd)) {
        /* Extract the keys only once per command invocation: later
         * selectors reuse the cached result. */
        if (!(cache->keys_init)) {
            cache->keys = (getKeysResult) GETKEYS_RESULT_INIT;
            getKeysFromCommandWithSpecs(cmd, argv, argc, GET_KEYSPEC_DEFAULT, &(cache->keys));
            cache->keys_init = 1;
        }
        getKeysResult *result = &(cache->keys);
        keyReference *resultidx = result->keys;
        for (int j = 0; j < result->numkeys; j++) {
            int idx = resultidx[j].pos;
            ret = ACLSelectorCheckKey(selector, argv[idx]->ptr, sdslen(argv[idx]->ptr), resultidx[j].flags);
            if (ret != ACL_OK) {
                if (keyidxptr) *keyidxptr = resultidx[j].pos;
                return ret;
            }
        }
    }

    /* Check if the user can execute commands explicitly touching the channels
     * mentioned in the command arguments. Unlike keys, channel extraction is
     * not cached: it is redone (and freed) per selector. */
    const int channel_flags = CMD_CHANNEL_PUBLISH | CMD_CHANNEL_SUBSCRIBE;
    if (!(selector->flags & SELECTOR_FLAG_ALLCHANNELS) && doesCommandHaveChannelsWithFlags(cmd, channel_flags)) {
        getKeysResult channels = (getKeysResult) GETKEYS_RESULT_INIT;
        getChannelsFromCommand(cmd, argv, argc, &channels);
        keyReference *channelref = channels.keys;
        for (int j = 0; j < channels.numkeys; j++) {
            int idx = channelref[j].pos;
            /* Skip references that are not publish/subscribe channels. */
            if (!(channelref[j].flags & channel_flags)) continue;
            int is_pattern = channelref[j].flags & CMD_CHANNEL_PATTERN;
            int ret = ACLCheckChannelAgainstList(selector->channels, argv[idx]->ptr, sdslen(argv[idx]->ptr), is_pattern);
            if (ret != ACL_OK) {
                if (keyidxptr) *keyidxptr = channelref[j].pos;
                getKeysFreeResult(&channels);
                return ret;
            }
        }
        getKeysFreeResult(&channels);
    }
    return ACL_OK;
}
+
+/* Check if the key can be accessed by the client according to
+ * the ACLs associated with the specified user according to the
+ * keyspec access flags.
+ *
+ * If the user can access the key, ACL_OK is returned, otherwise
+ * ACL_DENIED_KEY is returned. */
+int ACLUserCheckKeyPerm(user *u, const char *key, int keylen, int flags) {
+ listIter li;
+ listNode *ln;
+
+ /* If there is no associated user, the connection can run anything. */
+ if (u == NULL) return ACL_OK;
+
+ /* Check all of the selectors */
+ listRewind(u->selectors,&li);
+ while((ln = listNext(&li))) {
+ aclSelector *s = (aclSelector *) listNodeValue(ln);
+ if (ACLSelectorCheckKey(s, key, keylen, flags) == ACL_OK) {
+ return ACL_OK;
+ }
+ }
+ return ACL_DENIED_KEY;
+}
+
+/* Checks if the user can execute the given command with the added restriction
+ * it must also have the access specified in flags to any key in the key space.
+ * For example, CMD_KEY_READ access requires either '%R~*', '~*', or allkeys to be
+ * granted in addition to the access required by the command. Returns 1
+ * if the user has access or 0 otherwise.
+ */
+int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, int flags) {
+ listIter li;
+ listNode *ln;
+ int local_idxptr;
+
+ /* If there is no associated user, the connection can run anything. */
+ if (u == NULL) return 1;
+
+ /* For multiple selectors, we cache the key result in between selector
+ * calls to prevent duplicate lookups. */
+ aclKeyResultCache cache;
+ initACLKeyResultCache(&cache);
+
+ /* Check each selector sequentially */
+ listRewind(u->selectors,&li);
+ while((ln = listNext(&li))) {
+ aclSelector *s = (aclSelector *) listNodeValue(ln);
+ int acl_retval = ACLSelectorCheckCmd(s, cmd, argv, argc, &local_idxptr, &cache);
+ if (acl_retval == ACL_OK && ACLSelectorHasUnrestrictedKeyAccess(s, flags)) {
+ cleanupACLKeyResultCache(&cache);
+ return 1;
+ }
+ }
+ cleanupACLKeyResultCache(&cache);
+ return 0;
+}
+
+/* Check if the channel can be accessed by the client according to
+ * the ACLs associated with the specified user.
+ *
+ * If the user can access the key, ACL_OK is returned, otherwise
+ * ACL_DENIED_CHANNEL is returned. */
+int ACLUserCheckChannelPerm(user *u, sds channel, int is_pattern) {
+ listIter li;
+ listNode *ln;
+
+ /* If there is no associated user, the connection can run anything. */
+ if (u == NULL) return ACL_OK;
+
+ /* Check all of the selectors */
+ listRewind(u->selectors,&li);
+ while((ln = listNext(&li))) {
+ aclSelector *s = (aclSelector *) listNodeValue(ln);
+ /* The selector can run any keys */
+ if (s->flags & SELECTOR_FLAG_ALLCHANNELS) return ACL_OK;
+
+ /* Otherwise, loop over the selectors list and check each channel */
+ if (ACLCheckChannelAgainstList(s->channels, channel, sdslen(channel), is_pattern) == ACL_OK) {
+ return ACL_OK;
+ }
+ }
+ return ACL_DENIED_CHANNEL;
+}
+
/* Lower level API that checks if a specified user is able to execute a given command.
 *
 * If the command fails an ACL check, idxptr will be to set to the first argv entry that
 * causes the failure, either 0 if the command itself fails or the idx of the key/channel
 * that causes the failure */
int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, int argc, int *idxptr) {
    listIter li;
    listNode *ln;

    /* If there is no associated user, the connection can run anything. */
    if (u == NULL) return ACL_OK;

    /* We have to pick a single error to log, the logic for picking is as follows:
     * 1) If no selector can execute the command, return the command.
     * 2) Return the last key or channel that no selector could match. */
    int relevant_error = ACL_DENIED_CMD;
    int local_idxptr = 0, last_idx = 0;

    /* For multiple selectors, we cache the key result in between selector
     * calls to prevent duplicate lookups. */
    aclKeyResultCache cache;
    initACLKeyResultCache(&cache);

    /* Check each selector sequentially: the first one that fully allows
     * the command short-circuits with ACL_OK. */
    listRewind(u->selectors,&li);
    while((ln = listNext(&li))) {
        aclSelector *s = (aclSelector *) listNodeValue(ln);
        int acl_retval = ACLSelectorCheckCmd(s, cmd, argv, argc, &local_idxptr, &cache);
        if (acl_retval == ACL_OK) {
            cleanupACLKeyResultCache(&cache);
            return ACL_OK;
        }
        /* Keep the "most relevant" failure: the error with the higher
         * ACL_DENIED_* value wins, and among equal errors, the one with
         * the higher argv index. */
        if (acl_retval > relevant_error ||
            (acl_retval == relevant_error && local_idxptr > last_idx))
        {
            relevant_error = acl_retval;
            last_idx = local_idxptr;
        }
    }

    *idxptr = last_idx;
    cleanupACLKeyResultCache(&cache);
    return relevant_error;
}
+
+/* High level API for checking if a client can execute the queued up command */
+int ACLCheckAllPerm(client *c, int *idxptr) {
+ return ACLCheckAllUserCommandPerm(c->user, c->cmd, c->argv, c->argc, idxptr);
+}
+
/* Check if the user's existing pub/sub clients violate the ACL pub/sub
 * permissions specified via the upcoming argument, and kill them if so.
 *
 * 'new' is the user definition about to be applied; 'original' is the
 * definition currently in effect. Clients authenticated as 'original'
 * that hold subscriptions not allowed by 'new' are disconnected. */
void ACLKillPubsubClientsIfNeeded(user *new, user *original) {
    /* Do nothing if there are no subscribers. */
    if (!dictSize(server.pubsub_patterns) &&
        !dictSize(server.pubsub_channels) &&
        !dictSize(server.pubsubshard_channels))
        return;

    listIter li, lpi;
    listNode *ln, *lpn;
    robj *o;
    int kill = 0;

    /* First optimization is we check if any selector has all channel
     * permissions. */
    listRewind(new->selectors,&li);
    while((ln = listNext(&li))) {
        aclSelector *s = (aclSelector *) listNodeValue(ln);
        if (s->flags & SELECTOR_FLAG_ALLCHANNELS) return;
    }

    /* Second optimization is to check if the new list of channels
     * is a strict superset of the original. This is done by
     * created an "upcoming" list of all channels that are in
     * the new user and checking each of the existing channels
     * against it. */
    list *upcoming = listCreate();
    listRewind(new->selectors,&li);
    while((ln = listNext(&li))) {
        aclSelector *s = (aclSelector *) listNodeValue(ln);
        listRewind(s->channels, &lpi);
        while((lpn = listNext(&lpi))) {
            /* Note: 'upcoming' borrows the sds values from the new user's
             * selectors; it is released below without freeing them. */
            listAddNodeTail(upcoming, listNodeValue(lpn));
        }
    }

    int match = 1;
    listRewind(original->selectors,&li);
    while((ln = listNext(&li)) && match) {
        aclSelector *s = (aclSelector *) listNodeValue(ln);
        /* If any of the original selectors has the all-channels permission, but
         * the new ones don't (this is checked earlier in this function), then the
         * new list is not a strict superset of the original. */
        if (s->flags & SELECTOR_FLAG_ALLCHANNELS) {
            match = 0;
            break;
        }
        listRewind(s->channels, &lpi);
        while((lpn = listNext(&lpi)) && match) {
            if (!listSearchKey(upcoming, listNodeValue(lpn))) {
                match = 0;
                break;
            }
        }
    }

    if (match) {
        /* All channels were matched, no need to kill clients. */
        listRelease(upcoming);
        return;
    }

    /* Permissions have changed, so we need to iterate through all
     * the clients and disconnect those that are no longer valid.
     * Scan all connected clients to find the user's pub/subs. */
    listRewind(server.clients,&li);
    while ((ln = listNext(&li)) != NULL) {
        client *c = listNodeValue(ln);
        kill = 0;

        if (c->user == original && getClientType(c) == CLIENT_TYPE_PUBSUB) {
            /* Check for pattern violations. Patterns are matched literally
             * (is_pattern=1) against the upcoming channel list. */
            dictIterator *di = dictGetIterator(c->pubsub_patterns);
            dictEntry *de;
            while (!kill && ((de = dictNext(di)) != NULL)) {
                o = dictGetKey(de);
                int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 1);
                kill = (res == ACL_DENIED_CHANNEL);
            }
            dictReleaseIterator(di);

            /* Check for channel violations. */
            if (!kill) {
                /* Check for global channels violation. */
                di = dictGetIterator(c->pubsub_channels);
                while (!kill && ((de = dictNext(di)) != NULL)) {
                    o = dictGetKey(de);
                    int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 0);
                    kill = (res == ACL_DENIED_CHANNEL);
                }
                dictReleaseIterator(di);
            }

            if (!kill) {
                /* Check for shard channels violation. */
                di = dictGetIterator(c->pubsubshard_channels);
                while (!kill && ((de = dictNext(di)) != NULL)) {
                    o = dictGetKey(de);
                    int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 0);
                    kill = (res == ACL_DENIED_CHANNEL);
                }
                dictReleaseIterator(di);
            }

            /* Kill it. */
            if (kill) {
                freeClient(c);
            }
        }
    }
    listRelease(upcoming);
}
+
/* =============================================================================
 * ACL loading / saving functions
 * ==========================================================================*/


/* Selector definitions should be sent as a single argument, however
 * we will be lenient and try to find selector definitions spread
 * across multiple arguments since it makes for a simpler user experience
 * for ACL SETUSER as well as when loading from conf files.
 *
 * This function takes in an array of ACL operators, excluding the username,
 * and merges selector operations that are spread across multiple arguments. The return
 * value is a new SDS array, with length set to the passed in merged_argc. Arguments
 * that are untouched are still duplicated. If there is an unmatched parenthesis, NULL
 * is returned and invalid_idx is set to the argument with the start of the opening
 * parenthesis. The caller owns the returned array and every sds in it. */
sds *ACLMergeSelectorArguments(sds *argv, int argc, int *merged_argc, int *invalid_idx) {
    *merged_argc = 0;
    /* Index of the argument that opened a still-unclosed '(' selector,
     * or -1 when we are not inside a selector. */
    int open_bracket_start = -1;

    /* The merged array can never be longer than the input. */
    sds *acl_args = (sds *) zmalloc(sizeof(sds) * argc);

    sds selector = NULL;
    for (int j = 0; j < argc; j++) {
        char *op = argv[j];

        /* An argument that starts with '(' but does not end with ')'
         * begins a multi-argument selector. */
        if (open_bracket_start == -1 &&
            (op[0] == '(' && op[sdslen(op) - 1] != ')')) {
            selector = sdsdup(argv[j]);
            open_bracket_start = j;
            continue;
        }

        /* While inside a selector, keep appending arguments until one
         * ends with ')' which closes and emits the merged selector. */
        if (open_bracket_start != -1) {
            selector = sdscatfmt(selector, " %s", op);
            if (op[sdslen(op) - 1] == ')') {
                open_bracket_start = -1;
                acl_args[*merged_argc] = selector;
                (*merged_argc)++;
            }
            continue;
        }

        /* Ordinary argument: copy it through unchanged. */
        acl_args[*merged_argc] = sdsdup(argv[j]);
        (*merged_argc)++;
    }

    /* Unterminated selector: free everything built so far and report the
     * argument index where the '(' was opened. */
    if (open_bracket_start != -1) {
        for (int i = 0; i < *merged_argc; i++) sdsfree(acl_args[i]);
        zfree(acl_args);
        sdsfree(selector);
        if (invalid_idx) *invalid_idx = open_bracket_start;
        return NULL;
    }

    return acl_args;
}
+
+/* takes an acl string already split on spaces and adds it to the given user
+ * if the user object is NULL, will create a user with the given username
+ *
+ * Returns an error as an sds string if the ACL string is not parsable
+ */
+sds ACLStringSetUser(user *u, sds username, sds *argv, int argc) {
+ serverAssert(u != NULL || username != NULL);
+
+ sds error = NULL;
+
+ int merged_argc = 0, invalid_idx = 0;
+ sds *acl_args = ACLMergeSelectorArguments(argv, argc, &merged_argc, &invalid_idx);
+
+ if (!acl_args) {
+ return sdscatfmt(sdsempty(),
+ "Unmatched parenthesis in acl selector starting "
+ "at '%s'.", (char *) argv[invalid_idx]);
+ }
+
+ /* Create a temporary user to validate and stage all changes against
+ * before applying to an existing user or creating a new user. If all
+ * arguments are valid the user parameters will all be applied together.
+ * If there are any errors then none of the changes will be applied. */
+ user *tempu = ACLCreateUnlinkedUser();
+ if (u) {
+ ACLCopyUser(tempu, u);
+ }
+
+ for (int j = 0; j < merged_argc; j++) {
+ if (ACLSetUser(tempu,acl_args[j],(ssize_t) sdslen(acl_args[j])) != C_OK) {
+ const char *errmsg = ACLSetUserStringError();
+ error = sdscatfmt(sdsempty(),
+ "Error in ACL SETUSER modifier '%s': %s",
+ (char*)acl_args[j], errmsg);
+ goto cleanup;
+ }
+ }
+
+ /* Existing pub/sub clients authenticated with the user may need to be
+ * disconnected if (some of) their channel permissions were revoked. */
+ if (u) {
+ ACLKillPubsubClientsIfNeeded(tempu, u);
+ }
+
+ /* Overwrite the user with the temporary user we modified above. */
+ if (!u) {
+ u = ACLCreateUser(username,sdslen(username));
+ }
+ serverAssert(u != NULL);
+
+ ACLCopyUser(u, tempu);
+
+cleanup:
+ ACLFreeUser(tempu);
+ for (int i = 0; i < merged_argc; i++) {
+ sdsfree(acl_args[i]);
+ }
+ zfree(acl_args);
+
+ return error;
+}
+
/* Given an argument vector describing a user in the form:
 *
 *      user <username> ... ACL rules and flags ...
 *
 * this function validates, and if the syntax is valid, appends
 * the user definition to a list for later loading.
 *
 * The rules are tested for validity and if there obvious syntax errors
 * the function returns C_ERR and does nothing, otherwise C_OK is returned
 * and the user is appended to the list.
 *
 * Note that this function cannot stop in case of commands that are not found
 * and, in that case, the error will be emitted later, because certain
 * commands may be defined later once modules are loaded.
 *
 * When an error is detected and C_ERR is returned, the function populates
 * by reference (if not set to NULL) the argc_err argument with the index
 * of the argv vector that caused the error. */
int ACLAppendUserForLoading(sds *argv, int argc, int *argc_err) {
    /* The line must be at least "user <username>". */
    if (argc < 2 || strcasecmp(argv[0],"user")) {
        if (argc_err) *argc_err = 0;
        return C_ERR;
    }

    /* Reject a username already queued for loading. */
    if (listSearchKey(UsersToLoad, argv[1])) {
        if (argc_err) *argc_err = 1;
        errno = EALREADY;
        return C_ERR;
    }

    /* Merged selectors before trying to process */
    int merged_argc;
    sds *acl_args = ACLMergeSelectorArguments(argv + 2, argc - 2, &merged_argc, argc_err);

    if (!acl_args) {
        return C_ERR;
    }

    /* Try to apply the user rules in a fake user to see if they
     * are actually valid. */
    user *fakeuser = ACLCreateUnlinkedUser();

    for (int j = 0; j < merged_argc; j++) {
        if (ACLSetUser(fakeuser,acl_args[j],sdslen(acl_args[j])) == C_ERR) {
            /* ENOENT (unknown command) is tolerated here: the command may
             * come from a module loaded later. Any other error aborts. */
            if (errno != ENOENT) {
                ACLFreeUser(fakeuser);
                if (argc_err) *argc_err = j;
                for (int i = 0; i < merged_argc; i++) sdsfree(acl_args[i]);
                zfree(acl_args);
                return C_ERR;
            }
        }
    }

    /* Rules look valid, let's append the user to the list. The stored
     * vector is { username, rule1, ..., ruleN, NULL }. */
    sds *copy = zmalloc(sizeof(sds)*(merged_argc + 2));
    copy[0] = sdsdup(argv[1]);
    for (int j = 0; j < merged_argc; j++) copy[j+1] = sdsdup(acl_args[j]);
    copy[merged_argc + 1] = NULL;
    listAddNodeTail(UsersToLoad,copy);
    ACLFreeUser(fakeuser);
    for (int i = 0; i < merged_argc; i++) sdsfree(acl_args[i]);
    zfree(acl_args);
    return C_OK;
}
+
+/* This function will load the configured users appended to the server
+ * configuration via ACLAppendUserForLoading(). On loading errors it will
+ * log an error and return C_ERR, otherwise C_OK will be returned. */
+int ACLLoadConfiguredUsers(void) {
+ listIter li;
+ listNode *ln;
+ listRewind(UsersToLoad,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds *aclrules = listNodeValue(ln);
+ sds username = aclrules[0];
+
+ if (ACLStringHasSpaces(aclrules[0],sdslen(aclrules[0]))) {
+ serverLog(LL_WARNING,"Spaces not allowed in ACL usernames");
+ return C_ERR;
+ }
+
+ user *u = ACLCreateUser(username,sdslen(username));
+ if (!u) {
+ /* Only valid duplicate user is the default one. */
+ serverAssert(!strcmp(username, "default"));
+ u = ACLGetUserByName("default",7);
+ ACLSetUser(u,"reset",-1);
+ }
+
+ /* Load every rule defined for this user. */
+ for (int j = 1; aclrules[j]; j++) {
+ if (ACLSetUser(u,aclrules[j],sdslen(aclrules[j])) != C_OK) {
+ const char *errmsg = ACLSetUserStringError();
+ serverLog(LL_WARNING,"Error loading ACL rule '%s' for "
+ "the user named '%s': %s",
+ aclrules[j],aclrules[0],errmsg);
+ return C_ERR;
+ }
+ }
+
+ /* Having a disabled user in the configuration may be an error,
+ * warn about it without returning any error to the caller. */
+ if (u->flags & USER_FLAG_DISABLED) {
+ serverLog(LL_NOTICE, "The user '%s' is disabled (there is no "
+ "'on' modifier in the user description). Make "
+ "sure this is not a configuration error.",
+ aclrules[0]);
+ }
+ }
+ return C_OK;
+}
+
/* This function loads the ACL from the specified filename: every line
 * is validated and should be either empty or in the format used to specify
 * users in the redis.conf configuration or in the ACL file, that is:
 *
 *  user <username> ... rules ...
 *
 * Note that this function considers comments starting with '#' as errors
 * because the ACL file is meant to be rewritten, and comments would be
 * lost after the rewrite. Yet empty lines are allowed to avoid being too
 * strict.
 *
 * One important part of implementing ACL LOAD, that uses this function, is
 * to avoid ending with broken rules if the ACL file is invalid for some
 * reason, so the function will attempt to validate the rules before loading
 * each user. For every line that will be found broken the function will
 * collect an error message.
 *
 * IMPORTANT: If there is at least a single error, nothing will be loaded
 * and the rules will remain exactly as they were.
 *
 * At the end of the process, if no errors were found in the whole file then
 * NULL is returned. Otherwise an SDS string describing in a single line
 * a description of all the issues found is returned. */
sds ACLLoadFromFile(const char *filename) {
    FILE *fp;
    char buf[1024];

    /* Open the ACL file. */
    if ((fp = fopen(filename,"r")) == NULL) {
        sds errors = sdscatprintf(sdsempty(),
            "Error loading ACLs, opening file '%s': %s",
            filename, strerror(errno));
        return errors;
    }

    /* Load the whole file as a single string in memory. */
    sds acls = sdsempty();
    while(fgets(buf,sizeof(buf),fp) != NULL)
        acls = sdscat(acls,buf);
    fclose(fp);

    /* Split the file into lines and attempt to load each line. */
    int totlines;
    sds *lines, errors = sdsempty();
    lines = sdssplitlen(acls,strlen(acls),"\n",1,&totlines);
    sdsfree(acls);

    /* We do all the loading in a fresh instance of the Users radix tree,
     * so if there are errors loading the ACL file we can rollback to the
     * old version. */
    rax *old_users = Users;
    Users = raxNew();

    /* Load each line of the file. */
    for (int i = 0; i < totlines; i++) {
        sds *argv;
        int argc;
        int linenum = i+1;

        lines[i] = sdstrim(lines[i]," \t\r\n");

        /* Skip blank lines */
        if (lines[i][0] == '\0') continue;

        /* Split into arguments */
        argv = sdssplitlen(lines[i],sdslen(lines[i])," ",1,&argc);
        if (argv == NULL) {
            errors = sdscatprintf(errors,
                     "%s:%d: unbalanced quotes in acl line. ",
                     server.acl_filename, linenum);
            continue;
        }

        /* Skip this line if the resulting command vector is empty. */
        if (argc == 0) {
            sdsfreesplitres(argv,argc);
            continue;
        }

        /* The line should start with the "user" keyword. */
        if (strcmp(argv[0],"user") || argc < 2) {
            errors = sdscatprintf(errors,
                     "%s:%d should start with user keyword followed "
                     "by the username. ", server.acl_filename,
                     linenum);
            sdsfreesplitres(argv,argc);
            continue;
        }

        /* Spaces are not allowed in usernames. */
        if (ACLStringHasSpaces(argv[1],sdslen(argv[1]))) {
            errors = sdscatprintf(errors,
                     "'%s:%d: username '%s' contains invalid characters. ",
                     server.acl_filename, linenum, argv[1]);
            sdsfreesplitres(argv,argc);
            continue;
        }

        /* Note: users are created directly in the fresh Users tree
         * installed above, never in the live one. */
        user *u = ACLCreateUser(argv[1],sdslen(argv[1]));

        /* If the user already exists we assume it's an error and abort. */
        if (!u) {
            errors = sdscatprintf(errors,"WARNING: Duplicate user '%s' found on line %d. ", argv[1], linenum);
            sdsfreesplitres(argv,argc);
            continue;
        }

        /* Finally process the options and validate they can
         * be cleanly applied to the user. If any option fails
         * to apply, the other values won't be applied since
         * all the pending changes will get dropped. */
        int merged_argc;
        sds *acl_args = ACLMergeSelectorArguments(argv + 2, argc - 2, &merged_argc, NULL);
        if (!acl_args) {
            errors = sdscatprintf(errors,
                    "%s:%d: Unmatched parenthesis in selector definition.",
                    server.acl_filename, linenum);
        }
        /* On merge failure merged_argc is 0 and acl_args is NULL, so the
         * loop below is skipped and zfree(NULL) is a no-op. */

        int syntax_error = 0;
        for (int j = 0; j < merged_argc; j++) {
            acl_args[j] = sdstrim(acl_args[j],"\t\r\n");
            if (ACLSetUser(u,acl_args[j],sdslen(acl_args[j])) != C_OK) {
                const char *errmsg = ACLSetUserStringError();
                if (errno == ENOENT) {
                    /* For missing commands, we print out more information since
                     * it shouldn't contain any sensitive information. */
                    errors = sdscatprintf(errors,
                            "%s:%d: Error in applying operation '%s': %s. ",
                            server.acl_filename, linenum, acl_args[j], errmsg);
                } else if (syntax_error == 0) {
                    /* For all other errors, only print out the first error encountered
                     * since it might affect future operations. */
                    errors = sdscatprintf(errors,
                            "%s:%d: %s. ",
                            server.acl_filename, linenum, errmsg);
                    syntax_error = 1;
                }
            }
        }

        for (int i = 0; i < merged_argc; i++) sdsfree(acl_args[i]);
        zfree(acl_args);

        /* Apply the rule to the new users set only if so far there
         * are no errors, otherwise it's useless since we are going
         * to discard the new users set anyway. */
        if (sdslen(errors) != 0) {
            sdsfreesplitres(argv,argc);
            continue;
        }

        sdsfreesplitres(argv,argc);
    }

    sdsfreesplitres(lines,totlines);

    /* Check if we found errors and react accordingly. */
    if (sdslen(errors) == 0) {
        /* The default user pointer is referenced in different places: instead
         * of replacing such occurrences it is much simpler to copy the new
         * default user configuration in the old one. */
        user *new_default = ACLGetUserByName("default",7);
        if (!new_default) {
            new_default = ACLCreateDefaultUser();
        }

        ACLCopyUser(DefaultUser,new_default);
        ACLFreeUser(new_default);
        raxInsert(Users,(unsigned char*)"default",7,DefaultUser,NULL);
        raxRemove(old_users,(unsigned char*)"default",7,NULL);
        ACLFreeUsersSet(old_users);
        sdsfree(errors);
        return NULL;
    } else {
        /* Rollback: discard the freshly built tree and restore the one
         * that was active when we started. */
        ACLFreeUsersSet(Users);
        Users = old_users;
        errors = sdscat(errors,"WARNING: ACL errors detected, no change to the previously active ACL rules was performed");
        return errors;
    }
}
+
/* Generate a copy of the ACLs currently in memory in the specified filename.
 * Returns C_OK on success or C_ERR if there was an error during the I/O.
 * When C_ERR is returned a log is produced with hints about the issue.
 *
 * The file is written atomically: content goes to a temp file which is
 * fsync'ed and then rename()d over the destination. */
int ACLSaveToFile(const char *filename) {
    sds acl = sdsempty();
    int fd = -1;
    sds tmpfilename = NULL;
    int retval = C_ERR;

    /* Let's generate an SDS string containing the new version of the
     * ACL file. */
    raxIterator ri;
    raxStart(&ri,Users);
    raxSeek(&ri,"^",NULL,0);
    while(raxNext(&ri)) {
        user *u = ri.data;
        /* Return information in the configuration file format. */
        sds user = sdsnew("user ");
        user = sdscatsds(user,u->name);
        user = sdscatlen(user," ",1);
        robj *descr = ACLDescribeUser(u);
        user = sdscatsds(user,descr->ptr);
        decrRefCount(descr);
        acl = sdscatsds(acl,user);
        acl = sdscatlen(acl,"\n",1);
        sdsfree(user);
    }
    raxStop(&ri);

    /* Create a temp file with the new content. The pid/time suffix makes
     * the name unique per writer. */
    tmpfilename = sdsnew(filename);
    tmpfilename = sdscatfmt(tmpfilename,".tmp-%i-%I",
        (int) getpid(),commandTimeSnapshot());
    if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) {
        serverLog(LL_WARNING,"Opening temp ACL file for ACL SAVE: %s",
            strerror(errno));
        goto cleanup;
    }

    /* Write it. Loop because write() may do a partial write; retry
     * on EINTR. */
    size_t offset = 0;
    while (offset < sdslen(acl)) {
        ssize_t written_bytes = write(fd,acl + offset,sdslen(acl) - offset);
        if (written_bytes <= 0) {
            if (errno == EINTR) continue;
            serverLog(LL_WARNING,"Writing ACL file for ACL SAVE: %s",
                strerror(errno));
            goto cleanup;
        }
        offset += written_bytes;
    }
    if (redis_fsync(fd) == -1) {
        serverLog(LL_WARNING,"Syncing ACL file for ACL SAVE: %s",
            strerror(errno));
        goto cleanup;
    }
    close(fd); fd = -1;

    /* Let's replace the new file with the old one. */
    if (rename(tmpfilename,filename) == -1) {
        serverLog(LL_WARNING,"Renaming ACL file for ACL SAVE: %s",
            strerror(errno));
        goto cleanup;
    }
    /* Sync the directory so the rename itself is durable. */
    if (fsyncFileDir(filename) == -1) {
        serverLog(LL_WARNING,"Syncing ACL directory for ACL SAVE: %s",
            strerror(errno));
        goto cleanup;
    }
    sdsfree(tmpfilename); tmpfilename = NULL;
    retval = C_OK; /* If we reached this point, everything is fine. */

cleanup:
    /* On failure paths tmpfilename is still set, so the partial temp
     * file is removed. */
    if (fd != -1) close(fd);
    if (tmpfilename) unlink(tmpfilename);
    sdsfree(tmpfilename);
    sdsfree(acl);
    return retval;
}
+
+/* This function is called once the server is already running, modules are
+ * loaded, and we are ready to start, in order to load the ACLs either from
+ * the pending list of users defined in redis.conf, or from the ACL file.
+ * The function will just exit with an error if the user is trying to mix
+ * both the loading methods. Any loading failure is fatal: the process
+ * exits rather than starting with a partial/insecure ACL setup. */
+void ACLLoadUsersAtStartup(void) {
+    /* Refuse mixing 'user' directives in redis.conf with an ACL file,
+     * since the two sources could silently diverge. */
+    if (server.acl_filename[0] != '\0' && listLength(UsersToLoad) != 0) {
+        serverLog(LL_WARNING,
+            "Configuring Redis with users defined in redis.conf and at "
+            "the same setting an ACL file path is invalid. This setup "
+            "is very likely to lead to configuration errors and security "
+            "holes, please define either an ACL file or declare users "
+            "directly in your redis.conf, but not both.");
+        exit(1);
+    }
+
+    /* Apply users queued from redis.conf 'user' directives (if any). */
+    if (ACLLoadConfiguredUsers() == C_ERR) {
+        serverLog(LL_WARNING,
+            "Critical error while loading ACLs. Exiting.");
+        exit(1);
+    }
+
+    /* Otherwise load from the configured ACL file, aborting on errors. */
+    if (server.acl_filename[0] != '\0') {
+        sds errors = ACLLoadFromFile(server.acl_filename);
+        if (errors) {
+            serverLog(LL_WARNING,
+                "Aborting Redis startup because of ACL errors: %s", errors);
+            sdsfree(errors);
+            exit(1);
+        }
+    }
+}
+
+/* =============================================================================
+ * ACL log
+ * ==========================================================================*/
+
+/* Maximum age difference, in milliseconds, between two similar log events
+ * for them to be merged into a single entry (see ACLLogMatchEntry()). */
+#define ACL_LOG_GROUPING_MAX_TIME_DELTA 60000
+
+/* This structure defines an entry inside the ACL log. */
+typedef struct ACLLogEntry {
+    uint64_t count;     /* Number of times this happened recently. */
+    int reason;         /* Reason for denying the command. ACL_DENIED_*. */
+    int context;        /* Toplevel, Lua or MULTI/EXEC? ACL_LOG_CTX_*. */
+    sds object;         /* The key name or command name. */
+    sds username;       /* User the client is authenticated with. */
+    mstime_t ctime;     /* Milliseconds time of last update to this entry. */
+    sds cinfo;          /* Client info (last client if updated). */
+    long long entry_id;     /* The pair (entry_id, timestamp_created) is a unique identifier of this entry
+                             * in case the node dies and is restarted, it can detect that if it's a new series. */
+    mstime_t timestamp_created; /* UNIX time in milliseconds at the time of this entry's creation. */
+} ACLLogEntry;
+
+/* Decide whether two ACL log entries describe the same kind of event, so
+ * that the existing record can be bumped instead of appending a duplicate.
+ * Entries match when reason, context, object and username are all equal
+ * and their last-update times differ by no more than
+ * ACL_LOG_GROUPING_MAX_TIME_DELTA milliseconds. Returns 1 on match, 0
+ * otherwise. */
+int ACLLogMatchEntry(ACLLogEntry *a, ACLLogEntry *b) {
+    if (a->reason != b->reason) return 0;
+    if (a->context != b->context) return 0;
+    mstime_t delta = (a->ctime > b->ctime) ? a->ctime - b->ctime
+                                           : b->ctime - a->ctime;
+    if (delta > ACL_LOG_GROUPING_MAX_TIME_DELTA) return 0;
+    return sdscmp(a->object,b->object) == 0 &&
+           sdscmp(a->username,b->username) == 0;
+}
+
+/* Destructor for an ACL log entry: releases the owned sds strings and the
+ * structure itself. The void* signature makes it usable as a list free
+ * method. */
+void ACLFreeLogEntry(void *leptr) {
+    ACLLogEntry *entry = leptr;
+
+    sdsfree(entry->cinfo);
+    sdsfree(entry->username);
+    sdsfree(entry->object);
+    zfree(entry);
+}
+
+/* Bump the server-wide ACL denial counter matching 'reason' (one of the
+ * ACL_DENIED_* codes). Panics on an unknown code. */
+void ACLUpdateInfoMetrics(int reason){
+    switch (reason) {
+    case ACL_DENIED_AUTH:
+        server.acl_info.user_auth_failures++;
+        break;
+    case ACL_DENIED_CMD:
+        server.acl_info.invalid_cmd_accesses++;
+        break;
+    case ACL_DENIED_KEY:
+        server.acl_info.invalid_key_accesses++;
+        break;
+    case ACL_DENIED_CHANNEL:
+        server.acl_info.invalid_channel_accesses++;
+        break;
+    default:
+        serverPanic("Unknown ACL_DENIED encoding");
+    }
+}
+
+/* Adds a new entry in the ACL log, making sure to delete the old entry
+ * if we reach the maximum length allowed for the log. This function attempts
+ * to find similar entries in the current log in order to bump the counter of
+ * the log entry instead of creating many entries for very similar ACL
+ * rules issues.
+ *
+ * The argpos argument is used when the reason is ACL_DENIED_KEY or
+ * ACL_DENIED_CHANNEL, since it allows the function to log the key or channel
+ * name that caused the problem.
+ *
+ * The last 2 arguments are a manual override to be used, instead of any of the automatic
+ * ones which depend on the client and reason arguments (use NULL for default).
+ *
+ * If `object` is not NULL, this functions takes over it.
+ */
+void addACLLogEntry(client *c, int reason, int context, int argpos, sds username, sds object) {
+    /* Update ACL info metrics */
+    ACLUpdateInfoMetrics(reason);
+
+    /* Create a new entry. */
+    struct ACLLogEntry *le = zmalloc(sizeof(*le));
+    le->count = 1;
+    le->reason = reason;
+    le->username = sdsdup(username ? username : c->user->name);
+    le->ctime = commandTimeSnapshot();
+    le->entry_id = ACLLogEntryCount;
+    le->timestamp_created = le->ctime;
+
+    /* Either take ownership of the caller-provided object, or derive it
+     * from the denied command / key / channel / auth argument. */
+    if (object) {
+        le->object = object;
+    } else {
+        switch(reason) {
+        case ACL_DENIED_CMD: le->object = sdsdup(c->cmd->fullname); break;
+        case ACL_DENIED_KEY: le->object = sdsdup(c->argv[argpos]->ptr); break;
+        case ACL_DENIED_CHANNEL: le->object = sdsdup(c->argv[argpos]->ptr); break;
+        case ACL_DENIED_AUTH: le->object = sdsdup(c->argv[0]->ptr); break;
+        default: le->object = sdsempty();
+        }
+    }
+
+    /* if we have a real client from the network, use it (could be missing on module timers) */
+    client *realclient = server.current_client? server.current_client : c;
+
+    le->cinfo = catClientInfoString(sdsempty(),realclient);
+    le->context = context;
+
+    /* Try to match this entry with past ones, to see if we can just
+     * update an existing entry instead of creating a new one. */
+    long toscan = 10; /* Do a limited work trying to find duplicated. */
+    listIter li;
+    listNode *ln;
+    listRewind(ACLLog,&li);
+    ACLLogEntry *match = NULL;
+    while (toscan-- && (ln = listNext(&li)) != NULL) {
+        ACLLogEntry *current = listNodeValue(ln);
+        if (ACLLogMatchEntry(current,le)) {
+            match = current;
+            /* Move the matched entry to the head: most recent first. */
+            listDelNode(ACLLog,ln);
+            listAddNodeHead(ACLLog,current);
+            break;
+        }
+    }
+
+    /* If there is a match update the entry, otherwise add it as a
+     * new one. */
+    if (match) {
+        /* We update a few fields of the existing entry and bump the
+         * counter of events for this entry. */
+        sdsfree(match->cinfo);
+        match->cinfo = le->cinfo;
+        match->ctime = le->ctime;
+        match->count++;
+
+        /* Release the old entry. */
+        /* 'le->cinfo' ownership was transferred to 'match' above, so it
+         * must be cleared before freeing 'le' to avoid a double free. */
+        le->cinfo = NULL;
+        ACLFreeLogEntry(le);
+    } else {
+        /* Add it to our list of entries. We'll have to trim the list
+         * to its maximum size. */
+        ACLLogEntryCount++; /* Incrementing the entry_id count to make each record in the log unique. */
+        listAddNodeHead(ACLLog, le);
+        while(listLength(ACLLog) > server.acllog_max_len) {
+            /* Evict the oldest entries from the tail. */
+            listNode *ln = listLast(ACLLog);
+            ACLLogEntry *le = listNodeValue(ln);
+            ACLFreeLogEntry(le);
+            listDelNode(ACLLog,ln);
+        }
+    }
+}
+
+/* Build a human readable error message for the given ACL denial code
+ * 'acl_res' (ACL_DENIED_CMD, ACL_DENIED_KEY or ACL_DENIED_CHANNEL).
+ * 'errored_val' is the key or channel name that was denied, used only
+ * when 'verbose' is non-zero. The returned sds is owned by the caller.
+ * Panics if 'acl_res' is not one of the handled codes. */
+sds getAclErrorMessage(int acl_res, user *user, struct redisCommand *cmd, sds errored_val, int verbose) {
+    switch (acl_res) {
+    case ACL_DENIED_CMD:
+        return sdscatfmt(sdsempty(), "User %S has no permissions to run "
+                                     "the '%S' command", user->name, cmd->fullname);
+    case ACL_DENIED_KEY:
+        if (verbose) {
+            return sdscatfmt(sdsempty(), "User %S has no permissions to access "
+                                         "the '%S' key", user->name, errored_val);
+        } else {
+            return sdsnew("No permissions to access a key");
+        }
+    case ACL_DENIED_CHANNEL:
+        if (verbose) {
+            return sdscatfmt(sdsempty(), "User %S has no permissions to access "
+                                         "the '%S' channel", user->name, errored_val);
+        } else {
+            return sdsnew("No permissions to access a channel");
+        }
+    }
+    serverPanic("Reached deadcode on getAclErrorMessage");
+}
+
+/* =============================================================================
+ * ACL related commands
+ * ==========================================================================*/
+
+/* ACL CAT category
+ *
+ * Reply with the name of every non-module command in 'commands' whose ACL
+ * categories include 'cflag', recursing into subcommand dictionaries.
+ * '*arraylen' is incremented for each emitted command so the caller can
+ * set the deferred reply length. */
+void aclCatWithFlags(client *c, dict *commands, uint64_t cflag, int *arraylen) {
+    dictEntry *de;
+    dictIterator *di = dictGetIterator(commands);
+
+    while ((de = dictNext(di)) != NULL) {
+        struct redisCommand *cmd = dictGetVal(de);
+        /* Module commands are not part of the built-in categories. */
+        if (cmd->flags & CMD_MODULE) continue;
+        if (cmd->acl_categories & cflag) {
+            addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
+            (*arraylen)++;
+        }
+
+        if (cmd->subcommands_dict) {
+            aclCatWithFlags(c, cmd->subcommands_dict, cflag, arraylen);
+        }
+    }
+    dictReleaseIterator(di);
+}
+
+/* Add the formatted response from a single selector to the ACL GETUSER
+ * response: three field pairs describing the selector's commands, key
+ * patterns and pub/sub channel patterns. This function returns the number
+ * of fields added (always 3). */
+int aclAddReplySelectorDescription(client *c, aclSelector *s) {
+    listIter li;
+    listNode *ln;
+
+    /* Commands */
+    addReplyBulkCString(c,"commands");
+    sds cmddescr = ACLDescribeSelectorCommandRules(s);
+    addReplyBulkSds(c,cmddescr);
+
+    /* Key patterns: "~*" shorthand when all keys are allowed, otherwise a
+     * space separated list of patterns. */
+    addReplyBulkCString(c,"keys");
+    if (s->flags & SELECTOR_FLAG_ALLKEYS) {
+        addReplyBulkCBuffer(c,"~*",2);
+    } else {
+        sds dsl = sdsempty();
+        listRewind(s->patterns,&li);
+        while((ln = listNext(&li))) {
+            keyPattern *thispat = (keyPattern *) listNodeValue(ln);
+            if (ln != listFirst(s->patterns)) dsl = sdscat(dsl, " ");
+            dsl = sdsCatPatternString(dsl, thispat);
+        }
+        addReplyBulkSds(c, dsl);
+    }
+
+    /* Pub/sub patterns: "&*" shorthand when all channels are allowed. */
+    addReplyBulkCString(c,"channels");
+    if (s->flags & SELECTOR_FLAG_ALLCHANNELS) {
+        addReplyBulkCBuffer(c,"&*",2);
+    } else {
+        sds dsl = sdsempty();
+        listRewind(s->channels,&li);
+        while((ln = listNext(&li))) {
+            sds thispat = listNodeValue(ln);
+            if (ln != listFirst(s->channels)) dsl = sdscat(dsl, " ");
+            dsl = sdscatfmt(dsl, "&%S", thispat);
+        }
+        addReplyBulkSds(c, dsl);
+    }
+    return 3;
+}
+
+/* ACL -- show and modify the configuration of ACL users.
+ * ACL HELP
+ * ACL LOAD
+ * ACL SAVE
+ * ACL LIST
+ * ACL USERS
+ * ACL CAT [<category>]
+ * ACL SETUSER <username> ... acl rules ...
+ * ACL DELUSER <username> [...]
+ * ACL GETUSER <username>
+ * ACL GENPASS [<bits>]
+ * ACL WHOAMI
+ * ACL LOG [<count> | RESET]
+ */
+void aclCommand(client *c) {
+    char *sub = c->argv[1]->ptr;
+    if (!strcasecmp(sub,"setuser") && c->argc >= 3) {
+        /* Initially redact all of the arguments to not leak any information
+         * about the user. */
+        for (int j = 2; j < c->argc; j++) {
+            redactClientCommandArgument(c, j);
+        }
+
+        sds username = c->argv[2]->ptr;
+        /* Check username validity. */
+        if (ACLStringHasSpaces(username,sdslen(username))) {
+            addReplyErrorFormat(c,
+                "Usernames can't contain spaces or null characters");
+            return;
+        }
+
+        user *u = ACLGetUserByName(username,sdslen(username));
+
+        /* The pointers in temp_argv are borrowed from the client argv:
+         * only the array itself is freed here. */
+        sds *temp_argv = zmalloc(c->argc * sizeof(sds));
+        for (int i = 3; i < c->argc; i++) temp_argv[i-3] = c->argv[i]->ptr;
+
+        sds error = ACLStringSetUser(u, username, temp_argv, c->argc - 3);
+        zfree(temp_argv);
+        if (error == NULL) {
+            addReply(c,shared.ok);
+        } else {
+            addReplyErrorSdsSafe(c, error);
+        }
+        return;
+    } else if (!strcasecmp(sub,"deluser") && c->argc >= 3) {
+        int deleted = 0;
+        /* First pass: refuse the whole operation if 'default' is among
+         * the targets, before deleting anything. */
+        for (int j = 2; j < c->argc; j++) {
+            sds username = c->argv[j]->ptr;
+            if (!strcmp(username,"default")) {
+                addReplyError(c,"The 'default' user cannot be removed");
+                return;
+            }
+        }
+
+        /* Second pass: actually remove the users and disconnect the
+         * clients authenticated with them. */
+        for (int j = 2; j < c->argc; j++) {
+            sds username = c->argv[j]->ptr;
+            user *u;
+            if (raxRemove(Users,(unsigned char*)username,
+                          sdslen(username),
+                          (void**)&u))
+            {
+                ACLFreeUserAndKillClients(u);
+                deleted++;
+            }
+        }
+        addReplyLongLong(c,deleted);
+    } else if (!strcasecmp(sub,"getuser") && c->argc == 3) {
+        user *u = ACLGetUserByName(c->argv[2]->ptr,sdslen(c->argv[2]->ptr));
+        if (u == NULL) {
+            addReplyNull(c);
+            return;
+        }
+
+        /* Deferred map length: flags + passwords + selectors, plus the
+         * fields added by the root selector description below. */
+        void *ufields = addReplyDeferredLen(c);
+        int fields = 3;
+
+        /* Flags */
+        addReplyBulkCString(c,"flags");
+        void *deflen = addReplyDeferredLen(c);
+        int numflags = 0;
+        for (int j = 0; ACLUserFlags[j].flag; j++) {
+            if (u->flags & ACLUserFlags[j].flag) {
+                addReplyBulkCString(c,ACLUserFlags[j].name);
+                numflags++;
+            }
+        }
+        setDeferredSetLen(c,deflen,numflags);
+
+        /* Passwords */
+        addReplyBulkCString(c,"passwords");
+        addReplyArrayLen(c,listLength(u->passwords));
+        listIter li;
+        listNode *ln;
+        listRewind(u->passwords,&li);
+        while((ln = listNext(&li))) {
+            sds thispass = listNodeValue(ln);
+            addReplyBulkCBuffer(c,thispass,sdslen(thispass));
+        }
+        /* Include the root selector at the top level for backwards compatibility */
+        fields += aclAddReplySelectorDescription(c, ACLUserGetRootSelector(u));
+
+        /* Describe all of the selectors on this user, including duplicating the root selector */
+        addReplyBulkCString(c,"selectors");
+        addReplyArrayLen(c, listLength(u->selectors) - 1);
+        listRewind(u->selectors,&li);
+        /* Skip the root selector, already emitted above. */
+        serverAssert(listNext(&li));
+        while((ln = listNext(&li))) {
+            void *slen = addReplyDeferredLen(c);
+            int sfields = aclAddReplySelectorDescription(c, (aclSelector *)listNodeValue(ln));
+            setDeferredMapLen(c, slen, sfields);
+        }
+        setDeferredMapLen(c, ufields, fields);
+    } else if ((!strcasecmp(sub,"list") || !strcasecmp(sub,"users")) &&
+               c->argc == 2)
+    {
+        int justnames = !strcasecmp(sub,"users");
+        addReplyArrayLen(c,raxSize(Users));
+        raxIterator ri;
+        raxStart(&ri,Users);
+        raxSeek(&ri,"^",NULL,0);
+        while(raxNext(&ri)) {
+            user *u = ri.data;
+            if (justnames) {
+                addReplyBulkCBuffer(c,u->name,sdslen(u->name));
+            } else {
+                /* Return information in the configuration file format. */
+                sds config = sdsnew("user ");
+                config = sdscatsds(config,u->name);
+                config = sdscatlen(config," ",1);
+                robj *descr = ACLDescribeUser(u);
+                config = sdscatsds(config,descr->ptr);
+                decrRefCount(descr);
+                addReplyBulkSds(c,config);
+            }
+        }
+        raxStop(&ri);
+    } else if (!strcasecmp(sub,"whoami") && c->argc == 2) {
+        if (c->user != NULL) {
+            addReplyBulkCBuffer(c,c->user->name,sdslen(c->user->name));
+        } else {
+            addReplyNull(c);
+        }
+    } else if (server.acl_filename[0] == '\0' &&
+               (!strcasecmp(sub,"load") || !strcasecmp(sub,"save")))
+    {
+        /* LOAD and SAVE only make sense when an ACL file is configured. */
+        addReplyError(c,"This Redis instance is not configured to use an ACL file. You may want to specify users via the ACL SETUSER command and then issue a CONFIG REWRITE (assuming you have a Redis configuration file set) in order to store users in the Redis configuration.");
+        return;
+    } else if (!strcasecmp(sub,"load") && c->argc == 2) {
+        sds errors = ACLLoadFromFile(server.acl_filename);
+        if (errors == NULL) {
+            addReply(c,shared.ok);
+        } else {
+            addReplyError(c,errors);
+            sdsfree(errors);
+        }
+    } else if (!strcasecmp(sub,"save") && c->argc == 2) {
+        if (ACLSaveToFile(server.acl_filename) == C_OK) {
+            addReply(c,shared.ok);
+        } else {
+            addReplyError(c,"There was an error trying to save the ACLs. "
+                            "Please check the server logs for more "
+                            "information");
+        }
+    } else if (!strcasecmp(sub,"cat") && c->argc == 2) {
+        /* Without an argument, list every known category name. */
+        void *dl = addReplyDeferredLen(c);
+        int j;
+        for (j = 0; ACLCommandCategories[j].flag != 0; j++)
+            addReplyBulkCString(c,ACLCommandCategories[j].name);
+        setDeferredArrayLen(c,dl,j);
+    } else if (!strcasecmp(sub,"cat") && c->argc == 3) {
+        uint64_t cflag = ACLGetCommandCategoryFlagByName(c->argv[2]->ptr);
+        if (cflag == 0) {
+            addReplyErrorFormat(c, "Unknown category '%.128s'", (char*)c->argv[2]->ptr);
+            return;
+        }
+        int arraylen = 0;
+        void *dl = addReplyDeferredLen(c);
+        aclCatWithFlags(c, server.orig_commands, cflag, &arraylen);
+        setDeferredArrayLen(c,dl,arraylen);
+    } else if (!strcasecmp(sub,"genpass") && (c->argc == 2 || c->argc == 3)) {
+        #define GENPASS_MAX_BITS 4096
+        char pass[GENPASS_MAX_BITS/8*2]; /* Hex representation. */
+        long bits = 256; /* By default generate 256 bits passwords. */
+
+        if (c->argc == 3 && getLongFromObjectOrReply(c,c->argv[2],&bits,NULL)
+            != C_OK) return;
+
+        if (bits <= 0 || bits > GENPASS_MAX_BITS) {
+            addReplyErrorFormat(c,
+                "ACL GENPASS argument must be the number of "
+                "bits for the output password, a positive number "
+                "up to %d",GENPASS_MAX_BITS);
+            return;
+        }
+
+        long chars = (bits+3)/4; /* Round to number of characters to emit. */
+        getRandomHexChars(pass,chars);
+        addReplyBulkCBuffer(c,pass,chars);
+    } else if (!strcasecmp(sub,"log") && (c->argc == 2 || c->argc ==3)) {
+        long count = 10; /* Number of entries to emit by default. */
+
+        /* Parse the only argument that LOG may have: it could be either
+         * the number of entries the user wants to display, or alternatively
+         * the "RESET" command in order to flush the old entries. */
+        if (c->argc == 3) {
+            if (!strcasecmp(c->argv[2]->ptr,"reset")) {
+                /* Install the free method just for the duration of the
+                 * flush, so entry values are actually released. */
+                listSetFreeMethod(ACLLog,ACLFreeLogEntry);
+                listEmpty(ACLLog);
+                listSetFreeMethod(ACLLog,NULL);
+                addReply(c,shared.ok);
+                return;
+            } else if (getLongFromObjectOrReply(c,c->argv[2],&count,NULL)
+                       != C_OK)
+            {
+                return;
+            }
+            if (count < 0) count = 0;
+        }
+
+        /* Fix the count according to the number of entries we got. */
+        if ((size_t)count > listLength(ACLLog))
+            count = listLength(ACLLog);
+
+        addReplyArrayLen(c,count);
+        listIter li;
+        listNode *ln;
+        listRewind(ACLLog,&li);
+        mstime_t now = commandTimeSnapshot();
+        while (count-- && (ln = listNext(&li)) != NULL) {
+            ACLLogEntry *le = listNodeValue(ln);
+            /* Each entry is a map of 10 fixed fields. */
+            addReplyMapLen(c,10);
+            addReplyBulkCString(c,"count");
+            addReplyLongLong(c,le->count);
+
+            addReplyBulkCString(c,"reason");
+            char *reasonstr;
+            switch(le->reason) {
+            case ACL_DENIED_CMD: reasonstr="command"; break;
+            case ACL_DENIED_KEY: reasonstr="key"; break;
+            case ACL_DENIED_CHANNEL: reasonstr="channel"; break;
+            case ACL_DENIED_AUTH: reasonstr="auth"; break;
+            default: reasonstr="unknown";
+            }
+            addReplyBulkCString(c,reasonstr);
+
+            addReplyBulkCString(c,"context");
+            char *ctxstr;
+            switch(le->context) {
+            case ACL_LOG_CTX_TOPLEVEL: ctxstr="toplevel"; break;
+            case ACL_LOG_CTX_MULTI: ctxstr="multi"; break;
+            case ACL_LOG_CTX_LUA: ctxstr="lua"; break;
+            case ACL_LOG_CTX_MODULE: ctxstr="module"; break;
+            default: ctxstr="unknown";
+            }
+            addReplyBulkCString(c,ctxstr);
+
+            addReplyBulkCString(c,"object");
+            addReplyBulkCBuffer(c,le->object,sdslen(le->object));
+            addReplyBulkCString(c,"username");
+            addReplyBulkCBuffer(c,le->username,sdslen(le->username));
+            addReplyBulkCString(c,"age-seconds");
+            double age = (double)(now - le->ctime)/1000;
+            addReplyDouble(c,age);
+            addReplyBulkCString(c,"client-info");
+            addReplyBulkCBuffer(c,le->cinfo,sdslen(le->cinfo));
+            addReplyBulkCString(c, "entry-id");
+            addReplyLongLong(c, le->entry_id);
+            addReplyBulkCString(c, "timestamp-created");
+            addReplyLongLong(c, le->timestamp_created);
+            addReplyBulkCString(c, "timestamp-last-updated");
+            addReplyLongLong(c, le->ctime);
+        }
+    } else if (!strcasecmp(sub,"dryrun") && c->argc >= 4) {
+        struct redisCommand *cmd;
+        user *u = ACLGetUserByName(c->argv[2]->ptr,sdslen(c->argv[2]->ptr));
+        if (u == NULL) {
+            addReplyErrorFormat(c, "User '%s' not found", (char *)c->argv[2]->ptr);
+            return;
+        }
+
+        if ((cmd = lookupCommand(c->argv + 3, c->argc - 3)) == NULL) {
+            addReplyErrorFormat(c, "Command '%s' not found", (char *)c->argv[3]->ptr);
+            return;
+        }
+
+        if ((cmd->arity > 0 && cmd->arity != c->argc-3) ||
+            (c->argc-3 < -cmd->arity))
+        {
+            addReplyErrorFormat(c,"wrong number of arguments for '%s' command", cmd->fullname);
+            return;
+        }
+
+        /* On denial, reply with the verbose error message instead of
+         * executing the command. */
+        int idx;
+        int result = ACLCheckAllUserCommandPerm(u, cmd, c->argv + 3, c->argc - 3, &idx);
+        if (result != ACL_OK) {
+            sds err = getAclErrorMessage(result, u, cmd, c->argv[idx+3]->ptr, 1);
+            addReplyBulkSds(c, err);
+            return;
+        }
+
+        addReply(c,shared.ok);
+    } else if (c->argc == 2 && !strcasecmp(sub,"help")) {
+        const char *help[] = {
+"CAT [<category>]",
+"    List all commands that belong to <category>, or all command categories",
+"    when no category is specified.",
+"DELUSER <username> [<username> ...]",
+"    Delete a list of users.",
+"DRYRUN <username> <command> [<arg> ...]",
+"    Returns whether the user can execute the given command without executing the command.",
+"GETUSER <username>",
+"    Get the user's details.",
+"GENPASS [<bits>]",
+"    Generate a secure 256-bit user password. The optional `bits` argument can",
+"    be used to specify a different size.",
+"LIST",
+"    Show users details in config file format.",
+"LOAD",
+"    Reload users from the ACL file.",
+"LOG [<count> | RESET]",
+"    Show the ACL log entries.",
+"SAVE",
+"    Save the current config to the ACL file.",
+"SETUSER <username> <attribute> [<attribute> ...]",
+"    Create or modify a user with the specified attributes.",
+"USERS",
+"    List all the registered usernames.",
+"WHOAMI",
+"    Return the current connection username.",
+NULL
+        };
+        addReplyHelp(c,help);
+    } else {
+        addReplySubcommandSyntaxError(c);
+    }
+}
+
+/* Reply with the set of @category names the given command belongs to,
+ * as status strings, using a deferred length since the count is not
+ * known in advance. */
+void addReplyCommandCategories(client *c, struct redisCommand *cmd) {
+    void *replylen = addReplyDeferredLen(c);
+    int count = 0;
+
+    for (int i = 0; ACLCommandCategories[i].flag != 0; i++) {
+        if (!(cmd->acl_categories & ACLCommandCategories[i].flag)) continue;
+        addReplyStatusFormat(c, "@%s", ACLCommandCategories[i].name);
+        count++;
+    }
+    setDeferredSetLen(c, replylen, count);
+}
+
+/* AUTH <password>
+ * AUTH <username> <password> (Redis >= 6.0 form)
+ *
+ * When the user is omitted it means that we are trying to authenticate
+ * against the default user. */
+void authCommand(client *c) {
+    /* Only two or three argument forms are allowed. */
+    if (c->argc > 3) {
+        addReplyErrorObject(c,shared.syntaxerr);
+        return;
+    }
+    /* Always redact the second argument: it is a password in the two
+     * argument form and a username in the three argument form, and we
+     * never want either in command logs/monitor output. */
+    redactClientCommandArgument(c, 1);
+
+    /* Handle the two different forms here. The form with two arguments
+     * will just use "default" as username. */
+    robj *username, *password;
+    if (c->argc == 2) {
+        /* Mimic the old behavior of giving an error for the two argument
+         * form if no password is configured. */
+        if (DefaultUser->flags & USER_FLAG_NOPASS) {
+            addReplyError(c,"AUTH <password> called without any password "
+                            "configured for the default user. Are you sure "
+                            "your configuration is correct?");
+            return;
+        }
+
+        username = shared.default_username;
+        password = c->argv[1];
+    } else {
+        username = c->argv[1];
+        password = c->argv[2];
+        /* Redact the password argument of the three argument form too. */
+        redactClientCommandArgument(c, 2);
+    }
+
+    robj *err = NULL;
+    int result = ACLAuthenticateUser(c, username, password, &err);
+    if (result == AUTH_OK) {
+        addReply(c, shared.ok);
+    } else if (result == AUTH_ERR) {
+        addAuthErrReply(c, err);
+    }
+    if (err) decrRefCount(err);
+}
+
+/* Set the password for the "default" ACL user. This implements support for
+ * the requirepass config: passing NULL makes the default user "nopass". */
+void ACLUpdateDefaultUserPassword(sds password) {
+    /* Drop any previously configured passwords first. */
+    ACLSetUser(DefaultUser,"resetpass",-1);
+    if (password == NULL) {
+        ACLSetUser(DefaultUser,"nopass",-1);
+        return;
+    }
+    sds op = sdscatlen(sdsnew(">"), password, sdslen(password));
+    ACLSetUser(DefaultUser, op, sdslen(op));
+    sdsfree(op);
+}
diff --git a/src/adlist.c b/src/adlist.c
new file mode 100644
index 0000000..f031c46
--- /dev/null
+++ b/src/adlist.c
@@ -0,0 +1,417 @@
+/* adlist.c - A generic doubly linked list implementation
+ *
+ * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdlib.h>
+#include "adlist.h"
+#include "zmalloc.h"
+
+/* Allocate and return a new empty list. The list can later be freed with
+ * listRelease(); node values are only released automatically if a free
+ * method is installed with listSetFreeMethod(), otherwise the caller must
+ * free them before releasing the list.
+ *
+ * Returns NULL on allocation failure. */
+list *listCreate(void)
+{
+    struct list *l = zmalloc(sizeof(*l));
+
+    if (l == NULL) return NULL;
+    l->head = NULL;
+    l->tail = NULL;
+    l->len = 0;
+    l->dup = NULL;
+    l->free = NULL;
+    l->match = NULL;
+    return l;
+}
+
+/* Remove every node from the list, invoking the configured free method
+ * (if any) on each value. The list header itself stays valid and is left
+ * empty, ready for reuse. */
+void listEmpty(list *list)
+{
+    listNode *node = list->head;
+
+    while (node != NULL) {
+        listNode *next = node->next;
+        if (list->free) list->free(node->value);
+        zfree(node);
+        node = next;
+    }
+    list->head = NULL;
+    list->tail = NULL;
+    list->len = 0;
+}
+
+/* Free the whole list: every node (and value, if a free method is set)
+ * plus the list header itself.
+ *
+ * This function can't fail. */
+void listRelease(list *list)
+{
+    listEmpty(list);
+    zfree(list);
+}
+
+/* Add a new node to the list, to head, containing the specified 'value'
+ * pointer as value. The value pointer is stored as-is: ownership stays
+ * with the caller unless a free method is installed.
+ *
+ * On error, NULL is returned and no operation is performed (i.e. the
+ * list remains unaltered).
+ * On success the 'list' pointer you pass to the function is returned. */
+list *listAddNodeHead(list *list, void *value)
+{
+    listNode *node;
+
+    if ((node = zmalloc(sizeof(*node))) == NULL)
+        return NULL;
+    node->value = value;
+    listLinkNodeHead(list, node);
+    return list;
+}
+
+/*
+ * Add a node that has already been allocated to the head of list.
+ * The node must not currently belong to any list.
+ */
+void listLinkNodeHead(list* list, listNode *node) {
+    if (list->len == 0) {
+        /* First node: it becomes both head and tail. */
+        list->head = list->tail = node;
+        node->prev = node->next = NULL;
+    } else {
+        node->prev = NULL;
+        node->next = list->head;
+        list->head->prev = node;
+        list->head = node;
+    }
+    list->len++;
+}
+
+/* Add a new node to the list, to tail, containing the specified 'value'
+ * pointer as value. The value pointer is stored as-is: ownership stays
+ * with the caller unless a free method is installed.
+ *
+ * On error, NULL is returned and no operation is performed (i.e. the
+ * list remains unaltered).
+ * On success the 'list' pointer you pass to the function is returned. */
+list *listAddNodeTail(list *list, void *value)
+{
+    listNode *node;
+
+    if ((node = zmalloc(sizeof(*node))) == NULL)
+        return NULL;
+    node->value = value;
+    listLinkNodeTail(list, node);
+    return list;
+}
+
+/*
+ * Add a node that has already been allocated to the tail of list.
+ * The node must not currently belong to any list.
+ */
+void listLinkNodeTail(list *list, listNode *node) {
+    if (list->len == 0) {
+        /* First node: it becomes both head and tail. */
+        list->head = list->tail = node;
+        node->prev = node->next = NULL;
+    } else {
+        node->prev = list->tail;
+        node->next = NULL;
+        list->tail->next = node;
+        list->tail = node;
+    }
+    list->len++;
+}
+
+/* Insert a new node holding 'value' next to 'old_node': after it when
+ * 'after' is non-zero, before it otherwise. 'old_node' must belong to
+ * 'list'.
+ *
+ * On allocation error NULL is returned and the list is unaltered; on
+ * success the 'list' pointer is returned. */
+list *listInsertNode(list *list, listNode *old_node, void *value, int after) {
+    listNode *node;
+
+    if ((node = zmalloc(sizeof(*node))) == NULL)
+        return NULL;
+    node->value = value;
+    if (after) {
+        node->prev = old_node;
+        node->next = old_node->next;
+        if (list->tail == old_node) {
+            list->tail = node;
+        }
+    } else {
+        node->next = old_node;
+        node->prev = old_node->prev;
+        if (list->head == old_node) {
+            list->head = node;
+        }
+    }
+    /* Fix the neighbors' links; a NULL neighbor means the new node is now
+     * at one of the list's extremes, handled above. */
+    if (node->prev != NULL) {
+        node->prev->next = node;
+    }
+    if (node->next != NULL) {
+        node->next->prev = node;
+    }
+    list->len++;
+    return list;
+}
+
+/* Remove the specified node from the specified list.
+ * The node is freed. If free callback is provided the value is freed as well.
+ *
+ * This function can't fail. */
+void listDelNode(list *list, listNode *node)
+{
+    listUnlinkNode(list, node);
+    if (list->free) list->free(node->value);
+    zfree(node);
+}
+
+/*
+ * Remove the specified node from the list without freeing it.
+ * The node's prev/next pointers are cleared so it can safely be linked
+ * into another list afterwards.
+ */
+void listUnlinkNode(list *list, listNode *node) {
+    if (node->prev)
+        node->prev->next = node->next;
+    else
+        list->head = node->next;
+    if (node->next)
+        node->next->prev = node->prev;
+    else
+        list->tail = node->prev;
+
+    node->next = NULL;
+    node->prev = NULL;
+
+    list->len--;
+}
+
+/* Returns a heap-allocated list iterator 'iter'. After the initialization
+ * every call to listNext() will return the next element of the list.
+ * 'direction' is AL_START_HEAD or AL_START_TAIL. Free the iterator with
+ * listReleaseIterator().
+ *
+ * This function can't fail. */
+listIter *listGetIterator(list *list, int direction)
+{
+    listIter *iter;
+
+    if ((iter = zmalloc(sizeof(*iter))) == NULL) return NULL;
+    if (direction == AL_START_HEAD)
+        iter->next = list->head;
+    else
+        iter->next = list->tail;
+    iter->direction = direction;
+    return iter;
+}
+
+/* Release the iterator memory */
+void listReleaseIterator(listIter *iter) {
+    zfree(iter);
+}
+
+/* Initialize a caller-provided (typically stack allocated) iterator to
+ * traverse the list from head to tail. */
+void listRewind(list *list, listIter *li) {
+    li->next = list->head;
+    li->direction = AL_START_HEAD;
+}
+
+/* Like listRewind() but traverses from tail to head. */
+void listRewindTail(list *list, listIter *li) {
+    li->next = list->tail;
+    li->direction = AL_START_TAIL;
+}
+
+/* Advance the iterator and return the next node, or NULL when the
+ * iteration is over. It's valid to remove the currently returned element
+ * using listDelNode(), but not to remove other elements.
+ *
+ * The classical usage pattern is:
+ *
+ * iter = listGetIterator(list,<direction>);
+ * while ((node = listNext(iter)) != NULL) {
+ *     doSomethingWith(listNodeValue(node));
+ * }
+ *
+ * */
+listNode *listNext(listIter *iter)
+{
+    listNode *node = iter->next;
+
+    if (node == NULL) return NULL;
+    iter->next = (iter->direction == AL_START_HEAD) ? node->next
+                                                    : node->prev;
+    return node;
+}
+
+/* Duplicate the whole list. On out of memory NULL is returned.
+ * On success a copy of the original list is returned.
+ *
+ * The 'Dup' method set with listSetDupMethod() function is used
+ * to copy the node value. Otherwise the same pointer value of
+ * the original node is used as value of the copied node (i.e. the copy
+ * is shallow and both lists share the values).
+ *
+ * The original list both on success or error is never modified. */
+list *listDup(list *orig)
+{
+    list *copy;
+    listIter iter;
+    listNode *node;
+
+    if ((copy = listCreate()) == NULL)
+        return NULL;
+    /* The copy inherits the original's callbacks. */
+    copy->dup = orig->dup;
+    copy->free = orig->free;
+    copy->match = orig->match;
+    listRewind(orig, &iter);
+    while((node = listNext(&iter)) != NULL) {
+        void *value;
+
+        if (copy->dup) {
+            value = copy->dup(node->value);
+            if (value == NULL) {
+                listRelease(copy);
+                return NULL;
+            }
+        } else {
+            value = node->value;
+        }
+
+        if (listAddNodeTail(copy, value) == NULL) {
+            /* Free value if dup succeed but listAddNodeTail failed. */
+            if (copy->free) copy->free(value);
+
+            listRelease(copy);
+            return NULL;
+        }
+    }
+    return copy;
+}
+
+/* Search the list, from head to tail, for the first node whose value
+ * matches 'key'. If a 'match' method was set with listSetMatchMethod()
+ * it is used for the comparison, otherwise the node's 'value' pointer is
+ * compared directly with 'key'.
+ *
+ * Returns the first matching node, or NULL if no node matches. */
+listNode *listSearchKey(list *list, void *key)
+{
+    listIter it;
+    listNode *n;
+
+    listRewind(list, &it);
+    while ((n = listNext(&it)) != NULL) {
+        int found = list->match ? list->match(n->value, key)
+                                : (n->value == key);
+        if (found) return n;
+    }
+    return NULL;
+}
+
+/* Return the node at the given zero-based index: 0 is the head, 1 the
+ * element next to head, and so on. Negative indexes count from the tail:
+ * -1 is the last element, -2 the penultimate, etc. If the index is out of
+ * range NULL is returned. */
+listNode *listIndex(list *list, long index) {
+    listNode *n;
+
+    if (index >= 0) {
+        for (n = list->head; n && index; n = n->next) index--;
+    } else {
+        index = (-index) - 1;
+        for (n = list->tail; n && index; n = n->prev) index--;
+    }
+    return n;
+}
+
+/* Rotate the list removing the tail node and inserting it to the head.
+ * Lists with zero or one element are left untouched. */
+void listRotateTailToHead(list *list) {
+    if (listLength(list) <= 1) return;
+
+    /* Detach current tail */
+    listNode *tail = list->tail;
+    list->tail = tail->prev;
+    list->tail->next = NULL;
+    /* Move it as head */
+    list->head->prev = tail;
+    tail->prev = NULL;
+    tail->next = list->head;
+    list->head = tail;
+}
+
+/* Rotate the list removing the head node and inserting it to the tail.
+ * Lists with zero or one element are left untouched. */
+void listRotateHeadToTail(list *list) {
+    if (listLength(list) <= 1) return;
+
+    listNode *head = list->head;
+    /* Detach current head */
+    list->head = head->next;
+    list->head->prev = NULL;
+    /* Move it as tail */
+    list->tail->next = head;
+    head->next = NULL;
+    head->prev = list->tail;
+    list->tail = head;
+}
+
+/* Add all the elements of the list 'o' at the end of the
+ * list 'l'. The list 'o' is left empty but otherwise valid. */
+void listJoin(list *l, list *o) {
+    if (o->len == 0) return;
+
+    o->head->prev = l->tail;
+
+    /* 'l' may be empty, in which case 'o's head becomes 'l's head. */
+    if (l->tail)
+        l->tail->next = o->head;
+    else
+        l->head = o->head;
+
+    l->tail = o->tail;
+    l->len += o->len;
+
+    /* Setup other as an empty list. */
+    o->head = o->tail = NULL;
+    o->len = 0;
+}
+
+/* Initializes the node's value and sets its pointers
+ * so that it is initially not a member of any list. Use together with
+ * listLinkNodeHead()/listLinkNodeTail() for caller-allocated nodes.
+ */
+void listInitNode(listNode *node, void *value) {
+    node->prev = NULL;
+    node->next = NULL;
+    node->value = value;
+}
diff --git a/src/adlist.h b/src/adlist.h
new file mode 100644
index 0000000..7c54437
--- /dev/null
+++ b/src/adlist.h
@@ -0,0 +1,100 @@
+/* adlist.h - A generic doubly linked list implementation
+ *
+ * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ADLIST_H__
+#define __ADLIST_H__
+
+/* Node, List, and Iterator are the only data structures used currently. */
+
+typedef struct listNode {
+ struct listNode *prev;
+ struct listNode *next;
+ void *value;
+} listNode;
+
+typedef struct listIter {
+ listNode *next;
+ int direction;
+} listIter;
+
+typedef struct list {
+ listNode *head;
+ listNode *tail;
+ void *(*dup)(void *ptr);
+ void (*free)(void *ptr);
+ int (*match)(void *ptr, void *key);
+ unsigned long len;
+} list;
+
+/* Functions implemented as macros */
+#define listLength(l) ((l)->len)
+#define listFirst(l) ((l)->head)
+#define listLast(l) ((l)->tail)
+#define listPrevNode(n) ((n)->prev)
+#define listNextNode(n) ((n)->next)
+#define listNodeValue(n) ((n)->value)
+
+#define listSetDupMethod(l,m) ((l)->dup = (m))
+#define listSetFreeMethod(l,m) ((l)->free = (m))
+#define listSetMatchMethod(l,m) ((l)->match = (m))
+
+#define listGetDupMethod(l) ((l)->dup)
+#define listGetFreeMethod(l) ((l)->free)
+#define listGetMatchMethod(l) ((l)->match)
+
+/* Prototypes */
+list *listCreate(void);
+void listRelease(list *list);
+void listEmpty(list *list);
+list *listAddNodeHead(list *list, void *value);
+list *listAddNodeTail(list *list, void *value);
+list *listInsertNode(list *list, listNode *old_node, void *value, int after);
+void listDelNode(list *list, listNode *node);
+listIter *listGetIterator(list *list, int direction);
+listNode *listNext(listIter *iter);
+void listReleaseIterator(listIter *iter);
+list *listDup(list *orig);
+listNode *listSearchKey(list *list, void *key);
+listNode *listIndex(list *list, long index);
+void listRewind(list *list, listIter *li);
+void listRewindTail(list *list, listIter *li);
+void listRotateTailToHead(list *list);
+void listRotateHeadToTail(list *list);
+void listJoin(list *l, list *o);
+void listInitNode(listNode *node, void *value);
+void listLinkNodeHead(list *list, listNode *node);
+void listLinkNodeTail(list *list, listNode *node);
+void listUnlinkNode(list *list, listNode *node);
+
+/* Directions for iterators */
+#define AL_START_HEAD 0
+#define AL_START_TAIL 1
+
+#endif /* __ADLIST_H__ */
diff --git a/src/ae.c b/src/ae.c
new file mode 100644
index 0000000..ff60630
--- /dev/null
+++ b/src/ae.c
@@ -0,0 +1,512 @@
+/* A simple event-driven programming library. Originally I wrote this code
+ * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated
+ * it in form of a library for easy reuse.
+ *
+ * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ae.h"
+#include "anet.h"
+#include "redisassert.h"
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <poll.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+
+#include "zmalloc.h"
+#include "config.h"
+
+/* Include the best multiplexing layer supported by this system.
+ * The following should be ordered by performances, descending. */
+#ifdef HAVE_EVPORT
+#include "ae_evport.c"
+#else
+ #ifdef HAVE_EPOLL
+ #include "ae_epoll.c"
+ #else
+ #ifdef HAVE_KQUEUE
+ #include "ae_kqueue.c"
+ #else
+ #include "ae_select.c"
+ #endif
+ #endif
+#endif
+
+
+/* Create an event loop able to monitor up to 'setsize' file descriptors.
+ * Returns NULL if any allocation or the multiplexing API setup fails. */
+aeEventLoop *aeCreateEventLoop(int setsize) {
+ aeEventLoop *eventLoop;
+ int i;
+
+ monotonicInit(); /* just in case the calling app didn't initialize */
+
+ if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err;
+ eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize);
+ eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize);
+ if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err;
+ eventLoop->setsize = setsize;
+ eventLoop->timeEventHead = NULL;
+ eventLoop->timeEventNextId = 0;
+ eventLoop->stop = 0;
+ eventLoop->maxfd = -1;
+ eventLoop->beforesleep = NULL;
+ eventLoop->aftersleep = NULL;
+ eventLoop->flags = 0;
+ if (aeApiCreate(eventLoop) == -1) goto err;
+ /* Events with mask == AE_NONE are not set. So let's initialize the
+ * vector with it. */
+ for (i = 0; i < setsize; i++)
+ eventLoop->events[i].mask = AE_NONE;
+ return eventLoop;
+
+err:
+ /* Partial-failure cleanup: assumes zfree(NULL) is a no-op, like
+ * free(NULL) -- TODO confirm against zmalloc.c. */
+ if (eventLoop) {
+ zfree(eventLoop->events);
+ zfree(eventLoop->fired);
+ zfree(eventLoop);
+ }
+ return NULL;
+}
+
+/* Return the current set size. */
+int aeGetSetSize(aeEventLoop *eventLoop) {
+ return eventLoop->setsize;
+}
+
+/*
+ * Tell the event processing to change the wait timeout as soon as possible.
+ *
+ * Note: it just means you turn on/off the global AE_DONT_WAIT.
+ */
+void aeSetDontWait(aeEventLoop *eventLoop, int noWait) {
+ if (noWait)
+ eventLoop->flags |= AE_DONT_WAIT;
+ else
+ eventLoop->flags &= ~AE_DONT_WAIT;
+}
+
+/* Resize the maximum set size of the event loop.
+ * If the requested set size is smaller than the current set size, but
+ * there is already a file descriptor in use that is >= the requested
+ * set size minus one, AE_ERR is returned and the operation is not
+ * performed at all.
+ *
+ * Otherwise AE_OK is returned and the operation is successful. */
+int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) {
+ int i;
+
+ if (setsize == eventLoop->setsize) return AE_OK;
+ if (eventLoop->maxfd >= setsize) return AE_ERR;
+ if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR;
+
+ eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize);
+ eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize);
+ eventLoop->setsize = setsize;
+
+ /* Make sure that if we created new slots, they are initialized with
+ * an AE_NONE mask. */
+ for (i = eventLoop->maxfd+1; i < setsize; i++)
+ eventLoop->events[i].mask = AE_NONE;
+ return AE_OK;
+}
+
+/* Free the event loop and everything it owns. */
+void aeDeleteEventLoop(aeEventLoop *eventLoop) {
+ aeApiFree(eventLoop);
+ zfree(eventLoop->events);
+ zfree(eventLoop->fired);
+
+ /* Free the time events list. */
+ aeTimeEvent *next_te, *te = eventLoop->timeEventHead;
+ while (te) {
+ next_te = te->next;
+ /* NOTE(review): unlike processTimeEvents(), finalizerProc is NOT
+ * invoked here -- confirm callers do not rely on it at shutdown. */
+ zfree(te);
+ te = next_te;
+ }
+ zfree(eventLoop);
+}
+
+void aeStop(aeEventLoop *eventLoop) {
+ eventLoop->stop = 1;
+}
+
+/* Register 'proc' as the handler for 'mask' events on 'fd'.
+ * Returns AE_OK, or AE_ERR (errno = ERANGE) if fd exceeds the set size,
+ * or AE_ERR if the multiplexing backend rejects the registration. */
+int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
+ aeFileProc *proc, void *clientData)
+{
+ if (fd >= eventLoop->setsize) {
+ errno = ERANGE;
+ return AE_ERR;
+ }
+ aeFileEvent *fe = &eventLoop->events[fd];
+
+ if (aeApiAddEvent(eventLoop, fd, mask) == -1)
+ return AE_ERR;
+ /* Merge into any mask already registered for this fd. */
+ fe->mask |= mask;
+ if (mask & AE_READABLE) fe->rfileProc = proc;
+ if (mask & AE_WRITABLE) fe->wfileProc = proc;
+ fe->clientData = clientData;
+ if (fd > eventLoop->maxfd)
+ eventLoop->maxfd = fd;
+ return AE_OK;
+}
+
+/* Unregister the events in 'mask' for 'fd'. Silently ignores fds that are
+ * out of range or not registered. */
+void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask)
+{
+ if (fd >= eventLoop->setsize) return;
+ aeFileEvent *fe = &eventLoop->events[fd];
+ if (fe->mask == AE_NONE) return;
+
+ /* We want to always remove AE_BARRIER if set when AE_WRITABLE
+ * is removed. */
+ if (mask & AE_WRITABLE) mask |= AE_BARRIER;
+
+ aeApiDelEvent(eventLoop, fd, mask);
+ fe->mask = fe->mask & (~mask);
+ if (fd == eventLoop->maxfd && fe->mask == AE_NONE) {
+ /* Update the max fd: scan downward for the next registered one. */
+ int j;
+
+ for (j = eventLoop->maxfd-1; j >= 0; j--)
+ if (eventLoop->events[j].mask != AE_NONE) break;
+ eventLoop->maxfd = j;
+ }
+}
+
+void *aeGetFileClientData(aeEventLoop *eventLoop, int fd) {
+ if (fd >= eventLoop->setsize) return NULL;
+ aeFileEvent *fe = &eventLoop->events[fd];
+ if (fe->mask == AE_NONE) return NULL;
+
+ return fe->clientData;
+}
+
+int aeGetFileEvents(aeEventLoop *eventLoop, int fd) {
+ if (fd >= eventLoop->setsize) return 0;
+ aeFileEvent *fe = &eventLoop->events[fd];
+
+ return fe->mask;
+}
+
+/* Schedule 'proc' to fire 'milliseconds' from now (relative delay).
+ * Returns the new event id, or AE_ERR on allocation failure.
+ * The event is pushed on the head of the unsorted time-event list. */
+long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
+ aeTimeProc *proc, void *clientData,
+ aeEventFinalizerProc *finalizerProc)
+{
+ long long id = eventLoop->timeEventNextId++;
+ aeTimeEvent *te;
+
+ te = zmalloc(sizeof(*te));
+ if (te == NULL) return AE_ERR;
+ te->id = id;
+ /* 'when' is an absolute monotonic timestamp in microseconds. */
+ te->when = getMonotonicUs() + milliseconds * 1000;
+ te->timeProc = proc;
+ te->finalizerProc = finalizerProc;
+ te->clientData = clientData;
+ te->prev = NULL;
+ te->next = eventLoop->timeEventHead;
+ te->refcount = 0;
+ if (te->next)
+ te->next->prev = te;
+ eventLoop->timeEventHead = te;
+ return id;
+}
+
+/* Lazily delete the time event with the given id: the node is only
+ * flagged with AE_DELETED_EVENT_ID here and actually unlinked/freed
+ * by processTimeEvents(). Returns AE_OK, or AE_ERR if no such id. */
+int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
+{
+ aeTimeEvent *te = eventLoop->timeEventHead;
+ while(te) {
+ if (te->id == id) {
+ te->id = AE_DELETED_EVENT_ID;
+ return AE_OK;
+ }
+ te = te->next;
+ }
+ return AE_ERR; /* NO event with the specified ID found */
+}
+
+/* How many microseconds until the first timer should fire.
+ * If there are no timers, -1 is returned.
+ *
+ * Note that's O(N) since time events are unsorted.
+ * Possible optimizations (not needed by Redis so far, but...):
+ * 1) Insert the event in order, so that the nearest is just the head.
+ * Much better but still insertion or deletion of timers is O(N).
+ * 2) Use a skiplist to have this operation as O(1) and insertion as O(log(N)).
+ */
+static int64_t usUntilEarliestTimer(aeEventLoop *eventLoop) {
+    aeTimeEvent *te = eventLoop->timeEventHead;
+    if (te == NULL) return -1;
+
+    /* Scan the (unsorted) list for the soonest timer that is not
+     * flagged for lazy deletion. */
+    aeTimeEvent *earliest = NULL;
+    while (te) {
+        if ((!earliest || te->when < earliest->when) && te->id != AE_DELETED_EVENT_ID)
+            earliest = te;
+        te = te->next;
+    }
+
+    /* Every pending timer may be AE_DELETED_EVENT_ID (deleted but not yet
+     * reaped by processTimeEvents): in that case there is no timer to wait
+     * for. Previously this fell through and dereferenced a NULL 'earliest'. */
+    if (earliest == NULL) return -1;
+
+    monotime now = getMonotonicUs();
+    return (now >= earliest->when) ? 0 : earliest->when - now;
+}
+
+/* Process time events */
+/* Walk the time-event list once: reap lazily-deleted events, fire every
+ * event whose deadline has passed, and return how many fired. */
+static int processTimeEvents(aeEventLoop *eventLoop) {
+ int processed = 0;
+ aeTimeEvent *te;
+ long long maxId;
+
+ te = eventLoop->timeEventHead;
+ maxId = eventLoop->timeEventNextId-1;
+ monotime now = getMonotonicUs();
+ while(te) {
+ long long id;
+
+ /* Remove events scheduled for deletion. */
+ if (te->id == AE_DELETED_EVENT_ID) {
+ aeTimeEvent *next = te->next;
+ /* If a reference exists for this timer event,
+ * don't free it. This is currently incremented
+ * for recursive timerProc calls */
+ if (te->refcount) {
+ te = next;
+ continue;
+ }
+ if (te->prev)
+ te->prev->next = te->next;
+ else
+ eventLoop->timeEventHead = te->next;
+ if (te->next)
+ te->next->prev = te->prev;
+ if (te->finalizerProc) {
+ te->finalizerProc(eventLoop, te->clientData);
+ /* The finalizer may take time; refresh 'now' so later
+ * deadline checks stay accurate. */
+ now = getMonotonicUs();
+ }
+ zfree(te);
+ te = next;
+ continue;
+ }
+
+ /* Make sure we don't process time events created by time events in
+ * this iteration. Note that this check is currently useless: we always
+ * add new timers on the head, however if we change the implementation
+ * detail, this check may be useful again: we keep it here for future
+ * defense. */
+ if (te->id > maxId) {
+ te = te->next;
+ continue;
+ }
+
+ if (te->when <= now) {
+ int retval;
+
+ id = te->id;
+ /* Hold a reference across the callback so a recursive delete
+ * cannot free this node under us. */
+ te->refcount++;
+ retval = te->timeProc(eventLoop, id, te->clientData);
+ te->refcount--;
+ processed++;
+ now = getMonotonicUs();
+ if (retval != AE_NOMORE) {
+ /* timeProc returned the next period in ms: reschedule. */
+ te->when = now + (monotime)retval * 1000;
+ } else {
+ /* One-shot event: flag it for lazy deletion on the next pass. */
+ te->id = AE_DELETED_EVENT_ID;
+ }
+ }
+ te = te->next;
+ }
+ return processed;
+}
+
+/* Process every pending time event, then every pending file event
+ * (that may be registered by time event callbacks just processed).
+ * Without special flags the function sleeps until some file event
+ * fires, or when the next time event occurs (if any).
+ *
+ * If flags is 0, the function does nothing and returns.
+ * if flags has AE_ALL_EVENTS set, all the kind of events are processed.
+ * if flags has AE_FILE_EVENTS set, file events are processed.
+ * if flags has AE_TIME_EVENTS set, time events are processed.
+ * if flags has AE_DONT_WAIT set, the function returns ASAP once all
+ * the events that can be handled without a wait are processed.
+ * if flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called.
+ * if flags has AE_CALL_BEFORE_SLEEP set, the beforesleep callback is called.
+ *
+ * The function returns the number of events processed. */
+int aeProcessEvents(aeEventLoop *eventLoop, int flags)
+{
+ int processed = 0, numevents;
+
+ /* Nothing to do? return ASAP */
+ if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0;
+
+ /* Note that we want to call aeApiPoll() even if there are no
+ * file events to process as long as we want to process time
+ * events, in order to sleep until the next time event is ready
+ * to fire. */
+ if (eventLoop->maxfd != -1 ||
+ ((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
+ int j;
+ struct timeval tv, *tvp = NULL; /* NULL means infinite wait. */
+ int64_t usUntilTimer;
+
+ if (eventLoop->beforesleep != NULL && (flags & AE_CALL_BEFORE_SLEEP))
+ eventLoop->beforesleep(eventLoop);
+
+ /* The eventLoop->flags may be changed inside beforesleep.
+ * So we should check it after beforesleep be called. At the same time,
+ * the parameter flags always should have the highest priority.
+ * That is to say, once the parameter flag is set to AE_DONT_WAIT,
+ * no matter what value eventLoop->flags is set to, we should ignore it. */
+ if ((flags & AE_DONT_WAIT) || (eventLoop->flags & AE_DONT_WAIT)) {
+ /* Zero timeout: poll without blocking. */
+ tv.tv_sec = tv.tv_usec = 0;
+ tvp = &tv;
+ } else if (flags & AE_TIME_EVENTS) {
+ usUntilTimer = usUntilEarliestTimer(eventLoop);
+ if (usUntilTimer >= 0) {
+ tv.tv_sec = usUntilTimer / 1000000;
+ tv.tv_usec = usUntilTimer % 1000000;
+ tvp = &tv;
+ }
+ }
+ /* Call the multiplexing API, will return only on timeout or when
+ * some event fires. */
+ numevents = aeApiPoll(eventLoop, tvp);
+
+ /* Don't process file events if not requested. */
+ if (!(flags & AE_FILE_EVENTS)) {
+ numevents = 0;
+ }
+
+ /* After sleep callback. */
+ if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
+ eventLoop->aftersleep(eventLoop);
+
+ for (j = 0; j < numevents; j++) {
+ int fd = eventLoop->fired[j].fd;
+ aeFileEvent *fe = &eventLoop->events[fd];
+ int mask = eventLoop->fired[j].mask;
+ int fired = 0; /* Number of events fired for current fd. */
+
+ /* Normally we execute the readable event first, and the writable
+ * event later. This is useful as sometimes we may be able
+ * to serve the reply of a query immediately after processing the
+ * query.
+ *
+ * However if AE_BARRIER is set in the mask, our application is
+ * asking us to do the reverse: never fire the writable event
+ * after the readable. In such a case, we invert the calls.
+ * This is useful when, for instance, we want to do things
+ * in the beforeSleep() hook, like fsyncing a file to disk,
+ * before replying to a client. */
+ int invert = fe->mask & AE_BARRIER;
+
+ /* Note the "fe->mask & mask & ..." code: maybe an already
+ * processed event removed an element that fired and we still
+ * didn't processed, so we check if the event is still valid.
+ *
+ * Fire the readable event if the call sequence is not
+ * inverted. */
+ if (!invert && fe->mask & mask & AE_READABLE) {
+ fe->rfileProc(eventLoop,fd,fe->clientData,mask);
+ fired++;
+ fe = &eventLoop->events[fd]; /* Refresh in case of resize. */
+ }
+
+ /* Fire the writable event. */
+ if (fe->mask & mask & AE_WRITABLE) {
+ /* Skip if the same proc already ran as the read handler. */
+ if (!fired || fe->wfileProc != fe->rfileProc) {
+ fe->wfileProc(eventLoop,fd,fe->clientData,mask);
+ fired++;
+ }
+ }
+
+ /* If we have to invert the call, fire the readable event now
+ * after the writable one. */
+ if (invert) {
+ fe = &eventLoop->events[fd]; /* Refresh in case of resize. */
+ if ((fe->mask & mask & AE_READABLE) &&
+ (!fired || fe->wfileProc != fe->rfileProc))
+ {
+ fe->rfileProc(eventLoop,fd,fe->clientData,mask);
+ fired++;
+ }
+ }
+
+ processed++;
+ }
+ }
+ /* Check time events */
+ if (flags & AE_TIME_EVENTS)
+ processed += processTimeEvents(eventLoop);
+
+ return processed; /* return the number of processed file/time events */
+}
+
+/* Wait for milliseconds until the given file descriptor becomes
+ * writable/readable/exception */
+/* Wait for milliseconds until the given file descriptor becomes
+ * writable/readable/exception. Returns the AE_* mask of ready events,
+ * 0 on timeout, or -1 on poll() error (the raw retval is passed through). */
+int aeWait(int fd, int mask, long long milliseconds) {
+ struct pollfd pfd;
+ int retmask = 0, retval;
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = fd;
+ if (mask & AE_READABLE) pfd.events |= POLLIN;
+ if (mask & AE_WRITABLE) pfd.events |= POLLOUT;
+
+ if ((retval = poll(&pfd, 1, milliseconds))== 1) {
+ if (pfd.revents & POLLIN) retmask |= AE_READABLE;
+ if (pfd.revents & POLLOUT) retmask |= AE_WRITABLE;
+ /* Error/hangup are reported as writable, presumably so the caller's
+ * next write attempt surfaces the error -- NOTE(review): confirm. */
+ if (pfd.revents & POLLERR) retmask |= AE_WRITABLE;
+ if (pfd.revents & POLLHUP) retmask |= AE_WRITABLE;
+ return retmask;
+ } else {
+ return retval;
+ }
+}
+
+void aeMain(aeEventLoop *eventLoop) {
+ eventLoop->stop = 0;
+ while (!eventLoop->stop) {
+ aeProcessEvents(eventLoop, AE_ALL_EVENTS|
+ AE_CALL_BEFORE_SLEEP|
+ AE_CALL_AFTER_SLEEP);
+ }
+}
+
+char *aeGetApiName(void) {
+ return aeApiName();
+}
+
+void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) {
+ eventLoop->beforesleep = beforesleep;
+}
+
+void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep) {
+ eventLoop->aftersleep = aftersleep;
+}
diff --git a/src/ae.h b/src/ae.h
new file mode 100644
index 0000000..70ce8a2
--- /dev/null
+++ b/src/ae.h
@@ -0,0 +1,136 @@
+/* A simple event-driven programming library. Originally I wrote this code
+ * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated
+ * it in form of a library for easy reuse.
+ *
+ * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __AE_H__
+#define __AE_H__
+
+#include "monotonic.h"
+
+#define AE_OK 0
+#define AE_ERR -1
+
+#define AE_NONE 0 /* No events registered. */
+#define AE_READABLE 1 /* Fire when descriptor is readable. */
+#define AE_WRITABLE 2 /* Fire when descriptor is writable. */
+#define AE_BARRIER 4 /* With WRITABLE, never fire the event if the
+ READABLE event already fired in the same event
+ loop iteration. Useful when you want to persist
+ things to disk before sending replies, and want
+ to do that in a group fashion. */
+
+#define AE_FILE_EVENTS (1<<0)
+#define AE_TIME_EVENTS (1<<1)
+#define AE_ALL_EVENTS (AE_FILE_EVENTS|AE_TIME_EVENTS)
+#define AE_DONT_WAIT (1<<2)
+#define AE_CALL_BEFORE_SLEEP (1<<3)
+#define AE_CALL_AFTER_SLEEP (1<<4)
+
+#define AE_NOMORE -1
+#define AE_DELETED_EVENT_ID -1
+
+/* Macros */
+#define AE_NOTUSED(V) ((void) V)
+
+struct aeEventLoop;
+
+/* Types and data structures */
+typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
+typedef int aeTimeProc(struct aeEventLoop *eventLoop, long long id, void *clientData);
+typedef void aeEventFinalizerProc(struct aeEventLoop *eventLoop, void *clientData);
+typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop);
+
+/* File event structure */
+typedef struct aeFileEvent {
+ int mask; /* one of AE_(READABLE|WRITABLE|BARRIER) */
+ aeFileProc *rfileProc;
+ aeFileProc *wfileProc;
+ void *clientData;
+} aeFileEvent;
+
+/* Time event structure */
+typedef struct aeTimeEvent {
+ long long id; /* time event identifier. */
+ monotime when;
+ aeTimeProc *timeProc;
+ aeEventFinalizerProc *finalizerProc;
+ void *clientData;
+ struct aeTimeEvent *prev;
+ struct aeTimeEvent *next;
+ int refcount; /* refcount to prevent timer events from being
+ * freed in recursive time event calls. */
+} aeTimeEvent;
+
+/* A fired event */
+typedef struct aeFiredEvent {
+ int fd;
+ int mask;
+} aeFiredEvent;
+
+/* State of an event based program */
+typedef struct aeEventLoop {
+ int maxfd; /* highest file descriptor currently registered */
+ int setsize; /* max number of file descriptors tracked */
+ long long timeEventNextId;
+ aeFileEvent *events; /* Registered events */
+ aeFiredEvent *fired; /* Fired events */
+ aeTimeEvent *timeEventHead;
+ int stop;
+ void *apidata; /* This is used for polling API specific data */
+ aeBeforeSleepProc *beforesleep;
+ aeBeforeSleepProc *aftersleep;
+ int flags;
+} aeEventLoop;
+
+/* Prototypes */
+aeEventLoop *aeCreateEventLoop(int setsize);
+void aeDeleteEventLoop(aeEventLoop *eventLoop);
+void aeStop(aeEventLoop *eventLoop);
+int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
+ aeFileProc *proc, void *clientData);
+void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask);
+int aeGetFileEvents(aeEventLoop *eventLoop, int fd);
+void *aeGetFileClientData(aeEventLoop *eventLoop, int fd);
+long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
+ aeTimeProc *proc, void *clientData,
+ aeEventFinalizerProc *finalizerProc);
+int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id);
+int aeProcessEvents(aeEventLoop *eventLoop, int flags);
+int aeWait(int fd, int mask, long long milliseconds);
+void aeMain(aeEventLoop *eventLoop);
+char *aeGetApiName(void);
+void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
+void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep);
+int aeGetSetSize(aeEventLoop *eventLoop);
+int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);
+void aeSetDontWait(aeEventLoop *eventLoop, int noWait);
+
+#endif
diff --git a/src/ae_epoll.c b/src/ae_epoll.c
new file mode 100644
index 0000000..493ffca
--- /dev/null
+++ b/src/ae_epoll.c
@@ -0,0 +1,139 @@
+/* Linux epoll(2) based ae.c module
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/epoll.h>
+
+typedef struct aeApiState {
+ int epfd;
+ struct epoll_event *events;
+} aeApiState;
+
+static int aeApiCreate(aeEventLoop *eventLoop) {
+ aeApiState *state = zmalloc(sizeof(aeApiState));
+
+ if (!state) return -1;
+ state->events = zmalloc(sizeof(struct epoll_event)*eventLoop->setsize);
+ if (!state->events) {
+ zfree(state);
+ return -1;
+ }
+ state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */
+ if (state->epfd == -1) {
+ zfree(state->events);
+ zfree(state);
+ return -1;
+ }
+ anetCloexec(state->epfd);
+ eventLoop->apidata = state;
+ return 0;
+}
+
+static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
+ aeApiState *state = eventLoop->apidata;
+
+ state->events = zrealloc(state->events, sizeof(struct epoll_event)*setsize);
+ return 0;
+}
+
+static void aeApiFree(aeEventLoop *eventLoop) {
+ aeApiState *state = eventLoop->apidata;
+
+ close(state->epfd);
+ zfree(state->events);
+ zfree(state);
+}
+
+/* Register (or extend) interest in 'mask' events for 'fd' with epoll.
+ * Returns 0 on success, -1 on epoll_ctl failure. */
+static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
+ aeApiState *state = eventLoop->apidata;
+ struct epoll_event ee = {0}; /* avoid valgrind warning */
+ /* If the fd was already monitored for some event, we need a MOD
+ * operation. Otherwise we need an ADD operation. */
+ int op = eventLoop->events[fd].mask == AE_NONE ?
+ EPOLL_CTL_ADD : EPOLL_CTL_MOD;
+
+ ee.events = 0;
+ mask |= eventLoop->events[fd].mask; /* Merge old events */
+ if (mask & AE_READABLE) ee.events |= EPOLLIN;
+ if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
+ ee.data.fd = fd;
+ if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
+ return 0;
+}
+
+/* Drop 'delmask' interest for 'fd': MOD epoll with the remaining mask,
+ * or DEL the fd entirely when nothing is left. epoll_ctl errors are
+ * intentionally ignored here. */
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) {
+ aeApiState *state = eventLoop->apidata;
+ struct epoll_event ee = {0}; /* avoid valgrind warning */
+ int mask = eventLoop->events[fd].mask & (~delmask);
+
+ ee.events = 0;
+ if (mask & AE_READABLE) ee.events |= EPOLLIN;
+ if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
+ ee.data.fd = fd;
+ if (mask != AE_NONE) {
+ epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee);
+ } else {
+ /* Note, Kernel < 2.6.9 requires a non null event pointer even for
+ * EPOLL_CTL_DEL. */
+ epoll_ctl(state->epfd,EPOLL_CTL_DEL,fd,&ee);
+ }
+}
+
+/* Block in epoll_wait up to 'tvp' (NULL = forever) and translate the
+ * fired epoll events into the eventLoop->fired array. Returns the number
+ * of fired events; panics on any epoll_wait error other than EINTR. */
+static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
+ aeApiState *state = eventLoop->apidata;
+ int retval, numevents = 0;
+
+ /* Round the microsecond part up so we never sleep a shorter
+ * (truncated) interval than requested. */
+ retval = epoll_wait(state->epfd,state->events,eventLoop->setsize,
+ tvp ? (tvp->tv_sec*1000 + (tvp->tv_usec + 999)/1000) : -1);
+ if (retval > 0) {
+ int j;
+
+ numevents = retval;
+ for (j = 0; j < numevents; j++) {
+ int mask = 0;
+ struct epoll_event *e = state->events+j;
+
+ /* ERR/HUP are mapped to both directions so handlers get a
+ * chance to observe the condition. */
+ if (e->events & EPOLLIN) mask |= AE_READABLE;
+ if (e->events & EPOLLOUT) mask |= AE_WRITABLE;
+ if (e->events & EPOLLERR) mask |= AE_WRITABLE|AE_READABLE;
+ if (e->events & EPOLLHUP) mask |= AE_WRITABLE|AE_READABLE;
+ eventLoop->fired[j].fd = e->data.fd;
+ eventLoop->fired[j].mask = mask;
+ }
+ } else if (retval == -1 && errno != EINTR) {
+ panic("aeApiPoll: epoll_wait, %s", strerror(errno));
+ }
+
+ return numevents;
+}
+
+static char *aeApiName(void) {
+ return "epoll";
+}
diff --git a/src/ae_evport.c b/src/ae_evport.c
new file mode 100644
index 0000000..55393e9
--- /dev/null
+++ b/src/ae_evport.c
@@ -0,0 +1,321 @@
+/* ae.c module for illumos event ports.
+ *
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <errno.h>
+#include <port.h>
+#include <poll.h>
+
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include <stdio.h>
+
+static int evport_debug = 0;
+
+/*
+ * This file implements the ae API using event ports, present on Solaris-based
+ * systems since Solaris 10. Using the event port interface, we associate file
+ * descriptors with the port. Each association also includes the set of poll(2)
+ * events that the consumer is interested in (e.g., POLLIN and POLLOUT).
+ *
+ * There's one tricky piece to this implementation: when we return events via
+ * aeApiPoll, the corresponding file descriptors become dissociated from the
+ * port. This is necessary because poll events are level-triggered, so if the
+ * fd didn't become dissociated, it would immediately fire another event since
+ * the underlying state hasn't changed yet. We must re-associate the file
+ * descriptor, but only after we know that our caller has actually read from it.
+ * The ae API does not tell us exactly when that happens, but we do know that
+ * it must happen by the time aeApiPoll is called again. Our solution is to
+ * keep track of the last fds returned by aeApiPoll and re-associate them next
+ * time aeApiPoll is invoked.
+ *
+ * To summarize, in this module, each fd association is EITHER (a) represented
+ * only via the in-kernel association OR (b) represented by pending_fds and
+ * pending_masks. (b) is only true for the last fds we returned from aeApiPoll,
+ * and only until we enter aeApiPoll again (at which point we restore the
+ * in-kernel association).
+ */
+#define MAX_EVENT_BATCHSZ 512
+
+/* Per-event-loop private state for the event-ports backend. The pending_*
+ * arrays track fds returned by the last aeApiPoll() call, which are
+ * dissociated from the port until the next poll (see file comment). */
+typedef struct aeApiState {
+    int     portfd;                             /* event port */
+    uint_t  npending;                           /* # of pending fds */
+    int     pending_fds[MAX_EVENT_BATCHSZ];     /* pending fds */
+    int     pending_masks[MAX_EVENT_BATCHSZ];   /* pending fds' masks */
+} aeApiState;
+
+/* Create the event port and initialize the pending-fd bookkeeping.
+ * Returns 0 on success, -1 on allocation or port_create(2) failure. */
+static int aeApiCreate(aeEventLoop *eventLoop) {
+    int i;
+    aeApiState *state = zmalloc(sizeof(aeApiState));
+    if (!state) return -1;
+
+    state->portfd = port_create();
+    if (state->portfd == -1) {
+        zfree(state);
+        return -1;
+    }
+    anetCloexec(state->portfd); /* don't leak the port fd across exec */
+
+    state->npending = 0;
+
+    /* Mark every pending slot as unused. */
+    for (i = 0; i < MAX_EVENT_BATCHSZ; i++) {
+        state->pending_fds[i] = -1;
+        state->pending_masks[i] = AE_NONE;
+    }
+
+    eventLoop->apidata = state;
+    return 0;
+}
+
+/* Event ports keep no per-fd storage in userspace, so resizing the event
+ * loop requires no work from this backend. Always succeeds. */
+static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
+    (void) eventLoop;
+    (void) setsize;
+    /* Nothing to resize here. */
+    return 0;
+}
+
+/* Release the backend state: close the event port and free the struct. */
+static void aeApiFree(aeEventLoop *eventLoop) {
+    aeApiState *state = eventLoop->apidata;
+
+    close(state->portfd);
+    zfree(state);
+}
+
+/* Linear scan of the pending list for 'fd'. Returns its index inside
+ * pending_fds[]/pending_masks[], or -1 if the fd is not pending. */
+static int aeApiLookupPending(aeApiState *state, int fd) {
+    uint_t i;
+
+    for (i = 0; i < state->npending; i++) {
+        if (state->pending_fds[i] == fd)
+            return (i);
+    }
+
+    return (-1);
+}
+
+/*
+ * Helper function to invoke port_associate for the given fd and mask.
+ */
+/*
+ * Helper function to invoke port_associate for the given fd and mask.
+ * 'where' names the caller for diagnostics. The ae mask itself is stashed
+ * as the association's user data so aeApiPoll can recover it later.
+ * Returns port_associate(3C)'s result: 0 on success, -1 on failure.
+ */
+static int aeApiAssociate(const char *where, int portfd, int fd, int mask) {
+    int events = 0;
+    int rv, err;
+
+    if (mask & AE_READABLE)
+        events |= POLLIN;
+    if (mask & AE_WRITABLE)
+        events |= POLLOUT;
+
+    if (evport_debug)
+        fprintf(stderr, "%s: port_associate(%d, 0x%x) = ", where, fd, events);
+
+    rv = port_associate(portfd, PORT_SOURCE_FD, fd, events,
+        (void *)(uintptr_t)mask);
+    err = errno; /* save errno: the fprintf below may clobber it */
+
+    if (evport_debug)
+        fprintf(stderr, "%d (%s)\n", rv, rv == 0 ? "no error" : strerror(err));
+
+    if (rv == -1) {
+        fprintf(stderr, "%s: port_associate: %s\n", where, strerror(err));
+
+        /* NOTE(review): this diagnostic lacks a trailing newline; confirm
+         * whether that is intentional before changing the string. */
+        if (err == EAGAIN)
+            fprintf(stderr, "aeApiAssociate: event port limit exceeded.");
+    }
+
+    return rv;
+}
+
+/* Start monitoring 'mask' events for 'fd', merged with whatever events
+ * are already registered. Returns 0 on success, -1 on failure. */
+static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    int fullmask, pfd;
+
+    if (evport_debug)
+        fprintf(stderr, "aeApiAddEvent: fd %d mask 0x%x\n", fd, mask);
+
+    /*
+     * Since port_associate's "events" argument replaces any existing events, we
+     * must be sure to include whatever events are already associated when
+     * we call port_associate() again.
+     */
+    fullmask = mask | eventLoop->events[fd].mask;
+    pfd = aeApiLookupPending(state, fd);
+
+    if (pfd != -1) {
+        /*
+         * This fd was recently returned from aeApiPoll. It should be safe to
+         * assume that the consumer has processed that poll event, but we play
+         * it safer by simply updating pending_mask. The fd will be
+         * re-associated as usual when aeApiPoll is called again.
+         */
+        if (evport_debug)
+            fprintf(stderr, "aeApiAddEvent: adding to pending fd %d\n", fd);
+        state->pending_masks[pfd] |= fullmask;
+        return 0;
+    }
+
+    /* Not pending: the fd is (or must become) associated in-kernel. */
+    return (aeApiAssociate("aeApiAddEvent", state->portfd, fd, fullmask));
+}
+
+/* Stop monitoring 'mask' events for 'fd'. Depending on whether the fd is
+ * currently pending (dissociated after a poll) or associated in-kernel,
+ * this updates the pending mask or re-associates/dissociates the fd.
+ * Unrecoverable port errors abort the process. */
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    int fullmask, pfd;
+
+    if (evport_debug)
+        fprintf(stderr, "del fd %d mask 0x%x\n", fd, mask);
+
+    pfd = aeApiLookupPending(state, fd);
+
+    if (pfd != -1) {
+        if (evport_debug)
+            fprintf(stderr, "deleting event from pending fd %d\n", fd);
+
+        /*
+         * This fd was just returned from aeApiPoll, so it's not currently
+         * associated with the port. All we need to do is update
+         * pending_mask appropriately.
+         */
+        state->pending_masks[pfd] &= ~mask;
+
+        /* No events left: free the pending slot. */
+        if (state->pending_masks[pfd] == AE_NONE)
+            state->pending_fds[pfd] = -1;
+
+        return;
+    }
+
+    /*
+     * The fd is currently associated with the port. Like with the add case
+     * above, we must look at the full mask for the file descriptor before
+     * updating that association. We don't have a good way of knowing what the
+     * events are without looking into the eventLoop state directly. We rely on
+     * the fact that our caller has already updated the mask in the eventLoop.
+     */
+
+    fullmask = eventLoop->events[fd].mask;
+    if (fullmask == AE_NONE) {
+        /*
+         * We're removing *all* events, so use port_dissociate to remove the
+         * association completely. Failure here indicates a bug.
+         */
+        if (evport_debug)
+            fprintf(stderr, "aeApiDelEvent: port_dissociate(%d)\n", fd);
+
+        if (port_dissociate(state->portfd, PORT_SOURCE_FD, fd) != 0) {
+            perror("aeApiDelEvent: port_dissociate");
+            abort(); /* will not return */
+        }
+    } else if (aeApiAssociate("aeApiDelEvent", state->portfd, fd,
+        fullmask) != 0) {
+        /*
+         * ENOMEM is a potentially transient condition, but the kernel won't
+         * generally return it unless things are really bad. EAGAIN indicates
+         * we've reached a resource limit, for which it doesn't make sense to
+         * retry (counter-intuitively). All other errors indicate a bug. In any
+         * of these cases, the best we can do is to abort.
+         */
+        abort(); /* will not return */
+    }
+}
+
+/* Wait for events with an optional timeout ('tvp', NULL = block forever).
+ * First re-associates the fds returned by the previous call (they were
+ * dissociated by the kernel when delivered), then collects up to
+ * MAX_EVENT_BATCHSZ events into eventLoop->fired[]. Returns the number
+ * of fired events; fatal port errors panic. */
+static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
+    aeApiState *state = eventLoop->apidata;
+    struct timespec timeout, *tsp;
+    uint_t mask, i;
+    uint_t nevents;
+    port_event_t event[MAX_EVENT_BATCHSZ];
+
+    /*
+     * If we've returned fd events before, we must re-associate them with the
+     * port now, before calling port_get(). See the block comment at the top of
+     * this file for an explanation of why.
+     */
+    for (i = 0; i < state->npending; i++) {
+        if (state->pending_fds[i] == -1)
+            /* This fd has since been deleted. */
+            continue;
+
+        if (aeApiAssociate("aeApiPoll", state->portfd,
+            state->pending_fds[i], state->pending_masks[i]) != 0) {
+            /* See aeApiDelEvent for why this case is fatal. */
+            abort();
+        }
+
+        state->pending_masks[i] = AE_NONE;
+        state->pending_fds[i] = -1;
+    }
+
+    state->npending = 0;
+
+    /* Convert the timeval timeout to the timespec port_getn expects. */
+    if (tvp != NULL) {
+        timeout.tv_sec = tvp->tv_sec;
+        timeout.tv_nsec = tvp->tv_usec * 1000;
+        tsp = &timeout;
+    } else {
+        tsp = NULL;
+    }
+
+    /*
+     * port_getn can return with errno == ETIME having returned some events (!).
+     * So if we get ETIME, we check nevents, too.
+     */
+    nevents = 1; /* wait for at least one event */
+    if (port_getn(state->portfd, event, MAX_EVENT_BATCHSZ, &nevents,
+        tsp) == -1 && (errno != ETIME || nevents == 0)) {
+        if (errno == ETIME || errno == EINTR)
+            return 0;
+
+        /* Any other error indicates a bug. */
+        panic("aeApiPoll: port_getn, %s", strerror(errno));
+    }
+
+    state->npending = nevents;
+
+    for (i = 0; i < nevents; i++) {
+        mask = 0;
+        if (event[i].portev_events & POLLIN)
+            mask |= AE_READABLE;
+        if (event[i].portev_events & POLLOUT)
+            mask |= AE_WRITABLE;
+
+        eventLoop->fired[i].fd = event[i].portev_object;
+        eventLoop->fired[i].mask = mask;
+
+        if (evport_debug)
+            fprintf(stderr, "aeApiPoll: fd %d mask 0x%x\n",
+                (int)event[i].portev_object, mask);
+
+        /* Remember this fd so we can re-associate it on the next poll.
+         * The full registered ae mask was stored as the association's
+         * user data by aeApiAssociate. */
+        state->pending_fds[i] = event[i].portev_object;
+        state->pending_masks[i] = (uintptr_t)event[i].portev_user;
+    }
+
+    return nevents;
+}
+
+/* Name of this multiplexing backend, reported e.g. by INFO. */
+static char *aeApiName(void) {
+    return "evport";
+}
diff --git a/src/ae_kqueue.c b/src/ae_kqueue.c
new file mode 100644
index 0000000..e845424
--- /dev/null
+++ b/src/ae_kqueue.c
@@ -0,0 +1,190 @@
+/* Kqueue(2)-based ae.c module
+ *
+ * Copyright (C) 2009 Harish Mallipeddi - harish.mallipeddi@gmail.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/time.h>
+
+/* Per-event-loop private state for the kqueue backend. */
+typedef struct aeApiState {
+    int kqfd;               /* kqueue(2) descriptor */
+    struct kevent *events;  /* buffer of setsize kevents for kevent(2) */
+
+    /* Events mask for merge read and write event.
+     * To reduce memory consumption, we use 2 bits to store the mask
+     * of an event, so that 1 byte will store the mask of 4 events. */
+    char *eventsMask;
+} aeApiState;
+
+/* Bytes needed to store 'sz' two-bit event masks (4 masks per byte). */
+#define EVENT_MASK_MALLOC_SIZE(sz) (((sz) + 3) / 4)
+/* Bit offset of fd's 2-bit slot within its byte. */
+#define EVENT_MASK_OFFSET(fd) ((fd) % 4 * 2)
+/* 'mask' (low 2 bits only) shifted into fd's slot position. */
+#define EVENT_MASK_ENCODE(fd, mask) (((mask) & 0x3) << EVENT_MASK_OFFSET(fd))
+
+/* Read the stored 2-bit mask for 'fd'. */
+static inline int getEventMask(const char *eventsMask, int fd) {
+    return (eventsMask[fd/4] >> EVENT_MASK_OFFSET(fd)) & 0x3;
+}
+
+/* OR 'mask' into the stored bits for 'fd' (does not clear old bits). */
+static inline void addEventMask(char *eventsMask, int fd, int mask) {
+    eventsMask[fd/4] |= EVENT_MASK_ENCODE(fd, mask);
+}
+
+/* Clear both stored bits for 'fd'. */
+static inline void resetEventMask(char *eventsMask, int fd) {
+    eventsMask[fd/4] &= ~EVENT_MASK_ENCODE(fd, 0x3);
+}
+
+/* Allocate the kqueue backend state: the kevent buffer, the kqueue fd and
+ * the zeroed per-fd mask bitmap. Returns 0 on success, -1 on failure. */
+static int aeApiCreate(aeEventLoop *eventLoop) {
+    aeApiState *state = zmalloc(sizeof(aeApiState));
+
+    if (!state) return -1;
+    state->events = zmalloc(sizeof(struct kevent)*eventLoop->setsize);
+    if (!state->events) {
+        zfree(state);
+        return -1;
+    }
+    state->kqfd = kqueue();
+    if (state->kqfd == -1) {
+        zfree(state->events);
+        zfree(state);
+        return -1;
+    }
+    anetCloexec(state->kqfd); /* don't leak the kqueue fd across exec */
+    state->eventsMask = zmalloc(EVENT_MASK_MALLOC_SIZE(eventLoop->setsize));
+    memset(state->eventsMask, 0, EVENT_MASK_MALLOC_SIZE(eventLoop->setsize));
+    eventLoop->apidata = state;
+    return 0;
+}
+
+/* Grow (or shrink) the kevent buffer and the mask bitmap to 'setsize'
+ * fds. The bitmap is fully cleared afterwards. Always returns 0. */
+static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
+    aeApiState *state = eventLoop->apidata;
+
+    state->events = zrealloc(state->events, sizeof(struct kevent)*setsize);
+    state->eventsMask = zrealloc(state->eventsMask, EVENT_MASK_MALLOC_SIZE(setsize));
+    memset(state->eventsMask, 0, EVENT_MASK_MALLOC_SIZE(setsize));
+    return 0;
+}
+
+/* Release the backend state: close the kqueue fd and free all buffers. */
+static void aeApiFree(aeEventLoop *eventLoop) {
+    aeApiState *state = eventLoop->apidata;
+
+    close(state->kqfd);
+    zfree(state->events);
+    zfree(state->eventsMask);
+    zfree(state);
+}
+
+/* Register 'fd' for the events in 'mask'. Read and write interest are
+ * separate kqueue filters, so up to two kevent(2) calls are made.
+ * Returns 0 on success, -1 if any registration fails (note: if the read
+ * filter succeeds and the write filter fails, the read filter stays
+ * registered). */
+static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    struct kevent ke;
+
+    if (mask & AE_READABLE) {
+        EV_SET(&ke, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+        if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1;
+    }
+    if (mask & AE_WRITABLE) {
+        EV_SET(&ke, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
+        if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1;
+    }
+    return 0;
+}
+
+/* Unregister the events in 'mask' for 'fd'. Deletion is best effort:
+ * kevent(2) return values are deliberately ignored. */
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+    struct kevent ke;
+
+    if (mask & AE_READABLE) {
+        EV_SET(&ke, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
+        kevent(state->kqfd, &ke, 1, NULL, 0, NULL);
+    }
+    if (mask & AE_WRITABLE) {
+        EV_SET(&ke, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+        kevent(state->kqfd, &ke, 1, NULL, 0, NULL);
+    }
+}
+
+/* Wait for events with an optional timeout ('tvp', NULL = block forever).
+ * kqueue reports read and write readiness as separate kevents, so the
+ * results are merged per fd (via the 2-bit eventsMask bitmap) before
+ * being stored into eventLoop->fired[]. Returns the number of distinct
+ * fds fired; fatal kevent(2) errors panic. */
+static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
+    aeApiState *state = eventLoop->apidata;
+    int retval, numevents = 0;
+
+    if (tvp != NULL) {
+        struct timespec timeout;
+        timeout.tv_sec = tvp->tv_sec;
+        timeout.tv_nsec = tvp->tv_usec * 1000;
+        retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize,
+                        &timeout);
+    } else {
+        retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize,
+                        NULL);
+    }
+
+    if (retval > 0) {
+        int j;
+
+        /* Normally we execute the read event first and then the write event.
+         * When the barrier is set, we will do it reverse.
+         *
+         * However, under kqueue, read and write events would be separate
+         * events, which would make it impossible to control the order of
+         * reads and writes. So we store the event's mask we've got and merge
+         * the same fd events later. */
+        for (j = 0; j < retval; j++) {
+            struct kevent *e = state->events+j;
+            int fd = e->ident;
+            int mask = 0;
+
+            if (e->filter == EVFILT_READ) mask = AE_READABLE;
+            else if (e->filter == EVFILT_WRITE) mask = AE_WRITABLE;
+            addEventMask(state->eventsMask, fd, mask);
+        }
+
+        /* Re-traversal to merge read and write events, and set the fd's mask to
+         * 0 so that events are not added again when the fd is encountered again. */
+        numevents = 0;
+        for (j = 0; j < retval; j++) {
+            struct kevent *e = state->events+j;
+            int fd = e->ident;
+            int mask = getEventMask(state->eventsMask, fd);
+
+            if (mask) {
+                eventLoop->fired[numevents].fd = fd;
+                eventLoop->fired[numevents].mask = mask;
+                resetEventMask(state->eventsMask, fd);
+                numevents++;
+            }
+        }
+    } else if (retval == -1 && errno != EINTR) {
+        /* Any failure other than an interrupted wait indicates a bug. */
+        panic("aeApiPoll: kevent, %s", strerror(errno));
+    }
+
+    return numevents;
+}
+
+/* Name of this multiplexing backend, reported e.g. by INFO. */
+static char *aeApiName(void) {
+    return "kqueue";
+}
diff --git a/src/ae_select.c b/src/ae_select.c
new file mode 100644
index 0000000..f8ef959
--- /dev/null
+++ b/src/ae_select.c
@@ -0,0 +1,110 @@
+/* Select()-based ae.c module.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/select.h>
+#include <string.h>
+
+/* Per-event-loop private state for the select(2) backend. */
+typedef struct aeApiState {
+    fd_set rfds, wfds;      /* master read/write interest sets */
+    /* We need to have a copy of the fd sets as it's not safe to reuse
+     * FD sets after select(). */
+    fd_set _rfds, _wfds;
+} aeApiState;
+
+/* Allocate the select backend state with empty interest sets.
+ * Returns 0 on success, -1 on allocation failure. */
+static int aeApiCreate(aeEventLoop *eventLoop) {
+    aeApiState *state = zmalloc(sizeof(aeApiState));
+
+    if (!state) return -1;
+    FD_ZERO(&state->rfds);
+    FD_ZERO(&state->wfds);
+    eventLoop->apidata = state;
+    return 0;
+}
+
+/* fd_set has a fixed capacity (FD_SETSIZE), so "resizing" only validates
+ * that the requested size fits. Returns 0 if it fits, -1 otherwise. */
+static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
+    AE_NOTUSED(eventLoop);
+    /* Just ensure we have enough room in the fd_set type. */
+    if (setsize >= FD_SETSIZE) return -1;
+    return 0;
+}
+
+/* Release the backend state. No fds are owned by this backend. */
+static void aeApiFree(aeEventLoop *eventLoop) {
+    zfree(eventLoop->apidata);
+}
+
+/* Add 'fd' to the read and/or write interest sets. Always succeeds. */
+static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+
+    if (mask & AE_READABLE) FD_SET(fd,&state->rfds);
+    if (mask & AE_WRITABLE) FD_SET(fd,&state->wfds);
+    return 0;
+}
+
+/* Remove 'fd' from the read and/or write interest sets. */
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
+    aeApiState *state = eventLoop->apidata;
+
+    if (mask & AE_READABLE) FD_CLR(fd,&state->rfds);
+    if (mask & AE_WRITABLE) FD_CLR(fd,&state->wfds);
+}
+
+/* Wait for events with an optional timeout ('tvp', NULL = block forever).
+ * Scratch copies of the interest sets are passed to select(2) since it
+ * mutates its arguments. Fired events go into eventLoop->fired[].
+ *
+ * NOTE(review): every registered fd is appended to fired[] when select
+ * reports activity, even if its own computed mask is 0 (registered but
+ * not ready) — so consumers must tolerate mask == AE_NONE entries. */
+static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
+    aeApiState *state = eventLoop->apidata;
+    int retval, j, numevents = 0;
+
+    memcpy(&state->_rfds,&state->rfds,sizeof(fd_set));
+    memcpy(&state->_wfds,&state->wfds,sizeof(fd_set));
+
+    retval = select(eventLoop->maxfd+1,
+                &state->_rfds,&state->_wfds,NULL,tvp);
+    if (retval > 0) {
+        /* select doesn't say which fds are ready; scan all of them. */
+        for (j = 0; j <= eventLoop->maxfd; j++) {
+            int mask = 0;
+            aeFileEvent *fe = &eventLoop->events[j];
+
+            if (fe->mask == AE_NONE) continue;
+            if (fe->mask & AE_READABLE && FD_ISSET(j,&state->_rfds))
+                mask |= AE_READABLE;
+            if (fe->mask & AE_WRITABLE && FD_ISSET(j,&state->_wfds))
+                mask |= AE_WRITABLE;
+            eventLoop->fired[numevents].fd = j;
+            eventLoop->fired[numevents].mask = mask;
+            numevents++;
+        }
+    } else if (retval == -1 && errno != EINTR) {
+        /* Any failure other than an interrupted wait indicates a bug. */
+        panic("aeApiPoll: select, %s", strerror(errno));
+    }
+
+    return numevents;
+}
+
+/* Name of this multiplexing backend, reported e.g. by INFO. */
+static char *aeApiName(void) {
+    return "select";
+}
diff --git a/src/anet.c b/src/anet.c
new file mode 100644
index 0000000..64824a2
--- /dev/null
+++ b/src/anet.c
@@ -0,0 +1,706 @@
+/* anet.c -- Basic TCP socket stuff made a bit less boring
+ *
+ * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fmacros.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <netdb.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "anet.h"
+#include "config.h"
+#include "util.h"
+
+#define UNUSED(x) (void)(x)
+
+/* Format an error message into the caller-supplied buffer 'err', which
+ * must be at least ANET_ERR_LEN bytes. A NULL 'err' is a silent no-op,
+ * letting callers opt out of error reporting. */
+static void anetSetError(char *err, const char *fmt, ...)
+{
+    va_list ap;
+
+    if (!err) return;
+    va_start(ap, fmt);
+    vsnprintf(err, ANET_ERR_LEN, fmt, ap);
+    va_end(ap);
+}
+
+/* Return the pending socket error for 'fd' via SO_ERROR (0 if none).
+ * If getsockopt itself fails, the getsockopt errno is returned instead. */
+int anetGetError(int fd) {
+    int sockerr = 0;
+    socklen_t errlen = sizeof(sockerr);
+
+    if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1)
+        sockerr = errno;
+    return sockerr;
+}
+
+/* Set O_NONBLOCK on 'fd' when 'non_block' is non-zero, clear it when zero.
+ * Returns ANET_OK on success, ANET_ERR on failure (with 'err' filled). */
+int anetSetBlock(char *err, int fd, int non_block) {
+    int flags;
+
+    /* Set the socket blocking (if non_block is zero) or non-blocking.
+     * Note that fcntl(2) for F_GETFL and F_SETFL can't be
+     * interrupted by a signal. */
+    if ((flags = fcntl(fd, F_GETFL)) == -1) {
+        anetSetError(err, "fcntl(F_GETFL): %s", strerror(errno));
+        return ANET_ERR;
+    }
+
+    /* Check if this flag has been set or unset, if so,
+     * then there is no need to call fcntl to set/unset it again. */
+    if (!!(flags & O_NONBLOCK) == !!non_block)
+        return ANET_OK;
+
+    if (non_block)
+        flags |= O_NONBLOCK;
+    else
+        flags &= ~O_NONBLOCK;
+
+    if (fcntl(fd, F_SETFL, flags) == -1) {
+        anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s", strerror(errno));
+        return ANET_ERR;
+    }
+    return ANET_OK;
+}
+
+/* Convenience wrapper: put 'fd' in non-blocking mode. */
+int anetNonBlock(char *err, int fd) {
+    return anetSetBlock(err,fd,1);
+}
+
+/* Convenience wrapper: put 'fd' in blocking mode. */
+int anetBlock(char *err, int fd) {
+    return anetSetBlock(err,fd,0);
+}
+
+/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks.
+ * This function should be invoked for fd's on specific places
+ * where fork + execve system calls are called. */
+/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks.
+ * This function should be invoked for fd's on specific places
+ * where fork + execve system calls are called.
+ * Returns the fcntl(2) result: >= 0 on success (or when the flag was
+ * already set), -1 on failure. */
+int anetCloexec(int fd) {
+    int r;
+    int flags;
+
+    /* Retry F_GETFD if interrupted by a signal. */
+    do {
+        r = fcntl(fd, F_GETFD);
+    } while (r == -1 && errno == EINTR);
+
+    /* Nothing to do if F_GETFD failed or the flag is already set. */
+    if (r == -1 || (r & FD_CLOEXEC))
+        return r;
+
+    flags = r | FD_CLOEXEC;
+
+    /* Retry F_SETFD if interrupted by a signal. */
+    do {
+        r = fcntl(fd, F_SETFD, flags);
+    } while (r == -1 && errno == EINTR);
+
+    return r;
+}
+
+/* Set TCP keep alive option to detect dead peers. The interval option
+ * is only used for Linux as we are using Linux-specific APIs to set
+ * the probe send time, interval, and count. */
+/* Set TCP keep alive option to detect dead peers. The interval option
+ * is only used for Linux as we are using Linux-specific APIs to set
+ * the probe send time, interval, and count. On macOS only the idle time
+ * is tunable; elsewhere only SO_KEEPALIVE itself is enabled.
+ * Returns ANET_OK on success, ANET_ERR on failure (with 'err' filled). */
+int anetKeepAlive(char *err, int fd, int interval)
+{
+    int val = 1;
+
+    if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) == -1)
+    {
+        anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno));
+        return ANET_ERR;
+    }
+
+#ifdef __linux__
+    /* Default settings are more or less garbage, with the keepalive time
+     * set to 7200 by default on Linux. Modify settings to make the feature
+     * actually useful. */
+
+    /* Send first probe after interval. */
+    val = interval;
+    if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) {
+        anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno));
+        return ANET_ERR;
+    }
+
+    /* Send next probes after the specified interval. Note that we set the
+     * delay as interval / 3, as we send three probes before detecting
+     * an error (see the next setsockopt call). */
+    val = interval/3;
+    if (val == 0) val = 1; /* never pass a zero interval */
+    if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0) {
+        anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno));
+        return ANET_ERR;
+    }
+
+    /* Consider the socket in error state after we send three ACK
+     * probes without getting a reply. */
+    val = 3;
+    if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0) {
+        anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno));
+        return ANET_ERR;
+    }
+#elif defined(__APPLE__)
+    /* Set idle time with interval */
+    val = interval;
+    if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val)) < 0) {
+        anetSetError(err, "setsockopt TCP_KEEPALIVE: %s\n", strerror(errno));
+        return ANET_ERR;
+    }
+#else
+    ((void) interval); /* Avoid unused var warning for non Linux systems. */
+#endif
+
+    return ANET_OK;
+}
+
+/* Set or clear TCP_NODELAY (Nagle's algorithm off/on) for 'fd'.
+ * Returns ANET_OK on success, ANET_ERR on failure (with 'err' filled). */
+static int anetSetTcpNoDelay(char *err, int fd, int val)
+{
+    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) == -1)
+    {
+        anetSetError(err, "setsockopt TCP_NODELAY: %s", strerror(errno));
+        return ANET_ERR;
+    }
+    return ANET_OK;
+}
+
+/* Enable TCP_NODELAY: send small writes immediately. */
+int anetEnableTcpNoDelay(char *err, int fd)
+{
+    return anetSetTcpNoDelay(err, fd, 1);
+}
+
+/* Disable TCP_NODELAY: let the kernel coalesce small writes (Nagle). */
+int anetDisableTcpNoDelay(char *err, int fd)
+{
+    return anetSetTcpNoDelay(err, fd, 0);
+}
+
+/* Set the socket send timeout (SO_SNDTIMEO socket option) to the specified
+ * number of milliseconds, or disable it if the 'ms' argument is zero. */
+/* Set the socket send timeout (SO_SNDTIMEO socket option) to the specified
+ * number of milliseconds, or disable it if the 'ms' argument is zero.
+ * Returns ANET_OK on success, ANET_ERR on failure (with 'err' filled). */
+int anetSendTimeout(char *err, int fd, long long ms) {
+    struct timeval tv;
+
+    /* Split milliseconds into whole seconds and microseconds. */
+    tv.tv_sec = ms/1000;
+    tv.tv_usec = (ms%1000)*1000;
+    if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) {
+        anetSetError(err, "setsockopt SO_SNDTIMEO: %s", strerror(errno));
+        return ANET_ERR;
+    }
+    return ANET_OK;
+}
+
+/* Set the socket receive timeout (SO_RCVTIMEO socket option) to the specified
+ * number of milliseconds, or disable it if the 'ms' argument is zero. */
+/* Set the socket receive timeout (SO_RCVTIMEO socket option) to the specified
+ * number of milliseconds, or disable it if the 'ms' argument is zero.
+ * Returns ANET_OK on success, ANET_ERR on failure (with 'err' filled). */
+int anetRecvTimeout(char *err, int fd, long long ms) {
+    struct timeval tv;
+
+    /* Split milliseconds into whole seconds and microseconds. */
+    tv.tv_sec = ms/1000;
+    tv.tv_usec = (ms%1000)*1000;
+    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) {
+        anetSetError(err, "setsockopt SO_RCVTIMEO: %s", strerror(errno));
+        return ANET_ERR;
+    }
+    return ANET_OK;
+}
+
+/* Resolve the hostname "host" and set the string representation of the
+ * IP address into the buffer pointed by "ipbuf".
+ *
+ * If flags is set to ANET_IP_ONLY the function only resolves hostnames
+ * that are actually already IPv4 or IPv6 addresses. This turns the function
+ * into a validating / normalizing function. */
+/* Resolve the hostname "host" and set the string representation of the
+ * IP address into the buffer pointed by "ipbuf" (of size "ipbuf_len").
+ *
+ * If flags is set to ANET_IP_ONLY the function only resolves hostnames
+ * that are actually already IPv4 or IPv6 addresses. This turns the function
+ * into a validating / normalizing function.
+ *
+ * Only the first getaddrinfo result is used; returns ANET_OK on success,
+ * ANET_ERR on resolution failure (with 'err' filled). */
+int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len,
+                       int flags)
+{
+    struct addrinfo hints, *info;
+    int rv;
+
+    memset(&hints,0,sizeof(hints));
+    if (flags & ANET_IP_ONLY) hints.ai_flags = AI_NUMERICHOST;
+    hints.ai_family = AF_UNSPEC;
+    hints.ai_socktype = SOCK_STREAM;  /* specify socktype to avoid dups */
+
+    if ((rv = getaddrinfo(host, NULL, &hints, &info)) != 0) {
+        anetSetError(err, "%s", gai_strerror(rv));
+        return ANET_ERR;
+    }
+    if (info->ai_family == AF_INET) {
+        struct sockaddr_in *sa = (struct sockaddr_in *)info->ai_addr;
+        inet_ntop(AF_INET, &(sa->sin_addr), ipbuf, ipbuf_len);
+    } else {
+        struct sockaddr_in6 *sa = (struct sockaddr_in6 *)info->ai_addr;
+        inet_ntop(AF_INET6, &(sa->sin6_addr), ipbuf, ipbuf_len);
+    }
+
+    freeaddrinfo(info);
+    return ANET_OK;
+}
+
+/* Enable SO_REUSEADDR on 'fd' so addresses in TIME_WAIT can be rebound.
+ * Returns ANET_OK on success, ANET_ERR on failure (with 'err' filled). */
+static int anetSetReuseAddr(char *err, int fd) {
+    int yes = 1;
+    /* Make sure connection-intensive things like the redis benchmark
+     * will be able to close/open sockets a zillion of times */
+    if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) == -1) {
+        anetSetError(err, "setsockopt SO_REUSEADDR: %s", strerror(errno));
+        return ANET_ERR;
+    }
+    return ANET_OK;
+}
+
+/* Create a SOCK_STREAM socket in 'domain' with SO_REUSEADDR set.
+ * Returns the socket fd on success, ANET_ERR on failure ('err' filled). */
+static int anetCreateSocket(char *err, int domain) {
+    int s;
+    if ((s = socket(domain, SOCK_STREAM, 0)) == -1) {
+        anetSetError(err, "creating socket: %s", strerror(errno));
+        return ANET_ERR;
+    }
+
+    /* Make sure connection-intensive things like the redis benchmark
+     * will be able to close/open sockets a zillion of times */
+    if (anetSetReuseAddr(err,s) == ANET_ERR) {
+        close(s);
+        return ANET_ERR;
+    }
+    return s;
+}
+
+#define ANET_CONNECT_NONE 0
+#define ANET_CONNECT_NONBLOCK 1    /* Return before the connect completes. */
+#define ANET_CONNECT_BE_BINDING 2 /* Best effort binding. */
+/* Connect to addr:port trying every address getaddrinfo returns, in order.
+ * If 'source_addr' is not NULL the socket is bound to it before connecting.
+ * With ANET_CONNECT_NONBLOCK the socket is non-blocking and an in-progress
+ * connect (EINPROGRESS) counts as success. With ANET_CONNECT_BE_BINDING a
+ * failed bind is retried once without any source address.
+ * Returns the socket fd, or ANET_ERR on failure (with 'err' filled). */
+static int anetTcpGenericConnect(char *err, const char *addr, int port,
+                                 const char *source_addr, int flags)
+{
+    int s = ANET_ERR, rv;
+    char portstr[6];  /* strlen("65535") + 1; */
+    struct addrinfo hints, *servinfo, *bservinfo, *p, *b;
+
+    snprintf(portstr,sizeof(portstr),"%d",port);
+    memset(&hints,0,sizeof(hints));
+    hints.ai_family = AF_UNSPEC;
+    hints.ai_socktype = SOCK_STREAM;
+
+    if ((rv = getaddrinfo(addr,portstr,&hints,&servinfo)) != 0) {
+        anetSetError(err, "%s", gai_strerror(rv));
+        return ANET_ERR;
+    }
+    for (p = servinfo; p != NULL; p = p->ai_next) {
+        /* Try to create the socket and to connect it.
+         * If we fail in the socket() call, or on connect(), we retry with
+         * the next entry in servinfo. */
+        if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1)
+            continue;
+        if (anetSetReuseAddr(err,s) == ANET_ERR) goto error;
+        if (flags & ANET_CONNECT_NONBLOCK && anetNonBlock(err,s) != ANET_OK)
+            goto error;
+        if (source_addr) {
+            int bound = 0;
+            /* Using getaddrinfo saves us from self-determining IPv4 vs IPv6 */
+            if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0)
+            {
+                anetSetError(err, "%s", gai_strerror(rv));
+                goto error;
+            }
+            /* Bind to the first source address that works. */
+            for (b = bservinfo; b != NULL; b = b->ai_next) {
+                if (bind(s,b->ai_addr,b->ai_addrlen) != -1) {
+                    bound = 1;
+                    break;
+                }
+            }
+            freeaddrinfo(bservinfo);
+            if (!bound) {
+                anetSetError(err, "bind: %s", strerror(errno));
+                goto error;
+            }
+        }
+        if (connect(s,p->ai_addr,p->ai_addrlen) == -1) {
+            /* If the socket is non-blocking, it is ok for connect() to
+             * return an EINPROGRESS error here. */
+            if (errno == EINPROGRESS && flags & ANET_CONNECT_NONBLOCK)
+                goto end;
+            close(s);
+            s = ANET_ERR;
+            continue;
+        }
+
+        /* If we ended an iteration of the for loop without errors, we
+         * have a connected socket. Let's return to the caller. */
+        goto end;
+    }
+    /* Loop exhausted: no address could even be turned into a socket. */
+    if (p == NULL)
+        anetSetError(err, "creating socket: %s", strerror(errno));
+
+error:
+    /* Close the partially set up socket, if any, before returning. */
+    if (s != ANET_ERR) {
+        close(s);
+        s = ANET_ERR;
+    }
+
+end:
+    freeaddrinfo(servinfo);
+
+    /* Handle best effort binding: if a binding address was used, but it is
+     * not possible to create a socket, try again without a binding address. */
+    if (s == ANET_ERR && source_addr && (flags & ANET_CONNECT_BE_BINDING)) {
+        return anetTcpGenericConnect(err,addr,port,NULL,flags);
+    } else {
+        return s;
+    }
+}
+
+int anetTcpNonBlockConnect(char *err, const char *addr, int port)
+{
+ return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONBLOCK);
+}
+
+int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port,
+ const char *source_addr)
+{
+ return anetTcpGenericConnect(err,addr,port,source_addr,
+ ANET_CONNECT_NONBLOCK|ANET_CONNECT_BE_BINDING);
+}
+
+int anetUnixGenericConnect(char *err, const char *path, int flags)
+{
+ int s;
+ struct sockaddr_un sa;
+
+ if ((s = anetCreateSocket(err,AF_LOCAL)) == ANET_ERR)
+ return ANET_ERR;
+
+ sa.sun_family = AF_LOCAL;
+ redis_strlcpy(sa.sun_path,path,sizeof(sa.sun_path));
+ if (flags & ANET_CONNECT_NONBLOCK) {
+ if (anetNonBlock(err,s) != ANET_OK) {
+ close(s);
+ return ANET_ERR;
+ }
+ }
+ if (connect(s,(struct sockaddr*)&sa,sizeof(sa)) == -1) {
+ if (errno == EINPROGRESS &&
+ flags & ANET_CONNECT_NONBLOCK)
+ return s;
+
+ anetSetError(err, "connect: %s", strerror(errno));
+ close(s);
+ return ANET_ERR;
+ }
+ return s;
+}
+
/* Bind 's' to address 'sa' and put it into listening mode with the given
 * backlog. For AF_LOCAL sockets, a non-zero 'perm' is applied to the socket
 * file with chmod() (best effort: the chmod() result is not checked).
 * On any failure the socket is CLOSED here, 'err' is populated and ANET_ERR
 * is returned; on success the caller keeps ownership of 's' and ANET_OK is
 * returned. */
static int anetListen(char *err, int s, struct sockaddr *sa, socklen_t len, int backlog, mode_t perm) {
    if (bind(s,sa,len) == -1) {
        anetSetError(err, "bind: %s", strerror(errno));
        close(s);
        return ANET_ERR;
    }

    /* The socket file exists only after bind(), so permissions are set here. */
    if (sa->sa_family == AF_LOCAL && perm)
        chmod(((struct sockaddr_un *) sa)->sun_path, perm);

    if (listen(s, backlog) == -1) {
        anetSetError(err, "listen: %s", strerror(errno));
        close(s);
        return ANET_ERR;
    }
    return ANET_OK;
}
+
+static int anetV6Only(char *err, int s) {
+ int yes = 1;
+ if (setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,&yes,sizeof(yes)) == -1) {
+ anetSetError(err, "setsockopt: %s", strerror(errno));
+ return ANET_ERR;
+ }
+ return ANET_OK;
+}
+
/* Create a listening TCP socket for the given address family 'af'.
 *
 * 'bindaddr' may be NULL, or the wildcard strings "*" / "::*", in which case
 * getaddrinfo() with AI_PASSIVE resolves to the "any" address. On success the
 * listening fd is returned; on failure ANET_ERR is returned and 'err' is
 * populated. */
static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backlog)
{
    int s = -1, rv;
    char _port[6];  /* strlen("65535") + 1 byte for the terminator. */
    struct addrinfo hints, *servinfo, *p;

    snprintf(_port,6,"%d",port);
    memset(&hints,0,sizeof(hints));
    hints.ai_family = af;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE; /* No effect if bindaddr != NULL */
    /* Map wildcard bind addresses to NULL so AI_PASSIVE kicks in. */
    if (bindaddr && !strcmp("*", bindaddr))
        bindaddr = NULL;
    if (af == AF_INET6 && bindaddr && !strcmp("::*", bindaddr))
        bindaddr = NULL;

    if ((rv = getaddrinfo(bindaddr,_port,&hints,&servinfo)) != 0) {
        anetSetError(err, "%s", gai_strerror(rv));
        return ANET_ERR;
    }
    for (p = servinfo; p != NULL; p = p->ai_next) {
        if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1)
            continue;

        if (af == AF_INET6 && anetV6Only(err,s) == ANET_ERR) goto error;
        if (anetSetReuseAddr(err,s) == ANET_ERR) goto error;
        /* anetListen() closes the socket itself on failure; flag the error
         * and jump straight to 'end' so we do not close it a second time. */
        if (anetListen(err,s,p->ai_addr,p->ai_addrlen,backlog,0) == ANET_ERR) s = ANET_ERR;
        goto end;
    }
    /* Reaching here means every socket() attempt failed (p walked to NULL). */
    if (p == NULL) {
        anetSetError(err, "unable to bind socket, errno: %d", errno);
        goto error;
    }

error:
    if (s != -1) close(s);
    s = ANET_ERR;
end:
    freeaddrinfo(servinfo);
    return s;
}
+
+int anetTcpServer(char *err, int port, char *bindaddr, int backlog)
+{
+ return _anetTcpServer(err, port, bindaddr, AF_INET, backlog);
+}
+
+int anetTcp6Server(char *err, int port, char *bindaddr, int backlog)
+{
+ return _anetTcpServer(err, port, bindaddr, AF_INET6, backlog);
+}
+
+int anetUnixServer(char *err, char *path, mode_t perm, int backlog)
+{
+ int s;
+ struct sockaddr_un sa;
+
+ if (strlen(path) > sizeof(sa.sun_path)-1) {
+ anetSetError(err,"unix socket path too long (%zu), must be under %zu", strlen(path), sizeof(sa.sun_path));
+ return ANET_ERR;
+ }
+ if ((s = anetCreateSocket(err,AF_LOCAL)) == ANET_ERR)
+ return ANET_ERR;
+
+ memset(&sa,0,sizeof(sa));
+ sa.sun_family = AF_LOCAL;
+ redis_strlcpy(sa.sun_path,path,sizeof(sa.sun_path));
+ if (anetListen(err,s,(struct sockaddr*)&sa,sizeof(sa),backlog,perm) == ANET_ERR)
+ return ANET_ERR;
+ return s;
+}
+
/* Accept a connection and also make sure the socket is non-blocking, and CLOEXEC.
 * returns the new socket FD, or -1 on error. */
static int anetGenericAccept(char *err, int s, struct sockaddr *sa, socklen_t *len) {
    int fd;
    do {
        /* Use the accept4() call on linux to simultaneously accept and
         * set a socket as non-blocking. */
#ifdef HAVE_ACCEPT4
        fd = accept4(s, sa, len, SOCK_NONBLOCK | SOCK_CLOEXEC);
#else
        fd = accept(s,sa,len);
#endif
    } while(fd == -1 && errno == EINTR); /* Retry when interrupted by a signal. */
    if (fd == -1) {
        anetSetError(err, "accept: %s", strerror(errno));
        return ANET_ERR;
    }
#ifndef HAVE_ACCEPT4
    /* Without accept4() the two properties must be applied with separate
     * calls after the fact; undo the accept if either one fails. */
    if (anetCloexec(fd) == -1) {
        anetSetError(err, "anetCloexec: %s", strerror(errno));
        close(fd);
        return ANET_ERR;
    }
    if (anetNonBlock(err, fd) != ANET_OK) {
        close(fd);
        return ANET_ERR;
    }
#endif
    return fd;
}
+
+/* Accept a connection and also make sure the socket is non-blocking, and CLOEXEC.
+ * returns the new socket FD, or -1 on error. */
+int anetTcpAccept(char *err, int serversock, char *ip, size_t ip_len, int *port) {
+ int fd;
+ struct sockaddr_storage sa;
+ socklen_t salen = sizeof(sa);
+ if ((fd = anetGenericAccept(err,serversock,(struct sockaddr*)&sa,&salen)) == ANET_ERR)
+ return ANET_ERR;
+
+ if (sa.ss_family == AF_INET) {
+ struct sockaddr_in *s = (struct sockaddr_in *)&sa;
+ if (ip) inet_ntop(AF_INET,(void*)&(s->sin_addr),ip,ip_len);
+ if (port) *port = ntohs(s->sin_port);
+ } else {
+ struct sockaddr_in6 *s = (struct sockaddr_in6 *)&sa;
+ if (ip) inet_ntop(AF_INET6,(void*)&(s->sin6_addr),ip,ip_len);
+ if (port) *port = ntohs(s->sin6_port);
+ }
+ return fd;
+}
+
/* Accept a connection on a Unix domain listening socket; the returned
 * socket is non-blocking and CLOEXEC (see anetGenericAccept). The peer
 * address of a Unix socket carries no useful information, so it is
 * discarded and only the fd (or ANET_ERR) is returned. */
int anetUnixAccept(char *err, int s) {
    struct sockaddr_un sa;
    socklen_t salen = sizeof(sa);
    return anetGenericAccept(err,s,(struct sockaddr*)&sa,&salen);
}
+
/* Fill 'ip' (printable address) and 'port' for the given socket fd. When
 * 'remote' is non-zero the peer address is reported (getpeername), otherwise
 * the local one (getsockname). Either output pointer may be NULL to skip it.
 * Unix domain sockets report the fixed string "/unixsocket" and port 0.
 * Returns 0 on success; on failure returns -1 and best-effort fills 'ip'
 * with "?" (or "" if the buffer is a single byte) and 'port' with 0. */
int anetFdToString(int fd, char *ip, size_t ip_len, int *port, int remote) {
    struct sockaddr_storage sa;
    socklen_t salen = sizeof(sa);
    int rc;

    rc = remote ? getpeername(fd, (struct sockaddr *)&sa, &salen)
                : getsockname(fd, (struct sockaddr *)&sa, &salen);
    if (rc == -1) goto error;

    switch (sa.ss_family) {
    case AF_INET: {
        struct sockaddr_in *in4 = (struct sockaddr_in *)&sa;
        if (ip && inet_ntop(AF_INET,(void*)&(in4->sin_addr),ip,ip_len) == NULL)
            goto error;
        if (port) *port = ntohs(in4->sin_port);
        break;
    }
    case AF_INET6: {
        struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&sa;
        if (ip && inet_ntop(AF_INET6,(void*)&(in6->sin6_addr),ip,ip_len) == NULL)
            goto error;
        if (port) *port = ntohs(in6->sin6_port);
        break;
    }
    case AF_UNIX:
        if (ip) {
            int written = snprintf(ip, ip_len, "/unixsocket");
            if (written < 0 || (unsigned int) written >= ip_len) goto error;
        }
        if (port) *port = 0;
        break;
    default:
        goto error;
    }
    return 0;

error:
    if (ip) {
        if (ip_len >= 2) {
            ip[0] = '?';
            ip[1] = '\0';
        } else if (ip_len == 1) {
            ip[0] = '\0';
        }
    }
    if (port) *port = 0;
    return -1;
}
+
/* Create a pipe buffer with given flags for read end and write end.
 * Note that it supports the file flags defined by pipe2() and fcntl(F_SETFL),
 * and one of the use cases is O_CLOEXEC|O_NONBLOCK.
 *
 * On success fds[0] is the read end and fds[1] the write end, and 0 is
 * returned. On failure -1 is returned (and any fds already created by this
 * call are closed). */
int anetPipe(int fds[2], int read_flags, int write_flags) {
    int pipe_flags = 0;
#if defined(__linux__) || defined(__FreeBSD__)
    /* When possible, try to leverage pipe2() to apply flags that are common to both ends.
     * There is no harm to set O_CLOEXEC to prevent fd leaks. */
    pipe_flags = O_CLOEXEC | (read_flags & write_flags);
    if (pipe2(fds, pipe_flags)) {
        /* Fail on real failures, and fallback to simple pipe if pipe2 is unsupported. */
        if (errno != ENOSYS && errno != EINVAL)
            return -1;
        pipe_flags = 0;
    } else {
        /* If the flags on both ends are identical, no need to do anything else. */
        if ((O_CLOEXEC | read_flags) == (O_CLOEXEC | write_flags))
            return 0;
        /* Clear the flags which have already been set using pipe2. */
        read_flags &= ~pipe_flags;
        write_flags &= ~pipe_flags;
    }
#endif

    /* When we reach here with pipe_flags of 0, it means pipe2 failed (or was not attempted),
     * so we try to use pipe. Otherwise, we skip and proceed to set specific flags below. */
    if (pipe_flags == 0 && pipe(fds))
        return -1;

    /* File descriptor flags.
     * Currently, only one such flag is defined: FD_CLOEXEC, the close-on-exec flag. */
    if (read_flags & O_CLOEXEC)
        if (fcntl(fds[0], F_SETFD, FD_CLOEXEC))
            goto error;
    if (write_flags & O_CLOEXEC)
        if (fcntl(fds[1], F_SETFD, FD_CLOEXEC))
            goto error;

    /* File status flags after clearing the file descriptor flag O_CLOEXEC. */
    read_flags &= ~O_CLOEXEC;
    if (read_flags)
        if (fcntl(fds[0], F_SETFL, read_flags))
            goto error;
    write_flags &= ~O_CLOEXEC;
    if (write_flags)
        if (fcntl(fds[1], F_SETFL, write_flags))
            goto error;

    return 0;

error:
    /* Both ends exist by this point; close them so nothing leaks. */
    close(fds[0]);
    close(fds[1]);
    return -1;
}
+
/* Tag the socket with the mark id 'id' via the platform-specific socket
 * option selected by the SOCKOPTMARKID macro (available only when
 * HAVE_SOCKOPTMARKID is defined at build time). */
int anetSetSockMarkId(char *err, int fd, uint32_t id) {
#ifdef HAVE_SOCKOPTMARKID
    if (setsockopt(fd, SOL_SOCKET, SOCKOPTMARKID, (void *)&id, sizeof(id)) == -1) {
        anetSetError(err, "setsockopt: %s", strerror(errno));
        return ANET_ERR;
    }
    return ANET_OK;
#else
    UNUSED(fd);
    UNUSED(id);
    /* NOTE(review): an error message is written but ANET_OK is returned,
     * making this a silent no-op on unsupported platforms — confirm callers
     * intend best-effort semantics here rather than a hard failure. */
    anetSetError(err,"anetSetSockMarkid unsupported on this platform");
    return ANET_OK;
#endif
}
+
/* Return 1 if 'filepath' exists and is a FIFO (named pipe), otherwise 0
 * (including the case where stat() itself fails). */
int anetIsFifo(char *filepath) {
    struct stat st;
    if (stat(filepath, &st) != 0)
        return 0;
    return S_ISFIFO(st.st_mode) ? 1 : 0;
}
diff --git a/src/anet.h b/src/anet.h
new file mode 100644
index 0000000..b13c14f
--- /dev/null
+++ b/src/anet.h
@@ -0,0 +1,75 @@
/* anet.h -- Basic TCP socket stuff made a bit less boring
+ *
+ * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ANET_H
+#define ANET_H
+
+#include <sys/types.h>
+
+#define ANET_OK 0
+#define ANET_ERR -1
+#define ANET_ERR_LEN 256
+
+/* Flags used with certain functions. */
+#define ANET_NONE 0
+#define ANET_IP_ONLY (1<<0)
+
+#if defined(__sun) || defined(_AIX)
+#define AF_LOCAL AF_UNIX
+#endif
+
+#ifdef _AIX
+#undef ip_len
+#endif
+
+int anetTcpNonBlockConnect(char *err, const char *addr, int port);
+int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port, const char *source_addr);
+int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, int flags);
+int anetTcpServer(char *err, int port, char *bindaddr, int backlog);
+int anetTcp6Server(char *err, int port, char *bindaddr, int backlog);
+int anetUnixServer(char *err, char *path, mode_t perm, int backlog);
+int anetTcpAccept(char *err, int serversock, char *ip, size_t ip_len, int *port);
+int anetUnixAccept(char *err, int serversock);
+int anetNonBlock(char *err, int fd);
+int anetBlock(char *err, int fd);
+int anetCloexec(int fd);
+int anetEnableTcpNoDelay(char *err, int fd);
+int anetDisableTcpNoDelay(char *err, int fd);
+int anetSendTimeout(char *err, int fd, long long ms);
+int anetRecvTimeout(char *err, int fd, long long ms);
+int anetFdToString(int fd, char *ip, size_t ip_len, int *port, int remote);
+int anetKeepAlive(char *err, int fd, int interval);
+int anetFormatAddr(char *fmt, size_t fmt_len, char *ip, int port);
+int anetPipe(int fds[2], int read_flags, int write_flags);
+int anetSetSockMarkId(char *err, int fd, uint32_t id);
+int anetGetError(int fd);
+int anetIsFifo(char *filepath);
+
+#endif
diff --git a/src/aof.c b/src/aof.c
new file mode 100644
index 0000000..a89142b
--- /dev/null
+++ b/src/aof.c
@@ -0,0 +1,2742 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "bio.h"
+#include "rio.h"
+#include "functions.h"
+
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <sys/param.h>
+
+void freeClientArgv(client *c);
+off_t getAppendOnlyFileSize(sds filename, int *status);
+off_t getBaseAndIncrAppendOnlyFilesSize(aofManifest *am, int *status);
+int getBaseAndIncrAppendOnlyFilesNum(aofManifest *am);
+int aofFileExist(char *filename);
+int rewriteAppendOnlyFile(char *filename);
+aofManifest *aofLoadManifestFromFile(sds am_filepath);
+void aofManifestFreeAndUpdate(aofManifest *am);
+void aof_background_fsync_and_close(int fd);
+
+/* ----------------------------------------------------------------------------
+ * AOF Manifest file implementation.
+ *
+ * The following code implements the read/write logic of AOF manifest file, which
+ * is used to track and manage all AOF files.
+ *
+ * Append-only files consist of three types:
+ *
+ * BASE: Represents a Redis snapshot from the time of last AOF rewrite. The manifest
+ * file contains at most a single BASE file, which will always be the first file in the
+ * list.
+ *
+ * INCR: Represents all write commands executed by Redis following the last successful
+ * AOF rewrite. In some cases it is possible to have several ordered INCR files. For
+ * example:
+ * - During an on-going AOF rewrite
+ * - After an AOF rewrite was aborted/failed, and before the next one succeeded.
+ *
+ * HISTORY: After a successful rewrite, the previous BASE and INCR become HISTORY files.
+ * They will be automatically removed unless garbage collection is disabled.
+ *
+ * The following is a possible AOF manifest file content:
+ *
+ * file appendonly.aof.2.base.rdb seq 2 type b
+ * file appendonly.aof.1.incr.aof seq 1 type h
+ * file appendonly.aof.2.incr.aof seq 2 type h
+ * file appendonly.aof.3.incr.aof seq 3 type h
+ * file appendonly.aof.4.incr.aof seq 4 type i
+ * file appendonly.aof.5.incr.aof seq 5 type i
+ * ------------------------------------------------------------------------- */
+
+/* Naming rules. */
+#define BASE_FILE_SUFFIX ".base"
+#define INCR_FILE_SUFFIX ".incr"
+#define RDB_FORMAT_SUFFIX ".rdb"
+#define AOF_FORMAT_SUFFIX ".aof"
+#define MANIFEST_NAME_SUFFIX ".manifest"
+#define TEMP_FILE_NAME_PREFIX "temp-"
+
+/* AOF manifest key. */
+#define AOF_MANIFEST_KEY_FILE_NAME "file"
+#define AOF_MANIFEST_KEY_FILE_SEQ "seq"
+#define AOF_MANIFEST_KEY_FILE_TYPE "type"
+
/* Create an empty aofInfo. zcalloc() leaves every field zero/NULL, so the
 * caller is expected to fill file_name, file_seq and file_type afterwards. */
aofInfo *aofInfoCreate(void) {
    return zcalloc(sizeof(aofInfo));
}
+
+/* Free the aofInfo structure (pointed to by ai) and its embedded file_name. */
+void aofInfoFree(aofInfo *ai) {
+ serverAssert(ai != NULL);
+ if (ai->file_name) sdsfree(ai->file_name);
+ zfree(ai);
+}
+
+/* Deep copy an aofInfo. */
+aofInfo *aofInfoDup(aofInfo *orig) {
+ serverAssert(orig != NULL);
+ aofInfo *ai = aofInfoCreate();
+ ai->file_name = sdsdup(orig->file_name);
+ ai->file_seq = orig->file_seq;
+ ai->file_type = orig->file_type;
+ return ai;
+}
+
/* Format aofInfo as a string and it will be a line in the manifest.
 *
 * The produced line is "file <name> seq <seq> type <type>\n". File names
 * containing characters that need quoting are emitted in sdscatrepr()
 * escaped form so the manifest parser can round-trip them. */
sds aofInfoFormat(sds buf, aofInfo *ai) {
    sds filename_repr = NULL;

    if (sdsneedsrepr(ai->file_name))
        filename_repr = sdscatrepr(sdsempty(), ai->file_name, sdslen(ai->file_name));

    sds ret = sdscatprintf(buf, "%s %s %s %lld %s %c\n",
        AOF_MANIFEST_KEY_FILE_NAME, filename_repr ? filename_repr : ai->file_name,
        AOF_MANIFEST_KEY_FILE_SEQ, ai->file_seq,
        AOF_MANIFEST_KEY_FILE_TYPE, ai->file_type);
    /* filename_repr may be NULL here; sdsfree() is a no-op on NULL. */
    sdsfree(filename_repr);

    return ret;
}
+
+/* Method to free AOF list elements. */
+void aofListFree(void *item) {
+ aofInfo *ai = (aofInfo *)item;
+ aofInfoFree(ai);
+}
+
+/* Method to duplicate AOF list elements. */
+void *aofListDup(void *item) {
+ return aofInfoDup(item);
+}
+
+/* Create an empty aofManifest, which will be called in `aofLoadManifestFromDisk`. */
+aofManifest *aofManifestCreate(void) {
+ aofManifest *am = zcalloc(sizeof(aofManifest));
+ am->incr_aof_list = listCreate();
+ am->history_aof_list = listCreate();
+ listSetFreeMethod(am->incr_aof_list, aofListFree);
+ listSetDupMethod(am->incr_aof_list, aofListDup);
+ listSetFreeMethod(am->history_aof_list, aofListFree);
+ listSetDupMethod(am->history_aof_list, aofListDup);
+ return am;
+}
+
+/* Free the aofManifest structure (pointed to by am) and its embedded members. */
+void aofManifestFree(aofManifest *am) {
+ if (am->base_aof_info) aofInfoFree(am->base_aof_info);
+ if (am->incr_aof_list) listRelease(am->incr_aof_list);
+ if (am->history_aof_list) listRelease(am->history_aof_list);
+ zfree(am);
+}
+
+sds getAofManifestFileName(void) {
+ return sdscatprintf(sdsempty(), "%s%s", server.aof_filename,
+ MANIFEST_NAME_SUFFIX);
+}
+
+sds getTempAofManifestFileName(void) {
+ return sdscatprintf(sdsempty(), "%s%s%s", TEMP_FILE_NAME_PREFIX,
+ server.aof_filename, MANIFEST_NAME_SUFFIX);
+}
+
+/* Returns the string representation of aofManifest pointed to by am.
+ *
+ * The string is multiple lines separated by '\n', and each line represents
+ * an AOF file.
+ *
+ * Each line is space delimited and contains 6 fields, as follows:
+ * "file" [filename] "seq" [sequence] "type" [type]
+ *
+ * Where "file", "seq" and "type" are keywords that describe the next value,
+ * [filename] and [sequence] describe file name and order, and [type] is one
+ * of 'b' (base), 'h' (history) or 'i' (incr).
+ *
+ * The base file, if exists, will always be first, followed by history files,
+ * and incremental files.
+ */
+sds getAofManifestAsString(aofManifest *am) {
+ serverAssert(am != NULL);
+
+ sds buf = sdsempty();
+ listNode *ln;
+ listIter li;
+
+ /* 1. Add BASE File information, it is always at the beginning
+ * of the manifest file. */
+ if (am->base_aof_info) {
+ buf = aofInfoFormat(buf, am->base_aof_info);
+ }
+
+ /* 2. Add HISTORY type AOF information. */
+ listRewind(am->history_aof_list, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ aofInfo *ai = (aofInfo*)ln->value;
+ buf = aofInfoFormat(buf, ai);
+ }
+
+ /* 3. Add INCR type AOF information. */
+ listRewind(am->incr_aof_list, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ aofInfo *ai = (aofInfo*)ln->value;
+ buf = aofInfoFormat(buf, ai);
+ }
+
+ return buf;
+}
+
/* Load the manifest information from the disk to `server.aof_manifest`
 * when the Redis server start.
 *
 * During loading, this function does strict error checking and will abort
 * the entire Redis server process on error (I/O error, invalid format, etc.)
 *
 * If the AOF directory or manifest file do not exist, this will be ignored
 * in order to support seamless upgrades from previous versions which did not
 * use them.
 */
void aofLoadManifestFromDisk(void) {
    /* Always start from an empty manifest; it stays empty when there is
     * nothing on disk yet. */
    server.aof_manifest = aofManifestCreate();
    if (!dirExists(server.aof_dirname)) {
        serverLog(LL_DEBUG, "The AOF directory %s doesn't exist", server.aof_dirname);
        return;
    }

    sds am_name = getAofManifestFileName();
    sds am_filepath = makePath(server.aof_dirname, am_name);
    if (!fileExist(am_filepath)) {
        serverLog(LL_DEBUG, "The AOF manifest file %s doesn't exist", am_name);
        sdsfree(am_name);
        sdsfree(am_filepath);
        return;
    }

    /* aofLoadManifestFromFile() exits the process on error, so a NULL check
     * here is defensive only. */
    aofManifest *am = aofLoadManifestFromFile(am_filepath);
    if (am) aofManifestFreeAndUpdate(am);
    sdsfree(am_name);
    sdsfree(am_filepath);
}
+
/* Generic manifest loading function, used in `aofLoadManifestFromDisk` and redis-check-aof tool.
 *
 * Parses the manifest line by line ("file <name> seq <n> type <c>"),
 * validating each record strictly; any I/O or format error is fatal and
 * terminates the process with exit(1). Returns a fully populated
 * aofManifest on success (caller owns it). */
#define MANIFEST_MAX_LINE 1024
aofManifest *aofLoadManifestFromFile(sds am_filepath) {
    const char *err = NULL;
    long long maxseq = 0; /* Highest INCR sequence seen, for monotonicity check. */

    aofManifest *am = aofManifestCreate();
    FILE *fp = fopen(am_filepath, "r");
    if (fp == NULL) {
        serverLog(LL_WARNING, "Fatal error: can't open the AOF manifest "
            "file %s for reading: %s", am_filepath, strerror(errno));
        exit(1);
    }

    char buf[MANIFEST_MAX_LINE+1];
    sds *argv = NULL;
    int argc;
    aofInfo *ai = NULL;

    sds line = NULL;
    int linenum = 0;

    while (1) {
        if (fgets(buf, MANIFEST_MAX_LINE+1, fp) == NULL) {
            if (feof(fp)) {
                /* EOF before any line was read means an empty manifest,
                 * which is invalid. */
                if (linenum == 0) {
                    err = "Found an empty AOF manifest";
                    goto loaderr;
                } else {
                    break;
                }
            } else {
                err = "Read AOF manifest failed";
                goto loaderr;
            }
        }

        linenum++;

        /* Skip comments lines */
        if (buf[0] == '#') continue;

        /* A line longer than the buffer would be silently split; reject it. */
        if (strchr(buf, '\n') == NULL) {
            err = "The AOF manifest file contains too long line";
            goto loaderr;
        }

        line = sdstrim(sdsnew(buf), " \t\r\n");
        if (!sdslen(line)) {
            err = "Invalid AOF manifest file format";
            goto loaderr;
        }

        argv = sdssplitargs(line, &argc);
        /* 'argc < 6' was done for forward compatibility. */
        if (argv == NULL || argc < 6 || (argc % 2)) {
            err = "Invalid AOF manifest file format";
            goto loaderr;
        }

        /* Tokens come in key/value pairs; unknown keys are ignored so newer
         * manifests remain loadable. */
        ai = aofInfoCreate();
        for (int i = 0; i < argc; i += 2) {
            if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_NAME)) {
                ai->file_name = sdsnew(argv[i+1]);
                if (!pathIsBaseName(ai->file_name)) {
                    err = "File can't be a path, just a filename";
                    goto loaderr;
                }
            } else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_SEQ)) {
                ai->file_seq = atoll(argv[i+1]);
            } else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_TYPE)) {
                ai->file_type = (argv[i+1])[0];
            }
            /* else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_OTHER)) {} */
        }

        /* We have to make sure we load all the information. */
        if (!ai->file_name || !ai->file_seq || !ai->file_type) {
            err = "Invalid AOF manifest file format";
            goto loaderr;
        }

        sdsfreesplitres(argv, argc);
        argv = NULL;

        if (ai->file_type == AOF_FILE_TYPE_BASE) {
            /* At most one BASE file is allowed per manifest. */
            if (am->base_aof_info) {
                err = "Found duplicate base file information";
                goto loaderr;
            }
            am->base_aof_info = ai;
            am->curr_base_file_seq = ai->file_seq;
        } else if (ai->file_type == AOF_FILE_TYPE_HIST) {
            listAddNodeTail(am->history_aof_list, ai);
        } else if (ai->file_type == AOF_FILE_TYPE_INCR) {
            /* INCR files must appear with strictly increasing sequences. */
            if (ai->file_seq <= maxseq) {
                err = "Found a non-monotonic sequence number";
                goto loaderr;
            }
            listAddNodeTail(am->incr_aof_list, ai);
            am->curr_incr_file_seq = ai->file_seq;
            maxseq = ai->file_seq;
        } else {
            err = "Unknown AOF file type";
            goto loaderr;
        }

        /* Ownership of 'ai' moved into the manifest above. */
        sdsfree(line);
        line = NULL;
        ai = NULL;
    }

    fclose(fp);
    return am;

loaderr:
    /* Sanitizer suppression: may report a false positive if we goto loaderr
     * and exit(1) without freeing these allocations. */
    if (argv) sdsfreesplitres(argv, argc);
    if (ai) aofInfoFree(ai);

    serverLog(LL_WARNING, "\n*** FATAL AOF MANIFEST FILE ERROR ***\n");
    if (line) {
        serverLog(LL_WARNING, "Reading the manifest file, at line %d\n", linenum);
        serverLog(LL_WARNING, ">>> '%s'\n", line);
    }
    serverLog(LL_WARNING, "%s\n", err);
    exit(1);
}
+
+/* Deep copy an aofManifest from orig.
+ *
+ * In `backgroundRewriteDoneHandler` and `openNewIncrAofForAppend`, we will
+ * first deep copy a temporary AOF manifest from the `server.aof_manifest` and
+ * try to modify it. Once everything is modified, we will atomically make the
+ * `server.aof_manifest` point to this temporary aof_manifest.
+ */
+aofManifest *aofManifestDup(aofManifest *orig) {
+ serverAssert(orig != NULL);
+ aofManifest *am = zcalloc(sizeof(aofManifest));
+
+ am->curr_base_file_seq = orig->curr_base_file_seq;
+ am->curr_incr_file_seq = orig->curr_incr_file_seq;
+ am->dirty = orig->dirty;
+
+ if (orig->base_aof_info) {
+ am->base_aof_info = aofInfoDup(orig->base_aof_info);
+ }
+
+ am->incr_aof_list = listDup(orig->incr_aof_list);
+ am->history_aof_list = listDup(orig->history_aof_list);
+ serverAssert(am->incr_aof_list != NULL);
+ serverAssert(am->history_aof_list != NULL);
+ return am;
+}
+
+/* Change the `server.aof_manifest` pointer to 'am' and free the previous
+ * one if we have. */
+void aofManifestFreeAndUpdate(aofManifest *am) {
+ serverAssert(am != NULL);
+ if (server.aof_manifest) aofManifestFree(server.aof_manifest);
+ server.aof_manifest = am;
+}
+
/* Called in `backgroundRewriteDoneHandler` to get a new BASE file
 * name, and mark the previous (if we have) BASE file as HISTORY type.
 *
 * BASE file naming rules: `server.aof_filename`.seq.base.format
 *
 * for example:
 *  appendonly.aof.1.base.aof  (server.aof_use_rdb_preamble is no)
 *  appendonly.aof.1.base.rdb  (server.aof_use_rdb_preamble is yes)
 *
 * Returns the new file name (owned by the manifest, do not free). */
sds getNewBaseFileNameAndMarkPreAsHistory(aofManifest *am) {
    serverAssert(am != NULL);
    /* Demote the previous BASE file (if any) to HISTORY; it is prepended to
     * the history list so it precedes older history entries. */
    if (am->base_aof_info) {
        serverAssert(am->base_aof_info->file_type == AOF_FILE_TYPE_BASE);
        am->base_aof_info->file_type = AOF_FILE_TYPE_HIST;
        listAddNodeHead(am->history_aof_list, am->base_aof_info);
    }

    /* The suffix reflects the rewrite format currently configured. */
    char *format_suffix = server.aof_use_rdb_preamble ?
        RDB_FORMAT_SUFFIX:AOF_FORMAT_SUFFIX;

    aofInfo *ai = aofInfoCreate();
    ai->file_name = sdscatprintf(sdsempty(), "%s.%lld%s%s", server.aof_filename,
                        ++am->curr_base_file_seq, BASE_FILE_SUFFIX, format_suffix);
    ai->file_seq = am->curr_base_file_seq;
    ai->file_type = AOF_FILE_TYPE_BASE;
    am->base_aof_info = ai;
    /* The manifest now differs from what is on disk. */
    am->dirty = 1;
    return am->base_aof_info->file_name;
}
+
+/* Get a new INCR type AOF name.
+ *
+ * INCR AOF naming rules: `server.aof_filename`.seq.incr.aof
+ *
+ * for example:
+ * appendonly.aof.1.incr.aof
+ */
+sds getNewIncrAofName(aofManifest *am) {
+ aofInfo *ai = aofInfoCreate();
+ ai->file_type = AOF_FILE_TYPE_INCR;
+ ai->file_name = sdscatprintf(sdsempty(), "%s.%lld%s%s", server.aof_filename,
+ ++am->curr_incr_file_seq, INCR_FILE_SUFFIX, AOF_FORMAT_SUFFIX);
+ ai->file_seq = am->curr_incr_file_seq;
+ listAddNodeTail(am->incr_aof_list, ai);
+ am->dirty = 1;
+ return ai->file_name;
+}
+
+/* Get temp INCR type AOF name. */
+sds getTempIncrAofName(void) {
+ return sdscatprintf(sdsempty(), "%s%s%s", TEMP_FILE_NAME_PREFIX, server.aof_filename,
+ INCR_FILE_SUFFIX);
+}
+
+/* Get the last INCR AOF name or create a new one. */
+sds getLastIncrAofName(aofManifest *am) {
+ serverAssert(am != NULL);
+
+ /* If 'incr_aof_list' is empty, just create a new one. */
+ if (!listLength(am->incr_aof_list)) {
+ return getNewIncrAofName(am);
+ }
+
+ /* Or return the last one. */
+ listNode *lastnode = listIndex(am->incr_aof_list, -1);
+ aofInfo *ai = listNodeValue(lastnode);
+ return ai->file_name;
+}
+
/* Called in `backgroundRewriteDoneHandler`. when AOFRW success, This
 * function will change the AOF file type in 'incr_aof_list' from
 * AOF_FILE_TYPE_INCR to AOF_FILE_TYPE_HIST, and move them to the
 * 'history_aof_list'.
 */
void markRewrittenIncrAofAsHistory(aofManifest *am) {
    serverAssert(am != NULL);
    if (!listLength(am->incr_aof_list)) {
        return;
    }

    listNode *ln;
    listIter li;

    /* Iterate from the tail so the newest INCR file is visited first. */
    listRewindTail(am->incr_aof_list, &li);

    /* "server.aof_fd != -1" means AOF enabled, then we must skip the
     * last AOF, because this file is our currently writing. */
    if (server.aof_fd != -1) {
        ln = listNext(&li);
        serverAssert(ln != NULL);
    }

    /* Move aofInfo from 'incr_aof_list' to 'history_aof_list'. */
    while ((ln = listNext(&li)) != NULL) {
        aofInfo *ai = (aofInfo*)ln->value;
        serverAssert(ai->file_type == AOF_FILE_TYPE_INCR);

        /* A duplicate is inserted into history and the original node is
         * deleted, so the tail-to-head iteration stays valid. */
        aofInfo *hai = aofInfoDup(ai);
        hai->file_type = AOF_FILE_TYPE_HIST;
        listAddNodeHead(am->history_aof_list, hai);
        listDelNode(am->incr_aof_list, ln);
    }

    am->dirty = 1;
}
+
/* Write the formatted manifest string to disk.
 *
 * The update is made crash-safe by writing and fsync'ing a temporary file
 * first, rename()'ing it over the real manifest, and finally fsync'ing the
 * AOF directory so the rename itself is durable.
 * Returns C_OK on success, C_ERR on any failure. */
int writeAofManifestFile(sds buf) {
    int ret = C_OK;
    ssize_t nwritten;
    /* NOTE(review): sdslen() returns size_t; an int is ample for a manifest
     * but confirm no oversized buffer can reach this function. */
    int len;

    sds am_name = getAofManifestFileName();
    sds am_filepath = makePath(server.aof_dirname, am_name);
    sds tmp_am_name = getTempAofManifestFileName();
    sds tmp_am_filepath = makePath(server.aof_dirname, tmp_am_name);

    int fd = open(tmp_am_filepath, O_WRONLY|O_TRUNC|O_CREAT, 0644);
    if (fd == -1) {
        serverLog(LL_WARNING, "Can't open the AOF manifest file %s: %s",
            tmp_am_name, strerror(errno));

        ret = C_ERR;
        goto cleanup;
    }

    /* Short writes are possible; loop until the whole buffer is on disk,
     * retrying on EINTR. */
    len = sdslen(buf);
    while(len) {
        nwritten = write(fd, buf, len);

        if (nwritten < 0) {
            if (errno == EINTR) continue;

            serverLog(LL_WARNING, "Error trying to write the temporary AOF manifest file %s: %s",
                tmp_am_name, strerror(errno));

            ret = C_ERR;
            goto cleanup;
        }

        len -= nwritten;
        buf += nwritten;
    }

    /* Make sure the data hits the disk before the rename below. */
    if (redis_fsync(fd) == -1) {
        serverLog(LL_WARNING, "Fail to fsync the temp AOF file %s: %s.",
            tmp_am_name, strerror(errno));

        ret = C_ERR;
        goto cleanup;
    }

    /* Atomically replace the old manifest with the new one. */
    if (rename(tmp_am_filepath, am_filepath) != 0) {
        serverLog(LL_WARNING,
            "Error trying to rename the temporary AOF manifest file %s into %s: %s",
            tmp_am_name, am_name, strerror(errno));

        ret = C_ERR;
        goto cleanup;
    }

    /* Also sync the AOF directory as new AOF files may be added in the directory */
    if (fsyncFileDir(am_filepath) == -1) {
        serverLog(LL_WARNING, "Fail to fsync AOF directory %s: %s.",
            am_filepath, strerror(errno));

        ret = C_ERR;
        goto cleanup;
    }

cleanup:
    if (fd != -1) close(fd);
    sdsfree(am_name);
    sdsfree(am_filepath);
    sdsfree(tmp_am_name);
    sdsfree(tmp_am_filepath);
    return ret;
}
+
+/* Persist the aofManifest information pointed to by am to disk. */
+int persistAofManifest(aofManifest *am) {
+ if (am->dirty == 0) {
+ return C_OK;
+ }
+
+ sds amstr = getAofManifestAsString(am);
+ int ret = writeAofManifestFile(amstr);
+ sdsfree(amstr);
+ if (ret == C_OK) am->dirty = 0;
+ return ret;
+}
+
/* Called in `loadAppendOnlyFiles` when we upgrade from an old version redis.
 *
 * 1) Create AOF directory using 'server.aof_dirname' as the name.
 * 2) Use 'server.aof_filename' to construct a BASE type aofInfo and add it to
 *    aofManifest, then persist the manifest file to the AOF directory.
 * 3) Move the old AOF file (server.aof_filename) to the AOF directory.
 *
 * If any of the above steps fails or a crash occurs, this will not cause any
 * problems, and redis will retry the upgrade process when it restarts.
 *
 * Note: on any unrecoverable error this function exits the process, since
 * continuing without a usable AOF setup would risk data loss. */
void aofUpgradePrepare(aofManifest *am) {
    /* The old-style AOF must not already exist inside the AOF directory,
     * otherwise the upgrade has already taken place. */
    serverAssert(!aofFileExist(server.aof_filename));

    /* Create AOF directory use 'server.aof_dirname' as the name. */
    if (dirCreateIfMissing(server.aof_dirname) == -1) {
        serverLog(LL_WARNING, "Can't open or create append-only dir %s: %s",
            server.aof_dirname, strerror(errno));
        exit(1);
    }

    /* Manually construct a BASE type aofInfo and add it to aofManifest.
     * Any previous base info is discarded and replaced. */
    if (am->base_aof_info) aofInfoFree(am->base_aof_info);
    aofInfo *ai = aofInfoCreate();
    ai->file_name = sdsnew(server.aof_filename);
    ai->file_seq = 1;
    ai->file_type = AOF_FILE_TYPE_BASE;
    am->base_aof_info = ai;
    am->curr_base_file_seq = 1;
    am->dirty = 1;

    /* Persist the manifest file to AOF directory. The manifest must hit
     * the disk BEFORE the rename below, so a crash in between leaves a
     * manifest that simply points to a not-yet-moved file. */
    if (persistAofManifest(am) != C_OK) {
        exit(1);
    }

    /* Move the old AOF file to AOF directory. */
    sds aof_filepath = makePath(server.aof_dirname, server.aof_filename);
    if (rename(server.aof_filename, aof_filepath) == -1) {
        serverLog(LL_WARNING,
            "Error trying to move the old AOF file %s into dir %s: %s",
            server.aof_filename,
            server.aof_dirname,
            strerror(errno));
        sdsfree(aof_filepath);
        exit(1);
    }
    sdsfree(aof_filepath);

    serverLog(LL_NOTICE, "Successfully migrated an old-style AOF file (%s) into the AOF directory (%s).",
        server.aof_filename, server.aof_dirname);
}
+
/* When AOFRW succeeds, the previous BASE and INCR AOFs will
 * become HISTORY type and be moved into 'history_aof_list'.
 *
 * The function will traverse the 'history_aof_list' and submit
 * the delete task to the bio thread.
 *
 * Returns the result of persisting the updated manifest (C_OK/C_ERR);
 * returns C_OK immediately when there is nothing to clean up or when
 * automatic GC is disabled (server.aof_disable_auto_gc). */
int aofDelHistoryFiles(void) {
    if (server.aof_manifest == NULL ||
        server.aof_disable_auto_gc == 1 ||
        !listLength(server.aof_manifest->history_aof_list))
    {
        return C_OK;
    }

    listNode *ln;
    listIter li;

    listRewind(server.aof_manifest->history_aof_list, &li);
    while ((ln = listNext(&li)) != NULL) {
        aofInfo *ai = (aofInfo*)ln->value;
        serverAssert(ai->file_type == AOF_FILE_TYPE_HIST);
        serverLog(LL_NOTICE, "Removing the history file %s in the background", ai->file_name);
        sds aof_filepath = makePath(server.aof_dirname, ai->file_name);
        /* Unlink in a background thread so a slow filesystem does not
         * block the event loop. */
        bg_unlink(aof_filepath);
        sdsfree(aof_filepath);
        listDelNode(server.aof_manifest->history_aof_list, ln);
    }

    /* The manifest no longer references the deleted files: persist it. */
    server.aof_manifest->dirty = 1;
    return persistAofManifest(server.aof_manifest);
}
+
+/* Used to clean up temp INCR AOF when AOFRW fails. */
+void aofDelTempIncrAofFile(void) {
+ sds aof_filename = getTempIncrAofName();
+ sds aof_filepath = makePath(server.aof_dirname, aof_filename);
+ serverLog(LL_NOTICE, "Removing the temp incr aof file %s in the background", aof_filename);
+ bg_unlink(aof_filepath);
+ sdsfree(aof_filepath);
+ sdsfree(aof_filename);
+ return;
+}
+
/* Called after `loadDataFromDisk` when redis starts. If `server.aof_state` is
 * 'AOF_ON', it will do three things:
 * 1. Force create a BASE file when redis starts with an empty dataset
 * 2. Open the last opened INCR type AOF for writing, if none exists create
 *    a new one
 * 3. Synchronously update the manifest file on disk
 *
 * If any of the above steps fails, the redis process will exit. */
void aofOpenIfNeededOnServerStart(void) {
    if (server.aof_state != AOF_ON) {
        return;
    }

    serverAssert(server.aof_manifest != NULL);
    serverAssert(server.aof_fd == -1);

    if (dirCreateIfMissing(server.aof_dirname) == -1) {
        serverLog(LL_WARNING, "Can't open or create append-only dir %s: %s",
            server.aof_dirname, strerror(errno));
        exit(1);
    }

    /* If we start with an empty dataset, we will force create a BASE file. */
    size_t incr_aof_len = listLength(server.aof_manifest->incr_aof_list);
    if (!server.aof_manifest->base_aof_info && !incr_aof_len) {
        sds base_name = getNewBaseFileNameAndMarkPreAsHistory(server.aof_manifest);
        sds base_filepath = makePath(server.aof_dirname, base_name);
        if (rewriteAppendOnlyFile(base_filepath) != C_OK) {
            exit(1);
        }
        sdsfree(base_filepath);
        serverLog(LL_NOTICE, "Creating AOF base file %s on server start",
            base_name);
    }

    /* Because we will 'exit(1)' if open AOF or persistent manifest fails, so
     * we don't need atomic modification here. */
    sds aof_name = getLastIncrAofName(server.aof_manifest);

    /* Here we should use 'O_APPEND' flag: we are (re)opening an existing
     * INCR file and must never clobber data already written to it. */
    sds aof_filepath = makePath(server.aof_dirname, aof_name);
    server.aof_fd = open(aof_filepath, O_WRONLY|O_APPEND|O_CREAT, 0644);
    sdsfree(aof_filepath);
    if (server.aof_fd == -1) {
        serverLog(LL_WARNING, "Can't open the append-only file %s: %s",
            aof_name, strerror(errno));
        exit(1);
    }

    /* Persist our changes. */
    int ret = persistAofManifest(server.aof_manifest);
    if (ret != C_OK) {
        exit(1);
    }

    /* The file may already contain data; treat all of it as fsync'ed so we
     * only track new appends from now on. */
    server.aof_last_incr_size = getAppendOnlyFileSize(aof_name, NULL);
    server.aof_last_incr_fsync_offset = server.aof_last_incr_size;

    if (incr_aof_len) {
        serverLog(LL_NOTICE, "Opening AOF incr file %s on server start", aof_name);
    } else {
        serverLog(LL_NOTICE, "Creating AOF incr file %s on server start", aof_name);
    }
}
+
+int aofFileExist(char *filename) {
+ sds file_path = makePath(server.aof_dirname, filename);
+ int ret = fileExist(file_path);
+ sdsfree(file_path);
+ return ret;
+}
+
/* Called in `rewriteAppendOnlyFileBackground`. If `server.aof_state`
 * is 'AOF_ON', it will do two things:
 * 1. Open a new INCR type AOF for writing
 * 2. Synchronously update the manifest file on disk
 *
 * The above two steps of modification are atomic, that is, if
 * any step fails, the entire operation will rollback and returns
 * C_ERR, and if all succeeds, it returns C_OK.
 *
 * If `server.aof_state` is 'AOF_WAIT_REWRITE', it will open a temporary INCR AOF
 * file to accumulate data during AOF_WAIT_REWRITE, and it will eventually be
 * renamed in the `backgroundRewriteDoneHandler` and written to the manifest file.
 * */
int openNewIncrAofForAppend(void) {
    serverAssert(server.aof_manifest != NULL);
    int newfd = -1;
    aofManifest *temp_am = NULL;
    sds new_aof_name = NULL;

    /* Only open new INCR AOF when AOF enabled. */
    if (server.aof_state == AOF_OFF) return C_OK;

    /* Open new AOF. */
    if (server.aof_state == AOF_WAIT_REWRITE) {
        /* Use a temporary INCR AOF file to accumulate data during AOF_WAIT_REWRITE.
         * Note: temp_am stays NULL here, so the manifest is not touched. */
        new_aof_name = getTempIncrAofName();
    } else {
        /* Dup a temp aof_manifest to modify, so a failure below cannot
         * leave server.aof_manifest half-updated. */
        temp_am = aofManifestDup(server.aof_manifest);
        new_aof_name = sdsdup(getNewIncrAofName(temp_am));
    }
    sds new_aof_filepath = makePath(server.aof_dirname, new_aof_name);
    newfd = open(new_aof_filepath, O_WRONLY|O_TRUNC|O_CREAT, 0644);
    sdsfree(new_aof_filepath);
    if (newfd == -1) {
        serverLog(LL_WARNING, "Can't open the append-only file %s: %s",
            new_aof_name, strerror(errno));
        goto cleanup;
    }

    if (temp_am) {
        /* Persist AOF Manifest. */
        if (persistAofManifest(temp_am) == C_ERR) {
            goto cleanup;
        }
    }

    serverLog(LL_NOTICE, "Creating AOF incr file %s on background rewrite",
        new_aof_name);
    sdsfree(new_aof_name);

    /* If reaches here, we can safely modify the `server.aof_manifest`
     * and `server.aof_fd`. */

    /* fsync and close old aof_fd if needed. In fsync everysec it's ok to delay
     * the fsync as long as we grantee it happens, and in fsync always the file
     * is already synced at this point so fsync doesn't matter. */
    if (server.aof_fd != -1) {
        aof_background_fsync_and_close(server.aof_fd);
        server.aof_last_fsync = server.unixtime;
    }
    server.aof_fd = newfd;

    /* Reset the aof_last_incr_size. */
    server.aof_last_incr_size = 0;
    /* Reset the aof_last_incr_fsync_offset. */
    server.aof_last_incr_fsync_offset = 0;
    /* Update `server.aof_manifest`: swap in the duplicated manifest and
     * free the old one. */
    if (temp_am) aofManifestFreeAndUpdate(temp_am);
    return C_OK;

cleanup:
    /* Rollback: nothing observable was modified, release everything. */
    if (new_aof_name) sdsfree(new_aof_name);
    if (newfd != -1) close(newfd);
    if (temp_am) aofManifestFree(temp_am);
    return C_ERR;
}
+
+/* Whether to limit the execution of Background AOF rewrite.
+ *
+ * At present, if AOFRW fails, redis will automatically retry. If it continues
+ * to fail, we may get a lot of very small INCR files. so we need an AOFRW
+ * limiting measure.
+ *
+ * We can't directly use `server.aof_current_size` and `server.aof_last_incr_size`,
+ * because there may be no new writes after AOFRW fails.
+ *
+ * So, we use time delay to achieve our goal. When AOFRW fails, we delay the execution
+ * of the next AOFRW by 1 minute. If the next AOFRW also fails, it will be delayed by 2
+ * minutes. The next is 4, 8, 16, the maximum delay is 60 minutes (1 hour).
+ *
+ * During the limit period, we can still use the 'bgrewriteaof' command to execute AOFRW
+ * immediately.
+ *
+ * Return 1 means that AOFRW is limited and cannot be executed. 0 means that we can execute
+ * AOFRW, which may be that we have reached the 'next_rewrite_time' or the number of INCR
+ * AOFs has not reached the limit threshold.
+ * */
+#define AOF_REWRITE_LIMITE_THRESHOLD 3
+#define AOF_REWRITE_LIMITE_MAX_MINUTES 60 /* 1 hour */
+int aofRewriteLimited(void) {
+ static int next_delay_minutes = 0;
+ static time_t next_rewrite_time = 0;
+
+ if (server.stat_aofrw_consecutive_failures < AOF_REWRITE_LIMITE_THRESHOLD) {
+ /* We may be recovering from limited state, so reset all states. */
+ next_delay_minutes = 0;
+ next_rewrite_time = 0;
+ return 0;
+ }
+
+ /* if it is in the limiting state, then check if the next_rewrite_time is reached */
+ if (next_rewrite_time != 0) {
+ if (server.unixtime < next_rewrite_time) {
+ return 1;
+ } else {
+ next_rewrite_time = 0;
+ return 0;
+ }
+ }
+
+ next_delay_minutes = (next_delay_minutes == 0) ? 1 : (next_delay_minutes * 2);
+ if (next_delay_minutes > AOF_REWRITE_LIMITE_MAX_MINUTES) {
+ next_delay_minutes = AOF_REWRITE_LIMITE_MAX_MINUTES;
+ }
+
+ next_rewrite_time = server.unixtime + next_delay_minutes * 60;
+ serverLog(LL_WARNING,
+ "Background AOF rewrite has repeatedly failed and triggered the limit, will retry in %d minutes", next_delay_minutes);
+ return 1;
+}
+
+/* ----------------------------------------------------------------------------
+ * AOF file implementation
+ * ------------------------------------------------------------------------- */
+
+/* Return true if an AOf fsync is currently already in progress in a
+ * BIO thread. */
+int aofFsyncInProgress(void) {
+ /* Note that we don't care about aof_background_fsync_and_close because
+ * server.aof_fd has been replaced by the new INCR AOF file fd,
+ * see openNewIncrAofForAppend. */
+ return bioPendingJobsOfType(BIO_AOF_FSYNC) != 0;
+}
+
/* Starts a background task that performs fsync() against the specified
 * file descriptor (the one of the AOF file) in another thread.
 * The current master replication offset is attached to the job. */
void aof_background_fsync(int fd) {
    bioCreateFsyncJob(fd, server.master_repl_offset, 1);
}
+
/* Close the fd on the basis of aof_background_fsync: queue a bio job
 * that fsyncs the descriptor and then closes it, so neither syscall
 * blocks the main thread. */
void aof_background_fsync_and_close(int fd) {
    bioCreateCloseAofJob(fd, server.master_repl_offset, 1);
}
+
/* Kills an AOFRW child process if it exists, waits for it to terminate,
 * removes its temporary rewrite file and resets the child bookkeeping. */
void killAppendOnlyChild(void) {
    int statloc;
    /* No AOFRW child? return. */
    if (server.child_type != CHILD_TYPE_AOF) return;
    /* Kill AOFRW child, wait for child exit. */
    serverLog(LL_NOTICE,"Killing running AOF rewrite child: %ld",
        (long) server.child_pid);
    if (kill(server.child_pid,SIGUSR1) != -1) {
        /* Reap children until the AOFRW child specifically has been
         * collected, so it does not linger as a zombie. */
        while(waitpid(-1, &statloc, 0) != server.child_pid);
    }
    aofRemoveTempFile(server.child_pid);
    resetChildState();
    server.aof_rewrite_time_start = -1;
}
+
/* Called when the user switches from "appendonly yes" to "appendonly no"
 * at runtime using the CONFIG command.
 * Flushes any buffered AOF data, best-effort fsyncs and closes the AOF
 * fd, kills a running AOFRW child, and resets all AOF-related state. */
void stopAppendOnly(void) {
    serverAssert(server.aof_state != AOF_OFF);
    /* Force the flush (argument 1) so the buffer is written even if a
     * background fsync is in progress. */
    flushAppendOnlyFile(1);
    if (redis_fsync(server.aof_fd) == -1) {
        serverLog(LL_WARNING,"Fail to fsync the AOF file: %s",strerror(errno));
    } else {
        server.aof_last_fsync = server.unixtime;
    }
    close(server.aof_fd);

    server.aof_fd = -1;
    server.aof_selected_db = -1;
    server.aof_state = AOF_OFF;
    server.aof_rewrite_scheduled = 0;
    server.aof_last_incr_size = 0;
    server.aof_last_incr_fsync_offset = 0;
    server.fsynced_reploff = -1;
    atomicSet(server.fsynced_reploff_pending, 0);
    killAppendOnlyChild();
    sdsfree(server.aof_buf);
    server.aof_buf = sdsempty();
}
+
/* Called when the user switches from "appendonly no" to "appendonly yes"
 * at runtime using the CONFIG command.
 * Moves the AOF state to AOF_WAIT_REWRITE and either starts a background
 * rewrite immediately or schedules one (when another child process is
 * active or we're inside MULTI/EXEC). Returns C_OK on success, C_ERR if
 * the rewrite could not be triggered (state is rolled back to AOF_OFF). */
int startAppendOnly(void) {
    serverAssert(server.aof_state == AOF_OFF);

    server.aof_state = AOF_WAIT_REWRITE;
    if (hasActiveChildProcess() && server.child_type != CHILD_TYPE_AOF) {
        server.aof_rewrite_scheduled = 1;
        serverLog(LL_NOTICE,"AOF was enabled but there is already another background operation. An AOF background was scheduled to start when possible.");
    } else if (server.in_exec){
        server.aof_rewrite_scheduled = 1;
        serverLog(LL_NOTICE,"AOF was enabled during a transaction. An AOF background was scheduled to start when possible.");
    } else {
        /* If there is a pending AOF rewrite, we need to switch it off and
         * start a new one: the old one cannot be reused because it is not
         * accumulating the AOF buffer. */
        if (server.child_type == CHILD_TYPE_AOF) {
            serverLog(LL_NOTICE,"AOF was enabled but there is already an AOF rewriting in background. Stopping background AOF and starting a rewrite now.");
            killAppendOnlyChild();
        }

        if (rewriteAppendOnlyFileBackground() == C_ERR) {
            server.aof_state = AOF_OFF;
            serverLog(LL_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.");
            return C_ERR;
        }
    }
    server.aof_last_fsync = server.unixtime;
    /* If AOF fsync error in bio job, we just ignore it and log the event. */
    int aof_bio_fsync_status;
    atomicGet(server.aof_bio_fsync_status, aof_bio_fsync_status);
    if (aof_bio_fsync_status == C_ERR) {
        serverLog(LL_WARNING,
            "AOF reopen, just ignore the AOF fsync error in bio job");
        atomicSet(server.aof_bio_fsync_status,C_OK);
    }

    /* If AOF was in error state, we just ignore it and log the event. */
    if (server.aof_last_write_status == C_ERR) {
        serverLog(LL_WARNING,"AOF reopen, just ignore the last error.");
        server.aof_last_write_status = C_OK;
    }
    return C_OK;
}
+
/* Wrapper around write(2) that retries on short writes and EINTR until
 * the whole buffer has been written. Retrying short writes may look odd
 * for a block device (a short write usually means out of space, so the
 * next call would fail too), but on modern systems that is no longer
 * always true and retrying is simply more resilient: a real error will
 * surface on the next attempt anyway.
 *
 * Returns the total number of bytes written, which may be less than
 * 'len' if an error occurred mid-way; returns -1 when nothing could be
 * written at all. */
ssize_t aofWrite(int fd, const char *buf, size_t len) {
    ssize_t written_now, written_total = 0;

    while (len > 0) {
        written_now = write(fd, buf, len);

        if (written_now == -1) {
            if (errno == EINTR) continue;
            return written_total > 0 ? written_total : -1;
        }

        len -= written_now;
        buf += written_now;
        written_total += written_now;
    }

    return written_total;
}
+
/* Write the append only file buffer on disk.
 *
 * Since we are required to write the AOF before replying to the client,
 * and the only way the client socket can get a write is entering when
 * the event loop, we accumulate all the AOF writes in a memory
 * buffer and write it on disk using this function just before entering
 * the event loop again.
 *
 * About the 'force' argument:
 *
 * When the fsync policy is set to 'everysec' we may delay the flush if there
 * is still an fsync() going on in the background thread, since for instance
 * on Linux write(2) will be blocked by the background fsync anyway.
 * When this happens we remember that there is some aof buffer to be
 * flushed ASAP, and will try to do that in the serverCron() function.
 *
 * However if force is set to 1 we'll write regardless of the background
 * fsync. */
#define AOF_WRITE_LOG_ERROR_RATE 30 /* Seconds between errors logging. */
void flushAppendOnlyFile(int force) {
    ssize_t nwritten;
    int sync_in_progress = 0;
    mstime_t latency;

    if (sdslen(server.aof_buf) == 0) {
        /* Check if we need to do fsync even the aof buffer is empty,
         * because previously in AOF_FSYNC_EVERYSEC mode, fsync is
         * called only when aof buffer is not empty, so if users
         * stop write commands before fsync called in one second,
         * the data in page cache cannot be flushed in time. */
        if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
            server.aof_last_incr_fsync_offset != server.aof_last_incr_size &&
            server.unixtime > server.aof_last_fsync &&
            !(sync_in_progress = aofFsyncInProgress())) {
            goto try_fsync;

        /* Check if we need to do fsync even the aof buffer is empty,
         * the reason is described in the previous AOF_FSYNC_EVERYSEC block,
         * and AOF_FSYNC_ALWAYS is also checked here to handle a case where
         * aof_fsync is changed from everysec to always. */
        } else if (server.aof_fsync == AOF_FSYNC_ALWAYS &&
                   server.aof_last_incr_fsync_offset != server.aof_last_incr_size)
        {
            goto try_fsync;
        } else {
            return;
        }
    }

    if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
        sync_in_progress = aofFsyncInProgress();

    if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) {
        /* With this append fsync policy we do background fsyncing.
         * If the fsync is still in progress we can try to delay
         * the write for a couple of seconds. */
        if (sync_in_progress) {
            if (server.aof_flush_postponed_start == 0) {
                /* No previous write postponing, remember that we are
                 * postponing the flush and return. */
                server.aof_flush_postponed_start = server.unixtime;
                return;
            } else if (server.unixtime - server.aof_flush_postponed_start < 2) {
                /* We were already waiting for fsync to finish, but for less
                 * than two seconds this is still ok. Postpone again. */
                return;
            }
            /* Otherwise fall through, and go write since we can't wait
             * over two seconds. */
            server.aof_delayed_fsync++;
            serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis.");
        }
    }
    /* We want to perform a single write. This should be guaranteed atomic
     * at least if the filesystem we are writing is a real physical one.
     * While this will save us against the server being killed I don't think
     * there is much to do about the whole server stopping for power problems
     * or alike */

    /* Debug/testing hook: optionally sleep before the write. */
    if (server.aof_flush_sleep && sdslen(server.aof_buf)) {
        usleep(server.aof_flush_sleep);
    }

    latencyStartMonitor(latency);
    nwritten = aofWrite(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));
    latencyEndMonitor(latency);
    /* We want to capture different events for delayed writes:
     * when the delay happens with a pending fsync, or with a saving child
     * active, and when the above two conditions are missing.
     * We also use an additional event name to save all samples which is
     * useful for graphing / monitoring purposes. */
    if (sync_in_progress) {
        latencyAddSampleIfNeeded("aof-write-pending-fsync",latency);
    } else if (hasActiveChildProcess()) {
        latencyAddSampleIfNeeded("aof-write-active-child",latency);
    } else {
        latencyAddSampleIfNeeded("aof-write-alone",latency);
    }
    latencyAddSampleIfNeeded("aof-write",latency);

    /* We performed the write so reset the postponed flush sentinel to zero. */
    server.aof_flush_postponed_start = 0;

    if (nwritten != (ssize_t)sdslen(server.aof_buf)) {
        /* Partial write or outright error. */
        static time_t last_write_error_log = 0;
        int can_log = 0;

        /* Limit logging rate to 1 line per AOF_WRITE_LOG_ERROR_RATE seconds. */
        if ((server.unixtime - last_write_error_log) > AOF_WRITE_LOG_ERROR_RATE) {
            can_log = 1;
            last_write_error_log = server.unixtime;
        }

        /* Log the AOF write error and record the error code. */
        if (nwritten == -1) {
            if (can_log) {
                serverLog(LL_WARNING,"Error writing to the AOF file: %s",
                    strerror(errno));
            }
            server.aof_last_write_errno = errno;
        } else {
            if (can_log) {
                serverLog(LL_WARNING,"Short write while writing to "
                                     "the AOF file: (nwritten=%lld, "
                                     "expected=%lld)",
                                     (long long)nwritten,
                                     (long long)sdslen(server.aof_buf));
            }

            /* Try to undo the partial write by truncating the file back
             * to the last known-good size. */
            if (ftruncate(server.aof_fd, server.aof_last_incr_size) == -1) {
                if (can_log) {
                    serverLog(LL_WARNING, "Could not remove short write "
                             "from the append-only file. Redis may refuse "
                             "to load the AOF the next time it starts. "
                             "ftruncate: %s", strerror(errno));
                }
            } else {
                /* If the ftruncate() succeeded we can set nwritten to
                 * -1 since there is no longer partial data into the AOF. */
                nwritten = -1;
            }
            server.aof_last_write_errno = ENOSPC;
        }

        /* Handle the AOF write error. */
        if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
            /* We can't recover when the fsync policy is ALWAYS since the reply
             * for the client is already in the output buffers (both writes and
             * reads), and the changes to the db can't be rolled back. Since we
             * have a contract with the user that on acknowledged or observed
             * writes are is synced on disk, we must exit. */
            serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
            exit(1);
        } else {
            /* Recover from failed write leaving data into the buffer. However
             * set an error to stop accepting writes as long as the error
             * condition is not cleared. */
            server.aof_last_write_status = C_ERR;

            /* Trim the sds buffer if there was a partial write, and there
             * was no way to undo it with ftruncate(2). */
            if (nwritten > 0) {
                server.aof_current_size += nwritten;
                server.aof_last_incr_size += nwritten;
                sdsrange(server.aof_buf,nwritten,-1);
            }
            return; /* We'll try again on the next call... */
        }
    } else {
        /* Successful write(2). If AOF was in error state, restore the
         * OK state and log the event. */
        if (server.aof_last_write_status == C_ERR) {
            serverLog(LL_NOTICE,
                "AOF write error looks solved, Redis can write again.");
            server.aof_last_write_status = C_OK;
        }
    }
    /* Account the fully-written buffer in the size counters. */
    server.aof_current_size += nwritten;
    server.aof_last_incr_size += nwritten;

    /* Re-use AOF buffer when it is small enough. The maximum comes from the
     * arena size of 4k minus some overhead (but is otherwise arbitrary). */
    if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) {
        sdsclear(server.aof_buf);
    } else {
        sdsfree(server.aof_buf);
        server.aof_buf = sdsempty();
    }

try_fsync:
    /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are
     * children doing I/O in the background. */
    if (server.aof_no_fsync_on_rewrite && hasActiveChildProcess())
        return;

    /* Perform the fsync if needed. */
    if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
        /* redis_fsync is defined as fdatasync() for Linux in order to avoid
         * flushing metadata. */
        latencyStartMonitor(latency);
        /* Let's try to get this data on the disk. To guarantee data safe when
         * the AOF fsync policy is 'always', we should exit if failed to fsync
         * AOF (see comment next to the exit(1) after write error above). */
        if (redis_fsync(server.aof_fd) == -1) {
            serverLog(LL_WARNING,"Can't persist AOF for fsync error when the "
              "AOF fsync policy is 'always': %s. Exiting...", strerror(errno));
            exit(1);
        }
        latencyEndMonitor(latency);
        latencyAddSampleIfNeeded("aof-fsync-always",latency);
        server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
        server.aof_last_fsync = server.unixtime;
        atomicSet(server.fsynced_reploff_pending, server.master_repl_offset);
    } else if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
               server.unixtime > server.aof_last_fsync) {
        if (!sync_in_progress) {
            aof_background_fsync(server.aof_fd);
            server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
        }
        server.aof_last_fsync = server.unixtime;
    }
}
+
+sds catAppendOnlyGenericCommand(sds dst, int argc, robj **argv) {
+ char buf[32];
+ int len, j;
+ robj *o;
+
+ buf[0] = '*';
+ len = 1+ll2string(buf+1,sizeof(buf)-1,argc);
+ buf[len++] = '\r';
+ buf[len++] = '\n';
+ dst = sdscatlen(dst,buf,len);
+
+ for (j = 0; j < argc; j++) {
+ o = getDecodedObject(argv[j]);
+ buf[0] = '$';
+ len = 1+ll2string(buf+1,sizeof(buf)-1,sdslen(o->ptr));
+ buf[len++] = '\r';
+ buf[len++] = '\n';
+ dst = sdscatlen(dst,buf,len);
+ dst = sdscatlen(dst,o->ptr,sdslen(o->ptr));
+ dst = sdscatlen(dst,"\r\n",2);
+ decrRefCount(o);
+ }
+ return dst;
+}
+
+/* Generate a piece of timestamp annotation for AOF if current record timestamp
+ * in AOF is not equal server unix time. If we specify 'force' argument to 1,
+ * we would generate one without check, currently, it is useful in AOF rewriting
+ * child process which always needs to record one timestamp at the beginning of
+ * rewriting AOF.
+ *
+ * Timestamp annotation format is "#TS:${timestamp}\r\n". "TS" is short of
+ * timestamp and this method could save extra bytes in AOF. */
+sds genAofTimestampAnnotationIfNeeded(int force) {
+ sds ts = NULL;
+
+ if (force || server.aof_cur_timestamp < server.unixtime) {
+ server.aof_cur_timestamp = force ? time(NULL) : server.unixtime;
+ ts = sdscatfmt(sdsempty(), "#TS:%I\r\n", server.aof_cur_timestamp);
+ serverAssert(sdslen(ts) <= AOF_ANNOTATION_LINE_MAX_LEN);
+ }
+ return ts;
+}
+
/* Write the given command to the aof file.
 * dictid  - dictionary id the command should be applied to,
 *           this is used in order to decide if a `select` command
 *           should also be written to the aof. Value of -1 means
 *           to avoid writing `select` command in any case.
 * argv    - The command to write to the aof.
 * argc    - Number of values in argv
 *
 * The serialized command is appended to server.aof_buf (flushed later by
 * flushAppendOnlyFile), not written to disk directly. */
void feedAppendOnlyFile(int dictid, robj **argv, int argc) {
    sds buf = sdsempty();

    serverAssert(dictid == -1 || (dictid >= 0 && dictid < server.dbnum));

    /* Feed timestamp if needed */
    if (server.aof_timestamp_enabled) {
        sds ts = genAofTimestampAnnotationIfNeeded(0);
        if (ts != NULL) {
            buf = sdscatsds(buf, ts);
            sdsfree(ts);
        }
    }

    /* The DB this command was targeting is not the same as the last command
     * we appended. To issue a SELECT command is needed. */
    if (dictid != -1 && dictid != server.aof_selected_db) {
        char seldb[64];

        snprintf(seldb,sizeof(seldb),"%d",dictid);
        buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
            (unsigned long)strlen(seldb),seldb);
        server.aof_selected_db = dictid;
    }

    /* All commands should be propagated the same way in AOF as in replication.
     * No need for AOF-specific translation. */
    buf = catAppendOnlyGenericCommand(buf,argc,argv);

    /* Append to the AOF buffer. This will be flushed on disk just before
     * of re-entering the event loop, so before the client will get a
     * positive reply about the operation performed.
     * During AOF_WAIT_REWRITE we only accumulate if an AOF rewrite child
     * is actually running (its output will be the matching base file). */
    if (server.aof_state == AOF_ON ||
        (server.aof_state == AOF_WAIT_REWRITE && server.child_type == CHILD_TYPE_AOF))
    {
        server.aof_buf = sdscatlen(server.aof_buf, buf, sdslen(buf));
    }

    sdsfree(buf);
}
+
+/* ----------------------------------------------------------------------------
+ * AOF loading
+ * ------------------------------------------------------------------------- */
+
/* In Redis commands are always executed in the context of a client, so in
 * order to load the append only file we need to create a fake client.
 * The returned client has no connection (createClient(NULL)), never blocks,
 * and never receives replies. */
struct client *createAOFClient(void) {
    struct client *c = createClient(NULL);

    c->id = CLIENT_ID_AOF; /* So modules can identify it's the AOF client. */

    /*
     * The AOF client should never be blocked (unlike master
     * replication connection).
     * This is because blocking the AOF client might cause
     * deadlock (because potentially no one will unblock it).
     * Also, if the AOF client will be blocked just for
     * background processing there is a chance that the
     * command execution order will be violated.
     */
    c->flags = CLIENT_DENY_BLOCKING;

    /* We set the fake client as a slave waiting for the synchronization
     * so that Redis will not try to send replies to this client. */
    c->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
    return c;
}
+
+/* Replay an append log file. On success AOF_OK or AOF_TRUNCATED is returned,
+ * otherwise, one of the following is returned:
+ * AOF_OPEN_ERR: Failed to open the AOF file.
+ * AOF_NOT_EXIST: AOF file doesn't exist.
+ * AOF_EMPTY: The AOF file is empty (nothing to load).
+ * AOF_FAILED: Failed to load the AOF file. */
+int loadSingleAppendOnlyFile(char *filename) {
+ struct client *fakeClient;
+ struct redis_stat sb;
+ int old_aof_state = server.aof_state;
+ long loops = 0;
+ off_t valid_up_to = 0; /* Offset of latest well-formed command loaded. */
+ off_t valid_before_multi = 0; /* Offset before MULTI command loaded. */
+ off_t last_progress_report_size = 0;
+ int ret = AOF_OK;
+
+ sds aof_filepath = makePath(server.aof_dirname, filename);
+ FILE *fp = fopen(aof_filepath, "r");
+ if (fp == NULL) {
+ int en = errno;
+ if (redis_stat(aof_filepath, &sb) == 0 || errno != ENOENT) {
+ serverLog(LL_WARNING,"Fatal error: can't open the append log file %s for reading: %s", filename, strerror(en));
+ sdsfree(aof_filepath);
+ return AOF_OPEN_ERR;
+ } else {
+ serverLog(LL_WARNING,"The append log file %s doesn't exist: %s", filename, strerror(errno));
+ sdsfree(aof_filepath);
+ return AOF_NOT_EXIST;
+ }
+ }
+
+ if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) {
+ fclose(fp);
+ sdsfree(aof_filepath);
+ return AOF_EMPTY;
+ }
+
+ /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
+ * to the same file we're about to read. */
+ server.aof_state = AOF_OFF;
+
+ client *old_cur_client = server.current_client;
+ client *old_exec_client = server.executing_client;
+ fakeClient = createAOFClient();
+ server.current_client = server.executing_client = fakeClient;
+
+ /* Check if the AOF file is in RDB format (it may be RDB encoded base AOF
+ * or old style RDB-preamble AOF). In that case we need to load the RDB file
+ * and later continue loading the AOF tail if it is an old style RDB-preamble AOF. */
+ char sig[5]; /* "REDIS" */
+ if (fread(sig,1,5,fp) != 5 || memcmp(sig,"REDIS",5) != 0) {
+ /* Not in RDB format, seek back at 0 offset. */
+ if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
+ } else {
+ /* RDB format. Pass loading the RDB functions. */
+ rio rdb;
+ int old_style = !strcmp(filename, server.aof_filename);
+ if (old_style)
+ serverLog(LL_NOTICE, "Reading RDB preamble from AOF file...");
+ else
+ serverLog(LL_NOTICE, "Reading RDB base file on AOF loading...");
+
+ if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
+ rioInitWithFile(&rdb,fp);
+ if (rdbLoadRio(&rdb,RDBFLAGS_AOF_PREAMBLE,NULL) != C_OK) {
+ if (old_style)
+ serverLog(LL_WARNING, "Error reading the RDB preamble of the AOF file %s, AOF loading aborted", filename);
+ else
+ serverLog(LL_WARNING, "Error reading the RDB base file %s, AOF loading aborted", filename);
+
+ ret = AOF_FAILED;
+ goto cleanup;
+ } else {
+ loadingAbsProgress(ftello(fp));
+ last_progress_report_size = ftello(fp);
+ if (old_style) serverLog(LL_NOTICE, "Reading the remaining AOF tail...");
+ }
+ }
+
+ /* Read the actual AOF file, in REPL format, command by command. */
+ while(1) {
+ int argc, j;
+ unsigned long len;
+ robj **argv;
+ char buf[AOF_ANNOTATION_LINE_MAX_LEN];
+ sds argsds;
+ struct redisCommand *cmd;
+
+ /* Serve the clients from time to time */
+ if (!(loops++ % 1024)) {
+ off_t progress_delta = ftello(fp) - last_progress_report_size;
+ loadingIncrProgress(progress_delta);
+ last_progress_report_size += progress_delta;
+ processEventsWhileBlocked();
+ processModuleLoadingProgressEvent(1);
+ }
+ if (fgets(buf,sizeof(buf),fp) == NULL) {
+ if (feof(fp)) {
+ break;
+ } else {
+ goto readerr;
+ }
+ }
+ if (buf[0] == '#') continue; /* Skip annotations */
+ if (buf[0] != '*') goto fmterr;
+ if (buf[1] == '\0') goto readerr;
+ argc = atoi(buf+1);
+ if (argc < 1) goto fmterr;
+ if ((size_t)argc > SIZE_MAX / sizeof(robj*)) goto fmterr;
+
+ /* Load the next command in the AOF as our fake client
+ * argv. */
+ argv = zmalloc(sizeof(robj*)*argc);
+ fakeClient->argc = argc;
+ fakeClient->argv = argv;
+ fakeClient->argv_len = argc;
+
+ for (j = 0; j < argc; j++) {
+ /* Parse the argument len. */
+ char *readres = fgets(buf,sizeof(buf),fp);
+ if (readres == NULL || buf[0] != '$') {
+ fakeClient->argc = j; /* Free up to j-1. */
+ freeClientArgv(fakeClient);
+ if (readres == NULL)
+ goto readerr;
+ else
+ goto fmterr;
+ }
+ len = strtol(buf+1,NULL,10);
+
+ /* Read it into a string object. */
+ argsds = sdsnewlen(SDS_NOINIT,len);
+ if (len && fread(argsds,len,1,fp) == 0) {
+ sdsfree(argsds);
+ fakeClient->argc = j; /* Free up to j-1. */
+ freeClientArgv(fakeClient);
+ goto readerr;
+ }
+ argv[j] = createObject(OBJ_STRING,argsds);
+
+ /* Discard CRLF. */
+ if (fread(buf,2,1,fp) == 0) {
+ fakeClient->argc = j+1; /* Free up to j. */
+ freeClientArgv(fakeClient);
+ goto readerr;
+ }
+ }
+
+ /* Command lookup */
+ cmd = lookupCommand(argv,argc);
+ if (!cmd) {
+ serverLog(LL_WARNING,
+ "Unknown command '%s' reading the append only file %s",
+ (char*)argv[0]->ptr, filename);
+ freeClientArgv(fakeClient);
+ ret = AOF_FAILED;
+ goto cleanup;
+ }
+
+ if (cmd->proc == multiCommand) valid_before_multi = valid_up_to;
+
+ /* Run the command in the context of a fake client */
+ fakeClient->cmd = fakeClient->lastcmd = cmd;
+ if (fakeClient->flags & CLIENT_MULTI &&
+ fakeClient->cmd->proc != execCommand)
+ {
+ /* Note: we don't have to attempt calling evalGetCommandFlags,
+ * since this is AOF, the checks in processCommand are not made
+ * anyway.*/
+ queueMultiCommand(fakeClient, cmd->flags);
+ } else {
+ cmd->proc(fakeClient);
+ }
+
+ /* The fake client should not have a reply */
+ serverAssert(fakeClient->bufpos == 0 &&
+ listLength(fakeClient->reply) == 0);
+
+ /* The fake client should never get blocked */
+ serverAssert((fakeClient->flags & CLIENT_BLOCKED) == 0);
+
+ /* Clean up. Command code may have changed argv/argc so we use the
+ * argv/argc of the client instead of the local variables. */
+ freeClientArgv(fakeClient);
+ if (server.aof_load_truncated) valid_up_to = ftello(fp);
+ if (server.key_load_delay)
+ debugDelay(server.key_load_delay);
+ }
+
+ /* This point can only be reached when EOF is reached without errors.
+ * If the client is in the middle of a MULTI/EXEC, handle it as it was
+ * a short read, even if technically the protocol is correct: we want
+ * to remove the unprocessed tail and continue. */
+ if (fakeClient->flags & CLIENT_MULTI) {
+ serverLog(LL_WARNING,
+ "Revert incomplete MULTI/EXEC transaction in AOF file %s", filename);
+ valid_up_to = valid_before_multi;
+ goto uxeof;
+ }
+
+loaded_ok: /* DB loaded, cleanup and return success (AOF_OK or AOF_TRUNCATED). */
+ loadingIncrProgress(ftello(fp) - last_progress_report_size);
+ server.aof_state = old_aof_state;
+ goto cleanup;
+
+readerr: /* Read error. If feof(fp) is true, fall through to unexpected EOF. */
+ if (!feof(fp)) {
+ serverLog(LL_WARNING,"Unrecoverable error reading the append only file %s: %s", filename, strerror(errno));
+ ret = AOF_FAILED;
+ goto cleanup;
+ }
+
+uxeof: /* Unexpected AOF end of file. */
+ if (server.aof_load_truncated) {
+ serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename);
+ serverLog(LL_WARNING,"!!! Truncating the AOF %s at offset %llu !!!",
+ filename, (unsigned long long) valid_up_to);
+ if (valid_up_to == -1 || truncate(aof_filepath,valid_up_to) == -1) {
+ if (valid_up_to == -1) {
+ serverLog(LL_WARNING,"Last valid command offset is invalid");
+ } else {
+ serverLog(LL_WARNING,"Error truncating the AOF file %s: %s",
+ filename, strerror(errno));
+ }
+ } else {
+ /* Make sure the AOF file descriptor points to the end of the
+ * file after the truncate call. */
+ if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) {
+ serverLog(LL_WARNING,"Can't seek the end of the AOF file %s: %s",
+ filename, strerror(errno));
+ } else {
+ serverLog(LL_WARNING,
+ "AOF %s loaded anyway because aof-load-truncated is enabled", filename);
+ ret = AOF_TRUNCATED;
+ goto loaded_ok;
+ }
+ }
+ }
+ serverLog(LL_WARNING, "Unexpected end of file reading the append only file %s. You can: "
+ "1) Make a backup of your AOF file, then use ./redis-check-aof --fix <filename.manifest>. "
+ "2) Alternatively you can set the 'aof-load-truncated' configuration option to yes and restart the server.", filename);
+ ret = AOF_FAILED;
+ goto cleanup;
+
+fmterr: /* Format error. */
+ serverLog(LL_WARNING, "Bad file format reading the append only file %s: "
+ "make a backup of your AOF file, then use ./redis-check-aof --fix <filename.manifest>", filename);
+ ret = AOF_FAILED;
+ /* fall through to cleanup. */
+
+cleanup:
+ if (fakeClient) freeClient(fakeClient);
+ server.current_client = old_cur_client;
+ server.executing_client = old_exec_client;
+ fclose(fp);
+ sdsfree(aof_filepath);
+ return ret;
+}
+
/* Load the AOF files according to the aofManifest pointed to by 'am':
 * the optional BASE file first, then every INCR file in manifest order.
 *
 * Returns one of the AOF_* status codes:
 *   AOF_OK        - everything loaded.
 *   AOF_TRUNCATED - the last file was truncated but loaded anyway
 *                   (only acceptable for the final file).
 *   AOF_NOT_EXIST - the manifest references no files at all.
 *   AOF_EMPTY     - files exist but hold no data.
 *   AOF_OPEN_ERR / AOF_FAILED - fatal errors; loading is aborted. */
int loadAppendOnlyFiles(aofManifest *am) {
    serverAssert(am != NULL);
    int status, ret = AOF_OK;
    long long start;                      /* Per-file load start time (us). */
    off_t total_size = 0, base_size = 0;
    sds aof_name;
    int total_num, aof_num = 0, last_file;

    /* If the 'server.aof_filename' file exists in dir, we may be starting
     * from an old redis version. We will enter upgrade mode in three situations:
     *
     * 1. The 'server.aof_dirname' directory does not exist.
     * 2. The 'server.aof_dirname' directory exists but the manifest file is
     *    missing.
     * 3. The 'server.aof_dirname' directory exists and the manifest file it
     *    contains has only one base AOF record, the file name of this base AOF
     *    is 'server.aof_filename', and the 'server.aof_filename' file does not
     *    exist in the 'server.aof_dirname' directory. */
    if (fileExist(server.aof_filename)) {
        if (!dirExists(server.aof_dirname) ||
            (am->base_aof_info == NULL && listLength(am->incr_aof_list) == 0) ||
            (am->base_aof_info != NULL && listLength(am->incr_aof_list) == 0 &&
             !strcmp(am->base_aof_info->file_name, server.aof_filename) && !aofFileExist(server.aof_filename)))
        {
            aofUpgradePrepare(am);
        }
    }

    if (am->base_aof_info == NULL && listLength(am->incr_aof_list) == 0) {
        return AOF_NOT_EXIST;
    }

    total_num = getBaseAndIncrAppendOnlyFilesNum(am);
    serverAssert(total_num > 0);

    /* Here we calculate the total size of all BASE and INCR files in
     * advance, it will be set to `server.loading_total_bytes`. */
    total_size = getBaseAndIncrAppendOnlyFilesSize(am, &status);
    if (status != AOF_OK) {
        /* If an AOF exists in the manifest but not on the disk, we consider this to be a fatal error. */
        if (status == AOF_NOT_EXIST) status = AOF_FAILED;

        return status;
    } else if (total_size == 0) {
        return AOF_EMPTY;
    }

    startLoading(total_size, RDBFLAGS_AOF_PREAMBLE, 0);

    /* Load BASE AOF if needed. */
    if (am->base_aof_info) {
        serverAssert(am->base_aof_info->file_type == AOF_FILE_TYPE_BASE);
        aof_name = (char*)am->base_aof_info->file_name;
        updateLoadingFileName(aof_name);
        base_size = getAppendOnlyFileSize(aof_name, NULL);
        last_file = ++aof_num == total_num;
        start = ustime();
        ret = loadSingleAppendOnlyFile(aof_name);
        if (ret == AOF_OK || (ret == AOF_TRUNCATED && last_file)) {
            serverLog(LL_NOTICE, "DB loaded from base file %s: %.3f seconds",
                aof_name, (float)(ustime()-start)/1000000);
        }

        /* If the truncated file is not the last file, we consider this to be a fatal error. */
        if (ret == AOF_TRUNCATED && !last_file) {
            ret = AOF_FAILED;
            serverLog(LL_WARNING, "Fatal error: the truncated file is not the last file");
        }

        if (ret == AOF_OPEN_ERR || ret == AOF_FAILED) {
            goto cleanup;
        }
    }

    /* Load INCR AOFs if needed. */
    if (listLength(am->incr_aof_list)) {
        listNode *ln;
        listIter li;

        listRewind(am->incr_aof_list, &li);
        while ((ln = listNext(&li)) != NULL) {
            aofInfo *ai = (aofInfo*)ln->value;
            serverAssert(ai->file_type == AOF_FILE_TYPE_INCR);
            aof_name = (char*)ai->file_name;
            updateLoadingFileName(aof_name);
            last_file = ++aof_num == total_num;
            start = ustime();
            ret = loadSingleAppendOnlyFile(aof_name);
            if (ret == AOF_OK || (ret == AOF_TRUNCATED && last_file)) {
                serverLog(LL_NOTICE, "DB loaded from incr file %s: %.3f seconds",
                    aof_name, (float)(ustime()-start)/1000000);
            }

            /* We know that (at least) one of the AOF files has data (total_size > 0),
             * so an empty incr AOF file doesn't count as an AOF_EMPTY result. */
            if (ret == AOF_EMPTY) ret = AOF_OK;

            /* If the truncated file is not the last file, we consider this to be a fatal error. */
            if (ret == AOF_TRUNCATED && !last_file) {
                ret = AOF_FAILED;
                serverLog(LL_WARNING, "Fatal error: the truncated file is not the last file");
            }

            if (ret == AOF_OPEN_ERR || ret == AOF_FAILED) {
                goto cleanup;
            }
        }
    }

    server.aof_current_size = total_size;
    /* Ideally, the aof_rewrite_base_size variable should hold the size of the
     * AOF when the last rewrite ended, this should include the size of the
     * incremental file that was created during the rewrite since otherwise we
     * risk the next automatic rewrite to happen too soon (or immediately if
     * auto-aof-rewrite-percentage is low). However, since we do not persist
     * aof_rewrite_base_size information anywhere, we initialize it on restart
     * to the size of BASE AOF file. This might cause the first AOFRW to be
     * executed early, but that shouldn't be a problem since everything will be
     * fine after the first AOFRW. */
    server.aof_rewrite_base_size = base_size;

cleanup:
    stopLoading(ret == AOF_OK || ret == AOF_TRUNCATED);
    return ret;
}
+
+/* ----------------------------------------------------------------------------
+ * AOF rewrite
+ * ------------------------------------------------------------------------- */
+
+/* Delegate writing an object to writing a bulk string or bulk long long.
+ * This is not placed in rio.c since that adds the server.h dependency. */
+int rioWriteBulkObject(rio *r, robj *obj) {
+ /* Avoid using getDecodedObject to help copy-on-write (we are often
+ * in a child process when this function is called). */
+ if (obj->encoding == OBJ_ENCODING_INT) {
+ return rioWriteBulkLongLong(r,(long)obj->ptr);
+ } else if (sdsEncodedObject(obj)) {
+ return rioWriteBulkString(r,obj->ptr,sdslen(obj->ptr));
+ } else {
+ serverPanic("Unknown string encoding");
+ }
+}
+
+/* Emit the commands needed to rebuild a list object.
+ * The function returns 0 on error, 1 on success. */
+int rewriteListObject(rio *r, robj *key, robj *o) {
+ long long count = 0, items = listTypeLength(o);
+
+ listTypeIterator *li = listTypeInitIterator(o,0,LIST_TAIL);
+ listTypeEntry entry;
+ while (listTypeNext(li,&entry)) {
+ if (count == 0) {
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
+ if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
+ !rioWriteBulkString(r,"RPUSH",5) ||
+ !rioWriteBulkObject(r,key))
+ {
+ listTypeReleaseIterator(li);
+ return 0;
+ }
+ }
+
+ unsigned char *vstr;
+ size_t vlen;
+ long long lval;
+ vstr = listTypeGetValue(&entry,&vlen,&lval);
+ if (vstr) {
+ if (!rioWriteBulkString(r,(char*)vstr,vlen)) {
+ listTypeReleaseIterator(li);
+ return 0;
+ }
+ } else {
+ if (!rioWriteBulkLongLong(r,lval)) {
+ listTypeReleaseIterator(li);
+ return 0;
+ }
+ }
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ items--;
+ }
+ listTypeReleaseIterator(li);
+ return 1;
+}
+
+/* Emit the commands needed to rebuild a set object.
+ * The function returns 0 on error, 1 on success. */
+int rewriteSetObject(rio *r, robj *key, robj *o) {
+ long long count = 0, items = setTypeSize(o);
+ setTypeIterator *si = setTypeInitIterator(o);
+ char *str;
+ size_t len;
+ int64_t llval;
+ while (setTypeNext(si, &str, &len, &llval) != -1) {
+ if (count == 0) {
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
+ if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
+ !rioWriteBulkString(r,"SADD",4) ||
+ !rioWriteBulkObject(r,key))
+ {
+ return 0;
+ }
+ }
+ size_t written = str ?
+ rioWriteBulkString(r, str, len) : rioWriteBulkLongLong(r, llval);
+ if (!written) {
+ setTypeReleaseIterator(si);
+ return 0;
+ }
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ items--;
+ }
+ setTypeReleaseIterator(si);
+ return 1;
+}
+
/* Emit the commands needed to rebuild a sorted set object.
 * The zset is serialized as a sequence of variadic ZADD commands, each
 * carrying at most AOF_REWRITE_ITEMS_PER_CMD (score, member) pairs.
 * The function returns 0 on error, 1 on success. */
int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
    long long count = 0, items = zsetLength(o);

    if (o->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *zl = o->ptr;
        unsigned char *eptr, *sptr;  /* Member / score cursors in the listpack. */
        unsigned char *vstr;
        unsigned int vlen;
        long long vll;
        double score;

        /* Members and scores are stored as alternating listpack entries:
         * eptr points at a member, sptr at the score that follows it. */
        eptr = lpSeek(zl,0);
        serverAssert(eptr != NULL);
        sptr = lpNext(zl,eptr);
        serverAssert(sptr != NULL);

        while (eptr != NULL) {
            vstr = lpGetValue(eptr,&vlen,&vll);
            score = zzlGetScore(sptr);

            /* Start a new ZADD command when the previous one is full. */
            if (count == 0) {
                int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
                    AOF_REWRITE_ITEMS_PER_CMD : items;

                /* 2 fixed args (ZADD, key) + 2 per item (score, member). */
                if (!rioWriteBulkCount(r,'*',2+cmd_items*2) ||
                    !rioWriteBulkString(r,"ZADD",4) ||
                    !rioWriteBulkObject(r,key))
                {
                    return 0;
                }
            }
            if (!rioWriteBulkDouble(r,score)) return 0;
            if (vstr != NULL) {
                if (!rioWriteBulkString(r,(char*)vstr,vlen)) return 0;
            } else {
                /* Member stored as an integer in the listpack. */
                if (!rioWriteBulkLongLong(r,vll)) return 0;
            }
            zzlNext(zl,&eptr,&sptr);
            if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
            items--;
        }
    } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = o->ptr;
        /* Iterate the dict view of the zset (member -> score). */
        dictIterator *di = dictGetIterator(zs->dict);
        dictEntry *de;

        while((de = dictNext(di)) != NULL) {
            sds ele = dictGetKey(de);
            double *score = dictGetVal(de);

            /* Start a new ZADD command when the previous one is full. */
            if (count == 0) {
                int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
                    AOF_REWRITE_ITEMS_PER_CMD : items;

                if (!rioWriteBulkCount(r,'*',2+cmd_items*2) ||
                    !rioWriteBulkString(r,"ZADD",4) ||
                    !rioWriteBulkObject(r,key))
                {
                    dictReleaseIterator(di);
                    return 0;
                }
            }
            if (!rioWriteBulkDouble(r,*score) ||
                !rioWriteBulkString(r,ele,sdslen(ele)))
            {
                dictReleaseIterator(di);
                return 0;
            }
            if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
            items--;
        }
        dictReleaseIterator(di);
    } else {
        serverPanic("Unknown sorted zset encoding");
    }
    return 1;
}
+
+/* Write either the key or the value of the currently selected item of a hash.
+ * The 'hi' argument passes a valid Redis hash iterator.
+ * The 'what' filed specifies if to write a key or a value and can be
+ * either OBJ_HASH_KEY or OBJ_HASH_VALUE.
+ *
+ * The function returns 0 on error, non-zero on success. */
+static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) {
+ if (hi->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *vstr = NULL;
+ unsigned int vlen = UINT_MAX;
+ long long vll = LLONG_MAX;
+
+ hashTypeCurrentFromListpack(hi, what, &vstr, &vlen, &vll);
+ if (vstr)
+ return rioWriteBulkString(r, (char*)vstr, vlen);
+ else
+ return rioWriteBulkLongLong(r, vll);
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
+ sds value = hashTypeCurrentFromHashTable(hi, what);
+ return rioWriteBulkString(r, value, sdslen(value));
+ }
+
+ serverPanic("Unknown hash encoding");
+ return 0;
+}
+
+/* Emit the commands needed to rebuild a hash object.
+ * The function returns 0 on error, 1 on success. */
+int rewriteHashObject(rio *r, robj *key, robj *o) {
+ hashTypeIterator *hi;
+ long long count = 0, items = hashTypeLength(o);
+
+ hi = hashTypeInitIterator(o);
+ while (hashTypeNext(hi) != C_ERR) {
+ if (count == 0) {
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
+
+ if (!rioWriteBulkCount(r,'*',2+cmd_items*2) ||
+ !rioWriteBulkString(r,"HMSET",5) ||
+ !rioWriteBulkObject(r,key))
+ {
+ hashTypeReleaseIterator(hi);
+ return 0;
+ }
+ }
+
+ if (!rioWriteHashIteratorCursor(r, hi, OBJ_HASH_KEY) ||
+ !rioWriteHashIteratorCursor(r, hi, OBJ_HASH_VALUE))
+ {
+ hashTypeReleaseIterator(hi);
+ return 0;
+ }
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ items--;
+ }
+
+ hashTypeReleaseIterator(hi);
+
+ return 1;
+}
+
+/* Helper for rewriteStreamObject() that generates a bulk string into the
+ * AOF representing the ID 'id'. */
+int rioWriteBulkStreamID(rio *r,streamID *id) {
+ int retval;
+
+ sds replyid = sdscatfmt(sdsempty(),"%U-%U",id->ms,id->seq);
+ retval = rioWriteBulkString(r,replyid,sdslen(replyid));
+ sdsfree(replyid);
+ return retval;
+}
+
+/* Helper for rewriteStreamObject(): emit the XCLAIM needed in order to
+ * add the message described by 'nack' having the id 'rawid', into the pending
+ * list of the specified consumer. All this in the context of the specified
+ * key and group. */
+int rioWriteStreamPendingEntry(rio *r, robj *key, const char *groupname, size_t groupname_len, streamConsumer *consumer, unsigned char *rawid, streamNACK *nack) {
+ /* XCLAIM <key> <group> <consumer> 0 <id> TIME <milliseconds-unix-time>
+ RETRYCOUNT <count> JUSTID FORCE. */
+ streamID id;
+ streamDecodeID(rawid,&id);
+ if (rioWriteBulkCount(r,'*',12) == 0) return 0;
+ if (rioWriteBulkString(r,"XCLAIM",6) == 0) return 0;
+ if (rioWriteBulkObject(r,key) == 0) return 0;
+ if (rioWriteBulkString(r,groupname,groupname_len) == 0) return 0;
+ if (rioWriteBulkString(r,consumer->name,sdslen(consumer->name)) == 0) return 0;
+ if (rioWriteBulkString(r,"0",1) == 0) return 0;
+ if (rioWriteBulkStreamID(r,&id) == 0) return 0;
+ if (rioWriteBulkString(r,"TIME",4) == 0) return 0;
+ if (rioWriteBulkLongLong(r,nack->delivery_time) == 0) return 0;
+ if (rioWriteBulkString(r,"RETRYCOUNT",10) == 0) return 0;
+ if (rioWriteBulkLongLong(r,nack->delivery_count) == 0) return 0;
+ if (rioWriteBulkString(r,"JUSTID",6) == 0) return 0;
+ if (rioWriteBulkString(r,"FORCE",5) == 0) return 0;
+ return 1;
+}
+
+/* Helper for rewriteStreamObject(): emit the XGROUP CREATECONSUMER is
+ * needed in order to create consumers that do not have any pending entries.
+ * All this in the context of the specified key and group. */
+int rioWriteStreamEmptyConsumer(rio *r, robj *key, const char *groupname, size_t groupname_len, streamConsumer *consumer) {
+ /* XGROUP CREATECONSUMER <key> <group> <consumer> */
+ if (rioWriteBulkCount(r,'*',5) == 0) return 0;
+ if (rioWriteBulkString(r,"XGROUP",6) == 0) return 0;
+ if (rioWriteBulkString(r,"CREATECONSUMER",14) == 0) return 0;
+ if (rioWriteBulkObject(r,key) == 0) return 0;
+ if (rioWriteBulkString(r,groupname,groupname_len) == 0) return 0;
+ if (rioWriteBulkString(r,consumer->name,sdslen(consumer->name)) == 0) return 0;
+ return 1;
+}
+
/* Emit the commands needed to rebuild a stream object: XADD for the entries
 * (or the MAXLEN-0 trick for an empty stream), XSETID to pin the last ID and
 * counters, then XGROUP CREATE / CREATECONSUMER / XCLAIM to recreate every
 * consumer group with its consumers and pending entries.
 * The function returns 0 on error, 1 on success. */
int rewriteStreamObject(rio *r, robj *key, robj *o) {
    stream *s = o->ptr;
    streamIterator si;
    streamIteratorStart(&si,s,NULL,NULL,0);
    streamID id;
    int64_t numfields;

    if (s->length) {
        /* Reconstruct the stream data using XADD commands. */
        while(streamIteratorGetID(&si,&id,&numfields)) {
            /* Emit a two elements array for each item. The first is
             * the ID, the second is an array of field-value pairs. */

            /* Emit the XADD <key> <id> ...fields... command. */
            if (!rioWriteBulkCount(r,'*',3+numfields*2) ||
                !rioWriteBulkString(r,"XADD",4) ||
                !rioWriteBulkObject(r,key) ||
                !rioWriteBulkStreamID(r,&id))
            {
                streamIteratorStop(&si);
                return 0;
            }
            while(numfields--) {
                unsigned char *field, *value;
                int64_t field_len, value_len;
                streamIteratorGetField(&si,&field,&value,&field_len,&value_len);
                if (!rioWriteBulkString(r,(char*)field,field_len) ||
                    !rioWriteBulkString(r,(char*)value,value_len))
                {
                    streamIteratorStop(&si);
                    return 0;
                }
            }
        }
    } else {
        /* Use the XADD MAXLEN 0 trick to generate an empty stream if
         * the key we are serializing is an empty string, which is possible
         * for the Stream type. */
        id.ms = 0; id.seq = 1;
        if (!rioWriteBulkCount(r,'*',7) ||
            !rioWriteBulkString(r,"XADD",4) ||
            !rioWriteBulkObject(r,key) ||
            !rioWriteBulkString(r,"MAXLEN",6) ||
            !rioWriteBulkString(r,"0",1) ||
            !rioWriteBulkStreamID(r,&id) ||
            !rioWriteBulkString(r,"x",1) ||
            !rioWriteBulkString(r,"y",1))
        {
            streamIteratorStop(&si);
            return 0;
        }
    }

    /* Append XSETID after XADD, make sure lastid is correct,
     * in case of XDEL lastid. */
    if (!rioWriteBulkCount(r,'*',7) ||
        !rioWriteBulkString(r,"XSETID",6) ||
        !rioWriteBulkObject(r,key) ||
        !rioWriteBulkStreamID(r,&s->last_id) ||
        !rioWriteBulkString(r,"ENTRIESADDED",12) ||
        !rioWriteBulkLongLong(r,s->entries_added) ||
        !rioWriteBulkString(r,"MAXDELETEDID",12) ||
        !rioWriteBulkStreamID(r,&s->max_deleted_entry_id))
    {
        streamIteratorStop(&si);
        return 0;
    }


    /* Create all the stream consumer groups. */
    if (s->cgroups) {
        raxIterator ri;
        raxStart(&ri,s->cgroups);
        raxSeek(&ri,"^",NULL,0);
        while(raxNext(&ri)) {
            streamCG *group = ri.data;
            /* Emit the XGROUP CREATE in order to create the group. */
            if (!rioWriteBulkCount(r,'*',7) ||
                !rioWriteBulkString(r,"XGROUP",6) ||
                !rioWriteBulkString(r,"CREATE",6) ||
                !rioWriteBulkObject(r,key) ||
                !rioWriteBulkString(r,(char*)ri.key,ri.key_len) ||
                !rioWriteBulkStreamID(r,&group->last_id) ||
                !rioWriteBulkString(r,"ENTRIESREAD",11) ||
                !rioWriteBulkLongLong(r,group->entries_read))
            {
                raxStop(&ri);
                streamIteratorStop(&si);
                return 0;
            }

            /* Generate XCLAIMs for each consumer that happens to
             * have pending entries. Empty consumers would be generated with
             * XGROUP CREATECONSUMER. */
            raxIterator ri_cons;
            raxStart(&ri_cons,group->consumers);
            raxSeek(&ri_cons,"^",NULL,0);
            while(raxNext(&ri_cons)) {
                streamConsumer *consumer = ri_cons.data;
                /* If there are no pending entries, just emit XGROUP CREATECONSUMER */
                if (raxSize(consumer->pel) == 0) {
                    if (rioWriteStreamEmptyConsumer(r,key,(char*)ri.key,
                                                    ri.key_len,consumer) == 0)
                    {
                        /* On failure, unwind every open iterator before returning. */
                        raxStop(&ri_cons);
                        raxStop(&ri);
                        streamIteratorStop(&si);
                        return 0;
                    }
                    continue;
                }
                /* For the current consumer, iterate all the PEL entries
                 * to emit the XCLAIM protocol. */
                raxIterator ri_pel;
                raxStart(&ri_pel,consumer->pel);
                raxSeek(&ri_pel,"^",NULL,0);
                while(raxNext(&ri_pel)) {
                    streamNACK *nack = ri_pel.data;
                    if (rioWriteStreamPendingEntry(r,key,(char*)ri.key,
                                                   ri.key_len,consumer,
                                                   ri_pel.key,nack) == 0)
                    {
                        /* On failure, unwind every open iterator before returning. */
                        raxStop(&ri_pel);
                        raxStop(&ri_cons);
                        raxStop(&ri);
                        streamIteratorStop(&si);
                        return 0;
                    }
                }
                raxStop(&ri_pel);
            }
            raxStop(&ri_cons);
        }
        raxStop(&ri);
    }

    streamIteratorStop(&si);
    return 1;
}
+
/* Call the module type callback in order to rewrite a data type
 * that is exported by a module and is not handled by Redis itself.
 * 'dbid' is the database the key lives in; it is passed through to the
 * module IO context. The function returns 0 on error, 1 on success. */
int rewriteModuleObject(rio *r, robj *key, robj *o, int dbid) {
    RedisModuleIO io;
    moduleValue *mv = o->ptr;
    moduleType *mt = mv->type;
    /* Set up an IO context targeting 'r'; the module's aof_rewrite callback
     * emits whatever commands it needs through this context. */
    moduleInitIOContext(io,mt,r,key,dbid);
    mt->aof_rewrite(&io,key,mv->value);
    /* The callback may have lazily created a module context; free it. */
    if (io.ctx) {
        moduleFreeContext(io.ctx);
        zfree(io.ctx);
    }
    /* io.error is set by the rio layer / module API on write failure. */
    return io.error ? 0 : 1;
}
+
+static int rewriteFunctions(rio *aof) {
+ dict *functions = functionsLibGet();
+ dictIterator *iter = dictGetIterator(functions);
+ dictEntry *entry = NULL;
+ while ((entry = dictNext(iter))) {
+ functionLibInfo *li = dictGetVal(entry);
+ if (rioWrite(aof, "*3\r\n", 4) == 0) goto werr;
+ char function_load[] = "$8\r\nFUNCTION\r\n$4\r\nLOAD\r\n";
+ if (rioWrite(aof, function_load, sizeof(function_load) - 1) == 0) goto werr;
+ if (rioWriteBulkString(aof, li->code, sdslen(li->code)) == 0) goto werr;
+ }
+ dictReleaseIterator(iter);
+ return 1;
+
+werr:
+ dictReleaseIterator(iter);
+ return 0;
+}
+
/* Write to 'aof' the sequence of commands that rebuilds the whole dataset:
 * an optional timestamp annotation, FUNCTION LOAD commands, then per
 * database a SELECT followed by one rebuild command group per key (plus
 * PEXPIREAT for volatile keys). Returns C_OK on success, C_ERR on write
 * error. */
int rewriteAppendOnlyFileRio(rio *aof) {
    dictIterator *di = NULL;   /* Kept non-local so 'werr' can release it. */
    dictEntry *de;
    int j;
    long key_count = 0;        /* Keys processed, for periodic child info. */
    long long updated_time = 0;

    /* Record timestamp at the beginning of rewriting AOF. */
    if (server.aof_timestamp_enabled) {
        sds ts = genAofTimestampAnnotationIfNeeded(1);
        if (rioWrite(aof,ts,sdslen(ts)) == 0) { sdsfree(ts); goto werr; }
        sdsfree(ts);
    }

    if (rewriteFunctions(aof) == 0) goto werr;

    for (j = 0; j < server.dbnum; j++) {
        char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
        redisDb *db = server.db+j;
        dict *d = db->dict;
        if (dictSize(d) == 0) continue;
        di = dictGetSafeIterator(d);

        /* SELECT the new DB */
        if (rioWrite(aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr;
        if (rioWriteBulkLongLong(aof,j) == 0) goto werr;

        /* Iterate this DB writing every entry */
        while((de = dictNext(di)) != NULL) {
            sds keystr;
            robj key, *o;
            long long expiretime;
            size_t aof_bytes_before_key = aof->processed_bytes;

            keystr = dictGetKey(de);
            o = dictGetVal(de);
            /* Wrap the raw sds key in a stack-allocated string object. */
            initStaticStringObject(key,keystr);

            expiretime = getExpire(db,&key);

            /* Save the key and associated value */
            if (o->type == OBJ_STRING) {
                /* Emit a SET command */
                char cmd[]="*3\r\n$3\r\nSET\r\n";
                if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr;
                /* Key and value */
                if (rioWriteBulkObject(aof,&key) == 0) goto werr;
                if (rioWriteBulkObject(aof,o) == 0) goto werr;
            } else if (o->type == OBJ_LIST) {
                if (rewriteListObject(aof,&key,o) == 0) goto werr;
            } else if (o->type == OBJ_SET) {
                if (rewriteSetObject(aof,&key,o) == 0) goto werr;
            } else if (o->type == OBJ_ZSET) {
                if (rewriteSortedSetObject(aof,&key,o) == 0) goto werr;
            } else if (o->type == OBJ_HASH) {
                if (rewriteHashObject(aof,&key,o) == 0) goto werr;
            } else if (o->type == OBJ_STREAM) {
                if (rewriteStreamObject(aof,&key,o) == 0) goto werr;
            } else if (o->type == OBJ_MODULE) {
                if (rewriteModuleObject(aof,&key,o,j) == 0) goto werr;
            } else {
                serverPanic("Unknown object type");
            }

            /* In fork child process, we can try to release memory back to the
             * OS and possibly avoid or decrease COW. We give the dismiss
             * mechanism a hint about an estimated size of the object we stored. */
            size_t dump_size = aof->processed_bytes - aof_bytes_before_key;
            if (server.in_fork_child) dismissObject(o, dump_size);

            /* Save the expire time */
            if (expiretime != -1) {
                char cmd[]="*3\r\n$9\r\nPEXPIREAT\r\n";
                if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr;
                if (rioWriteBulkObject(aof,&key) == 0) goto werr;
                if (rioWriteBulkLongLong(aof,expiretime) == 0) goto werr;
            }

            /* Update info every 1 second (approximately).
             * in order to avoid calling mstime() on each iteration, we will
             * check the diff every 1024 keys */
            if ((key_count++ & 1023) == 0) {
                long long now = mstime();
                if (now - updated_time >= 1000) {
                    sendChildInfo(CHILD_INFO_TYPE_CURRENT_INFO, key_count, "AOF rewrite");
                    updated_time = now;
                }
            }

            /* Delay before next key if required (for testing) */
            if (server.rdb_key_save_delay)
                debugDelay(server.rdb_key_save_delay);
        }
        dictReleaseIterator(di);
        di = NULL;
    }
    return C_OK;

werr:
    if (di) dictReleaseIterator(di);
    return C_ERR;
}
+
/* Write a sequence of commands able to fully rebuild the dataset into
 * "filename". Used both by REWRITEAOF and BGREWRITEAOF.
 *
 * In order to minimize the number of commands needed in the rewritten
 * log Redis uses variadic commands when possible, such as RPUSH, SADD
 * and ZADD. However at max AOF_REWRITE_ITEMS_PER_CMD items per time
 * are inserted using a single command.
 *
 * The data is first written to a pid-based temp file, fsynced, then
 * atomically rename(2)d to 'filename'. Returns C_OK / C_ERR. */
int rewriteAppendOnlyFile(char *filename) {
    rio aof;
    FILE *fp = NULL;
    char tmpfile[256];

    /* Note that we have to use a different temp name here compared to the
     * one used by rewriteAppendOnlyFileBackground() function. */
    snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
    fp = fopen(tmpfile,"w");
    if (!fp) {
        serverLog(LL_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno));
        return C_ERR;
    }

    rioInitWithFile(&aof,fp);

    if (server.aof_rewrite_incremental_fsync) {
        /* fsync (and drop page cache) every REDIS_AUTOSYNC_BYTES to avoid
         * one huge flush at the end. */
        rioSetAutoSync(&aof,REDIS_AUTOSYNC_BYTES);
        rioSetReclaimCache(&aof,1);
    }

    startSaving(RDBFLAGS_AOF_PREAMBLE);

    /* Either dump an RDB preamble or a pure command stream. */
    if (server.aof_use_rdb_preamble) {
        int error;
        if (rdbSaveRio(SLAVE_REQ_NONE,&aof,&error,RDBFLAGS_AOF_PREAMBLE,NULL) == C_ERR) {
            errno = error;
            goto werr;
        }
    } else {
        if (rewriteAppendOnlyFileRio(&aof) == C_ERR) goto werr;
    }

    /* Make sure data will not remain on the OS's output buffers */
    if (fflush(fp)) goto werr;
    if (fsync(fileno(fp))) goto werr;
    if (reclaimFilePageCache(fileno(fp), 0, 0) == -1) {
        /* A minor error. Just log to know what happens */
        serverLog(LL_NOTICE,"Unable to reclaim page cache: %s", strerror(errno));
    }
    /* fclose() can fail on a write stream; NULL fp so werr won't double-close. */
    if (fclose(fp)) { fp = NULL; goto werr; }
    fp = NULL;

    /* Use RENAME to make sure the DB file is changed atomically only
     * if the generate DB file is ok. */
    if (rename(tmpfile,filename) == -1) {
        serverLog(LL_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
        unlink(tmpfile);
        stopSaving(0);
        return C_ERR;
    }
    stopSaving(1);

    return C_OK;

werr:
    serverLog(LL_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
    if (fp) fclose(fp);
    unlink(tmpfile);
    stopSaving(0);
    return C_ERR;
}
+/* ----------------------------------------------------------------------------
+ * AOF background rewrite
+ * ------------------------------------------------------------------------- */
+
/* This is how rewriting of the append only file in background works:
 *
 * 1) The user calls BGREWRITEAOF
 * 2) Redis calls this function, that forks():
 *    2a) the child rewrites the append only file in a temp file.
 *    2b) the parent opens a new INCR AOF file to continue writing.
 * 3) When the child has finished '2a', it exits.
 * 4) The parent will trap the exit code, if it's OK, it will:
 *    4a) get a new BASE file name and mark the previous one (if we have it) as the HISTORY type
 *    4b) rename(2) the temp file to the new BASE file name
 *    4c) mark the rewritten INCR AOFs as history type
 *    4d) persist the AOF manifest file
 *    4e) delete the history files using bio
 */
/* Start a background AOF rewrite: prepare the AOF dir, roll over to a new
 * INCR file, then fork a child that writes a fresh BASE file via
 * rewriteAppendOnlyFile(). Returns C_OK if the child was started, C_ERR
 * otherwise (another child active, dir/INCR failure, or fork failure). */
int rewriteAppendOnlyFileBackground(void) {
    pid_t childpid;

    /* Only one RDB/AOF/module child may run at a time. */
    if (hasActiveChildProcess()) return C_ERR;

    if (dirCreateIfMissing(server.aof_dirname) == -1) {
        serverLog(LL_WARNING, "Can't open or create append-only dir %s: %s",
            server.aof_dirname, strerror(errno));
        server.aof_lastbgrewrite_status = C_ERR;
        return C_ERR;
    }

    /* We set aof_selected_db to -1 in order to force the next call to the
     * feedAppendOnlyFile() to issue a SELECT command. */
    server.aof_selected_db = -1;
    flushAppendOnlyFile(1);
    if (openNewIncrAofForAppend() != C_OK) {
        server.aof_lastbgrewrite_status = C_ERR;
        return C_ERR;
    }

    if (server.aof_state == AOF_WAIT_REWRITE) {
        /* Wait for all bio jobs related to AOF to drain. This prevents a race
         * between updates to `fsynced_reploff_pending` of the worker thread, belonging
         * to the previous AOF, and the new one. This concern is specific for a full
         * sync scenario where we don't wanna risk the ACKed replication offset
         * jumping backwards or forward when switching to a different master. */
        bioDrainWorker(BIO_AOF_FSYNC);

        /* Set the initial repl_offset, which will be applied to fsynced_reploff
         * when AOFRW finishes (after possibly being updated by a bio thread) */
        atomicSet(server.fsynced_reploff_pending, server.master_repl_offset);
        server.fsynced_reploff = 0;
    }

    server.stat_aof_rewrites++;

    if ((childpid = redisFork(CHILD_TYPE_AOF)) == 0) {
        char tmpfile[256];

        /* Child */
        redisSetProcTitle("redis-aof-rewrite");
        redisSetCpuAffinity(server.aof_rewrite_cpulist);
        snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
        if (rewriteAppendOnlyFile(tmpfile) == C_OK) {
            serverLog(LL_NOTICE,
                "Successfully created the temporary AOF base file %s", tmpfile);
            sendChildCowInfo(CHILD_INFO_TYPE_AOF_COW_SIZE, "AOF rewrite");
            exitFromChild(0);
        } else {
            exitFromChild(1);
        }
    } else {
        /* Parent */
        if (childpid == -1) {
            server.aof_lastbgrewrite_status = C_ERR;
            serverLog(LL_WARNING,
                "Can't rewrite append only file in background: fork: %s",
                strerror(errno));
            return C_ERR;
        }
        serverLog(LL_NOTICE,
            "Background append only file rewriting started by pid %ld",(long) childpid);
        server.aof_rewrite_scheduled = 0;
        server.aof_rewrite_time_start = time(NULL);
        return C_OK;
    }
    return C_OK; /* unreached */
}
+
+/* BGREWRITEAOF command: start a background AOF rewrite now, or schedule
+ * one for later if the server cannot fork at this moment. */
+void bgrewriteaofCommand(client *c) {
+    /* An AOF rewrite child is already running: report and bail out. */
+    if (server.child_type == CHILD_TYPE_AOF) {
+        addReplyError(c,"Background append only file rewriting already in progress");
+        return;
+    }
+
+    /* Some other child is active, or we are inside MULTI/EXEC: defer the
+     * rewrite until the server can fork. */
+    if (hasActiveChildProcess() || server.in_exec) {
+        server.aof_rewrite_scheduled = 1;
+        /* When manually triggering AOFRW we reset the count
+         * so that it can be executed immediately. */
+        server.stat_aofrw_consecutive_failures = 0;
+        addReplyStatus(c,"Background append only file rewriting scheduled");
+        return;
+    }
+
+    /* Otherwise try to kick off the rewrite right away. */
+    if (rewriteAppendOnlyFileBackground() == C_OK) {
+        addReplyStatus(c,"Background append only file rewriting started");
+    } else {
+        addReplyError(c,"Can't execute an AOF background rewriting. "
+                      "Please check the server logs for more information.");
+    }
+}
+
+/* Unlink (in the background) the temporary files that an AOF rewrite child
+ * with the given pid may have left behind. */
+void aofRemoveTempFile(pid_t childpid) {
+    char tmpfile[256];
+    int pid = (int) childpid;
+
+    /* Temp file written by the background rewrite child. */
+    snprintf(tmpfile,sizeof(tmpfile),"temp-rewriteaof-bg-%d.aof",pid);
+    bg_unlink(tmpfile);
+
+    /* Temp file used by the rewrite itself. */
+    snprintf(tmpfile,sizeof(tmpfile),"temp-rewriteaof-%d.aof",pid);
+    bg_unlink(tmpfile);
+}
+
+/* Get size of an AOF file.
+ * The status argument is an optional output argument to be filled with
+ * one of the AOF_ status values (AOF_OK, AOF_NOT_EXIST or AOF_OPEN_ERR).
+ * On stat(2) failure a warning is logged and 0 is returned. */
+off_t getAppendOnlyFileSize(sds filename, int *status) {
+ struct redis_stat sb;
+ off_t size;
+ mstime_t latency;
+
+ /* The file lives inside the configured append-only directory. */
+ sds aof_filepath = makePath(server.aof_dirname, filename);
+ latencyStartMonitor(latency);
+ if (redis_stat(aof_filepath, &sb) == -1) {
+ if (status) *status = errno == ENOENT ? AOF_NOT_EXIST : AOF_OPEN_ERR;
+ serverLog(LL_WARNING, "Unable to obtain the AOF file %s length. stat: %s",
+ filename, strerror(errno));
+ size = 0;
+ } else {
+ if (status) *status = AOF_OK;
+ size = sb.st_size;
+ }
+ /* Track slow stat(2) calls in the latency monitor. */
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("aof-fstat", latency);
+ sdsfree(aof_filepath);
+ return size;
+}
+
+/* Get size of all AOF files referred by the manifest (excluding history).
+ * The status argument is an output argument to be filled with
+ * one of the AOF_ status values. Unlike getAppendOnlyFileSize(), 'status'
+ * must NOT be NULL here: it is dereferenced unconditionally below.
+ * Returns 0 as soon as any referenced file fails to stat. */
+off_t getBaseAndIncrAppendOnlyFilesSize(aofManifest *am, int *status) {
+ off_t size = 0;
+ listNode *ln;
+ listIter li;
+
+ if (am->base_aof_info) {
+ serverAssert(am->base_aof_info->file_type == AOF_FILE_TYPE_BASE);
+
+ size += getAppendOnlyFileSize(am->base_aof_info->file_name, status);
+ if (*status != AOF_OK) return 0;
+ }
+
+ /* Accumulate the sizes of all the INCR files. */
+ listRewind(am->incr_aof_list, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ aofInfo *ai = (aofInfo*)ln->value;
+ serverAssert(ai->file_type == AOF_FILE_TYPE_INCR);
+ size += getAppendOnlyFileSize(ai->file_name, status);
+ if (*status != AOF_OK) return 0;
+ }
+
+ return size;
+}
+
+/* Return how many live files (one optional BASE plus the INCR list) the
+ * given manifest references, excluding history files. */
+int getBaseAndIncrAppendOnlyFilesNum(aofManifest *am) {
+    int count = am->base_aof_info ? 1 : 0;
+    if (am->incr_aof_list) count += listLength(am->incr_aof_list);
+    return count;
+}
+
+/* A background append only file rewriting (BGREWRITEAOF) terminated its work.
+ * Handle this: on clean exit install the new BASE (and possibly INCR) file
+ * via the manifest, otherwise record the failure. Temp files are removed in
+ * all cases (see 'cleanup' label). */
+void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
+ if (!bysignal && exitcode == 0) {
+ char tmpfile[256];
+ long long now = ustime();
+ sds new_base_filepath = NULL;
+ sds new_incr_filepath = NULL;
+ aofManifest *temp_am;
+ mstime_t latency;
+
+ serverLog(LL_NOTICE,
+ "Background AOF rewrite terminated with success");
+
+ /* Name of the temp BASE file written by the child. */
+ snprintf(tmpfile, 256, "temp-rewriteaof-bg-%d.aof",
+ (int)server.child_pid);
+
+ serverAssert(server.aof_manifest != NULL);
+
+ /* Dup a temporary aof_manifest for subsequent modifications. */
+ temp_am = aofManifestDup(server.aof_manifest);
+
+ /* Get a new BASE file name and mark the previous (if we have)
+ * as the HISTORY type. */
+ sds new_base_filename = getNewBaseFileNameAndMarkPreAsHistory(temp_am);
+ serverAssert(new_base_filename != NULL);
+ new_base_filepath = makePath(server.aof_dirname, new_base_filename);
+
+ /* Rename the temporary aof file to 'new_base_filename'. */
+ latencyStartMonitor(latency);
+ if (rename(tmpfile, new_base_filepath) == -1) {
+ serverLog(LL_WARNING,
+ "Error trying to rename the temporary AOF base file %s into %s: %s",
+ tmpfile,
+ new_base_filepath,
+ strerror(errno));
+ aofManifestFree(temp_am);
+ sdsfree(new_base_filepath);
+ server.aof_lastbgrewrite_status = C_ERR;
+ server.stat_aofrw_consecutive_failures++;
+ goto cleanup;
+ }
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("aof-rename", latency);
+ serverLog(LL_NOTICE,
+ "Successfully renamed the temporary AOF base file %s into %s", tmpfile, new_base_filename);
+
+ /* Rename the temporary incr aof file to 'new_incr_filename'. */
+ if (server.aof_state == AOF_WAIT_REWRITE) {
+ /* Get temporary incr aof name. */
+ sds temp_incr_aof_name = getTempIncrAofName();
+ sds temp_incr_filepath = makePath(server.aof_dirname, temp_incr_aof_name);
+ /* Get next new incr aof name. */
+ sds new_incr_filename = getNewIncrAofName(temp_am);
+ new_incr_filepath = makePath(server.aof_dirname, new_incr_filename);
+ latencyStartMonitor(latency);
+ if (rename(temp_incr_filepath, new_incr_filepath) == -1) {
+ serverLog(LL_WARNING,
+ "Error trying to rename the temporary AOF incr file %s into %s: %s",
+ temp_incr_filepath,
+ new_incr_filepath,
+ strerror(errno));
+ /* Undo the BASE rename done above to keep the on-disk state
+ * consistent with the (unchanged) manifest. */
+ bg_unlink(new_base_filepath);
+ sdsfree(new_base_filepath);
+ aofManifestFree(temp_am);
+ sdsfree(temp_incr_filepath);
+ sdsfree(new_incr_filepath);
+ sdsfree(temp_incr_aof_name);
+ server.aof_lastbgrewrite_status = C_ERR;
+ server.stat_aofrw_consecutive_failures++;
+ goto cleanup;
+ }
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("aof-rename", latency);
+ serverLog(LL_NOTICE,
+ "Successfully renamed the temporary AOF incr file %s into %s", temp_incr_aof_name, new_incr_filename);
+ sdsfree(temp_incr_filepath);
+ sdsfree(temp_incr_aof_name);
+ }
+
+ /* Change the AOF file type in 'incr_aof_list' from AOF_FILE_TYPE_INCR
+ * to AOF_FILE_TYPE_HIST, and move them to the 'history_aof_list'. */
+ markRewrittenIncrAofAsHistory(temp_am);
+
+ /* Persist our modifications. */
+ if (persistAofManifest(temp_am) == C_ERR) {
+ bg_unlink(new_base_filepath);
+ aofManifestFree(temp_am);
+ sdsfree(new_base_filepath);
+ if (new_incr_filepath) {
+ bg_unlink(new_incr_filepath);
+ sdsfree(new_incr_filepath);
+ }
+ server.aof_lastbgrewrite_status = C_ERR;
+ server.stat_aofrw_consecutive_failures++;
+ goto cleanup;
+ }
+ sdsfree(new_base_filepath);
+ if (new_incr_filepath) sdsfree(new_incr_filepath);
+
+ /* We can safely let `server.aof_manifest` point to 'temp_am' and free the previous one.
+ * Note: 'new_base_filename' points into 'temp_am' and stays valid below. */
+ aofManifestFreeAndUpdate(temp_am);
+
+ if (server.aof_state != AOF_OFF) {
+ /* AOF enabled. */
+ server.aof_current_size = getAppendOnlyFileSize(new_base_filename, NULL) + server.aof_last_incr_size;
+ server.aof_rewrite_base_size = server.aof_current_size;
+ }
+
+ /* We don't care about the return value of `aofDelHistoryFiles`, because the history
+ * deletion failure will not cause any problems. */
+ aofDelHistoryFiles();
+
+ server.aof_lastbgrewrite_status = C_OK;
+ server.stat_aofrw_consecutive_failures = 0;
+
+ serverLog(LL_NOTICE, "Background AOF rewrite finished successfully");
+ /* Change state from WAIT_REWRITE to ON if needed */
+ if (server.aof_state == AOF_WAIT_REWRITE) {
+ server.aof_state = AOF_ON;
+
+ /* Update the fsynced replication offset that just now become valid.
+ * This could either be the one we took in startAppendOnly, or a
+ * newer one set by the bio thread. */
+ long long fsynced_reploff_pending;
+ atomicGet(server.fsynced_reploff_pending, fsynced_reploff_pending);
+ server.fsynced_reploff = fsynced_reploff_pending;
+ }
+
+ serverLog(LL_VERBOSE,
+ "Background AOF rewrite signal handler took %lldus", ustime()-now);
+ } else if (!bysignal && exitcode != 0) {
+ /* The child exited with a non-zero code: the rewrite failed. */
+ server.aof_lastbgrewrite_status = C_ERR;
+ server.stat_aofrw_consecutive_failures++;
+
+ serverLog(LL_WARNING,
+ "Background AOF rewrite terminated with error");
+ } else {
+ /* SIGUSR1 is whitelisted, so we have a way to kill a child without
+ * triggering an error condition. */
+ if (bysignal != SIGUSR1) {
+ server.aof_lastbgrewrite_status = C_ERR;
+ server.stat_aofrw_consecutive_failures++;
+ }
+
+ serverLog(LL_WARNING,
+ "Background AOF rewrite terminated by signal %d", bysignal);
+ }
+
+cleanup:
+ /* Remove the child's temp files if they are still present. */
+ aofRemoveTempFile(server.child_pid);
+ /* Clear AOF buffer and delete temp incr aof for next rewrite. */
+ if (server.aof_state == AOF_WAIT_REWRITE) {
+ sdsfree(server.aof_buf);
+ server.aof_buf = sdsempty();
+ aofDelTempIncrAofFile();
+ }
+ server.aof_rewrite_time_last = time(NULL)-server.aof_rewrite_time_start;
+ server.aof_rewrite_time_start = -1;
+ /* Schedule a new rewrite if we are waiting for it to switch the AOF ON. */
+ if (server.aof_state == AOF_WAIT_REWRITE)
+ server.aof_rewrite_scheduled = 1;
+}
diff --git a/src/asciilogo.h b/src/asciilogo.h
new file mode 100644
index 0000000..a62f68c
--- /dev/null
+++ b/src/asciilogo.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Startup ASCII art banner. This is a printf-style template; the embedded
+ * placeholders (version/sha1/dirty %s (%s/%d), %s bit, mode %s, Port %d,
+ * PID %ld) are filled in by the caller at display time. */
+const char *ascii_logo =
+" _._ \n"
+" _.-``__ ''-._ \n"
+" _.-`` `. `_. ''-._ Redis %s (%s/%d) %s bit\n"
+" .-`` .-```. ```\\/ _.,_ ''-._ \n"
+" ( ' , .-` | `, ) Running in %s mode\n"
+" |`-._`-...-` __...-.``-._|'` _.-'| Port: %d\n"
+" | `-._ `._ / _.-' | PID: %ld\n"
+" `-._ `-._ `-./ _.-' _.-' \n"
+" |`-._`-._ `-.__.-' _.-'_.-'| \n"
+" | `-._`-._ _.-'_.-' | https://redis.io \n"
+" `-._ `-._`-.__.-'_.-' _.-' \n"
+" |`-._`-._ `-.__.-' _.-'_.-'| \n"
+" | `-._`-._ _.-'_.-' | \n"
+" `-._ `-._`-.__.-'_.-' _.-' \n"
+" `-._ `-.__.-' _.-' \n"
+" `-._ _.-' \n"
+" `-.__.-' \n\n";
diff --git a/src/atomicvar.h b/src/atomicvar.h
new file mode 100644
index 0000000..2c2969c
--- /dev/null
+++ b/src/atomicvar.h
@@ -0,0 +1,158 @@
+/* This file implements atomic counters using c11 _Atomic, __atomic or __sync
+ * macros if available, otherwise we will throw an error when compile.
+ *
+ * The exported interface is composed of three macros:
+ *
+ * atomicIncr(var,count) -- Increment the atomic counter
+ * atomicGetIncr(var,oldvalue_var,count) -- Get and increment the atomic counter
+ * atomicDecr(var,count) -- Decrement the atomic counter
+ * atomicGet(var,dstvar) -- Fetch the atomic counter value
+ * atomicSet(var,value) -- Set the atomic counter value
+ * atomicGetWithSync(var,value) -- 'atomicGet' with inter-thread synchronization
+ * atomicSetWithSync(var,value) -- 'atomicSet' with inter-thread synchronization
+ *
+ * Never use return value from the macros, instead use the AtomicGetIncr()
+ * if you need to get the current value and increment it atomically, like
+ * in the following example:
+ *
+ * long oldvalue;
+ * atomicGetIncr(myvar,oldvalue,1);
+ * doSomethingWith(oldvalue);
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <pthread.h>
+#include "config.h"
+
+#ifndef __ATOMIC_VAR_H
+#define __ATOMIC_VAR_H
+
+/* Define redisAtomic for atomic variable. It defaults to empty and is
+ * redefined to '_Atomic' below when C11 atomics are available. */
+#define redisAtomic
+
+/* To test Redis with Helgrind (a Valgrind tool) it is useful to define
+ * the following macro, so that __sync macros are used: those can be detected
+ * by Helgrind (even if they are less efficient) so that no false positive
+ * is reported. */
+// #define __ATOMIC_VAR_FORCE_SYNC_MACROS
+
+/* There will be many false positives if we test Redis with Helgrind, since
+ * Helgrind can't understand we have imposed ordering on the program, so
+ * we use macros in helgrind.h to tell Helgrind inter-thread happens-before
+ * relationship explicitly for avoiding false positives.
+ *
+ * For more details, please see: valgrind/helgrind.h and
+ * https://www.valgrind.org/docs/manual/hg-manual.html#hg-manual.effective-use
+ *
+ * These macros take effect only when 'make helgrind', and you must first
+ * install Valgrind in the default path configuration. */
+#ifdef __ATOMIC_VAR_FORCE_SYNC_MACROS
+#include <valgrind/helgrind.h>
+#else
+#define ANNOTATE_HAPPENS_BEFORE(v) ((void) v)
+#define ANNOTATE_HAPPENS_AFTER(v) ((void) v)
+#endif
+
+/* First choice: C11 stdatomic, when the compiler advertises it. */
+#if !defined(__ATOMIC_VAR_FORCE_SYNC_MACROS) && defined(__STDC_VERSION__) && \
+ (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
+/* Use '_Atomic' keyword if the compiler supports. */
+#undef redisAtomic
+#define redisAtomic _Atomic
+/* Implementation using _Atomic in C11. */
+
+#include <stdatomic.h>
+#define atomicIncr(var,count) atomic_fetch_add_explicit(&var,(count),memory_order_relaxed)
+#define atomicGetIncr(var,oldvalue_var,count) do { \
+ oldvalue_var = atomic_fetch_add_explicit(&var,(count),memory_order_relaxed); \
+} while(0)
+#define atomicDecr(var,count) atomic_fetch_sub_explicit(&var,(count),memory_order_relaxed)
+#define atomicGet(var,dstvar) do { \
+ dstvar = atomic_load_explicit(&var,memory_order_relaxed); \
+} while(0)
+#define atomicSet(var,value) atomic_store_explicit(&var,value,memory_order_relaxed)
+#define atomicGetWithSync(var,dstvar) do { \
+ dstvar = atomic_load_explicit(&var,memory_order_seq_cst); \
+} while(0)
+#define atomicSetWithSync(var,value) \
+ atomic_store_explicit(&var,value,memory_order_seq_cst)
+#define REDIS_ATOMIC_API "c11-builtin"
+
+/* Second choice: GCC/Clang __atomic builtins (skipping old Apple clang). */
+#elif !defined(__ATOMIC_VAR_FORCE_SYNC_MACROS) && \
+ (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) && \
+ defined(__ATOMIC_RELAXED) && defined(__ATOMIC_SEQ_CST)
+/* Implementation using __atomic macros. */
+
+#define atomicIncr(var,count) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED)
+#define atomicGetIncr(var,oldvalue_var,count) do { \
+ oldvalue_var = __atomic_fetch_add(&var,(count),__ATOMIC_RELAXED); \
+} while(0)
+#define atomicDecr(var,count) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED)
+#define atomicGet(var,dstvar) do { \
+ dstvar = __atomic_load_n(&var,__ATOMIC_RELAXED); \
+} while(0)
+#define atomicSet(var,value) __atomic_store_n(&var,value,__ATOMIC_RELAXED)
+#define atomicGetWithSync(var,dstvar) do { \
+ dstvar = __atomic_load_n(&var,__ATOMIC_SEQ_CST); \
+} while(0)
+#define atomicSetWithSync(var,value) \
+ __atomic_store_n(&var,value,__ATOMIC_SEQ_CST)
+#define REDIS_ATOMIC_API "atomic-builtin"
+
+/* Last resort: legacy __sync builtins. */
+#elif defined(HAVE_ATOMIC)
+/* Implementation using __sync macros. */
+
+#define atomicIncr(var,count) __sync_add_and_fetch(&var,(count))
+#define atomicGetIncr(var,oldvalue_var,count) do { \
+ oldvalue_var = __sync_fetch_and_add(&var,(count)); \
+} while(0)
+#define atomicDecr(var,count) __sync_sub_and_fetch(&var,(count))
+/* Atomic read implemented as an atomic 'subtract zero'. */
+#define atomicGet(var,dstvar) do { \
+ dstvar = __sync_sub_and_fetch(&var,0); \
+} while(0)
+/* Atomic write implemented as a CAS loop (retries until the swap lands). */
+#define atomicSet(var,value) do { \
+ while(!__sync_bool_compare_and_swap(&var,var,value)); \
+} while(0)
+/* Actually the builtin issues a full memory barrier by default. */
+#define atomicGetWithSync(var,dstvar) do { \
+ dstvar = __sync_sub_and_fetch(&var,0,__sync_synchronize); \
+ ANNOTATE_HAPPENS_AFTER(&var); \
+} while(0)
+#define atomicSetWithSync(var,value) do { \
+ ANNOTATE_HAPPENS_BEFORE(&var); \
+ while(!__sync_bool_compare_and_swap(&var,var,value,__sync_synchronize)); \
+} while(0)
+#define REDIS_ATOMIC_API "sync-builtin"
+
+#else
+#error "Unable to determine atomic operations for your platform"
+
+#endif
+#endif /* __ATOMIC_VAR_H */
diff --git a/src/bio.c b/src/bio.c
new file mode 100644
index 0000000..10ecf8d
--- /dev/null
+++ b/src/bio.c
@@ -0,0 +1,345 @@
+/* Background I/O service for Redis.
+ *
+ * This file implements operations that we need to perform in the background.
+ * Currently there is only a single operation, that is a background close(2)
+ * system call. This is needed as when the process is the last owner of a
+ * reference to a file closing it means unlinking it, and the deletion of the
+ * file is slow, blocking the server.
+ *
+ * In the future we'll either continue implementing new things we need or
+ * we'll switch to libeio. However there are probably long term uses for this
+ * file as we may want to put here Redis specific background tasks (for instance
+ * it is not impossible that we'll need a non blocking FLUSHDB/FLUSHALL
+ * implementation).
+ *
+ * DESIGN
+ * ------
+ *
+ * The design is simple: We have a structure representing a job to perform,
+ * and several worker threads and job queues. Every job type is assigned to
+ * a specific worker thread, and a single worker may handle several different
+ * job types.
+ * Every thread waits for new jobs in its queue, and processes every job
+ * sequentially.
+ *
+ * Jobs handled by the same worker are guaranteed to be processed from the
+ * least-recently-inserted to the most-recently-inserted (older jobs processed
+ * first).
+ *
+ * Currently there is no way for the creator of the job to be notified about
+ * the completion of the operation, this will only be added when/if needed.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "server.h"
+#include "bio.h"
+
+/* Thread titles, one per bio worker; also defines how many workers exist. */
+static char* bio_worker_title[] = {
+ "bio_close_file",
+ "bio_aof",
+ "bio_lazy_free",
+};
+
+#define BIO_WORKER_NUM (sizeof(bio_worker_title) / sizeof(*bio_worker_title))
+
+/* Maps each job type to the worker thread that handles it. Note that
+ * BIO_AOF_FSYNC and BIO_CLOSE_AOF share worker 1, so they are serialized
+ * with respect to each other. */
+static unsigned int bio_job_to_worker[] = {
+ [BIO_CLOSE_FILE] = 0,
+ [BIO_AOF_FSYNC] = 1,
+ [BIO_CLOSE_AOF] = 1,
+ [BIO_LAZY_FREE] = 2,
+};
+
+/* Per-worker thread handle, queue lock, condition variable and job queue.
+ * bio_jobs_counter is indexed by job type and is only read/written while
+ * holding the owning worker's mutex. */
+static pthread_t bio_threads[BIO_WORKER_NUM];
+static pthread_mutex_t bio_mutex[BIO_WORKER_NUM];
+static pthread_cond_t bio_newjob_cond[BIO_WORKER_NUM];
+static list *bio_jobs[BIO_WORKER_NUM];
+static unsigned long bio_jobs_counter[BIO_NUM_OPS] = {0};
+
+/* This structure represents a background Job. It is only used locally to this
+ * file as the API does not expose the internals at all. */
+typedef union bio_job {
+ struct {
+ int type; /* Job-type tag. This needs to appear as the first element in all union members. */
+ } header;
+
+ /* Job specific arguments.*/
+ struct {
+ int type;
+ int fd; /* Fd for file based background jobs */
+ long long offset; /* A job-specific offset, if applicable */
+ unsigned need_fsync:1; /* A flag to indicate that a fsync is required before
+ * the file is closed. */
+ unsigned need_reclaim_cache:1; /* A flag to indicate that reclaim cache is required before
+ * the file is closed. */
+ } fd_args;
+
+ struct {
+ int type;
+ lazy_free_fn *free_fn; /* Function that will free the provided arguments */
+ void *free_args[]; /* List of arguments to be passed to the free function */
+ } free_args;
+} bio_job;
+
+void *bioProcessBackgroundJobs(void *arg);
+
+/* Make sure we have enough stack to perform all the things we do in the
+ * main thread. */
+#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
+
+/* Initialize the background system, spawning the thread. */
+void bioInit(void) {
+ pthread_attr_t attr;
+ pthread_t thread;
+ size_t stacksize;
+ unsigned long j;
+
+ /* Initialization of state vars and objects */
+ for (j = 0; j < BIO_WORKER_NUM; j++) {
+ pthread_mutex_init(&bio_mutex[j],NULL);
+ pthread_cond_init(&bio_newjob_cond[j],NULL);
+ bio_jobs[j] = listCreate();
+ }
+
+ /* Set the stack size as by default it may be small in some system */
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr,&stacksize);
+ if (!stacksize) stacksize = 1; /* The world is full of Solaris Fixes */
+ while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2;
+ pthread_attr_setstacksize(&attr, stacksize);
+
+ /* Ready to spawn our threads. We use the single argument the thread
+ * function accepts in order to pass the job ID the thread is
+ * responsible for. */
+ for (j = 0; j < BIO_WORKER_NUM; j++) {
+ void *arg = (void*)(unsigned long) j;
+ if (pthread_create(&thread,&attr,bioProcessBackgroundJobs,arg) != 0) {
+ serverLog(LL_WARNING, "Fatal: Can't initialize Background Jobs. Error message: %s", strerror(errno));
+ exit(1);
+ }
+ bio_threads[j] = thread;
+ }
+}
+
+/* Enqueue 'job' on the queue of the worker that owns jobs of 'type' and
+ * wake that worker up. Ownership of the job memory passes to the worker,
+ * which frees it after processing. */
+void bioSubmitJob(int type, bio_job *job) {
+ job->header.type = type;
+ unsigned long worker = bio_job_to_worker[type];
+ pthread_mutex_lock(&bio_mutex[worker]);
+ listAddNodeTail(bio_jobs[worker],job);
+ bio_jobs_counter[type]++;
+ pthread_cond_signal(&bio_newjob_cond[worker]);
+ pthread_mutex_unlock(&bio_mutex[worker]);
+}
+
+/* Queue a lazy-free job: 'free_fn' will be called on a background thread
+ * with the 'arg_count' pointer arguments that follow. */
+void bioCreateLazyFreeJob(lazy_free_fn free_fn, int arg_count, ...) {
+    va_list ap;
+
+    /* One allocation holds the job header plus the variable-length
+     * argument vector (flexible array member). */
+    bio_job *job = zmalloc(sizeof(*job) + sizeof(void *) * (arg_count));
+    job->free_args.free_fn = free_fn;
+
+    va_start(ap, arg_count);
+    for (int i = 0; i < arg_count; i++)
+        job->free_args.free_args[i] = va_arg(ap, void *);
+    va_end(ap);
+
+    bioSubmitJob(BIO_LAZY_FREE, job);
+}
+
+/* Queue a background close(2) of 'fd', optionally preceded by an fsync
+ * and/or a page cache reclaim. */
+void bioCreateCloseJob(int fd, int need_fsync, int need_reclaim_cache) {
+    bio_job *j = zmalloc(sizeof(*j));
+
+    j->fd_args.fd = fd;
+    j->fd_args.need_fsync = need_fsync;
+    j->fd_args.need_reclaim_cache = need_reclaim_cache;
+    bioSubmitJob(BIO_CLOSE_FILE, j);
+}
+
+/* Queue a background fsync + close of an AOF 'fd'. 'offset' is the
+ * job-specific offset associated with the fd (see fd_args.offset). */
+void bioCreateCloseAofJob(int fd, long long offset, int need_reclaim_cache) {
+    bio_job *j = zmalloc(sizeof(*j));
+
+    j->fd_args.fd = fd;
+    j->fd_args.offset = offset;
+    j->fd_args.need_fsync = 1; /* Closing an AOF always fsyncs first. */
+    j->fd_args.need_reclaim_cache = need_reclaim_cache;
+    bioSubmitJob(BIO_CLOSE_AOF, j);
+}
+
+/* Queue a background fsync of 'fd'. 'offset' is the job-specific offset
+ * recorded alongside the fd (see fd_args.offset). */
+void bioCreateFsyncJob(int fd, long long offset, int need_reclaim_cache) {
+    bio_job *j = zmalloc(sizeof(*j));
+
+    j->fd_args.fd = fd;
+    j->fd_args.offset = offset;
+    j->fd_args.need_reclaim_cache = need_reclaim_cache;
+    bioSubmitJob(BIO_AOF_FSYNC, j);
+}
+
+/* Worker thread main loop: pops jobs from this worker's queue and executes
+ * them one by one. Never returns. */
+void *bioProcessBackgroundJobs(void *arg) {
+ bio_job *job;
+ unsigned long worker = (unsigned long) arg;
+ sigset_t sigset;
+
+ /* Check that the worker is within the right interval. */
+ serverAssert(worker < BIO_WORKER_NUM);
+
+ redis_set_thread_title(bio_worker_title[worker]);
+
+ redisSetCpuAffinity(server.bio_cpulist);
+
+ makeThreadKillable();
+
+ pthread_mutex_lock(&bio_mutex[worker]);
+ /* Block SIGALRM so we are sure that only the main thread will
+ * receive the watchdog signal. */
+ sigemptyset(&sigset);
+ sigaddset(&sigset, SIGALRM);
+ if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
+ serverLog(LL_WARNING,
+ "Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno));
+
+ while(1) {
+ listNode *ln;
+
+ /* The loop always starts with the lock hold. */
+ if (listLength(bio_jobs[worker]) == 0) {
+ pthread_cond_wait(&bio_newjob_cond[worker], &bio_mutex[worker]);
+ continue;
+ }
+ /* Get the job from the queue. Note: the node is intentionally NOT
+ * removed here; it is deleted only after the job completes, so that
+ * bioDrainWorker() (which waits on the queue length) does not return
+ * while a job is still running. */
+ ln = listFirst(bio_jobs[worker]);
+ job = ln->value;
+ /* It is now possible to unlock the background system as we know have
+ * a stand alone job structure to process.*/
+ pthread_mutex_unlock(&bio_mutex[worker]);
+
+ /* Process the job accordingly to its type. */
+ int job_type = job->header.type;
+
+ if (job_type == BIO_CLOSE_FILE) {
+ if (job->fd_args.need_fsync &&
+ redis_fsync(job->fd_args.fd) == -1 &&
+ errno != EBADF && errno != EINVAL)
+ {
+ serverLog(LL_WARNING, "Fail to fsync the AOF file: %s",strerror(errno));
+ }
+ if (job->fd_args.need_reclaim_cache) {
+ if (reclaimFilePageCache(job->fd_args.fd, 0, 0) == -1) {
+ serverLog(LL_NOTICE,"Unable to reclaim page cache: %s", strerror(errno));
+ }
+ }
+ close(job->fd_args.fd);
+ } else if (job_type == BIO_AOF_FSYNC || job_type == BIO_CLOSE_AOF) {
+ /* The fd may be closed by main thread and reused for another
+ * socket, pipe, or file. We just ignore these errno because
+ * aof fsync did not really fail. */
+ if (redis_fsync(job->fd_args.fd) == -1 &&
+ errno != EBADF && errno != EINVAL)
+ {
+ int last_status;
+ atomicGet(server.aof_bio_fsync_status,last_status);
+ atomicSet(server.aof_bio_fsync_status,C_ERR);
+ atomicSet(server.aof_bio_fsync_errno,errno);
+ /* Only log on the OK->ERR transition to avoid log flooding. */
+ if (last_status == C_OK) {
+ serverLog(LL_WARNING,
+ "Fail to fsync the AOF file: %s",strerror(errno));
+ }
+ } else {
+ atomicSet(server.aof_bio_fsync_status,C_OK);
+ atomicSet(server.fsynced_reploff_pending, job->fd_args.offset);
+ }
+
+ if (job->fd_args.need_reclaim_cache) {
+ if (reclaimFilePageCache(job->fd_args.fd, 0, 0) == -1) {
+ serverLog(LL_NOTICE,"Unable to reclaim page cache: %s", strerror(errno));
+ }
+ }
+ if (job_type == BIO_CLOSE_AOF)
+ close(job->fd_args.fd);
+ } else if (job_type == BIO_LAZY_FREE) {
+ job->free_args.free_fn(job->free_args.free_args);
+ } else {
+ serverPanic("Wrong job type in bioProcessBackgroundJobs().");
+ }
+ zfree(job);
+
+ /* Lock again before reiterating the loop, if there are no longer
+ * jobs to process we'll block again in pthread_cond_wait(). */
+ pthread_mutex_lock(&bio_mutex[worker]);
+ listDelNode(bio_jobs[worker], ln);
+ bio_jobs_counter[job_type]--;
+ /* Wake up bioDrainWorker() waiters, which wait on the same condvar
+ * for the queue to become empty. */
+ pthread_cond_signal(&bio_newjob_cond[worker]);
+ }
+}
+
+/* Return the number of pending jobs of the specified type. */
+unsigned long bioPendingJobsOfType(int type) {
+ unsigned int worker = bio_job_to_worker[type];
+
+ pthread_mutex_lock(&bio_mutex[worker]);
+ unsigned long val = bio_jobs_counter[type];
+ pthread_mutex_unlock(&bio_mutex[worker]);
+
+ return val;
+}
+
+/* Wait for the job queue of the worker for jobs of specified type to become empty.
+ * Note this drains the whole worker, i.e. every job type that maps to the
+ * same worker, not just 'job_type'. The worker signals bio_newjob_cond after
+ * finishing each job, which is what wakes this wait up. */
+void bioDrainWorker(int job_type) {
+ unsigned long worker = bio_job_to_worker[job_type];
+
+ pthread_mutex_lock(&bio_mutex[worker]);
+ while (listLength(bio_jobs[worker]) > 0) {
+ pthread_cond_wait(&bio_newjob_cond[worker], &bio_mutex[worker]);
+ }
+ pthread_mutex_unlock(&bio_mutex[worker]);
+}
+
+/* Kill the running bio threads in an unclean way. This function should be
+ * used only when it's critical to stop the threads for some reason.
+ * Currently Redis does this only on crash (for instance on SIGSEGV) in order
+ * to perform a fast memory check without other threads messing with memory. */
+void bioKillThreads(void) {
+ int err;
+ unsigned long j;
+
+ for (j = 0; j < BIO_WORKER_NUM; j++) {
+ /* Never cancel ourselves (we may be called from a bio thread). */
+ if (bio_threads[j] == pthread_self()) continue;
+ if (bio_threads[j] && pthread_cancel(bio_threads[j]) == 0) {
+ /* Join so the thread is really gone before we return. */
+ if ((err = pthread_join(bio_threads[j],NULL)) != 0) {
+ serverLog(LL_WARNING,
+ "Bio worker thread #%lu can not be joined: %s",
+ j, strerror(err));
+ } else {
+ serverLog(LL_WARNING,
+ "Bio worker thread #%lu terminated",j);
+ }
+ }
+ }
+}
diff --git a/src/bio.h b/src/bio.h
new file mode 100644
index 0000000..0d1fe9b
--- /dev/null
+++ b/src/bio.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BIO_H
+#define __BIO_H
+
/* Callback type used by lazy-free jobs: it receives the array of argument
 * pointers that was captured when the job was created. */
typedef void lazy_free_fn(void *args[]);

/* Exported API */
void bioInit(void);                           /* Spawn the background worker threads. */
unsigned long bioPendingJobsOfType(int type); /* Number of pending jobs of one opcode. */
void bioDrainWorker(int job_type);            /* Block until the worker's queue is empty. */
void bioKillThreads(void);                    /* Forcefully cancel the workers (crash path). */
void bioCreateCloseJob(int fd, int need_fsync, int need_reclaim_cache);
void bioCreateCloseAofJob(int fd, long long offset, int need_reclaim_cache);
void bioCreateFsyncJob(int fd, long long offset, int need_reclaim_cache);
void bioCreateLazyFreeJob(lazy_free_fn free_fn, int arg_count, ...);

/* Background job opcodes */
enum {
    BIO_CLOSE_FILE = 0, /* Deferred close(2) syscall. */
    BIO_AOF_FSYNC,      /* Deferred AOF fsync. */
    BIO_LAZY_FREE,      /* Deferred objects freeing. */
    BIO_CLOSE_AOF,      /* Deferred close for AOF files. */
    BIO_NUM_OPS
};
+
+#endif
diff --git a/src/bitops.c b/src/bitops.c
new file mode 100644
index 0000000..23d8055
--- /dev/null
+++ b/src/bitops.c
@@ -0,0 +1,1267 @@
+/* Bit operations.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/* -----------------------------------------------------------------------------
+ * Helpers and low level bit functions.
+ * -------------------------------------------------------------------------- */
+
/* Count the number of set bits in the buffer 's' of 'count' bytes. The
 * implementation must handle inputs up to 512 MB or more
 * (server.proto_max_bulk_len). */
long long redisPopcount(void *s, long count) {
    static const unsigned char nbits[256] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8};
    long long total = 0;
    unsigned char *bytep = s;

    /* Consume leading bytes until the pointer is 4-byte aligned. */
    for (; count && ((unsigned long)bytep & 3); count--)
        total += nbits[*bytep++];

    /* Bulk phase: fold seven 32-bit words per iteration using the classic
     * SWAR (SIMD-within-a-register) popcount. Each per-word partial sum
     * holds at most 8 per byte lane, so seven of them (<= 56 per lane)
     * never carry between lanes, and the final 0x01010101 multiply adds
     * the four lanes into the top byte. */
    uint32_t *wordp = (uint32_t*)bytep;
    while (count >= 28) {
        uint32_t lanes = 0;
        for (int k = 0; k < 7; k++) {
            uint32_t w = *wordp++;
            w = w - ((w >> 1) & 0x55555555);
            w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
            lanes += (w + (w >> 4)) & 0x0F0F0F0F;
        }
        total += (lanes * 0x01010101) >> 24;
        count -= 28;
    }

    /* Trailing bytes, again via the lookup table. */
    bytep = (unsigned char*)wordp;
    while (count--) total += nbits[*bytep++];
    return total;
}
+
/* Return the position of the first bit set to one (if 'bit' is 1) or
 * zero (if 'bit' is 0) in the bitmap starting at 's' and long 'count' bytes.
 *
 * The function is guaranteed to return a value >= 0 if 'bit' is 0 since if
 * no zero bit is found, it returns count*8 assuming the string is zero
 * padded on the right. However if 'bit' is 1 it is possible that there is
 * not a single set bit in the bitmap. In this special case -1 is returned.
 *
 * The returned position counts bits from the left: bit 0 is the MSB of the
 * first byte, matching the SETBIT/GETBIT addressing convention. */
long long redisBitpos(void *s, unsigned long count, int bit) {
    unsigned long *l;
    unsigned char *c;
    unsigned long skipval, word = 0, one;
    long long pos = 0; /* Position of bit, to return to the caller. */
    unsigned long j;
    int found;         /* Set when the byte-wise scan already hit a byte that
                        * may contain the target bit: in that case the
                        * word-wise skip below must not run. */

    /* Process whole words first, seeking for first word that is not
     * all ones or all zeros respectively if we are looking for zeros
     * or ones. This is much faster with large strings having contiguous
     * blocks of 1 or 0 bits compared to the vanilla bit per bit processing.
     *
     * Note that if we start from an address that is not aligned
     * to sizeof(unsigned long) we consume it byte by byte until it is
     * aligned. */

    /* Skip initial bits not aligned to sizeof(unsigned long) byte by byte. */
    skipval = bit ? 0 : UCHAR_MAX;
    c = (unsigned char*) s;
    found = 0;
    while((unsigned long)c & (sizeof(*l)-1) && count) {
        if (*c != skipval) {
            found = 1;
            break;
        }
        c++;
        count--;
        pos += 8;
    }

    /* Skip bits with full word step. */
    l = (unsigned long*) c;
    if (!found) {
        skipval = bit ? 0 : ULONG_MAX;
        while (count >= sizeof(*l)) {
            if (*l != skipval) break;
            l++;
            count -= sizeof(*l);
            pos += sizeof(*l)*8;
        }
    }

    /* Load bytes into "word" considering the first byte as the most significant
     * (we basically consider it as written in big endian, since we consider the
     * string as a set of bits from left to right, with the first bit at position
     * zero.
     *
     * Note that the loading is designed to work even when the bytes left
     * (count) are less than a full word. We pad it with zero on the right. */
    c = (unsigned char*)l;
    for (j = 0; j < sizeof(*l); j++) {
        word <<= 8;
        if (count) {
            word |= *c;
            c++;
            count--;
        }
    }

    /* Special case:
     * If bits in the string are all zero and we are looking for one,
     * return -1 to signal that there is not a single "1" in the whole
     * string. This can't happen when we are looking for "0" as we assume
     * that the right of the string is zero padded. */
    if (bit == 1 && word == 0) return -1;

    /* Last word left, scan bit by bit. The first thing we need is to
     * have a single "1" set in the most significant position in an
     * unsigned long. We don't know the size of the long so we use a
     * simple trick. */
    one = ULONG_MAX; /* All bits set to 1.*/
    one >>= 1;       /* All bits set to 1 but the MSB. */
    one = ~one;      /* All bits set to 0 but the MSB. */

    /* Walk the mask from MSB to LSB until the bit under it matches 'bit'. */
    while(one) {
        if (((one & word) != 0) == bit) return pos;
        pos++;
        one >>= 1;
    }

    /* If we reached this point, there is a bug in the algorithm, since
     * the case of no match is handled as a special case before. */
    serverPanic("End of redisBitpos() reached.");
    return 0; /* Just to avoid warnings. */
}
+
+/* The following set.*Bitfield and get.*Bitfield functions implement setting
+ * and getting arbitrary size (up to 64 bits) signed and unsigned integers
+ * at arbitrary positions into a bitmap.
+ *
+ * The representation considers the bitmap as having the bit number 0 to be
+ * the most significant bit of the first byte, and so forth, so for example
+ * setting a 5 bits unsigned integer to value 23 at offset 7 into a bitmap
+ * previously set to all zeroes, will produce the following representation:
+ *
+ * +--------+--------+
+ * |00000001|01110000|
+ * +--------+--------+
+ *
+ * When offsets and integer sizes are aligned to bytes boundaries, this is the
+ * same as big endian, however when such alignment does not exist, its important
+ * to also understand how the bits inside a byte are ordered.
+ *
+ * Note that this format follows the same convention as SETBIT and related
+ * commands.
+ */
+
/* Write 'value' as a big-endian bitfield of 'bits' bits starting at bit
 * 'offset' inside the byte array 'p'. Bit 0 is the MSB of p[0], matching
 * the SETBIT convention (see the format comment above). The destination
 * must already be large enough to address offset+bits-1. */
void setUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, uint64_t value) {
    for (uint64_t i = 0; i < bits; i++, offset++) {
        uint64_t dstbyte = offset >> 3;
        uint64_t dstbit = 7 - (offset & 0x7);       /* MSB-first inside each byte. */
        uint64_t srcbit = (value >> (bits-1-i)) & 1; /* Source bits, MSB first. */
        unsigned char cur = p[dstbyte];
        cur = (unsigned char)((cur & ~(1 << dstbit)) | (srcbit << dstbit));
        p[dstbyte] = cur;
    }
}
+
/* Store a signed value as a bitfield: reinterpret it as unsigned two's
 * complement (the cast adds UINT64_MAX+1 to negative values) and delegate
 * to the unsigned writer. */
void setSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, int64_t value) {
    setUnsignedBitfield(p, offset, bits, (uint64_t)value);
}
+
/* Read 'bits' bits starting at bit 'offset' from the byte array 'p' and
 * return them as an unsigned integer, the first bit read becoming the most
 * significant bit of the result (big-endian bit order, SETBIT convention). */
uint64_t getUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) {
    uint64_t out = 0;
    for (uint64_t i = 0; i < bits; i++, offset++) {
        uint64_t srcbyte = offset >> 3;
        uint64_t srcbit = 7 - (offset & 0x7); /* MSB-first inside each byte. */
        out = (out << 1) | ((p[srcbyte] >> srcbit) & 1);
    }
    return out;
}
+
/* Read 'bits' bits at bit offset 'offset' from 'p' and return them as a
 * sign-extended two's complement int64_t ('bits' is 1 to 64, as enforced
 * by getBitfieldTypeFromArgument() for signed fields). */
int64_t getSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) {
    int64_t value;
    /* Union used to reinterpret the raw bits without an out-of-range
     * unsigned -> signed conversion. */
    union {uint64_t u; int64_t i;} conv;

    /* Converting from unsigned to signed is undefined when the value does
     * not fit, however here we assume two's complement and the original value
     * was obtained from signed -> unsigned conversion, so we'll find the
     * most significant bit set if the original value was negative.
     *
     * Note that two's complement is mandatory for exact-width types
     * according to the C99 standard. */
    conv.u = getUnsignedBitfield(p,offset,bits);
    value = conv.i;

    /* If the top significant bit is 1, propagate it to all the
     * higher bits for two's complement representation of signed
     * integers. */
    if (bits < 64 && (value & ((uint64_t)1 << (bits-1))))
        value |= ((uint64_t)-1) << bits;
    return value;
}
+
+/* The following two functions detect overflow of a value in the context
+ * of storing it as an unsigned or signed integer with the specified
+ * number of bits. The functions both take the value and a possible increment.
+ * If no overflow could happen and the value+increment fit inside the limits,
+ * then zero is returned, otherwise in case of overflow, 1 is returned,
+ * otherwise in case of underflow, -1 is returned.
+ *
+ * When non-zero is returned (overflow or underflow), if not NULL, *limit is
+ * set to the value the operation should result when an overflow happens,
+ * depending on the specified overflow semantics:
+ *
+ * For BFOVERFLOW_SAT if 1 is returned, *limit it is set maximum value that
+ * you can store in that integer. when -1 is returned, *limit is set to the
+ * minimum value that an integer of that size can represent.
+ *
+ * For BFOVERFLOW_WRAP *limit is set by performing the operation in order to
+ * "wrap" around towards zero for unsigned integers, or towards the most
+ * negative number that is possible to represent for signed integers. */
+
+#define BFOVERFLOW_WRAP 0
+#define BFOVERFLOW_SAT 1
+#define BFOVERFLOW_FAIL 2 /* Used by the BITFIELD command implementation. */
+
/* Check whether 'value' + 'incr' fits in an unsigned integer of 'bits'
 * bits. Returns 0 when it fits, 1 on overflow, -1 on underflow. On
 * overflow/underflow, '*limit' (when not NULL) receives the saturated or
 * wrapped result according to 'owtype' (BFOVERFLOW_SAT / BFOVERFLOW_WRAP),
 * as documented in the comment above. */
int checkUnsignedBitfieldOverflow(uint64_t value, int64_t incr, uint64_t bits, int owtype, uint64_t *limit) {
    uint64_t max = (bits == 64) ? UINT64_MAX : (((uint64_t)1<<bits)-1);
    /* NOTE(review): for bits == 64, 'max-value' can exceed INT64_MAX and the
     * conversion to int64_t is implementation-defined. In practice unsigned
     * bitfields are limited to 63 bits (see getBitfieldTypeFromArgument), so
     * 'maxincr' always fits — confirm if u64 support is ever added. */
    int64_t maxincr = max-value;
    int64_t minincr = -value;

    if (value > max || (incr > 0 && incr > maxincr)) {
        if (limit) {
            if (owtype == BFOVERFLOW_WRAP) {
                goto handle_wrap;
            } else if (owtype == BFOVERFLOW_SAT) {
                *limit = max;
            }
        }
        return 1;
    } else if (incr < 0 && incr < minincr) {
        if (limit) {
            if (owtype == BFOVERFLOW_WRAP) {
                goto handle_wrap;
            } else if (owtype == BFOVERFLOW_SAT) {
                *limit = 0;
            }
        }
        return -1;
    }
    return 0;

handle_wrap:
    {
        /* Wrap semantics: compute value+incr modulo 2^bits by masking off
         * everything above the field width. */
        uint64_t mask = ((uint64_t)-1) << bits;
        uint64_t res = value+incr;

        res &= ~mask;
        *limit = res;
    }
    return 1;
}
+
/* Signed counterpart of checkUnsignedBitfieldOverflow(): check whether
 * 'value' + 'incr' fits in a signed two's complement integer of 'bits'
 * bits. Returns 0 when it fits, 1 on overflow, -1 on underflow, filling
 * '*limit' according to 'owtype' as described in the comment above. */
int checkSignedBitfieldOverflow(int64_t value, int64_t incr, uint64_t bits, int owtype, int64_t *limit) {
    int64_t max = (bits == 64) ? INT64_MAX : (((int64_t)1<<(bits-1))-1);
    int64_t min = (-max)-1;

    /* Note that maxincr and minincr could overflow, but we use the values
     * only after checking 'value' range, so when we use it no overflow
     * happens. 'uint64_t' cast is there just to prevent undefined behavior on
     * overflow */
    int64_t maxincr = (uint64_t)max-value;
    int64_t minincr = min-value;

    /* The extra 'bits != 64' disjuncts catch the cases where maxincr/minincr
     * themselves wrapped, which can only happen for 64 bit fields. */
    if (value > max || (bits != 64 && incr > maxincr) || (value >= 0 && incr > 0 && incr > maxincr))
    {
        if (limit) {
            if (owtype == BFOVERFLOW_WRAP) {
                goto handle_wrap;
            } else if (owtype == BFOVERFLOW_SAT) {
                *limit = max;
            }
        }
        return 1;
    } else if (value < min || (bits != 64 && incr < minincr) || (value < 0 && incr < 0 && incr < minincr)) {
        if (limit) {
            if (owtype == BFOVERFLOW_WRAP) {
                goto handle_wrap;
            } else if (owtype == BFOVERFLOW_SAT) {
                *limit = min;
            }
        }
        return -1;
    }
    return 0;

handle_wrap:
    {
        uint64_t msb = (uint64_t)1 << (bits-1);
        uint64_t a = value, b = incr, c;
        c = a+b; /* Perform addition as unsigned so that's defined. */

        /* If the sign bit is set, propagate to all the higher order
         * bits, to cap the negative value. If it's clear, mask to
         * the positive integer limit. */
        if (bits < 64) {
            uint64_t mask = ((uint64_t)-1) << bits;
            if (c & msb) {
                c |= mask;
            } else {
                c &= ~mask;
            }
        }
        *limit = c;
    }
    return 1;
}
+
/* Debugging helper: dump the bitmap 'p' of 'count' bytes to stdout as
 * ASCII ones and zeroes, one '|'-terminated group per byte, followed by a
 * newline. Not called by production code; kept so it does not need to be
 * rewritten whenever debugging is needed. */
void printBits(unsigned char *p, unsigned long count) {
    for (unsigned long idx = 0; idx < count; idx++) {
        for (int shift = 7; shift >= 0; shift--)
            putchar(((p[idx] >> shift) & 1) ? '1' : '0');
        putchar('|');
    }
    putchar('\n');
}
+
+/* -----------------------------------------------------------------------------
+ * Bits related string commands: GETBIT, SETBIT, BITCOUNT, BITOP.
+ * -------------------------------------------------------------------------- */
+
+#define BITOP_AND 0
+#define BITOP_OR 1
+#define BITOP_XOR 2
+#define BITOP_NOT 3
+
+#define BITFIELDOP_GET 0
+#define BITFIELDOP_SET 1
+#define BITFIELDOP_INCRBY 2
+
+/* This helper function used by GETBIT / SETBIT parses the bit offset argument
+ * making sure an error is returned if it is negative or if it overflows
+ * Redis 512 MB limit for the string value or more (server.proto_max_bulk_len).
+ *
+ * If the 'hash' argument is true, and 'bits is positive, then the command
+ * will also parse bit offsets prefixed by "#". In such a case the offset
+ * is multiplied by 'bits'. This is useful for the BITFIELD command. */
+int getBitOffsetFromArgument(client *c, robj *o, uint64_t *offset, int hash, int bits) {
+ long long loffset;
+ char *err = "bit offset is not an integer or out of range";
+ char *p = o->ptr;
+ size_t plen = sdslen(p);
+ int usehash = 0;
+
+ /* Handle #<offset> form. */
+ if (p[0] == '#' && hash && bits > 0) usehash = 1;
+
+ if (string2ll(p+usehash,plen-usehash,&loffset) == 0) {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+
+ /* Adjust the offset by 'bits' for #<offset> form. */
+ if (usehash) loffset *= bits;
+
+ /* Limit offset to server.proto_max_bulk_len (512MB in bytes by default) */
+ if (loffset < 0 || (!mustObeyClient(c) && (loffset >> 3) >= server.proto_max_bulk_len))
+ {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+
+ *offset = loffset;
+ return C_OK;
+}
+
+/* This helper function for BITFIELD parses a bitfield type in the form
+ * <sign><bits> where sign is 'u' or 'i' for unsigned and signed, and
+ * the bits is a value between 1 and 64. However 64 bits unsigned integers
+ * are reported as an error because of current limitations of Redis protocol
+ * to return unsigned integer values greater than INT64_MAX.
+ *
+ * On error C_ERR is returned and an error is sent to the client. */
+int getBitfieldTypeFromArgument(client *c, robj *o, int *sign, int *bits) {
+ char *p = o->ptr;
+ char *err = "Invalid bitfield type. Use something like i16 u8. Note that u64 is not supported but i64 is.";
+ long long llbits;
+
+ if (p[0] == 'i') {
+ *sign = 1;
+ } else if (p[0] == 'u') {
+ *sign = 0;
+ } else {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+
+ if ((string2ll(p+1,strlen(p+1),&llbits)) == 0 ||
+ llbits < 1 ||
+ (*sign == 1 && llbits > 64) ||
+ (*sign == 0 && llbits > 63))
+ {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+ *bits = llbits;
+ return C_OK;
+}
+
/* This is a helper function for commands implementations that need to write
 * bits to a string object. The command creates or pad with zeroes the string
 * so that the 'maxbit' bit can be addressed. The object is finally
 * returned. Otherwise if the key holds a wrong type NULL is returned and
 * an error is sent to the client.
 *
 * If 'dirty' is not NULL, '*dirty' is set to 1 when the call created the
 * key or extended its length (i.e. the addressed bit was past the old
 * end of the string), and to 0 otherwise. */
robj *lookupStringForBitCommand(client *c, uint64_t maxbit, int *dirty) {
    size_t byte = maxbit >> 3; /* Index of the byte containing 'maxbit'. */
    robj *o = lookupKeyWrite(c->db,c->argv[1]);
    if (checkType(c,o,OBJ_STRING)) return NULL;
    if (dirty) *dirty = 0;

    if (o == NULL) {
        /* Missing key: create it zero-filled up to the needed byte. */
        o = createObject(OBJ_STRING,sdsnewlen(NULL, byte+1));
        dbAdd(c->db,c->argv[1],o);
        if (dirty) *dirty = 1;
    } else {
        /* Existing key: make sure it is not shared before mutating, then
         * grow it (zero padded) if the addressed byte is out of range. */
        o = dbUnshareStringValue(c->db,c->argv[1],o);
        size_t oldlen = sdslen(o->ptr);
        o->ptr = sdsgrowzero(o->ptr,byte+1);
        if (dirty && oldlen != sdslen(o->ptr)) *dirty = 1;
    }
    return o;
}
+
+/* Return a pointer to the string object content, and stores its length
+ * in 'len'. The user is required to pass (likely stack allocated) buffer
+ * 'llbuf' of at least LONG_STR_SIZE bytes. Such a buffer is used in the case
+ * the object is integer encoded in order to provide the representation
+ * without using heap allocation.
+ *
+ * The function returns the pointer to the object array of bytes representing
+ * the string it contains, that may be a pointer to 'llbuf' or to the
+ * internal object representation. As a side effect 'len' is filled with
+ * the length of such buffer.
+ *
+ * If the source object is NULL the function is guaranteed to return NULL
+ * and set 'len' to 0. */
+unsigned char *getObjectReadOnlyString(robj *o, long *len, char *llbuf) {
+ serverAssert(!o || o->type == OBJ_STRING);
+ unsigned char *p = NULL;
+
+ /* Set the 'p' pointer to the string, that can be just a stack allocated
+ * array if our string was integer encoded. */
+ if (o && o->encoding == OBJ_ENCODING_INT) {
+ p = (unsigned char*) llbuf;
+ if (len) *len = ll2string(llbuf,LONG_STR_SIZE,(long)o->ptr);
+ } else if (o) {
+ p = (unsigned char*) o->ptr;
+ if (len) *len = sdslen(o->ptr);
+ } else {
+ if (len) *len = 0;
+ }
+ return p;
+}
+
/* SETBIT key offset bitvalue
 *
 * Set or clear the bit at 'offset' in the string stored at 'key',
 * creating or zero-extending the string as needed, and reply with the
 * bit's original value. */
void setbitCommand(client *c) {
    robj *o;
    char *err = "bit is not an integer or out of range";
    uint64_t bitoffset;
    ssize_t byte, bit;
    int byteval, bitval;
    long on;

    if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != C_OK)
        return;

    if (getLongFromObjectOrReply(c,c->argv[3],&on,err) != C_OK)
        return;

    /* Bits can only be set or cleared... */
    if (on & ~1) {
        addReplyError(c,err);
        return;
    }

    /* 'dirty' is set by the lookup helper when the key was created or
     * grown, in which case the write always modified the keyspace. */
    int dirty;
    if ((o = lookupStringForBitCommand(c,bitoffset,&dirty)) == NULL) return;

    /* Get current values */
    byte = bitoffset >> 3;
    byteval = ((uint8_t*)o->ptr)[byte];
    bit = 7 - (bitoffset & 0x7); /* Bit 0 is the MSB of each byte. */
    bitval = byteval & (1 << bit);

    /* Either it is newly created, changed length, or the bit changes before and after.
     * Note that the bitval here is actually a decimal number.
     * So we need to use `!!` to convert it to 0 or 1 for comparison. */
    if (dirty || (!!bitval != on)) {
        /* Update byte with new bit value. */
        byteval &= ~(1 << bit);
        byteval |= ((on & 0x1) << bit);
        ((uint8_t*)o->ptr)[byte] = byteval;
        signalModifiedKey(c,c->db,c->argv[1]);
        notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
        server.dirty++;
    }

    /* Return original value. */
    addReply(c, bitval ? shared.cone : shared.czero);
}
+
+/* GETBIT key offset */
+void getbitCommand(client *c) {
+ robj *o;
+ char llbuf[32];
+ uint64_t bitoffset;
+ size_t byte, bit;
+ size_t bitval = 0;
+
+ if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != C_OK)
+ return;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,o,OBJ_STRING)) return;
+
+ byte = bitoffset >> 3;
+ bit = 7 - (bitoffset & 0x7);
+ if (sdsEncodedObject(o)) {
+ if (byte < sdslen(o->ptr))
+ bitval = ((uint8_t*)o->ptr)[byte] & (1 << bit);
+ } else {
+ if (byte < (size_t)ll2string(llbuf,sizeof(llbuf),(long)o->ptr))
+ bitval = llbuf[byte] & (1 << bit);
+ }
+
+ addReply(c, bitval ? shared.cone : shared.czero);
+}
+
/* BITOP op_name target_key src_key1 src_key2 src_key3 ... src_keyN
 *
 * Perform the bitwise AND/OR/XOR/NOT of the source strings and store the
 * result into 'target_key'. Missing keys read as empty strings; shorter
 * sources are treated as zero padded to the longest one. If the result is
 * empty the target key is deleted instead. Replies with the length of the
 * stored string in bytes. */
REDIS_NO_SANITIZE("alignment")
void bitopCommand(client *c) {
    char *opname = c->argv[1]->ptr;
    robj *o, *targetkey = c->argv[2];
    unsigned long op, j, numkeys;
    robj **objects;      /* Array of source objects. */
    unsigned char **src; /* Array of source strings pointers. */
    unsigned long *len, maxlen = 0; /* Array of length of src strings,
                                       and max len. */
    unsigned long minlen = 0;    /* Min len among the input keys. */
    unsigned char *res = NULL; /* Resulting string. */

    /* Parse the operation name. */
    if ((opname[0] == 'a' || opname[0] == 'A') && !strcasecmp(opname,"and"))
        op = BITOP_AND;
    else if((opname[0] == 'o' || opname[0] == 'O') && !strcasecmp(opname,"or"))
        op = BITOP_OR;
    else if((opname[0] == 'x' || opname[0] == 'X') && !strcasecmp(opname,"xor"))
        op = BITOP_XOR;
    else if((opname[0] == 'n' || opname[0] == 'N') && !strcasecmp(opname,"not"))
        op = BITOP_NOT;
    else {
        addReplyErrorObject(c,shared.syntaxerr);
        return;
    }

    /* Sanity check: NOT accepts only a single key argument. */
    if (op == BITOP_NOT && c->argc != 4) {
        addReplyError(c,"BITOP NOT must be called with a single source key.");
        return;
    }

    /* Lookup keys, and store pointers to the string objects into an array. */
    numkeys = c->argc - 3;
    src = zmalloc(sizeof(unsigned char*) * numkeys);
    len = zmalloc(sizeof(long) * numkeys);
    objects = zmalloc(sizeof(robj*) * numkeys);
    for (j = 0; j < numkeys; j++) {
        o = lookupKeyRead(c->db,c->argv[j+3]);
        /* Handle non-existing keys as empty strings. */
        if (o == NULL) {
            objects[j] = NULL;
            src[j] = NULL;
            len[j] = 0;
            minlen = 0;
            continue;
        }
        /* Return an error if one of the keys is not a string.
         * checkType() already sent the error reply: release the
         * references taken so far and free the temporary arrays. */
        if (checkType(c,o,OBJ_STRING)) {
            unsigned long i;
            for (i = 0; i < j; i++) {
                if (objects[i])
                    decrRefCount(objects[i]);
            }
            zfree(src);
            zfree(len);
            zfree(objects);
            return;
        }
        /* getDecodedObject() guarantees an sds representation even for
         * integer-encoded values; the reference is released at the end. */
        objects[j] = getDecodedObject(o);
        src[j] = objects[j]->ptr;
        len[j] = sdslen(objects[j]->ptr);
        if (len[j] > maxlen) maxlen = len[j];
        if (j == 0 || len[j] < minlen) minlen = len[j];
    }

    /* Compute the bit operation, if at least one string is not empty. */
    if (maxlen) {
        res = (unsigned char*) sdsnewlen(NULL,maxlen);
        unsigned char output, byte;
        unsigned long i;

        /* Fast path: as far as we have data for all the input bitmaps we
         * can take a fast path that performs much better than the
         * vanilla algorithm. On ARM we skip the fast path since it will
         * result in GCC compiling the code using multiple-words load/store
         * operations that are not supported even in ARM >= v6. */
        j = 0;
        #ifndef USE_ALIGNED_ACCESS
        if (minlen >= sizeof(unsigned long)*4 && numkeys <= 16) {
            unsigned long *lp[16];
            unsigned long *lres = (unsigned long*) res;

            memcpy(lp,src,sizeof(unsigned long*)*numkeys);
            memcpy(res,src[0],minlen);

            /* Different branches per different operations for speed (sorry). */
            if (op == BITOP_AND) {
                while(minlen >= sizeof(unsigned long)*4) {
                    for (i = 1; i < numkeys; i++) {
                        lres[0] &= lp[i][0];
                        lres[1] &= lp[i][1];
                        lres[2] &= lp[i][2];
                        lres[3] &= lp[i][3];
                        lp[i]+=4;
                    }
                    lres+=4;
                    j += sizeof(unsigned long)*4;
                    minlen -= sizeof(unsigned long)*4;
                }
            } else if (op == BITOP_OR) {
                while(minlen >= sizeof(unsigned long)*4) {
                    for (i = 1; i < numkeys; i++) {
                        lres[0] |= lp[i][0];
                        lres[1] |= lp[i][1];
                        lres[2] |= lp[i][2];
                        lres[3] |= lp[i][3];
                        lp[i]+=4;
                    }
                    lres+=4;
                    j += sizeof(unsigned long)*4;
                    minlen -= sizeof(unsigned long)*4;
                }
            } else if (op == BITOP_XOR) {
                while(minlen >= sizeof(unsigned long)*4) {
                    for (i = 1; i < numkeys; i++) {
                        lres[0] ^= lp[i][0];
                        lres[1] ^= lp[i][1];
                        lres[2] ^= lp[i][2];
                        lres[3] ^= lp[i][3];
                        lp[i]+=4;
                    }
                    lres+=4;
                    j += sizeof(unsigned long)*4;
                    minlen -= sizeof(unsigned long)*4;
                }
            } else if (op == BITOP_NOT) {
                while(minlen >= sizeof(unsigned long)*4) {
                    lres[0] = ~lres[0];
                    lres[1] = ~lres[1];
                    lres[2] = ~lres[2];
                    lres[3] = ~lres[3];
                    lres+=4;
                    j += sizeof(unsigned long)*4;
                    minlen -= sizeof(unsigned long)*4;
                }
            }
        }
        #endif

        /* j is set to the next byte to process by the previous loop.
         * Bytes past a source's length read as zero. */
        for (; j < maxlen; j++) {
            output = (len[0] <= j) ? 0 : src[0][j];
            if (op == BITOP_NOT) output = ~output;
            for (i = 1; i < numkeys; i++) {
                int skip = 0;
                byte = (len[i] <= j) ? 0 : src[i][j];
                switch(op) {
                case BITOP_AND:
                    output &= byte;
                    /* Once AND saturates to 0 (or OR to 0xff below), the
                     * remaining sources cannot change this byte. */
                    skip = (output == 0);
                    break;
                case BITOP_OR:
                    output |= byte;
                    skip = (output == 0xff);
                    break;
                case BITOP_XOR: output ^= byte; break;
                }

                if (skip) {
                    break;
                }
            }
            res[j] = output;
        }
    }
    /* Release the source references and temporary arrays. */
    for (j = 0; j < numkeys; j++) {
        if (objects[j])
            decrRefCount(objects[j]);
    }
    zfree(src);
    zfree(len);
    zfree(objects);

    /* Store the computed value into the target key, or delete it when the
     * result is the empty string. */
    if (maxlen) {
        o = createObject(OBJ_STRING,res);
        setKey(c,c->db,targetkey,o,0);
        notifyKeyspaceEvent(NOTIFY_STRING,"set",targetkey,c->db->id);
        decrRefCount(o);
        server.dirty++;
    } else if (dbDelete(c->db,targetkey)) {
        signalModifiedKey(c,c->db,targetkey);
        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",targetkey,c->db->id);
        server.dirty++;
    }
    addReplyLongLong(c,maxlen); /* Return the output string length in bytes. */
}
+
/* BITCOUNT key [start end [BIT|BYTE]]
 *
 * Reply with the number of set bits in the selected range. A missing key
 * is treated as an empty string (reply 0). The optional range is inclusive
 * on both ends, defaults to BYTE units, and accepts negative indexes
 * counting from the end. */
void bitcountCommand(client *c) {
    robj *o;
    long long start, end;
    long strlen;
    unsigned char *p;
    char llbuf[LONG_STR_SIZE];
    int isbit = 0;
    /* Masks of the out-of-range bits in the first/last byte when the range
     * is expressed in BIT units and does not fall on byte boundaries. */
    unsigned char first_byte_neg_mask = 0, last_byte_neg_mask = 0;

    /* Lookup, check for type, and return 0 for non existing keys. */
    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
        checkType(c,o,OBJ_STRING)) return;
    p = getObjectReadOnlyString(o,&strlen,llbuf);

    /* Parse start/end range if any. */
    if (c->argc == 4 || c->argc == 5) {
        long long totlen = strlen;
        /* Make sure we will not overflow */
        serverAssert(totlen <= LLONG_MAX >> 3);
        if (getLongLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK)
            return;
        if (getLongLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK)
            return;
        /* Convert negative indexes */
        if (start < 0 && end < 0 && start > end) {
            /* A reversed all-negative range is empty whatever totlen is. */
            addReply(c,shared.czero);
            return;
        }
        if (c->argc == 5) {
            if (!strcasecmp(c->argv[4]->ptr,"bit")) isbit = 1;
            else if (!strcasecmp(c->argv[4]->ptr,"byte")) isbit = 0;
            else {
                addReplyErrorObject(c,shared.syntaxerr);
                return;
            }
        }
        if (isbit) totlen <<= 3;
        if (start < 0) start = totlen+start;
        if (end < 0) end = totlen+end;
        if (start < 0) start = 0;
        if (end < 0) end = 0;
        if (end >= totlen) end = totlen-1;
        if (isbit && start <= end) {
            /* Before converting bit offset to byte offset, create negative masks
             * for the edges. */
            first_byte_neg_mask = ~((1<<(8-(start&7)))-1) & 0xFF;
            last_byte_neg_mask = (1<<(7-(end&7)))-1;
            start >>= 3;
            end >>= 3;
        }
    } else if (c->argc == 2) {
        /* The whole string. */
        start = 0;
        end = strlen-1;
    } else {
        /* Syntax error. */
        addReplyErrorObject(c,shared.syntaxerr);
        return;
    }

    /* Precondition: end >= 0 && end < strlen, so the only condition where
     * zero can be returned is: start > end. */
    if (start > end) {
        addReply(c,shared.czero);
    } else {
        long bytes = (long)(end-start+1);
        long long count = redisPopcount(p+start,bytes);
        if (first_byte_neg_mask != 0 || last_byte_neg_mask != 0) {
            unsigned char firstlast[2] = {0, 0};
            /* We may count bits of first byte and last byte which are out of
             * range. So we need to subtract them. Here we use a trick. We set
             * bits in the range to zero. So these bit will not be excluded. */
            if (first_byte_neg_mask != 0) firstlast[0] = p[start] & first_byte_neg_mask;
            if (last_byte_neg_mask != 0) firstlast[1] = p[end] & last_byte_neg_mask;
            count -= redisPopcount(firstlast,2);
        }
        addReplyLongLong(c,count);
    }
}
+
/* BITPOS key bit [start [end [BIT|BYTE]]]
 *
 * Reply with the position (always expressed as an absolute bit offset,
 * regardless of the BIT|BYTE range unit) of the first bit set to 'bit'
 * (0 or 1) in the string value at 'key', or -1 if no such bit exists in
 * the selected range. */
void bitposCommand(client *c) {
    robj *o;
    long long start, end;
    long bit, strlen;   /* 'strlen' deliberately shadows libc strlen():
                         * it holds the length of the string value. */
    unsigned char *p;
    char llbuf[LONG_STR_SIZE];
    int isbit = 0, end_given = 0;  /* BIT unit? explicit end arg given? */
    unsigned char first_byte_neg_mask = 0, last_byte_neg_mask = 0;

    /* Parse the bit argument to understand what we are looking for, set
     * or clear bits. */
    if (getLongFromObjectOrReply(c,c->argv[2],&bit,NULL) != C_OK)
        return;
    if (bit != 0 && bit != 1) {
        addReplyError(c, "The bit argument must be 1 or 0.");
        return;
    }

    /* If the key does not exist, from our point of view it is an infinite
     * array of 0 bits. If the user is looking for the first clear bit return 0,
     * If the user is looking for the first set bit, return -1. */
    if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) {
        addReplyLongLong(c, bit ? -1 : 0);
        return;
    }
    if (checkType(c,o,OBJ_STRING)) return;
    p = getObjectReadOnlyString(o,&strlen,llbuf);

    /* Parse start/end range if any. */
    if (c->argc == 4 || c->argc == 5 || c->argc == 6) {
        long long totlen = strlen;
        /* Make sure we will not overflow when shifting the length by 3
         * to convert bytes to bits. */
        serverAssert(totlen <= LLONG_MAX >> 3);
        if (getLongLongFromObjectOrReply(c,c->argv[3],&start,NULL) != C_OK)
            return;
        if (c->argc == 6) {
            if (!strcasecmp(c->argv[5]->ptr,"bit")) isbit = 1;
            else if (!strcasecmp(c->argv[5]->ptr,"byte")) isbit = 0;
            else {
                addReplyErrorObject(c,shared.syntaxerr);
                return;
            }
        }
        if (c->argc >= 5) {
            if (getLongLongFromObjectOrReply(c,c->argv[4],&end,NULL) != C_OK)
                return;
            end_given = 1;
        } else {
            if (isbit) end = (totlen<<3) + 7;
            else end = totlen-1;
        }
        if (isbit) totlen <<= 3;
        /* Convert negative indexes */
        if (start < 0) start = totlen+start;
        if (end < 0) end = totlen+end;
        if (start < 0) start = 0;
        if (end < 0) end = 0;
        if (end >= totlen) end = totlen-1;
        if (isbit && start <= end) {
            /* Before converting bit offset to byte offset, create negative masks
             * for the edges: they select the bits of the first/last byte that
             * fall OUTSIDE the requested bit range. */
            first_byte_neg_mask = ~((1<<(8-(start&7)))-1) & 0xFF;
            last_byte_neg_mask = (1<<(7-(end&7)))-1;
            start >>= 3;
            end >>= 3;
        }
    } else if (c->argc == 3) {
        /* The whole string. */
        start = 0;
        end = strlen-1;
    } else {
        /* Syntax error. */
        addReplyErrorObject(c,shared.syntaxerr);
        return;
    }

    /* For empty ranges (start > end) we return -1 as an empty range does
     * not contain a 0 nor a 1. */
    if (start > end) {
        addReplyLongLong(c, -1);
    } else {
        long bytes = end-start+1;
        long long pos;
        unsigned char tmpchar;
        if (first_byte_neg_mask) {
            /* Mask out (bit=1) or force to one (bit=0) the out-of-range bits
             * of the first byte so they can never match the search. */
            if (bit) tmpchar = p[start] & ~first_byte_neg_mask;
            else tmpchar = p[start] | first_byte_neg_mask;
            /* Special case, there is only one byte */
            if (last_byte_neg_mask && bytes == 1) {
                if (bit) tmpchar = tmpchar & ~last_byte_neg_mask;
                else tmpchar = tmpchar | last_byte_neg_mask;
            }
            pos = redisBitpos(&tmpchar,1,bit);
            /* If there are no more bytes or we get a valid pos, we can exit early */
            if (bytes == 1 || (pos != -1 && pos != 8)) goto result;
            start++;
            bytes--;
        }
        /* If the last byte has no bits in the range, we should exclude it */
        long curbytes = bytes - (last_byte_neg_mask ? 1 : 0);
        if (curbytes > 0) {
            pos = redisBitpos(p+start,curbytes,bit);
            /* If there are no more bytes or we get a valid pos, we can exit early */
            if (bytes == curbytes || (pos != -1 && pos != (long long)curbytes<<3)) goto result;
            start += curbytes;
            bytes -= curbytes;
        }
        /* Handle the trailing byte with its out-of-range bits neutralized. */
        if (bit) tmpchar = p[end] & ~last_byte_neg_mask;
        else tmpchar = p[end] | last_byte_neg_mask;
        pos = redisBitpos(&tmpchar,1,bit);

    result:
        /* If we are looking for clear bits, and the user specified an exact
         * range with start-end, we can't consider the right of the range as
         * zero padded (as we do when no explicit end is given).
         *
         * So if redisBitpos() returns the first bit outside the range,
         * we return -1 to the caller, to mean, in the specified range there
         * is not a single "0" bit. */
        if (end_given && bit == 0 && pos == (long long)bytes<<3) {
            addReplyLongLong(c,-1);
            return;
        }
        if (pos != -1) pos += (long long)start<<3; /* Adjust for the bytes we skipped. */
        addReplyLongLong(c,pos);
    }
}
+
+/* BITFIELD key subcommand-1 arg ... subcommand-2 arg ... subcommand-N ...
+ *
+ * Supported subcommands:
+ *
+ * GET <type> <offset>
+ * SET <type> <offset> <value>
+ * INCRBY <type> <offset> <increment>
+ * OVERFLOW [WRAP|SAT|FAIL]
+ */
+
+#define BITFIELD_FLAG_NONE 0
+#define BITFIELD_FLAG_READONLY (1<<0)
+
+struct bitfieldOp {
+ uint64_t offset; /* Bitfield offset. */
+ int64_t i64; /* Increment amount (INCRBY) or SET value */
+ int opcode; /* Operation id. */
+ int owtype; /* Overflow type to use. */
+ int bits; /* Integer bitfield bits width. */
+ int sign; /* True if signed, otherwise unsigned op. */
+};
+
+/* This implements both the BITFIELD command and the BITFIELD_RO command
+ * when flags is set to BITFIELD_FLAG_READONLY: in this case only the
+ * GET subcommand is allowed, other subcommands will return an error. */
void bitfieldGeneric(client *c, int flags) {
    robj *o;
    uint64_t bitoffset;
    int j, numops = 0, changes = 0, dirty = 0;
    struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */
    int owtype = BFOVERFLOW_WRAP; /* Overflow type. */
    int readonly = 1;
    uint64_t highest_write_offset = 0;

    /* First pass: validate every subcommand and its arguments before
     * touching the key, accumulating the parsed ops into 'ops'. */
    for (j = 2; j < c->argc; j++) {
        int remargs = c->argc-j-1; /* Remaining args other than current. */
        char *subcmd = c->argv[j]->ptr; /* Current command name. */
        int opcode; /* Current operation code. */
        long long i64 = 0; /* Signed SET value. */
        int sign = 0; /* Signed or unsigned type? */
        int bits = 0; /* Bitfield width in bits. */

        if (!strcasecmp(subcmd,"get") && remargs >= 2)
            opcode = BITFIELDOP_GET;
        else if (!strcasecmp(subcmd,"set") && remargs >= 3)
            opcode = BITFIELDOP_SET;
        else if (!strcasecmp(subcmd,"incrby") && remargs >= 3)
            opcode = BITFIELDOP_INCRBY;
        else if (!strcasecmp(subcmd,"overflow") && remargs >= 1) {
            /* OVERFLOW is sticky: it affects all the following ops until
             * the next OVERFLOW subcommand. */
            char *owtypename = c->argv[j+1]->ptr;
            j++;
            if (!strcasecmp(owtypename,"wrap"))
                owtype = BFOVERFLOW_WRAP;
            else if (!strcasecmp(owtypename,"sat"))
                owtype = BFOVERFLOW_SAT;
            else if (!strcasecmp(owtypename,"fail"))
                owtype = BFOVERFLOW_FAIL;
            else {
                addReplyError(c,"Invalid OVERFLOW type specified");
                zfree(ops);
                return;
            }
            continue;
        } else {
            addReplyErrorObject(c,shared.syntaxerr);
            zfree(ops);
            return;
        }

        /* Get the type and offset arguments, common to all the ops. */
        if (getBitfieldTypeFromArgument(c,c->argv[j+1],&sign,&bits) != C_OK) {
            zfree(ops);
            return;
        }

        if (getBitOffsetFromArgument(c,c->argv[j+2],&bitoffset,1,bits) != C_OK){
            zfree(ops);
            return;
        }

        if (opcode != BITFIELDOP_GET) {
            readonly = 0;
            /* Track the farthest written bit so the string can be grown
             * once, before the ops are executed. */
            if (highest_write_offset < bitoffset + bits - 1)
                highest_write_offset = bitoffset + bits - 1;
            /* INCRBY and SET require another argument. */
            if (getLongLongFromObjectOrReply(c,c->argv[j+3],&i64,NULL) != C_OK){
                zfree(ops);
                return;
            }
        }

        /* Populate the array of operations we'll process. */
        ops = zrealloc(ops,sizeof(*ops)*(numops+1));
        ops[numops].offset = bitoffset;
        ops[numops].i64 = i64;
        ops[numops].opcode = opcode;
        ops[numops].owtype = owtype;
        ops[numops].bits = bits;
        ops[numops].sign = sign;
        numops++;

        /* Skip the type and offset arguments, plus the value argument for
         * SET and INCRBY. */
        j += 3 - (opcode == BITFIELDOP_GET);
    }

    if (readonly) {
        /* Lookup for read is ok if key doesn't exist, but errors
         * if it's not a string. */
        o = lookupKeyRead(c->db,c->argv[1]);
        if (o != NULL && checkType(c,o,OBJ_STRING)) {
            zfree(ops);
            return;
        }
    } else {
        if (flags & BITFIELD_FLAG_READONLY) {
            zfree(ops);
            addReplyError(c, "BITFIELD_RO only supports the GET subcommand");
            return;
        }

        /* Lookup by making room up to the farthest bit reached by
         * this operation. */
        if ((o = lookupStringForBitCommand(c,
            highest_write_offset,&dirty)) == NULL) {
            zfree(ops);
            return;
        }
    }

    addReplyArrayLen(c,numops);

    /* Actually process the operations. */
    for (j = 0; j < numops; j++) {
        struct bitfieldOp *thisop = ops+j;

        /* Execute the operation. */
        if (thisop->opcode == BITFIELDOP_SET ||
            thisop->opcode == BITFIELDOP_INCRBY)
        {
            /* SET and INCRBY: We handle both with the same code path
             * for simplicity. SET return value is the previous value so
             * we need fetch & store as well. */

            /* We need two different but very similar code paths for signed
             * and unsigned operations, since the set of functions to get/set
             * the integers and the used variables types are different. */
            if (thisop->sign) {
                int64_t oldval, newval, wrapped, retval;
                int overflow;

                oldval = getSignedBitfield(o->ptr,thisop->offset,
                        thisop->bits);

                if (thisop->opcode == BITFIELDOP_INCRBY) {
                    overflow = checkSignedBitfieldOverflow(oldval,
                            thisop->i64,thisop->bits,thisop->owtype,&wrapped);
                    newval = overflow ? wrapped : oldval + thisop->i64;
                    retval = newval;
                } else {
                    /* SET: reply with the previous value, write the new
                     * one (saturated/wrapped if needed). */
                    newval = thisop->i64;
                    overflow = checkSignedBitfieldOverflow(newval,
                            0,thisop->bits,thisop->owtype,&wrapped);
                    if (overflow) newval = wrapped;
                    retval = oldval;
                }

                /* On overflow, if the type is "FAIL", don't write and return
                 * NULL to signal the condition. */
                if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) {
                    addReplyLongLong(c,retval);
                    setSignedBitfield(o->ptr,thisop->offset,
                                      thisop->bits,newval);

                    if (dirty || (oldval != newval))
                        changes++;
                } else {
                    addReplyNull(c);
                }
            } else {
                /* Initialization of 'wrapped' is required to avoid
                 * false-positive warning "-Wmaybe-uninitialized" */
                uint64_t oldval, newval, retval, wrapped = 0;
                int overflow;

                oldval = getUnsignedBitfield(o->ptr,thisop->offset,
                        thisop->bits);

                if (thisop->opcode == BITFIELDOP_INCRBY) {
                    newval = oldval + thisop->i64;
                    overflow = checkUnsignedBitfieldOverflow(oldval,
                            thisop->i64,thisop->bits,thisop->owtype,&wrapped);
                    if (overflow) newval = wrapped;
                    retval = newval;
                } else {
                    newval = thisop->i64;
                    overflow = checkUnsignedBitfieldOverflow(newval,
                            0,thisop->bits,thisop->owtype,&wrapped);
                    if (overflow) newval = wrapped;
                    retval = oldval;
                }
                /* On overflow, if the type is "FAIL", don't write and return
                 * NULL to signal the condition. */
                if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) {
                    addReplyLongLong(c,retval);
                    setUnsignedBitfield(o->ptr,thisop->offset,
                                        thisop->bits,newval);

                    if (dirty || (oldval != newval))
                        changes++;
                } else {
                    addReplyNull(c);
                }
            }
        } else {
            /* GET */
            unsigned char buf[9];
            long strlen = 0;
            unsigned char *src = NULL;
            char llbuf[LONG_STR_SIZE];

            if (o != NULL)
                src = getObjectReadOnlyString(o,&strlen,llbuf);

            /* For GET we use a trick: before executing the operation
             * copy up to 9 bytes to a local buffer, so that we can easily
             * execute up to 64 bit operations that are at actual string
             * object boundaries. */
            memset(buf,0,9);
            int i;
            uint64_t byte = thisop->offset >> 3;
            for (i = 0; i < 9; i++) {
                if (src == NULL || i+byte >= (uint64_t)strlen) break;
                buf[i] = src[i+byte];
            }

            /* Now operate on the copied buffer which is guaranteed
             * to be zero-padded. */
            if (thisop->sign) {
                int64_t val = getSignedBitfield(buf,thisop->offset-(byte*8),
                                                thisop->bits);
                addReplyLongLong(c,val);
            } else {
                uint64_t val = getUnsignedBitfield(buf,thisop->offset-(byte*8),
                                                   thisop->bits);
                addReplyLongLong(c,val);
            }
        }
    }

    /* Only operations that actually mutated the value count as changes:
     * signal watchers, emit the keyspace event and bump the dirty counter. */
    if (changes) {
        signalModifiedKey(c,c->db,c->argv[1]);
        notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
        server.dirty += changes;
    }
    zfree(ops);
}
+
+void bitfieldCommand(client *c) {
+ bitfieldGeneric(c, BITFIELD_FLAG_NONE);
+}
+
+void bitfieldroCommand(client *c) {
+ bitfieldGeneric(c, BITFIELD_FLAG_READONLY);
+}
diff --git a/src/blocked.c b/src/blocked.c
new file mode 100644
index 0000000..6ad4667
--- /dev/null
+++ b/src/blocked.c
@@ -0,0 +1,763 @@
+/* blocked.c - generic support for blocking operations like BLPOP & WAIT.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * API:
+ *
+ * blockClient() set the CLIENT_BLOCKED flag in the client, and set the
+ * specified block type 'btype' field to one of BLOCKED_* macros.
+ *
+ * unblockClient() unblocks the client doing the following:
+ * 1) It calls the btype-specific function to cleanup the state.
+ * 2) It unblocks the client by unsetting the CLIENT_BLOCKED flag.
+ * 3) It puts the client into a list of just unblocked clients that are
+ * processed ASAP in the beforeSleep() event loop callback, so that
+ * if there is some query buffer to process, we do it. This is also
+ * required because otherwise there is no 'readable' event fired, we
+ * already read the pending commands. We also set the CLIENT_UNBLOCKED
+ * flag to remember the client is in the unblocked_clients list.
+ *
+ * processUnblockedClients() is called inside the beforeSleep() function
+ * to process the query buffer from unblocked clients and remove the clients
+ * from the blocked_clients queue.
+ *
+ * replyToBlockedClientTimedOut() is called by the cron function when
+ * a client blocked reaches the specified timeout (if the timeout is set
+ * to 0, no timeout is processed).
+ * It usually just needs to send a reply to the client.
+ *
+ * When implementing a new type of blocking operation, the implementation
+ * should modify unblockClient() and replyToBlockedClientTimedOut() in order
+ * to handle the btype-specific behavior of this two functions.
+ * If the blocking operation waits for certain keys to change state, the
+ * clusterRedirectBlockedClientIfNeeded() function should also be updated.
+ */
+
+#include "server.h"
+#include "slowlog.h"
+#include "latency.h"
+#include "monotonic.h"
+
+/* forward declarations */
+static void unblockClientWaitingData(client *c);
+static void handleClientsBlockedOnKey(readyList *rl);
+static void unblockClientOnKey(client *c, robj *key);
+static void moduleUnblockClientOnKey(client *c, robj *key);
+static void releaseBlockedEntry(client *c, dictEntry *de, int remove_key);
+
+void initClientBlockingState(client *c) {
+ c->bstate.btype = BLOCKED_NONE;
+ c->bstate.timeout = 0;
+ c->bstate.keys = dictCreate(&objectKeyHeapPointerValueDictType);
+ c->bstate.numreplicas = 0;
+ c->bstate.reploffset = 0;
+ c->bstate.unblock_on_nokey = 0;
+ c->bstate.async_rm_call_handle = NULL;
+}
+
/* Block a client for the specific operation type. Once the CLIENT_BLOCKED
 * flag is set client query buffer is not longer processed, but accumulated,
 * and will be processed when the client is unblocked. */
void blockClient(client *c, int btype) {
    /* Master client should never be blocked unless pause or module */
    serverAssert(!(c->flags & CLIENT_MASTER &&
                   btype != BLOCKED_MODULE &&
                   btype != BLOCKED_POSTPONE));

    c->flags |= CLIENT_BLOCKED;
    c->bstate.btype = btype;
    if (!(c->flags & CLIENT_MODULE)) server.blocked_clients++; /* We count blocked client stats on regular clients and not on module clients */
    server.blocked_clients_by_type[btype]++;
    /* Register the client in the timeout table so that an expired
     * c->bstate.timeout can be detected by the cron. */
    addClientToTimeoutTable(c);
}
+
+/* Usually when a client is unblocked due to being blocked while processing some command
+ * he will attempt to reprocess the command which will update the statistics.
+ * However in case the client was timed out or in case of module blocked client is being unblocked
+ * the command will not be reprocessed and we need to make stats update.
+ * This function will make updates to the commandstats, slowlog and monitors.*/
+void updateStatsOnUnblock(client *c, long blocked_us, long reply_us, int had_errors){
+ const ustime_t total_cmd_duration = c->duration + blocked_us + reply_us;
+ c->lastcmd->microseconds += total_cmd_duration;
+ c->lastcmd->calls++;
+ server.stat_numcommands++;
+ if (had_errors)
+ c->lastcmd->failed_calls++;
+ if (server.latency_tracking_enabled)
+ updateCommandLatencyHistogram(&(c->lastcmd->latency_histogram), total_cmd_duration*1000);
+ /* Log the command into the Slow log if needed. */
+ slowlogPushCurrentCommand(c, c->lastcmd, total_cmd_duration);
+ c->duration = 0;
+ /* Log the reply duration event. */
+ latencyAddSampleIfNeeded("command-unblocking",reply_us/1000);
+}
+
+/* This function is called in the beforeSleep() function of the event loop
+ * in order to process the pending input buffer of clients that were
+ * unblocked after a blocking operation. */
+void processUnblockedClients(void) {
+ listNode *ln;
+ client *c;
+
+ while (listLength(server.unblocked_clients)) {
+ ln = listFirst(server.unblocked_clients);
+ serverAssert(ln != NULL);
+ c = ln->value;
+ listDelNode(server.unblocked_clients,ln);
+ c->flags &= ~CLIENT_UNBLOCKED;
+
+ if (c->flags & CLIENT_MODULE) {
+ if (!(c->flags & CLIENT_BLOCKED)) {
+ moduleCallCommandUnblockedHandler(c);
+ }
+ continue;
+ }
+
+ /* Process remaining data in the input buffer, unless the client
+ * is blocked again. Actually processInputBuffer() checks that the
+ * client is not blocked before to proceed, but things may change and
+ * the code is conceptually more correct this way. */
+ if (!(c->flags & CLIENT_BLOCKED)) {
+ /* If we have a queued command, execute it now. */
+ if (processPendingCommandAndInputBuffer(c) == C_ERR) {
+ c = NULL;
+ }
+ }
+ beforeNextClient(c);
+ }
+}
+
+/* This function will schedule the client for reprocessing at a safe time.
+ *
+ * This is useful when a client was blocked for some reason (blocking operation,
+ * CLIENT PAUSE, or whatever), because it may end with some accumulated query
+ * buffer that needs to be processed ASAP:
+ *
+ * 1. When a client is blocked, its readable handler is still active.
+ * 2. However in this case it only gets data into the query buffer, but the
+ * query is not parsed or executed once there is enough to proceed as
+ * usually (because the client is blocked... so we can't execute commands).
+ * 3. When the client is unblocked, without this function, the client would
+ * have to write some query in order for the readable handler to finally
+ * call processQueryBuffer*() on it.
+ * 4. With this function instead we can put the client in a queue that will
+ * process it for queries ready to be executed at a safe time.
+ */
+void queueClientForReprocessing(client *c) {
+ /* The client may already be into the unblocked list because of a previous
+ * blocking operation, don't add back it into the list multiple times. */
+ if (!(c->flags & CLIENT_UNBLOCKED)) {
+ c->flags |= CLIENT_UNBLOCKED;
+ listAddNodeTail(server.unblocked_clients,c);
+ }
+}
+
+/* Unblock a client calling the right function depending on the kind
+ * of operation the client is blocking for. */
void unblockClient(client *c, int queue_for_reprocessing) {
    /* Perform the btype-specific cleanup first, while c->bstate is still
     * intact. */
    if (c->bstate.btype == BLOCKED_LIST ||
        c->bstate.btype == BLOCKED_ZSET ||
        c->bstate.btype == BLOCKED_STREAM) {
        unblockClientWaitingData(c);
    } else if (c->bstate.btype == BLOCKED_WAIT || c->bstate.btype == BLOCKED_WAITAOF) {
        unblockClientWaitingReplicas(c);
    } else if (c->bstate.btype == BLOCKED_MODULE) {
        if (moduleClientIsBlockedOnKeys(c)) unblockClientWaitingData(c);
        unblockClientFromModule(c);
    } else if (c->bstate.btype == BLOCKED_POSTPONE) {
        listDelNode(server.postponed_clients,c->postponed_list_node);
        c->postponed_list_node = NULL;
    } else if (c->bstate.btype == BLOCKED_SHUTDOWN) {
        /* No special cleanup. */
    } else {
        serverPanic("Unknown btype in unblockClient().");
    }

    /* Reset the client for a new query, unless the client has pending command to process
     * or in case a shutdown operation was canceled and we are still in the processCommand sequence */
    if (!(c->flags & CLIENT_PENDING_COMMAND) && c->bstate.btype != BLOCKED_SHUTDOWN) {
        freeClientOriginalArgv(c);
        /* Clients that are not blocked on keys are not reprocessed so we must
         * call reqresAppendResponse here (for clients blocked on key,
         * unblockClientOnKey is called, which eventually calls processCommand,
         * which calls reqresAppendResponse) */
        reqresAppendResponse(c);
        resetClient(c);
    }

    /* Clear the flags, and put the client in the unblocked list so that
     * we'll process new commands in its query buffer ASAP.
     * NOTE: the per-type counter must be decremented while c->bstate.btype
     * still holds the old type, before it is reset to BLOCKED_NONE. */
    if (!(c->flags & CLIENT_MODULE)) server.blocked_clients--; /* We count blocked client stats on regular clients and not on module clients */
    server.blocked_clients_by_type[c->bstate.btype]--;
    c->flags &= ~CLIENT_BLOCKED;
    c->bstate.btype = BLOCKED_NONE;
    c->bstate.unblock_on_nokey = 0;
    removeClientFromTimeoutTable(c);
    if (queue_for_reprocessing) queueClientForReprocessing(c);
}
+
+/* This function gets called when a blocked client timed out in order to
+ * send it a reply of some kind. After this function is called,
+ * unblockClient() will be called with the same client as argument. */
+void replyToBlockedClientTimedOut(client *c) {
+ if (c->bstate.btype == BLOCKED_LIST ||
+ c->bstate.btype == BLOCKED_ZSET ||
+ c->bstate.btype == BLOCKED_STREAM) {
+ addReplyNullArray(c);
+ updateStatsOnUnblock(c, 0, 0, 0);
+ } else if (c->bstate.btype == BLOCKED_WAIT) {
+ addReplyLongLong(c,replicationCountAcksByOffset(c->bstate.reploffset));
+ } else if (c->bstate.btype == BLOCKED_WAITAOF) {
+ addReplyArrayLen(c,2);
+ addReplyLongLong(c,server.fsynced_reploff >= c->bstate.reploffset);
+ addReplyLongLong(c,replicationCountAOFAcksByOffset(c->bstate.reploffset));
+ } else if (c->bstate.btype == BLOCKED_MODULE) {
+ moduleBlockedClientTimedOut(c);
+ } else {
+ serverPanic("Unknown btype in replyToBlockedClientTimedOut().");
+ }
+}
+
+/* If one or more clients are blocked on the SHUTDOWN command, this function
+ * sends them an error reply and unblocks them. */
+void replyToClientsBlockedOnShutdown(void) {
+ if (server.blocked_clients_by_type[BLOCKED_SHUTDOWN] == 0) return;
+ listNode *ln;
+ listIter li;
+ listRewind(server.clients, &li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+ if (c->flags & CLIENT_BLOCKED && c->bstate.btype == BLOCKED_SHUTDOWN) {
+ addReplyError(c, "Errors trying to SHUTDOWN. Check logs.");
+ unblockClient(c, 1);
+ }
+ }
+}
+
+/* Mass-unblock clients because something changed in the instance that makes
+ * blocking no longer safe. For example clients blocked in list operations
+ * in an instance which turns from master to slave is unsafe, so this function
+ * is called when a master turns into a slave.
+ *
+ * The semantics is to send an -UNBLOCKED error to the client, disconnecting
+ * it at the same time. */
+void disconnectAllBlockedClients(void) {
+ listNode *ln;
+ listIter li;
+
+ listRewind(server.clients,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+
+ if (c->flags & CLIENT_BLOCKED) {
+ /* POSTPONEd clients are an exception, when they'll be unblocked, the
+ * command processing will start from scratch, and the command will
+ * be either executed or rejected. (unlike LIST blocked clients for
+ * which the command is already in progress in a way. */
+ if (c->bstate.btype == BLOCKED_POSTPONE)
+ continue;
+
+ unblockClientOnError(c,
+ "-UNBLOCKED force unblock from blocking operation, "
+ "instance state changed (master -> replica?)");
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ }
+ }
+}
+
+/* This function should be called by Redis every time a single command,
+ * a MULTI/EXEC block, or a Lua script, terminated its execution after
+ * being called by a client. It handles serving clients blocked in all scenarios
+ * where a specific key access requires to block until that key is available.
+ *
+ * All the keys with at least one client blocked that are signaled as ready
+ * are accumulated into the server.ready_keys list. This function will run
+ * the list and will serve clients accordingly.
+ * Note that the function will iterate again and again (for example as a result of serving BLMOVE
+ * we can have new blocking clients to serve because of the PUSH side of BLMOVE.)
+ *
+ * This function is normally "fair", that is, it will serve clients
+ * using a FIFO behavior. However this fairness is violated in certain
+ * edge cases, that is, when we have clients blocked at the same time
+ * in a sorted set and in a list, for the same key (a very odd thing to
+ * do client side, indeed!). Because mismatching clients (blocking for
+ * a different type compared to the current key type) are moved in the
+ * other side of the linked list. However as long as the key starts to
+ * be used only for a single type, like virtually any Redis application will
+ * do, the function is already fair. */
void handleClientsBlockedOnKeys(void) {

    /* In case we are already in the process of unblocking clients we should
     * not make a recursive call, in order to prevent breaking fairness. */
    static int in_handling_blocked_clients = 0;
    if (in_handling_blocked_clients)
        return;
    in_handling_blocked_clients = 1;

    /* This function is called only when also_propagate is in its basic state
     * (i.e. not from call(), module context, etc.) */
    serverAssert(server.also_propagate.numops == 0);

    /* If a command being unblocked causes another command to get unblocked,
     * like a BLMOVE would do, then the new unblocked command will get processed
     * right away rather than wait for later. */
    while(listLength(server.ready_keys) != 0) {
        list *l;

        /* Point server.ready_keys to a fresh list and save the current one
         * locally. This way as we run the old list we are free to call
         * signalKeyAsReady() that may push new elements in server.ready_keys
         * when handling clients blocked into BLMOVE. */
        l = server.ready_keys;
        server.ready_keys = listCreate();

        while(listLength(l) != 0) {
            listNode *ln = listFirst(l);
            readyList *rl = ln->value;

            /* First of all remove this key from db->ready_keys so that
             * we can safely call signalKeyAsReady() against this key. */
            dictDelete(rl->db->ready_keys,rl->key);

            handleClientsBlockedOnKey(rl);

            /* Free this item: the readyList holds its own reference to the
             * key (taken in signalKeyAsReadyLogic()), so drop it here. */
            decrRefCount(rl->key);
            zfree(rl);
            listDelNode(l,ln);
        }
        listRelease(l); /* We have the new list on place at this point. */
    }
    /* Re-arm the recursion guard so the next top-level call can run. */
    in_handling_blocked_clients = 0;
}
+
+/* Set a client in blocking mode for the specified key, with the specified timeout.
+ * The 'type' argument is BLOCKED_LIST,BLOCKED_ZSET or BLOCKED_STREAM depending on the kind of operation we are
+ * waiting for an empty key in order to awake the client. The client is blocked
+ * for all the 'numkeys' keys as in the 'keys' argument.
+ * The client will be unblocked as soon as one of the keys in 'keys' is updated.
+ * the parameter unblock_on_nokey can be used to force client to be unblocked even in the case the key
+ * is updated to become unavailable, either by type change (override), deletion or swapdb */
void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, int unblock_on_nokey) {
    dictEntry *db_blocked_entry, *db_blocked_existing_entry, *client_blocked_entry;
    list *l;
    int j;

    c->bstate.timeout = timeout;
    for (j = 0; j < numkeys; j++) {
        /* If the key already exists in the dictionary ignore it. */
        if (!(client_blocked_entry = dictAddRaw(c->bstate.keys,keys[j],NULL))) {
            continue;
        }
        /* Reference held by c->bstate.keys. */
        incrRefCount(keys[j]);

        /* And in the other "side", to map keys -> clients */
        db_blocked_entry = dictAddRaw(c->db->blocking_keys,keys[j], &db_blocked_existing_entry);

        /* In case key[j] did not have blocking clients yet, we need to create a new list */
        if (db_blocked_entry != NULL) {
            l = listCreate();
            dictSetVal(c->db->blocking_keys, db_blocked_entry, l);
            /* Reference held by c->db->blocking_keys. */
            incrRefCount(keys[j]);
        } else {
            l = dictGetVal(db_blocked_existing_entry);
        }
        listAddNodeTail(l,c);
        /* Remember the client's node in the key's wait list, so it can be
         * unlinked in O(1) when the client is unblocked. */
        dictSetVal(c->bstate.keys,client_blocked_entry,listLast(l));


        /* We need to add the key to blocking_keys_unblock_on_nokey, if the client
         * wants to be awakened if key is deleted (like XREADGROUP) */
        if (unblock_on_nokey) {
            /* The entry value is a reference counter of how many blocked
             * clients asked to be woken up when this key goes away. */
            db_blocked_entry = dictAddRaw(c->db->blocking_keys_unblock_on_nokey, keys[j], &db_blocked_existing_entry);
            if (db_blocked_entry) {
                incrRefCount(keys[j]);
                dictSetUnsignedIntegerVal(db_blocked_entry, 1);
            } else {
                dictIncrUnsignedIntegerVal(db_blocked_existing_entry, 1);
            }
        }
    }
    c->bstate.unblock_on_nokey = unblock_on_nokey;
    /* Currently we assume key blocking will require reprocessing the command.
     * However in case of modules, they have a different way to handle the reprocessing
     * which does not require setting the pending command flag */
    if (btype != BLOCKED_MODULE)
        c->flags |= CLIENT_PENDING_COMMAND;
    blockClient(c,btype);
}
+
+/* Helper function to unblock a client that's waiting in a blocking operation such as BLPOP.
+ * Internal function for unblockClient() */
+static void unblockClientWaitingData(client *c) {
+ dictEntry *de;
+ dictIterator *di;
+
+ if (dictSize(c->bstate.keys) == 0)
+ return;
+
+ di = dictGetIterator(c->bstate.keys);
+ /* The client may wait for multiple keys, so unblock it for every key. */
+ while((de = dictNext(di)) != NULL) {
+ releaseBlockedEntry(c, de, 0);
+ }
+ dictReleaseIterator(di);
+ dictEmpty(c->bstate.keys, NULL);
+}
+
+static blocking_type getBlockedTypeByType(int type) {
+ switch (type) {
+ case OBJ_LIST: return BLOCKED_LIST;
+ case OBJ_ZSET: return BLOCKED_ZSET;
+ case OBJ_MODULE: return BLOCKED_MODULE;
+ case OBJ_STREAM: return BLOCKED_STREAM;
+ default: return BLOCKED_NONE;
+ }
+}
+
+/* If the specified key has clients blocked waiting for list pushes, this
+ * function will put the key reference into the server.ready_keys list.
+ * Note that db->ready_keys is a hash table that allows us to avoid putting
+ * the same key again and again in the list in case of multiple pushes
+ * made by a script or in the context of MULTI/EXEC.
+ *
+ * The list will be finally processed by handleClientsBlockedOnKeys() */
static void signalKeyAsReadyLogic(redisDb *db, robj *key, int type, int deleted) {
    readyList *rl;

    /* Quick returns. */
    int btype = getBlockedTypeByType(type);
    if (btype == BLOCKED_NONE) {
        /* The type can never block. */
        return;
    }
    if (!server.blocked_clients_by_type[btype] &&
        !server.blocked_clients_by_type[BLOCKED_MODULE]) {
        /* No clients block on this type. Note: Blocked modules are represented
         * by BLOCKED_MODULE, even if the intention is to wake up by normal
         * types (list, zset, stream), so we need to check that there are no
         * blocked modules before we do a quick return here. */
        return;
    }

    if (deleted) {
        /* Key deleted and no clients blocking for this key? No need to queue it. */
        if (dictFind(db->blocking_keys_unblock_on_nokey,key) == NULL)
            return;
        /* Note: if we made it here it means the key is also present in db->blocking_keys */
    } else {
        /* No clients blocking for this key? No need to queue it. */
        if (dictFind(db->blocking_keys,key) == NULL)
            return;
    }

    dictEntry *de, *existing;
    de = dictAddRaw(db->ready_keys, key, &existing);
    if (de) {
        /* We add the key in the db->ready_keys dictionary in order
         * to avoid adding it multiple times into a list with a simple O(1)
         * check. */
        incrRefCount(key);
    } else {
        /* Key was already signaled? No need to queue it again. */
        return;
    }

    /* Ok, we need to queue this key into server.ready_keys.
     * The readyList takes its own reference to the key, released by
     * handleClientsBlockedOnKeys() when the item is consumed. */
    rl = zmalloc(sizeof(*rl));
    rl->key = key;
    rl->db = db;
    incrRefCount(key);
    listAddNodeTail(server.ready_keys,rl);
}
+
+/* Helper function to wrap the logic of removing a client blocked key entry.
+ * 'de' is the entry of 'c->bstate.keys' being released. We do the following:
+ * 1. unlink the client from the global DB locked client list
+ * 2. remove the entry from the global db blocking list in case the list is empty
+ * 3. in case the global list is empty, also remove the key from the global dict of keys
+ *    which should trigger unblock on key deletion
+ * 4. remove key from the client blocking keys list - NOTE, since client can be blocked on lots of keys,
+ *    but unblocked when only one of them is triggered, we would like to avoid deleting each key separately
+ *    and instead clear the dictionary in one-shot. this is why the remove_key argument is provided
+ *    to support this logic in unblockClientWaitingData
+ */
+static void releaseBlockedEntry(client *c, dictEntry *de, int remove_key) {
+    list *l;
+    listNode *pos;
+    void *key;
+    dictEntry *unblock_on_nokey_entry;
+
+    key = dictGetKey(de);
+    pos = dictGetVal(de);
+    /* Remove this client from the list of clients waiting for this key. */
+    l = dictFetchValue(c->db->blocking_keys, key);
+    serverAssertWithInfo(c,key,l != NULL);
+    listUnlinkNode(l,pos);
+    /* If the list is empty we need to remove it to avoid wasting memory
+     * We will also remove the key (if exists) from the blocking_keys_unblock_on_nokey dict.
+     * However, in case the list is not empty, we will have to still perform reference accounting
+     * on the blocking_keys_unblock_on_nokey and delete the entry in case of zero reference.
+     * Why? because it is possible that some more clients are blocked on the same key but without
+     * require to be triggered on key deletion, we do not want these to be later triggered by the
+     * signalDeletedKeyAsReady. */
+    if (listLength(l) == 0) {
+        dictDelete(c->db->blocking_keys, key);
+        dictDelete(c->db->blocking_keys_unblock_on_nokey,key);
+    } else if (c->bstate.unblock_on_nokey) {
+        unblock_on_nokey_entry = dictFind(c->db->blocking_keys_unblock_on_nokey,key);
+        /* it is not possible to have a client blocked on nokey with no matching entry */
+        serverAssertWithInfo(c,key,unblock_on_nokey_entry != NULL);
+        if (!dictIncrUnsignedIntegerVal(unblock_on_nokey_entry, -1)) {
+            /* in case the count is zero, we can delete the entry */
+            dictDelete(c->db->blocking_keys_unblock_on_nokey,key);
+        }
+    }
+    /* Step 4: optionally drop the key from the client's own blocked-keys dict. */
+    if (remove_key)
+        dictDelete(c->bstate.keys, key);
+}
+
+/* Signal that 'key' (holding an object of 'type') was modified/pushed to,
+ * possibly unblocking clients waiting on it. */
+void signalKeyAsReady(redisDb *db, robj *key, int type) {
+    signalKeyAsReadyLogic(db, key, type, 0);
+}
+
+/* Signal that 'key' (which held an object of 'type') was deleted, possibly
+ * unblocking clients that asked to be woken up on key deletion. */
+void signalDeletedKeyAsReady(redisDb *db, robj *key, int type) {
+    signalKeyAsReadyLogic(db, key, type, 1);
+}
+
+/* Helper function for handleClientsBlockedOnKeys(). This function is called
+ * whenever a key is ready. we iterate over all the clients blocked on this key
+ * and try to re-execute the command (in case the key is still available). */
+static void handleClientsBlockedOnKey(readyList *rl) {
+
+    /* We serve clients in the same order they blocked for
+     * this key, from the first blocked to the last. */
+    dictEntry *de = dictFind(rl->db->blocking_keys,rl->key);
+
+    if (de) {
+        list *clients = dictGetVal(de);
+        listNode *ln;
+        listIter li;
+        listRewind(clients,&li);
+
+        /* Avoid processing more than the initial count so that we're not stuck
+         * in an endless loop in case the reprocessing of the command blocks again. */
+        long count = listLength(clients);
+        while ((ln = listNext(&li)) && count--) {
+            client *receiver = listNodeValue(ln);
+            /* Re-lookup on every iteration: serving a previous client may have
+             * consumed or deleted the key. */
+            robj *o = lookupKeyReadWithFlags(rl->db, rl->key, LOOKUP_NOEFFECTS);
+            /* 1. In case new key was added/touched we need to verify it satisfy the
+             *    blocked type, since we might process the wrong key type.
+             * 2. We want to serve clients blocked on module keys
+             *    regardless of the object type: we don't know what the
+             *    module is trying to accomplish right now.
+             * 3. In case of XREADGROUP call we will want to unblock on any change in object type
+             *    or in case the key was deleted, since the group is no longer valid. */
+            if ((o != NULL && (receiver->bstate.btype == getBlockedTypeByType(o->type))) ||
+                (o != NULL && (receiver->bstate.btype == BLOCKED_MODULE)) ||
+                (receiver->bstate.unblock_on_nokey))
+            {
+                if (receiver->bstate.btype != BLOCKED_MODULE)
+                    unblockClientOnKey(receiver, rl->key);
+                else
+                    moduleUnblockClientOnKey(receiver, rl->key);
+            }
+        }
+    }
+}
+
+/* Block a client due to the WAIT command: the client waits until 'numreplicas'
+ * replicas acknowledge at least replication offset 'offset', or 'timeout'
+ * (absolute ms time, 0 = forever) expires. */
+void blockForReplication(client *c, mstime_t timeout, long long offset, long numreplicas) {
+    c->bstate.timeout = timeout;
+    c->bstate.reploffset = offset;
+    c->bstate.numreplicas = numreplicas;
+    listAddNodeHead(server.clients_waiting_acks,c);
+    blockClient(c,BLOCKED_WAIT);
+}
+
+/* Block a client due to the WAITAOF command: like blockForReplication() but
+ * additionally waits for 'numlocal' local AOF fsync acknowledgements. */
+void blockForAofFsync(client *c, mstime_t timeout, long long offset, int numlocal, long numreplicas) {
+    c->bstate.timeout = timeout;
+    c->bstate.reploffset = offset;
+    c->bstate.numreplicas = numreplicas;
+    c->bstate.numlocal = numlocal;
+    listAddNodeHead(server.clients_waiting_acks,c);
+    blockClient(c,BLOCKED_WAITAOF);
+}
+
+/* Postpone the client's command execution. For example, the server might be
+ * busy and wants to avoid processing client commands now; they will be
+ * processed later, when the server is ready to accept them. */
+void blockPostponeClient(client *c) {
+    c->bstate.timeout = 0;
+    blockClient(c,BLOCKED_POSTPONE);
+    listAddNodeTail(server.postponed_clients, c);
+    c->postponed_list_node = listLast(server.postponed_clients);
+    /* Mark this client to execute its command */
+    c->flags |= CLIENT_PENDING_COMMAND;
+}
+
+/* Block client due to SHUTDOWN command (e.g. while waiting for a graceful
+ * shutdown to complete). */
+void blockClientShutdown(client *c) {
+    blockClient(c, BLOCKED_SHUTDOWN);
+}
+
+/* Unblock a client once a specific key became available for it.
+ * This function will remove the client from the list of clients blocked on this key
+ * and also remove the key from the dictionary of keys this client is blocked on.
+ * in case the client has a command pending it will process it immediately. */
+static void unblockClientOnKey(client *c, robj *key) {
+    dictEntry *de;
+
+    de = dictFind(c->bstate.keys, key);
+    releaseBlockedEntry(c, de, 1);
+
+    /* Only in case of blocking API calls, we might be blocked on several keys.
+       however we should force unblock the entire blocking keys */
+    serverAssert(c->bstate.btype == BLOCKED_STREAM ||
+                 c->bstate.btype == BLOCKED_LIST ||
+                 c->bstate.btype == BLOCKED_ZSET);
+
+    /* We need to unblock the client before calling processCommandAndResetClient
+     * because it checks the CLIENT_BLOCKED flag */
+    unblockClient(c, 0);
+    /* In case this client was blocked on keys during command
+     * we need to re process the command again */
+    if (c->flags & CLIENT_PENDING_COMMAND) {
+        c->flags &= ~CLIENT_PENDING_COMMAND;
+        /* We want the command processing and the unblock handler (see RM_Call 'K' option)
+         * to run atomically, this is why we must enter the execution unit here before
+         * running the command, and exit the execution unit after calling the unblock handler (if exists).
+         * Notice that we also must set the current client so it will be available
+         * when we will try to send the client side caching notification (done on 'afterCommand'). */
+        client *old_client = server.current_client;
+        server.current_client = c;
+        enterExecutionUnit(1, 0);
+        processCommandAndResetClient(c);
+        if (!(c->flags & CLIENT_BLOCKED)) {
+            /* The reprocessed command did not block again: hand the client to
+             * the module unblock handler or queue it for normal reprocessing. */
+            if (c->flags & CLIENT_MODULE) {
+                moduleCallCommandUnblockedHandler(c);
+            } else {
+                queueClientForReprocessing(c);
+            }
+        }
+        exitExecutionUnit();
+        afterCommand(c);
+        server.current_client = old_client;
+    }
+}
+
+/* Unblock a client blocked on the specific key from module context.
+ * This function will try to serve the module call, and in case it succeeds,
+ * it will add the client to the list of module unblocked clients which will
+ * be processed in moduleHandleBlockedClients. */
+static void moduleUnblockClientOnKey(client *c, robj *key) {
+    long long prev_error_replies = server.stat_total_error_replies;
+    client *old_client = server.current_client;
+    server.current_client = c;
+    monotime replyTimer;
+    elapsedStart(&replyTimer);
+
+    if (moduleTryServeClientBlockedOnKey(c, key)) {
+        /* Served: account the time spent and whether an error was emitted. */
+        updateStatsOnUnblock(c, 0, elapsedUs(replyTimer), server.stat_total_error_replies != prev_error_replies);
+        moduleUnblockClient(c);
+    }
+    /* We need to call afterCommand even if the client was not unblocked
+     * in order to propagate any changes that could have been done inside
+     * moduleTryServeClientBlockedOnKey */
+    afterCommand(c);
+    server.current_client = old_client;
+}
+
+/* Unblock a client which is currently blocked, because its timeout was reached.
+ * The implementation will first reply to the blocked client with a null response
+ * or, in case of a module blocked client, the timeout callback will be used.
+ * In this case since we might have a command pending
+ * we want to remove the pending flag to indicate we already responded to the
+ * command with timeout reply. */
+void unblockClientOnTimeout(client *c) {
+    replyToBlockedClientTimedOut(c);
+    if (c->flags & CLIENT_PENDING_COMMAND)
+        c->flags &= ~CLIENT_PENDING_COMMAND;
+    unblockClient(c, 1);
+}
+
+/* Unblock a client which is currently blocked, with an error.
+ * If err_str is provided it will be used to reply to the blocked client
+ * (otherwise the caller is expected to have replied already). */
+void unblockClientOnError(client *c, const char *err_str) {
+    if (err_str)
+        addReplyError(c, err_str);
+    updateStatsOnUnblock(c, 0, 0, 1);
+    if (c->flags & CLIENT_PENDING_COMMAND)
+        c->flags &= ~CLIENT_PENDING_COMMAND;
+    unblockClient(c, 1);
+}
+
+/* Sets *blocking_keys to the total number of keys which have at least one
+ * client blocked on them, and *blocking_keys_on_nokey to the total number of
+ * keys which have at least one client blocked on them waiting for the key
+ * to be written or deleted. Either output pointer may be NULL if the caller
+ * does not need that counter. Totals are summed across all databases. */
+void totalNumberOfBlockingKeys(unsigned long *blocking_keys, unsigned long *blocking_keys_on_nokey) {
+    unsigned long bkeys=0, bkeys_on_nokey=0;
+    for (int j = 0; j < server.dbnum; j++) {
+        bkeys += dictSize(server.db[j].blocking_keys);
+        bkeys_on_nokey += dictSize(server.db[j].blocking_keys_unblock_on_nokey);
+    }
+    if (blocking_keys)
+        *blocking_keys = bkeys;
+    if (blocking_keys_on_nokey)
+        *blocking_keys_on_nokey = bkeys_on_nokey;
+}
+
+/* Called from beforeSleep(): drives all the blocked-client machinery that
+ * must run once per event loop iteration, in dependency order. */
+void blockedBeforeSleep(void) {
+    /* Handle precise timeouts of blocked clients. */
+    handleBlockedClientsTimeout();
+
+    /* Unblock all the clients blocked for synchronous replication
+     * in WAIT or WAITAOF. */
+    if (listLength(server.clients_waiting_acks))
+        processClientsWaitingReplicas();
+
+    /* Try to process blocked clients every once in while.
+     *
+     * Example: A module calls RM_SignalKeyAsReady from within a timer callback
+     * (So we don't visit processCommand() at all).
+     *
+     * This may unblock clients, so must be done before processUnblockedClients */
+    handleClientsBlockedOnKeys();
+
+    /* Check if there are clients unblocked by modules that implement
+     * blocking commands. */
+    if (moduleCount())
+        moduleHandleBlockedClients();
+
+    /* Try to process pending commands for clients that were just unblocked. */
+    if (listLength(server.unblocked_clients))
+        processUnblockedClients();
+}
diff --git a/src/call_reply.c b/src/call_reply.c
new file mode 100644
index 0000000..ccd1b36
--- /dev/null
+++ b/src/call_reply.c
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2009-2021, Redis Labs Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "call_reply.h"
+
+#define REPLY_FLAG_ROOT (1<<0)
+#define REPLY_FLAG_PARSED (1<<1)
+#define REPLY_FLAG_RESP3 (1<<2)
+
+/* --------------------------------------------------------
+ * An opaque struct used to parse a RESP protocol reply and
+ * represent it. Used when parsing replies such as in RM_Call
+ * or Lua scripts. Parsing is lazy: only 'proto' is stored at
+ * creation, the rest is filled in on first access.
+ * -------------------------------------------------------- */
+struct CallReply {
+    void *private_data;
+    sds original_proto; /* Available only for root reply. */
+    const char *proto;  /* Raw RESP protocol bytes of this (sub)reply. */
+    size_t proto_len;
+    int type;  /* REPLY_... */
+    int flags; /* REPLY_FLAG... */
+    size_t len; /* Length of a string, or the number elements in an array. */
+    union {
+        const char *str; /* String pointer for string and error replies. This
+                          * does not need to be freed, always points inside
+                          * a reply->proto buffer of the reply object or, in
+                          * case of array elements, of parent reply objects. */
+        struct {
+            const char *str;
+            const char *format;
+        } verbatim_str;              /* Reply value for verbatim string */
+        long long ll;                /* Reply value for integer reply. */
+        double d;                    /* Reply value for double reply. */
+        struct CallReply *array;     /* Array of sub-reply elements. used for set, array, map, and attribute */
+    } val;
+    list *deferred_error_list;   /* list of errors in sds form or NULL */
+    struct CallReply *attribute; /* attribute reply, NULL if not exists */
+};
+
+/* Fill the fields common to every reply kind: type, raw protocol span and
+ * extra flags (e.g. REPLY_FLAG_RESP3). */
+static void callReplySetSharedData(CallReply *rep, int type, const char *proto, size_t proto_len, int extra_flags) {
+    rep->type = type;
+    rep->proto = proto;
+    rep->proto_len = proto_len;
+    rep->flags |= extra_flags;
+}
+
+/* Parser callback: RESP3 null reply. */
+static void callReplyNull(void *ctx, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_NULL, proto, proto_len, REPLY_FLAG_RESP3);
+}
+
+/* Parser callback: RESP2 null bulk string ("$-1\r\n"), mapped to a null reply. */
+static void callReplyNullBulkString(void *ctx, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_NULL, proto, proto_len, 0);
+}
+
+/* Parser callback: RESP2 null array ("*-1\r\n"), mapped to a null reply. */
+static void callReplyNullArray(void *ctx, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_NULL, proto, proto_len, 0);
+}
+
+/* Parser callback: bulk string reply. 'str' points into the protocol buffer. */
+static void callReplyBulkString(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_STRING, proto, proto_len, 0);
+    rep->len = len;
+    rep->val.str = str;
+}
+
+/* Parser callback: error reply. 'str' points into the protocol buffer. */
+static void callReplyError(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_ERROR, proto, proto_len, 0);
+    rep->len = len;
+    rep->val.str = str;
+}
+
+/* Parser callback: simple string reply, exposed with the same STRING type
+ * as bulk strings. */
+static void callReplySimpleStr(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_STRING, proto, proto_len, 0);
+    rep->len = len;
+    rep->val.str = str;
+}
+
+/* Parser callback: integer reply. */
+static void callReplyLong(void *ctx, long long val, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_INTEGER, proto, proto_len, 0);
+    rep->val.ll = val;
+}
+
+/* Parser callback: RESP3 double reply. */
+static void callReplyDouble(void *ctx, double val, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_DOUBLE, proto, proto_len, REPLY_FLAG_RESP3);
+    rep->val.d = val;
+}
+
+/* Parser callback: RESP3 verbatim string reply (payload plus 3-char format). */
+static void callReplyVerbatimString(void *ctx, const char *format, const char *str, size_t len, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_VERBATIM_STRING, proto, proto_len, REPLY_FLAG_RESP3);
+    rep->len = len;
+    rep->val.verbatim_str.str = str;
+    rep->val.verbatim_str.format = format;
+}
+
+/* Parser callback: RESP3 big number reply, kept as its textual form. */
+static void callReplyBigNumber(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_BIG_NUMBER, proto, proto_len, REPLY_FLAG_RESP3);
+    rep->len = len;
+    rep->val.str = str;
+}
+
+/* Parser callback: RESP3 boolean reply, stored in the integer slot. */
+static void callReplyBool(void *ctx, int val, const char *proto, size_t proto_len) {
+    CallReply *rep = ctx;
+    callReplySetSharedData(rep, REDISMODULE_REPLY_BOOL, proto, proto_len, REPLY_FLAG_RESP3);
+    rep->val.ll = val;
+}
+
+/* Parse 'len' entries of a collection reply into rep->val.array.
+ * 'elements_per_entry' is 1 for arrays/sets and 2 for maps/attributes
+ * (key followed by value), so the array holds len * elements_per_entry
+ * sub-replies. Also fixes up the collection's proto span once the parser
+ * has consumed all the elements. */
+static void callReplyParseCollection(ReplyParser *parser, CallReply *rep, size_t len, const char *proto, size_t elements_per_entry) {
+    rep->len = len;
+    rep->val.array = zcalloc(elements_per_entry * len * sizeof(CallReply));
+    for (size_t i = 0; i < len * elements_per_entry; i += elements_per_entry) {
+        for (size_t j = 0 ; j < elements_per_entry ; ++j) {
+            rep->val.array[i + j].private_data = rep->private_data;
+            parseReply(parser, rep->val.array + i + j);
+            rep->val.array[i + j].flags |= REPLY_FLAG_PARSED;
+            if (rep->val.array[i + j].flags & REPLY_FLAG_RESP3) {
+                /* If one of the sub-replies is RESP3, then the current reply is also RESP3. */
+                rep->flags |= REPLY_FLAG_RESP3;
+            }
+        }
+    }
+    rep->proto = proto;
+    rep->proto_len = parser->curr_location - proto;
+}
+
+/* Parser callback: RESP3 attribute. The attribute is parsed into a separate
+ * CallReply attached to 'rep', then the actual reply that follows it is
+ * parsed into 'rep' itself. */
+static void callReplyAttribute(ReplyParser *parser, void *ctx, size_t len, const char *proto) {
+    CallReply *rep = ctx;
+    rep->attribute = zcalloc(sizeof(CallReply));
+
+    /* Continue parsing the attribute reply */
+    rep->attribute->len = len;
+    rep->attribute->type = REDISMODULE_REPLY_ATTRIBUTE;
+    callReplyParseCollection(parser, rep->attribute, len, proto, 2);
+    rep->attribute->flags |= REPLY_FLAG_PARSED | REPLY_FLAG_RESP3;
+    rep->attribute->private_data = rep->private_data;
+
+    /* Continue parsing the reply */
+    parseReply(parser, rep);
+
+    /* In this case we need to fix the proto address and len, it should start from the attribute */
+    rep->proto = proto;
+    rep->proto_len = parser->curr_location - proto;
+    rep->flags |= REPLY_FLAG_RESP3;
+}
+
+/* Parser callback: array reply, one sub-reply per entry. */
+static void callReplyArray(ReplyParser *parser, void *ctx, size_t len, const char *proto) {
+    CallReply *rep = ctx;
+    rep->type = REDISMODULE_REPLY_ARRAY;
+    callReplyParseCollection(parser, rep, len, proto, 1);
+}
+
+/* Parser callback: RESP3 set reply, one sub-reply per entry. */
+static void callReplySet(ReplyParser *parser, void *ctx, size_t len, const char *proto) {
+    CallReply *rep = ctx;
+    rep->type = REDISMODULE_REPLY_SET;
+    callReplyParseCollection(parser, rep, len, proto, 1);
+    rep->flags |= REPLY_FLAG_RESP3;
+}
+
+/* Parser callback: RESP3 map reply, two sub-replies (key, value) per entry. */
+static void callReplyMap(ReplyParser *parser, void *ctx, size_t len, const char *proto) {
+    CallReply *rep = ctx;
+    rep->type = REDISMODULE_REPLY_MAP;
+    callReplyParseCollection(parser, rep, len, proto, 2);
+    rep->flags |= REPLY_FLAG_RESP3;
+}
+
+/* Parser callback: unparsable protocol; mark the reply type as unknown. */
+static void callReplyParseError(void *ctx) {
+    CallReply *rep = ctx;
+    rep->type = REDISMODULE_REPLY_UNKNOWN;
+}
+
+/* Recursively free the current call reply and its sub-replies.
+ * Note: frees only what this node owns (element arrays and the attribute
+ * object); the node itself is freed by the caller. */
+static void freeCallReplyInternal(CallReply *rep) {
+    if (rep->type == REDISMODULE_REPLY_ARRAY || rep->type == REDISMODULE_REPLY_SET) {
+        for (size_t i = 0 ; i < rep->len ; ++i) {
+            freeCallReplyInternal(rep->val.array + i);
+        }
+        zfree(rep->val.array);
+    }
+
+    if (rep->type == REDISMODULE_REPLY_MAP || rep->type == REDISMODULE_REPLY_ATTRIBUTE) {
+        /* Maps/attributes store key/value pairs, so the array holds 2*len entries. */
+        for (size_t i = 0 ; i < rep->len ; ++i) {
+            freeCallReplyInternal(rep->val.array + i * 2);
+            freeCallReplyInternal(rep->val.array + i * 2 + 1);
+        }
+        zfree(rep->val.array);
+    }
+
+    if (rep->attribute) {
+        freeCallReplyInternal(rep->attribute);
+        zfree(rep->attribute);
+    }
+}
+
+/* Free the given call reply and its children (in case of nested reply) recursively.
+ * Only root replies are freed; calling this on a borrowed sub-reply is a no-op.
+ * If private data was set when the CallReply was created it will not be freed, as it's
+ * the caller's responsibility to free it before calling freeCallReply(). */
+void freeCallReply(CallReply *rep) {
+    if (!(rep->flags & REPLY_FLAG_ROOT)) {
+        return;
+    }
+    if (rep->flags & REPLY_FLAG_PARSED) {
+        /* Promise replies own nothing besides themselves. */
+        if (rep->type == REDISMODULE_REPLY_PROMISE) {
+            zfree(rep);
+            return;
+        }
+        freeCallReplyInternal(rep);
+    }
+    sdsfree(rep->original_proto);
+    if (rep->deferred_error_list)
+        listRelease(rep->deferred_error_list);
+    zfree(rep);
+}
+
+/* Create a CallReply representing a promise: the actual reply will be
+ * delivered asynchronously (see the RM_Call 'K' option). */
+CallReply *callReplyCreatePromise(void *private_data) {
+    CallReply *res = zmalloc(sizeof(*res));
+    res->type = REDISMODULE_REPLY_PROMISE;
+    /* Mark the reply as parsed so there will be no attempt to parse
+     * it when calling reply API such as freeCallReply.
+     * Also mark the reply as root so freeCallReply will not ignore it.
+     * Note: plain assignment, not |=, since zmalloc() does not zero the
+     * allocation and 'flags' would otherwise be read uninitialized. */
+    res->flags = REPLY_FLAG_PARSED | REPLY_FLAG_ROOT;
+    res->private_data = private_data;
+    return res;
+}
+
+/* Callback table wiring the RESP parser to the callReply* handlers above. */
+static const ReplyParserCallbacks DefaultParserCallbacks = {
+    .null_callback = callReplyNull,
+    .bulk_string_callback = callReplyBulkString,
+    .null_bulk_string_callback = callReplyNullBulkString,
+    .null_array_callback = callReplyNullArray,
+    .error_callback = callReplyError,
+    .simple_str_callback = callReplySimpleStr,
+    .long_callback = callReplyLong,
+    .array_callback = callReplyArray,
+    .set_callback = callReplySet,
+    .map_callback = callReplyMap,
+    .double_callback = callReplyDouble,
+    .bool_callback = callReplyBool,
+    .big_number_callback = callReplyBigNumber,
+    .verbatim_string_callback = callReplyVerbatimString,
+    .attribute_callback = callReplyAttribute,
+    .error = callReplyParseError,
+};
+
+/* Parse the buffer located in rep->original_proto and update the CallReply
+ * structure to represent its contents. Idempotent: a no-op if the reply was
+ * already parsed. Called lazily by the accessor functions below. */
+static void callReplyParse(CallReply *rep) {
+    if (rep->flags & REPLY_FLAG_PARSED) {
+        return;
+    }
+
+    ReplyParser parser = {.curr_location = rep->proto, .callbacks = DefaultParserCallbacks};
+
+    parseReply(&parser, rep);
+    rep->flags |= REPLY_FLAG_PARSED;
+}
+
+/* Return the call reply type (REDISMODULE_REPLY_...), parsing lazily on
+ * first access. A NULL reply maps to REDISMODULE_REPLY_UNKNOWN. */
+int callReplyType(CallReply *rep) {
+    if (!rep) return REDISMODULE_REPLY_UNKNOWN;
+    callReplyParse(rep);
+    return rep->type;
+}
+
+/* Return reply string as buffer and len. Applicable to:
+ * - REDISMODULE_REPLY_STRING
+ * - REDISMODULE_REPLY_ERROR
+ *
+ * Returns NULL for any other reply type.
+ *
+ * The return value is borrowed from CallReply, so it must not be freed
+ * explicitly or used after CallReply itself is freed.
+ *
+ * The returned value is not NULL terminated and its length is returned by
+ * reference through len, which may be NULL if not needed.
+ */
+const char *callReplyGetString(CallReply *rep, size_t *len) {
+    callReplyParse(rep);
+    if (rep->type != REDISMODULE_REPLY_STRING &&
+        rep->type != REDISMODULE_REPLY_ERROR) return NULL;
+    if (len) *len = rep->len;
+    return rep->val.str;
+}
+
+/* Return a long long reply value. Applicable to:
+ * - REDISMODULE_REPLY_INTEGER
+ *
+ * Returns the sentinel LLONG_MIN for any other reply type.
+ */
+long long callReplyGetLongLong(CallReply *rep) {
+    callReplyParse(rep);
+    if (rep->type != REDISMODULE_REPLY_INTEGER) return LLONG_MIN;
+    return rep->val.ll;
+}
+
+/* Return a double reply value. Applicable to:
+ * - REDISMODULE_REPLY_DOUBLE
+ *
+ * On type mismatch returns LLONG_MIN converted to double, mirroring the
+ * integer getter's sentinel. */
+double callReplyGetDouble(CallReply *rep) {
+    callReplyParse(rep);
+    if (rep->type != REDISMODULE_REPLY_DOUBLE) return LLONG_MIN;
+    return rep->val.d;
+}
+
+/* Return a reply Boolean value. Applicable to:
+ * - REDISMODULE_REPLY_BOOL
+ *
+ * Returns the sentinel INT_MIN for any other reply type.
+ */
+int callReplyGetBool(CallReply *rep) {
+    callReplyParse(rep);
+    if (rep->type != REDISMODULE_REPLY_BOOL) return INT_MIN;
+    return rep->val.ll;
+}
+
+/* Return reply length: byte length for strings/errors, number of entries for
+ * collections. Applicable to:
+ * - REDISMODULE_REPLY_STRING
+ * - REDISMODULE_REPLY_ERROR
+ * - REDISMODULE_REPLY_ARRAY
+ * - REDISMODULE_REPLY_SET
+ * - REDISMODULE_REPLY_MAP
+ * - REDISMODULE_REPLY_ATTRIBUTE
+ * Returns 0 for any other reply type.
+ */
+size_t callReplyGetLen(CallReply *rep) {
+    callReplyParse(rep);
+    switch(rep->type) {
+        case REDISMODULE_REPLY_STRING:
+        case REDISMODULE_REPLY_ERROR:
+        case REDISMODULE_REPLY_ARRAY:
+        case REDISMODULE_REPLY_SET:
+        case REDISMODULE_REPLY_MAP:
+        case REDISMODULE_REPLY_ATTRIBUTE:
+            return rep->len;
+        default:
+            return 0;
+    }
+}
+
+/* Return the idx-th flat element of a collection reply, or NULL when out of
+ * range. 'idx' indexes the flat sub-reply array, whose real length is
+ * rep->len * elements_per_entry. */
+static CallReply *callReplyGetCollectionElement(CallReply *rep, size_t idx, int elements_per_entry) {
+    if (idx >= rep->len * elements_per_entry) return NULL; // real len is rep->len * elements_per_entry
+    return rep->val.array+idx;
+}
+
+/* Return a reply array element at a given index. Applicable to:
+ * - REDISMODULE_REPLY_ARRAY
+ *
+ * Returns NULL on type mismatch or when idx is out of range.
+ *
+ * The return value is borrowed from CallReply, so it must not be freed
+ * explicitly or used after CallReply itself is freed.
+ */
+CallReply *callReplyGetArrayElement(CallReply *rep, size_t idx) {
+    callReplyParse(rep);
+    if (rep->type != REDISMODULE_REPLY_ARRAY) return NULL;
+    return callReplyGetCollectionElement(rep, idx, 1);
+}
+
+/* Return a reply set element at a given index. Applicable to:
+ * - REDISMODULE_REPLY_SET
+ *
+ * Returns NULL on type mismatch or when idx is out of range.
+ *
+ * The return value is borrowed from CallReply, so it must not be freed
+ * explicitly or used after CallReply itself is freed.
+ */
+CallReply *callReplyGetSetElement(CallReply *rep, size_t idx) {
+    callReplyParse(rep);
+    if (rep->type != REDISMODULE_REPLY_SET) return NULL;
+    return callReplyGetCollectionElement(rep, idx, 1);
+}
+
+/* Shared implementation for map and attribute element access: verify the
+ * reply has the expected pair-based 'type', then fetch the idx-th key/value
+ * pair from the flat sub-reply array. */
+static int callReplyGetMapElementInternal(CallReply *rep, size_t idx, CallReply **key, CallReply **val, int type) {
+    callReplyParse(rep);
+    if (rep->type != type) return C_ERR;
+    if (idx >= rep->len) return C_ERR;
+    if (key) *key = callReplyGetCollectionElement(rep, idx * 2, 2);
+    if (val) *val = callReplyGetCollectionElement(rep, idx * 2 + 1, 2);
+    return C_OK;
+}
+
+/* Retrieve a map reply key and value at a given index. Applicable to:
+ * - REDISMODULE_REPLY_MAP
+ *
+ * The key and value are returned by reference through key and val,
+ * which may also be NULL if not needed.
+ *
+ * Returns C_OK on success or C_ERR if reply type mismatches, or if idx is out
+ * of range.
+ *
+ * The returned values are borrowed from CallReply, so they must not be freed
+ * explicitly or used after CallReply itself is freed.
+ */
+int callReplyGetMapElement(CallReply *rep, size_t idx, CallReply **key, CallReply **val) {
+    return callReplyGetMapElementInternal(rep, idx, key, val, REDISMODULE_REPLY_MAP);
+}
+
+/* Return reply attribute, or NULL if it does not exist. Applicable to all replies.
+ * Note: does not trigger lazy parsing; attributes are discovered during parse.
+ *
+ * The returned values are borrowed from CallReply, so they must not be freed
+ * explicitly or used after CallReply itself is freed.
+ */
+CallReply *callReplyGetAttribute(CallReply *rep) {
+    return rep->attribute;
+}
+
+/* Retrieve attribute reply key and value at a given index. Applicable to:
+ * - REDISMODULE_REPLY_ATTRIBUTE
+ *
+ * The key and value are returned by reference through key and val,
+ * which may also be NULL if not needed.
+ *
+ * Returns C_OK on success or C_ERR if reply type mismatches, or if idx is out
+ * of range.
+ *
+ * The returned values are borrowed from CallReply, so they must not be freed
+ * explicitly or used after CallReply itself is freed.
+ */
+int callReplyGetAttributeElement(CallReply *rep, size_t idx, CallReply **key, CallReply **val) {
+    /* Attribute replies are typed REDISMODULE_REPLY_ATTRIBUTE (see
+     * callReplyAttribute), so that is the type we must check for here;
+     * checking for MAP would make this function always fail. */
+    return callReplyGetMapElementInternal(rep, idx, key, val, REDISMODULE_REPLY_ATTRIBUTE);
+}
+
+/* Return a big number reply value. Applicable to:
+ * - REDISMODULE_REPLY_BIG_NUMBER
+ *
+ * Returns NULL for any other reply type.
+ *
+ * The returned values are borrowed from CallReply, so they must not be freed
+ * explicitly or used after CallReply itself is freed.
+ *
+ * The return value is guaranteed to be a big number, as described in the RESP3
+ * protocol specifications.
+ *
+ * The returned value is not NULL terminated and its length is returned by
+ * reference through len, which must not be NULL.
+ */
+const char *callReplyGetBigNumber(CallReply *rep, size_t *len) {
+    callReplyParse(rep);
+    if (rep->type != REDISMODULE_REPLY_BIG_NUMBER) return NULL;
+    *len = rep->len;
+    return rep->val.str;
+}
+
+/* Return a verbatim string reply value. Applicable to:
+ * - REDISMODULE_REPLY_VERBATIM_STRING
+ *
+ * Returns NULL for any other reply type.
+ *
+ * If format is non-NULL, the verbatim reply format is also returned by value.
+ *
+ * The optional output argument can be given to get a verbatim reply
+ * format, or can be set NULL if not needed.
+ *
+ * The return value is borrowed from CallReply, so it must not be freed
+ * explicitly or used after CallReply itself is freed.
+ *
+ * The returned value is not NULL terminated and its length is returned by
+ * reference through len, which must not be NULL.
+ */
+const char *callReplyGetVerbatim(CallReply *rep, size_t *len, const char **format){
+    callReplyParse(rep);
+    if (rep->type != REDISMODULE_REPLY_VERBATIM_STRING) return NULL;
+    *len = rep->len;
+    if (format) *format = rep->val.verbatim_str.format;
+    return rep->val.verbatim_str.str;
+}
+
+/* Return the current reply blob (raw RESP bytes) and its length through
+ * proto_len, which must not be NULL.
+ *
+ * The return value is borrowed from CallReply, so it must not be freed
+ * explicitly or used after CallReply itself is freed.
+ */
+const char *callReplyGetProto(CallReply *rep, size_t *proto_len) {
+    *proto_len = rep->proto_len;
+    return rep->proto;
+}
+
+/* Return CallReply private data, as set by the caller on callReplyCreate().
+ */
+void *callReplyGetPrivateData(CallReply *rep) {
+    return rep->private_data;
+}
+
+/* Return true if the reply or one of it sub-replies is RESP3 formatted.
+ * Note: reads the flag as-is; the flag is set while parsing. */
+int callReplyIsResp3(CallReply *rep) {
+    return rep->flags & REPLY_FLAG_RESP3;
+}
+
+/* Returns a list of errors in sds form, or NULL. The list is owned by the
+ * CallReply and released by freeCallReply(). */
+list *callReplyDeferredErrorList(CallReply *rep) {
+    return rep->deferred_error_list;
+}
+
+/* Create a new CallReply struct from the reply blob.
+ *
+ * The function will own the reply blob, so it must not be used or freed by
+ * the caller after passing it to this function.
+ *
+ * The reply blob will be freed when the returned CallReply struct is later
+ * freed using freeCallReply().
+ *
+ * The deferred_error_list is an optional list of errors that are present
+ * in the reply blob, if given, this function will take ownership on it.
+ *
+ * The private_data is optional and can later be accessed using
+ * callReplyGetPrivateData().
+ *
+ * The blob is not parsed here; parsing happens lazily on the first accessor
+ * call (see callReplyParse()).
+ *
+ * NOTE: The parser used for parsing the reply and producing CallReply is
+ * designed to handle valid replies created by Redis itself. IT IS NOT
+ * DESIGNED TO HANDLE USER INPUT and using it to parse invalid replies is
+ * unsafe.
+ */
+CallReply *callReplyCreate(sds reply, list *deferred_error_list, void *private_data) {
+    CallReply *res = zmalloc(sizeof(*res));
+    res->flags = REPLY_FLAG_ROOT;
+    res->original_proto = reply;
+    res->proto = reply;
+    res->proto_len = sdslen(reply);
+    res->private_data = private_data;
+    res->attribute = NULL;
+    res->deferred_error_list = deferred_error_list;
+    return res;
+}
+
+/* Create a new CallReply struct from the reply blob representing an error message.
+ * Automatically creating deferred_error_list and set a copy of the reply in it.
+ * Refer to callReplyCreate for detailed explanation.
+ * Reply string can come in one of two forms:
+ * 1. A protocol reply starting with "-CODE" and ending with "\r\n"
+ * 2. A plain string, in which case this function adds the protocol header and
+ *    footer, and frees the original 'reply' it was given. */
+CallReply *callReplyCreateError(sds reply, void *private_data) {
+    sds err_buff = reply;
+    if (err_buff[0] != '-') {
+        err_buff = sdscatfmt(sdsempty(), "-ERR %S\r\n", reply);
+        sdsfree(reply);
+    }
+    list *deferred_error_list = listCreate();
+    listSetFreeMethod(deferred_error_list, (void (*)(void*))sdsfree);
+    listAddNodeTail(deferred_error_list, sdsnew(err_buff));
+    return callReplyCreate(err_buff, deferred_error_list, private_data);
+}
diff --git a/src/call_reply.h b/src/call_reply.h
new file mode 100644
index 0000000..657f247
--- /dev/null
+++ b/src/call_reply.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2009-2021, Redis Labs Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SRC_CALL_REPLY_H_
+#define SRC_CALL_REPLY_H_
+
+#include "resp_parser.h"
+
+/* Opaque handle to a parsed command reply; the concrete layout is private
+ * to call_reply.c. */
+typedef struct CallReply CallReply;
+/* Callback type carried around by module code; its invocation semantics are
+ * defined by the callers, not by this header. */
+typedef void (*RedisModuleOnUnblocked)(void *ctx, CallReply *reply, void *private_data);
+
+/* Constructors. Both take ownership of the 'reply' protocol blob (an sds);
+ * it is released by freeCallReply(). */
+CallReply *callReplyCreate(sds reply, list *deferred_error_list, void *private_data);
+CallReply *callReplyCreateError(sds reply, void *private_data);
+/* Accessors for the parsed reply. Scalar getters return the value for the
+ * matching reply type; see call_reply.c for behavior on type mismatch. */
+int callReplyType(CallReply *rep);
+const char *callReplyGetString(CallReply *rep, size_t *len);
+long long callReplyGetLongLong(CallReply *rep);
+double callReplyGetDouble(CallReply *rep);
+int callReplyGetBool(CallReply *rep);
+size_t callReplyGetLen(CallReply *rep);
+/* Aggregate accessors: array/set elements by index, map/attribute
+ * key-value pairs via out-parameters. */
+CallReply *callReplyGetArrayElement(CallReply *rep, size_t idx);
+CallReply *callReplyGetSetElement(CallReply *rep, size_t idx);
+int callReplyGetMapElement(CallReply *rep, size_t idx, CallReply **key, CallReply **val);
+CallReply *callReplyGetAttribute(CallReply *rep);
+int callReplyGetAttributeElement(CallReply *rep, size_t idx, CallReply **key, CallReply **val);
+const char *callReplyGetBigNumber(CallReply *rep, size_t *len);
+const char *callReplyGetVerbatim(CallReply *rep, size_t *len, const char **format);
+/* Raw protocol blob backing this reply. */
+const char *callReplyGetProto(CallReply *rep, size_t *len);
+void *callReplyGetPrivateData(CallReply *rep);
+int callReplyIsResp3(CallReply *rep);
+list *callReplyDeferredErrorList(CallReply *rep);
+void freeCallReply(CallReply *rep);
+CallReply *callReplyCreatePromise(void *private_data);
+
+#endif /* SRC_CALL_REPLY_H_ */
diff --git a/src/childinfo.c b/src/childinfo.c
new file mode 100644
index 0000000..1303dd0
--- /dev/null
+++ b/src/childinfo.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include <unistd.h>
+#include <fcntl.h>
+
+/* Fixed-size record sent over the child-info pipe from a saving child
+ * (RDB / AOF / module) to the parent. It is written and read as a raw byte
+ * blob, so both sides share the same layout by construction. */
+typedef struct {
+    size_t keys;                     /* Keys processed so far by the child. */
+    size_t cow;                      /* Copy-on-write bytes (private-dirty memory reading). */
+    monotime cow_updated;            /* Monotonic time of the last CoW reading. */
+    double progress;                 /* Save progress; -1 means "no update" (see updateChildInfo). */
+    childInfoType information_type;  /* Type of information */
+} child_info_data;
+
+/* Open the child-parent channel used to move information about the
+ * RDB / AOF saving process from the child to the parent (for instance
+ * the amount of copy-on-write memory used). */
+void openChildInfoPipe(void) {
+    if (anetPipe(server.child_info_pipe, O_NONBLOCK, 0) != -1) {
+        server.child_info_nread = 0;
+        return;
+    }
+    /* On error our two file descriptors should still be set to -1,
+     * but we call closeChildInfoPipe() anyway since it can't hurt. */
+    closeChildInfoPipe();
+}
+
+/* Close the pipes opened with openChildInfoPipe() and reset the partial
+ * read counter. Safe to call when the pipe was never opened. */
+void closeChildInfoPipe(void) {
+    if (server.child_info_pipe[0] != -1 ||
+        server.child_info_pipe[1] != -1)
+    {
+        /* Both ends are closed unconditionally: if one of them is -1 the
+         * close() just fails with EBADF, which is harmless here. */
+        close(server.child_info_pipe[0]);
+        close(server.child_info_pipe[1]);
+        server.child_info_pipe[0] = -1;
+        server.child_info_pipe[1] = -1;
+        server.child_info_nread = 0;
+    }
+}
+
+/* Send save data to parent. Called from the saving child process: on write
+ * failure the child exits, assuming the parent is gone.
+ *
+ * info_type selects the record kind; 'keys' and 'progress' are forwarded
+ * as-is, and the CoW reading is refreshed here (possibly throttled). */
+void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress, char *pname) {
+    if (server.child_info_pipe[1] == -1) return;
+
+    /* Static accumulators persist across calls within this (child) process:
+     * last CoW reading, its cost, and peak/sum/count for the log line. */
+    static monotime cow_updated = 0;
+    static uint64_t cow_update_cost = 0;
+    static size_t cow = 0;
+    static size_t peak_cow = 0;
+    static size_t update_count = 0;
+    static unsigned long long sum_cow = 0;
+
+    child_info_data data = {0}; /* zero everything, including padding to satisfy valgrind */
+
+    /* When called to report current info, we need to throttle down CoW updates as they
+     * can be very expensive. To do that, we measure the time it takes to get a reading
+     * and schedule the next reading to happen not before time*CHILD_COW_COST_FACTOR
+     * passes. */
+
+    monotime now = getMonotonicUs();
+    /* Refresh the reading when this is a final (non-CURRENT_INFO) record,
+     * when we never measured, or when the duty-cycle window has elapsed. */
+    if (info_type != CHILD_INFO_TYPE_CURRENT_INFO ||
+        !cow_updated ||
+        now - cow_updated > cow_update_cost * CHILD_COW_DUTY_CYCLE)
+    {
+        cow = zmalloc_get_private_dirty(-1);
+        cow_updated = getMonotonicUs();
+        cow_update_cost = cow_updated - now;
+        if (cow > peak_cow) peak_cow = cow;
+        sum_cow += cow;
+        update_count++;
+
+        /* Final records are logged at NOTICE; periodic ones at VERBOSE,
+         * and only when there is something to report. */
+        int cow_info = (info_type != CHILD_INFO_TYPE_CURRENT_INFO);
+        if (cow || cow_info) {
+            serverLog(cow_info ? LL_NOTICE : LL_VERBOSE,
+                      "Fork CoW for %s: current %zu MB, peak %zu MB, average %llu MB",
+                      pname, cow>>20, peak_cow>>20, (sum_cow/update_count)>>20);
+        }
+    }
+
+    data.information_type = info_type;
+    data.keys = keys;
+    data.cow = cow;
+    data.cow_updated = cow_updated;
+    data.progress = progress;
+
+    ssize_t wlen = sizeof(data);
+
+    /* A short or failed write means the parent can no longer receive;
+     * there is nothing useful left for the child to do. */
+    if (write(server.child_info_pipe[1], &data, wlen) != wlen) {
+        /* Failed writing to parent, it could have been killed, exit. */
+        serverLog(LL_WARNING,"Child failed reporting info to parent, exiting. %s", strerror(errno));
+        exitFromChild(1);
+    }
+}
+
+/* Apply one child-info record to the server stats.
+ * CURRENT_INFO records refresh the live counters; the per-subsystem
+ * *_COW_SIZE records latch the peak CoW observed for this child. */
+void updateChildInfo(childInfoType information_type, size_t cow, monotime cow_updated, size_t keys, double progress) {
+    /* Track the peak CoW seen so far, whatever the record type. */
+    if (cow > server.stat_current_cow_peak) server.stat_current_cow_peak = cow;
+
+    switch (information_type) {
+    case CHILD_INFO_TYPE_CURRENT_INFO:
+        server.stat_current_cow_bytes = cow;
+        server.stat_current_cow_updated = cow_updated;
+        server.stat_current_save_keys_processed = keys;
+        /* -1 means "no progress update" for this record. */
+        if (progress != -1) server.stat_module_progress = progress;
+        break;
+    case CHILD_INFO_TYPE_AOF_COW_SIZE:
+        server.stat_aof_cow_bytes = server.stat_current_cow_peak;
+        break;
+    case CHILD_INFO_TYPE_RDB_COW_SIZE:
+        server.stat_rdb_cow_bytes = server.stat_current_cow_peak;
+        break;
+    case CHILD_INFO_TYPE_MODULE_COW_SIZE:
+        server.stat_module_cow_bytes = server.stat_current_cow_peak;
+        break;
+    default:
+        break;
+    }
+}
+
+/* Read child info data from the pipe.
+ * If a complete record was read into the buffer, its fields are stored
+ * through the output pointers and 1 is returned.
+ * Otherwise the partial data is left in the buffer, waiting for the next
+ * read, and 0 is returned. */
+int readChildInfo(childInfoType *information_type, size_t *cow, monotime *cow_updated, size_t *keys, double* progress) {
+    /* We are using here a static buffer in combination with the
+     * server.child_info_nread to handle short reads. */
+    static child_info_data buffer;
+    ssize_t wlen = sizeof(buffer);
+
+    /* Do not overlap with a record that was already fully consumed. */
+    if (server.child_info_nread == wlen) server.child_info_nread = 0;
+
+    /* read() returns ssize_t; keep the full type so an error (-1) or a
+     * wide result is never truncated through an int. */
+    ssize_t nread = read(server.child_info_pipe[0], (char *)&buffer + server.child_info_nread, wlen - server.child_info_nread);
+    if (nread > 0) {
+        server.child_info_nread += nread;
+    }
+
+    /* We have complete child info */
+    if (server.child_info_nread == wlen) {
+        *information_type = buffer.information_type;
+        *cow = buffer.cow;
+        *cow_updated = buffer.cow_updated;
+        *keys = buffer.keys;
+        *progress = buffer.progress;
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/* Receive info data from child.
+ * Drains every complete record currently available on the pipe, applying
+ * each one, so the final message of a finished child is not missed. */
+void receiveChildInfo(void) {
+    if (server.child_info_pipe[0] == -1) return;
+
+    childInfoType information_type;
+    size_t cow, keys;
+    monotime cow_updated;
+    double progress;
+
+    for (;;) {
+        if (!readChildInfo(&information_type, &cow, &cow_updated, &keys, &progress))
+            break;
+        updateChildInfo(information_type, cow, cow_updated, keys, progress);
+    }
+}
diff --git a/src/cli_commands.c b/src/cli_commands.c
new file mode 100644
index 0000000..e56d48c
--- /dev/null
+++ b/src/cli_commands.c
@@ -0,0 +1,13 @@
+#include <stddef.h>
+#include "cli_commands.h"
+
+/* Definitions to configure commands.def to generate the structs declared in
+ * cli_commands.h. Each MAKE_* macro keeps only the subset of the full
+ * commands-table fields that commandDocs / cliCommandArg store. */
+#define MAKE_CMD(name,summary,complexity,since,doc_flags,replaced,deprecated,group,group_enum,history,num_history,tips,num_tips,function,arity,flags,acl,key_specs,key_specs_num,get_keys,numargs) name,summary,group,since,numargs
+#define MAKE_ARG(name,type,key_spec_index,token,summary,since,flags,numsubargs,deprecated_since) name,type,token,since,flags,numsubargs
+/* Emit argument entries as cliCommandArg and the table as commandDocs. */
+#define COMMAND_ARG cliCommandArg
+#define COMMAND_STRUCT commandDocs
+/* History / tips / key-spec tables are server-only; skip their generation. */
+#define SKIP_CMD_HISTORY_TABLE
+#define SKIP_CMD_TIPS_TABLE
+#define SKIP_CMD_KEY_SPECS_TABLE
+
+#include "commands.def"
diff --git a/src/cli_commands.h b/src/cli_commands.h
new file mode 100644
index 0000000..eb5a476
--- /dev/null
+++ b/src/cli_commands.h
@@ -0,0 +1,46 @@
+/* This file is used by redis-cli in place of server.h when including commands.c
+ * It contains alternative structs which omit the parts of the commands table
+ * that are not suitable for redis-cli, e.g. the command proc. */
+
+#ifndef __REDIS_CLI_COMMANDS_H
+#define __REDIS_CLI_COMMANDS_H
+
+#include <stddef.h>
+#include "commands.h"
+
+/* Syntax specifications for a command argument.
+ * Fields up to 'numsubargs' are populated from commands.def via the
+ * MAKE_ARG macro in cli_commands.c. */
+typedef struct cliCommandArg {
+    char *name;
+    redisCommandArgType type;
+    char *token;              /* Literal keyword introducing the arg, or NULL. */
+    char *since;
+    int flags;
+    int numsubargs;
+    struct cliCommandArg *subargs;
+    const char *display_text; /* Presumably a pre-rendered form for hints — set outside this header; verify at use site. */
+
+    /*
+     * For use at runtime.
+     * Fields used to keep track of input word matches for command-line hinting.
+     */
+    int matched;       /* How many input words have been matched by this argument? */
+    int matched_token; /* Has the token been matched? */
+    int matched_name;  /* Has the name been matched? */
+    int matched_all;   /* Has the whole argument been consumed (no hint needed)? */
+} cliCommandArg;
+
+/* Command documentation info used for help output.
+ * name/summary/group/since/numargs come from commands.def via MAKE_CMD. */
+struct commandDocs {
+    char *name;
+    char *summary;
+    char *group;
+    char *since;
+    int numargs;
+    cliCommandArg *args;             /* An array of the command arguments. */
+    struct commandDocs *subcommands; /* NULL-name-terminated? — populated by the generated table; confirm there. */
+    char *params;                    /* A string describing the syntax of the command arguments. */
+};
+
+/* Generated table of all commands (defined by commands.def expansion). */
+extern struct commandDocs redisCommandTable[];
+
+#endif
diff --git a/src/cli_common.c b/src/cli_common.c
new file mode 100644
index 0000000..421e7d3
--- /dev/null
+++ b/src/cli_common.c
@@ -0,0 +1,408 @@
+/* CLI (command line interface) common methods
+ *
+ * Copyright (c) 2020, Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fmacros.h"
+#include "cli_common.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <hiredis.h>
+#include <sdscompat.h> /* Use hiredis' sds compat header that maps sds calls to their hi_ variants */
+#include <sds.h> /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef USE_OPENSSL
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <hiredis_ssl.h>
+#endif
+
+#define UNUSED(V) ((void) V)
+
+/* Wrapper around redisSecureConnection to avoid hiredis_ssl dependencies if
+ * not building with TLS support.
+ *
+ * Performs the TLS handshake setup for 'c' using the given config. On
+ * failure *err is set to a static message and REDIS_ERR is returned.
+ * Without OpenSSL this is a no-op that returns REDIS_OK.
+ */
+int cliSecureConnection(redisContext *c, cliSSLconfig config, const char **err) {
+#ifdef USE_OPENSSL
+    /* The SSL_CTX is created lazily on the first call and then shared by
+     * every later connection in this process. On a configuration error it
+     * is freed and reset to NULL (see 'error:') so a later call retries. */
+    static SSL_CTX *ssl_ctx = NULL;
+
+    if (!ssl_ctx) {
+        ssl_ctx = SSL_CTX_new(SSLv23_client_method());
+        if (!ssl_ctx) {
+            *err = "Failed to create SSL_CTX";
+            goto error;
+        }
+        /* Disable the legacy SSLv2/SSLv3 protocols. */
+        SSL_CTX_set_options(ssl_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3);
+        SSL_CTX_set_verify(ssl_ctx, config.skip_cert_verify ? SSL_VERIFY_NONE : SSL_VERIFY_PEER, NULL);
+
+        /* Trust anchors: explicit file/dir when given, otherwise the
+         * system default verify paths. */
+        if (config.cacert || config.cacertdir) {
+            if (!SSL_CTX_load_verify_locations(ssl_ctx, config.cacert, config.cacertdir)) {
+                *err = "Invalid CA Certificate File/Directory";
+                goto error;
+            }
+        } else {
+            if (!SSL_CTX_set_default_verify_paths(ssl_ctx)) {
+                *err = "Failed to use default CA paths";
+                goto error;
+            }
+        }
+
+        /* Optional client certificate / key for mutual TLS. */
+        if (config.cert && !SSL_CTX_use_certificate_chain_file(ssl_ctx, config.cert)) {
+            *err = "Invalid client certificate";
+            goto error;
+        }
+
+        if (config.key && !SSL_CTX_use_PrivateKey_file(ssl_ctx, config.key, SSL_FILETYPE_PEM)) {
+            *err = "Invalid private key";
+            goto error;
+        }
+        if (config.ciphers && !SSL_CTX_set_cipher_list(ssl_ctx, config.ciphers)) {
+            *err = "Error while configuring ciphers";
+            goto error;
+        }
+#ifdef TLS1_3_VERSION
+        /* TLSv1.3 cipher suites use a separate API, available only when the
+         * OpenSSL headers define TLS1_3_VERSION. */
+        if (config.ciphersuites && !SSL_CTX_set_ciphersuites(ssl_ctx, config.ciphersuites)) {
+            *err = "Error while setting cypher suites";
+            goto error;
+        }
+#endif
+    }
+
+    /* Per-connection SSL object; failures past this point must not destroy
+     * the shared SSL_CTX, hence no 'goto error' below. */
+    SSL *ssl = SSL_new(ssl_ctx);
+    if (!ssl) {
+        *err = "Failed to create SSL object";
+        return REDIS_ERR;
+    }
+
+    if (config.sni && !SSL_set_tlsext_host_name(ssl, config.sni)) {
+        *err = "Failed to configure SNI";
+        SSL_free(ssl);
+        return REDIS_ERR;
+    }
+
+    return redisInitiateSSL(c, ssl);
+
+error:
+    SSL_CTX_free(ssl_ctx);
+    ssl_ctx = NULL;
+    return REDIS_ERR;
+#else
+    (void) config;
+    (void) c;
+    (void) err;
+    return REDIS_OK;
+#endif
+}
+
+/* Wrapper around hiredis to allow arbitrary reads and writes.
+ *
+ * We piggybacks on top of hiredis to achieve transparent TLS support,
+ * and use its internal buffers so it can co-exist with commands
+ * previously/later issued on the connection.
+ *
+ * Interface is close to enough to read()/write() so things should mostly
+ * work transparently.
+ */
+
+/* Write a raw buffer through a redisContext. If we already have something
+ * in the buffer (leftovers from hiredis operations) it will be written
+ * as well.
+ *
+ * Returns the number of bytes of 'buf' accounted as written, 0 when only
+ * pre-existing buffered data went out, or -1 on error (with the appended
+ * data rolled back from the buffer).
+ */
+ssize_t cliWriteConn(redisContext *c, const char *buf, size_t buf_len)
+{
+    int done = 0;
+
+    /* Append data to buffer which is *usually* expected to be empty
+     * but we don't assume that, and write.
+     */
+    c->obuf = sdscatlen(c->obuf, buf, buf_len);
+    if (redisBufferWrite(c, &done) == REDIS_ERR) {
+        if (!(c->flags & REDIS_BLOCK))
+            errno = EAGAIN;
+
+        /* On error, we assume nothing was written and we roll back the
+         * buffer to its original state.
+         * NOTE(review): if redisBufferWrite() could partially write before
+         * reporting REDIS_ERR, this rollback would re-queue bytes already
+         * sent — confirm against hiredis semantics. */
+        if (sdslen(c->obuf) > buf_len)
+            sdsrange(c->obuf, 0, -(buf_len+1)); /* drop the trailing buf_len bytes (our append) */
+        else
+            sdsclear(c->obuf);
+
+        return -1;
+    }
+
+    /* If we're done, free up everything. We may have written more than
+     * buf_len (if c->obuf was not initially empty) but we don't have to
+     * tell.
+     */
+    if (done) {
+        sdsclear(c->obuf);
+        return buf_len;
+    }
+
+    /* Write was successful but we have some leftovers which we should
+     * remove from the buffer.
+     *
+     * Do we still have data that was there prior to our buf? If so,
+     * restore buffer to it's original state and report no new data was
+     * written.
+     */
+    if (sdslen(c->obuf) > buf_len) {
+        sdsrange(c->obuf, 0, -(buf_len+1));
+        return 0;
+    }
+
+    /* At this point we're sure no prior data is left. We flush the buffer
+     * and report how much we've written.
+     */
+    size_t left = sdslen(c->obuf);
+    sdsclear(c->obuf);
+    return buf_len - left;
+}
+
+/* Wrapper around OpenSSL (libssl and libcrypto) initialisation.
+ * Loads error strings and initialises the library; a no-op when built
+ * without OpenSSL. Always returns REDIS_OK. */
+int cliSecureInit(void)
+{
+#ifdef USE_OPENSSL
+    ERR_load_crypto_strings();
+    SSL_load_error_strings();
+    SSL_library_init();
+#endif
+    return REDIS_OK;
+}
+
+/* Create an sds from stdin.
+ *
+ * Reads standard input until EOF and returns the accumulated bytes as an
+ * sds string (the caller owns it). On a read error a message is printed
+ * and the process exits. */
+sds readArgFromStdin(void) {
+    char buf[1024];
+    sds arg = sdsempty();
+
+    while(1) {
+        /* read() returns ssize_t; an int would mix sign conventions and
+         * could truncate on platforms where ssize_t is wider. Use
+         * sizeof(buf) so the buffer size is stated once. */
+        ssize_t nread = read(fileno(stdin),buf,sizeof(buf));
+
+        if (nread == 0) break;
+        else if (nread == -1) {
+            perror("Reading from standard input");
+            exit(1);
+        }
+        arg = sdscatlen(arg,buf,nread);
+    }
+    return arg;
+}
+
+/* Create an sds array from argv, either as-is or by dequoting every
+ * element. When quoted is non-zero, may return a NULL to indicate an
+ * invalid quoted string.
+ *
+ * The caller should free the resulting array of sds strings with
+ * sdsfreesplitres().
+ */
+sds *getSdsArrayFromArgv(int argc,char **argv, int quoted) {
+    sds *res = sds_malloc(sizeof(sds) * argc);
+
+    for (int j = 0; j < argc; j++) {
+        if (!quoted) {
+            res[j] = sdsnew(argv[j]);
+            continue;
+        }
+        res[j] = unquoteCString(argv[j]);
+        if (!res[j]) {
+            /* Invalid quoting: release what we built so far and bail out. */
+            while (--j >= 0) sdsfree(res[j]);
+            sds_free(res);
+            return NULL;
+        }
+    }
+
+    return res;
+}
+
+/* Unquote a null-terminated string and return it as a binary-safe sds.
+ * Returns NULL when the input does not parse as exactly one quoted token. */
+sds unquoteCString(char *str) {
+    int count;
+    sds res = NULL;
+    sds *tokens = sdssplitargs(str, &count);
+
+    if (tokens) {
+        if (count == 1) {
+            /* Steal the single token so sdsfreesplitres() won't free it. */
+            res = tokens[0];
+            tokens[0] = NULL;
+        }
+        sdsfreesplitres(tokens, count);
+    }
+
+    return res;
+}
+
+
+/* URL-style percent decoding. */
+#define isHexChar(c) (isdigit(c) || ((c) >= 'a' && (c) <= 'f'))
+#define decodeHexChar(c) (isdigit(c) ? (c) - '0' : (c) - 'a' + 10)
+#define decodeHex(h, l) ((decodeHexChar(h) << 4) + decodeHexChar(l))
+
+/* Percent-decode 'len' bytes starting at 'pe' and return the result as a
+ * newly allocated sds (the caller owns it). On malformed input (a '%' not
+ * followed by two hex digits inside the span) an error is printed and the
+ * process exits. */
+static sds percentDecode(const char *pe, size_t len) {
+    const char *end = pe + len;
+    sds ret = sdsempty();
+    const char *curr = pe;
+
+    while (curr < end) {
+        if (*curr == '%') {
+            /* A '%' needs two more characters inside the span, i.e. at
+             * least 3 bytes ("%XY") from 'curr': the previous check of
+             * (end - curr) < 2 allowed reading one byte past 'end' for a
+             * trailing "%X". */
+            if ((end - curr) < 3) {
+                fprintf(stderr, "Incomplete URI encoding\n");
+                exit(1);
+            }
+
+            /* Cast to unsigned char before tolower(): passing a negative
+             * char to <ctype.h> functions is undefined behavior
+             * (CERT STR37-C). */
+            char h = tolower((unsigned char)*(++curr));
+            char l = tolower((unsigned char)*(++curr));
+            if (!isHexChar(h) || !isHexChar(l)) {
+                fprintf(stderr, "Illegal character in URI encoding\n");
+                exit(1);
+            }
+            char c = decodeHex(h, l);
+            ret = sdscatlen(ret, &c, 1);
+            curr++;
+        } else {
+            ret = sdscatlen(ret, curr++, 1);
+        }
+    }
+
+    return ret;
+}
+
+/* Parse a URI and extract the server connection information.
+ * URI scheme is based on the provisional specification[1] excluding support
+ * for query parameters. Valid URIs are:
+ * scheme: "redis://"
+ * authority: [[<username> ":"] <password> "@"] [<hostname> [":" <port>]]
+ * path: ["/" [<db>]]
+ *
+ * Results are stored into *connInfo (hostip, hostport, auth, user,
+ * input_dbnum); with OpenSSL, *tls_flag is set for "rediss://". On an
+ * invalid scheme an error is printed and the process exits.
+ *
+ * [1]: https://www.iana.org/assignments/uri-schemes/prov/redis */
+void parseRedisUri(const char *uri, const char* tool_name, cliConnInfo *connInfo, int *tls_flag) {
+#ifdef USE_OPENSSL
+    UNUSED(tool_name);
+#else
+    UNUSED(tls_flag);
+#endif
+
+    const char *scheme = "redis://";
+    const char *tlsscheme = "rediss://";
+    const char *curr = uri;
+    const char *end = uri + strlen(uri);
+    const char *userinfo, *username, *port, *host, *path;
+
+    /* URI must start with a valid scheme. */
+    if (!strncasecmp(tlsscheme, curr, strlen(tlsscheme))) {
+#ifdef USE_OPENSSL
+        *tls_flag = 1;
+        curr += strlen(tlsscheme);
+#else
+        fprintf(stderr,"rediss:// is only supported when %s is compiled with OpenSSL\n", tool_name);
+        exit(1);
+#endif
+    } else if (!strncasecmp(scheme, curr, strlen(scheme))) {
+        curr += strlen(scheme);
+    } else {
+        fprintf(stderr,"Invalid URI scheme\n");
+        exit(1);
+    }
+    if (curr == end) return;
+
+    /* Extract user info: the optional "user:pass@" prefix. Both parts are
+     * percent-decoded. */
+    if ((userinfo = strchr(curr,'@'))) {
+        if ((username = strchr(curr, ':')) && username < userinfo) {
+            connInfo->user = percentDecode(curr, username - curr);
+            curr = username + 1;
+        }
+
+        connInfo->auth = percentDecode(curr, userinfo - curr);
+        curr = userinfo + 1;
+    }
+    if (curr == end) return;
+
+    /* Extract host and port. 'host' is made to point at the LAST byte of
+     * the host part (inclusive), hence the "+ 1" in the sdsnewlen() below. */
+    path = strchr(curr, '/');
+    if (*curr != '/') {
+        host = path ? path - 1 : end;
+        if (*curr == '[') {
+            /* Bracketed IPv6 literal: "[addr]" with optional ":port". */
+            curr += 1;
+            if ((port = strchr(curr, ']'))) {
+                if (*(port+1) == ':') {
+                    /* NOTE(review): atoi() performs no validation; a bad
+                     * port yields 0 — confirm this is acceptable here. */
+                    connInfo->hostport = atoi(port + 2);
+                }
+                host = port - 1;
+            }
+        } else {
+            if ((port = strchr(curr, ':'))) {
+                connInfo->hostport = atoi(port + 1);
+                host = port - 1;
+            }
+        }
+        /* Replace any previously-set host (it is owned by connInfo). */
+        sdsfree(connInfo->hostip);
+        connInfo->hostip = sdsnewlen(curr, host - curr + 1);
+    }
+    curr = path ? path + 1 : end;
+    if (curr == end) return;
+
+    /* Extract database number from the path component. */
+    connInfo->input_dbnum = atoi(curr);
+}
+
+/* Release the sds strings held by a cliConnInfo. The struct is passed by
+ * value; only its heap-allocated members are freed.
+ * sdsfree() is a no-op on NULL, so per-field guards are unnecessary. */
+void freeCliConnInfo(cliConnInfo connInfo){
+    sdsfree(connInfo.hostip);
+    sdsfree(connInfo.auth);
+    sdsfree(connInfo.user);
+}
+
+/*
+ * Escape a Unicode string for JSON output (--json), following RFC 7159:
+ * https://datatracker.ietf.org/doc/html/rfc7159#section-7
+ *
+ * Appends the quoted, escaped form of p[0..len) to 's' and returns it.
+ */
+sds escapeJsonString(sds s, const char *p, size_t len) {
+    s = sdscatlen(s,"\"",1);
+    while(len--) {
+        unsigned char c = *(unsigned char *)p;
+        if (c == '\\' || c == '"') {
+            s = sdscatprintf(s,"\\%c",*p);
+        } else if (c == '\n') {
+            s = sdscatlen(s,"\\n",2);
+        } else if (c == '\f') {
+            s = sdscatlen(s,"\\f",2);
+        } else if (c == '\r') {
+            s = sdscatlen(s,"\\r",2);
+        } else if (c == '\t') {
+            s = sdscatlen(s,"\\t",2);
+        } else if (c == '\b') {
+            s = sdscatlen(s,"\\b",2);
+        } else {
+            /* Remaining control characters (<= 0x1f) get \u-escaped;
+             * everything else is copied through verbatim. */
+            s = sdscatprintf(s, c <= 0x1f ? "\\u%04x" : "%c", *p);
+        }
+        p++;
+    }
+    return sdscatlen(s,"\"",1);
+}
diff --git a/src/cli_common.h b/src/cli_common.h
new file mode 100644
index 0000000..cffdee6
--- /dev/null
+++ b/src/cli_common.h
@@ -0,0 +1,54 @@
+#ifndef __CLICOMMON_H
+#define __CLICOMMON_H
+
+#include <hiredis.h>
+#include <sdscompat.h> /* Use hiredis' sds compat header that maps sds calls to their hi_ variants */
+
+/* TLS configuration consumed by cliSecureConnection(). */
+typedef struct cliSSLconfig {
+    /* Requested SNI, or NULL */
+    char *sni;
+    /* CA Certificate file, or NULL */
+    char *cacert;
+    /* Directory where trusted CA certificates are stored, or NULL */
+    char *cacertdir;
+    /* Skip server certificate verification. */
+    int skip_cert_verify;
+    /* Client certificate to authenticate with, or NULL */
+    char *cert;
+    /* Private key file to authenticate with, or NULL */
+    char *key;
+    /* Preferred cipher list, or NULL (applies only to <= TLSv1.2) */
+    char* ciphers;
+    /* Preferred ciphersuites list, or NULL (applies only to TLSv1.3) */
+    char* ciphersuites;
+} cliSSLconfig;
+
+
+/* server connection information object, used to describe an ip:port pair, db num user input, and user:pass. */
+typedef struct cliConnInfo {
+    char *hostip;    /* Allocated as sds (freeCliConnInfo calls sdsfree on it). */
+    int hostport;
+    int input_dbnum; /* Database number as entered by the user. */
+    char *auth;      /* Password; allocated as sds, or NULL. */
+    char *user;      /* Username; allocated as sds, or NULL. */
+} cliConnInfo;
+
+/* Set up TLS on 'c'; *err receives a static message on failure. */
+int cliSecureConnection(redisContext *c, cliSSLconfig config, const char **err);
+
+/* write()-like helper layered on the hiredis output buffer. */
+ssize_t cliWriteConn(redisContext *c, const char *buf, size_t buf_len);
+
+/* One-time OpenSSL initialisation (no-op without TLS support). */
+int cliSecureInit(void);
+
+/* Read all of stdin into a single sds. */
+sds readArgFromStdin(void);
+
+/* Build an sds array from argv; may return NULL when 'quoted' parsing fails. */
+sds *getSdsArrayFromArgv(int argc,char **argv, int quoted);
+
+/* Unquote a single quoted token into a fresh sds, or NULL on error. */
+sds unquoteCString(char *str);
+
+/* Parse redis:// / rediss:// URIs into *connInfo (may exit on bad input). */
+void parseRedisUri(const char *uri, const char* tool_name, cliConnInfo *connInfo, int *tls_flag);
+
+/* Free the sds members of a cliConnInfo. */
+void freeCliConnInfo(cliConnInfo connInfo);
+
+/* Append the RFC 7159 JSON-escaped form of p[0..len) to 's'. */
+sds escapeJsonString(sds s, const char *p, size_t len);
+
+#endif /* __CLICOMMON_H */
diff --git a/src/cluster.c b/src/cluster.c
new file mode 100644
index 0000000..c985d0b
--- /dev/null
+++ b/src/cluster.c
@@ -0,0 +1,7717 @@
+/* Redis Cluster implementation.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "cluster.h"
+#include "endianconv.h"
+#include "connection.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <math.h>
+#include <ctype.h>
+
+/* A global reference to myself is handy to make code more clear.
+ * Myself always points to server.cluster->myself, that is, the clusterNode
+ * that represents this node. */
+clusterNode *myself = NULL;
+
+clusterNode *createClusterNode(char *nodename, int flags);
+void clusterAddNode(clusterNode *node);
+void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void clusterReadHandler(connection *conn);
+void clusterSendPing(clusterLink *link, int type);
+void clusterSendFail(char *nodename);
+void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request);
+void clusterUpdateState(void);
+int clusterNodeGetSlotBit(clusterNode *n, int slot);
+list *clusterGetNodesInMyShard(clusterNode *node);
+int clusterNodeAddSlave(clusterNode *master, clusterNode *slave);
+int clusterAddSlot(clusterNode *n, int slot);
+int clusterDelSlot(int slot);
+int clusterDelNodeSlots(clusterNode *node);
+int clusterNodeSetSlotBit(clusterNode *n, int slot);
+void clusterSetMaster(clusterNode *n);
+void clusterHandleSlaveFailover(void);
+void clusterHandleSlaveMigration(int max_slaves);
+int bitmapTestBit(unsigned char *bitmap, int pos);
+void bitmapSetBit(unsigned char *bitmap, int pos);
+void bitmapClearBit(unsigned char *bitmap, int pos);
+void clusterDoBeforeSleep(int flags);
+void clusterSendUpdate(clusterLink *link, clusterNode *node);
+void resetManualFailover(void);
+void clusterCloseAllSlots(void);
+void clusterSetNodeAsMaster(clusterNode *n);
+void clusterDelNode(clusterNode *delnode);
+sds representClusterNodeFlags(sds ci, uint16_t flags);
+sds representSlotInfo(sds ci, uint16_t *slot_info_pairs, int slot_info_pairs_count);
+void clusterFreeNodesSlotsInfo(clusterNode *n);
+uint64_t clusterGetMaxEpoch(void);
+int clusterBumpConfigEpochWithoutConsensus(void);
+void moduleCallClusterReceivers(const char *sender_id, uint64_t module_id, uint8_t type, const unsigned char *payload, uint32_t len);
+const char *clusterGetMessageTypeString(int type);
+void removeChannelsInSlot(unsigned int slot);
+unsigned int countKeysInSlot(unsigned int hashslot);
+unsigned int countChannelsInSlot(unsigned int hashslot);
+unsigned int delKeysInSlot(unsigned int hashslot);
+void clusterAddNodeToShard(const char *shard_id, clusterNode *node);
+list *clusterLookupNodeListByShardId(const char *shard_id);
+void clusterRemoveNodeFromShard(clusterNode *node);
+int auxShardIdSetter(clusterNode *n, void *value, int length);
+sds auxShardIdGetter(clusterNode *n, sds s);
+int auxShardIdPresent(clusterNode *n);
+int auxHumanNodenameSetter(clusterNode *n, void *value, int length);
+sds auxHumanNodenameGetter(clusterNode *n, sds s);
+int auxHumanNodenamePresent(clusterNode *n);
+int auxTcpPortSetter(clusterNode *n, void *value, int length);
+sds auxTcpPortGetter(clusterNode *n, sds s);
+int auxTcpPortPresent(clusterNode *n);
+int auxTlsPortSetter(clusterNode *n, void *value, int length);
+sds auxTlsPortGetter(clusterNode *n, sds s);
+int auxTlsPortPresent(clusterNode *n);
+static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen);
+
+int getNodeDefaultClientPort(clusterNode *n) {
+ return server.tls_cluster ? n->tls_port : n->tcp_port;
+}
+
+static inline int getNodeDefaultReplicationPort(clusterNode *n) {
+ return server.tls_replication ? n->tls_port : n->tcp_port;
+}
+
+static inline int getNodeClientPort(clusterNode *n, int use_tls) {
+ return use_tls ? n->tls_port : n->tcp_port;
+}
+
+static inline int defaultClientPort(void) {
+ return server.tls_cluster ? server.tls_port : server.port;
+}
+
+/* When a cluster command is called, we need to decide whether to return TLS info or
+ * non-TLS info by the client's connection type. However if the command is called by
+ * a Lua script or RM_call, there is no connection in the fake client, so we use
+ * server.current_client here to get the real client if available. And if it is not
+ * available (modules may call commands without a real client), we return the default
+ * info, which is determined by server.tls_cluster. */
+static int shouldReturnTlsInfo(void) {
+ if (server.current_client && server.current_client->conn) {
+ return connIsTLS(server.current_client->conn);
+ } else {
+ return server.tls_cluster;
+ }
+}
+
+/* Links to the next and previous entries for keys in the same slot are stored
+ * in the dict entry metadata. See Slot to Key API below. */
+#define dictEntryNextInSlot(de) \
+ (((clusterDictEntryMetadata *)dictEntryMetadata(de))->next)
+#define dictEntryPrevInSlot(de) \
+ (((clusterDictEntryMetadata *)dictEntryMetadata(de))->prev)
+
+#define isSlotUnclaimed(slot) \
+ (server.cluster->slots[slot] == NULL || \
+ bitmapTestBit(server.cluster->owner_not_claiming_slot, slot))
+
+#define RCVBUF_INIT_LEN 1024
+#define RCVBUF_MAX_PREALLOC (1<<20) /* 1MB */
+
/* Cluster nodes hash table, mapping nodes addresses 1.2.3.4:6379 to
 * clusterNode structures. Keys are sds strings owned by the dict; values
 * are not freed by the dict (NULL val destructor). */
dictType clusterNodesDictType = {
        dictSdsHash,                /* hash function */
        NULL,                       /* key dup */
        NULL,                       /* val dup */
        dictSdsKeyCompare,          /* key compare */
        dictSdsDestructor,          /* key destructor */
        NULL,                       /* val destructor */
        NULL                        /* allow to expand */
};
+
/* Cluster re-addition blacklist. This maps node IDs to the time
 * we can re-add this node. The goal is to avoid reading a removed
 * node for some time. Note the case-insensitive hash and compare
 * functions. */
dictType clusterNodesBlackListDictType = {
        dictSdsCaseHash,            /* hash function */
        NULL,                       /* key dup */
        NULL,                       /* val dup */
        dictSdsKeyCaseCompare,      /* key compare */
        dictSdsDestructor,          /* key destructor */
        NULL,                       /* val destructor */
        NULL                        /* allow to expand */
};
+
+static ConnectionType *connTypeOfCluster(void) {
+ if (server.tls_cluster) {
+ return connectionTypeTls();
+ }
+
+ return connectionTypeTcp();
+}
/* Cluster shards hash table, mapping shard id to list of nodes.
 * Keys are sds shard ids owned by the dict; values are lists released
 * through dictListDestructor. */
dictType clusterSdsToListType = {
        dictSdsHash,                /* hash function */
        NULL,                       /* key dup */
        NULL,                       /* val dup */
        dictSdsKeyCompare,          /* key compare */
        dictSdsDestructor,          /* key destructor */
        dictListDestructor,         /* val destructor */
        NULL                        /* allow to expand */
};
+
/* Aux fields are introduced in Redis 7.2 to support the persistence
 * of various important node properties, such as shard id, in nodes.conf.
 * Aux fields take an explicit format of name=value pairs and have no
 * intrinsic order among them. Aux fields are always grouped together
 * at the end of the second column of each row after the node's IP
 * address/port/cluster_port and the optional hostname. Aux fields
 * are separated by ','. */

/* Aux field setter function prototype
 * return C_OK when the update is successful; C_ERR otherwise */
typedef int (aux_value_setter) (clusterNode* n, void *value, int length);
/* Aux field getter function prototype
 * return an sds that is a concatenation of the input sds string and
 * the aux value */
typedef sds (aux_value_getter) (clusterNode* n, sds s);

/* Aux field presence predicate: non-zero when the field is set on 'n'. */
typedef int (aux_value_present) (clusterNode* n);

/* One entry per supported aux field: its nodes.conf name plus the
 * setter/getter/presence callbacks used by the config load/save code. */
typedef struct {
    char *field;
    aux_value_setter *setter;
    aux_value_getter *getter;
    aux_value_present *isPresent;
} auxFieldHandler;

/* Assign index to each aux field */
typedef enum {
    af_shard_id,
    af_human_nodename,
    af_tcp_port,
    af_tls_port,
    af_count, /* number of aux fields, valid as long as it stays last */
} auxFieldIndex;

/* Note that
 * 1. the order of the elements below must match that of their
 *    indices as defined in auxFieldIndex
 * 2. aux name can contain characters that pass the isValidAuxChar check only */
auxFieldHandler auxFieldHandlers[] = {
    {"shard-id", auxShardIdSetter, auxShardIdGetter, auxShardIdPresent},
    {"nodename", auxHumanNodenameSetter, auxHumanNodenameGetter, auxHumanNodenamePresent},
    {"tcp-port", auxTcpPortSetter, auxTcpPortGetter, auxTcpPortPresent},
    {"tls-port", auxTlsPortSetter, auxTlsPortGetter, auxTlsPortPresent},
};
+
/* Return non-zero if character 'c' is allowed in an aux field name or value.
 *
 * A character is accepted when it is alphanumeric, or when it is NOT one of
 * the explicitly rejected punctuation characters below (so symbols such as
 * '-', '.' and '_' are accepted).
 *
 * Fix: isalnum() has undefined behavior for negative arguments other than
 * EOF, and callers pass raw (possibly signed) char values; cast through
 * unsigned char as required by the C standard for <ctype.h> functions. */
int isValidAuxChar(int c) {
    return isalnum((unsigned char) c) || (strchr("!#$%&()*+:;<>?@[]^{|}~", c) == NULL);
}
+
/* Return non-zero if every one of the 'length' bytes of 's' passes the
 * isValidAuxChar check, zero as soon as one byte fails it. */
int isValidAuxString(char *s, unsigned int length) {
    unsigned int i = 0;
    while (i < length) {
        if (!isValidAuxChar(s[i])) return 0;
        i++;
    }
    return 1;
}
+
/* Setter for the "shard-id" aux field.
 *
 * 'value' must be a valid 40-character node id. On success the id is copied
 * into n->shard_id and the node is registered in the shard index; returns
 * C_OK. Returns C_ERR on a malformed id or when an existing replica of 'n'
 * disagrees on the shard id. */
int auxShardIdSetter(clusterNode *n, void *value, int length) {
    if (verifyClusterNodeId(value, length) == C_ERR) {
        return C_ERR;
    }
    memcpy(n->shard_id, value, CLUSTER_NAMELEN);
    /* if n already has replicas, make sure they all agree
     * on the shard id */
    for (int i = 0; i < n->numslaves; i++) {
        if (memcmp(n->slaves[i]->shard_id, n->shard_id, CLUSTER_NAMELEN) != 0) {
            /* NOTE(review): n->shard_id was already overwritten above, so on
             * this error path the node keeps the new id. Callers appear to
             * treat C_ERR as a fatal config error, so this is presumably
             * harmless -- confirm. */
            return C_ERR;
        }
    }
    clusterAddNodeToShard(value, n);
    return C_OK;
}
+
+sds auxShardIdGetter(clusterNode *n, sds s) {
+ return sdscatprintf(s, "%.40s", n->shard_id);
+}
+
+int auxShardIdPresent(clusterNode *n) {
+ return strlen(n->shard_id);
+}
+
/* Setter for the "nodename" aux field (human readable node name).
 * Returns C_OK when the stored name was updated (or already matches
 * 'value'), C_ERR when there is nothing to update. */
int auxHumanNodenameSetter(clusterNode *n, void *value, int length) {
    /* NOTE(review): strncmp compares only the first 'length' bytes, so a
     * stored name that merely starts with 'value' is treated as equal --
     * presumably acceptable for this use; confirm. */
    if (n && !strncmp(value, n->human_nodename, length)) {
        return C_OK;
    } else if (!n && (length == 0)) {
        return C_OK;
    }
    if (n) {
        n->human_nodename = sdscpylen(n->human_nodename, value, length);
    } else if (sdslen(n->human_nodename) != 0) {
        /* NOTE(review): this branch dereferences 'n' while n == NULL (the
         * if (n) above failed). It looks unreachable because callers pass a
         * valid node, but the NULL checks above suggest otherwise -- confirm
         * against upstream. */
        sdsclear(n->human_nodename);
    } else {
        return C_ERR;
    }
    return C_OK;
}
+
+sds auxHumanNodenameGetter(clusterNode *n, sds s) {
+ return sdscatprintf(s, "%s", n->human_nodename);
+}
+
+int auxHumanNodenamePresent(clusterNode *n) {
+ return sdslen(n->human_nodename);
+}
+
+int auxTcpPortSetter(clusterNode *n, void *value, int length) {
+ if (length > 5 || length < 1) {
+ return C_ERR;
+ }
+ char buf[length + 1];
+ memcpy(buf, (char*)value, length);
+ buf[length] = '\0';
+ n->tcp_port = atoi(buf);
+ return (n->tcp_port < 0 || n->tcp_port >= 65536) ? C_ERR : C_OK;
+}
+
+sds auxTcpPortGetter(clusterNode *n, sds s) {
+ return sdscatprintf(s, "%d", n->tcp_port);
+}
+
+int auxTcpPortPresent(clusterNode *n) {
+ return n->tcp_port >= 0 && n->tcp_port < 65536;
+}
+
+int auxTlsPortSetter(clusterNode *n, void *value, int length) {
+ if (length > 5 || length < 1) {
+ return C_ERR;
+ }
+ char buf[length + 1];
+ memcpy(buf, (char*)value, length);
+ buf[length] = '\0';
+ n->tls_port = atoi(buf);
+ return (n->tls_port < 0 || n->tls_port >= 65536) ? C_ERR : C_OK;
+}
+
+sds auxTlsPortGetter(clusterNode *n, sds s) {
+ return sdscatprintf(s, "%d", n->tls_port);
+}
+
+int auxTlsPortPresent(clusterNode *n) {
+ return n->tls_port >= 0 && n->tls_port < 65536;
+}
+
/* clusterLink send queue blocks. A block may sit on several links' send
 * queues at once; 'refcount' tracks how many. */
typedef struct {
    size_t totlen; /* Total length of this block including the message */
    int refcount;  /* Number of cluster link send msg queues containing the message */
    clusterMsg msg; /* Message payload; presumably the block is allocated with
                     * enough trailing room for the whole message (totlen) --
                     * confirm at the allocation site. */
} clusterMsgSendBlock;
+
+/* -----------------------------------------------------------------------------
+ * Initialization
+ * -------------------------------------------------------------------------- */
+
/* Load the cluster config from 'filename'.
 *
 * If the file does not exist or is zero-length (this may happen because
 * when we lock the nodes.conf file, we create a zero-length one for the
 * sake of locking if it does not already exist), C_ERR is returned.
 * If the configuration was loaded from the file, C_OK is returned.
 *
 * Any malformed line jumps to 'fmterr', which logs and calls exit(1);
 * for that reason per-line allocations are intentionally not all freed on
 * that path. */
int clusterLoadConfig(char *filename) {
    FILE *fp = fopen(filename,"r");
    struct stat sb;
    char *line;
    int maxline, j;

    if (fp == NULL) {
        if (errno == ENOENT) {
            /* Missing file: signal the caller to create a fresh config. */
            return C_ERR;
        } else {
            serverLog(LL_WARNING,
                "Loading the cluster node config from %s: %s",
                filename, strerror(errno));
            exit(1);
        }
    }

    if (redis_fstat(fileno(fp),&sb) == -1) {
        serverLog(LL_WARNING,
            "Unable to obtain the cluster node config file stat %s: %s",
            filename, strerror(errno));
        exit(1);
    }
    /* Check if the file is zero-length: if so return C_ERR to signal
     * we have to write the config. */
    if (sb.st_size == 0) {
        fclose(fp);
        return C_ERR;
    }

    /* Parse the file. Note that single lines of the cluster config file can
     * be really long as they include all the hash slots of the node.
     * This means in the worst possible case, half of the Redis slots will be
     * present in a single line, possibly in importing or migrating state, so
     * together with the node ID of the sender/receiver.
     *
     * To simplify we allocate 1024+CLUSTER_SLOTS*128 bytes per line. */
    maxline = 1024+CLUSTER_SLOTS*128;
    line = zmalloc(maxline);
    while(fgets(line,maxline,fp) != NULL) {
        int argc, aux_argc;
        sds *argv, *aux_argv;
        clusterNode *n, *master;
        char *p, *s;

        /* Skip blank lines, they can be created either by users manually
         * editing nodes.conf or by the config writing process if stopped
         * before the truncate() call. */
        if (line[0] == '\n' || line[0] == '\0') continue;

        /* Split the line into arguments for processing. */
        argv = sdssplitargs(line,&argc);
        if (argv == NULL) goto fmterr;

        /* Handle the special "vars" line. Don't pretend it is the last
         * line even if it actually is when generated by Redis. */
        if (strcasecmp(argv[0],"vars") == 0) {
            /* "vars" is followed by name/value pairs, so the total count
             * must be odd. */
            if (!(argc % 2)) goto fmterr;
            for (j = 1; j < argc; j += 2) {
                if (strcasecmp(argv[j],"currentEpoch") == 0) {
                    server.cluster->currentEpoch =
                        strtoull(argv[j+1],NULL,10);
                } else if (strcasecmp(argv[j],"lastVoteEpoch") == 0) {
                    server.cluster->lastVoteEpoch =
                        strtoull(argv[j+1],NULL,10);
                } else {
                    serverLog(LL_NOTICE,
                        "Skipping unknown cluster config variable '%s'",
                        argv[j]);
                }
            }
            sdsfreesplitres(argv,argc);
            continue;
        }

        /* Regular config lines have at least eight fields */
        if (argc < 8) {
            sdsfreesplitres(argv,argc);
            goto fmterr;
        }

        /* Create this node if it does not exist */
        if (verifyClusterNodeId(argv[0], sdslen(argv[0])) == C_ERR) {
            sdsfreesplitres(argv, argc);
            goto fmterr;
        }
        n = clusterLookupNode(argv[0], sdslen(argv[0]));
        if (!n) {
            n = createClusterNode(argv[0],0);
            clusterAddNode(n);
        }
        /* Format for the node address and auxiliary argument information:
         * ip:port[@cport][,hostname][,aux=val]*] */

        aux_argv = sdssplitlen(argv[1], sdslen(argv[1]), ",", 1, &aux_argc);
        if (aux_argv == NULL) {
            sdsfreesplitres(argv,argc);
            goto fmterr;
        }

        /* Hostname is an optional argument that defines the endpoint
         * that can be reported to clients instead of IP. */
        if (aux_argc > 1 && sdslen(aux_argv[1]) > 0) {
            n->hostname = sdscpy(n->hostname, aux_argv[1]);
        } else if (sdslen(n->hostname) != 0) {
            sdsclear(n->hostname);
        }

        /* All fields after hostname are auxiliary and they take on
         * the format of "aux=val" where both aux and val can contain
         * characters that pass the isValidAuxChar check only. The order
         * of the aux fields is insignificant. */
        int aux_tcp_port = 0;
        int aux_tls_port = 0;
        for (int i = 2; i < aux_argc; i++) {
            int field_argc;
            sds *field_argv;
            field_argv = sdssplitlen(aux_argv[i], sdslen(aux_argv[i]), "=", 1, &field_argc);
            if (field_argv == NULL || field_argc != 2) {
                /* Invalid aux field format */
                if (field_argv != NULL) sdsfreesplitres(field_argv, field_argc);
                sdsfreesplitres(argv,argc);
                goto fmterr;
            }

            /* Validate that both aux and value contain valid characters only */
            for (unsigned j = 0; j < 2; j++) {
                if (!isValidAuxString(field_argv[j],sdslen(field_argv[j]))){
                    /* Invalid aux field format */
                    sdsfreesplitres(field_argv, field_argc);
                    sdsfreesplitres(argv,argc);
                    goto fmterr;
                }
            }

            /* Note that we don't expect lots of aux fields in the foreseeable
             * future so a linear search is completely fine. */
            int field_found = 0;
            for (unsigned j = 0; j < numElements(auxFieldHandlers); j++) {
                if (sdslen(field_argv[0]) != strlen(auxFieldHandlers[j].field) ||
                    memcmp(field_argv[0], auxFieldHandlers[j].field, sdslen(field_argv[0])) != 0) {
                    continue;
                }
                field_found = 1;
                /* Remember whether the ports were given explicitly, so the
                 * legacy ip:port parsing below can be skipped for them. */
                aux_tcp_port |= j == af_tcp_port;
                aux_tls_port |= j == af_tls_port;
                if (auxFieldHandlers[j].setter(n, field_argv[1], sdslen(field_argv[1])) != C_OK) {
                    /* Invalid aux field format */
                    sdsfreesplitres(field_argv, field_argc);
                    sdsfreesplitres(argv,argc);
                    goto fmterr;
                }
            }

            if (field_found == 0) {
                /* Invalid aux field format */
                sdsfreesplitres(field_argv, field_argc);
                sdsfreesplitres(argv,argc);
                goto fmterr;
            }

            sdsfreesplitres(field_argv, field_argc);
        }
        /* Address and port */
        if ((p = strrchr(aux_argv[0],':')) == NULL) {
            sdsfreesplitres(aux_argv, aux_argc);
            sdsfreesplitres(argv,argc);
            goto fmterr;
        }
        *p = '\0';
        memcpy(n->ip,aux_argv[0],strlen(aux_argv[0])+1);
        char *port = p+1;
        char *busp = strchr(port,'@');
        if (busp) {
            *busp = '\0';
            busp++;
        }
        /* If neither TCP or TLS port is found in aux field, it is considered
         * an old version of nodes.conf file.*/
        if (!aux_tcp_port && !aux_tls_port) {
            if (server.tls_cluster) {
                n->tls_port = atoi(port);
            } else {
                n->tcp_port = atoi(port);
            }
        } else if (!aux_tcp_port) {
            n->tcp_port = atoi(port);
        } else if (!aux_tls_port) {
            n->tls_port = atoi(port);
        }
        /* In older versions of nodes.conf the "@busport" part is missing.
         * In this case we set it to the default offset of 10000 from the
         * base port. */
        n->cport = busp ? atoi(busp) : (getNodeDefaultClientPort(n) + CLUSTER_PORT_INCR);

        /* The plaintext port for client in a TLS cluster (n->pport) is not
         * stored in nodes.conf. It is received later over the bus protocol. */

        sdsfreesplitres(aux_argv, aux_argc);

        /* Parse flags */
        p = s = argv[2];
        while(p) {
            p = strchr(s,',');
            if (p) *p = '\0';
            if (!strcasecmp(s,"myself")) {
                serverAssert(server.cluster->myself == NULL);
                myself = server.cluster->myself = n;
                n->flags |= CLUSTER_NODE_MYSELF;
            } else if (!strcasecmp(s,"master")) {
                n->flags |= CLUSTER_NODE_MASTER;
            } else if (!strcasecmp(s,"slave")) {
                n->flags |= CLUSTER_NODE_SLAVE;
            } else if (!strcasecmp(s,"fail?")) {
                n->flags |= CLUSTER_NODE_PFAIL;
            } else if (!strcasecmp(s,"fail")) {
                n->flags |= CLUSTER_NODE_FAIL;
                n->fail_time = mstime();
            } else if (!strcasecmp(s,"handshake")) {
                n->flags |= CLUSTER_NODE_HANDSHAKE;
            } else if (!strcasecmp(s,"noaddr")) {
                n->flags |= CLUSTER_NODE_NOADDR;
            } else if (!strcasecmp(s,"nofailover")) {
                n->flags |= CLUSTER_NODE_NOFAILOVER;
            } else if (!strcasecmp(s,"noflags")) {
                /* nothing to do */
            } else {
                serverPanic("Unknown flag in redis cluster config file");
            }
            if (p) s = p+1;
        }

        /* Get master if any. Set the master and populate master's
         * slave list. */
        if (argv[3][0] != '-') {
            if (verifyClusterNodeId(argv[3], sdslen(argv[3])) == C_ERR) {
                sdsfreesplitres(argv, argc);
                goto fmterr;
            }
            master = clusterLookupNode(argv[3], sdslen(argv[3]));
            if (!master) {
                master = createClusterNode(argv[3],0);
                clusterAddNode(master);
            }
            /* shard_id can be absent if we are loading a nodes.conf generated
             * by an older version of Redis; we should follow the primary's
             * shard_id in this case */
            if (auxFieldHandlers[af_shard_id].isPresent(n) == 0) {
                memcpy(n->shard_id, master->shard_id, CLUSTER_NAMELEN);
                clusterAddNodeToShard(master->shard_id, n);
            } else if (clusterGetNodesInMyShard(master) != NULL &&
                       memcmp(master->shard_id, n->shard_id, CLUSTER_NAMELEN) != 0)
            {
                /* If the primary has been added to a shard, make sure this
                 * node has the same persisted shard id as the primary. */
                goto fmterr;
            }
            n->slaveof = master;
            clusterNodeAddSlave(master,n);
        } else if (auxFieldHandlers[af_shard_id].isPresent(n) == 0) {
            /* n is a primary but it does not have a persisted shard_id.
             * This happens if we are loading a nodes.conf generated by
             * an older version of Redis. We should manually update the
             * shard membership in this case */
            clusterAddNodeToShard(n->shard_id, n);
        }

        /* Set ping sent / pong received timestamps */
        if (atoi(argv[4])) n->ping_sent = mstime();
        if (atoi(argv[5])) n->pong_received = mstime();

        /* Set configEpoch for this node.
         * If the node is a replica, set its config epoch to 0.
         * If it's a primary, load the config epoch from the configuration file. */
        n->configEpoch = (nodeIsSlave(n) && n->slaveof) ? 0 : strtoull(argv[6],NULL,10);

        /* Populate hash slots served by this instance. */
        for (j = 8; j < argc; j++) {
            int start, stop;

            if (argv[j][0] == '[') {
                /* Here we handle migrating / importing slots */
                int slot;
                char direction;
                clusterNode *cn;

                p = strchr(argv[j],'-');
                serverAssert(p != NULL);
                *p = '\0';
                direction = p[1]; /* Either '>' or '<' */
                slot = atoi(argv[j]+1);
                if (slot < 0 || slot >= CLUSTER_SLOTS) {
                    sdsfreesplitres(argv,argc);
                    goto fmterr;
                }
                p += 3;

                char *pr = strchr(p, ']');
                /* NOTE(review): node_len is computed before the NULL check on
                 * 'pr'; 'pr - p' with pr == NULL is undefined behavior even
                 * though the short-circuit below avoids *using* the value.
                 * The subtraction should move after the check -- confirm
                 * against upstream. */
                size_t node_len = pr - p;
                if (pr == NULL || verifyClusterNodeId(p, node_len) == C_ERR) {
                    sdsfreesplitres(argv, argc);
                    goto fmterr;
                }
                cn = clusterLookupNode(p, CLUSTER_NAMELEN);
                if (!cn) {
                    cn = createClusterNode(p,0);
                    clusterAddNode(cn);
                }
                if (direction == '>') {
                    server.cluster->migrating_slots_to[slot] = cn;
                } else {
                    server.cluster->importing_slots_from[slot] = cn;
                }
                continue;
            } else if ((p = strchr(argv[j],'-')) != NULL) {
                *p = '\0';
                start = atoi(argv[j]);
                stop = atoi(p+1);
            } else {
                start = stop = atoi(argv[j]);
            }
            if (start < 0 || start >= CLUSTER_SLOTS ||
                stop < 0 || stop >= CLUSTER_SLOTS)
            {
                sdsfreesplitres(argv,argc);
                goto fmterr;
            }
            while(start <= stop) clusterAddSlot(n, start++);
        }

        sdsfreesplitres(argv,argc);
    }
    /* Config sanity check */
    if (server.cluster->myself == NULL) goto fmterr;

    zfree(line);
    fclose(fp);

    /* 'myself' is non-NULL here thanks to the sanity check above. */
    serverLog(LL_NOTICE,"Node configuration loaded, I'm %.40s", myself->name);

    /* Something that should never happen: currentEpoch smaller than
     * the max epoch found in the nodes configuration. However we handle this
     * as some form of protection against manual editing of critical files. */
    if (clusterGetMaxEpoch() > server.cluster->currentEpoch) {
        server.cluster->currentEpoch = clusterGetMaxEpoch();
    }
    return C_OK;

fmterr:
    serverLog(LL_WARNING,
        "Unrecoverable error: corrupted cluster config file \"%s\".", line);
    zfree(line);
    if (fp) fclose(fp);
    exit(1);
}
+
+/* Cluster node configuration is exactly the same as CLUSTER NODES output.
+ *
+ * This function writes the node config and returns 0, on error -1
+ * is returned.
+ *
+ * Note: we need to write the file in an atomic way from the point of view
+ * of the POSIX filesystem semantics, so that if the server is stopped
+ * or crashes during the write, we'll end with either the old file or the
+ * new one. Since we have the full payload to write available we can use
+ * a single write to write the whole file. If the pre-existing file was
+ * bigger we pad our payload with newlines that are anyway ignored and truncate
+ * the file afterward. */
+int clusterSaveConfig(int do_fsync) {
+ sds ci,tmpfilename;
+ size_t content_size,offset = 0;
+ ssize_t written_bytes;
+ int fd = -1;
+ int retval = C_ERR;
+
+ server.cluster->todo_before_sleep &= ~CLUSTER_TODO_SAVE_CONFIG;
+
+ /* Get the nodes description and concatenate our "vars" directive to
+ * save currentEpoch and lastVoteEpoch. */
+ ci = clusterGenNodesDescription(NULL, CLUSTER_NODE_HANDSHAKE, 0);
+ ci = sdscatprintf(ci,"vars currentEpoch %llu lastVoteEpoch %llu\n",
+ (unsigned long long) server.cluster->currentEpoch,
+ (unsigned long long) server.cluster->lastVoteEpoch);
+ content_size = sdslen(ci);
+
+ /* Create a temp file with the new content. */
+ tmpfilename = sdscatfmt(sdsempty(),"%s.tmp-%i-%I",
+ server.cluster_configfile,(int) getpid(),mstime());
+ if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) {
+ serverLog(LL_WARNING,"Could not open temp cluster config file: %s",strerror(errno));
+ goto cleanup;
+ }
+
+ while (offset < content_size) {
+ written_bytes = write(fd,ci + offset,content_size - offset);
+ if (written_bytes <= 0) {
+ if (errno == EINTR) continue;
+ serverLog(LL_WARNING,"Failed after writing (%zd) bytes to tmp cluster config file: %s",
+ offset,strerror(errno));
+ goto cleanup;
+ }
+ offset += written_bytes;
+ }
+
+ if (do_fsync) {
+ server.cluster->todo_before_sleep &= ~CLUSTER_TODO_FSYNC_CONFIG;
+ if (redis_fsync(fd) == -1) {
+ serverLog(LL_WARNING,"Could not sync tmp cluster config file: %s",strerror(errno));
+ goto cleanup;
+ }
+ }
+
+ if (rename(tmpfilename, server.cluster_configfile) == -1) {
+ serverLog(LL_WARNING,"Could not rename tmp cluster config file: %s",strerror(errno));
+ goto cleanup;
+ }
+
+ if (do_fsync) {
+ if (fsyncFileDir(server.cluster_configfile) == -1) {
+ serverLog(LL_WARNING,"Could not sync cluster config file dir: %s",strerror(errno));
+ goto cleanup;
+ }
+ }
+ retval = C_OK; /* If we reached this point, everything is fine. */
+
+cleanup:
+ if (fd != -1) close(fd);
+ if (retval) unlink(tmpfilename);
+ sdsfree(tmpfilename);
+ sdsfree(ci);
+ return retval;
+}
+
+void clusterSaveConfigOrDie(int do_fsync) {
+ if (clusterSaveConfig(do_fsync) == -1) {
+ serverLog(LL_WARNING,"Fatal: can't update cluster config file.");
+ exit(1);
+ }
+}
+
/* Lock the cluster config using flock(), and retain the file descriptor used to
 * acquire the lock so that the file will be locked as long as the process is up.
 *
 * This works because we always update nodes.conf with a new version
 * in-place, reopening the file, and writing to it in place (later adjusting
 * the length with ftruncate()).
 *
 * On success C_OK is returned, otherwise an error is logged and
 * the function returns C_ERR to signal a lock was not acquired. */
int clusterLockConfig(char *filename) {
/* flock() does not exist on Solaris
 * and a fcntl-based solution won't help, as we constantly re-open that file,
 * which will release _all_ locks anyway
 */
#if !defined(__sun)
    /* To lock it, we need to open the file in a way it is created if
     * it does not exist, otherwise there is a race condition with other
     * processes. */
    int fd = open(filename,O_WRONLY|O_CREAT|O_CLOEXEC,0644);
    if (fd == -1) {
        serverLog(LL_WARNING,
            "Can't open %s in order to acquire a lock: %s",
            filename, strerror(errno));
        return C_ERR;
    }

    /* LOCK_NB: fail immediately instead of blocking when another process
     * (another node using the same config file) already holds the lock. */
    if (flock(fd,LOCK_EX|LOCK_NB) == -1) {
        if (errno == EWOULDBLOCK) {
            serverLog(LL_WARNING,
                "Sorry, the cluster configuration file %s is already used "
                "by a different Redis Cluster node. Please make sure that "
                "different nodes use different cluster configuration "
                "files.", filename);
        } else {
            serverLog(LL_WARNING,
                "Impossible to lock %s: %s", filename, strerror(errno));
        }
        close(fd);
        return C_ERR;
    }
    /* Lock acquired: leak the 'fd' by not closing it until shutdown time, so that
     * we'll retain the lock to the file as long as the process exists.
     *
     * After fork, the child process will get the fd opened by the parent process,
     * we need save `fd` to `cluster_config_file_lock_fd`, so that in redisFork(),
     * it will be closed in the child process.
     * If it is not closed, when the main process is killed -9, but the child process
     * (redis-aof-rewrite) is still alive, the fd(lock) will still be held by the
     * child process, and the main process will fail to get lock, means fail to start.
     *
     * (O_CLOEXEC above only closes the fd across exec(); forked children that
     * never exec, like the AOF rewrite child, still need the explicit close
     * described here.) */
    server.cluster_config_file_lock_fd = fd;
#else
    UNUSED(filename);
#endif /* __sun */

    return C_OK;
}
+
+/* Derives our ports to be announced in the cluster bus. */
+void deriveAnnouncedPorts(int *announced_tcp_port, int *announced_tls_port,
+ int *announced_cport) {
+ /* Config overriding announced ports. */
+ *announced_tcp_port = server.cluster_announce_port ?
+ server.cluster_announce_port : server.port;
+ *announced_tls_port = server.cluster_announce_tls_port ?
+ server.cluster_announce_tls_port : server.tls_port;
+ /* Derive cluster bus port. */
+ if (server.cluster_announce_bus_port) {
+ *announced_cport = server.cluster_announce_bus_port;
+ } else if (server.cluster_port) {
+ *announced_cport = server.cluster_port;
+ } else {
+ *announced_cport = defaultClientPort() + CLUSTER_PORT_INCR;
+ }
+}
+
+/* Some flags (currently just the NOFAILOVER flag) may need to be updated
+ * in the "myself" node based on the current configuration of the node,
+ * that may change at runtime via CONFIG SET. This function changes the
+ * set of flags in myself->flags accordingly. */
+void clusterUpdateMyselfFlags(void) {
+ if (!myself) return;
+ int oldflags = myself->flags;
+ int nofailover = server.cluster_slave_no_failover ?
+ CLUSTER_NODE_NOFAILOVER : 0;
+ myself->flags &= ~CLUSTER_NODE_NOFAILOVER;
+ myself->flags |= nofailover;
+ if (myself->flags != oldflags) {
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE);
+ }
+}
+
+
/* We want to take myself->port/cport/pport in sync with the
* cluster-announce-port/cluster-announce-bus-port/cluster-announce-tls-port option.
* The option can be set at runtime via CONFIG SET. */
void clusterUpdateMyselfAnnouncedPorts(void) {
    if (!myself) return; /* Nothing to update without a local node. */
    deriveAnnouncedPorts(&myself->tcp_port,&myself->tls_port,&myself->cport);
}
+
/* We want to take myself->ip in sync with the cluster-announce-ip option.
* The option can be set at runtime via CONFIG SET. */
void clusterUpdateMyselfIp(void) {
    if (!myself) return;
    /* Last announce-ip value we applied, kept in a function-static so the
     * comparison survives across calls (assumed to run only on the main
     * thread -- confirm). */
    static char *prev_ip = NULL;
    char *curr_ip = server.cluster_announce_ip;
    int changed = 0;

    /* Changed when exactly one side is NULL, or when both are set but the
     * strings differ. */
    if (prev_ip == NULL && curr_ip != NULL) changed = 1;
    else if (prev_ip != NULL && curr_ip == NULL) changed = 1;
    else if (prev_ip && curr_ip && strcmp(prev_ip,curr_ip)) changed = 1;

    if (changed) {
        if (prev_ip) zfree(prev_ip);
        prev_ip = curr_ip;

        if (curr_ip) {
            /* We always take a copy of the previous IP address, by
             * duplicating the string. This way later we can check if
             * the address really changed. (prev_ip currently aliases
             * curr_ip, so this duplicates the new value.) */
            prev_ip = zstrdup(prev_ip);
            redis_strlcpy(myself->ip,server.cluster_announce_ip,NET_IP_STR_LEN);
        } else {
            myself->ip[0] = '\0'; /* Force autodetection. */
        }
    }
}
+
+/* Update the hostname for the specified node with the provided C string. */
+static void updateAnnouncedHostname(clusterNode *node, char *new) {
+ /* Previous and new hostname are the same, no need to update. */
+ if (new && !strcmp(new, node->hostname)) {
+ return;
+ } else if (!new && (sdslen(node->hostname) == 0)) {
+ return;
+ }
+
+ if (new) {
+ node->hostname = sdscpy(node->hostname, new);
+ } else if (sdslen(node->hostname) != 0) {
+ sdsclear(node->hostname);
+ }
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
+}
+
+static void updateAnnouncedHumanNodename(clusterNode *node, char *new) {
+ if (new && !strcmp(new, node->human_nodename)) {
+ return;
+ } else if (!new && (sdslen(node->human_nodename) == 0)) {
+ return;
+ }
+
+ if (new) {
+ node->human_nodename = sdscpy(node->human_nodename, new);
+ } else if (sdslen(node->human_nodename) != 0) {
+ sdsclear(node->human_nodename);
+ }
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
+}
+
+
/* Move 'node' into the shard identified by 'shard_id' (a CLUSTER_NAMELEN
 * binary id) if it is not already there, updating the shards dict and
 * scheduling a config save. If we are a replica of 'node', follow our
 * master into the new shard as well. 'shard_id' may be NULL (no-op). */
static void updateShardId(clusterNode *node, const char *shard_id) {
    if (shard_id && memcmp(node->shard_id, shard_id, CLUSTER_NAMELEN) != 0) {
        /* Detach from the old shard before rewriting the id, since the
         * shards dict is keyed by the current shard_id. */
        clusterRemoveNodeFromShard(node);
        memcpy(node->shard_id, shard_id, CLUSTER_NAMELEN);
        clusterAddNodeToShard(shard_id, node);
        clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
    }
    if (shard_id && myself != node && myself->slaveof == node) {
        if (memcmp(myself->shard_id, shard_id, CLUSTER_NAMELEN) != 0) {
            /* shard-id can diverge right after a rolling upgrade
             * from pre-7.2 releases */
            clusterRemoveNodeFromShard(myself);
            memcpy(myself->shard_id, shard_id, CLUSTER_NAMELEN);
            clusterAddNodeToShard(shard_id, myself);
            clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_FSYNC_CONFIG);
        }
    }
}
+
+/* Update my hostname based on server configuration values */
+void clusterUpdateMyselfHostname(void) {
+ if (!myself) return;
+ updateAnnouncedHostname(myself, server.cluster_announce_hostname);
+}
+
+void clusterUpdateMyselfHumanNodename(void) {
+ if (!myself) return;
+ updateAnnouncedHumanNodename(myself, server.cluster_announce_human_nodename);
+}
+
/* Initialize the cluster subsystem: allocate the global cluster state,
 * load (or create) the nodes configuration file, sanity check the ports
 * and bind addresses, and sync "myself" with the announce-* options.
 * Called once at startup when cluster mode is enabled; exits the process
 * on fatal misconfiguration. */
void clusterInit(void) {
    int saveconf = 0;

    server.cluster = zmalloc(sizeof(clusterState));
    server.cluster->myself = NULL;
    server.cluster->currentEpoch = 0;
    server.cluster->state = CLUSTER_FAIL; /* FAIL until proven otherwise. */
    server.cluster->size = 1;
    server.cluster->todo_before_sleep = 0;
    server.cluster->nodes = dictCreate(&clusterNodesDictType);
    server.cluster->shards = dictCreate(&clusterSdsToListType);
    server.cluster->nodes_black_list =
        dictCreate(&clusterNodesBlackListDictType);
    server.cluster->failover_auth_time = 0;
    server.cluster->failover_auth_count = 0;
    server.cluster->failover_auth_rank = 0;
    server.cluster->failover_auth_epoch = 0;
    server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE;
    server.cluster->lastVoteEpoch = 0;

    /* Initialize stats */
    for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
        server.cluster->stats_bus_messages_sent[i] = 0;
        server.cluster->stats_bus_messages_received[i] = 0;
    }
    server.cluster->stats_pfail_nodes = 0;
    server.cluster->stat_cluster_links_buffer_limit_exceeded = 0;

    memset(server.cluster->slots,0, sizeof(server.cluster->slots));
    clusterCloseAllSlots();

    memset(server.cluster->owner_not_claiming_slot, 0, sizeof(server.cluster->owner_not_claiming_slot));

    /* Lock the cluster config file to make sure every node uses
     * its own nodes.conf. */
    server.cluster_config_file_lock_fd = -1;
    if (clusterLockConfig(server.cluster_configfile) == C_ERR)
        exit(1);

    /* Load or create a new nodes configuration. */
    if (clusterLoadConfig(server.cluster_configfile) == C_ERR) {
        /* No configuration found. We will just use the random name provided
         * by the createClusterNode() function. */
        myself = server.cluster->myself =
            createClusterNode(NULL,CLUSTER_NODE_MYSELF|CLUSTER_NODE_MASTER);
        serverLog(LL_NOTICE,"No cluster configuration found, I'm %.40s",
            myself->name);
        clusterAddNode(myself);
        clusterAddNodeToShard(myself->shard_id, myself);
        saveconf = 1;
    }
    if (saveconf) clusterSaveConfigOrDie(1);

    /* Port sanity check II
     * The other handshake port check is triggered too late to stop
     * us from trying to use a too-high cluster port number. */
    int port = defaultClientPort();
    if (!server.cluster_port && port > (65535-CLUSTER_PORT_INCR)) {
        serverLog(LL_WARNING, "Redis port number too high. "
                   "Cluster communication port is 10,000 port "
                   "numbers higher than your Redis port. "
                   "Your Redis port number must be 55535 or less.");
        exit(1);
    }
    if (!server.bindaddr_count) {
        serverLog(LL_WARNING, "No bind address is configured, but it is required for the Cluster bus.");
        exit(1);
    }

    /* Initialize data for the Slot to key API. */
    slotToKeyInit(server.db);

    /* The slots -> channels map is a radix tree. Initialize it here. */
    server.cluster->slots_to_channels = raxNew();

    /* Set myself->tcp_port/tls_port/cport to my listening ports, we'll just
     * need to discover the IP address via MEET messages. */
    deriveAnnouncedPorts(&myself->tcp_port, &myself->tls_port, &myself->cport);

    server.cluster->mf_end = 0;
    server.cluster->mf_slave = NULL;
    resetManualFailover();
    /* Sync "myself" with the announce-* configuration options. */
    clusterUpdateMyselfFlags();
    clusterUpdateMyselfIp();
    clusterUpdateMyselfHostname();
    clusterUpdateMyselfHumanNodename();
}
+
/* Create the cluster bus listening sockets. The bus listens on
 * cluster-port when configured, otherwise on the client port plus
 * CLUSTER_PORT_INCR. Exits the process when listening fails. */
void clusterInitListeners(void) {
    if (connectionIndexByType(connTypeOfCluster()->get_type(NULL)) < 0) {
        serverLog(LL_WARNING, "Missing connection type %s, but it is required for the Cluster bus.", connTypeOfCluster()->get_type(NULL));
        exit(1);
    }

    int port = defaultClientPort();
    connListener *listener = &server.clistener;
    listener->count = 0;
    listener->bindaddr = server.bindaddr;
    listener->bindaddr_count = server.bindaddr_count;
    listener->port = server.cluster_port ? server.cluster_port : port + CLUSTER_PORT_INCR;
    listener->ct = connTypeOfCluster();
    if (connListen(listener) == C_ERR ) {
        /* Note: the following log text is matched by the test suite. */
        serverLog(LL_WARNING, "Failed listening on port %u (cluster), aborting.", listener->port);
        exit(1);
    }

    if (createSocketAcceptHandler(&server.clistener, clusterAcceptHandler) != C_OK) {
        serverPanic("Unrecoverable error creating Redis Cluster socket accept handler.");
    }
}
+
/* Reset a node performing a soft or hard reset:
 *
 * 1) All other nodes are forgotten.
 * 2) All the assigned / open slots are released.
 * 3) If the node is a slave, it turns into a master.
 * 4) Only for hard reset: a new Node ID is generated.
 * 5) Only for hard reset: currentEpoch and configEpoch are set to 0.
 * 6) The new configuration is saved and the cluster state updated.
 * 7) If the node was a slave, the whole data set is flushed away. */
void clusterReset(int hard) {
    dictIterator *di;
    dictEntry *de;
    int j;

    /* Turn into master. */
    if (nodeIsSlave(myself)) {
        clusterSetNodeAsMaster(myself);
        replicationUnsetMaster();
        /* A replica's data set belonged to its old master: drop it. */
        emptyData(-1,EMPTYDB_NO_FLAGS,NULL);
    }

    /* Close slots, reset manual failover state. */
    clusterCloseAllSlots();
    resetManualFailover();

    /* Unassign all the slots. */
    for (j = 0; j < CLUSTER_SLOTS; j++) clusterDelSlot(j);

    /* Recreate shards dict */
    dictEmpty(server.cluster->shards, NULL);

    /* Forget all the nodes, but myself. Use a safe iterator since
     * clusterDelNode() removes entries from the dict being walked. */
    di = dictGetSafeIterator(server.cluster->nodes);
    while((de = dictNext(di)) != NULL) {
        clusterNode *node = dictGetVal(de);

        if (node == myself) continue;
        clusterDelNode(node);
    }
    dictReleaseIterator(di);

    /* Empty the nodes blacklist. */
    dictEmpty(server.cluster->nodes_black_list, NULL);

    /* Hard reset only: set epochs to 0, change node ID. */
    if (hard) {
        sds oldname;

        server.cluster->currentEpoch = 0;
        server.cluster->lastVoteEpoch = 0;
        myself->configEpoch = 0;
        serverLog(LL_NOTICE, "configEpoch set to 0 via CLUSTER RESET HARD");

        /* To change the Node ID we need to remove the old name from the
         * nodes table, change the ID, and re-add back with new name. */
        oldname = sdsnewlen(myself->name, CLUSTER_NAMELEN);
        dictDelete(server.cluster->nodes,oldname);
        sdsfree(oldname);
        getRandomHexChars(myself->name, CLUSTER_NAMELEN);
        getRandomHexChars(myself->shard_id, CLUSTER_NAMELEN);
        clusterAddNode(myself);
        serverLog(LL_NOTICE,"Node hard reset, now I'm %.40s", myself->name);
    }

    /* Re-populate shards */
    clusterAddNodeToShard(myself->shard_id, myself);

    /* Make sure to persist the new config and update the state. */
    clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
                         CLUSTER_TODO_UPDATE_STATE|
                         CLUSTER_TODO_FSYNC_CONFIG);
}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER communication link
+ * -------------------------------------------------------------------------- */
+static clusterMsgSendBlock *createClusterMsgSendBlock(int type, uint32_t msglen) {
+ uint32_t blocklen = msglen + sizeof(clusterMsgSendBlock) - sizeof(clusterMsg);
+ clusterMsgSendBlock *msgblock = zcalloc(blocklen);
+ msgblock->refcount = 1;
+ msgblock->totlen = blocklen;
+ server.stat_cluster_links_memory += blocklen;
+ clusterBuildMessageHdr(&msgblock->msg,type,msglen);
+ return msgblock;
+}
+
+static void clusterMsgSendBlockDecrRefCount(void *node) {
+ clusterMsgSendBlock *msgblock = (clusterMsgSendBlock*)node;
+ msgblock->refcount--;
+ serverAssert(msgblock->refcount >= 0);
+ if (msgblock->refcount == 0) {
+ server.stat_cluster_links_memory -= msgblock->totlen;
+ zfree(msgblock);
+ }
+}
+
/* Allocate and initialize a cluster bus link. A non-NULL 'node' makes this
 * an outbound link, attached to node->link; a NULL node means an inbound
 * link whose peer identity is not yet known. */
clusterLink *createClusterLink(clusterNode *node) {
    clusterLink *link = zmalloc(sizeof(*link));
    link->ctime = mstime();
    link->send_msg_queue = listCreate();
    /* Queued send blocks are refcounted; releasing the queue drops a ref. */
    listSetFreeMethod(link->send_msg_queue, clusterMsgSendBlockDecrRefCount);
    link->head_msg_send_offset = 0;
    link->send_msg_queue_mem = sizeof(list);
    link->rcvbuf = zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN);
    link->rcvbuf_len = 0;
    /* Account the initial buffers; freeClusterLink() undoes this. */
    server.stat_cluster_links_memory += link->rcvbuf_alloc + link->send_msg_queue_mem;
    link->conn = NULL;
    link->node = node;
    /* Related node can only possibly be known at link creation time if this is an outbound link */
    link->inbound = (node == NULL);
    if (!link->inbound) {
        node->link = link;
    }
    return link;
}
+
/* Free a cluster link, but does not free the associated node of course.
 * This function will just make sure that the original node associated
 * with this link will have the 'link' (or 'inbound_link') field set to
 * NULL. */
void freeClusterLink(clusterLink *link) {
    if (link->conn) {
        connClose(link->conn);
        link->conn = NULL;
    }
    /* Undo the memory accounting for the queue and the receive buffer. */
    server.stat_cluster_links_memory -= sizeof(list) + listLength(link->send_msg_queue)*sizeof(listNode);
    listRelease(link->send_msg_queue);
    server.stat_cluster_links_memory -= link->rcvbuf_alloc;
    zfree(link->rcvbuf);
    if (link->node) {
        if (link->node->link == link) {
            /* Outbound link: detach from the owning node. */
            serverAssert(!link->inbound);
            link->node->link = NULL;
        } else if (link->node->inbound_link == link) {
            /* Inbound link: detach likewise. */
            serverAssert(link->inbound);
            link->node->inbound_link = NULL;
        }
    }
    zfree(link);
}
+
/* Associate an anonymous inbound link with the node we have now identified
 * as its peer, keeping the invariant of at most one inbound link per node. */
void setClusterNodeToInboundClusterLink(clusterNode *node, clusterLink *link) {
    serverAssert(!link->node);
    serverAssert(link->inbound);
    if (node->inbound_link) {
        /* A peer may disconnect and then reconnect with us, and it's not guaranteed that
         * we would always process the disconnection of the existing inbound link before
         * accepting a new existing inbound link. Therefore, it's possible to have more than
         * one inbound link from the same node at the same time. Our cleanup logic assumes
         * a one to one relationship between nodes and inbound links, so we need to kill
         * one of the links. The existing link is more likely the outdated one, but it's
         * possible the other node may need to open another link. */
        serverLog(LL_DEBUG, "Replacing inbound link fd %d from node %.40s with fd %d",
                node->inbound_link->conn->fd, node->name, link->conn->fd);
        freeClusterLink(node->inbound_link);
    }
    serverAssert(!node->inbound_link);
    node->inbound_link = link;
    link->node = node;
}
+
/* Connection-level accept callback for inbound cluster bus connections:
 * once the connection is established, wrap it in a clusterLink and start
 * reading cluster messages. */
static void clusterConnAcceptHandler(connection *conn) {
    clusterLink *link;

    if (connGetState(conn) != CONN_STATE_CONNECTED) {
        serverLog(LL_VERBOSE,
                "Error accepting cluster node connection: %s", connGetLastError(conn));
        connClose(conn);
        return;
    }

    /* Create a link object we use to handle the connection.
     * It gets passed to the readable handler when data is available.
     * Initially the link->node pointer is set to NULL as we don't know
     * which node it is, but the right node is referenced once we know the
     * node identity. */
    link = createClusterLink(NULL);
    link->conn = conn;
    connSetPrivateData(conn, link);

    /* Register read handler */
    connSetReadHandler(conn, clusterReadHandler);
}
+
#define MAX_CLUSTER_ACCEPTS_PER_CALL 1000
/* accept(2) event handler for the cluster bus listening socket. Accepts up
 * to MAX_CLUSTER_ACCEPTS_PER_CALL pending connections per invocation and
 * hands each one to clusterConnAcceptHandler() via connAccept(). */
void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
    int cport, cfd;
    int max = MAX_CLUSTER_ACCEPTS_PER_CALL;
    char cip[NET_IP_STR_LEN];
    int require_auth = TLS_CLIENT_AUTH_YES;
    UNUSED(el);
    UNUSED(mask);
    UNUSED(privdata);

    /* If the server is starting up, don't accept cluster connections:
     * UPDATE messages may interact with the database content. */
    if (server.masterhost == NULL && server.loading) return;

    while(max--) {
        cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
        if (cfd == ANET_ERR) {
            /* EWOULDBLOCK just means no more pending connections. */
            if (errno != EWOULDBLOCK)
                serverLog(LL_VERBOSE,
                    "Error accepting cluster node: %s", server.neterr);
            return;
        }

        connection *conn = connCreateAccepted(connTypeOfCluster(), cfd, &require_auth);

        /* Make sure connection is not in an error state */
        if (connGetState(conn) != CONN_STATE_ACCEPTING) {
            serverLog(LL_VERBOSE,
                "Error creating an accepting connection for cluster node: %s",
                    connGetLastError(conn));
            connClose(conn);
            return;
        }
        connEnableTcpNoDelay(conn);
        connKeepAlive(conn,server.cluster_node_timeout / 1000 * 2);

        /* Use non-blocking I/O for cluster messages. */
        serverLog(LL_VERBOSE,"Accepting cluster node connection from %s:%d", cip, cport);

        /* Accept the connection now. connAccept() may call our handler directly
         * or schedule it for later depending on connection implementation.
         */
        if (connAccept(conn, clusterConnAcceptHandler) == C_ERR) {
            if (connGetState(conn) == CONN_STATE_ERROR)
                serverLog(LL_VERBOSE,
                        "Error accepting cluster node connection: %s",
                        connGetLastError(conn));
            connClose(conn);
            return;
        }
    }
}
+
+/* Return the approximated number of sockets we are using in order to
+ * take the cluster bus connections. */
+unsigned long getClusterConnectionsCount(void) {
+ /* We decrement the number of nodes by one, since there is the
+ * "myself" node too in the list. Each node uses two file descriptors,
+ * one incoming and one outgoing, thus the multiplication by 2. */
+ return server.cluster_enabled ?
+ ((dictSize(server.cluster->nodes)-1)*2) : 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Key space handling
+ * -------------------------------------------------------------------------- */
+
/* We have 16384 hash slots. The hash slot of a given key is obtained
 * as the least significant 14 bits of the crc16 of the key.
 *
 * However if the key contains the {...} pattern, only the part between
 * { and } is hashed (a "hash tag"), which may be used to force related
 * keys into the same slot (assuming no resharding is in progress). */
unsigned int keyHashSlot(char *key, int keylen) {
    int tag_start, tag_end;

    /* Find the first '{', if any. */
    for (tag_start = 0; tag_start < keylen; tag_start++)
        if (key[tag_start] == '{') break;

    if (tag_start < keylen) {
        /* Find the matching '}' after it. */
        for (tag_end = tag_start+1; tag_end < keylen; tag_end++)
            if (key[tag_end] == '}') break;

        /* Hash only the tag if it is properly closed and non-empty. */
        if (tag_end < keylen && tag_end != tag_start+1)
            return crc16(key+tag_start+1, tag_end-tag_start-1) & 0x3FFF;
    }

    /* No usable hash tag: hash the whole key. */
    return crc16(key, keylen) & 0x3FFF;
}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER node API
+ * -------------------------------------------------------------------------- */
+
/* Create a new cluster node, with the specified flags.
 * If "nodename" is NULL this is considered a first handshake and a random
 * node name is assigned to this node (it will be fixed later when we'll
 * receive the first pong).
 *
 * The node is created and returned to the user, but it is not automatically
 * added to the nodes hash table. */
clusterNode *createClusterNode(char *nodename, int flags) {
    clusterNode *node = zmalloc(sizeof(*node));

    if (nodename)
        memcpy(node->name, nodename, CLUSTER_NAMELEN);
    else
        getRandomHexChars(node->name, CLUSTER_NAMELEN);
    /* A fresh node starts in its own single-member shard. */
    getRandomHexChars(node->shard_id, CLUSTER_NAMELEN);
    node->ctime = mstime();
    node->configEpoch = 0;
    node->flags = flags;
    memset(node->slots,0,sizeof(node->slots));
    node->slot_info_pairs = NULL;
    node->slot_info_pairs_count = 0;
    node->numslots = 0;
    node->numslaves = 0;
    node->slaves = NULL;
    node->slaveof = NULL;
    node->last_in_ping_gossip = 0;
    node->ping_sent = node->pong_received = 0;
    node->data_received = 0;
    node->fail_time = 0;
    node->link = NULL;
    node->inbound_link = NULL;
    memset(node->ip,0,sizeof(node->ip));
    node->hostname = sdsempty();
    node->human_nodename = sdsempty();
    node->tcp_port = 0;
    node->cport = 0;
    node->tls_port = 0;
    /* Failure reports (see clusterNodeAddFailureReport) own their entries. */
    node->fail_reports = listCreate();
    node->voted_time = 0;
    node->orphaned_time = 0;
    node->repl_offset_time = 0;
    node->repl_offset = 0;
    listSetFreeMethod(node->fail_reports,zfree);
    return node;
}
+
+/* This function is called every time we get a failure report from a node.
+ * The side effect is to populate the fail_reports list (or to update
+ * the timestamp of an existing report).
+ *
+ * 'failing' is the node that is in failure state according to the
+ * 'sender' node.
+ *
+ * The function returns 0 if it just updates a timestamp of an existing
+ * failure report from the same sender. 1 is returned if a new failure
+ * report is created. */
+int clusterNodeAddFailureReport(clusterNode *failing, clusterNode *sender) {
+ list *l = failing->fail_reports;
+ listNode *ln;
+ listIter li;
+ clusterNodeFailReport *fr;
+
+ /* If a failure report from the same sender already exists, just update
+ * the timestamp. */
+ listRewind(l,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ fr = ln->value;
+ if (fr->node == sender) {
+ fr->time = mstime();
+ return 0;
+ }
+ }
+
+ /* Otherwise create a new report. */
+ fr = zmalloc(sizeof(*fr));
+ fr->node = sender;
+ fr->time = mstime();
+ listAddNodeTail(l,fr);
+ return 1;
+}
+
+/* Remove failure reports that are too old, where too old means reasonably
+ * older than the global node timeout. Note that anyway for a node to be
+ * flagged as FAIL we need to have a local PFAIL state that is at least
+ * older than the global node timeout, so we don't just trust the number
+ * of failure reports from other nodes. */
+void clusterNodeCleanupFailureReports(clusterNode *node) {
+ list *l = node->fail_reports;
+ listNode *ln;
+ listIter li;
+ clusterNodeFailReport *fr;
+ mstime_t maxtime = server.cluster_node_timeout *
+ CLUSTER_FAIL_REPORT_VALIDITY_MULT;
+ mstime_t now = mstime();
+
+ listRewind(l,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ fr = ln->value;
+ if (now - fr->time > maxtime) listDelNode(l,ln);
+ }
+}
+
/* Remove the failing report for 'node' if it was previously considered
 * failing by 'sender'. This function is called when a node informs us via
 * gossip that a node is OK from its point of view (no FAIL or PFAIL flags).
 *
 * Note that this function is called relatively often as it gets called even
 * when there are no nodes failing, and is O(N), however when the cluster is
 * fine the failure reports list is empty so the function runs in constant
 * time.
 *
 * The function returns 1 if the failure report was found and removed.
 * Otherwise 0 is returned. */
int clusterNodeDelFailureReport(clusterNode *node, clusterNode *sender) {
    list *l = node->fail_reports;
    listNode *ln;
    listIter li;
    clusterNodeFailReport *fr;

    /* Search for a failure report from this sender. */
    listRewind(l,&li);
    while ((ln = listNext(&li)) != NULL) {
        fr = ln->value;
        if (fr->node == sender) break;
    }
    if (!ln) return 0; /* No failure report from this sender. */

    /* Remove the failure report. */
    listDelNode(l,ln);
    /* Take the chance to also purge any expired reports. */
    clusterNodeCleanupFailureReports(node);
    return 1;
}
+
+/* Return the number of external nodes that believe 'node' is failing,
+ * not including this node, that may have a PFAIL or FAIL state for this
+ * node as well. */
+int clusterNodeFailureReportsCount(clusterNode *node) {
+ clusterNodeCleanupFailureReports(node);
+ return listLength(node->fail_reports);
+}
+
+int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) {
+ int j;
+
+ for (j = 0; j < master->numslaves; j++) {
+ if (master->slaves[j] == slave) {
+ if ((j+1) < master->numslaves) {
+ int remaining_slaves = (master->numslaves - j) - 1;
+ memmove(master->slaves+j,master->slaves+(j+1),
+ (sizeof(*master->slaves) * remaining_slaves));
+ }
+ master->numslaves--;
+ if (master->numslaves == 0)
+ master->flags &= ~CLUSTER_NODE_MIGRATE_TO;
+ return C_OK;
+ }
+ }
+ return C_ERR;
+}
+
+int clusterNodeAddSlave(clusterNode *master, clusterNode *slave) {
+ int j;
+
+ /* If it's already a slave, don't add it again. */
+ for (j = 0; j < master->numslaves; j++)
+ if (master->slaves[j] == slave) return C_ERR;
+ master->slaves = zrealloc(master->slaves,
+ sizeof(clusterNode*)*(master->numslaves+1));
+ master->slaves[master->numslaves] = slave;
+ master->numslaves++;
+ master->flags |= CLUSTER_NODE_MIGRATE_TO;
+ return C_OK;
+}
+
+int clusterCountNonFailingSlaves(clusterNode *n) {
+ int j, okslaves = 0;
+
+ for (j = 0; j < n->numslaves; j++)
+ if (!nodeFailed(n->slaves[j])) okslaves++;
+ return okslaves;
+}
+
/* Low level cleanup of the node structure. Only called by clusterDelNode(). */
void freeClusterNode(clusterNode *n) {
    sds nodename;
    int j;

    /* If the node has associated slaves, we have to set
     * all the slaves->slaveof fields to NULL (unknown). */
    for (j = 0; j < n->numslaves; j++)
        n->slaves[j]->slaveof = NULL;

    /* Remove this node from the list of slaves of its master. */
    if (nodeIsSlave(n) && n->slaveof) clusterNodeRemoveSlave(n->slaveof,n);

    /* Unlink from the set of nodes. */
    nodename = sdsnewlen(n->name, CLUSTER_NAMELEN);
    serverAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK);
    sdsfree(nodename);
    sdsfree(n->hostname);
    sdsfree(n->human_nodename);

    /* Release links and associated data structures. Freeing the links also
     * clears any back-references they hold to this node. */
    if (n->link) freeClusterLink(n->link);
    if (n->inbound_link) freeClusterLink(n->inbound_link);
    listRelease(n->fail_reports);
    zfree(n->slaves);
    zfree(n);
}
+
+/* Add a node to the nodes hash table */
+void clusterAddNode(clusterNode *node) {
+ int retval;
+
+ retval = dictAdd(server.cluster->nodes,
+ sdsnewlen(node->name,CLUSTER_NAMELEN), node);
+ serverAssert(retval == DICT_OK);
+}
+
/* Remove a node from the cluster. The function performs the high level
 * cleanup, calling freeClusterNode() for the low level cleanup.
 * Here we do the following:
 *
 * 1) Mark all the slots handled by it as unassigned.
 * 2) Remove all the failure reports sent by this node and referenced by
 *    other nodes.
 * 3) Remove the node from the owning shard
 * 4) Free the node with freeClusterNode() that will in turn remove it
 *    from the hash table and from the list of slaves of its master, if
 *    it is a slave node.
 */
void clusterDelNode(clusterNode *delnode) {
    int j;
    dictIterator *di;
    dictEntry *de;

    /* 1) Mark slots as unassigned. This also clears any migration or
     *    import in progress that involved the deleted node. */
    for (j = 0; j < CLUSTER_SLOTS; j++) {
        if (server.cluster->importing_slots_from[j] == delnode)
            server.cluster->importing_slots_from[j] = NULL;
        if (server.cluster->migrating_slots_to[j] == delnode)
            server.cluster->migrating_slots_to[j] = NULL;
        if (server.cluster->slots[j] == delnode)
            clusterDelSlot(j);
    }

    /* 2) Remove failure reports. */
    di = dictGetSafeIterator(server.cluster->nodes);
    while((de = dictNext(di)) != NULL) {
        clusterNode *node = dictGetVal(de);

        if (node == delnode) continue;
        clusterNodeDelFailureReport(node,delnode);
    }
    dictReleaseIterator(di);

    /* 3) Remove the node from the owning shard */
    clusterRemoveNodeFromShard(delnode);

    /* 4) Free the node, unlinking it from the cluster. */
    freeClusterNode(delnode);
}
+
+/* Cluster node sanity check. Returns C_OK if the node id
+ * is valid an C_ERR otherwise. */
+int verifyClusterNodeId(const char *name, int length) {
+ if (length != CLUSTER_NAMELEN) return C_ERR;
+ for (int i = 0; i < length; i++) {
+ if (name[i] >= 'a' && name[i] <= 'z') continue;
+ if (name[i] >= '0' && name[i] <= '9') continue;
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+/* Node lookup by name */
+clusterNode *clusterLookupNode(const char *name, int length) {
+ if (verifyClusterNodeId(name, length) != C_OK) return NULL;
+ sds s = sdsnewlen(name, length);
+ dictEntry *de = dictFind(server.cluster->nodes, s);
+ sdsfree(s);
+ if (de == NULL) return NULL;
+ return dictGetVal(de);
+}
+
+/* Get all the nodes in my shard.
+ * Note that the list returned is not computed on the fly
+ * via slaveof; rather, it is maintained permanently to
+ * track the shard membership and its life cycle is tied
+ * to this Redis process. Therefore, the caller must not
+ * release the list. */
+list *clusterGetNodesInMyShard(clusterNode *node) {
+ sds s = sdsnewlen(node->shard_id, CLUSTER_NAMELEN);
+ dictEntry *de = dictFind(server.cluster->shards,s);
+ sdsfree(s);
+ return (de != NULL) ? dictGetVal(de) : NULL;
+}
+
+/* This is only used after the handshake. When we connect a given IP/PORT
+ * as a result of CLUSTER MEET we don't have the node name yet, so we
+ * pick a random one, and will fix it when we receive the PONG request using
+ * this function. */
+void clusterRenameNode(clusterNode *node, char *newname) {
+ int retval;
+ sds s = sdsnewlen(node->name, CLUSTER_NAMELEN);
+
+ serverLog(LL_DEBUG,"Renaming node %.40s into %.40s",
+ node->name, newname);
+ retval = dictDelete(server.cluster->nodes, s);
+ sdsfree(s);
+ serverAssert(retval == DICT_OK);
+ memcpy(node->name, newname, CLUSTER_NAMELEN);
+ clusterAddNode(node);
+ clusterAddNodeToShard(node->shard_id, node);
+}
+
+void clusterAddNodeToShard(const char *shard_id, clusterNode *node) {
+ sds s = sdsnewlen(shard_id, CLUSTER_NAMELEN);
+ dictEntry *de = dictFind(server.cluster->shards,s);
+ if (de == NULL) {
+ list *l = listCreate();
+ listAddNodeTail(l, node);
+ serverAssert(dictAdd(server.cluster->shards, s, l) == DICT_OK);
+ } else {
+ list *l = dictGetVal(de);
+ if (listSearchKey(l, node) == NULL) {
+ listAddNodeTail(l, node);
+ }
+ sdsfree(s);
+ }
+}
+
+void clusterRemoveNodeFromShard(clusterNode *node) {
+ sds s = sdsnewlen(node->shard_id, CLUSTER_NAMELEN);
+ dictEntry *de = dictFind(server.cluster->shards, s);
+ if (de != NULL) {
+ list *l = dictGetVal(de);
+ listNode *ln = listSearchKey(l, node);
+ if (ln != NULL) {
+ listDelNode(l, ln);
+ }
+ if (listLength(l) == 0) {
+ dictDelete(server.cluster->shards, s);
+ }
+ }
+ sdsfree(s);
+}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER config epoch handling
+ * -------------------------------------------------------------------------- */
+
+/* Return the greatest configEpoch found in the cluster, or the current
+ * epoch if greater than any node configEpoch. */
+uint64_t clusterGetMaxEpoch(void) {
+ uint64_t max = 0;
+ dictIterator *di;
+ dictEntry *de;
+
+ di = dictGetSafeIterator(server.cluster->nodes);
+ while((de = dictNext(di)) != NULL) {
+ clusterNode *node = dictGetVal(de);
+ if (node->configEpoch > max) max = node->configEpoch;
+ }
+ dictReleaseIterator(di);
+ if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch;
+ return max;
+}
+
+/* If this node epoch is zero or is not already the greatest across the
+ * cluster (from the POV of the local configuration), this function will:
+ *
+ * 1) Generate a new config epoch, incrementing the current epoch.
+ * 2) Assign the new epoch to this node, WITHOUT any consensus.
+ * 3) Persist the configuration on disk before sending packets with the
+ * new configuration.
+ *
+ * If the new config epoch is generated and assigned, C_OK is returned,
+ * otherwise C_ERR is returned (since the node has already the greatest
+ * configuration around) and no operation is performed.
+ *
+ * Important note: this function violates the principle that config epochs
+ * should be generated with consensus and should be unique across the cluster.
+ * However Redis Cluster uses this auto-generated new config epochs in two
+ * cases:
+ *
+ * 1) When slots are closed after importing. Otherwise resharding would be
+ * too expensive.
+ * 2) When CLUSTER FAILOVER is called with options that force a slave to
+ * failover its master even if there is not master majority able to
+ * create a new configuration epoch.
+ *
+ * Redis Cluster will not explode using this function, even in the case of
+ * a collision between this node and another node, generating the same
+ * configuration epoch unilaterally, because the config epoch conflict
+ * resolution algorithm will eventually move colliding nodes to different
+ * config epochs. However using this function may violate the "last failover
+ * wins" rule, so should only be used with care. */
+int clusterBumpConfigEpochWithoutConsensus(void) {
+ uint64_t maxEpoch = clusterGetMaxEpoch();
+
+ if (myself->configEpoch == 0 ||
+ myself->configEpoch != maxEpoch)
+ {
+ server.cluster->currentEpoch++;
+ myself->configEpoch = server.cluster->currentEpoch;
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_FSYNC_CONFIG);
+ serverLog(LL_NOTICE,
+ "New configEpoch set to %llu",
+ (unsigned long long) myself->configEpoch);
+ return C_OK;
+ } else {
+ return C_ERR;
+ }
+}
+
+/* This function is called when this node is a master, and we receive from
+ * another master a configuration epoch that is equal to our configuration
+ * epoch.
+ *
+ * BACKGROUND
+ *
+ * It is not possible that different slaves get the same config
+ * epoch during a failover election, because the slaves need to get voted
+ * by a majority. However when we perform a manual resharding of the cluster
+ * the node will assign a configuration epoch to itself without to ask
+ * for agreement. Usually resharding happens when the cluster is working well
+ * and is supervised by the sysadmin, however it is possible for a failover
+ * to happen exactly while the node we are resharding a slot to assigns itself
+ * a new configuration epoch, but before it is able to propagate it.
+ *
+ * So technically it is possible in this condition that two nodes end with
+ * the same configuration epoch.
+ *
+ * Another possibility is that there are bugs in the implementation causing
+ * this to happen.
+ *
+ * Moreover when a new cluster is created, all the nodes start with the same
+ * configEpoch. This collision resolution code allows nodes to automatically
+ * end with a different configEpoch at startup automatically.
+ *
+ * In all the cases, we want a mechanism that resolves this issue automatically
+ * as a safeguard. The same configuration epoch for masters serving different
+ * set of slots is not harmful, but it is if the nodes end serving the same
+ * slots for some reason (manual errors or software bugs) without a proper
+ * failover procedure.
+ *
+ * In general we want a system that eventually always ends with different
+ * masters having different configuration epochs whatever happened, since
+ * nothing is worse than a split-brain condition in a distributed system.
+ *
+ * BEHAVIOR
+ *
+ * When this function gets called, what happens is that if this node
+ * has the lexicographically smaller Node ID compared to the other node
+ * with the conflicting epoch (the 'sender' node), it will assign itself
+ * the greatest configuration epoch currently detected among nodes plus 1.
+ *
+ * This means that even if there are multiple nodes colliding, the node
+ * with the greatest Node ID never moves forward, so eventually all the nodes
+ * end with a different configuration epoch.
+ */
+void clusterHandleConfigEpochCollision(clusterNode *sender) {
+ /* Prerequisites: nodes have the same configEpoch and are both masters. */
+ if (sender->configEpoch != myself->configEpoch ||
+ !nodeIsMaster(sender) || !nodeIsMaster(myself)) return;
+ /* Don't act if the colliding node has a smaller Node ID. */
+ if (memcmp(sender->name,myself->name,CLUSTER_NAMELEN) <= 0) return;
+ /* Get the next ID available at the best of this node knowledge. */
+ server.cluster->currentEpoch++;
+ myself->configEpoch = server.cluster->currentEpoch;
+ /* Persist the configuration carrying the newly claimed epoch before
+ * logging it. */
+ clusterSaveConfigOrDie(1);
+ serverLog(LL_VERBOSE,
+ "WARNING: configEpoch collision with node %.40s (%s)."
+ " configEpoch set to %llu",
+ sender->name,sender->human_nodename,
+ (unsigned long long) myself->configEpoch);
+}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER nodes blacklist
+ *
+ * The nodes blacklist is just a way to ensure that a given node with a given
+ * Node ID is not re-added before some time elapsed (this time is specified
+ * in seconds in CLUSTER_BLACKLIST_TTL).
+ *
+ * This is useful when we want to remove a node from the cluster completely:
+ * when CLUSTER FORGET is called, it also puts the node into the blacklist so
+ * that even if we receive gossip messages from other nodes that still remember
+ * about the node we want to remove, we don't re-add it before some time.
+ *
+ * Currently the CLUSTER_BLACKLIST_TTL is set to 1 minute, this means
+ * that redis-cli has 60 seconds to send CLUSTER FORGET messages to nodes
+ * in the cluster without dealing with the problem of other nodes re-adding
+ * back the node to nodes we already sent the FORGET command to.
+ *
+ * The data structure used is a hash table with an sds string representing
+ * the node ID as key, and the time when it is ok to re-add the node as
+ * value.
+ * -------------------------------------------------------------------------- */
+
+#define CLUSTER_BLACKLIST_TTL 60 /* 1 minute. */
+
+
+/* Before the addNode() or Exists() operations we always remove expired
+ * entries from the black list. This is an O(N) operation but it is not a
+ * problem since add / exists operations are called very infrequently and
+ * the hash table is expected to contain very few elements at most.
+ * However, without this cleanup, during long uptimes and with some
+ * automated node add/removal procedures, entries could accumulate. */
+void clusterBlacklistCleanup(void) {
+    dictEntry *entry;
+    /* Safe iterator: entries may be deleted while walking the table. */
+    dictIterator *iter = dictGetSafeIterator(server.cluster->nodes_black_list);
+
+    while ((entry = dictNext(iter)) != NULL) {
+        int64_t expire = dictGetUnsignedIntegerVal(entry);
+
+        /* Drop every entry whose re-add time is already in the past. */
+        if (expire < server.unixtime)
+            dictDelete(server.cluster->nodes_black_list,dictGetKey(entry));
+    }
+    dictReleaseIterator(iter);
+}
+
+/* Cleanup the blacklist and add a new node ID to the black list. */
+void clusterBlacklistAddNode(clusterNode *node) {
+    sds id = sdsnewlen(node->name,CLUSTER_NAMELEN);
+
+    clusterBlacklistCleanup();
+    /* dictAddOrFind() either inserts 'id' (taking ownership of it) or
+     * returns the pre-existing entry, which keeps its own key; in the
+     * latter case our copy must be released to avoid leaking it. This
+     * replaces the previous dictAdd()+sdsdup()+dictFind() round-trip
+     * (one allocation and one hash lookup less) and matches the
+     * dictAddOrFind() usage in clusterProcessPingExtensions(). */
+    dictEntry *de = dictAddOrFind(server.cluster->nodes_black_list,id);
+    if (dictGetKey(de) != id) sdsfree(id);
+    /* (Re)arm the expire time: the node may be re-added after the TTL. */
+    dictSetUnsignedIntegerVal(de,time(NULL)+CLUSTER_BLACKLIST_TTL);
+}
+
+/* Return non-zero if the specified node ID exists in the blacklist.
+ * You don't need to pass an sds string here, any pointer to 40 bytes
+ * will work. */
+int clusterBlacklistExists(char *nodeid) {
+    /* Normalize the input to an sds key of exactly CLUSTER_NAMELEN bytes;
+     * callers may pass any 40-byte buffer, not only an sds string. */
+    sds key = sdsnewlen(nodeid,CLUSTER_NAMELEN);
+    int found;
+
+    clusterBlacklistCleanup();
+    found = (dictFind(server.cluster->nodes_black_list,key) != NULL);
+    sdsfree(key);
+    return found;
+}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER messages exchange - PING/PONG and gossip
+ * -------------------------------------------------------------------------- */
+
+/* This function checks if a given node should be marked as FAIL.
+ * It happens if the following conditions are met:
+ *
+ * 1) We received enough failure reports from other master nodes via gossip.
+ * Enough means that the majority of the masters signaled the node is
+ * down recently.
+ * 2) We believe this node is in PFAIL state.
+ *
+ * If a failure is detected we also inform the whole cluster about this
+ * event trying to force every other node to set the FAIL flag for the node.
+ *
+ * Note that the form of agreement used here is weak, as we collect the majority
+ * of masters state during some time, and even if we force agreement by
+ * propagating the FAIL message, because of partitions we may not reach every
+ * node. However:
+ *
+ * 1) Either we reach the majority and eventually the FAIL state will propagate
+ * to all the cluster.
+ * 2) Or there is no majority so no slave promotion will be authorized and the
+ * FAIL flag will be cleared after some time.
+ */
+void markNodeAsFailingIfNeeded(clusterNode *node) {
+ int failures;
+ /* Strict majority over the cluster size: (size/2)+1 voters needed. */
+ int needed_quorum = (server.cluster->size / 2) + 1;
+
+ if (!nodeTimedOut(node)) return; /* We can reach it. */
+ if (nodeFailed(node)) return; /* Already FAILing. */
+
+ failures = clusterNodeFailureReportsCount(node);
+ /* Also count myself as a voter if I'm a master. */
+ if (nodeIsMaster(myself)) failures++;
+ if (failures < needed_quorum) return; /* No weak agreement from masters. */
+
+ serverLog(LL_NOTICE,
+ "Marking node %.40s (%s) as failing (quorum reached).", node->name, node->human_nodename);
+
+ /* Mark the node as failing: promote PFAIL to FAIL and timestamp the
+ * transition so the FAIL state can later be aged out if appropriate
+ * (see clearNodeFailureIfNeeded()). */
+ node->flags &= ~CLUSTER_NODE_PFAIL;
+ node->flags |= CLUSTER_NODE_FAIL;
+ node->fail_time = mstime();
+
+ /* Broadcast the failing node name to everybody, forcing all the other
+ * reachable nodes to flag the node as FAIL.
+ * We do that even if this node is a replica and not a master: anyway
+ * the failing state is triggered collecting failure reports from masters,
+ * so here the replica is only helping propagating this status. */
+ clusterSendFail(node->name);
+ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
+}
+
+/* This function is called only if a node is marked as FAIL, but we are able
+ * to reach it again. It checks if there are the conditions to undo the FAIL
+ * state. */
+void clearNodeFailureIfNeeded(clusterNode *node) {
+ mstime_t now = mstime();
+
+ /* Callers must only invoke this for nodes currently flagged FAIL. */
+ serverAssert(nodeFailed(node));
+
+ /* The two branches below are mutually exclusive: the first handles
+ * replicas and slot-less masters, the second masters still serving
+ * slots. */
+
+ /* For slaves we always clear the FAIL flag if we can contact the
+ * node again. */
+ if (nodeIsSlave(node) || node->numslots == 0) {
+ serverLog(LL_NOTICE,
+ "Clear FAIL state for node %.40s (%s):%s is reachable again.",
+ node->name,node->human_nodename,
+ nodeIsSlave(node) ? "replica" : "master without slots");
+ node->flags &= ~CLUSTER_NODE_FAIL;
+ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
+ }
+
+ /* If it is a master and...
+ * 1) The FAIL state is old enough.
+ * 2) It is yet serving slots from our point of view (not failed over).
+ * Apparently no one is going to fix these slots, clear the FAIL flag. */
+ if (nodeIsMaster(node) && node->numslots > 0 &&
+ (now - node->fail_time) >
+ (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT))
+ {
+ serverLog(LL_NOTICE,
+ "Clear FAIL state for node %.40s (%s): is reachable again and nobody is serving its slots after some time.",
+ node->name, node->human_nodename);
+ node->flags &= ~CLUSTER_NODE_FAIL;
+ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
+ }
+}
+
+/* Return true if we already have a node in HANDSHAKE state matching the
+ * specified ip address and port number. This function is used in order to
+ * avoid adding a new handshake node for the same address multiple times. */
+int clusterHandshakeInProgress(char *ip, int port, int cport) {
+    dictIterator *iter = dictGetSafeIterator(server.cluster->nodes);
+    dictEntry *entry;
+    int in_progress = 0;
+
+    /* Scan the known nodes for one still in handshake whose address
+     * (ip, client port, cluster bus port) matches the given triple. */
+    while (!in_progress && (entry = dictNext(iter)) != NULL) {
+        clusterNode *node = dictGetVal(entry);
+
+        if (!nodeInHandshake(node)) continue;
+        if (strcasecmp(node->ip,ip) == 0 &&
+            getNodeDefaultClientPort(node) == port &&
+            node->cport == cport)
+        {
+            in_progress = 1;
+        }
+    }
+    dictReleaseIterator(iter);
+    return in_progress;
+}
+
+/* Start a handshake with the specified address if there is not one
+ * already in progress. Returns non-zero if the handshake was actually
+ * started. On error zero is returned and errno is set to one of the
+ * following values:
+ *
+ * EAGAIN - There is already a handshake in progress for this address.
+ * EINVAL - IP or port are not valid. */
+int clusterStartHandshake(char *ip, int port, int cport) {
+    clusterNode *n;
+    char norm_ip[NET_IP_STR_LEN];
+    struct sockaddr_storage sa;
+
+    /* Validate the IP: accept anything inet_pton() can parse as either an
+     * IPv4 or an IPv6 address, remembering which family matched. */
+    if (inet_pton(AF_INET,ip,&(((struct sockaddr_in *)&sa)->sin_addr))) {
+        sa.ss_family = AF_INET;
+    } else if (inet_pton(AF_INET6,ip,&(((struct sockaddr_in6 *)&sa)->sin6_addr))) {
+        sa.ss_family = AF_INET6;
+    } else {
+        errno = EINVAL;
+        return 0;
+    }
+
+    /* Both the client port and the cluster bus port must be valid. */
+    if (port <= 0 || port > 65535 || cport <= 0 || cport > 65535) {
+        errno = EINVAL;
+        return 0;
+    }
+
+    /* Re-serialize the parsed address so we always store the normalized
+     * string form of the node IP. */
+    memset(norm_ip,0,NET_IP_STR_LEN);
+    if (sa.ss_family == AF_INET) {
+        inet_ntop(AF_INET,
+            (void*)&(((struct sockaddr_in *)&sa)->sin_addr),
+            norm_ip,NET_IP_STR_LEN);
+    } else {
+        inet_ntop(AF_INET6,
+            (void*)&(((struct sockaddr_in6 *)&sa)->sin6_addr),
+            norm_ip,NET_IP_STR_LEN);
+    }
+
+    /* Only one handshake at a time per address. */
+    if (clusterHandshakeInProgress(norm_ip,port,cport)) {
+        errno = EAGAIN;
+        return 0;
+    }
+
+    /* Create the node with a random name (NULL as first argument to
+     * createClusterNode()): the real name and the rest of the state are
+     * learned during the handshake. */
+    n = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_MEET);
+    memcpy(n->ip,norm_ip,sizeof(n->ip));
+    if (server.tls_cluster) {
+        n->tls_port = port;
+    } else {
+        n->tcp_port = port;
+    }
+    n->cport = cport;
+    clusterAddNode(n);
+    return 1;
+}
+
+static void getClientPortFromClusterMsg(clusterMsg *hdr, int *tls_port, int *tcp_port) {
+    /* The header's 'port' field carries the port of the transport the
+     * cluster is configured with; 'pport' carries the other one. */
+    int primary = ntohs(hdr->port);
+    int secondary = ntohs(hdr->pport);
+
+    *tls_port = server.tls_cluster ? primary : secondary;
+    *tcp_port = server.tls_cluster ? secondary : primary;
+}
+
+static void getClientPortFromGossip(clusterMsgDataGossip *g, int *tls_port, int *tcp_port) {
+    /* Same convention as getClientPortFromClusterMsg(): the gossip 'port'
+     * field is the configured transport's port, 'pport' the other one. */
+    int primary = ntohs(g->port);
+    int secondary = ntohs(g->pport);
+
+    *tls_port = server.tls_cluster ? primary : secondary;
+    *tcp_port = server.tls_cluster ? secondary : primary;
+}
+
+/* Process the gossip section of PING or PONG packets.
+ * Note that this function assumes that the packet is already sanity-checked
+ * by the caller, not in the content of the gossip section, but in the
+ * length. */
+void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
+ uint16_t count = ntohs(hdr->count);
+ clusterMsgDataGossip *g = (clusterMsgDataGossip*) hdr->data.ping.gossip;
+ /* 'sender' is the node attached to the link if any, otherwise the node
+ * named in the header; it is NULL when the sender is not yet known. */
+ clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender, CLUSTER_NAMELEN);
+
+ while(count--) {
+ uint16_t flags = ntohs(g->flags);
+ clusterNode *node;
+ sds ci;
+
+ if (server.verbosity == LL_DEBUG) {
+ ci = representClusterNodeFlags(sdsempty(), flags);
+ serverLog(LL_DEBUG,"GOSSIP %.40s %s:%d@%d %s",
+ g->nodename,
+ g->ip,
+ ntohs(g->port),
+ ntohs(g->cport),
+ ci);
+ sdsfree(ci);
+ }
+
+ /* Convert port and pport into TCP port and TLS port. */
+ int msg_tls_port, msg_tcp_port;
+ getClientPortFromGossip(g, &msg_tls_port, &msg_tcp_port);
+
+ /* Update our state accordingly to the gossip sections */
+ node = clusterLookupNode(g->nodename, CLUSTER_NAMELEN);
+ if (node) {
+ /* We already know this node.
+ Handle failure reports, only when the sender is a master. */
+ if (sender && nodeIsMaster(sender) && node != myself) {
+ if (flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) {
+ if (clusterNodeAddFailureReport(node,sender)) {
+ serverLog(LL_VERBOSE,
+ "Node %.40s (%s) reported node %.40s (%s) as not reachable.",
+ sender->name, sender->human_nodename, node->name, node->human_nodename);
+ }
+ markNodeAsFailingIfNeeded(node);
+ } else {
+ if (clusterNodeDelFailureReport(node,sender)) {
+ serverLog(LL_VERBOSE,
+ "Node %.40s (%s) reported node %.40s (%s) is back online.",
+ sender->name, sender->human_nodename, node->name, node->human_nodename);
+ }
+ }
+ }
+
+ /* If from our POV the node is up (no failure flags are set),
+ * we have no pending ping for the node, nor we have failure
+ * reports for this node, update the last pong time with the
+ * one we see from the other nodes. */
+ if (!(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) &&
+ node->ping_sent == 0 &&
+ clusterNodeFailureReportsCount(node) == 0)
+ {
+ /* Gossiped pong times travel in seconds; scale back to ms. */
+ mstime_t pongtime = ntohl(g->pong_received);
+ pongtime *= 1000; /* Convert back to milliseconds. */
+
+ /* Replace the pong time with the received one only if
+ * it's greater than our view but is not in the future
+ * (with 500 milliseconds tolerance) from the POV of our
+ * clock. */
+ if (pongtime <= (server.mstime+500) &&
+ pongtime > node->pong_received)
+ {
+ node->pong_received = pongtime;
+ }
+ }
+
+ /* If we already know this node, but it is not reachable, and
+ * we see a different address in the gossip section of a node that
+ * can talk with this other node, update the address, disconnect
+ * the old link if any, so that we'll attempt to connect with the
+ * new address. */
+ if (node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL) &&
+ !(flags & CLUSTER_NODE_NOADDR) &&
+ !(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) &&
+ (strcasecmp(node->ip,g->ip) ||
+ node->tls_port != (server.tls_cluster ? ntohs(g->port) : ntohs(g->pport)) ||
+ node->tcp_port != (server.tls_cluster ? ntohs(g->pport) : ntohs(g->port)) ||
+ node->cport != ntohs(g->cport)))
+ {
+ if (node->link) freeClusterLink(node->link);
+ memcpy(node->ip,g->ip,NET_IP_STR_LEN);
+ node->tcp_port = msg_tcp_port;
+ node->tls_port = msg_tls_port;
+ node->cport = ntohs(g->cport);
+ node->flags &= ~CLUSTER_NODE_NOADDR;
+ }
+ } else {
+ /* If it's not in NOADDR state and we don't have it, we
+ * add it to our trusted dict with exact nodeid and flag.
+ * Note that we cannot simply start a handshake against
+ * this IP/PORT pairs, since IP/PORT can be reused already,
+ * otherwise we risk joining another cluster.
+ *
+ * Note that we require that the sender of this gossip message
+ * is a well known node in our cluster, otherwise we risk
+ * joining another cluster. */
+ if (sender &&
+ !(flags & CLUSTER_NODE_NOADDR) &&
+ !clusterBlacklistExists(g->nodename))
+ {
+ clusterNode *node;
+ node = createClusterNode(g->nodename, flags);
+ memcpy(node->ip,g->ip,NET_IP_STR_LEN);
+ node->tcp_port = msg_tcp_port;
+ node->tls_port = msg_tls_port;
+ node->cport = ntohs(g->cport);
+ clusterAddNode(node);
+ /* NOTE(review): the node is registered under its freshly
+ * generated shard_id; presumably refined later via the
+ * shard-id ping extension — confirm. */
+ clusterAddNodeToShard(node->shard_id, node);
+ }
+ }
+
+ /* Next node */
+ g++;
+ }
+}
+
+/* IP -> string conversion. 'buf' is supposed to at least be 46 bytes.
+ * If 'announced_ip' length is non-zero, it is used instead of extracting
+ * the IP from the socket peer address. */
+int nodeIp2String(char *buf, clusterLink *link, char *announced_ip) {
+    /* No announced IP: fall back to the socket peer address. */
+    if (announced_ip[0] == '\0') {
+        if (connAddrPeerName(link->conn, buf, NET_IP_STR_LEN, NULL) == -1) {
+            serverLog(LL_NOTICE, "Error converting peer IP to string: %s",
+                link->conn ? connGetLastError(link->conn) : "no link");
+            return C_ERR;
+        }
+        return C_OK;
+    }
+
+    /* The node announced its own IP: trust it, but force NUL termination
+     * since the announced buffer is not guaranteed to be sane. */
+    memcpy(buf,announced_ip,NET_IP_STR_LEN);
+    buf[NET_IP_STR_LEN-1] = '\0';
+    return C_OK;
+}
+
+/* Update the node address to the IP address that can be extracted
+ * from link->fd, or if hdr->myip is non empty, to the address the node
+ * is announcing us. The port is taken from the packet header as well.
+ *
+ * If the address or port changed, disconnect the node link so that we'll
+ * connect again to the new address.
+ *
+ * If the ip/port pair are already correct no operation is performed at
+ * all.
+ *
+ * The function returns 0 if the node address is still the same,
+ * otherwise 1 is returned. */
+int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link,
+ clusterMsg *hdr)
+{
+ char ip[NET_IP_STR_LEN] = {0};
+ int cport = ntohs(hdr->cport);
+ int tcp_port, tls_port;
+ getClientPortFromClusterMsg(hdr, &tls_port, &tcp_port);
+
+ /* We don't proceed if the link is the same as the sender link, as this
+ * function is designed to see if the node link is consistent with the
+ * symmetric link that is used to receive PINGs from the node.
+ *
+ * As a side effect this function never frees the passed 'link', so
+ * it is safe to call during packet processing. */
+ if (link == node->link) return 0;
+
+ /* If the peer IP is unavailable for some reasons like invalid fd or closed
+ * link, just give up the update this time, and the update will be retried
+ * in the next round of PINGs */
+ if (nodeIp2String(ip,link,hdr->myip) == C_ERR) return 0;
+
+ /* Nothing changed: the known address matches the announced one. */
+ if (node->tcp_port == tcp_port && node->cport == cport && node->tls_port == tls_port &&
+ strcmp(ip,node->ip) == 0) return 0;
+
+ /* IP / port is different, update it. */
+ memcpy(node->ip,ip,sizeof(ip));
+ node->tcp_port = tcp_port;
+ node->tls_port = tls_port;
+ node->cport = cport;
+ /* Drop the stale outbound link so the next reconnect targets the new
+ * address, and clear NOADDR since we now have a usable address. */
+ if (node->link) freeClusterLink(node->link);
+ node->flags &= ~CLUSTER_NODE_NOADDR;
+ serverLog(LL_NOTICE,"Address updated for node %.40s (%s), now %s:%d",
+ node->name, node->human_nodename, node->ip, getNodeDefaultClientPort(node));
+
+ /* Check if this is our master and we have to change the
+ * replication target as well. */
+ if (nodeIsSlave(myself) && myself->slaveof == node)
+ replicationSetMaster(node->ip, getNodeDefaultReplicationPort(node));
+ return 1;
+}
+
+/* Reconfigure the specified node 'n' as a master. This function is called when
+ * a node that we believed to be a slave is now acting as master in order to
+ * update the state of the node. */
+void clusterSetNodeAsMaster(clusterNode *n) {
+    /* Already a master: nothing to do. */
+    if (nodeIsMaster(n)) return;
+
+    if (n->slaveof) {
+        /* Detach it from its old master's replica list. Non-local nodes
+         * also get the MIGRATE_TO flag set. */
+        clusterNodeRemoveSlave(n->slaveof,n);
+        if (n != myself) n->flags |= CLUSTER_NODE_MIGRATE_TO;
+    }
+    n->slaveof = NULL;
+    /* Swap the SLAVE flag for the MASTER one. */
+    n->flags = (n->flags & ~CLUSTER_NODE_SLAVE) | CLUSTER_NODE_MASTER;
+
+    /* Schedule a config save and a cluster state update. */
+    clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+                         CLUSTER_TODO_UPDATE_STATE);
+}
+
+/* This function is called when we receive a master configuration via a
+ * PING, PONG or UPDATE packet. What we receive is a node, a configEpoch of the
+ * node, and the set of slots claimed under this configEpoch.
+ *
+ * What we do is to rebind the slots with newer configuration compared to our
+ * local configuration, and if needed, we turn ourself into a replica of the
+ * node (see the function comments for more info).
+ *
+ * The 'sender' is the node for which we received a configuration update.
+ * Sometimes it is not actually the "Sender" of the information, like in the
+ * case we receive the info via an UPDATE packet. */
+void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) {
+ int j;
+ clusterNode *curmaster = NULL, *newmaster = NULL;
+ /* The dirty slots list is a list of slots for which we lose the ownership
+ * while having still keys inside. This usually happens after a failover
+ * or after a manual cluster reconfiguration operated by the admin.
+ *
+ * If the update message is not able to demote a master to slave (in this
+ * case we'll resync with the master updating the whole key space), we
+ * need to delete all the keys in the slots we lost ownership. */
+ uint16_t dirty_slots[CLUSTER_SLOTS];
+ int dirty_slots_count = 0;
+
+ /* We should detect if sender is new master of our shard.
+ * We will know it if all our slots were migrated to sender, and sender
+ * has no slots except ours */
+ int sender_slots = 0;
+ int migrated_our_slots = 0;
+
+ /* Here we set curmaster to this node or the node this node
+ * replicates to if it's a slave. In the for loop we are
+ * interested to check if slots are taken away from curmaster. */
+ curmaster = nodeIsMaster(myself) ? myself : myself->slaveof;
+ /* NOTE(review): if myself is a replica whose 'slaveof' is NULL here,
+ * curmaster is NULL and the 'curmaster->numslots' check further below
+ * would dereference it — presumably a replica always has a master at
+ * this point; confirm. */
+
+ if (sender == myself) {
+ serverLog(LL_NOTICE,"Discarding UPDATE message about myself.");
+ return;
+ }
+
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ if (bitmapTestBit(slots,j)) {
+ sender_slots++;
+
+ /* The slot is already bound to the sender of this message. */
+ if (server.cluster->slots[j] == sender) {
+ /* The owner claims the slot again: clear any prior doubt
+ * about its ownership. */
+ bitmapClearBit(server.cluster->owner_not_claiming_slot, j);
+ continue;
+ }
+
+ /* The slot is in importing state, it should be modified only
+ * manually via redis-cli (example: a resharding is in progress
+ * and the migrating side slot was already closed and is advertising
+ * a new config. We still want the slot to be closed manually). */
+ if (server.cluster->importing_slots_from[j]) continue;
+
+ /* We rebind the slot to the new node claiming it if:
+ * 1) The slot was unassigned or the previous owner no longer owns the slot or
+ * the new node claims it with a greater configEpoch.
+ * 2) We are not currently importing the slot. */
+ if (isSlotUnclaimed(j) ||
+ server.cluster->slots[j]->configEpoch < senderConfigEpoch)
+ {
+ /* Was this slot mine, and still contains keys? Mark it as
+ * a dirty slot. */
+ if (server.cluster->slots[j] == myself &&
+ countKeysInSlot(j) &&
+ sender != myself)
+ {
+ dirty_slots[dirty_slots_count] = j;
+ dirty_slots_count++;
+ }
+
+ /* A slot is being taken away from the master we replicate
+ * (or from ourselves if we are the master). */
+ if (server.cluster->slots[j] == curmaster) {
+ newmaster = sender;
+ migrated_our_slots++;
+ }
+ clusterDelSlot(j);
+ clusterAddSlot(sender,j);
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE|
+ CLUSTER_TODO_FSYNC_CONFIG);
+ }
+ } else if (server.cluster->slots[j] == sender) {
+ /* The slot is currently bound to the sender but the sender is no longer
+ * claiming it. We don't want to unbind the slot yet as it can cause the cluster
+ * to move to FAIL state and also throw client error. Keeping the slot bound to
+ * the previous owner will cause a few client side redirects, but won't throw
+ * any errors. We will keep track of the uncertainty in ownership to avoid
+ * propagating misinformation about this slot's ownership using UPDATE
+ * messages. */
+ bitmapSetBit(server.cluster->owner_not_claiming_slot, j);
+ }
+ }
+
+ /* After updating the slots configuration, don't do any actual change
+ * in the state of the server if a module disabled Redis Cluster
+ * keys redirections. */
+ if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION)
+ return;
+
+ /* If at least one slot was reassigned from a node to another node
+ * with a greater configEpoch, it is possible that:
+ * 1) We are a master left without slots. This means that we were
+ * failed over and we should turn into a replica of the new
+ * master.
+ * 2) We are a slave and our master is left without slots. We need
+ * to replicate to the new slots owner. */
+ if (newmaster && curmaster->numslots == 0 &&
+ (server.cluster_allow_replica_migration ||
+ sender_slots == migrated_our_slots)) {
+ serverLog(LL_NOTICE,
+ "Configuration change detected. Reconfiguring myself "
+ "as a replica of %.40s (%s)", sender->name, sender->human_nodename);
+ clusterSetMaster(sender);
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE|
+ CLUSTER_TODO_FSYNC_CONFIG);
+ } else if (myself->slaveof && myself->slaveof->slaveof &&
+ /* In some rare case when CLUSTER FAILOVER TAKEOVER is used, it
+ * can happen that myself is a replica of a replica of myself. If
+ * this happens, we do nothing to avoid a crash and wait for the
+ * admin to repair the cluster. */
+ myself->slaveof->slaveof != myself)
+ {
+ /* Safeguard against sub-replicas. A replica's master can turn itself
+ * into a replica if its last slot is removed. If no other node takes
+ * over the slot, there is nothing else to trigger replica migration. */
+ serverLog(LL_NOTICE,
+ "I'm a sub-replica! Reconfiguring myself as a replica of grandmaster %.40s (%s)",
+ myself->slaveof->slaveof->name, myself->slaveof->slaveof->human_nodename);
+ clusterSetMaster(myself->slaveof->slaveof);
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE|
+ CLUSTER_TODO_FSYNC_CONFIG);
+ } else if (dirty_slots_count) {
+ /* If we are here, we received an update message which removed
+ * ownership for certain slots we still have keys about, but still
+ * we are serving some slots, so this master node was not demoted to
+ * a slave.
+ *
+ * In order to maintain a consistent state between keys and slots
+ * we need to remove all the keys from the slots we lost. */
+ for (j = 0; j < dirty_slots_count; j++)
+ delKeysInSlot(dirty_slots[j]);
+ }
+}
+
+/* Cluster ping extensions.
+ *
+ * The ping/pong/meet messages support arbitrary extensions to add additional
+ * metadata to the messages that are sent between the various nodes in the
+ * cluster. The extensions take the form:
+ * [ Header length + type (8 bytes) ]
+ * [ Extension information (Arbitrary length, but must be 8 byte padded) ]
+ */
+
+
+/* Returns the length of a given extension */
+static uint32_t getPingExtLength(clusterMsgPingExt *ext) {
+ /* The on-wire 'length' field is stored in network byte order. */
+ return ntohl(ext->length);
+}
+
+/* Returns the initial position of ping extensions. May return an invalid
+ * address if there are no ping extensions. */
+static clusterMsgPingExt *getInitialPingExt(clusterMsg *hdr, int count) {
+ /* Extensions are laid out immediately after the last gossip entry. */
+ clusterMsgPingExt *initial = (clusterMsgPingExt*) &(hdr->data.ping.gossip[count]);
+ return initial;
+}
+
+/* Given a current ping extension, returns the start of the next extension. May return
+ * an invalid address if there are no further ping extensions. */
+static clusterMsgPingExt *getNextPingExt(clusterMsgPingExt *ext) {
+ /* 'length' covers the whole extension (header + padded payload, see
+ * getAlignedPingExtSize()), so adding it to the current position
+ * yields the start of the next extension. */
+ clusterMsgPingExt *next = (clusterMsgPingExt *) (((char *) ext) + getPingExtLength(ext));
+ return next;
+}
+
+/* All PING extensions must be 8-byte aligned */
+uint32_t getAlignedPingExtSize(uint32_t dataSize) {
+
+ /* Total size: fixed extension header plus payload padded to 8 bytes. */
+ return sizeof(clusterMsgPingExt) + EIGHT_BYTE_ALIGN(dataSize);
+}
+
+uint32_t getHostnamePingExtSize(void) {
+    /* No extension is emitted when no hostname is configured. The +1
+     * reserves room for the string's NUL terminator. */
+    size_t len = sdslen(myself->hostname);
+    return len == 0 ? 0 : getAlignedPingExtSize(len + 1);
+}
+
+uint32_t getHumanNodenamePingExtSize(void) {
+    /* No extension is emitted when no human nodename is configured. The
+     * +1 reserves room for the string's NUL terminator. */
+    size_t len = sdslen(myself->human_nodename);
+    return len == 0 ? 0 : getAlignedPingExtSize(len + 1);
+}
+
+uint32_t getShardIdPingExtSize(void) {
+ /* Fixed-size extension; unconditionally emitted by writePingExt(). */
+ return getAlignedPingExtSize(sizeof(clusterMsgPingExtShardId));
+}
+
+uint32_t getForgottenNodeExtSize(void) {
+ /* Fixed-size extension; one is emitted per blacklisted node. */
+ return getAlignedPingExtSize(sizeof(clusterMsgPingExtForgottenNode));
+}
+
+void *preparePingExt(clusterMsgPingExt *ext, uint16_t type, uint32_t length) {
+ /* Write the header in network byte order and return a pointer to the
+ * payload area right after it, ready to be filled by the caller. */
+ ext->type = htons(type);
+ ext->length = htonl(length);
+ return &ext->ext[0];
+}
+
+clusterMsgPingExt *nextPingExt(clusterMsgPingExt *ext) {
+ /* Non-static twin of getNextPingExt(): advance by the extension's
+ * network-byte-order length. */
+ return (clusterMsgPingExt *)((char*)ext + ntohl(ext->length));
+}
+
+/* 1. If a NULL hdr is provided, compute the total size of all the ping
+ * extensions this node would send;
+ * 2. If a non-NULL hdr is provided, write the ping extensions (hostname,
+ * human nodename, forgotten nodes, shard id) starting at the extension
+ * cursor, updating the cursor after each one, and return the amount of
+ * bytes written. */
+uint32_t writePingExt(clusterMsg *hdr, int gossipcount) {
+ uint16_t extensions = 0;
+ uint32_t totlen = 0;
+ clusterMsgPingExt *cursor = NULL;
+ /* Set the initial extension position */
+ if (hdr != NULL) {
+ cursor = getInitialPingExt(hdr, gossipcount);
+ }
+
+ /* hostname is optional */
+ if (sdslen(myself->hostname) != 0) {
+ if (cursor != NULL) {
+ /* Populate hostname */
+ clusterMsgPingExtHostname *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HOSTNAME, getHostnamePingExtSize());
+ memcpy(ext->hostname, myself->hostname, sdslen(myself->hostname));
+
+ /* Move the write cursor */
+ cursor = nextPingExt(cursor);
+ }
+
+ totlen += getHostnamePingExtSize();
+ extensions++;
+ }
+
+ if (sdslen(myself->human_nodename) != 0) {
+ if (cursor != NULL) {
+ /* Populate human_nodename */
+ clusterMsgPingExtHumanNodename *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, getHumanNodenamePingExtSize());
+ memcpy(ext->human_nodename, myself->human_nodename, sdslen(myself->human_nodename));
+
+ /* Move the write cursor */
+ cursor = nextPingExt(cursor);
+ }
+
+ totlen += getHumanNodenamePingExtSize();
+ extensions++;
+ }
+
+ /* Gossip forgotten nodes */
+ if (dictSize(server.cluster->nodes_black_list) > 0) {
+ dictIterator *di = dictGetIterator(server.cluster->nodes_black_list);
+ dictEntry *de;
+ /* Note: in the size-computation pass (cursor == NULL) expired entries
+ * are still counted, so the estimate may slightly exceed what the
+ * write pass emits; the write pass skips expired entries (the
+ * 'continue' below) and its totlen/extensions reflect only what was
+ * actually written into the packet. */
+ while ((de = dictNext(di)) != NULL) {
+ if (cursor != NULL) {
+ uint64_t expire = dictGetUnsignedIntegerVal(de);
+ if ((time_t)expire < server.unixtime) continue; /* already expired */
+ uint64_t ttl = expire - server.unixtime;
+ clusterMsgPingExtForgottenNode *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE, getForgottenNodeExtSize());
+ memcpy(ext->name, dictGetKey(de), CLUSTER_NAMELEN);
+ ext->ttl = htonu64(ttl);
+
+ /* Move the write cursor */
+ cursor = nextPingExt(cursor);
+ }
+ totlen += getForgottenNodeExtSize();
+ extensions++;
+ }
+ dictReleaseIterator(di);
+ }
+
+ /* Populate shard_id */
+ if (cursor != NULL) {
+ clusterMsgPingExtShardId *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_SHARDID, getShardIdPingExtSize());
+ memcpy(ext->shard_id, myself->shard_id, CLUSTER_NAMELEN);
+
+ /* Move the write cursor */
+ cursor = nextPingExt(cursor);
+ }
+ totlen += getShardIdPingExtSize();
+ extensions++;
+
+ if (hdr != NULL) {
+ /* Flag the message as carrying extension data and record how many
+ * extensions were written, in network byte order. */
+ if (extensions != 0) {
+ hdr->mflags[0] |= CLUSTERMSG_FLAG0_EXT_DATA;
+ }
+ hdr->extensions = htons(extensions);
+ }
+
+ return totlen;
+}
+
+/* We previously validated the extensions, so this function just needs to
+ * handle the extensions. */
+void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) {
+    /* NOTE(review): 'sender' is dereferenced unconditionally below
+     * (updateAnnouncedHostname() and friends); presumably the caller only
+     * invokes this for known senders — confirm. */
+    clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender, CLUSTER_NAMELEN);
+    char *ext_hostname = NULL;
+    char *ext_humannodename = NULL;
+    char *ext_shardid = NULL;
+    uint16_t extensions = ntohs(hdr->extensions);
+    /* Loop through all the extensions and process them */
+    clusterMsgPingExt *ext = getInitialPingExt(hdr, ntohs(hdr->count));
+    while (extensions--) {
+        uint16_t type = ntohs(ext->type);
+        if (type == CLUSTERMSG_EXT_TYPE_HOSTNAME) {
+            clusterMsgPingExtHostname *hostname_ext = (clusterMsgPingExtHostname *) &(ext->ext[0].hostname);
+            ext_hostname = hostname_ext->hostname;
+        } else if (type == CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME) {
+            clusterMsgPingExtHumanNodename *humannodename_ext = (clusterMsgPingExtHumanNodename *) &(ext->ext[0].human_nodename);
+            ext_humannodename = humannodename_ext->human_nodename;
+        } else if (type == CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE) {
+            clusterMsgPingExtForgottenNode *forgotten_node_ext = &(ext->ext[0].forgotten_node);
+            clusterNode *n = clusterLookupNode(forgotten_node_ext->name, CLUSTER_NAMELEN);
+            /* Never forget ourselves or the master we replicate from. */
+            if (n && n != myself && !(nodeIsSlave(myself) && myself->slaveof == n)) {
+                sds id = sdsnewlen(forgotten_node_ext->name, CLUSTER_NAMELEN);
+                dictEntry *de = dictAddOrFind(server.cluster->nodes_black_list, id);
+                /* Fix a memory leak: when the node ID was already
+                 * blacklisted, dictAddOrFind() returns the pre-existing
+                 * entry and keeps its own key, so our freshly allocated
+                 * 'id' must be released. */
+                if (dictGetKey(de) != id) sdsfree(id);
+                uint64_t expire = server.unixtime + ntohu64(forgotten_node_ext->ttl);
+                dictSetUnsignedIntegerVal(de, expire);
+                clusterDelNode(n);
+                clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|
+                                     CLUSTER_TODO_SAVE_CONFIG);
+            }
+        } else if (type == CLUSTERMSG_EXT_TYPE_SHARDID) {
+            clusterMsgPingExtShardId *shardid_ext = (clusterMsgPingExtShardId *) &(ext->ext[0].shard_id);
+            ext_shardid = shardid_ext->shard_id;
+        } else {
+            /* Unknown type, we will ignore it but log what happened. */
+            serverLog(LL_WARNING, "Received unknown extension type %d", type);
+        }
+
+        /* We know this will be valid since we validated it ahead of time */
+        ext = getNextPingExt(ext);
+    }
+
+    /* If the node did not send us a hostname extension, assume
+     * they don't have an announced hostname. Otherwise, we'll
+     * set it now. */
+    updateAnnouncedHostname(sender, ext_hostname);
+    updateAnnouncedHumanNodename(sender, ext_humannodename);
+
+    /* If the node did not send us a shard-id extension, it means the sender
+     * does not support it (old version), node->shard_id is randomly generated.
+     * A cluster-wide consensus for the node's shard_id is not necessary.
+     * The key is maintaining consistency of the shard_id on each individual 7.2 node.
+     * As the cluster progressively upgrades to version 7.2, we can expect the shard_ids
+     * across all nodes to naturally converge and align.
+     *
+     * If sender is a replica, set the shard_id to the shard_id of its master.
+     * Otherwise, we'll set it now. */
+    if (ext_shardid == NULL) ext_shardid = clusterNodeGetMaster(sender)->shard_id;
+
+    updateShardId(sender, ext_shardid);
+}
+
+/* Resolve which cluster node sent the message carried by 'link'.
+ *
+ * A link that is already bound to a node (and past the handshake stage,
+ * when names are still random) identifies the sender directly. Otherwise
+ * the sender is looked up by the node name in the message header, and an
+ * unbound inbound link is lazily associated with the node found. Returns
+ * NULL when the sender is unknown. */
+static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) {
+    /* Fast path: the link already knows its node. A node still in
+     * handshake has a random name, so it is not truly "known" yet and we
+     * must fall through to the lookup below. */
+    if (link->node && !nodeInHandshake(link->node))
+        return link->node;
+
+    /* Slow path: fetch the sender based on the message header. */
+    clusterNode *found = clusterLookupNode(hdr->sender, CLUSTER_NAMELEN);
+
+    /* We know the sender node but haven't associated it with the link.
+     * This must be an inbound link, because only for inbound links we
+     * didn't know which node to associate when they were created. */
+    if (found && !link->node)
+        setClusterNodeToInboundClusterLink(found, link);
+    return found;
+}
+
+/* When this function is called, there is a packet to process starting
+ * at link->rcvbuf. Releasing the buffer is up to the caller, so this
+ * function should just handle the higher level stuff of processing the
+ * packet, modifying the cluster state if needed.
+ *
+ * The function returns 1 if the link is still valid after the packet
+ * was processed, otherwise 0 if the link was freed since the packet
+ * processing lead to some inconsistency error (for instance a PONG
+ * received from the wrong sender ID). */
+int clusterProcessPacket(clusterLink *link) {
+ clusterMsg *hdr = (clusterMsg*) link->rcvbuf;
+ uint32_t totlen = ntohl(hdr->totlen);
+ uint16_t type = ntohs(hdr->type);
+ mstime_t now = mstime();
+
+ /* Account received messages per type, but only for known types so we
+ * never index past the stats array. */
+ if (type < CLUSTERMSG_TYPE_COUNT)
+ server.cluster->stats_bus_messages_received[type]++;
+ serverLog(LL_DEBUG,"--- Processing packet of type %s, %lu bytes",
+ clusterGetMessageTypeString(type), (unsigned long) totlen);
+
+ /* Perform sanity checks */
+ if (totlen < 16) return 1; /* At least signature, version, totlen, count. */
+ if (totlen > link->rcvbuf_len) return 1;
+
+ if (ntohs(hdr->ver) != CLUSTER_PROTO_VER) {
+ /* Can't handle messages of different versions. */
+ return 1;
+ }
+
+ /* Debug facility: silently discard message types matching the
+ * configured drop filter. */
+ if (type == server.cluster_drop_packet_filter) {
+ serverLog(LL_WARNING, "Dropping packet that matches debug drop filter");
+ return 1;
+ }
+
+ /* Common header fields consumed by several message types below. */
+ uint16_t flags = ntohs(hdr->flags);
+ uint16_t extensions = ntohs(hdr->extensions);
+ uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0;
+ uint32_t explen; /* expected length of this packet */
+ clusterNode *sender;
+
+ if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG ||
+ type == CLUSTERMSG_TYPE_MEET)
+ {
+ uint16_t count = ntohs(hdr->count);
+
+ explen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ explen += (sizeof(clusterMsgDataGossip)*count);
+
+ /* If there is extension data, which doesn't have a fixed length,
+ * loop through them and validate the length of it now. */
+ if (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA) {
+ clusterMsgPingExt *ext = getInitialPingExt(hdr, count);
+ while (extensions--) {
+ uint16_t extlen = getPingExtLength(ext);
+ /* Extensions are required to be 8-byte padded on the wire. */
+ if (extlen % 8 != 0) {
+ serverLog(LL_WARNING, "Received a %s packet without proper padding (%d bytes)",
+ clusterGetMessageTypeString(type), (int) extlen);
+ return 1;
+ }
+ if ((totlen - explen) < extlen) {
+ serverLog(LL_WARNING, "Received invalid %s packet with extension data that exceeds "
+ "total packet length (%lld)", clusterGetMessageTypeString(type),
+ (unsigned long long) totlen);
+ return 1;
+ }
+ explen += extlen;
+ ext = getNextPingExt(ext);
+ }
+ }
+ } else if (type == CLUSTERMSG_TYPE_FAIL) {
+ explen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ explen += sizeof(clusterMsgDataFail);
+ } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) {
+ /* NOTE(review): the constant 8 presumably accounts for placeholder
+ * bytes of bulk_data inside clusterMsgDataPublish — confirm against
+ * the struct definition. */
+ explen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ explen += sizeof(clusterMsgDataPublish) -
+ 8 +
+ ntohl(hdr->data.publish.msg.channel_len) +
+ ntohl(hdr->data.publish.msg.message_len);
+ } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST ||
+ type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK ||
+ type == CLUSTERMSG_TYPE_MFSTART)
+ {
+ explen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ } else if (type == CLUSTERMSG_TYPE_UPDATE) {
+ explen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ explen += sizeof(clusterMsgDataUpdate);
+ } else if (type == CLUSTERMSG_TYPE_MODULE) {
+ /* NOTE(review): the constant 3 presumably accounts for placeholder
+ * bytes of bulk_data inside clusterMsgModule — confirm against the
+ * struct definition. */
+ explen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ explen += sizeof(clusterMsgModule) -
+ 3 + ntohl(hdr->data.module.msg.len);
+ } else {
+ /* We don't know this type of packet, so we assume it's well formed. */
+ explen = totlen;
+ }
+
+ /* Reject packets whose declared on-wire length does not match the
+ * length implied by their type and payload. */
+ if (totlen != explen) {
+ serverLog(LL_WARNING, "Received invalid %s packet of length %lld but expected length %lld",
+ clusterGetMessageTypeString(type), (unsigned long long) totlen, (unsigned long long) explen);
+ return 1;
+ }
+
+ sender = getNodeFromLinkAndMsg(link, hdr);
+
+ /* Update the last time we saw any data from this node. We
+ * use this in order to avoid detecting a timeout from a node that
+ * is just sending a lot of data in the cluster bus, for instance
+ * because of Pub/Sub. */
+ if (sender) sender->data_received = now;
+
+ if (sender && !nodeInHandshake(sender)) {
+ /* Update our currentEpoch if we see a newer epoch in the cluster. */
+ senderCurrentEpoch = ntohu64(hdr->currentEpoch);
+ senderConfigEpoch = ntohu64(hdr->configEpoch);
+ if (senderCurrentEpoch > server.cluster->currentEpoch)
+ server.cluster->currentEpoch = senderCurrentEpoch;
+ /* Update the sender configEpoch if it is publishing a newer one. */
+ if (senderConfigEpoch > sender->configEpoch) {
+ sender->configEpoch = senderConfigEpoch;
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_FSYNC_CONFIG);
+ }
+ /* Update the replication offset info for this node. */
+ sender->repl_offset = ntohu64(hdr->offset);
+ sender->repl_offset_time = now;
+ /* If we are a slave performing a manual failover and our master
+ * sent its offset while already paused, populate the MF state. */
+ if (server.cluster->mf_end &&
+ nodeIsSlave(myself) &&
+ myself->slaveof == sender &&
+ hdr->mflags[0] & CLUSTERMSG_FLAG0_PAUSED &&
+ server.cluster->mf_master_offset == -1)
+ {
+ server.cluster->mf_master_offset = sender->repl_offset;
+ clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER);
+ serverLog(LL_NOTICE,
+ "Received replication offset for paused "
+ "master manual failover: %lld",
+ server.cluster->mf_master_offset);
+ }
+ }
+
+ /* Initial processing of PING and MEET requests replying with a PONG. */
+ if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_MEET) {
+ /* We use incoming MEET messages in order to set the address
+ * for 'myself', since only other cluster nodes will send us
+ * MEET messages on handshakes, when the cluster joins, or
+ * later if we changed address, and those nodes will use our
+ * official address to connect to us. So by obtaining this address
+ * from the socket is a simple way to discover / update our own
+ * address in the cluster without it being hardcoded in the config.
+ *
+ * However if we don't have an address at all, we update the address
+ * even with a normal PING packet. If it's wrong it will be fixed
+ * by MEET later. */
+ if ((type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') &&
+ server.cluster_announce_ip == NULL)
+ {
+ char ip[NET_IP_STR_LEN];
+
+ if (connAddrSockName(link->conn,ip,sizeof(ip),NULL) != -1 &&
+ strcmp(ip,myself->ip))
+ {
+ memcpy(myself->ip,ip,NET_IP_STR_LEN);
+ serverLog(LL_NOTICE,"IP address for this node updated to %s",
+ myself->ip);
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
+ }
+ }
+
+ /* Add this node if it is new for us and the msg type is MEET.
+ * In this stage we don't try to add the node with the right
+ * flags, slaveof pointer, and so forth, as this details will be
+ * resolved when we'll receive PONGs from the node. */
+ if (!sender && type == CLUSTERMSG_TYPE_MEET) {
+ clusterNode *node;
+
+ node = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE);
+ serverAssert(nodeIp2String(node->ip,link,hdr->myip) == C_OK);
+ getClientPortFromClusterMsg(hdr, &node->tls_port, &node->tcp_port);
+ node->cport = ntohs(hdr->cport);
+ clusterAddNode(node);
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
+ }
+
+ /* If this is a MEET packet from an unknown node, we still process
+ * the gossip section here since we have to trust the sender because
+ * of the message type. */
+ if (!sender && type == CLUSTERMSG_TYPE_MEET)
+ clusterProcessGossipSection(hdr,link);
+
+ /* Anyway reply with a PONG */
+ clusterSendPing(link,CLUSTERMSG_TYPE_PONG);
+ }
+
+ /* PING, PONG, MEET: process config information. */
+ if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG ||
+ type == CLUSTERMSG_TYPE_MEET)
+ {
+ serverLog(LL_DEBUG,"%s packet received: %.40s",
+ clusterGetMessageTypeString(type),
+ link->node ? link->node->name : "NULL");
+ if (!link->inbound) {
+ if (nodeInHandshake(link->node)) {
+ /* If we already have this node, try to change the
+ * IP/port of the node with the new one. */
+ if (sender) {
+ serverLog(LL_VERBOSE,
+ "Handshake: we already know node %.40s (%s), "
+ "updating the address if needed.", sender->name, sender->human_nodename);
+ if (nodeUpdateAddressIfNeeded(sender,link,hdr))
+ {
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE);
+ }
+ /* Free this node as we already have it. This will
+ * cause the link to be freed as well. */
+ clusterDelNode(link->node);
+ return 0;
+ }
+
+ /* First thing to do is replacing the random name with the
+ * right node name if this was a handshake stage. */
+ clusterRenameNode(link->node, hdr->sender);
+ serverLog(LL_DEBUG,"Handshake with node %.40s completed.",
+ link->node->name);
+ link->node->flags &= ~CLUSTER_NODE_HANDSHAKE;
+ link->node->flags |= flags&(CLUSTER_NODE_MASTER|CLUSTER_NODE_SLAVE);
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
+ } else if (memcmp(link->node->name,hdr->sender,
+ CLUSTER_NAMELEN) != 0)
+ {
+ /* If the reply has a non matching node ID we
+ * disconnect this node and set it as not having an associated
+ * address. */
+ serverLog(LL_DEBUG,"PONG contains mismatching sender ID. About node %.40s added %d ms ago, having flags %d",
+ link->node->name,
+ (int)(now-(link->node->ctime)),
+ link->node->flags);
+ link->node->flags |= CLUSTER_NODE_NOADDR;
+ link->node->ip[0] = '\0';
+ link->node->tcp_port = 0;
+ link->node->tls_port = 0;
+ link->node->cport = 0;
+ freeClusterLink(link);
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
+ return 0;
+ }
+ }
+
+ /* Copy the CLUSTER_NODE_NOFAILOVER flag from what the sender
+ * announced. This is a dynamic flag that we receive from the
+ * sender, and the latest status must be trusted. We need it to
+ * be propagated because the slave ranking used to understand the
+ * delay of each slave in the voting process, needs to know
+ * what are the instances really competing. */
+ if (sender) {
+ int nofailover = flags & CLUSTER_NODE_NOFAILOVER;
+ sender->flags &= ~CLUSTER_NODE_NOFAILOVER;
+ sender->flags |= nofailover;
+ }
+
+ /* Update the node address if it changed. */
+ if (sender && type == CLUSTERMSG_TYPE_PING &&
+ !nodeInHandshake(sender) &&
+ nodeUpdateAddressIfNeeded(sender,link,hdr))
+ {
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE);
+ }
+
+ /* Update our info about the node */
+ if (!link->inbound && type == CLUSTERMSG_TYPE_PONG) {
+ link->node->pong_received = now;
+ link->node->ping_sent = 0;
+
+ /* The PFAIL condition can be reversed without external
+ * help if it is momentary (that is, if it does not
+ * turn into a FAIL state).
+ *
+ * The FAIL condition is also reversible under specific
+ * conditions detected by clearNodeFailureIfNeeded(). */
+ if (nodeTimedOut(link->node)) {
+ link->node->flags &= ~CLUSTER_NODE_PFAIL;
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE);
+ } else if (nodeFailed(link->node)) {
+ clearNodeFailureIfNeeded(link->node);
+ }
+ }
+
+ /* Check for role switch: slave -> master or master -> slave. */
+ if (sender) {
+ if (!memcmp(hdr->slaveof,CLUSTER_NODE_NULL_NAME,
+ sizeof(hdr->slaveof)))
+ {
+ /* Node is a master. */
+ clusterSetNodeAsMaster(sender);
+ } else {
+ /* Node is a slave. */
+ clusterNode *master = clusterLookupNode(hdr->slaveof, CLUSTER_NAMELEN);
+
+ if (nodeIsMaster(sender)) {
+ /* Master turned into a slave! Reconfigure the node. */
+ clusterDelNodeSlots(sender);
+ sender->flags &= ~(CLUSTER_NODE_MASTER|
+ CLUSTER_NODE_MIGRATE_TO);
+ sender->flags |= CLUSTER_NODE_SLAVE;
+
+ /* Update config and state. */
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE);
+ }
+
+ /* Master node changed for this slave? */
+ if (master && sender->slaveof != master) {
+ if (sender->slaveof)
+ clusterNodeRemoveSlave(sender->slaveof,sender);
+ clusterNodeAddSlave(master,sender);
+ sender->slaveof = master;
+
+ /* Update the shard_id when a replica is connected to its
+ * primary in the very first time. */
+ updateShardId(sender, master->shard_id);
+
+ /* Update config. */
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
+ }
+ }
+ }
+
+ /* Update our info about served slots.
+ *
+ * Note: this MUST happen after we update the master/slave state
+ * so that CLUSTER_NODE_MASTER flag will be set. */
+
+ /* Many checks are only needed if the set of served slots this
+ * instance claims is different compared to the set of slots we have
+ * for it. Check this ASAP to avoid other computational expansive
+ * checks later. */
+ clusterNode *sender_master = NULL; /* Sender or its master if slave. */
+ int dirty_slots = 0; /* Sender claimed slots don't match my view? */
+
+ if (sender) {
+ sender_master = nodeIsMaster(sender) ? sender : sender->slaveof;
+ if (sender_master) {
+ dirty_slots = memcmp(sender_master->slots,
+ hdr->myslots,sizeof(hdr->myslots)) != 0;
+ }
+ }
+
+ /* 1) If the sender of the message is a master, and we detected that
+ * the set of slots it claims changed, scan the slots to see if we
+ * need to update our configuration. */
+ if (sender && nodeIsMaster(sender) && dirty_slots)
+ clusterUpdateSlotsConfigWith(sender,senderConfigEpoch,hdr->myslots);
+
+ /* 2) We also check for the reverse condition, that is, the sender
+ * claims to serve slots we know are served by a master with a
+ * greater configEpoch. If this happens we inform the sender.
+ *
+ * This is useful because sometimes after a partition heals, a
+ * reappearing master may be the last one to claim a given set of
+ * hash slots, but with a configuration that other instances know to
+ * be deprecated. Example:
+ *
+ * A and B are master and slave for slots 1,2,3.
+ * A is partitioned away, B gets promoted.
+ * B is partitioned away, and A returns available.
+ *
+ * Usually B would PING A publishing its set of served slots and its
+ * configEpoch, but because of the partition B can't inform A of the
+ * new configuration, so other nodes that have an updated table must
+ * do it. In this way A will stop to act as a master (or can try to
+ * failover if there are the conditions to win the election). */
+ if (sender && dirty_slots) {
+ int j;
+
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ if (bitmapTestBit(hdr->myslots,j)) {
+ if (server.cluster->slots[j] == sender ||
+ isSlotUnclaimed(j)) continue;
+ if (server.cluster->slots[j]->configEpoch >
+ senderConfigEpoch)
+ {
+ serverLog(LL_VERBOSE,
+ "Node %.40s has old slots configuration, sending "
+ "an UPDATE message about %.40s",
+ sender->name, server.cluster->slots[j]->name);
+ clusterSendUpdate(sender->link,
+ server.cluster->slots[j]);
+
+ /* TODO: instead of exiting the loop send every other
+ * UPDATE packet for other nodes that are the new owner
+ * of sender's slots. */
+ break;
+ }
+ }
+ }
+ }
+
+ /* If our config epoch collides with the sender's try to fix
+ * the problem. */
+ if (sender &&
+ nodeIsMaster(myself) && nodeIsMaster(sender) &&
+ senderConfigEpoch == myself->configEpoch)
+ {
+ clusterHandleConfigEpochCollision(sender);
+ }
+
+ /* Get info from the gossip section */
+ if (sender) {
+ clusterProcessGossipSection(hdr,link);
+ clusterProcessPingExtensions(hdr,link);
+ }
+ } else if (type == CLUSTERMSG_TYPE_FAIL) {
+ clusterNode *failing;
+
+ if (sender) {
+ failing = clusterLookupNode(hdr->data.fail.about.nodename, CLUSTER_NAMELEN);
+ if (failing &&
+ !(failing->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_MYSELF)))
+ {
+ serverLog(LL_NOTICE,
+ "FAIL message received from %.40s (%s) about %.40s (%s)",
+ hdr->sender, sender->human_nodename, hdr->data.fail.about.nodename, failing->human_nodename);
+ failing->flags |= CLUSTER_NODE_FAIL;
+ failing->fail_time = now;
+ failing->flags &= ~CLUSTER_NODE_PFAIL;
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE);
+ }
+ } else {
+ serverLog(LL_NOTICE,
+ "Ignoring FAIL message from unknown node %.40s about %.40s",
+ hdr->sender, hdr->data.fail.about.nodename);
+ }
+ } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) {
+ if (!sender) return 1; /* We don't know that node. */
+
+ robj *channel, *message;
+ uint32_t channel_len, message_len;
+
+ /* Don't bother creating useless objects if there are no
+ * Pub/Sub subscribers. */
+ if ((type == CLUSTERMSG_TYPE_PUBLISH
+ && serverPubsubSubscriptionCount() > 0)
+ || (type == CLUSTERMSG_TYPE_PUBLISHSHARD
+ && serverPubsubShardSubscriptionCount() > 0))
+ {
+ /* The bulk payload carries the channel first, then the
+ * message, back to back. */
+ channel_len = ntohl(hdr->data.publish.msg.channel_len);
+ message_len = ntohl(hdr->data.publish.msg.message_len);
+ channel = createStringObject(
+ (char*)hdr->data.publish.msg.bulk_data,channel_len);
+ message = createStringObject(
+ (char*)hdr->data.publish.msg.bulk_data+channel_len,
+ message_len);
+ pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD);
+ decrRefCount(channel);
+ decrRefCount(message);
+ }
+ } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST) {
+ if (!sender) return 1; /* We don't know that node. */
+ clusterSendFailoverAuthIfNeeded(sender,hdr);
+ } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK) {
+ if (!sender) return 1; /* We don't know that node. */
+ /* We consider this vote only if the sender is a master serving
+ * a non zero number of slots, and its currentEpoch is greater or
+ * equal to epoch where this node started the election. */
+ if (nodeIsMaster(sender) && sender->numslots > 0 &&
+ senderCurrentEpoch >= server.cluster->failover_auth_epoch)
+ {
+ server.cluster->failover_auth_count++;
+ /* Maybe we reached a quorum here, set a flag to make sure
+ * we check ASAP. */
+ clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER);
+ }
+ } else if (type == CLUSTERMSG_TYPE_MFSTART) {
+ /* This message is acceptable only if I'm a master and the sender
+ * is one of my slaves. */
+ if (!sender || sender->slaveof != myself) return 1;
+ /* Manual failover requested from slaves. Initialize the state
+ * accordingly. */
+ resetManualFailover();
+ server.cluster->mf_end = now + CLUSTER_MF_TIMEOUT;
+ server.cluster->mf_slave = sender;
+ pauseActions(PAUSE_DURING_FAILOVER,
+ now + (CLUSTER_MF_TIMEOUT * CLUSTER_MF_PAUSE_MULT),
+ PAUSE_ACTIONS_CLIENT_WRITE_SET);
+ serverLog(LL_NOTICE,"Manual failover requested by replica %.40s (%s).",
+ sender->name, sender->human_nodename);
+ /* We need to send a ping message to the replica, as it would carry
+ * `server.cluster->mf_master_offset`, which means the master paused clients
+ * at offset `server.cluster->mf_master_offset`, so that the replica would
+ * know that it is safe to set its `server.cluster->mf_can_start` to 1 so as
+ * to complete failover as quickly as possible. */
+ clusterSendPing(link, CLUSTERMSG_TYPE_PING);
+ } else if (type == CLUSTERMSG_TYPE_UPDATE) {
+ clusterNode *n; /* The node the update is about. */
+ uint64_t reportedConfigEpoch =
+ ntohu64(hdr->data.update.nodecfg.configEpoch);
+
+ if (!sender) return 1; /* We don't know the sender. */
+ n = clusterLookupNode(hdr->data.update.nodecfg.nodename, CLUSTER_NAMELEN);
+ if (!n) return 1; /* We don't know the reported node. */
+ if (n->configEpoch >= reportedConfigEpoch) return 1; /* Nothing new. */
+
+ /* If in our current config the node is a slave, set it as a master. */
+ if (nodeIsSlave(n)) clusterSetNodeAsMaster(n);
+
+ /* Update the node's configEpoch. */
+ n->configEpoch = reportedConfigEpoch;
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_FSYNC_CONFIG);
+
+ /* Check the bitmap of served slots and update our
+ * config accordingly. */
+ clusterUpdateSlotsConfigWith(n,reportedConfigEpoch,
+ hdr->data.update.nodecfg.slots);
+ } else if (type == CLUSTERMSG_TYPE_MODULE) {
+ if (!sender) return 1; /* Protect the module from unknown nodes. */
+ /* We need to route this message back to the right module subscribed
+ * for the right message type. */
+ uint64_t module_id = hdr->data.module.msg.module_id; /* Endian-safe ID */
+ uint32_t len = ntohl(hdr->data.module.msg.len);
+ uint8_t type = hdr->data.module.msg.type;
+ unsigned char *payload = hdr->data.module.msg.bulk_data;
+ moduleCallClusterReceivers(sender->name,module_id,type,payload,len);
+ } else {
+ serverLog(LL_WARNING,"Received unknown packet type: %d", type);
+ }
+ return 1;
+}
+
+/* This function is called when we detect the link with this node is lost.
+ We set the node as no longer connected. The Cluster Cron will detect
+ this connection and will try to get it connected again.
+
+ Instead if the node is a temporary node used to accept a query, we
+ completely free the node on error. */
+void handleLinkIOError(clusterLink *link) {
+ /* NOTE(review): the whole cleanup described above appears to be
+ * delegated to freeClusterLink() — confirm it detaches node->link so
+ * clusterCron() can reconnect. */
+ freeClusterLink(link);
+}
+
+/* Send the messages queued for the link.
+ *
+ * Writes as many queued message blocks as possible, stopping after
+ * NET_MAX_WRITES_PER_EVENT bytes of completed messages to avoid starving
+ * other events. On I/O error the link is freed, so 'link' must not be
+ * used after handleLinkIOError() is called. */
+void clusterWriteHandler(connection *conn) {
+ clusterLink *link = connGetPrivateData(conn);
+ ssize_t nwritten;
+ size_t totwritten = 0;
+
+ while (totwritten < NET_MAX_WRITES_PER_EVENT && listLength(link->send_msg_queue) > 0) {
+ listNode *head = listFirst(link->send_msg_queue);
+ clusterMsgSendBlock *msgblock = (clusterMsgSendBlock*)head->value;
+ clusterMsg *msg = &msgblock->msg;
+ /* Resume from the partial-send offset recorded on a previous
+ * invocation (0 if the head message is untouched). */
+ size_t msg_offset = link->head_msg_send_offset;
+ size_t msg_len = ntohl(msg->totlen);
+
+ nwritten = connWrite(conn, (char*)msg + msg_offset, msg_len - msg_offset);
+ if (nwritten <= 0) {
+ serverLog(LL_DEBUG,"I/O error writing to node link: %s",
+ (nwritten == -1) ? connGetLastError(conn) : "short write");
+ /* Frees the link: do not touch it after this call. */
+ handleLinkIOError(link);
+ return;
+ }
+ if (msg_offset + nwritten < msg_len) {
+ /* If full message wasn't written, record the offset
+ * and continue sending from this point next time */
+ link->head_msg_send_offset += nwritten;
+ return;
+ }
+ serverAssert((msg_offset + nwritten) == msg_len);
+ link->head_msg_send_offset = 0;
+
+ /* Delete the node and update our memory tracking */
+ uint32_t blocklen = msgblock->totlen;
+ listDelNode(link->send_msg_queue, head);
+ server.stat_cluster_links_memory -= sizeof(listNode);
+ link->send_msg_queue_mem -= sizeof(listNode) + blocklen;
+
+ totwritten += nwritten;
+ }
+
+ /* Nothing left to send: stop getting write events for this link. */
+ if (listLength(link->send_msg_queue) == 0)
+ connSetWriteHandler(link->conn, NULL);
+}
+
+/* A connect handler that gets called when a connection to another node
+ * gets established.
+ *
+ * On failure the link is freed; on success a read handler is installed
+ * and an initial PING (or MEET, if the node carries the MEET flag) is
+ * queued immediately. */
+void clusterLinkConnectHandler(connection *conn) {
+ clusterLink *link = connGetPrivateData(conn);
+ clusterNode *node = link->node;
+
+ /* Check if connection succeeded */
+ if (connGetState(conn) != CONN_STATE_CONNECTED) {
+ serverLog(LL_VERBOSE, "Connection with Node %.40s at %s:%d failed: %s",
+ node->name, node->ip, node->cport,
+ connGetLastError(conn));
+ freeClusterLink(link);
+ return;
+ }
+
+ /* Register a read handler from now on */
+ connSetReadHandler(conn, clusterReadHandler);
+
+ /* Queue a PING in the new connection ASAP: this is crucial
+ * to avoid false positives in failure detection.
+ *
+ * If the node is flagged as MEET, we send a MEET message instead
+ * of a PING one, to force the receiver to add us in its node
+ * table. */
+ mstime_t old_ping_sent = node->ping_sent;
+ clusterSendPing(link, node->flags & CLUSTER_NODE_MEET ?
+ CLUSTERMSG_TYPE_MEET : CLUSTERMSG_TYPE_PING);
+ if (old_ping_sent) {
+ /* If there was an active ping before the link was
+ * disconnected, we want to restore the ping time, otherwise
+ * replaced by the clusterSendPing() call. */
+ node->ping_sent = old_ping_sent;
+ }
+ /* We can clear the flag after the first packet is sent.
+ * If we'll never receive a PONG, we'll never send new packets
+ * to this node. Instead after the PONG is received and we
+ * are no longer in meet/handshake status, we want to send
+ * normal PING packets. */
+ node->flags &= ~CLUSTER_NODE_MEET;
+
+ serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d",
+ node->name, node->ip, node->cport);
+}
+
+/* Read data. Try to read the first field of the header first to check the
+ * full length of the packet. When a whole packet is in memory this function
+ * will call the function to process the packet. And so forth. */
+void clusterReadHandler(connection *conn) {
+ /* Fixed-size stack chunk used as the read scratch buffer; data is then
+ * appended to the growable link->rcvbuf. */
+ clusterMsg buf[1];
+ ssize_t nread;
+ clusterMsg *hdr;
+ clusterLink *link = connGetPrivateData(conn);
+ unsigned int readlen, rcvbuflen;
+
+ while(1) { /* Read as long as there is data to read. */
+ rcvbuflen = link->rcvbuf_len;
+ if (rcvbuflen < 8) {
+ /* First, obtain the first 8 bytes to get the full message
+ * length. */
+ readlen = 8 - rcvbuflen;
+ } else {
+ /* Finally read the full message. */
+ hdr = (clusterMsg*) link->rcvbuf;
+ if (rcvbuflen == 8) {
+ /* Perform some sanity check on the message signature
+ * and length. */
+ if (memcmp(hdr->sig,"RCmb",4) != 0 ||
+ ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN)
+ {
+ char ip[NET_IP_STR_LEN];
+ int port;
+ if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) {
+ serverLog(LL_WARNING,
+ "Bad message length or signature received "
+ "on the Cluster bus.");
+ } else {
+ serverLog(LL_WARNING,
+ "Bad message length or signature received "
+ "on the Cluster bus from %s:%d", ip, port);
+ }
+ handleLinkIOError(link);
+ return;
+ }
+ }
+ /* Read at most the remainder of the message, capped to the
+ * scratch buffer size. */
+ readlen = ntohl(hdr->totlen) - rcvbuflen;
+ if (readlen > sizeof(buf)) readlen = sizeof(buf);
+ }
+
+ nread = connRead(conn,buf,readlen);
+ if (nread == -1 && (connGetState(conn) == CONN_STATE_CONNECTED)) return; /* No more data ready. */
+
+ if (nread <= 0) {
+ /* I/O error... */
+ serverLog(LL_DEBUG,"I/O error reading from node link: %s",
+ (nread == 0) ? "connection closed" : connGetLastError(conn));
+ handleLinkIOError(link);
+ return;
+ } else {
+ /* Read data and recast the pointer to the new buffer. */
+ size_t unused = link->rcvbuf_alloc - link->rcvbuf_len;
+ if ((size_t)nread > unused) {
+ size_t required = link->rcvbuf_len + nread;
+ size_t prev_rcvbuf_alloc = link->rcvbuf_alloc;
+ /* If less than 1mb, grow to twice the needed size, if larger grow by 1mb. */
+ link->rcvbuf_alloc = required < RCVBUF_MAX_PREALLOC ? required * 2: required + RCVBUF_MAX_PREALLOC;
+ link->rcvbuf = zrealloc(link->rcvbuf, link->rcvbuf_alloc);
+ server.stat_cluster_links_memory += link->rcvbuf_alloc - prev_rcvbuf_alloc;
+ }
+ memcpy(link->rcvbuf + link->rcvbuf_len, buf, nread);
+ link->rcvbuf_len += nread;
+ hdr = (clusterMsg*) link->rcvbuf;
+ rcvbuflen += nread;
+ }
+
+ /* Total length obtained? Process this packet. */
+ if (rcvbuflen >= 8 && rcvbuflen == ntohl(hdr->totlen)) {
+ if (clusterProcessPacket(link)) {
+ /* Packet consumed and link still valid: shrink an
+ * over-grown receive buffer back to its initial size. */
+ if (link->rcvbuf_alloc > RCVBUF_INIT_LEN) {
+ size_t prev_rcvbuf_alloc = link->rcvbuf_alloc;
+ zfree(link->rcvbuf);
+ link->rcvbuf = zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN);
+ server.stat_cluster_links_memory += link->rcvbuf_alloc - prev_rcvbuf_alloc;
+ }
+ link->rcvbuf_len = 0;
+ } else {
+ return; /* Link no longer valid. */
+ }
+ }
+ }
+}
+
+/* Put the message block into the link's send queue.
+ *
+ * It is guaranteed that this function will never have as a side effect
+ * the link to be invalidated, so it is safe to call this function
+ * from event handlers that will do stuff with the same link later. */
+void clusterSendMessage(clusterLink *link, clusterMsgSendBlock *msgblock) {
+    if (!link) return;
+
+    /* Install the write handler only when transitioning from an empty
+     * queue to a non-empty one; zero-length messages need no flush. */
+    int was_idle = (listLength(link->send_msg_queue) == 0);
+    if (was_idle && msgblock->msg.totlen != 0)
+        connSetWriteHandlerWithBarrier(link->conn, clusterWriteHandler, 1);
+
+    /* The queue keeps its own reference to the block. */
+    listAddNodeTail(link->send_msg_queue, msgblock);
+    msgblock->refcount++;
+
+    /* Update memory tracking */
+    link->send_msg_queue_mem += sizeof(listNode) + msgblock->totlen;
+    server.stat_cluster_links_memory += sizeof(listNode);
+
+    /* Populate sent messages stats, known types only. */
+    uint16_t msgtype = ntohs(msgblock->msg.type);
+    if (msgtype < CLUSTERMSG_TYPE_COUNT)
+        server.cluster->stats_bus_messages_sent[msgtype]++;
+}
+
+/* Send a message to all the nodes that are part of the cluster having
+ * a connected link.
+ *
+ * It is guaranteed that this function will never have as a side effect
+ * some node->link to be invalidated, so it is safe to call this function
+ * from event handlers that will do stuff with node links later. */
+void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) {
+    dictIterator *iter = dictGetSafeIterator(server.cluster->nodes);
+    dictEntry *entry;
+
+    while ((entry = dictNext(iter)) != NULL) {
+        clusterNode *node = dictGetVal(entry);
+
+        /* Skip ourselves and nodes still in handshake. */
+        int skip = node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE);
+        if (!skip)
+            clusterSendMessage(node->link, msgblock);
+    }
+    dictReleaseIterator(iter);
+}
+
+/* Build the message header. hdr must point to a buffer at least
+ * sizeof(clusterMsg) in bytes.
+ *
+ * Multi-byte fields are converted to network byte order (htons/htonl/
+ * htonu64) before being stored. */
+static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) {
+ uint64_t offset;
+ clusterNode *master;
+
+ /* If this node is a master, we send its slots bitmap and configEpoch.
+ * If this node is a slave we send the master's information instead (the
+ * node is flagged as slave so the receiver knows that it is NOT really
+ * in charge for this slots. */
+ master = (nodeIsSlave(myself) && myself->slaveof) ?
+ myself->slaveof : myself;
+
+ hdr->ver = htons(CLUSTER_PROTO_VER);
+ /* "RCmb" is the cluster bus signature checked by clusterReadHandler(). */
+ hdr->sig[0] = 'R';
+ hdr->sig[1] = 'C';
+ hdr->sig[2] = 'm';
+ hdr->sig[3] = 'b';
+ hdr->type = htons(type);
+ memcpy(hdr->sender,myself->name,CLUSTER_NAMELEN);
+
+ /* If cluster-announce-ip option is enabled, force the receivers of our
+ * packets to use the specified address for this node. Otherwise if the
+ * first byte is zero, they'll do auto discovery. */
+ memset(hdr->myip,0,NET_IP_STR_LEN);
+ if (server.cluster_announce_ip) {
+ redis_strlcpy(hdr->myip,server.cluster_announce_ip,NET_IP_STR_LEN);
+ }
+
+ /* Handle cluster-announce-[tls-|bus-]port. */
+ int announced_tcp_port, announced_tls_port, announced_cport;
+ deriveAnnouncedPorts(&announced_tcp_port, &announced_tls_port, &announced_cport);
+
+ memcpy(hdr->myslots,master->slots,sizeof(hdr->myslots));
+ memset(hdr->slaveof,0,CLUSTER_NAMELEN);
+ if (myself->slaveof != NULL)
+ memcpy(hdr->slaveof,myself->slaveof->name, CLUSTER_NAMELEN);
+ /* With TLS clusters the TLS port is the primary advertised port and
+ * the plaintext one goes in 'pport'; otherwise the roles are swapped. */
+ if (server.tls_cluster) {
+ hdr->port = htons(announced_tls_port);
+ hdr->pport = htons(announced_tcp_port);
+ } else {
+ hdr->port = htons(announced_tcp_port);
+ hdr->pport = htons(announced_tls_port);
+ }
+ hdr->cport = htons(announced_cport);
+ hdr->flags = htons(myself->flags);
+ /* NOTE(review): 'state' is stored without byte-order conversion —
+ * presumably a single-byte field; confirm against the struct. */
+ hdr->state = server.cluster->state;
+
+ /* Set the currentEpoch and configEpochs. */
+ hdr->currentEpoch = htonu64(server.cluster->currentEpoch);
+ hdr->configEpoch = htonu64(master->configEpoch);
+
+ /* Set the replication offset. */
+ if (nodeIsSlave(myself))
+ offset = replicationGetSlaveOffset();
+ else
+ offset = server.master_repl_offset;
+ hdr->offset = htonu64(offset);
+
+ /* Set the message flags. */
+ if (nodeIsMaster(myself) && server.cluster->mf_end)
+ hdr->mflags[0] |= CLUSTERMSG_FLAG0_PAUSED;
+
+ hdr->totlen = htonl(msglen);
+}
+
+/* Fill the i-th gossip slot of the message pointed by 'hdr' with the
+ * current state of node 'n' (name, liveness timestamps, address/ports
+ * and flags). */
+void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) {
+    clusterMsgDataGossip *entry = &hdr->data.ping.gossip[i];
+
+    memcpy(entry->nodename, n->name, CLUSTER_NAMELEN);
+    /* Timestamps travel on the wire in seconds, 32 bit, network order. */
+    entry->ping_sent = htonl(n->ping_sent/1000);
+    entry->pong_received = htonl(n->pong_received/1000);
+    memcpy(entry->ip, n->ip, sizeof(n->ip));
+    /* Advertise the TLS port as the primary port when the cluster runs
+     * over TLS, and the other one as the secondary 'pport'. */
+    int primary_port = server.tls_cluster ? n->tls_port : n->tcp_port;
+    int secondary_port = server.tls_cluster ? n->tcp_port : n->tls_port;
+    entry->port = htons(primary_port);
+    entry->pport = htons(secondary_port);
+    entry->cport = htons(n->cport);
+    entry->flags = htons(n->flags);
+    entry->notused1 = 0;
+}
+
+/* Send a PING or PONG packet to the specified node, making sure to add enough
+ * gossip information. */
+void clusterSendPing(clusterLink *link, int type) {
+    /* Monotonic per-process counter used to deduplicate gossip entries in
+     * the current packet: a node whose last_in_ping_gossip equals the
+     * current value was already added to this very message. */
+    static unsigned long long cluster_pings_sent = 0;
+    cluster_pings_sent++;
+    int gossipcount = 0; /* Number of gossip sections added so far. */
+    int wanted; /* Number of gossip sections we want to append if possible. */
+    int estlen; /* Upper bound on estimated packet length */
+    /* freshnodes is the max number of nodes we can hope to append at all:
+     * nodes available minus two (ourself and the node we are sending the
+     * message to). However practically there may be less valid nodes since
+     * nodes in handshake state, disconnected, are not considered. */
+    int freshnodes = dictSize(server.cluster->nodes)-2;
+
+    /* How many gossip sections we want to add? 1/10 of the number of nodes
+     * and anyway at least 3. Why 1/10?
+     *
+     * If we have N masters, with N/10 entries, and we consider that in
+     * node_timeout we exchange with each other node at least 4 packets
+     * (we ping in the worst case in node_timeout/2 time, and we also
+     * receive two pings from the host), we have a total of 8 packets
+     * in the node_timeout*2 failure reports validity time. So we have
+     * that, for a single PFAIL node, we can expect to receive the following
+     * number of failure reports (in the specified window of time):
+     *
+     * PROB * GOSSIP_ENTRIES_PER_PACKET * TOTAL_PACKETS:
+     *
+     * PROB = probability of being featured in a single gossip entry,
+     *        which is 1 / NUM_OF_NODES.
+     * ENTRIES = 10.
+     * TOTAL_PACKETS = 2 * 4 * NUM_OF_MASTERS.
+     *
+     * If we assume we have just masters (so num of nodes and num of masters
+     * is the same), with 1/10 we always get over the majority, and specifically
+     * 80% of the number of nodes, to account for many masters failing at the
+     * same time.
+     *
+     * Since we have non-voting slaves that lower the probability of an entry
+     * to feature our node, we set the number of entries per packet as
+     * 10% of the total nodes we have. */
+    wanted = floor(dictSize(server.cluster->nodes)/10);
+    if (wanted < 3) wanted = 3;
+    if (wanted > freshnodes) wanted = freshnodes;
+
+    /* Include all the nodes in PFAIL state, so that failure reports are
+     * faster to propagate to go from PFAIL to FAIL state. */
+    int pfail_wanted = server.cluster->stats_pfail_nodes;
+
+    /* Compute the maximum estlen to allocate our buffer. We'll fix the estlen
+     * later according to the number of gossip sections we really were able
+     * to put inside the packet. */
+    estlen = sizeof(clusterMsg) - sizeof(union clusterMsgData);
+    estlen += (sizeof(clusterMsgDataGossip)*(wanted + pfail_wanted));
+    estlen += writePingExt(NULL, 0);
+    /* Note: clusterBuildMessageHdr() expects the buffer to be always at least
+     * sizeof(clusterMsg) or more. */
+    if (estlen < (int)sizeof(clusterMsg)) estlen = sizeof(clusterMsg);
+    clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, estlen);
+    clusterMsg *hdr = &msgblock->msg;
+
+    /* Record when we last pinged this peer so timeouts can be detected. */
+    if (!link->inbound && type == CLUSTERMSG_TYPE_PING)
+        link->node->ping_sent = mstime();
+
+    /* Populate the gossip fields. Random sampling is bounded by
+     * maxiterations so we never spin when few nodes qualify. */
+    int maxiterations = wanted*3;
+    while(freshnodes > 0 && gossipcount < wanted && maxiterations--) {
+        dictEntry *de = dictGetRandomKey(server.cluster->nodes);
+        clusterNode *this = dictGetVal(de);
+
+        /* Don't include this node: the whole packet header is about us
+         * already, so we just gossip about other nodes. */
+        if (this == myself) continue;
+
+        /* PFAIL nodes will be added later. */
+        if (this->flags & CLUSTER_NODE_PFAIL) continue;
+
+        /* In the gossip section don't include:
+         * 1) Nodes in HANDSHAKE state.
+         * 2) Nodes with the NOADDR flag set.
+         * 3) Disconnected nodes if they don't have configured slots.
+         */
+        if (this->flags & (CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_NOADDR) ||
+            (this->link == NULL && this->numslots == 0))
+        {
+            freshnodes--; /* Technically not correct, but saves CPU. */
+            continue;
+        }
+
+        /* Do not add a node we already have. */
+        if (this->last_in_ping_gossip == cluster_pings_sent) continue;
+
+        /* Add it */
+        clusterSetGossipEntry(hdr,gossipcount,this);
+        this->last_in_ping_gossip = cluster_pings_sent;
+        freshnodes--;
+        gossipcount++;
+    }
+
+    /* If there are PFAIL nodes, add them at the end. */
+    if (pfail_wanted) {
+        dictIterator *di;
+        dictEntry *de;
+
+        di = dictGetSafeIterator(server.cluster->nodes);
+        while((de = dictNext(di)) != NULL && pfail_wanted > 0) {
+            clusterNode *node = dictGetVal(de);
+            if (node->flags & CLUSTER_NODE_HANDSHAKE) continue;
+            if (node->flags & CLUSTER_NODE_NOADDR) continue;
+            if (!(node->flags & CLUSTER_NODE_PFAIL)) continue;
+            clusterSetGossipEntry(hdr,gossipcount,node);
+            gossipcount++;
+            /* We take the count of the slots we allocated, since the
+             * PFAIL stats may not match perfectly with the current number
+             * of PFAIL nodes. */
+            pfail_wanted--;
+        }
+        dictReleaseIterator(di);
+    }
+
+    /* Compute the actual total length and send! The totlen written by
+     * clusterBuildMessageHdr() inside createClusterMsgSendBlock() was only
+     * an estimate, so it is overwritten here with the real size. */
+    uint32_t totlen = 0;
+    totlen += writePingExt(hdr, gossipcount);
+    totlen += sizeof(clusterMsg)-sizeof(union clusterMsgData);
+    totlen += (sizeof(clusterMsgDataGossip)*gossipcount);
+    serverAssert(gossipcount < USHRT_MAX);
+    hdr->count = htons(gossipcount);
+    hdr->totlen = htonl(totlen);
+
+    clusterSendMessage(link,msgblock);
+    clusterMsgSendBlockDecrRefCount(msgblock);
+}
+
+/* Broadcast a PONG to every connected node that is not in handshake state
+ * and for which we have a working link.
+ *
+ * In Redis Cluster pongs do double duty: besides failure detection they
+ * carry configuration, so broadcasting one is the fastest way to make the
+ * cluster aware of a change (e.g. right after a slave promotion).
+ *
+ * 'target' selects the audience:
+ *
+ * CLUSTER_BROADCAST_ALL          -> All known instances.
+ * CLUSTER_BROADCAST_LOCAL_SLAVES -> All slaves in my master-slaves ring.
+ */
+#define CLUSTER_BROADCAST_ALL 0
+#define CLUSTER_BROADCAST_LOCAL_SLAVES 1
+void clusterBroadcastPong(int target) {
+    dictIterator *it = dictGetSafeIterator(server.cluster->nodes);
+    dictEntry *entry;
+
+    while ((entry = dictNext(it)) != NULL) {
+        clusterNode *n = dictGetVal(entry);
+
+        if (n->link == NULL) continue;
+        if (n == myself || nodeInHandshake(n)) continue;
+        if (target == CLUSTER_BROADCAST_LOCAL_SLAVES) {
+            /* A "local" slave replicates either myself or my same master. */
+            int in_my_ring = nodeIsSlave(n) && n->slaveof &&
+                (n->slaveof == myself || n->slaveof == myself->slaveof);
+            if (!in_my_ring) continue;
+        }
+        clusterSendPing(n->link, CLUSTERMSG_TYPE_PONG);
+    }
+    dictReleaseIterator(it);
+}
+
+/* Create a PUBLISH message block carrying 'channel' and 'message', with the
+ * given message 'type' (PUBLISH or PUBLISHSHARD). The returned block is
+ * refcounted; the caller must drop its reference with
+ * clusterMsgSendBlockDecrRefCount() once done sending.
+ *
+ * Sanitizer suppression: In clusterMsgDataPublish, sizeof(bulk_data) is 8.
+ * As all the struct is used as a buffer, when more than 8 bytes are copied into
+ * the 'bulk_data', sanitizer generates an out-of-bounds error which is a false
+ * positive in this context. */
+REDIS_NO_SANITIZE("bounds")
+clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) {
+
+    uint32_t channel_len, message_len;
+
+    /* Decode possibly int-encoded objects into raw strings; the decoded
+     * objects hold extra references that are released below. */
+    channel = getDecodedObject(channel);
+    message = getDecodedObject(message);
+    channel_len = sdslen(channel->ptr);
+    message_len = sdslen(message->ptr);
+
+    /* The 8 placeholder bytes of bulk_data are replaced by the actual
+     * channel + message payload. */
+    size_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+    msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len;
+    clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen);
+
+    clusterMsg *hdr = &msgblock->msg;
+    hdr->data.publish.msg.channel_len = htonl(channel_len);
+    hdr->data.publish.msg.message_len = htonl(message_len);
+    /* Payload layout: channel bytes immediately followed by message bytes. */
+    memcpy(hdr->data.publish.msg.bulk_data,channel->ptr,sdslen(channel->ptr));
+    memcpy(hdr->data.publish.msg.bulk_data+sdslen(channel->ptr),
+        message->ptr,sdslen(message->ptr));
+
+    decrRefCount(channel);
+    decrRefCount(message);
+
+    return msgblock;
+}
+
+/* Broadcast a FAIL message about 'nodename' to every reachable node.
+ * A FAIL is emitted once we detect a node as PFAIL and gossip confirms it:
+ * at that point we mark it CLUSTER_NODE_FAIL locally and ask everybody
+ * else to do the same as soon as possible. */
+void clusterSendFail(char *nodename) {
+    uint32_t len = sizeof(clusterMsg) - sizeof(union clusterMsgData) +
+                   sizeof(clusterMsgDataFail);
+    clusterMsgSendBlock *blk =
+        createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAIL, len);
+
+    memcpy(blk->msg.data.fail.about.nodename, nodename, CLUSTER_NAMELEN);
+
+    clusterBroadcastMessage(blk);
+    clusterMsgSendBlockDecrRefCount(blk);
+}
+
+/* Send an UPDATE message over 'link' describing 'node': its name, its
+ * slots bitmap (minus the slots it stopped claiming) and its configEpoch. */
+void clusterSendUpdate(clusterLink *link, clusterNode *node) {
+    if (link == NULL) return;
+
+    uint32_t len = sizeof(clusterMsg) - sizeof(union clusterMsgData) +
+                   sizeof(clusterMsgDataUpdate);
+    clusterMsgSendBlock *blk =
+        createClusterMsgSendBlock(CLUSTERMSG_TYPE_UPDATE, len);
+
+    clusterMsg *msg = &blk->msg;
+    memcpy(msg->data.update.nodecfg.nodename, node->name, CLUSTER_NAMELEN);
+    msg->data.update.nodecfg.configEpoch = htonu64(node->configEpoch);
+    memcpy(msg->data.update.nodecfg.slots, node->slots, sizeof(node->slots));
+    /* Mask out slots the owner is no longer claiming, so the receiver
+     * doesn't learn stale ownership. */
+    for (unsigned int i = 0; i < sizeof(node->slots); i++) {
+        msg->data.update.nodecfg.slots[i] &=
+            ~server.cluster->owner_not_claiming_slot[i];
+    }
+
+    clusterSendMessage(link, blk);
+    clusterMsgSendBlockDecrRefCount(blk);
+}
+
+/* Build and deliver a MODULE message carrying 'len' bytes of 'payload' for
+ * the module identified by 'module_id' ('type' is a module-defined message
+ * subtype).
+ *
+ * If link is NULL, then the message is broadcasted to the whole cluster. */
+void clusterSendModule(clusterLink *link, uint64_t module_id, uint8_t type,
+                       const char *payload, uint32_t len) {
+    /* The 3 placeholder bytes of bulk_data in clusterMsgModule are replaced
+     * by the actual payload. */
+    uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData) +
+                      sizeof(clusterMsgModule) - 3 + len;
+    clusterMsgSendBlock *blk =
+        createClusterMsgSendBlock(CLUSTERMSG_TYPE_MODULE, msglen);
+
+    clusterMsg *msg = &blk->msg;
+    msg->data.module.msg.module_id = module_id; /* Already endian adjusted. */
+    msg->data.module.msg.type = type;
+    msg->data.module.msg.len = htonl(len);
+    memcpy(msg->data.module.msg.bulk_data, payload, len);
+
+    if (link != NULL) {
+        clusterSendMessage(link, blk);
+    } else {
+        clusterBroadcastMessage(blk);
+    }
+
+    clusterMsgSendBlockDecrRefCount(blk);
+}
+
+/* Resolve 'target' (a node ID string, in the same form the module API uses
+ * to represent node addresses) and send a MODULE message to it. A NULL
+ * target broadcasts the message to the whole cluster.
+ *
+ * Returns C_OK if the target is valid (or the message was broadcast),
+ * C_ERR if the node is unknown or has no usable link. */
+int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uint8_t type, const char *payload, uint32_t len) {
+    if (target == NULL) {
+        clusterSendModule(NULL, module_id, type, payload, len);
+        return C_OK;
+    }
+
+    clusterNode *node = clusterLookupNode(target, strlen(target));
+    if (node == NULL || node->link == NULL) return C_ERR;
+
+    clusterSendModule(node->link, module_id, type, payload, len);
+    return C_OK;
+}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER Pub/Sub support
+ *
+ * If `sharded` is 0:
+ *   For now we do very little, just propagating [S]PUBLISH messages across the
+ *   whole cluster. In the future we'll try to get smarter and avoid
+ *   propagating those messages to hosts without receivers for a given channel.
+ * Otherwise:
+ *   Publish this message only across the shard that owns the channel's slot
+ *   (the primary and its replicas).
+ * -------------------------------------------------------------------------- */
+void clusterPropagatePublish(robj *channel, robj *message, int sharded) {
+    clusterMsgSendBlock *msgblock;
+
+    if (!sharded) {
+        msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISH);
+        clusterBroadcastMessage(msgblock);
+        clusterMsgSendBlockDecrRefCount(msgblock);
+        return;
+    }
+
+    /* Sharded: send one PUBLISHSHARD message to each node in our shard,
+     * excluding ourselves and nodes still in handshake. */
+    listIter li;
+    listNode *ln;
+    list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself);
+    serverAssert(nodes_for_slot != NULL);
+    listRewind(nodes_for_slot, &li);
+    msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISHSHARD);
+    while((ln = listNext(&li))) {
+        clusterNode *node = listNodeValue(ln);
+        if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE))
+            continue;
+        clusterSendMessage(node->link,msgblock);
+    }
+    clusterMsgSendBlockDecrRefCount(msgblock);
+}
+
+/* -----------------------------------------------------------------------------
+ * SLAVE node specific functions
+ * -------------------------------------------------------------------------- */
+
+/* Broadcast a FAILOVER_AUTH_REQUEST to every known node so that masters
+ * can vote on whether this slave may fail over its failing master.
+ *
+ * Every node receives the request, but only masters are expected to
+ * answer our query. */
+void clusterRequestFailoverAuth(void) {
+    uint32_t len = sizeof(clusterMsg) - sizeof(union clusterMsgData);
+    clusterMsgSendBlock *blk =
+        createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST, len);
+
+    /* For a manual failover we set the CLUSTERMSG_FLAG0_FORCEACK bit so
+     * that receivers grant the vote even if our master is still up. */
+    if (server.cluster->mf_end)
+        blk->msg.mflags[0] |= CLUSTERMSG_FLAG0_FORCEACK;
+    clusterBroadcastMessage(blk);
+    clusterMsgSendBlockDecrRefCount(blk);
+}
+
+/* Grant our failover vote by sending a FAILOVER_AUTH_ACK to 'node'.
+ * A no-op when we have no link to the node. */
+void clusterSendFailoverAuth(clusterNode *node) {
+    if (node->link == NULL) return;
+
+    uint32_t len = sizeof(clusterMsg) - sizeof(union clusterMsgData);
+    clusterMsgSendBlock *blk =
+        createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK, len);
+
+    clusterSendMessage(node->link, blk);
+    clusterMsgSendBlockDecrRefCount(blk);
+}
+
+/* Send a MFSTART (manual failover start) message to 'node'.
+ * A no-op when we have no link to the node. */
+void clusterSendMFStart(clusterNode *node) {
+    if (node->link == NULL) return;
+
+    uint32_t len = sizeof(clusterMsg) - sizeof(union clusterMsgData);
+    clusterMsgSendBlock *blk =
+        createClusterMsgSendBlock(CLUSTERMSG_TYPE_MFSTART, len);
+
+    clusterSendMessage(node->link, blk);
+    clusterMsgSendBlockDecrRefCount(blk);
+}
+
+/* Vote for the slave 'node' asking for our failover authorization, if all
+ * of the conditions hold:
+ *
+ *  - We are a master serving at least one slot (only such masters vote).
+ *  - The request epoch matches our currentEpoch and we did not already
+ *    vote in this epoch.
+ *  - The requester is a slave of a failed master (or the request carries
+ *    the FORCEACK flag of a manual failover).
+ *  - We did not vote for another slave of the same master recently.
+ *  - No slot claimed by the requester is served, in our view, by a master
+ *    with a greater configEpoch.
+ *
+ * On success the vote is recorded, persisted before sleep and a
+ * FAILOVER_AUTH_ACK is sent; on denial the reason is logged. */
+void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
+    clusterNode *master = node->slaveof;
+    uint64_t requestCurrentEpoch = ntohu64(request->currentEpoch);
+    uint64_t requestConfigEpoch = ntohu64(request->configEpoch);
+    unsigned char *claimed_slots = request->myslots;
+    int force_ack = request->mflags[0] & CLUSTERMSG_FLAG0_FORCEACK;
+    int j;
+
+    /* IF we are not a master serving at least 1 slot, we don't have the
+     * right to vote, as the cluster size in Redis Cluster is the number
+     * of masters serving at least one slot, and quorum is the cluster
+     * size + 1 */
+    if (nodeIsSlave(myself) || myself->numslots == 0) return;
+
+    /* Request epoch must be >= our currentEpoch.
+     * Note that it is impossible for it to actually be greater since
+     * our currentEpoch was updated as a side effect of receiving this
+     * request, if the request epoch was greater. */
+    if (requestCurrentEpoch < server.cluster->currentEpoch) {
+        serverLog(LL_WARNING,
+            "Failover auth denied to %.40s (%s): reqEpoch (%llu) < curEpoch(%llu)",
+            node->name, node->human_nodename,
+            (unsigned long long) requestCurrentEpoch,
+            (unsigned long long) server.cluster->currentEpoch);
+        return;
+    }
+
+    /* I already voted for this epoch? Return ASAP. */
+    if (server.cluster->lastVoteEpoch == server.cluster->currentEpoch) {
+        serverLog(LL_WARNING,
+                "Failover auth denied to %.40s (%s): already voted for epoch %llu",
+                node->name, node->human_nodename,
+                (unsigned long long) server.cluster->currentEpoch);
+        return;
+    }
+
+    /* Node must be a slave and its master down.
+     * The master can be non failing if the request is flagged
+     * with CLUSTERMSG_FLAG0_FORCEACK (manual failover). */
+    if (nodeIsMaster(node) || master == NULL ||
+        (!nodeFailed(master) && !force_ack))
+    {
+        if (nodeIsMaster(node)) {
+            serverLog(LL_WARNING,
+                    "Failover auth denied to %.40s (%s): it is a master node",
+                    node->name, node->human_nodename);
+        } else if (master == NULL) {
+            serverLog(LL_WARNING,
+                    "Failover auth denied to %.40s (%s): I don't know its master",
+                    node->name, node->human_nodename);
+        } else if (!nodeFailed(master)) {
+            serverLog(LL_WARNING,
+                    "Failover auth denied to %.40s (%s): its master is up",
+                    node->name, node->human_nodename);
+        }
+        return;
+    }
+
+    /* We did not vote for a slave about this master for two
+     * times the node timeout. This is not strictly needed for correctness
+     * of the algorithm but makes the base case more linear. */
+    if (mstime() - node->slaveof->voted_time < server.cluster_node_timeout * 2)
+    {
+        /* Note: "(%s)" around human_nodename for consistency with the
+         * other denial messages above. */
+        serverLog(LL_WARNING,
+                "Failover auth denied to %.40s (%s): "
+                "can't vote about this master before %lld milliseconds",
+                node->name, node->human_nodename,
+                (long long) ((server.cluster_node_timeout*2)-
+                             (mstime() - node->slaveof->voted_time)));
+        return;
+    }
+
+    /* The slave requesting the vote must have a configEpoch for the claimed
+     * slots that is >= the one of the masters currently serving the same
+     * slots in the current configuration. */
+    for (j = 0; j < CLUSTER_SLOTS; j++) {
+        if (bitmapTestBit(claimed_slots, j) == 0) continue;
+        if (isSlotUnclaimed(j) ||
+            server.cluster->slots[j]->configEpoch <= requestConfigEpoch)
+        {
+            continue;
+        }
+        /* If we reached this point we found a slot that in our current slots
+         * is served by a master with a greater configEpoch than the one claimed
+         * by the slave requesting our vote. Refuse to vote for this slave. */
+        serverLog(LL_WARNING,
+                "Failover auth denied to %.40s (%s): "
+                "slot %d epoch (%llu) > reqEpoch (%llu)",
+                node->name, node->human_nodename, j,
+                (unsigned long long) server.cluster->slots[j]->configEpoch,
+                (unsigned long long) requestConfigEpoch);
+        return;
+    }
+
+    /* We can vote for this slave. */
+    server.cluster->lastVoteEpoch = server.cluster->currentEpoch;
+    node->slaveof->voted_time = mstime();
+    clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_FSYNC_CONFIG);
+    clusterSendFailoverAuth(node);
+    serverLog(LL_NOTICE, "Failover auth granted to %.40s (%s) for epoch %llu",
+        node->name, node->human_nodename, (unsigned long long) server.cluster->currentEpoch);
+}
+
+/* Return the "rank" of this instance (a slave) within its master-slaves
+ * ring: the number of other failover-capable slaves of the same master
+ * whose replication offset is greater (i.e. they claim more data) than
+ * ours.
+ *
+ * Rank 0 therefore means the most up-to-date slave; slaves sharing the
+ * same offset share the same rank. The rank is used to stagger election
+ * start delays so that better-synced slaves are more likely to win. */
+int clusterGetSlaveRank(void) {
+    serverAssert(nodeIsSlave(myself));
+    clusterNode *master = myself->slaveof;
+    if (master == NULL) return 0; /* Never called by slaves without master. */
+
+    long long myoffset = replicationGetSlaveOffset();
+    int rank = 0;
+    for (int j = 0; j < master->numslaves; j++) {
+        clusterNode *sibling = master->slaves[j];
+        if (sibling == myself) continue;
+        if (nodeCantFailover(sibling)) continue;
+        if (sibling->repl_offset > myoffset) rank++;
+    }
+    return rank;
+}
+
+/* This function is called by clusterHandleSlaveFailover() in order to
+ * let the slave log why it is not able to failover. Sometimes there are
+ * not the conditions, but since the failover function is called again and
+ * again, we can't log the same things continuously.
+ *
+ * This function works by logging only if a given set of conditions are
+ * true:
+ *
+ * 1) The reason for which the failover can't be initiated changed.
+ *    The reasons also include a NONE reason we reset the state to
+ *    when the slave finds that its master is fine (no FAIL flag).
+ * 2) Also, the log is emitted again if the master is still down and
+ *    the reason for not failing over is still the same, but more than
+ *    CLUSTER_CANT_FAILOVER_RELOG_PERIOD seconds elapsed.
+ * 3) Finally, the function only logs if the slave is down for more than
+ *    five seconds + NODE_TIMEOUT. This way nothing is logged when a
+ *    failover starts in a reasonable time.
+ *
+ * The function is called with the reason why the slave can't failover
+ * which is one of the integer macros CLUSTER_CANT_FAILOVER_*.
+ *
+ * The function is guaranteed to be called only if 'myself' is a slave. */
+void clusterLogCantFailover(int reason) {
+    char *msg;
+    static time_t lastlog_time = 0; /* Rate-limit state, per process. */
+    mstime_t nolog_fail_time = server.cluster_node_timeout + 5000;
+
+    /* Don't log if we have the same reason for some time. */
+    if (reason == server.cluster->cant_failover_reason &&
+        time(NULL)-lastlog_time < CLUSTER_CANT_FAILOVER_RELOG_PERIOD)
+        return;
+
+    server.cluster->cant_failover_reason = reason;
+
+    /* We also don't emit any log if the master failed no long ago, the
+     * goal of this function is to log slaves in a stalled condition for
+     * a long time. */
+    if (myself->slaveof &&
+        nodeFailed(myself->slaveof) &&
+        (mstime() - myself->slaveof->fail_time) < nolog_fail_time) return;
+
+    switch(reason) {
+    case CLUSTER_CANT_FAILOVER_DATA_AGE:
+        msg = "Disconnected from master for longer than allowed. "
+              "Please check the 'cluster-replica-validity-factor' configuration "
+              "option.";
+        break;
+    case CLUSTER_CANT_FAILOVER_WAITING_DELAY:
+        msg = "Waiting the delay before I can start a new failover.";
+        break;
+    case CLUSTER_CANT_FAILOVER_EXPIRED:
+        msg = "Failover attempt expired.";
+        break;
+    case CLUSTER_CANT_FAILOVER_WAITING_VOTES:
+        msg = "Waiting for votes, but majority still not reached.";
+        break;
+    default:
+        msg = "Unknown reason code.";
+        break;
+    }
+    lastlog_time = time(NULL);
+    serverLog(LL_NOTICE,"Currently unable to failover: %s", msg);
+
+    int cur_vote = server.cluster->failover_auth_count;
+    int cur_quorum = (server.cluster->size / 2) + 1;
+    /* Emits a log when an election is in progress and waiting for votes or when the failover attempt expired. */
+    if (reason == CLUSTER_CANT_FAILOVER_WAITING_VOTES || reason == CLUSTER_CANT_FAILOVER_EXPIRED) {
+        serverLog(LL_NOTICE, "Needed quorum: %d. Number of votes received so far: %d", cur_quorum, cur_vote);
+    }
+}
+
+/* This function implements the final part of automatic and manual failovers,
+ * where the slave grabs its master's hash slots, and propagates the new
+ * configuration.
+ *
+ * Note that it's up to the caller to be sure that the node got a new
+ * configuration epoch already. */
+void clusterFailoverReplaceYourMaster(void) {
+    int j;
+    clusterNode *oldmaster = myself->slaveof;
+
+    /* Defensive: nothing to do if we are already a master or our master
+     * is unknown. */
+    if (nodeIsMaster(myself) || oldmaster == NULL) return;
+
+    /* 1) Turn this node into a master. */
+    clusterSetNodeAsMaster(myself);
+    replicationUnsetMaster();
+
+    /* 2) Claim all the slots assigned to our master. */
+    for (j = 0; j < CLUSTER_SLOTS; j++) {
+        if (clusterNodeGetSlotBit(oldmaster,j)) {
+            clusterDelSlot(j);
+            clusterAddSlot(myself,j);
+        }
+    }
+
+    /* 3) Update state and save config. */
+    clusterUpdateState();
+    clusterSaveConfigOrDie(1);
+
+    /* 4) Pong all the other nodes so that they can update the state
+     *    accordingly and detect that we switched to master role. */
+    clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
+
+    /* 5) If there was a manual failover in progress, clear the state. */
+    resetManualFailover();
+}
+
+/* This function is called if we are a slave node and our master serving
+ * a non-zero amount of hash slots is in FAIL state.
+ *
+ * The goal of this function is:
+ * 1) To check if we are able to perform a failover, is our data updated?
+ * 2) Try to get elected by masters.
+ * 3) Perform the failover informing all the other nodes.
+ */
+void clusterHandleSlaveFailover(void) {
+    mstime_t data_age;
+    /* Time elapsed since the last election attempt was scheduled. */
+    mstime_t auth_age = mstime() - server.cluster->failover_auth_time;
+    int needed_quorum = (server.cluster->size / 2) + 1;
+    int manual_failover = server.cluster->mf_end != 0 &&
+                          server.cluster->mf_can_start;
+    mstime_t auth_timeout, auth_retry_time;
+
+    server.cluster->todo_before_sleep &= ~CLUSTER_TODO_HANDLE_FAILOVER;
+
+    /* Compute the failover timeout (the max time we have to send votes
+     * and wait for replies), and the failover retry time (the time to wait
+     * before trying to get voted again).
+     *
+     * Timeout is MAX(NODE_TIMEOUT*2,2000) milliseconds.
+     * Retry is two times the Timeout.
+     */
+    auth_timeout = server.cluster_node_timeout*2;
+    if (auth_timeout < 2000) auth_timeout = 2000;
+    auth_retry_time = auth_timeout*2;
+
+    /* Pre conditions to run the function, that must be met both in case
+     * of an automatic or manual failover:
+     * 1) We are a slave.
+     * 2) Our master is flagged as FAIL, or this is a manual failover.
+     * 3) We don't have the no failover configuration set, and this is
+     *    not a manual failover.
+     * 4) It is serving slots. */
+    if (nodeIsMaster(myself) ||
+        myself->slaveof == NULL ||
+        (!nodeFailed(myself->slaveof) && !manual_failover) ||
+        (server.cluster_slave_no_failover && !manual_failover) ||
+        myself->slaveof->numslots == 0)
+    {
+        /* There are no reasons to failover, so we set the reason why we
+         * are returning without failing over to NONE. */
+        server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE;
+        return;
+    }
+
+    /* Set data_age to the number of milliseconds we are disconnected from
+     * the master. */
+    if (server.repl_state == REPL_STATE_CONNECTED) {
+        data_age = (mstime_t)(server.unixtime - server.master->lastinteraction)
+                   * 1000;
+    } else {
+        data_age = (mstime_t)(server.unixtime - server.repl_down_since) * 1000;
+    }
+
+    /* Remove the node timeout from the data age as it is fine that we are
+     * disconnected from our master at least for the time it was down to be
+     * flagged as FAIL, that's the baseline. */
+    if (data_age > server.cluster_node_timeout)
+        data_age -= server.cluster_node_timeout;
+
+    /* Check if our data is recent enough according to the slave validity
+     * factor configured by the user.
+     *
+     * Check bypassed for manual failovers. */
+    if (server.cluster_slave_validity_factor &&
+        data_age >
+        (((mstime_t)server.repl_ping_slave_period * 1000) +
+         (server.cluster_node_timeout * server.cluster_slave_validity_factor)))
+    {
+        if (!manual_failover) {
+            clusterLogCantFailover(CLUSTER_CANT_FAILOVER_DATA_AGE);
+            return;
+        }
+    }
+
+    /* If the previous failover attempt timed out and the retry time has
+     * elapsed, we can setup a new one. */
+    if (auth_age > auth_retry_time) {
+        server.cluster->failover_auth_time = mstime() +
+            500 + /* Fixed delay of 500 milliseconds, let FAIL msg propagate. */
+            random() % 500; /* Random delay between 0 and 500 milliseconds. */
+        server.cluster->failover_auth_count = 0;
+        server.cluster->failover_auth_sent = 0;
+        server.cluster->failover_auth_rank = clusterGetSlaveRank();
+        /* We add another delay that is proportional to the slave rank.
+         * Specifically 1 second * rank. This way slaves that have a probably
+         * less updated replication offset, are penalized. */
+        server.cluster->failover_auth_time +=
+            server.cluster->failover_auth_rank * 1000;
+        /* However if this is a manual failover, no delay is needed. */
+        if (server.cluster->mf_end) {
+            server.cluster->failover_auth_time = mstime();
+            server.cluster->failover_auth_rank = 0;
+            clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER);
+        }
+        serverLog(LL_NOTICE,
+            "Start of election delayed for %lld milliseconds "
+            "(rank #%d, offset %lld).",
+            server.cluster->failover_auth_time - mstime(),
+            server.cluster->failover_auth_rank,
+            replicationGetSlaveOffset());
+        /* Now that we have a scheduled election, broadcast our offset
+         * to all the other slaves so that they'll updated their offsets
+         * if our offset is better. */
+        clusterBroadcastPong(CLUSTER_BROADCAST_LOCAL_SLAVES);
+        return;
+    }
+
+    /* It is possible that we received more updated offsets from other
+     * slaves for the same master since we computed our election delay.
+     * Update the delay if our rank changed.
+     *
+     * Not performed if this is a manual failover. */
+    if (server.cluster->failover_auth_sent == 0 &&
+        server.cluster->mf_end == 0)
+    {
+        int newrank = clusterGetSlaveRank();
+        if (newrank > server.cluster->failover_auth_rank) {
+            long long added_delay =
+                (newrank - server.cluster->failover_auth_rank) * 1000;
+            server.cluster->failover_auth_time += added_delay;
+            server.cluster->failover_auth_rank = newrank;
+            serverLog(LL_NOTICE,
+                "Replica rank updated to #%d, added %lld milliseconds of delay.",
+                newrank, added_delay);
+        }
+    }
+
+    /* Return ASAP if we can't still start the election. */
+    if (mstime() < server.cluster->failover_auth_time) {
+        clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_DELAY);
+        return;
+    }
+
+    /* Return ASAP if the election is too old to be valid. */
+    if (auth_age > auth_timeout) {
+        clusterLogCantFailover(CLUSTER_CANT_FAILOVER_EXPIRED);
+        return;
+    }
+
+    /* Ask for votes if needed. Bumping currentEpoch gives this election a
+     * fresh, unique epoch masters can vote at most once in. */
+    if (server.cluster->failover_auth_sent == 0) {
+        server.cluster->currentEpoch++;
+        server.cluster->failover_auth_epoch = server.cluster->currentEpoch;
+        serverLog(LL_NOTICE,"Starting a failover election for epoch %llu.",
+            (unsigned long long) server.cluster->currentEpoch);
+        clusterRequestFailoverAuth();
+        server.cluster->failover_auth_sent = 1;
+        clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+                             CLUSTER_TODO_UPDATE_STATE|
+                             CLUSTER_TODO_FSYNC_CONFIG);
+        return; /* Wait for replies. */
+    }
+
+    /* Check if we reached the quorum. */
+    if (server.cluster->failover_auth_count >= needed_quorum) {
+        /* We have the quorum, we can finally failover the master. */
+
+        serverLog(LL_NOTICE,
+            "Failover election won: I'm the new master.");
+
+        /* Update my configEpoch to the epoch of the election. */
+        if (myself->configEpoch < server.cluster->failover_auth_epoch) {
+            myself->configEpoch = server.cluster->failover_auth_epoch;
+            serverLog(LL_NOTICE,
+                "configEpoch set to %llu after successful failover",
+                (unsigned long long) myself->configEpoch);
+        }
+
+        /* Take responsibility for the cluster slots. */
+        clusterFailoverReplaceYourMaster();
+    } else {
+        clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_VOTES);
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER slave migration
+ *
+ * Slave migration is the process that allows a slave of a master that is
+ * already covered by at least another slave, to "migrate" to a master that
+ * is orphaned, that is, left with no working slaves.
+ * ------------------------------------------------------------------------- */
+
/* This function is responsible to decide if this replica should be migrated
 * to a different (orphaned) master. It is called by the clusterCron() function
 * only if:
 *
 * 1) We are a slave node.
 * 2) It was detected that there is at least one orphaned master in
 *    the cluster.
 * 3) We are a slave of one of the masters with the greatest number of
 *    slaves.
 *
 * These checks are performed by the caller since it requires to iterate
 * the nodes anyway, so we only spend time in clusterHandleSlaveMigration()
 * if definitely needed.
 *
 * The function is called with a pre-computed max_slaves, that is the max
 * number of working (not in FAIL state) slaves for a single master.
 *
 * Additional conditions for migration are examined inside the function.
 * The function has no return value; on success its effect is a call to
 * clusterSetMaster() that reparents this replica under the orphaned master.
 */
void clusterHandleSlaveMigration(int max_slaves) {
    int j, okslaves = 0;
    clusterNode *mymaster = myself->slaveof, *target = NULL, *candidate = NULL;
    dictIterator *di;
    dictEntry *de;

    /* Step 1: Don't migrate if the cluster state is not ok. */
    if (server.cluster->state != CLUSTER_OK) return;

    /* Step 2: Don't migrate if my master will not be left with at least
     * 'migration-barrier' slaves after my migration. */
    if (mymaster == NULL) return;
    for (j = 0; j < mymaster->numslaves; j++)
        if (!nodeFailed(mymaster->slaves[j]) &&
            !nodeTimedOut(mymaster->slaves[j])) okslaves++;
    if (okslaves <= server.cluster_migration_barrier) return;

    /* Step 3: Identify a candidate for migration, and check if among the
     * masters with the greatest number of ok slaves, I'm the one with the
     * smallest node ID (the "candidate slave").
     *
     * Note: this means that eventually a replica migration will occur
     * since slaves that are reachable again always have their FAIL flag
     * cleared, so eventually there must be a candidate.
     * There is a possible race condition causing multiple
     * slaves to migrate at the same time, but this is unlikely to
     * happen and relatively harmless when it does. */
    candidate = myself;
    di = dictGetSafeIterator(server.cluster->nodes);
    while((de = dictNext(di)) != NULL) {
        clusterNode *node = dictGetVal(de);
        /* NOTE: this 'okslaves' intentionally shadows the outer counter;
         * here it counts the working slaves of the iterated node only. */
        int okslaves = 0, is_orphaned = 1;

        /* We want to migrate only if this master is working, orphaned, and
         * used to have slaves or if failed over a master that had slaves
         * (MIGRATE_TO flag). This way we only migrate to instances that were
         * supposed to have replicas. */
        if (nodeIsSlave(node) || nodeFailed(node)) is_orphaned = 0;
        if (!(node->flags & CLUSTER_NODE_MIGRATE_TO)) is_orphaned = 0;

        /* Check number of working slaves. */
        if (nodeIsMaster(node)) okslaves = clusterCountNonFailingSlaves(node);
        if (okslaves > 0) is_orphaned = 0;

        if (is_orphaned) {
            if (!target && node->numslots > 0) target = node;

            /* Track the starting time of the orphaned condition for this
             * master. */
            if (!node->orphaned_time) node->orphaned_time = mstime();
        } else {
            node->orphaned_time = 0;
        }

        /* Check if I'm the slave candidate for the migration: attached
         * to a master with the maximum number of slaves and with the smallest
         * node ID. */
        if (okslaves == max_slaves) {
            for (j = 0; j < node->numslaves; j++) {
                if (memcmp(node->slaves[j]->name,
                           candidate->name,
                           CLUSTER_NAMELEN) < 0)
                {
                    candidate = node->slaves[j];
                }
            }
        }
    }
    dictReleaseIterator(di);

    /* Step 4: perform the migration if there is a target, and if I'm the
     * candidate, but only if the master is continuously orphaned for a
     * couple of seconds, so that during failovers, we give some time to
     * the natural slaves of this instance to advertise their switch from
     * the old master to the new one. */
    if (target && candidate == myself &&
        (mstime()-target->orphaned_time) > CLUSTER_SLAVE_MIGRATION_DELAY &&
       !(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER))
    {
        serverLog(LL_NOTICE,"Migrating to orphaned master %.40s",
            target->name);
        clusterSetMaster(target);
    }
}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER manual failover
+ *
 * These are the important steps performed by slaves during a manual failover:
+ * 1) User send CLUSTER FAILOVER command. The failover state is initialized
+ * setting mf_end to the millisecond unix time at which we'll abort the
+ * attempt.
+ * 2) Slave sends a MFSTART message to the master requesting to pause clients
+ * for two times the manual failover timeout CLUSTER_MF_TIMEOUT.
+ * When master is paused for manual failover, it also starts to flag
+ * packets with CLUSTERMSG_FLAG0_PAUSED.
+ * 3) Slave waits for master to send its replication offset flagged as PAUSED.
+ * 4) If slave received the offset from the master, and its offset matches,
+ * mf_can_start is set to 1, and clusterHandleSlaveFailover() will perform
 * the failover as usual, with the difference that the vote request
+ * will be modified to force masters to vote for a slave that has a
+ * working master.
+ *
+ * From the point of view of the master things are simpler: when a
+ * PAUSE_CLIENTS packet is received the master sets mf_end as well and
+ * the sender in mf_slave. During the time limit for the manual failover
+ * the master will just send PINGs more often to this slave, flagged with
+ * the PAUSED flag, so that the slave will set mf_master_offset when receiving
+ * a packet from the master with this flag set.
+ *
+ * The goal of the manual failover is to perform a fast failover without
+ * data loss due to the asynchronous master-slave replication.
+ * -------------------------------------------------------------------------- */
+
+/* Reset the manual failover state. This works for both masters and slaves
+ * as all the state about manual failover is cleared.
+ *
+ * The function can be used both to initialize the manual failover state at
+ * startup or to abort a manual failover in progress. */
+void resetManualFailover(void) {
+ if (server.cluster->mf_slave) {
+ /* We were a master failing over, so we paused clients and related actions.
+ * Regardless of the outcome we unpause now to allow traffic again. */
+ unpauseActions(PAUSE_DURING_FAILOVER);
+ }
+ server.cluster->mf_end = 0; /* No manual failover in progress. */
+ server.cluster->mf_can_start = 0;
+ server.cluster->mf_slave = NULL;
+ server.cluster->mf_master_offset = -1;
+}
+
+/* If a manual failover timed out, abort it. */
+void manualFailoverCheckTimeout(void) {
+ if (server.cluster->mf_end && server.cluster->mf_end < mstime()) {
+ serverLog(LL_WARNING,"Manual failover timed out.");
+ resetManualFailover();
+ }
+}
+
+/* This function is called from the cluster cron function in order to go
+ * forward with a manual failover state machine. */
+void clusterHandleManualFailover(void) {
+ /* Return ASAP if no manual failover is in progress. */
+ if (server.cluster->mf_end == 0) return;
+
+ /* If mf_can_start is non-zero, the failover was already triggered so the
+ * next steps are performed by clusterHandleSlaveFailover(). */
+ if (server.cluster->mf_can_start) return;
+
+ if (server.cluster->mf_master_offset == -1) return; /* Wait for offset... */
+
+ if (server.cluster->mf_master_offset == replicationGetSlaveOffset()) {
+ /* Our replication offset matches the master replication offset
+ * announced after clients were paused. We can start the failover. */
+ server.cluster->mf_can_start = 1;
+ serverLog(LL_NOTICE,
+ "All master replication stream processed, "
+ "manual failover can start.");
+ clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER);
+ return;
+ }
+ clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER);
+}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER cron job
+ * -------------------------------------------------------------------------- */
+
/* Check if the node is disconnected and re-establish the connection.
 * Also update a few stats while we are here, that can be used to make
 * better decisions in other part of the code.
 *
 * Returns non-zero when the caller should stop processing this node in the
 * current cron iteration: either the node is myself / has no known address,
 * or it was a stale handshake node that got deleted here (in which case the
 * node object has been freed). Returns zero otherwise, including when a
 * reconnection attempt failed synchronously. */
static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t handshake_timeout, mstime_t now) {
    /* Not interested in reconnecting the link with myself or nodes
     * for which we have no address. */
    if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR)) return 1;

    /* Side stat: count PFAIL nodes (the counter is reset by the caller
     * before the iteration starts). */
    if (node->flags & CLUSTER_NODE_PFAIL)
        server.cluster->stats_pfail_nodes++;

    /* A Node in HANDSHAKE state has a limited lifespan equal to the
     * configured node timeout. */
    if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) {
        clusterDelNode(node);
        return 1;
    }

    if (node->link == NULL) {
        clusterLink *link = createClusterLink(node);
        link->conn = connCreate(connTypeOfCluster());
        connSetPrivateData(link->conn, link);
        if (connConnect(link->conn, node->ip, node->cport, server.bind_source_addr,
                    clusterLinkConnectHandler) == C_ERR) {
            /* We got a synchronous error from connect before
             * clusterSendPing() had a chance to be called.
             * If node->ping_sent is zero, failure detection can't work,
             * so we claim we actually sent a ping now (that will
             * be really sent as soon as the link is obtained). */
            if (node->ping_sent == 0) node->ping_sent = mstime();
            serverLog(LL_DEBUG, "Unable to connect to "
                "Cluster Node [%s]:%d -> %s", node->ip,
                node->cport, server.neterr);

            freeClusterLink(link);
            return 0;
        }
    }
    return 0;
}
+
+static void freeClusterLinkOnBufferLimitReached(clusterLink *link) {
+ if (link == NULL || server.cluster_link_msg_queue_limit_bytes == 0) {
+ return;
+ }
+
+ unsigned long long mem_link = link->send_msg_queue_mem;
+ if (mem_link > server.cluster_link_msg_queue_limit_bytes) {
+ serverLog(LL_WARNING, "Freeing cluster link(%s node %.40s, used memory: %llu) due to "
+ "exceeding send buffer memory limit.", link->inbound ? "from" : "to",
+ link->node ? link->node->name : "", mem_link);
+ freeClusterLink(link);
+ server.cluster->stat_cluster_links_buffer_limit_exceeded++;
+ }
+}
+
+/* Free outbound link to a node if its send buffer size exceeded limit. */
+static void clusterNodeCronFreeLinkOnBufferLimitReached(clusterNode *node) {
+ freeClusterLinkOnBufferLimitReached(node->link);
+ freeClusterLinkOnBufferLimitReached(node->inbound_link);
+}
+
/* This is executed 10 times every second.
 *
 * Responsibilities: reconnect/clean node links, ping nodes, flag nodes as
 * PFAIL on timeout, gather orphaned-master statistics used for replica
 * migration, drive manual failover / failover handling when we are a slave,
 * and refresh the cluster state when needed. */
void clusterCron(void) {
    dictIterator *di;
    dictEntry *de;
    int update_state = 0;
    int orphaned_masters; /* How many masters there are without ok slaves. */
    int max_slaves; /* Max number of ok slaves for a single master. */
    int this_slaves; /* Number of ok slaves for our master (if we are slave). */
    mstime_t min_pong = 0, now = mstime();
    clusterNode *min_pong_node = NULL;
    static unsigned long long iteration = 0;
    mstime_t handshake_timeout;

    iteration++; /* Number of times this function was called so far. */

    clusterUpdateMyselfHostname();

    /* The handshake timeout is the time after which a handshake node that was
     * not turned into a normal node is removed from the nodes. Usually it is
     * just the NODE_TIMEOUT value, but when NODE_TIMEOUT is too small we use
     * the value of 1 second. */
    handshake_timeout = server.cluster_node_timeout;
    if (handshake_timeout < 1000) handshake_timeout = 1000;

    /* Clear so clusterNodeCronHandleReconnect can count the number of nodes in PFAIL. */
    server.cluster->stats_pfail_nodes = 0;
    /* Run through some of the operations we want to do on each cluster node. */
    di = dictGetSafeIterator(server.cluster->nodes);
    while((de = dictNext(di)) != NULL) {
        clusterNode *node = dictGetVal(de);
        /* We free the inbound or outbound link to the node if the link has an
         * oversized message send queue and immediately try reconnecting. */
        clusterNodeCronFreeLinkOnBufferLimitReached(node);
        /* The protocol is that function(s) below return non-zero if the node was
         * terminated.
         */
        if(clusterNodeCronHandleReconnect(node, handshake_timeout, now)) continue;
    }
    dictReleaseIterator(di);

    /* Ping some random node 1 time every 10 iterations, so that we usually ping
     * one random node every second. */
    if (!(iteration % 10)) {
        int j;

        /* Check a few random nodes and ping the one with the oldest
         * pong_received time. */
        for (j = 0; j < 5; j++) {
            de = dictGetRandomKey(server.cluster->nodes);
            clusterNode *this = dictGetVal(de);

            /* Don't ping nodes disconnected or with a ping currently active. */
            if (this->link == NULL || this->ping_sent != 0) continue;
            if (this->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE))
                continue;
            if (min_pong_node == NULL || min_pong > this->pong_received) {
                min_pong_node = this;
                min_pong = this->pong_received;
            }
        }
        if (min_pong_node) {
            serverLog(LL_DEBUG,"Pinging node %.40s", min_pong_node->name);
            clusterSendPing(min_pong_node->link, CLUSTERMSG_TYPE_PING);
        }
    }

    /* Iterate nodes to check if we need to flag something as failing.
     * This loop is also responsible to:
     * 1) Check if there are orphaned masters (masters without non failing
     *    slaves).
     * 2) Count the max number of non failing slaves for a single master.
     * 3) Count the number of slaves for our master, if we are a slave. */
    orphaned_masters = 0;
    max_slaves = 0;
    this_slaves = 0;
    di = dictGetSafeIterator(server.cluster->nodes);
    while((de = dictNext(di)) != NULL) {
        clusterNode *node = dictGetVal(de);
        now = mstime(); /* Use an updated time at every iteration. */

        if (node->flags &
            (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE))
                continue;

        /* Orphaned master check, useful only if the current instance
         * is a slave that may migrate to another master. */
        if (nodeIsSlave(myself) && nodeIsMaster(node) && !nodeFailed(node)) {
            int okslaves = clusterCountNonFailingSlaves(node);

            /* A master is orphaned if it is serving a non-zero number of
             * slots, have no working slaves, but used to have at least one
             * slave, or failed over a master that used to have slaves. */
            if (okslaves == 0 && node->numslots > 0 &&
                node->flags & CLUSTER_NODE_MIGRATE_TO)
            {
                orphaned_masters++;
            }
            if (okslaves > max_slaves) max_slaves = okslaves;
            if (myself->slaveof == node)
                this_slaves = okslaves;
        }

        /* If we are not receiving any data for more than half the cluster
         * timeout, reconnect the link: maybe there is a connection
         * issue even if the node is alive. */
        mstime_t ping_delay = now - node->ping_sent;
        mstime_t data_delay = now - node->data_received;
        if (node->link && /* is connected */
            now - node->link->ctime >
            server.cluster_node_timeout && /* was not already reconnected */
            node->ping_sent && /* we already sent a ping */
            /* and we are waiting for the pong more than timeout/2 */
            ping_delay > server.cluster_node_timeout/2 &&
            /* and in such interval we are not seeing any traffic at all. */
            data_delay > server.cluster_node_timeout/2)
        {
            /* Disconnect the link, it will be reconnected automatically. */
            freeClusterLink(node->link);
        }

        /* If we have currently no active ping in this instance, and the
         * received PONG is older than half the cluster timeout, send
         * a new ping now, to ensure all the nodes are pinged without
         * a too big delay. */
        mstime_t ping_interval = server.cluster_ping_interval ?
            server.cluster_ping_interval : server.cluster_node_timeout/2;
        if (node->link &&
            node->ping_sent == 0 &&
            (now - node->pong_received) > ping_interval)
        {
            clusterSendPing(node->link, CLUSTERMSG_TYPE_PING);
            continue;
        }

        /* If we are a master and one of the slaves requested a manual
         * failover, ping it continuously. */
        if (server.cluster->mf_end &&
            nodeIsMaster(myself) &&
            server.cluster->mf_slave == node &&
            node->link)
        {
            clusterSendPing(node->link, CLUSTERMSG_TYPE_PING);
            continue;
        }

        /* Check only if we have an active ping for this instance. */
        if (node->ping_sent == 0) continue;

        /* Check if this node looks unreachable.
         * Note that if we already received the PONG, then node->ping_sent
         * is zero, so can't reach this code at all, so we don't risk of
         * checking for a PONG delay if we didn't sent the PING.
         *
         * We also consider every incoming data as proof of liveness, since
         * our cluster bus link is also used for data: under heavy data
         * load pong delays are possible. */
        mstime_t node_delay = (ping_delay < data_delay) ? ping_delay :
                                                          data_delay;

        if (node_delay > server.cluster_node_timeout) {
            /* Timeout reached. Set the node as possibly failing if it is
             * not already in this state. */
            if (!(node->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) {
                serverLog(LL_DEBUG,"*** NODE %.40s possibly failing",
                    node->name);
                node->flags |= CLUSTER_NODE_PFAIL;
                update_state = 1;
            }
        }
    }
    dictReleaseIterator(di);

    /* If we are a slave node but the replication is still turned off,
     * enable it if we know the address of our master and it appears to
     * be up. */
    if (nodeIsSlave(myself) &&
        server.masterhost == NULL &&
        myself->slaveof &&
        nodeHasAddr(myself->slaveof))
    {
        replicationSetMaster(myself->slaveof->ip, getNodeDefaultReplicationPort(myself->slaveof));
    }

    /* Abort a manual failover if the timeout is reached. */
    manualFailoverCheckTimeout();

    if (nodeIsSlave(myself)) {
        clusterHandleManualFailover();
        if (!(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER))
            clusterHandleSlaveFailover();
        /* If there are orphaned slaves, and we are a slave among the masters
         * with the max number of non-failing slaves, consider migrating to
         * the orphaned masters. Note that it does not make sense to try
         * a migration if there is no master with at least *two* working
         * slaves. */
        if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves &&
            server.cluster_allow_replica_migration)
            clusterHandleSlaveMigration(max_slaves);
    }

    if (update_state || server.cluster->state == CLUSTER_FAIL)
        clusterUpdateState();
}
+
+/* This function is called before the event handler returns to sleep for
+ * events. It is useful to perform operations that must be done ASAP in
+ * reaction to events fired but that are not safe to perform inside event
+ * handlers, or to perform potentially expansive tasks that we need to do
+ * a single time before replying to clients. */
+void clusterBeforeSleep(void) {
+ int flags = server.cluster->todo_before_sleep;
+
+ /* Reset our flags (not strictly needed since every single function
+ * called for flags set should be able to clear its flag). */
+ server.cluster->todo_before_sleep = 0;
+
+ if (flags & CLUSTER_TODO_HANDLE_MANUALFAILOVER) {
+ /* Handle manual failover as soon as possible so that won't have a 100ms
+ * as it was handled only in clusterCron */
+ if(nodeIsSlave(myself)) {
+ clusterHandleManualFailover();
+ if (!(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER))
+ clusterHandleSlaveFailover();
+ }
+ } else if (flags & CLUSTER_TODO_HANDLE_FAILOVER) {
+ /* Handle failover, this is needed when it is likely that there is already
+ * the quorum from masters in order to react fast. */
+ clusterHandleSlaveFailover();
+ }
+
+ /* Update the cluster state. */
+ if (flags & CLUSTER_TODO_UPDATE_STATE)
+ clusterUpdateState();
+
+ /* Save the config, possibly using fsync. */
+ if (flags & CLUSTER_TODO_SAVE_CONFIG) {
+ int fsync = flags & CLUSTER_TODO_FSYNC_CONFIG;
+ clusterSaveConfigOrDie(fsync);
+ }
+}
+
/* Schedule the given CLUSTER_TODO_* flags to be processed by
 * clusterBeforeSleep() before the next event loop iteration. */
void clusterDoBeforeSleep(int flags) {
    server.cluster->todo_before_sleep |= flags;
}
+
+/* -----------------------------------------------------------------------------
+ * Slots management
+ * -------------------------------------------------------------------------- */
+
/* Test bit 'pos' in a generic bitmap. Return 1 if the bit is set,
 * otherwise 0. */
int bitmapTestBit(unsigned char *bitmap, int pos) {
    return (bitmap[pos >> 3] >> (pos & 7)) & 1;
}
+
/* Set the bit at position 'pos' in a bitmap. */
void bitmapSetBit(unsigned char *bitmap, int pos) {
    bitmap[pos >> 3] |= (unsigned char)(1 << (pos & 7));
}
+
/* Clear the bit at position 'pos' in a bitmap. */
void bitmapClearBit(unsigned char *bitmap, int pos) {
    bitmap[pos >> 3] &= (unsigned char)~(1 << (pos & 7));
}
+
+/* Return non-zero if there is at least one master with slaves in the cluster.
+ * Otherwise zero is returned. Used by clusterNodeSetSlotBit() to set the
+ * MIGRATE_TO flag the when a master gets the first slot. */
+int clusterMastersHaveSlaves(void) {
+ dictIterator *di = dictGetSafeIterator(server.cluster->nodes);
+ dictEntry *de;
+ int slaves = 0;
+ while((de = dictNext(di)) != NULL) {
+ clusterNode *node = dictGetVal(de);
+
+ if (nodeIsSlave(node)) continue;
+ slaves += node->numslaves;
+ }
+ dictReleaseIterator(di);
+ return slaves != 0;
+}
+
+/* Set the slot bit and return the old value. */
+int clusterNodeSetSlotBit(clusterNode *n, int slot) {
+ int old = bitmapTestBit(n->slots,slot);
+ if (!old) {
+ bitmapSetBit(n->slots,slot);
+ n->numslots++;
+ /* When a master gets its first slot, even if it has no slaves,
+ * it gets flagged with MIGRATE_TO, that is, the master is a valid
+ * target for replicas migration, if and only if at least one of
+ * the other masters has slaves right now.
+ *
+ * Normally masters are valid targets of replica migration if:
+ * 1. The used to have slaves (but no longer have).
+ * 2. They are slaves failing over a master that used to have slaves.
+ *
+ * However new masters with slots assigned are considered valid
+ * migration targets if the rest of the cluster is not a slave-less.
+ *
+ * See https://github.com/redis/redis/issues/3043 for more info. */
+ if (n->numslots == 1 && clusterMastersHaveSlaves())
+ n->flags |= CLUSTER_NODE_MIGRATE_TO;
+ }
+ return old;
+}
+
+/* Clear the slot bit and return the old value. */
+int clusterNodeClearSlotBit(clusterNode *n, int slot) {
+ int old = bitmapTestBit(n->slots,slot);
+ if (old) {
+ bitmapClearBit(n->slots,slot);
+ n->numslots--;
+ }
+ return old;
+}
+
/* Return the slot bit from the cluster node structure: 1 if node 'n'
 * claims 'slot', otherwise 0. */
int clusterNodeGetSlotBit(clusterNode *n, int slot) {
    return bitmapTestBit(n->slots,slot);
}
+
+/* Add the specified slot to the list of slots that node 'n' will
+ * serve. Return C_OK if the operation ended with success.
+ * If the slot is already assigned to another instance this is considered
+ * an error and C_ERR is returned. */
+int clusterAddSlot(clusterNode *n, int slot) {
+ if (server.cluster->slots[slot]) return C_ERR;
+ clusterNodeSetSlotBit(n,slot);
+ server.cluster->slots[slot] = n;
+ return C_OK;
+}
+
/* Delete the specified slot marking it as unassigned.
 * Returns C_OK if the slot was assigned, otherwise if the slot was
 * already unassigned C_ERR is returned. */
int clusterDelSlot(int slot) {
    clusterNode *n = server.cluster->slots[slot];

    if (!n) return C_ERR;

    /* Cleanup the channels in master/replica as part of slot deletion.
     * Done before dropping ownership so the slot owner is still known. */
    removeChannelsInSlot(slot);
    /* Clear the slot bit. The assert guarantees the node's bitmap agreed
     * with the slots table (the bit must have been set). */
    serverAssert(clusterNodeClearSlotBit(n,slot) == 1);
    server.cluster->slots[slot] = NULL;
    /* Make owner_not_claiming_slot flag consistent with slot ownership information. */
    bitmapClearBit(server.cluster->owner_not_claiming_slot, slot);
    return C_OK;
}
+
+/* Delete all the slots associated with the specified node.
+ * The number of deleted slots is returned. */
+int clusterDelNodeSlots(clusterNode *node) {
+ int deleted = 0, j;
+
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ if (clusterNodeGetSlotBit(node,j)) {
+ clusterDelSlot(j);
+ deleted++;
+ }
+ }
+ return deleted;
+}
+
+/* Clear the migrating / importing state for all the slots.
+ * This is useful at initialization and when turning a master into slave. */
+void clusterCloseAllSlots(void) {
+ memset(server.cluster->migrating_slots_to,0,
+ sizeof(server.cluster->migrating_slots_to));
+ memset(server.cluster->importing_slots_from,0,
+ sizeof(server.cluster->importing_slots_from));
+}
+
+/* -----------------------------------------------------------------------------
+ * Cluster state evaluation function
+ * -------------------------------------------------------------------------- */
+
/* The following are defines that are only used in the evaluation function
 * and are based on heuristics. Actually the main point about the rejoin and
 * writable delay is that they should be a few orders of magnitude larger
 * than the network latency. */
#define CLUSTER_MAX_REJOIN_DELAY 5000
#define CLUSTER_MIN_REJOIN_DELAY 500
#define CLUSTER_WRITABLE_DELAY 2000

/* Recompute server.cluster->state (CLUSTER_OK / CLUSTER_FAIL) from the
 * current view of the cluster: slot coverage (if required), cluster size,
 * and whether we can see a majority of the slot-serving masters. Also logs
 * state transitions and applies rejoin/writable delays to avoid flapping. */
void clusterUpdateState(void) {
    int j, new_state;
    int reachable_masters = 0;
    static mstime_t among_minority_time;
    static mstime_t first_call_time = 0;

    /* This function also consumes its own todo flag. */
    server.cluster->todo_before_sleep &= ~CLUSTER_TODO_UPDATE_STATE;

    /* If this is a master node, wait some time before turning the state
     * into OK, since it is not a good idea to rejoin the cluster as a writable
     * master, after a reboot, without giving the cluster a chance to
     * reconfigure this node. Note that the delay is calculated starting from
     * the first call to this function and not since the server start, in order
     * to not count the DB loading time. */
    if (first_call_time == 0) first_call_time = mstime();
    if (nodeIsMaster(myself) &&
        server.cluster->state == CLUSTER_FAIL &&
        mstime() - first_call_time < CLUSTER_WRITABLE_DELAY) return;

    /* Start assuming the state is OK. We'll turn it into FAIL if there
     * are the right conditions. */
    new_state = CLUSTER_OK;

    /* Check if all the slots are covered. */
    if (server.cluster_require_full_coverage) {
        for (j = 0; j < CLUSTER_SLOTS; j++) {
            if (server.cluster->slots[j] == NULL ||
                server.cluster->slots[j]->flags & (CLUSTER_NODE_FAIL))
            {
                new_state = CLUSTER_FAIL;
                break;
            }
        }
    }

    /* Compute the cluster size, that is the number of master nodes
     * serving at least a single slot.
     *
     * At the same time count the number of reachable masters having
     * at least one slot. */
    {
        dictIterator *di;
        dictEntry *de;

        server.cluster->size = 0;
        di = dictGetSafeIterator(server.cluster->nodes);
        while((de = dictNext(di)) != NULL) {
            clusterNode *node = dictGetVal(de);

            if (nodeIsMaster(node) && node->numslots) {
                server.cluster->size++;
                if ((node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) == 0)
                    reachable_masters++;
            }
        }
        dictReleaseIterator(di);
    }

    /* If we are in a minority partition, change the cluster state
     * to FAIL. */
    {
        int needed_quorum = (server.cluster->size / 2) + 1;

        if (reachable_masters < needed_quorum) {
            new_state = CLUSTER_FAIL;
            among_minority_time = mstime();
        }
    }

    /* Log a state change */
    if (new_state != server.cluster->state) {
        mstime_t rejoin_delay = server.cluster_node_timeout;

        /* If the instance is a master and was partitioned away with the
         * minority, don't let it accept queries for some time after the
         * partition heals, to make sure there is enough time to receive
         * a configuration update. */
        if (rejoin_delay > CLUSTER_MAX_REJOIN_DELAY)
            rejoin_delay = CLUSTER_MAX_REJOIN_DELAY;
        if (rejoin_delay < CLUSTER_MIN_REJOIN_DELAY)
            rejoin_delay = CLUSTER_MIN_REJOIN_DELAY;

        if (new_state == CLUSTER_OK &&
            nodeIsMaster(myself) &&
            mstime() - among_minority_time < rejoin_delay)
        {
            return;
        }

        /* Change the state and log the event. */
        serverLog(new_state == CLUSTER_OK ? LL_NOTICE : LL_WARNING,
            "Cluster state changed: %s",
            new_state == CLUSTER_OK ? "ok" : "fail");
        server.cluster->state = new_state;
    }
}
+
/* This function is called after the node startup in order to verify that data
 * loaded from disk is in agreement with the cluster configuration:
 *
 * 1) If we find keys about hash slots we have no responsibility for, the
 *    following happens:
 *    A) If no other node is in charge according to the current cluster
 *       configuration, we add these slots to our node.
 *    B) If according to our config other nodes are already in charge for
 *       these slots, we set the slots as IMPORTING from our point of view
 *       in order to justify we have those slots, and in order to make
 *       redis-cli aware of the issue, so that it can try to fix it.
 * 2) If we find data in a DB different than DB0 we return C_ERR to
 *    signal the caller it should quit the server with an error message
 *    or take other actions.
 *
 * The function always returns C_OK even if it will try to correct
 * the error described in "1". However if data is found in a DB different
 * from DB0, C_ERR is returned.
 *
 * The function also uses the logging facility in order to warn the user
 * about desynchronizations between the data we have in memory and the
 * cluster configuration. */
int verifyClusterConfigWithData(void) {
    int j;
    int update_config = 0; /* Non-zero if the config must be saved at the end. */

    /* Return ASAP if a module disabled cluster redirections. In that case
     * every master can store keys about every possible hash slot. */
    if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION)
        return C_OK;

    /* If this node is a slave, don't perform the check at all as we
     * completely depend on the replication stream. */
    if (nodeIsSlave(myself)) return C_OK;

    /* Make sure we only have keys in DB0. */
    for (j = 1; j < server.dbnum; j++) {
        if (dictSize(server.db[j].dict)) return C_ERR;
    }

    /* Check that all the slots we see populated memory have a corresponding
     * entry in the cluster table. Otherwise fix the table. */
    for (j = 0; j < CLUSTER_SLOTS; j++) {
        if (!countKeysInSlot(j)) continue; /* No keys in this slot. */
        /* Check if we are assigned to this slot or if we are importing it.
         * In both cases check the next slot as the configuration makes
         * sense. */
        if (server.cluster->slots[j] == myself ||
            server.cluster->importing_slots_from[j] != NULL) continue;

        /* If we are here data and cluster config don't agree, and we have
         * slot 'j' populated even if we are not importing it, nor we are
         * assigned to this slot. Fix this condition. */

        update_config++;
        /* Case A: slot is unassigned. Take responsibility for it. */
        if (server.cluster->slots[j] == NULL) {
            serverLog(LL_NOTICE, "I have keys for unassigned slot %d. "
                                    "Taking responsibility for it.",j);
            clusterAddSlot(myself,j);
        } else {
            /* Case B: slot belongs to another node. Mark it as importing. */
            serverLog(LL_NOTICE, "I have keys for slot %d, but the slot is "
                                    "assigned to another node. "
                                    "Setting it to importing state.",j);
            server.cluster->importing_slots_from[j] = server.cluster->slots[j];
        }
    }
    if (update_config) clusterSaveConfigOrDie(1);
    return C_OK;
}
+
+/* -----------------------------------------------------------------------------
+ * SLAVE nodes handling
+ * -------------------------------------------------------------------------- */
+
/* Set the specified node 'n' as master for this node.
 * If this node is currently a master, it is turned into a slave.
 *
 * Preconditions (asserted): 'n' is not myself, and this node serves no
 * slots (a slot-owning master must first release its slots). */
void clusterSetMaster(clusterNode *n) {
    serverAssert(n != myself);
    serverAssert(myself->numslots == 0);

    if (nodeIsMaster(myself)) {
        /* Master -> slave transition: drop master-only flags and any
         * migrating/importing slot state. */
        myself->flags &= ~(CLUSTER_NODE_MASTER|CLUSTER_NODE_MIGRATE_TO);
        myself->flags |= CLUSTER_NODE_SLAVE;
        clusterCloseAllSlots();
    } else {
        /* Already a slave: detach from the previous master, if any. */
        if (myself->slaveof)
            clusterNodeRemoveSlave(myself->slaveof,myself);
    }
    myself->slaveof = n;
    updateShardId(myself, n->shard_id);
    clusterNodeAddSlave(n,myself);
    replicationSetMaster(n->ip, getNodeDefaultReplicationPort(n));
    /* Any manual failover in progress no longer applies to the new master. */
    resetManualFailover();
}
+
+/* -----------------------------------------------------------------------------
+ * Nodes to string representation functions.
+ * -------------------------------------------------------------------------- */
+
/* Mapping between a node flag bit and its textual representation. Each
 * name carries a trailing comma; representClusterNodeFlags() strips the
 * final one after concatenation. */
struct redisNodeFlags {
    uint16_t flag;  /* CLUSTER_NODE_* bit. */
    char *name;     /* Human readable name, including trailing comma. */
};

static struct redisNodeFlags redisNodeFlagsTable[] = {
    {CLUSTER_NODE_MYSELF, "myself,"},
    {CLUSTER_NODE_MASTER, "master,"},
    {CLUSTER_NODE_SLAVE, "slave,"},
    {CLUSTER_NODE_PFAIL, "fail?,"},
    {CLUSTER_NODE_FAIL, "fail,"},
    {CLUSTER_NODE_HANDSHAKE, "handshake,"},
    {CLUSTER_NODE_NOADDR, "noaddr,"},
    {CLUSTER_NODE_NOFAILOVER, "nofailover,"}
};
+
+/* Concatenate the comma separated list of node flags to the given SDS
+ * string 'ci'. If no flag matches, the special "noflags" token is
+ * emitted instead. The trailing comma is always stripped before
+ * returning the (possibly reallocated) string. */
+sds representClusterNodeFlags(sds ci, uint16_t flags) {
+    const size_t len_before = sdslen(ci);
+    const int nflags = sizeof(redisNodeFlagsTable)/sizeof(redisNodeFlagsTable[0]);
+
+    for (int idx = 0; idx < nflags; idx++) {
+        if (flags & redisNodeFlagsTable[idx].flag)
+            ci = sdscat(ci, redisNodeFlagsTable[idx].name);
+    }
+    /* Nothing matched: emit the placeholder token. */
+    if (sdslen(ci) == len_before) ci = sdscat(ci, "noflags,");
+    sdsIncrLen(ci, -1); /* Remove trailing comma. */
+    return ci;
+}
+
+/* Concatenate the slot ownership information to the given SDS string 'ci'.
+ * 'slot_info_pairs' is an array of inclusive (start,end) slot pairs and
+ * 'slot_info_pairs_count' the number of array entries (always even).
+ * If the slot ownership is in a contiguous block, it's represented as a
+ * "start-end" pair, else the single slot is added by itself.
+ *
+ * The pair values are loaded into plain 'int' locals: sdscatfmt() reads
+ * a '%i' conversion with va_arg(ap,int), so passing a wider type (the
+ * previous 'unsigned long') would not match what the callee fetches
+ * from the variadic list. Slot numbers are < CLUSTER_SLOTS so they
+ * always fit in an int. */
+sds representSlotInfo(sds ci, uint16_t *slot_info_pairs, int slot_info_pairs_count) {
+    for (int i = 0; i < slot_info_pairs_count; i += 2) {
+        int start = slot_info_pairs[i];
+        int end = slot_info_pairs[i+1];
+        if (start == end) {
+            ci = sdscatfmt(ci, " %i", start);
+        } else {
+            ci = sdscatfmt(ci, " %i-%i", start, end);
+        }
+    }
+    return ci;
+}
+
+/* Generate a csv-alike representation of the specified cluster node.
+ * See clusterGenNodesDescription() top comment for more information.
+ *
+ * If 'c' is NULL the output is destined to the nodes.conf file, and the
+ * auxiliary fields (which are not yet exposed to clients) are emitted
+ * as well. 'tls_primary' selects whether the TLS or TCP port goes in
+ * the main <ip>:<port> field.
+ *
+ * The function returns the string representation as an SDS string. */
+sds clusterGenNodeDescription(client *c, clusterNode *node, int tls_primary) {
+    int j, start;
+    sds ci;
+    int port = getNodeClientPort(node, tls_primary);
+
+    /* Node coordinates */
+    ci = sdscatlen(sdsempty(),node->name,CLUSTER_NAMELEN);
+    ci = sdscatfmt(ci," %s:%i@%i",
+        node->ip,
+        port,
+        node->cport);
+    if (sdslen(node->hostname) != 0) {
+        ci = sdscatfmt(ci,",%s", node->hostname);
+    }
+    /* Don't expose aux fields to any clients yet but do allow them
+     * to be persisted to nodes.conf */
+    if (c == NULL) {
+        if (sdslen(node->hostname) == 0) {
+            /* Keep the hostname field position with an empty value. Use
+             * sdscatlen() here: the old sdscatfmt(ci,",",1) call passed a
+             * spurious variadic argument with no conversion specifier. */
+            ci = sdscatlen(ci,",",1);
+        }
+        for (int i = af_count-1; i >= 0; i--) {
+            /* The port aux field matching the primary port is already in
+             * the main coordinates, skip it. */
+            if ((tls_primary && i == af_tls_port) || (!tls_primary && i == af_tcp_port)) {
+                continue;
+            }
+            if (auxFieldHandlers[i].isPresent(node)) {
+                ci = sdscatprintf(ci, ",%s=", auxFieldHandlers[i].field);
+                ci = auxFieldHandlers[i].getter(node, ci);
+            }
+        }
+    }
+
+    /* Flags */
+    ci = sdscatlen(ci," ",1);
+    ci = representClusterNodeFlags(ci, node->flags);
+
+    /* Slave of... or just "-" */
+    ci = sdscatlen(ci," ",1);
+    if (node->slaveof)
+        ci = sdscatlen(ci,node->slaveof->name,CLUSTER_NAMELEN);
+    else
+        ci = sdscatlen(ci,"-",1);
+
+    /* A replica reports its master's config epoch. */
+    unsigned long long nodeEpoch = node->configEpoch;
+    if (nodeIsSlave(node) && node->slaveof) {
+        nodeEpoch = node->slaveof->configEpoch;
+    }
+    /* Latency from the POV of this node, config epoch, link status */
+    ci = sdscatfmt(ci," %I %I %U %s",
+        (long long) node->ping_sent,
+        (long long) node->pong_received,
+        nodeEpoch,
+        (node->link || node->flags & CLUSTER_NODE_MYSELF) ?
+            "connected" : "disconnected");
+
+    /* Slots served by this instance. If we already have slots info,
+     * append it directly, otherwise, generate slots only if it has. */
+    if (node->slot_info_pairs) {
+        ci = representSlotInfo(ci, node->slot_info_pairs, node->slot_info_pairs_count);
+    } else if (node->numslots > 0) {
+        start = -1;
+        for (j = 0; j < CLUSTER_SLOTS; j++) {
+            int bit;
+
+            if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
+                if (start == -1) start = j;
+            }
+            /* Close the current run at the first unowned slot or at the
+             * end of the slot space. */
+            if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
+                if (bit && j == CLUSTER_SLOTS-1) j++;
+
+                if (start == j-1) {
+                    ci = sdscatfmt(ci," %i",start);
+                } else {
+                    ci = sdscatfmt(ci," %i-%i",start,j-1);
+                }
+                start = -1;
+            }
+        }
+    }
+
+    /* Just for MYSELF node we also dump info about slots that
+     * we are migrating to other instances or importing from other
+     * instances. */
+    if (node->flags & CLUSTER_NODE_MYSELF) {
+        for (j = 0; j < CLUSTER_SLOTS; j++) {
+            if (server.cluster->migrating_slots_to[j]) {
+                ci = sdscatprintf(ci," [%d->-%.40s]",j,
+                    server.cluster->migrating_slots_to[j]->name);
+            } else if (server.cluster->importing_slots_from[j]) {
+                ci = sdscatprintf(ci," [%d-<-%.40s]",j,
+                    server.cluster->importing_slots_from[j]->name);
+            }
+        }
+    }
+    return ci;
+}
+
+/* Generate the slot topology for all nodes and store the string representation
+ * in the slots_info struct on the node. This is used to improve the efficiency
+ * of clusterGenNodesDescription() because it removes looping of the slot space
+ * for generating the slot info for each node individually.
+ *
+ * Nodes whose flags match 'filter' are skipped. Runs of consecutive slots
+ * owned by the same node are stored on that node as inclusive
+ * (start, end) uint16_t pairs in n->slot_info_pairs. */
+void clusterGenNodesSlotsInfo(int filter) {
+ clusterNode *n = NULL;
+ int start = -1;
+
+ /* Note: the loop goes up to CLUSTER_SLOTS *inclusive*; the extra
+ * iteration acts as a sentinel to flush the last open run. */
+ for (int i = 0; i <= CLUSTER_SLOTS; i++) {
+ /* Find start node and slot id. */
+ if (n == NULL) {
+ if (i == CLUSTER_SLOTS) break;
+ n = server.cluster->slots[i];
+ start = i;
+ continue;
+ }
+
+ /* Generate slots info when occur different node with start
+ * or end of slot. */
+ if (i == CLUSTER_SLOTS || n != server.cluster->slots[i]) {
+ if (!(n->flags & filter)) {
+ /* Lazily allocate room for the worst case: every owned
+ * slot being its own (start,end) run. */
+ if (!n->slot_info_pairs) {
+ n->slot_info_pairs = zmalloc(2 * n->numslots * sizeof(uint16_t));
+ }
+ serverAssert((n->slot_info_pairs_count + 1) < (2 * n->numslots));
+ n->slot_info_pairs[n->slot_info_pairs_count++] = start;
+ n->slot_info_pairs[n->slot_info_pairs_count++] = i-1;
+ }
+ if (i == CLUSTER_SLOTS) break;
+ n = server.cluster->slots[i];
+ start = i;
+ }
+ }
+}
+
+/* Release the cached slot-range pairs of node 'n' built by
+ * clusterGenNodesSlotsInfo() and reset the related counter. */
+void clusterFreeNodesSlotsInfo(clusterNode *n) {
+ zfree(n->slot_info_pairs);
+ n->slot_info_pairs = NULL;
+ n->slot_info_pairs_count = 0;
+}
+
+/* Generate a csv-alike representation of the nodes we are aware of,
+ * including the "myself" node, and return an SDS string containing the
+ * representation (it is up to the caller to free it).
+ *
+ * All the nodes matching at least one of the node flags specified in
+ * "filter" are excluded from the output, so using zero as a filter will
+ * include all the known nodes in the representation, including nodes in
+ * the HANDSHAKE state.
+ *
+ * Setting tls_primary to 1 to put TLS port in the main <ip>:<port>
+ * field and put TCP port in aux field, instead of the opposite way.
+ *
+ * The representation obtained using this function is used for the output
+ * of the CLUSTER NODES function, and as format for the cluster
+ * configuration file (nodes.conf) for a given node. */
+sds clusterGenNodesDescription(client *c, int filter, int tls_primary) {
+ sds ci = sdsempty(), ni;
+ dictIterator *di;
+ dictEntry *de;
+
+ /* Generate all nodes slots info firstly. */
+ clusterGenNodesSlotsInfo(filter);
+
+ di = dictGetSafeIterator(server.cluster->nodes);
+ while((de = dictNext(di)) != NULL) {
+ clusterNode *node = dictGetVal(de);
+
+ if (node->flags & filter) continue;
+ ni = clusterGenNodeDescription(c, node, tls_primary);
+ ci = sdscatsds(ci,ni);
+ sdsfree(ni);
+ ci = sdscatlen(ci,"\n",1);
+
+ /* Release slots info. The cache built above is only valid for the
+ * duration of this call, so free it as each node is emitted. */
+ clusterFreeNodesSlotsInfo(node);
+ }
+ dictReleaseIterator(di);
+ return ci;
+}
+
+/* Add to the output buffer of the given client the description of the given cluster link.
+ * The description is a map with each entry being an attribute of the link:
+ * direction, node, create-time, events, send-buffer-allocated and
+ * send-buffer-used. The map length below (6) must match the number of
+ * key/value pairs emitted. */
+void addReplyClusterLinkDescription(client *c, clusterLink *link) {
+ addReplyMapLen(c, 6);
+
+ addReplyBulkCString(c, "direction");
+ addReplyBulkCString(c, link->inbound ? "from" : "to");
+
+ /* addReplyClusterLinkDescription is only called for links that have been
+ * associated with nodes. The association is always bi-directional, so
+ * in addReplyClusterLinkDescription, link->node should never be NULL. */
+ serverAssert(link->node);
+ sds node_name = sdsnewlen(link->node->name, CLUSTER_NAMELEN);
+ addReplyBulkCString(c, "node");
+ addReplyBulkCString(c, node_name);
+ sdsfree(node_name);
+
+ addReplyBulkCString(c, "create-time");
+ addReplyLongLong(c, link->ctime);
+
+ /* 'events' is "r", "w", "rw" or "" depending on the handlers
+ * installed on the connection (empty if the link has no conn). */
+ char events[3], *p;
+ p = events;
+ if (link->conn) {
+ if (connHasReadHandler(link->conn)) *p++ = 'r';
+ if (connHasWriteHandler(link->conn)) *p++ = 'w';
+ }
+ *p = '\0';
+ addReplyBulkCString(c, "events");
+ addReplyBulkCString(c, events);
+
+ /* NOTE(review): both "allocated" and "used" report the same
+ * send_msg_queue_mem field — confirm this is intended now that the
+ * send buffer is a message queue rather than a flat sds buffer. */
+ addReplyBulkCString(c, "send-buffer-allocated");
+ addReplyLongLong(c, link->send_msg_queue_mem);
+
+ addReplyBulkCString(c, "send-buffer-used");
+ addReplyLongLong(c, link->send_msg_queue_mem);
+}
+
+/* Add to the output buffer of the given client an array of cluster link
+ * descriptions, one entry for every currently existing link: both the
+ * outgoing and the inbound link of each known node. The array length is
+ * not known upfront, so a deferred length is used. */
+void addReplyClusterLinksDescription(client *c) {
+    int links = 0;
+    void *deferred_len = addReplyDeferredLen(c);
+
+    dictIterator *it = dictGetSafeIterator(server.cluster->nodes);
+    dictEntry *entry;
+    while ((entry = dictNext(it)) != NULL) {
+        clusterNode *peer = dictGetVal(entry);
+        if (peer->link) {
+            addReplyClusterLinkDescription(c, peer->link);
+            links++;
+        }
+        if (peer->inbound_link) {
+            addReplyClusterLinkDescription(c, peer->inbound_link);
+            links++;
+        }
+    }
+    dictReleaseIterator(it);
+
+    setDeferredArrayLen(c, deferred_len, links);
+}
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER command
+ * -------------------------------------------------------------------------- */
+
+/* Return the endpoint string that should be reported to clients for
+ * node 'n', according to the configured preferred endpoint type. */
+const char *getPreferredEndpoint(clusterNode *n) {
+    switch (server.cluster_preferred_endpoint_type) {
+    case CLUSTER_ENDPOINT_TYPE_IP:
+        return n->ip;
+    case CLUSTER_ENDPOINT_TYPE_HOSTNAME:
+        /* An unset hostname is reported as "?". */
+        return sdslen(n->hostname) != 0 ? n->hostname : "?";
+    case CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT:
+        return "";
+    }
+    return "unknown";
+}
+
+/* Map a cluster bus message type to its human readable name, used by
+ * the CLUSTER INFO message statistics. Unrecognized types are reported
+ * as "unknown". */
+const char *clusterGetMessageTypeString(int type) {
+    const char *name = "unknown";
+
+    switch (type) {
+    case CLUSTERMSG_TYPE_PING: name = "ping"; break;
+    case CLUSTERMSG_TYPE_PONG: name = "pong"; break;
+    case CLUSTERMSG_TYPE_MEET: name = "meet"; break;
+    case CLUSTERMSG_TYPE_FAIL: name = "fail"; break;
+    case CLUSTERMSG_TYPE_PUBLISH: name = "publish"; break;
+    case CLUSTERMSG_TYPE_PUBLISHSHARD: name = "publishshard"; break;
+    case CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST: name = "auth-req"; break;
+    case CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK: name = "auth-ack"; break;
+    case CLUSTERMSG_TYPE_UPDATE: name = "update"; break;
+    case CLUSTERMSG_TYPE_MFSTART: name = "mfstart"; break;
+    case CLUSTERMSG_TYPE_MODULE: name = "module"; break;
+    }
+    return name;
+}
+
+/* Parse object 'o' as a hash slot number. On success the slot (in the
+ * range [0, CLUSTER_SLOTS)) is returned; otherwise an error is sent to
+ * the client and -1 is returned. */
+int getSlotOrReply(client *c, robj *o) {
+    long long parsed;
+
+    if (getLongLongFromObject(o, &parsed) == C_OK &&
+        parsed >= 0 && parsed < CLUSTER_SLOTS)
+    {
+        return (int) parsed;
+    }
+    addReplyError(c, "Invalid or out of range slot");
+    return -1;
+}
+
+/* Returns an indication if the replica node is fully available
+ * and should be listed in CLUSTER SLOTS response.
+ * Returns 1 for available nodes, 0 for nodes that have
+ * not finished their initial sync, in failed state, or are
+ * otherwise considered not available to serve read commands. */
+static int isReplicaAvailable(clusterNode *node) {
+    if (nodeFailed(node)) return 0;
+
+    /* Nodes do not update their own information in the cluster node
+     * list, so for ourselves ask the replication layer directly. */
+    long long offset = (node->flags & CLUSTER_NODE_MYSELF) ?
+                       replicationGetSlaveOffset() :
+                       node->repl_offset;
+
+    /* A zero offset means the initial sync never completed. */
+    return offset != 0;
+}
+
+/* Validate that every slot in the inclusive [start_slot, end_slot]
+ * range can be deleted (del != 0) or added (del == 0), and mark each
+ * slot as seen in the caller-provided 'slots' byte map so that slots
+ * mentioned twice in the same command are rejected. On any problem an
+ * error is replied to 'c' and C_ERR is returned, else C_OK. */
+int checkSlotAssignmentsOrReply(client *c, unsigned char *slots, int del, int start_slot, int end_slot) {
+    for (int slot = start_slot; slot <= end_slot; slot++) {
+        clusterNode *owner = server.cluster->slots[slot];
+
+        if (del && owner == NULL) {
+            addReplyErrorFormat(c, "Slot %d is already unassigned", slot);
+            return C_ERR;
+        }
+        if (!del && owner != NULL) {
+            addReplyErrorFormat(c, "Slot %d is already busy", slot);
+            return C_ERR;
+        }
+        /* Post-increment: the second mention of the same slot sees 1. */
+        if (slots[slot]++ == 1) {
+            addReplyErrorFormat(c, "Slot %d specified multiple times", slot);
+            return C_ERR;
+        }
+    }
+    return C_OK;
+}
+
+/* Apply to this node the addition (del == 0) or removal (del != 0) of
+ * every slot flagged in the 'slots' byte map (one byte per slot).
+ * 'c' is only used for assertion reporting. */
+void clusterUpdateSlots(client *c, unsigned char *slots, int del) {
+    for (int j = 0; j < CLUSTER_SLOTS; j++) {
+        if (!slots[j]) continue;
+
+        /* If this slot was set as importing we can clear this
+         * state as now we are the real owner of the slot. */
+        if (server.cluster->importing_slots_from[j])
+            server.cluster->importing_slots_from[j] = NULL;
+
+        int retval = del ? clusterDelSlot(j) : clusterAddSlot(myself,j);
+        serverAssertWithInfo(c,NULL,retval == C_OK);
+    }
+}
+
+/* Emit the 4-element node description used by CLUSTER SLOTS entries:
+ * preferred endpoint, client port, node id, and a map of additional
+ * (non-preferred) endpoint information. */
+void addNodeToNodeReply(client *c, clusterNode *node) {
+ addReplyArrayLen(c, 4);
+ if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_IP) {
+ addReplyBulkCString(c, node->ip);
+ } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_HOSTNAME) {
+ /* An unset hostname is reported as "?". */
+ if (sdslen(node->hostname) != 0) {
+ addReplyBulkCBuffer(c, node->hostname, sdslen(node->hostname));
+ } else {
+ addReplyBulkCString(c, "?");
+ }
+ } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT) {
+ addReplyNull(c);
+ } else {
+ serverPanic("Unrecognized preferred endpoint type");
+ }
+
+ /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */
+ addReplyLongLong(c, getNodeClientPort(node, shouldReturnTlsInfo()));
+ addReplyBulkCBuffer(c, node->name, CLUSTER_NAMELEN);
+
+ /* Add the additional endpoint information, this is all the known networking information
+ * that is not the preferred endpoint. Note the logic is evaluated twice so we can
+ * correctly report the number of additional network arguments without using a deferred
+ * map, an assertion is made at the end to check we set the right length. */
+ int length = 0;
+ if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) {
+ length++;
+ }
+ if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME
+ && sdslen(node->hostname) != 0)
+ {
+ length++;
+ }
+ addReplyMapLen(c, length);
+
+ /* Both conditions below must mirror the counting logic above exactly,
+ * otherwise the final assertion fires. */
+ if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) {
+ addReplyBulkCString(c, "ip");
+ addReplyBulkCString(c, node->ip);
+ length--;
+ }
+ if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME
+ && sdslen(node->hostname) != 0)
+ {
+ addReplyBulkCString(c, "hostname");
+ addReplyBulkCBuffer(c, node->hostname, sdslen(node->hostname));
+ length--;
+ }
+ serverAssert(length == 0);
+}
+
+/* Emit a single CLUSTER SLOTS entry: the [start_slot, end_slot] range,
+ * the master 'node', and every replica of it that is currently
+ * available (see isReplicaAvailable()). */
+void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, int end_slot) {
+    int entries = 3; /* slots (2) + master addr (1) */
+
+    /* Count available replicas first so the array length can be emitted
+     * without a deferred length. */
+    for (int i = 0; i < node->numslaves; i++) {
+        if (isReplicaAvailable(node->slaves[i])) entries++;
+    }
+    addReplyArrayLen(c, entries);
+    addReplyLongLong(c, start_slot);
+    addReplyLongLong(c, end_slot);
+    addNodeToNodeReply(c, node);
+
+    /* Remaining entries in the reply are the available replicas for
+     * this slot range. */
+    for (int i = 0; i < node->numslaves; i++) {
+        if (!isReplicaAvailable(node->slaves[i])) continue;
+        addNodeToNodeReply(c, node->slaves[i]);
+        entries--;
+    }
+    serverAssert(entries == 3); /* Original 3 elements */
+}
+
+/* Add detailed information of a node to the output buffer of the given client.
+ * The reply is a map whose size is only known at the end (some fields are
+ * conditional), hence the deferred map length. Used by CLUSTER SHARDS. */
+void addNodeDetailsToShardReply(client *c, clusterNode *node) {
+ int reply_count = 0;
+ void *node_replylen = addReplyDeferredLen(c);
+ addReplyBulkCString(c, "id");
+ addReplyBulkCBuffer(c, node->name, CLUSTER_NAMELEN);
+ reply_count++;
+
+ /* The port fields are only emitted when non zero. */
+ if (node->tcp_port) {
+ addReplyBulkCString(c, "port");
+ addReplyLongLong(c, node->tcp_port);
+ reply_count++;
+ }
+
+ if (node->tls_port) {
+ addReplyBulkCString(c, "tls-port");
+ addReplyLongLong(c, node->tls_port);
+ reply_count++;
+ }
+
+ addReplyBulkCString(c, "ip");
+ addReplyBulkCString(c, node->ip);
+ reply_count++;
+
+ addReplyBulkCString(c, "endpoint");
+ addReplyBulkCString(c, getPreferredEndpoint(node));
+ reply_count++;
+
+ if (sdslen(node->hostname) != 0) {
+ addReplyBulkCString(c, "hostname");
+ addReplyBulkCBuffer(c, node->hostname, sdslen(node->hostname));
+ reply_count++;
+ }
+
+ /* For ourselves the cluster node list is not authoritative: query the
+ * replication layer directly for the offset. */
+ long long node_offset;
+ if (node->flags & CLUSTER_NODE_MYSELF) {
+ node_offset = nodeIsSlave(node) ? replicationGetSlaveOffset() : server.master_repl_offset;
+ } else {
+ node_offset = node->repl_offset;
+ }
+
+ addReplyBulkCString(c, "role");
+ addReplyBulkCString(c, nodeIsSlave(node) ? "replica" : "master");
+ reply_count++;
+
+ addReplyBulkCString(c, "replication-offset");
+ addReplyLongLong(c, node_offset);
+ reply_count++;
+
+ /* Health is "fail" for failed nodes, "loading" for replicas that
+ * never completed their initial sync (offset 0), "online" otherwise. */
+ addReplyBulkCString(c, "health");
+ const char *health_msg = NULL;
+ if (nodeFailed(node)) {
+ health_msg = "fail";
+ } else if (nodeIsSlave(node) && node_offset == 0) {
+ health_msg = "loading";
+ } else {
+ health_msg = "online";
+ }
+ addReplyBulkCString(c, health_msg);
+ reply_count++;
+
+ setDeferredMapLen(c, node_replylen, reply_count);
+}
+
+/* Add the shard reply of a single shard based off the given primary node.
+ * 'nodes' is the non-empty list of members of the shard. The reply is a
+ * 2-entry map: "slots" (flat array of start/end pairs, taken from the
+ * primary's cached slot_info_pairs) and "nodes" (details of every member). */
+void addShardReplyForClusterShards(client *c, list *nodes) {
+ serverAssert(listLength(nodes) > 0);
+ clusterNode *n = listNodeValue(listFirst(nodes));
+ addReplyMapLen(c, 2);
+ addReplyBulkCString(c, "slots");
+
+ /* Use slot_info_pairs from the primary only */
+ n = clusterNodeGetMaster(n);
+
+ if (n->slot_info_pairs != NULL) {
+ serverAssert((n->slot_info_pairs_count % 2) == 0);
+ addReplyArrayLen(c, n->slot_info_pairs_count);
+ for (int i = 0; i < n->slot_info_pairs_count; i++)
+ addReplyLongLong(c, (unsigned long)n->slot_info_pairs[i]);
+ } else {
+ /* If no slot info pair is provided, the node owns no slots */
+ addReplyArrayLen(c, 0);
+ }
+
+ addReplyBulkCString(c, "nodes");
+ addReplyArrayLen(c, listLength(nodes));
+ listIter li;
+ listRewind(nodes, &li);
+ for (listNode *ln = listNext(&li); ln != NULL; ln = listNext(&li)) {
+ /* Note: this 'n' intentionally shadows the primary above; each
+ * member's cached slot info is released once it has been emitted. */
+ clusterNode *n = listNodeValue(ln);
+ addNodeDetailsToShardReply(c, n);
+ clusterFreeNodesSlotsInfo(n);
+ }
+}
+
+/* Reply to CLUSTER SHARDS: an array with one entry per shard, each
+ * entry carrying the slot (start, end) pairs owned by the shard plus
+ * the details of the primary and its replica(s). */
+void clusterReplyShards(client *c) {
+    addReplyArrayLen(c, dictSize(server.cluster->shards));
+    /* This call will add slot_info_pairs to all nodes */
+    clusterGenNodesSlotsInfo(0);
+
+    dictIterator *it = dictGetSafeIterator(server.cluster->shards);
+    dictEntry *entry;
+    while ((entry = dictNext(it)) != NULL)
+        addShardReplyForClusterShards(c, dictGetVal(entry));
+    dictReleaseIterator(it);
+}
+
+/* Reply to CLUSTER SLOTS: one entry per contiguous slot range owned by
+ * the same master node. */
+void clusterReplyMultiBulkSlots(client * c) {
+ /* Format: 1) 1) start slot
+ * 2) end slot
+ * 3) 1) master IP
+ * 2) master port
+ * 3) node ID
+ * 4) 1) replica IP
+ * 2) replica port
+ * 3) node ID
+ * ... continued until done
+ */
+ clusterNode *n = NULL;
+ int num_masters = 0, start = -1;
+ void *slot_replylen = addReplyDeferredLen(c);
+
+ /* The loop goes up to CLUSTER_SLOTS *inclusive*: the extra iteration
+ * acts as a sentinel to flush the last open range. */
+ for (int i = 0; i <= CLUSTER_SLOTS; i++) {
+ /* Find start node and slot id. */
+ if (n == NULL) {
+ if (i == CLUSTER_SLOTS) break;
+ n = server.cluster->slots[i];
+ start = i;
+ continue;
+ }
+
+ /* Add cluster slots info when occur different node with start
+ * or end of slot. */
+ if (i == CLUSTER_SLOTS || n != server.cluster->slots[i]) {
+ addNodeReplyForClusterSlot(c, n, start, i-1);
+ num_masters++;
+ if (i == CLUSTER_SLOTS) break;
+ n = server.cluster->slots[i];
+ start = i;
+ }
+ }
+ setDeferredArrayLen(c, slot_replylen, num_masters);
+}
+
+/* Build and return (as an SDS string the caller must free) the body of
+ * the CLUSTER INFO reply: cluster state, slot accounting, epochs and
+ * per-message-type bus statistics. */
+sds genClusterInfoString(void) {
+ sds info = sdsempty();
+ char *statestr[] = {"ok","fail"};
+ int slots_assigned = 0, slots_ok = 0, slots_pfail = 0, slots_fail = 0;
+ uint64_t myepoch;
+ int j;
+
+ /* Classify every assigned slot by the health of its owner. */
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ clusterNode *n = server.cluster->slots[j];
+
+ if (n == NULL) continue;
+ slots_assigned++;
+ if (nodeFailed(n)) {
+ slots_fail++;
+ } else if (nodeTimedOut(n)) {
+ slots_pfail++;
+ } else {
+ slots_ok++;
+ }
+ }
+
+ /* A replica reports its master's config epoch. */
+ myepoch = (nodeIsSlave(myself) && myself->slaveof) ?
+ myself->slaveof->configEpoch : myself->configEpoch;
+
+ info = sdscatprintf(info,
+ "cluster_state:%s\r\n"
+ "cluster_slots_assigned:%d\r\n"
+ "cluster_slots_ok:%d\r\n"
+ "cluster_slots_pfail:%d\r\n"
+ "cluster_slots_fail:%d\r\n"
+ "cluster_known_nodes:%lu\r\n"
+ "cluster_size:%d\r\n"
+ "cluster_current_epoch:%llu\r\n"
+ "cluster_my_epoch:%llu\r\n"
+ , statestr[server.cluster->state],
+ slots_assigned,
+ slots_ok,
+ slots_pfail,
+ slots_fail,
+ dictSize(server.cluster->nodes),
+ server.cluster->size,
+ (unsigned long long) server.cluster->currentEpoch,
+ (unsigned long long) myepoch
+ );
+
+ /* Show stats about messages sent and received. Message types with a
+ * zero counter are omitted from the per-type lines. */
+ long long tot_msg_sent = 0;
+ long long tot_msg_received = 0;
+
+ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
+ if (server.cluster->stats_bus_messages_sent[i] == 0) continue;
+ tot_msg_sent += server.cluster->stats_bus_messages_sent[i];
+ info = sdscatprintf(info,
+ "cluster_stats_messages_%s_sent:%lld\r\n",
+ clusterGetMessageTypeString(i),
+ server.cluster->stats_bus_messages_sent[i]);
+ }
+ info = sdscatprintf(info,
+ "cluster_stats_messages_sent:%lld\r\n", tot_msg_sent);
+
+ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
+ if (server.cluster->stats_bus_messages_received[i] == 0) continue;
+ tot_msg_received += server.cluster->stats_bus_messages_received[i];
+ info = sdscatprintf(info,
+ "cluster_stats_messages_%s_received:%lld\r\n",
+ clusterGetMessageTypeString(i),
+ server.cluster->stats_bus_messages_received[i]);
+ }
+ info = sdscatprintf(info,
+ "cluster_stats_messages_received:%lld\r\n", tot_msg_received);
+
+ info = sdscatprintf(info,
+ "total_cluster_links_buffer_limit_exceeded:%llu\r\n",
+ server.cluster->stat_cluster_links_buffer_limit_exceeded);
+
+ return info;
+}
+
+void clusterCommand(client *c) {
+ if (server.cluster_enabled == 0) {
+ addReplyError(c,"This instance has cluster support disabled");
+ return;
+ }
+
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"ADDSLOTS <slot> [<slot> ...]",
+" Assign slots to current node.",
+"ADDSLOTSRANGE <start slot> <end slot> [<start slot> <end slot> ...]",
+" Assign slots which are between <start-slot> and <end-slot> to current node.",
+"BUMPEPOCH",
+" Advance the cluster config epoch.",
+"COUNT-FAILURE-REPORTS <node-id>",
+" Return number of failure reports for <node-id>.",
+"COUNTKEYSINSLOT <slot>",
+" Return the number of keys in <slot>.",
+"DELSLOTS <slot> [<slot> ...]",
+" Delete slots information from current node.",
+"DELSLOTSRANGE <start slot> <end slot> [<start slot> <end slot> ...]",
+" Delete slots information which are between <start-slot> and <end-slot> from current node.",
+"FAILOVER [FORCE|TAKEOVER]",
+" Promote current replica node to being a master.",
+"FORGET <node-id>",
+" Remove a node from the cluster.",
+"GETKEYSINSLOT <slot> <count>",
+" Return key names stored by current node in a slot.",
+"FLUSHSLOTS",
+" Delete current node own slots information.",
+"INFO",
+" Return information about the cluster.",
+"KEYSLOT <key>",
+" Return the hash slot for <key>.",
+"MEET <ip> <port> [<bus-port>]",
+" Connect nodes into a working cluster.",
+"MYID",
+" Return the node id.",
+"MYSHARDID",
+" Return the node's shard id.",
+"NODES",
+" Return cluster configuration seen by node. Output format:",
+" <id> <ip:port@bus-port[,hostname]> <flags> <master> <pings> <pongs> <epoch> <link> <slot> ...",
+"REPLICATE <node-id>",
+" Configure current node as replica to <node-id>.",
+"RESET [HARD|SOFT]",
+" Reset current node (default: soft).",
+"SET-CONFIG-EPOCH <epoch>",
+" Set config epoch of current node.",
+"SETSLOT <slot> (IMPORTING <node-id>|MIGRATING <node-id>|STABLE|NODE <node-id>)",
+" Set slot state.",
+"REPLICAS <node-id>",
+" Return <node-id> replicas.",
+"SAVECONFIG",
+" Force saving cluster configuration on disk.",
+"SLOTS",
+" Return information about slots range mappings. Each range is made of:",
+" start, end, master and replicas IP addresses, ports and ids",
+"SHARDS",
+" Return information about slot range mappings and the nodes associated with them.",
+"LINKS",
+" Return information about all network links between this node and its peers.",
+" Output format is an array where each array element is a map containing attributes of a link",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"meet") && (c->argc == 4 || c->argc == 5)) {
+ /* CLUSTER MEET <ip> <port> [cport] */
+ long long port, cport;
+
+ if (getLongLongFromObject(c->argv[3], &port) != C_OK) {
+ addReplyErrorFormat(c,"Invalid base port specified: %s",
+ (char*)c->argv[3]->ptr);
+ return;
+ }
+
+ if (c->argc == 5) {
+ if (getLongLongFromObject(c->argv[4], &cport) != C_OK) {
+ addReplyErrorFormat(c,"Invalid bus port specified: %s",
+ (char*)c->argv[4]->ptr);
+ return;
+ }
+ } else {
+ cport = port + CLUSTER_PORT_INCR;
+ }
+
+ if (clusterStartHandshake(c->argv[2]->ptr,port,cport) == 0 &&
+ errno == EINVAL)
+ {
+ addReplyErrorFormat(c,"Invalid node address specified: %s:%s",
+ (char*)c->argv[2]->ptr, (char*)c->argv[3]->ptr);
+ } else {
+ addReply(c,shared.ok);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"nodes") && c->argc == 2) {
+ /* CLUSTER NODES */
+ /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */
+ sds nodes = clusterGenNodesDescription(c, 0, shouldReturnTlsInfo());
+ addReplyVerbatim(c,nodes,sdslen(nodes),"txt");
+ sdsfree(nodes);
+ } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) {
+ /* CLUSTER MYID */
+ addReplyBulkCBuffer(c,myself->name, CLUSTER_NAMELEN);
+ } else if (!strcasecmp(c->argv[1]->ptr,"myshardid") && c->argc == 2) {
+ /* CLUSTER MYSHARDID */
+ addReplyBulkCBuffer(c,myself->shard_id, CLUSTER_NAMELEN);
+ } else if (!strcasecmp(c->argv[1]->ptr,"slots") && c->argc == 2) {
+ /* CLUSTER SLOTS */
+ clusterReplyMultiBulkSlots(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"shards") && c->argc == 2) {
+ /* CLUSTER SHARDS */
+ clusterReplyShards(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"flushslots") && c->argc == 2) {
+ /* CLUSTER FLUSHSLOTS */
+ if (dictSize(server.db[0].dict) != 0) {
+ addReplyError(c,"DB must be empty to perform CLUSTER FLUSHSLOTS.");
+ return;
+ }
+ clusterDelNodeSlots(myself);
+ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
+ addReply(c,shared.ok);
+ } else if ((!strcasecmp(c->argv[1]->ptr,"addslots") ||
+ !strcasecmp(c->argv[1]->ptr,"delslots")) && c->argc >= 3)
+ {
+ /* CLUSTER ADDSLOTS <slot> [slot] ... */
+ /* CLUSTER DELSLOTS <slot> [slot] ... */
+ int j, slot;
+ unsigned char *slots = zmalloc(CLUSTER_SLOTS);
+ int del = !strcasecmp(c->argv[1]->ptr,"delslots");
+
+ memset(slots,0,CLUSTER_SLOTS);
+ /* Check that all the arguments are parseable.*/
+ for (j = 2; j < c->argc; j++) {
+ if ((slot = getSlotOrReply(c,c->argv[j])) == C_ERR) {
+ zfree(slots);
+ return;
+ }
+ }
+ /* Check that the slots are not already busy. */
+ for (j = 2; j < c->argc; j++) {
+ slot = getSlotOrReply(c,c->argv[j]);
+ if (checkSlotAssignmentsOrReply(c, slots, del, slot, slot) == C_ERR) {
+ zfree(slots);
+ return;
+ }
+ }
+ clusterUpdateSlots(c, slots, del);
+ zfree(slots);
+ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
+ addReply(c,shared.ok);
+ } else if ((!strcasecmp(c->argv[1]->ptr,"addslotsrange") ||
+ !strcasecmp(c->argv[1]->ptr,"delslotsrange")) && c->argc >= 4) {
+ if (c->argc % 2 == 1) {
+ addReplyErrorArity(c);
+ return;
+ }
+ /* CLUSTER ADDSLOTSRANGE <start slot> <end slot> [<start slot> <end slot> ...] */
+ /* CLUSTER DELSLOTSRANGE <start slot> <end slot> [<start slot> <end slot> ...] */
+ int j, startslot, endslot;
+ unsigned char *slots = zmalloc(CLUSTER_SLOTS);
+ int del = !strcasecmp(c->argv[1]->ptr,"delslotsrange");
+
+ memset(slots,0,CLUSTER_SLOTS);
+ /* Check that all the arguments are parseable and that all the
+ * slots are not already busy. */
+ for (j = 2; j < c->argc; j += 2) {
+ if ((startslot = getSlotOrReply(c,c->argv[j])) == C_ERR) {
+ zfree(slots);
+ return;
+ }
+ if ((endslot = getSlotOrReply(c,c->argv[j+1])) == C_ERR) {
+ zfree(slots);
+ return;
+ }
+ if (startslot > endslot) {
+ addReplyErrorFormat(c,"start slot number %d is greater than end slot number %d", startslot, endslot);
+ zfree(slots);
+ return;
+ }
+
+ if (checkSlotAssignmentsOrReply(c, slots, del, startslot, endslot) == C_ERR) {
+ zfree(slots);
+ return;
+ }
+ }
+ clusterUpdateSlots(c, slots, del);
+ zfree(slots);
+ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"setslot") && c->argc >= 4) {
+ /* SETSLOT 10 MIGRATING <node ID> */
+ /* SETSLOT 10 IMPORTING <node ID> */
+ /* SETSLOT 10 STABLE */
+ /* SETSLOT 10 NODE <node ID> */
+ int slot;
+ clusterNode *n;
+
+ if (nodeIsSlave(myself)) {
+ addReplyError(c,"Please use SETSLOT only with masters.");
+ return;
+ }
+
+ if ((slot = getSlotOrReply(c,c->argv[2])) == -1) return;
+
+ if (!strcasecmp(c->argv[3]->ptr,"migrating") && c->argc == 5) {
+ if (server.cluster->slots[slot] != myself) {
+ addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot);
+ return;
+ }
+ n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr));
+ if (n == NULL) {
+ addReplyErrorFormat(c,"I don't know about node %s",
+ (char*)c->argv[4]->ptr);
+ return;
+ }
+ if (nodeIsSlave(n)) {
+ addReplyError(c,"Target node is not a master");
+ return;
+ }
+ server.cluster->migrating_slots_to[slot] = n;
+ } else if (!strcasecmp(c->argv[3]->ptr,"importing") && c->argc == 5) {
+ if (server.cluster->slots[slot] == myself) {
+ addReplyErrorFormat(c,
+ "I'm already the owner of hash slot %u",slot);
+ return;
+ }
+ n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr));
+ if (n == NULL) {
+ addReplyErrorFormat(c,"I don't know about node %s",
+ (char*)c->argv[4]->ptr);
+ return;
+ }
+ if (nodeIsSlave(n)) {
+ addReplyError(c,"Target node is not a master");
+ return;
+ }
+ server.cluster->importing_slots_from[slot] = n;
+ } else if (!strcasecmp(c->argv[3]->ptr,"stable") && c->argc == 4) {
+ /* CLUSTER SETSLOT <SLOT> STABLE */
+ server.cluster->importing_slots_from[slot] = NULL;
+ server.cluster->migrating_slots_to[slot] = NULL;
+ } else if (!strcasecmp(c->argv[3]->ptr,"node") && c->argc == 5) {
+ /* CLUSTER SETSLOT <SLOT> NODE <NODE ID> */
+ n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr));
+ if (!n) {
+ addReplyErrorFormat(c,"Unknown node %s",
+ (char*)c->argv[4]->ptr);
+ return;
+ }
+ if (nodeIsSlave(n)) {
+ addReplyError(c,"Target node is not a master");
+ return;
+ }
+ /* If this hash slot was served by 'myself' before to switch
+ * make sure there are no longer local keys for this hash slot. */
+ if (server.cluster->slots[slot] == myself && n != myself) {
+ if (countKeysInSlot(slot) != 0) {
+ addReplyErrorFormat(c,
+ "Can't assign hashslot %d to a different node "
+ "while I still hold keys for this hash slot.", slot);
+ return;
+ }
+ }
+ /* If this slot is in migrating status but we have no keys
+ * for it assigning the slot to another node will clear
+ * the migrating status. */
+ if (countKeysInSlot(slot) == 0 &&
+ server.cluster->migrating_slots_to[slot])
+ server.cluster->migrating_slots_to[slot] = NULL;
+
+ int slot_was_mine = server.cluster->slots[slot] == myself;
+ clusterDelSlot(slot);
+ clusterAddSlot(n,slot);
+
+ /* If we are a master left without slots, we should turn into a
+ * replica of the new master. */
+ if (slot_was_mine &&
+ n != myself &&
+ myself->numslots == 0 &&
+ server.cluster_allow_replica_migration)
+ {
+ serverLog(LL_NOTICE,
+ "Configuration change detected. Reconfiguring myself "
+ "as a replica of %.40s (%s)", n->name, n->human_nodename);
+ clusterSetMaster(n);
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG |
+ CLUSTER_TODO_UPDATE_STATE |
+ CLUSTER_TODO_FSYNC_CONFIG);
+ }
+
+ /* If this node was importing this slot, assigning the slot to
+ * itself also clears the importing status. */
+ if (n == myself &&
+ server.cluster->importing_slots_from[slot])
+ {
+ /* This slot was manually migrated, set this node configEpoch
+ * to a new epoch so that the new version can be propagated
+ * by the cluster.
+ *
+ * Note that if this ever results in a collision with another
+ * node getting the same configEpoch, for example because a
+ * failover happens at the same time we close the slot, the
+ * configEpoch collision resolution will fix it assigning
+ * a different epoch to each node. */
+ if (clusterBumpConfigEpochWithoutConsensus() == C_OK) {
+ serverLog(LL_NOTICE,
+ "configEpoch updated after importing slot %d", slot);
+ }
+ server.cluster->importing_slots_from[slot] = NULL;
+ /* After importing this slot, let the other nodes know as
+ * soon as possible. */
+ clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
+ }
+ } else {
+ addReplyError(c,
+ "Invalid CLUSTER SETSLOT action or number of arguments. Try CLUSTER HELP");
+ return;
+ }
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_UPDATE_STATE);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"bumpepoch") && c->argc == 2) {
+ /* CLUSTER BUMPEPOCH */
+ int retval = clusterBumpConfigEpochWithoutConsensus();
+ sds reply = sdscatprintf(sdsempty(),"+%s %llu\r\n",
+ (retval == C_OK) ? "BUMPED" : "STILL",
+ (unsigned long long) myself->configEpoch);
+ addReplySds(c,reply);
+ } else if (!strcasecmp(c->argv[1]->ptr,"info") && c->argc == 2) {
+ /* CLUSTER INFO */
+
+ sds info = genClusterInfoString();
+
+ /* Produce the reply protocol. */
+ addReplyVerbatim(c,info,sdslen(info),"txt");
+ sdsfree(info);
+ } else if (!strcasecmp(c->argv[1]->ptr,"saveconfig") && c->argc == 2) {
+ int retval = clusterSaveConfig(1);
+
+ if (retval == 0)
+ addReply(c,shared.ok);
+ else
+ addReplyErrorFormat(c,"error saving the cluster node config: %s",
+ strerror(errno));
+ } else if (!strcasecmp(c->argv[1]->ptr,"keyslot") && c->argc == 3) {
+ /* CLUSTER KEYSLOT <key> */
+ sds key = c->argv[2]->ptr;
+
+ addReplyLongLong(c,keyHashSlot(key,sdslen(key)));
+ } else if (!strcasecmp(c->argv[1]->ptr,"countkeysinslot") && c->argc == 3) {
+ /* CLUSTER COUNTKEYSINSLOT <slot> */
+ long long slot;
+
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK)
+ return;
+ if (slot < 0 || slot >= CLUSTER_SLOTS) {
+ addReplyError(c,"Invalid slot");
+ return;
+ }
+ addReplyLongLong(c,countKeysInSlot(slot));
+ } else if (!strcasecmp(c->argv[1]->ptr,"getkeysinslot") && c->argc == 4) {
+ /* CLUSTER GETKEYSINSLOT <slot> <count> */
+ long long maxkeys, slot;
+
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK)
+ return;
+ if (getLongLongFromObjectOrReply(c,c->argv[3],&maxkeys,NULL)
+ != C_OK)
+ return;
+ if (slot < 0 || slot >= CLUSTER_SLOTS || maxkeys < 0) {
+ addReplyError(c,"Invalid slot or number of keys");
+ return;
+ }
+
+ unsigned int keys_in_slot = countKeysInSlot(slot);
+ unsigned int numkeys = maxkeys > keys_in_slot ? keys_in_slot : maxkeys;
+ addReplyArrayLen(c,numkeys);
+ dictEntry *de = (*server.db->slots_to_keys).by_slot[slot].head;
+ for (unsigned int j = 0; j < numkeys; j++) {
+ serverAssert(de != NULL);
+ sds sdskey = dictGetKey(de);
+ addReplyBulkCBuffer(c, sdskey, sdslen(sdskey));
+ de = dictEntryNextInSlot(de);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"forget") && c->argc == 3) {
+ /* CLUSTER FORGET <NODE ID> */
+ clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr));
+ if (!n) {
+ if (clusterBlacklistExists((char*)c->argv[2]->ptr))
+ /* Already forgotten. The deletion may have been gossipped by
+ * another node, so we pretend it succeeded. */
+ addReply(c,shared.ok);
+ else
+ addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr);
+ return;
+ } else if (n == myself) {
+ addReplyError(c,"I tried hard but I can't forget myself...");
+ return;
+ } else if (nodeIsSlave(myself) && myself->slaveof == n) {
+ addReplyError(c,"Can't forget my master!");
+ return;
+ }
+ clusterBlacklistAddNode(n);
+ clusterDelNode(n);
+ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|
+ CLUSTER_TODO_SAVE_CONFIG);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"replicate") && c->argc == 3) {
+ /* CLUSTER REPLICATE <NODE ID> */
+ /* Lookup the specified node in our table. */
+ clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr));
+ if (!n) {
+ addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr);
+ return;
+ }
+
+ /* I can't replicate myself. */
+ if (n == myself) {
+ addReplyError(c,"Can't replicate myself");
+ return;
+ }
+
+ /* Can't replicate a slave. */
+ if (nodeIsSlave(n)) {
+ addReplyError(c,"I can only replicate a master, not a replica.");
+ return;
+ }
+
+ /* If the instance is currently a master, it should have no assigned
+ * slots nor keys to accept to replicate some other node.
+ * Slaves can switch to another master without issues. */
+ if (nodeIsMaster(myself) &&
+ (myself->numslots != 0 || dictSize(server.db[0].dict) != 0)) {
+ addReplyError(c,
+ "To set a master the node must be empty and "
+ "without assigned slots.");
+ return;
+ }
+
+ /* Set the master. */
+ clusterSetMaster(n);
+ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
+ addReply(c,shared.ok);
+ } else if ((!strcasecmp(c->argv[1]->ptr,"slaves") ||
+ !strcasecmp(c->argv[1]->ptr,"replicas")) && c->argc == 3) {
+ /* CLUSTER SLAVES <NODE ID> */
+ /* CLUSTER REPLICAS <NODE ID> */
+ clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr));
+ int j;
+
+ /* Lookup the specified node in our table. */
+ if (!n) {
+ addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr);
+ return;
+ }
+
+ if (nodeIsSlave(n)) {
+ addReplyError(c,"The specified node is not a master");
+ return;
+ }
+
+ /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */
+ addReplyArrayLen(c,n->numslaves);
+ for (j = 0; j < n->numslaves; j++) {
+ sds ni = clusterGenNodeDescription(c, n->slaves[j], shouldReturnTlsInfo());
+ addReplyBulkCString(c,ni);
+ sdsfree(ni);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"count-failure-reports") &&
+ c->argc == 3)
+ {
+ /* CLUSTER COUNT-FAILURE-REPORTS <NODE ID> */
+ clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr));
+
+ if (!n) {
+ addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr);
+ return;
+ } else {
+ addReplyLongLong(c,clusterNodeFailureReportsCount(n));
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"failover") &&
+ (c->argc == 2 || c->argc == 3))
+ {
+ /* CLUSTER FAILOVER [FORCE|TAKEOVER] */
+ int force = 0, takeover = 0;
+
+ if (c->argc == 3) {
+ if (!strcasecmp(c->argv[2]->ptr,"force")) {
+ force = 1;
+ } else if (!strcasecmp(c->argv[2]->ptr,"takeover")) {
+ takeover = 1;
+ force = 1; /* Takeover also implies force. */
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ /* Check preconditions. */
+ if (nodeIsMaster(myself)) {
+ addReplyError(c,"You should send CLUSTER FAILOVER to a replica");
+ return;
+ } else if (myself->slaveof == NULL) {
+ addReplyError(c,"I'm a replica but my master is unknown to me");
+ return;
+ } else if (!force &&
+ (nodeFailed(myself->slaveof) ||
+ myself->slaveof->link == NULL))
+ {
+ addReplyError(c,"Master is down or failed, "
+ "please use CLUSTER FAILOVER FORCE");
+ return;
+ }
+ resetManualFailover();
+ server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT;
+
+ if (takeover) {
+ /* A takeover does not perform any initial check. It just
+ * generates a new configuration epoch for this node without
+ * consensus, claims the master's slots, and broadcast the new
+ * configuration. */
+ serverLog(LL_NOTICE,"Taking over the master (user request).");
+ clusterBumpConfigEpochWithoutConsensus();
+ clusterFailoverReplaceYourMaster();
+ } else if (force) {
+ /* If this is a forced failover, we don't need to talk with our
+ * master to agree about the offset. We just failover taking over
+ * it without coordination. */
+ serverLog(LL_NOTICE,"Forced failover user request accepted.");
+ server.cluster->mf_can_start = 1;
+ } else {
+ serverLog(LL_NOTICE,"Manual failover user request accepted.");
+ clusterSendMFStart(myself->slaveof);
+ }
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"set-config-epoch") && c->argc == 3)
+ {
+ /* CLUSTER SET-CONFIG-EPOCH <epoch>
+ *
+ * The user is allowed to set the config epoch only when a node is
+ * totally fresh: no config epoch, no other known node, and so forth.
+ * This happens at cluster creation time to start with a cluster where
+ * every node has a different node ID, without to rely on the conflicts
+ * resolution system which is too slow when a big cluster is created. */
+ long long epoch;
+
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&epoch,NULL) != C_OK)
+ return;
+
+ if (epoch < 0) {
+ addReplyErrorFormat(c,"Invalid config epoch specified: %lld",epoch);
+ } else if (dictSize(server.cluster->nodes) > 1) {
+ addReplyError(c,"The user can assign a config epoch only when the "
+ "node does not know any other node.");
+ } else if (myself->configEpoch != 0) {
+ addReplyError(c,"Node config epoch is already non-zero");
+ } else {
+ myself->configEpoch = epoch;
+ serverLog(LL_NOTICE,
+ "configEpoch set to %llu via CLUSTER SET-CONFIG-EPOCH",
+ (unsigned long long) myself->configEpoch);
+
+ if (server.cluster->currentEpoch < (uint64_t)epoch)
+ server.cluster->currentEpoch = epoch;
+ /* No need to fsync the config here since in the unlucky event
+ * of a failure to persist the config, the conflict resolution code
+ * will assign a unique config to this node. */
+ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|
+ CLUSTER_TODO_SAVE_CONFIG);
+ addReply(c,shared.ok);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"reset") &&
+ (c->argc == 2 || c->argc == 3))
+ {
+ /* CLUSTER RESET [SOFT|HARD] */
+ int hard = 0;
+
+ /* Parse soft/hard argument. Default is soft. */
+ if (c->argc == 3) {
+ if (!strcasecmp(c->argv[2]->ptr,"hard")) {
+ hard = 1;
+ } else if (!strcasecmp(c->argv[2]->ptr,"soft")) {
+ hard = 0;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ /* Slaves can be reset while containing data, but not master nodes
+ * that must be empty. */
+ if (nodeIsMaster(myself) && dictSize(c->db->dict) != 0) {
+ addReplyError(c,"CLUSTER RESET can't be called with "
+ "master nodes containing keys");
+ return;
+ }
+ clusterReset(hard);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"links") && c->argc == 2) {
+ /* CLUSTER LINKS */
+ addReplyClusterLinksDescription(c);
+ } else {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+}
+
+void removeChannelsInSlot(unsigned int slot) {
+ unsigned int channelcount = countChannelsInSlot(slot);
+ if (channelcount == 0) return;
+
+ /* Retrieve all the channels for the slot. */
+ robj **channels = zmalloc(sizeof(robj*)*channelcount);
+ raxIterator iter;
+ int j = 0;
+ unsigned char indexed[2];
+
+ indexed[0] = (slot >> 8) & 0xff;
+ indexed[1] = slot & 0xff;
+ raxStart(&iter,server.cluster->slots_to_channels);
+ raxSeek(&iter,">=",indexed,2);
+ while(raxNext(&iter)) {
+ if (iter.key[0] != indexed[0] || iter.key[1] != indexed[1]) break;
+ channels[j++] = createStringObject((char*)iter.key + 2, iter.key_len - 2);
+ }
+ raxStop(&iter);
+
+ pubsubUnsubscribeShardChannels(channels, channelcount);
+ zfree(channels);
+}
+
+/* -----------------------------------------------------------------------------
+ * DUMP, RESTORE and MIGRATE commands
+ * -------------------------------------------------------------------------- */
+
+/* Generates a DUMP-format representation of the object 'o', adding it to the
+ * io stream pointed by 'rio'. This function can't fail. */
+void createDumpPayload(rio *payload, robj *o, robj *key, int dbid) {
+ unsigned char buf[2];
+ uint64_t crc;
+
+ /* Serialize the object in an RDB-like format. It consist of an object type
+ * byte followed by the serialized object. This is understood by RESTORE. */
+ rioInitWithBuffer(payload,sdsempty());
+ serverAssert(rdbSaveObjectType(payload,o));
+ serverAssert(rdbSaveObject(payload,o,key,dbid));
+
+ /* Write the footer, this is how it looks like:
+ * ----------------+---------------------+---------------+
+ * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 |
+ * ----------------+---------------------+---------------+
+ * RDB version and CRC are both in little endian.
+ */
+
+ /* RDB version */
+ buf[0] = RDB_VERSION & 0xff;
+ buf[1] = (RDB_VERSION >> 8) & 0xff;
+ payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,buf,2);
+
+ /* CRC64 */
+ crc = crc64(0,(unsigned char*)payload->io.buffer.ptr,
+ sdslen(payload->io.buffer.ptr));
+ memrev64ifbe(&crc);
+ payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,&crc,8);
+}
+
+/* Verify that the RDB version of the dump payload matches the one of this Redis
+ * instance and that the checksum is ok.
+ * If the DUMP payload looks valid C_OK is returned, otherwise C_ERR
+ * is returned. If rdbver_ptr is not NULL, its populated with the value read
+ * from the input buffer. */
+int verifyDumpPayload(unsigned char *p, size_t len, uint16_t *rdbver_ptr) {
+ unsigned char *footer;
+ uint16_t rdbver;
+ uint64_t crc;
+
+ /* At least 2 bytes of RDB version and 8 of CRC64 should be present. */
+ if (len < 10) return C_ERR;
+ footer = p+(len-10);
+
+ /* Set and verify RDB version. */
+ rdbver = (footer[1] << 8) | footer[0];
+ if (rdbver_ptr) {
+ *rdbver_ptr = rdbver;
+ }
+ if (rdbver > RDB_VERSION) return C_ERR;
+
+ if (server.skip_checksum_validation)
+ return C_OK;
+
+ /* Verify CRC64 */
+ crc = crc64(0,p,len-8);
+ memrev64ifbe(&crc);
+ return (memcmp(&crc,footer+2,8) == 0) ? C_OK : C_ERR;
+}
+
+/* DUMP keyname
+ * DUMP is actually not used by Redis Cluster but it is the obvious
+ * complement of RESTORE and can be useful for different applications. */
+void dumpCommand(client *c) {
+ robj *o;
+ rio payload;
+
+ /* Check if the key is here. */
+ if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) {
+ addReplyNull(c);
+ return;
+ }
+
+ /* Create the DUMP encoded representation. */
+ createDumpPayload(&payload,o,c->argv[1],c->db->id);
+
+ /* Transfer to the client */
+ addReplyBulkSds(c,payload.io.buffer.ptr);
+ return;
+}
+
/* RESTORE key ttl serialized-value [REPLACE] [ABSTTL] [IDLETIME seconds] [FREQ frequency]
 *
 * Deserializes a DUMP payload (as produced by createDumpPayload()) and
 * creates 'key' with the resulting value. 'ttl' is in milliseconds; 0 means
 * no expire. With ABSTTL the ttl is interpreted as an absolute Unix
 * timestamp in milliseconds. IDLETIME and FREQ are mutually exclusive and
 * seed the LRU idle time / LFU frequency of the restored object. */
void restoreCommand(client *c) {
    long long ttl, lfu_freq = -1, lru_idle = -1, lru_clock = -1;
    rio payload;
    int j, type, replace = 0, absttl = 0;
    robj *obj;

    /* Parse additional options */
    for (j = 4; j < c->argc; j++) {
        int additional = c->argc-j-1;
        if (!strcasecmp(c->argv[j]->ptr,"replace")) {
            replace = 1;
        } else if (!strcasecmp(c->argv[j]->ptr,"absttl")) {
            absttl = 1;
        } else if (!strcasecmp(c->argv[j]->ptr,"idletime") && additional >= 1 &&
                   lfu_freq == -1) /* IDLETIME rejected if FREQ was given. */
        {
            if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lru_idle,NULL)
                    != C_OK) return;
            if (lru_idle < 0) {
                addReplyError(c,"Invalid IDLETIME value, must be >= 0");
                return;
            }
            lru_clock = LRU_CLOCK();
            j++; /* Consume additional arg. */
        } else if (!strcasecmp(c->argv[j]->ptr,"freq") && additional >= 1 &&
                   lru_idle == -1) /* FREQ rejected if IDLETIME was given. */
        {
            if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lfu_freq,NULL)
                    != C_OK) return;
            if (lfu_freq < 0 || lfu_freq > 255) {
                addReplyError(c,"Invalid FREQ value, must be >= 0 and <= 255");
                return;
            }
            j++; /* Consume additional arg. */
        } else {
            addReplyErrorObject(c,shared.syntaxerr);
            return;
        }
    }

    /* Make sure this key does not already exist here... Without REPLACE a
     * pre-existing key is a BUSYKEY error. */
    robj *key = c->argv[1];
    if (!replace && lookupKeyWrite(c->db,key) != NULL) {
        addReplyErrorObject(c,shared.busykeyerr);
        return;
    }

    /* Check if the TTL value makes sense */
    if (getLongLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != C_OK) {
        return;
    } else if (ttl < 0) {
        addReplyError(c,"Invalid TTL value, must be >= 0");
        return;
    }

    /* Verify RDB version and data checksum. */
    if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr),NULL) == C_ERR)
    {
        addReplyError(c,"DUMP payload version or checksum are wrong");
        return;
    }

    /* Deserialize: first the object type byte, then the object itself. */
    rioInitWithBuffer(&payload,c->argv[3]->ptr);
    if (((type = rdbLoadObjectType(&payload)) == -1) ||
        ((obj = rdbLoadObject(type,&payload,key->ptr,c->db->id,NULL)) == NULL))
    {
        addReplyError(c,"Bad data format");
        return;
    }

    /* Remove the old key if needed. */
    int deleted = 0;
    if (replace)
        deleted = dbDelete(c->db,key);

    /* Relative TTLs are converted to an absolute expire timestamp. */
    if (ttl && !absttl) ttl+=commandTimeSnapshot();
    if (ttl && checkAlreadyExpired(ttl)) {
        /* The key would be created already expired: don't add it at all.
         * If REPLACE deleted an old value, propagate that deletion as
         * DEL/UNLINK so replicas and the AOF stay consistent. */
        if (deleted) {
            robj *aux = server.lazyfree_lazy_server_del ? shared.unlink : shared.del;
            rewriteClientCommandVector(c, 2, aux, key);
            signalModifiedKey(c,c->db,key);
            notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
            server.dirty++;
        }
        decrRefCount(obj);
        addReply(c, shared.ok);
        return;
    }

    /* Create the key and set the TTL if any */
    dbAdd(c->db,key,obj);
    if (ttl) {
        setExpire(c,c->db,key,ttl);
        if (!absttl) {
            /* Propagate TTL as absolute timestamp: rewrite the ttl
             * argument and append ABSTTL so replicas/AOF replay the exact
             * same expire time regardless of replication delay. */
            robj *ttl_obj = createStringObjectFromLongLong(ttl);
            rewriteClientCommandArgument(c,2,ttl_obj);
            decrRefCount(ttl_obj);
            rewriteClientCommandArgument(c,c->argc,shared.absttl);
        }
    }
    objectSetLRUOrLFU(obj,lfu_freq,lru_idle,lru_clock,1000);
    signalModifiedKey(c,c->db,key);
    notifyKeyspaceEvent(NOTIFY_GENERIC,"restore",key,c->db->id);
    addReply(c,shared.ok);
    server.dirty++;
}
+
/* MIGRATE socket cache implementation.
 *
 * We keep a map between "host:port" strings and the TCP sockets we used to
 * connect to such instances in recent time.
 * These sockets are closed when the max number we cache is reached, and also
 * in serverCron() when they are around for more than a few seconds. */
#define MIGRATE_SOCKET_CACHE_ITEMS 64 /* max num of items in the cache. */
#define MIGRATE_SOCKET_CACHE_TTL 10 /* close cached sockets after 10 sec. */

typedef struct migrateCachedSocket {
    connection *conn;       /* Established connection to the target node. */
    long last_dbid;         /* DB last SELECTed on this connection, or -1 if
                             * unknown (forces a SELECT on next use). */
    time_t last_use_time;   /* For TTL-based eviction in serverCron(). */
} migrateCachedSocket;
+
+/* Return a migrateCachedSocket containing a TCP socket connected with the
+ * target instance, possibly returning a cached one.
+ *
+ * This function is responsible of sending errors to the client if a
+ * connection can't be established. In this case -1 is returned.
+ * Otherwise on success the socket is returned, and the caller should not
+ * attempt to free it after usage.
+ *
+ * If the caller detects an error while using the socket, migrateCloseSocket()
+ * should be called so that the connection will be created from scratch
+ * the next time. */
+migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long timeout) {
+ connection *conn;
+ sds name = sdsempty();
+ migrateCachedSocket *cs;
+
+ /* Check if we have an already cached socket for this ip:port pair. */
+ name = sdscatlen(name,host->ptr,sdslen(host->ptr));
+ name = sdscatlen(name,":",1);
+ name = sdscatlen(name,port->ptr,sdslen(port->ptr));
+ cs = dictFetchValue(server.migrate_cached_sockets,name);
+ if (cs) {
+ sdsfree(name);
+ cs->last_use_time = server.unixtime;
+ return cs;
+ }
+
+ /* No cached socket, create one. */
+ if (dictSize(server.migrate_cached_sockets) == MIGRATE_SOCKET_CACHE_ITEMS) {
+ /* Too many items, drop one at random. */
+ dictEntry *de = dictGetRandomKey(server.migrate_cached_sockets);
+ cs = dictGetVal(de);
+ connClose(cs->conn);
+ zfree(cs);
+ dictDelete(server.migrate_cached_sockets,dictGetKey(de));
+ }
+
+ /* Create the connection */
+ conn = connCreate(connTypeOfCluster());
+ if (connBlockingConnect(conn, host->ptr, atoi(port->ptr), timeout)
+ != C_OK) {
+ addReplyError(c,"-IOERR error or timeout connecting to the client");
+ connClose(conn);
+ sdsfree(name);
+ return NULL;
+ }
+ connEnableTcpNoDelay(conn);
+
+ /* Add to the cache and return it to the caller. */
+ cs = zmalloc(sizeof(*cs));
+ cs->conn = conn;
+
+ cs->last_dbid = -1;
+ cs->last_use_time = server.unixtime;
+ dictAdd(server.migrate_cached_sockets,name,cs);
+ return cs;
+}
+
+/* Free a migrate cached connection. */
+void migrateCloseSocket(robj *host, robj *port) {
+ sds name = sdsempty();
+ migrateCachedSocket *cs;
+
+ name = sdscatlen(name,host->ptr,sdslen(host->ptr));
+ name = sdscatlen(name,":",1);
+ name = sdscatlen(name,port->ptr,sdslen(port->ptr));
+ cs = dictFetchValue(server.migrate_cached_sockets,name);
+ if (!cs) {
+ sdsfree(name);
+ return;
+ }
+
+ connClose(cs->conn);
+ zfree(cs);
+ dictDelete(server.migrate_cached_sockets,name);
+ sdsfree(name);
+}
+
+void migrateCloseTimedoutSockets(void) {
+ dictIterator *di = dictGetSafeIterator(server.migrate_cached_sockets);
+ dictEntry *de;
+
+ while((de = dictNext(di)) != NULL) {
+ migrateCachedSocket *cs = dictGetVal(de);
+
+ if ((server.unixtime - cs->last_use_time) > MIGRATE_SOCKET_CACHE_TTL) {
+ connClose(cs->conn);
+ zfree(cs);
+ dictDelete(server.migrate_cached_sockets,dictGetKey(de));
+ }
+ }
+ dictReleaseIterator(di);
+}
+
/* MIGRATE host port key dbid timeout [COPY | REPLACE | AUTH password |
 * AUTH2 username password]
 *
 * In the multiple keys form:
 *
 * MIGRATE host port "" dbid timeout [COPY | REPLACE | AUTH password |
 * AUTH2 username password] KEYS key1 key2 ... keyN
 *
 * Transfers keys to a remote instance by sending it RESTORE (or
 * RESTORE-ASKING in cluster mode) commands over a cached synchronous
 * connection. Unless COPY is given, transferred keys are deleted locally
 * and the command is rewritten as DEL for replication/AOF. A single retry
 * is attempted on socket errors when it is safe to do so. */
void migrateCommand(client *c) {
    migrateCachedSocket *cs;
    int copy = 0, replace = 0, j;
    char *username = NULL;
    char *password = NULL;
    long timeout;
    long dbid;
    robj **ov = NULL; /* Objects to migrate. */
    robj **kv = NULL; /* Key names. */
    robj **newargv = NULL; /* Used to rewrite the command as DEL ... keys ... */
    rio cmd, payload;
    int may_retry = 1;
    int write_error = 0;
    int argv_rewritten = 0;

    /* To support the KEYS option we need the following additional state. */
    int first_key = 3; /* Argument index of the first key. */
    int num_keys = 1;  /* By default only migrate the 'key' argument. */

    /* Parse additional options */
    for (j = 6; j < c->argc; j++) {
        int moreargs = (c->argc-1) - j;
        if (!strcasecmp(c->argv[j]->ptr,"copy")) {
            copy = 1;
        } else if (!strcasecmp(c->argv[j]->ptr,"replace")) {
            replace = 1;
        } else if (!strcasecmp(c->argv[j]->ptr,"auth")) {
            if (!moreargs) {
                addReplyErrorObject(c,shared.syntaxerr);
                return;
            }
            j++;
            password = c->argv[j]->ptr;
            /* Hide the password from MONITOR / slowlog. */
            redactClientCommandArgument(c,j);
        } else if (!strcasecmp(c->argv[j]->ptr,"auth2")) {
            if (moreargs < 2) {
                addReplyErrorObject(c,shared.syntaxerr);
                return;
            }
            username = c->argv[++j]->ptr;
            redactClientCommandArgument(c,j);
            password = c->argv[++j]->ptr;
            redactClientCommandArgument(c,j);
        } else if (!strcasecmp(c->argv[j]->ptr,"keys")) {
            if (sdslen(c->argv[3]->ptr) != 0) {
                addReplyError(c,
                    "When using MIGRATE KEYS option, the key argument"
                    " must be set to the empty string");
                return;
            }
            first_key = j+1;
            num_keys = c->argc - j - 1;
            break; /* All the remaining args are keys. */
        } else {
            addReplyErrorObject(c,shared.syntaxerr);
            return;
        }
    }

    /* Sanity check */
    if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != C_OK ||
        getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != C_OK)
    {
        return;
    }
    if (timeout <= 0) timeout = 1000;

    /* Check if the keys are here. If at least one key is to migrate, do it
     * otherwise if all the keys are missing reply with "NOKEY" to signal
     * the caller there was nothing to migrate. We don't return an error in
     * this case, since often this is due to a normal condition like the key
     * expiring in the meantime. */
    ov = zrealloc(ov,sizeof(robj*)*num_keys);
    kv = zrealloc(kv,sizeof(robj*)*num_keys);
    int oi = 0;

    for (j = 0; j < num_keys; j++) {
        if ((ov[oi] = lookupKeyRead(c->db,c->argv[first_key+j])) != NULL) {
            kv[oi] = c->argv[first_key+j];
            oi++;
        }
    }
    num_keys = oi;
    if (num_keys == 0) {
        zfree(ov); zfree(kv);
        addReplySds(c,sdsnew("+NOKEY\r\n"));
        return;
    }

try_again:
    write_error = 0;

    /* Connect */
    cs = migrateGetSocket(c,c->argv[1],c->argv[2],timeout);
    if (cs == NULL) {
        zfree(ov); zfree(kv);
        return; /* error sent to the client by migrateGetSocket() */
    }

    /* The whole request is built into a single buffer first, then streamed
     * to the target in chunks below. */
    rioInitWithBuffer(&cmd,sdsempty());

    /* Authentication */
    if (password) {
        int arity = username ? 3 : 2;
        serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',arity));
        serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"AUTH",4));
        if (username) {
            serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,username,
                                 sdslen(username)));
        }
        serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,password,
            sdslen(password)));
    }

    /* Send the SELECT command if the current DB is not already selected. */
    int select = cs->last_dbid != dbid; /* Should we emit SELECT? */
    if (select) {
        serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2));
        serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6));
        serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid));
    }

    int non_expired = 0; /* Number of keys that we'll find non expired.
                            Note that serializing large keys may take some time
                            so certain keys that were found non expired by the
                            lookupKey() function, may be expired later. */

    /* Create RESTORE payload and generate the protocol to call the command. */
    for (j = 0; j < num_keys; j++) {
        long long ttl = 0;
        long long expireat = getExpire(c->db,kv[j]);

        if (expireat != -1) {
            ttl = expireat-commandTimeSnapshot();
            if (ttl < 0) {
                continue; /* Key expired since the lookup above: skip it. */
            }
            if (ttl < 1) ttl = 1;
        }

        /* Relocate valid (non expired) keys and values into the array in successive
         * positions to remove holes created by the keys that were present
         * in the first lookup but are now expired after the second lookup. */
        ov[non_expired] = ov[j];
        kv[non_expired++] = kv[j];

        serverAssertWithInfo(c,NULL,
            rioWriteBulkCount(&cmd,'*',replace ? 5 : 4));

        /* In cluster mode RESTORE-ASKING is used so the target accepts the
         * key for a slot it is importing. */
        if (server.cluster_enabled)
            serverAssertWithInfo(c,NULL,
                rioWriteBulkString(&cmd,"RESTORE-ASKING",14));
        else
            serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7));
        serverAssertWithInfo(c,NULL,sdsEncodedObject(kv[j]));
        serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,kv[j]->ptr,
                sdslen(kv[j]->ptr)));
        serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl));

        /* Emit the payload argument, that is the serialized object using
         * the DUMP format. */
        createDumpPayload(&payload,ov[j],kv[j],dbid);
        serverAssertWithInfo(c,NULL,
            rioWriteBulkString(&cmd,payload.io.buffer.ptr,
                               sdslen(payload.io.buffer.ptr)));
        sdsfree(payload.io.buffer.ptr);

        /* Add the REPLACE option to the RESTORE command if it was specified
         * as a MIGRATE option. */
        if (replace)
            serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"REPLACE",7));
    }

    /* Fix the actual number of keys we are migrating. */
    num_keys = non_expired;

    /* Transfer the query to the other node in 64K chunks. */
    errno = 0;
    {
        sds buf = cmd.io.buffer.ptr;
        size_t pos = 0, towrite;
        int nwritten = 0;

        while ((towrite = sdslen(buf)-pos) > 0) {
            towrite = (towrite > (64*1024) ? (64*1024) : towrite);
            nwritten = connSyncWrite(cs->conn,buf+pos,towrite,timeout);
            if (nwritten != (signed)towrite) {
                write_error = 1;
                goto socket_err;
            }
            pos += nwritten;
        }
    }

    char buf0[1024]; /* Auth reply. */
    char buf1[1024]; /* Select reply. */
    char buf2[1024]; /* Restore reply. */

    /* Read the AUTH reply if needed. */
    if (password && connSyncReadLine(cs->conn, buf0, sizeof(buf0), timeout) <= 0)
        goto socket_err;

    /* Read the SELECT reply if needed. */
    if (select && connSyncReadLine(cs->conn, buf1, sizeof(buf1), timeout) <= 0)
        goto socket_err;

    /* Read the RESTORE replies. */
    int error_from_target = 0;
    int socket_error = 0;
    int del_idx = 1; /* Index of the key argument for the replicated DEL op. */

    /* Allocate the new argument vector that will replace the current command,
     * to propagate the MIGRATE as a DEL command (if no COPY option was given).
     * We allocate num_keys+1 because the additional argument is for "DEL"
     * command name itself. */
    if (!copy) newargv = zmalloc(sizeof(robj*)*(num_keys+1));

    for (j = 0; j < num_keys; j++) {
        if (connSyncReadLine(cs->conn, buf2, sizeof(buf2), timeout) <= 0) {
            socket_error = 1;
            break;
        }
        /* An error in any of the AUTH, SELECT or RESTORE replies aborts
         * the key transfer (only the first error is reported back). */
        if ((password && buf0[0] == '-') ||
            (select && buf1[0] == '-') ||
            buf2[0] == '-')
        {
            /* On error assume that last_dbid is no longer valid. */
            if (!error_from_target) {
                cs->last_dbid = -1;
                char *errbuf;
                if (password && buf0[0] == '-') errbuf = buf0;
                else if (select && buf1[0] == '-') errbuf = buf1;
                else errbuf = buf2;

                error_from_target = 1;
                addReplyErrorFormat(c,"Target instance replied with error: %s",
                    errbuf+1);
            }
        } else {
            if (!copy) {
                /* No COPY option: remove the local key, signal the change. */
                dbDelete(c->db,kv[j]);
                signalModifiedKey(c,c->db,kv[j]);
                notifyKeyspaceEvent(NOTIFY_GENERIC,"del",kv[j],c->db->id);
                server.dirty++;

                /* Populate the argument vector to replace the old one. */
                newargv[del_idx++] = kv[j];
                incrRefCount(kv[j]);
            }
        }
    }

    /* On socket error, if we want to retry, do it now before rewriting the
     * command vector. We only retry if we are sure nothing was processed
     * and we failed to read the first reply (j == 0 test). */
    if (!error_from_target && socket_error && j == 0 && may_retry &&
        errno != ETIMEDOUT)
    {
        goto socket_err; /* A retry is guaranteed because of tested conditions.*/
    }

    /* On socket errors, close the migration socket now that we still have
     * the original host/port in the ARGV. Later the original command may be
     * rewritten to DEL and the host/port arguments would no longer be
     * available. */
    if (socket_error) migrateCloseSocket(c->argv[1],c->argv[2]);

    if (!copy) {
        /* Translate MIGRATE as DEL for replication/AOF. Note that we do
         * this only for the keys for which we received an acknowledgement
         * from the receiving Redis server, by using the del_idx index. */
        if (del_idx > 1) {
            newargv[0] = createStringObject("DEL",3);
            /* Note that the following call takes ownership of newargv. */
            replaceClientCommandVector(c,del_idx,newargv);
            argv_rewritten = 1;
        } else {
            /* No key transfer acknowledged, no need to rewrite as DEL. */
            zfree(newargv);
        }
        newargv = NULL; /* Make it safe to call zfree() on it in the future. */
    }

    /* If we are here and a socket error happened, we don't want to retry.
     * Just signal the problem to the client, but only do it if we did not
     * already queue a different error reported by the destination server. */
    if (!error_from_target && socket_error) {
        may_retry = 0;
        goto socket_err;
    }

    if (!error_from_target) {
        /* Success! Update the last_dbid in migrateCachedSocket, so that we can
         * avoid SELECT the next time if the target DB is the same. Reply +OK.
         *
         * Note: If we reached this point, even if socket_error is true
         * still the SELECT command succeeded (otherwise the code jumps to
         * socket_err label. */
        cs->last_dbid = dbid;
        addReply(c,shared.ok);
    } else {
        /* On error we already sent it in the for loop above, and set
         * the currently selected socket to -1 to force SELECT the next time. */
    }

    sdsfree(cmd.io.buffer.ptr);
    zfree(ov); zfree(kv); zfree(newargv);
    return;

/* On socket errors we try to close the cached socket and try again.
 * It is very common for the cached socket to get closed, if just reopening
 * it works it's a shame to notify the error to the caller. */
socket_err:
    /* Cleanup we want to perform in both the retry and no retry case.
     * Note: Closing the migrate socket will also force SELECT next time. */
    sdsfree(cmd.io.buffer.ptr);

    /* If the command was rewritten as DEL and there was a socket error,
     * we already closed the socket earlier. While migrateCloseSocket()
     * is idempotent, the host/port arguments are now gone, so don't do it
     * again. */
    if (!argv_rewritten) migrateCloseSocket(c->argv[1],c->argv[2]);
    zfree(newargv);
    newargv = NULL; /* This will get reallocated on retry. */

    /* Retry only if it's not a timeout and we never attempted a retry
     * (or the code jumping here did not set may_retry to zero). */
    if (errno != ETIMEDOUT && may_retry) {
        may_retry = 0;
        goto try_again;
    }

    /* Cleanup we want to do if no retry is attempted. */
    zfree(ov); zfree(kv);
    addReplyErrorSds(c, sdscatprintf(sdsempty(),
                                     "-IOERR error or timeout %s to target instance",
                                     write_error ? "writing" : "reading"));
    return;
}
+
+/* -----------------------------------------------------------------------------
+ * Cluster functions related to serving / redirecting clients
+ * -------------------------------------------------------------------------- */
+
+/* The ASKING command is required after a -ASK redirection.
+ * The client should issue ASKING before to actually send the command to
+ * the target instance. See the Redis Cluster specification for more
+ * information.
+ *
+ * Replies +OK after setting CLIENT_ASKING on the client, or an error if
+ * cluster support is disabled. (The flag is presumably consumed/cleared
+ * after the next command is processed -- handled elsewhere.) */
+void askingCommand(client *c) {
+ if (server.cluster_enabled == 0) {
+ addReplyError(c,"This instance has cluster support disabled");
+ return;
+ }
+ /* Mark the client so the next command may be served from a slot in
+ * importing state (checked in getNodeByQuery()). */
+ c->flags |= CLIENT_ASKING;
+ addReply(c,shared.ok);
+}
+
+/* The READONLY command is used by clients to enter the read-only mode.
+ * In this mode slaves will not redirect clients as long as clients access
+ * with read-only commands to keys that are served by the slave's master.
+ *
+ * Replies +OK after setting CLIENT_READONLY, or an error if cluster
+ * support is disabled. The flag is cleared by READWRITE (see below). */
+void readonlyCommand(client *c) {
+ if (server.cluster_enabled == 0) {
+ addReplyError(c,"This instance has cluster support disabled");
+ return;
+ }
+ c->flags |= CLIENT_READONLY;
+ addReply(c,shared.ok);
+}
+
+/* The READWRITE command just clears the READONLY command state.
+ *
+ * Replies +OK unconditionally (after the cluster-enabled check); clearing
+ * an already-clear flag is a no-op. */
+void readwriteCommand(client *c) {
+ if (server.cluster_enabled == 0) {
+ addReplyError(c,"This instance has cluster support disabled");
+ return;
+ }
+ c->flags &= ~CLIENT_READONLY;
+ addReply(c,shared.ok);
+}
+
+/* Return the pointer to the cluster node that is able to serve the command.
+ * For the function to succeed the command should only target either:
+ *
+ * 1) A single key (even multiple times like RPOPLPUSH mylist mylist).
+ * 2) Multiple keys in the same hash slot, while the slot is stable (no
+ * resharding in progress).
+ *
+ * On success the function returns the node that is able to serve the request.
+ * If the node is not 'myself' a redirection must be performed. The kind of
+ * redirection is specified setting the integer passed by reference
+ * 'error_code', which will be set to CLUSTER_REDIR_ASK or
+ * CLUSTER_REDIR_MOVED.
+ *
+ * When the node is 'myself' 'error_code' is set to CLUSTER_REDIR_NONE.
+ *
+ * If the command fails NULL is returned, and the reason of the failure is
+ * provided via 'error_code', which will be set to:
+ *
+ * CLUSTER_REDIR_CROSS_SLOT if the request contains multiple keys that
+ * don't belong to the same hash slot.
+ *
+ * CLUSTER_REDIR_UNSTABLE if the request contains multiple keys
+ * belonging to the same slot, but the slot is not stable (in migration or
+ * importing state, likely because a resharding is in progress).
+ *
+ * CLUSTER_REDIR_DOWN_UNBOUND if the request addresses a slot which is
+ * not bound to any node. In this case the cluster global state should be
+ * already "down" but it is fragile to rely on the update of the global state,
+ * so we also handle it here.
+ *
+ * CLUSTER_REDIR_DOWN_STATE and CLUSTER_REDIR_DOWN_RO_STATE if the cluster is
+ * down but the user attempts to execute a command that addresses one or more keys.
+ *
+ * 'hashslot' (out, optional): set to the slot of the keys only when the
+ * command addresses at least one key (see below); both out parameters may
+ * be NULL if the caller is not interested. */
+clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *error_code) {
+ clusterNode *n = NULL;
+ robj *firstkey = NULL;
+ int multiple_keys = 0;
+ multiState *ms, _ms;
+ multiCmd mc;
+ int i, slot = 0, migrating_slot = 0, importing_slot = 0, missing_keys = 0,
+ existing_keys = 0;
+
+ /* Allow any key to be set if a module disabled cluster redirections. */
+ if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION)
+ return myself;
+
+ /* Set error code optimistically for the base case. */
+ if (error_code) *error_code = CLUSTER_REDIR_NONE;
+
+ /* Modules can turn off Redis Cluster redirection: this is useful
+ * when writing a module that implements a completely different
+ * distributed system. */
+
+ /* We handle all the cases as if they were EXEC commands, so we have
+ * a common code path for everything */
+ if (cmd->proc == execCommand) {
+ /* If CLIENT_MULTI flag is not set EXEC is just going to return an
+ * error. */
+ if (!(c->flags & CLIENT_MULTI)) return myself;
+ ms = &c->mstate;
+ } else {
+ /* In order to have a single codepath create a fake Multi State
+ * structure if the client is not in MULTI/EXEC state, this way
+ * we have a single codepath below. */
+ ms = &_ms;
+ _ms.commands = &mc;
+ _ms.count = 1;
+ mc.argv = argv;
+ mc.argc = argc;
+ mc.cmd = cmd;
+ }
+
+ /* Sharded pub/sub commands route by channel, not by key, and are
+ * treated specially in a couple of places below. */
+ int is_pubsubshard = cmd->proc == ssubscribeCommand ||
+ cmd->proc == sunsubscribeCommand ||
+ cmd->proc == spublishCommand;
+
+ /* Check that all the keys are in the same hash slot, and obtain this
+ * slot and the node associated. */
+ for (i = 0; i < ms->count; i++) {
+ struct redisCommand *mcmd;
+ robj **margv;
+ int margc, numkeys, j;
+ keyReference *keyindex;
+
+ mcmd = ms->commands[i].cmd;
+ margc = ms->commands[i].argc;
+ margv = ms->commands[i].argv;
+
+ getKeysResult result = GETKEYS_RESULT_INIT;
+ numkeys = getKeysFromCommand(mcmd,margv,margc,&result);
+ keyindex = result.keys;
+
+ for (j = 0; j < numkeys; j++) {
+ robj *thiskey = margv[keyindex[j].pos];
+ int thisslot = keyHashSlot((char*)thiskey->ptr,
+ sdslen(thiskey->ptr));
+
+ if (firstkey == NULL) {
+ /* This is the first key we see. Check what is the slot
+ * and node. */
+ firstkey = thiskey;
+ slot = thisslot;
+ n = server.cluster->slots[slot];
+
+ /* Error: If a slot is not served, we are in "cluster down"
+ * state. However the state is yet to be updated, so this was
+ * not trapped earlier in processCommand(). Report the same
+ * error to the client. */
+ if (n == NULL) {
+ getKeysFreeResult(&result);
+ if (error_code)
+ *error_code = CLUSTER_REDIR_DOWN_UNBOUND;
+ return NULL;
+ }
+
+ /* If we are migrating or importing this slot, we need to check
+ * if we have all the keys in the request (the only way we
+ * can safely serve the request, otherwise we return a TRYAGAIN
+ * error). To do so we set the importing/migrating state and
+ * increment a counter for every missing key. */
+ if (n == myself &&
+ server.cluster->migrating_slots_to[slot] != NULL)
+ {
+ migrating_slot = 1;
+ } else if (server.cluster->importing_slots_from[slot] != NULL) {
+ importing_slot = 1;
+ }
+ } else {
+ /* If it is not the first key/channel, make sure it is exactly
+ * the same key/channel as the first we saw. */
+ if (slot != thisslot) {
+ /* Error: multiple keys from different slots. */
+ getKeysFreeResult(&result);
+ if (error_code)
+ *error_code = CLUSTER_REDIR_CROSS_SLOT;
+ return NULL;
+ }
+ if (importing_slot && !multiple_keys && !equalStringObjects(firstkey,thiskey)) {
+ /* Flag this request as one with multiple different
+ * keys/channels when the slot is in importing state. */
+ multiple_keys = 1;
+ }
+ }
+
+ /* Migrating / Importing slot? Count keys we don't have.
+ * If it is pubsubshard command, it isn't required to check
+ * the channel being present or not in the node during the
+ * slot migration, the channel will be served from the source
+ * node until the migration completes with CLUSTER SETSLOT <slot>
+ * NODE <node-id>.
+ * Note: lookups below use db[0]; cluster mode presumably limits
+ * keyspace commands to DB 0 (enforced elsewhere). */
+ int flags = LOOKUP_NOTOUCH | LOOKUP_NOSTATS | LOOKUP_NONOTIFY | LOOKUP_NOEXPIRE;
+ if ((migrating_slot || importing_slot) && !is_pubsubshard)
+ {
+ if (lookupKeyReadWithFlags(&server.db[0], thiskey, flags) == NULL) missing_keys++;
+ else existing_keys++;
+ }
+ }
+ getKeysFreeResult(&result);
+ }
+
+ /* No key at all in command? then we can serve the request
+ * without redirections or errors in all the cases. */
+ if (n == NULL) return myself;
+
+ uint64_t cmd_flags = getCommandFlags(c);
+ /* Cluster is globally down but we got keys? We only serve the request
+ * if it is a read command and when allow_reads_when_down is enabled. */
+ if (server.cluster->state != CLUSTER_OK) {
+ if (is_pubsubshard) {
+ if (!server.cluster_allow_pubsubshard_when_down) {
+ if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE;
+ return NULL;
+ }
+ } else if (!server.cluster_allow_reads_when_down) {
+ /* The cluster is configured to block commands when the
+ * cluster is down. */
+ if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE;
+ return NULL;
+ } else if (cmd_flags & CMD_WRITE) {
+ /* The cluster is configured to allow read only commands */
+ if (error_code) *error_code = CLUSTER_REDIR_DOWN_RO_STATE;
+ return NULL;
+ } else {
+ /* Fall through and allow the command to be executed:
+ * this happens when server.cluster_allow_reads_when_down is
+ * true and the command is not a write command */
+ }
+ }
+
+ /* Return the hashslot by reference. */
+ if (hashslot) *hashslot = slot;
+
+ /* MIGRATE always works in the context of the local node if the slot
+ * is open (migrating or importing state). We need to be able to freely
+ * move keys among instances in this case. */
+ if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand)
+ return myself;
+
+ /* If we don't have all the keys and we are migrating the slot, send
+ * an ASK redirection or TRYAGAIN. */
+ if (migrating_slot && missing_keys) {
+ /* If we have keys but we don't have all keys, we return TRYAGAIN */
+ if (existing_keys) {
+ if (error_code) *error_code = CLUSTER_REDIR_UNSTABLE;
+ return NULL;
+ } else {
+ if (error_code) *error_code = CLUSTER_REDIR_ASK;
+ return server.cluster->migrating_slots_to[slot];
+ }
+ }
+
+ /* If we are receiving the slot, and the client correctly flagged the
+ * request as "ASKING", we can serve the request. However if the request
+ * involves multiple keys and we don't have them all, the only option is
+ * to send a TRYAGAIN error. */
+ if (importing_slot &&
+ (c->flags & CLIENT_ASKING || cmd_flags & CMD_ASKING))
+ {
+ if (multiple_keys && missing_keys) {
+ if (error_code) *error_code = CLUSTER_REDIR_UNSTABLE;
+ return NULL;
+ } else {
+ return myself;
+ }
+ }
+
+ /* Handle the read-only client case reading from a slave: if this
+ * node is a slave and the request is about a hash slot our master
+ * is serving, we can reply without redirection. */
+ int is_write_command = (cmd_flags & CMD_WRITE) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE));
+ if (((c->flags & CLIENT_READONLY) || is_pubsubshard) &&
+ !is_write_command &&
+ nodeIsSlave(myself) &&
+ myself->slaveof == n)
+ {
+ return myself;
+ }
+
+ /* Base case: just return the right node. However if this node is not
+ * myself, set error_code to MOVED since we need to issue a redirection. */
+ if (n != myself && error_code) *error_code = CLUSTER_REDIR_MOVED;
+ return n;
+}
+
+/* Send the client the right redirection code, according to error_code
+ * that should be set to one of CLUSTER_REDIR_* macros.
+ *
+ * If CLUSTER_REDIR_ASK or CLUSTER_REDIR_MOVED error codes
+ * are used, then the node 'n' should not be NULL, but should be the
+ * node we want to mention in the redirection. Moreover hashslot should
+ * be set to the hash slot that caused the redirection.
+ *
+ * Passing an error_code that is not one of the handled CLUSTER_REDIR_*
+ * values (including CLUSTER_REDIR_NONE) panics the server. */
+void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code) {
+ if (error_code == CLUSTER_REDIR_CROSS_SLOT) {
+ addReplyError(c,"-CROSSSLOT Keys in request don't hash to the same slot");
+ } else if (error_code == CLUSTER_REDIR_UNSTABLE) {
+ /* The request spawns multiple keys in the same slot,
+ * but the slot is not "stable" currently as there is
+ * a migration or import in progress. */
+ addReplyError(c,"-TRYAGAIN Multiple keys request during rehashing of slot");
+ } else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
+ addReplyError(c,"-CLUSTERDOWN The cluster is down");
+ } else if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) {
+ addReplyError(c,"-CLUSTERDOWN The cluster is down and only accepts read commands");
+ } else if (error_code == CLUSTER_REDIR_DOWN_UNBOUND) {
+ addReplyError(c,"-CLUSTERDOWN Hash slot not served");
+ } else if (error_code == CLUSTER_REDIR_MOVED ||
+ error_code == CLUSTER_REDIR_ASK)
+ {
+ /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */
+ int port = getNodeClientPort(n, shouldReturnTlsInfo());
+ addReplyErrorSds(c,sdscatprintf(sdsempty(),
+ "-%s %d %s:%d",
+ (error_code == CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
+ hashslot, getPreferredEndpoint(n), port));
+ } else {
+ serverPanic("getNodeByQuery() unknown error.");
+ }
+}
+
+/* This function is called by the function processing clients incrementally
+ * to detect timeouts, in order to handle the following case:
+ *
+ * 1) A client blocks with BLPOP or similar blocking operation.
+ * 2) The master migrates the hash slot elsewhere or turns into a slave.
+ * 3) The client may remain blocked forever (or up to the max timeout time)
+ * waiting for a key change that will never happen.
+ *
+ * If the client is found to be blocked into a hash slot this node no
+ * longer handles, the client is sent a redirection error, and the function
+ * returns 1. Otherwise 0 is returned and no operation is performed. */
+int clusterRedirectBlockedClientIfNeeded(client *c) {
+ /* Only the blocking types that wait on keys are of interest here. */
+ if (c->flags & CLIENT_BLOCKED &&
+ (c->bstate.btype == BLOCKED_LIST ||
+ c->bstate.btype == BLOCKED_ZSET ||
+ c->bstate.btype == BLOCKED_STREAM ||
+ c->bstate.btype == BLOCKED_MODULE))
+ {
+ dictEntry *de;
+ dictIterator *di;
+
+ /* If the cluster is down, unblock the client with the right error.
+ * If the cluster is configured to allow reads on cluster down, we
+ * still want to emit this error since a write will be required
+ * to unblock them which may never come. */
+ if (server.cluster->state == CLUSTER_FAIL) {
+ clusterRedirectClient(c,NULL,0,CLUSTER_REDIR_DOWN_STATE);
+ return 1;
+ }
+
+ /* If the client is blocked on module, but not on a specific key,
+ * don't unblock it (except for the CLUSTER_FAIL case above). */
+ if (c->bstate.btype == BLOCKED_MODULE && !moduleClientIsBlockedOnKeys(c))
+ return 0;
+
+ /* All keys must belong to the same slot, so check first key only. */
+ di = dictGetIterator(c->bstate.keys);
+ if ((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ int slot = keyHashSlot((char*)key->ptr, sdslen(key->ptr));
+ clusterNode *node = server.cluster->slots[slot];
+
+ /* if the client is read-only and attempting to access key that our
+ * replica can handle, allow it. */
+ if ((c->flags & CLIENT_READONLY) &&
+ !(c->lastcmd->flags & CMD_WRITE) &&
+ nodeIsSlave(myself) && myself->slaveof == node)
+ {
+ node = myself;
+ }
+
+ /* We send an error and unblock the client if:
+ * 1) The slot is unassigned, emitting a cluster down error.
+ * 2) The slot is not handled by this node, nor being imported. */
+ if (node != myself &&
+ server.cluster->importing_slots_from[slot] == NULL)
+ {
+ if (node == NULL) {
+ clusterRedirectClient(c,NULL,0,
+ CLUSTER_REDIR_DOWN_UNBOUND);
+ } else {
+ clusterRedirectClient(c,node,slot,
+ CLUSTER_REDIR_MOVED);
+ }
+ /* Release the iterator before returning on this path too. */
+ dictReleaseIterator(di);
+ return 1;
+ }
+ }
+ dictReleaseIterator(di);
+ }
+ return 0;
+}
+
+/* Slot to Key API. This is used by Redis Cluster in order to obtain in
+ * a fast way a key that belongs to a specified hash slot. This is useful
+ * while rehashing the cluster and in other conditions when we need to
+ * understand if we have keys for a given hash slot. */
+
+/* Add 'entry' (a main-dict entry whose key is an sds string) to the
+ * slot-to-keys linked list of the slot its key hashes to, pushing it at
+ * the head of the per-slot list and bumping the slot's key count. */
+void slotToKeyAddEntry(dictEntry *entry, redisDb *db) {
+ sds key = dictGetKey(entry);
+ unsigned int hashslot = keyHashSlot(key, sdslen(key));
+ slotToKeys *slot_to_keys = &(*db->slots_to_keys).by_slot[hashslot];
+ slot_to_keys->count++;
+
+ /* Insert entry before the first element in the list. */
+ dictEntry *first = slot_to_keys->head;
+ dictEntryNextInSlot(entry) = first;
+ if (first != NULL) {
+ serverAssert(dictEntryPrevInSlot(first) == NULL);
+ dictEntryPrevInSlot(first) = entry;
+ }
+ serverAssert(dictEntryPrevInSlot(entry) == NULL);
+ slot_to_keys->head = entry;
+}
+
+/* Unlink 'entry' from the slot-to-keys list of the slot its key hashes
+ * to, and decrement that slot's key count. The entry itself is not
+ * freed here; only the per-slot doubly linked list is updated. */
+void slotToKeyDelEntry(dictEntry *entry, redisDb *db) {
+ sds key = dictGetKey(entry);
+ unsigned int hashslot = keyHashSlot(key, sdslen(key));
+ slotToKeys *slot_to_keys = &(*db->slots_to_keys).by_slot[hashslot];
+ slot_to_keys->count--;
+
+ /* Connect previous and next entries to each other. */
+ dictEntry *next = dictEntryNextInSlot(entry);
+ dictEntry *prev = dictEntryPrevInSlot(entry);
+ if (next != NULL) {
+ dictEntryPrevInSlot(next) = prev;
+ }
+ if (prev != NULL) {
+ dictEntryNextInSlot(prev) = next;
+ } else {
+ /* The removed entry was the first in the list. */
+ serverAssert(slot_to_keys->head == entry);
+ slot_to_keys->head = next;
+ }
+}
+
+/* Updates neighbour entries when an entry has been replaced (e.g. reallocated
+ * during active defrag). 'entry' is the new pointer for an entry already
+ * linked into a slot list; neighbours (or the list head) still point at the
+ * old address and are repointed here. */
+void slotToKeyReplaceEntry(dict *d, dictEntry *entry) {
+ dictEntry *next = dictEntryNextInSlot(entry);
+ dictEntry *prev = dictEntryPrevInSlot(entry);
+ if (next != NULL) {
+ dictEntryPrevInSlot(next) = entry;
+ }
+ if (prev != NULL) {
+ dictEntryNextInSlot(prev) = entry;
+ } else {
+ /* The replaced entry was the first in the list: recompute the slot
+ * from the key and fix the head pointer. The owning db is recovered
+ * from the dict's metadata (set up in slotToKeyInit). */
+ sds key = dictGetKey(entry);
+ unsigned int hashslot = keyHashSlot(key, sdslen(key));
+ clusterDictMetadata *dictmeta = dictMetadata(d);
+ redisDb *db = dictmeta->db;
+ slotToKeys *slot_to_keys = &(*db->slots_to_keys).by_slot[hashslot];
+ slot_to_keys->head = entry;
+ }
+}
+
+/* Initialize slots-keys map of given db: allocate the zeroed mapping and
+ * store a back-pointer to the db in the main dict's metadata, used later
+ * by slotToKeyReplaceEntry(). */
+void slotToKeyInit(redisDb *db) {
+ db->slots_to_keys = zcalloc(sizeof(clusterSlotToKeyMapping));
+ clusterDictMetadata *dictmeta = dictMetadata(db->dict);
+ dictmeta->db = db;
+}
+
+/* Empty slots-keys map of given db. Zeroing resets every slot's count
+ * and head pointer; the dict entries themselves are owned by the main
+ * dict and are not touched here. */
+void slotToKeyFlush(redisDb *db) {
+ memset(db->slots_to_keys, 0,
+ sizeof(clusterSlotToKeyMapping));
+}
+
+/* Free slots-keys map of given db and NULL the pointer so a stale
+ * reference cannot be used after destruction. */
+void slotToKeyDestroy(redisDb *db) {
+ zfree(db->slots_to_keys);
+ db->slots_to_keys = NULL;
+}
+
+/* Remove all the keys in the specified hash slot.
+ * The number of removed items is returned.
+ *
+ * Works on db[0] only (cluster mode). Deletions are propagated and
+ * keyspace events fired for each key; server.dirty is bumped per key. */
+unsigned int delKeysInSlot(unsigned int hashslot) {
+ unsigned int j = 0;
+
+ dictEntry *de = (*server.db->slots_to_keys).by_slot[hashslot].head;
+ while (de != NULL) {
+ sds sdskey = dictGetKey(de);
+ /* Advance before deleting: dbDelete() below invalidates 'de'. */
+ de = dictEntryNextInSlot(de);
+ robj *key = createStringObject(sdskey, sdslen(sdskey));
+ dbDelete(&server.db[0], key);
+ propagateDeletion(&server.db[0], key, server.lazyfree_lazy_server_del);
+ signalModifiedKey(NULL, &server.db[0], key);
+ moduleNotifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id);
+ postExecutionUnitOperations();
+ decrRefCount(key);
+ j++;
+ server.dirty++;
+ }
+
+ return j;
+}
+
+/* Return the number of keys stored in the given hash slot (db[0]),
+ * as maintained by the slot-to-keys bookkeeping above. O(1). */
+unsigned int countKeysInSlot(unsigned int hashslot) {
+ return (*server.db->slots_to_keys).by_slot[hashslot].count;
+}
+
+/* Walk the slaveof chain up to the root and return the master of 'node'
+ * (the node itself if it is already a master). Assumes the chain is
+ * acyclic and every link is resolvable. */
+clusterNode *clusterNodeGetMaster(clusterNode *node) {
+ while (node->slaveof != NULL) node = node->slaveof;
+ return node;
+}
+
+/* -----------------------------------------------------------------------------
+ * Operation(s) on channel rax tree.
+ * -------------------------------------------------------------------------- */
+
+/* Insert ('add' != 0) or remove ('add' == 0) 'channel' from the
+ * slots_to_channels radix tree. Keys in the tree are the channel name
+ * prefixed with the 2-byte big-endian hash slot, so all channels of a
+ * slot are contiguous and can be range-scanned. */
+void slotToChannelUpdate(sds channel, int add) {
+ size_t keylen = sdslen(channel);
+ unsigned int hashslot = keyHashSlot(channel,keylen);
+ unsigned char buf[64];
+ unsigned char *indexed = buf;
+
+ /* Use the stack buffer when the prefixed key fits, heap otherwise. */
+ if (keylen+2 > 64) indexed = zmalloc(keylen+2);
+ indexed[0] = (hashslot >> 8) & 0xff;
+ indexed[1] = hashslot & 0xff;
+ memcpy(indexed+2,channel,keylen);
+ if (add) {
+ raxInsert(server.cluster->slots_to_channels,indexed,keylen+2,NULL,NULL);
+ } else {
+ raxRemove(server.cluster->slots_to_channels,indexed,keylen+2,NULL);
+ }
+ if (indexed != buf) zfree(indexed);
+}
+
+/* Register 'channel' in the slot-to-channels map. */
+void slotToChannelAdd(sds channel) {
+ slotToChannelUpdate(channel,1);
+}
+
+/* Remove 'channel' from the slot-to-channels map. */
+void slotToChannelDel(sds channel) {
+ slotToChannelUpdate(channel,0);
+}
+
+/* Get the count of the channels for a given slot, by range-scanning the
+ * radix tree from the slot's 2-byte prefix and stopping at the first key
+ * belonging to a different slot. O(number of channels in the slot). */
+unsigned int countChannelsInSlot(unsigned int hashslot) {
+ raxIterator iter;
+ int j = 0;
+ unsigned char indexed[2];
+
+ indexed[0] = (hashslot >> 8) & 0xff;
+ indexed[1] = hashslot & 0xff;
+ raxStart(&iter,server.cluster->slots_to_channels);
+ raxSeek(&iter,">=",indexed,2);
+ while(raxNext(&iter)) {
+ /* Keys are prefix-ordered: a different prefix ends this slot. */
+ if (iter.key[0] != indexed[0] || iter.key[1] != indexed[1]) break;
+ j++;
+ }
+ raxStop(&iter);
+ return j;
+}
diff --git a/src/cluster.h b/src/cluster.h
new file mode 100644
index 0000000..db05ebc
--- /dev/null
+++ b/src/cluster.h
@@ -0,0 +1,447 @@
+#ifndef __CLUSTER_H
+#define __CLUSTER_H
+
+/*-----------------------------------------------------------------------------
+ * Redis cluster data structures, defines, exported API.
+ *----------------------------------------------------------------------------*/
+
+#define CLUSTER_SLOTS 16384
+#define CLUSTER_OK 0 /* Everything looks ok */
+#define CLUSTER_FAIL 1 /* The cluster can't work */
+#define CLUSTER_NAMELEN 40 /* sha1 hex length */
+#define CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
+
+/* The following defines are amount of time, sometimes expressed as
+ * multiplicators of the node timeout value (when ending with MULT). */
+#define CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
+#define CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
+#define CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */
+#define CLUSTER_MF_PAUSE_MULT 2 /* Master pause manual failover mult. */
+#define CLUSTER_SLAVE_MIGRATION_DELAY 5000 /* Delay for slave migration. */
+
+/* Redirection errors returned by getNodeByQuery(). */
+#define CLUSTER_REDIR_NONE 0 /* Node can serve the request. */
+#define CLUSTER_REDIR_CROSS_SLOT 1 /* -CROSSSLOT request. */
+#define CLUSTER_REDIR_UNSTABLE 2 /* -TRYAGAIN redirection required */
+#define CLUSTER_REDIR_ASK 3 /* -ASK redirection required. */
+#define CLUSTER_REDIR_MOVED 4 /* -MOVED redirection required. */
+#define CLUSTER_REDIR_DOWN_STATE 5 /* -CLUSTERDOWN, global state. */
+#define CLUSTER_REDIR_DOWN_UNBOUND 6 /* -CLUSTERDOWN, unbound slot. */
+#define CLUSTER_REDIR_DOWN_RO_STATE 7 /* -CLUSTERDOWN, allow reads. */
+
+struct clusterNode;
+
+/* clusterLink encapsulates everything needed to talk with a remote node:
+ * the connection, an outgoing message queue and a reception buffer. */
+typedef struct clusterLink {
+ mstime_t ctime; /* Link creation time */
+ connection *conn; /* Connection to remote node */
+ list *send_msg_queue; /* List of messages to be sent */
+ size_t head_msg_send_offset; /* Number of bytes already sent of message at head of queue */
+ unsigned long long send_msg_queue_mem; /* Memory in bytes used by message queue */
+ char *rcvbuf; /* Packet reception buffer */
+ size_t rcvbuf_len; /* Used size of rcvbuf */
+ size_t rcvbuf_alloc; /* Allocated size of rcvbuf */
+ struct clusterNode *node; /* Node related to this link. Initialized to NULL when unknown */
+ int inbound; /* 1 if this link is an inbound link accepted from the related node */
+} clusterLink;
+
+/* Cluster node flags and macros. */
+#define CLUSTER_NODE_MASTER 1 /* The node is a master */
+#define CLUSTER_NODE_SLAVE 2 /* The node is a slave */
+#define CLUSTER_NODE_PFAIL 4 /* Failure? Need acknowledge */
+#define CLUSTER_NODE_FAIL 8 /* The node is believed to be malfunctioning */
+#define CLUSTER_NODE_MYSELF 16 /* This node is myself */
+#define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
+#define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */
+#define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */
+#define CLUSTER_NODE_MIGRATE_TO 256 /* Master eligible for replica migration. */
+#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. */
+#define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
+
+#define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER)
+#define nodeIsSlave(n) ((n)->flags & CLUSTER_NODE_SLAVE)
+#define nodeInHandshake(n) ((n)->flags & CLUSTER_NODE_HANDSHAKE)
+#define nodeHasAddr(n) (!((n)->flags & CLUSTER_NODE_NOADDR))
+#define nodeWithoutAddr(n) ((n)->flags & CLUSTER_NODE_NOADDR)
+#define nodeTimedOut(n) ((n)->flags & CLUSTER_NODE_PFAIL)
+#define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL)
+#define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER)
+
+/* Reasons why a slave is not able to failover. */
+#define CLUSTER_CANT_FAILOVER_NONE 0
+#define CLUSTER_CANT_FAILOVER_DATA_AGE 1
+#define CLUSTER_CANT_FAILOVER_WAITING_DELAY 2
+#define CLUSTER_CANT_FAILOVER_EXPIRED 3
+#define CLUSTER_CANT_FAILOVER_WAITING_VOTES 4
+#define CLUSTER_CANT_FAILOVER_RELOG_PERIOD (10) /* seconds. */
+
+/* clusterState todo_before_sleep flags. */
+#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0)
+#define CLUSTER_TODO_UPDATE_STATE (1<<1)
+#define CLUSTER_TODO_SAVE_CONFIG (1<<2)
+#define CLUSTER_TODO_FSYNC_CONFIG (1<<3)
+#define CLUSTER_TODO_HANDLE_MANUALFAILOVER (1<<4)
+
+/* Message types.
+ *
+ * Note that the PING, PONG and MEET messages are actually the same exact
+ * kind of packet. PONG is the reply to ping, in the exact format as a PING,
+ * while MEET is a special PING that forces the receiver to add the sender
+ * as a node (if it is not already in the list). */
+#define CLUSTERMSG_TYPE_PING 0 /* Ping */
+#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */
+#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */
+#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */
+#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */
+#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
+#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you have my vote */
+#define CLUSTERMSG_TYPE_UPDATE 7 /* Another node slots configuration */
+#define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */
+#define CLUSTERMSG_TYPE_MODULE 9 /* Module cluster API message. */
+#define CLUSTERMSG_TYPE_PUBLISHSHARD 10 /* Pub/Sub Publish shard propagation */
+#define CLUSTERMSG_TYPE_COUNT 11 /* Total number of message types. */
+
+/* Flags that a module can set in order to prevent certain Redis Cluster
+ * features to be enabled. Useful when implementing a different distributed
+ * system on top of Redis Cluster message bus, using modules. */
+#define CLUSTER_MODULE_FLAG_NONE 0
+#define CLUSTER_MODULE_FLAG_NO_FAILOVER (1<<1)
+#define CLUSTER_MODULE_FLAG_NO_REDIRECTION (1<<2)
+
+/* This structure represent elements of node->fail_reports: one entry per
+ * node that reported the owner as (possibly) failing. */
+typedef struct clusterNodeFailReport {
+ struct clusterNode *node; /* Node reporting the failure condition. */
+ mstime_t time; /* Time of the last report from this node. */
+} clusterNodeFailReport;
+
+/* State kept for every known node of the cluster, including ourselves. */
+typedef struct clusterNode {
+ mstime_t ctime; /* Node object creation time. */
+ char name[CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
+ char shard_id[CLUSTER_NAMELEN]; /* shard id, hex string, sha1-size */
+ int flags; /* CLUSTER_NODE_... */
+ uint64_t configEpoch; /* Last configEpoch observed for this node */
+ unsigned char slots[CLUSTER_SLOTS/8]; /* slots handled by this node (bitmap) */
+ uint16_t *slot_info_pairs; /* Slots info represented as (start/end) pair (consecutive index). */
+ int slot_info_pairs_count; /* Used number of slots in slot_info_pairs */
+ int numslots; /* Number of slots handled by this node */
+ int numslaves; /* Number of slave nodes, if this is a master */
+ struct clusterNode **slaves; /* pointers to slave nodes */
+ struct clusterNode *slaveof; /* pointer to the master node. Note that it
+ may be NULL even if the node is a slave
+ if we don't have the master node in our
+ tables. */
+ unsigned long long last_in_ping_gossip; /* The number of the last carried in the ping gossip section */
+ mstime_t ping_sent; /* Unix time we sent latest ping */
+ mstime_t pong_received; /* Unix time we received the pong */
+ mstime_t data_received; /* Unix time we received any data */
+ mstime_t fail_time; /* Unix time when FAIL flag was set */
+ mstime_t voted_time; /* Last time we voted for a slave of this master */
+ mstime_t repl_offset_time; /* Unix time we received offset for this node */
+ mstime_t orphaned_time; /* Starting time of orphaned master condition */
+ long long repl_offset; /* Last known repl offset for this node. */
+ char ip[NET_IP_STR_LEN]; /* Latest known IP address of this node */
+ sds hostname; /* The known hostname for this node */
+ sds human_nodename; /* The known human readable nodename for this node */
+ int tcp_port; /* Latest known clients TCP port. */
+ int tls_port; /* Latest known clients TLS port */
+ int cport; /* Latest known cluster port of this node. */
+ clusterLink *link; /* TCP/IP link established toward this node */
+ clusterLink *inbound_link; /* TCP/IP link accepted from this node */
+ list *fail_reports; /* List of nodes signaling this as failing */
+} clusterNode;
+
+/* Slot to keys for a single slot. The keys in the same slot are linked together
+ * using dictEntry metadata (see clusterDictEntryMetadata below). */
+typedef struct slotToKeys {
+ uint64_t count; /* Number of keys in the slot. */
+ dictEntry *head; /* The first key-value entry in the slot. */
+} slotToKeys;
+
+/* Slot to keys mapping for all slots, opaque outside this file.
+ * One slotToKeys per slot, indexed by hash slot number. */
+struct clusterSlotToKeyMapping {
+ slotToKeys by_slot[CLUSTER_SLOTS];
+};
+
+/* Dict entry metadata for cluster mode, used for the Slot to Key API to form a
+ * doubly linked list of the entries belonging to the same slot. */
+typedef struct clusterDictEntryMetadata {
+ dictEntry *prev; /* Prev entry with key in the same slot */
+ dictEntry *next; /* Next entry with key in the same slot */
+} clusterDictEntryMetadata;
+
+/* Dict-level metadata for cluster mode (see slotToKeyInit /
+ * slotToKeyReplaceEntry in cluster.c). */
+typedef struct {
+ redisDb *db; /* A link back to the db this dict belongs to */
+} clusterDictMetadata;
+
+/* Global cluster state of this node: known nodes, slot ownership,
+ * migration/import maps, failover/election state and stats. */
+typedef struct clusterState {
+ clusterNode *myself; /* This node */
+ uint64_t currentEpoch;
+ int state; /* CLUSTER_OK, CLUSTER_FAIL, ... */
+ int size; /* Num of master nodes with at least one slot */
+ dict *nodes; /* Hash table of name -> clusterNode structures */
+ dict *shards; /* Hash table of shard_id -> list (of nodes) structures */
+ dict *nodes_black_list; /* Nodes we don't re-add for a few seconds. */
+ clusterNode *migrating_slots_to[CLUSTER_SLOTS]; /* Target node per migrating slot, or NULL. */
+ clusterNode *importing_slots_from[CLUSTER_SLOTS]; /* Source node per importing slot, or NULL. */
+ clusterNode *slots[CLUSTER_SLOTS]; /* Current owner of each slot, or NULL if unbound. */
+ rax *slots_to_channels; /* Shard channels keyed by 2-byte slot prefix + name. */
+ /* The following fields are used to take the slave state on elections. */
+ mstime_t failover_auth_time; /* Time of previous or next election. */
+ int failover_auth_count; /* Number of votes received so far. */
+ int failover_auth_sent; /* True if we already asked for votes. */
+ int failover_auth_rank; /* This slave rank for current auth request. */
+ uint64_t failover_auth_epoch; /* Epoch of the current election. */
+ int cant_failover_reason; /* Why a slave is currently not able to
+ failover. See the CANT_FAILOVER_* macros. */
+ /* Manual failover state in common. */
+ mstime_t mf_end; /* Manual failover time limit (ms unixtime).
+ It is zero if there is no MF in progress. */
+ /* Manual failover state of master. */
+ clusterNode *mf_slave; /* Slave performing the manual failover. */
+ /* Manual failover state of slave. */
+ long long mf_master_offset; /* Master offset the slave needs to start MF
+ or -1 if still not received. */
+ int mf_can_start; /* If non-zero signal that the manual failover
+ can start requesting masters vote. */
+ /* The following fields are used by masters to take state on elections. */
+ uint64_t lastVoteEpoch; /* Epoch of the last vote granted. */
+ int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */
+ /* Stats */
+ /* Messages received and sent by type. */
+ long long stats_bus_messages_sent[CLUSTERMSG_TYPE_COUNT];
+ long long stats_bus_messages_received[CLUSTERMSG_TYPE_COUNT];
+ long long stats_pfail_nodes; /* Number of nodes in PFAIL status,
+ excluding nodes without address. */
+ unsigned long long stat_cluster_links_buffer_limit_exceeded; /* Total number of cluster links freed due to exceeding buffer limit */
+
+ /* Bit map for slots that are no longer claimed by the owner in cluster PING
+ * messages. During slot migration, the owner will stop claiming the slot after
+ * the ownership transfer. Set the bit corresponding to the slot when a node
+ * stops claiming the slot. This prevents spreading incorrect information (that
+ * source still owns the slot) using UPDATE messages. */
+ unsigned char owner_not_claiming_slot[CLUSTER_SLOTS / 8];
+} clusterState;
+
+/* Redis cluster messages header */
+
+/* Initially we don't know our "name", but we'll find it once we connect
+ * to the first node, using the getsockname() function. Then we'll use this
+ * address for all the next messages. */
+typedef struct {
+ char nodename[CLUSTER_NAMELEN];
+ uint32_t ping_sent;
+ uint32_t pong_received;
+ char ip[NET_IP_STR_LEN]; /* IP address last time it was seen */
+ uint16_t port; /* primary port last time it was seen */
+ uint16_t cport; /* cluster port last time it was seen */
+ uint16_t flags; /* node->flags copy */
+ uint16_t pport; /* secondary port last time it was seen */
+ uint16_t notused1;
+} clusterMsgDataGossip;
+
+typedef struct {
+ char nodename[CLUSTER_NAMELEN];
+} clusterMsgDataFail;
+
+typedef struct {
+ uint32_t channel_len;
+ uint32_t message_len;
+ unsigned char bulk_data[8]; /* 8 bytes just as placeholder. */
+} clusterMsgDataPublish;
+
+typedef struct {
+ uint64_t configEpoch; /* Config epoch of the specified instance. */
+ char nodename[CLUSTER_NAMELEN]; /* Name of the slots owner. */
+ unsigned char slots[CLUSTER_SLOTS/8]; /* Slots bitmap. */
+} clusterMsgDataUpdate;
+
+typedef struct {
+ uint64_t module_id; /* ID of the sender module. */
+ uint32_t len; /* Length of the module payload (bulk_data). */
+ uint8_t type; /* Type from 0 to 255. */
+ unsigned char bulk_data[3]; /* 3 bytes just as placeholder. */
+} clusterMsgModule;
+
+/* The cluster supports optional extension messages that can be sent
+ * along with ping/pong/meet messages to give additional info in a
+ * consistent manner. */
+typedef enum {
+ CLUSTERMSG_EXT_TYPE_HOSTNAME,
+ CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME,
+ CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE,
+ CLUSTERMSG_EXT_TYPE_SHARDID,
+} clusterMsgPingtypes;
+
+/* Helper function for making sure extensions are eight byte aligned. */
+#define EIGHT_BYTE_ALIGN(size) ((((size) + 7) / 8) * 8)
+
+typedef struct {
+ char hostname[1]; /* The announced hostname, ends with \0. */
+} clusterMsgPingExtHostname;
+
+typedef struct {
+ char human_nodename[1]; /* The announced nodename, ends with \0. */
+} clusterMsgPingExtHumanNodename;
+
+typedef struct {
+ char name[CLUSTER_NAMELEN]; /* Node name. */
+ uint64_t ttl; /* Remaining time to blacklist the node, in seconds. */
+} clusterMsgPingExtForgottenNode;
+
+static_assert(sizeof(clusterMsgPingExtForgottenNode) % 8 == 0, "");
+
+typedef struct {
+ char shard_id[CLUSTER_NAMELEN]; /* The shard_id, 40 bytes fixed. */
+} clusterMsgPingExtShardId;
+
+typedef struct {
+ uint32_t length; /* Total length of this extension message (including this header) */
+ uint16_t type; /* Type of this extension message (see clusterMsgPingtypes) */
+ uint16_t unused; /* 16 bits of padding to make this structure 8 byte aligned. */
+ union {
+ clusterMsgPingExtHostname hostname;
+ clusterMsgPingExtHumanNodename human_nodename;
+ clusterMsgPingExtForgottenNode forgotten_node;
+ clusterMsgPingExtShardId shard_id;
+ } ext[]; /* Actual extension information, formatted so that the data is 8
+ * byte aligned, regardless of its content. */
+} clusterMsgPingExt;
+
+union clusterMsgData {
+ /* PING, MEET and PONG */
+ struct {
+ /* Array of N clusterMsgDataGossip structures */
+ clusterMsgDataGossip gossip[1];
+ /* Extension data that can optionally be sent for ping/meet/pong
+ * messages. We can't explicitly define them here though, since
+ * the gossip array isn't the real length of the gossip data. */
+ } ping;
+
+ /* FAIL */
+ struct {
+ clusterMsgDataFail about;
+ } fail;
+
+ /* PUBLISH */
+ struct {
+ clusterMsgDataPublish msg;
+ } publish;
+
+ /* UPDATE */
+ struct {
+ clusterMsgDataUpdate nodecfg;
+ } update;
+
+ /* MODULE */
+ struct {
+ clusterMsgModule msg;
+ } module;
+};
+
+#define CLUSTER_PROTO_VER 1 /* Cluster bus protocol version. */
+
+typedef struct {
+ char sig[4]; /* Signature "RCmb" (Redis Cluster message bus). */
+ uint32_t totlen; /* Total length of this message */
+ uint16_t ver; /* Protocol version, currently set to 1. */
+ uint16_t port; /* Primary port number (TCP or TLS). */
+ uint16_t type; /* Message type */
+ uint16_t count; /* Only used for some kind of messages. */
+ uint64_t currentEpoch; /* The epoch accordingly to the sending node. */
+ uint64_t configEpoch; /* The config epoch if it's a master, or the last
+ epoch advertised by its master if it is a
+ slave. */
+ uint64_t offset; /* Master replication offset if node is a master or
+ processed replication offset if node is a slave. */
+ char sender[CLUSTER_NAMELEN]; /* Name of the sender node */
+ unsigned char myslots[CLUSTER_SLOTS/8];
+ char slaveof[CLUSTER_NAMELEN];
+ char myip[NET_IP_STR_LEN]; /* Sender IP, if not all zeroed. */
+ uint16_t extensions; /* Number of extensions sent along with this packet. */
+ char notused1[30]; /* 30 bytes reserved for future usage. */
+ uint16_t pport; /* Secondary port number: if primary port is TCP port, this is
+ TLS port, and if primary port is TLS port, this is TCP port.*/
+ uint16_t cport; /* Sender TCP cluster bus port */
+ uint16_t flags; /* Sender node flags */
+ unsigned char state; /* Cluster state from the POV of the sender */
+ unsigned char mflags[3]; /* Message flags: CLUSTERMSG_FLAG[012]_... */
+ union clusterMsgData data;
+} clusterMsg;
+
+/* clusterMsg defines the gossip wire protocol exchanged among Redis cluster
+ * members, which can be running different versions of redis-server bits,
+ * especially during cluster rolling upgrades.
+ *
+ * Therefore, fields in this struct should remain at the same offset from
+ * release to release. The static asserts below ensures that incompatible
+ * changes in clusterMsg be caught at compile time.
+ */
+
+static_assert(offsetof(clusterMsg, sig) == 0, "unexpected field offset");
+static_assert(offsetof(clusterMsg, totlen) == 4, "unexpected field offset");
+static_assert(offsetof(clusterMsg, ver) == 8, "unexpected field offset");
+static_assert(offsetof(clusterMsg, port) == 10, "unexpected field offset");
+static_assert(offsetof(clusterMsg, type) == 12, "unexpected field offset");
+static_assert(offsetof(clusterMsg, count) == 14, "unexpected field offset");
+static_assert(offsetof(clusterMsg, currentEpoch) == 16, "unexpected field offset");
+static_assert(offsetof(clusterMsg, configEpoch) == 24, "unexpected field offset");
+static_assert(offsetof(clusterMsg, offset) == 32, "unexpected field offset");
+static_assert(offsetof(clusterMsg, sender) == 40, "unexpected field offset");
+static_assert(offsetof(clusterMsg, myslots) == 80, "unexpected field offset");
+static_assert(offsetof(clusterMsg, slaveof) == 2128, "unexpected field offset");
+static_assert(offsetof(clusterMsg, myip) == 2168, "unexpected field offset");
+static_assert(offsetof(clusterMsg, extensions) == 2214, "unexpected field offset");
+static_assert(offsetof(clusterMsg, notused1) == 2216, "unexpected field offset");
+static_assert(offsetof(clusterMsg, pport) == 2246, "unexpected field offset");
+static_assert(offsetof(clusterMsg, cport) == 2248, "unexpected field offset");
+static_assert(offsetof(clusterMsg, flags) == 2250, "unexpected field offset");
+static_assert(offsetof(clusterMsg, state) == 2252, "unexpected field offset");
+static_assert(offsetof(clusterMsg, mflags) == 2253, "unexpected field offset");
+static_assert(offsetof(clusterMsg, data) == 2256, "unexpected field offset");
+
+#define CLUSTERMSG_MIN_LEN (sizeof(clusterMsg)-sizeof(union clusterMsgData))
+
+/* Message flags better specify the packet content or are used to
+ * provide some information about the node state. */
+#define CLUSTERMSG_FLAG0_PAUSED (1<<0) /* Master paused for manual failover. */
+#define CLUSTERMSG_FLAG0_FORCEACK (1<<1) /* Give ACK to AUTH_REQUEST even if
+ master is up. */
+#define CLUSTERMSG_FLAG0_EXT_DATA (1<<2) /* Message contains extension data */
+
+/* ---------------------- API exported outside cluster.c -------------------- */
+void clusterInit(void);
+void clusterInitListeners(void);
+void clusterCron(void);
+void clusterBeforeSleep(void);
+clusterNode *clusterNodeGetMaster(clusterNode *node);
+clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
+int verifyClusterNodeId(const char *name, int length);
+clusterNode *clusterLookupNode(const char *name, int length);
+int clusterRedirectBlockedClientIfNeeded(client *c);
+void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code);
+void migrateCloseTimedoutSockets(void);
+int verifyClusterConfigWithData(void);
+unsigned long getClusterConnectionsCount(void);
+int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uint8_t type, const char *payload, uint32_t len);
+void clusterPropagatePublish(robj *channel, robj *message, int sharded);
+unsigned int keyHashSlot(char *key, int keylen);
+void slotToKeyAddEntry(dictEntry *entry, redisDb *db);
+void slotToKeyDelEntry(dictEntry *entry, redisDb *db);
+void slotToKeyReplaceEntry(dict *d, dictEntry *entry);
+void slotToKeyInit(redisDb *db);
+void slotToKeyFlush(redisDb *db);
+void slotToKeyDestroy(redisDb *db);
+void clusterUpdateMyselfFlags(void);
+void clusterUpdateMyselfIp(void);
+void slotToChannelAdd(sds channel);
+void slotToChannelDel(sds channel);
+void clusterUpdateMyselfHostname(void);
+void clusterUpdateMyselfAnnouncedPorts(void);
+sds clusterGenNodesDescription(client *c, int filter, int tls_primary);
+sds genClusterInfoString(void);
+void freeClusterLink(clusterLink *link);
+void clusterUpdateMyselfHumanNodename(void);
+int isValidAuxString(char *s, unsigned int length);
+int getNodeDefaultClientPort(clusterNode *n);
+
+#endif /* __CLUSTER_H */
diff --git a/src/commands.c b/src/commands.c
new file mode 100644
index 0000000..5dcfe19
--- /dev/null
+++ b/src/commands.c
@@ -0,0 +1,13 @@
+#include "commands.h"
+#include "server.h"
+
+#define MAKE_CMD(name,summary,complexity,since,doc_flags,replaced,deprecated,group,group_enum,history,num_history,tips,num_tips,function,arity,flags,acl,key_specs,key_specs_num,get_keys,numargs) name,summary,complexity,since,doc_flags,replaced,deprecated,group_enum,history,num_history,tips,num_tips,function,arity,flags,acl,key_specs,key_specs_num,get_keys,numargs
+#define MAKE_ARG(name,type,key_spec_index,token,summary,since,flags,numsubargs,deprecated_since) name,type,key_spec_index,token,summary,since,flags,deprecated_since,numsubargs
+#define COMMAND_STRUCT redisCommand
+#define COMMAND_ARG redisCommandArg
+
+#ifdef LOG_REQ_RES
+#include "commands_with_reply_schema.def"
+#else
+#include "commands.def"
+#endif
diff --git a/src/commands.def b/src/commands.def
new file mode 100644
index 0000000..10fbd9f
--- /dev/null
+++ b/src/commands.def
@@ -0,0 +1,10899 @@
+/* Automatically generated by generate-command-code.py, do not edit. */
+
+
+/* We have fabulous commands from
+ * the fantastic
+ * Redis Command Table! */
+
+/* Must match redisCommandGroup */
+const char *COMMAND_GROUP_STR[] = {
+ "generic",
+ "string",
+ "list",
+ "set",
+ "sorted-set",
+ "hash",
+ "pubsub",
+ "transactions",
+ "connection",
+ "server",
+ "scripting",
+ "hyperloglog",
+ "cluster",
+ "sentinel",
+ "geo",
+ "stream",
+ "bitmap",
+ "module"
+};
+
+const char *commandGroupStr(int index) {
+ return COMMAND_GROUP_STR[index];
+}
+/********** BITCOUNT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BITCOUNT history */
+commandHistory BITCOUNT_History[] = {
+{"7.0.0","Added the `BYTE|BIT` option."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BITCOUNT tips */
+#define BITCOUNT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BITCOUNT key specs */
+keySpec BITCOUNT_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* BITCOUNT range unit argument table */
+struct COMMAND_ARG BITCOUNT_range_unit_Subargs[] = {
+{MAKE_ARG("byte",ARG_TYPE_PURE_TOKEN,-1,"BYTE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("bit",ARG_TYPE_PURE_TOKEN,-1,"BIT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BITCOUNT range argument table */
+struct COMMAND_ARG BITCOUNT_range_Subargs[] = {
+{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,"7.0.0",CMD_ARG_OPTIONAL,2,NULL),.subargs=BITCOUNT_range_unit_Subargs},
+};
+
+/* BITCOUNT argument table */
+struct COMMAND_ARG BITCOUNT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("range",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=BITCOUNT_range_Subargs},
+};
+
+/********** BITFIELD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BITFIELD history */
+#define BITFIELD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BITFIELD tips */
+#define BITFIELD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BITFIELD key specs */
+keySpec BITFIELD_Keyspecs[1] = {
+{"This command allows both access and modification of the key",CMD_KEY_RW|CMD_KEY_UPDATE|CMD_KEY_ACCESS|CMD_KEY_VARIABLE_FLAGS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* BITFIELD operation get_block argument table */
+struct COMMAND_ARG BITFIELD_operation_get_block_Subargs[] = {
+{MAKE_ARG("encoding",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BITFIELD operation write overflow_block argument table */
+struct COMMAND_ARG BITFIELD_operation_write_overflow_block_Subargs[] = {
+{MAKE_ARG("wrap",ARG_TYPE_PURE_TOKEN,-1,"WRAP",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("sat",ARG_TYPE_PURE_TOKEN,-1,"SAT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("fail",ARG_TYPE_PURE_TOKEN,-1,"FAIL",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BITFIELD operation write write_operation set_block argument table */
+struct COMMAND_ARG BITFIELD_operation_write_write_operation_set_block_Subargs[] = {
+{MAKE_ARG("encoding",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BITFIELD operation write write_operation incrby_block argument table */
+struct COMMAND_ARG BITFIELD_operation_write_write_operation_incrby_block_Subargs[] = {
+{MAKE_ARG("encoding",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("increment",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BITFIELD operation write write_operation argument table */
+struct COMMAND_ARG BITFIELD_operation_write_write_operation_Subargs[] = {
+{MAKE_ARG("set-block",ARG_TYPE_BLOCK,-1,"SET",NULL,NULL,CMD_ARG_NONE,3,NULL),.subargs=BITFIELD_operation_write_write_operation_set_block_Subargs},
+{MAKE_ARG("incrby-block",ARG_TYPE_BLOCK,-1,"INCRBY",NULL,NULL,CMD_ARG_NONE,3,NULL),.subargs=BITFIELD_operation_write_write_operation_incrby_block_Subargs},
+};
+
+/* BITFIELD operation write argument table */
+struct COMMAND_ARG BITFIELD_operation_write_Subargs[] = {
+{MAKE_ARG("overflow-block",ARG_TYPE_ONEOF,-1,"OVERFLOW",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=BITFIELD_operation_write_overflow_block_Subargs},
+{MAKE_ARG("write-operation",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=BITFIELD_operation_write_write_operation_Subargs},
+};
+
+/* BITFIELD operation argument table */
+struct COMMAND_ARG BITFIELD_operation_Subargs[] = {
+{MAKE_ARG("get-block",ARG_TYPE_BLOCK,-1,"GET",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=BITFIELD_operation_get_block_Subargs},
+{MAKE_ARG("write",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=BITFIELD_operation_write_Subargs},
+};
+
+/* BITFIELD argument table */
+struct COMMAND_ARG BITFIELD_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("operation",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,2,NULL),.subargs=BITFIELD_operation_Subargs},
+};
+
+/********** BITFIELD_RO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BITFIELD_RO history */
+#define BITFIELD_RO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BITFIELD_RO tips */
+#define BITFIELD_RO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BITFIELD_RO key specs */
+keySpec BITFIELD_RO_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* BITFIELD_RO get_block argument table */
+struct COMMAND_ARG BITFIELD_RO_get_block_Subargs[] = {
+{MAKE_ARG("encoding",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BITFIELD_RO argument table */
+struct COMMAND_ARG BITFIELD_RO_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("get-block",ARG_TYPE_BLOCK,-1,"GET",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE|CMD_ARG_MULTIPLE_TOKEN,2,NULL),.subargs=BITFIELD_RO_get_block_Subargs},
+};
+
+/********** BITOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BITOP history */
+#define BITOP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BITOP tips */
+#define BITOP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BITOP key specs */
+keySpec BITOP_Keyspecs[2] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={3},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* BITOP operation argument table */
+struct COMMAND_ARG BITOP_operation_Subargs[] = {
+{MAKE_ARG("and",ARG_TYPE_PURE_TOKEN,-1,"AND",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("or",ARG_TYPE_PURE_TOKEN,-1,"OR",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("xor",ARG_TYPE_PURE_TOKEN,-1,"XOR",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("not",ARG_TYPE_PURE_TOKEN,-1,"NOT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BITOP argument table */
+struct COMMAND_ARG BITOP_Args[] = {
+{MAKE_ARG("operation",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=BITOP_operation_Subargs},
+{MAKE_ARG("destkey",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** BITPOS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BITPOS history */
+commandHistory BITPOS_History[] = {
+{"7.0.0","Added the `BYTE|BIT` option."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BITPOS tips */
+#define BITPOS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BITPOS key specs */
+keySpec BITPOS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* BITPOS range end_unit_block unit argument table */
+struct COMMAND_ARG BITPOS_range_end_unit_block_unit_Subargs[] = {
+{MAKE_ARG("byte",ARG_TYPE_PURE_TOKEN,-1,"BYTE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("bit",ARG_TYPE_PURE_TOKEN,-1,"BIT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BITPOS range end_unit_block argument table */
+struct COMMAND_ARG BITPOS_range_end_unit_block_Subargs[] = {
+{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,"7.0.0",CMD_ARG_OPTIONAL,2,NULL),.subargs=BITPOS_range_end_unit_block_unit_Subargs},
+};
+
+/* BITPOS range argument table */
+struct COMMAND_ARG BITPOS_range_Subargs[] = {
+{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end-unit-block",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=BITPOS_range_end_unit_block_Subargs},
+};
+
+/* BITPOS argument table */
+struct COMMAND_ARG BITPOS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("bit",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("range",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=BITPOS_range_Subargs},
+};
+
+/********** GETBIT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GETBIT history */
+#define GETBIT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GETBIT tips */
+#define GETBIT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GETBIT key specs */
+keySpec GETBIT_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GETBIT argument table */
+struct COMMAND_ARG GETBIT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SETBIT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SETBIT history */
+#define SETBIT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SETBIT tips */
+#define SETBIT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SETBIT key specs */
+keySpec SETBIT_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SETBIT argument table */
+struct COMMAND_ARG SETBIT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ASKING ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ASKING history */
+#define ASKING_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ASKING tips */
+#define ASKING_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ASKING key specs */
+#define ASKING_Keyspecs NULL
+#endif
+
+/********** CLUSTER ADDSLOTS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER ADDSLOTS history */
+#define CLUSTER_ADDSLOTS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER ADDSLOTS tips */
+#define CLUSTER_ADDSLOTS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER ADDSLOTS key specs */
+#define CLUSTER_ADDSLOTS_Keyspecs NULL
+#endif
+
+/* CLUSTER ADDSLOTS argument table */
+struct COMMAND_ARG CLUSTER_ADDSLOTS_Args[] = {
+{MAKE_ARG("slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** CLUSTER ADDSLOTSRANGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER ADDSLOTSRANGE history */
+#define CLUSTER_ADDSLOTSRANGE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER ADDSLOTSRANGE tips */
+#define CLUSTER_ADDSLOTSRANGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER ADDSLOTSRANGE key specs */
+#define CLUSTER_ADDSLOTSRANGE_Keyspecs NULL
+#endif
+
+/* CLUSTER ADDSLOTSRANGE range argument table */
+struct COMMAND_ARG CLUSTER_ADDSLOTSRANGE_range_Subargs[] = {
+{MAKE_ARG("start-slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end-slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLUSTER ADDSLOTSRANGE argument table */
+struct COMMAND_ARG CLUSTER_ADDSLOTSRANGE_Args[] = {
+{MAKE_ARG("range",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=CLUSTER_ADDSLOTSRANGE_range_Subargs},
+};
+
+/********** CLUSTER BUMPEPOCH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER BUMPEPOCH history */
+#define CLUSTER_BUMPEPOCH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER BUMPEPOCH tips */
+const char *CLUSTER_BUMPEPOCH_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER BUMPEPOCH key specs */
+#define CLUSTER_BUMPEPOCH_Keyspecs NULL
+#endif
+
+/********** CLUSTER COUNT_FAILURE_REPORTS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER COUNT_FAILURE_REPORTS history */
+#define CLUSTER_COUNT_FAILURE_REPORTS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER COUNT_FAILURE_REPORTS tips */
+const char *CLUSTER_COUNT_FAILURE_REPORTS_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER COUNT_FAILURE_REPORTS key specs */
+#define CLUSTER_COUNT_FAILURE_REPORTS_Keyspecs NULL
+#endif
+
+/* CLUSTER COUNT_FAILURE_REPORTS argument table */
+struct COMMAND_ARG CLUSTER_COUNT_FAILURE_REPORTS_Args[] = {
+{MAKE_ARG("node-id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLUSTER COUNTKEYSINSLOT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER COUNTKEYSINSLOT history */
+#define CLUSTER_COUNTKEYSINSLOT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER COUNTKEYSINSLOT tips */
+#define CLUSTER_COUNTKEYSINSLOT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER COUNTKEYSINSLOT key specs */
+#define CLUSTER_COUNTKEYSINSLOT_Keyspecs NULL
+#endif
+
+/* CLUSTER COUNTKEYSINSLOT argument table */
+struct COMMAND_ARG CLUSTER_COUNTKEYSINSLOT_Args[] = {
+{MAKE_ARG("slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLUSTER DELSLOTS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER DELSLOTS history */
+#define CLUSTER_DELSLOTS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER DELSLOTS tips */
+#define CLUSTER_DELSLOTS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER DELSLOTS key specs */
+#define CLUSTER_DELSLOTS_Keyspecs NULL
+#endif
+
+/* CLUSTER DELSLOTS argument table */
+struct COMMAND_ARG CLUSTER_DELSLOTS_Args[] = {
+{MAKE_ARG("slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** CLUSTER DELSLOTSRANGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER DELSLOTSRANGE history */
+#define CLUSTER_DELSLOTSRANGE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER DELSLOTSRANGE tips */
+#define CLUSTER_DELSLOTSRANGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER DELSLOTSRANGE key specs */
+#define CLUSTER_DELSLOTSRANGE_Keyspecs NULL
+#endif
+
+/* CLUSTER DELSLOTSRANGE range argument table */
+struct COMMAND_ARG CLUSTER_DELSLOTSRANGE_range_Subargs[] = {
+{MAKE_ARG("start-slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end-slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLUSTER DELSLOTSRANGE argument table */
+struct COMMAND_ARG CLUSTER_DELSLOTSRANGE_Args[] = {
+{MAKE_ARG("range",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=CLUSTER_DELSLOTSRANGE_range_Subargs},
+};
+
+/********** CLUSTER FAILOVER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER FAILOVER history */
+#define CLUSTER_FAILOVER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER FAILOVER tips */
+#define CLUSTER_FAILOVER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER FAILOVER key specs */
+#define CLUSTER_FAILOVER_Keyspecs NULL
+#endif
+
+/* CLUSTER FAILOVER options argument table */
+struct COMMAND_ARG CLUSTER_FAILOVER_options_Subargs[] = {
+{MAKE_ARG("force",ARG_TYPE_PURE_TOKEN,-1,"FORCE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("takeover",ARG_TYPE_PURE_TOKEN,-1,"TAKEOVER",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLUSTER FAILOVER argument table */
+struct COMMAND_ARG CLUSTER_FAILOVER_Args[] = {
+{MAKE_ARG("options",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=CLUSTER_FAILOVER_options_Subargs},
+};
+
+/********** CLUSTER FLUSHSLOTS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER FLUSHSLOTS history */
+#define CLUSTER_FLUSHSLOTS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER FLUSHSLOTS tips */
+#define CLUSTER_FLUSHSLOTS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER FLUSHSLOTS key specs */
+#define CLUSTER_FLUSHSLOTS_Keyspecs NULL
+#endif
+
+/********** CLUSTER FORGET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER FORGET history */
+#define CLUSTER_FORGET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER FORGET tips */
+#define CLUSTER_FORGET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER FORGET key specs */
+#define CLUSTER_FORGET_Keyspecs NULL
+#endif
+
+/* CLUSTER FORGET argument table */
+struct COMMAND_ARG CLUSTER_FORGET_Args[] = {
+{MAKE_ARG("node-id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLUSTER GETKEYSINSLOT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER GETKEYSINSLOT history */
+#define CLUSTER_GETKEYSINSLOT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER GETKEYSINSLOT tips */
+const char *CLUSTER_GETKEYSINSLOT_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER GETKEYSINSLOT key specs */
+#define CLUSTER_GETKEYSINSLOT_Keyspecs NULL
+#endif
+
+/* CLUSTER GETKEYSINSLOT argument table */
+struct COMMAND_ARG CLUSTER_GETKEYSINSLOT_Args[] = {
+{MAKE_ARG("slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLUSTER HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER HELP history */
+#define CLUSTER_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER HELP tips */
+#define CLUSTER_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER HELP key specs */
+#define CLUSTER_HELP_Keyspecs NULL
+#endif
+
+/********** CLUSTER INFO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER INFO history */
+#define CLUSTER_INFO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER INFO tips */
+const char *CLUSTER_INFO_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER INFO key specs */
+#define CLUSTER_INFO_Keyspecs NULL
+#endif
+
+/********** CLUSTER KEYSLOT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER KEYSLOT history */
+#define CLUSTER_KEYSLOT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER KEYSLOT tips */
+#define CLUSTER_KEYSLOT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER KEYSLOT key specs */
+#define CLUSTER_KEYSLOT_Keyspecs NULL
+#endif
+
+/* CLUSTER KEYSLOT argument table */
+struct COMMAND_ARG CLUSTER_KEYSLOT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLUSTER LINKS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER LINKS history */
+#define CLUSTER_LINKS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER LINKS tips */
+const char *CLUSTER_LINKS_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER LINKS key specs */
+#define CLUSTER_LINKS_Keyspecs NULL
+#endif
+
+/********** CLUSTER MEET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER MEET history */
+commandHistory CLUSTER_MEET_History[] = {
+{"4.0.0","Added the optional `cluster_bus_port` argument."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER MEET tips */
+#define CLUSTER_MEET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER MEET key specs */
+#define CLUSTER_MEET_Keyspecs NULL
+#endif
+
+/* CLUSTER MEET argument table */
+struct COMMAND_ARG CLUSTER_MEET_Args[] = {
+{MAKE_ARG("ip",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("port",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("cluster-bus-port",ARG_TYPE_INTEGER,-1,NULL,NULL,"4.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** CLUSTER MYID ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER MYID history */
+#define CLUSTER_MYID_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER MYID tips */
+#define CLUSTER_MYID_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER MYID key specs */
+#define CLUSTER_MYID_Keyspecs NULL
+#endif
+
+/********** CLUSTER MYSHARDID ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER MYSHARDID history */
+#define CLUSTER_MYSHARDID_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER MYSHARDID tips */
+/* NOTE(review): tip strings like "nondeterministic_output" are hints for
+ * clients/proxies; presumably they flag replies that vary per node -- verify
+ * against the command-tips documentation. */
+const char *CLUSTER_MYSHARDID_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER MYSHARDID key specs */
+#define CLUSTER_MYSHARDID_Keyspecs NULL
+#endif
+
+/********** CLUSTER NODES ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER NODES history */
+#define CLUSTER_NODES_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER NODES tips */
+const char *CLUSTER_NODES_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER NODES key specs */
+#define CLUSTER_NODES_Keyspecs NULL
+#endif
+
+/********** CLUSTER REPLICAS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER REPLICAS history */
+#define CLUSTER_REPLICAS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER REPLICAS tips */
+const char *CLUSTER_REPLICAS_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER REPLICAS key specs */
+#define CLUSTER_REPLICAS_Keyspecs NULL
+#endif
+
+/* CLUSTER REPLICAS argument table */
+struct COMMAND_ARG CLUSTER_REPLICAS_Args[] = {
+{MAKE_ARG("node-id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLUSTER REPLICATE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER REPLICATE history */
+#define CLUSTER_REPLICATE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER REPLICATE tips */
+#define CLUSTER_REPLICATE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER REPLICATE key specs */
+#define CLUSTER_REPLICATE_Keyspecs NULL
+#endif
+
+/* CLUSTER REPLICATE argument table */
+struct COMMAND_ARG CLUSTER_REPLICATE_Args[] = {
+{MAKE_ARG("node-id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLUSTER RESET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER RESET history */
+#define CLUSTER_RESET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER RESET tips */
+#define CLUSTER_RESET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER RESET key specs */
+#define CLUSTER_RESET_Keyspecs NULL
+#endif
+
+/* CLUSTER RESET reset_type argument table */
+/* Pure tokens HARD/SOFT carry no value; the ONEOF parent below selects
+ * between them. */
+struct COMMAND_ARG CLUSTER_RESET_reset_type_Subargs[] = {
+{MAKE_ARG("hard",ARG_TYPE_PURE_TOKEN,-1,"HARD",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("soft",ARG_TYPE_PURE_TOKEN,-1,"SOFT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLUSTER RESET argument table */
+/* numsubargs (2) must match the length of the Subargs table above. */
+struct COMMAND_ARG CLUSTER_RESET_Args[] = {
+{MAKE_ARG("reset-type",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=CLUSTER_RESET_reset_type_Subargs},
+};
+
+/********** CLUSTER SAVECONFIG ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER SAVECONFIG history */
+#define CLUSTER_SAVECONFIG_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER SAVECONFIG tips */
+#define CLUSTER_SAVECONFIG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER SAVECONFIG key specs */
+#define CLUSTER_SAVECONFIG_Keyspecs NULL
+#endif
+
+/********** CLUSTER SET_CONFIG_EPOCH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER SET_CONFIG_EPOCH history */
+#define CLUSTER_SET_CONFIG_EPOCH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER SET_CONFIG_EPOCH tips */
+#define CLUSTER_SET_CONFIG_EPOCH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER SET_CONFIG_EPOCH key specs */
+#define CLUSTER_SET_CONFIG_EPOCH_Keyspecs NULL
+#endif
+
+/* CLUSTER SET_CONFIG_EPOCH argument table */
+struct COMMAND_ARG CLUSTER_SET_CONFIG_EPOCH_Args[] = {
+{MAKE_ARG("config-epoch",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLUSTER SETSLOT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER SETSLOT history */
+#define CLUSTER_SETSLOT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER SETSLOT tips */
+#define CLUSTER_SETSLOT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER SETSLOT key specs */
+#define CLUSTER_SETSLOT_Keyspecs NULL
+#endif
+
+/* CLUSTER SETSLOT subcommand argument table */
+/* .display_text overrides the rendered argument name in docs ("node-id"
+ * instead of the internal names importing/migrating/node). */
+struct COMMAND_ARG CLUSTER_SETSLOT_subcommand_Subargs[] = {
+{MAKE_ARG("importing",ARG_TYPE_STRING,-1,"IMPORTING",NULL,NULL,CMD_ARG_NONE,0,NULL),.display_text="node-id"},
+{MAKE_ARG("migrating",ARG_TYPE_STRING,-1,"MIGRATING",NULL,NULL,CMD_ARG_NONE,0,NULL),.display_text="node-id"},
+{MAKE_ARG("node",ARG_TYPE_STRING,-1,"NODE",NULL,NULL,CMD_ARG_NONE,0,NULL),.display_text="node-id"},
+{MAKE_ARG("stable",ARG_TYPE_PURE_TOKEN,-1,"STABLE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLUSTER SETSLOT argument table */
+/* numsubargs (4) must match the Subargs table above. */
+struct COMMAND_ARG CLUSTER_SETSLOT_Args[] = {
+{MAKE_ARG("slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("subcommand",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=CLUSTER_SETSLOT_subcommand_Subargs},
+};
+
+/********** CLUSTER SHARDS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER SHARDS history */
+#define CLUSTER_SHARDS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER SHARDS tips */
+const char *CLUSTER_SHARDS_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER SHARDS key specs */
+#define CLUSTER_SHARDS_Keyspecs NULL
+#endif
+
+/********** CLUSTER SLAVES ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER SLAVES history */
+#define CLUSTER_SLAVES_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER SLAVES tips */
+const char *CLUSTER_SLAVES_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER SLAVES key specs */
+#define CLUSTER_SLAVES_Keyspecs NULL
+#endif
+
+/* CLUSTER SLAVES argument table */
+struct COMMAND_ARG CLUSTER_SLAVES_Args[] = {
+{MAKE_ARG("node-id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLUSTER SLOTS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER SLOTS history */
+commandHistory CLUSTER_SLOTS_History[] = {
+{"4.0.0","Added node IDs."},
+{"7.0.0","Added additional networking metadata field."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER SLOTS tips */
+const char *CLUSTER_SLOTS_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER SLOTS key specs */
+#define CLUSTER_SLOTS_Keyspecs NULL
+#endif
+
+/* CLUSTER command table */
+/* NOTE(review): counts embedded in MAKE_CMD must stay in sync with the
+ * referenced tables (e.g. "meet" passes 1 history entry and 3 args, matching
+ * CLUSTER_MEET_History/CLUSTER_MEET_Args above; "slots" passes 2 history
+ * entries). The {0} entry terminates the table. This table appears
+ * auto-generated -- confirm before hand-editing. */
+struct COMMAND_STRUCT CLUSTER_Subcommands[] = {
+{MAKE_CMD("addslots","Assigns new hash slots to a node.","O(N) where N is the total number of hash slot arguments","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_ADDSLOTS_History,0,CLUSTER_ADDSLOTS_Tips,0,clusterCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_ADDSLOTS_Keyspecs,0,NULL,1),.args=CLUSTER_ADDSLOTS_Args},
+{MAKE_CMD("addslotsrange","Assigns new hash slot ranges to a node.","O(N) where N is the total number of the slots between the start slot and end slot arguments.","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_ADDSLOTSRANGE_History,0,CLUSTER_ADDSLOTSRANGE_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_ADDSLOTSRANGE_Keyspecs,0,NULL,1),.args=CLUSTER_ADDSLOTSRANGE_Args},
+{MAKE_CMD("bumpepoch","Advances the cluster config epoch.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_BUMPEPOCH_History,0,CLUSTER_BUMPEPOCH_Tips,1,clusterCommand,2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_BUMPEPOCH_Keyspecs,0,NULL,0)},
+{MAKE_CMD("count-failure-reports","Returns the number of active failure reports active for a node.","O(N) where N is the number of failure reports","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_COUNT_FAILURE_REPORTS_History,0,CLUSTER_COUNT_FAILURE_REPORTS_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_COUNT_FAILURE_REPORTS_Keyspecs,0,NULL,1),.args=CLUSTER_COUNT_FAILURE_REPORTS_Args},
+{MAKE_CMD("countkeysinslot","Returns the number of keys in a hash slot.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_COUNTKEYSINSLOT_History,0,CLUSTER_COUNTKEYSINSLOT_Tips,0,clusterCommand,3,CMD_STALE,0,CLUSTER_COUNTKEYSINSLOT_Keyspecs,0,NULL,1),.args=CLUSTER_COUNTKEYSINSLOT_Args},
+{MAKE_CMD("delslots","Sets hash slots as unbound for a node.","O(N) where N is the total number of hash slot arguments","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_DELSLOTS_History,0,CLUSTER_DELSLOTS_Tips,0,clusterCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_DELSLOTS_Keyspecs,0,NULL,1),.args=CLUSTER_DELSLOTS_Args},
+{MAKE_CMD("delslotsrange","Sets hash slot ranges as unbound for a node.","O(N) where N is the total number of the slots between the start slot and end slot arguments.","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_DELSLOTSRANGE_History,0,CLUSTER_DELSLOTSRANGE_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_DELSLOTSRANGE_Keyspecs,0,NULL,1),.args=CLUSTER_DELSLOTSRANGE_Args},
+{MAKE_CMD("failover","Forces a replica to perform a manual failover of its master.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FAILOVER_History,0,CLUSTER_FAILOVER_Tips,0,clusterCommand,-2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FAILOVER_Keyspecs,0,NULL,1),.args=CLUSTER_FAILOVER_Args},
+{MAKE_CMD("flushslots","Deletes all slots information from a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FLUSHSLOTS_History,0,CLUSTER_FLUSHSLOTS_Tips,0,clusterCommand,2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FLUSHSLOTS_Keyspecs,0,NULL,0)},
+{MAKE_CMD("forget","Removes a node from the nodes table.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FORGET_History,0,CLUSTER_FORGET_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FORGET_Keyspecs,0,NULL,1),.args=CLUSTER_FORGET_Args},
+{MAKE_CMD("getkeysinslot","Returns the key names in a hash slot.","O(N) where N is the number of requested keys","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_GETKEYSINSLOT_History,0,CLUSTER_GETKEYSINSLOT_Tips,1,clusterCommand,4,CMD_STALE,0,CLUSTER_GETKEYSINSLOT_Keyspecs,0,NULL,2),.args=CLUSTER_GETKEYSINSLOT_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_HELP_History,0,CLUSTER_HELP_Tips,0,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("info","Returns information about the state of a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_INFO_History,0,CLUSTER_INFO_Tips,1,clusterCommand,2,CMD_STALE,0,CLUSTER_INFO_Keyspecs,0,NULL,0)},
+{MAKE_CMD("keyslot","Returns the hash slot for a key.","O(N) where N is the number of bytes in the key","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_KEYSLOT_History,0,CLUSTER_KEYSLOT_Tips,0,clusterCommand,3,CMD_STALE,0,CLUSTER_KEYSLOT_Keyspecs,0,NULL,1),.args=CLUSTER_KEYSLOT_Args},
+{MAKE_CMD("links","Returns a list of all TCP links to and from peer nodes.","O(N) where N is the total number of Cluster nodes","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_LINKS_History,0,CLUSTER_LINKS_Tips,1,clusterCommand,2,CMD_STALE,0,CLUSTER_LINKS_Keyspecs,0,NULL,0)},
+{MAKE_CMD("meet","Forces a node to handshake with another node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_MEET_History,1,CLUSTER_MEET_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_MEET_Keyspecs,0,NULL,3),.args=CLUSTER_MEET_Args},
+{MAKE_CMD("myid","Returns the ID of a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_MYID_History,0,CLUSTER_MYID_Tips,0,clusterCommand,2,CMD_STALE,0,CLUSTER_MYID_Keyspecs,0,NULL,0)},
+{MAKE_CMD("myshardid","Returns the shard ID of a node.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_MYSHARDID_History,0,CLUSTER_MYSHARDID_Tips,1,clusterCommand,2,CMD_STALE,0,CLUSTER_MYSHARDID_Keyspecs,0,NULL,0)},
+{MAKE_CMD("nodes","Returns the cluster configuration for a node.","O(N) where N is the total number of Cluster nodes","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_NODES_History,0,CLUSTER_NODES_Tips,1,clusterCommand,2,CMD_STALE,0,CLUSTER_NODES_Keyspecs,0,NULL,0)},
+{MAKE_CMD("replicas","Lists the replica nodes of a master node.","O(N) where N is the number of replicas.","5.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICAS_History,0,CLUSTER_REPLICAS_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICAS_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICAS_Args},
+{MAKE_CMD("replicate","Configure a node as replica of a master node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICATE_History,0,CLUSTER_REPLICATE_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICATE_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICATE_Args},
+{MAKE_CMD("reset","Resets a node.","O(N) where N is the number of known nodes. The command may execute a FLUSHALL as a side effect.","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_RESET_History,0,CLUSTER_RESET_Tips,0,clusterCommand,-2,CMD_ADMIN|CMD_STALE|CMD_NOSCRIPT,0,CLUSTER_RESET_Keyspecs,0,NULL,1),.args=CLUSTER_RESET_Args},
+{MAKE_CMD("saveconfig","Forces a node to save the cluster configuration to disk.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SAVECONFIG_History,0,CLUSTER_SAVECONFIG_Tips,0,clusterCommand,2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SAVECONFIG_Keyspecs,0,NULL,0)},
+{MAKE_CMD("set-config-epoch","Sets the configuration epoch for a new node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SET_CONFIG_EPOCH_History,0,CLUSTER_SET_CONFIG_EPOCH_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SET_CONFIG_EPOCH_Keyspecs,0,NULL,1),.args=CLUSTER_SET_CONFIG_EPOCH_Args},
+{MAKE_CMD("setslot","Binds a hash slot to a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SETSLOT_History,0,CLUSTER_SETSLOT_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SETSLOT_Keyspecs,0,NULL,2),.args=CLUSTER_SETSLOT_Args},
+{MAKE_CMD("shards","Returns the mapping of cluster slots to shards.","O(N) where N is the total number of cluster nodes","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SHARDS_History,0,CLUSTER_SHARDS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SHARDS_Keyspecs,0,NULL,0)},
+{MAKE_CMD("slaves","Lists the replica nodes of a master node.","O(N) where N is the number of replicas.","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER REPLICAS`","5.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLAVES_History,0,CLUSTER_SLAVES_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_SLAVES_Keyspecs,0,NULL,1),.args=CLUSTER_SLAVES_Args},
+{MAKE_CMD("slots","Returns the mapping of cluster slots to nodes.","O(N) where N is the total number of Cluster nodes","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER SHARDS`","7.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLOTS_History,2,CLUSTER_SLOTS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SLOTS_Keyspecs,0,NULL,0)},
+{0}
+};
+
+/********** CLUSTER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER history */
+#define CLUSTER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER tips */
+#define CLUSTER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER key specs */
+#define CLUSTER_Keyspecs NULL
+#endif
+
+/********** READONLY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* READONLY history */
+#define READONLY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* READONLY tips */
+#define READONLY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* READONLY key specs */
+#define READONLY_Keyspecs NULL
+#endif
+
+/********** READWRITE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* READWRITE history */
+#define READWRITE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* READWRITE tips */
+#define READWRITE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* READWRITE key specs */
+#define READWRITE_Keyspecs NULL
+#endif
+
+/********** AUTH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* AUTH history */
+commandHistory AUTH_History[] = {
+{"6.0.0","Added ACL style (username and password)."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* AUTH tips */
+#define AUTH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* AUTH key specs */
+#define AUTH_Keyspecs NULL
+#endif
+
+/* AUTH argument table */
+/* `username` is optional with since="6.0.0", matching the ACL-style history
+ * entry above; `password` alone is the legacy form. */
+struct COMMAND_ARG AUTH_Args[] = {
+{MAKE_ARG("username",ARG_TYPE_STRING,-1,NULL,NULL,"6.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("password",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLIENT CACHING ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT CACHING history */
+#define CLIENT_CACHING_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT CACHING tips */
+#define CLIENT_CACHING_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT CACHING key specs */
+#define CLIENT_CACHING_Keyspecs NULL
+#endif
+
+/* CLIENT CACHING mode argument table */
+/* YES/NO are pure tokens; the ONEOF parent below requires exactly one. */
+struct COMMAND_ARG CLIENT_CACHING_mode_Subargs[] = {
+{MAKE_ARG("yes",ARG_TYPE_PURE_TOKEN,-1,"YES",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("no",ARG_TYPE_PURE_TOKEN,-1,"NO",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT CACHING argument table */
+struct COMMAND_ARG CLIENT_CACHING_Args[] = {
+{MAKE_ARG("mode",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLIENT_CACHING_mode_Subargs},
+};
+
+/********** CLIENT GETNAME ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT GETNAME history */
+#define CLIENT_GETNAME_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT GETNAME tips */
+#define CLIENT_GETNAME_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT GETNAME key specs */
+#define CLIENT_GETNAME_Keyspecs NULL
+#endif
+
+/********** CLIENT GETREDIR ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT GETREDIR history */
+#define CLIENT_GETREDIR_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT GETREDIR tips */
+#define CLIENT_GETREDIR_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT GETREDIR key specs */
+#define CLIENT_GETREDIR_Keyspecs NULL
+#endif
+
+/********** CLIENT HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT HELP history */
+#define CLIENT_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT HELP tips */
+#define CLIENT_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT HELP key specs */
+#define CLIENT_HELP_Keyspecs NULL
+#endif
+
+/********** CLIENT ID ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT ID history */
+#define CLIENT_ID_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT ID tips */
+#define CLIENT_ID_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT ID key specs */
+#define CLIENT_ID_Keyspecs NULL
+#endif
+
+/********** CLIENT INFO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT INFO history */
+#define CLIENT_INFO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT INFO tips */
+const char *CLIENT_INFO_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT INFO key specs */
+#define CLIENT_INFO_Keyspecs NULL
+#endif
+
+/********** CLIENT KILL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT KILL history */
+commandHistory CLIENT_KILL_History[] = {
+{"2.8.12","Added new filter format."},
+{"2.8.12","`ID` option."},
+{"3.2.0","Added `master` type in for `TYPE` option."},
+{"5.0.0","Replaced `slave` `TYPE` with `replica`. `slave` still supported for backward compatibility."},
+{"6.2.0","`LADDR` option."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT KILL tips */
+#define CLIENT_KILL_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT KILL key specs */
+#define CLIENT_KILL_Keyspecs NULL
+#endif
+
+/* CLIENT KILL filter new_format client_type argument table */
+/* since="3.2.0" on `master` and "5.0.0" on `replica` match the history
+ * entries above; `slave` is kept for backward compatibility. */
+struct COMMAND_ARG CLIENT_KILL_filter_new_format_client_type_Subargs[] = {
+{MAKE_ARG("normal",ARG_TYPE_PURE_TOKEN,-1,"NORMAL",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("master",ARG_TYPE_PURE_TOKEN,-1,"MASTER",NULL,"3.2.0",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("slave",ARG_TYPE_PURE_TOKEN,-1,"SLAVE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("replica",ARG_TYPE_PURE_TOKEN,-1,"REPLICA",NULL,"5.0.0",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("pubsub",ARG_TYPE_PURE_TOKEN,-1,"PUBSUB",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT KILL filter new_format skipme argument table */
+struct COMMAND_ARG CLIENT_KILL_filter_new_format_skipme_Subargs[] = {
+{MAKE_ARG("yes",ARG_TYPE_PURE_TOKEN,-1,"YES",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("no",ARG_TYPE_PURE_TOKEN,-1,"NO",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT KILL filter new_format argument table */
+struct COMMAND_ARG CLIENT_KILL_filter_new_format_Subargs[] = {
+{MAKE_ARG("client-id",ARG_TYPE_INTEGER,-1,"ID",NULL,"2.8.12",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("client-type",ARG_TYPE_ONEOF,-1,"TYPE",NULL,"2.8.12",CMD_ARG_OPTIONAL,5,NULL),.subargs=CLIENT_KILL_filter_new_format_client_type_Subargs},
+{MAKE_ARG("username",ARG_TYPE_STRING,-1,"USER",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("addr",ARG_TYPE_STRING,-1,"ADDR",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL),.display_text="ip:port"},
+{MAKE_ARG("laddr",ARG_TYPE_STRING,-1,"LADDR",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL),.display_text="ip:port"},
+{MAKE_ARG("skipme",ARG_TYPE_ONEOF,-1,"SKIPME",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=CLIENT_KILL_filter_new_format_skipme_Subargs},
+};
+
+/* CLIENT KILL filter argument table */
+/* NOTE(review): the "2.8.12" in old-format's final MAKE_ARG slot is in the
+ * position used elsewhere for NULL -- presumably deprecated_since; confirm
+ * against the MAKE_ARG macro definition. */
+struct COMMAND_ARG CLIENT_KILL_filter_Subargs[] = {
+{MAKE_ARG("old-format",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,"2.8.12"),.display_text="ip:port"},
+{MAKE_ARG("new-format",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,6,NULL),.subargs=CLIENT_KILL_filter_new_format_Subargs},
+};
+
+/* CLIENT KILL argument table */
+struct COMMAND_ARG CLIENT_KILL_Args[] = {
+{MAKE_ARG("filter",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLIENT_KILL_filter_Subargs},
+};
+
+/********** CLIENT LIST ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT LIST history */
+commandHistory CLIENT_LIST_History[] = {
+{"2.8.12","Added unique client `id` field."},
+{"5.0.0","Added optional `TYPE` filter."},
+{"6.0.0","Added `user` field."},
+{"6.2.0","Added `argv-mem`, `tot-mem`, `laddr` and `redir` fields and the optional `ID` filter."},
+{"7.0.0","Added `resp`, `multi-mem`, `rbs` and `rbp` fields."},
+{"7.0.3","Added `ssub` field."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT LIST tips */
+const char *CLIENT_LIST_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT LIST key specs */
+#define CLIENT_LIST_Keyspecs NULL
+#endif
+
+/* CLIENT LIST client_type argument table */
+struct COMMAND_ARG CLIENT_LIST_client_type_Subargs[] = {
+{MAKE_ARG("normal",ARG_TYPE_PURE_TOKEN,-1,"NORMAL",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("master",ARG_TYPE_PURE_TOKEN,-1,"MASTER",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("replica",ARG_TYPE_PURE_TOKEN,-1,"REPLICA",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("pubsub",ARG_TYPE_PURE_TOKEN,-1,"PUBSUB",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT LIST argument table */
+/* `client-id` accepts multiple values (CMD_ARG_MULTIPLE) after a single
+ * `ID` token. */
+struct COMMAND_ARG CLIENT_LIST_Args[] = {
+{MAKE_ARG("client-type",ARG_TYPE_ONEOF,-1,"TYPE",NULL,"5.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=CLIENT_LIST_client_type_Subargs},
+{MAKE_ARG("client-id",ARG_TYPE_INTEGER,-1,"ID",NULL,"6.2.0",CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** CLIENT NO_EVICT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT NO_EVICT history */
+#define CLIENT_NO_EVICT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT NO_EVICT tips */
+#define CLIENT_NO_EVICT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT NO_EVICT key specs */
+#define CLIENT_NO_EVICT_Keyspecs NULL
+#endif
+
+/* CLIENT NO_EVICT enabled argument table */
+/* ON/OFF pure tokens; same shape as the NO_TOUCH table below. */
+struct COMMAND_ARG CLIENT_NO_EVICT_enabled_Subargs[] = {
+{MAKE_ARG("on",ARG_TYPE_PURE_TOKEN,-1,"ON",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("off",ARG_TYPE_PURE_TOKEN,-1,"OFF",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT NO_EVICT argument table */
+struct COMMAND_ARG CLIENT_NO_EVICT_Args[] = {
+{MAKE_ARG("enabled",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLIENT_NO_EVICT_enabled_Subargs},
+};
+
+/********** CLIENT NO_TOUCH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT NO_TOUCH history */
+#define CLIENT_NO_TOUCH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT NO_TOUCH tips */
+#define CLIENT_NO_TOUCH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT NO_TOUCH key specs */
+#define CLIENT_NO_TOUCH_Keyspecs NULL
+#endif
+
+/* CLIENT NO_TOUCH enabled argument table */
+struct COMMAND_ARG CLIENT_NO_TOUCH_enabled_Subargs[] = {
+{MAKE_ARG("on",ARG_TYPE_PURE_TOKEN,-1,"ON",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("off",ARG_TYPE_PURE_TOKEN,-1,"OFF",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT NO_TOUCH argument table */
+struct COMMAND_ARG CLIENT_NO_TOUCH_Args[] = {
+{MAKE_ARG("enabled",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLIENT_NO_TOUCH_enabled_Subargs},
+};
+
+/********** CLIENT PAUSE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT PAUSE history */
+commandHistory CLIENT_PAUSE_History[] = {
+{"6.2.0","`CLIENT PAUSE WRITE` mode added along with the `mode` option."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT PAUSE tips */
+#define CLIENT_PAUSE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT PAUSE key specs */
+#define CLIENT_PAUSE_Keyspecs NULL
+#endif
+
+/* CLIENT PAUSE mode argument table */
+struct COMMAND_ARG CLIENT_PAUSE_mode_Subargs[] = {
+{MAKE_ARG("write",ARG_TYPE_PURE_TOKEN,-1,"WRITE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("all",ARG_TYPE_PURE_TOKEN,-1,"ALL",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT PAUSE argument table */
+/* `mode` is optional with since="6.2.0", matching the history entry above. */
+struct COMMAND_ARG CLIENT_PAUSE_Args[] = {
+{MAKE_ARG("timeout",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mode",ARG_TYPE_ONEOF,-1,NULL,NULL,"6.2.0",CMD_ARG_OPTIONAL,2,NULL),.subargs=CLIENT_PAUSE_mode_Subargs},
+};
+
+/********** CLIENT REPLY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT REPLY history */
+#define CLIENT_REPLY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT REPLY tips */
+#define CLIENT_REPLY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT REPLY key specs */
+#define CLIENT_REPLY_Keyspecs NULL
+#endif
+
+/* CLIENT REPLY action argument table */
+struct COMMAND_ARG CLIENT_REPLY_action_Subargs[] = {
+{MAKE_ARG("on",ARG_TYPE_PURE_TOKEN,-1,"ON",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("off",ARG_TYPE_PURE_TOKEN,-1,"OFF",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("skip",ARG_TYPE_PURE_TOKEN,-1,"SKIP",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT REPLY argument table */
+struct COMMAND_ARG CLIENT_REPLY_Args[] = {
+{MAKE_ARG("action",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,3,NULL),.subargs=CLIENT_REPLY_action_Subargs},
+};
+
+/********** CLIENT SETINFO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT SETINFO history */
+#define CLIENT_SETINFO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT SETINFO tips */
+/* request_policy/response_policy tips instruct routing layers; presumably
+ * "all_nodes"/"all_succeeded" means fan out and require every node to
+ * succeed -- verify against the command-tips documentation. */
+const char *CLIENT_SETINFO_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT SETINFO key specs */
+#define CLIENT_SETINFO_Keyspecs NULL
+#endif
+
+/* CLIENT SETINFO attr argument table */
+struct COMMAND_ARG CLIENT_SETINFO_attr_Subargs[] = {
+{MAKE_ARG("libname",ARG_TYPE_STRING,-1,"LIB-NAME",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("libver",ARG_TYPE_STRING,-1,"LIB-VER",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT SETINFO argument table */
+struct COMMAND_ARG CLIENT_SETINFO_Args[] = {
+{MAKE_ARG("attr",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLIENT_SETINFO_attr_Subargs},
+};
+
+/********** CLIENT SETNAME ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT SETNAME history */
+#define CLIENT_SETNAME_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT SETNAME tips */
+const char *CLIENT_SETNAME_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT SETNAME key specs */
+#define CLIENT_SETNAME_Keyspecs NULL
+#endif
+
+/* CLIENT SETNAME argument table */
+struct COMMAND_ARG CLIENT_SETNAME_Args[] = {
+{MAKE_ARG("connection-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** CLIENT TRACKING ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT TRACKING history */
+#define CLIENT_TRACKING_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT TRACKING tips */
+#define CLIENT_TRACKING_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT TRACKING key specs */
+#define CLIENT_TRACKING_Keyspecs NULL
+#endif
+
+/* CLIENT TRACKING status argument table */
+struct COMMAND_ARG CLIENT_TRACKING_status_Subargs[] = {
+{MAKE_ARG("on",ARG_TYPE_PURE_TOKEN,-1,"ON",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("off",ARG_TYPE_PURE_TOKEN,-1,"OFF",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT TRACKING argument table */
+/* `prefix` may repeat with its token each time
+ * (CMD_ARG_MULTIPLE|CMD_ARG_MULTIPLE_TOKEN). */
+struct COMMAND_ARG CLIENT_TRACKING_Args[] = {
+{MAKE_ARG("status",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLIENT_TRACKING_status_Subargs},
+{MAKE_ARG("client-id",ARG_TYPE_INTEGER,-1,"REDIRECT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("prefix",ARG_TYPE_STRING,-1,"PREFIX",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE|CMD_ARG_MULTIPLE_TOKEN,0,NULL)},
+{MAKE_ARG("bcast",ARG_TYPE_PURE_TOKEN,-1,"BCAST",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("optin",ARG_TYPE_PURE_TOKEN,-1,"OPTIN",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("optout",ARG_TYPE_PURE_TOKEN,-1,"OPTOUT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("noloop",ARG_TYPE_PURE_TOKEN,-1,"NOLOOP",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** CLIENT TRACKINGINFO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT TRACKINGINFO history */
+#define CLIENT_TRACKINGINFO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT TRACKINGINFO tips */
+#define CLIENT_TRACKINGINFO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT TRACKINGINFO key specs */
+#define CLIENT_TRACKINGINFO_Keyspecs NULL
+#endif
+
+/********** CLIENT UNBLOCK ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT UNBLOCK history */
+#define CLIENT_UNBLOCK_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT UNBLOCK tips */
+#define CLIENT_UNBLOCK_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT UNBLOCK key specs */
+#define CLIENT_UNBLOCK_Keyspecs NULL
+#endif
+
+/* CLIENT UNBLOCK unblock_type argument table */
+/* TIMEOUT/ERROR pure tokens; the ONEOF parent below is optional. */
+struct COMMAND_ARG CLIENT_UNBLOCK_unblock_type_Subargs[] = {
+{MAKE_ARG("timeout",ARG_TYPE_PURE_TOKEN,-1,"TIMEOUT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("error",ARG_TYPE_PURE_TOKEN,-1,"ERROR",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLIENT UNBLOCK argument table */
+struct COMMAND_ARG CLIENT_UNBLOCK_Args[] = {
+{MAKE_ARG("client-id",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unblock-type",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=CLIENT_UNBLOCK_unblock_type_Subargs},
+};
+
+/********** CLIENT UNPAUSE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT UNPAUSE history */
+#define CLIENT_UNPAUSE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT UNPAUSE tips */
+#define CLIENT_UNPAUSE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT UNPAUSE key specs */
+#define CLIENT_UNPAUSE_Keyspecs NULL
+#endif
+
+/* CLIENT command table */
+struct COMMAND_STRUCT CLIENT_Subcommands[] = {
+{MAKE_CMD("caching","Instructs the server whether to track the keys in the next request.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_CACHING_History,0,CLIENT_CACHING_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_CACHING_Keyspecs,0,NULL,1),.args=CLIENT_CACHING_Args},
+{MAKE_CMD("getname","Returns the name of the connection.","O(1)","2.6.9",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_GETNAME_History,0,CLIENT_GETNAME_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_GETNAME_Keyspecs,0,NULL,0)},
+{MAKE_CMD("getredir","Returns the client ID to which the connection's tracking notifications are redirected.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_GETREDIR_History,0,CLIENT_GETREDIR_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_GETREDIR_Keyspecs,0,NULL,0)},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_HELP_History,0,CLIENT_HELP_Tips,0,clientCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("id","Returns the unique client ID of the connection.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_ID_History,0,CLIENT_ID_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_ID_Keyspecs,0,NULL,0)},
+{MAKE_CMD("info","Returns information about the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_INFO_History,0,CLIENT_INFO_Tips,1,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_INFO_Keyspecs,0,NULL,0)},
+{MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,5,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args},
+{MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,6,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args},
+{MAKE_CMD("no-evict","Sets the client eviction mode of the connection.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_EVICT_History,0,CLIENT_NO_EVICT_Tips,0,clientCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_NO_EVICT_Keyspecs,0,NULL,1),.args=CLIENT_NO_EVICT_Args},
+{MAKE_CMD("no-touch","Controls whether commands sent by the client affect the LRU/LFU of accessed keys.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_TOUCH_History,0,CLIENT_NO_TOUCH_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_NO_TOUCH_Keyspecs,0,NULL,1),.args=CLIENT_NO_TOUCH_Args},
+{MAKE_CMD("pause","Suspends commands processing.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_PAUSE_History,1,CLIENT_PAUSE_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_PAUSE_Keyspecs,0,NULL,2),.args=CLIENT_PAUSE_Args},
+{MAKE_CMD("reply","Instructs the server whether to reply to commands.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_REPLY_History,0,CLIENT_REPLY_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_REPLY_Keyspecs,0,NULL,1),.args=CLIENT_REPLY_Args},
+{MAKE_CMD("setinfo","Sets information specific to the client or connection.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_SETINFO_History,0,CLIENT_SETINFO_Tips,2,clientSetinfoCommand,4,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_SETINFO_Keyspecs,0,NULL,1),.args=CLIENT_SETINFO_Args},
+{MAKE_CMD("setname","Sets the connection name.","O(1)","2.6.9",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_SETNAME_History,0,CLIENT_SETNAME_Tips,2,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_SETNAME_Keyspecs,0,NULL,1),.args=CLIENT_SETNAME_Args},
+{MAKE_CMD("tracking","Controls server-assisted client-side caching for the connection.","O(1). Some options may introduce additional complexity.","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_TRACKING_History,0,CLIENT_TRACKING_Tips,0,clientCommand,-3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_TRACKING_Keyspecs,0,NULL,7),.args=CLIENT_TRACKING_Args},
+{MAKE_CMD("trackinginfo","Returns information about server-assisted client-side caching for the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_TRACKINGINFO_History,0,CLIENT_TRACKINGINFO_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_TRACKINGINFO_Keyspecs,0,NULL,0)},
+{MAKE_CMD("unblock","Unblocks a client blocked by a blocking command from a different connection.","O(log N) where N is the number of client connections","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_UNBLOCK_History,0,CLIENT_UNBLOCK_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_UNBLOCK_Keyspecs,0,NULL,2),.args=CLIENT_UNBLOCK_Args},
+{MAKE_CMD("unpause","Resumes processing commands from paused clients.","O(N) Where N is the number of paused clients","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_UNPAUSE_History,0,CLIENT_UNPAUSE_Tips,0,clientCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_UNPAUSE_Keyspecs,0,NULL,0)},
+{0}
+};
+
+/********** CLIENT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT history */
+#define CLIENT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT tips */
+#define CLIENT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT key specs */
+#define CLIENT_Keyspecs NULL
+#endif
+
+/********** ECHO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ECHO history */
+#define ECHO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ECHO tips */
+#define ECHO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ECHO key specs */
+#define ECHO_Keyspecs NULL
+#endif
+
+/* ECHO argument table */
+struct COMMAND_ARG ECHO_Args[] = {
+{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HELLO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HELLO history */
+/* One history entry; the count is presumably carried by the MAKE_CMD line
+ * that references this table (not visible in this hunk). */
+commandHistory HELLO_History[] = {
+{"6.2.0","`protover` made optional; when called without arguments the command reports the current connection's context."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HELLO tips */
+#define HELLO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HELLO key specs */
+#define HELLO_Keyspecs NULL
+#endif
+
+/* HELLO arguments auth argument table */
+struct COMMAND_ARG HELLO_arguments_auth_Subargs[] = {
+{MAKE_ARG("username",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("password",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* HELLO arguments argument table */
+struct COMMAND_ARG HELLO_arguments_Subargs[] = {
+{MAKE_ARG("protover",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("auth",ARG_TYPE_BLOCK,-1,"AUTH",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=HELLO_arguments_auth_Subargs},
+{MAKE_ARG("clientname",ARG_TYPE_STRING,-1,"SETNAME",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* HELLO argument table */
+struct COMMAND_ARG HELLO_Args[] = {
+{MAKE_ARG("arguments",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=HELLO_arguments_Subargs},
+};
+
+/********** PING ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PING history */
+#define PING_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PING tips */
+/* Cluster routing hints: fan out to all shards, succeed only if all do. */
+const char *PING_Tips[] = {
+"request_policy:all_shards",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PING key specs */
+#define PING_Keyspecs NULL
+#endif
+
+/* PING argument table */
+struct COMMAND_ARG PING_Args[] = {
+{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** QUIT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* QUIT history */
+#define QUIT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* QUIT tips */
+#define QUIT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* QUIT key specs */
+#define QUIT_Keyspecs NULL
+#endif
+
+/********** RESET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RESET history */
+#define RESET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RESET tips */
+#define RESET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RESET key specs */
+#define RESET_Keyspecs NULL
+#endif
+
+/********** SELECT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SELECT history */
+#define SELECT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SELECT tips */
+#define SELECT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SELECT key specs */
+#define SELECT_Keyspecs NULL
+#endif
+
+/* SELECT argument table */
+struct COMMAND_ARG SELECT_Args[] = {
+{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** COPY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* COPY history */
+#define COPY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* COPY tips */
+#define COPY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* COPY key specs */
+/* Two specs: arg 1 (source) is read-only access; arg 2 (destination) is
+ * overwrite+update. .bs.index is the argv position of the first key. */
+keySpec COPY_Keyspecs[2] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* COPY argument table */
+struct COMMAND_ARG COPY_Args[] = {
+{MAKE_ARG("source",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("destination",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("destination-db",ARG_TYPE_INTEGER,-1,"DB",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("replace",ARG_TYPE_PURE_TOKEN,-1,"REPLACE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** DEL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* DEL history */
+#define DEL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* DEL tips */
+const char *DEL_Tips[] = {
+"request_policy:multi_shard",
+"response_policy:agg_sum",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* DEL key specs */
+/* .fk.range lastkey of -1 presumably means "all remaining args are keys"
+ * (variadic key list) -- same pattern as EXISTS below. */
+keySpec DEL_Keyspecs[1] = {
+{NULL,CMD_KEY_RM|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* DEL argument table */
+struct COMMAND_ARG DEL_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** DUMP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* DUMP history */
+#define DUMP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* DUMP tips */
+const char *DUMP_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* DUMP key specs */
+keySpec DUMP_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* DUMP argument table */
+struct COMMAND_ARG DUMP_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** EXISTS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* EXISTS history */
+commandHistory EXISTS_History[] = {
+{"3.0.3","Accepts multiple `key` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* EXISTS tips */
+const char *EXISTS_Tips[] = {
+"request_policy:multi_shard",
+"response_policy:agg_sum",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* EXISTS key specs */
+keySpec EXISTS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* EXISTS argument table */
+struct COMMAND_ARG EXISTS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** EXPIRE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* EXPIRE history */
+commandHistory EXPIRE_History[] = {
+{"7.0.0","Added options: `NX`, `XX`, `GT` and `LT`."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* EXPIRE tips */
+#define EXPIRE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* EXPIRE key specs */
+keySpec EXPIRE_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* EXPIRE condition argument table */
+/* Mutually exclusive NX/XX/GT/LT condition tokens (since 7.0.0, per the
+ * "7.0.0" since-field on the ONEOF entry below). */
+struct COMMAND_ARG EXPIRE_condition_Subargs[] = {
+{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* EXPIRE argument table */
+struct COMMAND_ARG EXPIRE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"7.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=EXPIRE_condition_Subargs},
+};
+
+/********** EXPIREAT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* EXPIREAT history */
+commandHistory EXPIREAT_History[] = {
+{"7.0.0","Added options: `NX`, `XX`, `GT` and `LT`."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* EXPIREAT tips */
+#define EXPIREAT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* EXPIREAT key specs */
+keySpec EXPIREAT_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* EXPIREAT condition argument table */
+struct COMMAND_ARG EXPIREAT_condition_Subargs[] = {
+{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* EXPIREAT argument table */
+struct COMMAND_ARG EXPIREAT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"7.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=EXPIREAT_condition_Subargs},
+};
+
+/********** EXPIRETIME ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* EXPIRETIME history */
+#define EXPIRETIME_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* EXPIRETIME tips */
+#define EXPIRETIME_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* EXPIRETIME key specs */
+keySpec EXPIRETIME_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* EXPIRETIME argument table */
+struct COMMAND_ARG EXPIRETIME_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** KEYS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* KEYS history */
+#define KEYS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* KEYS tips */
+const char *KEYS_Tips[] = {
+"request_policy:all_shards",
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* KEYS key specs */
+#define KEYS_Keyspecs NULL
+#endif
+
+/* KEYS argument table */
+struct COMMAND_ARG KEYS_Args[] = {
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** MIGRATE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MIGRATE history */
+commandHistory MIGRATE_History[] = {
+{"3.0.0","Added the `COPY` and `REPLACE` options."},
+{"3.0.6","Added the `KEYS` option."},
+{"4.0.7","Added the `AUTH` option."},
+{"6.0.0","Added the `AUTH2` option."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MIGRATE tips */
+const char *MIGRATE_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MIGRATE key specs */
+/* Spec 1: single key at argv[3]. Spec 2: keys after the "KEYS" keyword,
+ * marked CMD_KEY_INCOMPLETE (keyword search, not fully static). */
+keySpec MIGRATE_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={3},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE|CMD_KEY_INCOMPLETE,KSPEC_BS_KEYWORD,.bs.keyword={"KEYS",-2},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* MIGRATE key_selector argument table */
+struct COMMAND_ARG MIGRATE_key_selector_Subargs[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+/* NOTE(review): """" is two adjacent empty string literals, i.e. an empty
+ * token string -- verify against migrate.json whether the intended token
+ * is the two-character literal "\"\"" (the empty-string placeholder). */
+{MAKE_ARG("empty-string",ARG_TYPE_PURE_TOKEN,-1,"""",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* MIGRATE authentication auth2 argument table */
+struct COMMAND_ARG MIGRATE_authentication_auth2_Subargs[] = {
+{MAKE_ARG("username",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("password",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* MIGRATE authentication argument table */
+struct COMMAND_ARG MIGRATE_authentication_Subargs[] = {
+{MAKE_ARG("auth",ARG_TYPE_STRING,-1,"AUTH",NULL,"4.0.7",CMD_ARG_NONE,0,NULL),.display_text="password"},
+{MAKE_ARG("auth2",ARG_TYPE_BLOCK,-1,"AUTH2",NULL,"6.0.0",CMD_ARG_NONE,2,NULL),.subargs=MIGRATE_authentication_auth2_Subargs},
+};
+
+/* MIGRATE argument table */
+struct COMMAND_ARG MIGRATE_Args[] = {
+{MAKE_ARG("host",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("port",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key-selector",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=MIGRATE_key_selector_Subargs},
+{MAKE_ARG("destination-db",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("timeout",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("copy",ARG_TYPE_PURE_TOKEN,-1,"COPY",NULL,"3.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("replace",ARG_TYPE_PURE_TOKEN,-1,"REPLACE",NULL,"3.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("authentication",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=MIGRATE_authentication_Subargs},
+{MAKE_ARG("keys",ARG_TYPE_KEY,1,"KEYS",NULL,"3.0.6",CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL),.display_text="key"},
+};
+
+/********** MOVE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MOVE history */
+#define MOVE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MOVE tips */
+#define MOVE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MOVE key specs */
+keySpec MOVE_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* MOVE argument table */
+struct COMMAND_ARG MOVE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("db",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** OBJECT ENCODING ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* OBJECT ENCODING history */
+#define OBJECT_ENCODING_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* OBJECT ENCODING tips */
+const char *OBJECT_ENCODING_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* OBJECT ENCODING key specs */
+/* Key at argv[2] (argv[1] is the subcommand name) -- same for the other
+ * OBJECT subcommands below. */
+keySpec OBJECT_ENCODING_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* OBJECT ENCODING argument table */
+struct COMMAND_ARG OBJECT_ENCODING_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** OBJECT FREQ ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* OBJECT FREQ history */
+#define OBJECT_FREQ_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* OBJECT FREQ tips */
+const char *OBJECT_FREQ_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* OBJECT FREQ key specs */
+keySpec OBJECT_FREQ_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* OBJECT FREQ argument table */
+struct COMMAND_ARG OBJECT_FREQ_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** OBJECT HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* OBJECT HELP history */
+#define OBJECT_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* OBJECT HELP tips */
+#define OBJECT_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* OBJECT HELP key specs */
+#define OBJECT_HELP_Keyspecs NULL
+#endif
+
+/********** OBJECT IDLETIME ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* OBJECT IDLETIME history */
+#define OBJECT_IDLETIME_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* OBJECT IDLETIME tips */
+const char *OBJECT_IDLETIME_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* OBJECT IDLETIME key specs */
+keySpec OBJECT_IDLETIME_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* OBJECT IDLETIME argument table */
+struct COMMAND_ARG OBJECT_IDLETIME_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** OBJECT REFCOUNT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* OBJECT REFCOUNT history */
+#define OBJECT_REFCOUNT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* OBJECT REFCOUNT tips */
+const char *OBJECT_REFCOUNT_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* OBJECT REFCOUNT key specs */
+keySpec OBJECT_REFCOUNT_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* OBJECT REFCOUNT argument table */
+struct COMMAND_ARG OBJECT_REFCOUNT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* OBJECT command table */
+/* Dispatch table for OBJECT subcommands; terminated by the {0} sentinel. */
+struct COMMAND_STRUCT OBJECT_Subcommands[] = {
+{MAKE_CMD("encoding","Returns the internal encoding of a Redis object.","O(1)","2.2.3",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,OBJECT_ENCODING_History,0,OBJECT_ENCODING_Tips,1,objectCommand,3,CMD_READONLY,ACL_CATEGORY_KEYSPACE,OBJECT_ENCODING_Keyspecs,1,NULL,1),.args=OBJECT_ENCODING_Args},
+{MAKE_CMD("freq","Returns the logarithmic access frequency counter of a Redis object.","O(1)","4.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,OBJECT_FREQ_History,0,OBJECT_FREQ_Tips,1,objectCommand,3,CMD_READONLY,ACL_CATEGORY_KEYSPACE,OBJECT_FREQ_Keyspecs,1,NULL,1),.args=OBJECT_FREQ_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,OBJECT_HELP_History,0,OBJECT_HELP_Tips,0,objectCommand,2,CMD_LOADING|CMD_STALE,ACL_CATEGORY_KEYSPACE,OBJECT_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("idletime","Returns the time since the last access to a Redis object.","O(1)","2.2.3",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,OBJECT_IDLETIME_History,0,OBJECT_IDLETIME_Tips,1,objectCommand,3,CMD_READONLY,ACL_CATEGORY_KEYSPACE,OBJECT_IDLETIME_Keyspecs,1,NULL,1),.args=OBJECT_IDLETIME_Args},
+{MAKE_CMD("refcount","Returns the reference count of a value of a key.","O(1)","2.2.3",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,OBJECT_REFCOUNT_History,0,OBJECT_REFCOUNT_Tips,1,objectCommand,3,CMD_READONLY,ACL_CATEGORY_KEYSPACE,OBJECT_REFCOUNT_Keyspecs,1,NULL,1),.args=OBJECT_REFCOUNT_Args},
+{0}
+};
+
+/********** OBJECT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* OBJECT history */
+#define OBJECT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* OBJECT tips */
+#define OBJECT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* OBJECT key specs */
+#define OBJECT_Keyspecs NULL
+#endif
+
+/********** PERSIST ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PERSIST history */
+#define PERSIST_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PERSIST tips */
+#define PERSIST_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PERSIST key specs */
+keySpec PERSIST_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* PERSIST argument table */
+struct COMMAND_ARG PERSIST_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** PEXPIRE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PEXPIRE history */
+commandHistory PEXPIRE_History[] = {
+{"7.0.0","Added options: `NX`, `XX`, `GT` and `LT`."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PEXPIRE tips */
+#define PEXPIRE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PEXPIRE key specs */
+keySpec PEXPIRE_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* PEXPIRE condition argument table */
+/* Millisecond twin of EXPIRE; same NX/XX/GT/LT condition tokens. */
+struct COMMAND_ARG PEXPIRE_condition_Subargs[] = {
+{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* PEXPIRE argument table */
+struct COMMAND_ARG PEXPIRE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"7.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=PEXPIRE_condition_Subargs},
+};
+
+/********** PEXPIREAT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PEXPIREAT history */
+commandHistory PEXPIREAT_History[] = {
+{"7.0.0","Added options: `NX`, `XX`, `GT` and `LT`."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PEXPIREAT tips */
+#define PEXPIREAT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PEXPIREAT key specs */
+keySpec PEXPIREAT_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* PEXPIREAT condition argument table */
+struct COMMAND_ARG PEXPIREAT_condition_Subargs[] = {
+{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* PEXPIREAT argument table */
+struct COMMAND_ARG PEXPIREAT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"7.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=PEXPIREAT_condition_Subargs},
+};
+
+/********** PEXPIRETIME ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PEXPIRETIME history */
+#define PEXPIRETIME_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PEXPIRETIME tips */
+#define PEXPIRETIME_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PEXPIRETIME key specs */
+keySpec PEXPIRETIME_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* PEXPIRETIME argument table */
+struct COMMAND_ARG PEXPIRETIME_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** PTTL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PTTL history */
+commandHistory PTTL_History[] = {
+{"2.8.0","Added the -2 reply."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PTTL tips */
+const char *PTTL_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PTTL key specs */
+keySpec PTTL_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* PTTL argument table */
+struct COMMAND_ARG PTTL_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** RANDOMKEY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RANDOMKEY history */
+#define RANDOMKEY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RANDOMKEY tips */
+const char *RANDOMKEY_Tips[] = {
+"request_policy:all_shards",
+"response_policy:special",
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RANDOMKEY key specs */
+#define RANDOMKEY_Keyspecs NULL
+#endif
+
+/********** RENAME ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RENAME history */
+#define RENAME_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RENAME tips */
+#define RENAME_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RENAME key specs */
+keySpec RENAME_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* RENAME argument table */
+struct COMMAND_ARG RENAME_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("newkey",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** RENAMENX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RENAMENX history */
+commandHistory RENAMENX_History[] = {
+{"3.2.0","The command no longer returns an error when source and destination names are the same."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RENAMENX tips */
+#define RENAMENX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RENAMENX key specs */
+keySpec RENAMENX_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_OW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* RENAMENX argument table */
+struct COMMAND_ARG RENAMENX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("newkey",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** RESTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RESTORE history */
+commandHistory RESTORE_History[] = {
+{"3.0.0","Added the `REPLACE` modifier."},
+{"5.0.0","Added the `ABSTTL` modifier."},
+{"5.0.0","Added the `IDLETIME` and `FREQ` options."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RESTORE tips */
+#define RESTORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RESTORE key specs */
+/* Single key at argv[1], written without reading (OW|UPDATE). */
+keySpec RESTORE_Keyspecs[1] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* RESTORE argument table */
+/* Optional token args carry the Redis version that introduced them ("3.0.0",
+ * "5.0.0") in the since field, mirroring the history entries above. */
+struct COMMAND_ARG RESTORE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ttl",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("serialized-value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("replace",ARG_TYPE_PURE_TOKEN,-1,"REPLACE",NULL,"3.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("absttl",ARG_TYPE_PURE_TOKEN,-1,"ABSTTL",NULL,"5.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,"IDLETIME",NULL,"5.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("frequency",ARG_TYPE_INTEGER,-1,"FREQ",NULL,"5.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** SCAN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCAN history */
+commandHistory SCAN_History[] = {
+{"6.0.0","Added the `TYPE` subcommand."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCAN tips */
+/* Cluster-routing tips: cursor-based iteration is nondeterministic and needs
+ * special per-shard request/response handling rather than a simple aggregate. */
+const char *SCAN_Tips[] = {
+"nondeterministic_output",
+"request_policy:special",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCAN key specs */
+/* SCAN takes no key arguments, hence no key specs. */
+#define SCAN_Keyspecs NULL
+#endif
+
+/* SCAN argument table */
+struct COMMAND_ARG SCAN_Args[] = {
+{MAKE_ARG("cursor",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,"MATCH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("type",ARG_TYPE_STRING,-1,"TYPE",NULL,"6.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** SORT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SORT history */
+#define SORT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SORT tips */
+#define SORT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SORT key specs */
+/* spec 1: the sorted key at argv[1]; specs 2-3 are declared 'unknown' (see the
+ * embedded notes) because BY/GET key names come from key content and STORE can
+ * appear anywhere, so key positions cannot be described statically. */
+keySpec SORT_Keyspecs[3] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{"For the optional BY/GET keyword. It is marked 'unknown' because the key names derive from the content of the key we sort",CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_UNKNOWN,{{0}},KSPEC_FK_UNKNOWN,{{0}}},{"For the optional STORE keyword. It is marked 'unknown' because the keyword can appear anywhere in the argument array",CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_UNKNOWN,{{0}},KSPEC_FK_UNKNOWN,{{0}}}
+};
+#endif
+
+/* SORT limit argument table */
+struct COMMAND_ARG SORT_limit_Subargs[] = {
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SORT order argument table */
+struct COMMAND_ARG SORT_order_Subargs[] = {
+{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SORT argument table */
+/* GET is repeatable with its token (MULTIPLE|MULTIPLE_TOKEN); STORE's
+ * destination links to key-spec index 2 (the 'unknown' STORE spec above). */
+struct COMMAND_ARG SORT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("by-pattern",ARG_TYPE_PATTERN,1,"BY",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL),.display_text="pattern"},
+{MAKE_ARG("limit",ARG_TYPE_BLOCK,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=SORT_limit_Subargs},
+{MAKE_ARG("get-pattern",ARG_TYPE_PATTERN,1,"GET",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE|CMD_ARG_MULTIPLE_TOKEN,0,NULL),.display_text="pattern"},
+{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=SORT_order_Subargs},
+{MAKE_ARG("sorting",ARG_TYPE_PURE_TOKEN,-1,"ALPHA",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("destination",ARG_TYPE_KEY,2,"STORE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** SORT_RO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SORT_RO history */
+#define SORT_RO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SORT_RO tips */
+#define SORT_RO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SORT_RO key specs */
+/* Read-only SORT variant: same specs as SORT minus the STORE spec. */
+keySpec SORT_RO_Keyspecs[2] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{"For the optional BY/GET keyword. It is marked 'unknown' because the key names derive from the content of the key we sort",CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_UNKNOWN,{{0}},KSPEC_FK_UNKNOWN,{{0}}}
+};
+#endif
+
+/* SORT_RO limit argument table */
+struct COMMAND_ARG SORT_RO_limit_Subargs[] = {
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SORT_RO order argument table */
+struct COMMAND_ARG SORT_RO_order_Subargs[] = {
+{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SORT_RO argument table */
+/* Same as SORT_Args without the STORE destination arg. */
+struct COMMAND_ARG SORT_RO_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("by-pattern",ARG_TYPE_PATTERN,1,"BY",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL),.display_text="pattern"},
+{MAKE_ARG("limit",ARG_TYPE_BLOCK,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=SORT_RO_limit_Subargs},
+{MAKE_ARG("get-pattern",ARG_TYPE_PATTERN,1,"GET",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE|CMD_ARG_MULTIPLE_TOKEN,0,NULL),.display_text="pattern"},
+{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=SORT_RO_order_Subargs},
+{MAKE_ARG("sorting",ARG_TYPE_PURE_TOKEN,-1,"ALPHA",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** TOUCH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* TOUCH history */
+#define TOUCH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* TOUCH tips */
+/* Multi-key command: fan out per shard and sum the per-shard counts. */
+const char *TOUCH_Tips[] = {
+"request_policy:multi_shard",
+"response_policy:agg_sum",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* TOUCH key specs */
+/* Keys start at argv[1]; lastkey -1 presumably means "through the last
+ * argument" (variadic key list) — confirm against the keySpec range semantics. */
+keySpec TOUCH_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* TOUCH argument table */
+struct COMMAND_ARG TOUCH_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** TTL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* TTL history */
+commandHistory TTL_History[] = {
+{"2.8.0","Added the -2 reply."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* TTL tips */
+/* Reply depends on wall-clock time remaining, hence nondeterministic. */
+const char *TTL_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* TTL key specs */
+keySpec TTL_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* TTL argument table */
+struct COMMAND_ARG TTL_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** TYPE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* TYPE history */
+#define TYPE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* TYPE tips */
+#define TYPE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* TYPE key specs */
+/* RO without ACCESS: only the key's type is inspected, not its value. */
+keySpec TYPE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* TYPE argument table */
+struct COMMAND_ARG TYPE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** UNLINK ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* UNLINK history */
+#define UNLINK_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* UNLINK tips */
+/* Multi-key delete: fan out per shard, sum the number of keys removed. */
+const char *UNLINK_Tips[] = {
+"request_policy:multi_shard",
+"response_policy:agg_sum",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* UNLINK key specs */
+/* RM|DELETE over the variadic key list starting at argv[1] (lastkey -1). */
+keySpec UNLINK_Keyspecs[1] = {
+{NULL,CMD_KEY_RM|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* UNLINK argument table */
+struct COMMAND_ARG UNLINK_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** WAIT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* WAIT history */
+#define WAIT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* WAIT tips */
+/* Broadcast to all shards; the aggregate reply is the minimum ack count. */
+const char *WAIT_Tips[] = {
+"request_policy:all_shards",
+"response_policy:agg_min",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* WAIT key specs */
+/* WAIT takes no key arguments. */
+#define WAIT_Keyspecs NULL
+#endif
+
+/* WAIT argument table */
+struct COMMAND_ARG WAIT_Args[] = {
+{MAKE_ARG("numreplicas",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("timeout",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** WAITAOF ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* WAITAOF history */
+#define WAITAOF_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* WAITAOF tips */
+/* Same cluster policy as WAIT: all shards, minimum-aggregated reply. */
+const char *WAITAOF_Tips[] = {
+"request_policy:all_shards",
+"response_policy:agg_min",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* WAITAOF key specs */
+/* WAITAOF takes no key arguments. */
+#define WAITAOF_Keyspecs NULL
+#endif
+
+/* WAITAOF argument table */
+struct COMMAND_ARG WAITAOF_Args[] = {
+{MAKE_ARG("numlocal",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numreplicas",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("timeout",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** GEOADD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEOADD history */
+commandHistory GEOADD_History[] = {
+{"6.2.0","Added the `CH`, `NX` and `XX` options."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEOADD tips */
+#define GEOADD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEOADD key specs */
+keySpec GEOADD_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEOADD condition argument table */
+/* ONEOF: NX and XX are mutually exclusive. */
+struct COMMAND_ARG GEOADD_condition_Subargs[] = {
+{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOADD data argument table */
+/* One longitude/latitude/member triplet, repeatable via the MULTIPLE block below. */
+struct COMMAND_ARG GEOADD_data_Subargs[] = {
+{MAKE_ARG("longitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("latitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOADD argument table */
+struct COMMAND_ARG GEOADD_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"6.2.0",CMD_ARG_OPTIONAL,2,NULL),.subargs=GEOADD_condition_Subargs},
+{MAKE_ARG("change",ARG_TYPE_PURE_TOKEN,-1,"CH",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,3,NULL),.subargs=GEOADD_data_Subargs},
+};
+
+/********** GEODIST ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEODIST history */
+#define GEODIST_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEODIST tips */
+#define GEODIST_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEODIST key specs */
+keySpec GEODIST_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEODIST unit argument table */
+/* ONEOF of the distance units: meters, kilometers, feet, miles. */
+struct COMMAND_ARG GEODIST_unit_Subargs[] = {
+{MAKE_ARG("m",ARG_TYPE_PURE_TOKEN,-1,"M",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("km",ARG_TYPE_PURE_TOKEN,-1,"KM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ft",ARG_TYPE_PURE_TOKEN,-1,"FT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mi",ARG_TYPE_PURE_TOKEN,-1,"MI",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEODIST argument table */
+struct COMMAND_ARG GEODIST_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member1",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member2",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=GEODIST_unit_Subargs},
+};
+
+/********** GEOHASH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEOHASH history */
+#define GEOHASH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEOHASH tips */
+#define GEOHASH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEOHASH key specs */
+keySpec GEOHASH_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEOHASH argument table */
+/* member is OPTIONAL|MULTIPLE: zero or more member names after the key. */
+struct COMMAND_ARG GEOHASH_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** GEOPOS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEOPOS history */
+#define GEOPOS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEOPOS tips */
+#define GEOPOS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEOPOS key specs */
+keySpec GEOPOS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEOPOS argument table */
+/* Mirrors GEOHASH: one key plus zero or more member names. */
+struct COMMAND_ARG GEOPOS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** GEORADIUS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEORADIUS history */
+commandHistory GEORADIUS_History[] = {
+{"6.2.0","Added the `ANY` option for `COUNT`."},
+{"7.0.0","Added support for uppercase unit names."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEORADIUS tips */
+#define GEORADIUS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEORADIUS key specs */
+/* spec 1: searched key at argv[1]; specs 2-3 locate the optional STORE /
+ * STOREDIST destination keys by keyword. The 6 in .bs.keyword={"STORE",6} is
+ * presumably the argv index where the keyword scan starts (after the 6 fixed
+ * args: cmd, key, lon, lat, radius, unit) — confirm against the keyword
+ * begin_search semantics. */
+keySpec GEORADIUS_Keyspecs[3] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_KEYWORD,.bs.keyword={"STORE",6},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_KEYWORD,.bs.keyword={"STOREDIST",6},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEORADIUS unit argument table */
+struct COMMAND_ARG GEORADIUS_unit_Subargs[] = {
+{MAKE_ARG("m",ARG_TYPE_PURE_TOKEN,-1,"M",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("km",ARG_TYPE_PURE_TOKEN,-1,"KM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ft",ARG_TYPE_PURE_TOKEN,-1,"FT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mi",ARG_TYPE_PURE_TOKEN,-1,"MI",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEORADIUS count_block argument table */
+/* ANY carries since="6.2.0", matching the history entry above. */
+struct COMMAND_ARG GEORADIUS_count_block_Subargs[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("any",ARG_TYPE_PURE_TOKEN,-1,"ANY",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* GEORADIUS order argument table */
+struct COMMAND_ARG GEORADIUS_order_Subargs[] = {
+{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEORADIUS store argument table */
+/* Key-spec indices 1 and 2 link these to the STORE/STOREDIST keyspecs above. */
+struct COMMAND_ARG GEORADIUS_store_Subargs[] = {
+{MAKE_ARG("storekey",ARG_TYPE_KEY,1,"STORE",NULL,NULL,CMD_ARG_NONE,0,NULL),.display_text="key"},
+{MAKE_ARG("storedistkey",ARG_TYPE_KEY,2,"STOREDIST",NULL,NULL,CMD_ARG_NONE,0,NULL),.display_text="key"},
+};
+
+/* GEORADIUS argument table */
+struct COMMAND_ARG GEORADIUS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("longitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("latitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("radius",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=GEORADIUS_unit_Subargs},
+{MAKE_ARG("withcoord",ARG_TYPE_PURE_TOKEN,-1,"WITHCOORD",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withdist",ARG_TYPE_PURE_TOKEN,-1,"WITHDIST",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withhash",ARG_TYPE_PURE_TOKEN,-1,"WITHHASH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("count-block",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUS_count_block_Subargs},
+{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUS_order_Subargs},
+{MAKE_ARG("store",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUS_store_Subargs},
+};
+
+/********** GEORADIUSBYMEMBER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEORADIUSBYMEMBER history */
+commandHistory GEORADIUSBYMEMBER_History[] = {
+{"7.0.0","Added support for uppercase unit names."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEORADIUSBYMEMBER tips */
+#define GEORADIUSBYMEMBER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEORADIUSBYMEMBER key specs */
+/* Like GEORADIUS_Keyspecs, but the keyword scan starts at 5 — this command has
+ * one fewer fixed arg (member replaces lon+lat). */
+keySpec GEORADIUSBYMEMBER_Keyspecs[3] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_KEYWORD,.bs.keyword={"STORE",5},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_KEYWORD,.bs.keyword={"STOREDIST",5},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEORADIUSBYMEMBER unit argument table */
+struct COMMAND_ARG GEORADIUSBYMEMBER_unit_Subargs[] = {
+{MAKE_ARG("m",ARG_TYPE_PURE_TOKEN,-1,"M",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("km",ARG_TYPE_PURE_TOKEN,-1,"KM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ft",ARG_TYPE_PURE_TOKEN,-1,"FT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mi",ARG_TYPE_PURE_TOKEN,-1,"MI",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEORADIUSBYMEMBER count_block argument table */
+struct COMMAND_ARG GEORADIUSBYMEMBER_count_block_Subargs[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("any",ARG_TYPE_PURE_TOKEN,-1,"ANY",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* GEORADIUSBYMEMBER order argument table */
+struct COMMAND_ARG GEORADIUSBYMEMBER_order_Subargs[] = {
+{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEORADIUSBYMEMBER store argument table */
+struct COMMAND_ARG GEORADIUSBYMEMBER_store_Subargs[] = {
+{MAKE_ARG("storekey",ARG_TYPE_KEY,1,"STORE",NULL,NULL,CMD_ARG_NONE,0,NULL),.display_text="key"},
+{MAKE_ARG("storedistkey",ARG_TYPE_KEY,2,"STOREDIST",NULL,NULL,CMD_ARG_NONE,0,NULL),.display_text="key"},
+};
+
+/* GEORADIUSBYMEMBER argument table */
+struct COMMAND_ARG GEORADIUSBYMEMBER_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("radius",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=GEORADIUSBYMEMBER_unit_Subargs},
+{MAKE_ARG("withcoord",ARG_TYPE_PURE_TOKEN,-1,"WITHCOORD",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withdist",ARG_TYPE_PURE_TOKEN,-1,"WITHDIST",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withhash",ARG_TYPE_PURE_TOKEN,-1,"WITHHASH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("count-block",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUSBYMEMBER_count_block_Subargs},
+{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUSBYMEMBER_order_Subargs},
+{MAKE_ARG("store",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUSBYMEMBER_store_Subargs},
+};
+
+/********** GEORADIUSBYMEMBER_RO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEORADIUSBYMEMBER_RO history */
+#define GEORADIUSBYMEMBER_RO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEORADIUSBYMEMBER_RO tips */
+#define GEORADIUSBYMEMBER_RO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEORADIUSBYMEMBER_RO key specs */
+/* Read-only variant: only the searched key, no STORE/STOREDIST specs. */
+keySpec GEORADIUSBYMEMBER_RO_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEORADIUSBYMEMBER_RO unit argument table */
+struct COMMAND_ARG GEORADIUSBYMEMBER_RO_unit_Subargs[] = {
+{MAKE_ARG("m",ARG_TYPE_PURE_TOKEN,-1,"M",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("km",ARG_TYPE_PURE_TOKEN,-1,"KM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ft",ARG_TYPE_PURE_TOKEN,-1,"FT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mi",ARG_TYPE_PURE_TOKEN,-1,"MI",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEORADIUSBYMEMBER_RO count_block argument table */
+struct COMMAND_ARG GEORADIUSBYMEMBER_RO_count_block_Subargs[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("any",ARG_TYPE_PURE_TOKEN,-1,"ANY",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* GEORADIUSBYMEMBER_RO order argument table */
+struct COMMAND_ARG GEORADIUSBYMEMBER_RO_order_Subargs[] = {
+{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEORADIUSBYMEMBER_RO argument table */
+/* Same as GEORADIUSBYMEMBER_Args without the store ONEOF. */
+struct COMMAND_ARG GEORADIUSBYMEMBER_RO_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("radius",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=GEORADIUSBYMEMBER_RO_unit_Subargs},
+{MAKE_ARG("withcoord",ARG_TYPE_PURE_TOKEN,-1,"WITHCOORD",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withdist",ARG_TYPE_PURE_TOKEN,-1,"WITHDIST",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withhash",ARG_TYPE_PURE_TOKEN,-1,"WITHHASH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("count-block",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUSBYMEMBER_RO_count_block_Subargs},
+{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUSBYMEMBER_RO_order_Subargs},
+};
+
+/********** GEORADIUS_RO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEORADIUS_RO history */
+commandHistory GEORADIUS_RO_History[] = {
+{"6.2.0","Added the `ANY` option for `COUNT`."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEORADIUS_RO tips */
+#define GEORADIUS_RO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEORADIUS_RO key specs */
+/* Read-only variant: only the searched key, no STORE/STOREDIST specs. */
+keySpec GEORADIUS_RO_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEORADIUS_RO unit argument table */
+struct COMMAND_ARG GEORADIUS_RO_unit_Subargs[] = {
+{MAKE_ARG("m",ARG_TYPE_PURE_TOKEN,-1,"M",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("km",ARG_TYPE_PURE_TOKEN,-1,"KM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ft",ARG_TYPE_PURE_TOKEN,-1,"FT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mi",ARG_TYPE_PURE_TOKEN,-1,"MI",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEORADIUS_RO count_block argument table */
+/* ANY carries since="6.2.0", matching the history entry above. */
+struct COMMAND_ARG GEORADIUS_RO_count_block_Subargs[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("any",ARG_TYPE_PURE_TOKEN,-1,"ANY",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* GEORADIUS_RO order argument table */
+struct COMMAND_ARG GEORADIUS_RO_order_Subargs[] = {
+{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEORADIUS_RO argument table */
+/* Same as GEORADIUS_Args without the store ONEOF. */
+struct COMMAND_ARG GEORADIUS_RO_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("longitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("latitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("radius",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=GEORADIUS_RO_unit_Subargs},
+{MAKE_ARG("withcoord",ARG_TYPE_PURE_TOKEN,-1,"WITHCOORD",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withdist",ARG_TYPE_PURE_TOKEN,-1,"WITHDIST",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withhash",ARG_TYPE_PURE_TOKEN,-1,"WITHHASH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("count-block",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUS_RO_count_block_Subargs},
+{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEORADIUS_RO_order_Subargs},
+};
+
+/********** GEOSEARCH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEOSEARCH history */
+commandHistory GEOSEARCH_History[] = {
+{"7.0.0","Added support for uppercase unit names."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEOSEARCH tips */
+#define GEOSEARCH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEOSEARCH key specs */
+keySpec GEOSEARCH_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEOSEARCH from fromlonlat argument table */
+struct COMMAND_ARG GEOSEARCH_from_fromlonlat_Subargs[] = {
+{MAKE_ARG("longitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("latitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOSEARCH from argument table */
+/* ONEOF: the search center is either FROMMEMBER <member> or FROMLONLAT <lon> <lat>. */
+struct COMMAND_ARG GEOSEARCH_from_Subargs[] = {
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,"FROMMEMBER",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("fromlonlat",ARG_TYPE_BLOCK,-1,"FROMLONLAT",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=GEOSEARCH_from_fromlonlat_Subargs},
+};
+
+/* GEOSEARCH by circle unit argument table */
+struct COMMAND_ARG GEOSEARCH_by_circle_unit_Subargs[] = {
+{MAKE_ARG("m",ARG_TYPE_PURE_TOKEN,-1,"M",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("km",ARG_TYPE_PURE_TOKEN,-1,"KM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ft",ARG_TYPE_PURE_TOKEN,-1,"FT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mi",ARG_TYPE_PURE_TOKEN,-1,"MI",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOSEARCH by circle argument table */
+struct COMMAND_ARG GEOSEARCH_by_circle_Subargs[] = {
+{MAKE_ARG("radius",ARG_TYPE_DOUBLE,-1,"BYRADIUS",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=GEOSEARCH_by_circle_unit_Subargs},
+};
+
+/* GEOSEARCH by box unit argument table */
+struct COMMAND_ARG GEOSEARCH_by_box_unit_Subargs[] = {
+{MAKE_ARG("m",ARG_TYPE_PURE_TOKEN,-1,"M",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("km",ARG_TYPE_PURE_TOKEN,-1,"KM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ft",ARG_TYPE_PURE_TOKEN,-1,"FT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mi",ARG_TYPE_PURE_TOKEN,-1,"MI",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOSEARCH by box argument table */
+struct COMMAND_ARG GEOSEARCH_by_box_Subargs[] = {
+{MAKE_ARG("width",ARG_TYPE_DOUBLE,-1,"BYBOX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("height",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=GEOSEARCH_by_box_unit_Subargs},
+};
+
+/* GEOSEARCH by argument table */
+/* ONEOF: the search area is either BYRADIUS (circle) or BYBOX (box). */
+struct COMMAND_ARG GEOSEARCH_by_Subargs[] = {
+{MAKE_ARG("circle",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=GEOSEARCH_by_circle_Subargs},
+{MAKE_ARG("box",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,3,NULL),.subargs=GEOSEARCH_by_box_Subargs},
+};
+
+/* GEOSEARCH order argument table */
+struct COMMAND_ARG GEOSEARCH_order_Subargs[] = {
+{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOSEARCH count_block argument table */
+struct COMMAND_ARG GEOSEARCH_count_block_Subargs[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("any",ARG_TYPE_PURE_TOKEN,-1,"ANY",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* GEOSEARCH argument table */
+struct COMMAND_ARG GEOSEARCH_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("from",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=GEOSEARCH_from_Subargs},
+{MAKE_ARG("by",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=GEOSEARCH_by_Subargs},
+{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEOSEARCH_order_Subargs},
+{MAKE_ARG("count-block",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEOSEARCH_count_block_Subargs},
+{MAKE_ARG("withcoord",ARG_TYPE_PURE_TOKEN,-1,"WITHCOORD",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withdist",ARG_TYPE_PURE_TOKEN,-1,"WITHDIST",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withhash",ARG_TYPE_PURE_TOKEN,-1,"WITHHASH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** GEOSEARCHSTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GEOSEARCHSTORE history */
+commandHistory GEOSEARCHSTORE_History[] = {
+{"7.0.0","Added support for uppercase unit names."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GEOSEARCHSTORE tips */
+#define GEOSEARCHSTORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GEOSEARCHSTORE key specs */
+/* spec 1: destination at argv[1] (OW|UPDATE); spec 2: source at argv[2]
+ * (RO|ACCESS) — destination comes first in this command's arity. */
+keySpec GEOSEARCHSTORE_Keyspecs[2] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GEOSEARCHSTORE from fromlonlat argument table */
+struct COMMAND_ARG GEOSEARCHSTORE_from_fromlonlat_Subargs[] = {
+{MAKE_ARG("longitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("latitude",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOSEARCHSTORE from argument table */
+struct COMMAND_ARG GEOSEARCHSTORE_from_Subargs[] = {
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,"FROMMEMBER",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("fromlonlat",ARG_TYPE_BLOCK,-1,"FROMLONLAT",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=GEOSEARCHSTORE_from_fromlonlat_Subargs},
+};
+
+/* GEOSEARCHSTORE by circle unit argument table */
+struct COMMAND_ARG GEOSEARCHSTORE_by_circle_unit_Subargs[] = {
+{MAKE_ARG("m",ARG_TYPE_PURE_TOKEN,-1,"M",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("km",ARG_TYPE_PURE_TOKEN,-1,"KM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ft",ARG_TYPE_PURE_TOKEN,-1,"FT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mi",ARG_TYPE_PURE_TOKEN,-1,"MI",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOSEARCHSTORE by circle argument table */
+struct COMMAND_ARG GEOSEARCHSTORE_by_circle_Subargs[] = {
+{MAKE_ARG("radius",ARG_TYPE_DOUBLE,-1,"BYRADIUS",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=GEOSEARCHSTORE_by_circle_unit_Subargs},
+};
+
+/* GEOSEARCHSTORE by box unit argument table */
+struct COMMAND_ARG GEOSEARCHSTORE_by_box_unit_Subargs[] = {
+{MAKE_ARG("m",ARG_TYPE_PURE_TOKEN,-1,"M",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("km",ARG_TYPE_PURE_TOKEN,-1,"KM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ft",ARG_TYPE_PURE_TOKEN,-1,"FT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("mi",ARG_TYPE_PURE_TOKEN,-1,"MI",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOSEARCHSTORE by box argument table */
+struct COMMAND_ARG GEOSEARCHSTORE_by_box_Subargs[] = {
+{MAKE_ARG("width",ARG_TYPE_DOUBLE,-1,"BYBOX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("height",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unit",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=GEOSEARCHSTORE_by_box_unit_Subargs},
+};
+
+/* GEOSEARCHSTORE by argument table */
+struct COMMAND_ARG GEOSEARCHSTORE_by_Subargs[] = {
+{MAKE_ARG("circle",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=GEOSEARCHSTORE_by_circle_Subargs},
+{MAKE_ARG("box",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,3,NULL),.subargs=GEOSEARCHSTORE_by_box_Subargs},
+};
+
+/* GEOSEARCHSTORE order argument table */
+struct COMMAND_ARG GEOSEARCHSTORE_order_Subargs[] = {
+{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GEOSEARCHSTORE count_block argument table */
+struct COMMAND_ARG GEOSEARCHSTORE_count_block_Subargs[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("any",ARG_TYPE_PURE_TOKEN,-1,"ANY",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* GEOSEARCHSTORE argument table */
+/* destination/source key-spec indices (0 and 1) match the keyspec order above. */
+struct COMMAND_ARG GEOSEARCHSTORE_Args[] = {
+{MAKE_ARG("destination",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("source",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("from",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=GEOSEARCHSTORE_from_Subargs},
+{MAKE_ARG("by",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=GEOSEARCHSTORE_by_Subargs},
+{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEOSEARCHSTORE_order_Subargs},
+{MAKE_ARG("count-block",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=GEOSEARCHSTORE_count_block_Subargs},
+{MAKE_ARG("storedist",ARG_TYPE_PURE_TOKEN,-1,"STOREDIST",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* ---------------------------------------------------------------------------
+ * Hash command metadata tables (HDEL .. HVALS): per command, an optional
+ * history table, reply tips, key specs, and an argument table. Generated
+ * content — keep in sync with src/commands/*.json. NOTE(review): the keySpec
+ * initializers read as {notes, flags, begin-search type + index, find-keys
+ * type + range/keynum params}; confirm field meaning against the keySpec
+ * struct definition. */
+
+/********** HDEL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HDEL history */
+commandHistory HDEL_History[] = {
+{"2.4.0","Accepts multiple `field` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HDEL tips */
+#define HDEL_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HDEL key specs */
+keySpec HDEL_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HDEL argument table */
+struct COMMAND_ARG HDEL_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** HEXISTS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HEXISTS history */
+#define HEXISTS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HEXISTS tips */
+#define HEXISTS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HEXISTS key specs */
+keySpec HEXISTS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HEXISTS argument table */
+struct COMMAND_ARG HEXISTS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HGET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HGET history */
+#define HGET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HGET tips */
+#define HGET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HGET key specs */
+keySpec HGET_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HGET argument table */
+struct COMMAND_ARG HGET_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HGETALL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HGETALL history */
+#define HGETALL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HGETALL tips */
+const char *HGETALL_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HGETALL key specs */
+keySpec HGETALL_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HGETALL argument table */
+struct COMMAND_ARG HGETALL_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HINCRBY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HINCRBY history */
+#define HINCRBY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HINCRBY tips */
+#define HINCRBY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HINCRBY key specs */
+keySpec HINCRBY_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HINCRBY argument table */
+struct COMMAND_ARG HINCRBY_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("increment",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HINCRBYFLOAT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HINCRBYFLOAT history */
+#define HINCRBYFLOAT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HINCRBYFLOAT tips */
+#define HINCRBYFLOAT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HINCRBYFLOAT key specs */
+keySpec HINCRBYFLOAT_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HINCRBYFLOAT argument table (increment is a double, unlike HINCRBY) */
+struct COMMAND_ARG HINCRBYFLOAT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("increment",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HKEYS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HKEYS history */
+#define HKEYS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HKEYS tips */
+const char *HKEYS_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HKEYS key specs */
+keySpec HKEYS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HKEYS argument table */
+struct COMMAND_ARG HKEYS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HLEN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HLEN history */
+#define HLEN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HLEN tips */
+#define HLEN_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HLEN key specs */
+keySpec HLEN_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HLEN argument table */
+struct COMMAND_ARG HLEN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HMGET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HMGET history */
+#define HMGET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HMGET tips */
+#define HMGET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HMGET key specs */
+keySpec HMGET_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HMGET argument table */
+struct COMMAND_ARG HMGET_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** HMSET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HMSET history */
+#define HMSET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HMSET tips */
+#define HMSET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HMSET key specs */
+keySpec HMSET_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HMSET data argument table (repeated field/value pair) */
+struct COMMAND_ARG HMSET_data_Subargs[] = {
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* HMSET argument table */
+struct COMMAND_ARG HMSET_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HMSET_data_Subargs},
+};
+
+/********** HRANDFIELD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HRANDFIELD history */
+#define HRANDFIELD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HRANDFIELD tips */
+const char *HRANDFIELD_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HRANDFIELD key specs */
+keySpec HRANDFIELD_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HRANDFIELD options argument table */
+struct COMMAND_ARG HRANDFIELD_options_Subargs[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("withvalues",ARG_TYPE_PURE_TOKEN,-1,"WITHVALUES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* HRANDFIELD argument table */
+struct COMMAND_ARG HRANDFIELD_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("options",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=HRANDFIELD_options_Subargs},
+};
+
+/********** HSCAN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HSCAN history */
+#define HSCAN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HSCAN tips */
+const char *HSCAN_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HSCAN key specs */
+keySpec HSCAN_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HSCAN argument table */
+struct COMMAND_ARG HSCAN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("cursor",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,"MATCH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** HSET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HSET history */
+commandHistory HSET_History[] = {
+{"4.0.0","Accepts multiple `field` and `value` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HSET tips */
+#define HSET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HSET key specs */
+keySpec HSET_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HSET data argument table (repeated field/value pair) */
+struct COMMAND_ARG HSET_data_Subargs[] = {
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* HSET argument table */
+struct COMMAND_ARG HSET_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HSET_data_Subargs},
+};
+
+/********** HSETNX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HSETNX history */
+#define HSETNX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HSETNX tips */
+#define HSETNX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HSETNX key specs */
+keySpec HSETNX_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HSETNX argument table */
+struct COMMAND_ARG HSETNX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HSTRLEN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HSTRLEN history */
+#define HSTRLEN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HSTRLEN tips */
+#define HSTRLEN_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HSTRLEN key specs */
+keySpec HSTRLEN_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HSTRLEN argument table */
+struct COMMAND_ARG HSTRLEN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** HVALS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* HVALS history */
+#define HVALS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* HVALS tips */
+const char *HVALS_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* HVALS key specs */
+keySpec HVALS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* HVALS argument table */
+struct COMMAND_ARG HVALS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ---------------------------------------------------------------------------
+ * HyperLogLog command metadata tables (PFADD .. PFSELFTEST). Generated
+ * content — keep in sync with src/commands/*.json. */
+
+/********** PFADD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PFADD history */
+#define PFADD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PFADD tips */
+#define PFADD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PFADD key specs */
+keySpec PFADD_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* PFADD argument table (element may be omitted entirely) */
+struct COMMAND_ARG PFADD_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("element",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** PFCOUNT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PFCOUNT history */
+#define PFCOUNT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PFCOUNT tips */
+#define PFCOUNT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PFCOUNT key specs — flagged RW despite being a "read" command; the notes
+ * string in the spec itself explains why. */
+keySpec PFCOUNT_Keyspecs[1] = {
+{"RW because it may change the internal representation of the key, and propagate to replicas",CMD_KEY_RW|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* PFCOUNT argument table */
+struct COMMAND_ARG PFCOUNT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** PFDEBUG ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PFDEBUG history */
+#define PFDEBUG_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PFDEBUG tips */
+#define PFDEBUG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PFDEBUG key specs — key search begins at argv index 2 (the key follows
+ * the subcommand argument; see PFDEBUG_Args below). */
+keySpec PFDEBUG_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* PFDEBUG argument table */
+struct COMMAND_ARG PFDEBUG_Args[] = {
+{MAKE_ARG("subcommand",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** PFMERGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PFMERGE history */
+#define PFMERGE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PFMERGE tips */
+#define PFMERGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PFMERGE key specs — two specs: destkey (index 1) and the trailing
+ * variadic source keys (from index 2). */
+keySpec PFMERGE_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* PFMERGE argument table */
+struct COMMAND_ARG PFMERGE_Args[] = {
+{MAKE_ARG("destkey",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("sourcekey",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** PFSELFTEST ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PFSELFTEST history */
+#define PFSELFTEST_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PFSELFTEST tips */
+#define PFSELFTEST_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PFSELFTEST key specs (command takes no keys) */
+#define PFSELFTEST_Keyspecs NULL
+#endif
+
+/* ---------------------------------------------------------------------------
+ * List command metadata tables (BLMOVE .. RPUSH). Generated content — keep
+ * in sync with src/commands/*.json. NOTE(review): .fk.range values such as
+ * {-2,1,0} appear to mean "keys run up to the second-to-last argument"
+ * (e.g. BLPOP/BRPOP, where a trailing timeout follows the keys) — confirm
+ * against the keySpec find-keys documentation. */
+
+/********** BLMOVE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BLMOVE history */
+#define BLMOVE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BLMOVE tips */
+#define BLMOVE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BLMOVE key specs — source (index 1) and destination (index 2) */
+keySpec BLMOVE_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* BLMOVE wherefrom argument table */
+struct COMMAND_ARG BLMOVE_wherefrom_Subargs[] = {
+{MAKE_ARG("left",ARG_TYPE_PURE_TOKEN,-1,"LEFT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("right",ARG_TYPE_PURE_TOKEN,-1,"RIGHT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BLMOVE whereto argument table */
+struct COMMAND_ARG BLMOVE_whereto_Subargs[] = {
+{MAKE_ARG("left",ARG_TYPE_PURE_TOKEN,-1,"LEFT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("right",ARG_TYPE_PURE_TOKEN,-1,"RIGHT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BLMOVE argument table */
+struct COMMAND_ARG BLMOVE_Args[] = {
+{MAKE_ARG("source",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("destination",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("wherefrom",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=BLMOVE_wherefrom_Subargs},
+{MAKE_ARG("whereto",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=BLMOVE_whereto_Subargs},
+{MAKE_ARG("timeout",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** BLMPOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BLMPOP history */
+#define BLMPOP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BLMPOP tips */
+#define BLMPOP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BLMPOP key specs — keynum-style: key count is taken from an argument
+ * (numkeys precedes the keys) rather than a fixed range */
+keySpec BLMPOP_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* BLMPOP where argument table */
+struct COMMAND_ARG BLMPOP_where_Subargs[] = {
+{MAKE_ARG("left",ARG_TYPE_PURE_TOKEN,-1,"LEFT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("right",ARG_TYPE_PURE_TOKEN,-1,"RIGHT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BLMPOP argument table */
+struct COMMAND_ARG BLMPOP_Args[] = {
+{MAKE_ARG("timeout",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("where",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=BLMPOP_where_Subargs},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** BLPOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BLPOP history */
+commandHistory BLPOP_History[] = {
+{"6.0.0","`timeout` is interpreted as a double instead of an integer."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BLPOP tips */
+#define BLPOP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BLPOP key specs */
+keySpec BLPOP_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-2,1,0}}
+};
+#endif
+
+/* BLPOP argument table */
+struct COMMAND_ARG BLPOP_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("timeout",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** BRPOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BRPOP history */
+commandHistory BRPOP_History[] = {
+{"6.0.0","`timeout` is interpreted as a double instead of an integer."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BRPOP tips */
+#define BRPOP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BRPOP key specs */
+keySpec BRPOP_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-2,1,0}}
+};
+#endif
+
+/* BRPOP argument table */
+struct COMMAND_ARG BRPOP_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("timeout",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** BRPOPLPUSH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BRPOPLPUSH history */
+commandHistory BRPOPLPUSH_History[] = {
+{"6.0.0","`timeout` is interpreted as a double instead of an integer."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BRPOPLPUSH tips */
+#define BRPOPLPUSH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BRPOPLPUSH key specs — source (index 1) and destination (index 2) */
+keySpec BRPOPLPUSH_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* BRPOPLPUSH argument table */
+struct COMMAND_ARG BRPOPLPUSH_Args[] = {
+{MAKE_ARG("source",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("destination",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("timeout",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LINDEX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LINDEX history */
+#define LINDEX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LINDEX tips */
+#define LINDEX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LINDEX key specs */
+keySpec LINDEX_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LINDEX argument table */
+struct COMMAND_ARG LINDEX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LINSERT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LINSERT history */
+#define LINSERT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LINSERT tips */
+#define LINSERT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LINSERT key specs */
+keySpec LINSERT_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LINSERT where argument table */
+struct COMMAND_ARG LINSERT_where_Subargs[] = {
+{MAKE_ARG("before",ARG_TYPE_PURE_TOKEN,-1,"BEFORE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("after",ARG_TYPE_PURE_TOKEN,-1,"AFTER",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* LINSERT argument table */
+struct COMMAND_ARG LINSERT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("where",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=LINSERT_where_Subargs},
+{MAKE_ARG("pivot",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("element",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LLEN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LLEN history */
+#define LLEN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LLEN tips */
+#define LLEN_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LLEN key specs */
+keySpec LLEN_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LLEN argument table */
+struct COMMAND_ARG LLEN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LMOVE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LMOVE history */
+#define LMOVE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LMOVE tips */
+#define LMOVE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LMOVE key specs — source (index 1) and destination (index 2) */
+keySpec LMOVE_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LMOVE wherefrom argument table */
+struct COMMAND_ARG LMOVE_wherefrom_Subargs[] = {
+{MAKE_ARG("left",ARG_TYPE_PURE_TOKEN,-1,"LEFT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("right",ARG_TYPE_PURE_TOKEN,-1,"RIGHT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* LMOVE whereto argument table */
+struct COMMAND_ARG LMOVE_whereto_Subargs[] = {
+{MAKE_ARG("left",ARG_TYPE_PURE_TOKEN,-1,"LEFT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("right",ARG_TYPE_PURE_TOKEN,-1,"RIGHT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* LMOVE argument table */
+struct COMMAND_ARG LMOVE_Args[] = {
+{MAKE_ARG("source",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("destination",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("wherefrom",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=LMOVE_wherefrom_Subargs},
+{MAKE_ARG("whereto",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=LMOVE_whereto_Subargs},
+};
+
+/********** LMPOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LMPOP history */
+#define LMPOP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LMPOP tips */
+#define LMPOP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LMPOP key specs — keynum-style, like BLMPOP but without the timeout */
+keySpec LMPOP_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* LMPOP where argument table */
+struct COMMAND_ARG LMPOP_where_Subargs[] = {
+{MAKE_ARG("left",ARG_TYPE_PURE_TOKEN,-1,"LEFT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("right",ARG_TYPE_PURE_TOKEN,-1,"RIGHT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* LMPOP argument table */
+struct COMMAND_ARG LMPOP_Args[] = {
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("where",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=LMPOP_where_Subargs},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** LPOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LPOP history */
+commandHistory LPOP_History[] = {
+{"6.2.0","Added the `count` argument."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LPOP tips */
+#define LPOP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LPOP key specs */
+keySpec LPOP_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LPOP argument table — `count` carries its "since" version (6.2.0),
+ * matching LPOP_History above */
+struct COMMAND_ARG LPOP_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** LPOS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LPOS history */
+#define LPOS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LPOS tips */
+#define LPOS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LPOS key specs */
+keySpec LPOS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LPOS argument table */
+struct COMMAND_ARG LPOS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("element",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("rank",ARG_TYPE_INTEGER,-1,"RANK",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("num-matches",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("len",ARG_TYPE_INTEGER,-1,"MAXLEN",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** LPUSH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LPUSH history */
+commandHistory LPUSH_History[] = {
+{"2.4.0","Accepts multiple `element` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LPUSH tips */
+#define LPUSH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LPUSH key specs */
+keySpec LPUSH_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LPUSH argument table */
+struct COMMAND_ARG LPUSH_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("element",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** LPUSHX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LPUSHX history */
+commandHistory LPUSHX_History[] = {
+{"4.0.0","Accepts multiple `element` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LPUSHX tips */
+#define LPUSHX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LPUSHX key specs */
+keySpec LPUSHX_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LPUSHX argument table */
+struct COMMAND_ARG LPUSHX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("element",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** LRANGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LRANGE history */
+#define LRANGE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LRANGE tips */
+#define LRANGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LRANGE key specs */
+keySpec LRANGE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LRANGE argument table */
+struct COMMAND_ARG LRANGE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("stop",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LREM ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LREM history */
+#define LREM_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LREM tips */
+#define LREM_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LREM key specs */
+keySpec LREM_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LREM argument table */
+struct COMMAND_ARG LREM_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("element",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LSET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LSET history */
+#define LSET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LSET tips */
+#define LSET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LSET key specs */
+keySpec LSET_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LSET argument table */
+struct COMMAND_ARG LSET_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("element",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LTRIM ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LTRIM history */
+#define LTRIM_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LTRIM tips */
+#define LTRIM_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LTRIM key specs */
+keySpec LTRIM_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* LTRIM argument table */
+struct COMMAND_ARG LTRIM_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("stop",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** RPOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RPOP history */
+commandHistory RPOP_History[] = {
+{"6.2.0","Added the `count` argument."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RPOP tips */
+#define RPOP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RPOP key specs */
+keySpec RPOP_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* RPOP argument table — `count` carries its "since" version (6.2.0),
+ * matching RPOP_History above */
+struct COMMAND_ARG RPOP_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** RPOPLPUSH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RPOPLPUSH history */
+#define RPOPLPUSH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RPOPLPUSH tips */
+#define RPOPLPUSH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RPOPLPUSH key specs — source (index 1) and destination (index 2) */
+keySpec RPOPLPUSH_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* RPOPLPUSH argument table */
+struct COMMAND_ARG RPOPLPUSH_Args[] = {
+{MAKE_ARG("source",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("destination",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** RPUSH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RPUSH history */
+commandHistory RPUSH_History[] = {
+{"2.4.0","Accepts multiple `element` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RPUSH tips */
+#define RPUSH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RPUSH key specs */
+keySpec RPUSH_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* RPUSH argument table */
+struct COMMAND_ARG RPUSH_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("element",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** RPUSHX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RPUSHX history */
+commandHistory RPUSHX_History[] = {
+{"4.0.0","Accepts multiple `element` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RPUSHX tips */
+#define RPUSHX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RPUSHX key specs */
+keySpec RPUSHX_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* RPUSHX argument table */
+struct COMMAND_ARG RPUSHX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("element",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** PSUBSCRIBE ********************/
+
+/* Pub/Sub commands carry no key specs (channels are not keys). */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PSUBSCRIBE history */
+#define PSUBSCRIBE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PSUBSCRIBE tips */
+#define PSUBSCRIBE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PSUBSCRIBE key specs */
+#define PSUBSCRIBE_Keyspecs NULL
+#endif
+
+/* PSUBSCRIBE argument table */
+struct COMMAND_ARG PSUBSCRIBE_Args[] = {
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** PUBLISH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PUBLISH history */
+#define PUBLISH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PUBLISH tips */
+#define PUBLISH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PUBLISH key specs */
+#define PUBLISH_Keyspecs NULL
+#endif
+
+/* PUBLISH argument table */
+struct COMMAND_ARG PUBLISH_Args[] = {
+{MAKE_ARG("channel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** PUBSUB CHANNELS ********************/
+
+/* Metadata for the PUBSUB subcommands; referenced by PUBSUB_Subcommands[] below. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PUBSUB CHANNELS history */
+#define PUBSUB_CHANNELS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PUBSUB CHANNELS tips */
+#define PUBSUB_CHANNELS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PUBSUB CHANNELS key specs */
+#define PUBSUB_CHANNELS_Keyspecs NULL
+#endif
+
+/* PUBSUB CHANNELS argument table */
+struct COMMAND_ARG PUBSUB_CHANNELS_Args[] = {
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** PUBSUB HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PUBSUB HELP history */
+#define PUBSUB_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PUBSUB HELP tips */
+#define PUBSUB_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PUBSUB HELP key specs */
+#define PUBSUB_HELP_Keyspecs NULL
+#endif
+
+/********** PUBSUB NUMPAT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PUBSUB NUMPAT history */
+#define PUBSUB_NUMPAT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PUBSUB NUMPAT tips */
+#define PUBSUB_NUMPAT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PUBSUB NUMPAT key specs */
+#define PUBSUB_NUMPAT_Keyspecs NULL
+#endif
+
+/********** PUBSUB NUMSUB ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PUBSUB NUMSUB history */
+#define PUBSUB_NUMSUB_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PUBSUB NUMSUB tips */
+#define PUBSUB_NUMSUB_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PUBSUB NUMSUB key specs */
+#define PUBSUB_NUMSUB_Keyspecs NULL
+#endif
+
+/* PUBSUB NUMSUB argument table */
+struct COMMAND_ARG PUBSUB_NUMSUB_Args[] = {
+{MAKE_ARG("channel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** PUBSUB SHARDCHANNELS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PUBSUB SHARDCHANNELS history */
+#define PUBSUB_SHARDCHANNELS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PUBSUB SHARDCHANNELS tips */
+#define PUBSUB_SHARDCHANNELS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PUBSUB SHARDCHANNELS key specs */
+#define PUBSUB_SHARDCHANNELS_Keyspecs NULL
+#endif
+
+/* PUBSUB SHARDCHANNELS argument table */
+struct COMMAND_ARG PUBSUB_SHARDCHANNELS_Args[] = {
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** PUBSUB SHARDNUMSUB ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PUBSUB SHARDNUMSUB history */
+#define PUBSUB_SHARDNUMSUB_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PUBSUB SHARDNUMSUB tips */
+#define PUBSUB_SHARDNUMSUB_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PUBSUB SHARDNUMSUB key specs */
+#define PUBSUB_SHARDNUMSUB_Keyspecs NULL
+#endif
+
+/* PUBSUB SHARDNUMSUB argument table */
+struct COMMAND_ARG PUBSUB_SHARDNUMSUB_Args[] = {
+{MAKE_ARG("shardchannel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/* PUBSUB command table */
+/* All subcommands dispatch to pubsubCommand; negative arity means "at least
+ * that many args". Entry order is alphabetical by subcommand name. */
+struct COMMAND_STRUCT PUBSUB_Subcommands[] = {
+{MAKE_CMD("channels","Returns the active channels.","O(N) where N is the number of active channels, and assuming constant time pattern matching (relatively short channels and patterns)","2.8.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_CHANNELS_History,0,PUBSUB_CHANNELS_Tips,0,pubsubCommand,-2,CMD_PUBSUB|CMD_LOADING|CMD_STALE,0,PUBSUB_CHANNELS_Keyspecs,0,NULL,1),.args=PUBSUB_CHANNELS_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_HELP_History,0,PUBSUB_HELP_Tips,0,pubsubCommand,2,CMD_LOADING|CMD_STALE,0,PUBSUB_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("numpat","Returns a count of unique pattern subscriptions.","O(1)","2.8.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_NUMPAT_History,0,PUBSUB_NUMPAT_Tips,0,pubsubCommand,2,CMD_PUBSUB|CMD_LOADING|CMD_STALE,0,PUBSUB_NUMPAT_Keyspecs,0,NULL,0)},
+{MAKE_CMD("numsub","Returns a count of subscribers to channels.","O(N) for the NUMSUB subcommand, where N is the number of requested channels","2.8.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_NUMSUB_History,0,PUBSUB_NUMSUB_Tips,0,pubsubCommand,-2,CMD_PUBSUB|CMD_LOADING|CMD_STALE,0,PUBSUB_NUMSUB_Keyspecs,0,NULL,1),.args=PUBSUB_NUMSUB_Args},
+{MAKE_CMD("shardchannels","Returns the active shard channels.","O(N) where N is the number of active shard channels, and assuming constant time pattern matching (relatively short shard channels).","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_SHARDCHANNELS_History,0,PUBSUB_SHARDCHANNELS_Tips,0,pubsubCommand,-2,CMD_PUBSUB|CMD_LOADING|CMD_STALE,0,PUBSUB_SHARDCHANNELS_Keyspecs,0,NULL,1),.args=PUBSUB_SHARDCHANNELS_Args},
+{MAKE_CMD("shardnumsub","Returns the count of subscribers of shard channels.","O(N) for the SHARDNUMSUB subcommand, where N is the number of requested shard channels","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_SHARDNUMSUB_History,0,PUBSUB_SHARDNUMSUB_Tips,0,pubsubCommand,-2,CMD_PUBSUB|CMD_LOADING|CMD_STALE,0,PUBSUB_SHARDNUMSUB_Keyspecs,0,NULL,1),.args=PUBSUB_SHARDNUMSUB_Args},
+{0}
+};
+
+/********** PUBSUB ********************/
+
+/* Container command: no history/tips/keyspecs of its own. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PUBSUB history */
+#define PUBSUB_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PUBSUB tips */
+#define PUBSUB_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PUBSUB key specs */
+#define PUBSUB_Keyspecs NULL
+#endif
+
+/********** PUNSUBSCRIBE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PUNSUBSCRIBE history */
+#define PUNSUBSCRIBE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PUNSUBSCRIBE tips */
+#define PUNSUBSCRIBE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PUNSUBSCRIBE key specs */
+#define PUNSUBSCRIBE_Keyspecs NULL
+#endif
+
+/* PUNSUBSCRIBE argument table */
+struct COMMAND_ARG PUNSUBSCRIBE_Args[] = {
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SPUBLISH ********************/
+
+/* Shard pub/sub commands use CMD_KEY_NOT_KEY specs so the cluster can route
+ * by shard channel even though it is not a real key. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SPUBLISH history */
+#define SPUBLISH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SPUBLISH tips */
+#define SPUBLISH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SPUBLISH key specs */
+keySpec SPUBLISH_Keyspecs[1] = {
+{NULL,CMD_KEY_NOT_KEY,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SPUBLISH argument table */
+struct COMMAND_ARG SPUBLISH_Args[] = {
+{MAKE_ARG("shardchannel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SSUBSCRIBE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SSUBSCRIBE history */
+#define SSUBSCRIBE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SSUBSCRIBE tips */
+#define SSUBSCRIBE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SSUBSCRIBE key specs */
+/* Range {-1,...} covers every trailing shardchannel argument (variadic),
+ * unlike SPUBLISH's single-channel {0,...}. */
+keySpec SSUBSCRIBE_Keyspecs[1] = {
+{NULL,CMD_KEY_NOT_KEY,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* SSUBSCRIBE argument table */
+struct COMMAND_ARG SSUBSCRIBE_Args[] = {
+{MAKE_ARG("shardchannel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SUBSCRIBE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SUBSCRIBE history */
+#define SUBSCRIBE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SUBSCRIBE tips */
+#define SUBSCRIBE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SUBSCRIBE key specs */
+#define SUBSCRIBE_Keyspecs NULL
+#endif
+
+/* SUBSCRIBE argument table */
+struct COMMAND_ARG SUBSCRIBE_Args[] = {
+{MAKE_ARG("channel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SUNSUBSCRIBE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SUNSUBSCRIBE history */
+#define SUNSUBSCRIBE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SUNSUBSCRIBE tips */
+#define SUNSUBSCRIBE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SUNSUBSCRIBE key specs */
+/* NOT_KEY spec over all trailing shardchannel args, matching SSUBSCRIBE. */
+keySpec SUNSUBSCRIBE_Keyspecs[1] = {
+{NULL,CMD_KEY_NOT_KEY,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* SUNSUBSCRIBE argument table */
+struct COMMAND_ARG SUNSUBSCRIBE_Args[] = {
+{MAKE_ARG("shardchannel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** UNSUBSCRIBE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* UNSUBSCRIBE history */
+#define UNSUBSCRIBE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* UNSUBSCRIBE tips */
+#define UNSUBSCRIBE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* UNSUBSCRIBE key specs */
+#define UNSUBSCRIBE_Keyspecs NULL
+#endif
+
+/* UNSUBSCRIBE argument table */
+struct COMMAND_ARG UNSUBSCRIBE_Args[] = {
+{MAKE_ARG("channel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** EVAL ********************/
+
+/* Scripting commands share the same argument shape: script/sha1, numkeys,
+ * then keys located via KSPEC_FK_KEYNUM (count taken from the numkeys arg). */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* EVAL history */
+#define EVAL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* EVAL tips */
+#define EVAL_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* EVAL key specs */
+keySpec EVAL_Keyspecs[1] = {
+{"We cannot tell how the keys will be used so we assume the worst, RW and UPDATE",CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* EVAL argument table */
+struct COMMAND_ARG EVAL_Args[] = {
+{MAKE_ARG("script",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** EVALSHA ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* EVALSHA history */
+#define EVALSHA_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* EVALSHA tips */
+#define EVALSHA_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* EVALSHA key specs */
+keySpec EVALSHA_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* EVALSHA argument table */
+struct COMMAND_ARG EVALSHA_Args[] = {
+{MAKE_ARG("sha1",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** EVALSHA_RO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* EVALSHA_RO history */
+#define EVALSHA_RO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* EVALSHA_RO tips */
+#define EVALSHA_RO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* EVALSHA_RO key specs */
+/* Read-only variant: CMD_KEY_RO|ACCESS instead of RW|UPDATE. */
+keySpec EVALSHA_RO_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* EVALSHA_RO argument table */
+struct COMMAND_ARG EVALSHA_RO_Args[] = {
+{MAKE_ARG("sha1",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** EVAL_RO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* EVAL_RO history */
+#define EVAL_RO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* EVAL_RO tips */
+#define EVAL_RO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* EVAL_RO key specs */
+keySpec EVAL_RO_Keyspecs[1] = {
+{"We cannot tell how the keys will be used so we assume the worst, RO and ACCESS",CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* EVAL_RO argument table */
+struct COMMAND_ARG EVAL_RO_Args[] = {
+{MAKE_ARG("script",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** FCALL ********************/
+
+/* FCALL/FCALL_RO mirror EVAL/EVAL_RO: keys found by keynum at argv[2]. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FCALL history */
+#define FCALL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FCALL tips */
+#define FCALL_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FCALL key specs */
+keySpec FCALL_Keyspecs[1] = {
+{"We cannot tell how the keys will be used so we assume the worst, RW and UPDATE",CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* FCALL argument table */
+struct COMMAND_ARG FCALL_Args[] = {
+{MAKE_ARG("function",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** FCALL_RO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FCALL_RO history */
+#define FCALL_RO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FCALL_RO tips */
+#define FCALL_RO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FCALL_RO key specs */
+keySpec FCALL_RO_Keyspecs[1] = {
+{"We cannot tell how the keys will be used so we assume the worst, RO and ACCESS",CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* FCALL_RO argument table */
+struct COMMAND_ARG FCALL_RO_Args[] = {
+{MAKE_ARG("function",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** FUNCTION DELETE ********************/
+
+/* Tips arrays hold client/proxy routing hints (request_policy/response_policy)
+ * consumed via COMMAND DOCS; the count is passed alongside in the command table. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION DELETE history */
+#define FUNCTION_DELETE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION DELETE tips */
+const char *FUNCTION_DELETE_Tips[] = {
+"request_policy:all_shards",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION DELETE key specs */
+#define FUNCTION_DELETE_Keyspecs NULL
+#endif
+
+/* FUNCTION DELETE argument table */
+struct COMMAND_ARG FUNCTION_DELETE_Args[] = {
+{MAKE_ARG("library-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** FUNCTION DUMP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION DUMP history */
+#define FUNCTION_DUMP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION DUMP tips */
+#define FUNCTION_DUMP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION DUMP key specs */
+#define FUNCTION_DUMP_Keyspecs NULL
+#endif
+
+/********** FUNCTION FLUSH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION FLUSH history */
+#define FUNCTION_FLUSH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION FLUSH tips */
+const char *FUNCTION_FLUSH_Tips[] = {
+"request_policy:all_shards",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION FLUSH key specs */
+#define FUNCTION_FLUSH_Keyspecs NULL
+#endif
+
+/* FUNCTION FLUSH flush_type argument table */
+struct COMMAND_ARG FUNCTION_FLUSH_flush_type_Subargs[] = {
+{MAKE_ARG("async",ARG_TYPE_PURE_TOKEN,-1,"ASYNC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("sync",ARG_TYPE_PURE_TOKEN,-1,"SYNC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* FUNCTION FLUSH argument table */
+struct COMMAND_ARG FUNCTION_FLUSH_Args[] = {
+{MAKE_ARG("flush-type",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=FUNCTION_FLUSH_flush_type_Subargs},
+};
+
+/********** FUNCTION HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION HELP history */
+#define FUNCTION_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION HELP tips */
+#define FUNCTION_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION HELP key specs */
+#define FUNCTION_HELP_Keyspecs NULL
+#endif
+
+/********** FUNCTION KILL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION KILL history */
+#define FUNCTION_KILL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION KILL tips */
+const char *FUNCTION_KILL_Tips[] = {
+"request_policy:all_shards",
+"response_policy:one_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION KILL key specs */
+#define FUNCTION_KILL_Keyspecs NULL
+#endif
+
+/********** FUNCTION LIST ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION LIST history */
+#define FUNCTION_LIST_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION LIST tips */
+const char *FUNCTION_LIST_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION LIST key specs */
+#define FUNCTION_LIST_Keyspecs NULL
+#endif
+
+/* FUNCTION LIST argument table */
+struct COMMAND_ARG FUNCTION_LIST_Args[] = {
+{MAKE_ARG("library-name-pattern",ARG_TYPE_STRING,-1,"LIBRARYNAME",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withcode",ARG_TYPE_PURE_TOKEN,-1,"WITHCODE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** FUNCTION LOAD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION LOAD history */
+#define FUNCTION_LOAD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION LOAD tips */
+const char *FUNCTION_LOAD_Tips[] = {
+"request_policy:all_shards",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION LOAD key specs */
+#define FUNCTION_LOAD_Keyspecs NULL
+#endif
+
+/* FUNCTION LOAD argument table */
+struct COMMAND_ARG FUNCTION_LOAD_Args[] = {
+{MAKE_ARG("replace",ARG_TYPE_PURE_TOKEN,-1,"REPLACE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("function-code",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** FUNCTION RESTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION RESTORE history */
+#define FUNCTION_RESTORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION RESTORE tips */
+const char *FUNCTION_RESTORE_Tips[] = {
+"request_policy:all_shards",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION RESTORE key specs */
+#define FUNCTION_RESTORE_Keyspecs NULL
+#endif
+
+/* FUNCTION RESTORE policy argument table */
+/* One-of token args: FLUSH / APPEND / REPLACE. */
+struct COMMAND_ARG FUNCTION_RESTORE_policy_Subargs[] = {
+{MAKE_ARG("flush",ARG_TYPE_PURE_TOKEN,-1,"FLUSH",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("append",ARG_TYPE_PURE_TOKEN,-1,"APPEND",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("replace",ARG_TYPE_PURE_TOKEN,-1,"REPLACE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* FUNCTION RESTORE argument table */
+struct COMMAND_ARG FUNCTION_RESTORE_Args[] = {
+{MAKE_ARG("serialized-value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("policy",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=FUNCTION_RESTORE_policy_Subargs},
+};
+
+/********** FUNCTION STATS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION STATS history */
+#define FUNCTION_STATS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION STATS tips */
+const char *FUNCTION_STATS_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_shards",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION STATS key specs */
+#define FUNCTION_STATS_Keyspecs NULL
+#endif
+
+/* FUNCTION command table */
+/* Each entry passes its tips count (0-3) right after the tips pointer; keep
+ * the count in sync with the matching *_Tips array above. */
+struct COMMAND_STRUCT FUNCTION_Subcommands[] = {
+{MAKE_CMD("delete","Deletes a library and its functions.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_DELETE_History,0,FUNCTION_DELETE_Tips,2,functionDeleteCommand,3,CMD_NOSCRIPT|CMD_WRITE,ACL_CATEGORY_SCRIPTING,FUNCTION_DELETE_Keyspecs,0,NULL,1),.args=FUNCTION_DELETE_Args},
+{MAKE_CMD("dump","Dumps all libraries into a serialized binary payload.","O(N) where N is the number of functions","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_DUMP_History,0,FUNCTION_DUMP_Tips,0,functionDumpCommand,2,CMD_NOSCRIPT,ACL_CATEGORY_SCRIPTING,FUNCTION_DUMP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("flush","Deletes all libraries and functions.","O(N) where N is the number of functions deleted","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_FLUSH_History,0,FUNCTION_FLUSH_Tips,2,functionFlushCommand,-2,CMD_NOSCRIPT|CMD_WRITE,ACL_CATEGORY_SCRIPTING,FUNCTION_FLUSH_Keyspecs,0,NULL,1),.args=FUNCTION_FLUSH_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_HELP_History,0,FUNCTION_HELP_Tips,0,functionHelpCommand,2,CMD_LOADING|CMD_STALE,ACL_CATEGORY_SCRIPTING,FUNCTION_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("kill","Terminates a function during execution.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_KILL_History,0,FUNCTION_KILL_Tips,2,functionKillCommand,2,CMD_NOSCRIPT|CMD_ALLOW_BUSY,ACL_CATEGORY_SCRIPTING,FUNCTION_KILL_Keyspecs,0,NULL,0)},
+{MAKE_CMD("list","Returns information about all libraries.","O(N) where N is the number of functions","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_LIST_History,0,FUNCTION_LIST_Tips,1,functionListCommand,-2,CMD_NOSCRIPT,ACL_CATEGORY_SCRIPTING,FUNCTION_LIST_Keyspecs,0,NULL,2),.args=FUNCTION_LIST_Args},
+{MAKE_CMD("load","Creates a library.","O(1) (considering compilation time is redundant)","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_LOAD_History,0,FUNCTION_LOAD_Tips,2,functionLoadCommand,-3,CMD_NOSCRIPT|CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SCRIPTING,FUNCTION_LOAD_Keyspecs,0,NULL,2),.args=FUNCTION_LOAD_Args},
+{MAKE_CMD("restore","Restores all libraries from a payload.","O(N) where N is the number of functions on the payload","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_RESTORE_History,0,FUNCTION_RESTORE_Tips,2,functionRestoreCommand,-3,CMD_NOSCRIPT|CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SCRIPTING,FUNCTION_RESTORE_Keyspecs,0,NULL,2),.args=FUNCTION_RESTORE_Args},
+{MAKE_CMD("stats","Returns information about a function during execution.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_STATS_History,0,FUNCTION_STATS_Tips,3,functionStatsCommand,2,CMD_NOSCRIPT|CMD_ALLOW_BUSY,ACL_CATEGORY_SCRIPTING,FUNCTION_STATS_Keyspecs,0,NULL,0)},
+{0}
+};
+
+/********** FUNCTION ********************/
+
+/* Container command: no history/tips/keyspecs of its own. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FUNCTION history */
+#define FUNCTION_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FUNCTION tips */
+#define FUNCTION_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FUNCTION key specs */
+#define FUNCTION_Keyspecs NULL
+#endif
+
+/********** SCRIPT DEBUG ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCRIPT DEBUG history */
+#define SCRIPT_DEBUG_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCRIPT DEBUG tips */
+#define SCRIPT_DEBUG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCRIPT DEBUG key specs */
+#define SCRIPT_DEBUG_Keyspecs NULL
+#endif
+
+/* SCRIPT DEBUG mode argument table */
+/* Mandatory one-of token: YES / SYNC / NO. */
+struct COMMAND_ARG SCRIPT_DEBUG_mode_Subargs[] = {
+{MAKE_ARG("yes",ARG_TYPE_PURE_TOKEN,-1,"YES",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("sync",ARG_TYPE_PURE_TOKEN,-1,"SYNC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("no",ARG_TYPE_PURE_TOKEN,-1,"NO",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SCRIPT DEBUG argument table */
+struct COMMAND_ARG SCRIPT_DEBUG_Args[] = {
+{MAKE_ARG("mode",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,3,NULL),.subargs=SCRIPT_DEBUG_mode_Subargs},
+};
+
+/********** SCRIPT EXISTS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCRIPT EXISTS history */
+#define SCRIPT_EXISTS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCRIPT EXISTS tips */
+const char *SCRIPT_EXISTS_Tips[] = {
+"request_policy:all_shards",
+"response_policy:agg_logical_and",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCRIPT EXISTS key specs */
+#define SCRIPT_EXISTS_Keyspecs NULL
+#endif
+
+/* SCRIPT EXISTS argument table */
+struct COMMAND_ARG SCRIPT_EXISTS_Args[] = {
+{MAKE_ARG("sha1",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SCRIPT FLUSH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCRIPT FLUSH history */
+commandHistory SCRIPT_FLUSH_History[] = {
+{"6.2.0","Added the `ASYNC` and `SYNC` flushing mode modifiers."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCRIPT FLUSH tips */
+const char *SCRIPT_FLUSH_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCRIPT FLUSH key specs */
+#define SCRIPT_FLUSH_Keyspecs NULL
+#endif
+
+/* SCRIPT FLUSH flush_type argument table */
+struct COMMAND_ARG SCRIPT_FLUSH_flush_type_Subargs[] = {
+{MAKE_ARG("async",ARG_TYPE_PURE_TOKEN,-1,"ASYNC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("sync",ARG_TYPE_PURE_TOKEN,-1,"SYNC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SCRIPT FLUSH argument table */
+/* The "6.2.0" since-version on flush-type mirrors SCRIPT_FLUSH_History. */
+struct COMMAND_ARG SCRIPT_FLUSH_Args[] = {
+{MAKE_ARG("flush-type",ARG_TYPE_ONEOF,-1,NULL,NULL,"6.2.0",CMD_ARG_OPTIONAL,2,NULL),.subargs=SCRIPT_FLUSH_flush_type_Subargs},
+};
+
+/********** SCRIPT HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCRIPT HELP history */
+#define SCRIPT_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCRIPT HELP tips */
+#define SCRIPT_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCRIPT HELP key specs */
+#define SCRIPT_HELP_Keyspecs NULL
+#endif
+
+/********** SCRIPT KILL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCRIPT KILL history */
+#define SCRIPT_KILL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCRIPT KILL tips */
+const char *SCRIPT_KILL_Tips[] = {
+"request_policy:all_shards",
+"response_policy:one_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCRIPT KILL key specs */
+#define SCRIPT_KILL_Keyspecs NULL
+#endif
+
+/********** SCRIPT LOAD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCRIPT LOAD history */
+#define SCRIPT_LOAD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCRIPT LOAD tips */
+const char *SCRIPT_LOAD_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCRIPT LOAD key specs */
+#define SCRIPT_LOAD_Keyspecs NULL
+#endif
+
+/* SCRIPT LOAD argument table */
+struct COMMAND_ARG SCRIPT_LOAD_Args[] = {
+{MAKE_ARG("script",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SCRIPT command table */
+/* Each MAKE_CMD row pairs a table pointer with its entry count (e.g.
+ * SCRIPT_KILL_Tips,2 matches the two-entry tips array above), then the
+ * handler (scriptCommand), arity, flags, ACL category, key specs + count,
+ * and finally the number of entries in the optional .args table (e.g.
+ * SCRIPT_LOAD_Args has 1 entry, so its row ends with 1). The {0} row
+ * terminates the table. */
+struct COMMAND_STRUCT SCRIPT_Subcommands[] = {
+{MAKE_CMD("debug","Sets the debug mode of server-side Lua scripts.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_DEBUG_History,0,SCRIPT_DEBUG_Tips,0,scriptCommand,3,CMD_NOSCRIPT,ACL_CATEGORY_SCRIPTING,SCRIPT_DEBUG_Keyspecs,0,NULL,1),.args=SCRIPT_DEBUG_Args},
+{MAKE_CMD("exists","Determines whether server-side Lua scripts exist in the script cache.","O(N) with N being the number of scripts to check (so checking a single script is an O(1) operation).","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_EXISTS_History,0,SCRIPT_EXISTS_Tips,2,scriptCommand,-3,CMD_NOSCRIPT,ACL_CATEGORY_SCRIPTING,SCRIPT_EXISTS_Keyspecs,0,NULL,1),.args=SCRIPT_EXISTS_Args},
+{MAKE_CMD("flush","Removes all server-side Lua scripts from the script cache.","O(N) with N being the number of scripts in cache","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_FLUSH_History,1,SCRIPT_FLUSH_Tips,2,scriptCommand,-2,CMD_NOSCRIPT,ACL_CATEGORY_SCRIPTING,SCRIPT_FLUSH_Keyspecs,0,NULL,1),.args=SCRIPT_FLUSH_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_HELP_History,0,SCRIPT_HELP_Tips,0,scriptCommand,2,CMD_LOADING|CMD_STALE,ACL_CATEGORY_SCRIPTING,SCRIPT_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("kill","Terminates a server-side Lua script during execution.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_KILL_History,0,SCRIPT_KILL_Tips,2,scriptCommand,2,CMD_NOSCRIPT|CMD_ALLOW_BUSY,ACL_CATEGORY_SCRIPTING,SCRIPT_KILL_Keyspecs,0,NULL,0)},
+{MAKE_CMD("load","Loads a server-side Lua script to the script cache.","O(N) with N being the length in bytes of the script body.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_LOAD_History,0,SCRIPT_LOAD_Tips,2,scriptCommand,3,CMD_NOSCRIPT|CMD_STALE,ACL_CATEGORY_SCRIPTING,SCRIPT_LOAD_Keyspecs,0,NULL,1),.args=SCRIPT_LOAD_Args},
+{0}
+};
+
+/********** SCRIPT ********************/
+
+/* Metadata stubs for the SCRIPT container command itself: all NULL because
+ * history, tips, and key specs live on the individual subcommands above. */
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCRIPT history */
+#define SCRIPT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCRIPT tips */
+#define SCRIPT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCRIPT key specs */
+#define SCRIPT_Keyspecs NULL
+#endif
+
+/********** SENTINEL CKQUORUM ********************/
+
+/* Each SENTINEL subcommand section below follows the same generated layout:
+ * optional History table, optional Tips table, optional Keyspecs table
+ * (NULL stubs when empty), then an argument table if the subcommand takes
+ * arguments. */
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL CKQUORUM history */
+#define SENTINEL_CKQUORUM_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL CKQUORUM tips */
+#define SENTINEL_CKQUORUM_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL CKQUORUM key specs */
+#define SENTINEL_CKQUORUM_Keyspecs NULL
+#endif
+
+/* SENTINEL CKQUORUM argument table */
+struct COMMAND_ARG SENTINEL_CKQUORUM_Args[] = {
+{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL CONFIG ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL CONFIG history */
+/* One entry; the SENTINEL command table references this with count 1. */
+commandHistory SENTINEL_CONFIG_History[] = {
+{"7.2.0","Added the ability to set and get multiple parameters in one call."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL CONFIG tips */
+#define SENTINEL_CONFIG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL CONFIG key specs */
+#define SENTINEL_CONFIG_Keyspecs NULL
+#endif
+
+/* SENTINEL CONFIG action set argument table */
+struct COMMAND_ARG SENTINEL_CONFIG_action_set_Subargs[] = {
+{MAKE_ARG("parameter",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SENTINEL CONFIG action argument table */
+/* Two alternatives keyed by token: SET takes repeated parameter/value pairs
+ * (CMD_ARG_MULTIPLE block of 2 subargs), GET takes repeated parameters. */
+struct COMMAND_ARG SENTINEL_CONFIG_action_Subargs[] = {
+{MAKE_ARG("set",ARG_TYPE_BLOCK,-1,"SET",NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=SENTINEL_CONFIG_action_set_Subargs},
+{MAKE_ARG("parameter",ARG_TYPE_STRING,-1,"GET",NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/* SENTINEL CONFIG argument table */
+struct COMMAND_ARG SENTINEL_CONFIG_Args[] = {
+{MAKE_ARG("action",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=SENTINEL_CONFIG_action_Subargs},
+};
+
+/********** SENTINEL DEBUG ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL DEBUG history */
+#define SENTINEL_DEBUG_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL DEBUG tips */
+#define SENTINEL_DEBUG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL DEBUG key specs */
+#define SENTINEL_DEBUG_Keyspecs NULL
+#endif
+
+/* SENTINEL DEBUG data argument table */
+struct COMMAND_ARG SENTINEL_DEBUG_data_Subargs[] = {
+{MAKE_ARG("parameter",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SENTINEL DEBUG argument table */
+/* Optional, repeatable parameter/value pairs (no pairs = list mode per the
+ * subcommand summary in the SENTINEL command table). */
+struct COMMAND_ARG SENTINEL_DEBUG_Args[] = {
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,2,NULL),.subargs=SENTINEL_DEBUG_data_Subargs},
+};
+
+/********** SENTINEL FAILOVER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL FAILOVER history */
+#define SENTINEL_FAILOVER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL FAILOVER tips */
+#define SENTINEL_FAILOVER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL FAILOVER key specs */
+#define SENTINEL_FAILOVER_Keyspecs NULL
+#endif
+
+/* SENTINEL FAILOVER argument table */
+struct COMMAND_ARG SENTINEL_FAILOVER_Args[] = {
+{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL FLUSHCONFIG ********************/
+
+/* FLUSHCONFIG, HELP, and similar no-argument subcommands emit only the
+ * three NULL metadata stubs — no argument table follows. */
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL FLUSHCONFIG history */
+#define SENTINEL_FLUSHCONFIG_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL FLUSHCONFIG tips */
+#define SENTINEL_FLUSHCONFIG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL FLUSHCONFIG key specs */
+#define SENTINEL_FLUSHCONFIG_Keyspecs NULL
+#endif
+
+/********** SENTINEL GET_MASTER_ADDR_BY_NAME ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL GET_MASTER_ADDR_BY_NAME history */
+#define SENTINEL_GET_MASTER_ADDR_BY_NAME_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL GET_MASTER_ADDR_BY_NAME tips */
+#define SENTINEL_GET_MASTER_ADDR_BY_NAME_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL GET_MASTER_ADDR_BY_NAME key specs */
+#define SENTINEL_GET_MASTER_ADDR_BY_NAME_Keyspecs NULL
+#endif
+
+/* SENTINEL GET_MASTER_ADDR_BY_NAME argument table */
+struct COMMAND_ARG SENTINEL_GET_MASTER_ADDR_BY_NAME_Args[] = {
+{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL HELP history */
+#define SENTINEL_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL HELP tips */
+#define SENTINEL_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL HELP key specs */
+#define SENTINEL_HELP_Keyspecs NULL
+#endif
+
+/********** SENTINEL INFO_CACHE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL INFO_CACHE history */
+#define SENTINEL_INFO_CACHE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL INFO_CACHE tips */
+#define SENTINEL_INFO_CACHE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL INFO_CACHE key specs */
+#define SENTINEL_INFO_CACHE_Keyspecs NULL
+#endif
+
+/* SENTINEL INFO_CACHE argument table */
+/* One or more node names (CMD_ARG_MULTIPLE, not OPTIONAL). */
+struct COMMAND_ARG SENTINEL_INFO_CACHE_Args[] = {
+{MAKE_ARG("nodename",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SENTINEL IS_MASTER_DOWN_BY_ADDR ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL IS_MASTER_DOWN_BY_ADDR history */
+#define SENTINEL_IS_MASTER_DOWN_BY_ADDR_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL IS_MASTER_DOWN_BY_ADDR tips */
+#define SENTINEL_IS_MASTER_DOWN_BY_ADDR_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL IS_MASTER_DOWN_BY_ADDR key specs */
+#define SENTINEL_IS_MASTER_DOWN_BY_ADDR_Keyspecs NULL
+#endif
+
+/* SENTINEL IS_MASTER_DOWN_BY_ADDR argument table */
+/* Four fixed positional arguments (arity 6 in the command table: command +
+ * subcommand + these four). */
+struct COMMAND_ARG SENTINEL_IS_MASTER_DOWN_BY_ADDR_Args[] = {
+{MAKE_ARG("ip",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("port",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("current-epoch",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("runid",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL MASTER ********************/
+
+/* Generated metadata for SENTINEL MASTER/MASTERS/MONITOR/MYID/
+ * PENDING-SCRIPTS/REMOVE/REPLICAS — same History/Tips/Keyspecs stub layout
+ * as the sections above, plus argument tables where applicable. */
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL MASTER history */
+#define SENTINEL_MASTER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL MASTER tips */
+#define SENTINEL_MASTER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL MASTER key specs */
+#define SENTINEL_MASTER_Keyspecs NULL
+#endif
+
+/* SENTINEL MASTER argument table */
+struct COMMAND_ARG SENTINEL_MASTER_Args[] = {
+{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL MASTERS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL MASTERS history */
+#define SENTINEL_MASTERS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL MASTERS tips */
+#define SENTINEL_MASTERS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL MASTERS key specs */
+#define SENTINEL_MASTERS_Keyspecs NULL
+#endif
+
+/********** SENTINEL MONITOR ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL MONITOR history */
+#define SENTINEL_MONITOR_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL MONITOR tips */
+#define SENTINEL_MONITOR_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL MONITOR key specs */
+#define SENTINEL_MONITOR_Keyspecs NULL
+#endif
+
+/* SENTINEL MONITOR argument table */
+/* Four fixed positional arguments (arity 6 in the command table). */
+struct COMMAND_ARG SENTINEL_MONITOR_Args[] = {
+{MAKE_ARG("name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ip",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("port",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("quorum",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL MYID ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL MYID history */
+#define SENTINEL_MYID_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL MYID tips */
+#define SENTINEL_MYID_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL MYID key specs */
+#define SENTINEL_MYID_Keyspecs NULL
+#endif
+
+/********** SENTINEL PENDING_SCRIPTS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL PENDING_SCRIPTS history */
+#define SENTINEL_PENDING_SCRIPTS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL PENDING_SCRIPTS tips */
+#define SENTINEL_PENDING_SCRIPTS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL PENDING_SCRIPTS key specs */
+#define SENTINEL_PENDING_SCRIPTS_Keyspecs NULL
+#endif
+
+/********** SENTINEL REMOVE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL REMOVE history */
+#define SENTINEL_REMOVE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL REMOVE tips */
+#define SENTINEL_REMOVE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL REMOVE key specs */
+#define SENTINEL_REMOVE_Keyspecs NULL
+#endif
+
+/* SENTINEL REMOVE argument table */
+struct COMMAND_ARG SENTINEL_REMOVE_Args[] = {
+{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL REPLICAS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL REPLICAS history */
+#define SENTINEL_REPLICAS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL REPLICAS tips */
+#define SENTINEL_REPLICAS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL REPLICAS key specs */
+#define SENTINEL_REPLICAS_Keyspecs NULL
+#endif
+
+/* SENTINEL REPLICAS argument table */
+struct COMMAND_ARG SENTINEL_REPLICAS_Args[] = {
+{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL RESET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL RESET history */
+#define SENTINEL_RESET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL RESET tips */
+#define SENTINEL_RESET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL RESET key specs */
+#define SENTINEL_RESET_Keyspecs NULL
+#endif
+
+/* SENTINEL RESET argument table */
+/* Takes a glob-style pattern (ARG_TYPE_PATTERN), not a literal name. */
+struct COMMAND_ARG SENTINEL_RESET_Args[] = {
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL SENTINELS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL SENTINELS history */
+#define SENTINEL_SENTINELS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL SENTINELS tips */
+#define SENTINEL_SENTINELS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL SENTINELS key specs */
+#define SENTINEL_SENTINELS_Keyspecs NULL
+#endif
+
+/* SENTINEL SENTINELS argument table */
+struct COMMAND_ARG SENTINEL_SENTINELS_Args[] = {
+{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SENTINEL SET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL SET history */
+#define SENTINEL_SET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL SET tips */
+#define SENTINEL_SET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL SET key specs */
+#define SENTINEL_SET_Keyspecs NULL
+#endif
+
+/* SENTINEL SET data argument table */
+struct COMMAND_ARG SENTINEL_SET_data_Subargs[] = {
+{MAKE_ARG("option",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SENTINEL SET argument table */
+/* master-name followed by one or more option/value pairs (arity -5 in the
+ * command table: command + subcommand + name + at least one pair). */
+struct COMMAND_ARG SENTINEL_SET_Args[] = {
+{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=SENTINEL_SET_data_Subargs},
+};
+
+/********** SENTINEL SIMULATE_FAILURE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL SIMULATE_FAILURE history */
+#define SENTINEL_SIMULATE_FAILURE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL SIMULATE_FAILURE tips */
+#define SENTINEL_SIMULATE_FAILURE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL SIMULATE_FAILURE key specs */
+#define SENTINEL_SIMULATE_FAILURE_Keyspecs NULL
+#endif
+
+/* SENTINEL SIMULATE_FAILURE mode argument table */
+/* Pure-token alternatives (no value follows the token). */
+struct COMMAND_ARG SENTINEL_SIMULATE_FAILURE_mode_Subargs[] = {
+{MAKE_ARG("crash-after-election",ARG_TYPE_PURE_TOKEN,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("crash-after-promotion",ARG_TYPE_PURE_TOKEN,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("help",ARG_TYPE_PURE_TOKEN,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SENTINEL SIMULATE_FAILURE argument table */
+/* ONEOF of the three modes above; optional and repeatable. */
+struct COMMAND_ARG SENTINEL_SIMULATE_FAILURE_Args[] = {
+{MAKE_ARG("mode",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,3,NULL),.subargs=SENTINEL_SIMULATE_FAILURE_mode_Subargs},
+};
+
+/********** SENTINEL SLAVES ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL SLAVES history */
+#define SENTINEL_SLAVES_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL SLAVES tips */
+#define SENTINEL_SLAVES_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL SLAVES key specs */
+#define SENTINEL_SLAVES_Keyspecs NULL
+#endif
+
+/* SENTINEL SLAVES argument table */
+struct COMMAND_ARG SENTINEL_SLAVES_Args[] = {
+{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SENTINEL command table */
+/* All rows dispatch to sentinelCommand and carry CMD_SENTINEL together with
+ * CMD_ONLY_SENTINEL (HELP adds CMD_LOADING|CMD_STALE instead of CMD_ADMIN).
+ * History/args counts pair with the tables above (e.g. SENTINEL_CONFIG_History,1
+ * matches its one-entry history array). The "slaves" row is marked
+ * CMD_DOC_DEPRECATED with replacement "SENTINEL REPLICAS" since 5.0.0.
+ * {0} terminates the table. */
+struct COMMAND_STRUCT SENTINEL_Subcommands[] = {
+{MAKE_CMD("ckquorum","Checks for a Redis Sentinel quorum.",NULL,"2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_CKQUORUM_History,0,SENTINEL_CKQUORUM_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_CKQUORUM_Keyspecs,0,NULL,1),.args=SENTINEL_CKQUORUM_Args},
+{MAKE_CMD("config","Configures Redis Sentinel.","O(N) when N is the number of configuration parameters provided","6.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_CONFIG_History,1,SENTINEL_CONFIG_Tips,0,sentinelCommand,-4,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_CONFIG_Keyspecs,0,NULL,1),.args=SENTINEL_CONFIG_Args},
+{MAKE_CMD("debug","Lists or updates the current configurable parameters of Redis Sentinel.","O(N) where N is the number of configurable parameters","7.0.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_DEBUG_History,0,SENTINEL_DEBUG_Tips,0,sentinelCommand,-2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_DEBUG_Keyspecs,0,NULL,1),.args=SENTINEL_DEBUG_Args},
+{MAKE_CMD("failover","Forces a Redis Sentinel failover.",NULL,"2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_FAILOVER_History,0,SENTINEL_FAILOVER_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_FAILOVER_Keyspecs,0,NULL,1),.args=SENTINEL_FAILOVER_Args},
+{MAKE_CMD("flushconfig","Rewrites the Redis Sentinel configuration file.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_FLUSHCONFIG_History,0,SENTINEL_FLUSHCONFIG_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_FLUSHCONFIG_Keyspecs,0,NULL,0)},
+{MAKE_CMD("get-master-addr-by-name","Returns the port and address of a master Redis instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_GET_MASTER_ADDR_BY_NAME_History,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Keyspecs,0,NULL,1),.args=SENTINEL_GET_MASTER_ADDR_BY_NAME_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_HELP_History,0,SENTINEL_HELP_Tips,0,sentinelCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("info-cache","Returns the cached `INFO` replies from the deployment's instances.","O(N) where N is the number of instances","3.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_INFO_CACHE_History,0,SENTINEL_INFO_CACHE_Tips,0,sentinelCommand,-3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_INFO_CACHE_Keyspecs,0,NULL,1),.args=SENTINEL_INFO_CACHE_Args},
+{MAKE_CMD("is-master-down-by-addr","Determines whether a master Redis instance is down.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_IS_MASTER_DOWN_BY_ADDR_History,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Tips,0,sentinelCommand,6,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Keyspecs,0,NULL,4),.args=SENTINEL_IS_MASTER_DOWN_BY_ADDR_Args},
+{MAKE_CMD("master","Returns the state of a master Redis instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTER_History,0,SENTINEL_MASTER_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTER_Keyspecs,0,NULL,1),.args=SENTINEL_MASTER_Args},
+{MAKE_CMD("masters","Returns a list of monitored Redis masters.","O(N) where N is the number of masters","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTERS_History,0,SENTINEL_MASTERS_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTERS_Keyspecs,0,NULL,0)},
+{MAKE_CMD("monitor","Starts monitoring.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MONITOR_History,0,SENTINEL_MONITOR_Tips,0,sentinelCommand,6,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MONITOR_Keyspecs,0,NULL,4),.args=SENTINEL_MONITOR_Args},
+{MAKE_CMD("myid","Returns the Redis Sentinel instance ID.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MYID_History,0,SENTINEL_MYID_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MYID_Keyspecs,0,NULL,0)},
+{MAKE_CMD("pending-scripts","Returns information about pending scripts for Redis Sentinel.",NULL,"2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_PENDING_SCRIPTS_History,0,SENTINEL_PENDING_SCRIPTS_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_PENDING_SCRIPTS_Keyspecs,0,NULL,0)},
+{MAKE_CMD("remove","Stops monitoring.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_REMOVE_History,0,SENTINEL_REMOVE_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_REMOVE_Keyspecs,0,NULL,1),.args=SENTINEL_REMOVE_Args},
+{MAKE_CMD("replicas","Returns a list of the monitored Redis replicas.","O(N) where N is the number of replicas","5.0.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_REPLICAS_History,0,SENTINEL_REPLICAS_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_REPLICAS_Keyspecs,0,NULL,1),.args=SENTINEL_REPLICAS_Args},
+{MAKE_CMD("reset","Resets Redis masters by name matching a pattern.","O(N) where N is the number of monitored masters","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_RESET_History,0,SENTINEL_RESET_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_RESET_Keyspecs,0,NULL,1),.args=SENTINEL_RESET_Args},
+{MAKE_CMD("sentinels","Returns a list of Sentinel instances.","O(N) where N is the number of Sentinels","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SENTINELS_History,0,SENTINEL_SENTINELS_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SENTINELS_Keyspecs,0,NULL,1),.args=SENTINEL_SENTINELS_Args},
+{MAKE_CMD("set","Changes the configuration of a monitored Redis master.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SET_History,0,SENTINEL_SET_Tips,0,sentinelCommand,-5,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SET_Keyspecs,0,NULL,2),.args=SENTINEL_SET_Args},
+{MAKE_CMD("simulate-failure","Simulates failover scenarios.",NULL,"3.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SIMULATE_FAILURE_History,0,SENTINEL_SIMULATE_FAILURE_Tips,0,sentinelCommand,-3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SIMULATE_FAILURE_Keyspecs,0,NULL,1),.args=SENTINEL_SIMULATE_FAILURE_Args},
+{MAKE_CMD("slaves","Returns a list of the monitored replicas.","O(N) where N is the number of replicas.","2.8.0",CMD_DOC_DEPRECATED,"`SENTINEL REPLICAS`","5.0.0","sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SLAVES_History,0,SENTINEL_SLAVES_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SLAVES_Keyspecs,0,NULL,1),.args=SENTINEL_SLAVES_Args},
+{0}
+};
+
+/********** SENTINEL ********************/
+
+/* Metadata stubs for the SENTINEL container command itself — all NULL;
+ * per-subcommand metadata lives in the tables above. */
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SENTINEL history */
+#define SENTINEL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SENTINEL tips */
+#define SENTINEL_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SENTINEL key specs */
+#define SENTINEL_Keyspecs NULL
+#endif
+
+/********** ACL CAT ********************/
+
+/* Generated metadata for the ACL subcommands; same section layout as above
+ * (History/Tips/Keyspecs stubs plus optional argument tables). */
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL CAT history */
+#define ACL_CAT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL CAT tips */
+#define ACL_CAT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL CAT key specs */
+#define ACL_CAT_Keyspecs NULL
+#endif
+
+/* ACL CAT argument table */
+struct COMMAND_ARG ACL_CAT_Args[] = {
+{MAKE_ARG("category",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ACL DELUSER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL DELUSER history */
+#define ACL_DELUSER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL DELUSER tips */
+/* Routing hints: user deletion is broadcast to every node and must succeed
+ * everywhere. */
+const char *ACL_DELUSER_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL DELUSER key specs */
+#define ACL_DELUSER_Keyspecs NULL
+#endif
+
+/* ACL DELUSER argument table */
+/* One or more usernames (CMD_ARG_MULTIPLE). */
+struct COMMAND_ARG ACL_DELUSER_Args[] = {
+{MAKE_ARG("username",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** ACL DRYRUN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL DRYRUN history */
+#define ACL_DRYRUN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL DRYRUN tips */
+#define ACL_DRYRUN_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL DRYRUN key specs */
+#define ACL_DRYRUN_Keyspecs NULL
+#endif
+
+/* ACL DRYRUN argument table */
+struct COMMAND_ARG ACL_DRYRUN_Args[] = {
+{MAKE_ARG("username",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("command",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** ACL GENPASS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL GENPASS history */
+#define ACL_GENPASS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL GENPASS tips */
+#define ACL_GENPASS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL GENPASS key specs */
+#define ACL_GENPASS_Keyspecs NULL
+#endif
+
+/* ACL GENPASS argument table */
+struct COMMAND_ARG ACL_GENPASS_Args[] = {
+{MAKE_ARG("bits",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ACL GETUSER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL GETUSER history */
+/* Two entries; the ACL command table references this with count 2. */
+commandHistory ACL_GETUSER_History[] = {
+{"6.2.0","Added Pub/Sub channel patterns."},
+{"7.0.0","Added selectors and changed the format of key and channel patterns from a list to their rule representation."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL GETUSER tips */
+#define ACL_GETUSER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL GETUSER key specs */
+#define ACL_GETUSER_Keyspecs NULL
+#endif
+
+/* ACL GETUSER argument table */
+struct COMMAND_ARG ACL_GETUSER_Args[] = {
+{MAKE_ARG("username",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ACL HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL HELP history */
+#define ACL_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL HELP tips */
+#define ACL_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL HELP key specs */
+#define ACL_HELP_Keyspecs NULL
+#endif
+
+/********** ACL LIST ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL LIST history */
+#define ACL_LIST_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL LIST tips */
+#define ACL_LIST_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL LIST key specs */
+#define ACL_LIST_Keyspecs NULL
+#endif
+
+/********** ACL LOAD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL LOAD history */
+#define ACL_LOAD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL LOAD tips */
+#define ACL_LOAD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL LOAD key specs */
+#define ACL_LOAD_Keyspecs NULL
+#endif
+
+/********** ACL LOG ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL LOG history */
+commandHistory ACL_LOG_History[] = {
+{"7.2.0","Added entry ID, timestamp created, and timestamp last updated."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL LOG tips */
+#define ACL_LOG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL LOG key specs */
+#define ACL_LOG_Keyspecs NULL
+#endif
+
+/* ACL LOG operation argument table */
+/* Either a count (integer) or the literal RESET token. */
+struct COMMAND_ARG ACL_LOG_operation_Subargs[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("reset",ARG_TYPE_PURE_TOKEN,-1,"RESET",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ACL LOG argument table */
+struct COMMAND_ARG ACL_LOG_Args[] = {
+{MAKE_ARG("operation",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=ACL_LOG_operation_Subargs},
+};
+
+/********** ACL SAVE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL SAVE history */
+#define ACL_SAVE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL SAVE tips */
+/* Routing hints: broadcast to all nodes; every node must succeed. */
+const char *ACL_SAVE_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL SAVE key specs */
+#define ACL_SAVE_Keyspecs NULL
+#endif
+
+/********** ACL SETUSER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL SETUSER history */
+/* Two entries; the ACL command table references this with count 2. */
+commandHistory ACL_SETUSER_History[] = {
+{"6.2.0","Added Pub/Sub channel patterns."},
+{"7.0.0","Added selectors and key based permissions."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL SETUSER tips */
+const char *ACL_SETUSER_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL SETUSER key specs */
+#define ACL_SETUSER_Keyspecs NULL
+#endif
+
+/* ACL SETUSER argument table */
+/* Username followed by zero or more ACL rule strings. */
+struct COMMAND_ARG ACL_SETUSER_Args[] = {
+{MAKE_ARG("username",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("rule",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** ACL USERS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL USERS history */
+#define ACL_USERS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL USERS tips */
+#define ACL_USERS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL USERS key specs */
+#define ACL_USERS_Keyspecs NULL
+#endif
+
+/********** ACL WHOAMI ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL WHOAMI history */
+#define ACL_WHOAMI_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL WHOAMI tips */
+#define ACL_WHOAMI_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL WHOAMI key specs */
+#define ACL_WHOAMI_Keyspecs NULL
+#endif
+
+/* ACL command table */
+/* All rows dispatch to aclCommand and carry CMD_SENTINEL (usable in Sentinel
+ * mode). History/tips counts pair with the tables above (e.g.
+ * ACL_GETUSER_History,2 matches its two-entry array; ACL_SAVE_Tips,2 matches
+ * the two tip strings). {0} terminates the table. */
+struct COMMAND_STRUCT ACL_Subcommands[] = {
+{MAKE_CMD("cat","Lists the ACL categories, or the commands inside a category.","O(1) since the categories and commands are a fixed set.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_CAT_History,0,ACL_CAT_Tips,0,aclCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_CAT_Keyspecs,0,NULL,1),.args=ACL_CAT_Args},
+{MAKE_CMD("deluser","Deletes ACL users, and terminates their connections.","O(1) amortized time considering the typical user.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_DELUSER_History,0,ACL_DELUSER_Tips,2,aclCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_DELUSER_Keyspecs,0,NULL,1),.args=ACL_DELUSER_Args},
+{MAKE_CMD("dryrun","Simulates the execution of a command by a user, without executing the command.","O(1).","7.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_DRYRUN_History,0,ACL_DRYRUN_Tips,0,aclCommand,-4,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_DRYRUN_Keyspecs,0,NULL,3),.args=ACL_DRYRUN_Args},
+{MAKE_CMD("genpass","Generates a pseudorandom, secure password that can be used to identify ACL users.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_GENPASS_History,0,ACL_GENPASS_Tips,0,aclCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_GENPASS_Keyspecs,0,NULL,1),.args=ACL_GENPASS_Args},
+{MAKE_CMD("getuser","Lists the ACL rules of a user.","O(N). Where N is the number of password, command and pattern rules that the user has.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_GETUSER_History,2,ACL_GETUSER_Tips,0,aclCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_GETUSER_Keyspecs,0,NULL,1),.args=ACL_GETUSER_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_HELP_History,0,ACL_HELP_Tips,0,aclCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("list","Dumps the effective rules in ACL file format.","O(N). Where N is the number of configured users.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_LIST_History,0,ACL_LIST_Tips,0,aclCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_LIST_Keyspecs,0,NULL,0)},
+{MAKE_CMD("load","Reloads the rules from the configured ACL file.","O(N). Where N is the number of configured users.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_LOAD_History,0,ACL_LOAD_Tips,0,aclCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_LOAD_Keyspecs,0,NULL,0)},
+{MAKE_CMD("log","Lists recent security events generated due to ACL rules.","O(N) with N being the number of entries shown.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_LOG_History,1,ACL_LOG_Tips,0,aclCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_LOG_Keyspecs,0,NULL,1),.args=ACL_LOG_Args},
+{MAKE_CMD("save","Saves the effective ACL rules in the configured ACL file.","O(N). Where N is the number of configured users.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_SAVE_History,0,ACL_SAVE_Tips,2,aclCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_SAVE_Keyspecs,0,NULL,0)},
+{MAKE_CMD("setuser","Creates and modifies an ACL user and its rules.","O(N). Where N is the number of rules provided.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_SETUSER_History,2,ACL_SETUSER_Tips,2,aclCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_SETUSER_Keyspecs,0,NULL,2),.args=ACL_SETUSER_Args},
+{MAKE_CMD("users","Lists all ACL users.","O(N). Where N is the number of configured users.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_USERS_History,0,ACL_USERS_Tips,0,aclCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_USERS_Keyspecs,0,NULL,0)},
+{MAKE_CMD("whoami","Returns the authenticated username of the current connection.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_WHOAMI_History,0,ACL_WHOAMI_Tips,0,aclCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_WHOAMI_Keyspecs,0,NULL,0)},
+{0}
+};
+
+/********** ACL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ACL history */
+#define ACL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ACL tips */
+#define ACL_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ACL key specs */
+#define ACL_Keyspecs NULL
+#endif
+
+/********** BGREWRITEAOF ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BGREWRITEAOF history */
+#define BGREWRITEAOF_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BGREWRITEAOF tips */
+#define BGREWRITEAOF_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BGREWRITEAOF key specs */
+#define BGREWRITEAOF_Keyspecs NULL
+#endif
+
+/********** BGSAVE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BGSAVE history */
+commandHistory BGSAVE_History[] = {
+{"3.2.2","Added the `SCHEDULE` option."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BGSAVE tips */
+#define BGSAVE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BGSAVE key specs */
+#define BGSAVE_Keyspecs NULL
+#endif
+
+/* BGSAVE argument table */
+struct COMMAND_ARG BGSAVE_Args[] = {
+{MAKE_ARG("schedule",ARG_TYPE_PURE_TOKEN,-1,"SCHEDULE",NULL,"3.2.2",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** COMMAND COUNT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* COMMAND COUNT history */
+#define COMMAND_COUNT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* COMMAND COUNT tips */
+#define COMMAND_COUNT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* COMMAND COUNT key specs */
+#define COMMAND_COUNT_Keyspecs NULL
+#endif
+
+/********** COMMAND DOCS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* COMMAND DOCS history */
+#define COMMAND_DOCS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* COMMAND DOCS tips */
+const char *COMMAND_DOCS_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* COMMAND DOCS key specs */
+#define COMMAND_DOCS_Keyspecs NULL
+#endif
+
+/* COMMAND DOCS argument table */
+struct COMMAND_ARG COMMAND_DOCS_Args[] = {
+{MAKE_ARG("command-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** COMMAND GETKEYS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* COMMAND GETKEYS history */
+#define COMMAND_GETKEYS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* COMMAND GETKEYS tips */
+#define COMMAND_GETKEYS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* COMMAND GETKEYS key specs */
+#define COMMAND_GETKEYS_Keyspecs NULL
+#endif
+
+/* COMMAND GETKEYS argument table */
+struct COMMAND_ARG COMMAND_GETKEYS_Args[] = {
+{MAKE_ARG("command",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** COMMAND GETKEYSANDFLAGS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* COMMAND GETKEYSANDFLAGS history */
+#define COMMAND_GETKEYSANDFLAGS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* COMMAND GETKEYSANDFLAGS tips */
+#define COMMAND_GETKEYSANDFLAGS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* COMMAND GETKEYSANDFLAGS key specs */
+#define COMMAND_GETKEYSANDFLAGS_Keyspecs NULL
+#endif
+
+/* COMMAND GETKEYSANDFLAGS argument table */
+struct COMMAND_ARG COMMAND_GETKEYSANDFLAGS_Args[] = {
+{MAKE_ARG("command",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** COMMAND HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* COMMAND HELP history */
+#define COMMAND_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* COMMAND HELP tips */
+#define COMMAND_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* COMMAND HELP key specs */
+#define COMMAND_HELP_Keyspecs NULL
+#endif
+
+/********** COMMAND INFO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* COMMAND INFO history */
+commandHistory COMMAND_INFO_History[] = {
+{"7.0.0","Allowed to be called with no argument to get info on all commands."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* COMMAND INFO tips */
+const char *COMMAND_INFO_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* COMMAND INFO key specs */
+#define COMMAND_INFO_Keyspecs NULL
+#endif
+
+/* COMMAND INFO argument table */
+struct COMMAND_ARG COMMAND_INFO_Args[] = {
+{MAKE_ARG("command-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** COMMAND LIST ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* COMMAND LIST history */
+#define COMMAND_LIST_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* COMMAND LIST tips */
+const char *COMMAND_LIST_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* COMMAND LIST key specs */
+#define COMMAND_LIST_Keyspecs NULL
+#endif
+
+/* COMMAND LIST filterby argument table */
+struct COMMAND_ARG COMMAND_LIST_filterby_Subargs[] = {
+{MAKE_ARG("module-name",ARG_TYPE_STRING,-1,"MODULE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("category",ARG_TYPE_STRING,-1,"ACLCAT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,"PATTERN",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* COMMAND LIST argument table */
+struct COMMAND_ARG COMMAND_LIST_Args[] = {
+{MAKE_ARG("filterby",ARG_TYPE_ONEOF,-1,"FILTERBY",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=COMMAND_LIST_filterby_Subargs},
+};
+
+/* COMMAND command table */
+struct COMMAND_STRUCT COMMAND_Subcommands[] = {
+{MAKE_CMD("count","Returns a count of commands.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,COMMAND_COUNT_History,0,COMMAND_COUNT_Tips,0,commandCountCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,COMMAND_COUNT_Keyspecs,0,NULL,0)},
+{MAKE_CMD("docs","Returns documentary information about one, multiple or all commands.","O(N) where N is the number of commands to look up","7.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,COMMAND_DOCS_History,0,COMMAND_DOCS_Tips,1,commandDocsCommand,-2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,COMMAND_DOCS_Keyspecs,0,NULL,1),.args=COMMAND_DOCS_Args},
+{MAKE_CMD("getkeys","Extracts the key names from an arbitrary command.","O(N) where N is the number of arguments to the command","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,COMMAND_GETKEYS_History,0,COMMAND_GETKEYS_Tips,0,commandGetKeysCommand,-3,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,COMMAND_GETKEYS_Keyspecs,0,NULL,2),.args=COMMAND_GETKEYS_Args},
+{MAKE_CMD("getkeysandflags","Extracts the key names and access flags for an arbitrary command.","O(N) where N is the number of arguments to the command","7.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,COMMAND_GETKEYSANDFLAGS_History,0,COMMAND_GETKEYSANDFLAGS_Tips,0,commandGetKeysAndFlagsCommand,-3,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,COMMAND_GETKEYSANDFLAGS_Keyspecs,0,NULL,2),.args=COMMAND_GETKEYSANDFLAGS_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,COMMAND_HELP_History,0,COMMAND_HELP_Tips,0,commandHelpCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,COMMAND_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("info","Returns information about one, multiple or all commands.","O(N) where N is the number of commands to look up","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,COMMAND_INFO_History,1,COMMAND_INFO_Tips,1,commandInfoCommand,-2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,COMMAND_INFO_Keyspecs,0,NULL,1),.args=COMMAND_INFO_Args},
+{MAKE_CMD("list","Returns a list of command names.","O(N) where N is the total number of Redis commands","7.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,COMMAND_LIST_History,0,COMMAND_LIST_Tips,1,commandListCommand,-2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,COMMAND_LIST_Keyspecs,0,NULL,1),.args=COMMAND_LIST_Args},
+{0}
+};
+
+/********** COMMAND ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* COMMAND history */
+#define COMMAND_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* COMMAND tips */
+const char *COMMAND_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* COMMAND key specs */
+#define COMMAND_Keyspecs NULL
+#endif
+
+/********** CONFIG GET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CONFIG GET history */
+commandHistory CONFIG_GET_History[] = {
+{"7.0.0","Added the ability to pass multiple pattern parameters in one call"},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CONFIG GET tips */
+#define CONFIG_GET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CONFIG GET key specs */
+#define CONFIG_GET_Keyspecs NULL
+#endif
+
+/* CONFIG GET argument table */
+struct COMMAND_ARG CONFIG_GET_Args[] = {
+{MAKE_ARG("parameter",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** CONFIG HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CONFIG HELP history */
+#define CONFIG_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CONFIG HELP tips */
+#define CONFIG_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CONFIG HELP key specs */
+#define CONFIG_HELP_Keyspecs NULL
+#endif
+
+/********** CONFIG RESETSTAT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CONFIG RESETSTAT history */
+#define CONFIG_RESETSTAT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CONFIG RESETSTAT tips */
+const char *CONFIG_RESETSTAT_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CONFIG RESETSTAT key specs */
+#define CONFIG_RESETSTAT_Keyspecs NULL
+#endif
+
+/********** CONFIG REWRITE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CONFIG REWRITE history */
+#define CONFIG_REWRITE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CONFIG REWRITE tips */
+const char *CONFIG_REWRITE_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CONFIG REWRITE key specs */
+#define CONFIG_REWRITE_Keyspecs NULL
+#endif
+
+/********** CONFIG SET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CONFIG SET history */
+commandHistory CONFIG_SET_History[] = {
+{"7.0.0","Added the ability to set multiple parameters in one call."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CONFIG SET tips */
+const char *CONFIG_SET_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CONFIG SET key specs */
+#define CONFIG_SET_Keyspecs NULL
+#endif
+
+/* CONFIG SET data argument table */
+struct COMMAND_ARG CONFIG_SET_data_Subargs[] = {
+{MAKE_ARG("parameter",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CONFIG SET argument table */
+struct COMMAND_ARG CONFIG_SET_Args[] = {
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=CONFIG_SET_data_Subargs},
+};
+
+/* CONFIG command table */
+struct COMMAND_STRUCT CONFIG_Subcommands[] = {
+{MAKE_CMD("get","Returns the effective values of configuration parameters.","O(N) when N is the number of configuration parameters provided","2.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_GET_History,1,CONFIG_GET_Tips,0,configGetCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_GET_Keyspecs,0,NULL,1),.args=CONFIG_GET_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_HELP_History,0,CONFIG_HELP_Tips,0,configHelpCommand,2,CMD_LOADING|CMD_STALE,0,CONFIG_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("resetstat","Resets the server's statistics.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_RESETSTAT_History,0,CONFIG_RESETSTAT_Tips,2,configResetStatCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_RESETSTAT_Keyspecs,0,NULL,0)},
+{MAKE_CMD("rewrite","Persists the effective configuration to file.","O(1)","2.8.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_REWRITE_History,0,CONFIG_REWRITE_Tips,2,configRewriteCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_REWRITE_Keyspecs,0,NULL,0)},
+{MAKE_CMD("set","Sets configuration parameters in-flight.","O(N) when N is the number of configuration parameters provided","2.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_SET_History,1,CONFIG_SET_Tips,2,configSetCommand,-4,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_SET_Keyspecs,0,NULL,1),.args=CONFIG_SET_Args},
+{0}
+};
+
+/********** CONFIG ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CONFIG history */
+#define CONFIG_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CONFIG tips */
+#define CONFIG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CONFIG key specs */
+#define CONFIG_Keyspecs NULL
+#endif
+
+/********** DBSIZE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* DBSIZE history */
+#define DBSIZE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* DBSIZE tips */
+const char *DBSIZE_Tips[] = {
+"request_policy:all_shards",
+"response_policy:agg_sum",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* DBSIZE key specs */
+#define DBSIZE_Keyspecs NULL
+#endif
+
+/********** DEBUG ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* DEBUG history */
+#define DEBUG_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* DEBUG tips */
+#define DEBUG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* DEBUG key specs */
+#define DEBUG_Keyspecs NULL
+#endif
+
+/********** FAILOVER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FAILOVER history */
+#define FAILOVER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FAILOVER tips */
+#define FAILOVER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FAILOVER key specs */
+#define FAILOVER_Keyspecs NULL
+#endif
+
+/* FAILOVER target argument table */
+struct COMMAND_ARG FAILOVER_target_Subargs[] = {
+{MAKE_ARG("host",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("port",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("force",ARG_TYPE_PURE_TOKEN,-1,"FORCE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* FAILOVER argument table */
+struct COMMAND_ARG FAILOVER_Args[] = {
+{MAKE_ARG("target",ARG_TYPE_BLOCK,-1,"TO",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=FAILOVER_target_Subargs},
+{MAKE_ARG("abort",ARG_TYPE_PURE_TOKEN,-1,"ABORT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"TIMEOUT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** FLUSHALL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FLUSHALL history */
+commandHistory FLUSHALL_History[] = {
+{"4.0.0","Added the `ASYNC` flushing mode modifier."},
+{"6.2.0","Added the `SYNC` flushing mode modifier."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FLUSHALL tips */
+const char *FLUSHALL_Tips[] = {
+"request_policy:all_shards",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FLUSHALL key specs */
+#define FLUSHALL_Keyspecs NULL
+#endif
+
+/* FLUSHALL flush_type argument table */
+struct COMMAND_ARG FLUSHALL_flush_type_Subargs[] = {
+{MAKE_ARG("async",ARG_TYPE_PURE_TOKEN,-1,"ASYNC",NULL,"4.0.0",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("sync",ARG_TYPE_PURE_TOKEN,-1,"SYNC",NULL,"6.2.0",CMD_ARG_NONE,0,NULL)},
+};
+
+/* FLUSHALL argument table */
+struct COMMAND_ARG FLUSHALL_Args[] = {
+{MAKE_ARG("flush-type",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=FLUSHALL_flush_type_Subargs},
+};
+
+/********** FLUSHDB ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* FLUSHDB history */
+commandHistory FLUSHDB_History[] = {
+{"4.0.0","Added the `ASYNC` flushing mode modifier."},
+{"6.2.0","Added the `SYNC` flushing mode modifier."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* FLUSHDB tips */
+const char *FLUSHDB_Tips[] = {
+"request_policy:all_shards",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* FLUSHDB key specs */
+#define FLUSHDB_Keyspecs NULL
+#endif
+
+/* FLUSHDB flush_type argument table */
+struct COMMAND_ARG FLUSHDB_flush_type_Subargs[] = {
+{MAKE_ARG("async",ARG_TYPE_PURE_TOKEN,-1,"ASYNC",NULL,"4.0.0",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("sync",ARG_TYPE_PURE_TOKEN,-1,"SYNC",NULL,"6.2.0",CMD_ARG_NONE,0,NULL)},
+};
+
+/* FLUSHDB argument table */
+struct COMMAND_ARG FLUSHDB_Args[] = {
+{MAKE_ARG("flush-type",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=FLUSHDB_flush_type_Subargs},
+};
+
+/********** INFO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* INFO history */
+commandHistory INFO_History[] = {
+{"7.0.0","Added support for taking multiple section arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* INFO tips */
+const char *INFO_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_shards",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* INFO key specs */
+#define INFO_Keyspecs NULL
+#endif
+
+/* INFO argument table */
+struct COMMAND_ARG INFO_Args[] = {
+{MAKE_ARG("section",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** LASTSAVE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LASTSAVE history */
+#define LASTSAVE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LASTSAVE tips */
+const char *LASTSAVE_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LASTSAVE key specs */
+#define LASTSAVE_Keyspecs NULL
+#endif
+
+/********** LATENCY DOCTOR ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LATENCY DOCTOR history */
+#define LATENCY_DOCTOR_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LATENCY DOCTOR tips */
+const char *LATENCY_DOCTOR_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_nodes",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LATENCY DOCTOR key specs */
+#define LATENCY_DOCTOR_Keyspecs NULL
+#endif
+
+/********** LATENCY GRAPH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LATENCY GRAPH history */
+#define LATENCY_GRAPH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LATENCY GRAPH tips */
+const char *LATENCY_GRAPH_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_nodes",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LATENCY GRAPH key specs */
+#define LATENCY_GRAPH_Keyspecs NULL
+#endif
+
+/* LATENCY GRAPH argument table */
+struct COMMAND_ARG LATENCY_GRAPH_Args[] = {
+{MAKE_ARG("event",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LATENCY HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LATENCY HELP history */
+#define LATENCY_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LATENCY HELP tips */
+#define LATENCY_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LATENCY HELP key specs */
+#define LATENCY_HELP_Keyspecs NULL
+#endif
+
+/********** LATENCY HISTOGRAM ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LATENCY HISTOGRAM history */
+#define LATENCY_HISTOGRAM_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LATENCY HISTOGRAM tips */
+const char *LATENCY_HISTOGRAM_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_nodes",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LATENCY HISTOGRAM key specs */
+#define LATENCY_HISTOGRAM_Keyspecs NULL
+#endif
+
+/* LATENCY HISTOGRAM argument table */
+struct COMMAND_ARG LATENCY_HISTOGRAM_Args[] = {
+{MAKE_ARG("command",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** LATENCY HISTORY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LATENCY HISTORY history */
+#define LATENCY_HISTORY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LATENCY HISTORY tips */
+const char *LATENCY_HISTORY_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_nodes",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LATENCY HISTORY key specs */
+#define LATENCY_HISTORY_Keyspecs NULL
+#endif
+
+/* LATENCY HISTORY argument table */
+struct COMMAND_ARG LATENCY_HISTORY_Args[] = {
+{MAKE_ARG("event",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LATENCY LATEST ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LATENCY LATEST history */
+#define LATENCY_LATEST_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LATENCY LATEST tips */
+const char *LATENCY_LATEST_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_nodes",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LATENCY LATEST key specs */
+#define LATENCY_LATEST_Keyspecs NULL
+#endif
+
+/********** LATENCY RESET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LATENCY RESET history */
+#define LATENCY_RESET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LATENCY RESET tips */
+const char *LATENCY_RESET_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:agg_sum",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LATENCY RESET key specs */
+#define LATENCY_RESET_Keyspecs NULL
+#endif
+
+/* LATENCY RESET argument table */
+struct COMMAND_ARG LATENCY_RESET_Args[] = {
+{MAKE_ARG("event",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/* LATENCY command table */
+struct COMMAND_STRUCT LATENCY_Subcommands[] = {
+{MAKE_CMD("doctor","Returns a human-readable latency analysis report.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LATENCY_DOCTOR_History,0,LATENCY_DOCTOR_Tips,3,latencyCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,LATENCY_DOCTOR_Keyspecs,0,NULL,0)},
+{MAKE_CMD("graph","Returns a latency graph for an event.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LATENCY_GRAPH_History,0,LATENCY_GRAPH_Tips,3,latencyCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,LATENCY_GRAPH_Keyspecs,0,NULL,1),.args=LATENCY_GRAPH_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LATENCY_HELP_History,0,LATENCY_HELP_Tips,0,latencyCommand,2,CMD_LOADING|CMD_STALE,0,LATENCY_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("histogram","Returns the cumulative distribution of latencies of a subset or all commands.","O(N) where N is the number of commands with latency information being retrieved.","7.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LATENCY_HISTOGRAM_History,0,LATENCY_HISTOGRAM_Tips,3,latencyCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,LATENCY_HISTOGRAM_Keyspecs,0,NULL,1),.args=LATENCY_HISTOGRAM_Args},
+{MAKE_CMD("history","Returns timestamp-latency samples for an event.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LATENCY_HISTORY_History,0,LATENCY_HISTORY_Tips,3,latencyCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,LATENCY_HISTORY_Keyspecs,0,NULL,1),.args=LATENCY_HISTORY_Args},
+{MAKE_CMD("latest","Returns the latest latency samples for all events.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LATENCY_LATEST_History,0,LATENCY_LATEST_Tips,3,latencyCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,LATENCY_LATEST_Keyspecs,0,NULL,0)},
+{MAKE_CMD("reset","Resets the latency data for one or more events.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LATENCY_RESET_History,0,LATENCY_RESET_Tips,2,latencyCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,LATENCY_RESET_Keyspecs,0,NULL,1),.args=LATENCY_RESET_Args},
+{0}
+};
+
+/********** LATENCY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LATENCY history */
+#define LATENCY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LATENCY tips */
+#define LATENCY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LATENCY key specs */
+#define LATENCY_Keyspecs NULL
+#endif
+
+/********** LOLWUT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LOLWUT history */
+#define LOLWUT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LOLWUT tips */
+#define LOLWUT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LOLWUT key specs */
+#define LOLWUT_Keyspecs NULL
+#endif
+
+/* LOLWUT argument table */
+struct COMMAND_ARG LOLWUT_Args[] = {
+{MAKE_ARG("version",ARG_TYPE_INTEGER,-1,"VERSION",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** MEMORY DOCTOR ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MEMORY DOCTOR history */
+#define MEMORY_DOCTOR_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MEMORY DOCTOR tips */
+const char *MEMORY_DOCTOR_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_shards",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MEMORY DOCTOR key specs */
+#define MEMORY_DOCTOR_Keyspecs NULL
+#endif
+
+/********** MEMORY HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MEMORY HELP history */
+#define MEMORY_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MEMORY HELP tips */
+#define MEMORY_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MEMORY HELP key specs */
+#define MEMORY_HELP_Keyspecs NULL
+#endif
+
+/********** MEMORY MALLOC_STATS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MEMORY MALLOC_STATS history */
+#define MEMORY_MALLOC_STATS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MEMORY MALLOC_STATS tips */
+const char *MEMORY_MALLOC_STATS_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_shards",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MEMORY MALLOC_STATS key specs */
+#define MEMORY_MALLOC_STATS_Keyspecs NULL
+#endif
+
+/********** MEMORY PURGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MEMORY PURGE history */
+#define MEMORY_PURGE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MEMORY PURGE tips */
+const char *MEMORY_PURGE_Tips[] = {
+"request_policy:all_shards",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MEMORY PURGE key specs */
+#define MEMORY_PURGE_Keyspecs NULL
+#endif
+
+/********** MEMORY STATS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MEMORY STATS history */
+#define MEMORY_STATS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MEMORY STATS tips */
+const char *MEMORY_STATS_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_shards",
+"response_policy:special",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MEMORY STATS key specs */
+#define MEMORY_STATS_Keyspecs NULL
+#endif
+
+/********** MEMORY USAGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MEMORY USAGE history */
+#define MEMORY_USAGE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MEMORY USAGE tips */
+#define MEMORY_USAGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MEMORY USAGE key specs */
+keySpec MEMORY_USAGE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* MEMORY USAGE argument table */
+struct COMMAND_ARG MEMORY_USAGE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"SAMPLES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* MEMORY command table */
+struct COMMAND_STRUCT MEMORY_Subcommands[] = {
+{MAKE_CMD("doctor","Outputs a memory problems report.","O(1)","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MEMORY_DOCTOR_History,0,MEMORY_DOCTOR_Tips,3,memoryCommand,2,0,0,MEMORY_DOCTOR_Keyspecs,0,NULL,0)},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MEMORY_HELP_History,0,MEMORY_HELP_Tips,0,memoryCommand,2,CMD_LOADING|CMD_STALE,0,MEMORY_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("malloc-stats","Returns the allocator statistics.","Depends on how much memory is allocated, could be slow","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MEMORY_MALLOC_STATS_History,0,MEMORY_MALLOC_STATS_Tips,3,memoryCommand,2,0,0,MEMORY_MALLOC_STATS_Keyspecs,0,NULL,0)},
+{MAKE_CMD("purge","Asks the allocator to release memory.","Depends on how much memory is allocated, could be slow","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MEMORY_PURGE_History,0,MEMORY_PURGE_Tips,2,memoryCommand,2,0,0,MEMORY_PURGE_Keyspecs,0,NULL,0)},
+{MAKE_CMD("stats","Returns details about memory usage.","O(1)","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MEMORY_STATS_History,0,MEMORY_STATS_Tips,3,memoryCommand,2,0,0,MEMORY_STATS_Keyspecs,0,NULL,0)},
+{MAKE_CMD("usage","Estimates the memory usage of a key.","O(N) where N is the number of samples.","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MEMORY_USAGE_History,0,MEMORY_USAGE_Tips,0,memoryCommand,-3,CMD_READONLY,0,MEMORY_USAGE_Keyspecs,1,NULL,2),.args=MEMORY_USAGE_Args},
+{0}
+};
+
+/* NOTE(review): this table file appears to be auto-generated from the
+ * per-command JSON specs under src/commands/ (see the file tree) -- prefer
+ * editing the JSON sources and regenerating over hand-editing these entries. */
+/********** MEMORY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MEMORY history */
+/* NULL define == "no history entries"; the matching MAKE_CMD row passes 0
+ * as the entry count. */
+#define MEMORY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MEMORY tips */
+#define MEMORY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MEMORY key specs */
+#define MEMORY_Keyspecs NULL
+#endif
+
+/********** MODULE HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MODULE HELP history */
+#define MODULE_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MODULE HELP tips */
+#define MODULE_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MODULE HELP key specs */
+#define MODULE_HELP_Keyspecs NULL
+#endif
+
+/********** MODULE LIST ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MODULE LIST history */
+#define MODULE_LIST_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MODULE LIST tips */
+const char *MODULE_LIST_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MODULE LIST key specs */
+#define MODULE_LIST_Keyspecs NULL
+#endif
+
+/********** MODULE LOAD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MODULE LOAD history */
+#define MODULE_LOAD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MODULE LOAD tips */
+#define MODULE_LOAD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MODULE LOAD key specs */
+#define MODULE_LOAD_Keyspecs NULL
+#endif
+
+/* MODULE LOAD argument table */
+/* path is mandatory; "arg" is optional and repeatable (module init args). */
+struct COMMAND_ARG MODULE_LOAD_Args[] = {
+{MAKE_ARG("path",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("arg",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** MODULE LOADEX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MODULE LOADEX history */
+#define MODULE_LOADEX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MODULE LOADEX tips */
+#define MODULE_LOADEX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MODULE LOADEX key specs */
+#define MODULE_LOADEX_Keyspecs NULL
+#endif
+
+/* MODULE LOADEX configs argument table */
+/* Each repetition of the CONFIG token takes a name/value pair. */
+struct COMMAND_ARG MODULE_LOADEX_configs_Subargs[] = {
+{MAKE_ARG("name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* MODULE LOADEX argument table */
+struct COMMAND_ARG MODULE_LOADEX_Args[] = {
+{MAKE_ARG("path",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("configs",ARG_TYPE_BLOCK,-1,"CONFIG",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE|CMD_ARG_MULTIPLE_TOKEN,2,NULL),.subargs=MODULE_LOADEX_configs_Subargs},
+{MAKE_ARG("args",ARG_TYPE_STRING,-1,"ARGS",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** MODULE UNLOAD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MODULE UNLOAD history */
+#define MODULE_UNLOAD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MODULE UNLOAD tips */
+#define MODULE_UNLOAD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MODULE UNLOAD key specs */
+#define MODULE_UNLOAD_Keyspecs NULL
+#endif
+
+/* MODULE UNLOAD argument table */
+struct COMMAND_ARG MODULE_UNLOAD_Args[] = {
+{MAKE_ARG("name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* MODULE command table */
+/* NOTE(review): in each MAKE_CMD row the integer that follows the *_History /
+ * *_Tips argument is the entry count of that table (0 when the macro is NULL,
+ * e.g. MODULE_LIST_Tips has 1 entry and the "list" row passes 1); keep these
+ * in sync if the tables ever change. */
+struct COMMAND_STRUCT MODULE_Subcommands[] = {
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MODULE_HELP_History,0,MODULE_HELP_Tips,0,moduleCommand,2,CMD_LOADING|CMD_STALE,0,MODULE_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("list","Returns all loaded modules.","O(N) where N is the number of loaded modules.","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MODULE_LIST_History,0,MODULE_LIST_Tips,1,moduleCommand,2,CMD_ADMIN|CMD_NOSCRIPT,0,MODULE_LIST_Keyspecs,0,NULL,0)},
+{MAKE_CMD("load","Loads a module.","O(1)","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MODULE_LOAD_History,0,MODULE_LOAD_Tips,0,moduleCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_PROTECTED,0,MODULE_LOAD_Keyspecs,0,NULL,2),.args=MODULE_LOAD_Args},
+{MAKE_CMD("loadex","Loads a module using extended parameters.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MODULE_LOADEX_History,0,MODULE_LOADEX_Tips,0,moduleCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_PROTECTED,0,MODULE_LOADEX_Keyspecs,0,NULL,3),.args=MODULE_LOADEX_Args},
+{MAKE_CMD("unload","Unloads a module.","O(1)","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MODULE_UNLOAD_History,0,MODULE_UNLOAD_Tips,0,moduleCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_PROTECTED,0,MODULE_UNLOAD_Keyspecs,0,NULL,1),.args=MODULE_UNLOAD_Args},
+{0}
+};
+
+/********** MODULE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MODULE history */
+#define MODULE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MODULE tips */
+#define MODULE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MODULE key specs */
+#define MODULE_Keyspecs NULL
+#endif
+
+/********** MONITOR ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MONITOR history */
+#define MONITOR_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MONITOR tips */
+#define MONITOR_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MONITOR key specs */
+#define MONITOR_Keyspecs NULL
+#endif
+
+/********** PSYNC ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PSYNC history */
+#define PSYNC_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PSYNC tips */
+#define PSYNC_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PSYNC key specs */
+#define PSYNC_Keyspecs NULL
+#endif
+
+/* PSYNC argument table */
+struct COMMAND_ARG PSYNC_Args[] = {
+{MAKE_ARG("replicationid",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** REPLCONF ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* REPLCONF history */
+#define REPLCONF_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* REPLCONF tips */
+#define REPLCONF_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* REPLCONF key specs */
+#define REPLCONF_Keyspecs NULL
+#endif
+
+/********** REPLICAOF ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* REPLICAOF history */
+#define REPLICAOF_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* REPLICAOF tips */
+#define REPLICAOF_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* REPLICAOF key specs */
+#define REPLICAOF_Keyspecs NULL
+#endif
+
+/* REPLICAOF args host_port argument table */
+struct COMMAND_ARG REPLICAOF_args_host_port_Subargs[] = {
+{MAKE_ARG("host",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("port",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* REPLICAOF args no_one argument table */
+/* The literal tokens NO ONE form the alternative to host+port (presumably
+ * the "stop replicating" form -- confirm with the REPLICAOF docs). */
+struct COMMAND_ARG REPLICAOF_args_no_one_Subargs[] = {
+{MAKE_ARG("no",ARG_TYPE_PURE_TOKEN,-1,"NO",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("one",ARG_TYPE_PURE_TOKEN,-1,"ONE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* REPLICAOF args argument table */
+struct COMMAND_ARG REPLICAOF_args_Subargs[] = {
+{MAKE_ARG("host-port",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=REPLICAOF_args_host_port_Subargs},
+{MAKE_ARG("no-one",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=REPLICAOF_args_no_one_Subargs},
+};
+
+/* REPLICAOF argument table */
+struct COMMAND_ARG REPLICAOF_Args[] = {
+{MAKE_ARG("args",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=REPLICAOF_args_Subargs},
+};
+
+/********** RESTORE_ASKING ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* RESTORE_ASKING history */
+/* Note: two distinct 5.0.0 entries are intentional (separate changes). */
+commandHistory RESTORE_ASKING_History[] = {
+{"3.0.0","Added the `REPLACE` modifier."},
+{"5.0.0","Added the `ABSTTL` modifier."},
+{"5.0.0","Added the `IDLETIME` and `FREQ` options."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* RESTORE_ASKING tips */
+#define RESTORE_ASKING_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* RESTORE_ASKING key specs */
+/* Single key at argv[1]; .fk.range={0,1,0} looks like the
+ * {lastkey,keystep,limit} encoding used throughout -- TODO confirm against
+ * the keySpec definition in the header. */
+keySpec RESTORE_ASKING_Keyspecs[1] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* RESTORE_ASKING argument table */
+struct COMMAND_ARG RESTORE_ASKING_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("ttl",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("serialized-value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("replace",ARG_TYPE_PURE_TOKEN,-1,"REPLACE",NULL,"3.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("absttl",ARG_TYPE_PURE_TOKEN,-1,"ABSTTL",NULL,"5.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,"IDLETIME",NULL,"5.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("frequency",ARG_TYPE_INTEGER,-1,"FREQ",NULL,"5.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ROLE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ROLE history */
+#define ROLE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ROLE tips */
+#define ROLE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ROLE key specs */
+#define ROLE_Keyspecs NULL
+#endif
+
+/********** SAVE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SAVE history */
+#define SAVE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SAVE tips */
+#define SAVE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SAVE key specs */
+#define SAVE_Keyspecs NULL
+#endif
+
+/********** SHUTDOWN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SHUTDOWN history */
+commandHistory SHUTDOWN_History[] = {
+{"7.0.0","Added the `NOW`, `FORCE` and `ABORT` modifiers."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SHUTDOWN tips */
+#define SHUTDOWN_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SHUTDOWN key specs */
+#define SHUTDOWN_Keyspecs NULL
+#endif
+
+/* SHUTDOWN save_selector argument table */
+struct COMMAND_ARG SHUTDOWN_save_selector_Subargs[] = {
+{MAKE_ARG("nosave",ARG_TYPE_PURE_TOKEN,-1,"NOSAVE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("save",ARG_TYPE_PURE_TOKEN,-1,"SAVE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SHUTDOWN argument table */
+struct COMMAND_ARG SHUTDOWN_Args[] = {
+{MAKE_ARG("save-selector",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=SHUTDOWN_save_selector_Subargs},
+{MAKE_ARG("now",ARG_TYPE_PURE_TOKEN,-1,"NOW",NULL,"7.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("force",ARG_TYPE_PURE_TOKEN,-1,"FORCE",NULL,"7.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("abort",ARG_TYPE_PURE_TOKEN,-1,"ABORT",NULL,"7.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** SLAVEOF ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SLAVEOF history */
+#define SLAVEOF_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SLAVEOF tips */
+#define SLAVEOF_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SLAVEOF key specs */
+#define SLAVEOF_Keyspecs NULL
+#endif
+
+/* SLAVEOF args host_port argument table */
+/* SLAVEOF mirrors the REPLICAOF argument structure (host+port | NO ONE). */
+struct COMMAND_ARG SLAVEOF_args_host_port_Subargs[] = {
+{MAKE_ARG("host",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("port",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SLAVEOF args no_one argument table */
+struct COMMAND_ARG SLAVEOF_args_no_one_Subargs[] = {
+{MAKE_ARG("no",ARG_TYPE_PURE_TOKEN,-1,"NO",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("one",ARG_TYPE_PURE_TOKEN,-1,"ONE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SLAVEOF args argument table */
+struct COMMAND_ARG SLAVEOF_args_Subargs[] = {
+{MAKE_ARG("host-port",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=SLAVEOF_args_host_port_Subargs},
+{MAKE_ARG("no-one",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=SLAVEOF_args_no_one_Subargs},
+};
+
+/* SLAVEOF argument table */
+struct COMMAND_ARG SLAVEOF_Args[] = {
+{MAKE_ARG("args",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=SLAVEOF_args_Subargs},
+};
+
+/********** SLOWLOG GET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SLOWLOG GET history */
+commandHistory SLOWLOG_GET_History[] = {
+{"4.0.0","Added client IP address, port and name to the reply."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SLOWLOG GET tips */
+/* "request_policy"/"response_policy" strings are machine-readable routing
+ * hints (presumably consumed by cluster-aware clients -- see the command-tips
+ * docs). */
+const char *SLOWLOG_GET_Tips[] = {
+"request_policy:all_nodes",
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SLOWLOG GET key specs */
+#define SLOWLOG_GET_Keyspecs NULL
+#endif
+
+/* SLOWLOG GET argument table */
+struct COMMAND_ARG SLOWLOG_GET_Args[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** SLOWLOG HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SLOWLOG HELP history */
+#define SLOWLOG_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SLOWLOG HELP tips */
+#define SLOWLOG_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SLOWLOG HELP key specs */
+#define SLOWLOG_HELP_Keyspecs NULL
+#endif
+
+/********** SLOWLOG LEN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SLOWLOG LEN history */
+#define SLOWLOG_LEN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SLOWLOG LEN tips */
+const char *SLOWLOG_LEN_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:agg_sum",
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SLOWLOG LEN key specs */
+#define SLOWLOG_LEN_Keyspecs NULL
+#endif
+
+/********** SLOWLOG RESET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SLOWLOG RESET history */
+#define SLOWLOG_RESET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SLOWLOG RESET tips */
+const char *SLOWLOG_RESET_Tips[] = {
+"request_policy:all_nodes",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SLOWLOG RESET key specs */
+#define SLOWLOG_RESET_Keyspecs NULL
+#endif
+
+/* SLOWLOG command table */
+struct COMMAND_STRUCT SLOWLOG_Subcommands[] = {
+{MAKE_CMD("get","Returns the slow log's entries.","O(N) where N is the number of entries returned","2.2.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SLOWLOG_GET_History,1,SLOWLOG_GET_Tips,2,slowlogCommand,-2,CMD_ADMIN|CMD_LOADING|CMD_STALE,0,SLOWLOG_GET_Keyspecs,0,NULL,1),.args=SLOWLOG_GET_Args},
+{MAKE_CMD("help","Show helpful text about the different subcommands","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SLOWLOG_HELP_History,0,SLOWLOG_HELP_Tips,0,slowlogCommand,2,CMD_LOADING|CMD_STALE,0,SLOWLOG_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("len","Returns the number of entries in the slow log.","O(1)","2.2.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SLOWLOG_LEN_History,0,SLOWLOG_LEN_Tips,3,slowlogCommand,2,CMD_ADMIN|CMD_LOADING|CMD_STALE,0,SLOWLOG_LEN_Keyspecs,0,NULL,0)},
+{MAKE_CMD("reset","Clears all entries from the slow log.","O(N) where N is the number of entries in the slowlog","2.2.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SLOWLOG_RESET_History,0,SLOWLOG_RESET_Tips,2,slowlogCommand,2,CMD_ADMIN|CMD_LOADING|CMD_STALE,0,SLOWLOG_RESET_Keyspecs,0,NULL,0)},
+{0}
+};
+
+/********** SLOWLOG ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SLOWLOG history */
+#define SLOWLOG_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SLOWLOG tips */
+#define SLOWLOG_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SLOWLOG key specs */
+#define SLOWLOG_Keyspecs NULL
+#endif
+
+/********** SWAPDB ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SWAPDB history */
+#define SWAPDB_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SWAPDB tips */
+#define SWAPDB_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SWAPDB key specs */
+#define SWAPDB_Keyspecs NULL
+#endif
+
+/* SWAPDB argument table */
+/* Two database indexes; no key specs since SWAPDB operates on whole DBs. */
+struct COMMAND_ARG SWAPDB_Args[] = {
+{MAKE_ARG("index1",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("index2",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SYNC ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SYNC history */
+#define SYNC_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SYNC tips */
+#define SYNC_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SYNC key specs */
+#define SYNC_Keyspecs NULL
+#endif
+
+/********** TIME ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* TIME history */
+#define TIME_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* TIME tips */
+const char *TIME_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* TIME key specs */
+#define TIME_Keyspecs NULL
+#endif
+
+/********** SADD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SADD history */
+commandHistory SADD_History[] = {
+{"2.4.0","Accepts multiple `member` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SADD tips */
+#define SADD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SADD key specs */
+keySpec SADD_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SADD argument table */
+struct COMMAND_ARG SADD_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SCARD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCARD history */
+#define SCARD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCARD tips */
+#define SCARD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCARD key specs */
+keySpec SCARD_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SCARD argument table */
+struct COMMAND_ARG SCARD_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SDIFF ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SDIFF history */
+#define SDIFF_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SDIFF tips */
+const char *SDIFF_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SDIFF key specs */
+/* .fk.range={-1,1,0}: a -1 last-key appears to mean "every remaining arg is
+ * a key" (variadic key list) -- TODO confirm against the keySpec docs. */
+keySpec SDIFF_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* SDIFF argument table */
+struct COMMAND_ARG SDIFF_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SDIFFSTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SDIFFSTORE history */
+#define SDIFFSTORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SDIFFSTORE tips */
+#define SDIFFSTORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SDIFFSTORE key specs */
+/* Two specs: destination at argv[1] (overwrite), then source keys from
+ * argv[2] to the end. */
+keySpec SDIFFSTORE_Keyspecs[2] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* SDIFFSTORE argument table */
+struct COMMAND_ARG SDIFFSTORE_Args[] = {
+{MAKE_ARG("destination",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SINTER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SINTER history */
+#define SINTER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SINTER tips */
+const char *SINTER_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SINTER key specs */
+keySpec SINTER_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* SINTER argument table */
+struct COMMAND_ARG SINTER_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SINTERCARD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SINTERCARD history */
+#define SINTERCARD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SINTERCARD tips */
+#define SINTERCARD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SINTERCARD key specs */
+/* Uses KSPEC_FK_KEYNUM: the key count is taken from the numkeys argument
+ * rather than a fixed range. */
+keySpec SINTERCARD_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* SINTERCARD argument table */
+struct COMMAND_ARG SINTERCARD_Args[] = {
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("limit",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** SINTERSTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SINTERSTORE history */
+#define SINTERSTORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SINTERSTORE tips */
+#define SINTERSTORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SINTERSTORE key specs */
+/* NOTE(review): destination is RW|UPDATE here, while SDIFFSTORE/SUNIONSTORE
+ * use OW|UPDATE -- this mirrors the upstream JSON specs; verify there before
+ * "fixing" the asymmetry. */
+keySpec SINTERSTORE_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* SINTERSTORE argument table */
+struct COMMAND_ARG SINTERSTORE_Args[] = {
+{MAKE_ARG("destination",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SISMEMBER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SISMEMBER history */
+#define SISMEMBER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SISMEMBER tips */
+#define SISMEMBER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SISMEMBER key specs */
+keySpec SISMEMBER_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SISMEMBER argument table */
+struct COMMAND_ARG SISMEMBER_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SMEMBERS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SMEMBERS history */
+#define SMEMBERS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SMEMBERS tips */
+const char *SMEMBERS_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SMEMBERS key specs */
+keySpec SMEMBERS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SMEMBERS argument table */
+struct COMMAND_ARG SMEMBERS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SMISMEMBER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SMISMEMBER history */
+#define SMISMEMBER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SMISMEMBER tips */
+#define SMISMEMBER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SMISMEMBER key specs */
+keySpec SMISMEMBER_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SMISMEMBER argument table */
+struct COMMAND_ARG SMISMEMBER_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SMOVE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SMOVE history */
+#define SMOVE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SMOVE tips */
+#define SMOVE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SMOVE key specs */
+/* source (argv[1]) may lose a member (DELETE); destination (argv[2]) may
+ * gain one (INSERT). */
+keySpec SMOVE_Keyspecs[2] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SMOVE argument table */
+struct COMMAND_ARG SMOVE_Args[] = {
+{MAKE_ARG("source",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("destination",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SPOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SPOP history */
+commandHistory SPOP_History[] = {
+{"3.2.0","Added the `count` argument."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SPOP tips */
+const char *SPOP_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SPOP key specs */
+keySpec SPOP_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SPOP argument table */
+/* The "3.2.0" on count matches the history entry above (since-version). */
+struct COMMAND_ARG SPOP_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,"3.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** SRANDMEMBER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SRANDMEMBER history */
+commandHistory SRANDMEMBER_History[] = {
+{"2.6.0","Added the optional `count` argument."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SRANDMEMBER tips */
+const char *SRANDMEMBER_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SRANDMEMBER key specs */
+keySpec SRANDMEMBER_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SRANDMEMBER argument table */
+struct COMMAND_ARG SRANDMEMBER_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,"2.6.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** SREM ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SREM history */
+commandHistory SREM_History[] = {
+{"2.4.0","Accepts multiple `member` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SREM tips */
+#define SREM_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SREM key specs */
+keySpec SREM_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SREM argument table */
+struct COMMAND_ARG SREM_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SSCAN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SSCAN history */
+#define SSCAN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SSCAN tips */
+const char *SSCAN_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SSCAN key specs */
+keySpec SSCAN_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SSCAN argument table */
+/* MATCH and COUNT are optional token-prefixed arguments, as in SCAN. */
+struct COMMAND_ARG SSCAN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("cursor",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,"MATCH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** SUNION ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SUNION history */
+#define SUNION_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SUNION tips */
+const char *SUNION_Tips[] = {
+"nondeterministic_output_order",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SUNION key specs */
+keySpec SUNION_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* SUNION argument table */
+struct COMMAND_ARG SUNION_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** SUNIONSTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SUNIONSTORE history */
+#define SUNIONSTORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SUNIONSTORE tips */
+#define SUNIONSTORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SUNIONSTORE key specs */
+keySpec SUNIONSTORE_Keyspecs[2] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* SUNIONSTORE argument table */
+struct COMMAND_ARG SUNIONSTORE_Args[] = {
+{MAKE_ARG("destination",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** BZMPOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BZMPOP history */
+#define BZMPOP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BZMPOP tips */
+#define BZMPOP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BZMPOP key specs */
+/* Keys start at argv[2] (after timeout); count comes from the numkeys
+ * argument (KSPEC_FK_KEYNUM). */
+keySpec BZMPOP_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* BZMPOP where argument table */
+struct COMMAND_ARG BZMPOP_where_Subargs[] = {
+{MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* BZMPOP argument table */
+struct COMMAND_ARG BZMPOP_Args[] = {
+{MAKE_ARG("timeout",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("where",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=BZMPOP_where_Subargs},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** BZPOPMAX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BZPOPMAX history */
+commandHistory BZPOPMAX_History[] = {
+{"6.0.0","`timeout` is interpreted as a double instead of an integer."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BZPOPMAX tips */
+#define BZPOPMAX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BZPOPMAX key specs */
+/* .fk.range={-2,1,0}: keys run from argv[1] up to (but not including) the
+ * trailing timeout argument -- hence the negative last-key offset. */
+keySpec BZPOPMAX_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-2,1,0}}
+};
+#endif
+
+/* BZPOPMAX argument table */
+struct COMMAND_ARG BZPOPMAX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("timeout",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** BZPOPMIN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* BZPOPMIN history */
+commandHistory BZPOPMIN_History[] = {
+{"6.0.0","`timeout` is interpreted as a double instead of an integer."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* BZPOPMIN tips */
+#define BZPOPMIN_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* BZPOPMIN key specs */
+keySpec BZPOPMIN_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-2,1,0}}
+};
+#endif
+
+/* BZPOPMIN argument table */
+struct COMMAND_ARG BZPOPMIN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("timeout",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ZADD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZADD history */
+commandHistory ZADD_History[] = {
+{"2.4.0","Accepts multiple elements."},
+{"3.0.2","Added the `XX`, `NX`, `CH` and `INCR` options."},
+{"6.2.0","Added the `GT` and `LT` options."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZADD tips */
+#define ZADD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZADD key specs */
+keySpec ZADD_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZADD condition argument table */
+struct COMMAND_ARG ZADD_condition_Subargs[] = {
+{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZADD comparison argument table */
+struct COMMAND_ARG ZADD_comparison_Subargs[] = {
+{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZADD data argument table */
+struct COMMAND_ARG ZADD_data_Subargs[] = {
+{MAKE_ARG("score",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZADD argument table */
+/* since-versions on condition/comparison/change/increment line up with the
+ * history entries above (3.0.2 and 6.2.0). */
+struct COMMAND_ARG ZADD_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"3.0.2",CMD_ARG_OPTIONAL,2,NULL),.subargs=ZADD_condition_Subargs},
+{MAKE_ARG("comparison",ARG_TYPE_ONEOF,-1,NULL,NULL,"6.2.0",CMD_ARG_OPTIONAL,2,NULL),.subargs=ZADD_comparison_Subargs},
+{MAKE_ARG("change",ARG_TYPE_PURE_TOKEN,-1,"CH",NULL,"3.0.2",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("increment",ARG_TYPE_PURE_TOKEN,-1,"INCR",NULL,"3.0.2",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=ZADD_data_Subargs},
+};
+
+/********** ZCARD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZCARD history */
+#define ZCARD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZCARD tips */
+#define ZCARD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZCARD key specs */
+keySpec ZCARD_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZCARD argument table */
+struct COMMAND_ARG ZCARD_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ZCOUNT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZCOUNT history */
+#define ZCOUNT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZCOUNT tips */
+#define ZCOUNT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZCOUNT key specs */
+keySpec ZCOUNT_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZCOUNT argument table */
+struct COMMAND_ARG ZCOUNT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ZDIFF ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZDIFF history */
+#define ZDIFF_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZDIFF tips */
+#define ZDIFF_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZDIFF key specs */
+keySpec ZDIFF_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* ZDIFF argument table */
+struct COMMAND_ARG ZDIFF_Args[] = {
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZDIFFSTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZDIFFSTORE history */
+#define ZDIFFSTORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZDIFFSTORE tips */
+#define ZDIFFSTORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZDIFFSTORE key specs */
+keySpec ZDIFFSTORE_Keyspecs[2] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* ZDIFFSTORE argument table */
+struct COMMAND_ARG ZDIFFSTORE_Args[] = {
+{MAKE_ARG("destination",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** ZINCRBY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZINCRBY history */
+#define ZINCRBY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZINCRBY tips */
+#define ZINCRBY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZINCRBY key specs */
+keySpec ZINCRBY_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZINCRBY argument table */
+struct COMMAND_ARG ZINCRBY_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("increment",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ZINTER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZINTER history */
+#define ZINTER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZINTER tips */
+#define ZINTER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZINTER key specs */
+keySpec ZINTER_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* ZINTER aggregate argument table */
+struct COMMAND_ARG ZINTER_aggregate_Subargs[] = {
+{MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZINTER argument table */
+struct COMMAND_ARG ZINTER_Args[] = {
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("weight",ARG_TYPE_INTEGER,-1,"WEIGHTS",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=ZINTER_aggregate_Subargs},
+{MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZINTERCARD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZINTERCARD history */
+#define ZINTERCARD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZINTERCARD tips */
+#define ZINTERCARD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZINTERCARD key specs */
+keySpec ZINTERCARD_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* ZINTERCARD argument table */
+struct COMMAND_ARG ZINTERCARD_Args[] = {
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("limit",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZINTERSTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZINTERSTORE history */
+#define ZINTERSTORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZINTERSTORE tips */
+#define ZINTERSTORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZINTERSTORE key specs */
+keySpec ZINTERSTORE_Keyspecs[2] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* ZINTERSTORE aggregate argument table */
+struct COMMAND_ARG ZINTERSTORE_aggregate_Subargs[] = {
+{MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZINTERSTORE argument table */
+struct COMMAND_ARG ZINTERSTORE_Args[] = {
+{MAKE_ARG("destination",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("weight",ARG_TYPE_INTEGER,-1,"WEIGHTS",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=ZINTERSTORE_aggregate_Subargs},
+};
+
+/********** ZLEXCOUNT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZLEXCOUNT history */
+#define ZLEXCOUNT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZLEXCOUNT tips */
+#define ZLEXCOUNT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZLEXCOUNT key specs */
+keySpec ZLEXCOUNT_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZLEXCOUNT argument table */
+struct COMMAND_ARG ZLEXCOUNT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ZMPOP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZMPOP history */
+#define ZMPOP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZMPOP tips */
+#define ZMPOP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZMPOP key specs */
+keySpec ZMPOP_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* ZMPOP where argument table */
+struct COMMAND_ARG ZMPOP_where_Subargs[] = {
+{MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZMPOP argument table */
+struct COMMAND_ARG ZMPOP_Args[] = {
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("where",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=ZMPOP_where_Subargs},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZMSCORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZMSCORE history */
+#define ZMSCORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZMSCORE tips */
+#define ZMSCORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZMSCORE key specs */
+keySpec ZMSCORE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZMSCORE argument table */
+struct COMMAND_ARG ZMSCORE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** ZPOPMAX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZPOPMAX history */
+#define ZPOPMAX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZPOPMAX tips */
+#define ZPOPMAX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZPOPMAX key specs */
+keySpec ZPOPMAX_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZPOPMAX argument table */
+struct COMMAND_ARG ZPOPMAX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZPOPMIN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZPOPMIN history */
+#define ZPOPMIN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZPOPMIN tips */
+#define ZPOPMIN_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZPOPMIN key specs */
+keySpec ZPOPMIN_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZPOPMIN argument table */
+struct COMMAND_ARG ZPOPMIN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZRANDMEMBER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZRANDMEMBER history */
+#define ZRANDMEMBER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZRANDMEMBER tips */
+const char *ZRANDMEMBER_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZRANDMEMBER key specs */
+keySpec ZRANDMEMBER_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZRANDMEMBER options argument table */
+struct COMMAND_ARG ZRANDMEMBER_options_Subargs[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* ZRANDMEMBER argument table */
+struct COMMAND_ARG ZRANDMEMBER_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("options",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=ZRANDMEMBER_options_Subargs},
+};
+
+/********** ZRANGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZRANGE history */
+commandHistory ZRANGE_History[] = {
+{"6.2.0","Added the `REV`, `BYSCORE`, `BYLEX` and `LIMIT` options."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZRANGE tips */
+#define ZRANGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZRANGE key specs */
+keySpec ZRANGE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZRANGE sortby argument table */
+struct COMMAND_ARG ZRANGE_sortby_Subargs[] = {
+{MAKE_ARG("byscore",ARG_TYPE_PURE_TOKEN,-1,"BYSCORE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("bylex",ARG_TYPE_PURE_TOKEN,-1,"BYLEX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZRANGE limit argument table */
+struct COMMAND_ARG ZRANGE_limit_Subargs[] = {
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZRANGE argument table */
+struct COMMAND_ARG ZRANGE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("stop",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("sortby",ARG_TYPE_ONEOF,-1,NULL,NULL,"6.2.0",CMD_ARG_OPTIONAL,2,NULL),.subargs=ZRANGE_sortby_Subargs},
+{MAKE_ARG("rev",ARG_TYPE_PURE_TOKEN,-1,"REV",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("limit",ARG_TYPE_BLOCK,-1,"LIMIT",NULL,"6.2.0",CMD_ARG_OPTIONAL,2,NULL),.subargs=ZRANGE_limit_Subargs},
+{MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZRANGEBYLEX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZRANGEBYLEX history */
+#define ZRANGEBYLEX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZRANGEBYLEX tips */
+#define ZRANGEBYLEX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZRANGEBYLEX key specs */
+keySpec ZRANGEBYLEX_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZRANGEBYLEX limit argument table */
+struct COMMAND_ARG ZRANGEBYLEX_limit_Subargs[] = {
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZRANGEBYLEX argument table */
+struct COMMAND_ARG ZRANGEBYLEX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("limit",ARG_TYPE_BLOCK,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=ZRANGEBYLEX_limit_Subargs},
+};
+
+/********** ZRANGEBYSCORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZRANGEBYSCORE history */
+commandHistory ZRANGEBYSCORE_History[] = {
+{"2.0.0","Added the `WITHSCORES` modifier."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZRANGEBYSCORE tips */
+#define ZRANGEBYSCORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZRANGEBYSCORE key specs */
+keySpec ZRANGEBYSCORE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZRANGEBYSCORE limit argument table */
+struct COMMAND_ARG ZRANGEBYSCORE_limit_Subargs[] = {
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZRANGEBYSCORE argument table */
+struct COMMAND_ARG ZRANGEBYSCORE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,"2.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("limit",ARG_TYPE_BLOCK,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=ZRANGEBYSCORE_limit_Subargs},
+};
+
+/********** ZRANGESTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZRANGESTORE history */
+#define ZRANGESTORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZRANGESTORE tips */
+#define ZRANGESTORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZRANGESTORE key specs */
+keySpec ZRANGESTORE_Keyspecs[2] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZRANGESTORE sortby argument table */
+struct COMMAND_ARG ZRANGESTORE_sortby_Subargs[] = {
+{MAKE_ARG("byscore",ARG_TYPE_PURE_TOKEN,-1,"BYSCORE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("bylex",ARG_TYPE_PURE_TOKEN,-1,"BYLEX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZRANGESTORE limit argument table */
+struct COMMAND_ARG ZRANGESTORE_limit_Subargs[] = {
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZRANGESTORE argument table */
+struct COMMAND_ARG ZRANGESTORE_Args[] = {
+{MAKE_ARG("dst",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("src",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("sortby",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=ZRANGESTORE_sortby_Subargs},
+{MAKE_ARG("rev",ARG_TYPE_PURE_TOKEN,-1,"REV",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("limit",ARG_TYPE_BLOCK,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=ZRANGESTORE_limit_Subargs},
+};
+
+/********** ZRANK ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZRANK history */
+commandHistory ZRANK_History[] = {
+{"7.2.0","Added the optional `WITHSCORE` argument."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZRANK tips */
+#define ZRANK_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZRANK key specs */
+keySpec ZRANK_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZRANK argument table */
+struct COMMAND_ARG ZRANK_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("withscore",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZREM ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZREM history */
+commandHistory ZREM_History[] = {
+{"2.4.0","Accepts multiple elements."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZREM tips */
+#define ZREM_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZREM key specs */
+keySpec ZREM_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZREM argument table */
+struct COMMAND_ARG ZREM_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** ZREMRANGEBYLEX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZREMRANGEBYLEX history */
+#define ZREMRANGEBYLEX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZREMRANGEBYLEX tips */
+#define ZREMRANGEBYLEX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZREMRANGEBYLEX key specs */
+keySpec ZREMRANGEBYLEX_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZREMRANGEBYLEX argument table */
+struct COMMAND_ARG ZREMRANGEBYLEX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ZREMRANGEBYRANK ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZREMRANGEBYRANK history */
+#define ZREMRANGEBYRANK_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZREMRANGEBYRANK tips */
+#define ZREMRANGEBYRANK_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZREMRANGEBYRANK key specs */
+keySpec ZREMRANGEBYRANK_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZREMRANGEBYRANK argument table */
+struct COMMAND_ARG ZREMRANGEBYRANK_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("stop",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ZREMRANGEBYSCORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZREMRANGEBYSCORE history */
+#define ZREMRANGEBYSCORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZREMRANGEBYSCORE tips */
+#define ZREMRANGEBYSCORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZREMRANGEBYSCORE key specs */
+keySpec ZREMRANGEBYSCORE_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZREMRANGEBYSCORE argument table */
+struct COMMAND_ARG ZREMRANGEBYSCORE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ZREVRANGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZREVRANGE history */
+#define ZREVRANGE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZREVRANGE tips */
+#define ZREVRANGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZREVRANGE key specs */
+keySpec ZREVRANGE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZREVRANGE argument table */
+struct COMMAND_ARG ZREVRANGE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("stop",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZREVRANGEBYLEX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZREVRANGEBYLEX history */
+#define ZREVRANGEBYLEX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZREVRANGEBYLEX tips */
+#define ZREVRANGEBYLEX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZREVRANGEBYLEX key specs */
+keySpec ZREVRANGEBYLEX_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZREVRANGEBYLEX limit argument table */
+struct COMMAND_ARG ZREVRANGEBYLEX_limit_Subargs[] = {
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZREVRANGEBYLEX argument table */
+struct COMMAND_ARG ZREVRANGEBYLEX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("limit",ARG_TYPE_BLOCK,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=ZREVRANGEBYLEX_limit_Subargs},
+};
+
+/********** ZREVRANGEBYSCORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZREVRANGEBYSCORE history */
+commandHistory ZREVRANGEBYSCORE_History[] = {
+{"2.1.6","`min` and `max` can be exclusive."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZREVRANGEBYSCORE tips */
+#define ZREVRANGEBYSCORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZREVRANGEBYSCORE key specs */
+keySpec ZREVRANGEBYSCORE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZREVRANGEBYSCORE limit argument table */
+struct COMMAND_ARG ZREVRANGEBYSCORE_limit_Subargs[] = {
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZREVRANGEBYSCORE argument table */
+struct COMMAND_ARG ZREVRANGEBYSCORE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("limit",ARG_TYPE_BLOCK,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=ZREVRANGEBYSCORE_limit_Subargs},
+};
+
+/********** ZREVRANK ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZREVRANK history */
+commandHistory ZREVRANK_History[] = {
+{"7.2.0","Added the optional `WITHSCORE` argument."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZREVRANK tips */
+#define ZREVRANK_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZREVRANK key specs */
+keySpec ZREVRANK_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZREVRANK argument table */
+struct COMMAND_ARG ZREVRANK_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("withscore",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZSCAN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZSCAN history */
+#define ZSCAN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZSCAN tips */
+const char *ZSCAN_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZSCAN key specs */
+keySpec ZSCAN_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZSCAN argument table */
+struct COMMAND_ARG ZSCAN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("cursor",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,"MATCH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZSCORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZSCORE history */
+#define ZSCORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZSCORE tips */
+#define ZSCORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZSCORE key specs */
+keySpec ZSCORE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* ZSCORE argument table */
+struct COMMAND_ARG ZSCORE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("member",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** ZUNION ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZUNION history */
+#define ZUNION_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZUNION tips */
+#define ZUNION_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZUNION key specs */
+keySpec ZUNION_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* ZUNION aggregate argument table */
+struct COMMAND_ARG ZUNION_aggregate_Subargs[] = {
+{MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZUNION argument table */
+struct COMMAND_ARG ZUNION_Args[] = {
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("weight",ARG_TYPE_INTEGER,-1,"WEIGHTS",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=ZUNION_aggregate_Subargs},
+{MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** ZUNIONSTORE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* ZUNIONSTORE history */
+#define ZUNIONSTORE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* ZUNIONSTORE tips */
+#define ZUNIONSTORE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* ZUNIONSTORE key specs */
+keySpec ZUNIONSTORE_Keyspecs[2] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_KEYNUM,.fk.keynum={0,1,1}}
+};
+#endif
+
+/* ZUNIONSTORE aggregate argument table */
+struct COMMAND_ARG ZUNIONSTORE_aggregate_Subargs[] = {
+{MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* ZUNIONSTORE argument table */
+struct COMMAND_ARG ZUNIONSTORE_Args[] = {
+{MAKE_ARG("destination",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("weight",ARG_TYPE_INTEGER,-1,"WEIGHTS",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=ZUNIONSTORE_aggregate_Subargs},
+};
+
+/********** XACK ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XACK history */
+#define XACK_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XACK tips */
+#define XACK_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XACK key specs */
+keySpec XACK_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XACK argument table */
+struct COMMAND_ARG XACK_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** XADD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XADD history */
+commandHistory XADD_History[] = {
+{"6.2.0","Added the `NOMKSTREAM` option, `MINID` trimming strategy and the `LIMIT` option."},
+{"7.0.0","Added support for the `<ms>-*` explicit ID form."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XADD tips */
+const char *XADD_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XADD key specs */
+keySpec XADD_Keyspecs[1] = {
+{"UPDATE instead of INSERT because of the optional trimming feature",CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XADD trim strategy argument table */
+struct COMMAND_ARG XADD_trim_strategy_Subargs[] = {
+{MAKE_ARG("maxlen",ARG_TYPE_PURE_TOKEN,-1,"MAXLEN",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("minid",ARG_TYPE_PURE_TOKEN,-1,"MINID",NULL,"6.2.0",CMD_ARG_NONE,0,NULL)},
+};
+
+/* XADD trim operator argument table */
+struct COMMAND_ARG XADD_trim_operator_Subargs[] = {
+{MAKE_ARG("equal",ARG_TYPE_PURE_TOKEN,-1,"=",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("approximately",ARG_TYPE_PURE_TOKEN,-1,"~",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* XADD trim argument table */
+struct COMMAND_ARG XADD_trim_Subargs[] = {
+{MAKE_ARG("strategy",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XADD_trim_strategy_Subargs},
+{MAKE_ARG("operator",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=XADD_trim_operator_Subargs},
+{MAKE_ARG("threshold",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* XADD id_selector argument table */
+struct COMMAND_ARG XADD_id_selector_Subargs[] = {
+{MAKE_ARG("auto-id",ARG_TYPE_PURE_TOKEN,-1,"*",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* XADD data argument table */
+struct COMMAND_ARG XADD_data_Subargs[] = {
+{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* XADD argument table */
+struct COMMAND_ARG XADD_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("nomkstream",ARG_TYPE_PURE_TOKEN,-1,"NOMKSTREAM",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("trim",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=XADD_trim_Subargs},
+{MAKE_ARG("id-selector",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XADD_id_selector_Subargs},
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=XADD_data_Subargs},
+};
+
+/********** XAUTOCLAIM ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XAUTOCLAIM history */
+commandHistory XAUTOCLAIM_History[] = {
+{"7.0.0","Added an element to the reply array, containing deleted entries the command cleared from the PEL"},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XAUTOCLAIM tips */
+const char *XAUTOCLAIM_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XAUTOCLAIM key specs */
+keySpec XAUTOCLAIM_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XAUTOCLAIM argument table */
+struct COMMAND_ARG XAUTOCLAIM_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("consumer",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min-idle-time",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("justid",ARG_TYPE_PURE_TOKEN,-1,"JUSTID",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** XCLAIM ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XCLAIM history */
+#define XCLAIM_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XCLAIM tips */
+const char *XCLAIM_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XCLAIM key specs */
+keySpec XCLAIM_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XCLAIM argument table */
+struct COMMAND_ARG XCLAIM_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("consumer",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("min-idle-time",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("ms",ARG_TYPE_INTEGER,-1,"IDLE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,"TIME",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"RETRYCOUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("force",ARG_TYPE_PURE_TOKEN,-1,"FORCE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("justid",ARG_TYPE_PURE_TOKEN,-1,"JUSTID",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("lastid",ARG_TYPE_STRING,-1,"LASTID",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** XDEL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XDEL history */
+#define XDEL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XDEL tips */
+#define XDEL_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XDEL key specs */
+keySpec XDEL_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XDEL argument table */
+struct COMMAND_ARG XDEL_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** XGROUP CREATE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XGROUP CREATE history */
+commandHistory XGROUP_CREATE_History[] = {
+{"7.0.0","Added the `entries_read` named argument."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XGROUP CREATE tips */
+#define XGROUP_CREATE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XGROUP CREATE key specs */
+keySpec XGROUP_CREATE_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XGROUP CREATE id_selector argument table */
+struct COMMAND_ARG XGROUP_CREATE_id_selector_Subargs[] = {
+{MAKE_ARG("id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("new-id",ARG_TYPE_PURE_TOKEN,-1,"$",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* XGROUP CREATE argument table */
+struct COMMAND_ARG XGROUP_CREATE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("id-selector",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XGROUP_CREATE_id_selector_Subargs},
+{MAKE_ARG("mkstream",ARG_TYPE_PURE_TOKEN,-1,"MKSTREAM",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("entries-read",ARG_TYPE_INTEGER,-1,"ENTRIESREAD",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** XGROUP CREATECONSUMER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XGROUP CREATECONSUMER history */
+#define XGROUP_CREATECONSUMER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XGROUP CREATECONSUMER tips */
+#define XGROUP_CREATECONSUMER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XGROUP CREATECONSUMER key specs */
+keySpec XGROUP_CREATECONSUMER_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XGROUP CREATECONSUMER argument table */
+struct COMMAND_ARG XGROUP_CREATECONSUMER_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("consumer",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** XGROUP DELCONSUMER ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XGROUP DELCONSUMER history */
+#define XGROUP_DELCONSUMER_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XGROUP DELCONSUMER tips */
+#define XGROUP_DELCONSUMER_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XGROUP DELCONSUMER key specs */
+keySpec XGROUP_DELCONSUMER_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XGROUP DELCONSUMER argument table */
+struct COMMAND_ARG XGROUP_DELCONSUMER_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("consumer",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** XGROUP DESTROY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XGROUP DESTROY history */
+#define XGROUP_DESTROY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XGROUP DESTROY tips */
+#define XGROUP_DESTROY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XGROUP DESTROY key specs */
+keySpec XGROUP_DESTROY_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XGROUP DESTROY argument table */
+struct COMMAND_ARG XGROUP_DESTROY_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** XGROUP HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XGROUP HELP history */
+#define XGROUP_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XGROUP HELP tips */
+#define XGROUP_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XGROUP HELP key specs */
+#define XGROUP_HELP_Keyspecs NULL
+#endif
+
+/********** XGROUP SETID ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XGROUP SETID history */
+commandHistory XGROUP_SETID_History[] = {
+{"7.0.0","Added the optional `entries_read` argument."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XGROUP SETID tips */
+#define XGROUP_SETID_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XGROUP SETID key specs */
+keySpec XGROUP_SETID_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XGROUP SETID id_selector argument table */
+struct COMMAND_ARG XGROUP_SETID_id_selector_Subargs[] = {
+{MAKE_ARG("id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("new-id",ARG_TYPE_PURE_TOKEN,-1,"$",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* XGROUP SETID argument table */
+struct COMMAND_ARG XGROUP_SETID_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("id-selector",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XGROUP_SETID_id_selector_Subargs},
+{MAKE_ARG("entriesread",ARG_TYPE_INTEGER,-1,"ENTRIESREAD",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL),.display_text="entries-read"},
+};
+
+/* XGROUP command table */
+struct COMMAND_STRUCT XGROUP_Subcommands[] = {
+{MAKE_CMD("create","Creates a consumer group.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XGROUP_CREATE_History,1,XGROUP_CREATE_Tips,0,xgroupCommand,-5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STREAM,XGROUP_CREATE_Keyspecs,1,NULL,5),.args=XGROUP_CREATE_Args},
+{MAKE_CMD("createconsumer","Creates a consumer in a consumer group.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XGROUP_CREATECONSUMER_History,0,XGROUP_CREATECONSUMER_Tips,0,xgroupCommand,5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STREAM,XGROUP_CREATECONSUMER_Keyspecs,1,NULL,3),.args=XGROUP_CREATECONSUMER_Args},
+{MAKE_CMD("delconsumer","Deletes a consumer from a consumer group.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XGROUP_DELCONSUMER_History,0,XGROUP_DELCONSUMER_Tips,0,xgroupCommand,5,CMD_WRITE,ACL_CATEGORY_STREAM,XGROUP_DELCONSUMER_Keyspecs,1,NULL,3),.args=XGROUP_DELCONSUMER_Args},
+{MAKE_CMD("destroy","Destroys a consumer group.","O(N) where N is the number of entries in the group's pending entries list (PEL).","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XGROUP_DESTROY_History,0,XGROUP_DESTROY_Tips,0,xgroupCommand,4,CMD_WRITE,ACL_CATEGORY_STREAM,XGROUP_DESTROY_Keyspecs,1,NULL,2),.args=XGROUP_DESTROY_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XGROUP_HELP_History,0,XGROUP_HELP_Tips,0,xgroupCommand,2,CMD_LOADING|CMD_STALE,ACL_CATEGORY_STREAM,XGROUP_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("setid","Sets the last-delivered ID of a consumer group.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XGROUP_SETID_History,1,XGROUP_SETID_Tips,0,xgroupCommand,-5,CMD_WRITE,ACL_CATEGORY_STREAM,XGROUP_SETID_Keyspecs,1,NULL,4),.args=XGROUP_SETID_Args},
+{0}
+};
+
+/********** XGROUP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XGROUP history */
+#define XGROUP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XGROUP tips */
+#define XGROUP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XGROUP key specs */
+#define XGROUP_Keyspecs NULL
+#endif
+
+/********** XINFO CONSUMERS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XINFO CONSUMERS history */
+commandHistory XINFO_CONSUMERS_History[] = {
+{"7.2.0","Added the `inactive` field."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XINFO CONSUMERS tips */
+const char *XINFO_CONSUMERS_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XINFO CONSUMERS key specs */
+keySpec XINFO_CONSUMERS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XINFO CONSUMERS argument table */
+struct COMMAND_ARG XINFO_CONSUMERS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** XINFO GROUPS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XINFO GROUPS history */
+commandHistory XINFO_GROUPS_History[] = {
+{"7.0.0","Added the `entries-read` and `lag` fields"},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XINFO GROUPS tips */
+#define XINFO_GROUPS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XINFO GROUPS key specs */
+keySpec XINFO_GROUPS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XINFO GROUPS argument table */
+struct COMMAND_ARG XINFO_GROUPS_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** XINFO HELP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XINFO HELP history */
+#define XINFO_HELP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XINFO HELP tips */
+#define XINFO_HELP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XINFO HELP key specs */
+#define XINFO_HELP_Keyspecs NULL
+#endif
+
+/********** XINFO STREAM ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XINFO STREAM history */
+commandHistory XINFO_STREAM_History[] = {
+{"6.0.0","Added the `FULL` modifier."},
+{"7.0.0","Added the `max-deleted-entry-id`, `entries-added`, `recorded-first-entry-id`, `entries-read` and `lag` fields"},
+{"7.2.0","Added the `active-time` field, and changed the meaning of `seen-time`."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XINFO STREAM tips */
+#define XINFO_STREAM_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XINFO STREAM key specs */
+keySpec XINFO_STREAM_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XINFO STREAM full_block argument table */
+struct COMMAND_ARG XINFO_STREAM_full_block_Subargs[] = {
+{MAKE_ARG("full",ARG_TYPE_PURE_TOKEN,-1,"FULL",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* XINFO STREAM argument table */
+struct COMMAND_ARG XINFO_STREAM_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("full-block",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=XINFO_STREAM_full_block_Subargs},
+};
+
+/* XINFO command table */
+struct COMMAND_STRUCT XINFO_Subcommands[] = {
+{MAKE_CMD("consumers","Returns a list of the consumers in a consumer group.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_CONSUMERS_History,1,XINFO_CONSUMERS_Tips,1,xinfoCommand,4,CMD_READONLY,ACL_CATEGORY_STREAM,XINFO_CONSUMERS_Keyspecs,1,NULL,2),.args=XINFO_CONSUMERS_Args},
+{MAKE_CMD("groups","Returns a list of the consumer groups of a stream.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_GROUPS_History,1,XINFO_GROUPS_Tips,0,xinfoCommand,3,CMD_READONLY,ACL_CATEGORY_STREAM,XINFO_GROUPS_Keyspecs,1,NULL,1),.args=XINFO_GROUPS_Args},
+{MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_HELP_History,0,XINFO_HELP_Tips,0,xinfoCommand,2,CMD_LOADING|CMD_STALE,ACL_CATEGORY_STREAM,XINFO_HELP_Keyspecs,0,NULL,0)},
+{MAKE_CMD("stream","Returns information about a stream.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_STREAM_History,3,XINFO_STREAM_Tips,0,xinfoCommand,-3,CMD_READONLY,ACL_CATEGORY_STREAM,XINFO_STREAM_Keyspecs,1,NULL,2),.args=XINFO_STREAM_Args},
+{0}
+};
+
+/********** XINFO ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XINFO history */
+#define XINFO_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XINFO tips */
+#define XINFO_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XINFO key specs */
+#define XINFO_Keyspecs NULL
+#endif
+
+/********** XLEN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XLEN history */
+#define XLEN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XLEN tips */
+#define XLEN_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XLEN key specs */
+keySpec XLEN_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XLEN argument table */
+struct COMMAND_ARG XLEN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** XPENDING ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XPENDING history */
+commandHistory XPENDING_History[] = {
+{"6.2.0","Added the `IDLE` option and exclusive range intervals."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XPENDING tips */
+const char *XPENDING_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XPENDING key specs */
+keySpec XPENDING_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XPENDING filters argument table */
+struct COMMAND_ARG XPENDING_filters_Subargs[] = {
+{MAKE_ARG("min-idle-time",ARG_TYPE_INTEGER,-1,"IDLE",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("consumer",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* XPENDING argument table */
+struct COMMAND_ARG XPENDING_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("filters",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,5,NULL),.subargs=XPENDING_filters_Subargs},
+};
+
+/********** XRANGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XRANGE history */
+commandHistory XRANGE_History[] = {
+{"6.2.0","Added exclusive ranges."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XRANGE tips */
+#define XRANGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XRANGE key specs */
+keySpec XRANGE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XRANGE argument table */
+struct COMMAND_ARG XRANGE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** XREAD ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XREAD history */
+#define XREAD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XREAD tips */
+#define XREAD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XREAD key specs */
+keySpec XREAD_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_KEYWORD,.bs.keyword={"STREAMS",1},KSPEC_FK_RANGE,.fk.range={-1,1,2}}
+};
+#endif
+
+/* XREAD streams argument table */
+struct COMMAND_ARG XREAD_streams_Subargs[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/* XREAD argument table */
+struct COMMAND_ARG XREAD_Args[] = {
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"BLOCK",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("streams",ARG_TYPE_BLOCK,-1,"STREAMS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XREAD_streams_Subargs},
+};
+
+/********** XREADGROUP ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XREADGROUP history */
+#define XREADGROUP_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XREADGROUP tips */
+#define XREADGROUP_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XREADGROUP key specs */
+keySpec XREADGROUP_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_KEYWORD,.bs.keyword={"STREAMS",4},KSPEC_FK_RANGE,.fk.range={-1,1,2}}
+};
+#endif
+
+/* XREADGROUP group_block argument table */
+struct COMMAND_ARG XREADGROUP_group_block_Subargs[] = {
+{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("consumer",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* XREADGROUP streams argument table */
+struct COMMAND_ARG XREADGROUP_streams_Subargs[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+{MAKE_ARG("id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/* XREADGROUP argument table */
+struct COMMAND_ARG XREADGROUP_Args[] = {
+{MAKE_ARG("group-block",ARG_TYPE_BLOCK,-1,"GROUP",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XREADGROUP_group_block_Subargs},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"BLOCK",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("noack",ARG_TYPE_PURE_TOKEN,-1,"NOACK",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("streams",ARG_TYPE_BLOCK,-1,"STREAMS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XREADGROUP_streams_Subargs},
+};
+
+/********** XREVRANGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XREVRANGE history */
+commandHistory XREVRANGE_History[] = {
+{"6.2.0","Added exclusive ranges."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XREVRANGE tips */
+#define XREVRANGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XREVRANGE key specs */
+keySpec XREVRANGE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XREVRANGE argument table */
+struct COMMAND_ARG XREVRANGE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** XSETID ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XSETID history */
+commandHistory XSETID_History[] = {
+{"7.0.0","Added the `entries_added` and `max_deleted_entry_id` arguments."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XSETID tips */
+#define XSETID_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XSETID key specs */
+keySpec XSETID_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XSETID argument table */
+struct COMMAND_ARG XSETID_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("last-id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("entries-added",ARG_TYPE_INTEGER,-1,"ENTRIESADDED",NULL,"7.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("max-deleted-id",ARG_TYPE_STRING,-1,"MAXDELETEDID",NULL,"7.0.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** XTRIM ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* XTRIM history */
+commandHistory XTRIM_History[] = {
+{"6.2.0","Added the `MINID` trimming strategy and the `LIMIT` option."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* XTRIM tips */
+const char *XTRIM_Tips[] = {
+"nondeterministic_output",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* XTRIM key specs */
+keySpec XTRIM_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* XTRIM trim strategy argument table */
+struct COMMAND_ARG XTRIM_trim_strategy_Subargs[] = {
+{MAKE_ARG("maxlen",ARG_TYPE_PURE_TOKEN,-1,"MAXLEN",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("minid",ARG_TYPE_PURE_TOKEN,-1,"MINID",NULL,"6.2.0",CMD_ARG_NONE,0,NULL)},
+};
+
+/* XTRIM trim operator argument table */
+struct COMMAND_ARG XTRIM_trim_operator_Subargs[] = {
+{MAKE_ARG("equal",ARG_TYPE_PURE_TOKEN,-1,"=",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("approximately",ARG_TYPE_PURE_TOKEN,-1,"~",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* XTRIM trim argument table */
+struct COMMAND_ARG XTRIM_trim_Subargs[] = {
+{MAKE_ARG("strategy",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XTRIM_trim_strategy_Subargs},
+{MAKE_ARG("operator",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=XTRIM_trim_operator_Subargs},
+{MAKE_ARG("threshold",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/* XTRIM argument table */
+struct COMMAND_ARG XTRIM_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("trim",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,4,NULL),.subargs=XTRIM_trim_Subargs},
+};
+
+/********** APPEND ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* APPEND history */
+#define APPEND_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* APPEND tips */
+#define APPEND_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* APPEND key specs */
+keySpec APPEND_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* APPEND argument table */
+struct COMMAND_ARG APPEND_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** DECR ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* DECR history */
+#define DECR_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* DECR tips */
+#define DECR_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* DECR key specs */
+keySpec DECR_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* DECR argument table */
+struct COMMAND_ARG DECR_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** DECRBY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* DECRBY history */
+#define DECRBY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* DECRBY tips */
+#define DECRBY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* DECRBY key specs */
+keySpec DECRBY_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* DECRBY argument table */
+struct COMMAND_ARG DECRBY_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("decrement",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** GET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GET history */
+#define GET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GET tips */
+#define GET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GET key specs */
+keySpec GET_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GET argument table */
+struct COMMAND_ARG GET_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** GETDEL ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GETDEL history */
+#define GETDEL_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GETDEL tips */
+#define GETDEL_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GETDEL key specs */
+keySpec GETDEL_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GETDEL argument table */
+struct COMMAND_ARG GETDEL_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** GETEX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GETEX history */
+#define GETEX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GETEX tips */
+#define GETEX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GETEX key specs */
+keySpec GETEX_Keyspecs[1] = {
+{"RW and UPDATE because it changes the TTL",CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GETEX expiration argument table */
+struct COMMAND_ARG GETEX_expiration_Subargs[] = {
+{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,"EX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"PX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,"EXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,"PXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("persist",ARG_TYPE_PURE_TOKEN,-1,"PERSIST",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* GETEX argument table */
+struct COMMAND_ARG GETEX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("expiration",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,5,NULL),.subargs=GETEX_expiration_Subargs},
+};
+
+/********** GETRANGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GETRANGE history */
+#define GETRANGE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GETRANGE tips */
+#define GETRANGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GETRANGE key specs */
+keySpec GETRANGE_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GETRANGE argument table */
+struct COMMAND_ARG GETRANGE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** GETSET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* GETSET history */
+#define GETSET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* GETSET tips */
+#define GETSET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* GETSET key specs */
+keySpec GETSET_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* GETSET argument table */
+struct COMMAND_ARG GETSET_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** INCR ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* INCR history */
+#define INCR_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* INCR tips */
+#define INCR_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* INCR key specs */
+/* Single key at arg index 1: read-write, accessed and updated. */
+keySpec INCR_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* INCR argument table: INCR key */
+struct COMMAND_ARG INCR_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** INCRBY ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* INCRBY history */
+#define INCRBY_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* INCRBY tips */
+#define INCRBY_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* INCRBY key specs */
+/* Single key at arg index 1: read-write, accessed and updated. */
+keySpec INCRBY_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* INCRBY argument table: INCRBY key increment (integer increment) */
+struct COMMAND_ARG INCRBY_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("increment",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** INCRBYFLOAT ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* INCRBYFLOAT history */
+#define INCRBYFLOAT_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* INCRBYFLOAT tips */
+#define INCRBYFLOAT_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* INCRBYFLOAT key specs */
+/* Single key at arg index 1: read-write, accessed and updated. */
+keySpec INCRBYFLOAT_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* INCRBYFLOAT argument table: INCRBYFLOAT key increment.
+ * Unlike INCRBY, the increment is typed ARG_TYPE_DOUBLE. */
+struct COMMAND_ARG INCRBYFLOAT_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("increment",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** LCS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* LCS history */
+#define LCS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* LCS tips */
+#define LCS_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* LCS key specs */
+/* Read-only keys starting at arg index 1; the range initializer {1,1,0}
+ * covers both key1 and key2 (field meaning per the keySpec type). */
+keySpec LCS_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={1,1,0}}
+};
+#endif
+
+/* LCS argument table:
+ * LCS key1 key2 [LEN] [IDX] [MINMATCHLEN min-match-len] [WITHMATCHLEN] */
+struct COMMAND_ARG LCS_Args[] = {
+{MAKE_ARG("key1",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("key2",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("len",ARG_TYPE_PURE_TOKEN,-1,"LEN",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("idx",ARG_TYPE_PURE_TOKEN,-1,"IDX",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("min-match-len",ARG_TYPE_INTEGER,-1,"MINMATCHLEN",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("withmatchlen",ARG_TYPE_PURE_TOKEN,-1,"WITHMATCHLEN",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+};
+
+/********** MGET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MGET history */
+#define MGET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MGET tips: routed to multiple shards in cluster mode. */
+const char *MGET_Tips[] = {
+"request_policy:multi_shard",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MGET key specs */
+/* Read-only keys starting at arg index 1; the -1 in the range
+ * initializer marks an open-ended key list (per range semantics). */
+keySpec MGET_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* MGET argument table: MGET key [key ...] (CMD_ARG_MULTIPLE) */
+struct COMMAND_ARG MGET_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/********** MSET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MSET history */
+#define MSET_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MSET tips: multi-shard request; reply is success only if all shards
+ * succeeded. */
+const char *MSET_Tips[] = {
+"request_policy:multi_shard",
+"response_policy:all_succeeded",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MSET key specs */
+/* Overwritten/updated keys starting at arg index 1; keystep 2 skips the
+ * interleaved values, -1 marks an open-ended list (per range semantics). */
+keySpec MSET_Keyspecs[1] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,2,0}}
+};
+#endif
+
+/* MSET data argument table: one key/value pair. */
+struct COMMAND_ARG MSET_data_Subargs[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* MSET argument table: MSET key value [key value ...]
+ * (a repeating block of the 2 subargs above). */
+struct COMMAND_ARG MSET_Args[] = {
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=MSET_data_Subargs},
+};
+
+/********** MSETNX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MSETNX history */
+#define MSETNX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MSETNX tips */
+#define MSETNX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MSETNX key specs */
+/* Like MSET but flagged INSERT instead of UPDATE: keys start at arg
+ * index 1, keystep 2, open-ended list. */
+keySpec MSETNX_Keyspecs[1] = {
+{NULL,CMD_KEY_OW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,2,0}}
+};
+#endif
+
+/* MSETNX data argument table: one key/value pair. */
+struct COMMAND_ARG MSETNX_data_Subargs[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* MSETNX argument table: MSETNX key value [key value ...]
+ * (a repeating block of the 2 subargs above). */
+struct COMMAND_ARG MSETNX_Args[] = {
+{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=MSETNX_data_Subargs},
+};
+
+/********** PSETEX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* PSETEX history */
+#define PSETEX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* PSETEX tips */
+#define PSETEX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* PSETEX key specs */
+/* Single key at arg index 1: overwritten and updated. */
+keySpec PSETEX_Keyspecs[1] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* PSETEX argument table: PSETEX key milliseconds value */
+struct COMMAND_ARG PSETEX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SET ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SET history: 4 entries (the count is referenced from the main
+ * command table). */
+commandHistory SET_History[] = {
+{"2.6.12","Added the `EX`, `PX`, `NX` and `XX` options."},
+{"6.0.0","Added the `KEEPTTL` option."},
+{"6.2.0","Added the `GET`, `EXAT` and `PXAT` option."},
+{"7.0.0","Allowed the `NX` and `GET` options to be used together."},
+};
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SET tips */
+#define SET_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SET key specs */
+/* Single key at arg index 1; CMD_KEY_VARIABLE_FLAGS because the ACCESS
+ * flag only applies when the optional GET argument is given (see the
+ * notes string in the spec itself). */
+keySpec SET_Keyspecs[1] = {
+{"RW and ACCESS due to the optional `GET` argument",CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE|CMD_KEY_VARIABLE_FLAGS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SET condition argument table: NX | XX (one-of). */
+struct COMMAND_ARG SET_condition_Subargs[] = {
+{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* SET expiration argument table: EX | PX | EXAT | PXAT | KEEPTTL, each
+ * tagged with the version it was introduced in. */
+struct COMMAND_ARG SET_expiration_Subargs[] = {
+{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,"EX",NULL,"2.6.12",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"PX",NULL,"2.6.12",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,"EXAT",NULL,"6.2.0",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,"PXAT",NULL,"6.2.0",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("keepttl",ARG_TYPE_PURE_TOKEN,-1,"KEEPTTL",NULL,"6.0.0",CMD_ARG_NONE,0,NULL)},
+};
+
+/* SET argument table:
+ * SET key value [NX|XX] [GET] [EX s|PX ms|EXAT ts|PXAT ts|KEEPTTL] */
+struct COMMAND_ARG SET_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"2.6.12",CMD_ARG_OPTIONAL,2,NULL),.subargs=SET_condition_Subargs},
+{MAKE_ARG("get",ARG_TYPE_PURE_TOKEN,-1,"GET",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("expiration",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,5,NULL),.subargs=SET_expiration_Subargs},
+};
+
+/********** SETEX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SETEX history */
+#define SETEX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SETEX tips */
+#define SETEX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SETEX key specs */
+/* Single key at arg index 1: overwritten and updated. */
+keySpec SETEX_Keyspecs[1] = {
+{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SETEX argument table: SETEX key seconds value */
+struct COMMAND_ARG SETEX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SETNX ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SETNX history */
+#define SETNX_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SETNX tips */
+#define SETNX_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SETNX key specs */
+/* Single key at arg index 1: overwritten; INSERT (not UPDATE) since
+ * SETNX only writes when the key does not exist. */
+keySpec SETNX_Keyspecs[1] = {
+{NULL,CMD_KEY_OW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SETNX argument table: SETNX key value */
+struct COMMAND_ARG SETNX_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SETRANGE ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SETRANGE history */
+#define SETRANGE_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SETRANGE tips */
+#define SETRANGE_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SETRANGE key specs */
+/* Single key at arg index 1: read-write and updated (no ACCESS flag —
+ * the existing value is modified, not returned). */
+keySpec SETRANGE_Keyspecs[1] = {
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SETRANGE argument table: SETRANGE key offset value */
+struct COMMAND_ARG SETRANGE_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("offset",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** STRLEN ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* STRLEN history */
+#define STRLEN_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* STRLEN tips */
+#define STRLEN_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* STRLEN key specs */
+/* Single key at arg index 1: read-only (no ACCESS flag — only the
+ * length is inspected, not the value content). */
+keySpec STRLEN_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* STRLEN argument table: STRLEN key */
+struct COMMAND_ARG STRLEN_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** SUBSTR ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SUBSTR history */
+#define SUBSTR_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SUBSTR tips */
+#define SUBSTR_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SUBSTR key specs */
+/* Single key at arg index 1: read-only, accessed (same spec shape as
+ * GETRANGE, which SUBSTR mirrors). */
+keySpec SUBSTR_Keyspecs[1] = {
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
+};
+#endif
+
+/* SUBSTR argument table: SUBSTR key start end */
+struct COMMAND_ARG SUBSTR_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/********** DISCARD ********************/
+/* DISCARD declares no history, tips, or key specs: all three tables
+ * are NULL. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* DISCARD history */
+#define DISCARD_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* DISCARD tips */
+#define DISCARD_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* DISCARD key specs */
+#define DISCARD_Keyspecs NULL
+#endif
+
+/********** EXEC ********************/
+/* EXEC declares no history, tips, or key specs: all three tables are
+ * NULL. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* EXEC history */
+#define EXEC_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* EXEC tips */
+#define EXEC_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* EXEC key specs */
+#define EXEC_Keyspecs NULL
+#endif
+
+/********** MULTI ********************/
+/* MULTI declares no history, tips, or key specs: all three tables are
+ * NULL. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* MULTI history */
+#define MULTI_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* MULTI tips */
+#define MULTI_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* MULTI key specs */
+#define MULTI_Keyspecs NULL
+#endif
+
+/********** UNWATCH ********************/
+/* UNWATCH declares no history, tips, or key specs: all three tables
+ * are NULL. */
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* UNWATCH history */
+#define UNWATCH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* UNWATCH tips */
+#define UNWATCH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* UNWATCH key specs */
+#define UNWATCH_Keyspecs NULL
+#endif
+
+/********** WATCH ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* WATCH history */
+#define WATCH_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* WATCH tips */
+#define WATCH_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* WATCH key specs */
+/* Read-only keys starting at arg index 1; the -1 in the range
+ * initializer marks an open-ended key list (per range semantics). */
+keySpec WATCH_Keyspecs[1] = {
+{NULL,CMD_KEY_RO,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={-1,1,0}}
+};
+#endif
+
+/* WATCH argument table: WATCH key [key ...] (CMD_ARG_MULTIPLE) */
+struct COMMAND_ARG WATCH_Args[] = {
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
+/* Main command table */
+struct COMMAND_STRUCT redisCommandTable[] = {
+/* bitmap */
+{MAKE_CMD("bitcount","Counts the number of set bits (population counting) in a string.","O(N)","2.6.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,BITCOUNT_History,1,BITCOUNT_Tips,0,bitcountCommand,-2,CMD_READONLY,ACL_CATEGORY_BITMAP,BITCOUNT_Keyspecs,1,NULL,2),.args=BITCOUNT_Args},
+{MAKE_CMD("bitfield","Performs arbitrary bitfield integer operations on strings.","O(1) for each subcommand specified","3.2.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,BITFIELD_History,0,BITFIELD_Tips,0,bitfieldCommand,-2,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_BITMAP,BITFIELD_Keyspecs,1,bitfieldGetKeys,2),.args=BITFIELD_Args},
+{MAKE_CMD("bitfield_ro","Performs arbitrary read-only bitfield integer operations on strings.","O(1) for each subcommand specified","6.0.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,BITFIELD_RO_History,0,BITFIELD_RO_Tips,0,bitfieldroCommand,-2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_BITMAP,BITFIELD_RO_Keyspecs,1,NULL,2),.args=BITFIELD_RO_Args},
+{MAKE_CMD("bitop","Performs bitwise operations on multiple strings, and stores the result.","O(N)","2.6.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,BITOP_History,0,BITOP_Tips,0,bitopCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_BITMAP,BITOP_Keyspecs,2,NULL,3),.args=BITOP_Args},
+{MAKE_CMD("bitpos","Finds the first set (1) or clear (0) bit in a string.","O(N)","2.8.7",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,BITPOS_History,1,BITPOS_Tips,0,bitposCommand,-3,CMD_READONLY,ACL_CATEGORY_BITMAP,BITPOS_Keyspecs,1,NULL,3),.args=BITPOS_Args},
+{MAKE_CMD("getbit","Returns a bit value by offset.","O(1)","2.2.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,GETBIT_History,0,GETBIT_Tips,0,getbitCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_BITMAP,GETBIT_Keyspecs,1,NULL,2),.args=GETBIT_Args},
+{MAKE_CMD("setbit","Sets or clears the bit at offset of the string value. Creates the key if it doesn't exist.","O(1)","2.2.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,SETBIT_History,0,SETBIT_Tips,0,setbitCommand,4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_BITMAP,SETBIT_Keyspecs,1,NULL,3),.args=SETBIT_Args},
+/* cluster */
+{MAKE_CMD("asking","Signals that a cluster client is following an -ASK redirect.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,ASKING_History,0,ASKING_Tips,0,askingCommand,1,CMD_FAST,ACL_CATEGORY_CONNECTION,ASKING_Keyspecs,0,NULL,0)},
+{MAKE_CMD("cluster","A container for Redis Cluster commands.","Depends on subcommand.","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_History,0,CLUSTER_Tips,0,NULL,-2,0,0,CLUSTER_Keyspecs,0,NULL,0),.subcommands=CLUSTER_Subcommands},
+{MAKE_CMD("readonly","Enables read-only queries for a connection to a Redis Cluster replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READONLY_History,0,READONLY_Tips,0,readonlyCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READONLY_Keyspecs,0,NULL,0)},
+{MAKE_CMD("readwrite","Enables read-write queries for a connection to a Reids Cluster replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READWRITE_History,0,READWRITE_Tips,0,readwriteCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READWRITE_Keyspecs,0,NULL,0)},
+/* connection */
+{MAKE_CMD("auth","Authenticates the connection.","O(N) where N is the number of passwords defined for the user","1.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,AUTH_History,1,AUTH_Tips,0,authCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_AUTH|CMD_SENTINEL|CMD_ALLOW_BUSY,ACL_CATEGORY_CONNECTION,AUTH_Keyspecs,0,NULL,2),.args=AUTH_Args},
+{MAKE_CMD("client","A container for client connection commands.","Depends on subcommand.","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_History,0,CLIENT_Tips,0,NULL,-2,CMD_SENTINEL,0,CLIENT_Keyspecs,0,NULL,0),.subcommands=CLIENT_Subcommands},
+{MAKE_CMD("echo","Returns the given string.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,ECHO_History,0,ECHO_Tips,0,echoCommand,2,CMD_LOADING|CMD_STALE|CMD_FAST,ACL_CATEGORY_CONNECTION,ECHO_Keyspecs,0,NULL,1),.args=ECHO_Args},
+{MAKE_CMD("hello","Handshakes with the Redis server.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,HELLO_History,1,HELLO_Tips,0,helloCommand,-1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_AUTH|CMD_SENTINEL|CMD_ALLOW_BUSY,ACL_CATEGORY_CONNECTION,HELLO_Keyspecs,0,NULL,1),.args=HELLO_Args},
+{MAKE_CMD("ping","Returns the server's liveliness response.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,PING_History,0,PING_Tips,2,pingCommand,-1,CMD_FAST|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,PING_Keyspecs,0,NULL,1),.args=PING_Args},
+{MAKE_CMD("quit","Closes the connection.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"just closing the connection","7.2.0","connection",COMMAND_GROUP_CONNECTION,QUIT_History,0,QUIT_Tips,0,quitCommand,-1,CMD_ALLOW_BUSY|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_AUTH,ACL_CATEGORY_CONNECTION,QUIT_Keyspecs,0,NULL,0)},
+{MAKE_CMD("reset","Resets the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,RESET_History,0,RESET_Tips,0,resetCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_AUTH|CMD_ALLOW_BUSY,ACL_CATEGORY_CONNECTION,RESET_Keyspecs,0,NULL,0)},
+{MAKE_CMD("select","Changes the selected database.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,SELECT_History,0,SELECT_Tips,0,selectCommand,2,CMD_LOADING|CMD_STALE|CMD_FAST,ACL_CATEGORY_CONNECTION,SELECT_Keyspecs,0,NULL,1),.args=SELECT_Args},
+/* generic */
+{MAKE_CMD("copy","Copies the value of a key to a new key.","O(N) worst case for collections, where N is the number of nested items. O(1) for string values.","6.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,COPY_History,0,COPY_Tips,0,copyCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_KEYSPACE,COPY_Keyspecs,2,NULL,4),.args=COPY_Args},
+{MAKE_CMD("del","Deletes one or more keys.","O(N) where N is the number of keys that will be removed. When a key to remove holds a value other than a string, the individual complexity for this key is O(M) where M is the number of elements in the list, set, sorted set or hash. Removing a single key that holds a string value is O(1).","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,DEL_History,0,DEL_Tips,2,delCommand,-2,CMD_WRITE,ACL_CATEGORY_KEYSPACE,DEL_Keyspecs,1,NULL,1),.args=DEL_Args},
+{MAKE_CMD("dump","Returns a serialized representation of the value stored at a key.","O(1) to access the key and additional O(N*M) to serialize it, where N is the number of Redis objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1).","2.6.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,DUMP_History,0,DUMP_Tips,1,dumpCommand,2,CMD_READONLY,ACL_CATEGORY_KEYSPACE,DUMP_Keyspecs,1,NULL,1),.args=DUMP_Args},
+{MAKE_CMD("exists","Determines whether one or more keys exist.","O(N) where N is the number of keys to check.","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,EXISTS_History,1,EXISTS_Tips,2,existsCommand,-2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,EXISTS_Keyspecs,1,NULL,1),.args=EXISTS_Args},
+{MAKE_CMD("expire","Sets the expiration time of a key in seconds.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,EXPIRE_History,1,EXPIRE_Tips,0,expireCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,EXPIRE_Keyspecs,1,NULL,3),.args=EXPIRE_Args},
+{MAKE_CMD("expireat","Sets the expiration time of a key to a Unix timestamp.","O(1)","1.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,EXPIREAT_History,1,EXPIREAT_Tips,0,expireatCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,EXPIREAT_Keyspecs,1,NULL,3),.args=EXPIREAT_Args},
+{MAKE_CMD("expiretime","Returns the expiration time of a key as a Unix timestamp.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,EXPIRETIME_History,0,EXPIRETIME_Tips,0,expiretimeCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,EXPIRETIME_Keyspecs,1,NULL,1),.args=EXPIRETIME_Args},
+{MAKE_CMD("keys","Returns all key names that match a pattern.","O(N) with N being the number of keys in the database, under the assumption that the key names in the database and the given pattern have limited length.","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,KEYS_History,0,KEYS_Tips,2,keysCommand,2,CMD_READONLY,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,KEYS_Keyspecs,0,NULL,1),.args=KEYS_Args},
+{MAKE_CMD("migrate","Atomically transfers a key from one Redis instance to another.","This command actually executes a DUMP+DEL in the source instance, and a RESTORE in the target instance. See the pages of these commands for time complexity. Also an O(N) data transfer between the two instances is performed.","2.6.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,MIGRATE_History,4,MIGRATE_Tips,1,migrateCommand,-6,CMD_WRITE,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,MIGRATE_Keyspecs,2,migrateGetKeys,9),.args=MIGRATE_Args},
+{MAKE_CMD("move","Moves a key to another database.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,MOVE_History,0,MOVE_Tips,0,moveCommand,3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,MOVE_Keyspecs,1,NULL,2),.args=MOVE_Args},
+{MAKE_CMD("object","A container for object introspection commands.","Depends on subcommand.","2.2.3",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,OBJECT_History,0,OBJECT_Tips,0,NULL,-2,0,0,OBJECT_Keyspecs,0,NULL,0),.subcommands=OBJECT_Subcommands},
+{MAKE_CMD("persist","Removes the expiration time of a key.","O(1)","2.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,PERSIST_History,0,PERSIST_Tips,0,persistCommand,2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,PERSIST_Keyspecs,1,NULL,1),.args=PERSIST_Args},
+{MAKE_CMD("pexpire","Sets the expiration time of a key in milliseconds.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,PEXPIRE_History,1,PEXPIRE_Tips,0,pexpireCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,PEXPIRE_Keyspecs,1,NULL,3),.args=PEXPIRE_Args},
+{MAKE_CMD("pexpireat","Sets the expiration time of a key to a Unix milliseconds timestamp.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,PEXPIREAT_History,1,PEXPIREAT_Tips,0,pexpireatCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,PEXPIREAT_Keyspecs,1,NULL,3),.args=PEXPIREAT_Args},
+{MAKE_CMD("pexpiretime","Returns the expiration time of a key as a Unix milliseconds timestamp.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,PEXPIRETIME_History,0,PEXPIRETIME_Tips,0,pexpiretimeCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,PEXPIRETIME_Keyspecs,1,NULL,1),.args=PEXPIRETIME_Args},
+{MAKE_CMD("pttl","Returns the expiration time in milliseconds of a key.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,PTTL_History,1,PTTL_Tips,1,pttlCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,PTTL_Keyspecs,1,NULL,1),.args=PTTL_Args},
+{MAKE_CMD("randomkey","Returns a random key name from the database.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,RANDOMKEY_History,0,RANDOMKEY_Tips,3,randomkeyCommand,1,CMD_READONLY|CMD_TOUCHES_ARBITRARY_KEYS,ACL_CATEGORY_KEYSPACE,RANDOMKEY_Keyspecs,0,NULL,0)},
+{MAKE_CMD("rename","Renames a key and overwrites the destination.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,RENAME_History,0,RENAME_Tips,0,renameCommand,3,CMD_WRITE,ACL_CATEGORY_KEYSPACE,RENAME_Keyspecs,2,NULL,2),.args=RENAME_Args},
+{MAKE_CMD("renamenx","Renames a key only when the target key name doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,RENAMENX_History,1,RENAMENX_Tips,0,renamenxCommand,3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,RENAMENX_Keyspecs,2,NULL,2),.args=RENAMENX_Args},
+{MAKE_CMD("restore","Creates a key from the serialized representation of a value.","O(1) to create the new key and additional O(N*M) to reconstruct the serialized value, where N is the number of Redis objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1). However for sorted set values the complexity is O(N*M*log(N)) because inserting values into sorted sets is O(log(N)).","2.6.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,RESTORE_History,3,RESTORE_Tips,0,restoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,RESTORE_Keyspecs,1,NULL,7),.args=RESTORE_Args},
+{MAKE_CMD("scan","Iterates over the key names in the database.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,SCAN_History,1,SCAN_Tips,3,scanCommand,-2,CMD_READONLY|CMD_TOUCHES_ARBITRARY_KEYS,ACL_CATEGORY_KEYSPACE,SCAN_Keyspecs,0,NULL,4),.args=SCAN_Args},
+{MAKE_CMD("sort","Sorts the elements in a list, a set, or a sorted set, optionally storing the result.","O(N+M*log(M)) where N is the number of elements in the list or set to sort, and M the number of returned elements. When the elements are not sorted, complexity is O(N).","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,SORT_History,0,SORT_Tips,0,sortCommand,-2,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SET|ACL_CATEGORY_SORTEDSET|ACL_CATEGORY_LIST|ACL_CATEGORY_DANGEROUS,SORT_Keyspecs,3,sortGetKeys,7),.args=SORT_Args},
+{MAKE_CMD("sort_ro","Returns the sorted elements of a list, a set, or a sorted set.","O(N+M*log(M)) where N is the number of elements in the list or set to sort, and M the number of returned elements. When the elements are not sorted, complexity is O(N).","7.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,SORT_RO_History,0,SORT_RO_Tips,0,sortroCommand,-2,CMD_READONLY,ACL_CATEGORY_SET|ACL_CATEGORY_SORTEDSET|ACL_CATEGORY_LIST|ACL_CATEGORY_DANGEROUS,SORT_RO_Keyspecs,2,sortROGetKeys,6),.args=SORT_RO_Args},
+{MAKE_CMD("touch","Returns the number of existing keys out of those specified after updating the time they were last accessed.","O(N) where N is the number of keys that will be touched.","3.2.1",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,TOUCH_History,0,TOUCH_Tips,2,touchCommand,-2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,TOUCH_Keyspecs,1,NULL,1),.args=TOUCH_Args},
+{MAKE_CMD("ttl","Returns the expiration time in seconds of a key.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,TTL_History,1,TTL_Tips,1,ttlCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,TTL_Keyspecs,1,NULL,1),.args=TTL_Args},
+{MAKE_CMD("type","Determines the type of value stored at a key.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,TYPE_History,0,TYPE_Tips,0,typeCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,TYPE_Keyspecs,1,NULL,1),.args=TYPE_Args},
+{MAKE_CMD("unlink","Asynchronously deletes one or more keys.","O(1) for each key removed regardless of its size. Then the command does O(N) work in a different thread in order to reclaim memory, where N is the number of allocations the deleted objects where composed of.","4.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,UNLINK_History,0,UNLINK_Tips,2,unlinkCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,UNLINK_Keyspecs,1,NULL,1),.args=UNLINK_Args},
+{MAKE_CMD("wait","Blocks until the asynchronous replication of all preceding write commands sent by the connection is completed.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAIT_History,0,WAIT_Tips,2,waitCommand,3,0,ACL_CATEGORY_CONNECTION,WAIT_Keyspecs,0,NULL,2),.args=WAIT_Args},
+{MAKE_CMD("waitaof","Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the master and/or replicas.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAITAOF_History,0,WAITAOF_Tips,2,waitaofCommand,4,CMD_NOSCRIPT,ACL_CATEGORY_CONNECTION,WAITAOF_Keyspecs,0,NULL,3),.args=WAITAOF_Args},
+/* geo */
+{MAKE_CMD("geoadd","Adds one or more members to a geospatial index. The key is created if it doesn't exist.","O(log(N)) for each item added, where N is the number of elements in the sorted set.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOADD_History,1,GEOADD_Tips,0,geoaddCommand,-5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEOADD_Keyspecs,1,NULL,4),.args=GEOADD_Args},
+{MAKE_CMD("geodist","Returns the distance between two members of a geospatial index.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEODIST_History,0,GEODIST_Tips,0,geodistCommand,-4,CMD_READONLY,ACL_CATEGORY_GEO,GEODIST_Keyspecs,1,NULL,4),.args=GEODIST_Args},
+{MAKE_CMD("geohash","Returns members from a geospatial index as geohash strings.","O(1) for each member requested.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOHASH_History,0,GEOHASH_Tips,0,geohashCommand,-2,CMD_READONLY,ACL_CATEGORY_GEO,GEOHASH_Keyspecs,1,NULL,2),.args=GEOHASH_Args},
+{MAKE_CMD("geopos","Returns the longitude and latitude of members from a geospatial index.","O(1) for each member requested.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOPOS_History,0,GEOPOS_Tips,0,geoposCommand,-2,CMD_READONLY,ACL_CATEGORY_GEO,GEOPOS_Keyspecs,1,NULL,2),.args=GEOPOS_Args},
+{MAKE_CMD("georadius","Queries a geospatial index for members within a distance from a coordinate, optionally stores the result.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.0",CMD_DOC_DEPRECATED,"`GEOSEARCH` and `GEOSEARCHSTORE` with the `BYRADIUS` argument","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUS_History,2,GEORADIUS_Tips,0,georadiusCommand,-6,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEORADIUS_Keyspecs,3,georadiusGetKeys,11),.args=GEORADIUS_Args},
+{MAKE_CMD("georadiusbymember","Queries a geospatial index for members within a distance from a member, optionally stores the result.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.0",CMD_DOC_DEPRECATED,"`GEOSEARCH` and `GEOSEARCHSTORE` with the `BYRADIUS` and `FROMMEMBER` arguments","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUSBYMEMBER_History,1,GEORADIUSBYMEMBER_Tips,0,georadiusbymemberCommand,-5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEORADIUSBYMEMBER_Keyspecs,3,georadiusGetKeys,10),.args=GEORADIUSBYMEMBER_Args},
+{MAKE_CMD("georadiusbymember_ro","Returns members from a geospatial index that are within a distance from a member.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.10",CMD_DOC_DEPRECATED,"`GEOSEARCH` with the `BYRADIUS` and `FROMMEMBER` arguments","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUSBYMEMBER_RO_History,0,GEORADIUSBYMEMBER_RO_Tips,0,georadiusbymemberroCommand,-5,CMD_READONLY,ACL_CATEGORY_GEO,GEORADIUSBYMEMBER_RO_Keyspecs,1,NULL,9),.args=GEORADIUSBYMEMBER_RO_Args},
+{MAKE_CMD("georadius_ro","Returns members from a geospatial index that are within a distance from a coordinate.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.10",CMD_DOC_DEPRECATED,"`GEOSEARCH` with the `BYRADIUS` argument","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUS_RO_History,1,GEORADIUS_RO_Tips,0,georadiusroCommand,-6,CMD_READONLY,ACL_CATEGORY_GEO,GEORADIUS_RO_Keyspecs,1,NULL,10),.args=GEORADIUS_RO_Args},
+{MAKE_CMD("geosearch","Queries a geospatial index for members inside an area of a box or a circle.","O(N+log(M)) where N is the number of elements in the grid-aligned bounding box area around the shape provided as the filter and M is the number of items inside the shape","6.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOSEARCH_History,1,GEOSEARCH_Tips,0,geosearchCommand,-7,CMD_READONLY,ACL_CATEGORY_GEO,GEOSEARCH_Keyspecs,1,NULL,8),.args=GEOSEARCH_Args},
+{MAKE_CMD("geosearchstore","Queries a geospatial index for members inside an area of a box or a circle, optionally stores the result.","O(N+log(M)) where N is the number of elements in the grid-aligned bounding box area around the shape provided as the filter and M is the number of items inside the shape","6.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOSEARCHSTORE_History,1,GEOSEARCHSTORE_Tips,0,geosearchstoreCommand,-8,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEOSEARCHSTORE_Keyspecs,2,NULL,7),.args=GEOSEARCHSTORE_Args},
+/* hash */
+{MAKE_CMD("hdel","Deletes one or more fields and their values from a hash. Deletes the hash if no fields remain.","O(N) where N is the number of fields to be removed.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HDEL_History,1,HDEL_Tips,0,hdelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HDEL_Keyspecs,1,NULL,2),.args=HDEL_Args},
+{MAKE_CMD("hexists","Determines whether a field exists in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXISTS_History,0,HEXISTS_Tips,0,hexistsCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXISTS_Keyspecs,1,NULL,2),.args=HEXISTS_Args},
+{MAKE_CMD("hget","Returns the value of a field in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args},
+{MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args},
+{MAKE_CMD("hincrby","Increments the integer value of a field in a hash by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBY_History,0,HINCRBY_Tips,0,hincrbyCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBY_Keyspecs,1,NULL,3),.args=HINCRBY_Args},
+{MAKE_CMD("hincrbyfloat","Increments the floating point value of a field by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBYFLOAT_History,0,HINCRBYFLOAT_Tips,0,hincrbyfloatCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBYFLOAT_Keyspecs,1,NULL,3),.args=HINCRBYFLOAT_Args},
+{MAKE_CMD("hkeys","Returns all fields in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HKEYS_History,0,HKEYS_Tips,1,hkeysCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HKEYS_Keyspecs,1,NULL,1),.args=HKEYS_Args},
+{MAKE_CMD("hlen","Returns the number of fields in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HLEN_History,0,HLEN_Tips,0,hlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HLEN_Keyspecs,1,NULL,1),.args=HLEN_Args},
+{MAKE_CMD("hmget","Returns the values of all fields in a hash.","O(N) where N is the number of fields being requested.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HMGET_History,0,HMGET_Tips,0,hmgetCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HMGET_Keyspecs,1,NULL,2),.args=HMGET_Args},
+{MAKE_CMD("hmset","Sets the values of multiple fields.","O(N) where N is the number of fields being set.","2.0.0",CMD_DOC_DEPRECATED,"`HSET` with multiple field-value pairs","4.0.0","hash",COMMAND_GROUP_HASH,HMSET_History,0,HMSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HMSET_Keyspecs,1,NULL,2),.args=HMSET_Args},
+{MAKE_CMD("hrandfield","Returns one or more random fields from a hash.","O(N) where N is the number of fields returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HRANDFIELD_History,0,HRANDFIELD_Tips,1,hrandfieldCommand,-2,CMD_READONLY,ACL_CATEGORY_HASH,HRANDFIELD_Keyspecs,1,NULL,2),.args=HRANDFIELD_Args},
+{MAKE_CMD("hscan","Iterates over fields and values of a hash.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSCAN_History,0,HSCAN_Tips,1,hscanCommand,-3,CMD_READONLY,ACL_CATEGORY_HASH,HSCAN_Keyspecs,1,NULL,4),.args=HSCAN_Args},
+{MAKE_CMD("hset","Creates or modifies the value of a field in a hash.","O(1) for each field/value pair added, so O(N) to add N field/value pairs when the command is called with multiple field/value pairs.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSET_History,1,HSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSET_Keyspecs,1,NULL,2),.args=HSET_Args},
+{MAKE_CMD("hsetnx","Sets the value of a field in a hash only when the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETNX_History,0,HSETNX_Tips,0,hsetnxCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETNX_Keyspecs,1,NULL,3),.args=HSETNX_Args},
+{MAKE_CMD("hstrlen","Returns the length of the value of a field.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSTRLEN_History,0,HSTRLEN_Tips,0,hstrlenCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HSTRLEN_Keyspecs,1,NULL,2),.args=HSTRLEN_Args},
+{MAKE_CMD("hvals","Returns all values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HVALS_History,0,HVALS_Tips,1,hvalsCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HVALS_Keyspecs,1,NULL,1),.args=HVALS_Args},
+/* hyperloglog */
+{MAKE_CMD("pfadd","Adds elements to a HyperLogLog key. Creates the key if it doesn't exist.","O(1) to add every element.","2.8.9",CMD_DOC_NONE,NULL,NULL,"hyperloglog",COMMAND_GROUP_HYPERLOGLOG,PFADD_History,0,PFADD_Tips,0,pfaddCommand,-2,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HYPERLOGLOG,PFADD_Keyspecs,1,NULL,2),.args=PFADD_Args},
+{MAKE_CMD("pfcount","Returns the approximated cardinality of the set(s) observed by the HyperLogLog key(s).","O(1) with a very small average constant time when called with a single key. O(N) with N being the number of keys, and much bigger constant times, when called with multiple keys.","2.8.9",CMD_DOC_NONE,NULL,NULL,"hyperloglog",COMMAND_GROUP_HYPERLOGLOG,PFCOUNT_History,0,PFCOUNT_Tips,0,pfcountCommand,-2,CMD_READONLY|CMD_MAY_REPLICATE,ACL_CATEGORY_HYPERLOGLOG,PFCOUNT_Keyspecs,1,NULL,1),.args=PFCOUNT_Args},
+{MAKE_CMD("pfdebug","Internal commands for debugging HyperLogLog values.","N/A","2.8.9",CMD_DOC_SYSCMD,NULL,NULL,"hyperloglog",COMMAND_GROUP_HYPERLOGLOG,PFDEBUG_History,0,PFDEBUG_Tips,0,pfdebugCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_ADMIN,ACL_CATEGORY_HYPERLOGLOG,PFDEBUG_Keyspecs,1,NULL,2),.args=PFDEBUG_Args},
+{MAKE_CMD("pfmerge","Merges one or more HyperLogLog values into a single key.","O(N) to merge N HyperLogLogs, but with high constant times.","2.8.9",CMD_DOC_NONE,NULL,NULL,"hyperloglog",COMMAND_GROUP_HYPERLOGLOG,PFMERGE_History,0,PFMERGE_Tips,0,pfmergeCommand,-2,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_HYPERLOGLOG,PFMERGE_Keyspecs,2,NULL,2),.args=PFMERGE_Args},
+{MAKE_CMD("pfselftest","An internal command for testing HyperLogLog values.","N/A","2.8.9",CMD_DOC_SYSCMD,NULL,NULL,"hyperloglog",COMMAND_GROUP_HYPERLOGLOG,PFSELFTEST_History,0,PFSELFTEST_Tips,0,pfselftestCommand,1,CMD_ADMIN,ACL_CATEGORY_HYPERLOGLOG,PFSELFTEST_Keyspecs,0,NULL,0)},
+/* list */
+{MAKE_CMD("blmove","Pops an element from a list, pushes it to another list and returns it. Blocks until an element is available otherwise. Deletes the list if the last element was moved.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,BLMOVE_History,0,BLMOVE_Tips,0,blmoveCommand,6,CMD_WRITE|CMD_DENYOOM|CMD_BLOCKING,ACL_CATEGORY_LIST,BLMOVE_Keyspecs,2,NULL,5),.args=BLMOVE_Args},
+{MAKE_CMD("blmpop","Pops the first element from one of multiple lists. Blocks until an element is available otherwise. Deletes the list if the last element was popped.","O(N+M) where N is the number of provided keys and M is the number of elements returned.","7.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,BLMPOP_History,0,BLMPOP_Tips,0,blmpopCommand,-5,CMD_WRITE|CMD_BLOCKING,ACL_CATEGORY_LIST,BLMPOP_Keyspecs,1,blmpopGetKeys,5),.args=BLMPOP_Args},
+{MAKE_CMD("blpop","Removes and returns the first element in a list. Blocks until an element is available otherwise. Deletes the list if the last element was popped.","O(N) where N is the number of provided keys.","2.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,BLPOP_History,1,BLPOP_Tips,0,blpopCommand,-3,CMD_WRITE|CMD_BLOCKING,ACL_CATEGORY_LIST,BLPOP_Keyspecs,1,NULL,2),.args=BLPOP_Args},
+{MAKE_CMD("brpop","Removes and returns the last element in a list. Blocks until an element is available otherwise. Deletes the list if the last element was popped.","O(N) where N is the number of provided keys.","2.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,BRPOP_History,1,BRPOP_Tips,0,brpopCommand,-3,CMD_WRITE|CMD_BLOCKING,ACL_CATEGORY_LIST,BRPOP_Keyspecs,1,NULL,2),.args=BRPOP_Args},
+{MAKE_CMD("brpoplpush","Pops an element from a list, pushes it to another list and returns it. Block until an element is available otherwise. Deletes the list if the last element was popped.","O(1)","2.2.0",CMD_DOC_DEPRECATED,"`BLMOVE` with the `RIGHT` and `LEFT` arguments","6.2.0","list",COMMAND_GROUP_LIST,BRPOPLPUSH_History,1,BRPOPLPUSH_Tips,0,brpoplpushCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_BLOCKING,ACL_CATEGORY_LIST,BRPOPLPUSH_Keyspecs,2,NULL,3),.args=BRPOPLPUSH_Args},
+{MAKE_CMD("lindex","Returns an element from a list by its index.","O(N) where N is the number of elements to traverse to get to the element at index. This makes asking for the first or the last element of the list O(1).","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LINDEX_History,0,LINDEX_Tips,0,lindexCommand,3,CMD_READONLY,ACL_CATEGORY_LIST,LINDEX_Keyspecs,1,NULL,2),.args=LINDEX_Args},
+{MAKE_CMD("linsert","Inserts an element before or after another element in a list.","O(N) where N is the number of elements to traverse before seeing the value pivot. This means that inserting somewhere on the left end on the list (head) can be considered O(1) and inserting somewhere on the right end (tail) is O(N).","2.2.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LINSERT_History,0,LINSERT_Tips,0,linsertCommand,5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_LIST,LINSERT_Keyspecs,1,NULL,4),.args=LINSERT_Args},
+{MAKE_CMD("llen","Returns the length of a list.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LLEN_History,0,LLEN_Tips,0,llenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_LIST,LLEN_Keyspecs,1,NULL,1),.args=LLEN_Args},
+{MAKE_CMD("lmove","Returns an element after popping it from one list and pushing it to another. Deletes the list if the last element was moved.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LMOVE_History,0,LMOVE_Tips,0,lmoveCommand,5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_LIST,LMOVE_Keyspecs,2,NULL,4),.args=LMOVE_Args},
+{MAKE_CMD("lmpop","Returns multiple elements from a list after removing them. Deletes the list if the last element was popped.","O(N+M) where N is the number of provided keys and M is the number of elements returned.","7.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LMPOP_History,0,LMPOP_Tips,0,lmpopCommand,-4,CMD_WRITE,ACL_CATEGORY_LIST,LMPOP_Keyspecs,1,lmpopGetKeys,4),.args=LMPOP_Args},
+{MAKE_CMD("lpop","Returns the first elements in a list after removing it. Deletes the list if the last element was popped.","O(N) where N is the number of elements returned","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LPOP_History,1,LPOP_Tips,0,lpopCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_LIST,LPOP_Keyspecs,1,NULL,2),.args=LPOP_Args},
+{MAKE_CMD("lpos","Returns the index of matching elements in a list.","O(N) where N is the number of elements in the list, for the average case. When searching for elements near the head or the tail of the list, or when the MAXLEN option is provided, the command may run in constant time.","6.0.6",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LPOS_History,0,LPOS_Tips,0,lposCommand,-3,CMD_READONLY,ACL_CATEGORY_LIST,LPOS_Keyspecs,1,NULL,5),.args=LPOS_Args},
+{MAKE_CMD("lpush","Prepends one or more elements to a list. Creates the key if it doesn't exist.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LPUSH_History,1,LPUSH_Tips,0,lpushCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,LPUSH_Keyspecs,1,NULL,2),.args=LPUSH_Args},
+{MAKE_CMD("lpushx","Prepends one or more elements to a list only when the list exists.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","2.2.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LPUSHX_History,1,LPUSHX_Tips,0,lpushxCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,LPUSHX_Keyspecs,1,NULL,2),.args=LPUSHX_Args},
+{MAKE_CMD("lrange","Returns a range of elements from a list.","O(S+N) where S is the distance of start offset from HEAD for small lists, from nearest end (HEAD or TAIL) for large lists; and N is the number of elements in the specified range.","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LRANGE_History,0,LRANGE_Tips,0,lrangeCommand,4,CMD_READONLY,ACL_CATEGORY_LIST,LRANGE_Keyspecs,1,NULL,3),.args=LRANGE_Args},
+{MAKE_CMD("lrem","Removes elements from a list. Deletes the list if the last element was removed.","O(N+M) where N is the length of the list and M is the number of elements removed.","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LREM_History,0,LREM_Tips,0,lremCommand,4,CMD_WRITE,ACL_CATEGORY_LIST,LREM_Keyspecs,1,NULL,3),.args=LREM_Args},
+{MAKE_CMD("lset","Sets the value of an element in a list by its index.","O(N) where N is the length of the list. Setting either the first or the last element of the list is O(1).","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LSET_History,0,LSET_Tips,0,lsetCommand,4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_LIST,LSET_Keyspecs,1,NULL,3),.args=LSET_Args},
+{MAKE_CMD("ltrim","Removes elements from both ends a list. Deletes the list if all elements were trimmed.","O(N) where N is the number of elements to be removed by the operation.","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,LTRIM_History,0,LTRIM_Tips,0,ltrimCommand,4,CMD_WRITE,ACL_CATEGORY_LIST,LTRIM_Keyspecs,1,NULL,3),.args=LTRIM_Args},
+{MAKE_CMD("rpop","Returns and removes the last elements of a list. Deletes the list if the last element was popped.","O(N) where N is the number of elements returned","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,RPOP_History,1,RPOP_Tips,0,rpopCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_LIST,RPOP_Keyspecs,1,NULL,2),.args=RPOP_Args},
+{MAKE_CMD("rpoplpush","Returns the last element of a list after removing and pushing it to another list. Deletes the list if the last element was popped.","O(1)","1.2.0",CMD_DOC_DEPRECATED,"`LMOVE` with the `RIGHT` and `LEFT` arguments","6.2.0","list",COMMAND_GROUP_LIST,RPOPLPUSH_History,0,RPOPLPUSH_Tips,0,rpoplpushCommand,3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_LIST,RPOPLPUSH_Keyspecs,2,NULL,2),.args=RPOPLPUSH_Args},
+{MAKE_CMD("rpush","Appends one or more elements to a list. Creates the key if it doesn't exist.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,RPUSH_History,1,RPUSH_Tips,0,rpushCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,RPUSH_Keyspecs,1,NULL,2),.args=RPUSH_Args},
+{MAKE_CMD("rpushx","Appends an element to a list only when the list exists.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","2.2.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,RPUSHX_History,1,RPUSHX_Tips,0,rpushxCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,RPUSHX_Keyspecs,1,NULL,2),.args=RPUSHX_Args},
+/* pubsub */
+{MAKE_CMD("psubscribe","Listens for messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PSUBSCRIBE_History,0,PSUBSCRIBE_Tips,0,psubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PSUBSCRIBE_Keyspecs,0,NULL,1),.args=PSUBSCRIBE_Args},
+{MAKE_CMD("publish","Posts a message to a channel.","O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any client).","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBLISH_History,0,PUBLISH_Tips,0,publishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE|CMD_SENTINEL,0,PUBLISH_Keyspecs,0,NULL,2),.args=PUBLISH_Args},
+{MAKE_CMD("pubsub","A container for Pub/Sub commands.","Depends on subcommand.","2.8.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_History,0,PUBSUB_Tips,0,NULL,-2,0,0,PUBSUB_Keyspecs,0,NULL,0),.subcommands=PUBSUB_Subcommands},
+{MAKE_CMD("punsubscribe","Stops listening to messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to unsubscribe.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUNSUBSCRIBE_History,0,PUNSUBSCRIBE_Tips,0,punsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PUNSUBSCRIBE_Keyspecs,0,NULL,1),.args=PUNSUBSCRIBE_Args},
+{MAKE_CMD("spublish","Post a message to a shard channel","O(N) where N is the number of clients subscribed to the receiving shard channel.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SPUBLISH_History,0,SPUBLISH_Tips,0,spublishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE,0,SPUBLISH_Keyspecs,1,NULL,2),.args=SPUBLISH_Args},
+{MAKE_CMD("ssubscribe","Listens for messages published to shard channels.","O(N) where N is the number of shard channels to subscribe to.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SSUBSCRIBE_History,0,SSUBSCRIBE_Tips,0,ssubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SSUBSCRIBE_Keyspecs,1,NULL,1),.args=SSUBSCRIBE_Args},
+{MAKE_CMD("subscribe","Listens for messages published to channels.","O(N) where N is the number of channels to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUBSCRIBE_History,0,SUBSCRIBE_Tips,0,subscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,SUBSCRIBE_Keyspecs,0,NULL,1),.args=SUBSCRIBE_Args},
+{MAKE_CMD("sunsubscribe","Stops listening to messages posted to shard channels.","O(N) where N is the number of shard channels to unsubscribe.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUNSUBSCRIBE_History,0,SUNSUBSCRIBE_Tips,0,sunsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SUNSUBSCRIBE_Keyspecs,1,NULL,1),.args=SUNSUBSCRIBE_Args},
+{MAKE_CMD("unsubscribe","Stops listening to messages posted to channels.","O(N) where N is the number of channels to unsubscribe.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,UNSUBSCRIBE_History,0,UNSUBSCRIBE_Tips,0,unsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,UNSUBSCRIBE_Keyspecs,0,NULL,1),.args=UNSUBSCRIBE_Args},
+/* scripting */
+{MAKE_CMD("eval","Executes a server-side Lua script.","Depends on the script that is executed.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVAL_History,0,EVAL_Tips,0,evalCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_MAY_REPLICATE|CMD_NO_MANDATORY_KEYS|CMD_STALE,ACL_CATEGORY_SCRIPTING,EVAL_Keyspecs,1,evalGetKeys,4),.args=EVAL_Args},
+{MAKE_CMD("evalsha","Executes a server-side Lua script by SHA1 digest.","Depends on the script that is executed.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVALSHA_History,0,EVALSHA_Tips,0,evalShaCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_MAY_REPLICATE|CMD_NO_MANDATORY_KEYS|CMD_STALE,ACL_CATEGORY_SCRIPTING,EVALSHA_Keyspecs,1,evalGetKeys,4),.args=EVALSHA_Args},
+{MAKE_CMD("evalsha_ro","Executes a read-only server-side Lua script by SHA1 digest.","Depends on the script that is executed.","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVALSHA_RO_History,0,EVALSHA_RO_Tips,0,evalShaRoCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_NO_MANDATORY_KEYS|CMD_STALE|CMD_READONLY,ACL_CATEGORY_SCRIPTING,EVALSHA_RO_Keyspecs,1,evalGetKeys,4),.args=EVALSHA_RO_Args},
+{MAKE_CMD("eval_ro","Executes a read-only server-side Lua script.","Depends on the script that is executed.","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVAL_RO_History,0,EVAL_RO_Tips,0,evalRoCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_NO_MANDATORY_KEYS|CMD_STALE|CMD_READONLY,ACL_CATEGORY_SCRIPTING,EVAL_RO_Keyspecs,1,evalGetKeys,4),.args=EVAL_RO_Args},
+{MAKE_CMD("fcall","Invokes a function.","Depends on the function that is executed.","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FCALL_History,0,FCALL_Tips,0,fcallCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_MAY_REPLICATE|CMD_NO_MANDATORY_KEYS|CMD_STALE,ACL_CATEGORY_SCRIPTING,FCALL_Keyspecs,1,functionGetKeys,4),.args=FCALL_Args},
+{MAKE_CMD("fcall_ro","Invokes a read-only function.","Depends on the function that is executed.","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FCALL_RO_History,0,FCALL_RO_Tips,0,fcallroCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_NO_MANDATORY_KEYS|CMD_STALE|CMD_READONLY,ACL_CATEGORY_SCRIPTING,FCALL_RO_Keyspecs,1,functionGetKeys,4),.args=FCALL_RO_Args},
+{MAKE_CMD("function","A container for function commands.","Depends on subcommand.","7.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,FUNCTION_History,0,FUNCTION_Tips,0,NULL,-2,0,0,FUNCTION_Keyspecs,0,NULL,0),.subcommands=FUNCTION_Subcommands},
+{MAKE_CMD("script","A container for Lua scripts management commands.","Depends on subcommand.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_History,0,SCRIPT_Tips,0,NULL,-2,0,0,SCRIPT_Keyspecs,0,NULL,0),.subcommands=SCRIPT_Subcommands},
+/* sentinel */
+{MAKE_CMD("sentinel","A container for Redis Sentinel commands.","Depends on subcommand.","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_History,0,SENTINEL_Tips,0,NULL,-2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_Keyspecs,0,NULL,0),.subcommands=SENTINEL_Subcommands},
+/* server */
+{MAKE_CMD("acl","A container for Access List Control commands.","Depends on subcommand.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_History,0,ACL_Tips,0,NULL,-2,CMD_SENTINEL,0,ACL_Keyspecs,0,NULL,0),.subcommands=ACL_Subcommands},
+{MAKE_CMD("bgrewriteaof","Asynchronously rewrites the append-only file to disk.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,BGREWRITEAOF_History,0,BGREWRITEAOF_Tips,0,bgrewriteaofCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT,0,BGREWRITEAOF_Keyspecs,0,NULL,0)},
+{MAKE_CMD("bgsave","Asynchronously saves the database(s) to disk.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,BGSAVE_History,1,BGSAVE_Tips,0,bgsaveCommand,-1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT,0,BGSAVE_Keyspecs,0,NULL,1),.args=BGSAVE_Args},
+{MAKE_CMD("command","Returns detailed information about all commands.","O(N) where N is the total number of Redis commands","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,COMMAND_History,0,COMMAND_Tips,1,commandCommand,-1,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,COMMAND_Keyspecs,0,NULL,0),.subcommands=COMMAND_Subcommands},
+{MAKE_CMD("config","A container for server configuration commands.","Depends on subcommand.","2.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_History,0,CONFIG_Tips,0,NULL,-2,0,0,CONFIG_Keyspecs,0,NULL,0),.subcommands=CONFIG_Subcommands},
+{MAKE_CMD("dbsize","Returns the number of keys in the database.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,DBSIZE_History,0,DBSIZE_Tips,2,dbsizeCommand,1,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,DBSIZE_Keyspecs,0,NULL,0)},
+{MAKE_CMD("debug","A container for debugging commands.","Depends on subcommand.","1.0.0",CMD_DOC_SYSCMD,NULL,NULL,"server",COMMAND_GROUP_SERVER,DEBUG_History,0,DEBUG_Tips,0,debugCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_PROTECTED,0,DEBUG_Keyspecs,0,NULL,0)},
+{MAKE_CMD("failover","Starts a coordinated failover from a server to one of its replicas.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,FAILOVER_History,0,FAILOVER_Tips,0,failoverCommand,-1,CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,FAILOVER_Keyspecs,0,NULL,3),.args=FAILOVER_Args},
+{MAKE_CMD("flushall","Removes all keys from all databases.","O(N) where N is the total number of keys in all databases","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,FLUSHALL_History,2,FLUSHALL_Tips,2,flushallCommand,-1,CMD_WRITE,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,FLUSHALL_Keyspecs,0,NULL,1),.args=FLUSHALL_Args},
+{MAKE_CMD("flushdb","Remove all keys from the current database.","O(N) where N is the number of keys in the selected database","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,FLUSHDB_History,2,FLUSHDB_Tips,2,flushdbCommand,-1,CMD_WRITE,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,FLUSHDB_Keyspecs,0,NULL,1),.args=FLUSHDB_Args},
+{MAKE_CMD("info","Returns information and statistics about the server.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,INFO_History,1,INFO_Tips,3,infoCommand,-1,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_DANGEROUS,INFO_Keyspecs,0,NULL,1),.args=INFO_Args},
+{MAKE_CMD("lastsave","Returns the Unix timestamp of the last successful save to disk.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LASTSAVE_History,0,LASTSAVE_Tips,1,lastsaveCommand,1,CMD_LOADING|CMD_STALE|CMD_FAST,ACL_CATEGORY_ADMIN|ACL_CATEGORY_DANGEROUS,LASTSAVE_Keyspecs,0,NULL,0)},
+{MAKE_CMD("latency","A container for latency diagnostics commands.","Depends on subcommand.","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LATENCY_History,0,LATENCY_Tips,0,NULL,-2,0,0,LATENCY_Keyspecs,0,NULL,0),.subcommands=LATENCY_Subcommands},
+{MAKE_CMD("lolwut","Displays computer art and the Redis version",NULL,"5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,LOLWUT_History,0,LOLWUT_Tips,0,lolwutCommand,-1,CMD_READONLY|CMD_FAST,0,LOLWUT_Keyspecs,0,NULL,1),.args=LOLWUT_Args},
+{MAKE_CMD("memory","A container for memory diagnostics commands.","Depends on subcommand.","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MEMORY_History,0,MEMORY_Tips,0,NULL,-2,0,0,MEMORY_Keyspecs,0,NULL,0),.subcommands=MEMORY_Subcommands},
+{MAKE_CMD("module","A container for module commands.","Depends on subcommand.","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MODULE_History,0,MODULE_Tips,0,NULL,-2,0,0,MODULE_Keyspecs,0,NULL,0),.subcommands=MODULE_Subcommands},
+{MAKE_CMD("monitor","Listens for all requests received by the server in real-time.",NULL,"1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MONITOR_History,0,MONITOR_Tips,0,monitorCommand,1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,MONITOR_Keyspecs,0,NULL,0)},
+{MAKE_CMD("psync","An internal command used in replication.",NULL,"2.8.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,PSYNC_History,0,PSYNC_Tips,0,syncCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NO_MULTI|CMD_NOSCRIPT,0,PSYNC_Keyspecs,0,NULL,2),.args=PSYNC_Args},
+{MAKE_CMD("replconf","An internal command for configuring the replication stream.","O(1)","3.0.0",CMD_DOC_SYSCMD,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLCONF_History,0,REPLCONF_Tips,0,replconfCommand,-1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_ALLOW_BUSY,0,REPLCONF_Keyspecs,0,NULL,0)},
+{MAKE_CMD("replicaof","Configures a server as replica of another, or promotes it to a master.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLICAOF_History,0,REPLICAOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,REPLICAOF_Keyspecs,0,NULL,1),.args=REPLICAOF_Args},
+{MAKE_CMD("restore-asking","An internal command for migrating keys in a cluster.","O(1) to create the new key and additional O(N*M) to reconstruct the serialized value, where N is the number of Redis objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1). However for sorted set values the complexity is O(N*M*log(N)) because inserting values into sorted sets is O(log(N)).","3.0.0",CMD_DOC_SYSCMD,NULL,NULL,"server",COMMAND_GROUP_SERVER,RESTORE_ASKING_History,3,RESTORE_ASKING_Tips,0,restoreCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_ASKING,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,RESTORE_ASKING_Keyspecs,1,NULL,7),.args=RESTORE_ASKING_Args},
+{MAKE_CMD("role","Returns the replication role.","O(1)","2.8.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ROLE_History,0,ROLE_Tips,0,roleCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_SENTINEL,ACL_CATEGORY_ADMIN|ACL_CATEGORY_DANGEROUS,ROLE_Keyspecs,0,NULL,0)},
+{MAKE_CMD("save","Synchronously saves the database(s) to disk.","O(N) where N is the total number of keys in all databases","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SAVE_History,0,SAVE_Tips,0,saveCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_NO_MULTI,0,SAVE_Keyspecs,0,NULL,0)},
+{MAKE_CMD("shutdown","Synchronously saves the database(s) to disk and shuts down the Redis server.","O(N) when saving, where N is the total number of keys in all databases when saving data, otherwise O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SHUTDOWN_History,1,SHUTDOWN_Tips,0,shutdownCommand,-1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_NO_MULTI|CMD_SENTINEL|CMD_ALLOW_BUSY,0,SHUTDOWN_Keyspecs,0,NULL,4),.args=SHUTDOWN_Args},
+{MAKE_CMD("slaveof","Sets a Redis server as a replica of another, or promotes it to being a master.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"`REPLICAOF`","5.0.0","server",COMMAND_GROUP_SERVER,SLAVEOF_History,0,SLAVEOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,SLAVEOF_Keyspecs,0,NULL,1),.args=SLAVEOF_Args},
+{MAKE_CMD("slowlog","A container for slow log commands.","Depends on subcommand.","2.2.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SLOWLOG_History,0,SLOWLOG_Tips,0,NULL,-2,0,0,SLOWLOG_Keyspecs,0,NULL,0),.subcommands=SLOWLOG_Subcommands},
+{MAKE_CMD("swapdb","Swaps two Redis databases.","O(N) where N is the count of clients watching or blocking on keys from both databases.","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SWAPDB_History,0,SWAPDB_Tips,0,swapdbCommand,3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,SWAPDB_Keyspecs,0,NULL,2),.args=SWAPDB_Args},
+{MAKE_CMD("sync","An internal command used in replication.",NULL,"1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SYNC_History,0,SYNC_Tips,0,syncCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NO_MULTI|CMD_NOSCRIPT,0,SYNC_Keyspecs,0,NULL,0)},
+{MAKE_CMD("time","Returns the server time.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,TIME_History,0,TIME_Tips,1,timeCommand,1,CMD_LOADING|CMD_STALE|CMD_FAST,0,TIME_Keyspecs,0,NULL,0)},
+/* set */
+{MAKE_CMD("sadd","Adds one or more members to a set. Creates the key if it doesn't exist.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SADD_History,1,SADD_Tips,0,saddCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_SET,SADD_Keyspecs,1,NULL,2),.args=SADD_Args},
+{MAKE_CMD("scard","Returns the number of members in a set.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SCARD_History,0,SCARD_Tips,0,scardCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SET,SCARD_Keyspecs,1,NULL,1),.args=SCARD_Args},
+{MAKE_CMD("sdiff","Returns the difference of multiple sets.","O(N) where N is the total number of elements in all given sets.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SDIFF_History,0,SDIFF_Tips,1,sdiffCommand,-2,CMD_READONLY,ACL_CATEGORY_SET,SDIFF_Keyspecs,1,NULL,1),.args=SDIFF_Args},
+{MAKE_CMD("sdiffstore","Stores the difference of multiple sets in a key.","O(N) where N is the total number of elements in all given sets.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SDIFFSTORE_History,0,SDIFFSTORE_Tips,0,sdiffstoreCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SET,SDIFFSTORE_Keyspecs,2,NULL,2),.args=SDIFFSTORE_Args},
+{MAKE_CMD("sinter","Returns the intersect of multiple sets.","O(N*M) worst case where N is the cardinality of the smallest set and M is the number of sets.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SINTER_History,0,SINTER_Tips,1,sinterCommand,-2,CMD_READONLY,ACL_CATEGORY_SET,SINTER_Keyspecs,1,NULL,1),.args=SINTER_Args},
+{MAKE_CMD("sintercard","Returns the number of members of the intersect of multiple sets.","O(N*M) worst case where N is the cardinality of the smallest set and M is the number of sets.","7.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SINTERCARD_History,0,SINTERCARD_Tips,0,sinterCardCommand,-3,CMD_READONLY,ACL_CATEGORY_SET,SINTERCARD_Keyspecs,1,sintercardGetKeys,3),.args=SINTERCARD_Args},
+{MAKE_CMD("sinterstore","Stores the intersect of multiple sets in a key.","O(N*M) worst case where N is the cardinality of the smallest set and M is the number of sets.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SINTERSTORE_History,0,SINTERSTORE_Tips,0,sinterstoreCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SET,SINTERSTORE_Keyspecs,2,NULL,2),.args=SINTERSTORE_Args},
+{MAKE_CMD("sismember","Determines whether a member belongs to a set.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SISMEMBER_History,0,SISMEMBER_Tips,0,sismemberCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SET,SISMEMBER_Keyspecs,1,NULL,2),.args=SISMEMBER_Args},
+{MAKE_CMD("smembers","Returns all members of a set.","O(N) where N is the set cardinality.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SMEMBERS_History,0,SMEMBERS_Tips,1,sinterCommand,2,CMD_READONLY,ACL_CATEGORY_SET,SMEMBERS_Keyspecs,1,NULL,1),.args=SMEMBERS_Args},
+{MAKE_CMD("smismember","Determines whether multiple members belong to a set.","O(N) where N is the number of elements being checked for membership","6.2.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SMISMEMBER_History,0,SMISMEMBER_Tips,0,smismemberCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SET,SMISMEMBER_Keyspecs,1,NULL,2),.args=SMISMEMBER_Args},
+{MAKE_CMD("smove","Moves a member from one set to another.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SMOVE_History,0,SMOVE_Tips,0,smoveCommand,4,CMD_WRITE|CMD_FAST,ACL_CATEGORY_SET,SMOVE_Keyspecs,2,NULL,3),.args=SMOVE_Args},
+{MAKE_CMD("spop","Returns one or more random members from a set after removing them. Deletes the set if the last member was popped.","Without the count argument O(1), otherwise O(N) where N is the value of the passed count.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SPOP_History,1,SPOP_Tips,1,spopCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_SET,SPOP_Keyspecs,1,NULL,2),.args=SPOP_Args},
+{MAKE_CMD("srandmember","Get one or multiple random members from a set","Without the count argument O(1), otherwise O(N) where N is the absolute value of the passed count.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SRANDMEMBER_History,1,SRANDMEMBER_Tips,1,srandmemberCommand,-2,CMD_READONLY,ACL_CATEGORY_SET,SRANDMEMBER_Keyspecs,1,NULL,2),.args=SRANDMEMBER_Args},
+{MAKE_CMD("srem","Removes one or more members from a set. Deletes the set if the last member was removed.","O(N) where N is the number of members to be removed.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SREM_History,1,SREM_Tips,0,sremCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_SET,SREM_Keyspecs,1,NULL,2),.args=SREM_Args},
+{MAKE_CMD("sscan","Iterates over members of a set.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SSCAN_History,0,SSCAN_Tips,1,sscanCommand,-3,CMD_READONLY,ACL_CATEGORY_SET,SSCAN_Keyspecs,1,NULL,4),.args=SSCAN_Args},
+{MAKE_CMD("sunion","Returns the union of multiple sets.","O(N) where N is the total number of elements in all given sets.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SUNION_History,0,SUNION_Tips,1,sunionCommand,-2,CMD_READONLY,ACL_CATEGORY_SET,SUNION_Keyspecs,1,NULL,1),.args=SUNION_Args},
+{MAKE_CMD("sunionstore","Stores the union of multiple sets in a key.","O(N) where N is the total number of elements in all given sets.","1.0.0",CMD_DOC_NONE,NULL,NULL,"set",COMMAND_GROUP_SET,SUNIONSTORE_History,0,SUNIONSTORE_Tips,0,sunionstoreCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SET,SUNIONSTORE_Keyspecs,2,NULL,2),.args=SUNIONSTORE_Args},
+/* sorted_set */
+{MAKE_CMD("bzmpop","Removes and returns a member by score from one or more sorted sets. Blocks until a member is available otherwise. Deletes the sorted set if the last element was popped.","O(K) + O(M*log(N)) where K is the number of provided keys, N being the number of elements in the sorted set, and M being the number of elements popped.","7.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,BZMPOP_History,0,BZMPOP_Tips,0,bzmpopCommand,-5,CMD_WRITE|CMD_BLOCKING,ACL_CATEGORY_SORTEDSET,BZMPOP_Keyspecs,1,blmpopGetKeys,5),.args=BZMPOP_Args},
+{MAKE_CMD("bzpopmax","Removes and returns the member with the highest score from one or more sorted sets. Blocks until a member available otherwise. Deletes the sorted set if the last element was popped.","O(log(N)) with N being the number of elements in the sorted set.","5.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,BZPOPMAX_History,1,BZPOPMAX_Tips,0,bzpopmaxCommand,-3,CMD_WRITE|CMD_FAST|CMD_BLOCKING,ACL_CATEGORY_SORTEDSET,BZPOPMAX_Keyspecs,1,NULL,2),.args=BZPOPMAX_Args},
+{MAKE_CMD("bzpopmin","Removes and returns the member with the lowest score from one or more sorted sets. Blocks until a member is available otherwise. Deletes the sorted set if the last element was popped.","O(log(N)) with N being the number of elements in the sorted set.","5.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,BZPOPMIN_History,1,BZPOPMIN_Tips,0,bzpopminCommand,-3,CMD_WRITE|CMD_FAST|CMD_BLOCKING,ACL_CATEGORY_SORTEDSET,BZPOPMIN_Keyspecs,1,NULL,2),.args=BZPOPMIN_Args},
+{MAKE_CMD("zadd","Adds one or more members to a sorted set, or updates their scores. Creates the key if it doesn't exist.","O(log(N)) for each item added, where N is the number of elements in the sorted set.","1.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZADD_History,3,ZADD_Tips,0,zaddCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZADD_Keyspecs,1,NULL,6),.args=ZADD_Args},
+{MAKE_CMD("zcard","Returns the number of members in a sorted set.","O(1)","1.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZCARD_History,0,ZCARD_Tips,0,zcardCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZCARD_Keyspecs,1,NULL,1),.args=ZCARD_Args},
+{MAKE_CMD("zcount","Returns the count of members in a sorted set that have scores within a range.","O(log(N)) with N being the number of elements in the sorted set.","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZCOUNT_History,0,ZCOUNT_Tips,0,zcountCommand,4,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZCOUNT_Keyspecs,1,NULL,3),.args=ZCOUNT_Args},
+{MAKE_CMD("zdiff","Returns the difference between multiple sorted sets.","O(L + (N-K)log(N)) worst case where L is the total number of elements in all the sets, N is the size of the first set, and K is the size of the result set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZDIFF_History,0,ZDIFF_Tips,0,zdiffCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZDIFF_Keyspecs,1,zunionInterDiffGetKeys,3),.args=ZDIFF_Args},
+{MAKE_CMD("zdiffstore","Stores the difference of multiple sorted sets in a key.","O(L + (N-K)log(N)) worst case where L is the total number of elements in all the sets, N is the size of the first set, and K is the size of the result set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZDIFFSTORE_History,0,ZDIFFSTORE_Tips,0,zdiffstoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SORTEDSET,ZDIFFSTORE_Keyspecs,2,zunionInterDiffStoreGetKeys,3),.args=ZDIFFSTORE_Args},
+{MAKE_CMD("zincrby","Increments the score of a member in a sorted set.","O(log(N)) where N is the number of elements in the sorted set.","1.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINCRBY_History,0,ZINCRBY_Tips,0,zincrbyCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZINCRBY_Keyspecs,1,NULL,3),.args=ZINCRBY_Args},
+{MAKE_CMD("zinter","Returns the intersect of multiple sorted sets.","O(N*K)+O(M*log(M)) worst case with N being the smallest input sorted set, K being the number of input sorted sets and M being the number of elements in the resulting sorted set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINTER_History,0,ZINTER_Tips,0,zinterCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZINTER_Keyspecs,1,zunionInterDiffGetKeys,5),.args=ZINTER_Args},
+{MAKE_CMD("zintercard","Returns the number of members of the intersect of multiple sorted sets.","O(N*K) worst case with N being the smallest input sorted set, K being the number of input sorted sets.","7.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINTERCARD_History,0,ZINTERCARD_Tips,0,zinterCardCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZINTERCARD_Keyspecs,1,zunionInterDiffGetKeys,3),.args=ZINTERCARD_Args},
+{MAKE_CMD("zinterstore","Stores the intersect of multiple sorted sets in a key.","O(N*K)+O(M*log(M)) worst case with N being the smallest input sorted set, K being the number of input sorted sets and M being the number of elements in the resulting sorted set.","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINTERSTORE_History,0,ZINTERSTORE_Tips,0,zinterstoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SORTEDSET,ZINTERSTORE_Keyspecs,2,zunionInterDiffStoreGetKeys,5),.args=ZINTERSTORE_Args},
+{MAKE_CMD("zlexcount","Returns the number of members in a sorted set within a lexicographical range.","O(log(N)) with N being the number of elements in the sorted set.","2.8.9",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZLEXCOUNT_History,0,ZLEXCOUNT_Tips,0,zlexcountCommand,4,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZLEXCOUNT_Keyspecs,1,NULL,3),.args=ZLEXCOUNT_Args},
+{MAKE_CMD("zmpop","Returns the highest- or lowest-scoring members from one or more sorted sets after removing them. Deletes the sorted set if the last member was popped.","O(K) + O(M*log(N)) where K is the number of provided keys, N being the number of elements in the sorted set, and M being the number of elements popped.","7.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZMPOP_History,0,ZMPOP_Tips,0,zmpopCommand,-4,CMD_WRITE,ACL_CATEGORY_SORTEDSET,ZMPOP_Keyspecs,1,zmpopGetKeys,4),.args=ZMPOP_Args},
+{MAKE_CMD("zmscore","Returns the score of one or more members in a sorted set.","O(N) where N is the number of members being requested.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZMSCORE_History,0,ZMSCORE_Tips,0,zmscoreCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZMSCORE_Keyspecs,1,NULL,2),.args=ZMSCORE_Args},
+{MAKE_CMD("zpopmax","Returns the highest-scoring members from a sorted set after removing them. Deletes the sorted set if the last member was popped.","O(log(N)*M) with N being the number of elements in the sorted set, and M being the number of elements popped.","5.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZPOPMAX_History,0,ZPOPMAX_Tips,0,zpopmaxCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZPOPMAX_Keyspecs,1,NULL,2),.args=ZPOPMAX_Args},
+{MAKE_CMD("zpopmin","Returns the lowest-scoring members from a sorted set after removing them. Deletes the sorted set if the last member was popped.","O(log(N)*M) with N being the number of elements in the sorted set, and M being the number of elements popped.","5.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZPOPMIN_History,0,ZPOPMIN_Tips,0,zpopminCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZPOPMIN_Keyspecs,1,NULL,2),.args=ZPOPMIN_Args},
+{MAKE_CMD("zrandmember","Returns one or more random members from a sorted set.","O(N) where N is the number of members returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZRANDMEMBER_History,0,ZRANDMEMBER_Tips,1,zrandmemberCommand,-2,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZRANDMEMBER_Keyspecs,1,NULL,2),.args=ZRANDMEMBER_Args},
+{MAKE_CMD("zrange","Returns members in a sorted set within a range of indexes.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements returned.","1.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZRANGE_History,1,ZRANGE_Tips,0,zrangeCommand,-4,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZRANGE_Keyspecs,1,NULL,7),.args=ZRANGE_Args},
+{MAKE_CMD("zrangebylex","Returns members in a sorted set within a lexicographical range.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements being returned. If M is constant (e.g. always asking for the first 10 elements with LIMIT), you can consider it O(log(N)).","2.8.9",CMD_DOC_DEPRECATED,"`ZRANGE` with the `BYLEX` argument","6.2.0","sorted_set",COMMAND_GROUP_SORTED_SET,ZRANGEBYLEX_History,0,ZRANGEBYLEX_Tips,0,zrangebylexCommand,-4,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZRANGEBYLEX_Keyspecs,1,NULL,4),.args=ZRANGEBYLEX_Args},
+{MAKE_CMD("zrangebyscore","Returns members in a sorted set within a range of scores.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements being returned. If M is constant (e.g. always asking for the first 10 elements with LIMIT), you can consider it O(log(N)).","1.0.5",CMD_DOC_DEPRECATED,"`ZRANGE` with the `BYSCORE` argument","6.2.0","sorted_set",COMMAND_GROUP_SORTED_SET,ZRANGEBYSCORE_History,1,ZRANGEBYSCORE_Tips,0,zrangebyscoreCommand,-4,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZRANGEBYSCORE_Keyspecs,1,NULL,5),.args=ZRANGEBYSCORE_Args},
+{MAKE_CMD("zrangestore","Stores a range of members from sorted set in a key.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements stored into the destination key.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZRANGESTORE_History,0,ZRANGESTORE_Tips,0,zrangestoreCommand,-5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SORTEDSET,ZRANGESTORE_Keyspecs,2,NULL,7),.args=ZRANGESTORE_Args},
+{MAKE_CMD("zrank","Returns the index of a member in a sorted set ordered by ascending scores.","O(log(N))","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZRANK_History,1,ZRANK_Tips,0,zrankCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZRANK_Keyspecs,1,NULL,3),.args=ZRANK_Args},
+{MAKE_CMD("zrem","Removes one or more members from a sorted set. Deletes the sorted set if all members were removed.","O(M*log(N)) with N being the number of elements in the sorted set and M the number of elements to be removed.","1.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZREM_History,1,ZREM_Tips,0,zremCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZREM_Keyspecs,1,NULL,2),.args=ZREM_Args},
+{MAKE_CMD("zremrangebylex","Removes members in a sorted set within a lexicographical range. Deletes the sorted set if all members were removed.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements removed by the operation.","2.8.9",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZREMRANGEBYLEX_History,0,ZREMRANGEBYLEX_Tips,0,zremrangebylexCommand,4,CMD_WRITE,ACL_CATEGORY_SORTEDSET,ZREMRANGEBYLEX_Keyspecs,1,NULL,3),.args=ZREMRANGEBYLEX_Args},
+{MAKE_CMD("zremrangebyrank","Removes members in a sorted set within a range of indexes. Deletes the sorted set if all members were removed.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements removed by the operation.","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZREMRANGEBYRANK_History,0,ZREMRANGEBYRANK_Tips,0,zremrangebyrankCommand,4,CMD_WRITE,ACL_CATEGORY_SORTEDSET,ZREMRANGEBYRANK_Keyspecs,1,NULL,3),.args=ZREMRANGEBYRANK_Args},
+{MAKE_CMD("zremrangebyscore","Removes members in a sorted set within a range of scores. Deletes the sorted set if all members were removed.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements removed by the operation.","1.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZREMRANGEBYSCORE_History,0,ZREMRANGEBYSCORE_Tips,0,zremrangebyscoreCommand,4,CMD_WRITE,ACL_CATEGORY_SORTEDSET,ZREMRANGEBYSCORE_Keyspecs,1,NULL,3),.args=ZREMRANGEBYSCORE_Args},
+{MAKE_CMD("zrevrange","Returns members in a sorted set within a range of indexes in reverse order.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements returned.","1.2.0",CMD_DOC_DEPRECATED,"`ZRANGE` with the `REV` argument","6.2.0","sorted_set",COMMAND_GROUP_SORTED_SET,ZREVRANGE_History,0,ZREVRANGE_Tips,0,zrevrangeCommand,-4,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZREVRANGE_Keyspecs,1,NULL,4),.args=ZREVRANGE_Args},
+{MAKE_CMD("zrevrangebylex","Returns members in a sorted set within a lexicographical range in reverse order.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements being returned. If M is constant (e.g. always asking for the first 10 elements with LIMIT), you can consider it O(log(N)).","2.8.9",CMD_DOC_DEPRECATED,"`ZRANGE` with the `REV` and `BYLEX` arguments","6.2.0","sorted_set",COMMAND_GROUP_SORTED_SET,ZREVRANGEBYLEX_History,0,ZREVRANGEBYLEX_Tips,0,zrevrangebylexCommand,-4,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZREVRANGEBYLEX_Keyspecs,1,NULL,4),.args=ZREVRANGEBYLEX_Args},
+{MAKE_CMD("zrevrangebyscore","Returns members in a sorted set within a range of scores in reverse order.","O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements being returned. If M is constant (e.g. always asking for the first 10 elements with LIMIT), you can consider it O(log(N)).","2.2.0",CMD_DOC_DEPRECATED,"`ZRANGE` with the `REV` and `BYSCORE` arguments","6.2.0","sorted_set",COMMAND_GROUP_SORTED_SET,ZREVRANGEBYSCORE_History,1,ZREVRANGEBYSCORE_Tips,0,zrevrangebyscoreCommand,-4,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZREVRANGEBYSCORE_Keyspecs,1,NULL,5),.args=ZREVRANGEBYSCORE_Args},
+{MAKE_CMD("zrevrank","Returns the index of a member in a sorted set ordered by descending scores.","O(log(N))","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZREVRANK_History,1,ZREVRANK_Tips,0,zrevrankCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZREVRANK_Keyspecs,1,NULL,3),.args=ZREVRANK_Args},
+{MAKE_CMD("zscan","Iterates over members and scores of a sorted set.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZSCAN_History,0,ZSCAN_Tips,1,zscanCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZSCAN_Keyspecs,1,NULL,4),.args=ZSCAN_Args},
+{MAKE_CMD("zscore","Returns the score of a member in a sorted set.","O(1)","1.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZSCORE_History,0,ZSCORE_Tips,0,zscoreCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZSCORE_Keyspecs,1,NULL,2),.args=ZSCORE_Args},
+{MAKE_CMD("zunion","Returns the union of multiple sorted sets.","O(N)+O(M*log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZUNION_History,0,ZUNION_Tips,0,zunionCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZUNION_Keyspecs,1,zunionInterDiffGetKeys,5),.args=ZUNION_Args},
+{MAKE_CMD("zunionstore","Stores the union of multiple sorted sets in a key.","O(N)+O(M log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set.","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZUNIONSTORE_History,0,ZUNIONSTORE_Tips,0,zunionstoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SORTEDSET,ZUNIONSTORE_Keyspecs,2,zunionInterDiffStoreGetKeys,5),.args=ZUNIONSTORE_Args},
+/* stream */
+{MAKE_CMD("xack","Returns the number of messages that were successfully acknowledged by the consumer group member of a stream.","O(1) for each message ID processed.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XACK_History,0,XACK_Tips,0,xackCommand,-4,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STREAM,XACK_Keyspecs,1,NULL,3),.args=XACK_Args},
+{MAKE_CMD("xadd","Appends a new message to a stream. Creates the key if it doesn't exist.","O(1) when adding a new entry, O(N) when trimming where N being the number of entries evicted.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XADD_History,2,XADD_Tips,1,xaddCommand,-5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STREAM,XADD_Keyspecs,1,NULL,5),.args=XADD_Args},
+{MAKE_CMD("xautoclaim","Changes, or acquires, ownership of messages in a consumer group, as if the messages were delivered to a consumer group member.","O(1) if COUNT is small.","6.2.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XAUTOCLAIM_History,1,XAUTOCLAIM_Tips,1,xautoclaimCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STREAM,XAUTOCLAIM_Keyspecs,1,NULL,7),.args=XAUTOCLAIM_Args},
+{MAKE_CMD("xclaim","Changes, or acquires, ownership of a message in a consumer group, as if the message was delivered to a consumer group member.","O(log N) with N being the number of messages in the PEL of the consumer group.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XCLAIM_History,0,XCLAIM_Tips,1,xclaimCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STREAM,XCLAIM_Keyspecs,1,NULL,11),.args=XCLAIM_Args},
+{MAKE_CMD("xdel","Returns the number of messages after removing them from a stream.","O(1) for each single item to delete in the stream, regardless of the stream size.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XDEL_History,0,XDEL_Tips,0,xdelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STREAM,XDEL_Keyspecs,1,NULL,2),.args=XDEL_Args},
+{MAKE_CMD("xgroup","A container for consumer groups commands.","Depends on subcommand.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XGROUP_History,0,XGROUP_Tips,0,NULL,-2,0,0,XGROUP_Keyspecs,0,NULL,0),.subcommands=XGROUP_Subcommands},
+{MAKE_CMD("xinfo","A container for stream introspection commands.","Depends on subcommand.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_History,0,XINFO_Tips,0,NULL,-2,0,0,XINFO_Keyspecs,0,NULL,0),.subcommands=XINFO_Subcommands},
+{MAKE_CMD("xlen","Return the number of messages in a stream.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XLEN_History,0,XLEN_Tips,0,xlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STREAM,XLEN_Keyspecs,1,NULL,1),.args=XLEN_Args},
+{MAKE_CMD("xpending","Returns the information and entries from a stream consumer group's pending entries list.","O(N) with N being the number of elements returned, so asking for a small fixed number of entries per call is O(1). O(M), where M is the total number of entries scanned when used with the IDLE filter. When the command returns just the summary and the list of consumers is small, it runs in O(1) time; otherwise, an additional O(N) time for iterating every consumer.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XPENDING_History,1,XPENDING_Tips,1,xpendingCommand,-3,CMD_READONLY,ACL_CATEGORY_STREAM,XPENDING_Keyspecs,1,NULL,3),.args=XPENDING_Args},
+{MAKE_CMD("xrange","Returns the messages from a stream within a range of IDs.","O(N) with N being the number of elements being returned. If N is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1).","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XRANGE_History,1,XRANGE_Tips,0,xrangeCommand,-4,CMD_READONLY,ACL_CATEGORY_STREAM,XRANGE_Keyspecs,1,NULL,4),.args=XRANGE_Args},
+{MAKE_CMD("xread","Returns messages from multiple streams with IDs greater than the ones requested. Blocks until a message is available otherwise.",NULL,"5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XREAD_History,0,XREAD_Tips,0,xreadCommand,-4,CMD_BLOCKING|CMD_READONLY,ACL_CATEGORY_STREAM,XREAD_Keyspecs,1,xreadGetKeys,3),.args=XREAD_Args},
+{MAKE_CMD("xreadgroup","Returns new or historical messages from a stream for a consumer in a group. Blocks until a message is available otherwise.","For each stream mentioned: O(M) with M being the number of elements returned. If M is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1). On the other side when XREADGROUP blocks, XADD will pay the O(N) time in order to serve the N clients blocked on the stream getting new data.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XREADGROUP_History,0,XREADGROUP_Tips,0,xreadCommand,-7,CMD_BLOCKING|CMD_WRITE,ACL_CATEGORY_STREAM,XREADGROUP_Keyspecs,1,xreadGetKeys,5),.args=XREADGROUP_Args},
+{MAKE_CMD("xrevrange","Returns the messages from a stream within a range of IDs in reverse order.","O(N) with N being the number of elements returned. If N is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1).","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XREVRANGE_History,1,XREVRANGE_Tips,0,xrevrangeCommand,-4,CMD_READONLY,ACL_CATEGORY_STREAM,XREVRANGE_Keyspecs,1,NULL,4),.args=XREVRANGE_Args},
+{MAKE_CMD("xsetid","An internal command for replicating stream values.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XSETID_History,1,XSETID_Tips,0,xsetidCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STREAM,XSETID_Keyspecs,1,NULL,4),.args=XSETID_Args},
+{MAKE_CMD("xtrim","Deletes messages from the beginning of a stream.","O(N), with N being the number of evicted entries. Constant times are very small however, since entries are organized in macro nodes containing multiple entries that can be released with a single deallocation.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XTRIM_History,1,XTRIM_Tips,1,xtrimCommand,-4,CMD_WRITE,ACL_CATEGORY_STREAM,XTRIM_Keyspecs,1,NULL,2),.args=XTRIM_Args},
+/* string */
+{MAKE_CMD("append","Appends a string to the value of a key. Creates the key if it doesn't exist.","O(1). The amortized time complexity is O(1) assuming the appended value is small and the already present value is of any size, since the dynamic string library used by Redis will double the free space available on every reallocation.","2.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,APPEND_History,0,APPEND_Tips,0,appendCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,APPEND_Keyspecs,1,NULL,2),.args=APPEND_Args},
+{MAKE_CMD("decr","Decrements the integer value of a key by one. Uses 0 as initial value if the key doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,DECR_History,0,DECR_Tips,0,decrCommand,2,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,DECR_Keyspecs,1,NULL,1),.args=DECR_Args},
+{MAKE_CMD("decrby","Decrements a number from the integer value of a key. Uses 0 as initial value if the key doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,DECRBY_History,0,DECRBY_Tips,0,decrbyCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,DECRBY_Keyspecs,1,NULL,2),.args=DECRBY_Args},
+{MAKE_CMD("get","Returns the string value of a key.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,GET_History,0,GET_Tips,0,getCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STRING,GET_Keyspecs,1,NULL,1),.args=GET_Args},
+{MAKE_CMD("getdel","Returns the string value of a key after deleting the key.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,GETDEL_History,0,GETDEL_Tips,0,getdelCommand,2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STRING,GETDEL_Keyspecs,1,NULL,1),.args=GETDEL_Args},
+{MAKE_CMD("getex","Returns the string value of a key after setting its expiration time.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,GETEX_History,0,GETEX_Tips,0,getexCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STRING,GETEX_Keyspecs,1,NULL,2),.args=GETEX_Args},
+{MAKE_CMD("getrange","Returns a substring of the string stored at a key.","O(N) where N is the length of the returned string. The complexity is ultimately determined by the returned length, but because creating a substring from an existing string is very cheap, it can be considered O(1) for small strings.","2.4.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,GETRANGE_History,0,GETRANGE_Tips,0,getrangeCommand,4,CMD_READONLY,ACL_CATEGORY_STRING,GETRANGE_Keyspecs,1,NULL,3),.args=GETRANGE_Args},
+{MAKE_CMD("getset","Returns the previous string value of a key after setting it to a new value.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"`SET` with the `!GET` argument","6.2.0","string",COMMAND_GROUP_STRING,GETSET_History,0,GETSET_Tips,0,getsetCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,GETSET_Keyspecs,1,NULL,2),.args=GETSET_Args},
+{MAKE_CMD("incr","Increments the integer value of a key by one. Uses 0 as initial value if the key doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,INCR_History,0,INCR_Tips,0,incrCommand,2,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,INCR_Keyspecs,1,NULL,1),.args=INCR_Args},
+{MAKE_CMD("incrby","Increments the integer value of a key by a number. Uses 0 as initial value if the key doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,INCRBY_History,0,INCRBY_Tips,0,incrbyCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,INCRBY_Keyspecs,1,NULL,2),.args=INCRBY_Args},
+{MAKE_CMD("incrbyfloat","Increment the floating point value of a key by a number. Uses 0 as initial value if the key doesn't exist.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,INCRBYFLOAT_History,0,INCRBYFLOAT_Tips,0,incrbyfloatCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,INCRBYFLOAT_Keyspecs,1,NULL,2),.args=INCRBYFLOAT_Args},
+{MAKE_CMD("lcs","Finds the longest common substring.","O(N*M) where N and M are the lengths of s1 and s2, respectively","7.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,LCS_History,0,LCS_Tips,0,lcsCommand,-3,CMD_READONLY,ACL_CATEGORY_STRING,LCS_Keyspecs,1,NULL,6),.args=LCS_Args},
+{MAKE_CMD("mget","Atomically returns the string values of one or more keys.","O(N) where N is the number of keys to retrieve.","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,MGET_History,0,MGET_Tips,1,mgetCommand,-2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STRING,MGET_Keyspecs,1,NULL,1),.args=MGET_Args},
+{MAKE_CMD("mset","Atomically creates or modifies the string values of one or more keys.","O(N) where N is the number of keys to set.","1.0.1",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,MSET_History,0,MSET_Tips,2,msetCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,MSET_Keyspecs,1,NULL,1),.args=MSET_Args},
+{MAKE_CMD("msetnx","Atomically modifies the string values of one or more keys only when all keys don't exist.","O(N) where N is the number of keys to set.","1.0.1",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,MSETNX_History,0,MSETNX_Tips,0,msetnxCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,MSETNX_Keyspecs,1,NULL,1),.args=MSETNX_Args},
+{MAKE_CMD("psetex","Sets both string value and expiration time in milliseconds of a key. The key is created if it doesn't exist.","O(1)","2.6.0",CMD_DOC_DEPRECATED,"`SET` with the `PX` argument","2.6.12","string",COMMAND_GROUP_STRING,PSETEX_History,0,PSETEX_Tips,0,psetexCommand,4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,PSETEX_Keyspecs,1,NULL,3),.args=PSETEX_Args},
+{MAKE_CMD("set","Sets the string value of a key, ignoring its type. The key is created if it doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,SET_History,4,SET_Tips,0,setCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,SET_Keyspecs,1,setGetKeys,5),.args=SET_Args},
+{MAKE_CMD("setex","Sets the string value and expiration time of a key. Creates the key if it doesn't exist.","O(1)","2.0.0",CMD_DOC_DEPRECATED,"`SET` with the `EX` argument","2.6.12","string",COMMAND_GROUP_STRING,SETEX_History,0,SETEX_Tips,0,setexCommand,4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,SETEX_Keyspecs,1,NULL,3),.args=SETEX_Args},
+{MAKE_CMD("setnx","Set the string value of a key only when the key doesn't exist.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"`SET` with the `NX` argument","2.6.12","string",COMMAND_GROUP_STRING,SETNX_History,0,SETNX_Tips,0,setnxCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,SETNX_Keyspecs,1,NULL,2),.args=SETNX_Args},
+{MAKE_CMD("setrange","Overwrites a part of a string value with another by an offset. Creates the key if it doesn't exist.","O(1), not counting the time taken to copy the new string in place. Usually, this string is very small so the amortized complexity is O(1). Otherwise, complexity is O(M) with M being the length of the value argument.","2.2.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,SETRANGE_History,0,SETRANGE_Tips,0,setrangeCommand,4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,SETRANGE_Keyspecs,1,NULL,3),.args=SETRANGE_Args},
+{MAKE_CMD("strlen","Returns the length of a string value.","O(1)","2.2.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,STRLEN_History,0,STRLEN_Tips,0,strlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STRING,STRLEN_Keyspecs,1,NULL,1),.args=STRLEN_Args},
+{MAKE_CMD("substr","Returns a substring from a string value.","O(N) where N is the length of the returned string. The complexity is ultimately determined by the returned length, but because creating a substring from an existing string is very cheap, it can be considered O(1) for small strings.","1.0.0",CMD_DOC_DEPRECATED,"`GETRANGE`","2.0.0","string",COMMAND_GROUP_STRING,SUBSTR_History,0,SUBSTR_Tips,0,getrangeCommand,4,CMD_READONLY,ACL_CATEGORY_STRING,SUBSTR_Keyspecs,1,NULL,3),.args=SUBSTR_Args},
+/* transactions */
+{MAKE_CMD("discard","Discards a transaction.","O(N), when N is the number of queued commands","2.0.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,DISCARD_History,0,DISCARD_Tips,0,discardCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,DISCARD_Keyspecs,0,NULL,0)},
+{MAKE_CMD("exec","Executes all commands in a transaction.","Depends on commands in the transaction","1.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,EXEC_History,0,EXEC_Tips,0,execCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SKIP_SLOWLOG,ACL_CATEGORY_TRANSACTION,EXEC_Keyspecs,0,NULL,0)},
+{MAKE_CMD("multi","Starts a transaction.","O(1)","1.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,MULTI_History,0,MULTI_Tips,0,multiCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,MULTI_Keyspecs,0,NULL,0)},
+{MAKE_CMD("unwatch","Forgets about watched keys of a transaction.","O(1)","2.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,UNWATCH_History,0,UNWATCH_Tips,0,unwatchCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,UNWATCH_Keyspecs,0,NULL,0)},
+{MAKE_CMD("watch","Monitors changes to keys to determine the execution of a transaction.","O(1) for every key.","2.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,WATCH_History,0,WATCH_Tips,0,watchCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,WATCH_Keyspecs,1,NULL,1),.args=WATCH_Args},
+{0}
+};
diff --git a/src/commands.h b/src/commands.h
new file mode 100644
index 0000000..1eefab4
--- /dev/null
+++ b/src/commands.h
@@ -0,0 +1,40 @@
+#ifndef __REDIS_COMMANDS_H
+#define __REDIS_COMMANDS_H
+
+/* Must be synced with ARG_TYPE_STR and generate-command-code.py */
+typedef enum {
+ ARG_TYPE_STRING,
+ ARG_TYPE_INTEGER,
+ ARG_TYPE_DOUBLE,
+ ARG_TYPE_KEY, /* A string, but represents a keyname */
+ ARG_TYPE_PATTERN,
+ ARG_TYPE_UNIX_TIME,
+ ARG_TYPE_PURE_TOKEN,
+ ARG_TYPE_ONEOF, /* Has subargs */
+ ARG_TYPE_BLOCK /* Has subargs */
+} redisCommandArgType;
+
+#define CMD_ARG_NONE (0)
+#define CMD_ARG_OPTIONAL (1<<0)
+#define CMD_ARG_MULTIPLE (1<<1)
+#define CMD_ARG_MULTIPLE_TOKEN (1<<2)
+
+/* Must be compatible with RedisModuleCommandArg. See moduleCopyCommandArgs. */
+typedef struct redisCommandArg {
+ const char *name;
+ redisCommandArgType type;
+ int key_spec_index;
+ const char *token;
+ const char *summary;
+ const char *since;
+ int flags;
+ const char *deprecated_since;
+ int num_args;
+ struct redisCommandArg *subargs;
+ const char *display_text;
+} redisCommandArg;
+
+/* Returns the command group name by group number. */
+const char *commandGroupStr(int index);
+
+#endif
diff --git a/src/commands/acl-cat.json b/src/commands/acl-cat.json
new file mode 100644
index 0000000..dfbe4c4
--- /dev/null
+++ b/src/commands/acl-cat.json
@@ -0,0 +1,42 @@
+{
+ "CAT": {
+ "summary": "Lists the ACL categories, or the commands inside a category.",
+ "complexity": "O(1) since the categories and commands are a fixed set.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": -2,
+ "container": "ACL",
+ "function": "aclCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "type": "array",
+ "description": "In case `category` was not given, a list of existing ACL categories",
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "type": "array",
+ "description": "In case `category` was given, list of commands that fall under the provided ACL category",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "category",
+ "type": "string",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/acl-deluser.json b/src/commands/acl-deluser.json
new file mode 100644
index 0000000..80e8a7a
--- /dev/null
+++ b/src/commands/acl-deluser.json
@@ -0,0 +1,33 @@
+{
+ "DELUSER": {
+ "summary": "Deletes ACL users, and terminates their connections.",
+ "complexity": "O(1) amortized time considering the typical user.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": -3,
+ "container": "ACL",
+ "function": "aclCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The number of users that were deleted"
+ },
+ "arguments": [
+ {
+ "name": "username",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/acl-dryrun.json b/src/commands/acl-dryrun.json
new file mode 100644
index 0000000..bee6a6a
--- /dev/null
+++ b/src/commands/acl-dryrun.json
@@ -0,0 +1,47 @@
+{
+ "DRYRUN": {
+ "summary": "Simulates the execution of a command by a user, without executing the command.",
+ "complexity": "O(1).",
+ "group": "server",
+ "since": "7.0.0",
+ "arity": -4,
+ "container": "ACL",
+ "function": "aclCommand",
+ "history": [],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "const": "OK",
+ "description": "The given user may successfully execute the given command."
+ },
+ {
+ "type": "string",
+ "description": "The description of the problem, in case the user is not allowed to run the given command."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "username",
+ "type": "string"
+ },
+ {
+ "name": "command",
+ "type": "string"
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/acl-genpass.json b/src/commands/acl-genpass.json
new file mode 100644
index 0000000..86c1f8e
--- /dev/null
+++ b/src/commands/acl-genpass.json
@@ -0,0 +1,28 @@
+{
+ "GENPASS": {
+ "summary": "Generates a pseudorandom, secure password that can be used to identify ACL users.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": -2,
+ "container": "ACL",
+ "function": "aclCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "Pseudorandom data. By default it contains 64 bytes, representing 256 bits of data. If `bits` was given, the output string length is the number of specified bits (rounded to the next multiple of 4) divided by 4."
+ },
+ "arguments": [
+ {
+ "name": "bits",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/acl-getuser.json b/src/commands/acl-getuser.json
new file mode 100644
index 0000000..535389b
--- /dev/null
+++ b/src/commands/acl-getuser.json
@@ -0,0 +1,91 @@
+{
+ "GETUSER": {
+ "summary": "Lists the ACL rules of a user.",
+ "complexity": "O(N). Where N is the number of password, command and pattern rules that the user has.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": 3,
+ "container": "ACL",
+ "function": "aclCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added Pub/Sub channel patterns."
+ ],
+ [
+ "7.0.0",
+ "Added selectors and changed the format of key and channel patterns from a list to their rule representation."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "arguments": [
+ {
+ "name": "username",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "a set of ACL rule definitions for the user",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "flags": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "passwords": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "commands": {
+ "description": "root selector's commands",
+ "type": "string"
+ },
+ "keys": {
+ "description": "root selector's keys",
+ "type": "string"
+ },
+ "channels": {
+ "description": "root selector's channels",
+ "type": "string"
+ },
+ "selectors": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "commands": {
+ "type": "string"
+ },
+ "keys": {
+ "type": "string"
+ },
+ "channels": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ },
+ {
+ "description": "If user does not exist",
+ "type": "null"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/acl-help.json b/src/commands/acl-help.json
new file mode 100644
index 0000000..3c95914
--- /dev/null
+++ b/src/commands/acl-help.json
@@ -0,0 +1,23 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": 2,
+ "container": "ACL",
+ "function": "aclCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "A list of subcommands and their description",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/acl-list.json b/src/commands/acl-list.json
new file mode 100644
index 0000000..0d75b13
--- /dev/null
+++ b/src/commands/acl-list.json
@@ -0,0 +1,25 @@
+{
+ "LIST": {
+ "summary": "Dumps the effective rules in ACL file format.",
+ "complexity": "O(N). Where N is the number of configured users.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": 2,
+ "container": "ACL",
+ "function": "aclCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "A list of currently active ACL rules",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/acl-load.json b/src/commands/acl-load.json
new file mode 100644
index 0000000..d7b91ba
--- /dev/null
+++ b/src/commands/acl-load.json
@@ -0,0 +1,21 @@
+{
+ "LOAD": {
+ "summary": "Reloads the rules from the configured ACL file.",
+ "complexity": "O(N). Where N is the number of configured users.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": 2,
+ "container": "ACL",
+ "function": "aclCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/acl-log.json b/src/commands/acl-log.json
new file mode 100644
index 0000000..de5f029
--- /dev/null
+++ b/src/commands/acl-log.json
@@ -0,0 +1,90 @@
+{
+ "LOG": {
+ "summary": "Lists recent security events generated due to ACL rules.",
+ "complexity": "O(N) with N being the number of entries shown.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": -2,
+ "container": "ACL",
+ "function": "aclCommand",
+ "history": [
+ [
+ "7.2.0",
+ "Added entry ID, timestamp created, and timestamp last updated."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "In case `RESET` was not given, a list of recent ACL security events.",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "count": {
+ "type": "integer"
+ },
+ "reason": {
+ "type": "string"
+ },
+ "context": {
+ "type": "string"
+ },
+ "object": {
+ "type": "string"
+ },
+ "username": {
+ "type": "string"
+ },
+ "age-seconds": {
+ "type": "number"
+ },
+ "client-info": {
+ "type": "string"
+ },
+ "entry-id": {
+ "type": "integer"
+ },
+ "timestamp-created": {
+ "type": "integer"
+ },
+ "timestamp-last-updated": {
+ "type": "integer"
+ }
+ }
+ }
+ },
+ {
+ "const": "OK",
+ "description": "In case `RESET` was given, OK indicates ACL log was cleared."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "operation",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "reset",
+ "type": "pure-token",
+ "token": "RESET"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/acl-save.json b/src/commands/acl-save.json
new file mode 100644
index 0000000..98d8dfd
--- /dev/null
+++ b/src/commands/acl-save.json
@@ -0,0 +1,25 @@
+{
+ "SAVE": {
+ "summary": "Saves the effective ACL rules in the configured ACL file.",
+ "complexity": "O(N). Where N is the number of configured users.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": 2,
+ "container": "ACL",
+ "function": "aclCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/acl-setuser.json b/src/commands/acl-setuser.json
new file mode 100644
index 0000000..1a90917
--- /dev/null
+++ b/src/commands/acl-setuser.json
@@ -0,0 +1,47 @@
+{
+ "SETUSER": {
+ "summary": "Creates and modifies an ACL user and its rules.",
+ "complexity": "O(N). Where N is the number of rules provided.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": -3,
+ "container": "ACL",
+ "function": "aclCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added Pub/Sub channel patterns."
+ ],
+ [
+ "7.0.0",
+ "Added selectors and key based permissions."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "username",
+ "type": "string"
+ },
+ {
+ "name": "rule",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/acl-users.json b/src/commands/acl-users.json
new file mode 100644
index 0000000..1a6bc75
--- /dev/null
+++ b/src/commands/acl-users.json
@@ -0,0 +1,25 @@
+{
+ "USERS": {
+ "summary": "Lists all ACL users.",
+ "complexity": "O(N). Where N is the number of configured users.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": 2,
+ "container": "ACL",
+ "function": "aclCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of existing ACL users",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/acl-whoami.json b/src/commands/acl-whoami.json
new file mode 100644
index 0000000..2efe98c
--- /dev/null
+++ b/src/commands/acl-whoami.json
@@ -0,0 +1,21 @@
+{
+ "WHOAMI": {
+ "summary": "Returns the authenticated username of the current connection.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": 2,
+ "container": "ACL",
+ "function": "aclCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "The username of the current connection."
+ }
+ }
+}
diff --git a/src/commands/acl.json b/src/commands/acl.json
new file mode 100644
index 0000000..1474b78
--- /dev/null
+++ b/src/commands/acl.json
@@ -0,0 +1,12 @@
+{
+ "ACL": {
+        "summary": "A container for Access Control List commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "server",
+ "since": "6.0.0",
+ "arity": -2,
+ "command_flags": [
+ "SENTINEL"
+ ]
+ }
+}
diff --git a/src/commands/append.json b/src/commands/append.json
new file mode 100644
index 0000000..a8ec6bf
--- /dev/null
+++ b/src/commands/append.json
@@ -0,0 +1,53 @@
+{
+ "APPEND": {
+ "summary": "Appends a string to the value of a key. Creates the key if it doesn't exist.",
+ "complexity": "O(1). The amortized time complexity is O(1) assuming the appended value is small and the already present value is of any size, since the dynamic string library used by Redis will double the free space available on every reallocation.",
+ "group": "string",
+ "since": "2.0.0",
+ "arity": 3,
+ "function": "appendCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The length of the string after the append operation."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/asking.json b/src/commands/asking.json
new file mode 100644
index 0000000..3886795
--- /dev/null
+++ b/src/commands/asking.json
@@ -0,0 +1,19 @@
+{
+ "ASKING": {
+ "summary": "Signals that a cluster client is following an -ASK redirect.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 1,
+ "function": "askingCommand",
+ "command_flags": [
+ "FAST"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/auth.json b/src/commands/auth.json
new file mode 100644
index 0000000..3b1ba35
--- /dev/null
+++ b/src/commands/auth.json
@@ -0,0 +1,43 @@
+{
+ "AUTH": {
+ "summary": "Authenticates the connection.",
+ "complexity": "O(N) where N is the number of passwords defined for the user",
+ "group": "connection",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "authCommand",
+ "history": [
+ [
+ "6.0.0",
+ "Added ACL style (username and password)."
+ ]
+ ],
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "NO_AUTH",
+ "SENTINEL",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "username",
+ "type": "string",
+ "optional": true,
+ "since": "6.0.0"
+ },
+ {
+ "name": "password",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/bgrewriteaof.json b/src/commands/bgrewriteaof.json
new file mode 100644
index 0000000..6267d31
--- /dev/null
+++ b/src/commands/bgrewriteaof.json
@@ -0,0 +1,19 @@
+{
+ "BGREWRITEAOF": {
+ "summary": "Asynchronously rewrites the append-only file to disk.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": 1,
+ "function": "bgrewriteaofCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NOSCRIPT"
+ ],
+ "reply_schema": {
+ "description": "A simple string reply indicating that the rewriting started or is about to start ASAP",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/bgsave.json b/src/commands/bgsave.json
new file mode 100644
index 0000000..f73d8a8
--- /dev/null
+++ b/src/commands/bgsave.json
@@ -0,0 +1,40 @@
+{
+ "BGSAVE": {
+ "summary": "Asynchronously saves the database(s) to disk.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": -1,
+ "function": "bgsaveCommand",
+ "history": [
+ [
+ "3.2.2",
+ "Added the `SCHEDULE` option."
+ ]
+ ],
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NOSCRIPT"
+ ],
+ "arguments": [
+ {
+ "name": "schedule",
+ "token": "SCHEDULE",
+ "type": "pure-token",
+ "optional": true,
+ "since": "3.2.2"
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": "Background saving started"
+ },
+ {
+ "const": "Background saving scheduled"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/bitcount.json b/src/commands/bitcount.json
new file mode 100644
index 0000000..2d277a8
--- /dev/null
+++ b/src/commands/bitcount.json
@@ -0,0 +1,87 @@
+{
+ "BITCOUNT": {
+ "summary": "Counts the number of set bits (population counting) in a string.",
+ "complexity": "O(N)",
+ "group": "bitmap",
+ "since": "2.6.0",
+ "arity": -2,
+ "function": "bitcountCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added the `BYTE|BIT` option."
+ ]
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "BITMAP"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "range",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "start",
+ "type": "integer"
+ },
+ {
+ "name": "end",
+ "type": "integer"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "optional": true,
+ "since": "7.0.0",
+ "arguments": [
+ {
+ "name": "byte",
+ "type": "pure-token",
+ "token": "BYTE"
+ },
+ {
+ "name": "bit",
+ "type": "pure-token",
+ "token": "BIT"
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of bits set to 1.",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/bitfield.json b/src/commands/bitfield.json
new file mode 100644
index 0000000..843cea8
--- /dev/null
+++ b/src/commands/bitfield.json
@@ -0,0 +1,159 @@
+{
+ "BITFIELD": {
+ "summary": "Performs arbitrary bitfield integer operations on strings.",
+ "complexity": "O(1) for each subcommand specified",
+ "group": "bitmap",
+ "since": "3.2.0",
+ "arity": -2,
+ "function": "bitfieldCommand",
+ "get_keys_function": "bitfieldGetKeys",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "BITMAP"
+ ],
+ "key_specs": [
+ {
+ "notes": "This command allows both access and modification of the key",
+ "flags": [
+ "RW",
+ "UPDATE",
+ "ACCESS",
+ "VARIABLE_FLAGS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "operation",
+ "type": "oneof",
+ "multiple": true,
+ "optional": true,
+ "arguments": [
+ {
+ "token": "GET",
+ "name": "get-block",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "encoding",
+ "type": "string"
+ },
+ {
+ "name": "offset",
+ "type": "integer"
+ }
+ ]
+ },
+ {
+ "name": "write",
+ "type": "block",
+ "arguments": [
+ {
+ "token": "OVERFLOW",
+ "name": "overflow-block",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "wrap",
+ "type": "pure-token",
+ "token": "WRAP"
+ },
+ {
+ "name": "sat",
+ "type": "pure-token",
+ "token": "SAT"
+ },
+ {
+ "name": "fail",
+ "type": "pure-token",
+ "token": "FAIL"
+ }
+ ]
+ },
+ {
+ "name": "write-operation",
+ "type": "oneof",
+ "arguments": [
+ {
+ "token": "SET",
+ "name": "set-block",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "encoding",
+ "type": "string"
+ },
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "value",
+ "type": "integer"
+ }
+ ]
+ },
+ {
+ "token": "INCRBY",
+ "name": "incrby-block",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "encoding",
+ "type": "string"
+ },
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "increment",
+ "type": "integer"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "description": "The result of the subcommand at the same position",
+ "type": "integer"
+ },
+ {
+ "description": "In case OVERFLOW FAIL was given and overflows or underflows detected",
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+}
diff --git a/src/commands/bitfield_ro.json b/src/commands/bitfield_ro.json
new file mode 100644
index 0000000..0b5aa71
--- /dev/null
+++ b/src/commands/bitfield_ro.json
@@ -0,0 +1,69 @@
+{
+ "BITFIELD_RO": {
+ "summary": "Performs arbitrary read-only bitfield integer operations on strings.",
+ "complexity": "O(1) for each subcommand specified",
+ "group": "bitmap",
+ "since": "6.0.0",
+ "arity": -2,
+ "function": "bitfieldroCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "BITMAP"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "token": "GET",
+ "name": "get-block",
+ "type": "block",
+ "optional": true,
+ "multiple": true,
+ "multiple_token": true,
+ "arguments": [
+ {
+ "name": "encoding",
+ "type": "string"
+ },
+ {
+ "name": "offset",
+ "type": "integer"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "items": {
+ "description": "The result of the subcommand at the same position",
+ "type": "integer"
+ }
+ }
+ }
+}
diff --git a/src/commands/bitop.json b/src/commands/bitop.json
new file mode 100644
index 0000000..d9e1ff9
--- /dev/null
+++ b/src/commands/bitop.json
@@ -0,0 +1,99 @@
+{
+ "BITOP": {
+ "summary": "Performs bitwise operations on multiple strings, and stores the result.",
+ "complexity": "O(N)",
+ "group": "bitmap",
+ "since": "2.6.0",
+ "arity": -4,
+ "function": "bitopCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "BITMAP"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 3
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "operation",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "and",
+ "type": "pure-token",
+ "token": "AND"
+ },
+ {
+ "name": "or",
+ "type": "pure-token",
+ "token": "OR"
+ },
+ {
+ "name": "xor",
+ "type": "pure-token",
+ "token": "XOR"
+ },
+ {
+ "name": "not",
+ "type": "pure-token",
+ "token": "NOT"
+ }
+ ]
+ },
+ {
+ "name": "destkey",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 1,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "the size of the string stored in the destination key, that is equal to the size of the longest input string",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/bitpos.json b/src/commands/bitpos.json
new file mode 100644
index 0000000..11b0851
--- /dev/null
+++ b/src/commands/bitpos.json
@@ -0,0 +1,106 @@
+{
+ "BITPOS": {
+ "summary": "Finds the first set (1) or clear (0) bit in a string.",
+ "complexity": "O(N)",
+ "group": "bitmap",
+ "since": "2.8.7",
+ "arity": -3,
+ "function": "bitposCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added the `BYTE|BIT` option."
+ ]
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "BITMAP"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "bit",
+ "type": "integer"
+ },
+ {
+ "name": "range",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "start",
+ "type": "integer"
+ },
+ {
+ "name": "end-unit-block",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "end",
+ "type": "integer"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "optional": true,
+ "since": "7.0.0",
+ "arguments": [
+ {
+ "name": "byte",
+ "type": "pure-token",
+ "token": "BYTE"
+ },
+ {
+ "name": "bit",
+ "type": "pure-token",
+ "token": "BIT"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "the position of the first bit set to 1 or 0 according to the request",
+ "type": "integer",
+ "minimum": 0
+ },
+ {
+ "description": "In case the `bit` argument is 1 and the string is empty or composed of just zero bytes",
+ "const": -1
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/blmove.json b/src/commands/blmove.json
new file mode 100644
index 0000000..b316c52
--- /dev/null
+++ b/src/commands/blmove.json
@@ -0,0 +1,117 @@
+{
+ "BLMOVE": {
+ "summary": "Pops an element from a list, pushes it to another list and returns it. Blocks until an element is available otherwise. Deletes the list if the last element was moved.",
+ "complexity": "O(1)",
+ "group": "list",
+ "since": "6.2.0",
+ "arity": 6,
+ "function": "blmoveCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "BLOCKING"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The popped element.",
+ "type": "string"
+ },
+ {
+ "description": "Operation timed-out",
+ "type": "null"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "source",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 1
+ },
+ {
+ "name": "wherefrom",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "left",
+ "type": "pure-token",
+ "token": "LEFT"
+ },
+ {
+ "name": "right",
+ "type": "pure-token",
+ "token": "RIGHT"
+ }
+ ]
+ },
+ {
+ "name": "whereto",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "left",
+ "type": "pure-token",
+ "token": "LEFT"
+ },
+ {
+ "name": "right",
+ "type": "pure-token",
+ "token": "RIGHT"
+ }
+ ]
+ },
+ {
+ "name": "timeout",
+ "type": "double"
+ }
+ ]
+ }
+}
diff --git a/src/commands/blmpop.json b/src/commands/blmpop.json
new file mode 100644
index 0000000..48bfa3e
--- /dev/null
+++ b/src/commands/blmpop.json
@@ -0,0 +1,105 @@
+{
+ "BLMPOP": {
+ "summary": "Pops the first element from one of multiple lists. Blocks until an element is available otherwise. Deletes the list if the last element was popped.",
+ "complexity": "O(N+M) where N is the number of provided keys and M is the number of elements returned.",
+ "group": "list",
+ "since": "7.0.0",
+ "arity": -5,
+ "function": "blmpopCommand",
+ "get_keys_function": "blmpopGetKeys",
+ "command_flags": [
+ "WRITE",
+ "BLOCKING"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Operation timed-out",
+ "type": "null"
+ },
+ {
+ "description": "The key from which elements were popped and the popped elements",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "List key from which elements were popped.",
+ "type": "string"
+ },
+ {
+ "description": "Array of popped elements.",
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "timeout",
+ "type": "double"
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "where",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "left",
+ "type": "pure-token",
+ "token": "LEFT"
+ },
+ {
+ "name": "right",
+ "type": "pure-token",
+ "token": "RIGHT"
+ }
+ ]
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/blpop.json b/src/commands/blpop.json
new file mode 100644
index 0000000..cf90f0a
--- /dev/null
+++ b/src/commands/blpop.json
@@ -0,0 +1,80 @@
+{
+ "BLPOP": {
+ "summary": "Removes and returns the first element in a list. Blocks until an element is available otherwise. Deletes the list if the last element was popped.",
+ "complexity": "O(N) where N is the number of provided keys.",
+ "group": "list",
+ "since": "2.0.0",
+ "arity": -3,
+ "function": "blpopCommand",
+ "history": [
+ [
+ "6.0.0",
+ "`timeout` is interpreted as a double instead of an integer."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "BLOCKING"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -2,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "null",
+ "description": "No element could be popped and timeout expired"
+ },
+ {
+ "description": "The key from which the element was popped and the value of the popped element",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "List key from which the element was popped.",
+ "type": "string"
+ },
+ {
+ "description": "Value of the popped element.",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "timeout",
+ "type": "double"
+ }
+ ]
+ }
+}
diff --git a/src/commands/brpop.json b/src/commands/brpop.json
new file mode 100644
index 0000000..129f74d
--- /dev/null
+++ b/src/commands/brpop.json
@@ -0,0 +1,79 @@
+{
+ "BRPOP": {
+ "summary": "Removes and returns the last element in a list. Blocks until an element is available otherwise. Deletes the list if the last element was popped.",
+ "complexity": "O(N) where N is the number of provided keys.",
+ "group": "list",
+ "since": "2.0.0",
+ "arity": -3,
+ "function": "brpopCommand",
+ "history": [
+ [
+ "6.0.0",
+ "`timeout` is interpreted as a double instead of an integer."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "BLOCKING"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -2,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "timeout",
+ "type": "double"
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "No element could be popped and the timeout expired.",
+ "type": "null"
+ },
+ {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "The name of the key where an element was popped",
+ "type": "string"
+ },
+ {
+ "description": "The value of the popped element",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/brpoplpush.json b/src/commands/brpoplpush.json
new file mode 100644
index 0000000..dce1516
--- /dev/null
+++ b/src/commands/brpoplpush.json
@@ -0,0 +1,96 @@
+{
+ "BRPOPLPUSH": {
+ "summary": "Pops an element from a list, pushes it to another list and returns it. Blocks until an element is available otherwise. Deletes the list if the last element was popped.",
+ "complexity": "O(1)",
+ "group": "list",
+ "since": "2.2.0",
+ "arity": 4,
+ "function": "brpoplpushCommand",
+ "history": [
+ [
+ "6.0.0",
+ "`timeout` is interpreted as a double instead of an integer."
+ ]
+ ],
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`BLMOVE` with the `RIGHT` and `LEFT` arguments",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "BLOCKING"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "string",
+ "description": "The element being popped from source and pushed to destination."
+ },
+ {
+ "type": "null",
+ "description": "Timeout is reached."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "source",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 1
+ },
+ {
+ "name": "timeout",
+ "type": "double"
+ }
+ ]
+ }
+}
diff --git a/src/commands/bzmpop.json b/src/commands/bzmpop.json
new file mode 100644
index 0000000..4ff5c11
--- /dev/null
+++ b/src/commands/bzmpop.json
@@ -0,0 +1,117 @@
+{
+ "BZMPOP": {
+ "summary": "Removes and returns a member by score from one or more sorted sets. Blocks until a member is available otherwise. Deletes the sorted set if the last element was popped.",
+ "complexity": "O(K) + O(M*log(N)) where K is the number of provided keys, N being the number of elements in the sorted set, and M being the number of elements popped.",
+ "group": "sorted_set",
+ "since": "7.0.0",
+ "arity": -5,
+ "function": "bzmpopCommand",
+ "get_keys_function": "blmpopGetKeys",
+ "command_flags": [
+ "WRITE",
+ "BLOCKING"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Timeout reached and no elements were popped.",
+ "type": "null"
+ },
+ {
+ "description": "The keyname and the popped members.",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Keyname",
+ "type": "string"
+ },
+ {
+ "description": "Popped members and their scores.",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Member",
+ "type": "string"
+ },
+ {
+ "description": "Score",
+ "type": "number"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "timeout",
+ "type": "double"
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "where",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "min",
+ "type": "pure-token",
+ "token": "MIN"
+ },
+ {
+ "name": "max",
+ "type": "pure-token",
+ "token": "MAX"
+ }
+ ]
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/bzpopmax.json b/src/commands/bzpopmax.json
new file mode 100644
index 0000000..70b0b70
--- /dev/null
+++ b/src/commands/bzpopmax.json
@@ -0,0 +1,85 @@
+{
+ "BZPOPMAX": {
+ "summary": "Removes and returns the member with the highest score from one or more sorted sets. Blocks until a member is available otherwise. Deletes the sorted set if the last element was popped.",
+ "complexity": "O(log(N)) with N being the number of elements in the sorted set.",
+ "group": "sorted_set",
+ "since": "5.0.0",
+ "arity": -3,
+ "function": "bzpopmaxCommand",
+ "history": [
+ [
+ "6.0.0",
+ "`timeout` is interpreted as a double instead of an integer."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST",
+ "BLOCKING"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -2,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Timeout reached and no elements were popped.",
+ "type": "null"
+ },
+ {
+ "description": "The keyname, popped member, and its score.",
+ "type": "array",
+ "minItems": 3,
+ "maxItems": 3,
+ "items": [
+ {
+ "description": "Keyname",
+ "type": "string"
+ },
+ {
+ "description": "Member",
+ "type": "string"
+ },
+ {
+ "description": "Score",
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "timeout",
+ "type": "double"
+ }
+ ]
+ }
+}
diff --git a/src/commands/bzpopmin.json b/src/commands/bzpopmin.json
new file mode 100644
index 0000000..a2f305c
--- /dev/null
+++ b/src/commands/bzpopmin.json
@@ -0,0 +1,85 @@
+{
+ "BZPOPMIN": {
+ "summary": "Removes and returns the member with the lowest score from one or more sorted sets. Blocks until a member is available otherwise. Deletes the sorted set if the last element was popped.",
+ "complexity": "O(log(N)) with N being the number of elements in the sorted set.",
+ "group": "sorted_set",
+ "since": "5.0.0",
+ "arity": -3,
+ "function": "bzpopminCommand",
+ "history": [
+ [
+ "6.0.0",
+ "`timeout` is interpreted as a double instead of an integer."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST",
+ "BLOCKING"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -2,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Timeout reached and no elements were popped.",
+ "type": "null"
+ },
+ {
+ "description": "The keyname, popped member, and its score.",
+ "type": "array",
+ "minItems": 3,
+ "maxItems": 3,
+ "items": [
+ {
+ "description": "Keyname",
+ "type": "string"
+ },
+ {
+ "description": "Member",
+ "type": "string"
+ },
+ {
+ "description": "Score",
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "timeout",
+ "type": "double"
+ }
+ ]
+ }
+}
diff --git a/src/commands/client-caching.json b/src/commands/client-caching.json
new file mode 100644
index 0000000..2a4ae89
--- /dev/null
+++ b/src/commands/client-caching.json
@@ -0,0 +1,41 @@
+{
+ "CACHING": {
+ "summary": "Instructs the server whether to track the keys in the next request.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "6.0.0",
+ "arity": 3,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "mode",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "yes",
+ "type": "pure-token",
+ "token": "YES"
+ },
+ {
+ "name": "no",
+ "type": "pure-token",
+ "token": "NO"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/client-getname.json b/src/commands/client-getname.json
new file mode 100644
index 0000000..9e237af
--- /dev/null
+++ b/src/commands/client-getname.json
@@ -0,0 +1,32 @@
+{
+ "GETNAME": {
+ "summary": "Returns the name of the connection.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "2.6.9",
+ "arity": 2,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "string",
+ "description": "The connection name of the current connection"
+ },
+ {
+ "type": "null",
+ "description": "Connection name was not set"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/client-getredir.json b/src/commands/client-getredir.json
new file mode 100644
index 0000000..6fdb002
--- /dev/null
+++ b/src/commands/client-getredir.json
@@ -0,0 +1,37 @@
+{
+ "GETREDIR": {
+ "summary": "Returns the client ID to which the connection's tracking notifications are redirected.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "6.0.0",
+ "arity": 2,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": 0,
+ "description": "Not redirecting notifications to any client."
+ },
+ {
+ "const": -1,
+ "description": "Client tracking is not enabled."
+ },
+ {
+ "type": "integer",
+ "description": "ID of the client we are redirecting the notifications to.",
+ "minimum": 1
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/client-help.json b/src/commands/client-help.json
new file mode 100644
index 0000000..b49294c
--- /dev/null
+++ b/src/commands/client-help.json
@@ -0,0 +1,26 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "5.0.0",
+ "arity": 2,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/client-id.json b/src/commands/client-id.json
new file mode 100644
index 0000000..7c2bf08
--- /dev/null
+++ b/src/commands/client-id.json
@@ -0,0 +1,24 @@
+{
+ "ID": {
+ "summary": "Returns the unique client ID of the connection.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "5.0.0",
+ "arity": 2,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The id of the client"
+ }
+ }
+}
diff --git a/src/commands/client-info.json b/src/commands/client-info.json
new file mode 100644
index 0000000..93fa008
--- /dev/null
+++ b/src/commands/client-info.json
@@ -0,0 +1,27 @@
+{
+ "INFO": {
+ "summary": "Returns information about the connection.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "6.2.0",
+ "arity": 2,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "description": "a unique string, as described at the CLIENT LIST page, for the current client",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/client-kill.json b/src/commands/client-kill.json
new file mode 100644
index 0000000..bd0262d
--- /dev/null
+++ b/src/commands/client-kill.json
@@ -0,0 +1,159 @@
+{
+ "KILL": {
+ "summary": "Terminates open connections.",
+ "complexity": "O(N) where N is the number of client connections",
+ "group": "connection",
+ "since": "2.4.0",
+ "arity": -3,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "history": [
+ [
+ "2.8.12",
+ "Added new filter format."
+ ],
+ [
+ "2.8.12",
+ "`ID` option."
+ ],
+ [
+ "3.2.0",
+ "Added the `master` type for the `TYPE` option."
+ ],
+ [
+ "5.0.0",
+ "Replaced `slave` `TYPE` with `replica`. `slave` still supported for backward compatibility."
+ ],
+ [
+ "6.2.0",
+ "`LADDR` option."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "arguments": [
+ {
+ "name": "filter",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "old-format",
+ "display": "ip:port",
+ "type": "string",
+ "deprecated_since": "2.8.12"
+ },
+ {
+ "name": "new-format",
+ "type": "oneof",
+ "multiple": true,
+ "arguments": [
+ {
+ "token": "ID",
+ "name": "client-id",
+ "type": "integer",
+ "optional": true,
+ "since": "2.8.12"
+ },
+ {
+ "token": "TYPE",
+ "name": "client-type",
+ "type": "oneof",
+ "optional": true,
+ "since": "2.8.12",
+ "arguments": [
+ {
+ "name": "normal",
+ "type": "pure-token",
+ "token": "normal"
+ },
+ {
+ "name": "master",
+ "type": "pure-token",
+ "token": "master",
+ "since": "3.2.0"
+ },
+ {
+ "name": "slave",
+ "type": "pure-token",
+ "token": "slave"
+ },
+ {
+ "name": "replica",
+ "type": "pure-token",
+ "token": "replica",
+ "since": "5.0.0"
+ },
+ {
+ "name": "pubsub",
+ "type": "pure-token",
+ "token": "pubsub"
+ }
+ ]
+ },
+ {
+ "token": "USER",
+ "name": "username",
+ "type": "string",
+ "optional": true
+ },
+ {
+ "token": "ADDR",
+ "name": "addr",
+ "display": "ip:port",
+ "type": "string",
+ "optional": true
+ },
+ {
+ "token": "LADDR",
+ "name": "laddr",
+ "display": "ip:port",
+ "type": "string",
+ "optional": true,
+ "since": "6.2.0"
+ },
+ {
+ "token": "SKIPME",
+ "name": "skipme",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "yes",
+ "type": "pure-token",
+ "token": "YES"
+ },
+ {
+ "name": "no",
+ "type": "pure-token",
+ "token": "NO"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "when called in 3 argument format",
+ "const": "OK"
+ },
+ {
+ "description": "when called in filter/value format, the number of clients killed",
+ "type": "integer",
+ "minimum": 0
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/client-list.json b/src/commands/client-list.json
new file mode 100644
index 0000000..f72ffaf
--- /dev/null
+++ b/src/commands/client-list.json
@@ -0,0 +1,93 @@
+{
+ "LIST": {
+ "summary": "Lists open connections.",
+ "complexity": "O(N) where N is the number of client connections",
+ "group": "connection",
+ "since": "2.4.0",
+ "arity": -2,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "history": [
+ [
+ "2.8.12",
+ "Added unique client `id` field."
+ ],
+ [
+ "5.0.0",
+ "Added optional `TYPE` filter."
+ ],
+ [
+ "6.0.0",
+ "Added `user` field."
+ ],
+ [
+ "6.2.0",
+ "Added `argv-mem`, `tot-mem`, `laddr` and `redir` fields and the optional `ID` filter."
+ ],
+ [
+ "7.0.0",
+ "Added `resp`, `multi-mem`, `rbs` and `rbp` fields."
+ ],
+ [
+ "7.0.3",
+ "Added `ssub` field."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "Information and statistics about client connections"
+ },
+ "arguments": [
+ {
+ "token": "TYPE",
+ "name": "client-type",
+ "type": "oneof",
+ "optional": true,
+ "since": "5.0.0",
+ "arguments": [
+ {
+ "name": "normal",
+ "type": "pure-token",
+ "token": "normal"
+ },
+ {
+ "name": "master",
+ "type": "pure-token",
+ "token": "master"
+ },
+ {
+ "name": "replica",
+ "type": "pure-token",
+ "token": "replica"
+ },
+ {
+ "name": "pubsub",
+ "type": "pure-token",
+ "token": "pubsub"
+ }
+ ]
+ },
+ {
+ "name": "client-id",
+ "token": "ID",
+ "type": "integer",
+ "optional": true,
+ "multiple": true,
+ "since": "6.2.0"
+ }
+ ]
+ }
+}
diff --git a/src/commands/client-no-evict.json b/src/commands/client-no-evict.json
new file mode 100644
index 0000000..9ed6718
--- /dev/null
+++ b/src/commands/client-no-evict.json
@@ -0,0 +1,42 @@
+{
+ "NO-EVICT": {
+ "summary": "Sets the client eviction mode of the connection.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "7.0.0",
+ "arity": 3,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "arguments": [
+ {
+ "name": "enabled",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "on",
+ "type": "pure-token",
+ "token": "ON"
+ },
+ {
+ "name": "off",
+ "type": "pure-token",
+ "token": "OFF"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/client-no-touch.json b/src/commands/client-no-touch.json
new file mode 100644
index 0000000..4cf7b72
--- /dev/null
+++ b/src/commands/client-no-touch.json
@@ -0,0 +1,40 @@
+{
+ "NO-TOUCH": {
+ "summary": "Controls whether commands sent by the client affect the LRU/LFU of accessed keys.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "7.2.0",
+ "arity": 3,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "enabled",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "on",
+ "type": "pure-token",
+ "token": "ON"
+ },
+ {
+ "name": "off",
+ "type": "pure-token",
+ "token": "OFF"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/client-pause.json b/src/commands/client-pause.json
new file mode 100644
index 0000000..b1dd7bc
--- /dev/null
+++ b/src/commands/client-pause.json
@@ -0,0 +1,54 @@
+{
+ "PAUSE": {
+ "summary": "Suspends commands processing.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "3.0.0",
+ "arity": -3,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "history": [
+ [
+ "6.2.0",
+ "`CLIENT PAUSE WRITE` mode added along with the `mode` option."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "arguments": [
+ {
+ "name": "timeout",
+ "type": "integer"
+ },
+ {
+ "name": "mode",
+ "type": "oneof",
+ "optional": true,
+ "since": "6.2.0",
+ "arguments": [
+ {
+ "name": "write",
+ "type": "pure-token",
+ "token": "WRITE"
+ },
+ {
+ "name": "all",
+ "type": "pure-token",
+ "token": "ALL"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/client-reply.json b/src/commands/client-reply.json
new file mode 100644
index 0000000..9406de8
--- /dev/null
+++ b/src/commands/client-reply.json
@@ -0,0 +1,47 @@
+{
+ "REPLY": {
+ "summary": "Instructs the server whether to reply to commands.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "3.2.0",
+ "arity": 3,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK",
+ "description": "When called with either OFF or SKIP subcommands, no reply is made. When called with ON, reply is OK."
+ },
+ "arguments": [
+ {
+ "name": "action",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "on",
+ "type": "pure-token",
+ "token": "ON"
+ },
+ {
+ "name": "off",
+ "type": "pure-token",
+ "token": "OFF"
+ },
+ {
+ "name": "skip",
+ "type": "pure-token",
+ "token": "SKIP"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/client-setinfo.json b/src/commands/client-setinfo.json
new file mode 100644
index 0000000..d0d8f73
--- /dev/null
+++ b/src/commands/client-setinfo.json
@@ -0,0 +1,45 @@
+{
+ "SETINFO": {
+ "summary": "Sets information specific to the client or connection.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "7.2.0",
+ "arity": 4,
+ "container": "CLIENT",
+ "function": "clientSetinfoCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "attr",
+ "type": "oneof",
+ "arguments": [
+ {
+ "token": "lib-name",
+ "name": "libname",
+ "type": "string"
+ },
+ {
+ "token": "lib-ver",
+ "name": "libver",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/client-setname.json b/src/commands/client-setname.json
new file mode 100644
index 0000000..b071bd1
--- /dev/null
+++ b/src/commands/client-setname.json
@@ -0,0 +1,33 @@
+{
+ "SETNAME": {
+ "summary": "Sets the connection name.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "2.6.9",
+ "arity": 3,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "arguments": [
+ {
+ "name": "connection-name",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/client-tracking.json b/src/commands/client-tracking.json
new file mode 100644
index 0000000..28e84ec
--- /dev/null
+++ b/src/commands/client-tracking.json
@@ -0,0 +1,80 @@
+{
+ "TRACKING": {
+ "summary": "Controls server-assisted client-side caching for the connection.",
+ "complexity": "O(1). Some options may introduce additional complexity.",
+ "group": "connection",
+ "since": "6.0.0",
+ "arity": -3,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "arguments": [
+ {
+ "name": "status",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "on",
+ "type": "pure-token",
+ "token": "ON"
+ },
+ {
+ "name": "off",
+ "type": "pure-token",
+ "token": "OFF"
+ }
+ ]
+ },
+ {
+ "token": "REDIRECT",
+ "name": "client-id",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "token": "PREFIX",
+ "name": "prefix",
+ "type": "string",
+ "optional": true,
+ "multiple": true,
+ "multiple_token": true
+ },
+ {
+ "name": "BCAST",
+ "token": "BCAST",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "OPTIN",
+ "token": "OPTIN",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "OPTOUT",
+ "token": "OPTOUT",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "NOLOOP",
+ "token": "NOLOOP",
+ "type": "pure-token",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "if the client was successfully put into or taken out of tracking mode",
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/client-trackinginfo.json b/src/commands/client-trackinginfo.json
new file mode 100644
index 0000000..270a3d5
--- /dev/null
+++ b/src/commands/client-trackinginfo.json
@@ -0,0 +1,80 @@
+{
+ "TRACKINGINFO": {
+ "summary": "Returns information about server-assisted client-side caching for the connection.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "6.2.0",
+ "arity": 2,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "flags": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "const": "off",
+ "description": "The connection isn't using server assisted client side caching."
+ },
+ {
+ "const": "on",
+ "description": "Server assisted client side caching is enabled for the connection."
+ },
+ {
+ "const": "bcast",
+ "description": "The client uses broadcasting mode."
+ },
+ {
+ "const": "optin",
+ "description": "The client does not cache keys by default."
+ },
+ {
+ "const": "optout",
+ "description": "The client caches keys by default."
+ },
+ {
+ "const": "caching-yes",
+ "description": "The next command will cache keys (exists only together with optin)."
+ },
+ {
+ "const": "caching-no",
+ "description": "The next command won't cache keys (exists only together with optout)."
+ },
+ {
+ "const": "noloop",
+ "description": "The client isn't notified about keys modified by itself."
+ },
+ {
+ "const": "broken_redirect",
+ "description": "The client ID used for redirection isn't valid anymore."
+ }
+ ]
+ }
+ },
+ "redirect": {
+ "type": "integer",
+ "description": "The client ID used for notifications redirection, or -1 when none."
+ },
+ "prefixes": {
+ "type": "array",
+ "description": "List of key prefixes for which notifications are sent to the client.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/commands/client-unblock.json b/src/commands/client-unblock.json
new file mode 100644
index 0000000..d028f47
--- /dev/null
+++ b/src/commands/client-unblock.json
@@ -0,0 +1,56 @@
+{
+ "UNBLOCK": {
+ "summary": "Unblocks a client blocked by a blocking command from a different connection.",
+ "complexity": "O(log N) where N is the number of client connections",
+ "group": "connection",
+ "since": "5.0.0",
+ "arity": -3,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": 0,
+ "description": "if the client was unblocked successfully"
+ },
+ {
+ "const": 1,
+ "description": "if the client wasn't unblocked"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "client-id",
+ "type": "integer"
+ },
+ {
+ "name": "unblock-type",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "timeout",
+ "type": "pure-token",
+ "token": "TIMEOUT"
+ },
+ {
+ "name": "error",
+ "type": "pure-token",
+ "token": "ERROR"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/client-unpause.json b/src/commands/client-unpause.json
new file mode 100644
index 0000000..6c55210
--- /dev/null
+++ b/src/commands/client-unpause.json
@@ -0,0 +1,24 @@
+{
+ "UNPAUSE": {
+ "summary": "Resumes processing commands from paused clients.",
+ "complexity": "O(N) Where N is the number of paused clients",
+ "group": "connection",
+ "since": "6.2.0",
+ "arity": 2,
+ "container": "CLIENT",
+ "function": "clientCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/client.json b/src/commands/client.json
new file mode 100644
index 0000000..b509961
--- /dev/null
+++ b/src/commands/client.json
@@ -0,0 +1,12 @@
+{
+ "CLIENT": {
+ "summary": "A container for client connection commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "connection",
+ "since": "2.4.0",
+ "arity": -2,
+ "command_flags": [
+ "SENTINEL"
+ ]
+ }
+}
diff --git a/src/commands/cluster-addslots.json b/src/commands/cluster-addslots.json
new file mode 100644
index 0000000..4d2ea25
--- /dev/null
+++ b/src/commands/cluster-addslots.json
@@ -0,0 +1,26 @@
+{
+ "ADDSLOTS": {
+ "summary": "Assigns new hash slots to a node.",
+ "complexity": "O(N) where N is the total number of hash slot arguments",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": -3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "slot",
+ "type": "integer",
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-addslotsrange.json b/src/commands/cluster-addslotsrange.json
new file mode 100644
index 0000000..4ff8a40
--- /dev/null
+++ b/src/commands/cluster-addslotsrange.json
@@ -0,0 +1,36 @@
+{
+ "ADDSLOTSRANGE": {
+ "summary": "Assigns new hash slot ranges to a node.",
+ "complexity": "O(N) where N is the total number of the slots between the start slot and end slot arguments.",
+ "group": "cluster",
+ "since": "7.0.0",
+ "arity": -4,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "range",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "start-slot",
+ "type": "integer"
+ },
+ {
+ "name": "end-slot",
+ "type": "integer"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-bumpepoch.json b/src/commands/cluster-bumpepoch.json
new file mode 100644
index 0000000..786723c
--- /dev/null
+++ b/src/commands/cluster-bumpepoch.json
@@ -0,0 +1,33 @@
+{
+ "BUMPEPOCH": {
+ "summary": "Advances the cluster config epoch.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "if the epoch was incremented",
+ "type": "string",
+ "pattern": "^BUMPED [0-9]*$"
+ },
+ {
+ "description": "if the node already has the greatest config epoch in the cluster",
+ "type": "string",
+ "pattern": "^STILL [0-9]*$"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/cluster-count-failure-reports.json b/src/commands/cluster-count-failure-reports.json
new file mode 100644
index 0000000..b80632d
--- /dev/null
+++ b/src/commands/cluster-count-failure-reports.json
@@ -0,0 +1,29 @@
+{
+ "COUNT-FAILURE-REPORTS": {
+ "summary": "Returns the number of active failure reports for a node.",
+ "complexity": "O(N) where N is the number of failure reports",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "ADMIN",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "arguments": [
+ {
+ "name": "node-id",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of active failure reports for the node",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/cluster-countkeysinslot.json b/src/commands/cluster-countkeysinslot.json
new file mode 100644
index 0000000..caeec51
--- /dev/null
+++ b/src/commands/cluster-countkeysinslot.json
@@ -0,0 +1,25 @@
+{
+ "COUNTKEYSINSLOT": {
+ "summary": "Returns the number of keys in a hash slot.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "slot",
+ "type": "integer"
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of keys in the specified hash slot",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/cluster-delslots.json b/src/commands/cluster-delslots.json
new file mode 100644
index 0000000..0732d2b
--- /dev/null
+++ b/src/commands/cluster-delslots.json
@@ -0,0 +1,26 @@
+{
+ "DELSLOTS": {
+ "summary": "Sets hash slots as unbound for a node.",
+ "complexity": "O(N) where N is the total number of hash slot arguments",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": -3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "slot",
+ "type": "integer",
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-delslotsrange.json b/src/commands/cluster-delslotsrange.json
new file mode 100644
index 0000000..77f1aea
--- /dev/null
+++ b/src/commands/cluster-delslotsrange.json
@@ -0,0 +1,36 @@
+{
+ "DELSLOTSRANGE": {
+ "summary": "Sets hash slot ranges as unbound for a node.",
+ "complexity": "O(N) where N is the total number of the slots between the start slot and end slot arguments.",
+ "group": "cluster",
+ "since": "7.0.0",
+ "arity": -4,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "range",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "start-slot",
+ "type": "integer"
+ },
+ {
+ "name": "end-slot",
+ "type": "integer"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-failover.json b/src/commands/cluster-failover.json
new file mode 100644
index 0000000..f58fd56
--- /dev/null
+++ b/src/commands/cluster-failover.json
@@ -0,0 +1,38 @@
+{
+ "FAILOVER": {
+ "summary": "Forces a replica to perform a manual failover of its master.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": -2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "options",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "force",
+ "type": "pure-token",
+ "token": "FORCE"
+ },
+ {
+ "name": "takeover",
+ "type": "pure-token",
+ "token": "TAKEOVER"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-flushslots.json b/src/commands/cluster-flushslots.json
new file mode 100644
index 0000000..09902ad
--- /dev/null
+++ b/src/commands/cluster-flushslots.json
@@ -0,0 +1,19 @@
+{
+ "FLUSHSLOTS": {
+ "summary": "Deletes all slots information from a node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-forget.json b/src/commands/cluster-forget.json
new file mode 100644
index 0000000..6f9a25c
--- /dev/null
+++ b/src/commands/cluster-forget.json
@@ -0,0 +1,25 @@
+{
+ "FORGET": {
+ "summary": "Removes a node from the nodes table.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "node-id",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-getkeysinslot.json b/src/commands/cluster-getkeysinslot.json
new file mode 100644
index 0000000..039dfea
--- /dev/null
+++ b/src/commands/cluster-getkeysinslot.json
@@ -0,0 +1,35 @@
+{
+ "GETKEYSINSLOT": {
+ "summary": "Returns the key names in a hash slot.",
+ "complexity": "O(N) where N is the number of requested keys",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 4,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "arguments": [
+ {
+ "name": "slot",
+ "type": "integer"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ }
+ ],
+ "reply_schema": {
+ "description": "an array with up to count elements",
+ "type": "array",
+ "items": {
+ "description": "key name",
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/cluster-help.json b/src/commands/cluster-help.json
new file mode 100644
index 0000000..27f7d08
--- /dev/null
+++ b/src/commands/cluster-help.json
@@ -0,0 +1,22 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "5.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/cluster-info.json b/src/commands/cluster-info.json
new file mode 100644
index 0000000..2c88760
--- /dev/null
+++ b/src/commands/cluster-info.json
@@ -0,0 +1,21 @@
+{
+ "INFO": {
+ "summary": "Returns information about the state of a node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "description": "A map between named fields and values in the form of <field>:<value> lines separated by newlines composed by the two bytes CRLF",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/cluster-keyslot.json b/src/commands/cluster-keyslot.json
new file mode 100644
index 0000000..e51b643
--- /dev/null
+++ b/src/commands/cluster-keyslot.json
@@ -0,0 +1,25 @@
+{
+ "KEYSLOT": {
+ "summary": "Returns the hash slot for a key.",
+ "complexity": "O(N) where N is the number of bytes in the key",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "The hash slot number for the specified key",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/cluster-links.json b/src/commands/cluster-links.json
new file mode 100644
index 0000000..07b2e83
--- /dev/null
+++ b/src/commands/cluster-links.json
@@ -0,0 +1,60 @@
+{
+ "LINKS": {
+ "summary": "Returns a list of all TCP links to and from peer nodes.",
+ "complexity": "O(N) where N is the total number of Cluster nodes",
+ "group": "cluster",
+ "since": "7.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "description": "an array of cluster links and their attributes",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "direction": {
+ "description": "This link is established by the local node _to_ the peer, or accepted by the local node _from_ the peer.",
+ "oneOf": [
+ {
+ "description": "connection initiated from peer",
+ "const": "from"
+ },
+ {
+ "description": "connection initiated to peer",
+ "const": "to"
+ }
+ ]
+ },
+ "node": {
+ "description": "the node id of the peer",
+ "type": "string"
+ },
+ "create-time": {
+ "description": "Unix time at which the link was created. (In the case of a _to_ link, this is the time when the TCP link is created by the local node, not the time when it is actually established.)",
+ "type": "integer"
+ },
+ "events": {
+ "description": "events currently registered for the link. r means readable event, w means writable event",
+ "type": "string"
+ },
+ "send-buffer-allocated": {
+ "description": "allocated size of the link's send buffer, which is used to buffer outgoing messages toward the peer",
+ "type": "integer"
+ },
+ "send-buffer-used": {
+ "description": "size of the portion of the link's send buffer that is currently holding data (messages)",
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false
+ }
+ }
+ }
+}
diff --git a/src/commands/cluster-meet.json b/src/commands/cluster-meet.json
new file mode 100644
index 0000000..7ca23a7
--- /dev/null
+++ b/src/commands/cluster-meet.json
@@ -0,0 +1,41 @@
+{
+ "MEET": {
+ "summary": "Forces a node to handshake with another node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": -4,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "history": [
+ [
+ "4.0.0",
+ "Added the optional `cluster_bus_port` argument."
+ ]
+ ],
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "ip",
+ "type": "string"
+ },
+ {
+ "name": "port",
+ "type": "integer"
+ },
+ {
+ "name": "cluster-bus-port",
+ "type": "integer",
+ "optional": true,
+ "since": "4.0.0"
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-myid.json b/src/commands/cluster-myid.json
new file mode 100644
index 0000000..340d425
--- /dev/null
+++ b/src/commands/cluster-myid.json
@@ -0,0 +1,18 @@
+{
+ "MYID": {
+ "summary": "Returns the ID of a node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "STALE"
+ ],
+ "reply_schema": {
+ "description": "the node id",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/cluster-myshardid.json b/src/commands/cluster-myshardid.json
new file mode 100644
index 0000000..7db63fd
--- /dev/null
+++ b/src/commands/cluster-myshardid.json
@@ -0,0 +1,22 @@
+{
+ "MYSHARDID": {
+ "summary": "Returns the shard ID of a node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "7.2.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "history": [],
+ "command_flags": [
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "description": "the node's shard id",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/cluster-nodes.json b/src/commands/cluster-nodes.json
new file mode 100644
index 0000000..ce34944
--- /dev/null
+++ b/src/commands/cluster-nodes.json
@@ -0,0 +1,21 @@
+{
+ "NODES": {
+ "summary": "Returns the cluster configuration for a node.",
+ "complexity": "O(N) where N is the total number of Cluster nodes",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "description": "the serialized cluster configuration",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/cluster-replicas.json b/src/commands/cluster-replicas.json
new file mode 100644
index 0000000..e01617f
--- /dev/null
+++ b/src/commands/cluster-replicas.json
@@ -0,0 +1,32 @@
+{
+ "REPLICAS": {
+ "summary": "Lists the replica nodes of a master node.",
+ "complexity": "O(N) where N is the number of replicas.",
+ "group": "cluster",
+ "since": "5.0.0",
+ "arity": 3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "ADMIN",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "arguments": [
+ {
+ "name": "node-id",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "a list of replica nodes replicating from the specified master node provided in the same format used by CLUSTER NODES",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "the serialized cluster configuration"
+ }
+ }
+ }
+}
diff --git a/src/commands/cluster-replicate.json b/src/commands/cluster-replicate.json
new file mode 100644
index 0000000..060d4af
--- /dev/null
+++ b/src/commands/cluster-replicate.json
@@ -0,0 +1,25 @@
+{
+ "REPLICATE": {
+ "summary": "Configure a node as replica of a master node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "node-id",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-reset.json b/src/commands/cluster-reset.json
new file mode 100644
index 0000000..6bd4fe6
--- /dev/null
+++ b/src/commands/cluster-reset.json
@@ -0,0 +1,38 @@
+{
+ "RESET": {
+ "summary": "Resets a node.",
+ "complexity": "O(N) where N is the number of known nodes. The command may execute a FLUSHALL as a side effect.",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": -2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "ADMIN",
+ "STALE",
+ "NOSCRIPT"
+ ],
+ "arguments": [
+ {
+ "name": "reset-type",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "hard",
+ "type": "pure-token",
+ "token": "HARD"
+ },
+ {
+ "name": "soft",
+ "type": "pure-token",
+ "token": "SOFT"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-saveconfig.json b/src/commands/cluster-saveconfig.json
new file mode 100644
index 0000000..d004509
--- /dev/null
+++ b/src/commands/cluster-saveconfig.json
@@ -0,0 +1,19 @@
+{
+ "SAVECONFIG": {
+ "summary": "Forces a node to save the cluster configuration to disk.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-set-config-epoch.json b/src/commands/cluster-set-config-epoch.json
new file mode 100644
index 0000000..70c7de4
--- /dev/null
+++ b/src/commands/cluster-set-config-epoch.json
@@ -0,0 +1,25 @@
+{
+ "SET-CONFIG-EPOCH": {
+ "summary": "Sets the configuration epoch for a new node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "config-epoch",
+ "type": "integer"
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-setslot.json b/src/commands/cluster-setslot.json
new file mode 100644
index 0000000..d0d4819
--- /dev/null
+++ b/src/commands/cluster-setslot.json
@@ -0,0 +1,54 @@
+{
+ "SETSLOT": {
+ "summary": "Binds a hash slot to a node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": -4,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "slot",
+ "type": "integer"
+ },
+ {
+ "name": "subcommand",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "importing",
+ "display": "node-id",
+ "type": "string",
+ "token": "IMPORTING"
+ },
+ {
+ "name": "migrating",
+ "display": "node-id",
+ "type": "string",
+ "token": "MIGRATING"
+ },
+ {
+ "name": "node",
+ "display": "node-id",
+ "type": "string",
+ "token": "NODE"
+ },
+ {
+ "name": "stable",
+ "type": "pure-token",
+ "token": "STABLE"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/cluster-shards.json b/src/commands/cluster-shards.json
new file mode 100644
index 0000000..e7a0829
--- /dev/null
+++ b/src/commands/cluster-shards.json
@@ -0,0 +1,90 @@
+{
+ "SHARDS": {
+ "summary": "Returns the mapping of cluster slots to shards.",
+ "complexity": "O(N) where N is the total number of cluster nodes",
+ "group": "cluster",
+ "since": "7.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "history": [],
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "description": "a nested list of a map of hash ranges and shard nodes describing individual shards",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "slots": {
+ "description": "an even number element array specifying the start and end slot numbers for slot ranges owned by this shard",
+ "type": "array",
+ "items": {
+ "type": "integer"
+ }
+ },
+ "nodes": {
+ "description": "nodes that handle these slot ranges",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "port": {
+ "type": "integer"
+ },
+ "tls-port": {
+ "type": "integer"
+ },
+ "ip": {
+ "type": "string"
+ },
+ "endpoint": {
+ "type": "string"
+ },
+ "hostname": {
+ "type": "string"
+ },
+ "role": {
+ "oneOf": [
+ {
+ "const": "master"
+ },
+ {
+ "const": "replica"
+ }
+ ]
+ },
+ "replication-offset": {
+ "type": "integer"
+ },
+ "health": {
+ "oneOf": [
+ {
+ "const": "fail"
+ },
+ {
+ "const": "loading"
+ },
+ {
+ "const": "online"
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/commands/cluster-slaves.json b/src/commands/cluster-slaves.json
new file mode 100644
index 0000000..a736088
--- /dev/null
+++ b/src/commands/cluster-slaves.json
@@ -0,0 +1,37 @@
+{
+ "SLAVES": {
+ "summary": "Lists the replica nodes of a master node.",
+ "complexity": "O(N) where N is the number of replicas.",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 3,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "deprecated_since": "5.0.0",
+ "replaced_by": "`CLUSTER REPLICAS`",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "ADMIN",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "arguments": [
+ {
+ "name": "node-id",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "a list of replica nodes replicating from the specified master node provided in the same format used by CLUSTER NODES",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "the serialized cluster configuration"
+ }
+ }
+ }
+}
diff --git a/src/commands/cluster-slots.json b/src/commands/cluster-slots.json
new file mode 100644
index 0000000..b00cc60
--- /dev/null
+++ b/src/commands/cluster-slots.json
@@ -0,0 +1,136 @@
+{
+ "SLOTS": {
+ "summary": "Returns the mapping of cluster slots to nodes.",
+ "complexity": "O(N) where N is the total number of Cluster nodes",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 2,
+ "container": "CLUSTER",
+ "function": "clusterCommand",
+ "deprecated_since": "7.0.0",
+ "replaced_by": "`CLUSTER SHARDS`",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "history": [
+ [
+ "4.0.0",
+ "Added node IDs."
+ ],
+ [
+ "7.0.0",
+ "Added additional networking metadata field."
+ ]
+ ],
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "description": "nested list of slot ranges with networking information",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 3,
+ "maxItems": 4294967295,
+ "items": [
+ {
+ "description": "start slot number",
+ "type": "integer"
+ },
+ {
+ "description": "end slot number",
+ "type": "integer"
+ },
+ {
+ "type": "array",
+ "description": "Master node for the slot range",
+ "minItems": 4,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "endpoint description",
+ "oneOf": [
+ {
+ "description": "hostname or ip",
+ "type": "string"
+ },
+ {
+ "description": "unknown type",
+ "type": "null"
+ }
+ ]
+ },
+ {
+ "description": "port",
+ "type": "integer"
+ },
+ {
+ "description": "node name",
+ "type": "string"
+ },
+ {
+ "description": "array of node descriptions",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "hostname": {
+ "type": "string"
+ },
+ "ip": {
+ "type": "string"
+ }
+ }
+ }
+ ]
+ }
+ ],
+ "additionalItems": {
+ "type": "array",
+ "description": "Replica node for the slot range",
+ "minItems": 4,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "endpoint description",
+ "oneOf": [
+ {
+ "description": "hostname or ip",
+ "type": "string"
+ },
+ {
+ "description": "unknown type",
+ "type": "null"
+ }
+ ]
+ },
+ {
+ "description": "port",
+ "type": "integer"
+ },
+ {
+ "description": "node name",
+ "type": "string"
+ },
+ {
+ "description": "array of node descriptions",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "hostname": {
+ "type": "string"
+ },
+ "ip": {
+ "type": "string"
+ }
+ }
+ }
+ ]
+ }
+ }
+ }
+ }
+}
diff --git a/src/commands/cluster.json b/src/commands/cluster.json
new file mode 100644
index 0000000..e74404d
--- /dev/null
+++ b/src/commands/cluster.json
@@ -0,0 +1,9 @@
+{
+ "CLUSTER": {
+ "summary": "A container for Redis Cluster commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": -2
+ }
+}
diff --git a/src/commands/command-count.json b/src/commands/command-count.json
new file mode 100644
index 0000000..7306767
--- /dev/null
+++ b/src/commands/command-count.json
@@ -0,0 +1,23 @@
+{
+ "COUNT": {
+ "summary": "Returns a count of commands.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": 2,
+ "container": "COMMAND",
+ "function": "commandCountCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "description": "Number of total commands in this Redis server.",
+ "type": "integer"
+ }
+ }
+}
diff --git a/src/commands/command-docs.json b/src/commands/command-docs.json
new file mode 100644
index 0000000..75df5b4
--- /dev/null
+++ b/src/commands/command-docs.json
@@ -0,0 +1,211 @@
+{
+ "DOCS": {
+ "summary": "Returns documentary information about one, multiple or all commands.",
+ "complexity": "O(N) where N is the number of commands to look up",
+ "group": "server",
+ "since": "7.0.0",
+ "arity": -2,
+ "container": "COMMAND",
+ "function": "commandDocsCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "reply_schema": {
+ "description": "A map where each key is a command name, and each value is the documentary information",
+ "type": "object",
+ "additionalProperties": false,
+ "patternProperties": {
+ "^.*$": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "summary": {
+ "description": "short command description",
+ "type": "string"
+ },
+ "since": {
+ "description": "the Redis version that added the command (or for module commands, the module version).",
+ "type": "string"
+ },
+ "group": {
+ "description": "the functional group to which the command belongs",
+ "oneOf": [
+ {
+ "const": "bitmap"
+ },
+ {
+ "const": "cluster"
+ },
+ {
+ "const": "connection"
+ },
+ {
+ "const": "generic"
+ },
+ {
+ "const": "geo"
+ },
+ {
+ "const": "hash"
+ },
+ {
+ "const": "hyperloglog"
+ },
+ {
+ "const": "list"
+ },
+ {
+ "const": "module"
+ },
+ {
+ "const": "pubsub"
+ },
+ {
+ "const": "scripting"
+ },
+ {
+ "const": "sentinel"
+ },
+ {
+ "const": "server"
+ },
+ {
+ "const": "set"
+ },
+ {
+ "const": "sorted-set"
+ },
+ {
+ "const": "stream"
+ },
+ {
+ "const": "string"
+ },
+ {
+ "const": "transactions"
+ }
+ ]
+ },
+ "complexity": {
+ "description": "a short explanation about the command's time complexity.",
+ "type": "string"
+ },
+ "module": {
+ "type": "string"
+ },
+ "doc_flags": {
+ "description": "an array of documentation flags",
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "description": "the command is deprecated.",
+ "const": "deprecated"
+ },
+ {
+ "description": "a system command that isn't meant to be called by users.",
+ "const": "syscmd"
+ }
+ ]
+ }
+ },
+ "deprecated_since": {
+ "description": "the Redis version that deprecated the command (or for module commands, the module version)",
+ "type": "string"
+ },
+ "replaced_by": {
+ "description": "the alternative for a deprecated command.",
+ "type": "string"
+ },
+ "history": {
+ "description": "an array of historical notes describing changes to the command's behavior or arguments.",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "The Redis version that the entry applies to."
+ },
+ {
+ "type": "string",
+ "description": "The description of the change."
+ }
+ ]
+ }
+ },
+ "arguments": {
+ "description": "an array of maps that describe the command's arguments.",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "display_text": {
+ "type": "string"
+ },
+ "key_spec_index": {
+ "type": "integer"
+ },
+ "token": {
+ "type": "string"
+ },
+ "summary": {
+ "type": "string"
+ },
+ "since": {
+ "type": "string"
+ },
+ "deprecated_since": {
+ "type": "string"
+ },
+ "flags": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "arguments": {
+ "type": "array"
+ }
+ }
+ }
+ },
+ "reply_schema": {
+ "description": "command reply schema",
+ "type": "object"
+ },
+ "subcommands": {
+ "description": "A map where each key is a subcommand, and each value is the documentary information",
+ "$ref": "#"
+ }
+ }
+ }
+ }
+ },
+ "arguments": [
+ {
+ "name": "command-name",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/command-getkeys.json b/src/commands/command-getkeys.json
new file mode 100644
index 0000000..88103c5
--- /dev/null
+++ b/src/commands/command-getkeys.json
@@ -0,0 +1,39 @@
+{
+ "GETKEYS": {
+ "summary": "Extracts the key names from an arbitrary command.",
+ "complexity": "O(N) where N is the number of arguments to the command",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": -3,
+ "container": "COMMAND",
+ "function": "commandGetKeysCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "description": "List of keys from the given Redis command.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "uniqueItems": true
+ },
+ "arguments": [
+ {
+ "name": "command",
+ "type": "string"
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/command-getkeysandflags.json b/src/commands/command-getkeysandflags.json
new file mode 100644
index 0000000..a069f5e
--- /dev/null
+++ b/src/commands/command-getkeysandflags.json
@@ -0,0 +1,55 @@
+{
+ "GETKEYSANDFLAGS": {
+ "summary": "Extracts the key names and access flags for an arbitrary command.",
+ "complexity": "O(N) where N is the number of arguments to the command",
+ "group": "server",
+ "since": "7.0.0",
+ "arity": -3,
+ "container": "COMMAND",
+ "function": "commandGetKeysAndFlagsCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "description": "List of keys from the given Redis command and their usage flags.",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Key name",
+ "type": "string"
+ },
+ {
+ "description": "Set of key flags",
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
+ "arguments": [
+ {
+ "name": "command",
+ "type": "string"
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/command-help.json b/src/commands/command-help.json
new file mode 100644
index 0000000..5735f01
--- /dev/null
+++ b/src/commands/command-help.json
@@ -0,0 +1,26 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "5.0.0",
+ "arity": 2,
+ "container": "COMMAND",
+ "function": "commandHelpCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/command-info.json b/src/commands/command-info.json
new file mode 100644
index 0000000..8c69060
--- /dev/null
+++ b/src/commands/command-info.json
@@ -0,0 +1,213 @@
+{
+ "INFO": {
+ "summary": "Returns information about one, multiple or all commands.",
+ "complexity": "O(N) where N is the number of commands to look up",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": -2,
+ "container": "COMMAND",
+ "function": "commandInfoCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Allowed to be called with no argument to get info on all commands."
+ ]
+ ],
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "arguments": [
+ {
+ "name": "command-name",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "description": "command does not exist",
+ "type": "null"
+ },
+ {
+ "description": "command info array output",
+ "type": "array",
+ "minItems": 10,
+ "maxItems": 10,
+ "items": [
+ {
+ "description": "command name",
+ "type": "string"
+ },
+ {
+ "description": "command arity",
+ "type": "integer"
+ },
+ {
+ "description": "command flags",
+ "type": "array",
+ "items": {
+ "description": "command flag",
+ "type": "string"
+ }
+ },
+ {
+ "description": "command first key index",
+ "type": "integer"
+ },
+ {
+ "description": "command last key index",
+ "type": "integer"
+ },
+ {
+ "description": "command key step index",
+ "type": "integer"
+ },
+ {
+ "description": "command categories",
+ "type": "array",
+ "items": {
+ "description": "command category",
+ "type": "string"
+ }
+ },
+ {
+ "description": "command tips",
+ "type": "array",
+ "items": {
+ "description": "command tip",
+ "type": "string"
+ }
+ },
+ {
+ "description": "command key specs",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "notes": {
+ "type": "string"
+ },
+ "flags": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "begin_search": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "spec": {
+ "anyOf": [
+ {
+ "description": "unknown type, empty map",
+ "type": "object",
+ "additionalProperties": false
+ },
+ {
+ "description": "index type",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "index": {
+ "type": "integer"
+ }
+ }
+ },
+ {
+ "description": "keyword type",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "keyword": {
+ "type": "string"
+ },
+ "startfrom": {
+ "type": "integer"
+ }
+ }
+ }
+ ]
+ }
+ }
+ },
+ "find_keys": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "spec": {
+ "anyOf": [
+ {
+ "description": "unknown type",
+ "type": "object",
+ "additionalProperties": false
+ },
+ {
+ "description": "range type",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "lastkey": {
+ "type": "integer"
+ },
+ "keystep": {
+ "type": "integer"
+ },
+ "limit": {
+ "type": "integer"
+ }
+ }
+ },
+ {
+ "description": "keynum type",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "keynumidx": {
+ "type": "integer"
+ },
+ "firstkey": {
+ "type": "integer"
+ },
+ "keystep": {
+ "type": "integer"
+ }
+ }
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+ },
+ {
+ "type": "array",
+ "description": "subcommands"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+}
diff --git a/src/commands/command-list.json b/src/commands/command-list.json
new file mode 100644
index 0000000..b85a9a5
--- /dev/null
+++ b/src/commands/command-list.json
@@ -0,0 +1,55 @@
+{
+ "LIST": {
+ "summary": "Returns a list of command names.",
+ "complexity": "O(N) where N is the total number of Redis commands",
+ "group": "server",
+ "since": "7.0.0",
+ "arity": -2,
+ "container": "COMMAND",
+ "function": "commandListCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "arguments": [
+ {
+ "name": "filterby",
+ "token": "FILTERBY",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "module-name",
+ "type": "string",
+ "token": "MODULE"
+ },
+ {
+ "name": "category",
+ "type": "string",
+ "token": "ACLCAT"
+ },
+ {
+ "name": "pattern",
+ "type": "pattern",
+ "token": "PATTERN"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "items": {
+ "description": "command name",
+ "type": "string"
+ },
+ "uniqueItems": true
+ }
+ }
+}
diff --git a/src/commands/command.json b/src/commands/command.json
new file mode 100644
index 0000000..9d15be2
--- /dev/null
+++ b/src/commands/command.json
@@ -0,0 +1,21 @@
+{
+ "COMMAND": {
+ "summary": "Returns detailed information about all commands.",
+ "complexity": "O(N) where N is the total number of Redis commands",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": -1,
+ "function": "commandCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ]
+ }
+}
diff --git a/src/commands/config-get.json b/src/commands/config-get.json
new file mode 100644
index 0000000..53f2d85
--- /dev/null
+++ b/src/commands/config-get.json
@@ -0,0 +1,36 @@
+{
+ "GET": {
+ "summary": "Returns the effective values of configuration parameters.",
+ "complexity": "O(N) where N is the number of configuration parameters provided",
+ "group": "server",
+ "since": "2.0.0",
+ "arity": -3,
+ "container": "CONFIG",
+ "function": "configGetCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added the ability to pass multiple pattern parameters in one call"
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "reply_schema": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "parameter",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/config-help.json b/src/commands/config-help.json
new file mode 100644
index 0000000..da3e9c4
--- /dev/null
+++ b/src/commands/config-help.json
@@ -0,0 +1,22 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "5.0.0",
+ "arity": 2,
+ "container": "CONFIG",
+ "function": "configHelpCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/config-resetstat.json b/src/commands/config-resetstat.json
new file mode 100644
index 0000000..fd6701f
--- /dev/null
+++ b/src/commands/config-resetstat.json
@@ -0,0 +1,24 @@
+{
+ "RESETSTAT": {
+ "summary": "Resets the server's statistics.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.0.0",
+ "arity": 2,
+ "container": "CONFIG",
+ "function": "configResetStatCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/config-rewrite.json b/src/commands/config-rewrite.json
new file mode 100644
index 0000000..af49dd7
--- /dev/null
+++ b/src/commands/config-rewrite.json
@@ -0,0 +1,24 @@
+{
+ "REWRITE": {
+ "summary": "Persists the effective configuration to file.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.8.0",
+ "arity": 2,
+ "container": "CONFIG",
+ "function": "configRewriteCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/config-set.json b/src/commands/config-set.json
new file mode 100644
index 0000000..83f2461
--- /dev/null
+++ b/src/commands/config-set.json
@@ -0,0 +1,47 @@
+{
+ "SET": {
+ "summary": "Sets configuration parameters in-flight.",
+ "complexity": "O(N) where N is the number of configuration parameters provided",
+ "group": "server",
+ "since": "2.0.0",
+ "arity": -4,
+ "container": "CONFIG",
+ "function": "configSetCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added the ability to set multiple parameters in one call."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "data",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "parameter",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/config.json b/src/commands/config.json
new file mode 100644
index 0000000..1c4457a
--- /dev/null
+++ b/src/commands/config.json
@@ -0,0 +1,9 @@
+{
+ "CONFIG": {
+ "summary": "A container for server configuration commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "server",
+ "since": "2.0.0",
+ "arity": -2
+ }
+}
diff --git a/src/commands/copy.json b/src/commands/copy.json
new file mode 100644
index 0000000..b30ddae
--- /dev/null
+++ b/src/commands/copy.json
@@ -0,0 +1,91 @@
+{
+ "COPY": {
+ "summary": "Copies the value of a key to a new key.",
+ "complexity": "O(N) worst case for collections, where N is the number of nested items. O(1) for string values.",
+ "group": "generic",
+ "since": "6.2.0",
+ "arity": -3,
+ "function": "copyCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "source",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 1
+ },
+ {
+ "token": "DB",
+ "name": "destination-db",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "name": "replace",
+ "token": "REPLACE",
+ "type": "pure-token",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "source was copied",
+ "const": 1
+ },
+ {
+ "description": "source was not copied",
+ "const": 0
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/dbsize.json b/src/commands/dbsize.json
new file mode 100644
index 0000000..249df41
--- /dev/null
+++ b/src/commands/dbsize.json
@@ -0,0 +1,25 @@
+{
+ "DBSIZE": {
+ "summary": "Returns the number of keys in the database.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": 1,
+ "function": "dbsizeCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:AGG_SUM"
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The number of keys in the currently-selected database."
+ }
+ }
+}
diff --git a/src/commands/debug.json b/src/commands/debug.json
new file mode 100644
index 0000000..1646d7b
--- /dev/null
+++ b/src/commands/debug.json
@@ -0,0 +1,20 @@
+{
+ "DEBUG": {
+ "summary": "A container for debugging commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "debugCommand",
+ "doc_flags": [
+ "SYSCMD"
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "PROTECTED"
+ ]
+ }
+}
diff --git a/src/commands/decr.json b/src/commands/decr.json
new file mode 100644
index 0000000..0841928
--- /dev/null
+++ b/src/commands/decr.json
@@ -0,0 +1,50 @@
+{
+ "DECR": {
+ "summary": "Decrements the integer value of a key by one. Uses 0 as initial value if the key doesn't exist.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "decrCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The value of the key after decrementing it."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/decrby.json b/src/commands/decrby.json
new file mode 100644
index 0000000..3db9879
--- /dev/null
+++ b/src/commands/decrby.json
@@ -0,0 +1,54 @@
+{
+ "DECRBY": {
+ "summary": "Decrements a number from the integer value of a key. Uses 0 as initial value if the key doesn't exist.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "decrbyCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The value of the key after decrementing it."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "decrement",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/del.json b/src/commands/del.json
new file mode 100644
index 0000000..2727b53
--- /dev/null
+++ b/src/commands/del.json
@@ -0,0 +1,53 @@
+{
+ "DEL": {
+ "summary": "Deletes one or more keys.",
+ "complexity": "O(N) where N is the number of keys that will be removed. When a key to remove holds a value other than a string, the individual complexity for this key is O(M) where M is the number of elements in the list, set, sorted set or hash. Removing a single key that holds a string value is O(1).",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "delCommand",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:MULTI_SHARD",
+ "RESPONSE_POLICY:AGG_SUM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RM",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of keys that were removed",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/discard.json b/src/commands/discard.json
new file mode 100644
index 0000000..ffd37f0
--- /dev/null
+++ b/src/commands/discard.json
@@ -0,0 +1,23 @@
+{
+ "DISCARD": {
+ "summary": "Discards a transaction.",
+ "complexity": "O(N), where N is the number of queued commands",
+ "group": "transactions",
+ "since": "2.0.0",
+ "arity": 1,
+ "function": "discardCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "TRANSACTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/dump.json b/src/commands/dump.json
new file mode 100644
index 0000000..9e772ed
--- /dev/null
+++ b/src/commands/dump.json
@@ -0,0 +1,58 @@
+{
+ "DUMP": {
+ "summary": "Returns a serialized representation of the value stored at a key.",
+ "complexity": "O(1) to access the key and additional O(N*M) to serialize it, where N is the number of Redis objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1).",
+ "group": "generic",
+ "since": "2.6.0",
+ "arity": 2,
+ "function": "dumpCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The serialized value.",
+ "type": "string"
+ },
+ {
+ "description": "Key does not exist.",
+ "type": "null"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/echo.json b/src/commands/echo.json
new file mode 100644
index 0000000..df87198
--- /dev/null
+++ b/src/commands/echo.json
@@ -0,0 +1,28 @@
+{
+ "ECHO": {
+ "summary": "Returns the given string.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "echoCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "description": "The given string",
+ "type": "string"
+ },
+ "arguments": [
+ {
+ "name": "message",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/eval.json b/src/commands/eval.json
new file mode 100644
index 0000000..05e1cb5
--- /dev/null
+++ b/src/commands/eval.json
@@ -0,0 +1,69 @@
+{
+ "EVAL": {
+ "summary": "Executes a server-side Lua script.",
+ "complexity": "Depends on the script that is executed.",
+ "group": "scripting",
+ "since": "2.6.0",
+ "arity": -3,
+ "function": "evalCommand",
+ "get_keys_function": "evalGetKeys",
+ "command_flags": [
+ "NOSCRIPT",
+ "SKIP_MONITOR",
+ "MAY_REPLICATE",
+ "NO_MANDATORY_KEYS",
+ "STALE"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "key_specs": [
+ {
+ "notes": "We cannot tell how the keys will be used so we assume the worst, RW and UPDATE",
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "script",
+ "type": "string"
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "Return value depends on the script that is executed"
+ }
+ }
+}
diff --git a/src/commands/eval_ro.json b/src/commands/eval_ro.json
new file mode 100644
index 0000000..1440998
--- /dev/null
+++ b/src/commands/eval_ro.json
@@ -0,0 +1,68 @@
+{
+ "EVAL_RO": {
+ "summary": "Executes a read-only server-side Lua script.",
+ "complexity": "Depends on the script that is executed.",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": -3,
+ "function": "evalRoCommand",
+ "get_keys_function": "evalGetKeys",
+ "command_flags": [
+ "NOSCRIPT",
+ "SKIP_MONITOR",
+ "NO_MANDATORY_KEYS",
+ "STALE",
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "key_specs": [
+ {
+ "notes": "We cannot tell how the keys will be used so we assume the worst, RO and ACCESS",
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "script",
+ "type": "string"
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "Return value depends on the script that is executed"
+ }
+ }
+}
diff --git a/src/commands/evalsha.json b/src/commands/evalsha.json
new file mode 100644
index 0000000..c7d0aa7
--- /dev/null
+++ b/src/commands/evalsha.json
@@ -0,0 +1,68 @@
+{
+ "EVALSHA": {
+ "summary": "Executes a server-side Lua script by SHA1 digest.",
+ "complexity": "Depends on the script that is executed.",
+ "group": "scripting",
+ "since": "2.6.0",
+ "arity": -3,
+ "function": "evalShaCommand",
+ "get_keys_function": "evalGetKeys",
+ "command_flags": [
+ "NOSCRIPT",
+ "SKIP_MONITOR",
+ "MAY_REPLICATE",
+ "NO_MANDATORY_KEYS",
+ "STALE"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "sha1",
+ "type": "string"
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "Return value depends on the script that is executed"
+ }
+ }
+}
diff --git a/src/commands/evalsha_ro.json b/src/commands/evalsha_ro.json
new file mode 100644
index 0000000..51e4dab
--- /dev/null
+++ b/src/commands/evalsha_ro.json
@@ -0,0 +1,67 @@
+{
+ "EVALSHA_RO": {
+ "summary": "Executes a read-only server-side Lua script by SHA1 digest.",
+ "complexity": "Depends on the script that is executed.",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": -3,
+ "function": "evalShaRoCommand",
+ "get_keys_function": "evalGetKeys",
+ "command_flags": [
+ "NOSCRIPT",
+ "SKIP_MONITOR",
+ "NO_MANDATORY_KEYS",
+ "STALE",
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "sha1",
+ "type": "string"
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "Return value depends on the script that is executed"
+ }
+ }
+}
diff --git a/src/commands/exec.json b/src/commands/exec.json
new file mode 100644
index 0000000..5f03d76
--- /dev/null
+++ b/src/commands/exec.json
@@ -0,0 +1,31 @@
+{
+ "EXEC": {
+ "summary": "Executes all commands in a transaction.",
+ "complexity": "Depends on commands in the transaction",
+ "group": "transactions",
+ "since": "1.2.0",
+ "arity": 1,
+ "function": "execCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SKIP_SLOWLOG"
+ ],
+ "acl_categories": [
+ "TRANSACTION"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Each element being the reply to each of the commands in the atomic transaction.",
+ "type": "array"
+ },
+ {
+ "description": "The transaction was aborted because a `WATCH`ed key was touched",
+ "type": "null"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/exists.json b/src/commands/exists.json
new file mode 100644
index 0000000..8b108bc
--- /dev/null
+++ b/src/commands/exists.json
@@ -0,0 +1,58 @@
+{
+ "EXISTS": {
+ "summary": "Determines whether one or more keys exist.",
+ "complexity": "O(N) where N is the number of keys to check.",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "existsCommand",
+ "history": [
+ [
+ "3.0.3",
+ "Accepts multiple `key` arguments."
+ ]
+ ],
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:MULTI_SHARD",
+ "RESPONSE_POLICY:AGG_SUM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of keys that exist from those specified as arguments.",
+ "type": "integer"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/expire.json b/src/commands/expire.json
new file mode 100644
index 0000000..bf80939
--- /dev/null
+++ b/src/commands/expire.json
@@ -0,0 +1,94 @@
+{
+ "EXPIRE": {
+ "summary": "Sets the expiration time of a key in seconds.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": -3,
+ "function": "expireCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added options: `NX`, `XX`, `GT` and `LT`."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The timeout was not set. e.g. key doesn't exist, or operation skipped due to the provided arguments.",
+ "const": 0
+ },
+ {
+ "description": "The timeout was set.",
+ "const": 1
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "seconds",
+ "type": "integer"
+ },
+ {
+ "name": "condition",
+ "type": "oneof",
+ "optional": true,
+ "since": "7.0.0",
+ "arguments": [
+ {
+ "name": "nx",
+ "type": "pure-token",
+ "token": "NX"
+ },
+ {
+ "name": "xx",
+ "type": "pure-token",
+ "token": "XX"
+ },
+ {
+ "name": "gt",
+ "type": "pure-token",
+ "token": "GT"
+ },
+ {
+ "name": "lt",
+ "type": "pure-token",
+ "token": "LT"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/expireat.json b/src/commands/expireat.json
new file mode 100644
index 0000000..c2ba5d8
--- /dev/null
+++ b/src/commands/expireat.json
@@ -0,0 +1,94 @@
+{
+ "EXPIREAT": {
+ "summary": "Sets the expiration time of a key to a Unix timestamp.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "1.2.0",
+ "arity": -3,
+ "function": "expireatCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added options: `NX`, `XX`, `GT` and `LT`."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": 1,
+ "description": "The timeout was set."
+ },
+ {
+ "const": 0,
+ "description": "The timeout was not set. e.g. key doesn't exist, or operation skipped due to the provided arguments."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "unix-time-seconds",
+ "type": "unix-time"
+ },
+ {
+ "name": "condition",
+ "type": "oneof",
+ "optional": true,
+ "since": "7.0.0",
+ "arguments": [
+ {
+ "name": "nx",
+ "type": "pure-token",
+ "token": "NX"
+ },
+ {
+ "name": "xx",
+ "type": "pure-token",
+ "token": "XX"
+ },
+ {
+ "name": "gt",
+ "type": "pure-token",
+ "token": "GT"
+ },
+ {
+ "name": "lt",
+ "type": "pure-token",
+ "token": "LT"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/expiretime.json b/src/commands/expiretime.json
new file mode 100644
index 0000000..99da9d2
--- /dev/null
+++ b/src/commands/expiretime.json
@@ -0,0 +1,61 @@
+{
+ "EXPIRETIME": {
+ "summary": "Returns the expiration time of a key as a Unix timestamp.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "7.0.0",
+ "arity": 2,
+ "function": "expiretimeCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "integer",
+ "description": "Expiration Unix timestamp in seconds.",
+ "minimum": 0
+ },
+ {
+ "const": -1,
+ "description": "The key exists but has no associated expiration time."
+ },
+ {
+ "const": -2,
+ "description": "The key does not exist."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/failover.json b/src/commands/failover.json
new file mode 100644
index 0000000..c694862
--- /dev/null
+++ b/src/commands/failover.json
@@ -0,0 +1,54 @@
+{
+ "FAILOVER": {
+ "summary": "Starts a coordinated failover from a server to one of its replicas.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "6.2.0",
+ "arity": -1,
+ "function": "failoverCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "STALE"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "target",
+ "token": "TO",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "host",
+ "type": "string"
+ },
+ {
+ "name": "port",
+ "type": "integer"
+ },
+ {
+ "token": "FORCE",
+ "name": "force",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ },
+ {
+ "token": "ABORT",
+ "name": "abort",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "token": "TIMEOUT",
+ "name": "milliseconds",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/fcall.json b/src/commands/fcall.json
new file mode 100644
index 0000000..7d6be69
--- /dev/null
+++ b/src/commands/fcall.json
@@ -0,0 +1,69 @@
+{
+ "FCALL": {
+ "summary": "Invokes a function.",
+ "complexity": "Depends on the function that is executed.",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": -3,
+ "function": "fcallCommand",
+ "get_keys_function": "functionGetKeys",
+ "command_flags": [
+ "NOSCRIPT",
+ "SKIP_MONITOR",
+ "MAY_REPLICATE",
+ "NO_MANDATORY_KEYS",
+ "STALE"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "key_specs": [
+ {
+ "notes": "We cannot tell how the keys will be used so we assume the worst, RW and UPDATE",
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "function",
+ "type": "string"
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "Return value depends on the function that is executed"
+ }
+ }
+}
diff --git a/src/commands/fcall_ro.json b/src/commands/fcall_ro.json
new file mode 100644
index 0000000..d0e4c76
--- /dev/null
+++ b/src/commands/fcall_ro.json
@@ -0,0 +1,68 @@
+{
+ "FCALL_RO": {
+ "summary": "Invokes a read-only function.",
+ "complexity": "Depends on the function that is executed.",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": -3,
+ "function": "fcallroCommand",
+ "get_keys_function": "functionGetKeys",
+ "command_flags": [
+ "NOSCRIPT",
+ "SKIP_MONITOR",
+ "NO_MANDATORY_KEYS",
+ "STALE",
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "key_specs": [
+ {
+ "notes": "We cannot tell how the keys will be used so we assume the worst, RO and ACCESS",
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "function",
+ "type": "string"
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "Return value depends on the function that is executed"
+ }
+ }
+}
diff --git a/src/commands/flushall.json b/src/commands/flushall.json
new file mode 100644
index 0000000..304bd03
--- /dev/null
+++ b/src/commands/flushall.json
@@ -0,0 +1,55 @@
+{
+ "FLUSHALL": {
+ "summary": "Removes all keys from all databases.",
+ "complexity": "O(N) where N is the total number of keys in all databases",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": -1,
+ "function": "flushallCommand",
+ "history": [
+ [
+ "4.0.0",
+ "Added the `ASYNC` flushing mode modifier."
+ ],
+ [
+ "6.2.0",
+ "Added the `SYNC` flushing mode modifier."
+ ]
+ ],
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "KEYSPACE",
+ "DANGEROUS"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "flush-type",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "async",
+ "type": "pure-token",
+ "token": "ASYNC",
+ "since": "4.0.0"
+ },
+ {
+ "name": "sync",
+ "type": "pure-token",
+ "token": "SYNC",
+ "since": "6.2.0"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/flushdb.json b/src/commands/flushdb.json
new file mode 100644
index 0000000..f1d10b8
--- /dev/null
+++ b/src/commands/flushdb.json
@@ -0,0 +1,55 @@
+{
+ "FLUSHDB": {
+ "summary": "Remove all keys from the current database.",
+ "complexity": "O(N) where N is the number of keys in the selected database",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": -1,
+ "function": "flushdbCommand",
+ "history": [
+ [
+ "4.0.0",
+ "Added the `ASYNC` flushing mode modifier."
+ ],
+ [
+ "6.2.0",
+ "Added the `SYNC` flushing mode modifier."
+ ]
+ ],
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "KEYSPACE",
+ "DANGEROUS"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "flush-type",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "async",
+ "type": "pure-token",
+ "token": "ASYNC",
+ "since": "4.0.0"
+ },
+ {
+ "name": "sync",
+ "type": "pure-token",
+ "token": "SYNC",
+ "since": "6.2.0"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/function-delete.json b/src/commands/function-delete.json
new file mode 100644
index 0000000..0895457
--- /dev/null
+++ b/src/commands/function-delete.json
@@ -0,0 +1,31 @@
+{
+ "DELETE": {
+ "summary": "Deletes a library and its functions.",
+ "complexity": "O(1)",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": 3,
+ "container": "FUNCTION",
+ "function": "functionDeleteCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "WRITE"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "arguments": [
+ {
+ "name": "library-name",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/function-dump.json b/src/commands/function-dump.json
new file mode 100644
index 0000000..d117241
--- /dev/null
+++ b/src/commands/function-dump.json
@@ -0,0 +1,21 @@
+{
+ "DUMP": {
+ "summary": "Dumps all libraries into a serialized binary payload.",
+ "complexity": "O(N) where N is the number of functions",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": 2,
+ "container": "FUNCTION",
+ "function": "functionDumpCommand",
+ "command_flags": [
+ "NOSCRIPT"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "reply_schema": {
+ "description": "the serialized payload",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/function-flush.json b/src/commands/function-flush.json
new file mode 100644
index 0000000..58742a0
--- /dev/null
+++ b/src/commands/function-flush.json
@@ -0,0 +1,44 @@
+{
+ "FLUSH": {
+ "summary": "Deletes all libraries and functions.",
+ "complexity": "O(N) where N is the number of functions deleted",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": -2,
+ "container": "FUNCTION",
+ "function": "functionFlushCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "WRITE"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "arguments": [
+ {
+ "name": "flush-type",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "async",
+ "type": "pure-token",
+ "token": "ASYNC"
+ },
+ {
+ "name": "sync",
+ "type": "pure-token",
+ "token": "SYNC"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/function-help.json b/src/commands/function-help.json
new file mode 100644
index 0000000..662e7e6
--- /dev/null
+++ b/src/commands/function-help.json
@@ -0,0 +1,25 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": 2,
+ "container": "FUNCTION",
+ "function": "functionHelpCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/function-kill.json b/src/commands/function-kill.json
new file mode 100644
index 0000000..396370c
--- /dev/null
+++ b/src/commands/function-kill.json
@@ -0,0 +1,25 @@
+{
+ "KILL": {
+ "summary": "Terminates a function during execution.",
+ "complexity": "O(1)",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": 2,
+ "container": "FUNCTION",
+ "function": "functionKillCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ONE_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/function-list.json b/src/commands/function-list.json
new file mode 100644
index 0000000..2ab1cf5
--- /dev/null
+++ b/src/commands/function-list.json
@@ -0,0 +1,87 @@
+{
+ "LIST": {
+ "summary": "Returns information about all libraries.",
+ "complexity": "O(N) where N is the number of functions",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": -2,
+ "container": "FUNCTION",
+ "function": "functionListCommand",
+ "command_flags": [
+ "NOSCRIPT"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "library_name": {
+ "description": " the name of the library",
+ "type": "string"
+ },
+ "engine": {
+ "description": "the engine of the library",
+ "type": "string"
+ },
+ "functions": {
+ "description": "the list of functions in the library",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "name": {
+ "description": "the name of the function",
+ "type": "string"
+ },
+ "description": {
+ "description": "the function's description",
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "string"
+ }
+ ]
+ },
+ "flags": {
+ "description": "an array of function flags",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ },
+ "library_code": {
+ "description": "the library's source code (when given the WITHCODE modifier)",
+ "type": "string"
+ }
+ }
+ }
+ },
+ "arguments": [
+ {
+ "name": "library-name-pattern",
+ "type": "string",
+ "token": "LIBRARYNAME",
+ "optional": true
+ },
+ {
+ "name": "withcode",
+ "type": "pure-token",
+ "token": "WITHCODE",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/function-load.json b/src/commands/function-load.json
new file mode 100644
index 0000000..f918b65
--- /dev/null
+++ b/src/commands/function-load.json
@@ -0,0 +1,39 @@
+{
+ "LOAD": {
+ "summary": "Creates a library.",
+ "complexity": "O(1) (considering compilation time is redundant)",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": -3,
+ "container": "FUNCTION",
+ "function": "functionLoadCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "arguments": [
+ {
+ "name": "replace",
+ "type": "pure-token",
+ "token": "REPLACE",
+ "optional": true
+ },
+ {
+ "name": "function-code",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "The library name that was loaded",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/function-restore.json b/src/commands/function-restore.json
new file mode 100644
index 0000000..0c37004
--- /dev/null
+++ b/src/commands/function-restore.json
@@ -0,0 +1,54 @@
+{
+ "RESTORE": {
+ "summary": "Restores all libraries from a payload.",
+ "complexity": "O(N) where N is the number of functions on the payload",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": -3,
+ "container": "FUNCTION",
+ "function": "functionRestoreCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "arguments": [
+ {
+ "name": "serialized-value",
+ "type": "string"
+ },
+ {
+ "name": "policy",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "flush",
+ "type": "pure-token",
+ "token": "FLUSH"
+ },
+ {
+ "name": "append",
+ "type": "pure-token",
+ "token": "APPEND"
+ },
+ {
+ "name": "replace",
+ "type": "pure-token",
+ "token": "REPLACE"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/function-stats.json b/src/commands/function-stats.json
new file mode 100644
index 0000000..65519db
--- /dev/null
+++ b/src/commands/function-stats.json
@@ -0,0 +1,81 @@
+{
+ "STATS": {
+ "summary": "Returns information about a function during execution.",
+ "complexity": "O(1)",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": 2,
+ "container": "FUNCTION",
+ "function": "functionStatsCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "reply_schema": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "running_script": {
+ "description": "information about the running script.",
+ "oneOf": [
+ {
+ "description": "If there's no in-flight function",
+ "type": "null"
+ },
+ {
+ "description": "a map with the information about the running script",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "name": {
+ "description": "the name of the function.",
+ "type": "string"
+ },
+ "command": {
+ "description": "the command and arguments used for invoking the function.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "duration_ms": {
+ "description": "the function's runtime duration in milliseconds.",
+ "type": "integer"
+ }
+ }
+ }
+ ]
+ },
+ "engines": {
+ "description": "A map when each entry in the map represent a single engine.",
+ "type": "object",
+ "patternProperties": {
+ "^.*$": {
+ "description": "Engine map contains statistics about the engine",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "libraries_count": {
+ "description": "number of libraries",
+ "type": "integer"
+ },
+ "functions_count": {
+ "description": "number of functions",
+ "type": "integer"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/commands/function.json b/src/commands/function.json
new file mode 100644
index 0000000..c474137
--- /dev/null
+++ b/src/commands/function.json
@@ -0,0 +1,9 @@
+{
+ "FUNCTION": {
+ "summary": "A container for function commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "scripting",
+ "since": "7.0.0",
+ "arity": -2
+ }
+}
diff --git a/src/commands/geoadd.json b/src/commands/geoadd.json
new file mode 100644
index 0000000..5409bfc
--- /dev/null
+++ b/src/commands/geoadd.json
@@ -0,0 +1,98 @@
+{
+ "GEOADD": {
+ "summary": "Adds one or more members to a geospatial index. The key is created if it doesn't exist.",
+ "complexity": "O(log(N)) for each item added, where N is the number of elements in the sorted set.",
+ "group": "geo",
+ "since": "3.2.0",
+ "arity": -5,
+ "function": "geoaddCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `CH`, `NX` and `XX` options."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "condition",
+ "type": "oneof",
+ "optional": true,
+ "since": "6.2.0",
+ "arguments": [
+ {
+ "name": "nx",
+ "type": "pure-token",
+ "token": "NX"
+ },
+ {
+ "name": "xx",
+ "type": "pure-token",
+ "token": "XX"
+ }
+ ]
+ },
+ {
+ "name": "change",
+ "token": "CH",
+ "type": "pure-token",
+ "optional": true,
+ "since": "6.2.0"
+ },
+ {
+ "name": "data",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "longitude",
+ "type": "double"
+ },
+ {
+ "name": "latitude",
+ "type": "double"
+ },
+ {
+ "name": "member",
+ "type": "string"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "description": "When used without optional arguments, the number of elements added to the sorted set (excluding score updates). If the CH option is specified, the number of elements that were changed (added or updated).",
+ "type": "integer"
+ }
+ }
+}
diff --git a/src/commands/geodist.json b/src/commands/geodist.json
new file mode 100644
index 0000000..145ca71
--- /dev/null
+++ b/src/commands/geodist.json
@@ -0,0 +1,91 @@
+{
+ "GEODIST": {
+ "summary": "Returns the distance between two members of a geospatial index.",
+ "complexity": "O(1)",
+ "group": "geo",
+ "since": "3.2.0",
+ "arity": -4,
+ "function": "geodistCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member1",
+ "type": "string"
+ },
+ {
+ "name": "member2",
+ "type": "string"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "m",
+ "type": "pure-token",
+ "token": "m"
+ },
+ {
+ "name": "km",
+ "type": "pure-token",
+ "token": "km"
+ },
+ {
+ "name": "ft",
+ "type": "pure-token",
+ "token": "ft"
+ },
+ {
+ "name": "mi",
+ "type": "pure-token",
+ "token": "mi"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "one or both of elements are missing",
+ "type": "null"
+ },
+ {
+ "description": "distance as a double (represented as a string) in the specified units",
+ "type": "string",
+ "pattern": "^[0-9]*(.[0-9]*)?$"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/geohash.json b/src/commands/geohash.json
new file mode 100644
index 0000000..01402c4
--- /dev/null
+++ b/src/commands/geohash.json
@@ -0,0 +1,56 @@
+{
+ "GEOHASH": {
+ "summary": "Returns members from a geospatial index as geohash strings.",
+ "complexity": "O(1) for each member requested.",
+ "group": "geo",
+ "since": "3.2.0",
+ "arity": -2,
+ "function": "geohashCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string",
+ "multiple": true,
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "An array where each element is the Geohash corresponding to each member name passed as argument to the command.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/geopos.json b/src/commands/geopos.json
new file mode 100644
index 0000000..408b6e6
--- /dev/null
+++ b/src/commands/geopos.json
@@ -0,0 +1,76 @@
+{
+ "GEOPOS": {
+ "summary": "Returns the longitude and latitude of members from a geospatial index.",
+ "complexity": "O(1) for each member requested.",
+ "group": "geo",
+ "since": "3.2.0",
+ "arity": -2,
+ "function": "geoposCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string",
+ "multiple": true,
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "An array where each element is a two elements array representing longitude and latitude (x,y) of each member name passed as argument to the command",
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "description": "Element does not exist",
+ "type": "null"
+ },
+ {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Latitude (x)",
+ "type": "number"
+ },
+ {
+ "description": "Longitude (y)",
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+}
diff --git a/src/commands/georadius.json b/src/commands/georadius.json
new file mode 100644
index 0000000..6ced904
--- /dev/null
+++ b/src/commands/georadius.json
@@ -0,0 +1,270 @@
+{
+ "GEORADIUS": {
+ "summary": "Queries a geospatial index for members within a distance from a coordinate, optionally stores the result.",
+ "complexity": "O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.",
+ "group": "geo",
+ "since": "3.2.0",
+ "arity": -6,
+ "function": "georadiusCommand",
+ "get_keys_function": "georadiusGetKeys",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `ANY` option for `COUNT`."
+ ],
+ [
+ "7.0.0",
+ "Added support for uppercase unit names."
+ ]
+ ],
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`GEOSEARCH` and `GEOSEARCHSTORE` with the `BYRADIUS` argument",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "keyword": {
+ "keyword": "STORE",
+ "startfrom": 6
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "keyword": {
+ "keyword": "STOREDIST",
+ "startfrom": 6
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "longitude",
+ "type": "double"
+ },
+ {
+ "name": "latitude",
+ "type": "double"
+ },
+ {
+ "name": "radius",
+ "type": "double"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "m",
+ "type": "pure-token",
+ "token": "m"
+ },
+ {
+ "name": "km",
+ "type": "pure-token",
+ "token": "km"
+ },
+ {
+ "name": "ft",
+ "type": "pure-token",
+ "token": "ft"
+ },
+ {
+ "name": "mi",
+ "type": "pure-token",
+ "token": "mi"
+ }
+ ]
+ },
+ {
+ "name": "withcoord",
+ "token": "WITHCOORD",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withdist",
+ "token": "WITHDIST",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withhash",
+ "token": "WITHHASH",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "count-block",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "any",
+ "token": "ANY",
+ "type": "pure-token",
+ "optional": true,
+ "since": "6.2.0"
+ }
+ ]
+ },
+ {
+ "name": "order",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "asc",
+ "type": "pure-token",
+ "token": "ASC"
+ },
+ {
+ "name": "desc",
+ "type": "pure-token",
+ "token": "DESC"
+ }
+ ]
+ },
+ {
+ "name": "store",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "STORE",
+ "name": "storekey",
+ "display": "key",
+ "type": "key",
+ "key_spec_index": 1
+ },
+ {
+ "token": "STOREDIST",
+ "name": "storedistkey",
+ "display": "key",
+ "type": "key",
+ "key_spec_index": 2
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "description": "Array of matched members information",
+ "anyOf": [
+ {
+ "description": "If no WITH* option is specified, array of matched members names",
+ "type": "array",
+ "items": {
+ "description": "name",
+ "type": "string"
+ }
+ },
+ {
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 1,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "Matched member name",
+ "type": "string"
+ }
+ ],
+ "additionalItems": {
+ "oneOf": [
+ {
+ "description": "If WITHDIST option is specified, the distance from the center as a floating point number, in the same unit specified in the radius",
+ "type": "string"
+ },
+ {
+ "description": "If WITHHASH option is specified, the geohash integer",
+ "type": "integer"
+ },
+ {
+ "description": "If WITHCOORD option is specified, the coordinates as a two items x,y array (longitude,latitude)",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "latitude (x)",
+ "type": "number"
+ },
+ {
+ "description": "longitude (y)",
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ },
+ {
+ "description": "number of items stored in key",
+ "type": "integer"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/georadius_ro.json b/src/commands/georadius_ro.json
new file mode 100644
index 0000000..964246a
--- /dev/null
+++ b/src/commands/georadius_ro.json
@@ -0,0 +1,201 @@
+{
+ "GEORADIUS_RO": {
+ "summary": "Returns members from a geospatial index that are within a distance from a coordinate.",
+ "complexity": "O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.",
+ "group": "geo",
+ "since": "3.2.10",
+ "arity": -6,
+ "function": "georadiusroCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `ANY` option for `COUNT`."
+ ]
+ ],
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`GEOSEARCH` with the `BYRADIUS` argument",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "longitude",
+ "type": "double"
+ },
+ {
+ "name": "latitude",
+ "type": "double"
+ },
+ {
+ "name": "radius",
+ "type": "double"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "m",
+ "type": "pure-token",
+ "token": "m"
+ },
+ {
+ "name": "km",
+ "type": "pure-token",
+ "token": "km"
+ },
+ {
+ "name": "ft",
+ "type": "pure-token",
+ "token": "ft"
+ },
+ {
+ "name": "mi",
+ "type": "pure-token",
+ "token": "mi"
+ }
+ ]
+ },
+ {
+ "name": "withcoord",
+ "token": "WITHCOORD",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withdist",
+ "token": "WITHDIST",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withhash",
+ "token": "WITHHASH",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "count-block",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "any",
+ "token": "ANY",
+ "type": "pure-token",
+ "optional": true,
+ "since": "6.2.0"
+ }
+ ]
+ },
+ {
+ "name": "order",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "asc",
+ "type": "pure-token",
+ "token": "ASC"
+ },
+ {
+ "name": "desc",
+ "type": "pure-token",
+ "token": "DESC"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "description": "Array of matched members information",
+ "anyOf": [
+ {
+ "description": "If no WITH* option is specified, array of matched members names",
+ "type": "array",
+ "items": {
+ "description": "name",
+ "type": "string"
+ }
+ },
+ {
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 1,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "Matched member name",
+ "type": "string"
+ }
+ ],
+ "additionalItems": {
+ "oneOf": [
+ {
+ "description": "If WITHDIST option is specified, the distance from the center as a floating point number, in the same unit specified in the radius",
+ "type": "string"
+ },
+ {
+ "description": "If WITHHASH option is specified, the geohash integer",
+ "type": "integer"
+ },
+ {
+ "description": "If WITHCOORD option is specified, the coordinates as a two items x,y array (longitude,latitude)",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "latitude (x)",
+ "type": "number"
+ },
+ {
+ "description": "longitude (y)",
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/georadiusbymember.json b/src/commands/georadiusbymember.json
new file mode 100644
index 0000000..4b62741
--- /dev/null
+++ b/src/commands/georadiusbymember.json
@@ -0,0 +1,261 @@
+{
+ "GEORADIUSBYMEMBER": {
+ "summary": "Queries a geospatial index for members within a distance from a member, optionally stores the result.",
+ "complexity": "O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.",
+ "group": "geo",
+ "since": "3.2.0",
+ "arity": -5,
+ "function": "georadiusbymemberCommand",
+ "get_keys_function": "georadiusGetKeys",
+ "history": [
+ [
+ "7.0.0",
+ "Added support for uppercase unit names."
+ ]
+ ],
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`GEOSEARCH` and `GEOSEARCHSTORE` with the `BYRADIUS` and `FROMMEMBER` arguments",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "keyword": {
+ "keyword": "STORE",
+ "startfrom": 5
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "keyword": {
+ "keyword": "STOREDIST",
+ "startfrom": 5
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string"
+ },
+ {
+ "name": "radius",
+ "type": "double"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "m",
+ "type": "pure-token",
+ "token": "m"
+ },
+ {
+ "name": "km",
+ "type": "pure-token",
+ "token": "km"
+ },
+ {
+ "name": "ft",
+ "type": "pure-token",
+ "token": "ft"
+ },
+ {
+ "name": "mi",
+ "type": "pure-token",
+ "token": "mi"
+ }
+ ]
+ },
+ {
+ "name": "withcoord",
+ "token": "WITHCOORD",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withdist",
+ "token": "WITHDIST",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withhash",
+ "token": "WITHHASH",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "count-block",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "any",
+ "token": "ANY",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ },
+ {
+ "name": "order",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "asc",
+ "type": "pure-token",
+ "token": "ASC"
+ },
+ {
+ "name": "desc",
+ "type": "pure-token",
+ "token": "DESC"
+ }
+ ]
+ },
+ {
+ "name": "store",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "STORE",
+ "name": "storekey",
+ "display": "key",
+ "type": "key",
+ "key_spec_index": 1
+ },
+ {
+ "token": "STOREDIST",
+ "name": "storedistkey",
+ "display": "key",
+ "type": "key",
+ "key_spec_index": 2
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "description": "Array of matched members information",
+ "anyOf": [
+ {
+ "description": "If no WITH* option is specified, array of matched members names",
+ "type": "array",
+ "items": {
+ "description": "name",
+ "type": "string"
+ }
+ },
+ {
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 1,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "Matched member name",
+ "type": "string"
+ }
+ ],
+ "additionalItems": {
+ "oneOf": [
+ {
+ "description": "If WITHDIST option is specified, the distance from the center as a floating point number, in the same unit specified in the radius",
+ "type": "string"
+ },
+ {
+ "description": "If WITHHASH option is specified, the geohash integer",
+ "type": "integer"
+ },
+ {
+ "description": "If WITHCOORD option is specified, the coordinates as a two items x,y array (longitude,latitude)",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "latitude (x)",
+ "type": "number"
+ },
+ {
+ "description": "longitude (y)",
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ },
+ {
+ "description": "number of items stored in key",
+ "type": "integer"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/georadiusbymember_ro.json b/src/commands/georadiusbymember_ro.json
new file mode 100644
index 0000000..5925881
--- /dev/null
+++ b/src/commands/georadiusbymember_ro.json
@@ -0,0 +1,190 @@
+{
+ "GEORADIUSBYMEMBER_RO": {
+ "summary": "Returns members from a geospatial index that are within a distance from a member.",
+ "complexity": "O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.",
+ "group": "geo",
+ "since": "3.2.10",
+ "arity": -5,
+ "function": "georadiusbymemberroCommand",
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`GEOSEARCH` with the `BYRADIUS` and `FROMMEMBER` arguments",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string"
+ },
+ {
+ "name": "radius",
+ "type": "double"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "m",
+ "type": "pure-token",
+ "token": "m"
+ },
+ {
+ "name": "km",
+ "type": "pure-token",
+ "token": "km"
+ },
+ {
+ "name": "ft",
+ "type": "pure-token",
+ "token": "ft"
+ },
+ {
+ "name": "mi",
+ "type": "pure-token",
+ "token": "mi"
+ }
+ ]
+ },
+ {
+ "name": "withcoord",
+ "token": "WITHCOORD",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withdist",
+ "token": "WITHDIST",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withhash",
+ "token": "WITHHASH",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "count-block",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "any",
+ "token": "ANY",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ },
+ {
+ "name": "order",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "asc",
+ "type": "pure-token",
+ "token": "ASC"
+ },
+ {
+ "name": "desc",
+ "type": "pure-token",
+ "token": "DESC"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "description": "Array of matched members information",
+ "anyOf": [
+ {
+ "description": "If no WITH* option is specified, array of matched members names",
+ "type": "array",
+ "items": {
+ "description": "name",
+ "type": "string"
+ }
+ },
+ {
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 1,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "Matched member name",
+ "type": "string"
+ }
+ ],
+ "additionalItems": {
+ "oneOf": [
+ {
+ "description": "If WITHDIST option is specified, the distance from the center as a floating point number, in the same unit specified in the radius",
+ "type": "string"
+ },
+ {
+ "description": "If WITHHASH option is specified, the geohash integer",
+ "type": "integer"
+ },
+ {
+ "description": "If WITHCOORD option is specified, the coordinates as a two items x,y array (longitude,latitude)",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "latitude (x)",
+ "type": "number"
+ },
+ {
+ "description": "longitude (y)",
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/geosearch.json b/src/commands/geosearch.json
new file mode 100644
index 0000000..b2e2825
--- /dev/null
+++ b/src/commands/geosearch.json
@@ -0,0 +1,267 @@
+{
+ "GEOSEARCH": {
+ "summary": "Queries a geospatial index for members inside an area of a box or a circle.",
+ "complexity": "O(N+log(M)) where N is the number of elements in the grid-aligned bounding box area around the shape provided as the filter and M is the number of items inside the shape",
+ "group": "geo",
+ "since": "6.2.0",
+ "arity": -7,
+ "function": "geosearchCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added support for uppercase unit names."
+ ]
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "from",
+ "type": "oneof",
+ "arguments": [
+ {
+ "token": "FROMMEMBER",
+ "name": "member",
+ "type": "string"
+ },
+ {
+ "token": "FROMLONLAT",
+ "name": "fromlonlat",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "longitude",
+ "type": "double"
+ },
+ {
+ "name": "latitude",
+ "type": "double"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "by",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "circle",
+ "type": "block",
+ "arguments": [
+ {
+ "token": "BYRADIUS",
+ "name": "radius",
+ "type": "double"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "m",
+ "type": "pure-token",
+ "token": "m"
+ },
+ {
+ "name": "km",
+ "type": "pure-token",
+ "token": "km"
+ },
+ {
+ "name": "ft",
+ "type": "pure-token",
+ "token": "ft"
+ },
+ {
+ "name": "mi",
+ "type": "pure-token",
+ "token": "mi"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "box",
+ "type": "block",
+ "arguments": [
+ {
+ "token": "BYBOX",
+ "name": "width",
+ "type": "double"
+ },
+ {
+ "name": "height",
+ "type": "double"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "m",
+ "type": "pure-token",
+ "token": "m"
+ },
+ {
+ "name": "km",
+ "type": "pure-token",
+ "token": "km"
+ },
+ {
+ "name": "ft",
+ "type": "pure-token",
+ "token": "ft"
+ },
+ {
+ "name": "mi",
+ "type": "pure-token",
+ "token": "mi"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "order",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "asc",
+ "type": "pure-token",
+ "token": "ASC"
+ },
+ {
+ "name": "desc",
+ "type": "pure-token",
+ "token": "DESC"
+ }
+ ]
+ },
+ {
+ "name": "count-block",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "any",
+ "token": "ANY",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ },
+ {
+ "name": "withcoord",
+ "token": "WITHCOORD",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withdist",
+ "token": "WITHDIST",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "withhash",
+ "token": "WITHHASH",
+ "type": "pure-token",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "Array of matched members information",
+ "anyOf": [
+ {
+ "description": "If no WITH* option is specified, array of matched members names",
+ "type": "array",
+ "items": {
+ "description": "name",
+ "type": "string"
+ }
+ },
+ {
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 1,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "Matched member name",
+ "type": "string"
+ }
+ ],
+ "additionalItems": {
+ "oneOf": [
+ {
+ "description": "If WITHDIST option is specified, the distance from the center as a floating point number, in the same unit specified in the radius",
+ "type": "string"
+ },
+ {
+ "description": "If WITHHASH option is specified, the geohash integer",
+ "type": "integer"
+ },
+ {
+ "description": "If WITHCOORD option is specified, the coordinates as a two items x,y array (longitude,latitude)",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+                                            "description": "longitude (x)",
+ "type": "number"
+ },
+ {
+                                            "description": "latitude (y)",
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/geosearchstore.json b/src/commands/geosearchstore.json
new file mode 100644
index 0000000..dfbdaaa
--- /dev/null
+++ b/src/commands/geosearchstore.json
@@ -0,0 +1,228 @@
+{
+ "GEOSEARCHSTORE": {
+ "summary": "Queries a geospatial index for members inside an area of a box or a circle, optionally stores the result.",
+ "complexity": "O(N+log(M)) where N is the number of elements in the grid-aligned bounding box area around the shape provided as the filter and M is the number of items inside the shape",
+ "group": "geo",
+ "since": "6.2.0",
+ "arity": -8,
+ "function": "geosearchstoreCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added support for uppercase unit names."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "GEO"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "source",
+ "type": "key",
+ "key_spec_index": 1
+ },
+ {
+ "name": "from",
+ "type": "oneof",
+ "arguments": [
+ {
+ "token": "FROMMEMBER",
+ "name": "member",
+ "type": "string"
+ },
+ {
+ "token": "FROMLONLAT",
+ "name": "fromlonlat",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "longitude",
+ "type": "double"
+ },
+ {
+ "name": "latitude",
+ "type": "double"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "by",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "circle",
+ "type": "block",
+ "arguments": [
+ {
+ "token": "BYRADIUS",
+ "name": "radius",
+ "type": "double"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "m",
+ "type": "pure-token",
+ "token": "m"
+ },
+ {
+ "name": "km",
+ "type": "pure-token",
+ "token": "km"
+ },
+ {
+ "name": "ft",
+ "type": "pure-token",
+ "token": "ft"
+ },
+ {
+ "name": "mi",
+ "type": "pure-token",
+ "token": "mi"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "box",
+ "type": "block",
+ "arguments": [
+ {
+ "token": "BYBOX",
+ "name": "width",
+ "type": "double"
+ },
+ {
+ "name": "height",
+ "type": "double"
+ },
+ {
+ "name": "unit",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "m",
+ "type": "pure-token",
+ "token": "m"
+ },
+ {
+ "name": "km",
+ "type": "pure-token",
+ "token": "km"
+ },
+ {
+ "name": "ft",
+ "type": "pure-token",
+ "token": "ft"
+ },
+ {
+ "name": "mi",
+ "type": "pure-token",
+ "token": "mi"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "order",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "asc",
+ "type": "pure-token",
+ "token": "ASC"
+ },
+ {
+ "name": "desc",
+ "type": "pure-token",
+ "token": "DESC"
+ }
+ ]
+ },
+ {
+ "name": "count-block",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "any",
+ "token": "ANY",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ },
+ {
+ "name": "storedist",
+ "token": "STOREDIST",
+ "type": "pure-token",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of elements in the resulting set",
+ "type": "integer"
+ }
+ }
+}
diff --git a/src/commands/get.json b/src/commands/get.json
new file mode 100644
index 0000000..693c1ac
--- /dev/null
+++ b/src/commands/get.json
@@ -0,0 +1,56 @@
+{
+ "GET": {
+ "summary": "Returns the string value of a key.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "getCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The value of the key.",
+ "type": "string"
+ },
+ {
+ "description": "Key does not exist.",
+ "type": "null"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/getbit.json b/src/commands/getbit.json
new file mode 100644
index 0000000..629f5db
--- /dev/null
+++ b/src/commands/getbit.json
@@ -0,0 +1,59 @@
+{
+ "GETBIT": {
+ "summary": "Returns a bit value by offset.",
+ "complexity": "O(1)",
+ "group": "bitmap",
+ "since": "2.2.0",
+ "arity": 3,
+ "function": "getbitCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "BITMAP"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The bit value stored at offset.",
+ "oneOf": [
+ {
+ "const": 0
+ },
+ {
+ "const": 1
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "offset",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/getdel.json b/src/commands/getdel.json
new file mode 100644
index 0000000..f3d86b0
--- /dev/null
+++ b/src/commands/getdel.json
@@ -0,0 +1,57 @@
+{
+ "GETDEL": {
+ "summary": "Returns the string value of a key after deleting the key.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "6.2.0",
+ "arity": 2,
+ "function": "getdelCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The value of the key.",
+ "type": "string"
+ },
+ {
+ "description": "The key does not exist.",
+ "type": "null"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/getex.json b/src/commands/getex.json
new file mode 100644
index 0000000..978b9d1
--- /dev/null
+++ b/src/commands/getex.json
@@ -0,0 +1,90 @@
+{
+ "GETEX": {
+ "summary": "Returns the string value of a key after setting its expiration time.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "6.2.0",
+ "arity": -2,
+ "function": "getexCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "notes": "RW and UPDATE because it changes the TTL",
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The value of the key.",
+ "type": "string"
+ },
+ {
+ "description": "Key does not exist.",
+ "type": "null"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "expiration",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "seconds",
+ "type": "integer",
+ "token": "EX"
+ },
+ {
+ "name": "milliseconds",
+ "type": "integer",
+ "token": "PX"
+ },
+ {
+ "name": "unix-time-seconds",
+ "type": "unix-time",
+ "token": "EXAT"
+ },
+ {
+ "name": "unix-time-milliseconds",
+ "type": "unix-time",
+ "token": "PXAT"
+ },
+ {
+ "name": "persist",
+ "type": "pure-token",
+ "token": "PERSIST"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/getrange.json b/src/commands/getrange.json
new file mode 100644
index 0000000..82bb723
--- /dev/null
+++ b/src/commands/getrange.json
@@ -0,0 +1,55 @@
+{
+ "GETRANGE": {
+ "summary": "Returns a substring of the string stored at a key.",
+ "complexity": "O(N) where N is the length of the returned string. The complexity is ultimately determined by the returned length, but because creating a substring from an existing string is very cheap, it can be considered O(1) for small strings.",
+ "group": "string",
+ "since": "2.4.0",
+ "arity": 4,
+ "function": "getrangeCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "The substring of the string value stored at key, determined by the offsets start and end (both are inclusive)."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "start",
+ "type": "integer"
+ },
+ {
+ "name": "end",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/getset.json b/src/commands/getset.json
new file mode 100644
index 0000000..42823d5
--- /dev/null
+++ b/src/commands/getset.json
@@ -0,0 +1,67 @@
+{
+ "GETSET": {
+ "summary": "Returns the previous string value of a key after setting it to a new value.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "getsetCommand",
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`SET` with the `!GET` argument",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The old value stored at the key.",
+ "type": "string"
+ },
+ {
+ "description": "The key does not exist.",
+ "type": "null"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/hdel.json b/src/commands/hdel.json
new file mode 100644
index 0000000..11da02d
--- /dev/null
+++ b/src/commands/hdel.json
@@ -0,0 +1,59 @@
+{
+ "HDEL": {
+ "summary": "Deletes one or more fields and their values from a hash. Deletes the hash if no fields remain.",
+ "complexity": "O(N) where N is the number of fields to be removed.",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": -3,
+ "function": "hdelCommand",
+ "history": [
+ [
+ "2.4.0",
+ "Accepts multiple `field` arguments."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The number of fields that were removed from the hash."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "field",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/hello.json b/src/commands/hello.json
new file mode 100644
index 0000000..e916e72
--- /dev/null
+++ b/src/commands/hello.json
@@ -0,0 +1,111 @@
+{
+ "HELLO": {
+ "summary": "Handshakes with the Redis server.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "6.0.0",
+ "arity": -1,
+ "function": "helloCommand",
+ "history": [
+ [
+ "6.2.0",
+ "`protover` made optional; when called without arguments the command reports the current connection's context."
+ ]
+ ],
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "NO_AUTH",
+ "SENTINEL",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "server": {
+ "type": "string"
+ },
+ "version": {
+ "type": "string"
+ },
+ "proto": {
+ "const": 3
+ },
+ "id": {
+ "type": "integer"
+ },
+ "mode": {
+ "type": "string"
+ },
+ "role": {
+ "type": "string"
+ },
+ "modules": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "ver": {
+ "type": "integer"
+ },
+ "path": {
+ "type": "string"
+ },
+ "args": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "arguments": [
+ {
+ "name": "arguments",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "protover",
+ "type": "integer"
+ },
+ {
+ "token": "AUTH",
+ "name": "auth",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "username",
+ "type": "string"
+ },
+ {
+ "name": "password",
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "token": "SETNAME",
+ "name": "clientname",
+ "type": "string",
+ "optional": true
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/hexists.json b/src/commands/hexists.json
new file mode 100644
index 0000000..f5ea405
--- /dev/null
+++ b/src/commands/hexists.json
@@ -0,0 +1,59 @@
+{
+ "HEXISTS": {
+ "summary": "Determines whether a field exists in a hash.",
+ "complexity": "O(1)",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": 3,
+ "function": "hexistsCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The hash does not contain the field, or key does not exist.",
+ "const": 0
+ },
+ {
+ "description": "The hash contains the field.",
+ "const": 1
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "field",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/hget.json b/src/commands/hget.json
new file mode 100644
index 0000000..a041143
--- /dev/null
+++ b/src/commands/hget.json
@@ -0,0 +1,60 @@
+{
+ "HGET": {
+ "summary": "Returns the value of a field in a hash.",
+ "complexity": "O(1)",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": 3,
+ "function": "hgetCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The value associated with the field.",
+ "type": "string"
+ },
+ {
+ "description": "If the field is not present in the hash or key does not exist.",
+ "type": "null"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "field",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/hgetall.json b/src/commands/hgetall.json
new file mode 100644
index 0000000..9bbf835
--- /dev/null
+++ b/src/commands/hgetall.json
@@ -0,0 +1,53 @@
+{
+ "HGETALL": {
+ "summary": "Returns all fields and values in a hash.",
+ "complexity": "O(N) where N is the size of the hash.",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": 2,
+ "function": "hgetallCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "object",
+ "description": "Map of fields and their values stored in the hash, or an empty list when key does not exist. In RESP2 this is returned as a flat array.",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/hincrby.json b/src/commands/hincrby.json
new file mode 100644
index 0000000..a90f5ba
--- /dev/null
+++ b/src/commands/hincrby.json
@@ -0,0 +1,58 @@
+{
+ "HINCRBY": {
+ "summary": "Increments the integer value of a field in a hash by a number. Uses 0 as initial value if the field doesn't exist.",
+ "complexity": "O(1)",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": 4,
+ "function": "hincrbyCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The value of the field after the increment operation."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "field",
+ "type": "string"
+ },
+ {
+ "name": "increment",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/hincrbyfloat.json b/src/commands/hincrbyfloat.json
new file mode 100644
index 0000000..6a7d1fd
--- /dev/null
+++ b/src/commands/hincrbyfloat.json
@@ -0,0 +1,58 @@
+{
+ "HINCRBYFLOAT": {
+ "summary": "Increments the floating point value of a field by a number. Uses 0 as initial value if the field doesn't exist.",
+ "complexity": "O(1)",
+ "group": "hash",
+ "since": "2.6.0",
+ "arity": 4,
+ "function": "hincrbyfloatCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "The value of the field after the increment operation."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "field",
+ "type": "string"
+ },
+ {
+ "name": "increment",
+ "type": "double"
+ }
+ ]
+ }
+}
diff --git a/src/commands/hkeys.json b/src/commands/hkeys.json
new file mode 100644
index 0000000..917df1c
--- /dev/null
+++ b/src/commands/hkeys.json
@@ -0,0 +1,54 @@
+{
+ "HKEYS": {
+ "summary": "Returns all fields in a hash.",
+ "complexity": "O(N) where N is the size of the hash.",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": 2,
+ "function": "hkeysCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of fields in the hash, or an empty list when the key does not exist.",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/hlen.json b/src/commands/hlen.json
new file mode 100644
index 0000000..d4c13ac
--- /dev/null
+++ b/src/commands/hlen.json
@@ -0,0 +1,47 @@
+{
+ "HLEN": {
+ "summary": "Returns the number of fields in a hash.",
+ "complexity": "O(1)",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": 2,
+ "function": "hlenCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "Number of the fields in the hash, or 0 when the key does not exist."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/hmget.json b/src/commands/hmget.json
new file mode 100644
index 0000000..73fa9c3
--- /dev/null
+++ b/src/commands/hmget.json
@@ -0,0 +1,64 @@
+{
+ "HMGET": {
+ "summary": "Returns the values of all fields in a hash.",
+ "complexity": "O(N) where N is the number of fields being requested.",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": -3,
+ "function": "hmgetCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "List of values associated with the given fields, in the same order as they are requested.",
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "field",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/hmset.json b/src/commands/hmset.json
new file mode 100644
index 0000000..e92f411
--- /dev/null
+++ b/src/commands/hmset.json
@@ -0,0 +1,68 @@
+{
+ "HMSET": {
+ "summary": "Sets the values of multiple fields.",
+ "complexity": "O(N) where N is the number of fields being set.",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": -4,
+ "function": "hsetCommand",
+ "deprecated_since": "4.0.0",
+ "replaced_by": "`HSET` with multiple field-value pairs",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "data",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "field",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/hrandfield.json b/src/commands/hrandfield.json
new file mode 100644
index 0000000..83abc74
--- /dev/null
+++ b/src/commands/hrandfield.json
@@ -0,0 +1,101 @@
+{
+ "HRANDFIELD": {
+ "summary": "Returns one or more random fields from a hash.",
+ "complexity": "O(N) where N is the number of fields returned",
+ "group": "hash",
+ "since": "6.2.0",
+ "arity": -2,
+ "function": "hrandfieldCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "Key doesn't exist",
+ "type": "null"
+ },
+ {
+ "description": "A single random field. Returned in case `COUNT` was not used.",
+ "type": "string"
+ },
+ {
+ "description": "A list of fields. Returned in case `COUNT` was used.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "description": "Fields and their values. Returned in case `COUNT` and `WITHVALUES` were used. In RESP2 this is returned as a flat array.",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Field",
+ "type": "string"
+ },
+ {
+ "description": "Value",
+ "type": "string"
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "options",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "withvalues",
+ "token": "WITHVALUES",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/hscan.json b/src/commands/hscan.json
new file mode 100644
index 0000000..0888eec
--- /dev/null
+++ b/src/commands/hscan.json
@@ -0,0 +1,81 @@
+{
+ "HSCAN": {
+ "summary": "Iterates over fields and values of a hash.",
+ "complexity": "O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.",
+ "group": "hash",
+ "since": "2.8.0",
+ "arity": -3,
+ "function": "hscanCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "cursor",
+ "type": "integer"
+ },
+ {
+ "token": "MATCH",
+ "name": "pattern",
+ "type": "pattern",
+ "optional": true
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "cursor and scan response in array form",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "cursor",
+ "type": "string"
+ },
+ {
+ "description": "list of key/value pairs from the hash where each even element is the key, and each odd element is the value",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/hset.json b/src/commands/hset.json
new file mode 100644
index 0000000..8180972
--- /dev/null
+++ b/src/commands/hset.json
@@ -0,0 +1,70 @@
+{
+ "HSET": {
+ "summary": "Creates or modifies the value of a field in a hash.",
+ "complexity": "O(1) for each field/value pair added, so O(N) to add N field/value pairs when the command is called with multiple field/value pairs.",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": -4,
+ "function": "hsetCommand",
+ "history": [
+ [
+ "4.0.0",
+ "Accepts multiple `field` and `value` arguments."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of fields that were added",
+ "type": "integer"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "data",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "field",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/hsetnx.json b/src/commands/hsetnx.json
new file mode 100644
index 0000000..e024c41
--- /dev/null
+++ b/src/commands/hsetnx.json
@@ -0,0 +1,65 @@
+{
+ "HSETNX": {
+ "summary": "Sets the value of a field in a hash only when the field doesn't exist.",
+ "complexity": "O(1)",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": 4,
+ "function": "hsetnxCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+                    "description": "The field already exists in the hash and no operation was performed.",
+ "const": 0
+ },
+ {
+                    "description": "The field is a new field in the hash and value was set.",
+ "const": 1
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "field",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/hstrlen.json b/src/commands/hstrlen.json
new file mode 100644
index 0000000..82ac6db
--- /dev/null
+++ b/src/commands/hstrlen.json
@@ -0,0 +1,52 @@
+{
+ "HSTRLEN": {
+ "summary": "Returns the length of the value of a field.",
+ "complexity": "O(1)",
+ "group": "hash",
+ "since": "3.2.0",
+ "arity": 3,
+ "function": "hstrlenCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "String length of the value associated with the field, or zero when the field is not present in the hash or key does not exist at all.",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "field",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/hvals.json b/src/commands/hvals.json
new file mode 100644
index 0000000..55aeaaf
--- /dev/null
+++ b/src/commands/hvals.json
@@ -0,0 +1,53 @@
+{
+ "HVALS": {
+ "summary": "Returns all values in a hash.",
+ "complexity": "O(N) where N is the size of the hash.",
+ "group": "hash",
+ "since": "2.0.0",
+ "arity": 2,
+ "function": "hvalsCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "HASH"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of values in the hash, or an empty list when the key does not exist.",
+ "items": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/incr.json b/src/commands/incr.json
new file mode 100644
index 0000000..f33ec02
--- /dev/null
+++ b/src/commands/incr.json
@@ -0,0 +1,50 @@
+{
+ "INCR": {
+ "summary": "Increments the integer value of a key by one. Uses 0 as initial value if the key doesn't exist.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "incrCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ],
+ "reply_schema": {
+ "description": "The value of key after the increment",
+ "type": "integer"
+ }
+ }
+}
diff --git a/src/commands/incrby.json b/src/commands/incrby.json
new file mode 100644
index 0000000..2668011
--- /dev/null
+++ b/src/commands/incrby.json
@@ -0,0 +1,54 @@
+{
+ "INCRBY": {
+ "summary": "Increments the integer value of a key by a number. Uses 0 as initial value if the key doesn't exist.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "incrbyCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The value of the key after incrementing it."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "increment",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/incrbyfloat.json b/src/commands/incrbyfloat.json
new file mode 100644
index 0000000..c594a1a
--- /dev/null
+++ b/src/commands/incrbyfloat.json
@@ -0,0 +1,54 @@
+{
+ "INCRBYFLOAT": {
+ "summary": "Increment the floating point value of a key by a number. Uses 0 as initial value if the key doesn't exist.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "2.6.0",
+ "arity": 3,
+ "function": "incrbyfloatCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "The value of the key after incrementing it."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "increment",
+ "type": "double"
+ }
+ ]
+ }
+}
diff --git a/src/commands/info.json b/src/commands/info.json
new file mode 100644
index 0000000..04a02b1
--- /dev/null
+++ b/src/commands/info.json
@@ -0,0 +1,41 @@
+{
+ "INFO": {
+ "summary": "Returns information and statistics about the server.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": -1,
+ "function": "infoCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added support for taking multiple section arguments."
+ ]
+ ],
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "DANGEROUS"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "reply_schema": {
+ "description": "A map of info fields, one field per line in the form of <field>:<value> where the value can be a comma separated map like <key>=<val>. Also contains section header lines starting with `#` and blank lines.",
+ "type": "string"
+ },
+ "arguments": [
+ {
+ "name": "section",
+ "type": "string",
+ "multiple": true,
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/keys.json b/src/commands/keys.json
new file mode 100644
index 0000000..9dd4e11
--- /dev/null
+++ b/src/commands/keys.json
@@ -0,0 +1,34 @@
+{
+ "KEYS": {
+ "summary": "Returns all key names that match a pattern.",
+ "complexity": "O(N) with N being the number of keys in the database, under the assumption that the key names in the database and the given pattern have limited length.",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "keysCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "KEYSPACE",
+ "DANGEROUS"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "arguments": [
+ {
+ "name": "pattern",
+ "type": "pattern"
+ }
+ ],
+ "reply_schema": {
+ "description": "list of keys matching pattern",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/lastsave.json b/src/commands/lastsave.json
new file mode 100644
index 0000000..dc06154
--- /dev/null
+++ b/src/commands/lastsave.json
@@ -0,0 +1,26 @@
+{
+ "LASTSAVE": {
+ "summary": "Returns the Unix timestamp of the last successful save to disk.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": 1,
+ "function": "lastsaveCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "FAST"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "acl_categories": [
+ "ADMIN",
+ "DANGEROUS"
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "UNIX TIME of the last DB save executed with success."
+ }
+ }
+}
diff --git a/src/commands/latency-doctor.json b/src/commands/latency-doctor.json
new file mode 100644
index 0000000..8f1f8dd
--- /dev/null
+++ b/src/commands/latency-doctor.json
@@ -0,0 +1,26 @@
+{
+ "DOCTOR": {
+ "summary": "Returns a human-readable latency analysis report.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": 2,
+ "container": "LATENCY",
+ "function": "latencyCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "A human readable latency analysis report."
+ }
+ }
+}
diff --git a/src/commands/latency-graph.json b/src/commands/latency-graph.json
new file mode 100644
index 0000000..cb5d209
--- /dev/null
+++ b/src/commands/latency-graph.json
@@ -0,0 +1,32 @@
+{
+ "GRAPH": {
+ "summary": "Returns a latency graph for an event.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": 3,
+ "container": "LATENCY",
+ "function": "latencyCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "arguments": [
+ {
+ "name": "event",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "Latency graph"
+ }
+ }
+}
diff --git a/src/commands/latency-help.json b/src/commands/latency-help.json
new file mode 100644
index 0000000..36ff527
--- /dev/null
+++ b/src/commands/latency-help.json
@@ -0,0 +1,22 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": 2,
+ "container": "LATENCY",
+ "function": "latencyCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/latency-histogram.json b/src/commands/latency-histogram.json
new file mode 100644
index 0000000..5e33eb6
--- /dev/null
+++ b/src/commands/latency-histogram.json
@@ -0,0 +1,54 @@
+{
+ "HISTOGRAM": {
+ "summary": "Returns the cumulative distribution of latencies of a subset or all commands.",
+ "complexity": "O(N) where N is the number of commands with latency information being retrieved.",
+ "group": "server",
+ "since": "7.0.0",
+ "arity": -2,
+ "container": "LATENCY",
+ "function": "latencyCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "reply_schema": {
+ "type": "object",
+ "description": "A map where each key is a command name, and each value is a map with the total calls, and an inner map of the histogram time buckets.",
+ "patternProperties": {
+ "^.*$": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "calls": {
+ "description": "The total calls for the command.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "histogram_usec": {
+ "description": "Histogram map, bucket id to latency",
+ "type": "object",
+ "additionalProperties": {
+ "type": "integer"
+ }
+ }
+ }
+ }
+ }
+ },
+ "arguments": [
+ {
+ "name": "COMMAND",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/latency-history.json b/src/commands/latency-history.json
new file mode 100644
index 0000000..7c3591a
--- /dev/null
+++ b/src/commands/latency-history.json
@@ -0,0 +1,49 @@
+{
+ "HISTORY": {
+ "summary": "Returns timestamp-latency samples for an event.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": 3,
+ "container": "LATENCY",
+ "function": "latencyCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "An array where each element is a two elements array representing the timestamp and the latency of the event.",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "timestamp of the event",
+ "type": "integer",
+ "minimum": 0
+ },
+ {
+ "description": "latency of the event",
+ "type": "integer",
+ "minimum": 0
+ }
+ ]
+ }
+ },
+ "arguments": [
+ {
+ "name": "event",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/latency-latest.json b/src/commands/latency-latest.json
new file mode 100644
index 0000000..88c9e7a
--- /dev/null
+++ b/src/commands/latency-latest.json
@@ -0,0 +1,49 @@
+{
+ "LATEST": {
+ "summary": "Returns the latest latency samples for all events.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": 2,
+ "container": "LATENCY",
+ "function": "latencyCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "An array where each element is a four elements array representing the event's name, timestamp, latest and all-time latency measurements.",
+ "items": {
+ "type": "array",
+ "minItems": 4,
+ "maxItems": 4,
+ "items": [
+ {
+ "type": "string",
+ "description": "Event name."
+ },
+ {
+ "type": "integer",
+ "description": "Timestamp."
+ },
+ {
+ "type": "integer",
+ "description": "Latest latency in milliseconds."
+ },
+ {
+ "type": "integer",
+ "description": "Max latency in milliseconds."
+ }
+ ]
+ }
+ }
+ }
+}
diff --git a/src/commands/latency-reset.json b/src/commands/latency-reset.json
new file mode 100644
index 0000000..3223282
--- /dev/null
+++ b/src/commands/latency-reset.json
@@ -0,0 +1,33 @@
+{
+ "RESET": {
+ "summary": "Resets the latency data for one or more events.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": -2,
+ "container": "LATENCY",
+ "function": "latencyCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:AGG_SUM"
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "Number of event time series that were reset."
+ },
+ "arguments": [
+ {
+ "name": "event",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/latency.json b/src/commands/latency.json
new file mode 100644
index 0000000..e4844d4
--- /dev/null
+++ b/src/commands/latency.json
@@ -0,0 +1,9 @@
+{
+ "LATENCY": {
+ "summary": "A container for latency diagnostics commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "server",
+ "since": "2.8.13",
+ "arity": -2
+ }
+}
diff --git a/src/commands/lcs.json b/src/commands/lcs.json
new file mode 100644
index 0000000..a26b089
--- /dev/null
+++ b/src/commands/lcs.json
@@ -0,0 +1,127 @@
+{
+ "LCS": {
+ "summary": "Finds the longest common subsequence.",
+ "complexity": "O(N*M) where N and M are the lengths of s1 and s2, respectively",
+ "group": "string",
+ "since": "7.0.0",
+ "arity": -3,
+ "function": "lcsCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "string",
+ "description": "The longest common subsequence."
+ },
+ {
+ "type": "integer",
+ "description": "The length of the longest common subsequence when 'LEN' is given."
+ },
+ {
+ "type": "object",
+ "description": "Array with the LCS length and all the ranges in both the strings when 'IDX' is given. In RESP2 this is returned as a flat array",
+ "additionalProperties": false,
+ "properties": {
+ "matches": {
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 3,
+ "items": [
+ {
+ "type": "array",
+ "description": "Matched range in the first string.",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": {
+ "type": "integer"
+ }
+ },
+ {
+ "type": "array",
+ "description": "Matched range in the second string.",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": {
+ "type": "integer"
+ }
+ }
+ ],
+ "additionalItems": {
+ "type": "integer",
+ "description": "The length of the match when 'WITHMATCHLEN' is given."
+ }
+ }
+ },
+ "len": {
+ "type": "integer",
+ "description": "Length of the longest common subsequence."
+ }
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key1",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "key2",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "len",
+ "token": "LEN",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "idx",
+ "token": "IDX",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "token": "MINMATCHLEN",
+ "name": "min-match-len",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "name": "withmatchlen",
+ "token": "WITHMATCHLEN",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/lindex.json b/src/commands/lindex.json
new file mode 100644
index 0000000..a589d52
--- /dev/null
+++ b/src/commands/lindex.json
@@ -0,0 +1,59 @@
+{
+ "LINDEX": {
+ "summary": "Returns an element from a list by its index.",
+ "complexity": "O(N) where N is the number of elements to traverse to get to the element at index. This makes asking for the first or the last element of the list O(1).",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "lindexCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "null",
+ "description": "Index is out of range"
+ },
+ {
+ "description": "The requested element",
+ "type": "string"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "index",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/linsert.json b/src/commands/linsert.json
new file mode 100644
index 0000000..8059dc5
--- /dev/null
+++ b/src/commands/linsert.json
@@ -0,0 +1,85 @@
+{
+ "LINSERT": {
+ "summary": "Inserts an element before or after another element in a list.",
+ "complexity": "O(N) where N is the number of elements to traverse before seeing the value pivot. This means that inserting somewhere on the left end on the list (head) can be considered O(1) and inserting somewhere on the right end (tail) is O(N).",
+ "group": "list",
+ "since": "2.2.0",
+ "arity": 5,
+ "function": "linsertCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "List length after a successful insert operation.",
+ "type": "integer",
+ "minimum": 1
+ },
+ {
+ "description": "in case key doesn't exist.",
+ "const": 0
+ },
+ {
+ "description": "when the pivot wasn't found.",
+ "const": -1
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "where",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "before",
+ "type": "pure-token",
+ "token": "BEFORE"
+ },
+ {
+ "name": "after",
+ "type": "pure-token",
+ "token": "AFTER"
+ }
+ ]
+ },
+ {
+ "name": "pivot",
+ "type": "string"
+ },
+ {
+ "name": "element",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/llen.json b/src/commands/llen.json
new file mode 100644
index 0000000..846aa40
--- /dev/null
+++ b/src/commands/llen.json
@@ -0,0 +1,48 @@
+{
+ "LLEN": {
+ "summary": "Returns the length of a list.",
+ "complexity": "O(1)",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "llenCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "List length.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/lmove.json b/src/commands/lmove.json
new file mode 100644
index 0000000..ab0c6ad
--- /dev/null
+++ b/src/commands/lmove.json
@@ -0,0 +1,104 @@
+{
+ "LMOVE": {
+ "summary": "Returns an element after popping it from one list and pushing it to another. Deletes the list if the last element was moved.",
+ "complexity": "O(1)",
+ "group": "list",
+ "since": "6.2.0",
+ "arity": 5,
+ "function": "lmoveCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The element being popped and pushed.",
+ "type": "string"
+ },
+ "arguments": [
+ {
+ "name": "source",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 1
+ },
+ {
+ "name": "wherefrom",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "left",
+ "type": "pure-token",
+ "token": "LEFT"
+ },
+ {
+ "name": "right",
+ "type": "pure-token",
+ "token": "RIGHT"
+ }
+ ]
+ },
+ {
+ "name": "whereto",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "left",
+ "type": "pure-token",
+ "token": "LEFT"
+ },
+ {
+ "name": "right",
+ "type": "pure-token",
+ "token": "RIGHT"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/lmpop.json b/src/commands/lmpop.json
new file mode 100644
index 0000000..7cc3476
--- /dev/null
+++ b/src/commands/lmpop.json
@@ -0,0 +1,100 @@
+{
+ "LMPOP": {
+ "summary": "Returns multiple elements from a list after removing them. Deletes the list if the last element was popped.",
+ "complexity": "O(N+M) where N is the number of provided keys and M is the number of elements returned.",
+ "group": "list",
+ "since": "7.0.0",
+ "arity": -4,
+ "function": "lmpopCommand",
+ "get_keys_function": "lmpopGetKeys",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "If no element could be popped.",
+ "type": "null"
+ },
+ {
+ "description": "List key from which elements were popped.",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Name of the key from which elements were popped.",
+ "type": "string"
+ },
+ {
+ "description": "Array of popped elements.",
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "where",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "left",
+ "type": "pure-token",
+ "token": "LEFT"
+ },
+ {
+ "name": "right",
+ "type": "pure-token",
+ "token": "RIGHT"
+ }
+ ]
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/lolwut.json b/src/commands/lolwut.json
new file mode 100644
index 0000000..546c14c
--- /dev/null
+++ b/src/commands/lolwut.json
@@ -0,0 +1,25 @@
+{
+ "LOLWUT": {
+ "summary": "Displays computer art and the Redis version.",
+ "group": "server",
+ "since": "5.0.0",
+ "arity": -1,
+ "function": "lolwutCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "String containing the generative computer art, and a text with the Redis version."
+ },
+ "arguments": [
+ {
+ "token": "VERSION",
+ "name": "version",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/lpop.json b/src/commands/lpop.json
new file mode 100644
index 0000000..b1d6cd1
--- /dev/null
+++ b/src/commands/lpop.json
@@ -0,0 +1,77 @@
+{
+ "LPOP": {
+ "summary": "Returns the first elements in a list after removing them. Deletes the list if the last element was popped.",
+ "complexity": "O(N) where N is the number of elements returned",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "lpopCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `count` argument."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Key does not exist.",
+ "type": "null"
+ },
+ {
+ "description": "In case `count` argument was not given, the value of the first element.",
+ "type": "string"
+ },
+ {
+ "description": "In case `count` argument was given, a list of popped elements",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "count",
+ "type": "integer",
+ "optional": true,
+ "since": "6.2.0"
+ }
+ ]
+ }
+}
diff --git a/src/commands/lpos.json b/src/commands/lpos.json
new file mode 100644
index 0000000..7b63b72
--- /dev/null
+++ b/src/commands/lpos.json
@@ -0,0 +1,85 @@
+{
+ "LPOS": {
+ "summary": "Returns the index of matching elements in a list.",
+ "complexity": "O(N) where N is the number of elements in the list, for the average case. When searching for elements near the head or the tail of the list, or when the MAXLEN option is provided, the command may run in constant time.",
+ "group": "list",
+ "since": "6.0.6",
+ "arity": -3,
+ "function": "lposCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "In case there is no matching element",
+ "type": "null"
+ },
+ {
+ "description": "An integer representing the matching element",
+ "type": "integer"
+ },
+ {
+ "description": "If the COUNT option is given, an array of integers representing the matching elements (empty if there are no matches)",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "integer"
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "element",
+ "type": "string"
+ },
+ {
+ "token": "RANK",
+ "name": "rank",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "token": "COUNT",
+ "name": "num-matches",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "token": "MAXLEN",
+ "name": "len",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/lpush.json b/src/commands/lpush.json
new file mode 100644
index 0000000..34cd8e2
--- /dev/null
+++ b/src/commands/lpush.json
@@ -0,0 +1,60 @@
+{
+ "LPUSH": {
+ "summary": "Prepends one or more elements to a list. Creates the key if it doesn't exist.",
+ "complexity": "O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": -3,
+ "function": "lpushCommand",
+ "history": [
+ [
+ "2.4.0",
+ "Accepts multiple `element` arguments."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Length of the list after the push operations.",
+ "type": "integer"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "element",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/lpushx.json b/src/commands/lpushx.json
new file mode 100644
index 0000000..5f6d17c
--- /dev/null
+++ b/src/commands/lpushx.json
@@ -0,0 +1,61 @@
+{
+ "LPUSHX": {
+ "summary": "Prepends one or more elements to a list only when the list exists.",
+ "complexity": "O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.",
+ "group": "list",
+ "since": "2.2.0",
+ "arity": -3,
+ "function": "lpushxCommand",
+ "history": [
+ [
+ "4.0.0",
+ "Accepts multiple `element` arguments."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "the length of the list after the push operation",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "element",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/lrange.json b/src/commands/lrange.json
new file mode 100644
index 0000000..303d2f6
--- /dev/null
+++ b/src/commands/lrange.json
@@ -0,0 +1,58 @@
+{
+ "LRANGE": {
+ "summary": "Returns a range of elements from a list.",
+ "complexity": "O(S+N) where S is the distance of start offset from HEAD for small lists, from nearest end (HEAD or TAIL) for large lists; and N is the number of elements in the specified range.",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": 4,
+ "function": "lrangeCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "start",
+ "type": "integer"
+ },
+ {
+ "name": "stop",
+ "type": "integer"
+ }
+ ],
+ "reply_schema": {
+ "description": "List of elements in the specified range",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/lrem.json b/src/commands/lrem.json
new file mode 100644
index 0000000..c267d3e
--- /dev/null
+++ b/src/commands/lrem.json
@@ -0,0 +1,56 @@
+{
+ "LREM": {
+ "summary": "Removes elements from a list. Deletes the list if the last element was removed.",
+ "complexity": "O(N+M) where N is the length of the list and M is the number of elements removed.",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": 4,
+ "function": "lremCommand",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of removed elements.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "element",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/lset.json b/src/commands/lset.json
new file mode 100644
index 0000000..473b02c
--- /dev/null
+++ b/src/commands/lset.json
@@ -0,0 +1,55 @@
+{
+ "LSET": {
+ "summary": "Sets the value of an element in a list by its index.",
+ "complexity": "O(N) where N is the length of the list. Setting either the first or the last element of the list is O(1).",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": 4,
+ "function": "lsetCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "index",
+ "type": "integer"
+ },
+ {
+ "name": "element",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/ltrim.json b/src/commands/ltrim.json
new file mode 100644
index 0000000..3bba299
--- /dev/null
+++ b/src/commands/ltrim.json
@@ -0,0 +1,54 @@
+{
+ "LTRIM": {
+ "summary": "Removes elements from both ends of a list. Deletes the list if all elements were trimmed.",
+ "complexity": "O(N) where N is the number of elements to be removed by the operation.",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": 4,
+ "function": "ltrimCommand",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "start",
+ "type": "integer"
+ },
+ {
+ "name": "stop",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/memory-doctor.json b/src/commands/memory-doctor.json
new file mode 100644
index 0000000..c0c8c22
--- /dev/null
+++ b/src/commands/memory-doctor.json
@@ -0,0 +1,20 @@
+{
+ "DOCTOR": {
+ "summary": "Outputs a memory problems report.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": 2,
+ "container": "MEMORY",
+ "function": "memoryCommand",
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "reply_schema": {
+ "description": "memory problems report",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/memory-help.json b/src/commands/memory-help.json
new file mode 100644
index 0000000..e72934c
--- /dev/null
+++ b/src/commands/memory-help.json
@@ -0,0 +1,22 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": 2,
+ "container": "MEMORY",
+ "function": "memoryCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/memory-malloc-stats.json b/src/commands/memory-malloc-stats.json
new file mode 100644
index 0000000..5ef6a31
--- /dev/null
+++ b/src/commands/memory-malloc-stats.json
@@ -0,0 +1,20 @@
+{
+ "MALLOC-STATS": {
+ "summary": "Returns the allocator statistics.",
+ "complexity": "Depends on how much memory is allocated, could be slow",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": 2,
+ "container": "MEMORY",
+ "function": "memoryCommand",
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "The memory allocator's internal statistics report."
+ }
+ }
+}
diff --git a/src/commands/memory-purge.json b/src/commands/memory-purge.json
new file mode 100644
index 0000000..77ed61d
--- /dev/null
+++ b/src/commands/memory-purge.json
@@ -0,0 +1,18 @@
+{
+ "PURGE": {
+ "summary": "Asks the allocator to release memory.",
+ "complexity": "Depends on how much memory is allocated, could be slow",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": 2,
+ "container": "MEMORY",
+ "function": "memoryCommand",
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/memory-stats.json b/src/commands/memory-stats.json
new file mode 100644
index 0000000..de82dc8
--- /dev/null
+++ b/src/commands/memory-stats.json
@@ -0,0 +1,121 @@
+{
+ "STATS": {
+ "summary": "Returns details about memory usage.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": 2,
+ "container": "MEMORY",
+ "function": "memoryCommand",
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "reply_schema": {
+ "description": "memory usage details",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "peak.allocated": {
+ "type": "integer"
+ },
+ "total.allocated": {
+ "type": "integer"
+ },
+ "startup.allocated": {
+ "type": "integer"
+ },
+ "replication.backlog": {
+ "type": "integer"
+ },
+ "clients.slaves": {
+ "type": "integer"
+ },
+ "clients.normal": {
+ "type": "integer"
+ },
+ "cluster.links": {
+ "type": "integer"
+ },
+ "aof.buffer": {
+ "type": "integer"
+ },
+ "lua.caches": {
+ "type": "integer"
+ },
+ "functions.caches": {
+ "type": "integer"
+ },
+ "overhead.total": {
+ "type": "integer"
+ },
+ "keys.count": {
+ "type": "integer"
+ },
+ "keys.bytes-per-key": {
+ "type": "integer"
+ },
+ "dataset.bytes": {
+ "type": "integer"
+ },
+ "dataset.percentage": {
+ "type": "number"
+ },
+ "peak.percentage": {
+ "type": "number"
+ },
+ "allocator.allocated": {
+ "type": "integer"
+ },
+ "allocator.active": {
+ "type": "integer"
+ },
+ "allocator.resident": {
+ "type": "integer"
+ },
+ "allocator-fragmentation.ratio": {
+ "type": "number"
+ },
+ "allocator-fragmentation.bytes": {
+ "type": "integer"
+ },
+ "allocator-rss.ratio": {
+ "type": "number"
+ },
+ "allocator-rss.bytes": {
+ "type": "integer"
+ },
+ "rss-overhead.ratio": {
+ "type": "number"
+ },
+ "rss-overhead.bytes": {
+ "type": "integer"
+ },
+ "fragmentation": {
+ "type": "number"
+ },
+ "fragmentation.bytes": {
+ "type": "integer"
+ }
+ },
+ "patternProperties": {
+ "^db.": {
+ "type": "object",
+ "properties": {
+ "overhead.hashtable.main": {
+ "type": "integer"
+ },
+ "overhead.hashtable.expires": {
+ "type": "integer"
+ },
+ "overhead.hashtable.slot-to-keys": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false
+ }
+ }
+ }
+ }
+}
diff --git a/src/commands/memory-usage.json b/src/commands/memory-usage.json
new file mode 100644
index 0000000..78678ad
--- /dev/null
+++ b/src/commands/memory-usage.json
@@ -0,0 +1,58 @@
+{
+ "USAGE": {
+ "summary": "Estimates the memory usage of a key.",
+ "complexity": "O(N) where N is the number of samples.",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": -3,
+ "container": "MEMORY",
+ "function": "memoryCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Number of bytes that a key and its value require to be stored in RAM.",
+ "type": "integer"
+ },
+ {
+ "description": "Key does not exist.",
+ "type": "null"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "token": "SAMPLES",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/memory.json b/src/commands/memory.json
new file mode 100644
index 0000000..aab0841
--- /dev/null
+++ b/src/commands/memory.json
@@ -0,0 +1,9 @@
+{
+ "MEMORY": {
+ "summary": "A container for memory diagnostics commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": -2
+ }
+}
diff --git a/src/commands/mget.json b/src/commands/mget.json
new file mode 100644
index 0000000..a177853
--- /dev/null
+++ b/src/commands/mget.json
@@ -0,0 +1,63 @@
+{
+ "MGET": {
+ "summary": "Atomically returns the string values of one or more keys.",
+ "complexity": "O(N) where N is the number of keys to retrieve.",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "mgetCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:MULTI_SHARD"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "List of values at the specified keys.",
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/migrate.json b/src/commands/migrate.json
new file mode 100644
index 0000000..f1dfae4
--- /dev/null
+++ b/src/commands/migrate.json
@@ -0,0 +1,181 @@
+{
+ "MIGRATE": {
+ "summary": "Atomically transfers a key from one Redis instance to another.",
+ "complexity": "This command actually executes a DUMP+DEL in the source instance, and a RESTORE in the target instance. See the pages of these commands for time complexity. Also an O(N) data transfer between the two instances is performed.",
+ "group": "generic",
+ "since": "2.6.0",
+ "arity": -6,
+ "function": "migrateCommand",
+ "get_keys_function": "migrateGetKeys",
+ "history": [
+ [
+ "3.0.0",
+ "Added the `COPY` and `REPLACE` options."
+ ],
+ [
+ "3.0.6",
+ "Added the `KEYS` option."
+ ],
+ [
+ "4.0.7",
+ "Added the `AUTH` option."
+ ],
+ [
+ "6.0.0",
+ "Added the `AUTH2` option."
+ ]
+ ],
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "KEYSPACE",
+ "DANGEROUS"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 3
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE",
+ "INCOMPLETE"
+ ],
+ "begin_search": {
+ "keyword": {
+ "keyword": "KEYS",
+ "startfrom": -2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": "OK",
+ "description": "Success."
+ },
+ {
+ "const": "NOKEY",
+ "description": "No keys were found in the source instance."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "host",
+ "type": "string"
+ },
+ {
+ "name": "port",
+ "type": "integer"
+ },
+ {
+ "name": "key-selector",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "empty-string",
+ "type": "pure-token",
+ "token": "\"\""
+ }
+ ]
+ },
+ {
+ "name": "destination-db",
+ "type": "integer"
+ },
+ {
+ "name": "timeout",
+ "type": "integer"
+ },
+ {
+ "name": "copy",
+ "token": "COPY",
+ "type": "pure-token",
+ "optional": true,
+ "since": "3.0.0"
+ },
+ {
+ "name": "replace",
+ "token": "REPLACE",
+ "type": "pure-token",
+ "optional": true,
+ "since": "3.0.0"
+ },
+ {
+ "name": "authentication",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "AUTH",
+ "name": "auth",
+ "display": "password",
+ "type": "string",
+ "since": "4.0.7"
+ },
+ {
+ "token": "AUTH2",
+ "name": "auth2",
+ "type": "block",
+ "since": "6.0.0",
+ "arguments": [
+ {
+ "name": "username",
+ "type": "string"
+ },
+ {
+ "name": "password",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "token": "KEYS",
+ "name": "keys",
+ "display": "key",
+ "type": "key",
+ "key_spec_index": 1,
+ "optional": true,
+ "multiple": true,
+ "since": "3.0.6"
+ }
+ ]
+ }
+}
diff --git a/src/commands/module-help.json b/src/commands/module-help.json
new file mode 100644
index 0000000..5f3db0f
--- /dev/null
+++ b/src/commands/module-help.json
@@ -0,0 +1,22 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "5.0.0",
+ "arity": 2,
+ "container": "MODULE",
+ "function": "moduleCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/module-list.json b/src/commands/module-list.json
new file mode 100644
index 0000000..92a022b
--- /dev/null
+++ b/src/commands/module-list.json
@@ -0,0 +1,47 @@
+{
+ "LIST": {
+ "summary": "Returns all loaded modules.",
+ "complexity": "O(N) where N is the number of loaded modules.",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": 2,
+ "container": "MODULE",
+ "function": "moduleCommand",
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Returns information about the modules loaded to the server.",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Name of the module."
+ },
+ "ver": {
+ "type": "integer",
+ "description": "Version of the module."
+ },
+ "path": {
+ "type": "string",
+ "description": "Module path."
+ },
+ "args": {
+ "type": "array",
+ "description": "Module arguments.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/commands/module-load.json b/src/commands/module-load.json
new file mode 100644
index 0000000..dd5d654
--- /dev/null
+++ b/src/commands/module-load.json
@@ -0,0 +1,32 @@
+{
+ "LOAD": {
+ "summary": "Loads a module.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": -3,
+ "container": "MODULE",
+ "function": "moduleCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NOSCRIPT",
+ "PROTECTED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "path",
+ "type": "string"
+ },
+ {
+ "name": "arg",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/module-loadex.json b/src/commands/module-loadex.json
new file mode 100644
index 0000000..6c750ea
--- /dev/null
+++ b/src/commands/module-loadex.json
@@ -0,0 +1,51 @@
+{
+ "LOADEX": {
+ "summary": "Loads a module using extended parameters.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "7.0.0",
+ "arity": -3,
+ "container": "MODULE",
+ "function": "moduleCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NOSCRIPT",
+ "PROTECTED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "path",
+ "type": "string"
+ },
+ {
+ "name": "configs",
+ "token": "CONFIG",
+ "type": "block",
+ "multiple": true,
+ "multiple_token": true,
+ "optional": true,
+ "arguments": [
+ {
+ "name": "name",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "name": "args",
+ "token": "ARGS",
+ "type": "string",
+ "multiple": true,
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/module-unload.json b/src/commands/module-unload.json
new file mode 100644
index 0000000..f2fbf80
--- /dev/null
+++ b/src/commands/module-unload.json
@@ -0,0 +1,26 @@
+{
+ "UNLOAD": {
+ "summary": "Unloads a module.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": 3,
+ "container": "MODULE",
+ "function": "moduleCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NOSCRIPT",
+ "PROTECTED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "name",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/module.json b/src/commands/module.json
new file mode 100644
index 0000000..148f182
--- /dev/null
+++ b/src/commands/module.json
@@ -0,0 +1,9 @@
+{
+ "MODULE": {
+ "summary": "A container for module commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": -2
+ }
+}
diff --git a/src/commands/monitor.json b/src/commands/monitor.json
new file mode 100644
index 0000000..23c659e
--- /dev/null
+++ b/src/commands/monitor.json
@@ -0,0 +1,16 @@
+{
+ "MONITOR": {
+ "summary": "Listens for all requests received by the server in real-time.",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": 1,
+ "function": "monitorCommand",
+ "history": [],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ]
+ }
+}
diff --git a/src/commands/move.json b/src/commands/move.json
new file mode 100644
index 0000000..2030680
--- /dev/null
+++ b/src/commands/move.json
@@ -0,0 +1,61 @@
+{
+ "MOVE": {
+ "summary": "Moves a key to another database.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "moveCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "db",
+ "type": "integer"
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "key was moved",
+ "const": 1
+ },
+ {
+ "description": "key wasn't moved",
+ "const": 0
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/mset.json b/src/commands/mset.json
new file mode 100644
index 0000000..9a57446
--- /dev/null
+++ b/src/commands/mset.json
@@ -0,0 +1,62 @@
+{
+ "MSET": {
+ "summary": "Atomically creates or modifies the string values of one or more keys.",
+ "complexity": "O(N) where N is the number of keys to set.",
+ "group": "string",
+ "since": "1.0.1",
+ "arity": -3,
+ "function": "msetCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:MULTI_SHARD",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 2,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "data",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/msetnx.json b/src/commands/msetnx.json
new file mode 100644
index 0000000..27592d3
--- /dev/null
+++ b/src/commands/msetnx.json
@@ -0,0 +1,67 @@
+{
+ "MSETNX": {
+ "summary": "Atomically modifies the string values of one or more keys only when all keys don't exist.",
+ "complexity": "O(N) where N is the number of keys to set.",
+ "group": "string",
+ "since": "1.0.1",
+ "arity": -3,
+ "function": "msetnxCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 2,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "No key was set (at least one key already existed).",
+ "const": 0
+ },
+ {
+ "description": "All the keys were set.",
+ "const": 1
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "data",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/multi.json b/src/commands/multi.json
new file mode 100644
index 0000000..5f17a1d
--- /dev/null
+++ b/src/commands/multi.json
@@ -0,0 +1,23 @@
+{
+ "MULTI": {
+ "summary": "Starts a transaction.",
+ "complexity": "O(1)",
+ "group": "transactions",
+ "since": "1.2.0",
+ "arity": 1,
+ "function": "multiCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "TRANSACTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/object-encoding.json b/src/commands/object-encoding.json
new file mode 100644
index 0000000..f255b57
--- /dev/null
+++ b/src/commands/object-encoding.json
@@ -0,0 +1,58 @@
+{
+ "ENCODING": {
+ "summary": "Returns the internal encoding of a Redis object.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "2.2.3",
+ "arity": 3,
+ "container": "OBJECT",
+ "function": "objectCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "key doesn't exist",
+ "type": "null"
+ },
+ {
+ "description": "encoding of the object",
+ "type": "string"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/object-freq.json b/src/commands/object-freq.json
new file mode 100644
index 0000000..03b5b2b
--- /dev/null
+++ b/src/commands/object-freq.json
@@ -0,0 +1,50 @@
+{
+ "FREQ": {
+ "summary": "Returns the logarithmic access frequency counter of a Redis object.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "4.0.0",
+ "arity": 3,
+ "container": "OBJECT",
+ "function": "objectCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ],
+ "reply_schema": {
+ "description": "the counter's value",
+ "type": "integer"
+ }
+ }
+}
diff --git a/src/commands/object-help.json b/src/commands/object-help.json
new file mode 100644
index 0000000..5261650
--- /dev/null
+++ b/src/commands/object-help.json
@@ -0,0 +1,25 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "6.2.0",
+ "arity": 2,
+ "container": "OBJECT",
+ "function": "objectCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/object-idletime.json b/src/commands/object-idletime.json
new file mode 100644
index 0000000..03c202a
--- /dev/null
+++ b/src/commands/object-idletime.json
@@ -0,0 +1,50 @@
+{
+ "IDLETIME": {
+ "summary": "Returns the time since the last access to a Redis object.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "2.2.3",
+ "arity": 3,
+ "container": "OBJECT",
+ "function": "objectCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ],
+ "reply_schema": {
+ "description": "the idle time in seconds",
+ "type": "integer"
+ }
+ }
+}
diff --git a/src/commands/object-refcount.json b/src/commands/object-refcount.json
new file mode 100644
index 0000000..48009bb
--- /dev/null
+++ b/src/commands/object-refcount.json
@@ -0,0 +1,50 @@
+{
+ "REFCOUNT": {
+ "summary": "Returns the reference count of a value of a key.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "2.2.3",
+ "arity": 3,
+ "container": "OBJECT",
+ "function": "objectCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of references",
+ "type": "integer"
+ }
+ }
+}
diff --git a/src/commands/object.json b/src/commands/object.json
new file mode 100644
index 0000000..14be26b
--- /dev/null
+++ b/src/commands/object.json
@@ -0,0 +1,9 @@
+{
+ "OBJECT": {
+ "summary": "A container for object introspection commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "generic",
+ "since": "2.2.3",
+ "arity": -2
+ }
+}
diff --git a/src/commands/persist.json b/src/commands/persist.json
new file mode 100644
index 0000000..11e6e01
--- /dev/null
+++ b/src/commands/persist.json
@@ -0,0 +1,56 @@
+{
+ "PERSIST": {
+ "summary": "Removes the expiration time of a key.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "2.2.0",
+ "arity": 2,
+ "function": "persistCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": 0,
+ "description": "Key does not exist or does not have an associated timeout."
+ },
+ {
+ "const": 1,
+ "description": "The timeout has been removed."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/pexpire.json b/src/commands/pexpire.json
new file mode 100644
index 0000000..a133f4f
--- /dev/null
+++ b/src/commands/pexpire.json
@@ -0,0 +1,94 @@
+{
+ "PEXPIRE": {
+ "summary": "Sets the expiration time of a key in milliseconds.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "2.6.0",
+ "arity": -3,
+ "function": "pexpireCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added options: `NX`, `XX`, `GT` and `LT`."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": 0,
+ "description": "The timeout was not set. e.g. key doesn't exist, or operation skipped due to the provided arguments."
+ },
+ {
+ "const": 1,
+ "description": "The timeout was set."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "milliseconds",
+ "type": "integer"
+ },
+ {
+ "name": "condition",
+ "type": "oneof",
+ "optional": true,
+ "since": "7.0.0",
+ "arguments": [
+ {
+ "name": "nx",
+ "type": "pure-token",
+ "token": "NX"
+ },
+ {
+ "name": "xx",
+ "type": "pure-token",
+ "token": "XX"
+ },
+ {
+ "name": "gt",
+ "type": "pure-token",
+ "token": "GT"
+ },
+ {
+ "name": "lt",
+ "type": "pure-token",
+ "token": "LT"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/pexpireat.json b/src/commands/pexpireat.json
new file mode 100644
index 0000000..dd08ed0
--- /dev/null
+++ b/src/commands/pexpireat.json
@@ -0,0 +1,94 @@
+{
+ "PEXPIREAT": {
+ "summary": "Sets the expiration time of a key to a Unix milliseconds timestamp.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "2.6.0",
+ "arity": -3,
+ "function": "pexpireatCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added options: `NX`, `XX`, `GT` and `LT`."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": 1,
+ "description": "The timeout was set."
+ },
+ {
+ "const": 0,
+ "description": "The timeout was not set. e.g. key doesn't exist, or operation skipped due to the provided arguments."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "unix-time-milliseconds",
+ "type": "unix-time"
+ },
+ {
+ "name": "condition",
+ "type": "oneof",
+ "optional": true,
+ "since": "7.0.0",
+ "arguments": [
+ {
+ "name": "nx",
+ "type": "pure-token",
+ "token": "NX"
+ },
+ {
+ "name": "xx",
+ "type": "pure-token",
+ "token": "XX"
+ },
+ {
+ "name": "gt",
+ "type": "pure-token",
+ "token": "GT"
+ },
+ {
+ "name": "lt",
+ "type": "pure-token",
+ "token": "LT"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/pexpiretime.json b/src/commands/pexpiretime.json
new file mode 100644
index 0000000..3fa055c
--- /dev/null
+++ b/src/commands/pexpiretime.json
@@ -0,0 +1,61 @@
+{
+ "PEXPIRETIME": {
+ "summary": "Returns the expiration time of a key as a Unix milliseconds timestamp.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "7.0.0",
+ "arity": 2,
+ "function": "pexpiretimeCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "integer",
+ "description": "Expiration Unix timestamp in milliseconds.",
+ "minimum": 0
+ },
+ {
+ "const": -1,
+ "description": "The key exists but has no associated expiration time."
+ },
+ {
+ "const": -2,
+ "description": "The key does not exist."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/pfadd.json b/src/commands/pfadd.json
new file mode 100644
index 0000000..7d8448a
--- /dev/null
+++ b/src/commands/pfadd.json
@@ -0,0 +1,63 @@
+{
+ "PFADD": {
+ "summary": "Adds elements to a HyperLogLog key. Creates the key if it doesn't exist.",
+ "complexity": "O(1) to add every element.",
+ "group": "hyperloglog",
+ "since": "2.8.9",
+ "arity": -2,
+ "function": "pfaddCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "HYPERLOGLOG"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "element",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "if at least 1 HyperLogLog internal register was altered",
+ "const": 1
+ },
+ {
+ "description": "if no HyperLogLog internal register were altered",
+ "const": 0
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/pfcount.json b/src/commands/pfcount.json
new file mode 100644
index 0000000..02a64c3
--- /dev/null
+++ b/src/commands/pfcount.json
@@ -0,0 +1,50 @@
+{
+ "PFCOUNT": {
+ "summary": "Returns the approximated cardinality of the set(s) observed by the HyperLogLog key(s).",
+ "complexity": "O(1) with a very small average constant time when called with a single key. O(N) with N being the number of keys, and much bigger constant times, when called with multiple keys.",
+ "group": "hyperloglog",
+ "since": "2.8.9",
+ "arity": -2,
+ "function": "pfcountCommand",
+ "command_flags": [
+ "READONLY",
+ "MAY_REPLICATE"
+ ],
+ "acl_categories": [
+ "HYPERLOGLOG"
+ ],
+ "key_specs": [
+ {
+ "notes": "RW because it may change the internal representation of the key, and propagate to replicas",
+ "flags": [
+ "RW",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "The approximated number of unique elements observed via PFADD",
+ "type": "integer"
+ }
+ }
+}
diff --git a/src/commands/pfdebug.json b/src/commands/pfdebug.json
new file mode 100644
index 0000000..4cd2853
--- /dev/null
+++ b/src/commands/pfdebug.json
@@ -0,0 +1,52 @@
+{
+ "PFDEBUG": {
+ "summary": "Internal commands for debugging HyperLogLog values.",
+ "complexity": "N/A",
+ "group": "hyperloglog",
+ "since": "2.8.9",
+ "arity": 3,
+ "function": "pfdebugCommand",
+ "doc_flags": [
+ "SYSCMD"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "ADMIN"
+ ],
+ "acl_categories": [
+ "HYPERLOGLOG"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "subcommand",
+ "type": "string"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/pfmerge.json b/src/commands/pfmerge.json
new file mode 100644
index 0000000..c93070f
--- /dev/null
+++ b/src/commands/pfmerge.json
@@ -0,0 +1,73 @@
+{
+ "PFMERGE": {
+ "summary": "Merges one or more HyperLogLog values into a single key.",
+ "complexity": "O(N) to merge N HyperLogLogs, but with high constant times.",
+ "group": "hyperloglog",
+ "since": "2.8.9",
+ "arity": -2,
+ "function": "pfmergeCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "HYPERLOGLOG"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "destkey",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "sourcekey",
+ "type": "key",
+ "key_spec_index": 1,
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/pfselftest.json b/src/commands/pfselftest.json
new file mode 100644
index 0000000..ed29280
--- /dev/null
+++ b/src/commands/pfselftest.json
@@ -0,0 +1,22 @@
+{
+ "PFSELFTEST": {
+ "summary": "An internal command for testing HyperLogLog values.",
+ "complexity": "N/A",
+ "group": "hyperloglog",
+ "since": "2.8.9",
+ "arity": 1,
+ "function": "pfselftestCommand",
+ "doc_flags": [
+ "SYSCMD"
+ ],
+ "command_flags": [
+ "ADMIN"
+ ],
+ "acl_categories": [
+ "HYPERLOGLOG"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/ping.json b/src/commands/ping.json
new file mode 100644
index 0000000..b634c0b
--- /dev/null
+++ b/src/commands/ping.json
@@ -0,0 +1,40 @@
+{
+ "PING": {
+ "summary": "Returns the server's liveliness response.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "1.0.0",
+ "arity": -1,
+ "function": "pingCommand",
+ "command_flags": [
+ "FAST",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "const": "PONG",
+ "description": "Default reply."
+ },
+ {
+ "type": "string",
+ "description": "Relay of given `message`."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "message",
+ "type": "string",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/psetex.json b/src/commands/psetex.json
new file mode 100644
index 0000000..8d88766
--- /dev/null
+++ b/src/commands/psetex.json
@@ -0,0 +1,60 @@
+{
+ "PSETEX": {
+ "summary": "Sets both string value and expiration time in milliseconds of a key. The key is created if it doesn't exist.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "2.6.0",
+ "arity": 4,
+ "function": "psetexCommand",
+ "deprecated_since": "2.6.12",
+ "replaced_by": "`SET` with the `PX` argument",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "milliseconds",
+ "type": "integer"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/psubscribe.json b/src/commands/psubscribe.json
new file mode 100644
index 0000000..cab5d14
--- /dev/null
+++ b/src/commands/psubscribe.json
@@ -0,0 +1,24 @@
+{
+ "PSUBSCRIBE": {
+ "summary": "Listens for messages published to channels that match one or more patterns.",
+ "complexity": "O(N) where N is the number of patterns to subscribe to.",
+ "group": "pubsub",
+ "since": "2.0.0",
+ "arity": -2,
+ "function": "psubscribeCommand",
+ "command_flags": [
+ "PUBSUB",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "arguments": [
+ {
+ "name": "pattern",
+ "type": "pattern",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/psync.json b/src/commands/psync.json
new file mode 100644
index 0000000..60da8ed
--- /dev/null
+++ b/src/commands/psync.json
@@ -0,0 +1,25 @@
+{
+ "PSYNC": {
+ "summary": "An internal command used in replication.",
+ "group": "server",
+ "since": "2.8.0",
+ "arity": -3,
+ "function": "syncCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NO_MULTI",
+ "NOSCRIPT"
+ ],
+ "arguments": [
+ {
+ "name": "replicationid",
+ "type": "string"
+ },
+ {
+ "name": "offset",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/pttl.json b/src/commands/pttl.json
new file mode 100644
index 0000000..304270b
--- /dev/null
+++ b/src/commands/pttl.json
@@ -0,0 +1,70 @@
+{
+ "PTTL": {
+ "summary": "Returns the expiration time in milliseconds of a key.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "2.6.0",
+ "arity": 2,
+ "function": "pttlCommand",
+ "history": [
+ [
+ "2.8.0",
+ "Added the -2 reply."
+ ]
+ ],
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "TTL in milliseconds.",
+ "type": "integer",
+ "minimum": 0
+ },
+ {
+ "description": "The key exists but has no associated expire.",
+ "const": -1
+ },
+ {
+ "description": "The key does not exist.",
+ "const": -2
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/publish.json b/src/commands/publish.json
new file mode 100644
index 0000000..1dd757d
--- /dev/null
+++ b/src/commands/publish.json
@@ -0,0 +1,33 @@
+{
+ "PUBLISH": {
+ "summary": "Posts a message to a channel.",
+ "complexity": "O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any client).",
+ "group": "pubsub",
+ "since": "2.0.0",
+ "arity": 3,
+ "function": "publishCommand",
+ "command_flags": [
+ "PUBSUB",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "MAY_REPLICATE",
+ "SENTINEL"
+ ],
+ "arguments": [
+ {
+ "name": "channel",
+ "type": "string"
+ },
+ {
+ "name": "message",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of clients that received the message. Note that in a Redis Cluster, only clients that are connected to the same node as the publishing client are included in the count",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/pubsub-channels.json b/src/commands/pubsub-channels.json
new file mode 100644
index 0000000..08505b3
--- /dev/null
+++ b/src/commands/pubsub-channels.json
@@ -0,0 +1,31 @@
+{
+ "CHANNELS": {
+ "summary": "Returns the active channels.",
+ "complexity": "O(N) where N is the number of active channels, and assuming constant time pattern matching (relatively short channels and patterns)",
+ "group": "pubsub",
+ "since": "2.8.0",
+ "arity": -2,
+ "container": "PUBSUB",
+ "function": "pubsubCommand",
+ "command_flags": [
+ "PUBSUB",
+ "LOADING",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "pattern",
+ "type": "pattern",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "a list of active channels, optionally matching the specified pattern",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/pubsub-help.json b/src/commands/pubsub-help.json
new file mode 100644
index 0000000..32faedc
--- /dev/null
+++ b/src/commands/pubsub-help.json
@@ -0,0 +1,22 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "pubsub",
+ "since": "6.2.0",
+ "arity": 2,
+ "container": "PUBSUB",
+ "function": "pubsubCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/pubsub-numpat.json b/src/commands/pubsub-numpat.json
new file mode 100644
index 0000000..ae653b7
--- /dev/null
+++ b/src/commands/pubsub-numpat.json
@@ -0,0 +1,21 @@
+{
+ "NUMPAT": {
+ "summary": "Returns a count of unique pattern subscriptions.",
+ "complexity": "O(1)",
+ "group": "pubsub",
+ "since": "2.8.0",
+ "arity": 2,
+ "container": "PUBSUB",
+ "function": "pubsubCommand",
+ "command_flags": [
+ "PUBSUB",
+ "LOADING",
+ "STALE"
+ ],
+ "reply_schema": {
+ "description": "the number of patterns all the clients are subscribed to",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/pubsub-numsub.json b/src/commands/pubsub-numsub.json
new file mode 100644
index 0000000..1cfe8e2
--- /dev/null
+++ b/src/commands/pubsub-numsub.json
@@ -0,0 +1,28 @@
+{
+ "NUMSUB": {
+ "summary": "Returns a count of subscribers to channels.",
+ "complexity": "O(N) for the NUMSUB subcommand, where N is the number of requested channels",
+ "group": "pubsub",
+ "since": "2.8.0",
+ "arity": -2,
+ "container": "PUBSUB",
+ "function": "pubsubCommand",
+ "command_flags": [
+ "PUBSUB",
+ "LOADING",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "channel",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of subscribers per channel, each even element (including 0th) is channel name, each odd element is the number of subscribers",
+ "type": "array"
+ }
+ }
+}
diff --git a/src/commands/pubsub-shardchannels.json b/src/commands/pubsub-shardchannels.json
new file mode 100644
index 0000000..7aa0a7a
--- /dev/null
+++ b/src/commands/pubsub-shardchannels.json
@@ -0,0 +1,31 @@
+{
+ "SHARDCHANNELS": {
+ "summary": "Returns the active shard channels.",
+ "complexity": "O(N) where N is the number of active shard channels, and assuming constant time pattern matching (relatively short shard channels).",
+ "group": "pubsub",
+ "since": "7.0.0",
+ "arity": -2,
+ "container": "PUBSUB",
+ "function": "pubsubCommand",
+ "command_flags": [
+ "PUBSUB",
+ "LOADING",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "pattern",
+ "type": "pattern",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "a list of active channels, optionally matching the specified pattern",
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "uniqueItems": true
+ }
+ }
+}
diff --git a/src/commands/pubsub-shardnumsub.json b/src/commands/pubsub-shardnumsub.json
new file mode 100644
index 0000000..4367534
--- /dev/null
+++ b/src/commands/pubsub-shardnumsub.json
@@ -0,0 +1,28 @@
+{
+ "SHARDNUMSUB": {
+ "summary": "Returns the count of subscribers of shard channels.",
+ "complexity": "O(N) for the SHARDNUMSUB subcommand, where N is the number of requested shard channels",
+ "group": "pubsub",
+ "since": "7.0.0",
+ "arity": -2,
+ "container": "PUBSUB",
+ "function": "pubsubCommand",
+ "command_flags": [
+ "PUBSUB",
+ "LOADING",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "shardchannel",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of subscribers per shard channel, each even element (including 0th) is channel name, each odd element is the number of subscribers",
+ "type": "array"
+ }
+ }
+}
diff --git a/src/commands/pubsub.json b/src/commands/pubsub.json
new file mode 100644
index 0000000..2f0bb5e
--- /dev/null
+++ b/src/commands/pubsub.json
@@ -0,0 +1,9 @@
+{
+ "PUBSUB": {
+ "summary": "A container for Pub/Sub commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "pubsub",
+ "since": "2.8.0",
+ "arity": -2
+ }
+}
diff --git a/src/commands/punsubscribe.json b/src/commands/punsubscribe.json
new file mode 100644
index 0000000..cb977d8
--- /dev/null
+++ b/src/commands/punsubscribe.json
@@ -0,0 +1,25 @@
+{
+ "PUNSUBSCRIBE": {
+ "summary": "Stops listening to messages published to channels that match one or more patterns.",
+ "complexity": "O(N) where N is the number of patterns to unsubscribe.",
+ "group": "pubsub",
+ "since": "2.0.0",
+ "arity": -1,
+ "function": "punsubscribeCommand",
+ "command_flags": [
+ "PUBSUB",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "arguments": [
+ {
+ "name": "pattern",
+ "type": "pattern",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/quit.json b/src/commands/quit.json
new file mode 100644
index 0000000..e8dd6e9
--- /dev/null
+++ b/src/commands/quit.json
@@ -0,0 +1,29 @@
+{
+ "QUIT": {
+ "summary": "Closes the connection.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "1.0.0",
+ "arity": -1,
+ "function": "quitCommand",
+ "deprecated_since": "7.2.0",
+ "replaced_by": "just closing the connection",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "ALLOW_BUSY",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "NO_AUTH"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/randomkey.json b/src/commands/randomkey.json
new file mode 100644
index 0000000..eeef61a
--- /dev/null
+++ b/src/commands/randomkey.json
@@ -0,0 +1,34 @@
+{
+ "RANDOMKEY": {
+ "summary": "Returns a random key name from the database.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": 1,
+ "function": "randomkeyCommand",
+ "command_flags": [
+ "READONLY",
+ "TOUCHES_ARBITRARY_KEYS"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:SPECIAL",
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "when the database is empty",
+ "type": "null"
+ },
+ {
+ "description": "random key in db",
+ "type": "string"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/readonly.json b/src/commands/readonly.json
new file mode 100644
index 0000000..253573c
--- /dev/null
+++ b/src/commands/readonly.json
@@ -0,0 +1,21 @@
+{
+ "READONLY": {
+ "summary": "Enables read-only queries for a connection to a Redis Cluster replica node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 1,
+ "function": "readonlyCommand",
+ "command_flags": [
+ "FAST",
+ "LOADING",
+ "STALE"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/readwrite.json b/src/commands/readwrite.json
new file mode 100644
index 0000000..440dd59
--- /dev/null
+++ b/src/commands/readwrite.json
@@ -0,0 +1,21 @@
+{
+ "READWRITE": {
+        "summary": "Enables read-write queries for a connection to a Redis Cluster replica node.",
+ "complexity": "O(1)",
+ "group": "cluster",
+ "since": "3.0.0",
+ "arity": 1,
+ "function": "readwriteCommand",
+ "command_flags": [
+ "FAST",
+ "LOADING",
+ "STALE"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/rename.json b/src/commands/rename.json
new file mode 100644
index 0000000..a8b65ae
--- /dev/null
+++ b/src/commands/rename.json
@@ -0,0 +1,72 @@
+{
+ "RENAME": {
+ "summary": "Renames a key and overwrites the destination.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "renameCommand",
+ "history": [],
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "newkey",
+ "type": "key",
+ "key_spec_index": 1
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/renamenx.json b/src/commands/renamenx.json
new file mode 100644
index 0000000..72f0569
--- /dev/null
+++ b/src/commands/renamenx.json
@@ -0,0 +1,86 @@
+{
+ "RENAMENX": {
+ "summary": "Renames a key only when the target key name doesn't exist.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "renamenxCommand",
+ "history": [
+ [
+ "3.2.0",
+ "The command no longer returns an error when source and destination names are the same."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "OW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "newkey",
+ "type": "key",
+ "key_spec_index": 1
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "key was renamed to newkey",
+ "const": 1
+ },
+ {
+ "description": "new key already exists",
+ "const": 0
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/replconf.json b/src/commands/replconf.json
new file mode 100644
index 0000000..e8efc7b
--- /dev/null
+++ b/src/commands/replconf.json
@@ -0,0 +1,23 @@
+{
+ "REPLCONF": {
+ "summary": "An internal command for configuring the replication stream.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "3.0.0",
+ "arity": -1,
+ "function": "replconfCommand",
+ "doc_flags": [
+ "SYSCMD"
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "ALLOW_BUSY"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/replicaof.json b/src/commands/replicaof.json
new file mode 100644
index 0000000..95e5cb4
--- /dev/null
+++ b/src/commands/replicaof.json
@@ -0,0 +1,59 @@
+{
+ "REPLICAOF": {
+ "summary": "Configures a server as replica of another, or promotes it to a master.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "5.0.0",
+ "arity": 3,
+ "function": "replicaofCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NOSCRIPT",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "args",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "host-port",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "host",
+ "type": "string"
+ },
+ {
+ "name": "port",
+ "type": "integer"
+ }
+ ]
+ },
+ {
+ "name": "no-one",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "no",
+ "type": "pure-token",
+ "token": "NO"
+ },
+ {
+ "name": "one",
+ "type": "pure-token",
+ "token": "ONE"
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "description": "replicaOf status",
+ "type": "string",
+ "pattern": "OK*"
+ }
+ }
+}
diff --git a/src/commands/reset.json b/src/commands/reset.json
new file mode 100644
index 0000000..3fb1a44
--- /dev/null
+++ b/src/commands/reset.json
@@ -0,0 +1,24 @@
+{
+ "RESET": {
+ "summary": "Resets the connection.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "6.2.0",
+ "arity": 1,
+ "function": "resetCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "NO_AUTH",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "RESET"
+ }
+ }
+}
diff --git a/src/commands/restore-asking.json b/src/commands/restore-asking.json
new file mode 100644
index 0000000..2694a87
--- /dev/null
+++ b/src/commands/restore-asking.json
@@ -0,0 +1,102 @@
+{
+ "RESTORE-ASKING": {
+ "summary": "An internal command for migrating keys in a cluster.",
+ "complexity": "O(1) to create the new key and additional O(N*M) to reconstruct the serialized value, where N is the number of Redis objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1). However for sorted set values the complexity is O(N*M*log(N)) because inserting values into sorted sets is O(log(N)).",
+ "group": "server",
+ "since": "3.0.0",
+ "arity": -4,
+ "function": "restoreCommand",
+ "history": [
+ [
+ "3.0.0",
+ "Added the `REPLACE` modifier."
+ ],
+ [
+ "5.0.0",
+ "Added the `ABSTTL` modifier."
+ ],
+ [
+ "5.0.0",
+ "Added the `IDLETIME` and `FREQ` options."
+ ]
+ ],
+ "doc_flags": [
+ "SYSCMD"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "ASKING"
+ ],
+ "acl_categories": [
+ "KEYSPACE",
+ "DANGEROUS"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "ttl",
+ "type": "integer"
+ },
+ {
+ "name": "serialized-value",
+ "type": "string"
+ },
+ {
+ "name": "replace",
+ "token": "REPLACE",
+ "type": "pure-token",
+ "optional": true,
+ "since": "3.0.0"
+ },
+ {
+ "name": "absttl",
+ "token": "ABSTTL",
+ "type": "pure-token",
+ "optional": true,
+ "since": "5.0.0"
+ },
+ {
+ "token": "IDLETIME",
+ "name": "seconds",
+ "type": "integer",
+ "optional": true,
+ "since": "5.0.0"
+ },
+ {
+ "token": "FREQ",
+ "name": "frequency",
+ "type": "integer",
+ "optional": true,
+ "since": "5.0.0"
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/restore.json b/src/commands/restore.json
new file mode 100644
index 0000000..383dd45
--- /dev/null
+++ b/src/commands/restore.json
@@ -0,0 +1,98 @@
+{
+ "RESTORE": {
+ "summary": "Creates a key from the serialized representation of a value.",
+ "complexity": "O(1) to create the new key and additional O(N*M) to reconstruct the serialized value, where N is the number of Redis objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1). However for sorted set values the complexity is O(N*M*log(N)) because inserting values into sorted sets is O(log(N)).",
+ "group": "generic",
+ "since": "2.6.0",
+ "arity": -4,
+ "function": "restoreCommand",
+ "history": [
+ [
+ "3.0.0",
+ "Added the `REPLACE` modifier."
+ ],
+ [
+ "5.0.0",
+ "Added the `ABSTTL` modifier."
+ ],
+ [
+ "5.0.0",
+ "Added the `IDLETIME` and `FREQ` options."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "KEYSPACE",
+ "DANGEROUS"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "ttl",
+ "type": "integer"
+ },
+ {
+ "name": "serialized-value",
+ "type": "string"
+ },
+ {
+ "name": "replace",
+ "token": "REPLACE",
+ "type": "pure-token",
+ "optional": true,
+ "since": "3.0.0"
+ },
+ {
+ "name": "absttl",
+ "token": "ABSTTL",
+ "type": "pure-token",
+ "optional": true,
+ "since": "5.0.0"
+ },
+ {
+ "token": "IDLETIME",
+ "name": "seconds",
+ "type": "integer",
+ "optional": true,
+ "since": "5.0.0"
+ },
+ {
+ "token": "FREQ",
+ "name": "frequency",
+ "type": "integer",
+ "optional": true,
+ "since": "5.0.0"
+ }
+ ]
+ }
+}
diff --git a/src/commands/role.json b/src/commands/role.json
new file mode 100644
index 0000000..a0299fa
--- /dev/null
+++ b/src/commands/role.json
@@ -0,0 +1,134 @@
+{
+ "ROLE": {
+ "summary": "Returns the replication role.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.8.12",
+ "arity": 1,
+ "function": "roleCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "SENTINEL"
+ ],
+ "acl_categories": [
+ "ADMIN",
+ "DANGEROUS"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "array",
+ "minItems": 3,
+ "maxItems": 3,
+ "items": [
+ {
+ "const": "master"
+ },
+ {
+ "description": "current replication master offset",
+ "type": "integer"
+ },
+ {
+ "description": "connected replicas",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 3,
+ "maxItems": 3,
+ "items": [
+ {
+ "description": "replica ip",
+ "type": "string"
+ },
+ {
+ "description": "replica port",
+ "type": "string"
+ },
+ {
+ "description": "last acknowledged replication offset",
+ "type": "string"
+ }
+ ]
+ }
+ }
+ ]
+ },
+ {
+ "type": "array",
+ "minItems": 5,
+ "maxItems": 5,
+ "items": [
+ {
+ "const": "slave"
+ },
+ {
+ "description": "ip of master",
+ "type": "string"
+ },
+ {
+ "description": "port number of master",
+ "type": "integer"
+ },
+ {
+ "description": "state of the replication from the point of view of the master",
+ "oneOf": [
+ {
+ "description": "the instance is in handshake with its master",
+ "const": "handshake"
+ },
+ {
+                                "description": "the instance is not active",
+ "const": "none"
+ },
+ {
+ "description": "the instance needs to connect to its master",
+ "const": "connect"
+ },
+ {
+ "description": "the master-replica connection is in progress",
+ "const": "connecting"
+ },
+ {
+ "description": "the master and replica are trying to perform the synchronization",
+ "const": "sync"
+ },
+ {
+ "description": "the replica is online",
+ "const": "connected"
+ },
+ {
+ "description": "instance state is unknown",
+ "const": "unknown"
+ }
+ ]
+ },
+ {
+ "description": "the amount of data received from the replica so far in terms of master replication offset",
+ "type": "integer"
+ }
+ ]
+ },
+ {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "const": "sentinel"
+ },
+ {
+ "description": "list of master names monitored by this sentinel instance",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/rpop.json b/src/commands/rpop.json
new file mode 100644
index 0000000..79b5a92
--- /dev/null
+++ b/src/commands/rpop.json
@@ -0,0 +1,76 @@
+{
+ "RPOP": {
+ "summary": "Returns and removes the last elements of a list. Deletes the list if the last element was popped.",
+ "complexity": "O(N) where N is the number of elements returned",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "rpopCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `count` argument."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "null",
+ "description": "Key does not exist."
+ },
+ {
+ "type": "string",
+ "description": "When 'COUNT' was not given, the value of the last element."
+ },
+ {
+ "type": "array",
+ "description": "When 'COUNT' was given, list of popped elements.",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "count",
+ "type": "integer",
+ "optional": true,
+ "since": "6.2.0"
+ }
+ ]
+ }
+}
diff --git a/src/commands/rpoplpush.json b/src/commands/rpoplpush.json
new file mode 100644
index 0000000..1951499
--- /dev/null
+++ b/src/commands/rpoplpush.json
@@ -0,0 +1,85 @@
+{
+ "RPOPLPUSH": {
+ "summary": "Returns the last element of a list after removing and pushing it to another list. Deletes the list if the last element was popped.",
+ "complexity": "O(1)",
+ "group": "list",
+ "since": "1.2.0",
+ "arity": 3,
+ "function": "rpoplpushCommand",
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`LMOVE` with the `RIGHT` and `LEFT` arguments",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "string",
+ "description": "The element being popped and pushed."
+ },
+ {
+ "type": "null",
+ "description": "Source list is empty."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "source",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 1
+ }
+ ]
+ }
+}
diff --git a/src/commands/rpush.json b/src/commands/rpush.json
new file mode 100644
index 0000000..e5d6908
--- /dev/null
+++ b/src/commands/rpush.json
@@ -0,0 +1,61 @@
+{
+ "RPUSH": {
+ "summary": "Appends one or more elements to a list. Creates the key if it doesn't exist.",
+ "complexity": "O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.",
+ "group": "list",
+ "since": "1.0.0",
+ "arity": -3,
+ "function": "rpushCommand",
+ "history": [
+ [
+ "2.4.0",
+ "Accepts multiple `element` arguments."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Length of the list after the push operations.",
+ "type": "integer",
+ "minimum": 1
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "element",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/rpushx.json b/src/commands/rpushx.json
new file mode 100644
index 0000000..b41f9c5
--- /dev/null
+++ b/src/commands/rpushx.json
@@ -0,0 +1,61 @@
+{
+ "RPUSHX": {
+ "summary": "Appends an element to a list only when the list exists.",
+ "complexity": "O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.",
+ "group": "list",
+ "since": "2.2.0",
+ "arity": -3,
+ "function": "rpushxCommand",
+ "history": [
+ [
+ "4.0.0",
+ "Accepts multiple `element` arguments."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "LIST"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "Length of the list after the push operation.",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "element",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/sadd.json b/src/commands/sadd.json
new file mode 100644
index 0000000..00b3c26
--- /dev/null
+++ b/src/commands/sadd.json
@@ -0,0 +1,60 @@
+{
+ "SADD": {
+ "summary": "Adds one or more members to a set. Creates the key if it doesn't exist.",
+ "complexity": "O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -3,
+ "function": "saddCommand",
+ "history": [
+ [
+ "2.4.0",
+ "Accepts multiple `member` arguments."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of elements that were added to the set, not including all the elements already present in the set.",
+ "type": "integer"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/save.json b/src/commands/save.json
new file mode 100644
index 0000000..0645e27
--- /dev/null
+++ b/src/commands/save.json
@@ -0,0 +1,19 @@
+{
+ "SAVE": {
+ "summary": "Synchronously saves the database(s) to disk.",
+ "complexity": "O(N) where N is the total number of keys in all databases",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": 1,
+ "function": "saveCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NOSCRIPT",
+ "NO_MULTI"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/scan.json b/src/commands/scan.json
new file mode 100644
index 0000000..a7df78a
--- /dev/null
+++ b/src/commands/scan.json
@@ -0,0 +1,72 @@
+{
+ "SCAN": {
+ "summary": "Iterates over the key names in the database.",
+ "complexity": "O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.",
+ "group": "generic",
+ "since": "2.8.0",
+ "arity": -2,
+ "function": "scanCommand",
+ "history": [
+ [
+ "6.0.0",
+ "Added the `TYPE` subcommand."
+ ]
+ ],
+ "command_flags": [
+ "READONLY",
+ "TOUCHES_ARBITRARY_KEYS"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT",
+ "REQUEST_POLICY:SPECIAL",
+ "RESPONSE_POLICY:SPECIAL"
+ ],
+ "arguments": [
+ {
+ "name": "cursor",
+ "type": "integer"
+ },
+ {
+ "token": "MATCH",
+ "name": "pattern",
+ "type": "pattern",
+ "optional": true
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "token": "TYPE",
+ "name": "type",
+ "type": "string",
+ "optional": true,
+ "since": "6.0.0"
+ }
+ ],
+ "reply_schema": {
+      "description": "Cursor and scan response in array form.",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "cursor",
+ "type": "string"
+ },
+ {
+ "description": "list of keys",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/scard.json b/src/commands/scard.json
new file mode 100644
index 0000000..8df0a4f
--- /dev/null
+++ b/src/commands/scard.json
@@ -0,0 +1,48 @@
+{
+ "SCARD": {
+ "summary": "Returns the number of members in a set.",
+ "complexity": "O(1)",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "scardCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The cardinality (number of elements) of the set, or 0 if key does not exist.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/script-debug.json b/src/commands/script-debug.json
new file mode 100644
index 0000000..ebba38a
--- /dev/null
+++ b/src/commands/script-debug.json
@@ -0,0 +1,43 @@
+{
+ "DEBUG": {
+ "summary": "Sets the debug mode of server-side Lua scripts.",
+ "complexity": "O(1)",
+ "group": "scripting",
+ "since": "3.2.0",
+ "arity": 3,
+ "container": "SCRIPT",
+ "function": "scriptCommand",
+ "command_flags": [
+ "NOSCRIPT"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "arguments": [
+ {
+ "name": "mode",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "yes",
+ "type": "pure-token",
+ "token": "YES"
+ },
+ {
+ "name": "sync",
+ "type": "pure-token",
+ "token": "SYNC"
+ },
+ {
+ "name": "no",
+ "type": "pure-token",
+ "token": "NO"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/script-exists.json b/src/commands/script-exists.json
new file mode 100644
index 0000000..d8c47e4
--- /dev/null
+++ b/src/commands/script-exists.json
@@ -0,0 +1,44 @@
+{
+ "EXISTS": {
+ "summary": "Determines whether server-side Lua scripts exist in the script cache.",
+ "complexity": "O(N) with N being the number of scripts to check (so checking a single script is an O(1) operation).",
+ "group": "scripting",
+ "since": "2.6.0",
+ "arity": -3,
+ "container": "SCRIPT",
+ "function": "scriptCommand",
+ "command_flags": [
+ "NOSCRIPT"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:AGG_LOGICAL_AND"
+ ],
+ "arguments": [
+ {
+ "name": "sha1",
+ "type": "string",
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "An array of integers that correspond to the specified SHA1 digest arguments.",
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "description": "sha1 hash exists in script cache",
+ "const": 1
+ },
+ {
+ "description": "sha1 hash does not exist in script cache",
+ "const": 0
+ }
+ ]
+ }
+ }
+ }
+}
diff --git a/src/commands/script-flush.json b/src/commands/script-flush.json
new file mode 100644
index 0000000..7487dc5
--- /dev/null
+++ b/src/commands/script-flush.json
@@ -0,0 +1,50 @@
+{
+ "FLUSH": {
+ "summary": "Removes all server-side Lua scripts from the script cache.",
+ "complexity": "O(N) with N being the number of scripts in cache",
+ "group": "scripting",
+ "since": "2.6.0",
+ "arity": -2,
+ "container": "SCRIPT",
+ "function": "scriptCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `ASYNC` and `SYNC` flushing mode modifiers."
+ ]
+ ],
+ "command_flags": [
+ "NOSCRIPT"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "arguments": [
+ {
+ "name": "flush-type",
+ "type": "oneof",
+ "optional": true,
+ "since": "6.2.0",
+ "arguments": [
+ {
+ "name": "async",
+ "type": "pure-token",
+ "token": "ASYNC"
+ },
+ {
+ "name": "sync",
+ "type": "pure-token",
+ "token": "SYNC"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/script-help.json b/src/commands/script-help.json
new file mode 100644
index 0000000..c5ea5df
--- /dev/null
+++ b/src/commands/script-help.json
@@ -0,0 +1,25 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "scripting",
+ "since": "5.0.0",
+ "arity": 2,
+ "container": "SCRIPT",
+ "function": "scriptCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/script-kill.json b/src/commands/script-kill.json
new file mode 100644
index 0000000..fe72d31
--- /dev/null
+++ b/src/commands/script-kill.json
@@ -0,0 +1,25 @@
+{
+ "KILL": {
+ "summary": "Terminates a server-side Lua script during execution.",
+ "complexity": "O(1)",
+ "group": "scripting",
+ "since": "2.6.0",
+ "arity": 2,
+ "container": "SCRIPT",
+ "function": "scriptCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:ONE_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/script-load.json b/src/commands/script-load.json
new file mode 100644
index 0000000..37f80fd
--- /dev/null
+++ b/src/commands/script-load.json
@@ -0,0 +1,32 @@
+{
+ "LOAD": {
+ "summary": "Loads a server-side Lua script to the script cache.",
+ "complexity": "O(N) with N being the length in bytes of the script body.",
+ "group": "scripting",
+ "since": "2.6.0",
+ "arity": 3,
+ "container": "SCRIPT",
+ "function": "scriptCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "STALE"
+ ],
+ "acl_categories": [
+ "SCRIPTING"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "arguments": [
+ {
+ "name": "script",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "The SHA1 digest of the script added into the script cache",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/script.json b/src/commands/script.json
new file mode 100644
index 0000000..1d420c1
--- /dev/null
+++ b/src/commands/script.json
@@ -0,0 +1,9 @@
+{
+ "SCRIPT": {
+ "summary": "A container for Lua scripts management commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "scripting",
+ "since": "2.6.0",
+ "arity": -2
+ }
+}
diff --git a/src/commands/sdiff.json b/src/commands/sdiff.json
new file mode 100644
index 0000000..ac04d03
--- /dev/null
+++ b/src/commands/sdiff.json
@@ -0,0 +1,55 @@
+{
+ "SDIFF": {
+ "summary": "Returns the difference of multiple sets.",
+ "complexity": "O(N) where N is the total number of elements in all given sets.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "sdiffCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List with the members of the resulting set.",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/sdiffstore.json b/src/commands/sdiffstore.json
new file mode 100644
index 0000000..94b2d24
--- /dev/null
+++ b/src/commands/sdiffstore.json
@@ -0,0 +1,73 @@
+{
+ "SDIFFSTORE": {
+ "summary": "Stores the difference of multiple sets in a key.",
+ "complexity": "O(N) where N is the total number of elements in all given sets.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -3,
+ "function": "sdiffstoreCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of the elements in the resulting set.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 1,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/select.json b/src/commands/select.json
new file mode 100644
index 0000000..5cf8634
--- /dev/null
+++ b/src/commands/select.json
@@ -0,0 +1,27 @@
+{
+ "SELECT": {
+ "summary": "Changes the selected database.",
+ "complexity": "O(1)",
+ "group": "connection",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "selectCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "index",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-ckquorum.json b/src/commands/sentinel-ckquorum.json
new file mode 100644
index 0000000..cdd6cd1
--- /dev/null
+++ b/src/commands/sentinel-ckquorum.json
@@ -0,0 +1,26 @@
+{
+ "CKQUORUM": {
+ "summary": "Checks for a Redis Sentinel quorum.",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "Returns OK if the current Sentinel configuration is able to reach the quorum needed to failover a master, and the majority needed to authorize the failover.",
+ "pattern": "OK"
+ },
+ "arguments": [
+ {
+ "name": "master-name",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-config.json b/src/commands/sentinel-config.json
new file mode 100644
index 0000000..a406125
--- /dev/null
+++ b/src/commands/sentinel-config.json
@@ -0,0 +1,121 @@
+{
+ "CONFIG": {
+ "summary": "Configures Redis Sentinel.",
+ "complexity": "O(N) when N is the number of configuration parameters provided",
+ "group": "sentinel",
+ "since": "6.2.0",
+ "arity": -4,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "history": [
+ [
+ "7.2.0",
+ "Added the ability to set and get multiple parameters in one call."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "object",
+          "description": "When 'SENTINEL CONFIG GET' is called, returns a map.",
+ "properties": {
+ "resolve-hostnames": {
+ "oneOf": [
+ {
+ "const": "yes"
+ },
+ {
+ "const": "no"
+ }
+ ]
+ },
+ "announce-hostnames": {
+ "oneOf": [
+ {
+ "const": "yes"
+ },
+ {
+ "const": "no"
+ }
+ ]
+ },
+ "announce-ip": {
+ "type": "string"
+ },
+ "announce-port": {
+ "type": "string"
+ },
+ "sentinel-user": {
+ "type": "string"
+ },
+ "sentinel-pass": {
+ "type": "string"
+ },
+ "loglevel": {
+ "oneOf": [
+ {
+ "const": "debug"
+ },
+ {
+ "const": "verbose"
+ },
+ {
+ "const": "notice"
+ },
+ {
+ "const": "warning"
+ },
+ {
+ "const": "nothing"
+ },
+ {
+ "const": "unknown"
+ }
+ ]
+ }
+ },
+ "additionalProperties": false
+ },
+ {
+ "const": "OK",
+          "description": "When 'SENTINEL CONFIG SET' is called, returns OK on success."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name":"action",
+ "type":"oneof",
+ "arguments":[
+ {
+ "name":"set",
+ "token":"SET",
+ "type":"block",
+ "multiple": true,
+ "arguments":[
+ {
+ "name":"parameter",
+ "type":"string"
+ },
+ {
+ "name":"value",
+ "type":"string"
+ }
+ ]
+ },
+ {
+ "token":"GET",
+ "name":"parameter",
+ "type":"string",
+ "multiple": true
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-debug.json b/src/commands/sentinel-debug.json
new file mode 100644
index 0000000..c671ec5
--- /dev/null
+++ b/src/commands/sentinel-debug.json
@@ -0,0 +1,49 @@
+{
+ "DEBUG": {
+ "summary": "Lists or updates the current configurable parameters of Redis Sentinel.",
+ "complexity": "O(N) where N is the number of configurable parameters",
+ "group": "sentinel",
+ "since": "7.0.0",
+ "arity": -2,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The configuration update was successful.",
+ "const": "OK"
+ },
+ {
+ "description": "List of configurable time parameters and their values (milliseconds).",
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "data",
+ "type": "block",
+ "optional": true,
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "parameter",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-failover.json b/src/commands/sentinel-failover.json
new file mode 100644
index 0000000..8f50375
--- /dev/null
+++ b/src/commands/sentinel-failover.json
@@ -0,0 +1,25 @@
+{
+ "FAILOVER": {
+ "summary": "Forces a Redis Sentinel failover.",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "const": "OK",
+ "description": "Force a fail over as if the master was not reachable, and without asking for agreement to other Sentinels."
+ },
+ "arguments": [
+ {
+ "name": "master-name",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-flushconfig.json b/src/commands/sentinel-flushconfig.json
new file mode 100644
index 0000000..b2fa5de
--- /dev/null
+++ b/src/commands/sentinel-flushconfig.json
@@ -0,0 +1,20 @@
+{
+ "FLUSHCONFIG": {
+ "summary": "Rewrites the Redis Sentinel configuration file.",
+ "complexity": "O(1)",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 2,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "const": "OK",
+ "description": "Force Sentinel to rewrite its configuration on disk, including the current Sentinel state."
+ }
+ }
+}
diff --git a/src/commands/sentinel-get-master-addr-by-name.json b/src/commands/sentinel-get-master-addr-by-name.json
new file mode 100644
index 0000000..998f95e
--- /dev/null
+++ b/src/commands/sentinel-get-master-addr-by-name.json
@@ -0,0 +1,38 @@
+{
+ "GET-MASTER-ADDR-BY-NAME": {
+ "summary": "Returns the port and address of a master Redis instance.",
+ "complexity": "O(1)",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "IP addr or hostname."
+ },
+ {
+ "type": "string",
+ "description": "Port.",
+ "pattern": "[0-9]+"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "master-name",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-help.json b/src/commands/sentinel-help.json
new file mode 100644
index 0000000..d601450
--- /dev/null
+++ b/src/commands/sentinel-help.json
@@ -0,0 +1,24 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "sentinel",
+ "since": "6.2.0",
+ "arity": 2,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/sentinel-info-cache.json b/src/commands/sentinel-info-cache.json
new file mode 100644
index 0000000..af89f18
--- /dev/null
+++ b/src/commands/sentinel-info-cache.json
@@ -0,0 +1,64 @@
+{
+ "INFO-CACHE": {
+ "summary": "Returns the cached `INFO` replies from the deployment's instances.",
+ "complexity": "O(N) where N is the number of instances",
+ "group": "sentinel",
+ "since": "3.2.0",
+ "arity": -3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "This is actually a map, the odd entries are a master name, and the even entries are the last cached INFO output from that master and all its replicas.",
+ "minItems": 0,
+ "maxItems": 4294967295,
+ "items": [
+ {
+ "oneOf": [
+ {
+ "type": "string",
+ "description": "The master name."
+ },
+ {
+ "type": "array",
+ "description": "This is an array of pairs, the odd entries are the INFO age, and the even entries are the cached INFO string. The first pair belong to the master and the rest are its replicas.",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "The number of milliseconds since when the INFO was cached.",
+ "type": "integer"
+ },
+ {
+ "description": "The cached INFO string or null.",
+ "oneOf": [
+ {
+ "description": "The cached INFO string.",
+ "type": "string"
+ },
+ {
+ "description": "No cached INFO string.",
+ "type": "null"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "nodename",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-is-master-down-by-addr.json b/src/commands/sentinel-is-master-down-by-addr.json
new file mode 100644
index 0000000..a13e96b
--- /dev/null
+++ b/src/commands/sentinel-is-master-down-by-addr.json
@@ -0,0 +1,61 @@
+{
+ "IS-MASTER-DOWN-BY-ADDR": {
+ "summary": "Determines whether a master Redis instance is down.",
+ "complexity": "O(1)",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 6,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "minItems": 3,
+ "maxItems": 3,
+ "items": [
+ {
+ "oneOf": [
+ {
+ "const": 0,
+ "description": "Master is up."
+ },
+ {
+ "const": 1,
+ "description": "Master is down."
+ }
+ ]
+ },
+ {
+ "type": "string",
+ "description": "Sentinel address."
+ },
+ {
+ "type": "integer",
+ "description": "Port."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "ip",
+ "type": "string"
+ },
+ {
+ "name": "port",
+ "type": "integer"
+ },
+ {
+ "name": "current-epoch",
+ "type": "integer"
+ },
+ {
+ "name": "runid",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-master.json b/src/commands/sentinel-master.json
new file mode 100644
index 0000000..8ca446d
--- /dev/null
+++ b/src/commands/sentinel-master.json
@@ -0,0 +1,29 @@
+{
+ "MASTER": {
+ "summary": "Returns the state of a master Redis instance.",
+ "complexity": "O(1)",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "object",
+ "description": "The state and info of the specified master.",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "master-name",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-masters.json b/src/commands/sentinel-masters.json
new file mode 100644
index 0000000..1e96b71
--- /dev/null
+++ b/src/commands/sentinel-masters.json
@@ -0,0 +1,26 @@
+{
+ "MASTERS": {
+ "summary": "Returns a list of monitored Redis masters.",
+ "complexity": "O(N) where N is the number of masters",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 2,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of monitored Redis masters, and their state.",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ }
+ }
+}
diff --git a/src/commands/sentinel-monitor.json b/src/commands/sentinel-monitor.json
new file mode 100644
index 0000000..cf6b4ee
--- /dev/null
+++ b/src/commands/sentinel-monitor.json
@@ -0,0 +1,37 @@
+{
+ "MONITOR": {
+ "summary": "Starts monitoring.",
+ "complexity": "O(1)",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 6,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "name",
+ "type": "string"
+ },
+ {
+ "name": "ip",
+ "type": "string"
+ },
+ {
+ "name": "port",
+ "type": "integer"
+ },
+ {
+ "name": "quorum",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-myid.json b/src/commands/sentinel-myid.json
new file mode 100644
index 0000000..4d366eb
--- /dev/null
+++ b/src/commands/sentinel-myid.json
@@ -0,0 +1,20 @@
+{
+ "MYID": {
+ "summary": "Returns the Redis Sentinel instance ID.",
+ "complexity": "O(1)",
+ "group": "sentinel",
+ "since": "6.2.0",
+ "arity": 2,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "description": "Node ID of the sentinel instance.",
+ "type": "string"
+ }
+ }
+}
diff --git a/src/commands/sentinel-pending-scripts.json b/src/commands/sentinel-pending-scripts.json
new file mode 100644
index 0000000..22dae47
--- /dev/null
+++ b/src/commands/sentinel-pending-scripts.json
@@ -0,0 +1,52 @@
+{
+ "PENDING-SCRIPTS": {
+ "summary": "Returns information about pending scripts for Redis Sentinel.",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 2,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of pending scripts.",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "argv": {
+ "type": "array",
+ "description": "Script arguments.",
+ "items": {
+ "type": "string"
+ }
+ },
+ "flags": {
+ "type": "string",
+ "description": "Script flags."
+ },
+ "pid": {
+ "type": "string",
+ "description": "Script pid."
+ },
+ "run-time": {
+ "type": "string",
+ "description": "Script run-time."
+ },
+ "run-delay": {
+ "type": "string",
+ "description": "Script run-delay."
+ },
+ "retry-num": {
+ "type": "string",
+ "description": "Number of times we tried to execute the script."
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/commands/sentinel-remove.json b/src/commands/sentinel-remove.json
new file mode 100644
index 0000000..1fe084f
--- /dev/null
+++ b/src/commands/sentinel-remove.json
@@ -0,0 +1,25 @@
+{
+ "REMOVE": {
+ "summary": "Stops monitoring.",
+ "complexity": "O(1)",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "master-name",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-replicas.json b/src/commands/sentinel-replicas.json
new file mode 100644
index 0000000..09d88f2
--- /dev/null
+++ b/src/commands/sentinel-replicas.json
@@ -0,0 +1,32 @@
+{
+ "REPLICAS": {
+ "summary": "Returns a list of the monitored Redis replicas.",
+ "complexity": "O(N) where N is the number of replicas",
+ "group": "sentinel",
+ "since": "5.0.0",
+ "arity": 3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of replicas for this master, and their state.",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ },
+ "arguments": [
+ {
+ "name": "master-name",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-reset.json b/src/commands/sentinel-reset.json
new file mode 100644
index 0000000..17b53a4
--- /dev/null
+++ b/src/commands/sentinel-reset.json
@@ -0,0 +1,26 @@
+{
+ "RESET": {
+ "summary": "Resets Redis masters by name matching a pattern.",
+ "complexity": "O(N) where N is the number of monitored masters",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The number of masters that were reset."
+ },
+ "arguments": [
+ {
+ "name": "pattern",
+ "type": "pattern"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-sentinels.json b/src/commands/sentinel-sentinels.json
new file mode 100644
index 0000000..fdaa5cb
--- /dev/null
+++ b/src/commands/sentinel-sentinels.json
@@ -0,0 +1,32 @@
+{
+ "SENTINELS": {
+ "summary": "Returns a list of Sentinel instances.",
+ "complexity": "O(N) where N is the number of Sentinels",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": 3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of sentinel instances, and their state.",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ },
+ "arguments": [
+ {
+ "name": "master-name",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-set.json b/src/commands/sentinel-set.json
new file mode 100644
index 0000000..3e86196
--- /dev/null
+++ b/src/commands/sentinel-set.json
@@ -0,0 +1,40 @@
+{
+ "SET": {
+ "summary": "Changes the configuration of a monitored Redis master.",
+ "complexity": "O(1)",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": -5,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "master-name",
+ "type": "string"
+ },
+ {
+ "name": "data",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "option",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-simulate-failure.json b/src/commands/sentinel-simulate-failure.json
new file mode 100644
index 0000000..5031d44
--- /dev/null
+++ b/src/commands/sentinel-simulate-failure.json
@@ -0,0 +1,52 @@
+{
+ "SIMULATE-FAILURE": {
+ "summary": "Simulates failover scenarios.",
+ "group": "sentinel",
+ "since": "3.2.0",
+ "arity": -3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "The simulated flag was set.",
+ "const": "OK"
+ },
+ {
+          "description": "Supported simulation flags. Returned in case `HELP` was used.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "mode",
+ "type": "oneof",
+ "optional":true,
+ "multiple":true,
+ "arguments": [
+ {
+ "name": "crash-after-election",
+ "type": "pure-token"
+ },
+ {
+ "name": "crash-after-promotion",
+ "type": "pure-token"
+ },
+ {
+ "name": "help",
+ "type": "pure-token"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel-slaves.json b/src/commands/sentinel-slaves.json
new file mode 100644
index 0000000..c1fec41
--- /dev/null
+++ b/src/commands/sentinel-slaves.json
@@ -0,0 +1,37 @@
+{
+ "SLAVES": {
+ "summary": "Returns a list of the monitored replicas.",
+ "complexity": "O(N) where N is the number of replicas.",
+ "group": "sentinel",
+ "since": "2.8.0",
+ "arity": 3,
+ "container": "SENTINEL",
+ "function": "sentinelCommand",
+ "deprecated_since": "5.0.0",
+ "replaced_by": "`SENTINEL REPLICAS`",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of monitored replicas, and their state.",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ },
+ "arguments": [
+ {
+ "name": "master-name",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sentinel.json b/src/commands/sentinel.json
new file mode 100644
index 0000000..c14d5a3
--- /dev/null
+++ b/src/commands/sentinel.json
@@ -0,0 +1,14 @@
+{
+ "SENTINEL": {
+ "summary": "A container for Redis Sentinel commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "sentinel",
+ "since": "2.8.4",
+ "arity": -2,
+ "command_flags": [
+ "ADMIN",
+ "SENTINEL",
+ "ONLY_SENTINEL"
+ ]
+ }
+}
diff --git a/src/commands/set.json b/src/commands/set.json
new file mode 100644
index 0000000..8236bc7
--- /dev/null
+++ b/src/commands/set.json
@@ -0,0 +1,152 @@
+{
+ "SET": {
+ "summary": "Sets the string value of a key, ignoring its type. The key is created if it doesn't exist.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": -3,
+ "function": "setCommand",
+ "get_keys_function": "setGetKeys",
+ "history": [
+ [
+ "2.6.12",
+ "Added the `EX`, `PX`, `NX` and `XX` options."
+ ],
+ [
+ "6.0.0",
+ "Added the `KEEPTTL` option."
+ ],
+ [
+ "6.2.0",
+ "Added the `GET`, `EXAT` and `PXAT` option."
+ ],
+ [
+ "7.0.0",
+ "Allowed the `NX` and `GET` options to be used together."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "notes": "RW and ACCESS due to the optional `GET` argument",
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE",
+ "VARIABLE_FLAGS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf":[
+ {
+ "description": "`GET` not given: Operation was aborted (conflict with one of the `XX`/`NX` options).",
+ "type": "null"
+ },
+ {
+ "description": "`GET` not given: The key was set.",
+ "const": "OK"
+ },
+ {
+ "description": "`GET` given: The key didn't exist before the `SET`",
+ "type": "null"
+ },
+ {
+ "description": "`GET` given: The previous value of the key",
+ "type": "string"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "value",
+ "type": "string"
+ },
+ {
+ "name": "condition",
+ "type": "oneof",
+ "optional": true,
+ "since": "2.6.12",
+ "arguments": [
+ {
+ "name": "nx",
+ "type": "pure-token",
+ "token": "NX"
+ },
+ {
+ "name": "xx",
+ "type": "pure-token",
+ "token": "XX"
+ }
+ ]
+ },
+ {
+ "name": "get",
+ "token": "GET",
+ "type": "pure-token",
+ "optional": true,
+ "since": "6.2.0"
+ },
+ {
+ "name": "expiration",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "seconds",
+ "type": "integer",
+ "token": "EX",
+ "since": "2.6.12"
+ },
+ {
+ "name": "milliseconds",
+ "type": "integer",
+ "token": "PX",
+ "since": "2.6.12"
+ },
+ {
+ "name": "unix-time-seconds",
+ "type": "unix-time",
+ "token": "EXAT",
+ "since": "6.2.0"
+ },
+ {
+ "name": "unix-time-milliseconds",
+ "type": "unix-time",
+ "token": "PXAT",
+ "since": "6.2.0"
+ },
+ {
+ "name": "keepttl",
+ "type": "pure-token",
+ "token": "KEEPTTL",
+ "since": "6.0.0"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/setbit.json b/src/commands/setbit.json
new file mode 100644
index 0000000..a1f6726
--- /dev/null
+++ b/src/commands/setbit.json
@@ -0,0 +1,64 @@
+{
+ "SETBIT": {
+ "summary": "Sets or clears the bit at offset of the string value. Creates the key if it doesn't exist.",
+ "complexity": "O(1)",
+ "group": "bitmap",
+ "since": "2.2.0",
+ "arity": 4,
+ "function": "setbitCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "BITMAP"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The original bit value stored at offset.",
+ "oneOf": [
+ {
+ "const": 0
+ },
+ {
+ "const": 1
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "value",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/setex.json b/src/commands/setex.json
new file mode 100644
index 0000000..1543a41
--- /dev/null
+++ b/src/commands/setex.json
@@ -0,0 +1,60 @@
+{
+ "SETEX": {
+ "summary": "Sets the string value and expiration time of a key. Creates the key if it doesn't exist.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "2.0.0",
+ "arity": 4,
+ "function": "setexCommand",
+ "deprecated_since": "2.6.12",
+ "replaced_by": "`SET` with the `EX` argument",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "seconds",
+ "type": "integer"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/setnx.json b/src/commands/setnx.json
new file mode 100644
index 0000000..7459724
--- /dev/null
+++ b/src/commands/setnx.json
@@ -0,0 +1,66 @@
+{
+ "SETNX": {
+ "summary": "Set the string value of a key only when the key doesn't exist.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "setnxCommand",
+ "deprecated_since": "2.6.12",
+ "replaced_by": "`SET` with the `NX` argument",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+            {
+                "description": "The key was not set.",
+                "const": 0
+            },
+            {
+                "description": "The key was set.",
+                "const": 1
+            }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/setrange.json b/src/commands/setrange.json
new file mode 100644
index 0000000..32a8c52
--- /dev/null
+++ b/src/commands/setrange.json
@@ -0,0 +1,57 @@
+{
+ "SETRANGE": {
+ "summary": "Overwrites a part of a string value with another by an offset. Creates the key if it doesn't exist.",
+ "complexity": "O(1), not counting the time taken to copy the new string in place. Usually, this string is very small so the amortized complexity is O(1). Otherwise, complexity is O(M) with M being the length of the value argument.",
+ "group": "string",
+ "since": "2.2.0",
+ "arity": 4,
+ "function": "setrangeCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Length of the string after it was modified by the command.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/shutdown.json b/src/commands/shutdown.json
new file mode 100644
index 0000000..a9e45d4
--- /dev/null
+++ b/src/commands/shutdown.json
@@ -0,0 +1,69 @@
+{
+ "SHUTDOWN": {
+ "summary": "Synchronously saves the database(s) to disk and shuts down the Redis server.",
+ "complexity": "O(N) when saving, where N is the total number of keys in all databases when saving data, otherwise O(1)",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": -1,
+ "function": "shutdownCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added the `NOW`, `FORCE` and `ABORT` modifiers."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "NO_MULTI",
+ "SENTINEL",
+ "ALLOW_BUSY"
+ ],
+ "arguments": [
+ {
+ "name": "save-selector",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "nosave",
+ "type": "pure-token",
+ "token": "NOSAVE"
+ },
+ {
+ "name": "save",
+ "type": "pure-token",
+ "token": "SAVE"
+ }
+ ]
+ },
+ {
+ "name": "now",
+ "type": "pure-token",
+ "token": "NOW",
+ "optional": true,
+ "since": "7.0.0"
+ },
+ {
+ "name": "force",
+ "type": "pure-token",
+ "token": "FORCE",
+ "optional": true,
+ "since": "7.0.0"
+ },
+ {
+ "name": "abort",
+ "type": "pure-token",
+ "token": "ABORT",
+ "optional": true,
+ "since": "7.0.0"
+ }
+ ],
+ "reply_schema": {
+ "description": "OK if ABORT was specified and shutdown was aborted. On successful shutdown, nothing is returned since the server quits and the connection is closed. On failure, an error is returned.",
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/sinter.json b/src/commands/sinter.json
new file mode 100644
index 0000000..ad5ac91
--- /dev/null
+++ b/src/commands/sinter.json
@@ -0,0 +1,55 @@
+{
+ "SINTER": {
+ "summary": "Returns the intersect of multiple sets.",
+ "complexity": "O(N*M) worst case where N is the cardinality of the smallest set and M is the number of sets.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "sinterCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List with the members of the resulting set.",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/sintercard.json b/src/commands/sintercard.json
new file mode 100644
index 0000000..9a79183
--- /dev/null
+++ b/src/commands/sintercard.json
@@ -0,0 +1,60 @@
+{
+ "SINTERCARD": {
+ "summary": "Returns the number of members of the intersect of multiple sets.",
+ "complexity": "O(N*M) worst case where N is the cardinality of the smallest set and M is the number of sets.",
+ "group": "set",
+ "since": "7.0.0",
+ "arity": -3,
+ "function": "sinterCardCommand",
+ "get_keys_function": "sintercardGetKeys",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of the elements in the resulting intersection.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/sinterstore.json b/src/commands/sinterstore.json
new file mode 100644
index 0000000..28ccfff
--- /dev/null
+++ b/src/commands/sinterstore.json
@@ -0,0 +1,73 @@
+{
+ "SINTERSTORE": {
+ "summary": "Stores the intersect of multiple sets in a key.",
+ "complexity": "O(N*M) worst case where N is the cardinality of the smallest set and M is the number of sets.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -3,
+ "function": "sinterstoreCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of the elements in the result set.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 1,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/sismember.json b/src/commands/sismember.json
new file mode 100644
index 0000000..51ef920
--- /dev/null
+++ b/src/commands/sismember.json
@@ -0,0 +1,59 @@
+{
+ "SISMEMBER": {
+ "summary": "Determines whether a member belongs to a set.",
+ "complexity": "O(1)",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "sismemberCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": 0,
+ "description": "The element is not a member of the set, or the key does not exist."
+ },
+ {
+ "const": 1,
+ "description": "The element is a member of the set."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/slaveof.json b/src/commands/slaveof.json
new file mode 100644
index 0000000..6595960
--- /dev/null
+++ b/src/commands/slaveof.json
@@ -0,0 +1,64 @@
+{
+ "SLAVEOF": {
+ "summary": "Sets a Redis server as a replica of another, or promotes it to being a master.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": 3,
+ "function": "replicaofCommand",
+ "deprecated_since": "5.0.0",
+ "replaced_by": "`REPLICAOF`",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NOSCRIPT",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "args",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "host-port",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "host",
+ "type": "string"
+ },
+ {
+ "name": "port",
+ "type": "integer"
+ }
+ ]
+ },
+ {
+ "name": "no-one",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "no",
+ "type": "pure-token",
+ "token": "NO"
+ },
+ {
+ "name": "one",
+ "type": "pure-token",
+ "token": "ONE"
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "description": "slaveOf status",
+ "type": "string",
+ "pattern": "OK*"
+ }
+ }
+}
diff --git a/src/commands/slowlog-get.json b/src/commands/slowlog-get.json
new file mode 100644
index 0000000..ffc54b5
--- /dev/null
+++ b/src/commands/slowlog-get.json
@@ -0,0 +1,74 @@
+{
+ "GET": {
+ "summary": "Returns the slow log's entries.",
+ "complexity": "O(N) where N is the number of entries returned",
+ "group": "server",
+ "since": "2.2.12",
+ "arity": -2,
+ "container": "SLOWLOG",
+ "function": "slowlogCommand",
+ "history": [
+ [
+ "4.0.0",
+ "Added client IP address, port and name to the reply."
+ ]
+ ],
+ "command_flags": [
+ "ADMIN",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Entries from the slow log in chronological order.",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 6,
+ "maxItems": 6,
+ "items": [
+ {
+ "type": "integer",
+ "description": "Slow log entry ID."
+ },
+ {
+ "type": "integer",
+ "description": "The unix timestamp at which the logged command was processed.",
+ "minimum": 0
+ },
+ {
+ "type": "integer",
+ "description": "The amount of time needed for its execution, in microseconds.",
+ "minimum": 0
+ },
+ {
+ "type": "array",
+ "description": "The arguments of the command.",
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "type": "string",
+ "description": "Client IP address and port."
+ },
+ {
+ "type": "string",
+ "description": "Client name if set via the CLIENT SETNAME command."
+ }
+ ]
+ }
+ },
+ "arguments": [
+ {
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/slowlog-help.json b/src/commands/slowlog-help.json
new file mode 100644
index 0000000..dde8fd4
--- /dev/null
+++ b/src/commands/slowlog-help.json
@@ -0,0 +1,22 @@
+{
+ "HELP": {
+ "summary": "Show helpful text about the different subcommands",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "6.2.0",
+ "arity": 2,
+ "container": "SLOWLOG",
+ "function": "slowlogCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/slowlog-len.json b/src/commands/slowlog-len.json
new file mode 100644
index 0000000..717a8ad
--- /dev/null
+++ b/src/commands/slowlog-len.json
@@ -0,0 +1,26 @@
+{
+ "LEN": {
+ "summary": "Returns the number of entries in the slow log.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.2.12",
+ "arity": 2,
+ "container": "SLOWLOG",
+ "function": "slowlogCommand",
+ "command_flags": [
+ "ADMIN",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:AGG_SUM",
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "Number of entries in the slow log.",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/slowlog-reset.json b/src/commands/slowlog-reset.json
new file mode 100644
index 0000000..cfc1e4d
--- /dev/null
+++ b/src/commands/slowlog-reset.json
@@ -0,0 +1,23 @@
+{
+ "RESET": {
+ "summary": "Clears all entries from the slow log.",
+ "complexity": "O(N) where N is the number of entries in the slowlog",
+ "group": "server",
+ "since": "2.2.12",
+ "arity": 2,
+ "container": "SLOWLOG",
+ "function": "slowlogCommand",
+ "command_flags": [
+ "ADMIN",
+ "LOADING",
+ "STALE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_NODES",
+ "RESPONSE_POLICY:ALL_SUCCEEDED"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/slowlog.json b/src/commands/slowlog.json
new file mode 100644
index 0000000..1b9526b
--- /dev/null
+++ b/src/commands/slowlog.json
@@ -0,0 +1,9 @@
+{
+ "SLOWLOG": {
+ "summary": "A container for slow log commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "server",
+ "since": "2.2.12",
+ "arity": -2
+ }
+}
diff --git a/src/commands/smembers.json b/src/commands/smembers.json
new file mode 100644
index 0000000..c511408
--- /dev/null
+++ b/src/commands/smembers.json
@@ -0,0 +1,54 @@
+{
+ "SMEMBERS": {
+ "summary": "Returns all members of a set.",
+ "complexity": "O(N) where N is the set cardinality.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "sinterCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "All elements of the set.",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/smismember.json b/src/commands/smismember.json
new file mode 100644
index 0000000..dbc1ddc
--- /dev/null
+++ b/src/commands/smismember.json
@@ -0,0 +1,66 @@
+{
+ "SMISMEMBER": {
+ "summary": "Determines whether multiple members belong to a set.",
+ "complexity": "O(N) where N is the number of elements being checked for membership",
+ "group": "set",
+ "since": "6.2.0",
+ "arity": -3,
+ "function": "smismemberCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List representing the membership of the given elements, in the same order as they are requested.",
+ "minItems": 1,
+ "items": {
+ "oneOf": [
+ {
+ "const": 0,
+ "description": "Not a member of the set or the key does not exist."
+ },
+ {
+ "const": 1,
+ "description": "A member of the set."
+ }
+ ]
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/smove.json b/src/commands/smove.json
new file mode 100644
index 0000000..9521bb3
--- /dev/null
+++ b/src/commands/smove.json
@@ -0,0 +1,84 @@
+{
+ "SMOVE": {
+ "summary": "Moves a member from one set to another.",
+ "complexity": "O(1)",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": 4,
+ "function": "smoveCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "const": 1,
+ "description": "Element is moved."
+ },
+ {
+ "const": 0,
+ "description": "The element is not a member of source and no operation was performed."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "source",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 1
+ },
+ {
+ "name": "member",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sort.json b/src/commands/sort.json
new file mode 100644
index 0000000..d5f6511
--- /dev/null
+++ b/src/commands/sort.json
@@ -0,0 +1,162 @@
+{
+ "SORT": {
+ "summary": "Sorts the elements in a list, a set, or a sorted set, optionally storing the result.",
+ "complexity": "O(N+M*log(M)) where N is the number of elements in the list or set to sort, and M the number of returned elements. When the elements are not sorted, complexity is O(N).",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "sortCommand",
+ "get_keys_function": "sortGetKeys",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SET",
+ "SORTEDSET",
+ "LIST",
+ "DANGEROUS"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "notes": "For the optional BY/GET keyword. It is marked 'unknown' because the key names derive from the content of the key we sort",
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "unknown": null
+ },
+ "find_keys": {
+ "unknown": null
+ }
+ },
+ {
+ "notes": "For the optional STORE keyword. It is marked 'unknown' because the keyword can appear anywhere in the argument array",
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "unknown": null
+ },
+ "find_keys": {
+ "unknown": null
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "token": "BY",
+ "name": "by-pattern",
+ "display": "pattern",
+ "type": "pattern",
+ "key_spec_index": 1,
+ "optional": true
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ }
+ ]
+ },
+ {
+ "token": "GET",
+ "name": "get-pattern",
+ "display": "pattern",
+ "key_spec_index": 1,
+ "type": "pattern",
+ "optional": true,
+ "multiple": true,
+ "multiple_token": true
+ },
+ {
+ "name": "order",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "asc",
+ "type": "pure-token",
+ "token": "ASC"
+ },
+ {
+ "name": "desc",
+ "type": "pure-token",
+ "token": "DESC"
+ }
+ ]
+ },
+ {
+ "name": "sorting",
+ "token": "ALPHA",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "token": "STORE",
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 2,
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "when the store option is specified the command returns the number of sorted elements in the destination list",
+ "type": "integer",
+ "minimum": 0
+ },
+ {
+ "description": "when not passing the store option the command returns a list of sorted elements",
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "description": "GET option is specified, but no object was found",
+ "type": "null"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/sort_ro.json b/src/commands/sort_ro.json
new file mode 100644
index 0000000..04cc3c8
--- /dev/null
+++ b/src/commands/sort_ro.json
@@ -0,0 +1,132 @@
+{
+ "SORT_RO": {
+ "summary": "Returns the sorted elements of a list, a set, or a sorted set.",
+ "complexity": "O(N+M*log(M)) where N is the number of elements in the list or set to sort, and M the number of returned elements. When the elements are not sorted, complexity is O(N).",
+ "group": "generic",
+ "since": "7.0.0",
+ "arity": -2,
+ "function": "sortroCommand",
+ "get_keys_function": "sortROGetKeys",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SET",
+ "SORTEDSET",
+ "LIST",
+ "DANGEROUS"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "notes": "For the optional BY/GET keyword. It is marked 'unknown' because the key names derive from the content of the key we sort",
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "unknown": null
+ },
+ "find_keys": {
+ "unknown": null
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "token": "BY",
+ "name": "by-pattern",
+ "display": "pattern",
+ "type": "pattern",
+ "key_spec_index": 1,
+ "optional": true
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ }
+ ]
+ },
+ {
+ "token": "GET",
+ "name": "get-pattern",
+ "display": "pattern",
+ "key_spec_index": 1,
+ "type": "pattern",
+ "optional": true,
+ "multiple": true,
+ "multiple_token": true
+ },
+ {
+ "name": "order",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "asc",
+ "type": "pure-token",
+ "token": "ASC"
+ },
+ {
+ "name": "desc",
+ "type": "pure-token",
+ "token": "DESC"
+ }
+ ]
+ },
+ {
+ "name": "sorting",
+ "token": "ALPHA",
+ "type": "pure-token",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "a list of sorted elements",
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "description": "GET option is specified, but no object was found",
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+}
diff --git a/src/commands/spop.json b/src/commands/spop.json
new file mode 100644
index 0000000..c3954be
--- /dev/null
+++ b/src/commands/spop.json
@@ -0,0 +1,80 @@
+{
+ "SPOP": {
+ "summary": "Returns one or more random members from a set after removing them. Deletes the set if the last member was popped.",
+ "complexity": "Without the count argument O(1), otherwise O(N) where N is the value of the passed count.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "spopCommand",
+ "history": [
+ [
+ "3.2.0",
+ "Added the `count` argument."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "null",
+ "description": "The key does not exist."
+ },
+ {
+ "type": "string",
+ "description": "The removed member when 'COUNT' is not given."
+ },
+ {
+ "type": "array",
+                "description": "List of the removed members when 'COUNT' is given.",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "count",
+ "type": "integer",
+ "optional": true,
+ "since": "3.2.0"
+ }
+ ]
+ }
+}
diff --git a/src/commands/spublish.json b/src/commands/spublish.json
new file mode 100644
index 0000000..16c948c
--- /dev/null
+++ b/src/commands/spublish.json
@@ -0,0 +1,51 @@
+{
+ "SPUBLISH": {
+ "summary": "Post a message to a shard channel",
+ "complexity": "O(N) where N is the number of clients subscribed to the receiving shard channel.",
+ "group": "pubsub",
+ "since": "7.0.0",
+ "arity": 3,
+ "function": "spublishCommand",
+ "command_flags": [
+ "PUBSUB",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "MAY_REPLICATE"
+ ],
+ "arguments": [
+ {
+ "name": "shardchannel",
+ "type": "string"
+ },
+ {
+ "name": "message",
+ "type": "string"
+ }
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "NOT_KEY"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of clients that received the message. Note that in a Redis Cluster, only clients that are connected to the same node as the publishing client are included in the count",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/srandmember.json b/src/commands/srandmember.json
new file mode 100644
index 0000000..4ba2b75
--- /dev/null
+++ b/src/commands/srandmember.json
@@ -0,0 +1,83 @@
+{
+ "SRANDMEMBER": {
+ "summary": "Get one or multiple random members from a set",
+ "complexity": "Without the count argument O(1), otherwise O(N) where N is the absolute value of the passed count.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "srandmemberCommand",
+ "history": [
+ [
+ "2.6.0",
+ "Added the optional `count` argument."
+ ]
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "count",
+ "type": "integer",
+ "optional": true,
+ "since": "2.6.0"
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "In case `count` is not given and key doesn't exist",
+ "type": "null"
+ },
+ {
+ "description": "In case `count` is not given, randomly selected element",
+ "type": "string"
+ },
+ {
+ "description": "In case `count` is given, an array of elements",
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "minItems": 1
+ },
+ {
+ "description": "In case `count` is given and key doesn't exist",
+ "type": "array",
+ "maxItems": 0
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/srem.json b/src/commands/srem.json
new file mode 100644
index 0000000..d7797cf
--- /dev/null
+++ b/src/commands/srem.json
@@ -0,0 +1,60 @@
+{
+ "SREM": {
+ "summary": "Removes one or more members from a set. Deletes the set if the last member was removed.",
+ "complexity": "O(N) where N is the number of members to be removed.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -3,
+ "function": "sremCommand",
+ "history": [
+ [
+ "2.4.0",
+ "Accepts multiple `member` arguments."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of members that were removed from the set, not including non existing members.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/sscan.json b/src/commands/sscan.json
new file mode 100644
index 0000000..b221c94
--- /dev/null
+++ b/src/commands/sscan.json
@@ -0,0 +1,81 @@
+{
+ "SSCAN": {
+ "summary": "Iterates over members of a set.",
+ "complexity": "O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.",
+ "group": "set",
+ "since": "2.8.0",
+ "arity": -3,
+ "function": "sscanCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "cursor",
+ "type": "integer"
+ },
+ {
+ "token": "MATCH",
+ "name": "pattern",
+ "type": "pattern",
+ "optional": true
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "cursor and scan response in array form",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "cursor",
+ "type": "string"
+ },
+ {
+ "description": "list of set members",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/ssubscribe.json b/src/commands/ssubscribe.json
new file mode 100644
index 0000000..46373d5
--- /dev/null
+++ b/src/commands/ssubscribe.json
@@ -0,0 +1,42 @@
+{
+ "SSUBSCRIBE": {
+ "summary": "Listens for messages published to shard channels.",
+ "complexity": "O(N) where N is the number of shard channels to subscribe to.",
+ "group": "pubsub",
+ "since": "7.0.0",
+ "arity": -2,
+ "function": "ssubscribeCommand",
+ "command_flags": [
+ "PUBSUB",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "shardchannel",
+ "type": "string",
+ "multiple": true
+ }
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "NOT_KEY"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ]
+ }
+}
diff --git a/src/commands/strlen.json b/src/commands/strlen.json
new file mode 100644
index 0000000..395a02d
--- /dev/null
+++ b/src/commands/strlen.json
@@ -0,0 +1,48 @@
+{
+ "STRLEN": {
+ "summary": "Returns the length of a string value.",
+ "complexity": "O(1)",
+ "group": "string",
+ "since": "2.2.0",
+ "arity": 2,
+ "function": "strlenCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The length of the string value stored at key, or 0 when key does not exist.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/subscribe.json b/src/commands/subscribe.json
new file mode 100644
index 0000000..bdf12b7
--- /dev/null
+++ b/src/commands/subscribe.json
@@ -0,0 +1,25 @@
+{
+ "SUBSCRIBE": {
+ "summary": "Listens for messages published to channels.",
+ "complexity": "O(N) where N is the number of channels to subscribe to.",
+ "group": "pubsub",
+ "since": "2.0.0",
+ "arity": -2,
+ "function": "subscribeCommand",
+ "history": [],
+ "command_flags": [
+ "PUBSUB",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "arguments": [
+ {
+ "name": "channel",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/substr.json b/src/commands/substr.json
new file mode 100644
index 0000000..1244183
--- /dev/null
+++ b/src/commands/substr.json
@@ -0,0 +1,60 @@
+{
+ "SUBSTR": {
+ "summary": "Returns a substring from a string value.",
+ "complexity": "O(N) where N is the length of the returned string. The complexity is ultimately determined by the returned length, but because creating a substring from an existing string is very cheap, it can be considered O(1) for small strings.",
+ "group": "string",
+ "since": "1.0.0",
+ "arity": 4,
+ "function": "getrangeCommand",
+ "deprecated_since": "2.0.0",
+ "replaced_by": "`GETRANGE`",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STRING"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "string",
+ "description": "The substring of the string value stored at key, determined by the offsets start and end (both are inclusive)."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "start",
+ "type": "integer"
+ },
+ {
+ "name": "end",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/sunion.json b/src/commands/sunion.json
new file mode 100644
index 0000000..56f2b9e
--- /dev/null
+++ b/src/commands/sunion.json
@@ -0,0 +1,55 @@
+{
+ "SUNION": {
+ "summary": "Returns the union of multiple sets.",
+ "complexity": "O(N) where N is the total number of elements in all given sets.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -2,
+ "function": "sunionCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT_ORDER"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List with the members of the resulting set.",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/sunionstore.json b/src/commands/sunionstore.json
new file mode 100644
index 0000000..94d4e16
--- /dev/null
+++ b/src/commands/sunionstore.json
@@ -0,0 +1,73 @@
+{
+ "SUNIONSTORE": {
+ "summary": "Stores the union of multiple sets in a key.",
+ "complexity": "O(N) where N is the total number of elements in all given sets.",
+ "group": "set",
+ "since": "1.0.0",
+ "arity": -3,
+ "function": "sunionstoreCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "Number of the elements in the resulting set.",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 1,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/sunsubscribe.json b/src/commands/sunsubscribe.json
new file mode 100644
index 0000000..2d68fc7
--- /dev/null
+++ b/src/commands/sunsubscribe.json
@@ -0,0 +1,43 @@
+{
+ "SUNSUBSCRIBE": {
+ "summary": "Stops listening to messages posted to shard channels.",
+ "complexity": "O(N) where N is the number of shard channels to unsubscribe.",
+ "group": "pubsub",
+ "since": "7.0.0",
+ "arity": -1,
+ "function": "sunsubscribeCommand",
+ "command_flags": [
+ "PUBSUB",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE"
+ ],
+ "arguments": [
+ {
+ "name": "shardchannel",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "NOT_KEY"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ]
+ }
+}
diff --git a/src/commands/swapdb.json b/src/commands/swapdb.json
new file mode 100644
index 0000000..e98bc76
--- /dev/null
+++ b/src/commands/swapdb.json
@@ -0,0 +1,31 @@
+{
+ "SWAPDB": {
+ "summary": "Swaps two Redis databases.",
+ "complexity": "O(N) where N is the count of clients watching or blocking on keys from both databases.",
+ "group": "server",
+ "since": "4.0.0",
+ "arity": 3,
+ "function": "swapdbCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE",
+ "DANGEROUS"
+ ],
+ "arguments": [
+ {
+ "name": "index1",
+ "type": "integer"
+ },
+ {
+ "name": "index2",
+ "type": "integer"
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/sync.json b/src/commands/sync.json
new file mode 100644
index 0000000..e18c337
--- /dev/null
+++ b/src/commands/sync.json
@@ -0,0 +1,15 @@
+{
+ "SYNC": {
+ "summary": "An internal command used in replication.",
+ "group": "server",
+ "since": "1.0.0",
+ "arity": 1,
+ "function": "syncCommand",
+ "command_flags": [
+ "NO_ASYNC_LOADING",
+ "ADMIN",
+ "NO_MULTI",
+ "NOSCRIPT"
+ ]
+ }
+}
diff --git a/src/commands/time.json b/src/commands/time.json
new file mode 100644
index 0000000..3161d3f
--- /dev/null
+++ b/src/commands/time.json
@@ -0,0 +1,28 @@
+{
+ "TIME": {
+ "summary": "Returns the server time.",
+ "complexity": "O(1)",
+ "group": "server",
+ "since": "2.6.0",
+ "arity": 1,
+ "function": "timeCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE",
+ "FAST"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Array containing two elements: Unix time in seconds and microseconds.",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": {
+ "type": "string",
+ "pattern": "[0-9]+"
+ }
+ }
+ }
+}
diff --git a/src/commands/touch.json b/src/commands/touch.json
new file mode 100644
index 0000000..fd1dc61
--- /dev/null
+++ b/src/commands/touch.json
@@ -0,0 +1,53 @@
+{
+ "TOUCH": {
+ "summary": "Returns the number of existing keys out of those specified after updating the time they were last accessed.",
+ "complexity": "O(N) where N is the number of keys that will be touched.",
+ "group": "generic",
+ "since": "3.2.1",
+ "arity": -2,
+ "function": "touchCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:MULTI_SHARD",
+ "RESPONSE_POLICY:AGG_SUM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of touched keys",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/ttl.json b/src/commands/ttl.json
new file mode 100644
index 0000000..9f5ab89
--- /dev/null
+++ b/src/commands/ttl.json
@@ -0,0 +1,70 @@
+{
+ "TTL": {
+ "summary": "Returns the expiration time in seconds of a key.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "ttlCommand",
+ "history": [
+ [
+ "2.8.0",
+ "Added the -2 reply."
+ ]
+ ],
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "TTL in seconds.",
+ "type": "integer",
+ "minimum": 0
+ },
+ {
+ "description": "The key exists but has no associated expire.",
+ "const": -1
+ },
+ {
+ "description": "The key does not exist.",
+ "const": -2
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/type.json b/src/commands/type.json
new file mode 100644
index 0000000..e8353b9
--- /dev/null
+++ b/src/commands/type.json
@@ -0,0 +1,55 @@
+{
+ "TYPE": {
+ "summary": "Determines the type of value stored at a key.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "1.0.0",
+ "arity": 2,
+ "function": "typeCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Key doesn't exist",
+ "type": "null"
+ },
+ {
+ "description": "Type of the key",
+ "type": "string"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/unlink.json b/src/commands/unlink.json
new file mode 100644
index 0000000..a05704a
--- /dev/null
+++ b/src/commands/unlink.json
@@ -0,0 +1,54 @@
+{
+ "UNLINK": {
+ "summary": "Asynchronously deletes one or more keys.",
+        "complexity": "O(1) for each key removed regardless of its size. Then the command does O(N) work in a different thread in order to reclaim memory, where N is the number of allocations the deleted objects were composed of.",
+ "group": "generic",
+ "since": "4.0.0",
+ "arity": -2,
+ "function": "unlinkCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "KEYSPACE"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:MULTI_SHARD",
+ "RESPONSE_POLICY:AGG_SUM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RM",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "the number of keys that were unlinked",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/unsubscribe.json b/src/commands/unsubscribe.json
new file mode 100644
index 0000000..e8586bc
--- /dev/null
+++ b/src/commands/unsubscribe.json
@@ -0,0 +1,25 @@
+{
+ "UNSUBSCRIBE": {
+ "summary": "Stops listening to messages posted to channels.",
+ "complexity": "O(N) where N is the number of channels to unsubscribe.",
+ "group": "pubsub",
+ "since": "2.0.0",
+ "arity": -1,
+ "function": "unsubscribeCommand",
+ "command_flags": [
+ "PUBSUB",
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "SENTINEL"
+ ],
+ "arguments": [
+ {
+ "name": "channel",
+ "type": "string",
+ "optional": true,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/unwatch.json b/src/commands/unwatch.json
new file mode 100644
index 0000000..28cc5f0
--- /dev/null
+++ b/src/commands/unwatch.json
@@ -0,0 +1,23 @@
+{
+ "UNWATCH": {
+ "summary": "Forgets about watched keys of a transaction.",
+ "complexity": "O(1)",
+ "group": "transactions",
+ "since": "2.2.0",
+ "arity": 1,
+ "function": "unwatchCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "TRANSACTION"
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/wait.json b/src/commands/wait.json
new file mode 100644
index 0000000..f936b92
--- /dev/null
+++ b/src/commands/wait.json
@@ -0,0 +1,34 @@
+{
+ "WAIT": {
+ "summary": "Blocks until the asynchronous replication of all preceding write commands sent by the connection is completed.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "3.0.0",
+ "arity": 3,
+ "function": "waitCommand",
+ "command_flags": [
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:AGG_MIN"
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "The number of replicas reached by all the writes performed in the context of the current connection.",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "numreplicas",
+ "type": "integer"
+ },
+ {
+ "name": "timeout",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/waitaof.json b/src/commands/waitaof.json
new file mode 100644
index 0000000..735a8f2
--- /dev/null
+++ b/src/commands/waitaof.json
@@ -0,0 +1,52 @@
+{
+ "WAITAOF": {
+ "summary": "Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the master and/or replicas.",
+ "complexity": "O(1)",
+ "group": "generic",
+ "since": "7.2.0",
+ "arity": 4,
+ "function": "waitaofCommand",
+ "command_flags": [
+ "NOSCRIPT"
+ ],
+ "acl_categories": [
+ "CONNECTION"
+ ],
+ "command_tips": [
+ "REQUEST_POLICY:ALL_SHARDS",
+ "RESPONSE_POLICY:AGG_MIN"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Number of local and remote AOF files in sync.",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Number of local AOF files.",
+ "type": "integer",
+ "minimum": 0
+ },
+ {
+ "description": "Number of replica AOF files.",
+ "type": "number",
+ "minimum": 0
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "numlocal",
+ "type": "integer"
+ },
+ {
+ "name": "numreplicas",
+ "type": "integer"
+ },
+ {
+ "name": "timeout",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/watch.json b/src/commands/watch.json
new file mode 100644
index 0000000..9faab2b
--- /dev/null
+++ b/src/commands/watch.json
@@ -0,0 +1,50 @@
+{
+ "WATCH": {
+ "summary": "Monitors changes to keys to determine the execution of a transaction.",
+ "complexity": "O(1) for every key.",
+ "group": "transactions",
+ "since": "2.2.0",
+ "arity": -2,
+ "function": "watchCommand",
+ "command_flags": [
+ "NOSCRIPT",
+ "LOADING",
+ "STALE",
+ "FAST",
+ "ALLOW_BUSY"
+ ],
+ "acl_categories": [
+ "TRANSACTION"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/xack.json b/src/commands/xack.json
new file mode 100644
index 0000000..4a1e92b
--- /dev/null
+++ b/src/commands/xack.json
@@ -0,0 +1,58 @@
+{
+ "XACK": {
+ "summary": "Returns the number of messages that were successfully acknowledged by the consumer group member of a stream.",
+ "complexity": "O(1) for each message ID processed.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -4,
+ "function": "xackCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ },
+ {
+ "name": "ID",
+ "type": "string",
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "The command returns the number of messages successfully acknowledged. Certain message IDs may no longer be part of the PEL (for example because they have already been acknowledged), and XACK will not count them as successfully acknowledged.",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/xadd.json b/src/commands/xadd.json
new file mode 100644
index 0000000..2188052
--- /dev/null
+++ b/src/commands/xadd.json
@@ -0,0 +1,161 @@
+{
+ "XADD": {
+ "summary": "Appends a new message to a stream. Creates the key if it doesn't exist.",
+ "complexity": "O(1) when adding a new entry, O(N) when trimming where N being the number of entries evicted.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -5,
+ "function": "xaddCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `NOMKSTREAM` option, `MINID` trimming strategy and the `LIMIT` option."
+ ],
+ [
+ "7.0.0",
+ "Added support for the `<ms>-*` explicit ID form."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "notes": "UPDATE instead of INSERT because of the optional trimming feature",
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "token": "NOMKSTREAM",
+ "name": "nomkstream",
+ "type": "pure-token",
+ "optional": true,
+ "since": "6.2.0"
+ },
+ {
+ "name": "trim",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "strategy",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "maxlen",
+ "type": "pure-token",
+ "token": "MAXLEN"
+ },
+ {
+ "name": "minid",
+ "type": "pure-token",
+ "token": "MINID",
+ "since": "6.2.0"
+ }
+ ]
+ },
+ {
+ "name": "operator",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "equal",
+ "type": "pure-token",
+ "token": "="
+ },
+ {
+ "name": "approximately",
+ "type": "pure-token",
+ "token": "~"
+ }
+ ]
+ },
+ {
+ "name": "threshold",
+ "type": "string"
+ },
+ {
+ "token": "LIMIT",
+ "name": "count",
+ "type": "integer",
+ "optional": true,
+ "since": "6.2.0"
+ }
+ ]
+ },
+ {
+ "name": "id-selector",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "auto-id",
+ "type": "pure-token",
+ "token": "*"
+ },
+ {
+ "name": "id",
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "name": "data",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "field",
+ "type": "string"
+ },
+ {
+ "name": "value",
+ "type": "string"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "oneOf":[
+ {
+ "description": "The ID of the added entry. The ID is the one auto-generated if * is passed as ID argument, otherwise the command just returns the same ID specified by the user during insertion.",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "The NOMKSTREAM option is given and the key doesn't exist.",
+ "type": "null"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/xautoclaim.json b/src/commands/xautoclaim.json
new file mode 100644
index 0000000..2e8e9c1
--- /dev/null
+++ b/src/commands/xautoclaim.json
@@ -0,0 +1,158 @@
+{
+ "XAUTOCLAIM": {
+        "summary": "Changes, or acquires, ownership of messages in a consumer group, as if the messages were delivered to a consumer group member.",
+ "complexity": "O(1) if COUNT is small.",
+ "group": "stream",
+ "since": "6.2.0",
+ "arity": -6,
+ "function": "xautoclaimCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added an element to the reply array, containing deleted entries the command cleared from the PEL"
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "Claimed stream entries (with data, if `JUSTID` was not given).",
+ "type": "array",
+ "minItems": 3,
+ "maxItems": 3,
+ "items": [
+ {
+ "description": "Cursor for next call.",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "Data",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
+ {
+ "description": "Entry IDs which no longer exist in the stream, and were deleted from the PEL in which they were found.",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ }
+ }
+ ]
+ },
+ {
+ "description": "Claimed stream entries (without data, if `JUSTID` was given).",
+ "type": "array",
+ "minItems": 3,
+ "maxItems": 3,
+ "items": [
+ {
+ "description": "Cursor for next call.",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ }
+ },
+ {
+ "description": "Entry IDs which no longer exist in the stream, and were deleted from the PEL in which they were found.",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ },
+ {
+ "name": "consumer",
+ "type": "string"
+ },
+ {
+ "name": "min-idle-time",
+ "type": "string"
+ },
+ {
+ "name": "start",
+ "type": "string"
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "name": "justid",
+ "token": "JUSTID",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/xclaim.json b/src/commands/xclaim.json
new file mode 100644
index 0000000..5c44746
--- /dev/null
+++ b/src/commands/xclaim.json
@@ -0,0 +1,138 @@
+{
+ "XCLAIM": {
+        "summary": "Changes, or acquires, ownership of a message in a consumer group, as if the message was delivered to a consumer group member.",
+ "complexity": "O(log N) with N being the number of messages in the PEL of the consumer group.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -6,
+ "function": "xclaimCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ },
+ {
+ "name": "consumer",
+ "type": "string"
+ },
+ {
+ "name": "min-idle-time",
+ "type": "string"
+ },
+ {
+ "name": "ID",
+ "type": "string",
+ "multiple": true
+ },
+ {
+ "token": "IDLE",
+ "name": "ms",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "token": "TIME",
+ "name": "unix-time-milliseconds",
+ "type": "unix-time",
+ "optional": true
+ },
+ {
+ "token": "RETRYCOUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "name": "force",
+ "token": "FORCE",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "justid",
+ "token": "JUSTID",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "lastid",
+ "token": "LASTID",
+ "type": "string",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "Stream entries with IDs matching the specified range.",
+ "anyOf": [
+ {
+ "description": "If JUSTID option is specified, return just an array of IDs of messages successfully claimed",
+ "type": "array",
+ "items": {
+ "description": "Entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ }
+ },
+ {
+ "description": "array of stream entries that contains each entry as an array of 2 elements, the Entry ID and the entry data itself",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "Data",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/xdel.json b/src/commands/xdel.json
new file mode 100644
index 0000000..5cf4a70
--- /dev/null
+++ b/src/commands/xdel.json
@@ -0,0 +1,54 @@
+{
+ "XDEL": {
+ "summary": "Returns the number of messages after removing them from a stream.",
+ "complexity": "O(1) for each single item to delete in the stream, regardless of the stream size.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -3,
+ "function": "xdelCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "ID",
+ "type": "string",
+ "multiple": true
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of entries actually deleted",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/xgroup-create.json b/src/commands/xgroup-create.json
new file mode 100644
index 0000000..6b11a1f
--- /dev/null
+++ b/src/commands/xgroup-create.json
@@ -0,0 +1,85 @@
+{
+ "CREATE": {
+ "summary": "Creates a consumer group.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -5,
+ "container": "XGROUP",
+ "function": "xgroupCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added the `entries_read` named argument."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ },
+ {
+ "name": "id-selector",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "id",
+ "type": "string"
+ },
+ {
+ "name": "new-id",
+ "type": "pure-token",
+ "token": "$"
+ }
+ ]
+ },
+ {
+ "token": "MKSTREAM",
+ "name": "mkstream",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "token": "ENTRIESREAD",
+ "name": "entries-read",
+ "type": "integer",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/xgroup-createconsumer.json b/src/commands/xgroup-createconsumer.json
new file mode 100644
index 0000000..2f3d6a9
--- /dev/null
+++ b/src/commands/xgroup-createconsumer.json
@@ -0,0 +1,64 @@
+{
+ "CREATECONSUMER": {
+ "summary": "Creates a consumer in a consumer group.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "6.2.0",
+ "arity": 5,
+ "container": "XGROUP",
+ "function": "xgroupCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "INSERT"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ },
+ {
+ "name": "consumer",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of created consumers (0 or 1)",
+ "oneOf": [
+ {
+ "const": 1
+ },
+ {
+ "const": 0
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/xgroup-delconsumer.json b/src/commands/xgroup-delconsumer.json
new file mode 100644
index 0000000..12244f8
--- /dev/null
+++ b/src/commands/xgroup-delconsumer.json
@@ -0,0 +1,57 @@
+{
+ "DELCONSUMER": {
+ "summary": "Deletes a consumer from a consumer group.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": 5,
+ "container": "XGROUP",
+ "function": "xgroupCommand",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ },
+ {
+ "name": "consumer",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of pending messages that were yet associated with such a consumer",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/xgroup-destroy.json b/src/commands/xgroup-destroy.json
new file mode 100644
index 0000000..c9affbd
--- /dev/null
+++ b/src/commands/xgroup-destroy.json
@@ -0,0 +1,59 @@
+{
+ "DESTROY": {
+ "summary": "Destroys a consumer group.",
+ "complexity": "O(N) where N is the number of entries in the group's pending entries list (PEL).",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": 4,
+ "container": "XGROUP",
+ "function": "xgroupCommand",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of destroyed consumer groups (0 or 1)",
+ "oneOf": [
+ {
+ "const": 1
+ },
+ {
+ "const": 0
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/xgroup-help.json b/src/commands/xgroup-help.json
new file mode 100644
index 0000000..3d2a738
--- /dev/null
+++ b/src/commands/xgroup-help.json
@@ -0,0 +1,25 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": 2,
+ "container": "XGROUP",
+ "function": "xgroupCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/xgroup-setid.json b/src/commands/xgroup-setid.json
new file mode 100644
index 0000000..76a3c79
--- /dev/null
+++ b/src/commands/xgroup-setid.json
@@ -0,0 +1,79 @@
+{
+ "SETID": {
+ "summary": "Sets the last-delivered ID of a consumer group.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -5,
+ "container": "XGROUP",
+ "function": "xgroupCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added the optional `entries_read` argument."
+ ]
+ ],
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ },
+ {
+ "name": "id-selector",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "id",
+ "type": "string"
+ },
+ {
+ "name": "new-id",
+ "type": "pure-token",
+ "token": "$"
+ }
+ ]
+ },
+ {
+ "name": "entriesread",
+ "display": "entries-read",
+ "token": "ENTRIESREAD",
+ "type": "integer",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/xgroup.json b/src/commands/xgroup.json
new file mode 100644
index 0000000..4910b7c
--- /dev/null
+++ b/src/commands/xgroup.json
@@ -0,0 +1,9 @@
+{
+ "XGROUP": {
+ "summary": "A container for consumer groups commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -2
+ }
+}
diff --git a/src/commands/xinfo-consumers.json b/src/commands/xinfo-consumers.json
new file mode 100644
index 0000000..b507e8e
--- /dev/null
+++ b/src/commands/xinfo-consumers.json
@@ -0,0 +1,80 @@
+{
+ "CONSUMERS": {
+ "summary": "Returns a list of the consumers in a consumer group.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": 4,
+ "container": "XINFO",
+ "function": "xinfoCommand",
+ "history": [
+ [
+ "7.2.0",
+ "Added the `inactive` field."
+ ]
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ }
+ ],
+ "reply_schema": {
+ "description": "Array list of consumers",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "pending": {
+ "type": "integer"
+ },
+ "idle": {
+ "type": "integer"
+ },
+ "inactive": {
+ "type": "integer"
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/commands/xinfo-groups.json b/src/commands/xinfo-groups.json
new file mode 100644
index 0000000..a9cbe8e
--- /dev/null
+++ b/src/commands/xinfo-groups.json
@@ -0,0 +1,92 @@
+{
+ "GROUPS": {
+ "summary": "Returns a list of the consumer groups of a stream.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": 3,
+ "container": "XINFO",
+ "history": [
+ [
+ "7.0.0",
+ "Added the `entries-read` and `lag` fields"
+ ]
+ ],
+ "function": "xinfoCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "consumers": {
+ "type": "integer"
+ },
+ "pending": {
+ "type": "integer"
+ },
+ "last-delivered-id": {
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ "entries-read": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "integer"
+ }
+ ]
+ },
+ "lag": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "integer"
+ }
+ ]
+ }
+ }
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/xinfo-help.json b/src/commands/xinfo-help.json
new file mode 100644
index 0000000..d4cbe3d
--- /dev/null
+++ b/src/commands/xinfo-help.json
@@ -0,0 +1,25 @@
+{
+ "HELP": {
+ "summary": "Returns helpful text about the different subcommands.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": 2,
+ "container": "XINFO",
+ "function": "xinfoCommand",
+ "command_flags": [
+ "LOADING",
+ "STALE"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "Helpful text about subcommands.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/src/commands/xinfo-stream.json b/src/commands/xinfo-stream.json
new file mode 100644
index 0000000..018826f
--- /dev/null
+++ b/src/commands/xinfo-stream.json
@@ -0,0 +1,361 @@
+{
+ "STREAM": {
+ "summary": "Returns information about a stream.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -3,
+ "container": "XINFO",
+ "history": [
+ [
+ "6.0.0",
+ "Added the `FULL` modifier."
+ ],
+ [
+ "7.0.0",
+ "Added the `max-deleted-entry-id`, `entries-added`, `recorded-first-entry-id`, `entries-read` and `lag` fields"
+ ],
+ [
+ "7.2.0",
+ "Added the `active-time` field, and changed the meaning of `seen-time`."
+ ]
+ ],
+ "function": "xinfoCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Summary form, in case `FULL` was not given.",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "length": {
+ "description": "the number of entries in the stream (see `XLEN`)",
+ "type": "integer"
+ },
+ "radix-tree-keys": {
+ "description": "the number of keys in the underlying radix data structure",
+ "type": "integer"
+ },
+ "radix-tree-nodes": {
+ "description": "the number of nodes in the underlying radix data structure",
+ "type": "integer"
+ },
+ "last-generated-id": {
+ "description": "the ID of the last entry that was added to the stream (i.e. the most recently added entry)",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ "max-deleted-entry-id": {
+ "description": "the maximal entry ID that was deleted from the stream",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ "recorded-first-entry-id": {
+ "description": "cached copy of the first entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ "entries-added": {
+ "description": "the count of all entries added to the stream during its lifetime",
+ "type": "integer"
+ },
+ "groups": {
+ "description": "the number of consumer groups defined for the stream",
+ "type": "integer"
+ },
+ "first-entry": {
+ "description": "the first entry of the stream",
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "data",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "last-entry": {
+ "description": "the last entry of the stream",
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "data",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ }
+ },
+ {
+ "description": "Extended form, in case `FULL` was given.",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "length": {
+ "description": "the number of entries in the stream (see `XLEN`)",
+ "type": "integer"
+ },
+ "radix-tree-keys": {
+ "description": "the number of keys in the underlying radix data structure",
+ "type": "integer"
+ },
+ "radix-tree-nodes": {
+ "description": "the number of nodes in the underlying radix data structure",
+ "type": "integer"
+ },
+ "last-generated-id": {
+ "description": "the ID of the last entry that was added to the stream (i.e. the most recently added entry)",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ "max-deleted-entry-id": {
+ "description": "the maximal entry ID that was deleted from the stream",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ "recorded-first-entry-id": {
+ "description": "cached copy of the first entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ "entries-added": {
+ "description": "the count of all entries added to the stream during its lifetime",
+ "type": "integer"
+ },
+ "entries": {
+ "description": "all the entries of the stream",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "data",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
+ "groups": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "name": {
+ "description": "group name",
+ "type": "string"
+ },
+ "last-delivered-id": {
+ "description": "last entry ID that was delivered to a consumer",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ "entries-read": {
+ "description": "total number of entries ever read by consumers in the group",
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "integer"
+ }
+ ]
+ },
+ "lag": {
+ "description": "number of entries left to be consumed from the stream",
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "integer"
+ }
+ ]
+ },
+ "pel-count": {
+ "description": "total number of unacknowledged entries",
+ "type": "integer"
+ },
+ "pending": {
+ "description": "data about all of the unacknowledged entries",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 4,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "Entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "Consumer name",
+ "type": "string"
+ },
+ {
+ "description": "Delivery timestamp",
+ "type": "integer"
+ },
+ {
+ "description": "Delivery count",
+ "type": "integer"
+ }
+ ]
+ }
+ },
+ "consumers": {
+ "description": "data about all of the consumers of the group",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "active-time": {
+ "type": "integer",
+ "description": "Last time this consumer was active (successful reading/claiming).",
+ "minimum": 0
+ },
+ "name": {
+ "description": "consumer name",
+ "type": "string"
+ },
+ "seen-time": {
+ "description": "timestamp of the last interaction attempt of the consumer",
+ "type": "integer"
+ },
+ "pel-count": {
+ "description": "number of unacknowledged entries that belong to the consumer",
+ "type": "integer"
+ },
+ "pending": {
+ "description": "data about the unacknowledged entries",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 3,
+ "maxItems": 3,
+ "items": [
+ {
+ "description": "Entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "Delivery timestamp",
+ "type": "integer"
+ },
+ {
+ "description": "Delivery count",
+ "type": "integer"
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "full-block",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "full",
+ "token": "FULL",
+ "type": "pure-token"
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/xinfo.json b/src/commands/xinfo.json
new file mode 100644
index 0000000..cc85bf1
--- /dev/null
+++ b/src/commands/xinfo.json
@@ -0,0 +1,9 @@
+{
+ "XINFO": {
+ "summary": "A container for stream introspection commands.",
+ "complexity": "Depends on subcommand.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -2
+ }
+}
diff --git a/src/commands/xlen.json b/src/commands/xlen.json
new file mode 100644
index 0000000..16ce72c
--- /dev/null
+++ b/src/commands/xlen.json
@@ -0,0 +1,48 @@
+{
+ "XLEN": {
+ "summary": "Returns the number of messages in a stream.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": 2,
+ "function": "xlenCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of entries of the stream at key",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/xpending.json b/src/commands/xpending.json
new file mode 100644
index 0000000..a6df801
--- /dev/null
+++ b/src/commands/xpending.json
@@ -0,0 +1,160 @@
+{
+ "XPENDING": {
+ "summary": "Returns the information and entries from a stream consumer group's pending entries list.",
+ "complexity": "O(N) with N being the number of elements returned, so asking for a small fixed number of entries per call is O(1). O(M), where M is the total number of entries scanned when used with the IDLE filter. When the command returns just the summary and the list of consumers is small, it runs in O(1) time; otherwise, an additional O(N) time for iterating every consumer.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -3,
+ "function": "xpendingCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `IDLE` option and exclusive range intervals."
+ ]
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "Extended form, in case `start` was given.",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 4,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "Entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "Consumer name",
+ "type": "string"
+ },
+ {
+ "description": "Idle time",
+ "type": "integer"
+ },
+ {
+ "description": "Delivery count",
+ "type": "integer"
+ }
+ ]
+ }
+ },
+ {
+ "description": "Summary form, in case `start` was not given.",
+ "type": "array",
+ "minItems": 4,
+ "maxItems": 4,
+ "items": [
+ {
+ "description": "Total number of pending messages",
+ "type": "integer"
+ },
+ {
+ "description": "Minimal pending entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "Maximal pending entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "Consumers with pending messages",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Consumer name",
+ "type": "string"
+ },
+ {
+ "description": "Number of pending messages",
+ "type": "string"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "group",
+ "type": "string"
+ },
+ {
+ "name": "filters",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "token": "IDLE",
+ "name": "min-idle-time",
+ "type": "integer",
+ "optional": true,
+ "since": "6.2.0"
+ },
+ {
+ "name": "start",
+ "type": "string"
+ },
+ {
+ "name": "end",
+ "type": "string"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "consumer",
+ "type": "string",
+ "optional": true
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/xrange.json b/src/commands/xrange.json
new file mode 100644
index 0000000..edfe2cc
--- /dev/null
+++ b/src/commands/xrange.json
@@ -0,0 +1,87 @@
+{
+ "XRANGE": {
+ "summary": "Returns the messages from a stream within a range of IDs.",
+ "complexity": "O(N) with N being the number of elements being returned. If N is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1).",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -4,
+ "function": "xrangeCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added exclusive ranges."
+ ]
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Stream entries with IDs matching the specified range.",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Entry ID",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "Data",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "start",
+ "type": "string"
+ },
+ {
+ "name": "end",
+ "type": "string"
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/xread.json b/src/commands/xread.json
new file mode 100644
index 0000000..3a78ffb
--- /dev/null
+++ b/src/commands/xread.json
@@ -0,0 +1,107 @@
+{
+ "XREAD": {
+ "summary": "Returns messages from multiple streams with IDs greater than the ones requested. Blocks until a message is available otherwise.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -4,
+ "function": "xreadCommand",
+ "get_keys_function": "xreadGetKeys",
+ "command_flags": [
+ "BLOCKING",
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "keyword": {
+ "keyword": "STREAMS",
+ "startfrom": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 2
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "token": "BLOCK",
+ "name": "milliseconds",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "name": "streams",
+ "token": "STREAMS",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "ID",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "A map of key-value elements, where each element is composed of the key name and the entries reported for that key",
+ "type": "object",
+ "patternProperties": {
+ "^.*$": {
+ "description": "The entries reported for that key",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "entry id",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "array of field-value pairs",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "description": "If BLOCK option is given, and a timeout occurs, or there is no stream we can serve",
+ "type": "null"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/xreadgroup.json b/src/commands/xreadgroup.json
new file mode 100644
index 0000000..93e45a8
--- /dev/null
+++ b/src/commands/xreadgroup.json
@@ -0,0 +1,134 @@
+{
+ "XREADGROUP": {
+ "summary": "Returns new or historical messages from a stream for a consumer in a group. Blocks until a message is available otherwise.",
+ "complexity": "For each stream mentioned: O(M) with M being the number of elements returned. If M is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1). On the other side when XREADGROUP blocks, XADD will pay the O(N) time in order to serve the N clients blocked on the stream getting new data.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -7,
+ "function": "xreadCommand",
+ "get_keys_function": "xreadGetKeys",
+ "command_flags": [
+ "BLOCKING",
+ "WRITE"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "keyword": {
+ "keyword": "STREAMS",
+ "startfrom": 4
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": -1,
+ "step": 1,
+ "limit": 2
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "token": "GROUP",
+ "name": "group-block",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "group",
+ "type": "string"
+ },
+ {
+ "name": "consumer",
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "token": "BLOCK",
+ "name": "milliseconds",
+ "type": "integer",
+ "optional": true
+ },
+ {
+ "name": "noack",
+ "token": "NOACK",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "name": "streams",
+ "token": "STREAMS",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "ID",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "If BLOCK option is specified and the timeout expired",
+ "type": "null"
+ },
+ {
+ "description": "A map of key-value elements, where each element is composed of the key name and the entries reported for that key",
+ "type": "object",
+ "additionalProperties": {
+ "description": "The entries reported for that key",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Stream id",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "oneOf": [
+ {
+ "description": "Array of field-value pairs",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/xrevrange.json b/src/commands/xrevrange.json
new file mode 100644
index 0000000..a0c3e4f
--- /dev/null
+++ b/src/commands/xrevrange.json
@@ -0,0 +1,86 @@
+{
+ "XREVRANGE": {
+ "summary": "Returns the messages from a stream within a range of IDs in reverse order.",
+ "complexity": "O(N) with N being the number of elements returned. If N is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1).",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -4,
+ "function": "xrevrangeCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added exclusive ranges."
+ ]
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "end",
+ "type": "string"
+ },
+ {
+ "name": "start",
+ "type": "string"
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "An array of the entries with IDs matching the specified range",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Stream id",
+ "type": "string",
+ "pattern": "[0-9]+-[0-9]+"
+ },
+ {
+ "description": "Array of field-value pairs",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ }
+ }
+}
diff --git a/src/commands/xsetid.json b/src/commands/xsetid.json
new file mode 100644
index 0000000..4607037
--- /dev/null
+++ b/src/commands/xsetid.json
@@ -0,0 +1,72 @@
+{
+ "XSETID": {
+ "summary": "An internal command for replicating stream values.",
+ "complexity": "O(1)",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -3,
+ "function": "xsetidCommand",
+ "history": [
+ [
+ "7.0.0",
+ "Added the `entries_added` and `max_deleted_entry_id` arguments."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "last-id",
+ "type": "string"
+ },
+ {
+ "name": "entries-added",
+ "token": "ENTRIESADDED",
+ "type": "integer",
+ "optional": true,
+ "since": "7.0.0"
+ },
+ {
+ "name": "max-deleted-id",
+ "token": "MAXDELETEDID",
+ "type": "string",
+ "optional": true,
+ "since": "7.0.0"
+ }
+ ],
+ "reply_schema": {
+ "const": "OK"
+ }
+ }
+}
diff --git a/src/commands/xtrim.json b/src/commands/xtrim.json
new file mode 100644
index 0000000..0b79cd4
--- /dev/null
+++ b/src/commands/xtrim.json
@@ -0,0 +1,108 @@
+{
+ "XTRIM": {
+ "summary": "Deletes messages from the beginning of a stream.",
+ "complexity": "O(N), with N being the number of evicted entries. Constant times are very small however, since entries are organized in macro nodes containing multiple entries that can be released with a single deallocation.",
+ "group": "stream",
+ "since": "5.0.0",
+ "arity": -4,
+ "function": "xtrimCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `MINID` trimming strategy and the `LIMIT` option."
+ ]
+ ],
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "STREAM"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "trim",
+ "type": "block",
+ "arguments": [
+ {
+ "name": "strategy",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "maxlen",
+ "type": "pure-token",
+ "token": "MAXLEN"
+ },
+ {
+ "name": "minid",
+ "type": "pure-token",
+ "token": "MINID",
+ "since": "6.2.0"
+ }
+ ]
+ },
+ {
+ "name": "operator",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "equal",
+ "type": "pure-token",
+ "token": "="
+ },
+ {
+ "name": "approximately",
+ "type": "pure-token",
+ "token": "~"
+ }
+ ]
+ },
+ {
+ "name": "threshold",
+ "type": "string"
+ },
+ {
+ "token": "LIMIT",
+ "name": "count",
+ "type": "integer",
+ "optional": true,
+ "since": "6.2.0"
+ }
+ ]
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of entries deleted from the stream.",
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+}
diff --git a/src/commands/zadd.json b/src/commands/zadd.json
new file mode 100644
index 0000000..d489ee4
--- /dev/null
+++ b/src/commands/zadd.json
@@ -0,0 +1,144 @@
+{
+ "ZADD": {
+ "summary": "Adds one or more members to a sorted set, or updates their scores. Creates the key if it doesn't exist.",
+ "complexity": "O(log(N)) for each item added, where N is the number of elements in the sorted set.",
+ "group": "sorted_set",
+ "since": "1.2.0",
+ "arity": -4,
+ "function": "zaddCommand",
+ "history": [
+ [
+ "2.4.0",
+ "Accepts multiple elements."
+ ],
+ [
+ "3.0.2",
+ "Added the `XX`, `NX`, `CH` and `INCR` options."
+ ],
+ [
+ "6.2.0",
+ "Added the `GT` and `LT` options."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "Operation was aborted (conflict with one of the `XX`/`NX`/`LT`/`GT` options).",
+ "type": "null"
+ },
+ {
+ "description": "The number of new members (when the `CH` option is not used)",
+ "type": "integer"
+ },
+ {
+ "description": "The number of new or updated members (when the `CH` option is used)",
+ "type": "integer"
+ },
+ {
+ "description": "The updated score of the member (when the `INCR` option is used)",
+ "type": "number"
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "condition",
+ "type": "oneof",
+ "optional": true,
+ "since": "3.0.2",
+ "arguments": [
+ {
+ "name": "nx",
+ "type": "pure-token",
+ "token": "NX"
+ },
+ {
+ "name": "xx",
+ "type": "pure-token",
+ "token": "XX"
+ }
+ ]
+ },
+ {
+ "name": "comparison",
+ "type": "oneof",
+ "optional": true,
+ "since": "6.2.0",
+ "arguments": [
+ {
+ "name": "gt",
+ "type": "pure-token",
+ "token": "GT"
+ },
+ {
+ "name": "lt",
+ "type": "pure-token",
+ "token": "LT"
+ }
+ ]
+ },
+ {
+ "name": "change",
+ "token": "CH",
+ "type": "pure-token",
+ "optional": true,
+ "since": "3.0.2"
+ },
+ {
+ "name": "increment",
+ "token": "INCR",
+ "type": "pure-token",
+ "optional": true,
+ "since": "3.0.2"
+ },
+ {
+ "name": "data",
+ "type": "block",
+ "multiple": true,
+ "arguments": [
+ {
+ "name": "score",
+ "type": "double"
+ },
+ {
+ "name": "member",
+ "type": "string"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/zcard.json b/src/commands/zcard.json
new file mode 100644
index 0000000..58683a4
--- /dev/null
+++ b/src/commands/zcard.json
@@ -0,0 +1,47 @@
+{
+ "ZCARD": {
+ "summary": "Returns the number of members in a sorted set.",
+ "complexity": "O(1)",
+ "group": "sorted_set",
+ "since": "1.2.0",
+ "arity": 2,
+ "function": "zcardCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The cardinality (number of elements) of the sorted set, or 0 if key does not exist",
+ "type": "integer"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ }
+ ]
+ }
+}
diff --git a/src/commands/zcount.json b/src/commands/zcount.json
new file mode 100644
index 0000000..0fdebd7
--- /dev/null
+++ b/src/commands/zcount.json
@@ -0,0 +1,56 @@
+{
+ "ZCOUNT": {
+ "summary": "Returns the count of members in a sorted set that have scores within a range.",
+ "complexity": "O(log(N)) with N being the number of elements in the sorted set.",
+ "group": "sorted_set",
+ "since": "2.0.0",
+ "arity": 4,
+ "function": "zcountCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of elements in the specified score range",
+ "type": "integer"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "min",
+ "type": "double"
+ },
+ {
+ "name": "max",
+ "type": "double"
+ }
+ ]
+ }
+}
diff --git a/src/commands/zdiff.json b/src/commands/zdiff.json
new file mode 100644
index 0000000..912d5c6
--- /dev/null
+++ b/src/commands/zdiff.json
@@ -0,0 +1,85 @@
+{
+ "ZDIFF": {
+ "summary": "Returns the difference between multiple sorted sets.",
+ "complexity": "O(L + (N-K)log(N)) worst case where L is the total number of elements in all the sets, N is the size of the first set, and K is the size of the result set.",
+ "group": "sorted_set",
+ "since": "6.2.0",
+ "arity": -3,
+ "function": "zdiffCommand",
+ "get_keys_function": "zunionInterDiffGetKeys",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "A list of members. Returned in case `WITHSCORES` was not used.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "description": "Members and their scores. Returned in case `WITHSCORES` was used. In RESP2 this is returned as a flat array",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Member",
+ "type": "string"
+ },
+ {
+ "description": "Score",
+ "type": "number"
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "withscores",
+ "token": "WITHSCORES",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zdiffstore.json b/src/commands/zdiffstore.json
new file mode 100644
index 0000000..35b7203
--- /dev/null
+++ b/src/commands/zdiffstore.json
@@ -0,0 +1,77 @@
+{
+ "ZDIFFSTORE": {
+ "summary": "Stores the difference of multiple sorted sets in a key.",
+ "complexity": "O(L + (N-K)log(N)) worst case where L is the total number of elements in all the sets, N is the size of the first set, and K is the size of the result set.",
+ "group": "sorted_set",
+ "since": "6.2.0",
+ "arity": -4,
+ "function": "zdiffstoreCommand",
+ "get_keys_function": "zunionInterDiffStoreGetKeys",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of elements in the resulting sorted set at `destination`",
+ "type": "integer"
+ },
+ "arguments": [
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 1,
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zincrby.json b/src/commands/zincrby.json
new file mode 100644
index 0000000..6830883
--- /dev/null
+++ b/src/commands/zincrby.json
@@ -0,0 +1,58 @@
+{
+ "ZINCRBY": {
+ "summary": "Increments the score of a member in a sorted set.",
+ "complexity": "O(log(N)) where N is the number of elements in the sorted set.",
+ "group": "sorted_set",
+ "since": "1.2.0",
+ "arity": 4,
+ "function": "zincrbyCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The new score of `member`",
+ "type": "number"
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "increment",
+ "type": "integer"
+ },
+ {
+ "name": "member",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/zinter.json b/src/commands/zinter.json
new file mode 100644
index 0000000..4828e21
--- /dev/null
+++ b/src/commands/zinter.json
@@ -0,0 +1,115 @@
+{
+ "ZINTER": {
+ "summary": "Returns the intersect of multiple sorted sets.",
+ "complexity": "O(N*K)+O(M*log(M)) worst case with N being the smallest input sorted set, K being the number of input sorted sets and M being the number of elements in the resulting sorted set.",
+ "group": "sorted_set",
+ "since": "6.2.0",
+ "arity": -3,
+ "function": "zinterCommand",
+ "get_keys_function": "zunionInterDiffGetKeys",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "Result of intersection, containing only the member names. Returned in case `WITHSCORES` was not used.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "description": "Result of intersection, containing members and their scores. Returned in case `WITHSCORES` was used. In RESP2 this is returned as a flat array",
+ "type": "array",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Member",
+ "type": "string"
+ },
+ {
+ "description": "Score",
+ "type": "number"
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "token": "WEIGHTS",
+ "name": "weight",
+ "type": "integer",
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "token": "AGGREGATE",
+ "name": "aggregate",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "sum",
+ "type": "pure-token",
+ "token": "SUM"
+ },
+ {
+ "name": "min",
+ "type": "pure-token",
+ "token": "MIN"
+ },
+ {
+ "name": "max",
+ "type": "pure-token",
+ "token": "MAX"
+ }
+ ]
+ },
+ {
+ "name": "withscores",
+ "token": "WITHSCORES",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zintercard.json b/src/commands/zintercard.json
new file mode 100644
index 0000000..7fdab3e
--- /dev/null
+++ b/src/commands/zintercard.json
@@ -0,0 +1,60 @@
+{
+ "ZINTERCARD": {
+ "summary": "Returns the number of members of the intersect of multiple sorted sets.",
+ "complexity": "O(N*K) worst case with N being the smallest input sorted set, K being the number of input sorted sets.",
+ "group": "sorted_set",
+ "since": "7.0.0",
+ "arity": -3,
+ "function": "zinterCardCommand",
+ "get_keys_function": "zunionInterDiffGetKeys",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of elements in the resulting intersection.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zinterstore.json b/src/commands/zinterstore.json
new file mode 100644
index 0000000..5bd940c
--- /dev/null
+++ b/src/commands/zinterstore.json
@@ -0,0 +1,108 @@
+{
+ "ZINTERSTORE": {
+ "summary": "Stores the intersect of multiple sorted sets in a key.",
+ "complexity": "O(N*K)+O(M*log(M)) worst case with N being the smallest input sorted set, K being the number of input sorted sets and M being the number of elements in the resulting sorted set.",
+ "group": "sorted_set",
+ "since": "2.0.0",
+ "arity": -4,
+ "function": "zinterstoreCommand",
+ "get_keys_function": "zunionInterDiffStoreGetKeys",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of elements in the resulting sorted set.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 1,
+ "multiple": true
+ },
+ {
+ "token": "WEIGHTS",
+ "name": "weight",
+ "type": "integer",
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "token": "AGGREGATE",
+ "name": "aggregate",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "sum",
+ "type": "pure-token",
+ "token": "SUM"
+ },
+ {
+ "name": "min",
+ "type": "pure-token",
+ "token": "MIN"
+ },
+ {
+ "name": "max",
+ "type": "pure-token",
+ "token": "MAX"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/zlexcount.json b/src/commands/zlexcount.json
new file mode 100644
index 0000000..8bf2884
--- /dev/null
+++ b/src/commands/zlexcount.json
@@ -0,0 +1,57 @@
+{
+ "ZLEXCOUNT": {
+ "summary": "Returns the number of members in a sorted set within a lexicographical range.",
+ "complexity": "O(log(N)) with N being the number of elements in the sorted set.",
+ "group": "sorted_set",
+ "since": "2.8.9",
+ "arity": 4,
+ "function": "zlexcountCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "Number of elements in the specified score range.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "min",
+ "type": "string"
+ },
+ {
+ "name": "max",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/zmpop.json b/src/commands/zmpop.json
new file mode 100644
index 0000000..86dc3ba
--- /dev/null
+++ b/src/commands/zmpop.json
@@ -0,0 +1,111 @@
+{
+ "ZMPOP": {
+ "summary": "Returns the highest- or lowest-scoring members from one or more sorted sets after removing them. Deletes the sorted set if the last member was popped.",
+ "complexity": "O(K) + O(M*log(N)) where K is the number of provided keys, N being the number of elements in the sorted set, and M being the number of elements popped.",
+ "group": "sorted_set",
+ "since": "7.0.0",
+ "arity": -4,
+ "function": "zmpopCommand",
+ "get_keys_function": "zmpopGetKeys",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "description": "No element could be popped.",
+ "type": "null"
+ },
+ {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "Name of the key that elements were popped."
+ },
+ {
+ "type": "array",
+ "description": "Popped elements.",
+ "items": {
+ "type": "array",
+ "uniqueItems": true,
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "Name of the member."
+ },
+ {
+ "type": "number",
+ "description": "Score."
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "name": "where",
+ "type": "oneof",
+ "arguments": [
+ {
+ "name": "min",
+ "type": "pure-token",
+ "token": "MIN"
+ },
+ {
+ "name": "max",
+ "type": "pure-token",
+ "token": "MAX"
+ }
+ ]
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zmscore.json b/src/commands/zmscore.json
new file mode 100644
index 0000000..6a036fe
--- /dev/null
+++ b/src/commands/zmscore.json
@@ -0,0 +1,65 @@
+{
+ "ZMSCORE": {
+ "summary": "Returns the score of one or more members in a sorted set.",
+ "complexity": "O(N) where N is the number of members being requested.",
+ "group": "sorted_set",
+ "since": "6.2.0",
+ "arity": -3,
+ "function": "zmscoreCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "oneOf": [
+ {
+ "type": "number",
+ "description": "The score of the member (a double precision floating point number). In RESP2, this is returned as string."
+ },
+ {
+ "type": "null",
+ "description": "Member does not exist in the sorted set."
+ }
+ ]
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zpopmax.json b/src/commands/zpopmax.json
new file mode 100644
index 0000000..56d86bf
--- /dev/null
+++ b/src/commands/zpopmax.json
@@ -0,0 +1,89 @@
+{
+ "ZPOPMAX": {
+ "summary": "Returns the highest-scoring members from a sorted set after removing them. Deletes the sorted set if the last member was popped.",
+ "complexity": "O(log(N)*M) with N being the number of elements in the sorted set, and M being the number of elements popped.",
+ "group": "sorted_set",
+ "since": "5.0.0",
+ "arity": -2,
+ "function": "zpopmaxCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "type": "array",
+ "description": "List of popped elements and scores when 'COUNT' isn't specified.",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "Popped element."
+ },
+ {
+ "type": "number",
+ "description": "Score."
+ }
+ ]
+ },
+ {
+ "type": "array",
+ "description": "List of popped elements and scores when 'COUNT' is specified.",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "Popped element."
+ },
+ {
+ "type": "number",
+ "description": "Score."
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zpopmin.json b/src/commands/zpopmin.json
new file mode 100644
index 0000000..3fe36f3
--- /dev/null
+++ b/src/commands/zpopmin.json
@@ -0,0 +1,89 @@
+{
+ "ZPOPMIN": {
+ "summary": "Returns the lowest-scoring members from a sorted set after removing them. Deletes the sorted set if the last member was popped.",
+ "complexity": "O(log(N)*M) with N being the number of elements in the sorted set, and M being the number of elements popped.",
+ "group": "sorted_set",
+ "since": "5.0.0",
+ "arity": -2,
+ "function": "zpopminCommand",
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "ACCESS",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "type": "array",
+ "description": "List of popped elements and scores when 'COUNT' isn't specified.",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "Popped element."
+ },
+ {
+ "type": "number",
+ "description": "Score."
+ }
+ ]
+ },
+ {
+ "type": "array",
+ "description": "List of popped elements and scores when 'COUNT' is specified.",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "Popped element."
+ },
+ {
+ "type": "number",
+ "description": "Score."
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrandmember.json b/src/commands/zrandmember.json
new file mode 100644
index 0000000..13abc9a
--- /dev/null
+++ b/src/commands/zrandmember.json
@@ -0,0 +1,101 @@
+{
+ "ZRANDMEMBER": {
+ "summary": "Returns one or more random members from a sorted set.",
+ "complexity": "O(N) where N is the number of members returned",
+ "group": "sorted_set",
+ "since": "6.2.0",
+ "arity": -2,
+ "function": "zrandmemberCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "type": "null",
+ "description": "Key does not exist."
+ },
+ {
+ "type": "string",
+ "description": "Randomly selected element when 'COUNT' is not used."
+ },
+ {
+ "type": "array",
+ "description": "Randomly selected elements when 'COUNT' is used.",
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "type": "array",
+ "description": "Randomly selected elements when 'COUNT' and 'WITHSCORES' modifiers are used.",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "Element."
+ },
+ {
+ "type": "number",
+ "description": "Score."
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "options",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "count",
+ "type": "integer"
+ },
+ {
+ "name": "withscores",
+ "token": "WITHSCORES",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrange.json b/src/commands/zrange.json
new file mode 100644
index 0000000..dc7af8d
--- /dev/null
+++ b/src/commands/zrange.json
@@ -0,0 +1,137 @@
+{
+ "ZRANGE": {
+ "summary": "Returns members in a sorted set within a range of indexes.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements returned.",
+ "group": "sorted_set",
+ "since": "1.2.0",
+ "arity": -4,
+ "function": "zrangeCommand",
+ "history": [
+ [
+ "6.2.0",
+ "Added the `REV`, `BYSCORE`, `BYLEX` and `LIMIT` options."
+ ]
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "A list of member elements",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "description": "Members and their scores. Returned in case `WITHSCORES` was used. In RESP2 this is returned as a flat array",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "Member",
+ "type": "string"
+ },
+ {
+ "description": "Score",
+ "type": "number"
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "start",
+ "type": "string"
+ },
+ {
+ "name": "stop",
+ "type": "string"
+ },
+ {
+ "name": "sortby",
+ "type": "oneof",
+ "optional": true,
+ "since": "6.2.0",
+ "arguments": [
+ {
+ "name": "byscore",
+ "type": "pure-token",
+ "token": "BYSCORE"
+ },
+ {
+ "name": "bylex",
+ "type": "pure-token",
+ "token": "BYLEX"
+ }
+ ]
+ },
+ {
+ "name": "rev",
+ "token": "REV",
+ "type": "pure-token",
+ "optional": true,
+ "since": "6.2.0"
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "block",
+ "optional": true,
+ "since": "6.2.0",
+ "arguments": [
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ }
+ ]
+ },
+ {
+ "name": "withscores",
+ "token": "WITHSCORES",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrangebylex.json b/src/commands/zrangebylex.json
new file mode 100644
index 0000000..5949b87
--- /dev/null
+++ b/src/commands/zrangebylex.json
@@ -0,0 +1,80 @@
+{
+ "ZRANGEBYLEX": {
+ "summary": "Returns members in a sorted set within a lexicographical range.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements being returned. If M is constant (e.g. always asking for the first 10 elements with LIMIT), you can consider it O(log(N)).",
+ "group": "sorted_set",
+ "since": "2.8.9",
+ "arity": -4,
+ "function": "zrangebylexCommand",
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`ZRANGE` with the `BYLEX` argument",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of elements in the specified score range.",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "min",
+ "type": "string"
+ },
+ {
+ "name": "max",
+ "type": "string"
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrangebyscore.json b/src/commands/zrangebyscore.json
new file mode 100644
index 0000000..557ef1d
--- /dev/null
+++ b/src/commands/zrangebyscore.json
@@ -0,0 +1,119 @@
+{
+ "ZRANGEBYSCORE": {
+ "summary": "Returns members in a sorted set within a range of scores.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements being returned. If M is constant (e.g. always asking for the first 10 elements with LIMIT), you can consider it O(log(N)).",
+ "group": "sorted_set",
+ "since": "1.0.5",
+ "arity": -4,
+ "function": "zrangebyscoreCommand",
+ "history": [
+ [
+ "2.0.0",
+ "Added the `WITHSCORES` modifier."
+ ]
+ ],
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`ZRANGE` with the `BYSCORE` argument",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "type": "array",
+ "description": "List of the elements in the specified score range, as not WITHSCORES",
+ "uniqueItems": true,
+ "items": {
+ "type": "string",
+ "description": "Element"
+ }
+ },
+ {
+ "type": "array",
+ "description": "List of the elements and their scores in the specified score range, as WITHSCORES used",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "description": "Tuple of element and its score",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "element",
+ "type": "string"
+ },
+ {
+ "description": "score",
+ "type": "number"
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "min",
+ "type": "double"
+ },
+ {
+ "name": "max",
+ "type": "double"
+ },
+ {
+ "name": "withscores",
+ "token": "WITHSCORES",
+ "type": "pure-token",
+ "optional": true,
+ "since": "2.0.0"
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrangestore.json b/src/commands/zrangestore.json
new file mode 100644
index 0000000..8eeaf74
--- /dev/null
+++ b/src/commands/zrangestore.json
@@ -0,0 +1,118 @@
+{
+ "ZRANGESTORE": {
+ "summary": "Stores a range of members from sorted set in a key.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements stored into the destination key.",
+ "group": "sorted_set",
+ "since": "6.2.0",
+ "arity": -5,
+ "function": "zrangestoreCommand",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "Number of elements in the resulting sorted set."
+ },
+ "arguments": [
+ {
+ "name": "dst",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "src",
+ "type": "key",
+ "key_spec_index": 1
+ },
+ {
+ "name": "min",
+ "type": "string"
+ },
+ {
+ "name": "max",
+ "type": "string"
+ },
+ {
+ "name": "sortby",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "byscore",
+ "type": "pure-token",
+ "token": "BYSCORE"
+ },
+ {
+ "name": "bylex",
+ "type": "pure-token",
+ "token": "BYLEX"
+ }
+ ]
+ },
+ {
+ "name": "rev",
+ "token": "REV",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrank.json b/src/commands/zrank.json
new file mode 100644
index 0000000..f5f427c
--- /dev/null
+++ b/src/commands/zrank.json
@@ -0,0 +1,86 @@
+{
+ "ZRANK": {
+ "summary": "Returns the index of a member in a sorted set ordered by ascending scores.",
+ "complexity": "O(log(N))",
+ "group": "sorted_set",
+ "since": "2.0.0",
+ "arity": -3,
+ "function": "zrankCommand",
+ "history": [
+ [
+ "7.2.0",
+ "Added the optional `WITHSCORE` argument."
+ ]
+ ],
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "null",
+ "description": "Key does not exist or the member does not exist in the sorted set."
+ },
+ {
+ "type": "integer",
+ "description": "The rank of the member when 'WITHSCORE' is not used."
+ },
+ {
+ "type": "array",
+ "description": "The rank and score of the member when 'WITHSCORE' is used.",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string"
+ },
+ {
+ "name": "withscore",
+ "token": "WITHSCORE",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrem.json b/src/commands/zrem.json
new file mode 100644
index 0000000..8766124
--- /dev/null
+++ b/src/commands/zrem.json
@@ -0,0 +1,60 @@
+{
+ "ZREM": {
+ "summary": "Removes one or more members from a sorted set. Deletes the sorted set if all members were removed.",
+ "complexity": "O(M*log(N)) with N being the number of elements in the sorted set and M the number of elements to be removed.",
+ "group": "sorted_set",
+ "since": "1.2.0",
+ "arity": -3,
+ "function": "zremCommand",
+ "history": [
+ [
+ "2.4.0",
+ "Accepts multiple elements."
+ ]
+ ],
+ "command_flags": [
+ "WRITE",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of members removed from the sorted set, not including non existing members.",
+ "type": "integer",
+ "minimum": 0
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string",
+ "multiple": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zremrangebylex.json b/src/commands/zremrangebylex.json
new file mode 100644
index 0000000..169472c
--- /dev/null
+++ b/src/commands/zremrangebylex.json
@@ -0,0 +1,55 @@
+{
+ "ZREMRANGEBYLEX": {
+ "summary": "Removes members in a sorted set within a lexicographical range. Deletes the sorted set if all members were removed.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements removed by the operation.",
+ "group": "sorted_set",
+ "since": "2.8.9",
+ "arity": 4,
+ "function": "zremrangebylexCommand",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "Number of elements removed."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "min",
+ "type": "string"
+ },
+ {
+ "name": "max",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/zremrangebyrank.json b/src/commands/zremrangebyrank.json
new file mode 100644
index 0000000..7e668e8
--- /dev/null
+++ b/src/commands/zremrangebyrank.json
@@ -0,0 +1,55 @@
+{
+ "ZREMRANGEBYRANK": {
+ "summary": "Removes members in a sorted set within a range of indexes. Deletes the sorted set if all members were removed.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements removed by the operation.",
+ "group": "sorted_set",
+ "since": "2.0.0",
+ "arity": 4,
+ "function": "zremrangebyrankCommand",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "Number of elements removed."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "start",
+ "type": "integer"
+ },
+ {
+ "name": "stop",
+ "type": "integer"
+ }
+ ]
+ }
+}
diff --git a/src/commands/zremrangebyscore.json b/src/commands/zremrangebyscore.json
new file mode 100644
index 0000000..aed5d1b
--- /dev/null
+++ b/src/commands/zremrangebyscore.json
@@ -0,0 +1,55 @@
+{
+ "ZREMRANGEBYSCORE": {
+ "summary": "Removes members in a sorted set within a range of scores. Deletes the sorted set if all members were removed.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements removed by the operation.",
+ "group": "sorted_set",
+ "since": "1.2.0",
+ "arity": 4,
+ "function": "zremrangebyscoreCommand",
+ "command_flags": [
+ "WRITE"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RW",
+ "DELETE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "integer",
+ "description": "Number of elements removed."
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "min",
+ "type": "double"
+ },
+ {
+ "name": "max",
+ "type": "double"
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrevrange.json b/src/commands/zrevrange.json
new file mode 100644
index 0000000..116fe82
--- /dev/null
+++ b/src/commands/zrevrange.json
@@ -0,0 +1,94 @@
+{
+ "ZREVRANGE": {
+ "summary": "Returns members in a sorted set within a range of indexes in reverse order.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements returned.",
+ "group": "sorted_set",
+ "since": "1.2.0",
+ "arity": -4,
+ "function": "zrevrangeCommand",
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`ZRANGE` with the `REV` argument",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "List of member elements.",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "description": "List of the members and their scores. Returned in case `WITHSCORES` was used.",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "member",
+ "type": "string"
+ },
+ {
+ "description": "score",
+ "type": "number"
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "start",
+ "type": "integer"
+ },
+ {
+ "name": "stop",
+ "type": "integer"
+ },
+ {
+ "name": "withscores",
+ "token": "WITHSCORES",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrevrangebylex.json b/src/commands/zrevrangebylex.json
new file mode 100644
index 0000000..d1d8100
--- /dev/null
+++ b/src/commands/zrevrangebylex.json
@@ -0,0 +1,80 @@
+{
+ "ZREVRANGEBYLEX": {
+ "summary": "Returns members in a sorted set within a lexicographical range in reverse order.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements being returned. If M is constant (e.g. always asking for the first 10 elements with LIMIT), you can consider it O(log(N)).",
+ "group": "sorted_set",
+ "since": "2.8.9",
+ "arity": -4,
+ "function": "zrevrangebylexCommand",
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`ZRANGE` with the `REV` and `BYLEX` arguments",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "type": "array",
+ "description": "List of the elements in the specified score range.",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "max",
+ "type": "string"
+ },
+ {
+ "name": "min",
+ "type": "string"
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrevrangebyscore.json b/src/commands/zrevrangebyscore.json
new file mode 100644
index 0000000..ab04052
--- /dev/null
+++ b/src/commands/zrevrangebyscore.json
@@ -0,0 +1,118 @@
+{
+ "ZREVRANGEBYSCORE": {
+ "summary": "Returns members in a sorted set within a range of scores in reverse order.",
+ "complexity": "O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements being returned. If M is constant (e.g. always asking for the first 10 elements with LIMIT), you can consider it O(log(N)).",
+ "group": "sorted_set",
+ "since": "2.2.0",
+ "arity": -4,
+ "function": "zrevrangebyscoreCommand",
+ "history": [
+ [
+ "2.1.6",
+ "`min` and `max` can be exclusive."
+ ]
+ ],
+ "deprecated_since": "6.2.0",
+ "replaced_by": "`ZRANGE` with the `REV` and `BYSCORE` arguments",
+ "doc_flags": [
+ "DEPRECATED"
+ ],
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "type": "array",
+ "description": "List of the elements in the specified score range, as not WITHSCORES",
+ "uniqueItems": true,
+ "items": {
+ "type": "string",
+ "description": "Element"
+ }
+ },
+ {
+ "type": "array",
+ "description": "List of the elements and their scores in the specified score range, as WITHSCORES used",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "description": "Tuple of element and its score",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string",
+ "description": "element"
+ },
+ {
+ "type": "number",
+ "description": "score"
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "max",
+ "type": "double"
+ },
+ {
+ "name": "min",
+ "type": "double"
+ },
+ {
+ "name": "withscores",
+ "token": "WITHSCORES",
+ "type": "pure-token",
+ "optional": true
+ },
+ {
+ "token": "LIMIT",
+ "name": "limit",
+ "type": "block",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "offset",
+ "type": "integer"
+ },
+ {
+ "name": "count",
+ "type": "integer"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/commands/zrevrank.json b/src/commands/zrevrank.json
new file mode 100644
index 0000000..39897ca
--- /dev/null
+++ b/src/commands/zrevrank.json
@@ -0,0 +1,86 @@
+{
+ "ZREVRANK": {
+ "summary": "Returns the index of a member in a sorted set ordered by descending scores.",
+ "complexity": "O(log(N))",
+ "group": "sorted_set",
+ "since": "2.0.0",
+ "arity": -3,
+ "function": "zrevrankCommand",
+ "history": [
+ [
+ "7.2.0",
+ "Added the optional `WITHSCORE` argument."
+ ]
+ ],
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "null",
+ "description": "Key does not exist or the member does not exist in the sorted set."
+ },
+ {
+ "type": "integer",
+ "description": "The rank of the member when 'WITHSCORE' is not used."
+ },
+ {
+ "type": "array",
+ "description": "The rank and score of the member when 'WITHSCORE' is used.",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "number"
+ }
+ ]
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string"
+ },
+ {
+ "name": "withscore",
+ "token": "WITHSCORE",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zscan.json b/src/commands/zscan.json
new file mode 100644
index 0000000..7c69ccf
--- /dev/null
+++ b/src/commands/zscan.json
@@ -0,0 +1,81 @@
+{
+ "ZSCAN": {
+ "summary": "Iterates over members and scores of a sorted set.",
+ "complexity": "O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.",
+ "group": "sorted_set",
+ "since": "2.8.0",
+ "arity": -3,
+ "function": "zscanCommand",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "command_tips": [
+ "NONDETERMINISTIC_OUTPUT"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "cursor",
+ "type": "integer"
+ },
+ {
+ "token": "MATCH",
+ "name": "pattern",
+ "type": "pattern",
+ "optional": true
+ },
+ {
+ "token": "COUNT",
+ "name": "count",
+ "type": "integer",
+ "optional": true
+ }
+ ],
+ "reply_schema": {
+ "description": "cursor and scan response in array form",
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "description": "cursor",
+ "type": "string"
+ },
+ {
+ "description": "list of elements of the sorted set, where each even element is the member, and each odd value is its associated score",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ }
+}
diff --git a/src/commands/zscore.json b/src/commands/zscore.json
new file mode 100644
index 0000000..5022470
--- /dev/null
+++ b/src/commands/zscore.json
@@ -0,0 +1,60 @@
+{
+ "ZSCORE": {
+ "summary": "Returns the score of a member in a sorted set.",
+ "complexity": "O(1)",
+ "group": "sorted_set",
+ "since": "1.2.0",
+ "arity": 3,
+ "function": "zscoreCommand",
+ "command_flags": [
+ "READONLY",
+ "FAST"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "oneOf": [
+ {
+ "type": "number",
+ "description": "The score of the member (a double precision floating point number). In RESP2, this is returned as string."
+ },
+ {
+ "type": "null",
+ "description": "Member does not exist in the sorted set, or key does not exist."
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "member",
+ "type": "string"
+ }
+ ]
+ }
+}
diff --git a/src/commands/zunion.json b/src/commands/zunion.json
new file mode 100644
index 0000000..1ce3dc5
--- /dev/null
+++ b/src/commands/zunion.json
@@ -0,0 +1,115 @@
+{
+ "ZUNION": {
+ "summary": "Returns the union of multiple sorted sets.",
+ "complexity": "O(N)+O(M*log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set.",
+ "group": "sorted_set",
+ "since": "6.2.0",
+ "arity": -3,
+ "function": "zunionCommand",
+ "get_keys_function": "zunionInterDiffGetKeys",
+ "command_flags": [
+ "READONLY"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "anyOf": [
+ {
+ "description": "The result of union when 'WITHSCORES' is not used.",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "description": "The result of union when 'WITHSCORES' is used.",
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "number"
+ }
+ ]
+ }
+ }
+ ]
+ },
+ "arguments": [
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 0,
+ "multiple": true
+ },
+ {
+ "token": "WEIGHTS",
+ "name": "weight",
+ "type": "integer",
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "token": "AGGREGATE",
+ "name": "aggregate",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "sum",
+ "type": "pure-token",
+ "token": "SUM"
+ },
+ {
+ "name": "min",
+ "type": "pure-token",
+ "token": "MIN"
+ },
+ {
+ "name": "max",
+ "type": "pure-token",
+ "token": "MAX"
+ }
+ ]
+ },
+ {
+ "name": "withscores",
+ "token": "WITHSCORES",
+ "type": "pure-token",
+ "optional": true
+ }
+ ]
+ }
+}
diff --git a/src/commands/zunionstore.json b/src/commands/zunionstore.json
new file mode 100644
index 0000000..65e7b54
--- /dev/null
+++ b/src/commands/zunionstore.json
@@ -0,0 +1,107 @@
+{
+ "ZUNIONSTORE": {
+ "summary": "Stores the union of multiple sorted sets in a key.",
+ "complexity": "O(N)+O(M log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set.",
+ "group": "sorted_set",
+ "since": "2.0.0",
+ "arity": -4,
+ "function": "zunionstoreCommand",
+ "get_keys_function": "zunionInterDiffStoreGetKeys",
+ "command_flags": [
+ "WRITE",
+ "DENYOOM"
+ ],
+ "acl_categories": [
+ "SORTEDSET"
+ ],
+ "key_specs": [
+ {
+ "flags": [
+ "OW",
+ "UPDATE"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 1
+ }
+ },
+ "find_keys": {
+ "range": {
+ "lastkey": 0,
+ "step": 1,
+ "limit": 0
+ }
+ }
+ },
+ {
+ "flags": [
+ "RO",
+ "ACCESS"
+ ],
+ "begin_search": {
+ "index": {
+ "pos": 2
+ }
+ },
+ "find_keys": {
+ "keynum": {
+ "keynumidx": 0,
+ "firstkey": 1,
+ "step": 1
+ }
+ }
+ }
+ ],
+ "reply_schema": {
+ "description": "The number of elements in the resulting sorted set.",
+ "type": "integer"
+ },
+ "arguments": [
+ {
+ "name": "destination",
+ "type": "key",
+ "key_spec_index": 0
+ },
+ {
+ "name": "numkeys",
+ "type": "integer"
+ },
+ {
+ "name": "key",
+ "type": "key",
+ "key_spec_index": 1,
+ "multiple": true
+ },
+ {
+ "token": "WEIGHTS",
+ "name": "weight",
+ "type": "integer",
+ "optional": true,
+ "multiple": true
+ },
+ {
+ "token": "AGGREGATE",
+ "name": "aggregate",
+ "type": "oneof",
+ "optional": true,
+ "arguments": [
+ {
+ "name": "sum",
+ "type": "pure-token",
+ "token": "SUM"
+ },
+ {
+ "name": "min",
+ "type": "pure-token",
+ "token": "MIN"
+ },
+ {
+ "name": "max",
+ "type": "pure-token",
+ "token": "MAX"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/config.c b/src/config.c
new file mode 100644
index 0000000..b267042
--- /dev/null
+++ b/src/config.c
@@ -0,0 +1,3413 @@
+/* Configuration file parsing and CONFIG GET/SET commands implementation.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "cluster.h"
+#include "connection.h"
+#include "bio.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <glob.h>
+#include <string.h>
+#include <locale.h>
+#include <ctype.h>
+
+/*-----------------------------------------------------------------------------
+ * Config file name-value maps.
+ *----------------------------------------------------------------------------*/
+
+typedef struct deprecatedConfig {
+ const char *name;
+ const int argc_min;
+ const int argc_max;
+} deprecatedConfig;
+
+configEnum maxmemory_policy_enum[] = {
+ {"volatile-lru", MAXMEMORY_VOLATILE_LRU},
+ {"volatile-lfu", MAXMEMORY_VOLATILE_LFU},
+ {"volatile-random",MAXMEMORY_VOLATILE_RANDOM},
+ {"volatile-ttl",MAXMEMORY_VOLATILE_TTL},
+ {"allkeys-lru",MAXMEMORY_ALLKEYS_LRU},
+ {"allkeys-lfu",MAXMEMORY_ALLKEYS_LFU},
+ {"allkeys-random",MAXMEMORY_ALLKEYS_RANDOM},
+ {"noeviction",MAXMEMORY_NO_EVICTION},
+ {NULL, 0}
+};
+
+configEnum syslog_facility_enum[] = {
+ {"user", LOG_USER},
+ {"local0", LOG_LOCAL0},
+ {"local1", LOG_LOCAL1},
+ {"local2", LOG_LOCAL2},
+ {"local3", LOG_LOCAL3},
+ {"local4", LOG_LOCAL4},
+ {"local5", LOG_LOCAL5},
+ {"local6", LOG_LOCAL6},
+ {"local7", LOG_LOCAL7},
+ {NULL, 0}
+};
+
+configEnum loglevel_enum[] = {
+ {"debug", LL_DEBUG},
+ {"verbose", LL_VERBOSE},
+ {"notice", LL_NOTICE},
+ {"warning", LL_WARNING},
+ {"nothing", LL_NOTHING},
+ {NULL,0}
+};
+
+configEnum supervised_mode_enum[] = {
+ {"upstart", SUPERVISED_UPSTART},
+ {"systemd", SUPERVISED_SYSTEMD},
+ {"auto", SUPERVISED_AUTODETECT},
+ {"no", SUPERVISED_NONE},
+ {NULL, 0}
+};
+
+configEnum aof_fsync_enum[] = {
+ {"everysec", AOF_FSYNC_EVERYSEC},
+ {"always", AOF_FSYNC_ALWAYS},
+ {"no", AOF_FSYNC_NO},
+ {NULL, 0}
+};
+
+configEnum shutdown_on_sig_enum[] = {
+ {"default", 0},
+ {"save", SHUTDOWN_SAVE},
+ {"nosave", SHUTDOWN_NOSAVE},
+ {"now", SHUTDOWN_NOW},
+ {"force", SHUTDOWN_FORCE},
+ {NULL, 0}
+};
+
+configEnum repl_diskless_load_enum[] = {
+ {"disabled", REPL_DISKLESS_LOAD_DISABLED},
+ {"on-empty-db", REPL_DISKLESS_LOAD_WHEN_DB_EMPTY},
+ {"swapdb", REPL_DISKLESS_LOAD_SWAPDB},
+ {NULL, 0}
+};
+
+configEnum tls_auth_clients_enum[] = {
+ {"no", TLS_CLIENT_AUTH_NO},
+ {"yes", TLS_CLIENT_AUTH_YES},
+ {"optional", TLS_CLIENT_AUTH_OPTIONAL},
+ {NULL, 0}
+};
+
+configEnum oom_score_adj_enum[] = {
+ {"no", OOM_SCORE_ADJ_NO},
+ /* "yes" intentionally maps to the same value as "relative", acting as an
+ * alias rather than a distinct mode — TODO confirm this is deliberate. */
+ {"yes", OOM_SCORE_RELATIVE},
+ {"relative", OOM_SCORE_RELATIVE},
+ {"absolute", OOM_SCORE_ADJ_ABSOLUTE},
+ {NULL, 0}
+};
+
+configEnum acl_pubsub_default_enum[] = {
+ {"allchannels", SELECTOR_FLAG_ALLCHANNELS},
+ {"resetchannels", 0},
+ {NULL, 0}
+};
+
+configEnum sanitize_dump_payload_enum[] = {
+ {"no", SANITIZE_DUMP_NO},
+ {"yes", SANITIZE_DUMP_YES},
+ {"clients", SANITIZE_DUMP_CLIENTS},
+ {NULL, 0}
+};
+
+configEnum protected_action_enum[] = {
+ {"no", PROTECTED_ACTION_ALLOWED_NO},
+ {"yes", PROTECTED_ACTION_ALLOWED_YES},
+ {"local", PROTECTED_ACTION_ALLOWED_LOCAL},
+ {NULL, 0}
+};
+
+configEnum cluster_preferred_endpoint_type_enum[] = {
+ {"ip", CLUSTER_ENDPOINT_TYPE_IP},
+ {"hostname", CLUSTER_ENDPOINT_TYPE_HOSTNAME},
+ {"unknown-endpoint", CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT},
+ {NULL, 0}
+};
+
+configEnum propagation_error_behavior_enum[] = {
+ {"ignore", PROPAGATION_ERR_BEHAVIOR_IGNORE},
+ {"panic", PROPAGATION_ERR_BEHAVIOR_PANIC},
+ {"panic-on-replicas", PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS},
+ {NULL, 0}
+};
+
+/* Output buffer limits presets. */
+clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = {
+ {0, 0, 0}, /* normal */
+ {1024*1024*256, 1024*1024*64, 60}, /* slave */
+ {1024*1024*32, 1024*1024*8, 60} /* pubsub */
+};
+
+/* OOM Score defaults */
+int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT] = { 0, 200, 800 };
+
+/* Generic config infrastructure function pointers
+ * int is_valid_fn(val, err)
+ * Return 1 when val is valid, and 0 when invalid.
+ * Optionally set err to a static error string.
+ */
+
+/* Configuration values that require no special handling to set, get, load or
+ * rewrite. */
+typedef struct boolConfigData {
+ int *config; /* The pointer to the server config this value is stored in */
+ int default_value; /* The default value of the config on rewrite */
+ int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+} boolConfigData;
+
+typedef struct stringConfigData {
+ char **config; /* Pointer to the server config this value is stored in. */
+ const char *default_value; /* Default value of the config on rewrite. */
+ int (*is_valid_fn)(char* val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+ int convert_empty_to_null; /* Boolean indicating if empty strings should
+ be stored as a NULL value. */
+} stringConfigData;
+
+typedef struct sdsConfigData {
+ sds *config; /* Pointer to the server config this value is stored in. */
+ char *default_value; /* Default value of the config on rewrite. */
+ int (*is_valid_fn)(sds val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+ int convert_empty_to_null; /* Boolean indicating if empty SDS strings should
+ be stored as a NULL value. */
+} sdsConfigData;
+
+typedef struct enumConfigData {
+ int *config; /* The pointer to the server config this value is stored in */
+ configEnum *enum_value; /* The underlying enum type this data represents */
+ int default_value; /* The default value of the config on rewrite */
+ int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+} enumConfigData;
+
+typedef enum numericType {
+ NUMERIC_TYPE_INT,
+ NUMERIC_TYPE_UINT,
+ NUMERIC_TYPE_LONG,
+ NUMERIC_TYPE_ULONG,
+ NUMERIC_TYPE_LONG_LONG,
+ NUMERIC_TYPE_ULONG_LONG,
+ NUMERIC_TYPE_SIZE_T,
+ NUMERIC_TYPE_SSIZE_T,
+ NUMERIC_TYPE_OFF_T,
+ NUMERIC_TYPE_TIME_T,
+} numericType;
+
+typedef struct numericConfigData {
+ union {
+ int *i;
+ unsigned int *ui;
+ long *l;
+ unsigned long *ul;
+ long long *ll;
+ unsigned long long *ull;
+ size_t *st;
+ ssize_t *sst;
+ off_t *ot;
+ time_t *tt;
+ } config; /* The pointer to the numeric config this value is stored in */
+ unsigned int flags;
+ numericType numeric_type; /* An enum indicating the type of this value */
+ long long lower_bound; /* The lower bound of this numeric value */
+ long long upper_bound; /* The upper bound of this numeric value */
+ long long default_value; /* The default value of the config on rewrite */
+ int (*is_valid_fn)(long long val, const char **err); /* Optional function to check validity of new value (generic doc above) */
+} numericConfigData;
+
+typedef union typeData {
+ boolConfigData yesno;
+ stringConfigData string;
+ sdsConfigData sds;
+ enumConfigData enumd;
+ numericConfigData numeric;
+} typeData;
+
+typedef struct standardConfig standardConfig;
+
+typedef int (*apply_fn)(const char **err);
+typedef struct typeInterface {
+ /* Called on server start, to init the server with default value */
+ void (*init)(standardConfig *config);
+ /* Called on server startup and CONFIG SET, returns 1 on success,
+ * 2 meaning no actual change done, 0 on error and can set a verbose err
+ * string */
+ int (*set)(standardConfig *config, sds *argv, int argc, const char **err);
+ /* Optional: called after `set()` to apply the config change. Used only in
+ * the context of CONFIG SET. Returns 1 on success, 0 on failure.
+ * Optionally set err to a static error string. */
+ apply_fn apply;
+ /* Called on CONFIG GET, returns sds to be used in reply */
+ sds (*get)(standardConfig *config);
+ /* Called on CONFIG REWRITE, required to rewrite the config state */
+ void (*rewrite)(standardConfig *config, const char *name, struct rewriteConfigState *state);
+} typeInterface;
+
+struct standardConfig {
+ const char *name; /* The user visible name of this config */
+ const char *alias; /* An alias that can also be used for this config */
+ unsigned int flags; /* Flags for this specific config */
+ typeInterface interface; /* The function pointers that define the type interface */
+ typeData data; /* The type specific data exposed used by the interface */
+ configType type; /* The type of config this is. */
+ void *privdata; /* privdata for this config, for module configs this is a ModuleConfig struct */
+};
+
+dict *configs = NULL; /* Runtime config values */
+
+/* Lookup a config by the provided sds string name, or return NULL
+ * if the config does not exist. The lookup is a plain dict search in the
+ * global `configs` dict; the returned pointer is owned by that dict and
+ * must not be freed by the caller. */
+static standardConfig *lookupConfig(sds name) {
+ dictEntry *de = dictFind(configs, name);
+ return de ? dictGetVal(de) : NULL;
+}
+
+/*-----------------------------------------------------------------------------
+ * Enum access functions
+ *----------------------------------------------------------------------------*/
+
+/* Get enum value from name. If there is no match INT_MIN is returned.
+ *
+ * When `bitflags` is non-zero, multiple names may be passed in `argv` and
+ * the matching enum values are OR-ed together into the result. When
+ * `bitflags` is zero, exactly one name is required (argc must be 1).
+ * Name matching is case insensitive. Any single unmatched name makes the
+ * whole call fail with INT_MIN. */
+int configEnumGetValue(configEnum *ce, sds *argv, int argc, int bitflags) {
+ if (argc == 0 || (!bitflags && argc != 1)) return INT_MIN;
+ int values = 0;
+ for (int i = 0; i < argc; i++) {
+ int matched = 0;
+ for (configEnum *ceItem = ce; ceItem->name != NULL; ceItem++) {
+ if (!strcasecmp(argv[i],ceItem->name)) {
+ values |= ceItem->val;
+ matched = 1;
+ }
+ }
+ if (!matched) return INT_MIN;
+ }
+ return values;
+}
+
+/* Get enum name/s from value. If no matches are found "unknown" is returned.
+ *
+ * For an exact value match a single name is returned. Otherwise, when
+ * `bitflags` is non-zero, a space separated list of the flag names covering
+ * `values` is built; if any bits remain uncovered, "unknown" is returned
+ * instead. The returned sds is newly allocated and owned by the caller. */
+static sds configEnumGetName(configEnum *ce, int values, int bitflags) {
+ sds names = NULL;
+ int unmatched = values;
+ for( ; ce->name != NULL; ce++) {
+ if (values == ce->val) { /* Short path for perfect match */
+ sdsfree(names);
+ return sdsnew(ce->name);
+ }
+
+ /* Note: for bitflags, we want them sorted from high to low, so that if there are several / partially
+ * overlapping entries, we'll prefer the ones matching more bits. */
+ if (bitflags && ce->val && ce->val == (unmatched & ce->val)) {
+ names = names ? sdscatfmt(names, " %s", ce->name) : sdsnew(ce->name);
+ unmatched &= ~ce->val;
+ }
+ }
+ if (!names || unmatched) {
+ sdsfree(names);
+ return sdsnew("unknown");
+ }
+ return names;
+}
+
+/* Used for INFO generation. Returns the symbolic name of the currently
+ * configured server.maxmemory_policy, scanning maxmemory_policy_enum.
+ * Panics if the configured value does not map to any known policy. */
+const char *evictPolicyToString(void) {
+ for (configEnum *ce = maxmemory_policy_enum; ce->name != NULL; ce++) {
+ if (server.maxmemory_policy == ce->val)
+ return ce->name;
+ }
+ serverPanic("unknown eviction policy");
+}
+
+/*-----------------------------------------------------------------------------
+ * Config file parsing
+ *----------------------------------------------------------------------------*/
+
+/* Convert a "yes"/"no" string (case insensitive) to 1/0.
+ * Returns -1 when the string is neither. */
+int yesnotoi(char *s) {
+ if (!strcasecmp(s,"yes")) return 1;
+ else if (!strcasecmp(s,"no")) return 0;
+ else return -1;
+}
+
+/* Append a new <seconds, changes> RDB save point to server.saveparams,
+ * growing the array by one element via zrealloc. */
+void appendServerSaveParams(time_t seconds, int changes) {
+ server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
+ server.saveparams[server.saveparamslen].seconds = seconds;
+ server.saveparams[server.saveparamslen].changes = changes;
+ server.saveparamslen++;
+}
+
+/* Free all configured RDB save points and reset the array to empty. */
+void resetServerSaveParams(void) {
+ zfree(server.saveparams);
+ server.saveparams = NULL;
+ server.saveparamslen = 0;
+}
+
+/* Enqueue a module (path plus its argv) to be loaded later, appending the
+ * entry to server.loadmodule_queue. The path and every argument are copied
+ * (sdsnew / createRawStringObject), so the caller retains ownership of
+ * `path` and `argv`. */
+void queueLoadModule(sds path, sds *argv, int argc) {
+ int i;
+ struct moduleLoadQueueEntry *loadmod;
+
+ loadmod = zmalloc(sizeof(struct moduleLoadQueueEntry));
+ loadmod->argv = argc ? zmalloc(sizeof(robj*)*argc) : NULL;
+ loadmod->path = sdsnew(path);
+ loadmod->argc = argc;
+ for (i = 0; i < argc; i++) {
+ loadmod->argv[i] = createRawStringObject(argv[i],sdslen(argv[i]));
+ }
+ listAddNodeTail(server.loadmodule_queue,loadmod);
+}
+
+/* Parse an array of `arg_len` sds strings, validate and populate
+ * server.client_obuf_limits if valid.
+ * Used in CONFIG SET and configuration file parsing.
+ *
+ * Input is a sequence of <class> <hard> <soft> <soft_seconds> quadruplets.
+ * The update is all-or-nothing: every quadruplet is validated first, and
+ * server.client_obuf_limits is only written once the whole input has been
+ * accepted. Returns 1 on success, 0 on error; on error *err is optionally
+ * set to a static error string. */
+static int updateClientOutputBufferLimit(sds *args, int arg_len, const char **err) {
+ int j;
+ int class;
+ unsigned long long hard, soft;
+ int hard_err, soft_err;
+ int soft_seconds;
+ char *soft_seconds_eptr;
+ clientBufferLimitsConfig values[CLIENT_TYPE_OBUF_COUNT];
+ int classes[CLIENT_TYPE_OBUF_COUNT] = {0};
+
+ /* We need a multiple of 4: <class> <hard> <soft> <soft_seconds> */
+ if (arg_len % 4) {
+ if (err) *err = "Wrong number of arguments in "
+ "buffer limit configuration.";
+ return 0;
+ }
+
+ /* Sanity check of single arguments, so that we either refuse the
+ * whole configuration string or accept it all, even if a single
+ * error in a single client class is present. */
+ for (j = 0; j < arg_len; j += 4) {
+ class = getClientTypeByName(args[j]);
+ if (class == -1 || class == CLIENT_TYPE_MASTER) {
+ if (err) *err = "Invalid client class specified in "
+ "buffer limit configuration.";
+ return 0;
+ }
+
+ hard = memtoull(args[j+1], &hard_err);
+ soft = memtoull(args[j+2], &soft_err);
+ /* NOTE(review): soft_seconds is an int but strtoll returns long long;
+ * values outside int range are truncated before the < 0 check below —
+ * confirm the intended accepted range. */
+ soft_seconds = strtoll(args[j+3], &soft_seconds_eptr, 10);
+ if (hard_err || soft_err ||
+ soft_seconds < 0 || *soft_seconds_eptr != '\0')
+ {
+ if (err) *err = "Error in hard, soft or soft_seconds setting in "
+ "buffer limit configuration.";
+ return 0;
+ }
+
+ values[class].hard_limit_bytes = hard;
+ values[class].soft_limit_bytes = soft;
+ values[class].soft_limit_seconds = soft_seconds;
+ classes[class] = 1;
+ }
+
+ /* Finally set the new config. */
+ for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) {
+ if (classes[j]) server.client_obuf_limits[j] = values[j];
+ }
+
+ return 1;
+}
+
/* Note this is here to support detecting we're running a config set from
 * within conf file parsing. This is only needed to support the deprecated
 * abnormal aggregate `save T C` functionality. Remove in the future. */
static int reading_config_file; /* Non-zero while loadServerConfigFromString() runs. */
+
/* Parse and apply the configuration contained in the 'config' string
 * (typically the whole text of a config file plus any extra directives
 * appended by the caller). Standard configs are dispatched through
 * lookupConfig(); a handful of special directives (include,
 * rename-command, user, loadmodule, dotted module configs, sentinel)
 * are handled inline. On any error a fatal message is printed to
 * stderr and the process exits. */
void loadServerConfigFromString(char *config) {
    /* Configs that were removed but are still tolerated (and ignored),
     * matched by name plus an argument-count range, so that old config
     * files keep loading. */
    deprecatedConfig deprecated_configs[] = {
        {"list-max-ziplist-entries", 2, 2},
        {"list-max-ziplist-value", 2, 2},
        {"lua-replicate-commands", 2, 2},
        {NULL, 0},
    };
    char buf[1024];
    const char *err = NULL;
    int linenum = 0, totlines, i;
    sds *lines;
    sds *argv = NULL; /* Current line's argument vector; NULL when not owned. */
    int argc;

    /* Flag used elsewhere to detect config-file-time CONFIG SET. */
    reading_config_file = 1;
    lines = sdssplitlen(config,strlen(config),"\n",1,&totlines);

    for (i = 0; i < totlines; i++) {
        linenum = i+1;
        lines[i] = sdstrim(lines[i]," \t\r\n");

        /* Skip comments and blank lines */
        if (lines[i][0] == '#' || lines[i][0] == '\0') continue;

        /* Split into arguments */
        argv = sdssplitargs(lines[i],&argc);
        if (argv == NULL) {
            err = "Unbalanced quotes in configuration line";
            goto loaderr;
        }

        /* Skip this line if the resulting command vector is empty. */
        if (argc == 0) {
            sdsfreesplitres(argv,argc);
            argv = NULL;
            continue;
        }
        sdstolower(argv[0]);

        /* Iterate the configs that are standard */
        /* NOTE: this declaration intentionally shadows the 'config'
         * parameter for the rest of the loop body. */
        standardConfig *config = lookupConfig(argv[0]);
        if (config) {
            /* For normal single arg configs enforce we have a single argument.
             * Note that MULTI_ARG_CONFIGs need to validate arg count on their own */
            if (!(config->flags & MULTI_ARG_CONFIG) && argc != 2) {
                err = "wrong number of arguments";
                goto loaderr;
            }

            if ((config->flags & MULTI_ARG_CONFIG) && argc == 2 && sdslen(argv[1])) {
                /* For MULTI_ARG_CONFIGs, if we only have one argument, try to split it by spaces.
                 * Only if the argument is not empty, otherwise something like --save "" will fail.
                 * So that we can support something like --config "arg1 arg2 arg3". */
                sds *new_argv;
                int new_argc;
                new_argv = sdssplitargs(argv[1], &new_argc);
                if (!config->interface.set(config, new_argv, new_argc, &err)) {
                    if(new_argv) sdsfreesplitres(new_argv, new_argc);
                    goto loaderr;
                }
                sdsfreesplitres(new_argv, new_argc);
            } else {
                /* Set config using all arguments that follows */
                if (!config->interface.set(config, &argv[1], argc-1, &err)) {
                    goto loaderr;
                }
            }

            sdsfreesplitres(argv,argc);
            argv = NULL;
            continue;
        } else {
            /* Silently skip deprecated directives that match by name
             * and argument count, so old config files still load. */
            int match = 0;
            for (deprecatedConfig *config = deprecated_configs; config->name != NULL; config++) {
                if (!strcasecmp(argv[0], config->name) &&
                    config->argc_min <= argc &&
                    argc <= config->argc_max)
                {
                    match = 1;
                    break;
                }
            }
            if (match) {
                sdsfreesplitres(argv,argc);
                argv = NULL;
                continue;
            }
        }

        /* Execute config directives */
        if (!strcasecmp(argv[0],"include") && argc == 2) {
            /* Recursively load the included file (no stdin, no options). */
            loadServerConfig(argv[1], 0, NULL);
        } else if (!strcasecmp(argv[0],"rename-command") && argc == 3) {
            struct redisCommand *cmd = lookupCommandBySds(argv[1]);
            int retval;

            if (!cmd) {
                err = "No such command in rename-command";
                goto loaderr;
            }

            /* If the target command name is the empty string we just
             * remove it from the command table. */
            retval = dictDelete(server.commands, argv[1]);
            serverAssert(retval == DICT_OK);

            /* Otherwise we re-add the command under a different name. */
            if (sdslen(argv[2]) != 0) {
                sds copy = sdsdup(argv[2]);

                retval = dictAdd(server.commands, copy, cmd);
                if (retval != DICT_OK) {
                    sdsfree(copy);
                    err = "Target command name already exists"; goto loaderr;
                }
            }
        } else if (!strcasecmp(argv[0],"user") && argc >= 2) {
            /* ACL user definition: queued for later application. */
            int argc_err;
            if (ACLAppendUserForLoading(argv,argc,&argc_err) == C_ERR) {
                const char *errmsg = ACLSetUserStringError();
                snprintf(buf,sizeof(buf),"Error in user declaration '%s': %s",
                    argv[argc_err],errmsg);
                err = buf;
                goto loaderr;
            }
        } else if (!strcasecmp(argv[0],"loadmodule") && argc >= 2) {
            /* Module loading is deferred: just queue path + args. */
            queueLoadModule(argv[1],&argv[2],argc-2);
        } else if (strchr(argv[0], '.')) {
            /* A dotted name is a module config; queue name -> value
             * (remaining args joined by spaces) for the module to pick up. */
            if (argc < 2) {
                err = "Module config specified without value";
                goto loaderr;
            }
            sds name = sdsdup(argv[0]);
            sds val = sdsdup(argv[1]);
            for (int i = 2; i < argc; i++)
                val = sdscatfmt(val, " %S", argv[i]);
            if (!dictReplace(server.module_configs_queue, name, val)) sdsfree(name);
        } else if (!strcasecmp(argv[0],"sentinel")) {
            /* argc == 1 is handled by main() as we need to enter the sentinel
             * mode ASAP. */
            if (argc != 1) {
                if (!server.sentinel_mode) {
                    err = "sentinel directive while not in sentinel mode";
                    goto loaderr;
                }
                queueSentinelConfig(argv+1,argc-1,linenum,lines[i]);
            }
        } else {
            err = "Bad directive or wrong number of arguments"; goto loaderr;
        }
        sdsfreesplitres(argv,argc);
        argv = NULL;
    }

    /* Check that the log file can actually be opened now, since the
     * server cannot cleanly abort over this later on. */
    if (server.logfile[0] != '\0') {
        FILE *logfp;

        /* Test if we are able to open the file. The server will not
         * be able to abort just for this problem later... */
        logfp = fopen(server.logfile,"a");
        if (logfp == NULL) {
            /* err normally points to a static string; this sds is
             * intentionally not freed since loaderr exits the process. */
            err = sdscatprintf(sdsempty(),
                "Can't open the log file: %s", strerror(errno));
            goto loaderr;
        }
        fclose(logfp);
    }

    /* Sanity checks. */
    if (server.cluster_enabled && server.masterhost) {
        err = "replicaof directive not allowed in cluster mode";
        goto loaderr;
    }

    /* in case cluster mode is enabled dbnum must be 1 */
    if (server.cluster_enabled && server.dbnum > 1) {
        serverLog(LL_WARNING, "WARNING: Changing databases number from %d to 1 since we are in cluster mode", server.dbnum);
        server.dbnum = 1;
    }

    /* To ensure backward compatibility and work while hz is out of range */
    if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ;
    if (server.config_hz > CONFIG_MAX_HZ) server.config_hz = CONFIG_MAX_HZ;

    sdsfreesplitres(lines,totlines);
    reading_config_file = 0;
    return;

loaderr:
    /* Fatal: report the offending line (if any) and abort startup. */
    if (argv) sdsfreesplitres(argv,argc);
    fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR (Redis %s) ***\n",
        REDIS_VERSION);
    if (i < totlines) {
        fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
        fprintf(stderr, ">>> '%s'\n", lines[i]);
    }
    fprintf(stderr, "%s\n", err);
    exit(1);
}
+
+/* Load the server configuration from the specified filename.
+ * The function appends the additional configuration directives stored
+ * in the 'options' string to the config file before loading.
+ *
+ * Both filename and options can be NULL, in such a case are considered
+ * empty. This way loadServerConfig can be used to just load a file or
+ * just load a string. */
+#define CONFIG_READ_LEN 1024
+void loadServerConfig(char *filename, char config_from_stdin, char *options) {
+ sds config = sdsempty();
+ char buf[CONFIG_READ_LEN+1];
+ FILE *fp;
+ glob_t globbuf;
+
+ /* Load the file content */
+ if (filename) {
+
+ /* The logic for handling wildcards has slightly different behavior in cases where
+ * there is a failure to locate the included file.
+ * Whether or not a wildcard is specified, we should ALWAYS log errors when attempting
+ * to open included config files.
+ *
+ * However, we desire a behavioral difference between instances where a wildcard was
+ * specified and those where it hasn't:
+ * no wildcards : attempt to open the specified file and fail with a logged error
+ * if the file cannot be found and opened.
+ * with wildcards : attempt to glob the specified pattern; if no files match the
+ * pattern, then gracefully continue on to the next entry in the
+ * config file, as if the current entry was never encountered.
+ * This will allow for empty conf.d directories to be included. */
+
+ if (strchr(filename, '*') || strchr(filename, '?') || strchr(filename, '[')) {
+ /* A wildcard character detected in filename, so let us use glob */
+ if (glob(filename, 0, NULL, &globbuf) == 0) {
+
+ for (size_t i = 0; i < globbuf.gl_pathc; i++) {
+ if ((fp = fopen(globbuf.gl_pathv[i], "r")) == NULL) {
+ serverLog(LL_WARNING,
+ "Fatal error, can't open config file '%s': %s",
+ globbuf.gl_pathv[i], strerror(errno));
+ exit(1);
+ }
+ while(fgets(buf,CONFIG_READ_LEN+1,fp) != NULL)
+ config = sdscat(config,buf);
+ fclose(fp);
+ }
+
+ globfree(&globbuf);
+ }
+ } else {
+ /* No wildcard in filename means we can use the original logic to read and
+ * potentially fail traditionally */
+ if ((fp = fopen(filename, "r")) == NULL) {
+ serverLog(LL_WARNING,
+ "Fatal error, can't open config file '%s': %s",
+ filename, strerror(errno));
+ exit(1);
+ }
+ while(fgets(buf,CONFIG_READ_LEN+1,fp) != NULL)
+ config = sdscat(config,buf);
+ fclose(fp);
+ }
+ }
+
+ /* Append content from stdin */
+ if (config_from_stdin) {
+ serverLog(LL_NOTICE,"Reading config from stdin");
+ fp = stdin;
+ while(fgets(buf,CONFIG_READ_LEN+1,fp) != NULL)
+ config = sdscat(config,buf);
+ }
+
+ /* Append the additional options */
+ if (options) {
+ config = sdscat(config,"\n");
+ config = sdscat(config,options);
+ }
+ loadServerConfigFromString(config);
+ sdsfree(config);
+}
+
+static int performInterfaceSet(standardConfig *config, sds value, const char **errstr) {
+ sds *argv;
+ int argc, res;
+
+ if (config->flags & MULTI_ARG_CONFIG) {
+ argv = sdssplitlen(value, sdslen(value), " ", 1, &argc);
+ } else {
+ argv = (char**)&value;
+ argc = 1;
+ }
+
+ /* Set the config */
+ res = config->interface.set(config, argv, argc, errstr);
+ if (config->flags & MULTI_ARG_CONFIG) sdsfreesplitres(argv, argc);
+ return res;
+}
+
+/* Find the config by name and attempt to set it to value. */
+int performModuleConfigSetFromName(sds name, sds value, const char **err) {
+ standardConfig *config = lookupConfig(name);
+ if (!config || !(config->flags & MODULE_CONFIG)) {
+ *err = "Config name not found";
+ return 0;
+ }
+ return performInterfaceSet(config, value, err);
+}
+
+/* Find config by name and attempt to set it to its default value. */
+int performModuleConfigSetDefaultFromName(sds name, const char **err) {
+ standardConfig *config = lookupConfig(name);
+ serverAssert(config);
+ if (!(config->flags & MODULE_CONFIG)) {
+ *err = "Config name not found";
+ return 0;
+ }
+ switch (config->type) {
+ case BOOL_CONFIG:
+ return setModuleBoolConfig(config->privdata, config->data.yesno.default_value, err);
+ case SDS_CONFIG:
+ return setModuleStringConfig(config->privdata, config->data.sds.default_value, err);
+ case NUMERIC_CONFIG:
+ return setModuleNumericConfig(config->privdata, config->data.numeric.default_value, err);
+ case ENUM_CONFIG:
+ return setModuleEnumConfig(config->privdata, config->data.enumd.default_value, err);
+ default:
+ serverPanic("Config type of module config is not allowed.");
+ }
+ return 0;
+}
+
+static void restoreBackupConfig(standardConfig **set_configs, sds *old_values, int count, apply_fn *apply_fns, list *module_configs) {
+ int i;
+ const char *errstr = "unknown error";
+ /* Set all backup values */
+ for (i = 0; i < count; i++) {
+ if (!performInterfaceSet(set_configs[i], old_values[i], &errstr))
+ serverLog(LL_WARNING, "Failed restoring failed CONFIG SET command. Error setting %s to '%s': %s",
+ set_configs[i]->name, old_values[i], errstr);
+ }
+ /* Apply backup */
+ if (apply_fns) {
+ for (i = 0; i < count && apply_fns[i] != NULL; i++) {
+ if (!apply_fns[i](&errstr))
+ serverLog(LL_WARNING, "Failed applying restored failed CONFIG SET command: %s", errstr);
+ }
+ }
+ if (module_configs) {
+ if (!moduleConfigApplyConfig(module_configs, &errstr, NULL))
+ serverLog(LL_WARNING, "Failed applying restored failed CONFIG SET command: %s", errstr);
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ * CONFIG SET implementation
+ *----------------------------------------------------------------------------*/
+
/* CONFIG SET <name> <value> [<name> <value> ...]
 *
 * Validates all name/value pairs first, snapshots the old values, sets
 * every config, then runs the (de-duplicated) apply functions. If any
 * step fails, every config already touched is rolled back via
 * restoreBackupConfig() and an error is returned to the client. */
void configSetCommand(client *c) {
    const char *errstr = NULL;
    const char *invalid_arg_name = NULL;
    const char *err_arg_name = NULL;
    standardConfig **set_configs; /* TODO: make this a dict for better performance */
    list *module_configs_apply;
    const char **config_names;
    sds *new_values;
    sds *old_values = NULL;
    apply_fn *apply_fns; /* TODO: make this a set for better performance */
    int config_count, i, j;
    int invalid_args = 0, deny_loading_error = 0;
    int *config_map_fns; /* apply_fns[k] belongs to set_configs[config_map_fns[k]] */

    /* Make sure we have an even number of arguments: conf-val pairs */
    if (c->argc & 1) {
        addReplyErrorObject(c, shared.syntaxerr);
        return;
    }
    config_count = (c->argc - 2) / 2;

    module_configs_apply = listCreate();
    set_configs = zcalloc(sizeof(standardConfig*)*config_count);
    config_names = zcalloc(sizeof(char*)*config_count);
    new_values = zmalloc(sizeof(sds*)*config_count);
    old_values = zcalloc(sizeof(sds*)*config_count);
    apply_fns = zcalloc(sizeof(apply_fn)*config_count);
    config_map_fns = zmalloc(sizeof(int)*config_count);

    /* Find all relevant configs */
    for (i = 0; i < config_count; i++) {
        standardConfig *config = lookupConfig(c->argv[2+i*2]->ptr);
        /* Fail if we couldn't find this config */
        if (!config) {
            if (!invalid_args) {
                invalid_arg_name = c->argv[2+i*2]->ptr;
                invalid_args = 1;
            }
            continue;
        }

        /* Note: it's important we run over ALL passed configs and check if we need to call `redactClientCommandArgument()`.
         * This is in order to avoid anyone using this command for a log/slowlog/monitor/etc. displaying sensitive info.
         * So even if we encounter an error we still continue running over the remaining arguments. */
        if (config->flags & SENSITIVE_CONFIG) {
            redactClientCommandArgument(c,2+i*2+1);
        }

        /* We continue to make sure we redact all the configs */
        if (invalid_args) continue;

        if (config->flags & IMMUTABLE_CONFIG ||
            (config->flags & PROTECTED_CONFIG && !allowProtectedAction(server.enable_protected_configs, c)))
        {
            /* Note: we don't abort the loop since we still want to handle redacting sensitive configs (above) */
            errstr = (config->flags & IMMUTABLE_CONFIG) ? "can't set immutable config" : "can't set protected config";
            err_arg_name = c->argv[2+i*2]->ptr;
            invalid_args = 1;
            continue;
        }

        if (server.loading && config->flags & DENY_LOADING_CONFIG) {
            /* Note: we don't abort the loop since we still want to handle redacting sensitive configs (above) */
            deny_loading_error = 1;
            invalid_args = 1;
            continue;
        }

        /* If this config appears twice then fail */
        for (j = 0; j < i; j++) {
            if (set_configs[j] == config) {
                /* Note: we don't abort the loop since we still want to handle redacting sensitive configs (above) */
                errstr = "duplicate parameter";
                err_arg_name = c->argv[2+i*2]->ptr;
                invalid_args = 1;
                break;
            }
        }
        set_configs[i] = config;
        config_names[i] = config->name;
        new_values[i] = c->argv[2+i*2+1]->ptr;
    }

    if (invalid_args) goto err;

    /* Backup old values before setting new ones */
    for (i = 0; i < config_count; i++)
        old_values[i] = set_configs[i]->interface.get(set_configs[i]);

    /* Set all new values (don't apply yet) */
    for (i = 0; i < config_count; i++) {
        int res = performInterfaceSet(set_configs[i], new_values[i], &errstr);
        if (!res) {
            /* i+1: the failing config may have been partially set, so it
             * is included in the rollback. */
            restoreBackupConfig(set_configs, old_values, i+1, NULL, NULL);
            err_arg_name = set_configs[i]->name;
            goto err;
        } else if (res == 1) {
            /* A new value was set, if this config has an apply function then store it for execution later */
            if (set_configs[i]->flags & MODULE_CONFIG) {
                addModuleConfigApply(module_configs_apply, set_configs[i]->privdata);
            } else if (set_configs[i]->interface.apply) {
                /* Check if this apply function is already stored */
                int exists = 0;
                for (j = 0; apply_fns[j] != NULL && j <= i; j++) {
                    if (apply_fns[j] == set_configs[i]->interface.apply) {
                        exists = 1;
                        break;
                    }
                }
                /* Apply function not stored, store it */
                if (!exists) {
                    apply_fns[j] = set_configs[i]->interface.apply;
                    config_map_fns[j] = i;
                }
            }
        }
    }

    /* Apply all configs after being set */
    for (i = 0; i < config_count && apply_fns[i] != NULL; i++) {
        if (!apply_fns[i](&errstr)) {
            serverLog(LL_WARNING, "Failed applying new configuration. Possibly related to new %s setting. Restoring previous settings.", set_configs[config_map_fns[i]]->name);
            restoreBackupConfig(set_configs, old_values, config_count, apply_fns, NULL);
            err_arg_name = set_configs[config_map_fns[i]]->name;
            goto err;
        }
    }
    /* Apply all module configs that were set. */
    if (!moduleConfigApplyConfig(module_configs_apply, &errstr, &err_arg_name)) {
        serverLogRaw(LL_WARNING, "Failed applying new module configuration. Restoring previous settings.");
        restoreBackupConfig(set_configs, old_values, config_count, apply_fns, module_configs_apply);
        goto err;
    }

    /* Notify modules of the successful configuration change. */
    RedisModuleConfigChangeV1 cc = {.num_changes = config_count, .config_names = config_names};
    moduleFireServerEvent(REDISMODULE_EVENT_CONFIG, REDISMODULE_SUBEVENT_CONFIG_CHANGE, &cc);
    addReply(c,shared.ok);
    goto end;

err:
    if (deny_loading_error) {
        /* We give the loading error precedence because it may be handled by clients differently, unlike a plain -ERR. */
        addReplyErrorObject(c,shared.loadingerr);
    } else if (invalid_arg_name) {
        addReplyErrorFormat(c,"Unknown option or number of arguments for CONFIG SET - '%s'", invalid_arg_name);
    } else if (errstr) {
        addReplyErrorFormat(c,"CONFIG SET failed (possibly related to argument '%s') - %s", err_arg_name, errstr);
    } else {
        addReplyErrorFormat(c,"CONFIG SET failed (possibly related to argument '%s')", err_arg_name);
    }
end:
    /* Free the auxiliary arrays; old_values entries are owned sds. */
    zfree(set_configs);
    zfree(config_names);
    zfree(new_values);
    for (i = 0; i < config_count; i++)
        sdsfree(old_values[i]);
    zfree(old_values);
    zfree(apply_fns);
    zfree(config_map_fns);
    listRelease(module_configs_apply);
}
+
+/*-----------------------------------------------------------------------------
+ * CONFIG GET implementation
+ *----------------------------------------------------------------------------*/
+
+void configGetCommand(client *c) {
+ int i;
+ dictEntry *de;
+ dictIterator *di;
+ /* Create a dictionary to store the matched configs */
+ dict *matches = dictCreate(&externalStringType);
+ for (i = 0; i < c->argc - 2; i++) {
+ robj *o = c->argv[2+i];
+ sds name = o->ptr;
+
+ /* If the string doesn't contain glob patterns, just directly
+ * look up the key in the dictionary. */
+ if (!strpbrk(name, "[*?")) {
+ if (dictFind(matches, name)) continue;
+ standardConfig *config = lookupConfig(name);
+
+ if (config) {
+ dictAdd(matches, name, config);
+ }
+ continue;
+ }
+
+ /* Otherwise, do a match against all items in the dictionary. */
+ di = dictGetIterator(configs);
+
+ while ((de = dictNext(di)) != NULL) {
+ standardConfig *config = dictGetVal(de);
+ /* Note that hidden configs require an exact match (not a pattern) */
+ if (config->flags & HIDDEN_CONFIG) continue;
+ if (dictFind(matches, config->name)) continue;
+ if (stringmatch(name, dictGetKey(de), 1)) {
+ dictAdd(matches, dictGetKey(de), config);
+ }
+ }
+ dictReleaseIterator(di);
+ }
+
+ di = dictGetIterator(matches);
+ addReplyMapLen(c, dictSize(matches));
+ while ((de = dictNext(di)) != NULL) {
+ standardConfig *config = (standardConfig *) dictGetVal(de);
+ addReplyBulkCString(c, dictGetKey(de));
+ addReplyBulkSds(c, config->interface.get(config));
+ }
+ dictReleaseIterator(di);
+ dictRelease(matches);
+}
+
+/*-----------------------------------------------------------------------------
+ * CONFIG REWRITE implementation
+ *----------------------------------------------------------------------------*/
+
+#define REDIS_CONFIG_REWRITE_SIGNATURE "# Generated by CONFIG REWRITE"
+
+/* We use the following dictionary type to store where a configuration
+ * option is mentioned in the old configuration file, so it's
+ * like "maxmemory" -> list of line numbers (first line is zero). */
+void dictListDestructor(dict *d, void *val);
+
+/* Sentinel config rewriting is implemented inside sentinel.c by
+ * rewriteConfigSentinelOption(). */
+void rewriteConfigSentinelOption(struct rewriteConfigState *state);
+
/* Dict type with case-insensitive sds keys mapping an option name to
 * the list of line numbers where it appears in the old config file. */
dictType optionToLineDictType = {
    dictSdsCaseHash,            /* hash function */
    NULL,                       /* key dup */
    NULL,                       /* val dup */
    dictSdsKeyCaseCompare,      /* key compare */
    dictSdsDestructor,          /* key destructor */
    dictListDestructor,         /* val destructor */
    NULL                        /* allow to expand */
};
+
/* Dict type used as a case-insensitive set of option names (values are
 * always NULL, so no value destructor is needed). */
dictType optionSetDictType = {
    dictSdsCaseHash,            /* hash function */
    NULL,                       /* key dup */
    NULL,                       /* val dup */
    dictSdsKeyCaseCompare,      /* key compare */
    dictSdsDestructor,          /* key destructor */
    NULL,                       /* val destructor */
    NULL                        /* allow to expand */
};
+
/* The config rewrite state: an editable in-memory image of the old
 * configuration file, plus bookkeeping about which options were already
 * rewritten. Created by rewriteConfigReadOldFile() and released with
 * rewriteConfigReleaseState(). */
struct rewriteConfigState {
    dict *option_to_line; /* Option -> list of config file lines map */
    dict *rewritten; /* Dictionary of already processed options */
    int numlines; /* Number of lines in current config */
    sds *lines; /* Current lines as an array of sds strings */
    int needs_signature; /* True if we need to append the rewrite
                            signature. */
    int force_write; /* True if we want all keywords to be force
                        written. Currently only used for testing
                        and debug information. */
};
+
+/* Free the configuration rewrite state. */
+void rewriteConfigReleaseState(struct rewriteConfigState *state) {
+ sdsfreesplitres(state->lines,state->numlines);
+ dictRelease(state->option_to_line);
+ dictRelease(state->rewritten);
+ zfree(state);
+}
+
+/* Create the configuration rewrite state */
+struct rewriteConfigState *rewriteConfigCreateState(void) {
+ struct rewriteConfigState *state = zmalloc(sizeof(*state));
+ state->option_to_line = dictCreate(&optionToLineDictType);
+ state->rewritten = dictCreate(&optionSetDictType);
+ state->numlines = 0;
+ state->lines = NULL;
+ state->needs_signature = 1;
+ state->force_write = 0;
+ return state;
+}
+
+/* Append the new line to the current configuration state. */
+void rewriteConfigAppendLine(struct rewriteConfigState *state, sds line) {
+ state->lines = zrealloc(state->lines, sizeof(char*) * (state->numlines+1));
+ state->lines[state->numlines++] = line;
+}
+
+/* Populate the option -> list of line numbers map. */
+void rewriteConfigAddLineNumberToOption(struct rewriteConfigState *state, sds option, int linenum) {
+ list *l = dictFetchValue(state->option_to_line,option);
+
+ if (l == NULL) {
+ l = listCreate();
+ dictAdd(state->option_to_line,sdsdup(option),l);
+ }
+ listAddNodeTail(l,(void*)(long)linenum);
+}
+
+/* Add the specified option to the set of processed options.
+ * This is useful as only unused lines of processed options will be blanked
+ * in the config file, while options the rewrite process does not understand
+ * remain untouched. */
+void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option) {
+ sds opt = sdsnew(option);
+
+ if (dictAdd(state->rewritten,opt,NULL) != DICT_OK) sdsfree(opt);
+}
+
+/* Read the old file, split it into lines to populate a newly created
+ * config rewrite state, and return it to the caller.
+ *
+ * If it is impossible to read the old file, NULL is returned.
+ * If the old file does not exist at all, an empty state is returned. */
+struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
+ FILE *fp = fopen(path,"r");
+ if (fp == NULL && errno != ENOENT) return NULL;
+
+ struct redis_stat sb;
+ if (fp && redis_fstat(fileno(fp),&sb) == -1) return NULL;
+
+ int linenum = -1;
+ struct rewriteConfigState *state = rewriteConfigCreateState();
+
+ if (fp == NULL || sb.st_size == 0) return state;
+
+ /* Load the file content */
+ sds config = sdsnewlen(SDS_NOINIT,sb.st_size);
+ if (fread(config,1,sb.st_size,fp) == 0) {
+ sdsfree(config);
+ rewriteConfigReleaseState(state);
+ fclose(fp);
+ return NULL;
+ }
+
+ int i, totlines;
+ sds *lines = sdssplitlen(config,sdslen(config),"\n",1,&totlines);
+
+ /* Read the old content line by line, populate the state. */
+ for (i = 0; i < totlines; i++) {
+ int argc;
+ sds *argv;
+ sds line = sdstrim(lines[i],"\r\n\t ");
+ lines[i] = NULL;
+
+ linenum++; /* Zero based, so we init at -1 */
+
+ /* Handle comments and empty lines. */
+ if (line[0] == '#' || line[0] == '\0') {
+ if (state->needs_signature && !strcmp(line,REDIS_CONFIG_REWRITE_SIGNATURE))
+ state->needs_signature = 0;
+ rewriteConfigAppendLine(state,line);
+ continue;
+ }
+
+ /* Not a comment, split into arguments. */
+ argv = sdssplitargs(line,&argc);
+
+ if (argv == NULL ||
+ (!lookupConfig(argv[0]) &&
+ /* The following is a list of config features that are only supported in
+ * config file parsing and are not recognized by lookupConfig */
+ strcasecmp(argv[0],"include") &&
+ strcasecmp(argv[0],"rename-command") &&
+ strcasecmp(argv[0],"user") &&
+ strcasecmp(argv[0],"loadmodule") &&
+ strcasecmp(argv[0],"sentinel")))
+ {
+ /* The line is either unparsable for some reason, for
+ * instance it may have unbalanced quotes, may contain a
+ * config that doesn't exist anymore, for instance a module that got
+ * unloaded. Load it as a comment. */
+ sds aux = sdsnew("# ??? ");
+ aux = sdscatsds(aux,line);
+ if (argv) sdsfreesplitres(argv, argc);
+ sdsfree(line);
+ rewriteConfigAppendLine(state,aux);
+ continue;
+ }
+
+ sdstolower(argv[0]); /* We only want lowercase config directives. */
+
+ /* Now we populate the state according to the content of this line.
+ * Append the line and populate the option -> line numbers map. */
+ rewriteConfigAppendLine(state,line);
+
+ /* If this is a alias config, replace it with the original name. */
+ standardConfig *s_conf = lookupConfig(argv[0]);
+ if (s_conf && s_conf->flags & ALIAS_CONFIG) {
+ sdsfree(argv[0]);
+ argv[0] = sdsnew(s_conf->alias);
+ }
+
+ /* If this is sentinel config, we use sentinel "sentinel <config>" as option
+ to avoid messing up the sequence. */
+ if (server.sentinel_mode && argc > 1 && !strcasecmp(argv[0],"sentinel")) {
+ sds sentinelOption = sdsempty();
+ sentinelOption = sdscatfmt(sentinelOption,"%S %S",argv[0],argv[1]);
+ rewriteConfigAddLineNumberToOption(state,sentinelOption,linenum);
+ sdsfree(sentinelOption);
+ } else {
+ rewriteConfigAddLineNumberToOption(state,argv[0],linenum);
+ }
+ sdsfreesplitres(argv,argc);
+ }
+ fclose(fp);
+ sdsfreesplitres(lines,totlines);
+ sdsfree(config);
+ return state;
+}
+
/* Rewrite the specified configuration option with the new "line".
 * It progressively uses lines of the file that were already used for the same
 * configuration option in the old version of the file, removing that line from
 * the map of options -> line numbers.
 *
 * If there are lines associated with a given configuration option and
 * "force" is non-zero, the line is appended to the configuration file.
 * Usually "force" is true when an option has not its default value, so it
 * must be rewritten even if not present previously.
 *
 * The first time a line is appended into a configuration file, a comment
 * is added to show that starting from that point the config file was generated
 * by CONFIG REWRITE.
 *
 * "line" is either used, or freed, so the caller does not need to free it
 * in any way.
 *
 * Returns 1 if the line was written (replacing an old one or appended),
 * 0 if it was dropped because the option was unused and not forced. */
int rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force) {
    sds o = sdsnew(option);
    list *l = dictFetchValue(state->option_to_line,o);

    rewriteConfigMarkAsProcessed(state,option);

    if (!l && !force && !state->force_write) {
        /* Option not used previously, and we are not forced to use it. */
        sdsfree(line);
        sdsfree(o);
        return 0;
    }

    if (l) {
        listNode *ln = listFirst(l);
        /* Line numbers were stored directly in the node pointer. */
        int linenum = (long) ln->value;

        /* There are still lines in the old configuration file we can reuse
         * for this option. Replace the line with the new one. */
        listDelNode(l,ln);
        if (listLength(l) == 0) dictDelete(state->option_to_line,o);
        sdsfree(state->lines[linenum]);
        state->lines[linenum] = line;
    } else {
        /* Append a new line. */
        if (state->needs_signature) {
            rewriteConfigAppendLine(state,
                sdsnew(REDIS_CONFIG_REWRITE_SIGNATURE));
            state->needs_signature = 0;
        }
        rewriteConfigAppendLine(state,line);
    }
    sdsfree(o);
    return 1;
}
+
/* Write the long long 'bytes' value as a string in a way that is parsable
 * inside redis.conf. If possible uses the GB, MB, KB notation: the first
 * unit that divides the value exactly wins, otherwise the raw byte count
 * is printed. Returns the snprintf() result. */
int rewriteConfigFormatMemory(char *buf, size_t len, long long bytes) {
    static const struct { long long div; const char *suffix; } units[] = {
        {1024LL*1024*1024, "gb"},
        {1024LL*1024,      "mb"},
        {1024LL,           "kb"},
    };

    for (size_t u = 0; u < sizeof(units)/sizeof(units[0]); u++) {
        if (bytes && (bytes % units[u].div) == 0)
            return snprintf(buf,len,"%lld%s",bytes/units[u].div,units[u].suffix);
    }
    return snprintf(buf,len,"%lld",bytes);
}
+
+/* Rewrite a simple "option-name <bytes>" configuration option. */
+void rewriteConfigBytesOption(struct rewriteConfigState *state, const char *option, long long value, long long defvalue) {
+ char buf[64];
+ int force = value != defvalue;
+ sds line;
+
+ rewriteConfigFormatMemory(buf,sizeof(buf),value);
+ line = sdscatprintf(sdsempty(),"%s %s",option,buf);
+ rewriteConfigRewriteLine(state,option,line,force);
+}
+
+/* Rewrite a simple "option-name n%" configuration option. */
+void rewriteConfigPercentOption(struct rewriteConfigState *state, const char *option, long long value, long long defvalue) {
+ int force = value != defvalue;
+ sds line = sdscatprintf(sdsempty(),"%s %lld%%",option,value);
+
+ rewriteConfigRewriteLine(state,option,line,force);
+}
+
+/* Rewrite a yes/no option. */
+void rewriteConfigYesNoOption(struct rewriteConfigState *state, const char *option, int value, int defvalue) {
+ int force = value != defvalue;
+ sds line = sdscatprintf(sdsempty(),"%s %s",option,
+ value ? "yes" : "no");
+
+ rewriteConfigRewriteLine(state,option,line,force);
+}
+
+/* Rewrite a string option. */
+void rewriteConfigStringOption(struct rewriteConfigState *state, const char *option, char *value, const char *defvalue) {
+ int force = 1;
+ sds line;
+
+ /* String options set to NULL need to be not present at all in the
+ * configuration file to be set to NULL again at the next reboot. */
+ if (value == NULL) {
+ rewriteConfigMarkAsProcessed(state,option);
+ return;
+ }
+
+ /* Set force to zero if the value is set to its default. */
+ if (defvalue && strcmp(value,defvalue) == 0) force = 0;
+
+ line = sdsnew(option);
+ line = sdscatlen(line, " ", 1);
+ line = sdscatrepr(line, value, strlen(value));
+
+ rewriteConfigRewriteLine(state,option,line,force);
+}
+
+/* Rewrite a SDS string option. */
+void rewriteConfigSdsOption(struct rewriteConfigState *state, const char *option, sds value, const char *defvalue) {
+ int force = 1;
+ sds line;
+
+ /* If there is no value set, we don't want the SDS option
+ * to be present in the configuration at all. */
+ if (value == NULL) {
+ rewriteConfigMarkAsProcessed(state, option);
+ return;
+ }
+
+ /* Set force to zero if the value is set to its default. */
+ if (defvalue && strcmp(value, defvalue) == 0) force = 0;
+
+ line = sdsnew(option);
+ line = sdscatlen(line, " ", 1);
+ line = sdscatrepr(line, value, sdslen(value));
+
+ rewriteConfigRewriteLine(state, option, line, force);
+}
+
+/* Rewrite a numerical (long long range) option. */
+void rewriteConfigNumericalOption(struct rewriteConfigState *state, const char *option, long long value, long long defvalue) {
+ int force = value != defvalue;
+ sds line = sdscatprintf(sdsempty(),"%s %lld",option,value);
+
+ rewriteConfigRewriteLine(state,option,line,force);
+}
+
+/* Rewrite an octal option. */
+void rewriteConfigOctalOption(struct rewriteConfigState *state, const char *option, long long value, long long defvalue) {
+ int force = value != defvalue;
+ sds line = sdscatprintf(sdsempty(),"%s %llo",option,value);
+
+ rewriteConfigRewriteLine(state,option,line,force);
+}
+
/* Rewrite an enumeration option. It takes as usually state and option name,
 * and in addition the enumeration array and the default value for the
 * option. */
void rewriteConfigEnumOption(struct rewriteConfigState *state, const char *option, int value, standardConfig *config) {
    /* For MULTI_ARG_CONFIG enums the name may be a space separated list
     * of flag names rather than a single symbol. */
    int multiarg = config->flags & MULTI_ARG_CONFIG;
    sds names = configEnumGetName(config->data.enumd.enum_value,value,multiarg);
    sds line = sdscatfmt(sdsempty(),"%s %s",option,names);
    sdsfree(names);
    /* Force the write only when the value is not the default. */
    int force = value != config->data.enumd.default_value;

    rewriteConfigRewriteLine(state,option,line,force);
}
+
/* Rewrite the save option. Emits one "save <seconds> <changes>" line per
 * configured save point, or a single 'save ""' line when persistence
 * points are disabled (so the compiled-in defaults are not restored on
 * the next startup). */
void rewriteConfigSaveOption(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    UNUSED(config);
    int j;
    sds line;

    /* In Sentinel mode we don't need to rewrite the save parameters */
    if (server.sentinel_mode) {
        rewriteConfigMarkAsProcessed(state,name);
        return;
    }

    /* Rewrite save parameters, or an empty 'save ""' line to avoid the
     * defaults from being used.
     */
    if (!server.saveparamslen) {
        rewriteConfigRewriteLine(state,name,sdsnew("save \"\""),1);
    } else {
        for (j = 0; j < server.saveparamslen; j++) {
            line = sdscatprintf(sdsempty(),"save %ld %d",
                (long) server.saveparams[j].seconds, server.saveparams[j].changes);
            rewriteConfigRewriteLine(state,name,line,1);
        }
    }

    /* Mark "save" as processed in case server.saveparamslen is zero. */
    rewriteConfigMarkAsProcessed(state,name);
}
+
/* Rewrite the user option: one "user <name> <rules>" line per ACL user
 * currently defined, unless an external ACL file is configured. */
void rewriteConfigUserOption(struct rewriteConfigState *state) {
    /* If there is a user file defined we just mark this configuration
     * directive as processed, so that all the lines containing users
     * inside the config file gets discarded. */
    if (server.acl_filename[0] != '\0') {
        rewriteConfigMarkAsProcessed(state,"user");
        return;
    }

    /* Otherwise scan the list of users and rewrite every line. Note that
     * in case the list here is empty, the effect will just be to comment
     * all the users directive inside the config file. */
    raxIterator ri;
    raxStart(&ri,Users);
    raxSeek(&ri,"^",NULL,0);
    while(raxNext(&ri)) {
        user *u = ri.data;
        sds line = sdsnew("user ");
        line = sdscatsds(line,u->name);
        line = sdscatlen(line," ",1);
        /* ACLDescribeUser() returns a freshly allocated object with the
         * textual rules describing the user; release it after copying. */
        robj *descr = ACLDescribeUser(u);
        line = sdscatsds(line,descr->ptr);
        decrRefCount(descr);
        rewriteConfigRewriteLine(state,"user",line,1);
    }
    raxStop(&ri);

    /* Mark "user" as processed in case there are no defined users. */
    rewriteConfigMarkAsProcessed(state,"user");
}
+
+/* Rewrite the dir option, always using absolute paths.*/
+void rewriteConfigDirOption(standardConfig *config, const char *name, struct rewriteConfigState *state) {
+ UNUSED(config);
+ char cwd[1024];
+
+ if (getcwd(cwd,sizeof(cwd)) == NULL) {
+ rewriteConfigMarkAsProcessed(state,name);
+ return; /* no rewrite on error. */
+ }
+ rewriteConfigStringOption(state,name,cwd,NULL);
+}
+
/* Rewrite the slaveof option.
 * Emits "<name> <masterhost> <masterport>" only when this instance is a
 * replica; on masters and cluster nodes the directive is removed. */
void rewriteConfigReplicaOfOption(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    UNUSED(config);
    sds line;

    /* If this is a master, we want all the slaveof config options
     * in the file to be removed. Note that if this is a cluster instance
     * we don't want a slaveof directive inside redis.conf. */
    if (server.cluster_enabled || server.masterhost == NULL) {
        rewriteConfigMarkAsProcessed(state, name);
        return;
    }
    line = sdscatprintf(sdsempty(),"%s %s %d", name,
        server.masterhost, server.masterport);
    rewriteConfigRewriteLine(state,name,line,1);
}
+
/* Rewrite the notify-keyspace-events option. The flags integer is turned
 * back into its string form (e.g. "KEA"); the line is forced only when at
 * least one event class is enabled (non-default). */
void rewriteConfigNotifyKeyspaceEventsOption(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    UNUSED(config);
    int force = server.notify_keyspace_events != 0;
    sds line, flags;

    flags = keyspaceEventsFlagsToString(server.notify_keyspace_events);
    line = sdsnew(name);
    line = sdscatlen(line, " ", 1);
    /* sdscatrepr() quotes the flags string so an empty value is written
     * as "" and survives re-parsing. */
    line = sdscatrepr(line, flags, sdslen(flags));
    sdsfree(flags);
    rewriteConfigRewriteLine(state,name,line,force);
}
+
/* Rewrite the client-output-buffer-limit option: one line per client
 * class, forced only when any of the hard/soft limits differs from the
 * compiled-in defaults. */
void rewriteConfigClientOutputBufferLimitOption(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    UNUSED(config);
    int j;
    for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) {
        int force = (server.client_obuf_limits[j].hard_limit_bytes !=
                    clientBufferLimitsDefaults[j].hard_limit_bytes) ||
                    (server.client_obuf_limits[j].soft_limit_bytes !=
                    clientBufferLimitsDefaults[j].soft_limit_bytes) ||
                    (server.client_obuf_limits[j].soft_limit_seconds !=
                    clientBufferLimitsDefaults[j].soft_limit_seconds);
        sds line;
        char hard[64], soft[64];

        /* Render byte limits in human readable form (e.g. "256mb"). */
        rewriteConfigFormatMemory(hard,sizeof(hard),
            server.client_obuf_limits[j].hard_limit_bytes);
        rewriteConfigFormatMemory(soft,sizeof(soft),
            server.client_obuf_limits[j].soft_limit_bytes);

        /* Always emit the modern "replica" name instead of "slave". */
        char *typename = getClientTypeName(j);
        if (!strcmp(typename,"slave")) typename = "replica";
        line = sdscatprintf(sdsempty(),"%s %s %s %s %ld",
                name, typename, hard, soft,
                (long) server.client_obuf_limits[j].soft_limit_seconds);
        rewriteConfigRewriteLine(state,name,line,force);
    }
}
+
/* Rewrite the oom-score-adj-values option as a single line carrying all
 * the per-process-class values, forced when any of them is non-default. */
void rewriteConfigOOMScoreAdjValuesOption(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    UNUSED(config);
    int force = 0;
    int j;
    sds line;

    line = sdsnew(name);
    line = sdscatlen(line, " ", 1);
    for (j = 0; j < CONFIG_OOM_COUNT; j++) {
        /* A single non-default value forces the whole line out. */
        if (server.oom_score_adj_values[j] != configOOMScoreAdjValuesDefaults[j])
            force = 1;

        line = sdscatprintf(line, "%d", server.oom_score_adj_values[j]);
        if (j+1 != CONFIG_OOM_COUNT)
            line = sdscatlen(line, " ", 1);
    }
    rewriteConfigRewriteLine(state,name,line,force);
}
+
/* Rewrite the bind option. The directive is dropped when the configured
 * addresses exactly match the compiled-in default list; otherwise all
 * addresses are written on one line (or 'bind ""' when none is set). */
void rewriteConfigBindOption(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    UNUSED(config);
    int force = 1;
    sds line, addresses;
    int is_default = 0;

    /* Compare server.bindaddr with CONFIG_DEFAULT_BINDADDR */
    if (server.bindaddr_count == CONFIG_DEFAULT_BINDADDR_COUNT) {
        is_default = 1;
        char *default_bindaddr[CONFIG_DEFAULT_BINDADDR_COUNT] = CONFIG_DEFAULT_BINDADDR;
        for (int j = 0; j < CONFIG_DEFAULT_BINDADDR_COUNT; j++) {
            if (strcmp(server.bindaddr[j], default_bindaddr[j]) != 0) {
                is_default = 0;
                break;
            }
        }
    }

    if (is_default) {
        rewriteConfigMarkAsProcessed(state,name);
        return;
    }

    /* Rewrite as bind <addr1> <addr2> ... <addrN> */
    if (server.bindaddr_count > 0)
        addresses = sdsjoin(server.bindaddr,server.bindaddr_count," ");
    else
        addresses = sdsnew("\"\"");
    line = sdsnew(name);
    line = sdscatlen(line, " ", 1);
    line = sdscatsds(line, addresses);
    sdsfree(addresses);

    rewriteConfigRewriteLine(state,name,line,force);
}
+
/* Rewrite the loadmodule option: one "loadmodule <path> [args...]" line
 * per currently loaded module, using the path and arguments that were
 * originally used to load it. */
void rewriteConfigLoadmoduleOption(struct rewriteConfigState *state) {
    sds line;

    dictIterator *di = dictGetIterator(modules);
    dictEntry *de;
    while ((de = dictNext(di)) != NULL) {
        struct RedisModule *module = dictGetVal(de);
        line = sdsnew("loadmodule ");
        line = sdscatsds(line, module->loadmod->path);
        for (int i = 0; i < module->loadmod->argc; i++) {
            line = sdscatlen(line, " ", 1);
            line = sdscatsds(line, module->loadmod->argv[i]->ptr);
        }
        rewriteConfigRewriteLine(state,"loadmodule",line,1);
    }
    dictReleaseIterator(di);
    /* Mark "loadmodule" as processed in case modules is empty. */
    rewriteConfigMarkAsProcessed(state,"loadmodule");
}
+
+/* Glue together the configuration lines in the current configuration
+ * rewrite state into a single string, stripping multiple empty lines. */
+sds rewriteConfigGetContentFromState(struct rewriteConfigState *state) {
+ sds content = sdsempty();
+ int j, was_empty = 0;
+
+ for (j = 0; j < state->numlines; j++) {
+ /* Every cluster of empty lines is turned into a single empty line. */
+ if (sdslen(state->lines[j]) == 0) {
+ if (was_empty) continue;
+ was_empty = 1;
+ } else {
+ was_empty = 0;
+ }
+ content = sdscatsds(content,state->lines[j]);
+ content = sdscatlen(content,"\n",1);
+ }
+ return content;
+}
+
/* At the end of the rewrite process the state contains the remaining
 * map between "option name" => "lines in the original config file".
 * Lines used by the rewrite process were removed by the function
 * rewriteConfigRewriteLine(), all the other lines are "orphaned" and
 * should be replaced by empty lines.
 *
 * This function does just this, iterating all the option names and
 * blanking all the lines still associated. */
void rewriteConfigRemoveOrphaned(struct rewriteConfigState *state) {
    dictIterator *di = dictGetIterator(state->option_to_line);
    dictEntry *de;

    while((de = dictNext(di)) != NULL) {
        list *l = dictGetVal(de);
        sds option = dictGetKey(de);

        /* Don't blank lines about options the rewrite process
         * don't understand. */
        if (dictFind(state->rewritten,option) == NULL) {
            serverLog(LL_DEBUG,"Not rewritten option: %s", option);
            continue;
        }

        /* Blank every original line still associated with this option. */
        while(listLength(l)) {
            listNode *ln = listFirst(l);
            /* Line numbers are stored directly in the node value pointer. */
            int linenum = (long) ln->value;

            sdsfree(state->lines[linenum]);
            state->lines[linenum] = sdsempty();
            listDelNode(l,ln);
        }
    }
    dictReleaseIterator(di);
}
+
/* This function returns a string representation of all the config options
 * marked with DEBUG_CONFIG, which can be used to help with debugging.
 * The caller owns the returned sds string. */
sds getConfigDebugInfo(void) {
    struct rewriteConfigState *state = rewriteConfigCreateState();
    state->force_write = 1; /* Force the output */
    state->needs_signature = 0; /* Omit the rewrite signature */

    /* Iterate the configs and "rewrite" the ones that have
     * the debug flag. The rewrite machinery is reused here just to
     * serialize each option into a config-file style line. */
    dictIterator *di = dictGetIterator(configs);
    dictEntry *de;
    while ((de = dictNext(di)) != NULL) {
        standardConfig *config = dictGetVal(de);
        if (!(config->flags & DEBUG_CONFIG)) continue;
        config->interface.rewrite(config, config->name, state);
    }
    dictReleaseIterator(di);
    sds info = rewriteConfigGetContentFromState(state);
    rewriteConfigReleaseState(state);
    return info;
}
+
+/* This function replaces the old configuration file with the new content
+ * in an atomic manner.
+ *
+ * The function returns 0 on success, otherwise -1 is returned and errno
+ * is set accordingly. */
+int rewriteConfigOverwriteFile(char *configfile, sds content) {
+ int fd = -1;
+ int retval = -1;
+ char tmp_conffile[PATH_MAX];
+ const char *tmp_suffix = ".XXXXXX";
+ size_t offset = 0;
+ ssize_t written_bytes = 0;
+ int old_errno;
+
+ int tmp_path_len = snprintf(tmp_conffile, sizeof(tmp_conffile), "%s%s", configfile, tmp_suffix);
+ if (tmp_path_len <= 0 || (unsigned int)tmp_path_len >= sizeof(tmp_conffile)) {
+ serverLog(LL_WARNING, "Config file full path is too long");
+ errno = ENAMETOOLONG;
+ return retval;
+ }
+
+#if defined(_GNU_SOURCE) && !defined(__HAIKU__)
+ fd = mkostemp(tmp_conffile, O_CLOEXEC);
+#else
+ /* There's a theoretical chance here to leak the FD if a module thread forks & execv in the middle */
+ fd = mkstemp(tmp_conffile);
+#endif
+
+ if (fd == -1) {
+ serverLog(LL_WARNING, "Could not create tmp config file (%s)", strerror(errno));
+ return retval;
+ }
+
+ while (offset < sdslen(content)) {
+ written_bytes = write(fd, content + offset, sdslen(content) - offset);
+ if (written_bytes <= 0) {
+ if (errno == EINTR) continue; /* FD is blocking, no other retryable errors */
+ serverLog(LL_WARNING, "Failed after writing (%zd) bytes to tmp config file (%s)", offset, strerror(errno));
+ goto cleanup;
+ }
+ offset+=written_bytes;
+ }
+
+ if (fsync(fd))
+ serverLog(LL_WARNING, "Could not sync tmp config file to disk (%s)", strerror(errno));
+ else if (fchmod(fd, 0644 & ~server.umask) == -1)
+ serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno));
+ else if (rename(tmp_conffile, configfile) == -1)
+ serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno));
+ else if (fsyncFileDir(configfile) == -1)
+ serverLog(LL_WARNING, "Could not sync config file dir (%s)", strerror(errno));
+ else {
+ retval = 0;
+ serverLog(LL_DEBUG, "Rewritten config file (%s) successfully", configfile);
+ }
+
+cleanup:
+ old_errno = errno;
+ close(fd);
+ if (retval) unlink(tmp_conffile);
+ errno = old_errno;
+ return retval;
+}
+
/* Rewrite the configuration file at "path".
 * If the configuration file already exists, we try at best to retain comments
 * and overall structure.
 *
 * Configuration parameters that are at their default value, unless already
 * explicitly included in the old configuration file, are not rewritten.
 * The force_write flag overrides this behavior and forces everything to be
 * written. This is currently only used for testing purposes.
 *
 * On error -1 is returned and errno is set accordingly, otherwise 0. */
int rewriteConfig(char *path, int force_write) {
    struct rewriteConfigState *state;
    sds newcontent;
    int retval;

    /* Step 1: read the old config into our rewrite state. */
    if ((state = rewriteConfigReadOldFile(path)) == NULL) return -1;
    if (force_write) state->force_write = 1;

    /* Step 2: rewrite every single option, replacing or appending it inside
     * the rewrite state. */

    /* Iterate the configs that are standard */
    dictIterator *di = dictGetIterator(configs);
    dictEntry *de;
    while ((de = dictNext(di)) != NULL) {
        standardConfig *config = dictGetVal(de);
        /* Only rewrite the primary names */
        if (config->flags & ALIAS_CONFIG) continue;
        if (config->interface.rewrite) config->interface.rewrite(config, dictGetKey(de), state);
    }
    dictReleaseIterator(di);

    /* Options without a per-config rewrite callback are handled here. */
    rewriteConfigUserOption(state);
    rewriteConfigLoadmoduleOption(state);

    /* Rewrite Sentinel config if in Sentinel mode. */
    if (server.sentinel_mode) rewriteConfigSentinelOption(state);

    /* Step 3: remove all the orphaned lines in the old file, that is, lines
     * that were used by a config option and are no longer used, like in case
     * of multiple "save" options or duplicated options. */
    rewriteConfigRemoveOrphaned(state);

    /* Step 4: generate a new configuration file from the modified state
     * and write it into the original file. */
    newcontent = rewriteConfigGetContentFromState(state);
    retval = rewriteConfigOverwriteFile(server.configfile,newcontent);

    sdsfree(newcontent);
    rewriteConfigReleaseState(state);
    return retval;
}
+
+/*-----------------------------------------------------------------------------
+ * Configs that fit one of the major types and require no special handling
+ *----------------------------------------------------------------------------*/
#define LOADBUF_SIZE 256
/* Static scratch buffer used to build the error messages returned through
 * the 'err' out-parameter of the config setters: the pointer handed back
 * must remain valid after the setter returns. */
static char loadbuf[LOADBUF_SIZE];

/* Expands the fields shared by every standardConfig table entry. */
#define embedCommonConfig(config_name, config_alias, config_flags) \
    .name = (config_name), \
    .alias = (config_alias), \
    .flags = (config_flags),

/* Expands the per-type function table (init/set/get/rewrite/apply). */
#define embedConfigInterface(initfn, setfn, getfn, rewritefn, applyfn) .interface = { \
    .init = (initfn), \
    .set = (setfn), \
    .get = (getfn), \
    .rewrite = (rewritefn), \
    .apply = (applyfn) \
},
+
+/* What follows is the generic config types that are supported. To add a new
+ * config with one of these types, add it to the standardConfig table with
+ * the creation macro for each type.
+ *
+ * Each type contains the following:
+ * * A function defining how to load this type on startup.
+ * * A function defining how to update this type on CONFIG SET.
+ * * A function defining how to serialize this type on CONFIG SET.
+ * * A function defining how to rewrite this type on CONFIG REWRITE.
+ * * A Macro defining how to create this type.
+ */
+
+/* Bool Configs */
/* Load the compile-time default into the boolean config variable. */
static void boolConfigInit(standardConfig *config) {
    *config->data.yesno.config = config->data.yesno.default_value;
}
+
/* Set a boolean config from a "yes"/"no" argument.
 * Returns 0 on error (with *err set), 1 when the value was applied, and
 * 2 when the value was already set (no change). */
static int boolConfigSet(standardConfig *config, sds *argv, int argc, const char **err) {
    UNUSED(argc);
    int yn = yesnotoi(argv[0]);
    if (yn == -1) {
        *err = "argument must be 'yes' or 'no'";
        return 0;
    }
    if (config->data.yesno.is_valid_fn && !config->data.yesno.is_valid_fn(yn, err))
        return 0;
    /* Module-owned configs store their value on the module side. */
    int prev = config->flags & MODULE_CONFIG ? getModuleBoolConfig(config->privdata) : *(config->data.yesno.config);
    if (prev != yn) {
        if (config->flags & MODULE_CONFIG) {
            return setModuleBoolConfig(config->privdata, yn, err);
        }
        *(config->data.yesno.config) = yn;
        return 1;
    }
    /* Unchanged value: VOLATILE_CONFIG is always reported as changed. */
    return (config->flags & VOLATILE_CONFIG) ? 1 : 2;
}
+
+static sds boolConfigGet(standardConfig *config) {
+ if (config->flags & MODULE_CONFIG) {
+ return sdsnew(getModuleBoolConfig(config->privdata) ? "yes" : "no");
+ }
+ return sdsnew(*config->data.yesno.config ? "yes" : "no");
+}
+
/* CONFIG REWRITE callback for boolean configs: emit the current value,
 * skipping the line when it matches the default. */
static void boolConfigRewrite(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    int val = config->flags & MODULE_CONFIG ? getModuleBoolConfig(config->privdata) : *(config->data.yesno.config);
    rewriteConfigYesNoOption(state, name, val, config->data.yesno.default_value);
}
+
/* Declare a boolean config table entry backed by the int at config_addr. */
#define createBoolConfig(name, alias, flags, config_addr, default, is_valid, apply) { \
    embedCommonConfig(name, alias, flags) \
    embedConfigInterface(boolConfigInit, boolConfigSet, boolConfigGet, boolConfigRewrite, apply) \
    .type = BOOL_CONFIG, \
    .data.yesno = { \
        .config = &(config_addr), \
        .default_value = (default), \
        .is_valid_fn = (is_valid), \
    } \
}
+
+/* String Configs */
/* Initialize a C-string config to its default; a NULL default combined
 * with convert_empty_to_null yields a NULL value instead of a copy. */
static void stringConfigInit(standardConfig *config) {
    *config->data.string.config = (config->data.string.convert_empty_to_null && !config->data.string.default_value) ? NULL : zstrdup(config->data.string.default_value);
}
+
/* Set a C-string config. Returns 0 on validation error (*err set),
 * 1 when a new value was applied (the old one is freed), and 2 when the
 * value was already set. */
static int stringConfigSet(standardConfig *config, sds *argv, int argc, const char **err) {
    UNUSED(argc);
    if (config->data.string.is_valid_fn && !config->data.string.is_valid_fn(argv[0], err))
        return 0;
    char *prev = *config->data.string.config;
    /* An empty argument may be mapped to NULL depending on the config. */
    char *new = (config->data.string.convert_empty_to_null && !argv[0][0]) ? NULL : argv[0];
    if (new != prev && (new == NULL || prev == NULL || strcmp(prev, new))) {
        *config->data.string.config = new != NULL ? zstrdup(new) : NULL;
        zfree(prev);
        return 1;
    }
    return (config->flags & VOLATILE_CONFIG) ? 1 : 2;
}
+
/* Serialize a C-string config; a NULL value is rendered as "". */
static sds stringConfigGet(standardConfig *config) {
    return sdsnew(*config->data.string.config ? *config->data.string.config : "");
}
+
/* CONFIG REWRITE callback for C-string configs. */
static void stringConfigRewrite(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    rewriteConfigStringOption(state, name,*(config->data.string.config), config->data.string.default_value);
}
+
+/* SDS Configs */
/* Initialize an SDS config to its default; a NULL default combined with
 * convert_empty_to_null yields a NULL value instead of an sds copy. */
static void sdsConfigInit(standardConfig *config) {
    *config->data.sds.config = (config->data.sds.convert_empty_to_null && !config->data.sds.default_value) ? NULL : sdsnew(config->data.sds.default_value);
}
+
+static int sdsConfigSet(standardConfig *config, sds *argv, int argc, const char **err) {
+ UNUSED(argc);
+ if (config->data.sds.is_valid_fn && !config->data.sds.is_valid_fn(argv[0], err))
+ return 0;
+
+ sds prev = config->flags & MODULE_CONFIG ? getModuleStringConfig(config->privdata) : *config->data.sds.config;
+ sds new = (config->data.string.convert_empty_to_null && (sdslen(argv[0]) == 0)) ? NULL : argv[0];
+
+ /* if prev and new configuration are not equal, set the new one */
+ if (new != prev && (new == NULL || prev == NULL || sdscmp(prev, new))) {
+ /* If MODULE_CONFIG flag is set, then free temporary prev getModuleStringConfig returned.
+ * Otherwise, free the actual previous config value Redis held (Same action, different reasons) */
+ sdsfree(prev);
+
+ if (config->flags & MODULE_CONFIG) {
+ return setModuleStringConfig(config->privdata, new, err);
+ }
+ *config->data.sds.config = new != NULL ? sdsdup(new) : NULL;
+ return 1;
+ }
+ if (config->flags & MODULE_CONFIG && prev) sdsfree(prev);
+ return (config->flags & VOLATILE_CONFIG) ? 1 : 2;
+}
+
/* Serialize an SDS config as a freshly owned sds string (module configs
 * already return a temporary copy; Redis-held values are duplicated).
 * A NULL value is rendered as "". */
static sds sdsConfigGet(standardConfig *config) {
    sds val = config->flags & MODULE_CONFIG ? getModuleStringConfig(config->privdata) : *config->data.sds.config;
    if (val) {
        if (config->flags & MODULE_CONFIG) return val;
        return sdsdup(val);
    } else {
        return sdsnew("");
    }
}
+
/* CONFIG REWRITE callback for SDS configs. For module configs the value
 * returned by getModuleStringConfig is a temporary copy we must free. */
static void sdsConfigRewrite(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    sds val = config->flags & MODULE_CONFIG ? getModuleStringConfig(config->privdata) : *config->data.sds.config;
    rewriteConfigSdsOption(state, name, val, config->data.sds.default_value);
    if ((val) && (config->flags & MODULE_CONFIG)) sdsfree(val);
}
+
+
/* Values for the 'empty_to_null' argument of the create macros below. */
#define ALLOW_EMPTY_STRING 0
#define EMPTY_STRING_IS_NULL 1

/* Declare a C-string config table entry backed by the char* at config_addr. */
#define createStringConfig(name, alias, flags, empty_to_null, config_addr, default, is_valid, apply) { \
    embedCommonConfig(name, alias, flags) \
    embedConfigInterface(stringConfigInit, stringConfigSet, stringConfigGet, stringConfigRewrite, apply) \
    .type = STRING_CONFIG, \
    .data.string = { \
        .config = &(config_addr), \
        .default_value = (default), \
        .is_valid_fn = (is_valid), \
        .convert_empty_to_null = (empty_to_null), \
    } \
}

/* Declare an SDS config table entry backed by the sds at config_addr. */
#define createSDSConfig(name, alias, flags, empty_to_null, config_addr, default, is_valid, apply) { \
    embedCommonConfig(name, alias, flags) \
    embedConfigInterface(sdsConfigInit, sdsConfigSet, sdsConfigGet, sdsConfigRewrite, apply) \
    .type = SDS_CONFIG, \
    .data.sds = { \
        .config = &(config_addr), \
        .default_value = (default), \
        .is_valid_fn = (is_valid), \
        .convert_empty_to_null = (empty_to_null), \
    } \
}
+
+/* Enum configs */
/* Load the compile-time default into the enum config variable. */
static void enumConfigInit(standardConfig *config) {
    *config->data.enumd.config = config->data.enumd.default_value;
}
+
/* Set an enum config from its symbolic name(s). For MULTI_ARG_CONFIG
 * enums the arguments are combined as bit flags.
 * Returns 0 on error (*err set), 1 when the value was applied, and 2
 * when the value was already set. */
static int enumConfigSet(standardConfig *config, sds *argv, int argc, const char **err) {
    int enumval;
    int bitflags = !!(config->flags & MULTI_ARG_CONFIG);
    enumval = configEnumGetValue(config->data.enumd.enum_value, argv, argc, bitflags);

    if (enumval == INT_MIN) {
        /* Build a "must be one of ..." message listing every valid name. */
        sds enumerr = sdsnew("argument(s) must be one of the following: ");
        configEnum *enumNode = config->data.enumd.enum_value;
        while(enumNode->name != NULL) {
            enumerr = sdscatlen(enumerr, enumNode->name,
                                strlen(enumNode->name));
            enumerr = sdscatlen(enumerr, ", ", 2);
            enumNode++;
        }
        sdsrange(enumerr,0,-3); /* Remove final ", ". */

        /* Copy into the static loadbuf so *err outlives this function. */
        redis_strlcpy(loadbuf, enumerr, LOADBUF_SIZE);

        sdsfree(enumerr);
        *err = loadbuf;
        return 0;
    }
    if (config->data.enumd.is_valid_fn && !config->data.enumd.is_valid_fn(enumval, err))
        return 0;
    int prev = config->flags & MODULE_CONFIG ? getModuleEnumConfig(config->privdata) : *(config->data.enumd.config);
    if (prev != enumval) {
        if (config->flags & MODULE_CONFIG)
            return setModuleEnumConfig(config->privdata, enumval, err);
        *(config->data.enumd.config) = enumval;
        return 1;
    }
    return (config->flags & VOLATILE_CONFIG) ? 1 : 2;
}
+
/* Serialize an enum config as its symbolic name (or a space separated
 * list of flag names for MULTI_ARG_CONFIG enums). */
static sds enumConfigGet(standardConfig *config) {
    int val = config->flags & MODULE_CONFIG ? getModuleEnumConfig(config->privdata) : *(config->data.enumd.config);
    int bitflags = !!(config->flags & MULTI_ARG_CONFIG);
    return configEnumGetName(config->data.enumd.enum_value,val,bitflags);
}
+
/* CONFIG REWRITE callback for enum configs. */
static void enumConfigRewrite(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    int val = config->flags & MODULE_CONFIG ? getModuleEnumConfig(config->privdata) : *(config->data.enumd.config);
    rewriteConfigEnumOption(state, name, val, config);
}
+
/* Declare an enum config table entry backed by the int at config_addr,
 * taking values from the given configEnum name/value table. */
#define createEnumConfig(name, alias, flags, enum, config_addr, default, is_valid, apply) { \
    embedCommonConfig(name, alias, flags) \
    embedConfigInterface(enumConfigInit, enumConfigSet, enumConfigGet, enumConfigRewrite, apply) \
    .type = ENUM_CONFIG, \
    .data.enumd = { \
        .config = &(config_addr), \
        .default_value = (default), \
        .is_valid_fn = (is_valid), \
        .enum_value = (enum), \
    } \
}
+
/* Takes a 'long long val' and stores it into the numeric config's union,
 * casting to whichever integer type the config was declared with.
 * Returns 1 on success; module-owned long long configs may fail and
 * return 0 with *err set by setModuleNumericConfig(). */
int setNumericType(standardConfig *config, long long val, const char **err) {
    if (config->data.numeric.numeric_type == NUMERIC_TYPE_INT) {
        *(config->data.numeric.config.i) = (int) val;
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_UINT) {
        *(config->data.numeric.config.ui) = (unsigned int) val;
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_LONG) {
        *(config->data.numeric.config.l) = (long) val;
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_ULONG) {
        *(config->data.numeric.config.ul) = (unsigned long) val;
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_LONG_LONG) {
        /* Module numeric configs are only supported as long long. */
        if (config->flags & MODULE_CONFIG)
            return setModuleNumericConfig(config->privdata, val, err);
        else *(config->data.numeric.config.ll) = (long long) val;
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG) {
        *(config->data.numeric.config.ull) = (unsigned long long) val;
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) {
        *(config->data.numeric.config.st) = (size_t) val;
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_SSIZE_T) {
        *(config->data.numeric.config.sst) = (ssize_t) val;
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_OFF_T) {
        *(config->data.numeric.config.ot) = (off_t) val;
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_TIME_T) {
        *(config->data.numeric.config.tt) = (time_t) val;
    }
    return 1;
}
+
/* Loads the numeric config's current value (whatever its declared integer
 * type) into the 'val' lvalue as a long long. Implemented as a macro so
 * 'val' can be assigned directly with compile-time type checking. */
#define GET_NUMERIC_TYPE(val) \
    if (config->data.numeric.numeric_type == NUMERIC_TYPE_INT) { \
        val = *(config->data.numeric.config.i); \
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_UINT) { \
        val = *(config->data.numeric.config.ui); \
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_LONG) { \
        val = *(config->data.numeric.config.l); \
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_ULONG) { \
        val = *(config->data.numeric.config.ul); \
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_LONG_LONG) { \
        if (config->flags & MODULE_CONFIG) val = getModuleNumericConfig(config->privdata); \
        else val = *(config->data.numeric.config.ll); \
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG) { \
        val = *(config->data.numeric.config.ull); \
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) { \
        val = *(config->data.numeric.config.st); \
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_SSIZE_T) { \
        val = *(config->data.numeric.config.sst); \
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_OFF_T) { \
        val = *(config->data.numeric.config.ot); \
    } else if (config->data.numeric.numeric_type == NUMERIC_TYPE_TIME_T) { \
        val = *(config->data.numeric.config.tt); \
    }
+
+/* Numeric configs */
/* Load the compile-time default into the numeric config variable. */
static void numericConfigInit(standardConfig *config) {
    setNumericType(config, config->data.numeric.default_value, NULL);
}
+
/* Check that 'll' is inside the config's declared [lower_bound,
 * upper_bound] range, comparing as unsigned for unsigned-typed configs.
 * Returns 1 when in range, 0 otherwise with *err pointing to a message
 * in the static loadbuf. */
static int numericBoundaryCheck(standardConfig *config, long long ll, const char **err) {
    if (config->data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG ||
        config->data.numeric.numeric_type == NUMERIC_TYPE_UINT ||
        config->data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) {
        /* Boundary check for unsigned types */
        unsigned long long ull = ll;
        unsigned long long upper_bound = config->data.numeric.upper_bound;
        unsigned long long lower_bound = config->data.numeric.lower_bound;
        if (ull > upper_bound || ull < lower_bound) {
            if (config->data.numeric.flags & OCTAL_CONFIG) {
                snprintf(loadbuf, LOADBUF_SIZE,
                    "argument must be between %llo and %llo inclusive",
                    lower_bound,
                    upper_bound);
            } else {
                snprintf(loadbuf, LOADBUF_SIZE,
                    "argument must be between %llu and %llu inclusive",
                    lower_bound,
                    upper_bound);
            }
            *err = loadbuf;
            return 0;
        }
    } else {
        /* Boundary check for percentages: percentages are stored as
         * negative values, so a negative ll means "percent". */
        if (config->data.numeric.flags & PERCENT_CONFIG && ll < 0) {
            if (ll < config->data.numeric.lower_bound) {
                snprintf(loadbuf, LOADBUF_SIZE,
                    "percentage argument must be less or equal to %lld",
                    -config->data.numeric.lower_bound);
                *err = loadbuf;
                return 0;
            }
        }
        /* Boundary check for signed types */
        else if (ll > config->data.numeric.upper_bound || ll < config->data.numeric.lower_bound) {
            snprintf(loadbuf, LOADBUF_SIZE,
                "argument must be between %lld and %lld inclusive",
                config->data.numeric.lower_bound,
                config->data.numeric.upper_bound);
            *err = loadbuf;
            return 0;
        }
    }
    return 1;
}
+
/* Parse 'value' into *res, honoring the config's representation flags in
 * order: memory suffixes (MEMORY_CONFIG), "<n>%" percentages
 * (PERCENT_CONFIG, stored negated), octal (OCTAL_CONFIG), and finally a
 * plain decimal integer when no flags are set. Returns 1 on success,
 * 0 on failure with *err set to a matching message. */
static int numericParseString(standardConfig *config, sds value, const char **err, long long *res) {
    /* First try to parse as memory */
    if (config->data.numeric.flags & MEMORY_CONFIG) {
        int memerr;
        *res = memtoull(value, &memerr);
        if (!memerr)
            return 1;
    }

    /* Attempt to parse as percent */
    if (config->data.numeric.flags & PERCENT_CONFIG &&
        sdslen(value) > 1 && value[sdslen(value)-1] == '%' &&
        string2ll(value, sdslen(value)-1, res) &&
        *res >= 0) {
            /* We store percentage as negative value */
            *res = -*res;
            return 1;
    }

    /* Attempt to parse as an octal number */
    if (config->data.numeric.flags & OCTAL_CONFIG) {
        char *endptr;
        errno = 0;
        *res = strtoll(value, &endptr, 8);
        if (errno == 0 && *endptr == '\0')
            return 1; /* No overflow or invalid characters */
    }

    /* Attempt a simple number (no special flags set) */
    if (!config->data.numeric.flags && string2ll(value, sdslen(value), res))
        return 1;

    /* Select appropriate error string */
    if (config->data.numeric.flags & MEMORY_CONFIG &&
        config->data.numeric.flags & PERCENT_CONFIG)
        *err = "argument must be a memory or percent value" ;
    else if (config->data.numeric.flags & MEMORY_CONFIG)
        *err = "argument must be a memory value";
    else if (config->data.numeric.flags & OCTAL_CONFIG)
        *err = "argument couldn't be parsed as an octal number";
    else
        *err = "argument couldn't be parsed into an integer";
    return 0;
}
+
/* Set a numeric config from a string argument: parse, range-check,
 * validate, then store. Returns 0 on error (*err set), 1 when the value
 * was applied, and 2 when it was already set. */
static int numericConfigSet(standardConfig *config, sds *argv, int argc, const char **err) {
    UNUSED(argc);
    long long ll, prev = 0;

    if (!numericParseString(config, argv[0], err, &ll))
        return 0;

    if (!numericBoundaryCheck(config, ll, err))
        return 0;

    if (config->data.numeric.is_valid_fn && !config->data.numeric.is_valid_fn(ll, err))
        return 0;

    GET_NUMERIC_TYPE(prev)
    if (prev != ll) {
        return setNumericType(config, ll, err);
    }

    return (config->flags & VOLATILE_CONFIG) ? 1 : 2;
}
+
+static sds numericConfigGet(standardConfig *config) {
+ char buf[128];
+
+ long long value = 0;
+ GET_NUMERIC_TYPE(value)
+
+ if (config->data.numeric.flags & PERCENT_CONFIG && value < 0) {
+ int len = ll2string(buf, sizeof(buf), -value);
+ buf[len] = '%';
+ buf[len+1] = '\0';
+ }
+ else if (config->data.numeric.flags & MEMORY_CONFIG) {
+ ull2string(buf, sizeof(buf), value);
+ } else if (config->data.numeric.flags & OCTAL_CONFIG) {
+ snprintf(buf, sizeof(buf), "%llo", value);
+ } else {
+ ll2string(buf, sizeof(buf), value);
+ }
+ return sdsnew(buf);
+}
+
/* CONFIG REWRITE callback for numeric configs: dispatch to the rewrite
 * helper matching the config's representation flags. */
static void numericConfigRewrite(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    long long value = 0;

    GET_NUMERIC_TYPE(value)

    if (config->data.numeric.flags & PERCENT_CONFIG && value < 0) {
        /* Negative stored value means a percentage: negate it back. */
        rewriteConfigPercentOption(state, name, -value, config->data.numeric.default_value);
    } else if (config->data.numeric.flags & MEMORY_CONFIG) {
        rewriteConfigBytesOption(state, name, value, config->data.numeric.default_value);
    } else if (config->data.numeric.flags & OCTAL_CONFIG) {
        rewriteConfigOctalOption(state, name, value, config->data.numeric.default_value);
    } else {
        rewriteConfigNumericalOption(state, name, value, config->data.numeric.default_value);
    }
}
+
/* Expands the common part of every numeric config entry. NOTE: this macro
 * intentionally leaves two braces open; each create*Config macro below
 * appends the type-specific fields and supplies the closing braces. */
#define embedCommonNumericalConfig(name, alias, _flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) { \
    embedCommonConfig(name, alias, _flags) \
    embedConfigInterface(numericConfigInit, numericConfigSet, numericConfigGet, numericConfigRewrite, apply) \
    .type = NUMERIC_CONFIG, \
    .data.numeric = { \
        .lower_bound = (lower), \
        .upper_bound = (upper), \
        .default_value = (default), \
        .is_valid_fn = (is_valid), \
        .flags = (num_conf_flags),
+
/* Per-integer-type numeric config declarations. Each macro completes the
 * entry opened by embedCommonNumericalConfig with the matching
 * numeric_type tag and the correctly typed pointer union member. */
#define createIntConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
    embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
        .numeric_type = NUMERIC_TYPE_INT, \
        .config.i = &(config_addr) \
    } \
}

#define createUIntConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
    embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
        .numeric_type = NUMERIC_TYPE_UINT, \
        .config.ui = &(config_addr) \
    } \
}

#define createLongConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
    embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
        .numeric_type = NUMERIC_TYPE_LONG, \
        .config.l = &(config_addr) \
    } \
}

#define createULongConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
    embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
        .numeric_type = NUMERIC_TYPE_ULONG, \
        .config.ul = &(config_addr) \
    } \
}

#define createLongLongConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
    embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
        .numeric_type = NUMERIC_TYPE_LONG_LONG, \
        .config.ll = &(config_addr) \
    } \
}

#define createULongLongConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
    embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
        .numeric_type = NUMERIC_TYPE_ULONG_LONG, \
        .config.ull = &(config_addr) \
    } \
}
+
+#define createSizeTConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
+ embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
+ .numeric_type = NUMERIC_TYPE_SIZE_T, \
+ .config.st = &(config_addr) \
+ } \
+}
+
+#define createSSizeTConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
+ embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
+ .numeric_type = NUMERIC_TYPE_SSIZE_T, \
+ .config.sst = &(config_addr) \
+ } \
+}
+
+#define createTimeTConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
+ embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
+ .numeric_type = NUMERIC_TYPE_TIME_T, \
+ .config.tt = &(config_addr) \
+ } \
+}
+
+#define createOffTConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
+ embedCommonNumericalConfig(name, alias, flags, lower, upper, config_addr, default, num_conf_flags, is_valid, apply) \
+ .numeric_type = NUMERIC_TYPE_OFF_T, \
+ .config.ot = &(config_addr) \
+ } \
+}
+
/* Config handled entirely by custom set/get/rewrite/apply callbacks (used
 * below for e.g. "dir", "save", "bind"). No generic init function. */
#define createSpecialConfig(name, alias, modifiable, setfn, getfn, rewritefn, applyfn) { \
    .type = SPECIAL_CONFIG, \
    embedCommonConfig(name, alias, modifiable) \
    embedConfigInterface(NULL, setfn, getfn, rewritefn, applyfn) \
}
+
/* Validator for "activedefrag": enabling it requires a build with the
 * defrag-capable Jemalloc (HAVE_DEFRAG); otherwise only 0 is accepted. */
static int isValidActiveDefrag(int val, const char **err) {
#ifdef HAVE_DEFRAG
    UNUSED(val);
    UNUSED(err);
    return 1;
#else
    if (!val) return 1;
    *err = "Active defragmentation cannot be enabled: it "
           "requires a Redis server compiled with a modified Jemalloc "
           "like the one shipped by default with the Redis source "
           "distribution";
    return 0;
#endif
}
+
/* Validator for "dbfilename": must be a bare file name, no path parts. */
static int isValidDBfilename(char *val, const char **err) {
    if (pathIsBaseName(val)) return 1;
    *err = "dbfilename can't be a path, just a filename";
    return 0;
}
+
/* Validator for "appendfilename": non-empty and a bare file name. */
static int isValidAOFfilename(char *val, const char **err) {
    if (val[0] == '\0') {
        *err = "appendfilename can't be empty";
        return 0;
    }
    if (!pathIsBaseName(val)) {
        *err = "appendfilename can't be a path, just a filename";
        return 0;
    }
    return 1;
}
+
/* Validator for "appenddirname": non-empty and a bare directory name. */
static int isValidAOFdirname(char *val, const char **err) {
    if (val[0] == '\0') {
        *err = "appenddirname can't be empty";
        return 0;
    }
    if (!pathIsBaseName(val)) {
        *err = "appenddirname can't be a path, just a dirname";
        return 0;
    }
    return 1;
}
+
+static int isValidShutdownOnSigFlags(int val, const char **err) {
+ /* Individual arguments are validated by createEnumConfig logic.
+ * We just need to ensure valid combinations here. */
+ if (val & SHUTDOWN_NOSAVE && val & SHUTDOWN_SAVE) {
+ *err = "shutdown options SAVE and NOSAVE can't be used simultaneously";
+ return 0;
+ }
+ return 1;
+}
+
/* Validator for "cluster-announce-human-nodename": the whole sds value
 * must pass the cluster aux-string character check. */
static int isValidAnnouncedNodename(char *val, const char **err) {
    if (isValidAuxString(val, sdslen(val))) return 1;
    *err = "Announced human node name contained invalid character";
    return 0;
}
+
+static int isValidAnnouncedHostname(char *val, const char **err) {
+ if (strlen(val) >= NET_HOST_STR_LEN) {
+ *err = "Hostnames must be less than "
+ STRINGIFY(NET_HOST_STR_LEN) " characters";
+ return 0;
+ }
+
+ int i = 0;
+ char c;
+ while ((c = val[i])) {
+ /* We just validate the character set to make sure that everything
+ * is parsed and handled correctly. */
+ if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+ || (c >= '0' && c <= '9') || (c == '-') || (c == '.')))
+ {
+ *err = "Hostnames may only contain alphanumeric characters, "
+ "hyphens or dots";
+ return 0;
+ }
+ c = val[i++];
+ }
+ return 1;
+}
+
/* Validate that the given string is an acceptable proc-title-template
 * (well-formed and only known variables). */
static int isValidProcTitleTemplate(char *val, const char **err) {
    if (validateProcTitleTemplate(val)) return 1;
    *err = "template format is invalid or contains unknown variables";
    return 0;
}
+
+static int updateLocaleCollate(const char **err) {
+ const char *s = setlocale(LC_COLLATE, server.locale_collate);
+ if (s == NULL) {
+ *err = "Invalid locale name";
+ return 0;
+ }
+ return 1;
+}
+
+static int updateProcTitleTemplate(const char **err) {
+ if (redisSetProcTitle(NULL) == C_ERR) {
+ *err = "failed to set process title";
+ return 0;
+ }
+ return 1;
+}
+
+static int updateHZ(const char **err) {
+ UNUSED(err);
+ /* Hz is more a hint from the user, so we accept values out of range
+ * but cap them to reasonable values. */
+ if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ;
+ if (server.config_hz > CONFIG_MAX_HZ) server.config_hz = CONFIG_MAX_HZ;
+ server.hz = server.config_hz;
+ return 1;
+}
+
/* Apply callback for "port": rebind the plain TCP listener to the new port
 * using the currently configured bind addresses. Same listener-mutation
 * pattern as applyBind() below. */
static int updatePort(const char **err) {
    connListener *listener = listenerByType(CONN_TYPE_SOCKET);

    serverAssert(listener != NULL);
    listener->bindaddr = server.bindaddr;
    listener->bindaddr_count = server.bindaddr_count;
    listener->port = server.port;
    listener->ct = connectionByType(CONN_TYPE_SOCKET);
    if (changeListener(listener) == C_ERR) {
        *err = "Unable to listen on this port. Check server logs.";
        return 0;
    }

    return 1;
}
+
/* Apply callback for "jemalloc-bg-thread": forward the flag to jemalloc. */
static int updateJemallocBgThread(const char **err) {
    UNUSED(err);
    set_jemalloc_bg_thread(server.jemalloc_bg_thread);
    return 1;
}

/* Apply callback for the replication backlog size: resize in place. */
static int updateReplBacklogSize(const char **err) {
    UNUSED(err);
    resizeReplicationBacklog();
    return 1;
}

/* Apply callback for "maxmemory": warn when the new limit is already below
 * current usage (eviction and/or write rejection will follow, depending on
 * maxmemory-policy) and kick the eviction timer. */
static int updateMaxmemory(const char **err) {
    UNUSED(err);
    if (server.maxmemory) {
        /* Memory counted against the limit excludes e.g. replication
         * buffers, hence the subtraction. */
        size_t used = zmalloc_used_memory()-freeMemoryGetNotCountedMemory();
        if (server.maxmemory < used) {
            serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET (%llu) is smaller than the current memory usage (%zu). This will result in key eviction and/or the inability to accept new write commands depending on the maxmemory-policy.", server.maxmemory, used);
        }
        startEvictionTimeProc();
    }
    return 1;
}

/* Apply callback: recompute the cached count of "good" replicas. */
static int updateGoodSlaves(const char **err) {
    UNUSED(err);
    refreshGoodSlavesCount();
    return 1;
}

/* Apply callback for the software watchdog period. */
static int updateWatchdogPeriod(const char **err) {
    UNUSED(err);
    applyWatchdogPeriod();
    return 1;
}
+
+static int updateAppendonly(const char **err) {
+ if (!server.aof_enabled && server.aof_state != AOF_OFF) {
+ stopAppendOnly();
+ } else if (server.aof_enabled && server.aof_state == AOF_OFF) {
+ if (startAppendOnly() == C_ERR) {
+ *err = "Unable to turn on AOF. Check server logs.";
+ return 0;
+ }
+ }
+ return 1;
+}
+
/* Apply callback for "aof-disable-auto-gc": when auto GC is (re-)enabled,
 * immediately delete any history AOF files that accumulated while it was
 * disabled. */
static int updateAofAutoGCEnabled(const char **err) {
    UNUSED(err);
    if (!server.aof_disable_auto_gc) {
        aofDelHistoryFiles();
    }

    return 1;
}
+
+static int updateSighandlerEnabled(const char **err) {
+ UNUSED(err);
+ if (server.crashlog_enabled)
+ setupSignalHandlers();
+ else
+ removeSignalHandlers();
+ return 1;
+}
+
+static int updateMaxclients(const char **err) {
+ unsigned int new_maxclients = server.maxclients;
+ adjustOpenFilesLimit();
+ if (server.maxclients != new_maxclients) {
+ static char msg[128];
+ snprintf(msg, sizeof(msg), "The operating system is not able to handle the specified number of clients, try with %d", server.maxclients);
+ *err = msg;
+ return 0;
+ }
+ if ((unsigned int) aeGetSetSize(server.el) <
+ server.maxclients + CONFIG_FDSET_INCR)
+ {
+ if (aeResizeSetSize(server.el,
+ server.maxclients + CONFIG_FDSET_INCR) == AE_ERR)
+ {
+ *err = "The event loop API used by Redis is not able to handle the specified number of clients";
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static int updateOOMScoreAdj(const char **err) {
+ if (setOOMScoreAdj(-1) == C_ERR) {
+ *err = "Failed to set current oom_score_adj. Check server logs.";
+ return 0;
+ }
+
+ return 1;
+}
+
/* Apply callback for "requirepass". */
int updateRequirePass(const char **err) {
    UNUSED(err);
    /* The old "requirepass" directive just translates to setting
     * a password to the default user. The only thing we do
     * additionally is to remember the cleartext password in this
     * case, for backward compatibility with Redis <= 5. */
    ACLUpdateDefaultUserPassword(server.requirepass);
    return 1;
}

/* Apply callback for "appendfsync". */
int updateAppendFsync(const char **err) {
    UNUSED(err);
    if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
        /* Wait for all bio jobs related to AOF to drain before proceeding. This prevents a race
         * between updates to `fsynced_reploff_pending` done in the main thread and those done on the
         * worker thread. */
        bioDrainWorker(BIO_AOF_FSYNC);
    }
    return 1;
}
+
/* applyBind affects both TCP and TLS (if enabled) together */
static int applyBind(const char **err) {
    connListener *tcp_listener = listenerByType(CONN_TYPE_SOCKET);
    connListener *tls_listener = listenerByType(CONN_TYPE_TLS);

    /* Rebind TCP first; on failure the TLS listener (if any) is closed too
     * so both transports fail or succeed together. */
    serverAssert(tcp_listener != NULL);
    tcp_listener->bindaddr = server.bindaddr;
    tcp_listener->bindaddr_count = server.bindaddr_count;
    tcp_listener->port = server.port;
    tcp_listener->ct = connectionByType(CONN_TYPE_SOCKET);
    if (changeListener(tcp_listener) == C_ERR) {
        *err = "Failed to bind to specified addresses.";
        if (tls_listener)
            closeListener(tls_listener); /* failed with TLS together */
        return 0;
    }

    /* Then rebind TLS, mirroring the same addresses on the TLS port. */
    if (server.tls_port != 0) {
        serverAssert(tls_listener != NULL);
        tls_listener->bindaddr = server.bindaddr;
        tls_listener->bindaddr_count = server.bindaddr_count;
        tls_listener->port = server.tls_port;
        tls_listener->ct = connectionByType(CONN_TYPE_TLS);
        if (changeListener(tls_listener) == C_ERR) {
            *err = "Failed to bind to specified addresses.";
            closeListener(tcp_listener); /* failed with TCP together */
            return 0;
        }
    }

    return 1;
}
+
/* Apply callback: refresh this node's advertised cluster flags. */
int updateClusterFlags(const char **err) {
    UNUSED(err);
    clusterUpdateMyselfFlags();
    return 1;
}

/* Apply callback: refresh this node's announced ports. */
static int updateClusterAnnouncedPort(const char **err) {
    UNUSED(err);
    clusterUpdateMyselfAnnouncedPorts();
    return 1;
}

/* Apply callback: refresh this node's announced IP. */
static int updateClusterIp(const char **err) {
    UNUSED(err);
    clusterUpdateMyselfIp();
    return 1;
}

/* Apply callback: refresh this node's announced hostname. */
int updateClusterHostname(const char **err) {
    UNUSED(err);
    clusterUpdateMyselfHostname();
    return 1;
}

/* Apply callback: refresh this node's announced human-readable nodename. */
int updateClusterHumanNodename(const char **err) {
    UNUSED(err);
    clusterUpdateMyselfHumanNodename();
    return 1;
}
+
+static int applyTlsCfg(const char **err) {
+ UNUSED(err);
+
+ /* If TLS is enabled, try to configure OpenSSL. */
+ if ((server.tls_port || server.tls_replication || server.tls_cluster)
+ && connTypeConfigure(connectionTypeTls(), &server.tls_ctx_config, 1) == C_ERR) {
+ *err = "Unable to update TLS configuration. Check server logs.";
+ return 0;
+ }
+ return 1;
+}
+
/* Apply callback for "tls-port": make sure the TLS stack is configured
 * (it may not have been at startup) and rebind the TLS listener. */
static int applyTLSPort(const char **err) {
    /* Configure TLS in case it wasn't enabled */
    if (connTypeConfigure(connectionTypeTls(), &server.tls_ctx_config, 0) == C_ERR) {
        *err = "Unable to update TLS configuration. Check server logs.";
        return 0;
    }

    connListener *listener = listenerByType(CONN_TYPE_TLS);
    serverAssert(listener != NULL);
    listener->bindaddr = server.bindaddr;
    listener->bindaddr_count = server.bindaddr_count;
    listener->port = server.tls_port;
    listener->ct = connectionByType(CONN_TYPE_TLS);
    if (changeListener(listener) == C_ERR) {
        *err = "Unable to listen on this port. Check server logs.";
        return 0;
    }

    return 1;
}
+
+static int setConfigDirOption(standardConfig *config, sds *argv, int argc, const char **err) {
+ UNUSED(config);
+ if (argc != 1) {
+ *err = "wrong number of arguments";
+ return 0;
+ }
+ if (chdir(argv[0]) == -1) {
+ *err = strerror(errno);
+ return 0;
+ }
+ return 1;
+}
+
+static sds getConfigDirOption(standardConfig *config) {
+ UNUSED(config);
+ char buf[1024];
+
+ if (getcwd(buf,sizeof(buf)) == NULL)
+ buf[0] = '\0';
+
+ return sdsnew(buf);
+}
+
+static int setConfigSaveOption(standardConfig *config, sds *argv, int argc, const char **err) {
+ UNUSED(config);
+ int j;
+
+ /* Special case: treat single arg "" as zero args indicating empty save configuration */
+ if (argc == 1 && !strcasecmp(argv[0],"")) {
+ resetServerSaveParams();
+ argc = 0;
+ }
+
+ /* Perform sanity check before setting the new config:
+ * - Even number of args
+ * - Seconds >= 1, changes >= 0 */
+ if (argc & 1) {
+ *err = "Invalid save parameters";
+ return 0;
+ }
+ for (j = 0; j < argc; j++) {
+ char *eptr;
+ long val;
+
+ val = strtoll(argv[j], &eptr, 10);
+ if (eptr[0] != '\0' ||
+ ((j & 1) == 0 && val < 1) ||
+ ((j & 1) == 1 && val < 0)) {
+ *err = "Invalid save parameters";
+ return 0;
+ }
+ }
+ /* Finally set the new config */
+ if (!reading_config_file) {
+ resetServerSaveParams();
+ } else {
+ /* We don't reset save params before loading, because if they're not part
+ * of the file the defaults should be used.
+ */
+ static int save_loaded = 0;
+ if (!save_loaded) {
+ save_loaded = 1;
+ resetServerSaveParams();
+ }
+ }
+
+ for (j = 0; j < argc; j += 2) {
+ time_t seconds;
+ int changes;
+
+ seconds = strtoll(argv[j],NULL,10);
+ changes = strtoll(argv[j+1],NULL,10);
+ appendServerSaveParams(seconds, changes);
+ }
+
+ return 1;
+}
+
+static sds getConfigSaveOption(standardConfig *config) {
+ UNUSED(config);
+ sds buf = sdsempty();
+ int j;
+
+ for (j = 0; j < server.saveparamslen; j++) {
+ buf = sdscatprintf(buf,"%jd %d",
+ (intmax_t)server.saveparams[j].seconds,
+ server.saveparams[j].changes);
+ if (j != server.saveparamslen-1)
+ buf = sdscatlen(buf," ",1);
+ }
+
+ return buf;
+}
+
/* Setter for "client-output-buffer-limit": thin adapter that delegates
 * parsing, validation and application to updateClientOutputBufferLimit(). */
static int setConfigClientOutputBufferLimitOption(standardConfig *config, sds *argv, int argc, const char **err) {
    UNUSED(config);
    return updateClientOutputBufferLimit(argv, argc, err);
}

/* Getter for "client-output-buffer-limit": one
 * "class hard soft soft_seconds" group per client type, space separated. */
static sds getConfigClientOutputBufferLimitOption(standardConfig *config) {
    UNUSED(config);
    sds buf = sdsempty();
    int j;
    for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) {
        buf = sdscatprintf(buf,"%s %llu %llu %ld",
                getClientTypeName(j),
                server.client_obuf_limits[j].hard_limit_bytes,
                server.client_obuf_limits[j].soft_limit_bytes,
                (long) server.client_obuf_limits[j].soft_limit_seconds);
        if (j != CLIENT_TYPE_OBUF_COUNT-1)
            buf = sdscatlen(buf," ",1);
    }
    return buf;
}
+
+/* Parse an array of CONFIG_OOM_COUNT sds strings, validate and populate
+ * server.oom_score_adj_values if valid.
+ */
+static int setConfigOOMScoreAdjValuesOption(standardConfig *config, sds *argv, int argc, const char **err) {
+ int i;
+ int values[CONFIG_OOM_COUNT];
+ int change = 0;
+ UNUSED(config);
+
+ if (argc != CONFIG_OOM_COUNT) {
+ *err = "wrong number of arguments";
+ return 0;
+ }
+
+ for (i = 0; i < CONFIG_OOM_COUNT; i++) {
+ char *eptr;
+ long long val = strtoll(argv[i], &eptr, 10);
+
+ if (*eptr != '\0' || val < -2000 || val > 2000) {
+ if (err) *err = "Invalid oom-score-adj-values, elements must be between -2000 and 2000.";
+ return 0;
+ }
+
+ values[i] = val;
+ }
+
+ /* Verify that the values make sense. If they don't omit a warning but
+ * keep the configuration, which may still be valid for privileged processes.
+ */
+
+ if (values[CONFIG_OOM_REPLICA] < values[CONFIG_OOM_MASTER] ||
+ values[CONFIG_OOM_BGCHILD] < values[CONFIG_OOM_REPLICA])
+ {
+ serverLog(LL_WARNING,
+ "The oom-score-adj-values configuration may not work for non-privileged processes! "
+ "Please consult the documentation.");
+ }
+
+ for (i = 0; i < CONFIG_OOM_COUNT; i++) {
+ if (server.oom_score_adj_values[i] != values[i]) {
+ server.oom_score_adj_values[i] = values[i];
+ change = 1;
+ }
+ }
+
+ return change ? 1 : 2;
+}
+
+static sds getConfigOOMScoreAdjValuesOption(standardConfig *config) {
+ UNUSED(config);
+ sds buf = sdsempty();
+ int j;
+
+ for (j = 0; j < CONFIG_OOM_COUNT; j++) {
+ buf = sdscatprintf(buf,"%d", server.oom_score_adj_values[j]);
+ if (j != CONFIG_OOM_COUNT-1)
+ buf = sdscatlen(buf," ",1);
+ }
+
+ return buf;
+}
+
+static int setConfigNotifyKeyspaceEventsOption(standardConfig *config, sds *argv, int argc, const char **err) {
+ UNUSED(config);
+ if (argc != 1) {
+ *err = "wrong number of arguments";
+ return 0;
+ }
+ int flags = keyspaceEventsStringToFlags(argv[0]);
+ if (flags == -1) {
+ *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdn'.";
+ return 0;
+ }
+ server.notify_keyspace_events = flags;
+ return 1;
+}
+
/* Getter for "notify-keyspace-events": render the flags bitmask back into
 * its class-character string form. */
static sds getConfigNotifyKeyspaceEventsOption(standardConfig *config) {
    UNUSED(config);
    return keyspaceEventsFlagsToString(server.notify_keyspace_events);
}
+
/* Setter for "bind": replace server.bindaddr with the given addresses.
 * A single empty string clears the list (bind to default behavior). The
 * new addresses are only stored here; re-binding happens in applyBind(). */
static int setConfigBindOption(standardConfig *config, sds* argv, int argc, const char **err) {
    UNUSED(config);
    int j;

    if (argc > CONFIG_BINDADDR_MAX) {
        *err = "Too many bind addresses specified.";
        return 0;
    }

    /* A single empty argument is treated as a zero bindaddr count */
    if (argc == 1 && sdslen(argv[0]) == 0) argc = 0;

    /* Free old bind addresses */
    for (j = 0; j < server.bindaddr_count; j++) {
        zfree(server.bindaddr[j]);
    }
    for (j = 0; j < argc; j++)
        server.bindaddr[j] = zstrdup(argv[j]);
    server.bindaddr_count = argc;

    return 1;
}
+
/* Setter for "replicaof"/"slaveof": "<host> <port>" attaches to a master,
 * the special pair "no one" detaches. */
static int setConfigReplicaOfOption(standardConfig *config, sds* argv, int argc, const char **err) {
    UNUSED(config);

    if (argc != 2) {
        *err = "wrong number of arguments";
        return 0;
    }

    /* Any previous master is forgotten first; "no one" stops here,
     * leaving the server detached. */
    sdsfree(server.masterhost);
    server.masterhost = NULL;
    if (!strcasecmp(argv[0], "no") && !strcasecmp(argv[1], "one")) {
        return 1;
    }
    char *ptr;
    server.masterport = strtol(argv[1], &ptr, 10);
    /* NOTE(review): on an invalid port we return an error with masterhost
     * already cleared (server left detached) — confirm this is intended. */
    if (server.masterport < 0 || server.masterport > 65535 || *ptr != '\0') {
        *err = "Invalid master port";
        return 0;
    }
    server.masterhost = sdsnew(argv[0]);
    server.repl_state = REPL_STATE_CONNECT;
    return 1;
}
+
/* Getter for "bind": space-joined list of the configured bind addresses. */
static sds getConfigBindOption(standardConfig *config) {
    UNUSED(config);
    return sdsjoin(server.bindaddr,server.bindaddr_count," ");
}
+
+static sds getConfigReplicaOfOption(standardConfig *config) {
+ UNUSED(config);
+ char buf[256];
+ if (server.masterhost)
+ snprintf(buf,sizeof(buf),"%s %d",
+ server.masterhost, server.masterport);
+ else
+ buf[0] = '\0';
+ return sdsnew(buf);
+}
+
+int allowProtectedAction(int config, client *c) {
+ return (config == PROTECTED_ACTION_ALLOWED_YES) ||
+ (config == PROTECTED_ACTION_ALLOWED_LOCAL && (connIsLocal(c->conn) == 1));
+}
+
+
/* Setter for "latency-tracking-info-percentiles": replace the percentile
 * array with the parsed arguments. A single empty string means "no
 * percentiles". On any parse/range error the array is freed and the
 * configuration left empty (goto configerr cleanup). */
static int setConfigLatencyTrackingInfoPercentilesOutputOption(standardConfig *config, sds *argv, int argc, const char **err) {
    UNUSED(config);
    /* Drop the previous array before building the new one. */
    zfree(server.latency_tracking_info_percentiles);
    server.latency_tracking_info_percentiles = NULL;
    server.latency_tracking_info_percentiles_len = argc;

    /* Special case: treat single arg "" as zero args indicating empty percentile configuration */
    if (argc == 1 && sdslen(argv[0]) == 0)
        server.latency_tracking_info_percentiles_len = 0;
    else
        server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*argc);

    for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
        double percentile;
        if (!string2d(argv[j], sdslen(argv[j]), &percentile)) {
            *err = "Invalid latency-tracking-info-percentiles parameters";
            goto configerr;
        }
        if (percentile > 100.0 || percentile < 0.0) {
            *err = "latency-tracking-info-percentiles parameters should sit between [0.0,100.0]";
            goto configerr;
        }
        server.latency_tracking_info_percentiles[j] = percentile;
    }

    return 1;
configerr:
    zfree(server.latency_tracking_info_percentiles);
    server.latency_tracking_info_percentiles = NULL;
    server.latency_tracking_info_percentiles_len = 0;
    return 0;
}
+
/* Getter for "latency-tracking-info-percentiles": space-separated list of
 * the configured percentiles, with trailing zeros trimmed from each. */
static sds getConfigLatencyTrackingInfoPercentilesOutputOption(standardConfig *config) {
    UNUSED(config);
    sds buf = sdsempty();
    for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
        char fbuf[128];
        size_t len = snprintf(fbuf, sizeof(fbuf), "%f", server.latency_tracking_info_percentiles[j]);
        /* Strip insignificant trailing zeros/decimal point. */
        len = trimDoubleString(fbuf, len);
        buf = sdscatlen(buf, fbuf, len);
        if (j != server.latency_tracking_info_percentiles_len-1)
            buf = sdscatlen(buf," ",1);
    }
    return buf;
}
+
/* Rewrite the latency-tracking-info-percentiles option. */
void rewriteConfigLatencyTrackingInfoPercentilesOutputOption(standardConfig *config, const char *name, struct rewriteConfigState *state) {
    UNUSED(config);
    sds line = sdsnew(name);
    /* Rewrite latency-tracking-info-percentiles parameters,
     * or an empty 'latency-tracking-info-percentiles ""' line to avoid the
     * defaults from being used.
     */
    if (!server.latency_tracking_info_percentiles_len) {
        line = sdscat(line," \"\"");
    } else {
        for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
            char fbuf[128];
            size_t len = snprintf(fbuf, sizeof(fbuf), " %f", server.latency_tracking_info_percentiles[j]);
            /* Strip insignificant trailing zeros/decimal point. */
            len = trimDoubleString(fbuf, len);
            line = sdscatlen(line, fbuf, len);
        }
    }
    rewriteConfigRewriteLine(state,name,line,1);
}
+
/* Apply callback for "maxmemory-clients": create/refresh or tear down the
 * per-client memory-usage buckets used for client eviction. */
static int applyClientMaxMemoryUsage(const char **err) {
    UNUSED(err);
    listIter li;
    listNode *ln;

    /* server.client_mem_usage_buckets is an indication that the previous config
     * was non-zero, in which case we can exit and no apply is needed. */
    if(server.maxmemory_clients !=0 && server.client_mem_usage_buckets)
        return 1;
    if (server.maxmemory_clients != 0)
        initServerClientMemUsageBuckets();

    /* When client eviction is enabled update memory buckets for all clients.
     * When disabled, clear that data structure. */
    listRewind(server.clients, &li);
    while ((ln = listNext(&li)) != NULL) {
        client *c = listNodeValue(ln);
        if (server.maxmemory_clients == 0) {
            /* Remove client from memory usage bucket. */
            removeClientFromMemUsageBucket(c, 0);
        } else {
            /* Update each client(s) memory usage and add to appropriate bucket. */
            updateClientMemUsageAndBucket(c);
        }
    }

    /* Eviction disabled: release the (now unused) bucket structures. */
    if (server.maxmemory_clients == 0)
        freeServerClientMemUsageBuckets();
    return 1;
}
+
+standardConfig static_configs[] = {
+ /* Bool configs */
+ createBoolConfig("rdbchecksum", NULL, IMMUTABLE_CONFIG, server.rdb_checksum, 1, NULL, NULL),
+ createBoolConfig("daemonize", NULL, IMMUTABLE_CONFIG, server.daemonize, 0, NULL, NULL),
+ createBoolConfig("io-threads-do-reads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, server.io_threads_do_reads, 0,NULL, NULL), /* Read + parse from threads? */
+ createBoolConfig("always-show-logo", NULL, IMMUTABLE_CONFIG, server.always_show_logo, 0, NULL, NULL),
+ createBoolConfig("protected-mode", NULL, MODIFIABLE_CONFIG, server.protected_mode, 1, NULL, NULL),
+ createBoolConfig("rdbcompression", NULL, MODIFIABLE_CONFIG, server.rdb_compression, 1, NULL, NULL),
+ createBoolConfig("rdb-del-sync-files", NULL, MODIFIABLE_CONFIG, server.rdb_del_sync_files, 0, NULL, NULL),
+ createBoolConfig("activerehashing", NULL, MODIFIABLE_CONFIG, server.activerehashing, 1, NULL, NULL),
+ createBoolConfig("stop-writes-on-bgsave-error", NULL, MODIFIABLE_CONFIG, server.stop_writes_on_bgsave_err, 1, NULL, NULL),
+ createBoolConfig("set-proc-title", NULL, IMMUTABLE_CONFIG, server.set_proc_title, 1, NULL, NULL), /* Should setproctitle be used? */
+ createBoolConfig("dynamic-hz", NULL, MODIFIABLE_CONFIG, server.dynamic_hz, 1, NULL, NULL), /* Adapt hz to # of clients.*/
+ createBoolConfig("lazyfree-lazy-eviction", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_eviction, 0, NULL, NULL),
+ createBoolConfig("lazyfree-lazy-expire", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_expire, 0, NULL, NULL),
+ createBoolConfig("lazyfree-lazy-server-del", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_server_del, 0, NULL, NULL),
+ createBoolConfig("lazyfree-lazy-user-del", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_user_del , 0, NULL, NULL),
+ createBoolConfig("lazyfree-lazy-user-flush", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush , 0, NULL, NULL),
+ createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL),
+ createBoolConfig("repl-diskless-sync", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_diskless_sync, 1, NULL, NULL),
+ createBoolConfig("aof-rewrite-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.aof_rewrite_incremental_fsync, 1, NULL, NULL),
+ createBoolConfig("no-appendfsync-on-rewrite", NULL, MODIFIABLE_CONFIG, server.aof_no_fsync_on_rewrite, 0, NULL, NULL),
+ createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, server.cluster_require_full_coverage, 1, NULL, NULL),
+ createBoolConfig("rdb-save-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.rdb_save_incremental_fsync, 1, NULL, NULL),
+ createBoolConfig("aof-load-truncated", NULL, MODIFIABLE_CONFIG, server.aof_load_truncated, 1, NULL, NULL),
+ createBoolConfig("aof-use-rdb-preamble", NULL, MODIFIABLE_CONFIG, server.aof_use_rdb_preamble, 1, NULL, NULL),
+ createBoolConfig("aof-timestamp-enabled", NULL, MODIFIABLE_CONFIG, server.aof_timestamp_enabled, 0, NULL, NULL),
+ createBoolConfig("cluster-replica-no-failover", "cluster-slave-no-failover", MODIFIABLE_CONFIG, server.cluster_slave_no_failover, 0, NULL, updateClusterFlags), /* Failover by default. */
+ createBoolConfig("replica-lazy-flush", "slave-lazy-flush", MODIFIABLE_CONFIG, server.repl_slave_lazy_flush, 0, NULL, NULL),
+ createBoolConfig("replica-serve-stale-data", "slave-serve-stale-data", MODIFIABLE_CONFIG, server.repl_serve_stale_data, 1, NULL, NULL),
+ createBoolConfig("replica-read-only", "slave-read-only", DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_slave_ro, 1, NULL, NULL),
+ createBoolConfig("replica-ignore-maxmemory", "slave-ignore-maxmemory", MODIFIABLE_CONFIG, server.repl_slave_ignore_maxmemory, 1, NULL, NULL),
+ createBoolConfig("jemalloc-bg-thread", NULL, MODIFIABLE_CONFIG, server.jemalloc_bg_thread, 1, NULL, updateJemallocBgThread),
+ createBoolConfig("activedefrag", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.active_defrag_enabled, 0, isValidActiveDefrag, NULL),
+ createBoolConfig("syslog-enabled", NULL, IMMUTABLE_CONFIG, server.syslog_enabled, 0, NULL, NULL),
+ createBoolConfig("cluster-enabled", NULL, IMMUTABLE_CONFIG, server.cluster_enabled, 0, NULL, NULL),
+ createBoolConfig("appendonly", NULL, MODIFIABLE_CONFIG | DENY_LOADING_CONFIG, server.aof_enabled, 0, NULL, updateAppendonly),
+ createBoolConfig("cluster-allow-reads-when-down", NULL, MODIFIABLE_CONFIG, server.cluster_allow_reads_when_down, 0, NULL, NULL),
+ createBoolConfig("cluster-allow-pubsubshard-when-down", NULL, MODIFIABLE_CONFIG, server.cluster_allow_pubsubshard_when_down, 1, NULL, NULL),
+ createBoolConfig("crash-log-enabled", NULL, MODIFIABLE_CONFIG, server.crashlog_enabled, 1, NULL, updateSighandlerEnabled),
+ createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, server.memcheck_enabled, 1, NULL, NULL),
+ createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, server.use_exit_on_panic, 0, NULL, NULL),
+ createBoolConfig("disable-thp", NULL, IMMUTABLE_CONFIG, server.disable_thp, 1, NULL, NULL),
+ createBoolConfig("cluster-allow-replica-migration", NULL, MODIFIABLE_CONFIG, server.cluster_allow_replica_migration, 1, NULL, NULL),
+ createBoolConfig("replica-announced", NULL, MODIFIABLE_CONFIG, server.replica_announced, 1, NULL, NULL),
+ createBoolConfig("latency-tracking", NULL, MODIFIABLE_CONFIG, server.latency_tracking_enabled, 1, NULL, NULL),
+ createBoolConfig("aof-disable-auto-gc", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, server.aof_disable_auto_gc, 0, NULL, updateAofAutoGCEnabled),
+ createBoolConfig("replica-ignore-disk-write-errors", NULL, MODIFIABLE_CONFIG, server.repl_ignore_disk_write_error, 0, NULL, NULL),
+
+ /* String Configs */
+ createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL),
+ createStringConfig("unixsocket", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.unixsocket, NULL, NULL, NULL),
+ createStringConfig("pidfile", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.pidfile, NULL, NULL, NULL),
+ createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.slave_announce_ip, NULL, NULL, NULL),
+ createStringConfig("masteruser", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.masteruser, NULL, NULL, NULL),
+ createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, NULL, updateClusterIp),
+ createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", NULL, NULL),
+ createStringConfig("cluster-announce-hostname", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_hostname, NULL, isValidAnnouncedHostname, updateClusterHostname),
+ createStringConfig("cluster-announce-human-nodename", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_human_nodename, NULL, isValidAnnouncedNodename, updateClusterHumanNodename),
+ createStringConfig("syslog-ident", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.syslog_ident, "redis", NULL, NULL),
+ createStringConfig("dbfilename", NULL, MODIFIABLE_CONFIG | PROTECTED_CONFIG, ALLOW_EMPTY_STRING, server.rdb_filename, "dump.rdb", isValidDBfilename, NULL),
+ createStringConfig("appendfilename", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.aof_filename, "appendonly.aof", isValidAOFfilename, NULL),
+ createStringConfig("appenddirname", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.aof_dirname, "appendonlydir", isValidAOFdirname, NULL),
+ createStringConfig("server_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.server_cpulist, NULL, NULL, NULL),
+ createStringConfig("bio_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bio_cpulist, NULL, NULL, NULL),
+ createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.aof_rewrite_cpulist, NULL, NULL, NULL),
+ createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bgsave_cpulist, NULL, NULL, NULL),
+ createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.ignore_warnings, "", NULL, NULL),
+ createStringConfig("proc-title-template", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.proc_title_template, CONFIG_DEFAULT_PROC_TITLE_TEMPLATE, isValidProcTitleTemplate, updateProcTitleTemplate),
+ createStringConfig("bind-source-addr", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bind_source_addr, NULL, NULL, NULL),
+ createStringConfig("logfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.logfile, "", NULL, NULL),
+#ifdef LOG_REQ_RES
+ createStringConfig("req-res-logfile", NULL, IMMUTABLE_CONFIG | HIDDEN_CONFIG, EMPTY_STRING_IS_NULL, server.req_res_logfile, NULL, NULL, NULL),
+#endif
+ createStringConfig("locale-collate", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.locale_collate, "", NULL, updateLocaleCollate),
+
+ /* SDS Configs */
+ createSDSConfig("masterauth", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.masterauth, NULL, NULL, NULL),
+ createSDSConfig("requirepass", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.requirepass, NULL, NULL, updateRequirePass),
+
+ /* Enum Configs */
+ createEnumConfig("supervised", NULL, IMMUTABLE_CONFIG, supervised_mode_enum, server.supervised_mode, SUPERVISED_NONE, NULL, NULL),
+ createEnumConfig("syslog-facility", NULL, IMMUTABLE_CONFIG, syslog_facility_enum, server.syslog_facility, LOG_LOCAL0, NULL, NULL),
+ createEnumConfig("repl-diskless-load", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG | DENY_LOADING_CONFIG, repl_diskless_load_enum, server.repl_diskless_load, REPL_DISKLESS_LOAD_DISABLED, NULL, NULL),
+ createEnumConfig("loglevel", NULL, MODIFIABLE_CONFIG, loglevel_enum, server.verbosity, LL_NOTICE, NULL, NULL),
+ createEnumConfig("maxmemory-policy", NULL, MODIFIABLE_CONFIG, maxmemory_policy_enum, server.maxmemory_policy, MAXMEMORY_NO_EVICTION, NULL, NULL),
+ createEnumConfig("appendfsync", NULL, MODIFIABLE_CONFIG, aof_fsync_enum, server.aof_fsync, AOF_FSYNC_EVERYSEC, NULL, updateAppendFsync),
+ createEnumConfig("oom-score-adj", NULL, MODIFIABLE_CONFIG, oom_score_adj_enum, server.oom_score_adj, OOM_SCORE_ADJ_NO, NULL, updateOOMScoreAdj),
+ createEnumConfig("acl-pubsub-default", NULL, MODIFIABLE_CONFIG, acl_pubsub_default_enum, server.acl_pubsub_default, 0, NULL, NULL),
+ createEnumConfig("sanitize-dump-payload", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, sanitize_dump_payload_enum, server.sanitize_dump_payload, SANITIZE_DUMP_NO, NULL, NULL),
+ createEnumConfig("enable-protected-configs", NULL, IMMUTABLE_CONFIG, protected_action_enum, server.enable_protected_configs, PROTECTED_ACTION_ALLOWED_NO, NULL, NULL),
+ createEnumConfig("enable-debug-command", NULL, IMMUTABLE_CONFIG, protected_action_enum, server.enable_debug_cmd, PROTECTED_ACTION_ALLOWED_NO, NULL, NULL),
+ createEnumConfig("enable-module-command", NULL, IMMUTABLE_CONFIG, protected_action_enum, server.enable_module_cmd, PROTECTED_ACTION_ALLOWED_NO, NULL, NULL),
+ createEnumConfig("cluster-preferred-endpoint-type", NULL, MODIFIABLE_CONFIG, cluster_preferred_endpoint_type_enum, server.cluster_preferred_endpoint_type, CLUSTER_ENDPOINT_TYPE_IP, NULL, NULL),
+ createEnumConfig("propagation-error-behavior", NULL, MODIFIABLE_CONFIG, propagation_error_behavior_enum, server.propagation_error_behavior, PROPAGATION_ERR_BEHAVIOR_IGNORE, NULL, NULL),
+ createEnumConfig("shutdown-on-sigint", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, shutdown_on_sig_enum, server.shutdown_on_sigint, 0, isValidShutdownOnSigFlags, NULL),
+ createEnumConfig("shutdown-on-sigterm", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, shutdown_on_sig_enum, server.shutdown_on_sigterm, 0, isValidShutdownOnSigFlags, NULL),
+
+ /* Integer configs */
+ createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, server.dbnum, 16, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.port, 6379, INTEGER_CONFIG, NULL, updatePort), /* TCP port. */
+ createIntConfig("io-threads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, 1, 128, server.io_threads_num, 1, INTEGER_CONFIG, NULL, NULL), /* Single threaded by default */
+ createIntConfig("auto-aof-rewrite-percentage", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.aof_rewrite_perc, 100, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_slave_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* Slave max data age factor. */
+ createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("tcp-keepalive", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tcpkeepalive, 300, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("cluster-migration-barrier", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_migration_barrier, 1, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("active-defrag-cycle-min", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_min, 1, INTEGER_CONFIG, NULL, NULL), /* Default: 1% CPU min (at lower threshold) */
+ createIntConfig("active-defrag-cycle-max", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_max, 25, INTEGER_CONFIG, NULL, NULL), /* Default: 25% CPU max (at upper threshold) */
+ createIntConfig("active-defrag-threshold-lower", NULL, MODIFIABLE_CONFIG, 0, 1000, server.active_defrag_threshold_lower, 10, INTEGER_CONFIG, NULL, NULL), /* Default: don't defrag when fragmentation is below 10% */
+ createIntConfig("active-defrag-threshold-upper", NULL, MODIFIABLE_CONFIG, 0, 1000, server.active_defrag_threshold_upper, 100, INTEGER_CONFIG, NULL, NULL), /* Default: maximum defrag force at 100% fragmentation */
+ createIntConfig("lfu-log-factor", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.lfu_log_factor, 10, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("lfu-decay-time", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.lfu_decay_time, 1, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("replica-priority", "slave-priority", MODIFIABLE_CONFIG, 0, INT_MAX, server.slave_priority, 100, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("repl-diskless-sync-delay", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_diskless_sync_delay, 5, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("maxmemory-samples", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, server.maxmemory_samples, 5, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("maxmemory-eviction-tenacity", NULL, MODIFIABLE_CONFIG, 0, 100, server.maxmemory_eviction_tenacity, 10, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.maxidletime, 0, INTEGER_CONFIG, NULL, NULL), /* Default client timeout: infinite */
+ createIntConfig("replica-announce-port", "slave-announce-port", MODIFIABLE_CONFIG, 0, 65535, server.slave_announce_port, 0, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("tcp-backlog", NULL, IMMUTABLE_CONFIG, 0, INT_MAX, server.tcp_backlog, 511, INTEGER_CONFIG, NULL, NULL), /* TCP listen backlog. */
+ createIntConfig("cluster-port", NULL, IMMUTABLE_CONFIG, 0, 65535, server.cluster_port, 0, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("cluster-announce-bus-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.cluster_announce_bus_port, 0, INTEGER_CONFIG, NULL, updateClusterAnnouncedPort), /* Default: Use +10000 offset. */
+ createIntConfig("cluster-announce-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.cluster_announce_port, 0, INTEGER_CONFIG, NULL, updateClusterAnnouncedPort), /* Use server.port */
+ createIntConfig("cluster-announce-tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.cluster_announce_tls_port, 0, INTEGER_CONFIG, NULL, updateClusterAnnouncedPort), /* Use server.tls_port */
+ createIntConfig("repl-timeout", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, server.repl_timeout, 60, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("repl-ping-replica-period", "repl-ping-slave-period", MODIFIABLE_CONFIG, 1, INT_MAX, server.repl_ping_slave_period, 10, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("list-compress-depth", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 0, INT_MAX, server.list_compress_depth, 0, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("rdb-key-save-delay", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, INT_MIN, INT_MAX, server.rdb_key_save_delay, 0, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("key-load-delay", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, INT_MIN, INT_MAX, server.key_load_delay, 0, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("active-expire-effort", NULL, MODIFIABLE_CONFIG, 1, 10, server.active_expire_effort, 1, INTEGER_CONFIG, NULL, NULL), /* From 1 to 10. */
+ createIntConfig("hz", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.config_hz, CONFIG_DEFAULT_HZ, INTEGER_CONFIG, NULL, updateHZ),
+ createIntConfig("min-replicas-to-write", "min-slaves-to-write", MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_min_slaves_to_write, 0, INTEGER_CONFIG, NULL, updateGoodSlaves),
+ createIntConfig("min-replicas-max-lag", "min-slaves-max-lag", MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_min_slaves_max_lag, 10, INTEGER_CONFIG, NULL, updateGoodSlaves),
+ createIntConfig("watchdog-period", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, INT_MAX, server.watchdog_period, 0, INTEGER_CONFIG, NULL, updateWatchdogPeriod),
+ createIntConfig("shutdown-timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.shutdown_timeout, 10, INTEGER_CONFIG, NULL, NULL),
+ createIntConfig("repl-diskless-sync-max-replicas", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_diskless_sync_max_replicas, 0, INTEGER_CONFIG, NULL, NULL),
+
+ /* Unsigned int configs */
+ createUIntConfig("maxclients", NULL, MODIFIABLE_CONFIG, 1, UINT_MAX, server.maxclients, 10000, INTEGER_CONFIG, NULL, updateMaxclients),
+ createUIntConfig("unixsocketperm", NULL, IMMUTABLE_CONFIG, 0, 0777, server.unixsocketperm, 0, OCTAL_CONFIG, NULL, NULL),
+ createUIntConfig("socket-mark-id", NULL, IMMUTABLE_CONFIG, 0, UINT_MAX, server.socket_mark_id, 0, INTEGER_CONFIG, NULL, NULL),
+#ifdef LOG_REQ_RES
+ createUIntConfig("client-default-resp", NULL, IMMUTABLE_CONFIG | HIDDEN_CONFIG, 2, 3, server.client_default_resp, 2, INTEGER_CONFIG, NULL, NULL),
+#endif
+
+ /* Unsigned Long configs */
+ createULongConfig("active-defrag-max-scan-fields", NULL, MODIFIABLE_CONFIG, 1, LONG_MAX, server.active_defrag_max_scan_fields, 1000, INTEGER_CONFIG, NULL, NULL), /* Default: keys with more than 1000 fields will be processed separately */
+ createULongConfig("slowlog-max-len", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.slowlog_max_len, 128, INTEGER_CONFIG, NULL, NULL),
+ createULongConfig("acllog-max-len", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.acllog_max_len, 128, INTEGER_CONFIG, NULL, NULL),
+
+ /* Long Long configs */
+ createLongLongConfig("busy-reply-threshold", "lua-time-limit", MODIFIABLE_CONFIG, 0, LONG_MAX, server.busy_reply_threshold, 5000, INTEGER_CONFIG, NULL, NULL),/* milliseconds */
+ createLongLongConfig("cluster-node-timeout", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.cluster_node_timeout, 15000, INTEGER_CONFIG, NULL, NULL),
+ createLongLongConfig("cluster-ping-interval", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, LLONG_MAX, server.cluster_ping_interval, 0, INTEGER_CONFIG, NULL, NULL),
+ createLongLongConfig("slowlog-log-slower-than", NULL, MODIFIABLE_CONFIG, -1, LLONG_MAX, server.slowlog_log_slower_than, 10000, INTEGER_CONFIG, NULL, NULL),
+ createLongLongConfig("latency-monitor-threshold", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.latency_monitor_threshold, 0, INTEGER_CONFIG, NULL, NULL),
+ createLongLongConfig("proto-max-bulk-len", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.proto_max_bulk_len, 512ll*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Bulk request max size */
+ createLongLongConfig("stream-node-max-entries", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.stream_node_max_entries, 100, INTEGER_CONFIG, NULL, NULL),
+ createLongLongConfig("repl-backlog-size", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.repl_backlog_size, 1024*1024, MEMORY_CONFIG, NULL, updateReplBacklogSize), /* Default: 1mb */
+
+ /* Unsigned Long Long configs */
+ createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory),
+ createULongLongConfig("cluster-link-sendbuf-limit", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.cluster_link_msg_queue_limit_bytes, 0, MEMORY_CONFIG, NULL, NULL),
+
+ /* Size_t configs */
+ createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL),
+ createSizeTConfig("set-max-intset-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_intset_entries, 512, INTEGER_CONFIG, NULL, NULL),
+ createSizeTConfig("set-max-listpack-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
+ createSizeTConfig("set-max-listpack-value", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_value, 64, INTEGER_CONFIG, NULL, NULL),
+ createSizeTConfig("zset-max-listpack-entries", "zset-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.zset_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
+ createSizeTConfig("active-defrag-ignore-bytes", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.active_defrag_ignore_bytes, 100<<20, MEMORY_CONFIG, NULL, NULL), /* Default: don't defrag if frag overhead is below 100mb */
+ createSizeTConfig("hash-max-listpack-value", "hash-max-ziplist-value", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_value, 64, MEMORY_CONFIG, NULL, NULL),
+ createSizeTConfig("stream-node-max-bytes", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.stream_node_max_bytes, 4096, MEMORY_CONFIG, NULL, NULL),
+ createSizeTConfig("zset-max-listpack-value", "zset-max-ziplist-value", MODIFIABLE_CONFIG, 0, LONG_MAX, server.zset_max_listpack_value, 64, MEMORY_CONFIG, NULL, NULL),
+ createSizeTConfig("hll-sparse-max-bytes", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.hll_sparse_max_bytes, 3000, MEMORY_CONFIG, NULL, NULL),
+ createSizeTConfig("tracking-table-max-keys", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.tracking_table_max_keys, 1000000, INTEGER_CONFIG, NULL, NULL), /* Default: 1 million keys max. */
+ createSizeTConfig("client-query-buffer-limit", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.client_max_querybuf_len, 1024*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Default: 1GB max query buffer. */
+ createSSizeTConfig("maxmemory-clients", NULL, MODIFIABLE_CONFIG, -100, SSIZE_MAX, server.maxmemory_clients, 0, MEMORY_CONFIG | PERCENT_CONFIG, NULL, applyClientMaxMemoryUsage),
+
+ /* Other configs */
+ createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */
+ createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL),
+ createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*1024*2, INTEGER_CONFIG, NULL, NULL),
+
+ createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.tls_port, 0, INTEGER_CONFIG, NULL, applyTLSPort), /* TCP port. */
+ createIntConfig("tls-session-cache-size", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tls_ctx_config.session_cache_size, 20*1024, INTEGER_CONFIG, NULL, applyTlsCfg),
+ createIntConfig("tls-session-cache-timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tls_ctx_config.session_cache_timeout, 300, INTEGER_CONFIG, NULL, applyTlsCfg),
+ createBoolConfig("tls-cluster", NULL, MODIFIABLE_CONFIG, server.tls_cluster, 0, NULL, applyTlsCfg),
+ createBoolConfig("tls-replication", NULL, MODIFIABLE_CONFIG, server.tls_replication, 0, NULL, applyTlsCfg),
+ createEnumConfig("tls-auth-clients", NULL, MODIFIABLE_CONFIG, tls_auth_clients_enum, server.tls_auth_clients, TLS_CLIENT_AUTH_YES, NULL, NULL),
+ createBoolConfig("tls-prefer-server-ciphers", NULL, MODIFIABLE_CONFIG, server.tls_ctx_config.prefer_server_ciphers, 0, NULL, applyTlsCfg),
+ createBoolConfig("tls-session-caching", NULL, MODIFIABLE_CONFIG, server.tls_ctx_config.session_caching, 1, NULL, applyTlsCfg),
+ createStringConfig("tls-cert-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.cert_file, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-key-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.key_file, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-key-file-pass", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.key_file_pass, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-client-cert-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.client_cert_file, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-client-key-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.client_key_file, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-client-key-file-pass", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.client_key_file_pass, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-dh-params-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.dh_params_file, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-ca-cert-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.ca_cert_file, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-ca-cert-dir", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.ca_cert_dir, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-protocols", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.protocols, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-ciphers", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.ciphers, NULL, NULL, applyTlsCfg),
+ createStringConfig("tls-ciphersuites", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.ciphersuites, NULL, NULL, applyTlsCfg),
+
+ /* Special configs */
+ createSpecialConfig("dir", NULL, MODIFIABLE_CONFIG | PROTECTED_CONFIG | DENY_LOADING_CONFIG, setConfigDirOption, getConfigDirOption, rewriteConfigDirOption, NULL),
+ createSpecialConfig("save", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigSaveOption, getConfigSaveOption, rewriteConfigSaveOption, NULL),
+ createSpecialConfig("client-output-buffer-limit", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigClientOutputBufferLimitOption, getConfigClientOutputBufferLimitOption, rewriteConfigClientOutputBufferLimitOption, NULL),
+ createSpecialConfig("oom-score-adj-values", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigOOMScoreAdjValuesOption, getConfigOOMScoreAdjValuesOption, rewriteConfigOOMScoreAdjValuesOption, updateOOMScoreAdj),
+ createSpecialConfig("notify-keyspace-events", NULL, MODIFIABLE_CONFIG, setConfigNotifyKeyspaceEventsOption, getConfigNotifyKeyspaceEventsOption, rewriteConfigNotifyKeyspaceEventsOption, NULL),
+ createSpecialConfig("bind", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigBindOption, getConfigBindOption, rewriteConfigBindOption, applyBind),
+ createSpecialConfig("replicaof", "slaveof", IMMUTABLE_CONFIG | MULTI_ARG_CONFIG, setConfigReplicaOfOption, getConfigReplicaOfOption, rewriteConfigReplicaOfOption, NULL),
+ createSpecialConfig("latency-tracking-info-percentiles", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigLatencyTrackingInfoPercentilesOutputOption, getConfigLatencyTrackingInfoPercentilesOutputOption, rewriteConfigLatencyTrackingInfoPercentilesOutputOption, NULL),
+
+ /* NULL Terminator, this is dropped when we convert to the runtime array. */
+ {NULL}
+};
+
/* Create a new config by copying the passed in config. Returns 1 on success
 * or 0 when there was already a config with the same name. */
int registerConfigValue(const char *name, const standardConfig *config, int alias) {
    /* Heap-copy the (usually static) template so the dict owns its own
     * mutable instance. */
    standardConfig *new = zmalloc(sizeof(standardConfig));
    memcpy(new, config, sizeof(standardConfig));
    if (alias) {
        /* For an alias entry, flag it and swap name/alias so lookups under
         * the alias name report the primary name as its alias. */
        new->flags |= ALIAS_CONFIG;
        new->name = config->alias;
        new->alias = config->name;
    }

    /* The dict takes ownership of the sds key; dictAdd fails on duplicates. */
    return dictAdd(configs, sdsnew(name), new) == DICT_OK;
}
+
/* Initialize configs to their default values and create and populate the
 * runtime configuration dictionary. */
void initConfigValues(void) {
    configs = dictCreate(&sdsHashDictType);
    /* Pre-size the dict to avoid rehashing while inserting every entry. */
    dictExpand(configs, sizeof(static_configs) / sizeof(standardConfig));
    /* static_configs is terminated by an entry whose name is NULL. */
    for (standardConfig *config = static_configs; config->name != NULL; config++) {
        if (config->interface.init) config->interface.init(config);
        /* Add the primary config to the dictionary. */
        int ret = registerConfigValue(config->name, config, 0);
        serverAssert(ret);

        /* Aliases are the same as their primary counter parts, but they
         * also have a flag indicating they are the alias. */
        if (config->alias) {
            int ret = registerConfigValue(config->alias, config, ALIAS_CONFIG);
            serverAssert(ret);
        }
    }
}
+
/* Remove a config by name from the configs dict. */
void removeConfig(sds name) {
    standardConfig *config = lookupConfig(name);
    if (!config) return;
    if (config->flags & MODULE_CONFIG) {
        /* Module configs own their (sds-allocated) name and, depending on
         * type, their enum table or sds default value; free those before
         * the dict releases the entry itself. */
        sdsfree((sds) config->name);
        if (config->type == ENUM_CONFIG) {
            /* The enum table is NULL-name terminated; each name string was
             * individually allocated. */
            configEnum *enumNode = config->data.enumd.enum_value;
            while(enumNode->name != NULL) {
                zfree(enumNode->name);
                enumNode++;
            }
            zfree(config->data.enumd.enum_value);
        } else if (config->type == SDS_CONFIG) {
            if (config->data.sds.default_value) sdsfree((sds)config->data.sds.default_value);
        }
    }
    dictDelete(configs, name);
}
+
+/*-----------------------------------------------------------------------------
+ * Module Config
+ *----------------------------------------------------------------------------*/
+
+/* Create a bool/string/enum/numeric standardConfig for a module config in the configs dictionary */
+void addModuleBoolConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val) {
+ sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name);
+ int config_dummy_address;
+ standardConfig module_config = createBoolConfig(config_name, NULL, flags | MODULE_CONFIG, config_dummy_address, default_val, NULL, NULL);
+ module_config.data.yesno.config = NULL;
+ module_config.privdata = privdata;
+ registerConfigValue(config_name, &module_config, 0);
+}
+
+void addModuleStringConfig(const char *module_name, const char *name, int flags, void *privdata, sds default_val) {
+ sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name);
+ sds config_dummy_address;
+ standardConfig module_config = createSDSConfig(config_name, NULL, flags | MODULE_CONFIG, 0, config_dummy_address, default_val, NULL, NULL);
+ module_config.data.sds.config = NULL;
+ module_config.privdata = privdata;
+ registerConfigValue(config_name, &module_config, 0);
+}
+
+void addModuleEnumConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val, configEnum *enum_vals) {
+ sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name);
+ int config_dummy_address;
+ standardConfig module_config = createEnumConfig(config_name, NULL, flags | MODULE_CONFIG, enum_vals, config_dummy_address, default_val, NULL, NULL);
+ module_config.data.enumd.config = NULL;
+ module_config.privdata = privdata;
+ registerConfigValue(config_name, &module_config, 0);
+}
+
+void addModuleNumericConfig(const char *module_name, const char *name, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper) {
+ sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name);
+ long long config_dummy_address;
+ standardConfig module_config = createLongLongConfig(config_name, NULL, flags | MODULE_CONFIG, lower, upper, config_dummy_address, default_val, conf_flags, NULL, NULL);
+ module_config.data.numeric.config.ll = NULL;
+ module_config.privdata = privdata;
+ registerConfigValue(config_name, &module_config, 0);
+}
+
+/*-----------------------------------------------------------------------------
+ * CONFIG HELP
+ *----------------------------------------------------------------------------*/
+
/* CONFIG HELP: reply with the list of CONFIG subcommands and their
 * one-line descriptions. */
void configHelpCommand(client *c) {
    /* NULL-terminated array of alternating "USAGE" / " description" lines,
     * consumed by addReplyHelp. */
    const char *help[] = {
"GET <pattern>",
"    Return parameters matching the glob-like <pattern> and their values.",
"SET <directive> <value>",
"    Set the configuration <directive> to <value>.",
"RESETSTAT",
"    Reset statistics reported by the INFO command.",
"REWRITE",
"    Rewrite the configuration file.",
NULL
    };

    addReplyHelp(c, help);
}
+
+/*-----------------------------------------------------------------------------
+ * CONFIG RESETSTAT
+ *----------------------------------------------------------------------------*/
+
/* CONFIG RESETSTAT: reset the server, per-command, and per-error statistics
 * that INFO / COMMAND STATS report, then reply +OK. */
void configResetStatCommand(client *c) {
    resetServerStats();
    resetCommandTableStats(server.commands);
    resetErrorTableStats();
    addReply(c,shared.ok);
}
+
+/*-----------------------------------------------------------------------------
+ * CONFIG REWRITE
+ *----------------------------------------------------------------------------*/
+
+void configRewriteCommand(client *c) {
+ if (server.configfile == NULL) {
+ addReplyError(c,"The server is running without a config file");
+ return;
+ }
+ if (rewriteConfig(server.configfile, 0) == -1) {
+ /* save errno in case of being tainted. */
+ int err = errno;
+ serverLog(LL_WARNING,"CONFIG REWRITE failed: %s", strerror(err));
+ addReplyErrorFormat(c,"Rewriting config file: %s", strerror(err));
+ } else {
+ serverLog(LL_NOTICE,"CONFIG REWRITE executed with success.");
+ addReply(c,shared.ok);
+ }
+}
diff --git a/src/config.h b/src/config.h
new file mode 100644
index 0000000..1d8b738
--- /dev/null
+++ b/src/config.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CONFIG_H
+#define __CONFIG_H
+
+#ifdef __APPLE__
+#include <fcntl.h> // for fcntl(fd, F_FULLFSYNC)
+#include <AvailabilityMacros.h>
+#endif
+
+#ifdef __linux__
+#include <features.h>
+#include <fcntl.h>
+#endif
+
+#if defined(__APPLE__) && defined(__MAC_OS_X_VERSION_MAX_ALLOWED) && __MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
+#define MAC_OS_10_6_DETECTED
+#endif
+
/* Define redis_fstat to fstat or fstat64().
 * Only pre-10.6 macOS needs the explicit 64-bit variants; everywhere else
 * the plain fstat()/stat() are already large-file capable. */
#if defined(__APPLE__) && !defined(MAC_OS_10_6_DETECTED)
#define redis_fstat fstat64
#define redis_stat stat64
#else
#define redis_fstat fstat
#define redis_stat stat
#endif
+
+/* Test for proc filesystem */
+#ifdef __linux__
+#define HAVE_PROC_STAT 1
+#define HAVE_PROC_MAPS 1
+#define HAVE_PROC_SMAPS 1
+#define HAVE_PROC_SOMAXCONN 1
+#define HAVE_PROC_OOM_SCORE_ADJ 1
+#endif
+
+/* Test for task_info() */
+#if defined(__APPLE__)
+#define HAVE_TASKINFO 1
+#endif
+
+/* Test for somaxconn check */
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#define HAVE_SYSCTL_KIPC_SOMAXCONN 1
+#elif defined(__OpenBSD__)
+#define HAVE_SYSCTL_KERN_SOMAXCONN 1
+#endif
+
+/* Test for backtrace() */
+#if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__)) || \
+ defined(__FreeBSD__) || ((defined(__OpenBSD__) || defined(__NetBSD__) || defined(__sun)) && defined(USE_BACKTRACE))\
+ || defined(__DragonFly__) || (defined(__UCLIBC__) && defined(__UCLIBC_HAS_BACKTRACE__))
+#define HAVE_BACKTRACE 1
+#endif
+
+/* MSG_NOSIGNAL. */
+#ifdef __linux__
+#define HAVE_MSG_NOSIGNAL 1
+#if defined(SO_MARK)
+#define HAVE_SOCKOPTMARKID 1
+#define SOCKOPTMARKID SO_MARK
+#endif
+#endif
+
+/* Test for polling API */
+#ifdef __linux__
+#define HAVE_EPOLL 1
+#endif
+
+/* Test for accept4() */
+#ifdef __linux__
+#define HAVE_ACCEPT4 1
+#endif
+
+#if (defined(__APPLE__) && defined(MAC_OS_10_6_DETECTED)) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined (__NetBSD__)
+#define HAVE_KQUEUE 1
+#endif
+
+#ifdef __sun
+#include <sys/feature_tests.h>
+#ifdef _DTRACE_VERSION
+#define HAVE_EVPORT 1
+#define HAVE_PSINFO 1
+#endif
+#endif
+
/* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest.
 * On macOS, fsync() does not guarantee the data reaches the platter, so
 * fcntl(F_FULLFSYNC) is used instead. */
#if defined(__linux__)
#define redis_fsync(fd) fdatasync(fd)
#elif defined(__APPLE__)
#define redis_fsync(fd) fcntl(fd, F_FULLFSYNC)
#else
#define redis_fsync(fd) fsync(fd)
#endif
+
+#if defined(__FreeBSD__)
+#if defined(SO_USER_COOKIE)
+#define HAVE_SOCKOPTMARKID 1
+#define SOCKOPTMARKID SO_USER_COOKIE
+#endif
+#endif
+
+#if defined(__OpenBSD__)
+#if defined(SO_RTABLE)
+#define HAVE_SOCKOPTMARKID 1
+#define SOCKOPTMARKID SO_RTABLE
+#endif
+#endif
+
+#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+#define redis_unreachable __builtin_unreachable
+#else
+#define redis_unreachable abort
+#endif
+
+#if __GNUC__ >= 3
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+
+#if defined(__has_attribute)
+#if __has_attribute(no_sanitize)
+#define REDIS_NO_SANITIZE(sanitizer) __attribute__((no_sanitize(sanitizer)))
+#endif
+#endif
+#if !defined(REDIS_NO_SANITIZE)
+#define REDIS_NO_SANITIZE(sanitizer)
+#endif
+
/* Define rdb_fsync_range to sync_file_range() on Linux, otherwise we use
 * the plain fsync() call. Note the non-Linux fallbacks ignore the off/size
 * arguments and flush the whole file. */
#if (defined(__linux__) && defined(SYNC_FILE_RANGE_WAIT_BEFORE))
#define HAVE_SYNC_FILE_RANGE 1
#define rdb_fsync_range(fd,off,size) sync_file_range(fd,off,size,SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE)
#elif defined(__APPLE__)
#define rdb_fsync_range(fd,off,size) fcntl(fd, F_FULLFSYNC)
#else
#define rdb_fsync_range(fd,off,size) fsync(fd)
#endif
+
+/* Check if we can use setproctitle().
+ * BSD systems have support for it, we provide an implementation for
+ * Linux and osx. */
+#if (defined __NetBSD__ || defined __FreeBSD__ || defined __OpenBSD__)
+#define USE_SETPROCTITLE
+#endif
+
+#if defined(__HAIKU__)
+#define ESOCKTNOSUPPORT 0
+#endif
+
+#if (defined __linux || defined __APPLE__)
+#define USE_SETPROCTITLE
+#define INIT_SETPROCTITLE_REPLACEMENT
+void spt_init(int argc, char *argv[]);
+void setproctitle(const char *fmt, ...);
+#endif
+
+/* Byte ordering detection */
+#include <sys/types.h> /* This will likely define BYTE_ORDER */
+
+#ifndef BYTE_ORDER
+#if (BSD >= 199103)
+# include <machine/endian.h>
+#else
+#if defined(linux) || defined(__linux__)
+# include <endian.h>
+#else
+#define LITTLE_ENDIAN 1234 /* least-significant byte first (vax, pc) */
+#define BIG_ENDIAN 4321 /* most-significant byte first (IBM, net) */
+#define PDP_ENDIAN 3412 /* LSB first in word, MSW first in long (pdp)*/
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) || \
+ defined(vax) || defined(ns32000) || defined(sun386) || \
+ defined(MIPSEL) || defined(_MIPSEL) || defined(BIT_ZERO_ON_RIGHT) || \
+ defined(__alpha__) || defined(__alpha)
+#define BYTE_ORDER LITTLE_ENDIAN
+#endif
+
+#if defined(sel) || defined(pyr) || defined(mc68000) || defined(sparc) || \
+ defined(is68k) || defined(tahoe) || defined(ibm032) || defined(ibm370) || \
+ defined(MIPSEB) || defined(_MIPSEB) || defined(_IBMR2) || defined(DGUX) ||\
+ defined(apollo) || defined(__convex__) || defined(_CRAY) || \
+ defined(__hppa) || defined(__hp9000) || \
+ defined(__hp9000s300) || defined(__hp9000s700) || \
+ defined (BIT_ZERO_ON_LEFT) || defined(m68k) || defined(__sparc)
+#define BYTE_ORDER BIG_ENDIAN
+#endif
+#endif /* linux */
+#endif /* BSD */
+#endif /* BYTE_ORDER */
+
+/* Sometimes after including an OS-specific header that defines the
+ * endianness we end with __BYTE_ORDER but not with BYTE_ORDER that is what
+ * the Redis code uses. In this case let's define everything without the
+ * underscores. */
+#ifndef BYTE_ORDER
+#ifdef __BYTE_ORDER
+#if defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
+#ifndef LITTLE_ENDIAN
+#define LITTLE_ENDIAN __LITTLE_ENDIAN
+#endif
+#ifndef BIG_ENDIAN
+#define BIG_ENDIAN __BIG_ENDIAN
+#endif
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+#define BYTE_ORDER LITTLE_ENDIAN
+#else
+#define BYTE_ORDER BIG_ENDIAN
+#endif
+#endif
+#endif
+#endif
+
+#if !defined(BYTE_ORDER) || \
+ (BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN)
+ /* you must determine what the correct bit order is for
+ * your compiler - the next line is an intentional error
+ * which will force your compiles to bomb until you fix
+ * the above macros.
+ */
+#error "Undefined or invalid BYTE_ORDER"
+#endif
+
+#if (__i386 || __amd64 || __powerpc__) && __GNUC__
+#define GNUC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#if defined(__clang__)
+#define HAVE_ATOMIC
+#endif
+#if (defined(__GLIBC__) && defined(__GLIBC_PREREQ))
+#if (GNUC_VERSION >= 40100 && __GLIBC_PREREQ(2, 6))
+#define HAVE_ATOMIC
+#endif
+#endif
+#endif
+
+/* Make sure we can test for ARM just checking for __arm__, since sometimes
+ * __arm is defined but __arm__ is not. */
+#if defined(__arm) && !defined(__arm__)
+#define __arm__
+#endif
+#if defined (__aarch64__) && !defined(__arm64__)
+#define __arm64__
+#endif
+
+/* Make sure we can test for SPARC just checking for __sparc__. */
+#if defined(__sparc) && !defined(__sparc__)
+#define __sparc__
+#endif
+
+#if defined(__sparc__) || defined(__arm__)
+#define USE_ALIGNED_ACCESS
+#endif
+
+/* Define for redis_set_thread_title */
+#ifdef __linux__
+#define redis_set_thread_title(name) pthread_setname_np(pthread_self(), name)
+#else
+#if (defined __FreeBSD__ || defined __OpenBSD__)
+#include <pthread_np.h>
+#define redis_set_thread_title(name) pthread_set_name_np(pthread_self(), name)
+#elif defined __NetBSD__
+#include <pthread.h>
+#define redis_set_thread_title(name) pthread_setname_np(pthread_self(), "%s", name)
+#elif defined __HAIKU__
+#include <kernel/OS.h>
+#define redis_set_thread_title(name) rename_thread(find_thread(0), name)
+#else
+#if (defined __APPLE__ && defined(__MAC_OS_X_VERSION_MAX_ALLOWED) && __MAC_OS_X_VERSION_MAX_ALLOWED >= 1070)
+int pthread_setname_np(const char *name);
+#include <pthread.h>
+#define redis_set_thread_title(name) pthread_setname_np(name)
+#else
+#define redis_set_thread_title(name)
+#endif
+#endif
+#endif
+
+/* Check if we can use setcpuaffinity(). */
+#if (defined __linux || defined __NetBSD__ || defined __FreeBSD__ || defined __DragonFly__)
+#define USE_SETCPUAFFINITY
+void setcpuaffinity(const char *cpulist);
+#endif
+
+/* Test for posix_fadvise() */
+#if defined(__linux__) || __FreeBSD__ >= 10
+#define HAVE_FADVISE
+#endif
+
+#endif
diff --git a/src/connection.c b/src/connection.c
new file mode 100644
index 0000000..fd9d5d1
--- /dev/null
+++ b/src/connection.c
@@ -0,0 +1,208 @@
+/* ==========================================================================
+ * connection.c - connection layer framework
+ * --------------------------------------------------------------------------
+ * Copyright (C) 2022 zhenwei pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+ * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * ==========================================================================
+ */
+
+#include "server.h"
+#include "connection.h"
+
+static ConnectionType *connTypes[CONN_TYPE_MAX];
+
+/* Register a connection type into the framework.
+ *
+ * The type name (ct->get_type(NULL)) must be unique; comparison is
+ * case-insensitive so "tls" and "TLS" are the same type. On success the
+ * type is stored in the first free slot of connTypes[], its optional
+ * init() callback is invoked, and C_OK is returned. C_ERR is returned if
+ * the name is already registered or if all CONN_TYPE_MAX slots are used. */
+int connTypeRegister(ConnectionType *ct) {
+    const char *typename = ct->get_type(NULL);
+    ConnectionType *tmpct;
+    int type;
+
+    /* find an empty slot to store the new connection type */
+    for (type = 0; type < CONN_TYPE_MAX; type++) {
+        tmpct = connTypes[type];
+        if (!tmpct)
+            break;
+
+        /* ignore case, we really don't care "tls"/"TLS" */
+        if (!strcasecmp(typename, tmpct->get_type(NULL))) {
+            serverLog(LL_WARNING, "Connection types %s already registered", typename);
+            return C_ERR;
+        }
+    }
+
+    /* Guard against a full table: without this check the store below would
+     * write one past the end of connTypes[] (out-of-bounds) when every
+     * slot is already occupied by a distinct type. */
+    if (type == CONN_TYPE_MAX) {
+        serverLog(LL_WARNING, "Connection type table full, can't register %s", typename);
+        return C_ERR;
+    }
+
+    serverLog(LL_VERBOSE, "Connection type %s registered", typename);
+    connTypes[type] = ct;
+
+    if (ct->init) {
+        ct->init();
+    }
+
+    return C_OK;
+}
+
+/* Register all built-in connection types. TCP and Unix socket registration
+ * must succeed (asserted); TLS registration is best-effort because the TLS
+ * type may be compiled out. Always returns C_OK. */
+int connTypeInitialize(void) {
+    /* currently socket connection type is necessary */
+    serverAssert(RedisRegisterConnectionTypeSocket() == C_OK);
+
+    /* currently unix socket connection type is necessary */
+    serverAssert(RedisRegisterConnectionTypeUnix() == C_OK);
+
+    /* may fail if without BUILD_TLS=yes */
+    RedisRegisterConnectionTypeTLS();
+
+    return C_OK;
+}
+
+/* Look up a registered connection type by name (case-insensitive).
+ * Registration fills connTypes[] from index 0 without holes, so the scan
+ * may stop at the first NULL slot. Returns NULL (and logs a warning) when
+ * the type was never registered. */
+ConnectionType *connectionByType(const char *typename) {
+    ConnectionType *ct;
+
+    for (int type = 0; type < CONN_TYPE_MAX; type++) {
+        ct = connTypes[type];
+        if (!ct)
+            break;
+
+        if (!strcasecmp(typename, ct->get_type(NULL)))
+            return ct;
+    }
+
+    serverLog(LL_WARNING, "Missing implement of connection type %s", typename);
+
+    return NULL;
+}
+
+/* Cache TCP connection type, query it by string once */
+/* Cache TCP connection type, query it by string once.
+ * The TCP type is mandatory (registered in connTypeInitialize), so a
+ * failed lookup is a programming error and is asserted. */
+ConnectionType *connectionTypeTcp(void) {
+    static ConnectionType *ct_tcp = NULL;
+
+    if (ct_tcp != NULL)
+        return ct_tcp;
+
+    ct_tcp = connectionByType(CONN_TYPE_SOCKET);
+    serverAssert(ct_tcp != NULL);
+
+    return ct_tcp;
+}
+
+/* Cache TLS connection type, query it by string once.
+ * May legitimately return NULL when TLS support is not built in. */
+ConnectionType *connectionTypeTls(void) {
+    static ConnectionType *ct_tls = NULL;
+    static int cached = 0;
+
+    /* Unlike the TCP and Unix connections, the TLS one can be missing
+     * So we need the cached pointer to handle NULL correctly too. */
+    if (!cached) {
+        cached = 1;
+        ct_tls = connectionByType(CONN_TYPE_TLS);
+    }
+
+    return ct_tls;
+}
+
+/* Cache Unix connection type, query it by string once.
+ * NOTE(review): unlike the TCP path, the lookup result is not asserted
+ * here even though Unix registration is mandatory — confirm intentional. */
+ConnectionType *connectionTypeUnix(void) {
+    static ConnectionType *ct_unix = NULL;
+
+    if (ct_unix != NULL)
+        return ct_unix;
+
+    ct_unix = connectionByType(CONN_TYPE_UNIX);
+    return ct_unix;
+}
+
+/* Return the connTypes[] slot index of the named connection type
+ * (case-insensitive), or -1 if it is not registered. The scan stops at
+ * the first NULL slot because registration never leaves holes. */
+int connectionIndexByType(const char *typename) {
+    ConnectionType *ct;
+
+    for (int type = 0; type < CONN_TYPE_MAX; type++) {
+        ct = connTypes[type];
+        if (!ct)
+            break;
+
+        if (!strcasecmp(typename, ct->get_type(NULL)))
+            return type;
+    }
+
+    return -1;
+}
+
+/* Invoke the optional cleanup() callback of every registered connection
+ * type. Stops at the first empty slot (the registration table is packed). */
+void connTypeCleanupAll(void) {
+    ConnectionType *ct;
+    int type;
+
+    for (type = 0; type < CONN_TYPE_MAX; type++) {
+        ct = connTypes[type];
+        if (!ct)
+            break;
+
+        if (ct->cleanup)
+            ct->cleanup();
+    }
+}
+
+/* walk all the connection types until has pending data.
+ * Returns the first non-zero value reported by a type's
+ * has_pending_data() callback, or 0 when no type has pending data. */
+int connTypeHasPendingData(void) {
+    ConnectionType *ct;
+    int type;
+    int ret = 0;
+
+    for (type = 0; type < CONN_TYPE_MAX; type++) {
+        ct = connTypes[type];
+        if (ct && ct->has_pending_data && (ret = ct->has_pending_data())) {
+            return ret;
+        }
+    }
+
+    return ret;
+}
+
+/* walk all the connection types and process pending data for each connection type.
+ * Returns the sum of the values reported by each type's
+ * process_pending_data() callback. */
+int connTypeProcessPendingData(void) {
+    ConnectionType *ct;
+    int type;
+    int ret = 0;
+
+    for (type = 0; type < CONN_TYPE_MAX; type++) {
+        ct = connTypes[type];
+        if (ct && ct->process_pending_data) {
+            ret += ct->process_pending_data();
+        }
+    }
+
+    return ret;
+}
+
+/* Append one "listenerN:name=...,bind=...[,port=...]" line per configured
+ * listener in server.listeners[] to the sds string 'info'. Unused slots
+ * (NULL connection type) are skipped. Returns the possibly reallocated
+ * sds; the caller owns and must eventually free it. */
+sds getListensInfoString(sds info) {
+    for (int j = 0; j < CONN_TYPE_MAX; j++) {
+        connListener *listener = &server.listeners[j];
+        if (listener->ct == NULL)
+            continue;
+
+        info = sdscatfmt(info, "listener%i:name=%s", j, listener->ct->get_type(NULL));
+        for (int i = 0; i < listener->count; i++) {
+            info = sdscatfmt(info, ",bind=%s", listener->bindaddr[i]);
+        }
+
+        if (listener->port)
+            info = sdscatfmt(info, ",port=%i", listener->port);
+
+        info = sdscatfmt(info, "\r\n");
+    }
+
+    return info;
+}
diff --git a/src/connection.h b/src/connection.h
new file mode 100644
index 0000000..d0a17ab
--- /dev/null
+++ b/src/connection.h
@@ -0,0 +1,454 @@
+
+/*
+ * Copyright (c) 2019, Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __REDIS_CONNECTION_H
+#define __REDIS_CONNECTION_H
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/uio.h>
+
+#include "ae.h"
+
+#define CONN_INFO_LEN 32
+#define CONN_ADDR_STR_LEN 128 /* Similar to INET6_ADDRSTRLEN, hoping to handle other protocols. */
+#define MAX_ACCEPTS_PER_CALL 1000
+
+struct aeEventLoop;
+typedef struct connection connection;
+typedef struct connListener connListener;
+
+typedef enum {
+ CONN_STATE_NONE = 0,
+ CONN_STATE_CONNECTING,
+ CONN_STATE_ACCEPTING,
+ CONN_STATE_CONNECTED,
+ CONN_STATE_CLOSED,
+ CONN_STATE_ERROR
+} ConnectionState;
+
+#define CONN_FLAG_CLOSE_SCHEDULED (1<<0) /* Closed scheduled by a handler */
+#define CONN_FLAG_WRITE_BARRIER (1<<1) /* Write barrier requested */
+
+#define CONN_TYPE_SOCKET "tcp"
+#define CONN_TYPE_UNIX "unix"
+#define CONN_TYPE_TLS "tls"
+#define CONN_TYPE_MAX 8 /* 8 is enough to be extendable */
+
+typedef void (*ConnectionCallbackFunc)(struct connection *conn);
+
+/* ConnectionType is the virtual-method table of the connection layer:
+ * each transport (tcp/unix/tls) supplies one instance with its callbacks.
+ * Some callbacks are optional and may be NULL (e.g. init, cleanup,
+ * has_pending_data, get_peer_cert); the inline wrappers in this header
+ * and connection.c check before invoking those. */
+typedef struct ConnectionType {
+    /* connection type */
+    const char *(*get_type)(struct connection *conn);
+
+    /* connection type initialize & finalize & configure */
+    void (*init)(void); /* auto-call during register */
+    void (*cleanup)(void);
+    int (*configure)(void *priv, int reconfigure);
+
+    /* ae & accept & listen & error & address handler */
+    void (*ae_handler)(struct aeEventLoop *el, int fd, void *clientData, int mask);
+    aeFileProc *accept_handler;
+    int (*addr)(connection *conn, char *ip, size_t ip_len, int *port, int remote);
+    int (*is_local)(connection *conn);
+    int (*listen)(connListener *listener);
+
+    /* create/shutdown/close connection */
+    connection* (*conn_create)(void);
+    connection* (*conn_create_accepted)(int fd, void *priv);
+    void (*shutdown)(struct connection *conn);
+    void (*close)(struct connection *conn);
+
+    /* connect & accept */
+    int (*connect)(struct connection *conn, const char *addr, int port, const char *source_addr, ConnectionCallbackFunc connect_handler);
+    int (*blocking_connect)(struct connection *conn, const char *addr, int port, long long timeout);
+    int (*accept)(struct connection *conn, ConnectionCallbackFunc accept_handler);
+
+    /* IO */
+    int (*write)(struct connection *conn, const void *data, size_t data_len);
+    int (*writev)(struct connection *conn, const struct iovec *iov, int iovcnt);
+    int (*read)(struct connection *conn, void *buf, size_t buf_len);
+    int (*set_write_handler)(struct connection *conn, ConnectionCallbackFunc handler, int barrier);
+    int (*set_read_handler)(struct connection *conn, ConnectionCallbackFunc handler);
+    const char *(*get_last_error)(struct connection *conn);
+    ssize_t (*sync_write)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
+    ssize_t (*sync_read)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
+    ssize_t (*sync_readline)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
+
+    /* pending data */
+    int (*has_pending_data)(void);
+    int (*process_pending_data)(void);
+
+    /* TLS specified methods */
+    sds (*get_peer_cert)(struct connection *conn);
+} ConnectionType;
+
+/* A single connection instance. The generic state lives here; transport
+ * specific state is kept by the implementation behind 'type'. */
+struct connection {
+    ConnectionType *type;               /* transport vtable (tcp/unix/tls) */
+    ConnectionState state;              /* CONN_STATE_* lifecycle state */
+    int last_errno;                     /* last system errno, see connLastErrorRetryable() */
+    int fd;
+    short int flags;                    /* CONN_FLAG_* bits */
+    short int refs;                     /* handler refcount, see connhelpers.h */
+    unsigned short int iovcnt;
+    void *private_data;                 /* opaque, set via connSetPrivateData() */
+    ConnectionCallbackFunc conn_handler;
+    ConnectionCallbackFunc write_handler;
+    ConnectionCallbackFunc read_handler;
+};
+
+#define CONFIG_BINDADDR_MAX 16
+
+/* Setup a listener by a connection type */
+struct connListener {
+    int fd[CONFIG_BINDADDR_MAX];        /* listening fds, one per bind address */
+    int count;                          /* number of valid entries in fd[] */
+    char **bindaddr;
+    int bindaddr_count;
+    int port;
+    ConnectionType *ct;
+    void *priv; /* used by connection type specified data */
+};
+
+/* The connection module does not deal with listening and accepting sockets,
+ * so we assume we have a socket when an incoming connection is created.
+ *
+ * The fd supplied should therefore be associated with an already accept()ed
+ * socket.
+ *
+ * connAccept() may directly call accept_handler(), or return and call it
+ * at a later time. This behavior is a bit awkward but aims to reduce the need
+ * to wait for the next event loop, if no additional handshake is required.
+ *
+ * IMPORTANT: accept_handler may decide to close the connection, calling connClose().
+ * To make this safe, the connection is only marked with CONN_FLAG_CLOSE_SCHEDULED
+ * in this case, and connAccept() returns with an error.
+ *
+ * connAccept() callers must always check the return value and on error (C_ERR)
+ * a connClose() must be called.
+ */
+
+static inline int connAccept(connection *conn, ConnectionCallbackFunc accept_handler) {
+    return conn->type->accept(conn, accept_handler);
+}
+
+/* Establish a connection. The connect_handler will be called when the connection
+ * is established, or if an error has occurred.
+ *
+ * The connection handler will be responsible to set up any read/write handlers
+ * as needed.
+ *
+ * If C_ERR is returned, the operation failed and the connection handler shall
+ * not be expected.
+ */
+static inline int connConnect(connection *conn, const char *addr, int port, const char *src_addr,
+                              ConnectionCallbackFunc connect_handler) {
+    return conn->type->connect(conn, addr, port, src_addr, connect_handler);
+}
+
+/* Blocking connect.
+ *
+ * NOTE: This is implemented in order to simplify the transition to the abstract
+ * connections, but should probably be refactored out of cluster.c and replication.c,
+ * in favor of a pure async implementation.
+ *
+ * NOTE(review): 'timeout' looks like milliseconds — confirm against the
+ * transport implementations before relying on the unit.
+ */
+static inline int connBlockingConnect(connection *conn, const char *addr, int port, long long timeout) {
+    return conn->type->blocking_connect(conn, addr, port, timeout);
+}
+
+/* Write to connection, behaves the same as write(2).
+ *
+ * Like write(2), a short write is possible. A -1 return indicates an error.
+ *
+ * The caller should NOT rely on errno. Testing for an EAGAIN-like condition, use
+ * connGetState() to see if the connection state is still CONN_STATE_CONNECTED.
+ */
+static inline int connWrite(connection *conn, const void *data, size_t data_len) {
+    return conn->type->write(conn, data, data_len);
+}
+
+/* Gather output data from the iovcnt buffers specified by the members of the iov
+ * array: iov[0], iov[1], ..., iov[iovcnt-1] and write to connection, behaves the same as writev(3).
+ *
+ * Like writev(3), a short write is possible. A -1 return indicates an error.
+ *
+ * The caller should NOT rely on errno. Testing for an EAGAIN-like condition, use
+ * connGetState() to see if the connection state is still CONN_STATE_CONNECTED.
+ */
+static inline int connWritev(connection *conn, const struct iovec *iov, int iovcnt) {
+    return conn->type->writev(conn, iov, iovcnt);
+}
+
+/* Read from the connection, behaves the same as read(2).
+ *
+ * Like read(2), a short read is possible. A return value of 0 will indicate the
+ * connection was closed, and -1 will indicate an error.
+ *
+ * The caller should NOT rely on errno. Testing for an EAGAIN-like condition, use
+ * connGetState() to see if the connection state is still CONN_STATE_CONNECTED.
+ */
+static inline int connRead(connection *conn, void *buf, size_t buf_len) {
+    /* Forward directly to the transport, like every other wrapper here;
+     * the intermediate return variable added nothing. */
+    return conn->type->read(conn, buf, buf_len);
+}
+
+/* Register a write handler, to be called when the connection is writable.
+ * If NULL, the existing handler is removed.
+ */
+static inline int connSetWriteHandler(connection *conn, ConnectionCallbackFunc func) {
+    return conn->type->set_write_handler(conn, func, 0);
+}
+
+/* Register a read handler, to be called when the connection is readable.
+ * If NULL, the existing handler is removed.
+ */
+static inline int connSetReadHandler(connection *conn, ConnectionCallbackFunc func) {
+    return conn->type->set_read_handler(conn, func);
+}
+
+/* Set a write handler, and possibly enable a write barrier, this flag is
+ * cleared when write handler is changed or removed.
+ * With barrier enabled, we never fire the event if the read handler already
+ * fired in the same event loop iteration. Useful when you want to persist
+ * things to disk before sending replies, and want to do that in a group fashion. */
+static inline int connSetWriteHandlerWithBarrier(connection *conn, ConnectionCallbackFunc func, int barrier) {
+    return conn->type->set_write_handler(conn, func, barrier);
+}
+
+/* Shut down the connection via the transport's shutdown callback. */
+static inline void connShutdown(connection *conn) {
+    conn->type->shutdown(conn);
+}
+
+/* Close the connection via the transport's close callback. */
+static inline void connClose(connection *conn) {
+    conn->type->close(conn);
+}
+
+/* Returns the last error encountered by the connection, as a string. If no error,
+ * a NULL is returned.
+ */
+static inline const char *connGetLastError(connection *conn) {
+    return conn->type->get_last_error(conn);
+}
+
+/* Blocking (synchronous) I/O helpers with a timeout, delegated to the
+ * transport implementation. */
+static inline ssize_t connSyncWrite(connection *conn, char *ptr, ssize_t size, long long timeout) {
+    return conn->type->sync_write(conn, ptr, size, timeout);
+}
+
+static inline ssize_t connSyncRead(connection *conn, char *ptr, ssize_t size, long long timeout) {
+    return conn->type->sync_read(conn, ptr, size, timeout);
+}
+
+static inline ssize_t connSyncReadLine(connection *conn, char *ptr, ssize_t size, long long timeout) {
+    return conn->type->sync_readline(conn, ptr, size, timeout);
+}
+
+/* Return CONN_TYPE_* for the specified connection */
+static inline const char *connGetType(connection *conn) {
+    return conn->type->get_type(conn);
+}
+
+/* Only EINTR is treated as retryable here. */
+static inline int connLastErrorRetryable(connection *conn) {
+    return conn->last_errno == EINTR;
+}
+
+/* Get address information of a connection.
+ * remote works as boolean type to get local/remote address.
+ * Returns -1 when conn is NULL or the transport has no addr callback. */
+static inline int connAddr(connection *conn, char *ip, size_t ip_len, int *port, int remote) {
+    if (conn && conn->type->addr) {
+        return conn->type->addr(conn, ip, ip_len, port, remote);
+    }
+
+    return -1;
+}
+
+/* Format an IP,port pair into something easy to parse. If IP is IPv6
+ * (matches for ":"), the ip is surrounded by []. IP and port are just
+ * separated by colons. This the standard to display addresses within Redis. */
+static inline int formatAddr(char *buf, size_t buf_len, char *ip, int port) {
+    return snprintf(buf, buf_len, strchr(ip,':') ?
+           "[%s]:%d" : "%s:%d", ip, port);
+}
+
+/* Resolve a connection's (local or remote) address and format it as
+ * "ip:port" / "[ipv6]:port" into buf. Returns -1 if the address cannot be
+ * obtained, otherwise the snprintf-style return of formatAddr(). */
+static inline int connFormatAddr(connection *conn, char *buf, size_t buf_len, int remote)
+{
+    char ip[CONN_ADDR_STR_LEN];
+    int port;
+
+    if (connAddr(conn, ip, sizeof(ip), &port, remote) < 0) {
+        return -1;
+    }
+
+    return formatAddr(buf, buf_len, ip, port);
+}
+
+/* Remote (peer) address of the connection. */
+static inline int connAddrPeerName(connection *conn, char *ip, size_t ip_len, int *port) {
+    return connAddr(conn, ip, ip_len, port, 1);
+}
+
+/* Local (socket) address of the connection. */
+static inline int connAddrSockName(connection *conn, char *ip, size_t ip_len, int *port) {
+    return connAddr(conn, ip, ip_len, port, 0);
+}
+
+/* Test a connection is local or loopback.
+ * Return -1 on failure, 0 is not a local connection, 1 is a local connection */
+static inline int connIsLocal(connection *conn) {
+    if (conn && conn->type->is_local) {
+        return conn->type->is_local(conn);
+    }
+
+    return -1;
+}
+
+/* Current CONN_STATE_* of the connection. */
+static inline int connGetState(connection *conn) {
+    return conn->state;
+}
+
+/* Returns true if a write handler is registered */
+static inline int connHasWriteHandler(connection *conn) {
+    return conn->write_handler != NULL;
+}
+
+/* Returns true if a read handler is registered */
+static inline int connHasReadHandler(connection *conn) {
+    return conn->read_handler != NULL;
+}
+
+/* Associate a private data pointer with the connection */
+static inline void connSetPrivateData(connection *conn, void *data) {
+    conn->private_data = data;
+}
+
+/* Get the associated private data pointer */
+static inline void *connGetPrivateData(connection *conn) {
+    return conn->private_data;
+}
+
+/* Return a text that describes the connection, suitable for inclusion
+ * in CLIENT LIST and similar outputs.
+ *
+ * For sockets, we always return "fd=<fdnum>" to maintain compatibility.
+ * A NULL conn is reported as fd=-1.
+ */
+static inline const char *connGetInfo(connection *conn, char *buf, size_t buf_len) {
+    snprintf(buf, buf_len-1, "fd=%i", conn == NULL ? -1 : conn->fd);
+    return buf;
+}
+
+/* anet-style wrappers to conns (implemented per transport elsewhere) */
+int connBlock(connection *conn);
+int connNonBlock(connection *conn);
+int connEnableTcpNoDelay(connection *conn);
+int connDisableTcpNoDelay(connection *conn);
+int connKeepAlive(connection *conn, int interval);
+int connSendTimeout(connection *conn, long long ms);
+int connRecvTimeout(connection *conn, long long ms);
+
+/* Get cert for the secure connection.
+ * Returns NULL when the transport does not expose certificates (non-TLS).
+ * NOTE(review): the returned sds presumably must be freed by the caller —
+ * confirm against the TLS implementation. */
+static inline sds connGetPeerCert(connection *conn) {
+    if (conn->type->get_peer_cert) {
+        return conn->type->get_peer_cert(conn);
+    }
+
+    return NULL;
+}
+
+/* Initialize the redis connection framework */
+int connTypeInitialize(void);
+
+/* Register a connection type into redis connection framework */
+int connTypeRegister(ConnectionType *ct);
+
+/* Lookup a connection type by type name */
+ConnectionType *connectionByType(const char *typename);
+
+/* Fast path to get TCP connection type */
+ConnectionType *connectionTypeTcp(void);
+
+/* Fast path to get TLS connection type */
+ConnectionType *connectionTypeTls(void);
+
+/* Fast path to get Unix connection type */
+ConnectionType *connectionTypeUnix(void);
+
+/* Lookup the index of a connection type by type name, return -1 if not found */
+int connectionIndexByType(const char *typename);
+
+/* Create a connection of specified type */
+static inline connection *connCreate(ConnectionType *ct) {
+    return ct->conn_create();
+}
+
+/* Create an accepted connection of specified type.
+ * priv is connection type specified argument */
+static inline connection *connCreateAccepted(ConnectionType *ct, int fd, void *priv) {
+    return ct->conn_create_accepted(fd, priv);
+}
+
+/* Configure a connection type. A typical case is to configure TLS.
+ * priv is connection type specified,
+ * reconfigure is boolean type to specify if overwrite the original config */
+static inline int connTypeConfigure(ConnectionType *ct, void *priv, int reconfigure) {
+    return ct->configure(priv, reconfigure);
+}
+
+/* Walk all the connection types and cleanup them all if possible */
+void connTypeCleanupAll(void);
+
+/* Test all the connection type has pending data or not. */
+int connTypeHasPendingData(void);
+
+/* walk all the connection types and process pending data for each connection type */
+int connTypeProcessPendingData(void);
+
+/* Listen on an initialized listener */
+static inline int connListen(connListener *listener) {
+    return listener->ct->listen(listener);
+}
+
+/* Get accept_handler of a connection type */
+static inline aeFileProc *connAcceptHandler(ConnectionType *ct) {
+    if (ct)
+        return ct->accept_handler;
+    return NULL;
+}
+
+/* Get Listeners information, note that caller should free the non-empty string */
+sds getListensInfoString(sds info);
+
+/* Per-transport registration entry points, called by connTypeInitialize(). */
+int RedisRegisterConnectionTypeSocket(void);
+int RedisRegisterConnectionTypeUnix(void);
+int RedisRegisterConnectionTypeTLS(void);
+
+/* Return 1 if connection is using TLS protocol, 0 if otherwise. */
+static inline int connIsTLS(connection *conn) {
+    return conn && conn->type == connectionTypeTls();
+}
+
+#endif /* __REDIS_CONNECTION_H */
diff --git a/src/connhelpers.h b/src/connhelpers.h
new file mode 100644
index 0000000..b32e44d
--- /dev/null
+++ b/src/connhelpers.h
@@ -0,0 +1,88 @@
+
+/*
+ * Copyright (c) 2019, Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __REDIS_CONNHELPERS_H
+#define __REDIS_CONNHELPERS_H
+
+#include "connection.h"
+
+/* These are helper functions that are common to different connection
+ * implementations (currently sockets in connection.c and TLS in tls.c).
+ *
+ * Currently helpers implement the mechanisms for invoking connection
+ * handlers and tracking connection references, to allow safe destruction
+ * of connections from within a handler.
+ */
+
+/* Increment connection references.
+ *
+ * Inside a connection handler, we guarantee refs >= 1 so it is always
+ * safe to connClose().
+ *
+ * In other cases where we don't want to prematurely lose the connection,
+ * it can go beyond 1 as well; currently it is only done by connAccept().
+ */
+static inline void connIncrRefs(connection *conn) {
+    conn->refs++;
+}
+
+/* Decrement connection references.
+ *
+ * Note that this is not intended to provide any automatic free logic!
+ * callHandler() takes care of that for the common flows, and anywhere an
+ * explicit connIncrRefs() is used, the caller is expected to take care of
+ * that.
+ */
+
+static inline void connDecrRefs(connection *conn) {
+    conn->refs--;
+}
+
+/* Non-zero while any handler still holds a reference to the connection. */
+static inline int connHasRefs(connection *conn) {
+    return conn->refs;
+}
+
+/* Helper for connection implementations to call handlers:
+ * 1. Increment refs to protect the connection.
+ * 2. Execute the handler (if set).
+ * 3. Decrement refs and perform deferred close, if refs==0.
+ *
+ * Returns 0 when a close was scheduled during the handler (the connection
+ * may have been freed here — the caller must not touch it), 1 otherwise.
+ */
+static inline int callHandler(connection *conn, ConnectionCallbackFunc handler) {
+    connIncrRefs(conn);
+    if (handler) handler(conn);
+    connDecrRefs(conn);
+    if (conn->flags & CONN_FLAG_CLOSE_SCHEDULED) {
+        if (!connHasRefs(conn)) connClose(conn);
+        return 0;
+    }
+    return 1;
+}
+
+#endif /* __REDIS_CONNHELPERS_H */
diff --git a/src/crc16.c b/src/crc16.c
new file mode 100644
index 0000000..7b8c1da
--- /dev/null
+++ b/src/crc16.c
@@ -0,0 +1,88 @@
+#include "server.h"
+
+/*
+ * Copyright 2001-2010 Georges Menie (www.menie.org)
+ * Copyright 2010-2012 Salvatore Sanfilippo (adapted to Redis coding style)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University of California, Berkeley nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* CRC16 implementation according to CCITT standards.
+ *
+ * Note by @antirez: this is actually the XMODEM CRC 16 algorithm, using the
+ * following parameters:
+ *
+ * Name : "XMODEM", also known as "ZMODEM", "CRC-16/ACORN"
+ * Width : 16 bit
+ * Poly : 1021 (That is actually x^16 + x^12 + x^5 + 1)
+ * Initialization : 0000
+ * Reflect Input byte : False
+ * Reflect Output CRC : False
+ * Xor constant to output CRC : 0000
+ * Output for "123456789" : 31C3
+ */
+
+static const uint16_t crc16tab[256]= {
+ 0x0000,0x1021,0x2042,0x3063,0x4084,0x50a5,0x60c6,0x70e7,
+ 0x8108,0x9129,0xa14a,0xb16b,0xc18c,0xd1ad,0xe1ce,0xf1ef,
+ 0x1231,0x0210,0x3273,0x2252,0x52b5,0x4294,0x72f7,0x62d6,
+ 0x9339,0x8318,0xb37b,0xa35a,0xd3bd,0xc39c,0xf3ff,0xe3de,
+ 0x2462,0x3443,0x0420,0x1401,0x64e6,0x74c7,0x44a4,0x5485,
+ 0xa56a,0xb54b,0x8528,0x9509,0xe5ee,0xf5cf,0xc5ac,0xd58d,
+ 0x3653,0x2672,0x1611,0x0630,0x76d7,0x66f6,0x5695,0x46b4,
+ 0xb75b,0xa77a,0x9719,0x8738,0xf7df,0xe7fe,0xd79d,0xc7bc,
+ 0x48c4,0x58e5,0x6886,0x78a7,0x0840,0x1861,0x2802,0x3823,
+ 0xc9cc,0xd9ed,0xe98e,0xf9af,0x8948,0x9969,0xa90a,0xb92b,
+ 0x5af5,0x4ad4,0x7ab7,0x6a96,0x1a71,0x0a50,0x3a33,0x2a12,
+ 0xdbfd,0xcbdc,0xfbbf,0xeb9e,0x9b79,0x8b58,0xbb3b,0xab1a,
+ 0x6ca6,0x7c87,0x4ce4,0x5cc5,0x2c22,0x3c03,0x0c60,0x1c41,
+ 0xedae,0xfd8f,0xcdec,0xddcd,0xad2a,0xbd0b,0x8d68,0x9d49,
+ 0x7e97,0x6eb6,0x5ed5,0x4ef4,0x3e13,0x2e32,0x1e51,0x0e70,
+ 0xff9f,0xefbe,0xdfdd,0xcffc,0xbf1b,0xaf3a,0x9f59,0x8f78,
+ 0x9188,0x81a9,0xb1ca,0xa1eb,0xd10c,0xc12d,0xf14e,0xe16f,
+ 0x1080,0x00a1,0x30c2,0x20e3,0x5004,0x4025,0x7046,0x6067,
+ 0x83b9,0x9398,0xa3fb,0xb3da,0xc33d,0xd31c,0xe37f,0xf35e,
+ 0x02b1,0x1290,0x22f3,0x32d2,0x4235,0x5214,0x6277,0x7256,
+ 0xb5ea,0xa5cb,0x95a8,0x8589,0xf56e,0xe54f,0xd52c,0xc50d,
+ 0x34e2,0x24c3,0x14a0,0x0481,0x7466,0x6447,0x5424,0x4405,
+ 0xa7db,0xb7fa,0x8799,0x97b8,0xe75f,0xf77e,0xc71d,0xd73c,
+ 0x26d3,0x36f2,0x0691,0x16b0,0x6657,0x7676,0x4615,0x5634,
+ 0xd94c,0xc96d,0xf90e,0xe92f,0x99c8,0x89e9,0xb98a,0xa9ab,
+ 0x5844,0x4865,0x7806,0x6827,0x18c0,0x08e1,0x3882,0x28a3,
+ 0xcb7d,0xdb5c,0xeb3f,0xfb1e,0x8bf9,0x9bd8,0xabbb,0xbb9a,
+ 0x4a75,0x5a54,0x6a37,0x7a16,0x0af1,0x1ad0,0x2ab3,0x3a92,
+ 0xfd2e,0xed0f,0xdd6c,0xcd4d,0xbdaa,0xad8b,0x9de8,0x8dc9,
+ 0x7c26,0x6c07,0x5c64,0x4c45,0x3ca2,0x2c83,0x1ce0,0x0cc1,
+ 0xef1f,0xff3e,0xcf5d,0xdf7c,0xaf9b,0xbfba,0x8fd9,0x9ff8,
+ 0x6e17,0x7e36,0x4e55,0x5e74,0x2e93,0x3eb2,0x0ed1,0x1ef0
+};
+
+/* Compute the CRC-16/XMODEM (parameters documented above) of 'len' bytes
+ * at 'buf'. Table-driven: one crc16tab lookup per input byte, initial CRC
+ * is 0. The &0x00FF mask keeps the table index in range even when 'char'
+ * is signed and *buf sign-extends. */
+uint16_t crc16(const char *buf, int len) {
+    int counter;
+    uint16_t crc = 0;
+    for (counter = 0; counter < len; counter++)
+            crc = (crc<<8) ^ crc16tab[((crc>>8) ^ *buf++)&0x00FF];
+    return crc;
+}
diff --git a/src/crc16_slottable.h b/src/crc16_slottable.h
new file mode 100644
index 0000000..652aea9
--- /dev/null
+++ b/src/crc16_slottable.h
@@ -0,0 +1,835 @@
+#ifndef _CRC16_TABLE_H__
+#define _CRC16_TABLE_H__
+
+/* A table of the shortest possible alphanumeric string that is mapped by redis' crc16
+ * to any given redis cluster slot.
+ *
+ * The array indexes are slot numbers, so that given a desired slot, this string is guaranteed
+ * to make redis cluster route a request to the shard holding this slot
+ */
+
+const char *crc16_slot_table[] = {
+"06S", "Qi", "5L5", "4Iu", "4gY", "460", "1Y7", "1LV", "0QG", "ru", "7Ok", "4ji", "4DE", "65n", "2JH", "I8", "F9", "SX", "7nF", "4KD",
+"4eh", "6PK", "2ke", "1Ng", "0Sv", "4L", "491", "4hX", "4Ft", "5C4", "2Hy", "09R", "021", "0cX", "4Xv", "6mU", "6Cy", "42R", "0Mt", "nF",
+"cv", "1Pe", "5kK", "6NI", "74L", "4UF", "0nh", "MZ", "2TJ", "0ai", "4ZG", "6od", "6AH", "40c", "0OE", "lw", "aG", "0Bu", "5iz", "6Lx",
+"5R7", "4Ww", "0lY", "Ok", "5n3", "4ks", "8YE", "7g", "2KR", "1nP", "714", "64t", "69D", "4Ho", "07I", "Ps", "2hN", "1ML", "4fC", "7CA",
+"avs", "4iB", "0Rl", "5V", "2Ic", "08H", "4Gn", "66E", "aUo", "b4e", "05x", "RB", "8f", "8VD", "4dr", "5a2", "4zp", "6OS", "bl", "355",
+"0or", "1j2", "75V", "bno", "4Yl", "6lO", "Ap", "0bB", "0Ln", "2yM", "6Bc", "43H", "4xA", "6Mb", "22D", "14", "0mC", "Nq", "6cN", "4Vm",
+"ban", "aDl", "CA", "14Z", "8GG", "mm", "549", "41y", "53t", "464", "1Y3", "1LR", "06W", "Qm", "5L1", "4Iq", "4DA", "65j", "2JL", "1oN",
+"0QC", "6y", "7Oo", "4jm", "4el", "6PO", "9x", "1Nc", "04f", "2EM", "7nB", "bqs", "4Fp", "5C0", "d6F", "09V", "0Sr", "4H", "495", "bRo",
+"aio", "42V", "0Mp", "nB", "025", "17u", "4Xr", "6mQ", "74H", "4UB", "0nl", "3Kn", "cr", "1Pa", "5kO", "6NM", "6AL", "40g", "0OA", "ls",
+"2TN", "0am", "4ZC", "aEr", "5R3", "4Ws", "18t", "Oo", "aC", "0Bq", "bCl", "afn", "2KV", "1nT", "5Uz", "64p", "5n7", "4kw", "0PY", "7c",
+"2hJ", "1MH", "4fG", "6Sd", "7mi", "4Hk", "07M", "Pw", "2Ig", "08L", "4Gj", "66A", "7LD", "4iF", "0Rh", "5R", "8b", "1Oy", "4dv", "5a6",
+"7oX", "4JZ", "0qt", "RF", "0ov", "LD", "4A9", "4TX", "4zt", "6OW", "bh", "0AZ", "z9", "oX", "6Bg", "43L", "4Yh", "6lK", "At", "0bF",
+"0mG", "Nu", "6cJ", "4Vi", "4xE", "6Mf", "2vH", "10", "8GC", "mi", "5p5", "4uu", "5Kx", "4N8", "CE", "1pV", "0QO", "6u", "7Oc", "4ja",
+"4DM", "65f", "3Za", "I0", "0rS", "Qa", "68V", "b7F", "4gQ", "468", "dSo", "285", "274", "4D", "499", "4hP", "b8G", "67W", "0h3", "09Z",
+"F1", "SP", "7nN", "4KL", "51I", "6PC", "9t", "1No", "21g", "1Pm", "5kC", "6NA", "74D", "4UN", "X3", "MR", "029", "0cP", "bbM", "79t",
+"4c3", "42Z", "8Dd", "nN", "aO", "8Ke", "4yS", "4l2", "76u", "635", "0lQ", "Oc", "BS", "W2", "4ZO", "6ol", "7Qa", "40k", "0OM", "2zn",
+"69L", "4Hg", "07A", "2Fj", "2hF", "k6", "4fK", "6Sh", "7Ny", "6K9", "0PU", "7o", "2KZ", "1nX", "4EW", "4P6", "7oT", "4JV", "05p", "RJ",
+"8n", "1Ou", "4dz", "6QY", "7LH", "4iJ", "d7", "qV", "2Ik", "1li", "4Gf", "66M", "4Yd", "6lG", "Ax", "0bJ", "z5", "oT", "6Bk", "4wH",
+"4zx", "aeI", "bd", "0AV", "0oz", "LH", "4A5", "4TT", "5Kt", "4N4", "CI", "14R", "0NW", "me", "541", "41q", "4xI", "6Mj", "22L", "u4",
+"0mK", "Ny", "6cF", "4Ve", "4DI", "65b", "2JD", "I4", "0QK", "6q", "7Og", "4je", "4gU", "4r4", "2iX", "1LZ", "0rW", "Qe", "5L9", "4Iy",
+"4Fx", "5C8", "0h7", "1mw", "0Sz", "pH", "7MV", "4hT", "4ed", "6PG", "9p", "1Nk", "F5", "ST", "7nJ", "4KH", "7pH", "4UJ", "X7", "MV",
+"cz", "1Pi", "5kG", "6NE", "4c7", "4vV", "0Mx", "nJ", "0v5", "0cT", "4Xz", "6mY", "6bX", "5GZ", "0lU", "Og", "aK", "0By", "4yW", "4l6",
+"6AD", "40o", "0OI", "2zj", "BW", "W6", "4ZK", "6oh", "2hB", "k2", "4fO", "6Sl", "69H", "4Hc", "07E", "2Fn", "d5e", "83m", "4ES", "4P2",
+"a0F", "bQL", "0PQ", "7k", "8j", "1Oq", "50W", "hbv", "7oP", "4JR", "05t", "RN", "2Io", "08D", "4Gb", "66I", "7LL", "4iN", "d3", "5Z",
+"z1", "oP", "6Bo", "43D", "5IA", "6lC", "2Wm", "0bN", "8ff", "LL", "4A1", "4TP", "cPn", "aeM", "0T3", "0AR", "0NS", "ma", "545", "41u",
+"5Kp", "4N0", "CM", "14V", "0mO", "2Xl", "6cB", "4Va", "4xM", "6Mn", "22H", "18", "04s", "SI", "7nW", "4KU", "4ey", "6PZ", "9m", "1Nv",
+"e4", "pU", "7MK", "4hI", "4Fe", "67N", "2Hh", "09C", "06B", "Qx", "68O", "4Id", "4gH", "6Rk", "2iE", "j5", "0QV", "6l", "5o8", "4jx",
+"4DT", "4Q5", "2JY", "82j", "BJ", "0ax", "4ZV", "4O7", "552", "40r", "0OT", "lf", "aV", "t7", "4yJ", "6Li", "6bE", "4Wf", "0lH", "Oz",
+"2Vj", "0cI", "4Xg", "6mD", "6Ch", "42C", "0Me", "nW", "cg", "1Pt", "5kZ", "6NX", "7pU", "4UW", "0ny", "MK", "7LQ", "4iS", "267", "5G",
+"0i0", "08Y", "b9D", "66T", "7oM", "4JO", "G2", "RS", "8w", "1Ol", "4dc", "7Aa", "atS", "4kb", "0PL", "7v", "2KC", "H3", "4EN", "64e",
+"69U", "b6E", "07X", "Pb", "dRl", "296", "4fR", "4s3", "4xP", "4m1", "22U", "8Jf", "0mR", "0x3", "77v", "626", "5Km", "6no", "CP", "V1",
+"0NN", "3kL", "7Pb", "41h", "4za", "6OB", "20d", "0AO", "Y0", "LQ", "6an", "4TM", "bcN", "78w", "Aa", "0bS", "8Eg", "oM", "4b0", "43Y",
+"51T", "azL", "9i", "1Nr", "04w", "SM", "7nS", "4KQ", "4Fa", "67J", "2Hl", "09G", "e0", "4Y", "7MO", "4hM", "4gL", "6Ro", "2iA", "j1",
+"06F", "2Gm", "68K", "5YA", "4DP", "4Q1", "d4f", "82n", "0QR", "6h", "a1E", "bPO", "556", "40v", "0OP", "lb", "BN", "15U", "4ZR", "4O3",
+"6bA", "4Wb", "0lL", "2Yo", "aR", "t3", "4yN", "6Lm", "6Cl", "42G", "0Ma", "nS", "2Vn", "0cM", "4Xc", "79i", "74Y", "4US", "8ge", "MO",
+"cc", "1Pp", "bAL", "adN", "0i4", "1lt", "5WZ", "66P", "7LU", "4iW", "0Ry", "5C", "8s", "1Oh", "4dg", "6QD", "7oI", "4JK", "G6", "RW",
+"2KG", "H7", "4EJ", "64a", "7Nd", "4kf", "0PH", "7r", "1X8", "1MY", "4fV", "4s7", "69Q", "4Hz", "0sT", "Pf", "0mV", "Nd", "5S8", "4Vx",
+"4xT", "4m5", "22Q", "0Cz", "0NJ", "mx", "7Pf", "41l", "5Ki", "6nk", "CT", "V5", "Y4", "LU", "6aj", "4TI", "4ze", "6OF", "by", "0AK",
+"2l9", "oI", "4b4", "4wU", "4Yy", "6lZ", "Ae", "0bW", "0So", "4U", "7MC", "4hA", "4Fm", "67F", "3XA", "09K", "0ps", "SA", "aTl", "b5f",
+"4eq", "6PR", "9e", "8WG", "8XF", "6d", "5o0", "4jp", "707", "65w", "1z2", "1oS", "06J", "Qp", "68G", "4Il", "53i", "6Rc", "2iM", "1LO",
+"23G", "07", "4yB", "6La", "6bM", "4Wn", "18i", "Or", "BB", "0ap", "c4D", "aEo", "5q2", "40z", "8FD", "ln", "co", "346", "5kR", "6NP",
+"74U", "bol", "0nq", "MC", "2Vb", "0cA", "4Xo", "6mL", "7SA", "42K", "0Mm", "2xN", "7oE", "4JG", "05a", "2DJ", "2jf", "1Od", "4dk", "6QH",
+"482", "5yz", "0Ru", "5O", "0i8", "08Q", "4Gw", "5B7", "5M6", "4Hv", "07P", "Pj", "1X4", "1MU", "4fZ", "473", "7Nh", "4kj", "0PD", "sv",
+"2KK", "1nI", "4EF", "64m", "5Ke", "6ng", "CX", "V9", "0NF", "mt", "7Pj", "4uh", "4xX", "4m9", "1F6", "0Cv", "0mZ", "Nh", "5S4", "4Vt",
+"4Yu", "6lV", "Ai", "16r", "0Lw", "oE", "4b8", "43Q", "4zi", "6OJ", "bu", "0AG", "Y8", "LY", "6af", "4TE", "4Fi", "67B", "2Hd", "09O",
+"e8", "4Q", "7MG", "4hE", "4eu", "6PV", "9a", "1Nz", "0pw", "SE", "aTh", "4KY", "4DX", "4Q9", "1z6", "1oW", "0QZ", "rh", "5o4", "4jt",
+"4gD", "6Rg", "2iI", "j9", "06N", "Qt", "68C", "4Ih", "6bI", "4Wj", "0lD", "Ov", "aZ", "03", "4yF", "6Le", "5q6", "4tv", "0OX", "lj",
+"BF", "0at", "4ZZ", "6oy", "74Q", "5Ez", "0nu", "MG", "ck", "1Px", "5kV", "6NT", "6Cd", "42O", "0Mi", "2xJ", "2Vf", "0cE", "4Xk", "6mH",
+"2jb", "8VY", "4do", "6QL", "7oA", "4JC", "05e", "2DN", "d7E", "08U", "4Gs", "5B3", "486", "bSl", "0Rq", "5K", "1X0", "1MQ", "52w", "477",
+"5M2", "4Hr", "07T", "Pn", "2KO", "1nM", "4EB", "64i", "7Nl", "4kn", "8YX", "7z", "0NB", "mp", "7Pn", "41d", "5Ka", "6nc", "2UM", "14G",
+"19w", "Nl", "5S0", "4Vp", "bBo", "agm", "1F2", "0Cr", "0Ls", "oA", "ahl", "43U", "4Yq", "6lR", "Am", "16v", "0oo", "2ZL", "6ab", "4TA",
+"4zm", "6ON", "bq", "0AC", "2VY", "0cz", "4XT", "4M5", "570", "42p", "0MV", "nd", "cT", "v5", "5ki", "6Nk", "74n", "4Ud", "0nJ", "Mx",
+"By", "0aK", "4Ze", "6oF", "6Aj", "40A", "y4", "lU", "ae", "0BW", "4yy", "581", "4B4", "4WU", "18R", "OI", "06q", "QK", "7lU", "4IW",
+"53R", "6RX", "0I4", "1Lt", "g6", "rW", "7OI", "4jK", "4Dg", "65L", "2Jj", "1oh", "0pH", "Sz", "7nd", "4Kf", "4eJ", "6Pi", "2kG", "h7",
+"0ST", "4n", "7Mx", "4hz", "4FV", "4S7", "1x8", "09p", "4zR", "4o3", "bN", "8Hd", "0oP", "Lb", "75t", "604", "4YN", "6lm", "AR", "T3",
+"0LL", "2yo", "6BA", "43j", "4xc", "agR", "22f", "0CM", "0ma", "NS", "6cl", "4VO", "baL", "aDN", "Cc", "14x", "8Ge", "mO", "7PQ", "4uS",
+"7NS", "4kQ", "245", "7E", "0k2", "1nr", "coo", "64V", "69f", "4HM", "E0", "PQ", "2hl", "1Mn", "4fa", "6SB", "7Lb", "5yA", "0RN", "5t",
+"2IA", "J1", "4GL", "66g", "aUM", "b4G", "05Z", "0d3", "8D", "8Vf", "4dP", "459", "574", "42t", "0MR", "0X3", "dln", "17W", "4XP", "4M1",
+"74j", "5EA", "0nN", "3KL", "cP", "29", "5km", "6No", "6An", "40E", "y0", "lQ", "2Tl", "0aO", "4Za", "6oB", "4B0", "4WQ", "18V", "OM",
+"aa", "0BS", "bCN", "585", "53V", "axN", "0I0", "1Lp", "06u", "QO", "68x", "4IS", "4Dc", "65H", "2Jn", "1ol", "g2", "rS", "7OM", "4jO",
+"4eN", "6Pm", "9Z", "h3", "04D", "2Eo", "aTS", "4Kb", "4FR", "4S3", "d6d", "09t", "0SP", "4j", "a3G", "bRM", "0oT", "Lf", "6aY", "4Tz",
+"4zV", "4o7", "bJ", "0Ax", "0LH", "oz", "6BE", "43n", "4YJ", "6li", "AV", "T7", "0me", "NW", "6ch", "4VK", "4xg", "6MD", "22b", "0CI",
+"0Ny", "mK", "7PU", "4uW", "5KZ", "6nX", "Cg", "1pt", "0k6", "1nv", "4Ey", "64R", "7NW", "4kU", "241", "7A", "2hh", "1Mj", "4fe", "6SF",
+"69b", "4HI", "E4", "PU", "2IE", "J5", "4GH", "66c", "7Lf", "4id", "0RJ", "5p", "2jY", "8Vb", "4dT", "4q5", "5O8", "4Jx", "0qV", "Rd",
+"21E", "25", "5ka", "6Nc", "74f", "4Ul", "0nB", "Mp", "1f2", "0cr", "bbo", "79V", "578", "42x", "395", "nl", "am", "364", "4yq", "589",
+"76W", "bmn", "0ls", "OA", "Bq", "0aC", "4Zm", "6oN", "6Ab", "40I", "0Oo", "2zL", "0Qm", "6W", "7OA", "4jC", "4Do", "65D", "2Jb", "82Q",
+"06y", "QC", "68t", "b7d", "4gs", "5b3", "dSM", "8UE", "8ZD", "4f", "5m2", "4hr", "725", "67u", "1x0", "09x", "04H", "Sr", "7nl", "4Kn",
+"4eB", "6Pa", "9V", "1NM", "4YF", "6le", "AZ", "0bh", "0LD", "ov", "6BI", "43b", "4zZ", "6Oy", "bF", "0At", "0oX", "Lj", "5Q6", "4Tv",
+"5KV", "6nT", "Ck", "14p", "0Nu", "mG", "7PY", "41S", "4xk", "6MH", "22n", "0CE", "0mi", "2XJ", "6cd", "4VG", "69n", "4HE", "E8", "PY",
+"2hd", "1Mf", "4fi", "6SJ", "ath", "4kY", "0Pw", "7M", "2Kx", "1nz", "4Eu", "6pV", "5O4", "4Jt", "05R", "Rh", "8L", "1OW", "4dX", "451",
+"7Lj", "4ih", "0RF", "qt", "2II", "J9", "4GD", "66o", "74b", "4Uh", "0nF", "Mt", "cX", "21", "5ke", "6Ng", "5s4", "4vt", "0MZ", "nh",
+"1f6", "0cv", "4XX", "4M9", "4B8", "4WY", "0lw", "OE", "ai", "1Rz", "4yu", "6LV", "6Af", "40M", "y8", "lY", "Bu", "0aG", "4Zi", "6oJ",
+"4Dk", "6qH", "2Jf", "1od", "0Qi", "6S", "7OE", "4jG", "4gw", "5b7", "0I8", "1Lx", "0ru", "QG", "68p", "5Yz", "4FZ", "67q", "1x4", "1mU",
+"0SX", "4b", "5m6", "4hv", "4eF", "6Pe", "9R", "1NI", "04L", "Sv", "7nh", "4Kj", "8EX", "or", "6BM", "43f", "4YB", "6la", "2WO", "0bl",
+"8fD", "Ln", "5Q2", "4Tr", "cPL", "aeo", "bB", "0Ap", "0Nq", "mC", "ajn", "41W", "5KR", "6nP", "Co", "14t", "0mm", "2XN", "77I", "4VC",
+"4xo", "6ML", "22j", "0CA", "3xA", "1Mb", "4fm", "6SN", "69j", "4HA", "07g", "2FL", "d5G", "83O", "4Eq", "64Z", "a0d", "bQn", "0Ps", "7I",
+"8H", "1OS", "50u", "455", "5O0", "4Jp", "05V", "Rl", "2IM", "08f", "5Wa", "66k", "7Ln", "4il", "0RB", "5x", "Bh", "0aZ", "4Zt", "6oW",
+"4a9", "40P", "0Ov", "lD", "at", "0BF", "4yh", "6LK", "6bg", "4WD", "Z9", "OX", "2VH", "U8", "4XE", "6mf", "6CJ", "42a", "0MG", "nu",
+"cE", "1PV", "5kx", "4n8", "5P5", "4Uu", "8gC", "Mi", "04Q", "Sk", "5N7", "4Kw", "51r", "442", "9O", "1NT", "0SE", "pw", "7Mi", "4hk",
+"4FG", "67l", "2HJ", "09a", "3", "QZ", "68m", "4IF", "4gj", "6RI", "2ig", "1Le", "0Qt", "6N", "7OX", "4jZ", "4Dv", "5A6", "0j9", "1oy",
+"4xr", "6MQ", "22w", "377", "0mp", "NB", "77T", "blm", "5KO", "6nM", "Cr", "14i", "0Nl", "3kn", "ajs", "41J", "4zC", "aer", "20F", "36",
+"0oA", "Ls", "6aL", "4To", "bcl", "78U", "AC", "0bq", "386", "oo", "5r3", "4ws", "5l1", "4iq", "9Kf", "5e", "1y3", "1lR", "736", "66v",
+"7oo", "4Jm", "05K", "Rq", "8U", "1ON", "4dA", "6Qb", "7NB", "bQs", "0Pn", "7T", "2Ka", "1nc", "4El", "64G", "69w", "b6g", "07z", "1v2",
+"dRN", "8TF", "4fp", "5c0", "akm", "40T", "0Or", "1J2", "Bl", "15w", "4Zp", "6oS", "6bc", "5Ga", "0ln", "2YM", "ap", "0BB", "4yl", "6LO",
+"6CN", "42e", "0MC", "nq", "2VL", "0co", "4XA", "6mb", "5P1", "4Uq", "8gG", "Mm", "cA", "1PR", "bAn", "adl", "51v", "446", "9K", "1NP",
+"04U", "So", "5N3", "4Ks", "4FC", "67h", "2HN", "09e", "0SA", "ps", "7Mm", "4ho", "4gn", "6RM", "2ic", "1La", "7", "2GO", "68i", "4IB",
+"4Dr", "5A2", "d4D", "82L", "0Qp", "6J", "a1g", "bPm", "0mt", "NF", "6cy", "4VZ", "4xv", "6MU", "0V9", "0CX", "0Nh", "mZ", "7PD", "41N",
+"5KK", "6nI", "Cv", "14m", "0oE", "Lw", "6aH", "4Tk", "4zG", "6Od", "20B", "32", "0LY", "ok", "5r7", "4ww", "5Iz", "6lx", "AG", "0bu",
+"1y7", "1lV", "4GY", "4R8", "5l5", "4iu", "1Bz", "5a", "8Q", "i8", "4dE", "6Qf", "7ok", "4Ji", "05O", "Ru", "2Ke", "1ng", "4Eh", "64C",
+"7NF", "4kD", "f9", "7P", "2hy", "3m9", "4ft", "5c4", "69s", "4HX", "0sv", "PD", "23e", "0BN", "5iA", "6LC", "6bo", "4WL", "Z1", "OP",
+"0t3", "0aR", "c4f", "aEM", "4a1", "40X", "8Ff", "lL", "cM", "8Ig", "5kp", "4n0", "74w", "617", "0nS", "Ma", "3Fa", "U0", "4XM", "6mn",
+"6CB", "42i", "0MO", "2xl", "0SM", "4w", "7Ma", "4hc", "4FO", "67d", "2HB", "K2", "04Y", "Sc", "aTN", "b5D", "4eS", "4p2", "9G", "8We",
+"256", "6F", "7OP", "4jR", "cnl", "65U", "0j1", "1oq", "D3", "QR", "68e", "4IN", "4gb", "6RA", "2io", "1Lm", "5KG", "6nE", "Cz", "14a",
+"x7", "mV", "7PH", "41B", "4xz", "592", "0V5", "0CT", "0mx", "NJ", "4C7", "4VV", "4YW", "4L6", "AK", "0by", "0LU", "og", "563", "43s",
+"4zK", "6Oh", "bW", "w6", "0oI", "2Zj", "6aD", "4Tg", "7og", "4Je", "05C", "Ry", "2jD", "i4", "4dI", "6Qj", "5l9", "4iy", "0RW", "5m",
+"2IX", "08s", "4GU", "4R4", "7mV", "4HT", "07r", "PH", "0H7", "1Mw", "4fx", "5c8", "7NJ", "4kH", "f5", "sT", "2Ki", "1nk", "4Ed", "64O",
+"6bk", "4WH", "Z5", "OT", "ax", "0BJ", "4yd", "6LG", "4a5", "4tT", "0Oz", "lH", "Bd", "0aV", "4Zx", "aEI", "5P9", "4Uy", "0nW", "Me",
+"cI", "1PZ", "5kt", "4n4", "6CF", "42m", "0MK", "ny", "2VD", "U4", "4XI", "6mj", "4FK", "6sh", "2HF", "K6", "0SI", "4s", "7Me", "4hg",
+"4eW", "4p6", "9C", "1NX", "0pU", "Sg", "7ny", "6k9", "4Dz", "65Q", "0j5", "1ou", "0Qx", "6B", "7OT", "4jV", "4gf", "6RE", "2ik", "1Li",
+"D7", "QV", "68a", "4IJ", "x3", "mR", "7PL", "41F", "5KC", "6nA", "2Uo", "14e", "19U", "NN", "4C3", "4VR", "bBM", "596", "0V1", "0CP",
+"0LQ", "oc", "567", "43w", "4YS", "4L2", "AO", "16T", "0oM", "2Zn", "75i", "4Tc", "4zO", "6Ol", "bS", "w2", "8Y", "i0", "4dM", "6Qn",
+"7oc", "4Ja", "05G", "2Dl", "d7g", "08w", "4GQ", "4R0", "a2D", "bSN", "0RS", "5i", "0H3", "1Ms", "52U", "ayM", "7mR", "4HP", "07v", "PL",
+"2Km", "1no", "5UA", "64K", "7NN", "4kL", "f1", "7X", "5nw", "4k7", "fJ", "0Ex", "0kT", "Hf", "6eY", "4Pz", "5Mk", "6hi", "EV", "P7",
+"0HH", "kz", "6FE", "47n", "48o", "6ID", "26b", "0GI", "0ie", "JW", "6gh", "4RK", "5OZ", "6jX", "Gg", "0dU", "0Jy", "iK", "4d6", "4qW",
+"4z4", "4oU", "1DZ", "3A", "Ye", "0zW", "4Ay", "5D9", "6yj", "4LI", "A4", "TU", "zy", "0YK", "4be", "6WF", "6XG", "4md", "0VJ", "1p",
+"2ME", "N5", "4CH", "62c", "5K8", "4Nx", "0uV", "Vd", "xH", "8Rb", "5pu", "4u5", "D", "13W", "5Lq", "4I1", "534", "46t", "0IR", "28y",
+"gP", "69", "5om", "6Jo", "6dC", "5AA", "0jN", "3OL", "2Pl", "0eO", "aT1", "6kB", "6En", "44E", "98", "hQ", "ea", "0FS", "49u", "abL",
+"4F0", "4SQ", "8ag", "KM", "02u", "UO", "4X2", "4MS", "57V", "a8F", "0M0", "0XQ", "c2", "vS", "7KM", "4nO", "5PB", "61H", "2Nn", "1kl",
+"00D", "2Ao", "6zA", "4Ob", "4aN", "6Tm", "yR", "l3", "0WP", "0j", "a7G", "58W", "4BR", "4W3", "ZN", "84l", "0kP", "Hb", "71t", "644",
+"5ns", "4k3", "fN", "8Ld", "0HL", "29g", "6FA", "47j", "5Mo", "6hm", "ER", "P3", "0ia", "JS", "6gl", "4RO", "48k", "7Ya", "26f", "0GM",
+"8Ce", "iO", "4d2", "4qS", "beL", "hYw", "Gc", "0dQ", "Ya", "0zS", "cko", "60V", "4z0", "4oQ", "205", "3E", "2ll", "0YO", "4ba", "6WB",
+"6yn", "4LM", "A0", "TQ", "2MA", "N1", "4CL", "62g", "6XC", "59I", "0VN", "1t", "xL", "8Rf", "54y", "419", "aQM", "b0G", "01Z", "3PP",
+"530", "46p", "0IV", "jd", "DH", "0gz", "5Lu", "4I5", "6dG", "4Qd", "0jJ", "Ix", "gT", "r5", "5oi", "6Jk", "6Ej", "44A", "0Kg", "hU",
+"Fy", "0eK", "5ND", "6kF", "4F4", "4SU", "1xZ", "KI", "ee", "0FW", "49q", "5x9", "57R", "6VX", "0M4", "0XU", "02q", "UK", "4X6", "4MW",
+"5PF", "61L", "2Nj", "1kh", "c6", "vW", "7KI", "4nK", "4aJ", "6Ti", "yV", "l7", "0tH", "Wz", "6zE", "4Of", "4BV", "4W7", "ZJ", "0yx",
+"0WT", "0n", "6YY", "4lz", "5Mc", "6ha", "2SO", "0fl", "1Xa", "kr", "6FM", "47f", "bDm", "aao", "fB", "0Ep", "8bD", "Hn", "5U2", "4Pr",
+"5OR", "5Z3", "Go", "10t", "0Jq", "iC", "ann", "45W", "48g", "6IL", "ds", "0GA", "0im", "3Lo", "73I", "4RC", "6yb", "4LA", "03g", "2BL",
+"zq", "0YC", "4bm", "6WN", "a4d", "bUn", "0Ts", "3I", "Ym", "87O", "4Aq", "5D1", "5K0", "4Np", "01V", "Vl", "2nQ", "1KS", "54u", "415",
+"6XO", "4ml", "0VB", "1x", "2MM", "0xn", "5Sa", "62k", "gX", "61", "5oe", "6Jg", "6dK", "4Qh", "0jF", "It", "L", "0gv", "5Ly", "4I9",
+"5w4", "4rt", "0IZ", "jh", "ei", "1Vz", "5mT", "5x5", "4F8", "4SY", "0hw", "KE", "Fu", "0eG", "5NH", "6kJ", "6Ef", "44M", "90", "hY",
+"0Ui", "2S", "7KE", "4nG", "5PJ", "6uH", "Xw", "1kd", "0vu", "UG", "6xx", "790", "4cw", "5f7", "0M8", "0XY", "0WX", "0b", "5i6", "4lv",
+"4BZ", "63q", "ZF", "0yt", "00L", "Wv", "6zI", "4Oj", "4aF", "6Te", "yZ", "0Zh", "0HD", "kv", "6FI", "47b", "5Mg", "6he", "EZ", "0fh",
+"0kX", "Hj", "5U6", "4Pv", "7N9", "6Ky", "fF", "0Et", "0Ju", "iG", "6Dx", "45S", "5OV", "5Z7", "Gk", "0dY", "0ii", "3Lk", "6gd", "4RG",
+"48c", "6IH", "dw", "0GE", "zu", "0YG", "4bi", "6WJ", "6yf", "4LE", "A8", "TY", "Yi", "1jz", "4Au", "5D5", "4z8", "4oY", "0Tw", "3M",
+"xD", "1KW", "54q", "411", "5K4", "4Nt", "01R", "Vh", "2MI", "N9", "4CD", "62o", "6XK", "4mh", "0VF", "ut", "6dO", "4Ql", "0jB", "Ip",
+"25E", "65", "5oa", "6Jc", "538", "46x", "9Pg", "jl", "H", "0gr", "bfo", "aCm", "72W", "bin", "0hs", "KA", "em", "324", "49y", "5x1",
+"6Eb", "44I", "94", "3nm", "Fq", "0eC", "5NL", "6kN", "5PN", "61D", "Xs", "86Q", "0Um", "2W", "7KA", "4nC", "4cs", "5f3", "39W", "8QE",
+"02y", "UC", "aRn", "794", "765", "63u", "ZB", "0yp", "9Ne", "0f", "5i2", "4lr", "4aB", "6Ta", "2oO", "0Zl", "00H", "Wr", "6zM", "4On",
+"5lW", "5y6", "dj", "0GX", "0it", "JF", "6gy", "4RZ", "5OK", "6jI", "Gv", "0dD", "83", "iZ", "6De", "45N", "5nf", "6Kd", "24B", "72",
+"0kE", "Hw", "6eH", "4Pk", "5Mz", "6hx", "EG", "0fu", "0HY", "kk", "5v7", "4sw", "5h5", "4mu", "1Fz", "1a", "2MT", "0xw", "4CY", "4V8",
+"7kk", "4Ni", "01O", "Vu", "xY", "m8", "54l", "6Uf", "6Zg", "4oD", "b9", "3P", "Yt", "0zF", "4Ah", "60C", "4Y9", "4LX", "0wv", "TD",
+"zh", "0YZ", "4bt", "5g4", "Fl", "11w", "5NQ", "6kS", "aom", "44T", "0Kr", "1N2", "ep", "0FB", "49d", "6HO", "6fc", "5Ca", "0hn", "3Ml",
+"U", "0go", "bfr", "6ib", "6GN", "46e", "0IC", "jq", "gA", "0Ds", "bEn", "hyU", "5T1", "4Qq", "8cG", "Im", "00U", "Wo", "5J3", "4Os",
+"55v", "406", "yC", "0Zq", "0WA", "ts", "6YL", "4lo", "4BC", "63h", "2LN", "0ym", "02d", "2CO", "6xa", "4MB", "4cn", "6VM", "2mc", "1Ha",
+"0Up", "2J", "a5g", "bTm", "5PS", "5E2", "Xn", "86L", "0ip", "JB", "73T", "bhm", "48z", "5y2", "dn", "337", "87", "3on", "6Da", "45J",
+"5OO", "6jM", "Gr", "10i", "0kA", "Hs", "6eL", "4Po", "5nb", "aar", "24F", "76", "8AE", "ko", "5v3", "4ss", "bgl", "aBn", "EC", "0fq",
+"2MP", "0xs", "776", "62v", "5h1", "4mq", "9Of", "1e", "2nL", "1KN", "54h", "6Ub", "7ko", "4Nm", "01K", "Vq", "Yp", "0zB", "4Al", "60G",
+"6Zc", "bUs", "0Tn", "3T", "zl", "8PF", "4bp", "5g0", "aSm", "787", "03z", "1r2", "4e9", "44P", "0Kv", "hD", "Fh", "0eZ", "5NU", "6kW",
+"6fg", "4SD", "0hj", "KX", "et", "0FF", "5mI", "6HK", "6GJ", "46a", "0IG", "ju", "Q", "Q8", "5Ld", "6if", "5T5", "4Qu", "1zz", "Ii",
+"gE", "0Dw", "5ox", "4j8", "55r", "402", "yG", "0Zu", "00Q", "Wk", "5J7", "4Ow", "4BG", "63l", "2LJ", "0yi", "0WE", "tw", "6YH", "4lk",
+"4cj", "6VI", "2mg", "0XD", "0vh", "UZ", "6xe", "4MF", "5PW", "5E6", "Xj", "1ky", "0Ut", "2N", "7KX", "4nZ", "5OC", "6jA", "2Qo", "0dL",
+"1ZA", "iR", "6Dm", "45F", "48v", "acO", "db", "0GP", "94M", "JN", "4G3", "4RR", "5Mr", "4H2", "EO", "12T", "0HQ", "kc", "527", "47w",
+"5nn", "6Kl", "fS", "s2", "0kM", "3NO", "71i", "4Pc", "7kc", "4Na", "01G", "3PM", "xQ", "m0", "54d", "6Un", "a6D", "59T", "0VS", "1i",
+"197", "85o", "4CQ", "4V0", "4Y1", "4LP", "03v", "TL", "0L3", "0YR", "56U", "a9E", "6Zo", "4oL", "b1", "3X", "2Om", "0zN", "5QA", "60K",
+"ex", "0FJ", "49l", "6HG", "6fk", "4SH", "0hf", "KT", "Fd", "0eV", "5NY", "aAI", "4e5", "4pT", "0Kz", "hH", "gI", "1TZ", "5ot", "4j4",
+"5T9", "4Qy", "0jW", "Ie", "DU", "Q4", "5Lh", "6ij", "6GF", "46m", "0IK", "jy", "0WI", "0s", "6YD", "4lg", "4BK", "6wh", "ZW", "O6",
+"0tU", "Wg", "6zX", "6o9", "4aW", "4t6", "yK", "0Zy", "0Ux", "2B", "7KT", "4nV", "bzI", "61Q", "Xf", "1ku", "02l", "UV", "6xi", "4MJ",
+"4cf", "6VE", "2mk", "0XH", "0Jd", "iV", "6Di", "45B", "5OG", "6jE", "Gz", "0dH", "0ix", "JJ", "4G7", "4RV", "48r", "6IY", "df", "0GT",
+"0HU", "kg", "523", "47s", "5Mv", "4H6", "EK", "0fy", "0kI", "3NK", "6eD", "4Pg", "5nj", "6Kh", "fW", "s6", "xU", "m4", "5ph", "6Uj",
+"7kg", "4Ne", "01C", "Vy", "193", "1hZ", "4CU", "4V4", "5h9", "4my", "0VW", "1m", "zd", "0YV", "4bx", "5g8", "4Y5", "4LT", "03r", "TH",
+"Yx", "0zJ", "4Ad", "60O", "6Zk", "4oH", "b5", "wT", "6fo", "4SL", "0hb", "KP", "27e", "0FN", "49h", "6HC", "4e1", "44X", "8Bf", "hL",
+"0p3", "0eR", "bdO", "aAM", "70w", "657", "0jS", "Ia", "gM", "8Mg", "5op", "4j0", "6GB", "46i", "0IO", "28d", "Y", "Q0", "5Ll", "6in",
+"4BO", "63d", "ZS", "O2", "0WM", "0w", "7Ia", "4lc", "4aS", "4t2", "yO", "8Se", "00Y", "Wc", "aPN", "b1D", "bzM", "61U", "Xb", "1kq",
+"216", "2F", "7KP", "4nR", "4cb", "6VA", "2mo", "0XL", "02h", "UR", "6xm", "4MN", "5j7", "4ow", "0TY", "3c", "YG", "0zu", "5Qz", "60p",
+"6yH", "4Lk", "03M", "Tw", "2lJ", "0Yi", "4bG", "6Wd", "6Xe", "4mF", "0Vh", "1R", "2Mg", "0xD", "4Cj", "62A", "7kX", "4NZ", "0ut", "VF",
+"xj", "1Ky", "5pW", "5e6", "5nU", "6KW", "fh", "0EZ", "0kv", "HD", "4E9", "4PX", "5MI", "6hK", "Et", "0fF", "0Hj", "kX", "6Fg", "47L",
+"48M", "6If", "dY", "50", "0iG", "Ju", "6gJ", "4Ri", "5Ox", "4J8", "GE", "0dw", "1Zz", "ii", "5t5", "4qu", "02W", "Um", "5H1", "4Mq",
+"57t", "424", "2mP", "0Xs", "0UC", "2y", "7Ko", "4nm", "bzr", "61j", "2NL", "1kN", "00f", "2AM", "6zc", "bus", "4al", "6TO", "yp", "0ZB",
+"0Wr", "0H", "a7e", "58u", "4Bp", "5G0", "Zl", "84N", "f", "13u", "5LS", "5Y2", "amo", "46V", "0Ip", "jB", "gr", "1Ta", "5oO", "6JM",
+"6da", "4QB", "0jl", "3On", "2PN", "0em", "5Nb", "aAr", "6EL", "44g", "0KA", "hs", "eC", "0Fq", "49W", "abn", "5V3", "4Ss", "8aE", "Ko",
+"YC", "0zq", "754", "60t", "5j3", "4os", "9Md", "3g", "2lN", "0Ym", "4bC", "7GA", "6yL", "4Lo", "03I", "Ts", "2Mc", "1ha", "4Cn", "62E",
+"6Xa", "4mB", "0Vl", "1V", "xn", "8RD", "5pS", "5e2", "aQo", "b0e", "01x", "VB", "0kr", "1n2", "71V", "bjo", "5nQ", "6KS", "fl", "315",
+"0Hn", "29E", "6Fc", "47H", "5MM", "6hO", "Ep", "0fB", "0iC", "Jq", "6gN", "4Rm", "48I", "6Ib", "26D", "54", "8CG", "im", "509", "45y",
+"ben", "hYU", "GA", "0ds", "4cY", "420", "2mT", "0Xw", "02S", "Ui", "5H5", "4Mu", "5Pd", "61n", "XY", "M8", "0UG", "vu", "7Kk", "4ni",
+"4ah", "6TK", "yt", "0ZF", "B9", "WX", "6zg", "4OD", "4Bt", "5G4", "Zh", "0yZ", "0Wv", "0L", "4y9", "4lX", "6Gy", "46R", "0It", "jF",
+"b", "0gX", "5LW", "5Y6", "6de", "4QF", "0jh", "IZ", "gv", "0DD", "5oK", "6JI", "6EH", "44c", "0KE", "hw", "2PJ", "0ei", "5Nf", "6kd",
+"5V7", "4Sw", "0hY", "Kk", "eG", "0Fu", "49S", "6Hx", "7ia", "4Lc", "03E", "2Bn", "zS", "o2", "4bO", "6Wl", "a4F", "bUL", "0TQ", "3k",
+"YO", "87m", "4AS", "4T2", "7kP", "4NR", "01t", "VN", "xb", "1Kq", "54W", "hfv", "6Xm", "4mN", "1FA", "1Z", "2Mo", "0xL", "4Cb", "62I",
+"5MA", "6hC", "2Sm", "0fN", "0Hb", "kP", "6Fo", "47D", "bDO", "aaM", "0P3", "0ER", "8bf", "HL", "4E1", "4PP", "5Op", "4J0", "GM", "10V",
+"0JS", "ia", "505", "45u", "48E", "6In", "dQ", "58", "0iO", "3LM", "6gB", "4Ra", "0UK", "2q", "7Kg", "4ne", "5Ph", "61b", "XU", "M4",
+"0vW", "Ue", "5H9", "4My", "4cU", "4v4", "2mX", "1HZ", "0Wz", "tH", "4y5", "4lT", "4Bx", "5G8", "Zd", "0yV", "B5", "WT", "6zk", "4OH",
+"4ad", "6TG", "yx", "0ZJ", "gz", "0DH", "5oG", "6JE", "6di", "4QJ", "0jd", "IV", "n", "0gT", "680", "6iY", "4g7", "4rV", "0Ix", "jJ",
+"eK", "0Fy", "5mv", "4h6", "6fX", "5CZ", "0hU", "Kg", "FW", "S6", "5Nj", "6kh", "6ED", "44o", "0KI", "3nK", "zW", "o6", "4bK", "6Wh",
+"6yD", "4Lg", "03A", "2Bj", "YK", "0zy", "4AW", "4T6", "6ZX", "6O9", "0TU", "3o", "xf", "1Ku", "54S", "6UY", "7kT", "4NV", "01p", "VJ",
+"2Mk", "0xH", "4Cf", "62M", "6Xi", "4mJ", "0Vd", "uV", "0Hf", "kT", "6Fk", "4sH", "5ME", "6hG", "Ex", "0fJ", "0kz", "HH", "4E5", "4PT",
+"5nY", "aaI", "fd", "0EV", "0JW", "ie", "501", "45q", "5Ot", "4J4", "GI", "10R", "0iK", "Jy", "6gF", "4Re", "48A", "6Ij", "dU", "q4",
+"5Pl", "61f", "XQ", "M0", "0UO", "2u", "7Kc", "4na", "4cQ", "428", "39u", "8Qg", "0vS", "Ua", "aRL", "b3F", "bxO", "63W", "0l3", "0yR",
+"234", "0D", "4y1", "4lP", "55I", "6TC", "2om", "0ZN", "B1", "WP", "6zo", "4OL", "6dm", "4QN", "1zA", "IR", "25g", "0DL", "5oC", "6JA",
+"4g3", "46Z", "9PE", "jN", "j", "0gP", "684", "aCO", "72u", "675", "0hQ", "Kc", "eO", "8Oe", "5mr", "4h2", "7Ua", "44k", "0KM", "3nO",
+"FS", "S2", "5Nn", "6kl", "4x6", "4mW", "0Vy", "1C", "0m4", "0xU", "5SZ", "62P", "7kI", "4NK", "C6", "VW", "2nj", "1Kh", "54N", "6UD",
+"6ZE", "4of", "0TH", "3r", "YV", "L7", "4AJ", "60a", "6yY", "4Lz", "0wT", "Tf", "zJ", "0Yx", "4bV", "4w7", "5lu", "4i5", "dH", "0Gz",
+"0iV", "Jd", "5W8", "4Rx", "5Oi", "6jk", "GT", "R5", "0JJ", "ix", "6DG", "45l", "5nD", "6KF", "fy", "0EK", "0kg", "HU", "6ej", "4PI",
+"5MX", "5X9", "Ee", "0fW", "1XZ", "kI", "4f4", "4sU", "00w", "WM", "4Z0", "4OQ", "55T", "hgu", "ya", "0ZS", "a0", "0Y", "6Yn", "4lM",
+"4Ba", "63J", "2Ll", "0yO", "02F", "2Cm", "6xC", "aG0", "4cL", "6Vo", "2mA", "n1", "0UR", "2h", "a5E", "bTO", "5Pq", "4U1", "XL", "86n",
+"FN", "11U", "5Ns", "4K3", "516", "44v", "0KP", "hb", "eR", "p3", "49F", "6Hm", "6fA", "4Sb", "0hL", "3MN", "w", "0gM", "5LB", "7ya",
+"6Gl", "46G", "0Ia", "jS", "gc", "0DQ", "bEL", "hyw", "4D2", "4QS", "8ce", "IO", "0m0", "0xQ", "byL", "62T", "4x2", "4mS", "227", "1G",
+"2nn", "1Kl", "54J", "7Ea", "7kM", "4NO", "C2", "VS", "YR", "L3", "4AN", "60e", "6ZA", "4ob", "0TL", "3v", "zN", "8Pd", "4bR", "4w3",
+"aSO", "b2E", "03X", "Tb", "0iR", "3LP", "73v", "666", "48X", "4i1", "dL", "8Nf", "0JN", "3oL", "6DC", "45h", "5Om", "6jo", "GP", "R1",
+"0kc", "HQ", "6en", "4PM", "a09", "6KB", "24d", "0EO", "8Ag", "kM", "4f0", "47Y", "697", "aBL", "Ea", "0fS", "4ay", "5d9", "ye", "0ZW",
+"00s", "WI", "4Z4", "4OU", "4Be", "63N", "Zy", "0yK", "a4", "tU", "6Yj", "4lI", "4cH", "6Vk", "2mE", "n5", "02B", "Ux", "6xG", "4Md",
+"5Pu", "4U5", "XH", "86j", "0UV", "2l", "5k8", "4nx", "512", "44r", "0KT", "hf", "FJ", "0ex", "5Nw", "4K7", "6fE", "4Sf", "0hH", "Kz",
+"eV", "p7", "49B", "6Hi", "6Gh", "46C", "0Ie", "jW", "s", "0gI", "5LF", "6iD", "4D6", "4QW", "0jy", "IK", "gg", "0DU", "5oZ", "6JX",
+"7kA", "4NC", "01e", "3Po", "xs", "8RY", "54F", "6UL", "a6f", "59v", "0Vq", "1K", "d3E", "85M", "4Cs", "5F3", "5I2", "4Lr", "03T", "Tn",
+"zB", "0Yp", "56w", "437", "6ZM", "4on", "1Da", "3z", "2OO", "0zl", "4AB", "60i", "5Oa", "6jc", "2QM", "0dn", "0JB", "ip", "6DO", "45d",
+"48T", "acm", "1B2", "0Gr", "94o", "Jl", "5W0", "4Rp", "5MP", "5X1", "Em", "12v", "0Hs", "kA", "all", "47U", "5nL", "6KN", "fq", "0EC",
+"0ko", "3Nm", "6eb", "4PA", "a8", "0Q", "6Yf", "4lE", "4Bi", "63B", "Zu", "0yG", "0tw", "WE", "4Z8", "4OY", "4au", "5d5", "yi", "1Jz",
+"0UZ", "vh", "5k4", "4nt", "5Py", "4U9", "XD", "1kW", "02N", "Ut", "6xK", "4Mh", "4cD", "6Vg", "2mI", "n9", "eZ", "43", "49N", "6He",
+"6fI", "4Sj", "0hD", "Kv", "FF", "0et", "7n9", "6ky", "5u6", "4pv", "0KX", "hj", "gk", "0DY", "5oV", "5z7", "6dx", "5Az", "0ju", "IG",
+"Dw", "0gE", "5LJ", "6iH", "6Gd", "46O", "0Ii", "28B", "xw", "1Kd", "54B", "6UH", "7kE", "4NG", "01a", "3Pk", "0m8", "0xY", "4Cw", "5F7",
+"6Xx", "59r", "0Vu", "1O", "zF", "0Yt", "4bZ", "433", "5I6", "4Lv", "03P", "Tj", "YZ", "0zh", "4AF", "60m", "6ZI", "4oj", "0TD", "wv",
+"0JF", "it", "6DK", "4qh", "5Oe", "6jg", "GX", "R9", "0iZ", "Jh", "5W4", "4Rt", "48P", "4i9", "dD", "0Gv", "0Hw", "kE", "4f8", "47Q",
+"5MT", "5X5", "Ei", "12r", "0kk", "HY", "6ef", "4PE", "5nH", "6KJ", "fu", "0EG", "4Bm", "63F", "Zq", "0yC", "0Wo", "0U", "6Yb", "4lA",
+"4aq", "5d1", "ym", "8SG", "0ts", "WA", "aPl", "b1f", "747", "61w", "2NQ", "1kS", "9Lg", "2d", "5k0", "4np", "57i", "6Vc", "2mM", "0Xn",
+"02J", "Up", "6xO", "4Ml", "6fM", "4Sn", "1xa", "Kr", "27G", "47", "49J", "6Ha", "5u2", "44z", "8BD", "hn", "FB", "0ep", "bdm", "aAo",
+"70U", "bkl", "0jq", "IC", "go", "306", "5oR", "5z3", "7WA", "46K", "0Im", "28F", "Ds", "0gA", "5LN", "6iL", "0cY", "020", "6mT", "4Xw",
+"42S", "6Cx", "nG", "0Mu", "1Pd", "cw", "6NH", "5kJ", "4UG", "74M", "3Kk", "0ni", "0ah", "BZ", "6oe", "4ZF", "40b", "6AI", "lv", "0OD",
+"0Bt", "aF", "6Ly", "4yZ", "4Wv", "5R6", "Oj", "0lX", "Qh", "06R", "4It", "5L4", "461", "4gX", "1LW", "1Y6", "rt", "0QF", "4jh", "7Oj",
+"65o", "4DD", "I9", "2JI", "SY", "F8", "4KE", "7nG", "6PJ", "4ei", "1Nf", "2kd", "4M", "0Sw", "4hY", "490", "5C5", "4Fu", "09S", "2Hx",
+"6OR", "4zq", "354", "bm", "LA", "0os", "bnn", "75W", "6lN", "4Ym", "0bC", "Aq", "2yL", "0Lo", "43I", "6Bb", "6Mc", "5ha", "15", "22E",
+"Np", "0mB", "4Vl", "6cO", "aDm", "bao", "1pS", "1e2", "ml", "8GF", "41x", "548", "4kr", "5n2", "7f", "8YD", "1nQ", "2KS", "64u", "715",
+"4Hn", "69E", "Pr", "07H", "1MM", "2hO", "6Sa", "4fB", "4iC", "7LA", "5W", "0Rm", "08I", "2Ib", "66D", "4Go", "b4d", "aUn", "RC", "05y",
+"8VE", "8g", "5a3", "4ds", "42W", "ain", "nC", "0Mq", "17t", "024", "6mP", "4Xs", "4UC", "74I", "3Ko", "0nm", "8IY", "cs", "6NL", "5kN",
+"40f", "6AM", "lr", "8FX", "0al", "2TO", "6oa", "4ZB", "4Wr", "5R2", "On", "18u", "0Bp", "aB", "afo", "bCm", "465", "53u", "1LS", "1Y2",
+"Ql", "06V", "4Ip", "5L0", "65k", "5Ta", "1oO", "2JM", "6x", "0QB", "4jl", "7On", "6PN", "4em", "1Nb", "9y", "2EL", "04g", "4KA", "7nC",
+"5C1", "4Fq", "09W", "d6G", "4I", "0Ss", "bRn", "494", "LE", "0ow", "4TY", "4A8", "6OV", "4zu", "1Qz", "bi", "oY", "z8", "43M", "6Bf",
+"6lJ", "4Yi", "0bG", "Au", "Nt", "0mF", "4Vh", "6cK", "6Mg", "4xD", "11", "22A", "mh", "0NZ", "4ut", "5p4", "4N9", "5Ky", "1pW", "CD",
+"1nU", "2KW", "64q", "4EZ", "4kv", "5n6", "7b", "0PX", "1MI", "2hK", "6Se", "4fF", "4Hj", "69A", "Pv", "07L", "08M", "2If", "6rH", "4Gk",
+"4iG", "7LE", "5S", "0Ri", "1Ox", "8c", "5a7", "4dw", "5Zz", "7oY", "RG", "0qu", "1Pl", "21f", "adR", "5kB", "4UO", "74E", "MS", "X2",
+"0cQ", "028", "79u", "bbL", "4vS", "4c2", "nO", "8De", "8Kd", "aN", "4l3", "4yR", "634", "76t", "Ob", "0lP", "W3", "BR", "6om", "4ZN",
+"40j", "6AA", "2zo", "0OL", "6t", "0QN", "5zA", "7Ob", "65g", "4DL", "I1", "2JA", "0g3", "06Z", "b7G", "68W", "469", "4gP", "284", "dSn",
+"4E", "275", "4hQ", "498", "67V", "b8F", "1mr", "0h2", "SQ", "F0", "4KM", "7nO", "6PB", "4ea", "1Nn", "9u", "6lF", "4Ye", "0bK", "Ay",
+"oU", "z4", "43A", "6Bj", "6OZ", "4zy", "0AW", "be", "LI", "2O9", "4TU", "4A4", "4N5", "5Ku", "14S", "CH", "md", "0NV", "41p", "540",
+"6Mk", "4xH", "u5", "22M", "Nx", "0mJ", "4Vd", "6cG", "4Hf", "69M", "Pz", "0sH", "k7", "2hG", "6Si", "4fJ", "4kz", "7Nx", "7n", "0PT",
+"1nY", "dqh", "4P7", "4EV", "4JW", "7oU", "RK", "05q", "1Ot", "8o", "6QX", "50R", "4iK", "7LI", "qW", "d6", "08A", "2Ij", "66L", "4Gg",
+"4UK", "74A", "MW", "X6", "1Ph", "21b", "6ND", "5kF", "4vW", "4c6", "nK", "0My", "0cU", "0v4", "6mX", "5HZ", "4Wz", "6bY", "Of", "0lT",
+"0Bx", "aJ", "4l7", "4yV", "40n", "6AE", "lz", "0OH", "W7", "BV", "6oi", "4ZJ", "65c", "4DH", "I5", "2JE", "6p", "0QJ", "4jd", "7Of",
+"4r5", "4gT", "280", "2iY", "Qd", "0rV", "4Ix", "5L8", "5C9", "4Fy", "1mv", "0h6", "4A", "1CZ", "4hU", "7MW", "6PF", "4ee", "1Nj", "9q",
+"SU", "F4", "4KI", "7nK", "oQ", "z0", "43E", "6Bn", "6lB", "4Ya", "0bO", "2Wl", "LM", "8fg", "4TQ", "4A0", "aeL", "cPo", "0AS", "ba",
+"3kP", "0NR", "41t", "544", "4N1", "5Kq", "14W", "CL", "2Xm", "0mN", "5FA", "6cC", "6Mo", "4xL", "19", "22I", "k3", "2hC", "6Sm", "4fN",
+"4Hb", "69I", "2Fo", "07D", "83l", "d5d", "4P3", "4ER", "bQM", "a0G", "7j", "0PP", "1Op", "8k", "hbw", "50V", "4JS", "7oQ", "RO", "05u",
+"08E", "2In", "66H", "4Gc", "4iO", "7LM", "qS", "d2", "0ay", "BK", "4O6", "4ZW", "40s", "553", "lg", "0OU", "t6", "aW", "6Lh", "4yK",
+"4Wg", "6bD", "2Yj", "0lI", "0cH", "2Vk", "6mE", "4Xf", "42B", "6Ci", "nV", "0Md", "1Pu", "cf", "6NY", "bAI", "4UV", "7pT", "MJ", "0nx",
+"SH", "04r", "4KT", "7nV", "azI", "4ex", "1Nw", "9l", "pT", "e5", "4hH", "7MJ", "67O", "4Fd", "09B", "2Hi", "Qy", "06C", "4Ie", "68N",
+"6Rj", "4gI", "j4", "2iD", "6m", "0QW", "4jy", "5o9", "4Q4", "4DU", "1oZ", "2JX", "4m0", "4xQ", "8Jg", "22T", "Na", "0mS", "627", "77w",
+"6nn", "5Kl", "V0", "CQ", "3kM", "0NO", "41i", "7Pc", "6OC", "5jA", "0AN", "20e", "LP", "Y1", "4TL", "6ao", "78v", "bcO", "0bR", "0w3",
+"oL", "8Ef", "43X", "4b1", "4iR", "7LP", "5F", "266", "08X", "0i1", "66U", "b9E", "4JN", "7oL", "RR", "G3", "1Om", "8v", "6QA", "4db",
+"4kc", "7Na", "7w", "0PM", "H2", "2KB", "64d", "4EO", "b6D", "69T", "Pc", "07Y", "297", "dRm", "4s2", "4fS", "40w", "557", "lc", "0OQ",
+"15T", "BO", "4O2", "4ZS", "4Wc", "76i", "2Yn", "0lM", "t2", "aS", "6Ll", "4yO", "42F", "6Cm", "nR", "8Dx", "0cL", "2Vo", "6mA", "4Xb",
+"4UR", "74X", "MN", "8gd", "1Pq", "cb", "adO", "bAM", "azM", "51U", "1Ns", "9h", "SL", "04v", "4KP", "7nR", "67K", "5VA", "09F", "2Hm",
+"4X", "e1", "4hL", "7MN", "6Rn", "4gM", "j0", "3ya", "2Gl", "06G", "4Ia", "68J", "4Q0", "4DQ", "82o", "d4g", "6i", "0QS", "bPN", "a1D",
+"Ne", "0mW", "4Vy", "5S9", "4m4", "4xU", "1SZ", "22P", "my", "0NK", "41m", "7Pg", "6nj", "5Kh", "V4", "CU", "LT", "Y5", "4TH", "6ak",
+"6OG", "4zd", "0AJ", "bx", "oH", "0Lz", "4wT", "4b5", "78r", "4Yx", "0bV", "Ad", "1lu", "0i5", "66Q", "4Gz", "4iV", "7LT", "5B", "0Rx",
+"1Oi", "8r", "6QE", "4df", "4JJ", "7oH", "RV", "G7", "H6", "2KF", "6ph", "4EK", "4kg", "7Ne", "7s", "0PI", "1MX", "1X9", "4s6", "4fW",
+"5XZ", "69P", "Pg", "0sU", "06", "23F", "afr", "4yC", "4Wo", "6bL", "Os", "0lA", "0aq", "BC", "aEn", "c4E", "4ts", "5q3", "lo", "8FE",
+"347", "cn", "6NQ", "5kS", "bom", "74T", "MB", "0np", "17i", "2Vc", "6mM", "4Xn", "42J", "6Ca", "2xO", "0Ml", "4T", "0Sn", "5xa", "7MB",
+"67G", "4Fl", "09J", "2Ha", "1u2", "04z", "b5g", "aTm", "6PS", "4ep", "8WF", "9d", "6e", "8XG", "4jq", "5o1", "65v", "706", "1oR", "1z3",
+"Qq", "06K", "4Im", "68F", "6Rb", "4gA", "1LN", "2iL", "6nf", "5Kd", "V8", "CY", "mu", "0NG", "41a", "7Pk", "4m8", "4xY", "0Cw", "1F7",
+"Ni", "19r", "4Vu", "5S5", "6lW", "4Yt", "0bZ", "Ah", "oD", "0Lv", "43P", "4b9", "6OK", "4zh", "0AF", "bt", "LX", "Y9", "4TD", "6ag",
+"4JF", "7oD", "RZ", "0qh", "1Oe", "2jg", "6QI", "4dj", "4iZ", "483", "5N", "0Rt", "08P", "0i9", "5B6", "4Gv", "4Hw", "5M7", "Pk", "07Q",
+"1MT", "1X5", "472", "52r", "4kk", "7Ni", "sw", "0PE", "1nH", "2KJ", "64l", "4EG", "4Wk", "6bH", "Ow", "0lE", "02", "23B", "6Ld", "4yG",
+"4tw", "5q7", "lk", "0OY", "0au", "BG", "6ox", "5Jz", "4UZ", "74P", "MF", "0nt", "1Py", "cj", "6NU", "5kW", "42N", "6Ce", "nZ", "0Mh",
+"0cD", "2Vg", "6mI", "4Xj", "67C", "4Fh", "09N", "2He", "4P", "e9", "4hD", "7MF", "6PW", "4et", "3n9", "2ky", "SD", "0pv", "4KX", "7nZ",
+"4Q8", "4DY", "1oV", "1z7", "6a", "1Az", "4ju", "5o5", "6Rf", "4gE", "j8", "2iH", "Qu", "06O", "4Ii", "68B", "mq", "0NC", "41e", "7Po",
+"6nb", "bar", "14F", "2UL", "Nm", "19v", "4Vq", "5S1", "agl", "bBn", "0Cs", "1F3", "1I2", "0Lr", "43T", "ahm", "6lS", "4Yp", "16w", "Al",
+"2ZM", "0on", "5Da", "6ac", "6OO", "4zl", "0AB", "bp", "1Oa", "8z", "6QM", "4dn", "4JB", "aUs", "2DO", "05d", "08T", "d7D", "5B2", "4Gr",
+"bSm", "487", "5J", "0Rp", "1MP", "1X1", "476", "52v", "4Hs", "5M3", "Po", "07U", "1nL", "2KN", "64h", "4EC", "4ko", "7Nm", "ss", "0PA",
+"QJ", "06p", "4IV", "7lT", "6RY", "4gz", "1Lu", "0I5", "rV", "g7", "4jJ", "7OH", "65M", "4Df", "1oi", "2Jk", "2Ej", "04A", "4Kg", "7ne",
+"6Ph", "4eK", "h6", "2kF", "4o", "0SU", "5xZ", "7My", "4S6", "4FW", "09q", "1x9", "17R", "2VX", "4M4", "4XU", "42q", "571", "ne", "0MW",
+"v4", "cU", "6Nj", "5kh", "4Ue", "74o", "My", "0nK", "0aJ", "Bx", "6oG", "4Zd", "4tH", "6Ak", "lT", "y5", "0BV", "ad", "580", "4yx",
+"4WT", "4B5", "OH", "0lz", "4kP", "7NR", "7D", "244", "1ns", "0k3", "64W", "con", "4HL", "69g", "PP", "E1", "1Mo", "2hm", "6SC", "52I",
+"4ia", "7Lc", "5u", "0RO", "J0", "3Ya", "66f", "4GM", "b4F", "aUL", "Ra", "0qS", "8Vg", "8E", "458", "4dQ", "4o2", "4zS", "8He", "bO",
+"Lc", "0oQ", "605", "75u", "6ll", "4YO", "T2", "AS", "2yn", "0LM", "43k", "7Ra", "6MA", "4xb", "0CL", "22g", "NR", "19I", "4VN", "6cm",
+"aDO", "baM", "14y", "Cb", "mN", "8Gd", "41Z", "7PP", "axO", "53W", "1Lq", "0I1", "QN", "06t", "4IR", "68y", "65I", "4Db", "1om", "2Jo",
+"6Z", "g3", "4jN", "7OL", "6Pl", "4eO", "h2", "2kB", "2En", "04E", "4Kc", "7na", "4S2", "4FS", "09u", "d6e", "4k", "0SQ", "bRL", "a3F",
+"42u", "575", "na", "0MS", "17V", "dlo", "4M0", "4XQ", "4Ua", "74k", "3KM", "0nO", "28", "cQ", "6Nn", "5kl", "40D", "6Ao", "lP", "y1",
+"0aN", "2Tm", "6oC", "5JA", "4WP", "4B1", "OL", "18W", "0BR", "0W3", "584", "bCO", "1nw", "0k7", "64S", "4Ex", "4kT", "7NV", "sH", "0Pz",
+"1Mk", "2hi", "6SG", "4fd", "4HH", "69c", "PT", "E5", "J4", "2ID", "66b", "4GI", "4ie", "7Lg", "5q", "0RK", "1OZ", "8A", "4q4", "4dU",
+"4Jy", "5O9", "Re", "0qW", "Lg", "0oU", "5DZ", "6aX", "4o6", "4zW", "0Ay", "bK", "2yj", "0LI", "43o", "6BD", "6lh", "4YK", "T6", "AW",
+"NV", "0md", "4VJ", "6ci", "6ME", "4xf", "0CH", "22c", "mJ", "0Nx", "4uV", "7PT", "6nY", "baI", "1pu", "Cf", "6V", "0Ql", "4jB", "aus",
+"65E", "4Dn", "1oa", "2Jc", "QB", "06x", "b7e", "68u", "5b2", "4gr", "8UD", "dSL", "4g", "8ZE", "4hs", "5m3", "67t", "724", "09y", "1x1",
+"Ss", "04I", "4Ko", "7nm", "azr", "4eC", "1NL", "9W", "24", "21D", "6Nb", "bAr", "4Um", "74g", "Mq", "0nC", "0cs", "1f3", "79W", "bbn",
+"42y", "579", "nm", "394", "365", "al", "588", "4yp", "bmo", "76V", "1i2", "0lr", "0aB", "Bp", "6oO", "4Zl", "40H", "6Ac", "2zM", "0On",
+"4HD", "69o", "PX", "E9", "1Mg", "2he", "6SK", "4fh", "4kX", "7NZ", "7L", "0Pv", "3N9", "2Ky", "6pW", "4Et", "4Ju", "5O5", "Ri", "05S",
+"1OV", "8M", "450", "4dY", "4ii", "7Lk", "qu", "0RG", "J8", "2IH", "66n", "4GE", "6ld", "4YG", "0bi", "2WJ", "ow", "0LE", "43c", "6BH",
+"6Ox", "5jz", "0Au", "bG", "Lk", "0oY", "4Tw", "5Q7", "6nU", "5KW", "14q", "Cj", "mF", "0Nt", "41R", "7PX", "6MI", "4xj", "0CD", "22o",
+"NZ", "0mh", "4VF", "6ce", "65A", "4Dj", "1oe", "2Jg", "6R", "0Qh", "4jF", "7OD", "5b6", "4gv", "1Ly", "0I9", "QF", "0rt", "4IZ", "68q",
+"67p", "5Vz", "1mT", "1x5", "4c", "0SY", "4hw", "5m7", "6Pd", "4eG", "1NH", "9S", "Sw", "04M", "4Kk", "7ni", "4Ui", "74c", "Mu", "0nG",
+"20", "cY", "6Nf", "5kd", "4vu", "5s5", "ni", "390", "0cw", "1f7", "4M8", "4XY", "4WX", "4B9", "OD", "0lv", "0BZ", "ah", "6LW", "4yt",
+"40L", "6Ag", "lX", "y9", "0aF", "Bt", "6oK", "4Zh", "1Mc", "2ha", "6SO", "4fl", "5Xa", "69k", "2FM", "07f", "83N", "d5F", "6pS", "4Ep",
+"bQo", "a0e", "7H", "0Pr", "1OR", "8I", "454", "50t", "4Jq", "5O1", "Rm", "05W", "08g", "2IL", "66j", "4GA", "4im", "7Lo", "5y", "0RC",
+"os", "0LA", "43g", "6BL", "78I", "4YC", "0bm", "2WN", "Lo", "8fE", "4Ts", "5Q3", "aen", "cPM", "0Aq", "bC", "mB", "0Np", "41V", "ajo",
+"6nQ", "5KS", "14u", "Cn", "2XO", "0ml", "4VB", "6ca", "6MM", "4xn", "1Sa", "22k", "Sj", "04P", "4Kv", "5N6", "443", "4eZ", "1NU", "9N",
+"pv", "0SD", "4hj", "7Mh", "67m", "4FF", "1mI", "2HK", "2GJ", "2", "4IG", "68l", "6RH", "4gk", "1Ld", "2if", "6O", "0Qu", "5zz", "7OY",
+"5A7", "4Dw", "1ox", "0j8", "15r", "Bi", "6oV", "4Zu", "40Q", "4a8", "lE", "0Ow", "0BG", "au", "6LJ", "4yi", "4WE", "6bf", "OY", "Z8",
+"U9", "2VI", "6mg", "4XD", "4vh", "6CK", "nt", "0MF", "1PW", "cD", "4n9", "5ky", "4Ut", "5P4", "Mh", "0nZ", "4ip", "5l0", "5d", "9Kg",
+"08z", "1y2", "66w", "737", "4Jl", "7on", "Rp", "05J", "1OO", "8T", "6Qc", "50i", "4kA", "7NC", "7U", "0Po", "1nb", "dqS", "64F", "4Em",
+"b6f", "69v", "PA", "0ss", "8TG", "dRO", "5c1", "4fq", "6MP", "4xs", "376", "22v", "NC", "0mq", "bll", "77U", "6nL", "5KN", "14h", "Cs",
+"3ko", "0Nm", "41K", "7PA", "6Oa", "4zB", "37", "20G", "Lr", "8fX", "4Tn", "6aM", "78T", "bcm", "0bp", "AB", "on", "387", "43z", "5r2",
+"447", "51w", "1NQ", "9J", "Sn", "04T", "4Kr", "5N2", "67i", "4FB", "09d", "2HO", "4z", "1Ca", "4hn", "7Ml", "6RL", "4go", "8UY", "2ib",
+"2GN", "6", "4IC", "68h", "5A3", "4Ds", "82M", "d4E", "6K", "0Qq", "bPl", "a1f", "40U", "akl", "lA", "0Os", "15v", "Bm", "6oR", "4Zq",
+"4WA", "6bb", "2YL", "0lo", "0BC", "aq", "6LN", "4ym", "42d", "6CO", "np", "0MB", "0cn", "2VM", "6mc", "5Ha", "4Up", "5P0", "Ml", "8gF",
+"1PS", "1E2", "adm", "bAo", "1lW", "1y6", "4R9", "4GX", "4it", "5l4", "qh", "0RZ", "i9", "8P", "6Qg", "4dD", "4Jh", "7oj", "Rt", "05N",
+"1nf", "2Kd", "64B", "4Ei", "4kE", "7NG", "7Q", "f8", "1Mz", "2hx", "5c5", "4fu", "4HY", "69r", "PE", "0sw", "NG", "0mu", "5Fz", "6cx",
+"6MT", "4xw", "0CY", "0V8", "3kk", "0Ni", "41O", "7PE", "6nH", "5KJ", "14l", "Cw", "Lv", "0oD", "4Tj", "6aI", "6Oe", "4zF", "33", "bZ",
+"oj", "0LX", "4wv", "5r6", "6ly", "4YZ", "0bt", "AF", "4v", "0SL", "4hb", "awS", "67e", "4FN", "K3", "2HC", "Sb", "04X", "b5E", "aTO",
+"4p3", "4eR", "8Wd", "9F", "6G", "257", "4jS", "7OQ", "65T", "cnm", "1op", "0j0", "QS", "D2", "4IO", "68d", "7Ba", "4gc", "1Ll", "2in",
+"0BO", "23d", "6LB", "4ya", "4WM", "6bn", "OQ", "Z0", "0aS", "Ba", "aEL", "c4g", "40Y", "4a0", "lM", "8Fg", "8If", "cL", "4n1", "5kq",
+"616", "74v", "3KP", "0nR", "U1", "2VA", "6mo", "4XL", "42h", "6CC", "2xm", "0MN", "4Jd", "7of", "Rx", "05B", "i5", "2jE", "6Qk", "4dH",
+"4ix", "5l8", "5l", "0RV", "08r", "2IY", "4R5", "4GT", "4HU", "7mW", "PI", "07s", "1Mv", "0H6", "5c9", "4fy", "4kI", "7NK", "sU", "f4",
+"1nj", "2Kh", "64N", "4Ee", "6nD", "5KF", "1ph", "2Uj", "mW", "x6", "41C", "7PI", "593", "5hZ", "0CU", "0V4", "NK", "0my", "4VW", "4C6",
+"4L7", "4YV", "0bx", "AJ", "of", "0LT", "43r", "562", "6Oi", "4zJ", "w7", "bV", "Lz", "0oH", "4Tf", "6aE", "67a", "4FJ", "K7", "2HG",
+"4r", "0SH", "4hf", "7Md", "4p7", "4eV", "1NY", "9B", "Sf", "0pT", "4Kz", "7nx", "65P", "5TZ", "1ot", "0j4", "6C", "0Qy", "4jW", "7OU",
+"6RD", "4gg", "1Lh", "2ij", "QW", "D6", "4IK", "7lI", "4WI", "6bj", "OU", "Z4", "0BK", "ay", "6LF", "4ye", "4tU", "4a4", "lI", "2o9",
+"0aW", "Be", "6oZ", "4Zy", "4Ux", "5P8", "Md", "0nV", "8Ib", "cH", "4n5", "5ku", "42l", "6CG", "nx", "0MJ", "U5", "2VE", "6mk", "4XH",
+"i1", "8X", "6Qo", "4dL", "5ZA", "7ob", "2Dm", "05F", "08v", "d7f", "4R1", "4GP", "bSO", "a2E", "5h", "0RR", "1Mr", "0H2", "ayL", "52T",
+"4HQ", "69z", "PM", "07w", "1nn", "2Kl", "64J", "4Ea", "4kM", "7NO", "7Y", "f0", "mS", "x2", "41G", "7PM", "aDR", "5KB", "14d", "2Un",
+"NO", "19T", "4VS", "4C2", "597", "bBL", "0CQ", "0V0", "ob", "0LP", "43v", "566", "4L3", "4YR", "16U", "AN", "2Zo", "0oL", "4Tb", "6aA",
+"6Om", "4zN", "w3", "bR", "4oT", "4z5", "wH", "0Tz", "0zV", "Yd", "5D8", "4Ax", "4LH", "6yk", "TT", "A5", "0YJ", "zx", "6WG", "4bd",
+"4me", "6XF", "1q", "0VK", "N4", "2MD", "62b", "4CI", "4Ny", "5K9", "Ve", "0uW", "1KZ", "xI", "4u4", "5pt", "4k6", "5nv", "0Ey", "fK",
+"Hg", "0kU", "641", "6eX", "6hh", "5Mj", "P6", "EW", "29b", "0HI", "47o", "6FD", "6IE", "48n", "0GH", "dz", "JV", "0id", "4RJ", "6gi",
+"6jY", "beI", "0dT", "Gf", "iJ", "0Jx", "4qV", "4d7", "UN", "02t", "4MR", "4X3", "a8G", "57W", "0XP", "0M1", "2Z", "c3", "4nN", "7KL",
+"61I", "5PC", "1km", "2No", "2An", "00E", "4Oc", "7ja", "6Tl", "4aO", "l2", "yS", "0k", "0WQ", "58V", "a7F", "4W2", "4BS", "84m", "ZO",
+"13V", "E", "4I0", "5Lp", "46u", "535", "ja", "0IS", "68", "gQ", "6Jn", "5ol", "4Qa", "6dB", "3OM", "0jO", "0eN", "2Pm", "6kC", "5NA",
+"44D", "6Eo", "hP", "99", "0FR", "0S3", "abM", "49t", "4SP", "4F1", "KL", "8af", "0zR", "0o3", "60W", "ckn", "4oP", "4z1", "3D", "204",
+"0YN", "2lm", "6WC", "56I", "4LL", "6yo", "TP", "A1", "N0", "903", "62f", "4CM", "4ma", "6XB", "1u", "0VO", "8Rg", "xM", "418", "54x",
+"b0F", "aQL", "Va", "0uS", "Hc", "0kQ", "645", "71u", "4k2", "5nr", "8Le", "fO", "29f", "0HM", "47k", "7Va", "6hl", "5Mn", "P2", "ES",
+"JR", "1yA", "4RN", "6gm", "6IA", "48j", "0GL", "26g", "iN", "8Cd", "45Z", "4d3", "hYv", "beM", "0dP", "Gb", "6VY", "4cz", "0XT", "0M5",
+"UJ", "02p", "4MV", "4X7", "61M", "5PG", "1ki", "Xz", "vV", "c7", "4nJ", "7KH", "6Th", "4aK", "l6", "yW", "2Aj", "00A", "4Og", "6zD",
+"4W6", "4BW", "0yy", "ZK", "0o", "0WU", "58R", "6YX", "46q", "531", "je", "0IW", "13R", "A", "4I4", "5Lt", "4Qe", "6dF", "Iy", "0jK",
+"r4", "gU", "6Jj", "5oh", "4pH", "6Ek", "hT", "0Kf", "0eJ", "Fx", "6kG", "5NE", "4ST", "4F5", "KH", "0hz", "0FV", "ed", "5x8", "49p",
+"bvs", "6yc", "2BM", "03f", "0YB", "zp", "6WO", "4bl", "bUo", "a4e", "3H", "0Tr", "87N", "Yl", "5D0", "4Ap", "4Nq", "5K1", "Vm", "01W",
+"1KR", "xA", "414", "54t", "4mm", "6XN", "1y", "0VC", "0xo", "2ML", "62j", "4CA", "7xA", "5Mb", "0fm", "2SN", "ks", "0HA", "47g", "6FL",
+"aan", "bDl", "0Eq", "fC", "Ho", "8bE", "4Ps", "5U3", "5Z2", "5OS", "10u", "Gn", "iB", "0Jp", "45V", "ano", "6IM", "48f", "1Wa", "dr",
+"3Ln", "0il", "4RB", "6ga", "2R", "0Uh", "4nF", "7KD", "61A", "5PK", "1ke", "Xv", "UF", "0vt", "4MZ", "6xy", "5f6", "4cv", "0XX", "0M9",
+"0c", "0WY", "4lw", "5i7", "63p", "5Rz", "0yu", "ZG", "Ww", "00M", "4Ok", "6zH", "6Td", "4aG", "0Zi", "2oJ", "60", "gY", "6Jf", "5od",
+"4Qi", "6dJ", "Iu", "0jG", "0gw", "M", "4I8", "5Lx", "4ru", "5w5", "ji", "1Yz", "0FZ", "eh", "5x4", "5mU", "4SX", "4F9", "KD", "0hv",
+"0eF", "Ft", "6kK", "5NI", "44L", "6Eg", "hX", "91", "0YF", "zt", "6WK", "4bh", "4LD", "6yg", "TX", "A9", "0zZ", "Yh", "5D4", "4At",
+"4oX", "4z9", "3L", "0Tv", "1KV", "xE", "410", "54p", "4Nu", "5K5", "Vi", "01S", "N8", "2MH", "62n", "4CE", "4mi", "6XJ", "uu", "0VG",
+"kw", "0HE", "47c", "6FH", "6hd", "5Mf", "0fi", "2SJ", "Hk", "0kY", "4Pw", "5U7", "6Kx", "5nz", "0Eu", "fG", "iF", "0Jt", "45R", "6Dy",
+"5Z6", "5OW", "0dX", "Gj", "JZ", "0ih", "4RF", "6ge", "6II", "48b", "0GD", "dv", "61E", "5PO", "1ka", "Xr", "2V", "0Ul", "4nB", "aqs",
+"5f2", "4cr", "8QD", "39V", "UB", "02x", "795", "aRo", "63t", "764", "0yq", "ZC", "0g", "9Nd", "4ls", "5i3", "7DA", "4aC", "0Zm", "2oN",
+"Ws", "00I", "4Oo", "6zL", "4Qm", "6dN", "Iq", "0jC", "64", "25D", "6Jb", "bEr", "46y", "539", "jm", "9Pf", "0gs", "I", "aCl", "bfn",
+"bio", "72V", "1m2", "0hr", "325", "el", "5x0", "49x", "44H", "6Ec", "3nl", "95", "0eB", "Fp", "6kO", "5NM", "4mt", "5h4", "uh", "0VZ",
+"0xv", "2MU", "4V9", "4CX", "4Nh", "7kj", "Vt", "01N", "m9", "xX", "6Ug", "54m", "4oE", "6Zf", "3Q", "b8", "0zG", "Yu", "60B", "4Ai",
+"4LY", "4Y8", "TE", "0ww", "1Iz", "zi", "5g5", "4bu", "5y7", "5lV", "0GY", "dk", "JG", "0iu", "5Bz", "6gx", "6jH", "5OJ", "0dE", "Gw",
+"3ok", "82", "45O", "6Dd", "6Ke", "5ng", "73", "fZ", "Hv", "0kD", "4Pj", "6eI", "6hy", "7m9", "0ft", "EF", "kj", "0HX", "4sv", "5v6",
+"Wn", "00T", "4Or", "5J2", "407", "55w", "0Zp", "yB", "0z", "1Ga", "4ln", "6YM", "63i", "4BB", "0yl", "2LO", "2CN", "02e", "4MC", "7hA",
+"6VL", "4co", "0XA", "2mb", "2K", "0Uq", "bTl", "a5f", "5E3", "5PR", "86M", "Xo", "11v", "Fm", "6kR", "5NP", "44U", "aol", "hA", "0Ks",
+"0FC", "eq", "6HN", "49e", "4SA", "6fb", "3Mm", "0ho", "0gn", "T", "6ic", "5La", "46d", "6GO", "jp", "0IB", "0Dr", "1A2", "hyT", "bEo",
+"4Qp", "5T0", "Il", "8cF", "0xr", "2MQ", "62w", "777", "4mp", "5h0", "1d", "9Og", "1KO", "2nM", "6Uc", "54i", "4Nl", "7kn", "Vp", "01J",
+"0zC", "Yq", "60F", "4Am", "4oA", "6Zb", "3U", "0To", "8PG", "zm", "5g1", "4bq", "786", "aSl", "TA", "0ws", "JC", "0iq", "bhl", "73U",
+"5y3", "5lR", "336", "do", "3oo", "86", "45K", "7TA", "6jL", "5ON", "0dA", "Gs", "Hr", "8bX", "4Pn", "6eM", "6Ka", "5nc", "77", "24G",
+"kn", "8AD", "47z", "5v2", "aBo", "bgm", "0fp", "EB", "403", "4aZ", "0Zt", "yF", "Wj", "00P", "4Ov", "5J6", "63m", "4BF", "0yh", "ZZ",
+"tv", "0WD", "4lj", "6YI", "6VH", "4ck", "0XE", "2mf", "2CJ", "02a", "4MG", "6xd", "5E7", "5PV", "1kx", "Xk", "2O", "0Uu", "bTh", "7KY",
+"44Q", "4e8", "hE", "0Kw", "11r", "Fi", "6kV", "5NT", "4SE", "6ff", "KY", "0hk", "0FG", "eu", "6HJ", "49a", "4rh", "6GK", "jt", "0IF",
+"Q9", "P", "6ig", "5Le", "4Qt", "5T4", "Ih", "0jZ", "0Dv", "gD", "4j9", "5oy", "aD0", "7kb", "3PL", "01F", "m1", "xP", "6Uo", "54e",
+"59U", "a6E", "1h", "0VR", "85n", "196", "4V1", "4CP", "4LQ", "4Y0", "TM", "03w", "0YS", "za", "a9D", "56T", "4oM", "6Zn", "3Y", "b0",
+"0zO", "2Ol", "60J", "4Aa", "7za", "5OB", "0dM", "2Qn", "iS", "0Ja", "45G", "6Dl", "acN", "48w", "0GQ", "dc", "JO", "94L", "4RS", "4G2",
+"4H3", "5Ms", "12U", "EN", "kb", "0HP", "47v", "526", "6Km", "5no", "s3", "fR", "3NN", "0kL", "4Pb", "6eA", "0r", "0WH", "4lf", "6YE",
+"63a", "4BJ", "O7", "ZV", "Wf", "0tT", "4Oz", "6zY", "4t7", "4aV", "0Zx", "yJ", "2C", "0Uy", "4nW", "7KU", "61P", "5PZ", "1kt", "Xg",
+"UW", "02m", "4MK", "6xh", "6VD", "4cg", "0XI", "2mj", "0FK", "ey", "6HF", "49m", "4SI", "6fj", "KU", "0hg", "0eW", "Fe", "6kZ", "5NX",
+"4pU", "4e4", "hI", "2k9", "0Dz", "gH", "4j5", "5ou", "4Qx", "5T8", "Id", "0jV", "Q5", "DT", "6ik", "5Li", "46l", "6GG", "jx", "0IJ",
+"m5", "xT", "6Uk", "54a", "4Nd", "7kf", "Vx", "01B", "0xz", "192", "4V5", "4CT", "4mx", "5h8", "1l", "0VV", "0YW", "ze", "5g9", "4by",
+"4LU", "4Y4", "TI", "03s", "0zK", "Yy", "60N", "4Ae", "4oI", "6Zj", "wU", "b4", "iW", "0Je", "45C", "6Dh", "6jD", "5OF", "0dI", "2Qj",
+"JK", "0iy", "4RW", "4G6", "6IX", "48s", "0GU", "dg", "kf", "0HT", "47r", "522", "4H7", "5Mw", "0fx", "EJ", "Hz", "0kH", "4Pf", "6eE",
+"6Ki", "5nk", "s7", "fV", "63e", "4BN", "O3", "ZR", "0v", "0WL", "4lb", "6YA", "4t3", "4aR", "8Sd", "yN", "Wb", "00X", "b1E", "aPO",
+"61T", "bzL", "1kp", "Xc", "2G", "217", "4nS", "7KQ", "7Fa", "4cc", "0XM", "2mn", "US", "02i", "4MO", "6xl", "4SM", "6fn", "KQ", "0hc",
+"0FO", "27d", "6HB", "49i", "44Y", "4e0", "hM", "8Bg", "0eS", "Fa", "aAL", "bdN", "656", "70v", "3OP", "0jR", "8Mf", "gL", "4j1", "5oq",
+"46h", "6GC", "28e", "0IN", "Q1", "X", "6io", "5Lm", "6KV", "5nT", "1Uz", "fi", "HE", "0kw", "4PY", "4E8", "6hJ", "5MH", "0fG", "Eu",
+"kY", "0Hk", "47M", "6Ff", "6Ig", "48L", "51", "dX", "Jt", "0iF", "4Rh", "6gK", "4J9", "5Oy", "0dv", "GD", "ih", "0JZ", "4qt", "5t4",
+"4ov", "5j6", "3b", "0TX", "0zt", "YF", "60q", "4AZ", "4Lj", "6yI", "Tv", "03L", "0Yh", "zZ", "6We", "4bF", "4mG", "6Xd", "1S", "0Vi",
+"0xE", "2Mf", "6vH", "4Ck", "bth", "7kY", "VG", "0uu", "1Kx", "xk", "5e7", "5pV", "13t", "g", "5Y3", "5LR", "46W", "amn", "jC", "0Iq",
+"0DA", "gs", "6JL", "5oN", "4QC", "70I", "3Oo", "0jm", "0el", "2PO", "6ka", "5Nc", "44f", "6EM", "hr", "8BX", "0Fp", "eB", "abo", "49V",
+"4Sr", "5V2", "Kn", "8aD", "Ul", "02V", "4Mp", "5H0", "425", "57u", "0Xr", "2mQ", "2x", "0UB", "4nl", "7Kn", "61k", "5Pa", "1kO", "2NM",
+"2AL", "00g", "4OA", "6zb", "6TN", "4am", "0ZC", "yq", "0I", "0Ws", "58t", "a7d", "5G1", "4Bq", "84O", "Zm", "HA", "0ks", "bjn", "71W",
+"6KR", "5nP", "314", "fm", "29D", "0Ho", "47I", "6Fb", "6hN", "5ML", "0fC", "Eq", "Jp", "0iB", "4Rl", "6gO", "6Ic", "48H", "55", "26E",
+"il", "8CF", "45x", "508", "hYT", "beo", "0dr", "1a2", "0zp", "YB", "60u", "755", "4or", "5j2", "3f", "9Me", "0Yl", "2lO", "6Wa", "4bB",
+"4Ln", "6yM", "Tr", "03H", "0xA", "2Mb", "62D", "4Co", "4mC", "7HA", "1W", "0Vm", "8RE", "xo", "5e3", "54Z", "b0d", "aQn", "VC", "01y",
+"46S", "6Gx", "jG", "0Iu", "0gY", "c", "5Y7", "5LV", "4QG", "6dd", "3Ok", "0ji", "0DE", "gw", "6JH", "5oJ", "44b", "6EI", "hv", "0KD",
+"0eh", "FZ", "6ke", "5Ng", "4Sv", "5V6", "Kj", "0hX", "0Ft", "eF", "6Hy", "49R", "421", "4cX", "0Xv", "2mU", "Uh", "02R", "4Mt", "5H4",
+"61o", "5Pe", "M9", "XX", "vt", "0UF", "4nh", "7Kj", "6TJ", "4ai", "0ZG", "yu", "WY", "B8", "4OE", "6zf", "5G5", "4Bu", "1iz", "Zi",
+"0M", "0Ww", "4lY", "4y8", "6hB", "aW1", "0fO", "2Sl", "kQ", "0Hc", "47E", "6Fn", "aaL", "bDN", "0ES", "fa", "HM", "8bg", "4PQ", "4E0",
+"4J1", "5Oq", "10W", "GL", "3oP", "0JR", "45t", "504", "6Io", "48D", "59", "dP", "3LL", "0iN", "5BA", "6gC", "4Lb", "6yA", "2Bo", "03D",
+"o3", "zR", "6Wm", "4bN", "bUM", "a4G", "3j", "0TP", "87l", "YN", "4T3", "4AR", "4NS", "7kQ", "VO", "01u", "1Kp", "xc", "hfw", "54V",
+"4mO", "6Xl", "uS", "0Va", "0xM", "2Mn", "62H", "4Cc", "0DI", "25b", "6JD", "5oF", "4QK", "6dh", "IW", "0je", "0gU", "o", "6iX", "5LZ",
+"4rW", "4g6", "jK", "0Iy", "0Fx", "eJ", "4h7", "5mw", "4Sz", "6fY", "Kf", "0hT", "S7", "FV", "6ki", "5Nk", "44n", "6EE", "hz", "0KH",
+"2p", "0UJ", "4nd", "7Kf", "61c", "5Pi", "M5", "XT", "Ud", "0vV", "4Mx", "5H8", "4v5", "4cT", "0Xz", "2mY", "0A", "1GZ", "4lU", "4y4",
+"5G9", "4By", "0yW", "Ze", "WU", "B4", "4OI", "6zj", "6TF", "4ae", "0ZK", "yy", "kU", "0Hg", "47A", "6Fj", "6hF", "5MD", "0fK", "Ey",
+"HI", "2K9", "4PU", "4E4", "6KZ", "5nX", "0EW", "fe", "id", "0JV", "45p", "500", "4J5", "5Ou", "0dz", "GH", "Jx", "0iJ", "4Rd", "6gG",
+"6Ik", "5li", "q5", "dT", "o7", "zV", "6Wi", "4bJ", "4Lf", "6yE", "Tz", "0wH", "0zx", "YJ", "4T7", "4AV", "4oz", "6ZY", "3n", "0TT",
+"1Kt", "xg", "6UX", "54R", "4NW", "7kU", "VK", "01q", "0xI", "2Mj", "62L", "4Cg", "4mK", "6Xh", "uW", "0Ve", "4QO", "6dl", "IS", "0ja",
+"0DM", "25f", "7Za", "5oB", "4rS", "4g2", "jO", "9PD", "0gQ", "k", "aCN", "685", "674", "72t", "Kb", "0hP", "8Od", "eN", "4h3", "49Z",
+"44j", "6EA", "3nN", "0KL", "S3", "FR", "6km", "5No", "61g", "5Pm", "M1", "XP", "2t", "0UN", "ad0", "7Kb", "429", "4cP", "8Qf", "39t",
+"0c3", "02Z", "b3G", "aRM", "63V", "bxN", "0yS", "Za", "0E", "235", "4lQ", "4y0", "6TB", "4aa", "0ZO", "2ol", "WQ", "B0", "4OM", "6zn",
+"4i4", "5lt", "1WZ", "dI", "Je", "0iW", "4Ry", "5W9", "6jj", "5Oh", "R4", "GU", "iy", "0JK", "45m", "6DF", "6KG", "5nE", "0EJ", "fx",
+"HT", "0kf", "4PH", "6ek", "5X8", "5MY", "0fV", "Ed", "kH", "0Hz", "4sT", "4f5", "4mV", "4x7", "1B", "0Vx", "0xT", "0m5", "62Q", "4Cz",
+"4NJ", "7kH", "VV", "C7", "1Ki", "xz", "6UE", "54O", "4og", "6ZD", "3s", "0TI", "L6", "YW", "6th", "4AK", "6l9", "6yX", "Tg", "0wU",
+"0Yy", "zK", "4w6", "4bW", "11T", "FO", "4K2", "5Nr", "44w", "517", "hc", "0KQ", "p2", "eS", "6Hl", "49G", "4Sc", "72i", "3MO", "0hM",
+"0gL", "v", "6iA", "5LC", "46F", "6Gm", "jR", "1YA", "0DP", "gb", "hyv", "bEM", "4QR", "4D3", "IN", "8cd", "WL", "00v", "4OP", "4Z1",
+"hgt", "55U", "0ZR", "0O3", "0X", "a1", "4lL", "6Yo", "63K", "5RA", "0yN", "2Lm", "2Cl", "02G", "4Ma", "6xB", "6Vn", "4cM", "n0", "39i",
+"2i", "0US", "bTN", "a5D", "4U0", "5Pp", "86o", "XM", "Ja", "0iS", "667", "73w", "4i0", "48Y", "8Ng", "dM", "3oM", "0JO", "45i", "6DB",
+"6jn", "5Ol", "R0", "GQ", "HP", "0kb", "4PL", "6eo", "6KC", "5nA", "0EN", "24e", "kL", "8Af", "47X", "4f1", "aBM", "696", "0fR", "0s3",
+"0xP", "0m1", "62U", "byM", "4mR", "4x3", "1F", "226", "1Km", "2no", "6UA", "54K", "4NN", "7kL", "VR", "C3", "L2", "YS", "60d", "4AO",
+"4oc", "7Ja", "3w", "0TM", "8Pe", "zO", "4w2", "4bS", "b2D", "aSN", "Tc", "03Y", "44s", "513", "hg", "0KU", "0ey", "FK", "4K6", "5Nv",
+"4Sg", "6fD", "3MK", "0hI", "p6", "eW", "6Hh", "49C", "46B", "6Gi", "jV", "0Id", "0gH", "r", "6iE", "5LG", "4QV", "4D7", "IJ", "0jx",
+"0DT", "gf", "6JY", "bEI", "5d8", "4ax", "0ZV", "yd", "WH", "00r", "4OT", "4Z5", "63O", "4Bd", "0yJ", "Zx", "tT", "a5", "4lH", "6Yk",
+"6Vj", "4cI", "n4", "2mD", "Uy", "02C", "4Me", "6xF", "4U4", "5Pt", "1kZ", "XI", "2m", "0UW", "4ny", "5k9", "6jb", "ber", "0do", "2QL",
+"iq", "0JC", "45e", "6DN", "acl", "48U", "0Gs", "dA", "Jm", "94n", "4Rq", "5W1", "5X0", "5MQ", "12w", "El", "1M2", "0Hr", "47T", "alm",
+"6KO", "5nM", "0EB", "fp", "3Nl", "0kn", "bjs", "6ec", "4NB", "aQs", "3Pn", "01d", "1Ka", "xr", "6UM", "54G", "59w", "a6g", "1J", "0Vp",
+"85L", "d3D", "5F2", "4Cr", "4Ls", "5I3", "To", "03U", "0Yq", "zC", "436", "56v", "4oo", "6ZL", "ws", "0TA", "0zm", "2ON", "60h", "4AC",
+"42", "27B", "6Hd", "49O", "4Sk", "6fH", "Kw", "0hE", "0eu", "FG", "6kx", "5Nz", "4pw", "5u7", "hk", "0KY", "0DX", "gj", "5z6", "5oW",
+"4QZ", "6dy", "IF", "0jt", "0gD", "Dv", "6iI", "5LK", "46N", "6Ge", "jZ", "0Ih", "0P", "a9", "4lD", "6Yg", "63C", "4Bh", "0yF", "Zt",
+"WD", "0tv", "4OX", "4Z9", "5d4", "4at", "0ZZ", "yh", "2a", "1Ez", "4nu", "5k5", "4U8", "5Px", "1kV", "XE", "Uu", "02O", "4Mi", "6xJ",
+"6Vf", "4cE", "n8", "2mH", "iu", "0JG", "45a", "6DJ", "6jf", "5Od", "R8", "GY", "Ji", "1yz", "4Ru", "5W5", "4i8", "48Q", "0Gw", "dE",
+"kD", "0Hv", "47P", "4f9", "5X4", "5MU", "0fZ", "Eh", "HX", "0kj", "4PD", "6eg", "6KK", "5nI", "0EF", "ft", "1Ke", "xv", "6UI", "54C",
+"4NF", "7kD", "VZ", "0uh", "0xX", "0m9", "5F6", "4Cv", "4mZ", "6Xy", "1N", "0Vt", "0Yu", "zG", "432", "56r", "4Lw", "5I7", "Tk", "03Q",
+"0zi", "2OJ", "60l", "4AG", "4ok", "6ZH", "ww", "0TE", "4So", "6fL", "Ks", "0hA", "46", "27F", "7XA", "49K", "4ps", "5u3", "ho", "8BE",
+"0eq", "FC", "aAn", "bdl", "bkm", "70T", "IB", "0jp", "307", "gn", "5z2", "5oS", "46J", "6Ga", "28G", "0Il", "13i", "z", "6iM", "5LO",
+"63G", "4Bl", "0yB", "Zp", "0T", "0Wn", "58i", "6Yc", "5d0", "4ap", "8SF", "yl", "1q2", "00z", "b1g", "aPm", "61v", "746", "1kR", "XA",
+"2e", "9Lf", "4nq", "5k1", "6Vb", "4cA", "0Xo", "2mL", "Uq", "02K", "4Mm", "6xN", "8YG", "7e", "5n1", "4kq", "716", "64v", "2KP", "1nR",
+"07K", "Pq", "69F", "4Hm", "4fA", "6Sb", "2hL", "1MN", "0Rn", "5T", "7LB", "5ya", "4Gl", "66G", "2Ia", "08J", "05z", "1t2", "aUm", "b4g",
+"4dp", "5a0", "8d", "8VF", "bn", "357", "4zr", "6OQ", "75T", "bnm", "0op", "LB", "Ar", "16i", "4Yn", "6lM", "6Ba", "43J", "0Ll", "2yO",
+"22F", "16", "4xC", "agr", "6cL", "4Vo", "0mA", "Ns", "CC", "14X", "bal", "aDn", "5p3", "4us", "8GE", "mo", "5L7", "4Iw", "06Q", "Qk",
+"1Y5", "1LT", "53r", "462", "7Oi", "4jk", "0QE", "rw", "2JJ", "1oH", "4DG", "65l", "7nD", "4KF", "0ph", "SZ", "2kg", "1Ne", "4ej", "6PI",
+"493", "4hZ", "0St", "4N", "0h9", "09P", "4Fv", "5C6", "4Xt", "6mW", "023", "0cZ", "0Mv", "nD", "4c9", "42P", "5kI", "6NK", "ct", "1Pg",
+"X9", "MX", "74N", "4UD", "4ZE", "6of", "BY", "W8", "0OG", "lu", "6AJ", "40a", "4yY", "4l8", "aE", "0Bw", "18r", "Oi", "5R5", "4Wu",
+"4EY", "4P8", "2KT", "1nV", "8YC", "7a", "5n5", "4ku", "4fE", "6Sf", "2hH", "k8", "07O", "Pu", "69B", "4Hi", "4Gh", "66C", "2Ie", "08N",
+"d9", "5P", "7LF", "4iD", "4dt", "5a4", "2jy", "3o9", "0qv", "RD", "7oZ", "4JX", "6ay", "4TZ", "0ot", "LF", "bj", "0AX", "4zv", "6OU",
+"6Be", "43N", "0Lh", "oZ", "Av", "0bD", "4Yj", "6lI", "6cH", "4Vk", "0mE", "Nw", "22B", "12", "4xG", "6Md", "5p7", "4uw", "0NY", "mk",
+"CG", "1pT", "5Kz", "6nx", "1Y1", "1LP", "53v", "466", "5L3", "4Is", "06U", "Qo", "2JN", "1oL", "4DC", "65h", "7Om", "4jo", "0QA", "rs",
+"9z", "1Na", "4en", "6PM", "aTs", "4KB", "04d", "2EO", "d6D", "09T", "4Fr", "5C2", "497", "bRm", "0Sp", "4J", "0Mr", "1H2", "aim", "42T",
+"4Xp", "6mS", "027", "17w", "0nn", "3Kl", "74J", "5Ea", "5kM", "6NO", "cp", "1Pc", "0OC", "lq", "6AN", "40e", "4ZA", "6ob", "2TL", "0ao",
+"18v", "Om", "5R1", "4Wq", "bCn", "afl", "aA", "0Bs", "07C", "Py", "69N", "4He", "4fI", "6Sj", "2hD", "k4", "0PW", "7m", "5n9", "4ky",
+"4EU", "4P4", "2KX", "1nZ", "05r", "RH", "7oV", "4JT", "4dx", "5a8", "8l", "1Ow", "d5", "qT", "7LJ", "4iH", "4Gd", "66O", "2Ii", "08B",
+"Az", "0bH", "4Yf", "6lE", "6Bi", "43B", "z7", "oV", "bf", "0AT", "4zz", "6OY", "4A7", "4TV", "0ox", "LJ", "CK", "14P", "5Kv", "4N6",
+"543", "41s", "0NU", "mg", "22N", "u6", "4xK", "6Mh", "6cD", "4Vg", "0mI", "2Xj", "7Oa", "4jc", "0QM", "6w", "2JB", "I2", "4DO", "65d",
+"68T", "b7D", "06Y", "Qc", "dSm", "287", "4gS", "4r2", "7MP", "4hR", "276", "4F", "0h1", "09X", "b8E", "67U", "7nL", "4KN", "F3", "SR",
+"9v", "1Nm", "4eb", "6PA", "5kA", "6NC", "21e", "1Po", "X1", "MP", "74F", "4UL", "bbO", "79v", "0v3", "0cR", "8Df", "nL", "4c1", "42X",
+"4yQ", "4l0", "aM", "8Kg", "0lS", "Oa", "76w", "637", "4ZM", "6on", "BQ", "W0", "0OO", "2zl", "6AB", "40i", "4fM", "6Sn", "3xa", "k0",
+"07G", "2Fl", "69J", "4Ha", "4EQ", "4P0", "d5g", "83o", "0PS", "7i", "a0D", "bQN", "50U", "hbt", "8h", "1Os", "05v", "RL", "7oR", "4JP",
+"5WA", "66K", "2Im", "08F", "d1", "5X", "7LN", "4iL", "6Bm", "43F", "z3", "oR", "2Wo", "0bL", "4Yb", "6lA", "4A3", "4TR", "8fd", "LN",
+"bb", "0AP", "cPl", "aeO", "547", "41w", "0NQ", "mc", "CO", "14T", "5Kr", "4N2", "77i", "4Vc", "0mM", "2Xn", "22J", "u2", "4xO", "6Ml",
+"2JF", "I6", "4DK", "6qh", "7Oe", "4jg", "0QI", "6s", "1Y9", "1LX", "4gW", "4r6", "68P", "5YZ", "0rU", "Qg", "0h5", "1mu", "4Fz", "67Q",
+"7MT", "4hV", "0Sx", "4B", "9r", "1Ni", "4ef", "6PE", "7nH", "4KJ", "F7", "SV", "X5", "MT", "74B", "4UH", "5kE", "6NG", "cx", "1Pk",
+"0Mz", "nH", "4c5", "4vT", "4Xx", "79r", "0v7", "0cV", "0lW", "Oe", "5R9", "4Wy", "4yU", "4l4", "aI", "1RZ", "0OK", "ly", "6AF", "40m",
+"4ZI", "6oj", "BU", "W4", "265", "5E", "488", "4iQ", "b9F", "66V", "0i2", "1lr", "G0", "RQ", "7oO", "4JM", "4da", "6QB", "8u", "1On",
+"0PN", "7t", "7Nb", "aa0", "4EL", "64g", "2KA", "H1", "07Z", "0f3", "69W", "b6G", "4fP", "479", "dRn", "294", "22W", "8Jd", "4xR", "4m3",
+"77t", "624", "0mP", "Nb", "CR", "V3", "5Ko", "6nm", "ajS", "41j", "0NL", "3kN", "20f", "0AM", "4zc", "aeR", "6al", "4TO", "Y2", "LS",
+"Ac", "0bQ", "bcL", "78u", "4b2", "4wS", "8Ee", "oO", "7nU", "4KW", "04q", "SK", "9o", "1Nt", "51R", "6PX", "7MI", "4hK", "e6", "pW",
+"2Hj", "09A", "4Fg", "67L", "68M", "4If", "0rH", "Qz", "2iG", "j7", "4gJ", "6Ri", "7Ox", "4jz", "0QT", "6n", "1z8", "1oY", "4DV", "4Q7",
+"4ZT", "4O5", "BH", "0az", "0OV", "ld", "550", "40p", "4yH", "6Lk", "aT", "t5", "0lJ", "Ox", "6bG", "4Wd", "4Xe", "6mF", "2Vh", "0cK",
+"0Mg", "nU", "6Cj", "42A", "5kX", "6NZ", "ce", "1Pv", "2N9", "MI", "7pW", "4UU", "4Gy", "5B9", "0i6", "1lv", "1BZ", "5A", "7LW", "4iU",
+"4de", "6QF", "8q", "1Oj", "G4", "RU", "7oK", "4JI", "4EH", "64c", "2KE", "H5", "0PJ", "7p", "7Nf", "4kd", "4fT", "4s5", "2hY", "290",
+"0sV", "Pd", "5M8", "4Hx", "6cY", "4Vz", "0mT", "Nf", "1F8", "0Cx", "4xV", "4m7", "7Pd", "41n", "0NH", "mz", "CV", "V7", "5Kk", "6ni",
+"6ah", "4TK", "Y6", "LW", "20b", "0AI", "4zg", "6OD", "4b6", "4wW", "0Ly", "oK", "Ag", "0bU", "5IZ", "6lX", "9k", "1Np", "51V", "azN",
+"7nQ", "4KS", "04u", "SO", "2Hn", "09E", "4Fc", "67H", "7MM", "4hO", "e2", "pS", "2iC", "j3", "4gN", "6Rm", "68I", "4Ib", "06D", "2Go",
+"d4d", "82l", "4DR", "4Q3", "a1G", "bPM", "0QP", "6j", "0OR", "0Z3", "554", "40t", "4ZP", "4O1", "BL", "15W", "0lN", "2Ym", "6bC", "5GA",
+"4yL", "6Lo", "aP", "09", "0Mc", "nQ", "6Cn", "42E", "4Xa", "6mB", "2Vl", "0cO", "8gg", "MM", "7pS", "4UQ", "bAN", "adL", "ca", "1Pr",
+"G8", "RY", "7oG", "4JE", "4di", "6QJ", "2jd", "1Of", "0Rw", "5M", "480", "4iY", "4Gu", "5B5", "2Ix", "08S", "07R", "Ph", "5M4", "4Ht",
+"4fX", "471", "1X6", "1MW", "0PF", "st", "7Nj", "4kh", "4ED", "64o", "2KI", "H9", "CZ", "14A", "5Kg", "6ne", "7Ph", "41b", "0ND", "mv",
+"1F4", "0Ct", "4xZ", "6My", "5S6", "4Vv", "0mX", "Nj", "Ak", "0bY", "4Yw", "6lT", "6Bx", "43S", "0Lu", "oG", "bw", "0AE", "4zk", "6OH",
+"6ad", "4TG", "0oi", "2ZJ", "7MA", "4hC", "0Sm", "4W", "2Hb", "09I", "4Fo", "67D", "aTn", "b5d", "04y", "SC", "9g", "8WE", "4es", "6PP",
+"5o2", "4jr", "8XD", "6f", "1z0", "1oQ", "705", "65u", "68E", "4In", "06H", "Qr", "2iO", "1LM", "4gB", "6Ra", "5ia", "6Lc", "23E", "05",
+"0lB", "Op", "6bO", "4Wl", "c4F", "aEm", "1d2", "0ar", "8FF", "ll", "558", "40x", "5kP", "6NR", "cm", "344", "0ns", "MA", "74W", "bon",
+"4Xm", "6mN", "3FA", "0cC", "0Mo", "2xL", "6Cb", "42I", "4dm", "6QN", "8y", "1Ob", "05g", "2DL", "7oC", "4JA", "4Gq", "5B1", "d7G", "08W",
+"0Rs", "5I", "484", "bSn", "52u", "475", "1X2", "1MS", "07V", "Pl", "5M0", "4Hp", "5Ua", "64k", "2KM", "1nO", "0PB", "7x", "7Nn", "4kl",
+"7Pl", "41f", "8GX", "mr", "2UO", "14E", "5Kc", "6na", "5S2", "4Vr", "19u", "Nn", "1F0", "0Cp", "bBm", "ago", "ahn", "43W", "0Lq", "oC",
+"Ao", "16t", "4Ys", "6lP", "75I", "4TC", "0om", "2ZN", "bs", "0AA", "4zo", "6OL", "2Hf", "09M", "4Fk", "6sH", "7ME", "4hG", "0Si", "4S",
+"9c", "1Nx", "4ew", "6PT", "7nY", "bqh", "0pu", "SG", "1z4", "1oU", "4DZ", "65q", "5o6", "4jv", "0QX", "6b", "2iK", "1LI", "4gF", "6Re",
+"68A", "4Ij", "06L", "Qv", "0lF", "Ot", "6bK", "4Wh", "4yD", "6Lg", "aX", "01", "0OZ", "lh", "5q4", "4tt", "4ZX", "4O9", "BD", "0av",
+"0nw", "ME", "74S", "4UY", "5kT", "6NV", "ci", "1Pz", "0Mk", "nY", "6Cf", "42M", "4Xi", "6mJ", "2Vd", "0cG", "bL", "8Hf", "4zP", "4o1",
+"75v", "606", "0oR", "0z3", "AP", "T1", "4YL", "6lo", "6BC", "43h", "0LN", "2ym", "22d", "0CO", "4xa", "6MB", "6cn", "4VM", "0mc", "NQ",
+"Ca", "14z", "baN", "aDL", "7PS", "41Y", "8Gg", "mM", "247", "7G", "7NQ", "4kS", "com", "64T", "0k0", "1np", "E2", "PS", "69d", "4HO",
+"4fc", "7Ca", "2hn", "1Ml", "0RL", "5v", "avS", "4ib", "4GN", "66e", "2IC", "J3", "05X", "Rb", "aUO", "b4E", "4dR", "4q3", "8F", "8Vd",
+"4XV", "4M7", "1f8", "0cx", "0MT", "nf", "572", "42r", "5kk", "6Ni", "cV", "v7", "0nH", "Mz", "74l", "4Uf", "4Zg", "6oD", "2Tj", "0aI",
+"y6", "lW", "6Ah", "40C", "5iZ", "583", "ag", "0BU", "0ly", "OK", "4B6", "4WW", "7lW", "4IU", "06s", "QI", "0I6", "1Lv", "4gy", "5b9",
+"7OK", "4jI", "g4", "rU", "2Jh", "1oj", "4De", "65N", "7nf", "4Kd", "04B", "Sx", "2kE", "h5", "4eH", "6Pk", "5m8", "4hx", "0SV", "4l",
+"2HY", "09r", "4FT", "4S5", "5Q8", "4Tx", "0oV", "Ld", "bH", "0Az", "4zT", "4o5", "6BG", "43l", "0LJ", "ox", "AT", "T5", "4YH", "6lk",
+"6cj", "4VI", "0mg", "NU", "2vh", "0CK", "4xe", "6MF", "7PW", "4uU", "2n9", "mI", "Ce", "1pv", "5KX", "6nZ", "5UZ", "64P", "0k4", "1nt",
+"0Py", "7C", "7NU", "4kW", "4fg", "6SD", "2hj", "1Mh", "E6", "PW", "7mI", "4HK", "4GJ", "66a", "2IG", "J7", "0RH", "5r", "7Ld", "4if",
+"4dV", "4q7", "8B", "1OY", "0qT", "Rf", "7ox", "4Jz", "0MP", "nb", "576", "42v", "4XR", "4M3", "dll", "17U", "0nL", "3KN", "74h", "4Ub",
+"5ko", "6Nm", "cR", "v3", "y2", "lS", "6Al", "40G", "4Zc", "aER", "2Tn", "0aM", "18T", "OO", "4B2", "4WS", "bCL", "587", "ac", "0BQ",
+"0I2", "1Lr", "53T", "axL", "68z", "4IQ", "06w", "QM", "2Jl", "1on", "4Da", "65J", "7OO", "4jM", "g0", "6Y", "9X", "h1", "4eL", "6Po",
+"7nb", "aA0", "04F", "2Em", "d6f", "09v", "4FP", "4S1", "a3E", "bRO", "0SR", "4h", "AX", "T9", "4YD", "6lg", "6BK", "4wh", "0LF", "ot",
+"bD", "0Av", "4zX", "4o9", "5Q4", "4Tt", "0oZ", "Lh", "Ci", "14r", "5KT", "6nV", "ajh", "41Q", "0Nw", "mE", "22l", "0CG", "4xi", "6MJ",
+"6cf", "4VE", "0mk", "NY", "07a", "2FJ", "69l", "4HG", "4fk", "6SH", "2hf", "1Md", "0Pu", "7O", "7NY", "bQh", "4Ew", "6pT", "0k8", "1nx",
+"05P", "Rj", "5O6", "4Jv", "4dZ", "453", "8N", "1OU", "0RD", "qv", "7Lh", "4ij", "4GF", "66m", "2IK", "1lI", "5kc", "6Na", "21G", "27",
+"8gX", "Mr", "74d", "4Un", "bbm", "79T", "1f0", "0cp", "397", "nn", "5s2", "42z", "4ys", "6LP", "ao", "366", "0lq", "OC", "76U", "bml",
+"4Zo", "6oL", "Bs", "0aA", "0Om", "2zN", "7QA", "40K", "7OC", "4jA", "0Qo", "6U", "3ZA", "1ob", "4Dm", "65F", "68v", "b7f", "0rs", "QA",
+"dSO", "8UG", "4gq", "5b1", "5m0", "4hp", "8ZF", "4d", "1x2", "09z", "727", "67w", "7nn", "4Kl", "04J", "Sp", "9T", "1NO", "51i", "6Pc",
+"6BO", "43d", "0LB", "op", "2WM", "0bn", "5Ia", "6lc", "5Q0", "4Tp", "8fF", "Ll", "1D2", "0Ar", "cPN", "aem", "ajl", "41U", "0Ns", "mA",
+"Cm", "14v", "5KP", "6nR", "6cb", "4VA", "0mo", "2XL", "22h", "0CC", "4xm", "6MN", "4fo", "6SL", "2hb", "8TY", "07e", "2FN", "69h", "4HC",
+"4Es", "64X", "d5E", "83M", "0Pq", "7K", "a0f", "bQl", "50w", "457", "8J", "1OQ", "05T", "Rn", "5O2", "4Jr", "4GB", "66i", "2IO", "08d",
+"1Ba", "5z", "7Ll", "4in", "0nD", "Mv", "7ph", "4Uj", "5kg", "6Ne", "cZ", "23", "0MX", "nj", "5s6", "4vv", "4XZ", "6my", "1f4", "0ct",
+"0lu", "OG", "6bx", "5Gz", "4yw", "6LT", "ak", "0BY", "0Oi", "2zJ", "6Ad", "40O", "4Zk", "6oH", "Bw", "0aE", "2Jd", "1of", "4Di", "65B",
+"7OG", "4jE", "g8", "6Q", "2ix", "1Lz", "4gu", "5b5", "68r", "4IY", "0rw", "QE", "1x6", "1mW", "4FX", "4S9", "5m4", "4ht", "0SZ", "ph",
+"9P", "h9", "4eD", "6Pg", "7nj", "4Kh", "04N", "St", "22u", "375", "4xp", "598", "77V", "blo", "0mr", "1h2", "Cp", "14k", "5KM", "6nO",
+"7PB", "41H", "0Nn", "3kl", "20D", "34", "4zA", "6Ob", "6aN", "4Tm", "0oC", "Lq", "AA", "0bs", "bcn", "78W", "569", "43y", "384", "om",
+"9Kd", "5g", "5l3", "4is", "734", "66t", "1y1", "08y", "05I", "Rs", "7om", "4Jo", "4dC", "7AA", "8W", "1OL", "0Pl", "7V", "ats", "4kB",
+"4En", "64E", "2Kc", "1na", "07x", "PB", "69u", "b6e", "4fr", "5c2", "dRL", "8TD", "4Zv", "6oU", "Bj", "0aX", "0Ot", "lF", "6Ay", "40R",
+"4yj", "6LI", "av", "0BD", "0lh", "OZ", "6be", "4WF", "4XG", "6md", "2VJ", "0ci", "0ME", "nw", "6CH", "42c", "5kz", "6Nx", "cG", "1PT",
+"0nY", "Mk", "5P7", "4Uw", "5N5", "4Ku", "04S", "Si", "9M", "1NV", "4eY", "440", "7Mk", "4hi", "0SG", "pu", "2HH", "K8", "4FE", "67n",
+"68o", "4ID", "1", "QX", "2ie", "1Lg", "4gh", "6RK", "7OZ", "4jX", "0Qv", "6L", "2Jy", "3O9", "4Dt", "5A4", "4C9", "4VX", "0mv", "ND",
+"22q", "0CZ", "4xt", "6MW", "7PF", "41L", "x9", "mX", "Ct", "14o", "5KI", "6nK", "6aJ", "4Ti", "0oG", "Lu", "bY", "30", "4zE", "6Of",
+"5r5", "4wu", "380", "oi", "AE", "0bw", "4YY", "4L8", "5Wz", "66p", "1y5", "1lT", "0RY", "5c", "5l7", "4iw", "4dG", "6Qd", "8S", "1OH",
+"05M", "Rw", "7oi", "4Jk", "4Ej", "64A", "2Kg", "1ne", "0Ph", "7R", "7ND", "4kF", "4fv", "5c6", "0H9", "1My", "0st", "PF", "69q", "4HZ",
+"0Op", "lB", "ako", "40V", "4Zr", "6oQ", "Bn", "15u", "0ll", "2YO", "6ba", "4WB", "4yn", "6LM", "ar", "1Ra", "0MA", "ns", "6CL", "42g",
+"4XC", "79I", "2VN", "0cm", "8gE", "Mo", "5P3", "4Us", "bAl", "adn", "cC", "1PP", "9I", "1NR", "51t", "444", "5N1", "4Kq", "04W", "Sm",
+"2HL", "09g", "4FA", "67j", "7Mo", "4hm", "0SC", "4y", "2ia", "1Lc", "4gl", "6RO", "68k", "5Ya", "5", "2GM", "d4F", "82N", "4Dp", "5A0",
+"a1e", "bPo", "0Qr", "6H", "Cx", "14c", "5KE", "6nG", "7PJ", "4uH", "x5", "mT", "0V7", "0CV", "4xx", "590", "4C5", "4VT", "0mz", "NH",
+"AI", "16R", "4YU", "4L4", "561", "43q", "0LW", "oe", "bU", "w4", "4zI", "6Oj", "6aF", "4Te", "0oK", "Ly", "05A", "2Dj", "7oe", "4Jg",
+"4dK", "6Qh", "2jF", "i6", "0RU", "5o", "7Ly", "5yZ", "4GW", "4R6", "1y9", "08q", "07p", "PJ", "7mT", "4HV", "4fz", "6SY", "0H5", "1Mu",
+"f7", "sV", "7NH", "4kJ", "4Ef", "64M", "2Kk", "1ni", "4yb", "6LA", "23g", "0BL", "Z3", "OR", "6bm", "4WN", "c4d", "aEO", "Bb", "0aP",
+"8Fd", "lN", "4a3", "40Z", "5kr", "4n2", "cO", "8Ie", "0nQ", "Mc", "74u", "615", "4XO", "6ml", "2VB", "U2", "0MM", "2xn", "7Sa", "42k",
+"7Mc", "4ha", "0SO", "4u", "3Xa", "K0", "4FM", "67f", "aTL", "b5F", "0pS", "Sa", "9E", "8Wg", "4eQ", "448", "7OR", "4jP", "254", "6D",
+"0j3", "1os", "cnn", "65W", "68g", "4IL", "9", "QP", "2im", "1Lo", "53I", "6RC", "7PN", "41D", "x1", "mP", "2Um", "14g", "5KA", "6nC",
+"4C1", "4VP", "19W", "NL", "0V3", "0CR", "bBO", "594", "565", "43u", "0LS", "oa", "AM", "16V", "4YQ", "4L0", "6aB", "4Ta", "0oO", "2Zl",
+"bQ", "38", "4zM", "6On", "4dO", "6Ql", "2jB", "i2", "05E", "2Dn", "7oa", "4Jc", "4GS", "4R2", "d7e", "08u", "0RQ", "5k", "a2F", "bSL",
+"52W", "ayO", "0H1", "1Mq", "07t", "PN", "69y", "4HR", "4Eb", "64I", "2Ko", "1nm", "f3", "7Z", "7NL", "4kN", "Z7", "OV", "6bi", "4WJ",
+"4yf", "6LE", "az", "0BH", "0Ox", "lJ", "4a7", "4tV", "4Zz", "6oY", "Bf", "0aT", "0nU", "Mg", "74q", "5EZ", "5kv", "4n6", "cK", "1PX",
+"0MI", "2xj", "6CD", "42o", "4XK", "6mh", "2VF", "U6", "2HD", "K4", "4FI", "67b", "7Mg", "4he", "0SK", "4q", "9A", "1NZ", "4eU", "4p4",
+"5N9", "4Ky", "0pW", "Se", "0j7", "1ow", "4Dx", "5A8", "7OV", "4jT", "0Qz", "rH", "2ii", "1Lk", "4gd", "6RG", "68c", "4IH", "D5", "QT",
+"5Ls", "4I3", "F", "13U", "0IP", "jb", "536", "46v", "5oo", "6Jm", "gR", "r3", "0jL", "3ON", "6dA", "4Qb", "5NB", "aAR", "2Pn", "0eM",
+"0Ka", "hS", "6El", "44G", "49w", "abN", "ec", "0FQ", "8ae", "KO", "4F2", "4SS", "4X0", "4MQ", "02w", "UM", "0M2", "0XS", "57T", "a8D",
+"7KO", "4nM", "c0", "2Y", "2Nl", "1kn", "aJ1", "61J", "6zC", "aE0", "00F", "2Am", "yP", "l1", "4aL", "6To", "a7E", "58U", "0WR", "0h",
+"ZL", "84n", "4BP", "4W1", "fH", "0Ez", "5nu", "4k5", "5U8", "4Px", "0kV", "Hd", "ET", "P5", "5Mi", "6hk", "6FG", "47l", "0HJ", "kx",
+"dy", "0GK", "48m", "6IF", "6gj", "4RI", "0ig", "JU", "Ge", "0dW", "5OX", "5Z9", "4d4", "4qU", "1ZZ", "iI", "0Ty", "3C", "4z6", "4oW",
+"5QZ", "60P", "Yg", "0zU", "A6", "TW", "6yh", "4LK", "4bg", "6WD", "2lj", "0YI", "0VH", "1r", "6XE", "4mf", "4CJ", "62a", "2MG", "N7",
+"0uT", "Vf", "7kx", "4Nz", "5pw", "4u7", "xJ", "1KY", "0IT", "jf", "532", "46r", "5Lw", "4I7", "B", "0gx", "0jH", "Iz", "6dE", "4Qf",
+"5ok", "6Ji", "gV", "r7", "0Ke", "hW", "6Eh", "44C", "5NF", "6kD", "2Pj", "0eI", "0hy", "KK", "4F6", "4SW", "49s", "6HX", "eg", "0FU",
+"0M6", "0XW", "4cy", "5f9", "4X4", "4MU", "02s", "UI", "Xy", "1kj", "5PD", "61N", "7KK", "4nI", "c4", "vU", "yT", "l5", "4aH", "6Tk",
+"6zG", "4Od", "00B", "Wx", "ZH", "0yz", "4BT", "4W5", "5i8", "4lx", "0WV", "0l", "71v", "646", "0kR", "3NP", "fL", "8Lf", "5nq", "4k1",
+"6FC", "47h", "0HN", "29e", "EP", "P1", "5Mm", "6ho", "6gn", "4RM", "0ic", "JQ", "26d", "0GO", "48i", "6IB", "4d0", "45Y", "8Cg", "iM",
+"Ga", "0dS", "beN", "hYu", "ckm", "60T", "Yc", "0zQ", "207", "3G", "4z2", "4oS", "4bc", "7Ga", "2ln", "0YM", "A2", "TS", "6yl", "4LO",
+"4CN", "62e", "2MC", "N3", "0VL", "1v", "6XA", "4mb", "5ps", "4u3", "xN", "8Rd", "01X", "Vb", "aQO", "b0E", "5og", "6Je", "gZ", "63",
+"0jD", "Iv", "6dI", "4Qj", "7l9", "6iy", "N", "0gt", "0IX", "jj", "5w6", "4rv", "5mV", "5x7", "ek", "0FY", "0hu", "KG", "6fx", "5Cz",
+"5NJ", "6kH", "Fw", "0eE", "92", "3nk", "6Ed", "44O", "7KG", "4nE", "c8", "2Q", "Xu", "1kf", "5PH", "61B", "4X8", "4MY", "0vw", "UE",
+"2mx", "1Hz", "4cu", "5f5", "5i4", "4lt", "0WZ", "th", "ZD", "0yv", "4BX", "4W9", "6zK", "4Oh", "00N", "Wt", "yX", "l9", "4aD", "6Tg",
+"2SM", "0fn", "5Ma", "6hc", "6FO", "47d", "0HB", "kp", "24Y", "0Er", "bDo", "aam", "5U0", "4Pp", "8bF", "Hl", "Gm", "10v", "5OP", "5Z1",
+"anl", "45U", "0Js", "iA", "dq", "0GC", "48e", "6IN", "6gb", "4RA", "0io", "3Lm", "03e", "2BN", "7iA", "4LC", "4bo", "6WL", "zs", "0YA",
+"0Tq", "3K", "a4f", "bUl", "4As", "5D3", "Yo", "87M", "01T", "Vn", "5K2", "4Nr", "54w", "417", "xB", "1KQ", "1Fa", "1z", "6XM", "4mn",
+"4CB", "62i", "2MO", "0xl", "1za", "Ir", "6dM", "4Qn", "5oc", "6Ja", "25G", "67", "9Pe", "jn", "5w2", "46z", "bfm", "aCo", "J", "0gp",
+"0hq", "KC", "72U", "bil", "5mR", "5x3", "eo", "326", "96", "3no", "7UA", "44K", "5NN", "6kL", "Fs", "0eA", "Xq", "1kb", "5PL", "61F",
+"7KC", "4nA", "0Uo", "2U", "39U", "8QG", "4cq", "5f1", "aRl", "796", "0vs", "UA", "2LQ", "0yr", "767", "63w", "5i0", "4lp", "9Ng", "0d",
+"2oM", "0Zn", "55i", "6Tc", "6zO", "4Ol", "00J", "Wp", "6FK", "4sh", "0HF", "kt", "EX", "P9", "5Me", "6hg", "5U4", "4Pt", "0kZ", "Hh",
+"fD", "0Ev", "5ny", "4k9", "4d8", "45Q", "0Jw", "iE", "Gi", "10r", "5OT", "5Z5", "6gf", "4RE", "0ik", "JY", "du", "0GG", "48a", "6IJ",
+"4bk", "6WH", "zw", "0YE", "03a", "2BJ", "6yd", "4LG", "4Aw", "5D7", "Yk", "0zY", "0Tu", "3O", "6Zx", "bUh", "54s", "413", "xF", "1KU",
+"01P", "Vj", "5K6", "4Nv", "4CF", "62m", "2MK", "0xh", "0VD", "uv", "6XI", "4mj", "5NS", "6kQ", "Fn", "11u", "0Kp", "hB", "aoo", "44V",
+"49f", "6HM", "er", "1Va", "0hl", "3Mn", "6fa", "4SB", "5Lb", "7yA", "W", "0gm", "0IA", "js", "6GL", "46g", "bEl", "hyW", "gC", "0Dq",
+"8cE", "Io", "5T3", "4Qs", "5J1", "4Oq", "00W", "Wm", "yA", "0Zs", "55t", "404", "6YN", "4lm", "0WC", "0y", "2LL", "0yo", "4BA", "63j",
+"6xc", "bws", "02f", "2CM", "2ma", "0XB", "4cl", "6VO", "a5e", "bTo", "0Ur", "2H", "Xl", "86N", "5PQ", "5E0", "dh", "0GZ", "5lU", "5y4",
+"4G9", "4RX", "0iv", "JD", "Gt", "0dF", "5OI", "6jK", "6Dg", "45L", "81", "iX", "fY", "70", "5nd", "6Kf", "6eJ", "4Pi", "0kG", "Hu",
+"EE", "0fw", "5Mx", "4H8", "5v5", "4su", "1Xz", "ki", "0VY", "1c", "5h7", "4mw", "5Sz", "62p", "2MV", "0xu", "01M", "Vw", "7ki", "4Nk",
+"54n", "6Ud", "2nJ", "1KH", "0Th", "3R", "6Ze", "4oF", "4Aj", "60A", "Yv", "0zD", "0wt", "TF", "6yy", "4LZ", "4bv", "5g6", "zj", "0YX",
+"0Kt", "hF", "6Ey", "44R", "5NW", "6kU", "Fj", "0eX", "0hh", "KZ", "6fe", "4SF", "49b", "6HI", "ev", "0FD", "0IE", "jw", "6GH", "46c",
+"5Lf", "6id", "S", "0gi", "0jY", "Ik", "5T7", "4Qw", "5oz", "6Jx", "gG", "0Du", "yE", "0Zw", "4aY", "400", "5J5", "4Ou", "00S", "Wi",
+"ZY", "O8", "4BE", "63n", "6YJ", "4li", "0WG", "tu", "2me", "0XF", "4ch", "6VK", "6xg", "4MD", "02b", "UX", "Xh", "3K9", "5PU", "5E4",
+"7KZ", "4nX", "0Uv", "2L", "73V", "bho", "0ir", "1l2", "dl", "335", "48x", "5y0", "6Dc", "45H", "85", "3ol", "Gp", "0dB", "5OM", "6jO",
+"6eN", "4Pm", "0kC", "Hq", "24D", "74", "bDr", "6Kb", "529", "47y", "8AG", "km", "EA", "0fs", "bgn", "aBl", "774", "62t", "199", "0xq",
+"9Od", "1g", "5h3", "4ms", "54j", "7EA", "2nN", "1KL", "01I", "Vs", "7km", "4No", "4An", "60E", "Yr", "1ja", "0Tl", "3V", "6Za", "4oB",
+"4br", "5g2", "zn", "8PD", "03x", "TB", "aSo", "785", "49n", "6HE", "ez", "0FH", "0hd", "KV", "6fi", "4SJ", "bdI", "6kY", "Ff", "0eT",
+"0Kx", "hJ", "4e7", "4pV", "5ov", "4j6", "gK", "0Dy", "0jU", "Ig", "6dX", "5AZ", "5Lj", "6ih", "DW", "Q6", "0II", "28b", "6GD", "46o",
+"6YF", "4le", "0WK", "0q", "ZU", "O4", "4BI", "63b", "5J9", "4Oy", "0tW", "We", "yI", "1JZ", "4aU", "4t4", "7KV", "4nT", "0Uz", "vH",
+"Xd", "1kw", "5PY", "5E8", "6xk", "4MH", "02n", "UT", "2mi", "0XJ", "4cd", "6VG", "2Qm", "0dN", "5OA", "6jC", "6Do", "45D", "89", "iP",
+"0R3", "0GR", "48t", "acM", "4G1", "4RP", "94O", "JL", "EM", "12V", "5Mp", "4H0", "525", "47u", "0HS", "ka", "fQ", "78", "5nl", "6Kn",
+"6eB", "4Pa", "0kO", "3NM", "01E", "3PO", "7ka", "4Nc", "54f", "6Ul", "xS", "m2", "0VQ", "1k", "a6F", "59V", "4CS", "4V2", "195", "85m",
+"03t", "TN", "4Y3", "4LR", "56W", "a9G", "zb", "0YP", "b3", "3Z", "6Zm", "4oN", "4Ab", "60I", "2Oo", "0zL", "1xA", "KR", "6fm", "4SN",
+"49j", "6HA", "27g", "0FL", "8Bd", "hN", "4e3", "44Z", "bdM", "aAO", "Fb", "0eP", "0jQ", "Ic", "70u", "655", "5or", "4j2", "gO", "8Me",
+"0IM", "28f", "7Wa", "46k", "5Ln", "6il", "DS", "Q2", "ZQ", "O0", "4BM", "63f", "6YB", "4la", "0WO", "0u", "yM", "8Sg", "4aQ", "408",
+"aPL", "b1F", "0tS", "Wa", "0n3", "1ks", "bzO", "61W", "7KR", "4nP", "214", "2D", "2mm", "0XN", "57I", "6VC", "6xo", "4ML", "02j", "UP",
+"6Dk", "4qH", "0Jf", "iT", "Gx", "0dJ", "5OE", "6jG", "4G5", "4RT", "0iz", "JH", "dd", "0GV", "48p", "5y8", "521", "47q", "0HW", "ke",
+"EI", "12R", "5Mt", "4H4", "6eF", "4Pe", "0kK", "Hy", "fU", "s4", "5nh", "6Kj", "54b", "6Uh", "xW", "m6", "01A", "3PK", "7ke", "4Ng",
+"4CW", "4V6", "191", "0xy", "0VU", "1o", "6XX", "59R", "4bz", "6WY", "zf", "0YT", "03p", "TJ", "4Y7", "4LV", "4Af", "60M", "Yz", "0zH",
+"b7", "wV", "6Zi", "4oJ", "5H3", "4Ms", "02U", "Uo", "2mR", "0Xq", "57v", "426", "7Km", "4no", "0UA", "vs", "2NN", "1kL", "5Pb", "61h",
+"6za", "4OB", "00d", "2AO", "yr", "1Ja", "4an", "6TM", "a7g", "58w", "0Wp", "0J", "Zn", "84L", "4Br", "5G2", "5LQ", "5Y0", "d", "13w",
+"0Ir", "1L2", "amm", "46T", "5oM", "6JO", "gp", "0DB", "0jn", "3Ol", "6dc", "5Aa", "bdr", "6kb", "2PL", "0eo", "0KC", "hq", "6EN", "44e",
+"49U", "abl", "eA", "0Fs", "8aG", "Km", "5V1", "4Sq", "1Dz", "3a", "5j5", "4ou", "4AY", "4T8", "YE", "0zw", "03O", "Tu", "6yJ", "4Li",
+"4bE", "6Wf", "zY", "o8", "0Vj", "1P", "6Xg", "4mD", "4Ch", "62C", "2Me", "0xF", "0uv", "VD", "7kZ", "4NX", "5pU", "5e4", "xh", "3k9",
+"fj", "0EX", "5nW", "6KU", "6ey", "4PZ", "0kt", "HF", "Ev", "0fD", "5MK", "6hI", "6Fe", "47N", "0Hh", "kZ", "26B", "52", "48O", "6Id",
+"6gH", "4Rk", "0iE", "Jw", "GG", "0du", "5Oz", "6jx", "5t7", "4qw", "0JY", "ik", "2mV", "0Xu", "57r", "422", "5H7", "4Mw", "02Q", "Uk",
+"2NJ", "1kH", "5Pf", "61l", "7Ki", "4nk", "0UE", "vw", "yv", "0ZD", "4aj", "6TI", "6ze", "4OF", "0th", "WZ", "Zj", "0yX", "4Bv", "5G6",
+"6Yy", "4lZ", "0Wt", "0N", "0Iv", "jD", "4g9", "46P", "5LU", "5Y4", "Dh", "0gZ", "0jj", "IX", "6dg", "4QD", "5oI", "6JK", "gt", "0DF",
+"0KG", "hu", "6EJ", "44a", "5Nd", "6kf", "FY", "S8", "1xz", "Ki", "5V5", "4Su", "49Q", "4h8", "eE", "0Fw", "756", "60v", "YA", "0zs",
+"9Mf", "3e", "5j1", "4oq", "4bA", "6Wb", "2lL", "0Yo", "03K", "Tq", "6yN", "4Lm", "4Cl", "62G", "2Ma", "0xB", "0Vn", "1T", "6Xc", "59i",
+"54Y", "5e0", "xl", "8RF", "01z", "1p2", "aQm", "b0g", "71T", "bjm", "0kp", "HB", "fn", "317", "5nS", "6KQ", "6Fa", "47J", "0Hl", "29G",
+"Er", "12i", "5MO", "6hM", "6gL", "4Ro", "0iA", "Js", "26F", "56", "48K", "7YA", "5t3", "4qs", "8CE", "io", "GC", "0dq", "bel", "hYW",
+"7Ke", "4ng", "0UI", "2s", "XW", "M6", "5Pj", "6uh", "6xX", "6m9", "0vU", "Ug", "2mZ", "0Xy", "4cW", "4v6", "4y7", "4lV", "0Wx", "0B",
+"Zf", "0yT", "4Bz", "63Q", "6zi", "4OJ", "B7", "WV", "yz", "0ZH", "4af", "6TE", "5oE", "6JG", "gx", "0DJ", "0jf", "IT", "6dk", "4QH",
+"5LY", "5Y8", "l", "0gV", "0Iz", "jH", "4g5", "4rT", "5mt", "4h4", "eI", "1VZ", "0hW", "Ke", "5V9", "4Sy", "5Nh", "6kj", "FU", "S4",
+"0KK", "hy", "6EF", "44m", "03G", "2Bl", "6yB", "4La", "4bM", "6Wn", "zQ", "o0", "0TS", "3i", "a4D", "bUN", "4AQ", "4T0", "YM", "87o",
+"01v", "VL", "7kR", "4NP", "54U", "hft", "0N3", "1Ks", "0Vb", "1X", "6Xo", "4mL", "5SA", "62K", "2Mm", "0xN", "2So", "0fL", "5MC", "6hA",
+"6Fm", "47F", "1XA", "kR", "fb", "0EP", "bDM", "aaO", "4E3", "4PR", "8bd", "HN", "GO", "10T", "5Or", "4J2", "507", "45w", "0JQ", "ic",
+"dS", "q2", "48G", "6Il", "73i", "4Rc", "0iM", "3LO", "XS", "M2", "5Pn", "61d", "7Ka", "4nc", "0UM", "2w", "39w", "8Qe", "4cS", "4v2",
+"aRN", "b3D", "02Y", "Uc", "Zb", "0yP", "bxM", "63U", "4y3", "4lR", "236", "0F", "2oo", "0ZL", "4ab", "6TA", "6zm", "4ON", "B3", "WR",
+"0jb", "IP", "6do", "4QL", "5oA", "6JC", "25e", "0DN", "9PG", "jL", "4g1", "46X", "686", "aCM", "h", "0gR", "0hS", "Ka", "72w", "677",
+"49Y", "4h0", "eM", "8Og", "0KO", "3nM", "6EB", "44i", "5Nl", "6kn", "FQ", "S0", "4bI", "6Wj", "zU", "o4", "03C", "Ty", "6yF", "4Le",
+"4AU", "4T4", "YI", "1jZ", "0TW", "3m", "5j9", "4oy", "54Q", "5e8", "xd", "1Kw", "01r", "VH", "7kV", "4NT", "4Cd", "62O", "2Mi", "0xJ",
+"0Vf", "uT", "6Xk", "4mH", "6Fi", "47B", "0Hd", "kV", "Ez", "0fH", "5MG", "6hE", "4E7", "4PV", "0kx", "HJ", "ff", "0ET", "bDI", "6KY",
+"503", "45s", "0JU", "ig", "GK", "0dy", "5Ov", "4J6", "6gD", "4Rg", "0iI", "3LK", "dW", "q6", "48C", "6Ih", "4Z2", "4OS", "00u", "WO",
+"yc", "0ZQ", "55V", "hgw", "6Yl", "4lO", "a2", "tS", "2Ln", "0yM", "4Bc", "63H", "6xA", "4Mb", "02D", "2Co", "2mC", "n3", "4cN", "6Vm",
+"a5G", "bTM", "0UP", "2j", "XN", "86l", "5Ps", "4U3", "5Nq", "4K1", "FL", "11W", "0KR", "3nP", "514", "44t", "49D", "6Ho", "eP", "49",
+"0hN", "3ML", "6fC", "5CA", "aV1", "6iB", "u", "0gO", "0Ic", "jQ", "6Gn", "46E", "bEN", "hyu", "ga", "0DS", "8cg", "IM", "4D0", "4QQ",
+"1FZ", "1A", "4x4", "4mU", "4Cy", "5F9", "0m6", "0xW", "C4", "VU", "7kK", "4NI", "54L", "6UF", "xy", "1Kj", "0TJ", "3p", "6ZG", "4od",
+"4AH", "60c", "YT", "L5", "0wV", "Td", "5I8", "4Lx", "4bT", "4w5", "zH", "0Yz", "dJ", "0Gx", "5lw", "4i7", "6gY", "4Rz", "0iT", "Jf",
+"GV", "R7", "5Ok", "6ji", "6DE", "45n", "0JH", "iz", "24b", "0EI", "5nF", "6KD", "6eh", "4PK", "0ke", "HW", "Eg", "0fU", "5MZ", "6hX",
+"4f6", "4sW", "0Hy", "kK", "yg", "0ZU", "55R", "6TX", "4Z6", "4OW", "00q", "WK", "2Lj", "0yI", "4Bg", "63L", "6Yh", "4lK", "a6", "tW",
+"2mG", "n7", "4cJ", "6Vi", "6xE", "4Mf", "0vH", "Uz", "XJ", "1kY", "5Pw", "4U7", "7Kx", "4nz", "0UT", "2n", "0KV", "hd", "510", "44p",
+"5Nu", "4K5", "FH", "0ez", "0hJ", "Kx", "6fG", "4Sd", "5mi", "6Hk", "eT", "p5", "0Ig", "jU", "6Gj", "46A", "5LD", "6iF", "q", "0gK",
+"1zZ", "II", "4D4", "4QU", "5oX", "5z9", "ge", "0DW", "byN", "62V", "0m2", "0xS", "225", "1E", "4x0", "4mQ", "54H", "6UB", "2nl", "1Kn",
+"C0", "VQ", "7kO", "4NM", "4AL", "60g", "YP", "L1", "0TN", "3t", "6ZC", "ae0", "4bP", "439", "zL", "8Pf", "03Z", "0b3", "aSM", "b2G",
+"73t", "664", "0iP", "Jb", "dN", "8Nd", "48Z", "4i3", "6DA", "45j", "0JL", "3oN", "GR", "R3", "5Oo", "6jm", "6el", "4PO", "0ka", "HS",
+"24f", "0EM", "5nB", "aaR", "4f2", "4sS", "8Ae", "kO", "Ec", "0fQ", "695", "aBN", "6Yd", "4lG", "0Wi", "0S", "Zw", "0yE", "4Bk", "6wH",
+"6zx", "buh", "0tu", "WG", "yk", "0ZY", "4aw", "5d7", "5k6", "4nv", "0UX", "2b", "XF", "1kU", "741", "61q", "6xI", "4Mj", "02L", "Uv",
+"2mK", "0Xh", "4cF", "6Ve", "49L", "6Hg", "eX", "41", "0hF", "Kt", "6fK", "4Sh", "5Ny", "4K9", "FD", "0ev", "0KZ", "hh", "5u4", "4pt",
+"5oT", "5z5", "gi", "1Tz", "0jw", "IE", "4D8", "4QY", "5LH", "6iJ", "Du", "0gG", "0Ik", "jY", "6Gf", "46M", "01g", "3Pm", "7kC", "4NA",
+"54D", "6UN", "xq", "1Kb", "0Vs", "1I", "a6d", "59t", "4Cq", "5F1", "d3G", "85O", "03V", "Tl", "5I0", "4Lp", "56u", "435", "2lQ", "0Yr",
+"0TB", "3x", "6ZO", "4ol", "5Qa", "60k", "2OM", "0zn", "2QO", "0dl", "5Oc", "6ja", "6DM", "45f", "1Za", "ir", "dB", "0Gp", "48V", "aco",
+"5W2", "4Rr", "94m", "Jn", "Eo", "12t", "5MR", "5X3", "aln", "47W", "0Hq", "kC", "fs", "0EA", "5nN", "6KL", "71I", "4PC", "0km", "3No",
+"Zs", "0yA", "4Bo", "63D", "7IA", "4lC", "0Wm", "0W", "yo", "8SE", "4as", "5d3", "aPn", "b1d", "00y", "WC", "XB", "1kQ", "745", "61u",
+"5k2", "4nr", "9Le", "2f", "2mO", "0Xl", "4cB", "6Va", "6xM", "4Mn", "02H", "Ur", "0hB", "Kp", "6fO", "4Sl", "49H", "6Hc", "27E", "45",
+"8BF", "hl", "518", "44x", "bdo", "aAm", "2PQ", "0er", "0js", "IA", "70W", "bkn", "5oP", "5z1", "gm", "304", "0Io", "28D", "6Gb", "46I",
+"5LL", "6iN", "y", "0gC", "5pH", "6UJ", "xu", "1Kf", "C8", "VY", "7kG", "4NE", "4Cu", "5F5", "2Mx", "1hz", "0Vw", "1M", "4x8", "4mY",
+"4bX", "431", "zD", "0Yv", "03R", "Th", "5I4", "4Lt", "4AD", "60o", "YX", "L9", "0TF", "wt", "6ZK", "4oh", "6DI", "45b", "0JD", "iv",
+"GZ", "0dh", "5Og", "6je", "5W6", "4Rv", "0iX", "Jj", "dF", "0Gt", "48R", "6Iy", "6Fx", "47S", "0Hu", "kG", "Ek", "0fY", "5MV", "5X7",
+"6ed", "4PG", "0ki", "3Nk", "fw", "0EE", "5nJ", "6KH", "356", "bo", "6OP", "4zs", "bnl", "75U", "LC", "0oq", "0bA", "As", "6lL", "4Yo",
+"43K", "7RA", "2yN", "0Lm", "17", "22G", "6Ma", "4xB", "4Vn", "6cM", "Nr", "19i", "14Y", "CB", "aDo", "bam", "41z", "5p2", "mn", "8GD",
+"7d", "8YF", "4kp", "5n0", "64w", "717", "1nS", "2KQ", "Pp", "07J", "4Hl", "69G", "6Sc", "52i", "1MO", "2hM", "5U", "0Ro", "4iA", "7LC",
+"66F", "4Gm", "08K", "3YA", "RA", "0qs", "b4f", "aUl", "5a1", "4dq", "8VG", "8e", "6mV", "4Xu", "17r", "022", "nE", "0Mw", "42Q", "4c8",
+"6NJ", "5kH", "1Pf", "cu", "MY", "X8", "4UE", "74O", "6og", "4ZD", "W9", "BX", "lt", "0OF", "4th", "6AK", "4l9", "4yX", "0Bv", "aD",
+"Oh", "0lZ", "4Wt", "5R4", "4Iv", "5L6", "Qj", "06P", "1LU", "1Y4", "463", "4gZ", "4jj", "7Oh", "rv", "0QD", "1oI", "2JK", "65m", "4DF",
+"4KG", "7nE", "2EJ", "04a", "1Nd", "2kf", "6PH", "4ek", "5xz", "492", "4O", "0Su", "09Q", "0h8", "5C7", "4Fw", "5Dz", "6ax", "LG", "0ou",
+"0AY", "bk", "6OT", "4zw", "43O", "6Bd", "2yJ", "0Li", "0bE", "Aw", "6lH", "4Yk", "4Vj", "6cI", "Nv", "0mD", "13", "22C", "6Me", "4xF",
+"4uv", "5p6", "mj", "0NX", "1pU", "CF", "6ny", "7k9", "4P9", "4EX", "1nW", "2KU", "sh", "0PZ", "4kt", "5n4", "6Sg", "4fD", "k9", "2hI",
+"Pt", "07N", "4Hh", "69C", "66B", "4Gi", "08O", "2Id", "5Q", "d8", "4iE", "7LG", "5a5", "4du", "1Oz", "8a", "RE", "0qw", "4JY", "aUh",
+"nA", "0Ms", "42U", "ail", "6mR", "4Xq", "17v", "026", "3Km", "0no", "4UA", "74K", "6NN", "5kL", "1Pb", "cq", "lp", "0OB", "40d", "6AO",
+"6oc", "5Ja", "0an", "2TM", "Ol", "18w", "4Wp", "5R0", "afm", "bCo", "0Br", "1G2", "1LQ", "1Y0", "467", "53w", "4Ir", "5L2", "Qn", "06T",
+"1oM", "2JO", "65i", "4DB", "4jn", "7Ol", "6z", "1Aa", "8WY", "2kb", "6PL", "4eo", "4KC", "7nA", "2EN", "04e", "09U", "d6E", "5C3", "4Fs",
+"bRl", "496", "4K", "0Sq", "0bI", "2Wj", "6lD", "4Yg", "43C", "6Bh", "oW", "z6", "0AU", "bg", "6OX", "5jZ", "4TW", "4A6", "LK", "0oy",
+"14Q", "CJ", "4N7", "5Kw", "41r", "542", "mf", "0NT", "u7", "22O", "6Mi", "4xJ", "4Vf", "6cE", "Nz", "0mH", "Px", "07B", "4Hd", "69O",
+"6Sk", "4fH", "k5", "2hE", "7l", "0PV", "4kx", "5n8", "4P5", "4ET", "83j", "2KY", "RI", "05s", "4JU", "7oW", "5a9", "4dy", "1Ov", "8m",
+"qU", "d4", "4iI", "7LK", "66N", "4Ge", "08C", "2Ih", "6NB", "a59", "1Pn", "21d", "MQ", "X0", "4UM", "74G", "79w", "bbN", "0cS", "0v2",
+"nM", "8Dg", "42Y", "4c0", "4l1", "4yP", "8Kf", "aL", "0y3", "0lR", "636", "76v", "6oo", "4ZL", "W1", "BP", "2zm", "0ON", "40h", "6AC",
+"4jb", "auS", "6v", "0QL", "I3", "2JC", "65e", "4DN", "b7E", "68U", "Qb", "06X", "286", "dSl", "4r3", "4gR", "4hS", "7MQ", "4G", "277",
+"09Y", "0h0", "67T", "b8D", "4KO", "7nM", "SS", "F2", "1Nl", "9w", "azR", "4ec", "43G", "6Bl", "oS", "z2", "0bM", "2Wn", "78i", "4Yc",
+"4TS", "4A2", "LO", "8fe", "0AQ", "bc", "aeN", "cPm", "41v", "546", "mb", "0NP", "14U", "CN", "4N3", "5Ks", "4Vb", "6cA", "2Xo", "0mL",
+"u3", "22K", "6Mm", "4xN", "6So", "4fL", "k1", "2hA", "2Fm", "07F", "5XA", "69K", "4P1", "4EP", "83n", "d5f", "7h", "0PR", "bQO", "a0E",
+"hbu", "50T", "1Or", "8i", "RM", "05w", "4JQ", "7oS", "66J", "4Ga", "08G", "2Il", "5Y", "d0", "4iM", "7LO", "MU", "X4", "4UI", "74C",
+"6NF", "5kD", "1Pj", "cy", "nI", "2m9", "4vU", "4c4", "6mZ", "4Xy", "0cW", "0v6", "Od", "0lV", "4Wx", "5R8", "4l5", "4yT", "0Bz", "aH",
+"lx", "0OJ", "40l", "6AG", "6ok", "4ZH", "W5", "BT", "I7", "2JG", "65a", "4DJ", "4jf", "7Od", "6r", "0QH", "1LY", "1Y8", "4r7", "4gV",
+"4Iz", "68Q", "Qf", "0rT", "1mt", "0h4", "67P", "5VZ", "4hW", "7MU", "4C", "0Sy", "1Nh", "9s", "6PD", "4eg", "4KK", "7nI", "SW", "F6",
+"8Je", "22V", "4m2", "4xS", "625", "77u", "Nc", "0mQ", "V2", "CS", "6nl", "5Kn", "41k", "7Pa", "3kO", "0NM", "0AL", "20g", "6OA", "4zb",
+"4TN", "6am", "LR", "Y3", "0bP", "Ab", "78t", "bcM", "43Z", "4b3", "oN", "8Ed", "5D", "264", "4iP", "489", "66W", "b9G", "08Z", "0i3",
+"RP", "G1", "4JL", "7oN", "6QC", "50I", "1Oo", "8t", "7u", "0PO", "4ka", "7Nc", "64f", "4EM", "H0", "963", "Pa", "0sS", "b6F", "69V",
+"478", "4fQ", "295", "dRo", "4O4", "4ZU", "15R", "BI", "le", "0OW", "40q", "551", "6Lj", "4yI", "t4", "aU", "Oy", "0lK", "4We", "6bF",
+"6mG", "4Xd", "0cJ", "2Vi", "nT", "0Mf", "4vH", "6Ck", "adI", "5kY", "1Pw", "cd", "MH", "0nz", "4UT", "7pV", "4KV", "7nT", "SJ", "04p",
+"1Nu", "9n", "6PY", "4ez", "4hJ", "7MH", "pV", "e7", "1mi", "2Hk", "67M", "4Ff", "4Ig", "68L", "2Gj", "06A", "j6", "2iF", "6Rh", "4gK",
+"5zZ", "7Oy", "6o", "0QU", "1oX", "1z9", "4Q6", "4DW", "5FZ", "6cX", "Ng", "0mU", "0Cy", "1F9", "4m6", "4xW", "41o", "7Pe", "3kK", "0NI",
+"V6", "CW", "6nh", "5Kj", "4TJ", "6ai", "LV", "Y7", "0AH", "bz", "6OE", "4zf", "4wV", "4b7", "oJ", "0Lx", "0bT", "Af", "6lY", "4Yz",
+"5B8", "4Gx", "1lw", "0i7", "qH", "0Rz", "4iT", "7LV", "6QG", "4dd", "1Ok", "8p", "RT", "G5", "4JH", "7oJ", "64b", "4EI", "H4", "2KD",
+"7q", "0PK", "4ke", "7Ng", "4s4", "4fU", "1MZ", "2hX", "Pe", "0sW", "4Hy", "5M9", "la", "0OS", "40u", "555", "4O0", "4ZQ", "15V", "BM",
+"2Yl", "0lO", "4Wa", "6bB", "6Ln", "4yM", "08", "aQ", "nP", "0Mb", "42D", "6Co", "6mC", "5HA", "0cN", "2Vm", "ML", "8gf", "4UP", "74Z",
+"adM", "bAO", "1Ps", "0U3", "1Nq", "9j", "azO", "51W", "4KR", "7nP", "SN", "04t", "09D", "2Ho", "67I", "4Fb", "4hN", "7ML", "4Z", "e3",
+"j2", "2iB", "6Rl", "4gO", "4Ic", "68H", "2Gn", "06E", "82m", "d4e", "4Q2", "4DS", "bPL", "a1F", "6k", "0QQ", "1pH", "2UJ", "6nd", "5Kf",
+"41c", "7Pi", "mw", "0NE", "0Cu", "1F5", "6Mx", "5hz", "4Vw", "5S7", "Nk", "0mY", "0bX", "Aj", "6lU", "4Yv", "43R", "6By", "oF", "0Lt",
+"0AD", "bv", "6OI", "4zj", "4TF", "6ae", "LZ", "0oh", "RX", "G9", "4JD", "7oF", "6QK", "4dh", "1Og", "2je", "5L", "0Rv", "4iX", "481",
+"5B4", "4Gt", "08R", "2Iy", "Pi", "07S", "4Hu", "5M5", "470", "4fY", "1MV", "1X7", "su", "0PG", "4ki", "7Nk", "64n", "4EE", "H8", "2KH",
+"6Lb", "4yA", "04", "23D", "Oq", "0lC", "4Wm", "6bN", "aEl", "c4G", "0as", "BA", "lm", "8FG", "40y", "559", "6NS", "5kQ", "345", "cl",
+"1k2", "0nr", "boo", "74V", "6mO", "4Xl", "0cB", "2Va", "2xM", "0Mn", "42H", "6Cc", "4hB", "aws", "4V", "0Sl", "09H", "2Hc", "67E", "4Fn",
+"b5e", "aTo", "SB", "04x", "8WD", "9f", "6PQ", "4er", "4js", "5o3", "6g", "8XE", "1oP", "1z1", "65t", "704", "4Io", "68D", "Qs", "06I",
+"1LL", "2iN", "7BA", "4gC", "41g", "7Pm", "ms", "0NA", "14D", "2UN", "aDr", "5Kb", "4Vs", "5S3", "No", "19t", "0Cq", "1F1", "agn", "bBl",
+"43V", "aho", "oB", "0Lp", "16u", "An", "6lQ", "4Yr", "4TB", "6aa", "2ZO", "0ol", "1Qa", "br", "6OM", "4zn", "6QO", "4dl", "1Oc", "8x",
+"2DM", "05f", "5Za", "7oB", "5B0", "4Gp", "08V", "d7F", "5H", "0Rr", "bSo", "485", "474", "52t", "1MR", "1X3", "Pm", "07W", "4Hq", "5M1",
+"64j", "4EA", "1nN", "2KL", "7y", "0PC", "4km", "7No", "Ou", "0lG", "4Wi", "6bJ", "6Lf", "4yE", "00", "aY", "li", "8FC", "4tu", "5q5",
+"4O8", "4ZY", "0aw", "BE", "MD", "0nv", "4UX", "74R", "6NW", "5kU", "341", "ch", "nX", "0Mj", "42L", "6Cg", "6mK", "4Xh", "0cF", "2Ve",
+"09L", "2Hg", "67A", "4Fj", "4hF", "7MD", "4R", "0Sh", "1Ny", "9b", "6PU", "4ev", "4KZ", "7nX", "SF", "0pt", "1oT", "1z5", "65p", "5Tz",
+"4jw", "5o7", "6c", "0QY", "1LH", "2iJ", "6Rd", "4gG", "4Ik", "7li", "Qw", "06M", "7F", "246", "4kR", "7NP", "64U", "col", "1nq", "0k1",
+"PR", "E3", "4HN", "69e", "6SA", "4fb", "1Mm", "2ho", "5w", "0RM", "4ic", "7La", "66d", "4GO", "J2", "2IB", "Rc", "05Y", "b4D", "aUN",
+"4q2", "4dS", "8Ve", "8G", "8Hg", "bM", "4o0", "4zQ", "607", "75w", "La", "0oS", "T0", "AQ", "6ln", "4YM", "43i", "6BB", "2yl", "0LO",
+"0CN", "22e", "6MC", "5hA", "4VL", "6co", "NP", "0mb", "1ps", "0u3", "aDM", "baO", "41X", "7PR", "mL", "8Gf", "4IT", "7lV", "QH", "06r",
+"1Lw", "0I7", "5b8", "4gx", "4jH", "7OJ", "rT", "g5", "1ok", "2Ji", "65O", "4Dd", "4Ke", "7ng", "Sy", "04C", "h4", "2kD", "6Pj", "4eI",
+"4hy", "5m9", "4m", "0SW", "09s", "2HX", "4S4", "4FU", "4M6", "4XW", "0cy", "1f9", "ng", "0MU", "42s", "573", "6Nh", "5kj", "v6", "cW",
+"3KK", "0nI", "4Ug", "74m", "6oE", "4Zf", "0aH", "Bz", "lV", "y7", "40B", "6Ai", "582", "4yz", "0BT", "af", "OJ", "0lx", "4WV", "4B7",
+"64Q", "4Ez", "1nu", "0k5", "7B", "0Px", "4kV", "7NT", "6SE", "4ff", "1Mi", "2hk", "PV", "E7", "4HJ", "69a", "6rh", "4GK", "J6", "2IF",
+"5s", "0RI", "4ig", "7Le", "4q6", "4dW", "1OX", "8C", "Rg", "0qU", "5ZZ", "7oy", "4Ty", "5Q9", "Le", "0oW", "1QZ", "bI", "4o4", "4zU",
+"43m", "6BF", "oy", "0LK", "T4", "AU", "6lj", "4YI", "4VH", "6ck", "NT", "0mf", "0CJ", "22a", "6MG", "4xd", "4uT", "7PV", "mH", "0Nz",
+"1pw", "Cd", "aDI", "5KY", "1Ls", "0I3", "axM", "53U", "4IP", "7lR", "QL", "06v", "1oo", "2Jm", "65K", "5TA", "4jL", "7ON", "6X", "g1",
+"h0", "9Y", "6Pn", "4eM", "4Ka", "7nc", "2El", "04G", "09w", "d6g", "4S0", "4FQ", "bRN", "a3D", "4i", "0SS", "nc", "0MQ", "42w", "577",
+"4M2", "4XS", "17T", "dlm", "3KO", "0nM", "4Uc", "74i", "6Nl", "5kn", "v2", "cS", "lR", "y3", "40F", "6Am", "6oA", "4Zb", "0aL", "2To",
+"ON", "18U", "4WR", "4B3", "586", "bCM", "0BP", "ab", "PZ", "0sh", "4HF", "69m", "6SI", "4fj", "1Me", "2hg", "7N", "0Pt", "4kZ", "7NX",
+"6pU", "4Ev", "1ny", "0k9", "Rk", "05Q", "4Jw", "5O7", "452", "50r", "1OT", "8O", "qw", "0RE", "4ik", "7Li", "66l", "4GG", "08a", "2IJ",
+"T8", "AY", "6lf", "4YE", "43a", "6BJ", "ou", "0LG", "0Aw", "bE", "4o8", "4zY", "4Tu", "5Q5", "Li", "8fC", "14s", "Ch", "6nW", "5KU",
+"41P", "7PZ", "mD", "0Nv", "0CF", "22m", "6MK", "4xh", "4VD", "6cg", "NX", "0mj", "5za", "7OB", "6T", "0Qn", "1oc", "2Ja", "65G", "4Dl",
+"b7g", "68w", "1w2", "06z", "8UF", "dSN", "5b0", "4gp", "4hq", "5m1", "4e", "8ZG", "1mR", "1x3", "67v", "726", "4Km", "7no", "Sq", "04K",
+"1NN", "9U", "6Pb", "4eA", "adr", "5kb", "26", "21F", "Ms", "0nA", "4Uo", "74e", "79U", "bbl", "0cq", "1f1", "no", "396", "4vs", "5s3",
+"6LQ", "4yr", "367", "an", "OB", "0lp", "bmm", "76T", "6oM", "4Zn", "15i", "Br", "2zO", "0Ol", "40J", "6Aa", "6SM", "4fn", "1Ma", "2hc",
+"2FO", "07d", "4HB", "69i", "64Y", "4Er", "83L", "d5D", "7J", "0Pp", "bQm", "a0g", "456", "50v", "1OP", "8K", "Ro", "05U", "4Js", "5O3",
+"66h", "4GC", "08e", "2IN", "qs", "0RA", "4io", "7Lm", "43e", "6BN", "oq", "0LC", "0bo", "2WL", "6lb", "4YA", "4Tq", "5Q1", "Lm", "8fG",
+"0As", "bA", "ael", "cPO", "41T", "ajm", "1K2", "0Nr", "14w", "Cl", "6nS", "5KQ", "5Fa", "6cc", "2XM", "0mn", "0CB", "22i", "6MO", "4xl",
+"1og", "2Je", "65C", "4Dh", "4jD", "7OF", "6P", "g9", "3l9", "2iy", "5b4", "4gt", "4IX", "68s", "QD", "0rv", "1mV", "1x7", "4S8", "4FY",
+"4hu", "5m5", "4a", "1Cz", "h8", "9Q", "6Pf", "4eE", "4Ki", "7nk", "Su", "04O", "Mw", "0nE", "4Uk", "74a", "6Nd", "5kf", "22", "21B",
+"nk", "0MY", "4vw", "5s7", "6mx", "5Hz", "0cu", "1f5", "OF", "0lt", "4WZ", "6by", "6LU", "4yv", "0BX", "aj", "lZ", "0Oh", "40N", "6Ae",
+"6oI", "4Zj", "0aD", "Bv", "5f", "9Ke", "4ir", "5l2", "66u", "735", "08x", "1y0", "Rr", "05H", "4Jn", "7ol", "6Qa", "4dB", "1OM", "8V",
+"7W", "0Pm", "4kC", "7NA", "64D", "4Eo", "83Q", "2Kb", "PC", "07y", "b6d", "69t", "5c3", "4fs", "8TE", "dRM", "374", "22t", "599", "4xq",
+"bln", "77W", "NA", "0ms", "14j", "Cq", "6nN", "5KL", "41I", "7PC", "3km", "0No", "35", "20E", "6Oc", "5ja", "4Tl", "6aO", "Lp", "0oB",
+"0br", "1g2", "78V", "bco", "43x", "568", "ol", "385", "4Kt", "5N4", "Sh", "04R", "1NW", "9L", "441", "4eX", "4hh", "7Mj", "pt", "0SF",
+"K9", "2HI", "67o", "4FD", "4IE", "68n", "QY", "0", "1Lf", "2id", "6RJ", "4gi", "4jY", "auh", "6M", "0Qw", "1oz", "2Jx", "5A5", "4Du",
+"6oT", "4Zw", "0aY", "Bk", "lG", "0Ou", "40S", "6Ax", "6LH", "4yk", "0BE", "aw", "2YJ", "0li", "4WG", "6bd", "6me", "4XF", "0ch", "2VK",
+"nv", "0MD", "42b", "6CI", "6Ny", "7K9", "1PU", "cF", "Mj", "0nX", "4Uv", "5P6", "66q", "4GZ", "1lU", "1y4", "5b", "0RX", "4iv", "5l6",
+"6Qe", "4dF", "1OI", "8R", "Rv", "05L", "4Jj", "7oh", "6pH", "4Ek", "1nd", "2Kf", "7S", "0Pi", "4kG", "7NE", "5c7", "4fw", "1Mx", "0H8",
+"PG", "0su", "5Xz", "69p", "4VY", "4C8", "NE", "0mw", "1Sz", "22p", "6MV", "4xu", "41M", "7PG", "mY", "x8", "14n", "Cu", "6nJ", "5KH",
+"4Th", "6aK", "Lt", "0oF", "31", "bX", "6Og", "4zD", "4wt", "5r4", "oh", "0LZ", "0bv", "AD", "4L9", "4YX", "1NS", "9H", "445", "51u",
+"4Kp", "5N0", "Sl", "04V", "09f", "2HM", "67k", "5Va", "4hl", "7Mn", "4x", "0SB", "1Lb", "3yA", "6RN", "4gm", "4IA", "68j", "2GL", "4",
+"82O", "d4G", "5A1", "4Dq", "bPn", "a1d", "6I", "0Qs", "lC", "0Oq", "40W", "akn", "6oP", "4Zs", "15t", "Bo", "2YN", "0lm", "4WC", "76I",
+"6LL", "4yo", "0BA", "as", "nr", "8DX", "42f", "6CM", "6ma", "4XB", "0cl", "2VO", "Mn", "8gD", "4Ur", "5P2", "ado", "bAm", "1PQ", "cB",
+"Rz", "0qH", "4Jf", "7od", "6Qi", "4dJ", "i7", "2jG", "5n", "0RT", "4iz", "7Lx", "4R7", "4GV", "08p", "1y8", "PK", "07q", "4HW", "7mU",
+"6SX", "52R", "1Mt", "0H4", "sW", "f6", "4kK", "7NI", "64L", "4Eg", "1nh", "2Kj", "14b", "Cy", "6nF", "5KD", "41A", "7PK", "mU", "x4",
+"0CW", "0V6", "591", "4xy", "4VU", "4C4", "NI", "19R", "0bz", "AH", "4L5", "4YT", "43p", "560", "od", "0LV", "w5", "bT", "6Ok", "4zH",
+"4Td", "6aG", "Lx", "0oJ", "5xA", "7Mb", "4t", "0SN", "K1", "2HA", "67g", "4FL", "b5G", "aTM", "0e3", "04Z", "8Wf", "9D", "449", "4eP",
+"4jQ", "7OS", "6E", "255", "1or", "0j2", "65V", "cno", "4IM", "68f", "QQ", "8", "1Ln", "2il", "6RB", "4ga", "afR", "4yc", "0BM", "23f",
+"OS", "Z2", "4WO", "6bl", "aEN", "c4e", "0aQ", "Bc", "lO", "8Fe", "4tS", "4a2", "4n3", "5ks", "8Id", "cN", "Mb", "0nP", "614", "74t",
+"6mm", "4XN", "U3", "2VC", "2xo", "0ML", "42j", "6CA", "6Qm", "4dN", "i3", "8Z", "2Do", "05D", "4Jb", "aUS", "4R3", "4GR", "08t", "d7d",
+"5j", "0RP", "bSM", "a2G", "ayN", "52V", "1Mp", "0H0", "PO", "07u", "4HS", "69x", "64H", "4Ec", "1nl", "2Kn", "sS", "f2", "4kO", "7NM",
+"41E", "7PO", "mQ", "x0", "14f", "2Ul", "6nB", "aQ1", "4VQ", "4C0", "NM", "19V", "0CS", "0V2", "595", "bBN", "43t", "564", "0Y3", "0LR",
+"16W", "AL", "4L1", "4YP", "5DA", "6aC", "2Zm", "0oN", "39", "bP", "6Oo", "4zL", "K5", "2HE", "67c", "4FH", "4hd", "7Mf", "4p", "0SJ",
+"8Wb", "2kY", "4p5", "4eT", "4Kx", "5N8", "Sd", "0pV", "1ov", "0j6", "5A9", "4Dy", "4jU", "7OW", "6A", "1AZ", "1Lj", "2ih", "6RF", "4ge",
+"4II", "68b", "QU", "D4", "OW", "Z6", "4WK", "6bh", "6LD", "4yg", "0BI", "23b", "lK", "0Oy", "4tW", "4a6", "6oX", "5JZ", "0aU", "Bg",
+"Mf", "0nT", "4Uz", "74p", "4n7", "5kw", "1PY", "cJ", "nz", "0MH", "42n", "6CE", "6mi", "4XJ", "U7", "2VG", "4MP", "4X1", "UL", "02v",
+"0XR", "0M3", "a8E", "57U", "4nL", "7KN", "2X", "c1", "1ko", "2Nm", "61K", "5PA", "4Oa", "6zB", "2Al", "00G", "l0", "yQ", "6Tn", "4aM",
+"58T", "a7D", "0i", "0WS", "84o", "ZM", "4W0", "4BQ", "4I2", "5Lr", "13T", "G", "jc", "0IQ", "46w", "537", "6Jl", "5on", "r2", "gS",
+"3OO", "0jM", "4Qc", "70i", "6kA", "5NC", "0eL", "2Po", "hR", "8Bx", "44F", "6Em", "abO", "49v", "0FP", "eb", "KN", "8ad", "4SR", "4F3",
+"3B", "0Tx", "4oV", "4z7", "60Q", "4Az", "0zT", "Yf", "TV", "A7", "4LJ", "6yi", "6WE", "4bf", "0YH", "zz", "1s", "0VI", "4mg", "6XD",
+"6vh", "4CK", "N6", "2MF", "Vg", "0uU", "6n9", "7ky", "4u6", "5pv", "1KX", "xK", "1UZ", "fI", "4k4", "5nt", "4Py", "5U9", "He", "0kW",
+"P4", "EU", "6hj", "5Mh", "47m", "6FF", "ky", "0HK", "0GJ", "dx", "6IG", "48l", "4RH", "6gk", "JT", "0if", "0dV", "Gd", "5Z8", "5OY",
+"4qT", "4d5", "iH", "0Jz", "0XV", "0M7", "5f8", "4cx", "4MT", "4X5", "UH", "02r", "1kk", "Xx", "61O", "5PE", "4nH", "7KJ", "vT", "c5",
+"l4", "yU", "6Tj", "4aI", "4Oe", "6zF", "Wy", "00C", "1iZ", "ZI", "4W4", "4BU", "4ly", "5i9", "0m", "0WW", "jg", "0IU", "46s", "533",
+"4I6", "5Lv", "0gy", "C", "3OK", "0jI", "4Qg", "6dD", "6Jh", "5oj", "r6", "gW", "hV", "0Kd", "44B", "6Ei", "6kE", "5NG", "0eH", "Fz",
+"KJ", "0hx", "4SV", "4F7", "6HY", "49r", "0FT", "ef", "60U", "ckl", "0zP", "Yb", "3F", "206", "4oR", "4z3", "6WA", "4bb", "0YL", "2lo",
+"TR", "A3", "4LN", "6ym", "62d", "4CO", "N2", "2MB", "1w", "0VM", "4mc", "7Ha", "4u2", "54z", "8Re", "xO", "Vc", "01Y", "b0D", "aQN",
+"647", "71w", "Ha", "0kS", "8Lg", "fM", "4k0", "5np", "47i", "6FB", "29d", "0HO", "P0", "EQ", "6hn", "5Ml", "4RL", "6go", "JP", "0ib",
+"0GN", "26e", "6IC", "48h", "45X", "4d1", "iL", "8Cf", "0dR", "0q3", "hYt", "beO", "4nD", "7KF", "2P", "c9", "1kg", "Xt", "61C", "5PI",
+"4MX", "4X9", "UD", "0vv", "0XZ", "2my", "5f4", "4ct", "4lu", "5i5", "0a", "1Gz", "0yw", "ZE", "4W8", "4BY", "4Oi", "6zJ", "Wu", "00O",
+"l8", "yY", "6Tf", "4aE", "6Jd", "5of", "62", "25B", "Iw", "0jE", "4Qk", "6dH", "6ix", "5Lz", "0gu", "O", "jk", "0IY", "4rw", "5w7",
+"5x6", "5mW", "0FX", "ej", "KF", "0ht", "4SZ", "6fy", "6kI", "5NK", "0eD", "Fv", "hZ", "93", "44N", "6Ee", "2BO", "03d", "4LB", "6ya",
+"6WM", "4bn", "1Ia", "zr", "3J", "0Tp", "bUm", "a4g", "5D2", "4Ar", "87L", "Yn", "Vo", "01U", "4Ns", "5K3", "416", "54v", "1KP", "xC",
+"us", "0VA", "4mo", "6XL", "62h", "4CC", "0xm", "2MN", "0fo", "2SL", "6hb", "bgr", "47e", "6FN", "kq", "0HC", "0Es", "fA", "aal", "bDn",
+"4Pq", "5U1", "Hm", "8bG", "10w", "Gl", "5Z0", "5OQ", "45T", "anm", "1O2", "0Jr", "0GB", "dp", "6IO", "48d", "5Ba", "6gc", "3Ll", "0in",
+"1kc", "Xp", "61G", "5PM", "bTs", "7KB", "2T", "0Un", "8QF", "39T", "5f0", "4cp", "797", "aRm", "1s2", "02z", "0ys", "ZA", "63v", "766",
+"4lq", "5i1", "0e", "9Nf", "0Zo", "2oL", "6Tb", "4aA", "4Om", "6zN", "Wq", "00K", "Is", "0jA", "4Qo", "6dL", "7ZA", "5ob", "66", "25F",
+"jo", "9Pd", "4rs", "5w3", "aCn", "bfl", "0gq", "K", "KB", "0hp", "bim", "72T", "5x2", "49z", "327", "en", "3nn", "97", "44J", "6Ea",
+"6kM", "5NO", "11i", "Fr", "6WI", "4bj", "0YD", "zv", "TZ", "0wh", "4LF", "6ye", "5D6", "4Av", "0zX", "Yj", "3N", "0Tt", "4oZ", "6Zy",
+"412", "54r", "1KT", "xG", "Vk", "01Q", "4Nw", "5K7", "62l", "4CG", "0xi", "2MJ", "uw", "0VE", "4mk", "6XH", "47a", "6FJ", "ku", "0HG",
+"P8", "EY", "6hf", "5Md", "4Pu", "5U5", "Hi", "8bC", "0Ew", "fE", "4k8", "5nx", "45P", "4d9", "iD", "0Jv", "0dZ", "Gh", "5Z4", "5OU",
+"4RD", "6gg", "JX", "0ij", "0GF", "dt", "6IK", "5lI", "4Op", "5J0", "Wl", "00V", "0Zr", "2oQ", "405", "55u", "4ll", "6YO", "0x", "0WB",
+"0yn", "2LM", "63k", "5Ra", "4MA", "6xb", "2CL", "02g", "0XC", "39I", "6VN", "4cm", "bTn", "a5d", "2I", "0Us", "86O", "Xm", "5E1", "5PP",
+"6kP", "5NR", "11t", "Fo", "hC", "0Kq", "44W", "aon", "6HL", "49g", "0FA", "es", "3Mo", "0hm", "4SC", "72I", "6ia", "5Lc", "0gl", "V",
+"jr", "1Ya", "46f", "6GM", "hyV", "bEm", "0Dp", "gB", "In", "8cD", "4Qr", "5T2", "1b", "0VX", "4mv", "5h6", "62q", "4CZ", "0xt", "2MW",
+"Vv", "01L", "4Nj", "7kh", "6Ue", "54o", "1KI", "xZ", "3S", "0Ti", "4oG", "6Zd", "6tH", "4Ak", "0zE", "Yw", "TG", "0wu", "780", "6yx",
+"5g7", "4bw", "0YY", "zk", "1Wz", "di", "5y5", "5lT", "4RY", "4G8", "JE", "0iw", "0dG", "Gu", "6jJ", "5OH", "45M", "6Df", "iY", "80",
+"71", "fX", "6Kg", "5ne", "4Ph", "6eK", "Ht", "0kF", "0fv", "ED", "4H9", "5My", "4st", "5v4", "kh", "0HZ", "0Zv", "yD", "401", "4aX",
+"4Ot", "5J4", "Wh", "00R", "O9", "ZX", "63o", "4BD", "4lh", "6YK", "tt", "0WF", "0XG", "2md", "6VJ", "4ci", "4ME", "6xf", "UY", "02c",
+"1kz", "Xi", "5E5", "5PT", "4nY", "aqh", "2M", "0Uw", "hG", "0Ku", "44S", "6Ex", "6kT", "5NV", "0eY", "Fk", "3Mk", "0hi", "4SG", "6fd",
+"6HH", "49c", "0FE", "ew", "jv", "0ID", "46b", "6GI", "6ie", "5Lg", "0gh", "R", "Ij", "0jX", "4Qv", "5T6", "6Jy", "7O9", "0Dt", "gF",
+"62u", "775", "0xp", "198", "1f", "9Oe", "4mr", "5h2", "6Ua", "54k", "1KM", "2nO", "Vr", "01H", "4Nn", "7kl", "60D", "4Ao", "0zA", "Ys",
+"3W", "0Tm", "4oC", "7JA", "5g3", "4bs", "8PE", "zo", "TC", "03y", "784", "aSn", "bhn", "73W", "JA", "0is", "334", "dm", "5y1", "48y",
+"45I", "6Db", "3om", "84", "0dC", "Gq", "6jN", "5OL", "4Pl", "6eO", "Hp", "0kB", "75", "24E", "6Kc", "5na", "47x", "528", "kl", "8AF",
+"0fr", "1c2", "aBm", "bgo", "4ld", "6YG", "0p", "0WJ", "O5", "ZT", "63c", "4BH", "4Ox", "5J8", "Wd", "0tV", "0Zz", "yH", "4t5", "4aT",
+"4nU", "7KW", "2A", "1EZ", "1kv", "Xe", "5E9", "5PX", "4MI", "6xj", "UU", "02o", "0XK", "2mh", "6VF", "4ce", "6HD", "49o", "0FI", "27b",
+"KW", "0he", "4SK", "6fh", "6kX", "5NZ", "0eU", "Fg", "hK", "0Ky", "4pW", "4e6", "4j7", "5ow", "0Dx", "gJ", "If", "0jT", "4Qz", "6dY",
+"6ii", "5Lk", "Q7", "DV", "jz", "0IH", "46n", "6GE", "3PN", "01D", "4Nb", "aQS", "6Um", "54g", "m3", "xR", "1j", "0VP", "59W", "a6G",
+"4V3", "4CR", "85l", "194", "TO", "03u", "4LS", "4Y2", "a9F", "56V", "0YQ", "zc", "wS", "b2", "4oO", "6Zl", "60H", "4Ac", "0zM", "2On",
+"0dO", "2Ql", "6jB", "aU1", "45E", "6Dn", "iQ", "88", "0GS", "da", "acL", "48u", "4RQ", "4G0", "JM", "94N", "12W", "EL", "4H1", "5Mq",
+"47t", "524", "29y", "0HR", "79", "fP", "6Ko", "5nm", "aZ0", "6eC", "3NL", "0kN", "O1", "ZP", "63g", "4BL", "58I", "6YC", "0t", "0WN",
+"8Sf", "yL", "409", "4aP", "b1G", "aPM", "0a3", "00Z", "1kr", "Xa", "61V", "bzN", "4nQ", "7KS", "2E", "215", "0XO", "2ml", "6VB", "4ca",
+"4MM", "6xn", "UQ", "02k", "KS", "0ha", "4SO", "6fl", "7Xa", "49k", "0FM", "27f", "hO", "8Be", "4pS", "4e2", "aAN", "bdL", "0eQ", "Fc",
+"Ib", "0jP", "654", "70t", "4j3", "5os", "8Md", "gN", "28g", "0IL", "46j", "6GA", "6im", "5Lo", "Q3", "Z", "6Ui", "54c", "m7", "xV",
+"Vz", "0uH", "4Nf", "7kd", "4V7", "4CV", "0xx", "190", "1n", "0VT", "4mz", "6XY", "6WX", "56R", "0YU", "zg", "TK", "03q", "4LW", "4Y6",
+"60L", "4Ag", "0zI", "2Oj", "wW", "b6", "4oK", "6Zh", "45A", "6Dj", "iU", "0Jg", "0dK", "Gy", "6jF", "5OD", "4RU", "4G4", "JI", "1yZ",
+"0GW", "de", "5y9", "48q", "47p", "520", "kd", "0HV", "0fz", "EH", "4H5", "5Mu", "4Pd", "6eG", "Hx", "0kJ", "s5", "fT", "6Kk", "5ni",
+"5Y1", "5LP", "13v", "e", "jA", "0Is", "46U", "aml", "6JN", "5oL", "0DC", "gq", "3Om", "0jo", "4QA", "6db", "6kc", "5Na", "0en", "2PM",
+"hp", "0KB", "44d", "6EO", "abm", "49T", "0Fr", "1C2", "Kl", "8aF", "4Sp", "5V0", "4Mr", "5H2", "Un", "02T", "0Xp", "2mS", "427", "57w",
+"4nn", "7Kl", "2z", "1Ea", "1kM", "2NO", "61i", "5Pc", "4OC", "7jA", "2AN", "00e", "0ZA", "ys", "6TL", "4ao", "58v", "a7f", "0K", "0Wq",
+"84M", "Zo", "5G3", "4Bs", "0EY", "fk", "6KT", "5nV", "bjh", "6ex", "HG", "0ku", "0fE", "Ew", "6hH", "5MJ", "47O", "6Fd", "29B", "0Hi",
+"53", "dZ", "6Ie", "48N", "4Rj", "6gI", "Jv", "0iD", "0dt", "GF", "6jy", "7o9", "4qv", "5t6", "ij", "0JX", "wh", "0TZ", "4ot", "5j4",
+"4T9", "4AX", "0zv", "YD", "Tt", "03N", "4Lh", "6yK", "6Wg", "4bD", "o9", "zX", "1Q", "0Vk", "4mE", "6Xf", "62B", "4Ci", "0xG", "2Md",
+"VE", "0uw", "4NY", "aQh", "5e5", "5pT", "1Kz", "xi", "jE", "0Iw", "46Q", "4g8", "5Y5", "5LT", "13r", "a", "IY", "0jk", "4QE", "6df",
+"6JJ", "5oH", "0DG", "gu", "ht", "0KF", "4ph", "6EK", "6kg", "5Ne", "S9", "FX", "Kh", "0hZ", "4St", "5V4", "4h9", "49P", "0Fv", "eD",
+"0Xt", "2mW", "423", "4cZ", "4Mv", "5H6", "Uj", "02P", "1kI", "XZ", "61m", "5Pg", "4nj", "7Kh", "vv", "0UD", "0ZE", "yw", "6TH", "4ak",
+"4OG", "6zd", "2AJ", "00a", "0yY", "Zk", "5G7", "4Bw", "58r", "6Yx", "0O", "0Wu", "bjl", "71U", "HC", "0kq", "316", "fo", "6KP", "5nR",
+"47K", "7VA", "29F", "0Hm", "0fA", "Es", "6hL", "5MN", "4Rn", "6gM", "Jr", "1ya", "57", "26G", "6Ia", "48J", "45z", "5t2", "in", "8CD",
+"0dp", "GB", "hYV", "bem", "60w", "757", "0zr", "2OQ", "3d", "9Mg", "4op", "5j0", "6Wc", "56i", "0Yn", "2lM", "Tp", "03J", "4Ll", "6yO",
+"62F", "4Cm", "0xC", "dwS", "1U", "0Vo", "4mA", "6Xb", "5e1", "54X", "8RG", "xm", "VA", "0us", "b0f", "aQl", "6JF", "5oD", "0DK", "gy",
+"IU", "0jg", "4QI", "6dj", "5Y9", "5LX", "0gW", "m", "jI", "1YZ", "4rU", "4g4", "4h5", "5mu", "0Fz", "eH", "Kd", "0hV", "4Sx", "5V8",
+"6kk", "5Ni", "S5", "FT", "hx", "0KJ", "44l", "6EG", "4nf", "7Kd", "2r", "0UH", "M7", "XV", "61a", "5Pk", "4Mz", "6xY", "Uf", "0vT",
+"0Xx", "39r", "4v7", "4cV", "4lW", "4y6", "0C", "0Wy", "0yU", "Zg", "63P", "5RZ", "4OK", "6zh", "WW", "B6", "0ZI", "2oj", "6TD", "4ag",
+"0fM", "2Sn", "7xa", "5MB", "47G", "6Fl", "kS", "0Ha", "0EQ", "fc", "aaN", "bDL", "4PS", "4E2", "HO", "8be", "10U", "GN", "4J3", "5Os",
+"45v", "506", "ib", "0JP", "q3", "dR", "6Im", "48F", "4Rb", "6gA", "3LN", "0iL", "2Bm", "03F", "aF0", "6yC", "6Wo", "4bL", "o1", "zP",
+"3h", "0TR", "bUO", "a4E", "4T1", "4AP", "87n", "YL", "VM", "01w", "4NQ", "7kS", "hfu", "54T", "1Kr", "xa", "1Y", "0Vc", "4mM", "6Xn",
+"62J", "4Ca", "0xO", "2Ml", "IQ", "0jc", "4QM", "6dn", "6JB", "a19", "0DO", "25d", "jM", "9PF", "46Y", "4g0", "aCL", "687", "0gS", "i",
+"3MP", "0hR", "676", "72v", "4h1", "49X", "8Of", "eL", "3nL", "0KN", "44h", "6EC", "6ko", "5Nm", "S1", "FP", "M3", "XR", "61e", "5Po",
+"4nb", "aqS", "2v", "0UL", "8Qd", "39v", "4v3", "4cR", "b3E", "aRO", "Ub", "02X", "0yQ", "Zc", "63T", "bxL", "4lS", "4y2", "0G", "237",
+"0ZM", "2on", "7Da", "4ac", "4OO", "6zl", "WS", "B2", "47C", "6Fh", "kW", "0He", "0fI", "2Sj", "6hD", "5MF", "4PW", "4E6", "HK", "0ky",
+"0EU", "fg", "6KX", "5nZ", "45r", "502", "if", "0JT", "0dx", "GJ", "4J7", "5Ow", "4Rf", "6gE", "Jz", "0iH", "q7", "dV", "6Ii", "48B",
+"6Wk", "4bH", "o5", "zT", "Tx", "03B", "4Ld", "6yG", "4T5", "4AT", "0zz", "YH", "3l", "0TV", "4ox", "5j8", "5e9", "54P", "1Kv", "xe",
+"VI", "01s", "4NU", "7kW", "62N", "4Ce", "0xK", "2Mh", "uU", "0Vg", "4mI", "6Xj", "4K0", "5Np", "11V", "FM", "ha", "0KS", "44u", "515",
+"6Hn", "49E", "48", "eQ", "3MM", "0hO", "4Sa", "6fB", "6iC", "5LA", "0gN", "t", "jP", "0Ib", "46D", "6Go", "hyt", "bEO", "0DR", "0Q3",
+"IL", "8cf", "4QP", "4D1", "4OR", "4Z3", "WN", "00t", "0ZP", "yb", "hgv", "55W", "4lN", "6Ym", "0Z", "a3", "0yL", "2Lo", "63I", "4Bb",
+"4Mc", "7ha", "2Cn", "02E", "n2", "2mB", "6Vl", "4cO", "bTL", "a5F", "2k", "0UQ", "86m", "XO", "4U2", "5Pr", "0Gy", "dK", "4i6", "5lv",
+"5BZ", "6gX", "Jg", "0iU", "R6", "GW", "6jh", "5Oj", "45o", "6DD", "3oK", "0JI", "0EH", "fz", "6KE", "5nG", "4PJ", "6ei", "HV", "0kd",
+"0fT", "Ef", "6hY", "690", "4sV", "4f7", "kJ", "0Hx", "uH", "0Vz", "4mT", "4x5", "5F8", "4Cx", "0xV", "0m7", "VT", "C5", "4NH", "7kJ",
+"6UG", "54M", "1Kk", "xx", "3q", "0TK", "4oe", "6ZF", "60b", "4AI", "L4", "YU", "Te", "0wW", "4Ly", "5I9", "4w4", "4bU", "1IZ", "zI",
+"he", "0KW", "44q", "511", "4K4", "5Nt", "11R", "FI", "Ky", "0hK", "4Se", "6fF", "6Hj", "49A", "p4", "eU", "jT", "0If", "4rH", "6Gk",
+"6iG", "5LE", "0gJ", "p", "IH", "0jz", "4QT", "4D5", "5z8", "5oY", "0DV", "gd", "0ZT", "yf", "6TY", "4az", "4OV", "4Z7", "WJ", "00p",
+"0yH", "Zz", "63M", "4Bf", "4lJ", "6Yi", "tV", "a7", "n6", "2mF", "6Vh", "4cK", "4Mg", "6xD", "2Cj", "02A", "1kX", "XK", "4U6", "5Pv",
+"6N9", "7Ky", "2o", "0UU", "665", "73u", "Jc", "0iQ", "8Ne", "dO", "4i2", "5lr", "45k", "7Ta", "3oO", "0JM", "R2", "GS", "6jl", "5On",
+"4PN", "6em", "HR", "8bx", "0EL", "24g", "6KA", "5nC", "47Z", "4f3", "kN", "8Ad", "0fP", "Eb", "aBO", "694", "62W", "byO", "0xR", "0m3",
+"1D", "224", "4mP", "4x1", "6UC", "54I", "1Ko", "2nm", "VP", "C1", "4NL", "7kN", "60f", "4AM", "L0", "YQ", "3u", "0TO", "4oa", "6ZB",
+"438", "4bQ", "8Pg", "zM", "Ta", "0wS", "b2F", "aSL", "6Hf", "49M", "40", "eY", "Ku", "0hG", "4Si", "6fJ", "4K8", "5Nx", "0ew", "FE",
+"hi", "8BC", "4pu", "5u5", "5z4", "5oU", "0DZ", "gh", "ID", "0jv", "4QX", "4D9", "6iK", "5LI", "0gF", "Dt", "jX", "0Ij", "46L", "6Gg",
+"4lF", "6Ye", "0R", "0Wh", "0yD", "Zv", "63A", "4Bj", "4OZ", "6zy", "WF", "0tt", "0ZX", "yj", "5d6", "4av", "4nw", "5k7", "2c", "0UY",
+"1kT", "XG", "61p", "5Pz", "4Mk", "6xH", "Uw", "02M", "0Xi", "2mJ", "6Vd", "4cG", "0dm", "2QN", "7zA", "5Ob", "45g", "6DL", "is", "0JA",
+"0Gq", "dC", "acn", "48W", "4Rs", "5W3", "Jo", "94l", "12u", "En", "5X2", "5MS", "47V", "alo", "kB", "0Hp", "1Ua", "fr", "6KM", "5nO",
+"4PB", "6ea", "3Nn", "0kl", "3Pl", "01f", "bts", "7kB", "6UO", "54E", "1Kc", "xp", "1H", "0Vr", "59u", "a6e", "5F0", "4Cp", "85N", "d3F",
+"Tm", "03W", "4Lq", "5I1", "434", "56t", "0Ys", "zA", "3y", "0TC", "4om", "6ZN", "60j", "4AA", "0zo", "2OL", "Kq", "0hC", "4Sm", "6fN",
+"6Hb", "49I", "44", "27D", "hm", "8BG", "44y", "519", "aAl", "bdn", "0es", "FA", "1o2", "0jr", "bko", "70V", "5z0", "5oQ", "305", "gl",
+"28E", "0In", "46H", "6Gc", "6iO", "5LM", "0gB", "x", "1ia", "Zr", "63E", "4Bn", "4lB", "6Ya", "0V", "0Wl", "8SD", "yn", "5d2", "4ar",
+"b1e", "aPo", "WB", "00x", "1kP", "XC", "61t", "744", "4ns", "5k3", "2g", "9Ld", "0Xm", "2mN", "7FA", "4cC", "4Mo", "6xL", "Us", "02I",
+"45c", "6DH", "iw", "0JE", "0di", "2QJ", "6jd", "5Of", "4Rw", "5W7", "Jk", "0iY", "0Gu", "dG", "6Ix", "48S", "47R", "6Fy", "kF", "0Ht",
+"0fX", "Ej", "5X6", "5MW", "4PF", "6ee", "HZ", "0kh", "0ED", "fv", "6KI", "5nK", "6UK", "54A", "1Kg", "xt", "VX", "C9", "4ND", "7kF",
+"5F4", "4Ct", "0xZ", "2My", "1L", "0Vv", "4mX", "4x9", "430", "4bY", "0Yw", "zE", "Ti", "03S", "4Lu", "5I5", "60n", "4AE", "L8", "YY",
+"wu", "0TG", "4oi", "6ZJ" };
+
+
+#endif
+
diff --git a/src/crc64.c b/src/crc64.c
new file mode 100644
index 0000000..73e0391
--- /dev/null
+++ b/src/crc64.c
@@ -0,0 +1,161 @@
+/* Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * Copyright (c) 2020, Amazon Web Services
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE. */
+
#include "crc64.h"
#include "crcspeed.h"
/* 8 x 256-entry slice-by-8 lookup tables (16KB total), zero until
 * crc64_init() fills them from the bit-at-a-time reference CRC below. */
static uint64_t crc64_table[8][256] = {{0}};

/* CRC-64 generator polynomial in normal (MSB-first) bit order; the
 * reflected variant is applied via crc_reflect() at the end of _crc64(). */
#define POLY UINT64_C(0xad93d23594c935a9)
+/******************** BEGIN GENERATED PYCRC FUNCTIONS ********************/
+/**
+ * Generated on Sun Dec 21 14:14:07 2014,
+ * by pycrc v0.8.2, https://www.tty1.net/pycrc/
+ *
+ * LICENSE ON GENERATED CODE:
+ * ==========================
+ * As of version 0.6, pycrc is released under the terms of the MIT licence.
+ * The code generated by pycrc is not considered a substantial portion of the
+ * software, therefore the author of pycrc will not claim any copyright on
+ * the generated code.
+ * ==========================
+ *
+ * CRC configuration:
+ * Width = 64
+ * Poly = 0xad93d23594c935a9
+ * XorIn = 0xffffffffffffffff
+ * ReflectIn = True
+ * XorOut = 0x0000000000000000
+ * ReflectOut = True
+ * Algorithm = bit-by-bit-fast
+ *
+ * Modifications after generation (by matt):
+ * - included finalize step in-line with update for single-call generation
+ * - re-worked some inner variable architectures
+ * - adjusted function parameters to match expected prototypes.
+ *****************************************************************************/
+
/**
 * Mirror the lowest \a data_len bits of \a data (bit 0 becomes bit
 * data_len-1 and vice versa).
 *
 * \param data The word whose low bits are reflected.
 * \param data_len Number of significant bits in \a data.
 * \return The bit-reversed value.
 *****************************************************************************/
static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) {
    uint_fast64_t reflected = data & 0x01;

    /* Peel the remaining data_len-1 bits off the low end of data and shift
     * them onto the low end of the result, reversing their order. */
    for (size_t bit = data_len; bit > 1; bit--) {
        data >>= 1;
        reflected = (reflected << 1) | (data & 0x01);
    }

    return reflected;
}
+
+/**
+ * Update the crc value with new data.
+ *
+ * \param crc The current crc value.
+ * \param data Pointer to a buffer of \a data_len bytes.
+ * \param data_len Number of bytes in the \a data buffer.
+ * \return The updated crc value.
+ ******************************************************************************/
+uint64_t _crc64(uint_fast64_t crc, const void *in_data, const uint64_t len) {
+ const uint8_t *data = in_data;
+ unsigned long long bit;
+
+ for (uint64_t offset = 0; offset < len; offset++) {
+ uint8_t c = data[offset];
+ for (uint_fast8_t i = 0x01; i & 0xff; i <<= 1) {
+ bit = crc & 0x8000000000000000;
+ if (c & i) {
+ bit = !bit;
+ }
+
+ crc <<= 1;
+ if (bit) {
+ crc ^= POLY;
+ }
+ }
+
+ crc &= 0xffffffffffffffff;
+ }
+
+ crc = crc & 0xffffffffffffffff;
+ return crc_reflect(crc, 64) ^ 0x0000000000000000;
+}
+
+/******************** END GENERATED PYCRC FUNCTIONS ********************/
+
/* Initializes the 16KB lookup tables. */
void crc64_init(void) {
    /* Build the slice-by-8 tables for the host's endianness, deriving every
     * entry from the bit-at-a-time reference implementation above. Must be
     * called once before crc64(). */
    crcspeed64native_init(_crc64, crc64_table);
}
+
/* Compute crc64 */
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {
    /* Delegates to the table-driven engine; if crc64_init() has not been
     * called yet the tables are all zero and the result is meaningless. */
    return crcspeed64native(crc64_table, crc, (void *) s, l);
}
+
+/* Test main */
+#ifdef REDIS_TEST
+#include <stdio.h>
+
+#define UNUSED(x) (void)(x)
+int crc64Test(int argc, char *argv[], int flags) {
+ UNUSED(argc);
+ UNUSED(argv);
+ UNUSED(flags);
+ crc64_init();
+ printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
+ (uint64_t)_crc64(0, "123456789", 9));
+ printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
+ (uint64_t)crc64(0, (unsigned char*)"123456789", 9));
+ char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed "
+ "do eiusmod tempor incididunt ut labore et dolore magna "
+ "aliqua. Ut enim ad minim veniam, quis nostrud exercitation "
+ "ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis "
+ "aute irure dolor in reprehenderit in voluptate velit esse "
+ "cillum dolore eu fugiat nulla pariatur. Excepteur sint "
+ "occaecat cupidatat non proident, sunt in culpa qui officia "
+ "deserunt mollit anim id est laborum.";
+ printf("[calcula]: c7794709e69683b3 == %016" PRIx64 "\n",
+ (uint64_t)_crc64(0, li, sizeof(li)));
+ printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n",
+ (uint64_t)crc64(0, (unsigned char*)li, sizeof(li)));
+ return 0;
+}
+
+#endif
+
+#ifdef REDIS_TEST_MAIN
int main(int argc, char *argv[]) {
    /* crc64Test takes a third `flags` argument; calling it with two
     * arguments does not match the prototype and fails to compile. Pass 0
     * (no special test modes). */
    return crc64Test(argc, argv, 0);
}
+
+#endif
diff --git a/src/crc64.h b/src/crc64.h
new file mode 100644
index 0000000..e0fccd9
--- /dev/null
+++ b/src/crc64.h
@@ -0,0 +1,13 @@
#ifndef CRC64_H
#define CRC64_H

#include <stdint.h>

/* One-time initialization of the internal 16KB lookup tables; must be
 * called before crc64(). */
void crc64_init(void);
/* Incrementally compute the CRC-64 (polynomial 0xad93d23594c935a9,
 * reflected in/out, no final XOR) of the l bytes at s, continuing from
 * crc. Start a fresh computation with crc == 0. */
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);

#ifdef REDIS_TEST
int crc64Test(int argc, char *argv[], int flags);
#endif

#endif
diff --git a/src/crcspeed.c b/src/crcspeed.c
new file mode 100644
index 0000000..9682d8e
--- /dev/null
+++ b/src/crcspeed.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2013 Mark Adler
+ * Originally by: crc64.c Version 1.4 16 Dec 2013 Mark Adler
+ * Modifications by Matt Stancliff <matt@genges.com>:
+ * - removed CRC64-specific behavior
+ * - added generation of lookup tables by parameters
+ * - removed inversion of CRC input/result
+ * - removed automatic initialization in favor of explicit initialization
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the author be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Mark Adler
+ madler@alumni.caltech.edu
+ */
+
+#include "crcspeed.h"
+
+/* Fill in a CRC constants table. */
+void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) {
+ uint64_t crc;
+
+ /* generate CRCs for all single byte sequences */
+ for (int n = 0; n < 256; n++) {
+ unsigned char v = n;
+ table[0][n] = crcfn(0, &v, 1);
+ }
+
+ /* generate nested CRC table for future slice-by-8 lookup */
+ for (int n = 0; n < 256; n++) {
+ crc = table[0][n];
+ for (int k = 1; k < 8; k++) {
+ crc = table[0][crc & 0xff] ^ (crc >> 8);
+ table[k][n] = crc;
+ }
+ }
+}
+
+void crcspeed16little_init(crcfn16 crcfn, uint16_t table[8][256]) {
+ uint16_t crc;
+
+ /* generate CRCs for all single byte sequences */
+ for (int n = 0; n < 256; n++) {
+ table[0][n] = crcfn(0, &n, 1);
+ }
+
+ /* generate nested CRC table for future slice-by-8 lookup */
+ for (int n = 0; n < 256; n++) {
+ crc = table[0][n];
+ for (int k = 1; k < 8; k++) {
+ crc = table[0][(crc >> 8) & 0xff] ^ (crc << 8);
+ table[k][n] = crc;
+ }
+ }
+}
+
/* Reverse the order of the 8 bytes in a 64-bit word. */
static inline uint64_t rev8(uint64_t a) {
#if defined(__GNUC__) || defined(__clang__)
    /* GCC/Clang lower this to a single bswap instruction where available. */
    return __builtin_bswap64(a);
#else
    /* Portable fallback: swap neighbouring 8-bit, then 16-bit, then 32-bit
     * groups. */
    a = ((a >> 8) & UINT64_C(0x00ff00ff00ff00ff)) |
        ((a & UINT64_C(0x00ff00ff00ff00ff)) << 8);
    a = ((a >> 16) & UINT64_C(0x0000ffff0000ffff)) |
        ((a & UINT64_C(0x0000ffff0000ffff)) << 16);
    return (a >> 32) | (a << 32);
#endif
}
+
+/* This function is called once to initialize the CRC table for use on a
+ big-endian architecture. */
+void crcspeed64big_init(crcfn64 fn, uint64_t big_table[8][256]) {
+ /* Create the little endian table then reverse all the entries. */
+ crcspeed64little_init(fn, big_table);
+ for (int k = 0; k < 8; k++) {
+ for (int n = 0; n < 256; n++) {
+ big_table[k][n] = rev8(big_table[k][n]);
+ }
+ }
+}
+
void crcspeed16big_init(crcfn16 fn, uint16_t big_table[8][256]) {
    /* Create the little endian table then reverse all the entries. */
    crcspeed16little_init(fn, big_table);
    for (int k = 0; k < 8; k++) {
        for (int n = 0; n < 256; n++) {
            /* NOTE(review): rev8() swaps all 8 bytes of a uint64_t, so for
             * a zero-extended 16-bit entry the swapped bytes land in the
             * top of the 64-bit result and the uint16_t assignment
             * truncates the entry to 0. A 16-bit byte swap looks intended
             * here -- confirm before using the big-endian CRC-16 path
             * (crcspeed16big() below carries a matching "possibly broken"
             * warning). */
            big_table[k][n] = rev8(big_table[k][n]);
        }
    }
}
+
/* Compute a non-inverted CRC eight bytes at a time on a little-endian
 * architecture, using tables built by crcspeed64little_init(). Callers
 * wanting an inverted CRC must invert before *and* after calling. */
uint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc,
                          void *buf, size_t len) {
    unsigned char *pos = buf;

    /* Head: byte-at-a-time until the pointer is 8-byte aligned. */
    for (; len > 0 && ((uintptr_t)pos & 7) != 0; len--) {
        crc = little_table[0][(crc ^ *pos++) & 0xff] ^ (crc >> 8);
    }

    /* Body: fold in one aligned 64-bit word per iteration, then replace
     * the register with eight parallel table lookups (slice-by-8). */
    for (; len >= 8; len -= 8) {
        crc ^= *(uint64_t *)pos;
        uint64_t folded = 0;
        for (int i = 0; i < 8; i++) {
            folded ^= little_table[7 - i][(crc >> (8 * i)) & 0xff];
        }
        crc = folded;
        pos += 8;
    }

    /* Tail: at most 7 remaining bytes. */
    for (; len > 0; len--) {
        crc = little_table[0][(crc ^ *pos++) & 0xff] ^ (crc >> 8);
    }

    return crc;
}
+
/* Compute a non-inverted CRC-16 eight bytes at a time on a little-endian
 * architecture, using tables built by crcspeed16little_init(). */
uint16_t crcspeed16little(uint16_t little_table[8][256], uint16_t crc,
                          void *buf, size_t len) {
    unsigned char *pos = buf;

    /* Head: byte-at-a-time until the pointer is 8-byte aligned. */
    for (; len > 0 && ((uintptr_t)pos & 7) != 0; len--) {
        crc = little_table[0][((crc >> 8) ^ *pos++) & 0xff] ^ (crc << 8);
    }

    /* Body: slice-by-8 over aligned 64-bit words; only the first two
     * lookups need to mix in the current 16-bit register. */
    for (; len >= 8; len -= 8) {
        uint64_t word = *(uint64_t *)pos;
        crc = little_table[7][(word & 0xff) ^ ((crc >> 8) & 0xff)] ^
              little_table[6][((word >> 8) & 0xff) ^ (crc & 0xff)] ^
              little_table[5][(word >> 16) & 0xff] ^
              little_table[4][(word >> 24) & 0xff] ^
              little_table[3][(word >> 32) & 0xff] ^
              little_table[2][(word >> 40) & 0xff] ^
              little_table[1][(word >> 48) & 0xff] ^
              little_table[0][word >> 56];
        pos += 8;
    }

    /* Tail: at most 7 remaining bytes. */
    for (; len > 0; len--) {
        crc = little_table[0][((crc >> 8) ^ *pos++) & 0xff] ^ (crc << 8);
    }

    return crc;
}
+
/* Calculate a non-inverted CRC eight bytes at a time on a big-endian
 * architecture.
 */
uint64_t crcspeed64big(uint64_t big_table[8][256], uint64_t crc, void *buf,
                       size_t len) {
    unsigned char *next = buf;

    /* Byte-swap the register so it matches the byte-swapped tables built
     * by crcspeed64big_init(); it is swapped back before returning. */
    crc = rev8(crc);
    /* Process single bytes until the pointer is 8-byte aligned. */
    while (len && ((uintptr_t)next & 7) != 0) {
        crc = big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8);
        len--;
    }

    /* Slice-by-8 main loop: XOR in one aligned 64-bit word, then replace
     * the register with eight table lookups. The table index order is
     * mirrored relative to crcspeed64little(). */
    while (len >= 8) {
        crc ^= *(uint64_t *)next;
        crc = big_table[0][crc & 0xff] ^
              big_table[1][(crc >> 8) & 0xff] ^
              big_table[2][(crc >> 16) & 0xff] ^
              big_table[3][(crc >> 24) & 0xff] ^
              big_table[4][(crc >> 32) & 0xff] ^
              big_table[5][(crc >> 40) & 0xff] ^
              big_table[6][(crc >> 48) & 0xff] ^
              big_table[7][crc >> 56];
        next += 8;
        len -= 8;
    }

    /* Remaining tail bytes (fewer than 8). */
    while (len) {
        crc = big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8);
        len--;
    }

    return rev8(crc);
}
+
/* WARNING: Completely untested on big endian architecture. Possibly broken. */
uint16_t crcspeed16big(uint16_t big_table[8][256], uint16_t crc_in, void *buf,
                       size_t len) {
    unsigned char *next = buf;
    /* Register widened to 64 bits so the rev8()-based swapping compiles. */
    uint64_t crc = crc_in;

    /* NOTE(review): rev8() reverses all 8 bytes of the 64-bit register,
     * but only 16 bits are significant here, and the final return
     * truncates the swapped value to uint16_t. Together with the
     * table-entry truncation in crcspeed16big_init() this path looks
     * broken on big-endian hosts -- confirm before relying on it
     * (consistent with the WARNING above). */
    crc = rev8(crc);
    /* Process single bytes until the pointer is 8-byte aligned. */
    while (len && ((uintptr_t)next & 7) != 0) {
        crc = big_table[0][((crc >> (56 - 8)) ^ *next++) & 0xff] ^ (crc >> 8);
        len--;
    }

    /* Slice-by-8 main loop over aligned 64-bit words. */
    while (len >= 8) {
        uint64_t n = *(uint64_t *)next;
        crc = big_table[0][(n & 0xff) ^ ((crc >> (56 - 8)) & 0xff)] ^
              big_table[1][((n >> 8) & 0xff) ^ (crc & 0xff)] ^
              big_table[2][(n >> 16) & 0xff] ^
              big_table[3][(n >> 24) & 0xff] ^
              big_table[4][(n >> 32) & 0xff] ^
              big_table[5][(n >> 40) & 0xff] ^
              big_table[6][(n >> 48) & 0xff] ^
              big_table[7][n >> 56];
        next += 8;
        len -= 8;
    }

    /* Remaining tail bytes (fewer than 8). */
    while (len) {
        crc = big_table[0][((crc >> (56 - 8)) ^ *next++) & 0xff] ^ (crc >> 8);
        len--;
    }

    return rev8(crc);
}
+
/* Return the CRC of buf[0..len-1] continuing from crc, eight bytes at a
 * time using the passed-in lookup table. Dispatches to the little- or
 * big-endian routine depending on the host architecture. */
uint64_t crcspeed64native(uint64_t table[8][256], uint64_t crc, void *buf,
                          size_t len) {
    /* Runtime endianness probe: the first byte of a 64-bit 1 is nonzero
     * only on a little-endian machine. */
    const uint64_t probe = 1;

    if (*(const char *)&probe)
        return crcspeed64little(table, crc, buf, len);
    return crcspeed64big(table, crc, buf, len);
}
+
/* CRC-16 counterpart of crcspeed64native(): dispatch on host endianness. */
uint16_t crcspeed16native(uint16_t table[8][256], uint16_t crc, void *buf,
                          size_t len) {
    const uint64_t probe = 1;

    if (*(const char *)&probe)
        return crcspeed16little(table, crc, buf, len);
    return crcspeed16big(table, crc, buf, len);
}
+
+/* Initialize CRC lookup table in architecture-dependent manner. */
+void crcspeed64native_init(crcfn64 fn, uint64_t table[8][256]) {
+ uint64_t n = 1;
+
+ *(char *)&n ? crcspeed64little_init(fn, table)
+ : crcspeed64big_init(fn, table);
+}
+
+void crcspeed16native_init(crcfn16 fn, uint16_t table[8][256]) {
+ uint64_t n = 1;
+
+ *(char *)&n ? crcspeed16little_init(fn, table)
+ : crcspeed16big_init(fn, table);
+}
diff --git a/src/crcspeed.h b/src/crcspeed.h
new file mode 100644
index 0000000..d7ee95e
--- /dev/null
+++ b/src/crcspeed.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE. */
+
+#ifndef CRCSPEED_H
+#define CRCSPEED_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+/* Caller-supplied reference CRC functions (typically bit-at-a-time) used
+ * to seed the speed-optimized 8x256 lookup tables. */
+typedef uint64_t (*crcfn64)(uint64_t, const void *, const uint64_t);
+typedef uint16_t (*crcfn16)(uint16_t, const void *, const uint64_t);
+
+/* CRC-64 */
+void crcspeed64little_init(crcfn64 fn, uint64_t table[8][256]);
+void crcspeed64big_init(crcfn64 fn, uint64_t table[8][256]);
+void crcspeed64native_init(crcfn64 fn, uint64_t table[8][256]);
+
+uint64_t crcspeed64little(uint64_t table[8][256], uint64_t crc, void *buf,
+ size_t len);
+uint64_t crcspeed64big(uint64_t table[8][256], uint64_t crc, void *buf,
+ size_t len);
+uint64_t crcspeed64native(uint64_t table[8][256], uint64_t crc, void *buf,
+ size_t len);
+
+/* CRC-16 */
+void crcspeed16little_init(crcfn16 fn, uint16_t table[8][256]);
+void crcspeed16big_init(crcfn16 fn, uint16_t table[8][256]);
+void crcspeed16native_init(crcfn16 fn, uint16_t table[8][256]);
+
+uint16_t crcspeed16little(uint16_t table[8][256], uint16_t crc, void *buf,
+ size_t len);
+uint16_t crcspeed16big(uint16_t table[8][256], uint16_t crc, void *buf,
+ size_t len);
+uint16_t crcspeed16native(uint16_t table[8][256], uint16_t crc, void *buf,
+ size_t len);
+#endif
diff --git a/src/db.c b/src/db.c
new file mode 100644
index 0000000..a7cb4db
--- /dev/null
+++ b/src/db.c
@@ -0,0 +1,2558 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "cluster.h"
+#include "atomicvar.h"
+#include "latency.h"
+#include "script.h"
+#include "functions.h"
+
+#include <signal.h>
+#include <ctype.h>
+
+/*-----------------------------------------------------------------------------
+ * C-level DB API
+ *----------------------------------------------------------------------------*/
+
+/* Flags for expireIfNeeded */
+#define EXPIRE_FORCE_DELETE_EXPIRED 1
+#define EXPIRE_AVOID_DELETE_EXPIRED 2
+
+int expireIfNeeded(redisDb *db, robj *key, int flags);
+int keyIsExpired(redisDb *db, robj *key);
+static void dbSetValue(redisDb *db, robj *key, robj *val, int overwrite, dictEntry *de);
+
+/* Update LFU when an object is accessed.
+ * Firstly, decrement the counter if the decrement time is reached.
+ * Then logarithmically increment the counter, and update the access time. */
+void updateLFU(robj *val) {
+ /* Decay the counter if its decrement period elapsed, bump it
+ * logarithmically, then stamp the access time in the upper lru bits. */
+ unsigned long counter = LFULogIncr(LFUDecrAndReturn(val));
+ val->lru = (LFUGetTimeInMinutes() << 8) | counter;
+}
+
+/* Lookup a key for read or write operations, or return NULL if the key is not
+ * found in the specified DB. This function implements the functionality of
+ * lookupKeyRead(), lookupKeyWrite() and their ...WithFlags() variants.
+ *
+ * Side-effects of calling this function:
+ *
+ * 1. A key gets expired if it reached its TTL.
+ * 2. The key's last access time is updated.
+ * 3. The global keys hits/misses stats are updated (reported in INFO).
+ * 4. If keyspace notifications are enabled, a "keymiss" notification is fired.
+ *
+ * Flags change the behavior of this command:
+ *
+ * LOOKUP_NONE (or zero): No special flags are passed.
+ * LOOKUP_NOTOUCH: Don't alter the last access time of the key.
+ * LOOKUP_NONOTIFY: Don't trigger keyspace event on key miss.
+ * LOOKUP_NOSTATS: Don't increment key hits/misses counters.
+ * LOOKUP_WRITE: Prepare the key for writing (delete expired keys even on
+ * replicas, use separate keyspace stats and events (TODO)).
+ * LOOKUP_NOEXPIRE: Perform expiration check, but avoid deleting the key,
+ * so that we don't have to propagate the deletion.
+ *
+ * Note: this function also returns NULL if the key is logically expired but
+ * still existing, in case this is a replica and the LOOKUP_WRITE is not set.
+ * Even if the key expiry is master-driven, we can correctly report a key is
+ * expired on replicas even if the master is lagging expiring our key via DELs
+ * in the replication link. */
+robj *lookupKey(redisDb *db, robj *key, int flags) {
+ dictEntry *de = dictFind(db->dict,key->ptr);
+ robj *val = NULL;
+ if (de) {
+ val = dictGetVal(de);
+ /* Forcing deletion of expired keys on a replica makes the replica
+ * inconsistent with the master. We forbid it on readonly replicas, but
+ * we have to allow it on writable replicas to make write commands
+ * behave consistently.
+ *
+ * It's possible that the WRITE flag is set even during a readonly
+ * command, since the command may trigger events that cause modules to
+ * perform additional writes. */
+ int is_ro_replica = server.masterhost && server.repl_slave_ro;
+ int expire_flags = 0;
+ if (flags & LOOKUP_WRITE && !is_ro_replica)
+ expire_flags |= EXPIRE_FORCE_DELETE_EXPIRED;
+ if (flags & LOOKUP_NOEXPIRE)
+ expire_flags |= EXPIRE_AVOID_DELETE_EXPIRED;
+ if (expireIfNeeded(db, key, expire_flags)) {
+ /* The key is no longer valid. */
+ val = NULL;
+ }
+ }
+
+ if (val) {
+ /* Update the access time for the ageing algorithm.
+ * Don't do it if we have a saving child, as this will trigger
+ * a copy on write madness. */
+ /* Clients flagged CLIENT_NO_TOUCH only refresh access time via the
+ * TOUCH command itself. */
+ if (server.current_client && server.current_client->flags & CLIENT_NO_TOUCH &&
+ server.current_client->cmd->proc != touchCommand)
+ flags |= LOOKUP_NOTOUCH;
+ if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)){
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ updateLFU(val);
+ } else {
+ val->lru = LRU_CLOCK();
+ }
+ }
+
+ if (!(flags & (LOOKUP_NOSTATS | LOOKUP_WRITE)))
+ server.stat_keyspace_hits++;
+ /* TODO: Use separate hits stats for WRITE */
+ } else {
+ if (!(flags & (LOOKUP_NONOTIFY | LOOKUP_WRITE)))
+ notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id);
+ if (!(flags & (LOOKUP_NOSTATS | LOOKUP_WRITE)))
+ server.stat_keyspace_misses++;
+ /* TODO: Use separate misses stats and notify event for WRITE */
+ }
+
+ return val;
+}
+
+/* Read-path lookup: return the value object stored at 'key' in 'db', or
+ * NULL when the key is missing or logically expired.
+ *
+ * Must not be used when the caller intends to modify the object obtained
+ * from the lookup; it exists to document read intent and it forbids the
+ * LOOKUP_WRITE flag, but otherwise behaves exactly like lookupKey(). */
+robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
+ serverAssert(!(flags & LOOKUP_WRITE));
+ return lookupKey(db, key, flags);
+}
+
+/* Common-case read lookup, with no special flags. */
+robj *lookupKeyRead(redisDb *db, robj *key) {
+ return lookupKeyReadWithFlags(db, key, LOOKUP_NONE);
+}
+
+/* Write-path lookup: equivalent to lookupKey() with LOOKUP_WRITE added,
+ * so a reached TTL deletes the key as a side effect where allowed.
+ * Returns the value object, or NULL when the key does not exist. */
+robj *lookupKeyWriteWithFlags(redisDb *db, robj *key, int flags) {
+ return lookupKey(db, key, flags | LOOKUP_WRITE);
+}
+
+robj *lookupKeyWrite(redisDb *db, robj *key) {
+ return lookupKeyWriteWithFlags(db, key, LOOKUP_NONE);
+}
+
+/* As lookupKeyRead(), but sends 'reply' (or the error it wraps) to the
+ * client when the key is missing. */
+robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply) {
+ robj *o = lookupKeyRead(c->db, key);
+ if (o == NULL) addReplyOrErrorObject(c, reply);
+ return o;
+}
+
+/* As lookupKeyWrite(), but sends 'reply' (or the error it wraps) to the
+ * client when the key is missing. */
+robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) {
+ robj *o = lookupKeyWrite(c->db, key);
+ if (o == NULL) addReplyOrErrorObject(c, reply);
+ return o;
+}
+
+/* Add the key to the DB. It's up to the caller to increment the reference
+ * counter of the value if needed.
+ *
+ * If the update_if_existing argument is false, the program is aborted
+ * if the key already exists, otherwise, it can fall back to dbOverwrite. */
+static void dbAddInternal(redisDb *db, robj *key, robj *val, int update_if_existing) {
+ dictEntry *existing;
+ dictEntry *de = dictAddRaw(db->dict, key->ptr, &existing);
+ if (update_if_existing && existing) {
+ /* Key already present: overwrite in place (emits unlink signals). */
+ dbSetValue(db, key, val, 1, existing);
+ return;
+ }
+ serverAssertWithInfo(NULL, key, de != NULL);
+ /* The dict entry must own a private copy of the key string. */
+ dictSetKey(db->dict, de, sdsdup(key->ptr));
+ initObjectLRUOrLFU(val);
+ dictSetVal(db->dict, de, val);
+ signalKeyAsReady(db, key, val->type);
+ if (server.cluster_enabled) slotToKeyAddEntry(de, db);
+ notifyKeyspaceEvent(NOTIFY_NEW,"new",key,db->id);
+}
+
+/* Plain add: asserts (aborts) if the key already exists. */
+void dbAdd(redisDb *db, robj *key, robj *val) {
+ dbAddInternal(db, key, val, 0);
+}
+
+/* This is a special version of dbAdd() that is used only when loading
+ * keys from the RDB file: the key is passed as an SDS string that is
+ * retained by the function (and not freed by the caller).
+ *
+ * Moreover this function will not abort if the key is already busy, to
+ * give more control to the caller, nor will signal the key as ready
+ * since it is not useful in this context.
+ *
+ * The function returns 1 if the key was added to the database, taking
+ * ownership of the SDS string, otherwise 0 is returned, and is up to the
+ * caller to free the SDS string. */
+int dbAddRDBLoad(redisDb *db, sds key, robj *val) {
+ /* Passing NULL for 'existing' makes dictAddRaw return NULL when the
+ * key is already present, instead of reporting the old entry. */
+ dictEntry *de = dictAddRaw(db->dict, key, NULL);
+ if (de == NULL) return 0;
+ initObjectLRUOrLFU(val);
+ dictSetVal(db->dict, de, val);
+ if (server.cluster_enabled) slotToKeyAddEntry(de, db);
+ return 1;
+}
+
+/* Overwrite an existing key with a new value. Incrementing the reference
+ * count of the new value is up to the caller.
+ * This function does not modify the expire time of the existing key.
+ *
+ * The 'overwrite' flag is an indication whether this is done as part of a
+ * complete replacement of their key, which can be thought as a deletion and
+ * replacement (in which case we need to emit deletion signals), or just an
+ * update of a value of an existing key (when false).
+ *
+ * The dictEntry input is optional, can be used if we already have one.
+ *
+ * The program is aborted if the key was not already present. */
+static void dbSetValue(redisDb *db, robj *key, robj *val, int overwrite, dictEntry *de) {
+ if (!de) de = dictFind(db->dict,key->ptr);
+ serverAssertWithInfo(NULL,key,de != NULL);
+ robj *old = dictGetVal(de);
+
+ /* Keep the old access time/LFU counter on the replacement value. */
+ val->lru = old->lru;
+
+ if (overwrite) {
+ /* RM_StringDMA may call dbUnshareStringValue which may free val, so we
+ * need to incr to retain old */
+ incrRefCount(old);
+ /* Although the key is not really deleted from the database, we regard
+ * overwrite as two steps of unlink+add, so we still need to call the unlink
+ * callback of the module. */
+ moduleNotifyKeyUnlink(key,old,db->id,DB_FLAG_KEY_OVERWRITE);
+ /* We want to try to unblock any module clients or clients using a blocking XREADGROUP */
+ signalDeletedKeyAsReady(db,key,old->type);
+ decrRefCount(old);
+ /* Because of RM_StringDMA, old may be changed, so we need get old again */
+ old = dictGetVal(de);
+ }
+ dictSetVal(db->dict, de, val);
+
+ if (server.lazyfree_lazy_server_del) {
+ freeObjAsync(key,old,db->id);
+ } else {
+ /* This is just decrRefCount(old); */
+ db->dict->type->valDestructor(db->dict, old);
+ }
+}
+
+/* Replace an existing key's value without emitting deletion/overwrite
+ * signals or module events (contrast with setKey()). The key must
+ * already exist, or the underlying dbSetValue() assertion aborts. */
+void dbReplaceValue(redisDb *db, robj *key, robj *val) {
+ dbSetValue(db, key, val, 0, NULL);
+}
+
+/* High level Set operation. This function can be used in order to set
+ * a key, whatever it was existing or not, to a new object.
+ *
+ * 1) The ref count of the value object is incremented.
+ * 2) clients WATCHing for the destination key notified.
+ * 3) The expire time of the key is reset (the key is made persistent),
+ * unless 'SETKEY_KEEPTTL' is enabled in flags.
+ * 4) The key lookup can take place outside this interface outcome will be
+ * delivered with 'SETKEY_ALREADY_EXIST' or 'SETKEY_DOESNT_EXIST'
+ *
+ * All the new keys in the database should be created via this interface.
+ * The client 'c' argument may be set to NULL if the operation is performed
+ * in a context where there is no clear client performing the operation. */
+void setKey(client *c, redisDb *db, robj *key, robj *val, int flags) {
+ int keyfound = 0;
+
+ /* keyfound: 1 = known to exist, 0 = known absent, -1 = add-or-update
+ * (existence unknown, resolved by dbAddInternal below). */
+ if (flags & SETKEY_ALREADY_EXIST)
+ keyfound = 1;
+ else if (flags & SETKEY_ADD_OR_UPDATE)
+ keyfound = -1;
+ else if (!(flags & SETKEY_DOESNT_EXIST))
+ keyfound = (lookupKeyWrite(db,key) != NULL);
+
+ if (!keyfound) {
+ dbAdd(db,key,val);
+ } else if (keyfound<0) {
+ dbAddInternal(db,key,val,1);
+ } else {
+ dbSetValue(db,key,val,1,NULL);
+ }
+ incrRefCount(val);
+ if (!(flags & SETKEY_KEEPTTL)) removeExpire(db,key);
+ if (!(flags & SETKEY_NO_SIGNAL)) signalModifiedKey(c,db,key);
+}
+
+/* Return a random key, in form of a Redis object.
+ * If there are no keys, NULL is returned.
+ *
+ * The function makes sure to return keys not already expired. */
+robj *dbRandomKey(redisDb *db) {
+ dictEntry *de;
+ int maxtries = 100;
+ /* True when every key carries a TTL; on a lagging replica they may all
+ * be logically expired, so retries must be bounded (see below). */
+ int allvolatile = dictSize(db->dict) == dictSize(db->expires);
+
+ while(1) {
+ sds key;
+ robj *keyobj;
+
+ de = dictGetFairRandomKey(db->dict);
+ if (de == NULL) return NULL;
+
+ key = dictGetKey(de);
+ keyobj = createStringObject(key,sdslen(key));
+ if (dictFind(db->expires,key)) {
+ if (allvolatile && server.masterhost && --maxtries == 0) {
+ /* If the DB is composed only of keys with an expire set,
+ * it could happen that all the keys are already logically
+ * expired in the slave, so the function cannot stop because
+ * expireIfNeeded() is false, nor it can stop because
+ * dictGetFairRandomKey() returns NULL (there are keys to return).
+ * To prevent the infinite loop we do some tries, but if there
+ * are the conditions for an infinite loop, eventually we
+ * return a key name that may be already expired. */
+ return keyobj;
+ }
+ if (expireIfNeeded(db,keyobj,0)) {
+ decrRefCount(keyobj);
+ continue; /* search for another key. This expired. */
+ }
+ }
+ return keyobj;
+ }
+}
+
+/* Helper for sync and async delete. */
+/* Unlinks 'key' from 'db', freeing the value either synchronously or
+ * (async != 0) in a background thread. Returns 1 if the key was found
+ * and deleted, 0 otherwise. 'flags' is forwarded to the module unlink
+ * notification (e.g. DB_FLAG_KEY_DELETED / DB_FLAG_KEY_EXPIRED). */
+int dbGenericDelete(redisDb *db, robj *key, int async, int flags) {
+ dictEntry **plink;
+ int table;
+ dictEntry *de = dictTwoPhaseUnlinkFind(db->dict,key->ptr,&plink,&table);
+ if (de) {
+ robj *val = dictGetVal(de);
+ /* RM_StringDMA may call dbUnshareStringValue which may free val, so we
+ * need to incr to retain val */
+ incrRefCount(val);
+ /* Tells the module that the key has been unlinked from the database. */
+ moduleNotifyKeyUnlink(key,val,db->id,flags);
+ /* We want to try to unblock any module clients or clients using a blocking XREADGROUP */
+ signalDeletedKeyAsReady(db,key,val->type);
+ /* We should call decr before freeObjAsync. If not, the refcount may be
+ * greater than 1, so freeObjAsync doesn't work */
+ decrRefCount(val);
+ if (async) {
+ /* Because of dbUnshareStringValue, the val in de may change. */
+ freeObjAsync(key, dictGetVal(de), db->id);
+ dictSetVal(db->dict, de, NULL);
+ }
+ if (server.cluster_enabled) slotToKeyDelEntry(de, db);
+
+ /* Deleting an entry from the expires dict will not free the sds of
+ * the key, because it is shared with the main dictionary. */
+ if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
+ dictTwoPhaseUnlinkFree(db->dict,de,plink,table);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Delete a key, value, and associated expiration entry if any, from the DB.
+ * The value is always freed synchronously. */
+int dbSyncDelete(redisDb *db, robj *key) {
+ return dbGenericDelete(db, key, 0, DB_FLAG_KEY_DELETED);
+}
+
+/* Delete a key, value, and associated expiration entry if any, from the DB. If
+ * the value consists of many allocations, it may be freed asynchronously. */
+int dbAsyncDelete(redisDb *db, robj *key) {
+ return dbGenericDelete(db, key, 1, DB_FLAG_KEY_DELETED);
+}
+
+/* This is a wrapper whose behavior depends on the Redis lazy free
+ * configuration. Deletes the key synchronously or asynchronously. */
+int dbDelete(redisDb *db, robj *key) {
+ return dbGenericDelete(db, key, server.lazyfree_lazy_server_del, DB_FLAG_KEY_DELETED);
+}
+
+/* Prepare the string object stored at 'key' to be modified destructively
+ * to implement commands like SETBIT or APPEND.
+ *
+ * An object is usually ready to be modified unless one of the two conditions
+ * are true:
+ *
+ * 1) The object 'o' is shared (refcount > 1), we don't want to affect
+ * other users.
+ * 2) The object encoding is not "RAW".
+ *
+ * If the object is found in one of the above conditions (or both) by the
+ * function, an unshared / not-encoded copy of the string object is stored
+ * at 'key' in the specified 'db'. Otherwise the object 'o' itself is
+ * returned.
+ *
+ * USAGE:
+ *
+ * The object 'o' is what the caller already obtained by looking up 'key'
+ * in 'db', the usage pattern looks like this:
+ *
+ * o = lookupKeyWrite(db,key);
+ * if (checkType(c,o,OBJ_STRING)) return;
+ * o = dbUnshareStringValue(db,key,o);
+ *
+ * At this point the caller is ready to modify the object, for example
+ * using an sdscat() call to append some data, or anything else.
+ */
+robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) {
+ serverAssert(o->type == OBJ_STRING);
+ if (o->refcount != 1 || o->encoding != OBJ_ENCODING_RAW) {
+ /* Make a private RAW copy and store it back so in-place edits
+ * (SETBIT, APPEND, ...) cannot affect other users of the object. */
+ robj *decoded = getDecodedObject(o);
+ o = createRawStringObject(decoded->ptr, sdslen(decoded->ptr));
+ decrRefCount(decoded);
+ dbReplaceValue(db,key,o);
+ }
+ return o;
+}
+
+/* Remove all keys from the database(s) structure. The dbarray argument
+ * may not be the server main DBs (could be a temporary DB).
+ *
+ * The dbnum can be -1 if all the DBs should be emptied, or the specified
+ * DB index if we want to empty only a single database.
+ * The function returns the number of keys removed from the database(s). */
+long long emptyDbStructure(redisDb *dbarray, int dbnum, int async,
+ void(callback)(dict*))
+{
+ long long removed = 0;
+ int startdb, enddb;
+
+ /* dbnum == -1 means: empty every database in the array. */
+ if (dbnum == -1) {
+ startdb = 0;
+ enddb = server.dbnum-1;
+ } else {
+ startdb = enddb = dbnum;
+ }
+
+ for (int j = startdb; j <= enddb; j++) {
+ removed += dictSize(dbarray[j].dict);
+ if (async) {
+ emptyDbAsync(&dbarray[j]);
+ } else {
+ dictEmpty(dbarray[j].dict,callback);
+ dictEmpty(dbarray[j].expires,callback);
+ }
+ /* Because all keys of database are removed, reset average ttl. */
+ dbarray[j].avg_ttl = 0;
+ dbarray[j].expires_cursor = 0;
+ }
+
+ return removed;
+}
+
+/* Remove all data (keys and functions) from all the databases in a
+ * Redis server. If callback is given the function is called from
+ * time to time to signal that work is in progress.
+ *
+ * The dbnum can be -1 if all the DBs should be flushed, or the specified
+ * DB number if we want to flush only a single Redis database number.
+ *
+ * Flags can be EMPTYDB_NO_FLAGS if no special flags are specified or
+ * EMPTYDB_ASYNC if we want the memory to be freed in a different thread
+ * and the function to return ASAP. EMPTYDB_NOFUNCTIONS can also be set
+ * to specify that we do not want to delete the functions.
+ *
+ * On success the function returns the number of keys removed from the
+ * database(s). Otherwise -1 is returned in the specific case the
+ * DB number is out of range, and errno is set to EINVAL. */
+long long emptyData(int dbnum, int flags, void(callback)(dict*)) {
+ int async = (flags & EMPTYDB_ASYNC);
+ int with_functions = !(flags & EMPTYDB_NOFUNCTIONS);
+ RedisModuleFlushInfoV1 fi = {REDISMODULE_FLUSHINFO_VERSION,!async,dbnum};
+ long long removed = 0;
+
+ /* -1 means all DBs; any other out-of-range index is an error. */
+ if (dbnum < -1 || dbnum >= server.dbnum) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* Fire the flushdb modules event. */
+ moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB,
+ REDISMODULE_SUBEVENT_FLUSHDB_START,
+ &fi);
+
+ /* Make sure the WATCHed keys are affected by the FLUSH* commands.
+ * Note that we need to call the function while the keys are still
+ * there. */
+ signalFlushedDb(dbnum, async);
+
+ /* Empty redis database structure. */
+ removed = emptyDbStructure(server.db, dbnum, async, callback);
+
+ /* Flush slots to keys map if enable cluster, we can flush entire
+ * slots to keys map whatever dbnum because only support one DB
+ * in cluster mode. */
+ if (server.cluster_enabled) slotToKeyFlush(server.db);
+
+ if (dbnum == -1) flushSlaveKeysWithExpireList();
+
+ if (with_functions) {
+ /* Functions are global (not per-DB): only a full flush may drop them. */
+ serverAssert(dbnum == -1);
+ functionsLibCtxClearCurrent(async);
+ }
+
+ /* Also fire the end event. Note that this event will fire almost
+ * immediately after the start event if the flush is asynchronous. */
+ moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB,
+ REDISMODULE_SUBEVENT_FLUSHDB_END,
+ &fi);
+
+ return removed;
+}
+
+/* Initialize temporary db on replica for use during diskless replication. */
+redisDb *initTempDb(void) {
+ /* zcalloc: all non-dict fields (avg_ttl, cursors, ...) start zeroed. */
+ redisDb *tempDb = zcalloc(sizeof(redisDb)*server.dbnum);
+ for (int i=0; i<server.dbnum; i++) {
+ tempDb[i].dict = dictCreate(&dbDictType);
+ tempDb[i].expires = dictCreate(&dbExpiresDictType);
+ tempDb[i].slots_to_keys = NULL;
+ }
+
+ if (server.cluster_enabled) {
+ /* Prepare temp slot to key map to be written during async diskless replication. */
+ slotToKeyInit(tempDb);
+ }
+
+ return tempDb;
+}
+
+/* Discard tempDb, this can be slow (similar to FLUSHALL), but it's always async. */
+void discardTempDb(redisDb *tempDb, void(callback)(dict*)) {
+ int async = 1;
+
+ /* Release temp DBs: contents are freed lazily, then the (now empty)
+ * dict structures themselves are released. */
+ emptyDbStructure(tempDb, -1, async, callback);
+ for (int i=0; i<server.dbnum; i++) {
+ dictRelease(tempDb[i].dict);
+ dictRelease(tempDb[i].expires);
+ }
+
+ if (server.cluster_enabled) {
+ /* Release temp slot to key map. */
+ slotToKeyDestroy(tempDb);
+ }
+
+ zfree(tempDb);
+}
+
+/* Point client 'c' at database 'id'. Returns C_ERR when the index is out
+ * of the configured range, C_OK otherwise. */
+int selectDb(client *c, int id) {
+ if (id < 0 || id >= server.dbnum) return C_ERR;
+ c->db = server.db + id;
+ return C_OK;
+}
+
+/* Return the total number of keys stored across all server databases. */
+long long dbTotalServerKeyCount(void) {
+ long long total = 0;
+ for (int j = 0; j < server.dbnum; j++)
+ total += dictSize(server.db[j].dict);
+ return total;
+}
+
+/*-----------------------------------------------------------------------------
+ * Hooks for key space changes.
+ *
+ * Every time a key in the database is modified the function
+ * signalModifiedKey() is called.
+ *
+ * Every time a DB is flushed the function signalFlushDb() is called.
+ *----------------------------------------------------------------------------*/
+
+/* Note that the 'c' argument may be NULL if the key was modified out of
+ * a context of a client. */
+void signalModifiedKey(client *c, redisDb *db, robj *key) {
+ /* Wake clients WATCHing the key, then invalidate client-side caches. */
+ touchWatchedKey(db,key);
+ trackingInvalidateKey(c,key,1);
+}
+
+/* Notify watchers/trackers that DB 'dbid' (-1 = all DBs) is about to be
+ * flushed. Must run while the keys are still present. */
+void signalFlushedDb(int dbid, int async) {
+ int startdb, enddb;
+ if (dbid == -1) {
+ startdb = 0;
+ enddb = server.dbnum-1;
+ } else {
+ startdb = enddb = dbid;
+ }
+
+ for (int j = startdb; j <= enddb; j++) {
+ scanDatabaseForDeletedKeys(&server.db[j], NULL);
+ touchAllWatchedKeysInDb(&server.db[j], NULL);
+ }
+
+ trackingInvalidateKeysOnFlush(async);
+
+ /* Changes in this method may take place in swapMainDbWithTempDb as well,
+ * where we execute similar calls, but with subtle differences as it's
+ * not simply flushing db. */
+}
+
+/*-----------------------------------------------------------------------------
+ * Type agnostic commands operating on the key space
+ *----------------------------------------------------------------------------*/
+
+/* Return the set of flags to use for the emptyDb() call for FLUSHALL
+ * and FLUSHDB commands.
+ *
+ * sync: flushes the database in a synchronous manner.
+ * async: flushes the database in an async manner.
+ * no option: determine sync or async according to the value of lazyfree-lazy-user-flush.
+ *
+ * On success C_OK is returned and the flags are stored in *flags, otherwise
+ * C_ERR is returned and the function sends an error to the client. */
+int getFlushCommandFlags(client *c, int *flags) {
+ /* Parse the optional ASYNC/SYNC option. */
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"sync")) {
+ *flags = EMPTYDB_NO_FLAGS;
+ } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"async")) {
+ *flags = EMPTYDB_ASYNC;
+ } else if (c->argc == 1) {
+ /* No option: the lazyfree-lazy-user-flush config decides. */
+ *flags = server.lazyfree_lazy_user_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+/* Flushes the whole server data set. */
+void flushAllDataAndResetRDB(int flags) {
+ server.dirty += emptyData(-1,flags,NULL);
+ /* An in-flight RDB save would persist the pre-flush dataset: kill it. */
+ if (server.child_type == CHILD_TYPE_RDB) killRDBChild();
+ if (server.saveparamslen > 0) {
+ /* Rewrite the RDB file so it reflects the (now empty) dataset. */
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ rdbSave(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE);
+ }
+
+#if defined(USE_JEMALLOC)
+ /* jemalloc 5 doesn't release pages back to the OS when there's no traffic.
+ * for large databases, flushdb blocks for long anyway, so a bit more won't
+ * harm and this way the flush and purge will be synchronous. */
+ if (!(flags & EMPTYDB_ASYNC))
+ jemalloc_purge();
+#endif
+}
+
+/* FLUSHDB [ASYNC]
+ *
+ * Flushes the currently SELECTed Redis DB. */
+void flushdbCommand(client *c) {
+ int flags;
+
+ if (getFlushCommandFlags(c,&flags) == C_ERR) return;
+ /* flushdb should not flush the functions */
+ server.dirty += emptyData(c->db->id,flags | EMPTYDB_NOFUNCTIONS,NULL);
+
+ /* Without the forceCommandPropagation, when DB was already empty,
+ * FLUSHDB will not be replicated nor put into the AOF. */
+ forceCommandPropagation(c, PROPAGATE_REPL | PROPAGATE_AOF);
+
+ addReply(c,shared.ok);
+
+#if defined(USE_JEMALLOC)
+ /* jemalloc 5 doesn't release pages back to the OS when there's no traffic.
+ * for large databases, flushdb blocks for long anyway, so a bit more won't
+ * harm and this way the flush and purge will be synchronous. */
+ if (!(flags & EMPTYDB_ASYNC))
+ jemalloc_purge();
+#endif
+}
+
+/* FLUSHALL [ASYNC]
+ *
+ * Flushes the whole server data set. */
+void flushallCommand(client *c) {
+ int flags;
+ if (getFlushCommandFlags(c,&flags) == C_ERR) return;
+ /* flushall should not flush the functions */
+ flushAllDataAndResetRDB(flags | EMPTYDB_NOFUNCTIONS);
+
+ /* Without the forceCommandPropagation, when DBs were already empty,
+ * FLUSHALL will not be replicated nor put into the AOF. */
+ forceCommandPropagation(c, PROPAGATE_REPL | PROPAGATE_AOF);
+
+ addReply(c,shared.ok);
+}
+
+/* This command implements DEL and UNLINK. */
+void delGenericCommand(client *c, int lazy) {
+ int numdel = 0, j;
+
+ for (j = 1; j < c->argc; j++) {
+ /* Expire first so an already-expired key is not counted as deleted. */
+ expireIfNeeded(c->db,c->argv[j],0);
+ int deleted = lazy ? dbAsyncDelete(c->db,c->argv[j]) :
+ dbSyncDelete(c->db,c->argv[j]);
+ if (deleted) {
+ signalModifiedKey(c,c->db,c->argv[j]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,
+ "del",c->argv[j],c->db->id);
+ server.dirty++;
+ numdel++;
+ }
+ }
+ addReplyLongLong(c,numdel);
+}
+
+/* DEL: frees values synchronously or lazily per lazyfree-lazy-user-del. */
+void delCommand(client *c) {
+ delGenericCommand(c,server.lazyfree_lazy_user_del);
+}
+
+/* UNLINK: always frees values lazily (asynchronously). */
+void unlinkCommand(client *c) {
+ delGenericCommand(c,1);
+}
+
+/* EXISTS key1 key2 ... key_N.
+ * Replies with the number of the given keys that exist (a key repeated
+ * in the arguments is counted each time). */
+void existsCommand(client *c) {
+ long long count = 0;
+
+ for (int j = 1; j < c->argc; j++) {
+ if (lookupKeyReadWithFlags(c->db,c->argv[j],LOOKUP_NOTOUCH) != NULL)
+ count++;
+ }
+ addReplyLongLong(c,count);
+}
+
+void selectCommand(client *c) {
+ int id;
+
+ if (getIntFromObjectOrReply(c, c->argv[1], &id, NULL) != C_OK)
+ return;
+
+ /* Cluster mode supports a single database (index 0) only. */
+ if (server.cluster_enabled && id != 0) {
+ addReplyError(c,"SELECT is not allowed in cluster mode");
+ return;
+ }
+ if (selectDb(c,id) == C_ERR) {
+ addReplyError(c,"DB index is out of range");
+ } else {
+ addReply(c,shared.ok);
+ }
+}
+
+void randomkeyCommand(client *c) {
+ robj *key;
+
+ /* NULL means the database is empty. */
+ if ((key = dbRandomKey(c->db)) == NULL) {
+ addReplyNull(c);
+ return;
+ }
+
+ addReplyBulk(c,key);
+ decrRefCount(key);
+}
+
+void keysCommand(client *c) {
+ dictIterator *di;
+ dictEntry *de;
+ sds pattern = c->argv[1]->ptr;
+ int plen = sdslen(pattern), allkeys;
+ unsigned long numkeys = 0;
+ void *replylen = addReplyDeferredLen(c);
+
+ di = dictGetSafeIterator(c->db->dict);
+ /* "*" matches everything: skip per-key pattern matching entirely. */
+ allkeys = (pattern[0] == '*' && plen == 1);
+ robj keyobj;
+ while((de = dictNext(di)) != NULL) {
+ sds key = dictGetKey(de);
+
+ if (allkeys || stringmatchlen(pattern,plen,key,sdslen(key),0)) {
+ /* Stack-allocated object wrapper: avoids a heap allocation
+ * per matching key just to run the expiry check. */
+ initStaticStringObject(keyobj, key);
+ if (!keyIsExpired(c->db, &keyobj)) {
+ addReplyBulkCBuffer(c, key, sdslen(key));
+ numkeys++;
+ }
+ }
+ /* Stop early if the client is scheduled to be closed. */
+ if (c->flags & CLIENT_CLOSE_ASAP)
+ break;
+ }
+ dictReleaseIterator(di);
+ setDeferredArrayLen(c,replylen,numkeys);
+}
+
+/* Data used by the dict scan callback. */
+typedef struct {
+ list *keys; /* elements collected from the dict */
+ robj *o; /* must be a hash/set/zset object; NULL means scanning the current db */
+ long long type; /* the requested object type when scanning the db (LLONG_MAX = any) */
+ sds pattern; /* glob pattern to match, NULL means no pattern */
+ long sampled; /* cumulative number of keys sampled */
+} scanData;
+
+/* Helper function to compare key type in scan commands */
+/* Helper function to compare key type in scan commands.
+ * Returns 1 when object 'o' matches 'target', 0 otherwise. For module
+ * objects, 'target' is expected to hold the negated signed module type
+ * id (see REDISMODULE_TYPE_SIGN). */
+int objectTypeCompare(robj *o, long long target) {
+ /* Plain types compare directly against the OBJ_* id. */
+ if (o->type != OBJ_MODULE)
+ return o->type == target;
+ /* Module type compare: match the (negated) signed module type id. */
+ long long mt = (long long)REDISMODULE_TYPE_SIGN(((moduleValue *)o->ptr)->type->id);
+ return target == -mt;
+}
/* This callback is used by scanGenericCommand in order to collect elements
 * returned by the dictionary iterator into a list.
 *
 * Depending on 'data->o' we are scanning either the main keyspace
 * (o == NULL) or a SET/HASH/ZSET value. For HASH and ZSET two elements
 * (field and value, member and score) are appended per entry, otherwise
 * only the key is appended. */
void scanCallback(void *privdata, const dictEntry *de) {
    scanData *data = (scanData *)privdata;
    list *keys = data->keys;
    robj *o = data->o;
    sds val = NULL;
    sds key = NULL;
    /* Count every visited entry, even ones filtered out below: the
     * caller uses this counter to bound the work done per call. */
    data->sampled++;

    /* o and typename can not have values at the same time. */
    serverAssert(!((data->type != LLONG_MAX) && o));

    /* Filter an element if it isn't the type we want. */
    /* TODO: uncomment in redis 8.0
    if (!o && data->type != LLONG_MAX) {
        robj *rval = dictGetVal(de);
        if (!objectTypeCompare(rval, data->type)) return;
    }*/

    /* Filter element if it does not match the pattern. */
    sds keysds = dictGetKey(de);
    if (data->pattern) {
        if (!stringmatchlen(data->pattern, sdslen(data->pattern), keysds, sdslen(keysds), 0)) {
            return;
        }
    }

    if (o == NULL) {
        /* Main keyspace: the sds is borrowed from the dict entry (the
         * caller sets no free method on 'keys' in this case). */
        key = keysds;
    } else if (o->type == OBJ_SET) {
        key = keysds;
    } else if (o->type == OBJ_HASH) {
        key = keysds;
        val = dictGetVal(de);
    } else if (o->type == OBJ_ZSET) {
        /* ZSET: the score must be rendered to a string, so here we
         * allocate fresh sds strings, freed by the list's sdsfree
         * free method installed by the caller. */
        char buf[MAX_LONG_DOUBLE_CHARS];
        int len = ld2string(buf, sizeof(buf), *(double *)dictGetVal(de), LD_STR_AUTO);
        key = sdsdup(keysds);
        val = sdsnewlen(buf, len);
    } else {
        serverPanic("Type not handled in SCAN callback.");
    }

    listAddNodeTail(keys, key);
    if (val) listAddNodeTail(keys, val);
}
+
+/* Try to parse a SCAN cursor stored at object 'o':
+ * if the cursor is valid, store it as unsigned integer into *cursor and
+ * returns C_OK. Otherwise return C_ERR and send an error to the
+ * client. */
+int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor) {
+ char *eptr;
+
+ /* Use strtoul() because we need an *unsigned* long, so
+ * getLongLongFromObject() does not cover the whole cursor space. */
+ errno = 0;
+ *cursor = strtoul(o->ptr, &eptr, 10);
+ if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' || errno == ERANGE)
+ {
+ addReplyError(c, "invalid cursor");
+ return C_ERR;
+ }
+ return C_OK;
+}
+
/* Human readable names for the core object types, indexed by the
 * OBJ_* type id. The OBJ_MODULE slot is NULL because module values
 * carry their own per-type name (see getObjectTypeName()). */
char *obj_type_name[OBJ_TYPE_MAX] = {
    "string",
    "list",
    "set",
    "zset",
    "hash",
    NULL, /* module type is special */
    "stream"
};
+
+/* Helper function to get type from a string in scan commands */
+long long getObjectTypeByName(char *name) {
+
+ for (long long i = 0; i < OBJ_TYPE_MAX; i++) {
+ if (obj_type_name[i] && !strcasecmp(name, obj_type_name[i])) {
+ return i;
+ }
+ }
+
+ moduleType *mt = moduleTypeLookupModuleByNameIgnoreCase(name);
+ if (mt != NULL) return -(REDISMODULE_TYPE_SIGN(mt->id));
+
+ return LLONG_MAX;
+}
+
+char *getObjectTypeName(robj *o) {
+ if (o == NULL) {
+ return "none";
+ }
+
+ serverAssert(o->type >= 0 && o->type < OBJ_TYPE_MAX);
+
+ if (o->type == OBJ_MODULE) {
+ moduleValue *mv = o->ptr;
+ return mv->type->name;
+ } else {
+ return obj_type_name[o->type];
+ }
+}
+
/* This command implements SCAN, HSCAN and SSCAN commands.
 * If object 'o' is passed, then it must be a Hash, Set or Zset object, otherwise
 * if 'o' is NULL the command will operate on the dictionary associated with
 * the current database.
 *
 * When 'o' is not NULL the function assumes that the first argument in
 * the client arguments vector is a key so it skips it before iterating
 * in order to parse options.
 *
 * In the case of a Hash object the function returns both the field and value
 * of every element on the Hash. */
void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
    int i, j;
    listNode *node;
    long count = 10; /* Default COUNT when the option is not given. */
    sds pat = NULL;
    sds typename = NULL;
    long long type = LLONG_MAX; /* LLONG_MAX means "no TYPE filter". */
    int patlen = 0, use_pattern = 0;
    dict *ht;

    /* Object must be NULL (to iterate keys names), or the type of the object
     * must be Set, Sorted Set, or Hash. */
    serverAssert(o == NULL || o->type == OBJ_SET || o->type == OBJ_HASH ||
                o->type == OBJ_ZSET);

    /* Set i to the first option argument. The previous one is the cursor. */
    i = (o == NULL) ? 2 : 3; /* Skip the key argument if needed. */

    /* Step 1: Parse options (COUNT, MATCH, TYPE). */
    while (i < c->argc) {
        j = c->argc - i; /* Remaining arguments, option value included. */
        if (!strcasecmp(c->argv[i]->ptr, "count") && j >= 2) {
            if (getLongFromObjectOrReply(c, c->argv[i+1], &count, NULL)
                != C_OK)
            {
                return;
            }

            if (count < 1) {
                addReplyErrorObject(c,shared.syntaxerr);
                return;
            }

            i += 2;
        } else if (!strcasecmp(c->argv[i]->ptr, "match") && j >= 2) {
            pat = c->argv[i+1]->ptr;
            patlen = sdslen(pat);

            /* The pattern always matches if it is exactly "*", so it is
             * equivalent to disabling it. */
            use_pattern = !(patlen == 1 && pat[0] == '*');

            i += 2;
        } else if (!strcasecmp(c->argv[i]->ptr, "type") && o == NULL && j >= 2) {
            /* SCAN for a particular type only applies to the db dict */
            typename = c->argv[i+1]->ptr;
            type = getObjectTypeByName(typename);
            if (type == LLONG_MAX) {
                /* TODO: uncomment in redis 8.0
                addReplyErrorFormat(c, "unknown type name '%s'", typename);
                return; */
            }
            i+= 2;
        } else {
            addReplyErrorObject(c,shared.syntaxerr);
            return;
        }
    }

    /* Step 2: Iterate the collection.
     *
     * Note that if the object is encoded with a listpack, intset, or any other
     * representation that is not a hash table, we are sure that it is also
     * composed of a small number of elements. So to avoid taking state we
     * just return everything inside the object in a single call, setting the
     * cursor to zero to signal the end of the iteration. */

    /* Handle the case of a hash table. */
    ht = NULL;
    if (o == NULL) {
        ht = c->db->dict;
    } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) {
        ht = o->ptr;
    } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) {
        ht = o->ptr;
    } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = o->ptr;
        ht = zs->dict;
    }

    list *keys = listCreate();
    /* Set a free callback for the contents of the collected keys list.
     * For the main keyspace dict, and when we scan a key that's dict encoded
     * (we have 'ht'), we don't need to define free method because the strings
     * in the list are just a shallow copy from the pointer in the dictEntry.
     * When scanning a key with other encodings (e.g. listpack), we need to
     * free the temporary strings we add to that list.
     * The exception to the above is ZSET, where we do allocate temporary
     * strings even when scanning a dict. */
    if (o && (!ht || o->type == OBJ_ZSET)) {
        listSetFreeMethod(keys, (void (*)(void*))sdsfree);
    }

    if (ht) {
        /* We set the max number of iterations to ten times the specified
         * COUNT, so if the hash table is in a pathological state (very
         * sparsely populated) we avoid to block too much time at the cost
         * of returning no or very few elements. */
        long maxiterations = count*10;

        /* We pass scanData which have three pointers to the callback:
         * 1. data.keys: the list to which it will add new elements;
         * 2. data.o: the object containing the dictionary so that
         * it is possible to fetch more data in a type-dependent way;
         * 3. data.type: the specified type scan in the db, LLONG_MAX means
         * type matching is no needed;
         * 4. data.pattern: the pattern string
         * 5. data.sampled: the maxiteration limit is there in case we're
         * working on an empty dict, one with a lot of empty buckets, and
         * for the buckets are not empty, we need to limit the spampled number
         * to prevent a long hang time caused by filtering too many keys*/
        scanData data = {
            .keys = keys,
            .o = o,
            .type = type,
            .pattern = use_pattern ? pat : NULL,
            .sampled = 0,
        };
        /* Loop until the cursor wraps to 0 (iteration complete), the
         * iteration budget is exhausted, or enough entries were sampled. */
        do {
            cursor = dictScan(ht, cursor, scanCallback, &data);
        } while (cursor && maxiterations-- && data.sampled < count);
    } else if (o->type == OBJ_SET) {
        /* Small-encoded SET (e.g. intset): return everything at once. */
        char *str;
        char buf[LONG_STR_SIZE];
        size_t len;
        int64_t llele;
        setTypeIterator *si = setTypeInitIterator(o);
        while (setTypeNext(si, &str, &len, &llele) != -1) {
            if (str == NULL) {
                /* Integer element: render it to a string for matching. */
                len = ll2string(buf, sizeof(buf), llele);
            }
            char *key = str ? str : buf;
            if (use_pattern && !stringmatchlen(pat, sdslen(pat), key, len, 0)) {
                continue;
            }
            listAddNodeTail(keys, sdsnewlen(key, len));
        }
        setTypeReleaseIterator(si);
        cursor = 0;
    } else if ((o->type == OBJ_HASH || o->type == OBJ_ZSET) &&
               o->encoding == OBJ_ENCODING_LISTPACK)
    {
        /* Listpack-encoded HASH/ZSET: entries are stored as consecutive
         * key/value (field/value or member/score) pairs. */
        unsigned char *p = lpFirst(o->ptr);
        unsigned char *str;
        int64_t len;
        unsigned char intbuf[LP_INTBUF_SIZE];

        while(p) {
            str = lpGet(p, &len, intbuf);
            /* point to the value */
            p = lpNext(o->ptr, p);
            if (use_pattern && !stringmatchlen(pat, sdslen(pat), (char *)str, len, 0)) {
                /* jump to the next key/val pair */
                p = lpNext(o->ptr, p);
                continue;
            }
            /* add key object */
            listAddNodeTail(keys, sdsnewlen(str, len));
            /* add value object */
            str = lpGet(p, &len, intbuf);
            listAddNodeTail(keys, sdsnewlen(str, len));
            p = lpNext(o->ptr, p);
        }
        cursor = 0;
    } else {
        serverPanic("Not handled encoding in SCAN.");
    }

    /* Step 3: Filter the expired keys */
    if (o == NULL && listLength(keys)) {
        robj kobj;
        listIter li;
        listNode *ln;
        listRewind(keys, &li);
        while ((ln = listNext(&li))) {
            sds key = listNodeValue(ln);
            /* Stack robj wrapper avoids a heap allocation per key. */
            initStaticStringObject(kobj, key);
            /* Filter an element if it isn't the type we want. */
            /* TODO: remove this in redis 8.0 */
            if (typename) {
                robj* typecheck = lookupKeyReadWithFlags(c->db, &kobj, LOOKUP_NOTOUCH|LOOKUP_NONOTIFY);
                if (!typecheck || !objectTypeCompare(typecheck, type)) {
                    /* Deleting the current node is safe: listNext()
                     * already advanced past it. */
                    listDelNode(keys, ln);
                }
                continue;
            }
            if (expireIfNeeded(c->db, &kobj, 0)) {
                listDelNode(keys, ln);
            }
        }
    }

    /* Step 4: Reply to the client: [cursor, array-of-elements]. */
    addReplyArrayLen(c, 2);
    addReplyBulkLongLong(c,cursor);

    addReplyArrayLen(c, listLength(keys));
    while ((node = listFirst(keys)) != NULL) {
        sds key = listNodeValue(node);
        addReplyBulkCBuffer(c, key, sdslen(key));
        listDelNode(keys, node);
    }

    listRelease(keys);
}
+
/* SCAN cursor [MATCH pattern] [COUNT count] [TYPE type]
 *
 * The SCAN command completely relies on scanGenericCommand: passing a
 * NULL object makes it iterate the current database's keyspace. */
void scanCommand(client *c) {
    unsigned long cursor;
    if (parseScanCursorOrReply(c,c->argv[1],&cursor) == C_ERR) return;
    scanGenericCommand(c,NULL,cursor);
}
+
/* DBSIZE: reply with the number of keys in the currently selected DB. */
void dbsizeCommand(client *c) {
    addReplyLongLong(c,dictSize(c->db->dict));
}
+
/* LASTSAVE: reply with server.lastsave, the unix timestamp of the
 * last successful save to disk. */
void lastsaveCommand(client *c) {
    addReplyLongLong(c,server.lastsave);
}
+
+void typeCommand(client *c) {
+ robj *o;
+ o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH);
+ addReplyStatus(c, getObjectTypeName(o));
+}
+
/* SHUTDOWN [NOSAVE|SAVE] [NOW] [FORCE] [ABORT]
 *
 * Parse the option flags, reject illegal combinations, then either
 * abort an ongoing shutdown (ABORT), refuse the request (deny-blocking
 * client without NOW, or busy script/module without NOSAVE), or block
 * the client and start the shutdown sequence. */
void shutdownCommand(client *c) {
    int flags = SHUTDOWN_NOFLAGS;
    int abort = 0;
    for (int i = 1; i < c->argc; i++) {
        if (!strcasecmp(c->argv[i]->ptr,"nosave")) {
            flags |= SHUTDOWN_NOSAVE;
        } else if (!strcasecmp(c->argv[i]->ptr,"save")) {
            flags |= SHUTDOWN_SAVE;
        } else if (!strcasecmp(c->argv[i]->ptr, "now")) {
            flags |= SHUTDOWN_NOW;
        } else if (!strcasecmp(c->argv[i]->ptr, "force")) {
            flags |= SHUTDOWN_FORCE;
        } else if (!strcasecmp(c->argv[i]->ptr, "abort")) {
            abort = 1;
        } else {
            addReplyErrorObject(c,shared.syntaxerr);
            return;
        }
    }
    /* ABORT is exclusive with all other flags; SAVE and NOSAVE are
     * mutually exclusive. */
    if ((abort && flags != SHUTDOWN_NOFLAGS) ||
        (flags & SHUTDOWN_NOSAVE && flags & SHUTDOWN_SAVE))
    {
        /* Illegal combo. */
        addReplyErrorObject(c,shared.syntaxerr);
        return;
    }

    if (abort) {
        if (abortShutdown() == C_OK)
            addReply(c, shared.ok);
        else
            addReplyError(c, "No shutdown in progress.");
        return;
    }

    /* Without NOW the command may block the client until replicas catch
     * up, which is not allowed for deny-blocking clients. */
    if (!(flags & SHUTDOWN_NOW) && c->flags & CLIENT_DENY_BLOCKING) {
        addReplyError(c, "SHUTDOWN without NOW or ABORT isn't allowed for DENY BLOCKING client");
        return;
    }

    if (!(flags & SHUTDOWN_NOSAVE) && isInsideYieldingLongCommand()) {
        /* Script timed out. Shutdown allowed only with the NOSAVE flag. See
         * also processCommand where these errors are returned. */
        if (server.busy_module_yield_flags && server.busy_module_yield_reply) {
            addReplyErrorFormat(c, "-BUSY %s", server.busy_module_yield_reply);
        } else if (server.bus_module_yield_flags) {
            addReplyErrorObject(c, shared.slowmoduleerr);
        } else if (scriptIsEval()) {
            addReplyErrorObject(c, shared.slowevalerr);
        } else {
            addReplyErrorObject(c, shared.slowscripterr);
        }
        return;
    }

    blockClientShutdown(c);
    if (prepareForShutdown(flags) == C_OK) exit(0);
    /* If we're here, then shutdown is ongoing (the client is still blocked) or
     * failed (the client has received an error). */
}
+
/* Common implementation of RENAME and RENAMENX. When 'nx' is non-zero
 * the command fails (replying 0) instead of overwriting an existing
 * destination key. The value object is moved, not copied: we take a
 * reference, remove the source key and re-add the object under the
 * destination name, preserving any TTL the source key had. */
void renameGenericCommand(client *c, int nx) {
    robj *o;
    long long expire;
    int samekey = 0;

    /* When source and dest key is the same, no operation is performed,
     * if the key exists, however we still return an error on unexisting key. */
    if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) samekey = 1;

    if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
        return;

    if (samekey) {
        addReply(c,nx ? shared.czero : shared.ok);
        return;
    }

    /* Hold a reference: dbDelete() on the source below would otherwise
     * free the value before we add it under the new name. */
    incrRefCount(o);
    expire = getExpire(c->db,c->argv[1]);
    if (lookupKeyWrite(c->db,c->argv[2]) != NULL) {
        if (nx) {
            decrRefCount(o);
            addReply(c,shared.czero);
            return;
        }
        /* Overwrite: delete the old key before creating the new one
         * with the same name. */
        dbDelete(c->db,c->argv[2]);
    }
    dbAdd(c->db,c->argv[2],o);
    /* Carry over the TTL of the source key, if any. */
    if (expire != -1) setExpire(c,c->db,c->argv[2],expire);
    dbDelete(c->db,c->argv[1]);
    signalModifiedKey(c,c->db,c->argv[1]);
    signalModifiedKey(c,c->db,c->argv[2]);
    notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from",
        c->argv[1],c->db->id);
    notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_to",
        c->argv[2],c->db->id);
    server.dirty++;
    addReply(c,nx ? shared.cone : shared.ok);
}
+
/* RENAME src dst -- overwrites dst if it already exists. */
void renameCommand(client *c) {
    renameGenericCommand(c,0);
}
+
/* RENAMENX src dst -- replies 0 without renaming if dst exists. */
void renamenxCommand(client *c) {
    renameGenericCommand(c,1);
}
+
/* MOVE key dbid
 *
 * Move 'key' (with its TTL) from the currently selected DB to DB
 * 'dbid'. Replies 1 on success, 0 if the key does not exist in the
 * source DB or already exists in the destination DB. Not available in
 * cluster mode. */
void moveCommand(client *c) {
    robj *o;
    redisDb *src, *dst;
    int srcid, dbid;
    long long expire;

    if (server.cluster_enabled) {
        addReplyError(c,"MOVE is not allowed in cluster mode");
        return;
    }

    /* Obtain source and target DB pointers */
    src = c->db;
    srcid = c->db->id;

    if (getIntFromObjectOrReply(c, c->argv[2], &dbid, NULL) != C_OK)
        return;

    /* Temporarily select the target DB just to validate the index and
     * grab its pointer, then switch back. */
    if (selectDb(c,dbid) == C_ERR) {
        addReplyError(c,"DB index is out of range");
        return;
    }
    dst = c->db;
    selectDb(c,srcid); /* Back to the source DB */

    /* If the user is moving using as target the same
     * DB as the source DB it is probably an error. */
    if (src == dst) {
        addReplyErrorObject(c,shared.sameobjecterr);
        return;
    }

    /* Check if the element exists and get a reference */
    o = lookupKeyWrite(c->db,c->argv[1]);
    if (!o) {
        addReply(c,shared.czero);
        return;
    }
    expire = getExpire(c->db,c->argv[1]);

    /* Return zero if the key already exists in the target DB */
    if (lookupKeyWrite(dst,c->argv[1]) != NULL) {
        addReply(c,shared.czero);
        return;
    }
    dbAdd(dst,c->argv[1],o);
    if (expire != -1) setExpire(c,dst,c->argv[1],expire);
    /* The value is now referenced by both DB dicts; take the extra
     * reference before deleting it from the source. */
    incrRefCount(o);

    /* OK! key moved, free the entry in the source DB */
    dbDelete(src,c->argv[1]);
    signalModifiedKey(c,src,c->argv[1]);
    signalModifiedKey(c,dst,c->argv[1]);
    notifyKeyspaceEvent(NOTIFY_GENERIC,
                "move_from",c->argv[1],src->id);
    notifyKeyspaceEvent(NOTIFY_GENERIC,
                "move_to",c->argv[1],dst->id);

    server.dirty++;
    addReply(c,shared.cone);
}
+
/* COPY src dst [DB destination-db] [REPLACE]
 *
 * Copy the value at 'src' (with its TTL) to 'dst', optionally into
 * another DB. Replies 1 on success, 0 if the source is missing or the
 * destination exists and REPLACE was not given. Cross-DB copies are
 * rejected in cluster mode. */
void copyCommand(client *c) {
    robj *o;
    redisDb *src, *dst;
    int srcid, dbid;
    long long expire;
    int j, replace = 0, delete = 0;

    /* Obtain source and target DB pointers
     * Default target DB is the same as the source DB
     * Parse the REPLACE option and targetDB option. */
    src = c->db;
    dst = c->db;
    srcid = c->db->id;
    dbid = c->db->id;
    for (j = 3; j < c->argc; j++) {
        int additional = c->argc - j - 1;
        if (!strcasecmp(c->argv[j]->ptr,"replace")) {
            replace = 1;
        } else if (!strcasecmp(c->argv[j]->ptr, "db") && additional >= 1) {
            if (getIntFromObjectOrReply(c, c->argv[j+1], &dbid, NULL) != C_OK)
                return;

            /* Validate the index by selecting the DB, then restore the
             * client's original DB. */
            if (selectDb(c, dbid) == C_ERR) {
                addReplyError(c,"DB index is out of range");
                return;
            }
            dst = c->db;
            selectDb(c,srcid); /* Back to the source DB */
            j++; /* Consume additional arg. */
        } else {
            addReplyErrorObject(c,shared.syntaxerr);
            return;
        }
    }

    if ((server.cluster_enabled == 1) && (srcid != 0 || dbid != 0)) {
        addReplyError(c,"Copying to another database is not allowed in cluster mode");
        return;
    }

    /* If the user select the same DB as
     * the source DB and using newkey as the same key
     * it is probably an error. */
    robj *key = c->argv[1];
    robj *newkey = c->argv[2];
    if (src == dst && (sdscmp(key->ptr, newkey->ptr) == 0)) {
        addReplyErrorObject(c,shared.sameobjecterr);
        return;
    }

    /* Check if the element exists and get a reference */
    o = lookupKeyRead(c->db, key);
    if (!o) {
        addReply(c,shared.czero);
        return;
    }
    expire = getExpire(c->db,key);

    /* Return zero if the key already exists in the target DB.
     * If REPLACE option is selected, delete newkey from targetDB. */
    if (lookupKeyWrite(dst,newkey) != NULL) {
        if (replace) {
            delete = 1;
        } else {
            addReply(c,shared.czero);
            return;
        }
    }

    /* Duplicate object according to object's type. */
    robj *newobj;
    switch(o->type) {
        case OBJ_STRING: newobj = dupStringObject(o); break;
        case OBJ_LIST: newobj = listTypeDup(o); break;
        case OBJ_SET: newobj = setTypeDup(o); break;
        case OBJ_ZSET: newobj = zsetDup(o); break;
        case OBJ_HASH: newobj = hashTypeDup(o); break;
        case OBJ_STREAM: newobj = streamDup(o); break;
        case OBJ_MODULE:
            /* The module's copy callback may fail and reply itself. */
            newobj = moduleTypeDupOrReply(c, key, newkey, dst->id, o);
            if (!newobj) return;
            break;
        default:
            addReplyError(c, "unknown type object");
            return;
    }

    /* REPLACE: remove the destination key only after the duplication
     * above succeeded. */
    if (delete) {
        dbDelete(dst,newkey);
    }

    dbAdd(dst,newkey,newobj);
    if (expire != -1) setExpire(c, dst, newkey, expire);

    /* OK! key copied */
    signalModifiedKey(c,dst,c->argv[2]);
    notifyKeyspaceEvent(NOTIFY_GENERIC,"copy_to",c->argv[2],dst->id);

    server.dirty++;
    addReply(c,shared.cone);
}
+
+/* Helper function for dbSwapDatabases(): scans the list of keys that have
+ * one or more blocked clients for B[LR]POP or other blocking commands
+ * and signal the keys as ready if they are of the right type. See the comment
+ * where the function is used for more info. */
+void scanDatabaseForReadyKeys(redisDb *db) {
+ dictEntry *de;
+ dictIterator *di = dictGetSafeIterator(db->blocking_keys);
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ dictEntry *kde = dictFind(db->dict,key->ptr);
+ if (kde) {
+ robj *value = dictGetVal(kde);
+ signalKeyAsReady(db, key, value->type);
+ }
+ }
+ dictReleaseIterator(di);
+}
+
/* Since we are unblocking XREADGROUP clients in the event the
 * key was deleted/overwritten we must do the same in case the
 * database was flushed/swapped.
 *
 * For every blocking key of 'emptied', signal it as deleted if after
 * the swap it no longer exists, or exists with a different type, in
 * 'replaced_with' (NULL means the DB is simply emptied). */
void scanDatabaseForDeletedKeys(redisDb *emptied, redisDb *replaced_with) {
    dictEntry *de;
    dictIterator *di = dictGetSafeIterator(emptied->blocking_keys);
    while((de = dictNext(di)) != NULL) {
        robj *key = dictGetKey(de);
        /* -1 type sentinels make "both missing" compare equal below. */
        int existed = 0, exists = 0;
        int original_type = -1, curr_type = -1;

        dictEntry *kde = dictFind(emptied->dict, key->ptr);
        if (kde) {
            robj *value = dictGetVal(kde);
            original_type = value->type;
            existed = 1;
        }

        if (replaced_with) {
            dictEntry *kde = dictFind(replaced_with->dict, key->ptr);
            if (kde) {
                robj *value = dictGetVal(kde);
                curr_type = value->type;
                exists = 1;
            }
        }
        /* We want to try to unblock any client using a blocking XREADGROUP */
        if ((existed && !exists) || original_type != curr_type)
            signalDeletedKeyAsReady(emptied, key, original_type);
    }
    dictReleaseIterator(di);
}
+
/* Swap two databases at runtime so that all clients will magically see
 * the new database even if already connected. Note that the client
 * structure c->db points to a given DB, so we need to be smarter and
 * swap the underlying referenced structures, otherwise we would need
 * to fix all the references to the Redis DB structure.
 *
 * Returns C_ERR if at least one of the DB ids are out of range, otherwise
 * C_OK is returned. */
int dbSwapDatabases(int id1, int id2) {
    if (id1 < 0 || id1 >= server.dbnum ||
        id2 < 0 || id2 >= server.dbnum) return C_ERR;
    if (id1 == id2) return C_OK;
    /* Shallow copy of db1 used as scratch space for the swap below. */
    redisDb aux = server.db[id1];
    redisDb *db1 = &server.db[id1], *db2 = &server.db[id2];

    /* Swapdb should make transaction fail if there is any
     * client watching keys */
    touchAllWatchedKeysInDb(db1, db2);
    touchAllWatchedKeysInDb(db2, db1);

    /* Try to unblock any XREADGROUP clients if the key no longer exists. */
    scanDatabaseForDeletedKeys(db1, db2);
    scanDatabaseForDeletedKeys(db2, db1);

    /* Swap hash tables. Note that we don't swap blocking_keys,
     * ready_keys and watched_keys, since we want clients to
     * remain in the same DB they were. */
    db1->dict = db2->dict;
    db1->expires = db2->expires;
    db1->avg_ttl = db2->avg_ttl;
    db1->expires_cursor = db2->expires_cursor;

    db2->dict = aux.dict;
    db2->expires = aux.expires;
    db2->avg_ttl = aux.avg_ttl;
    db2->expires_cursor = aux.expires_cursor;

    /* Now we need to handle clients blocked on lists: as an effect
     * of swapping the two DBs, a client that was waiting for list
     * X in a given DB, may now actually be unblocked if X happens
     * to exist in the new version of the DB, after the swap.
     *
     * However normally we only do this check for efficiency reasons
     * in dbAdd() when a list is created. So here we need to rescan
     * the list of clients blocked on lists and signal lists as ready
     * if needed. */
    scanDatabaseForReadyKeys(db1);
    scanDatabaseForReadyKeys(db2);
    return C_OK;
}
+
/* Logically, this discards (flushes) the old main database, and apply the newly loaded
 * database (temp) as the main (active) database, the actual freeing of old database
 * (which will now be placed in the temp one) is done later. */
void swapMainDbWithTempDb(redisDb *tempDb) {
    if (server.cluster_enabled) {
        /* Swap slots_to_keys from tempdb just loaded with main db slots_to_keys. */
        clusterSlotToKeyMapping *aux = server.db->slots_to_keys;
        server.db->slots_to_keys = tempDb->slots_to_keys;
        tempDb->slots_to_keys = aux;
    }

    /* Swap every logical DB in turn, like dbSwapDatabases() but
     * one-directional: activedb <-> newdb (the loaded temp DB). */
    for (int i=0; i<server.dbnum; i++) {
        /* Shallow copy used as scratch space for the swap below. */
        redisDb aux = server.db[i];
        redisDb *activedb = &server.db[i], *newdb = &tempDb[i];

        /* Swapping databases should make transaction fail if there is any
         * client watching keys. */
        touchAllWatchedKeysInDb(activedb, newdb);

        /* Try to unblock any XREADGROUP clients if the key no longer exists. */
        scanDatabaseForDeletedKeys(activedb, newdb);

        /* Swap hash tables. Note that we don't swap blocking_keys,
         * ready_keys and watched_keys, since clients
         * remain in the same DB they were. */
        activedb->dict = newdb->dict;
        activedb->expires = newdb->expires;
        activedb->avg_ttl = newdb->avg_ttl;
        activedb->expires_cursor = newdb->expires_cursor;

        newdb->dict = aux.dict;
        newdb->expires = aux.expires;
        newdb->avg_ttl = aux.avg_ttl;
        newdb->expires_cursor = aux.expires_cursor;

        /* Now we need to handle clients blocked on lists: as an effect
         * of swapping the two DBs, a client that was waiting for list
         * X in a given DB, may now actually be unblocked if X happens
         * to exist in the new version of the DB, after the swap.
         *
         * However normally we only do this check for efficiency reasons
         * in dbAdd() when a list is created. So here we need to rescan
         * the list of clients blocked on lists and signal lists as ready
         * if needed. */
        scanDatabaseForReadyKeys(activedb);
    }

    /* Invalidate all client-side-caching tracking state and flush the
     * writable-replica expire bookkeeping: the whole keyspace changed. */
    trackingInvalidateKeysOnFlush(1);
    flushSlaveKeysWithExpireList();
}
+
+/* SWAPDB db1 db2 */
+void swapdbCommand(client *c) {
+ int id1, id2;
+
+ /* Not allowed in cluster mode: we have just DB 0 there. */
+ if (server.cluster_enabled) {
+ addReplyError(c,"SWAPDB is not allowed in cluster mode");
+ return;
+ }
+
+ /* Get the two DBs indexes. */
+ if (getIntFromObjectOrReply(c, c->argv[1], &id1,
+ "invalid first DB index") != C_OK)
+ return;
+
+ if (getIntFromObjectOrReply(c, c->argv[2], &id2,
+ "invalid second DB index") != C_OK)
+ return;
+
+ /* Swap... */
+ if (dbSwapDatabases(id1,id2) == C_ERR) {
+ addReplyError(c,"DB index is out of range");
+ return;
+ } else {
+ RedisModuleSwapDbInfo si = {REDISMODULE_SWAPDBINFO_VERSION,id1,id2};
+ moduleFireServerEvent(REDISMODULE_EVENT_SWAPDB,0,&si);
+ server.dirty++;
+ addReply(c,shared.ok);
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ * Expires API
+ *----------------------------------------------------------------------------*/
+
/* Remove the expire associated with 'key'. Returns 1 if an expire was
 * found and removed, 0 if the key was not volatile. Only the expires
 * dict entry is removed; the main dict entry (whose sds key the
 * expires dict shares, see setExpire()) is untouched. */
int removeExpire(redisDb *db, robj *key) {
    return dictDelete(db->expires,key->ptr) == DICT_OK;
}
+
/* Set an expire to the specified key. If the expire is set in the context
 * of an user calling a command 'c' is the client, otherwise 'c' is set
 * to NULL. The 'when' parameter is the absolute unix time in milliseconds
 * after which the key will no longer be considered valid. */
void setExpire(client *c, redisDb *db, robj *key, long long when) {
    dictEntry *kde, *de;

    /* Reuse the sds from the main dict in the expire dict */
    kde = dictFind(db->dict,key->ptr);
    serverAssertWithInfo(NULL,key,kde != NULL);
    /* dictAddOrFind() creates the entry or returns the existing one,
     * so this both sets and overwrites an expire. */
    de = dictAddOrFind(db->expires,dictGetKey(kde));
    dictSetSignedIntegerVal(de,when);

    /* Writable replica: expires set by regular clients (not by the
     * master replication link) are tracked separately, see
     * rememberSlaveKeyWithExpire(). */
    int writable_slave = server.masterhost && server.repl_slave_ro == 0;
    if (c && writable_slave && !(c->flags & CLIENT_MASTER))
        rememberSlaveKeyWithExpire(db,key);
}
+
+/* Return the expire time of the specified key, or -1 if no expire
+ * is associated with this key (i.e. the key is non volatile) */
+long long getExpire(redisDb *db, robj *key) {
+ dictEntry *de;
+
+ /* No expire? return ASAP */
+ if (dictSize(db->expires) == 0 ||
+ (de = dictFind(db->expires,key->ptr)) == NULL) return -1;
+
+ return dictGetSignedIntegerVal(de);
+}
+
/* Delete the specified expired key and propagate expire.
 *
 * Performs the (possibly lazy) deletion, records its latency under the
 * "expire-del" latency event, fires the "expired" keyspace
 * notification, signals watchers/invalidation, propagates a DEL/UNLINK
 * to replicas and the AOF, and bumps the expired-keys counter. */
void deleteExpiredKeyAndPropagate(redisDb *db, robj *keyobj) {
    mstime_t expire_latency;
    latencyStartMonitor(expire_latency);
    dbGenericDelete(db,keyobj,server.lazyfree_lazy_expire,DB_FLAG_KEY_EXPIRED);
    latencyEndMonitor(expire_latency);
    latencyAddSampleIfNeeded("expire-del",expire_latency);
    notifyKeyspaceEvent(NOTIFY_EXPIRED,"expired",keyobj,db->id);
    signalModifiedKey(NULL, db, keyobj);
    propagateDeletion(db,keyobj,server.lazyfree_lazy_expire);
    server.stat_expiredkeys++;
}
+
/* Propagate expires into slaves and the AOF file.
 * When a key expires in the master, a DEL operation for this key is sent
 * to all the slaves and the AOF file if enabled.
 *
 * This way the key expiry is centralized in one place, and since both
 * AOF and the master->slave link guarantee operation ordering, everything
 * will be consistent even if we allow write operations against expiring
 * keys.
 *
 * This function may be called from:
 * 1. Within call(): Example: Lazy-expire on key access.
 * In this case the caller doesn't have to do anything
 * because call() handles server.also_propagate(); or
 * 2. Outside of call(): Example: Active-expire, eviction.
 * In this the caller must remember to call
 * postExecutionUnitOperations, preferably just after a
 * single deletion batch, so that DELs will NOT be wrapped
 * in MULTI/EXEC */
void propagateDeletion(redisDb *db, robj *key, int lazy) {
    robj *argv[2];

    /* UNLINK when lazy freeing is enabled so replicas/AOF replay the
     * same (non-blocking) deletion flavor. */
    argv[0] = lazy ? shared.unlink : shared.del;
    argv[1] = key;
    /* Temporary references for the duration of the propagation call,
     * balanced by the decrRefCount() pair below. */
    incrRefCount(argv[0]);
    incrRefCount(argv[1]);

    /* If the master decided to expire a key we must propagate it to replicas no matter what..
     * Even if module executed a command without asking for propagation. */
    int prev_replication_allowed = server.replication_allowed;
    server.replication_allowed = 1;
    alsoPropagate(db->id,argv,2,PROPAGATE_AOF|PROPAGATE_REPL);
    server.replication_allowed = prev_replication_allowed;

    decrRefCount(argv[0]);
    decrRefCount(argv[1]);
}
+
+/* Check if the key is expired. */
+int keyIsExpired(redisDb *db, robj *key) {
+ /* Don't expire anything while loading. It will be done later. */
+ if (server.loading) return 0;
+
+ mstime_t when = getExpire(db,key);
+ mstime_t now;
+
+ if (when < 0) return 0; /* No expire for this key */
+
+ now = commandTimeSnapshot();
+
+ /* The key expired if the current (virtual or real) time is greater
+ * than the expire time of the key. */
+ return now > when;
+}
+
/* This function is called when we are going to perform some operation
 * in a given key, but such key may be already logically expired even if
 * it still exists in the database. The main way this function is called
 * is via lookupKey*() family of functions.
 *
 * The behavior of the function depends on the replication role of the
 * instance, because by default replicas do not delete expired keys. They
 * wait for DELs from the master for consistency matters. However even
 * replicas will try to have a coherent return value for the function,
 * so that read commands executed in the replica side will be able to
 * behave like if the key is expired even if still present (because the
 * master has yet to propagate the DEL).
 *
 * In masters as a side effect of finding a key which is expired, such
 * key will be evicted from the database. Also this may trigger the
 * propagation of a DEL/UNLINK command in AOF / replication stream.
 *
 * On replicas, this function does not delete expired keys by default, but
 * it still returns 1 if the key is logically expired. To force deletion
 * of logically expired keys even on replicas, use the EXPIRE_FORCE_DELETE_EXPIRED
 * flag. Note though that if the current client is executing
 * replicated commands from the master, keys are never considered expired.
 *
 * On the other hand, if you just want expiration check, but need to avoid
 * the actual key deletion and propagation of the deletion, use the
 * EXPIRE_AVOID_DELETE_EXPIRED flag.
 *
 * The return value of the function is 0 if the key is still valid,
 * otherwise the function returns 1 if the key is expired. */
int expireIfNeeded(redisDb *db, robj *key, int flags) {
    /* Global kill-switch for lazy expiration. */
    if (server.lazy_expire_disabled) return 0;
    if (!keyIsExpired(db,key)) return 0;

    /* If we are running in the context of a replica, instead of
     * evicting the expired key from the database, we return ASAP:
     * the replica key expiration is controlled by the master that will
     * send us synthesized DEL operations for expired keys. The
     * exception is when write operations are performed on writable
     * replicas.
     *
     * Still we try to return the right information to the caller,
     * that is, 0 if we think the key should be still valid, 1 if
     * we think the key is expired at this time.
     *
     * When replicating commands from the master, keys are never considered
     * expired. */
    if (server.masterhost != NULL) {
        if (server.current_client && (server.current_client->flags & CLIENT_MASTER)) return 0;
        if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return 1;
    }

    /* In some cases we're explicitly instructed to return an indication of a
     * missing key without actually deleting it, even on masters. */
    if (flags & EXPIRE_AVOID_DELETE_EXPIRED)
        return 1;

    /* If 'expire' action is paused, for whatever reason, then don't expire any key.
     * Typically, at the end of the pause we will properly expire the key OR we
     * will have failed over and the new primary will send us the expire. */
    if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return 1;

    /* The key needs to be converted from static to heap before deleted:
     * deletion keeps references to the key object past this call, which
     * a stack-allocated robj cannot satisfy. */
    int static_key = key->refcount == OBJ_STATIC_REFCOUNT;
    if (static_key) {
        key = createStringObject(key->ptr, sdslen(key->ptr));
    }
    /* Delete the key */
    deleteExpiredKeyAndPropagate(db,key);
    if (static_key) {
        decrRefCount(key);
    }
    return 1;
}
+
+/* -----------------------------------------------------------------------------
+ * API to get key arguments from commands
+ * ---------------------------------------------------------------------------*/
+
+/* Prepare the getKeysResult struct to hold numkeys, either by using the
+ * pre-allocated keysbuf or by allocating a new array on the heap.
+ *
+ * This function must be called at least once before starting to populate
+ * the result, and can be called repeatedly to enlarge the result array.
+ * Returns the (possibly reallocated) keys array; entries already stored
+ * in the result are preserved across enlargements.
+ */
+keyReference *getKeysPrepareResult(getKeysResult *result, int numkeys) {
+ /* GETKEYS_RESULT_INIT initializes keys to NULL, point it to the pre-allocated stack
+ * buffer here. */
+ if (!result->keys) {
+ serverAssert(!result->numkeys);
+ result->keys = result->keysbuf;
+ }
+
+ /* Resize if necessary */
+ if (numkeys > result->size) {
+ if (result->keys != result->keysbuf) {
+ /* We're not using a static buffer, just (re)alloc */
+ result->keys = zrealloc(result->keys, numkeys * sizeof(keyReference));
+ } else {
+ /* We are using a static buffer, copy its contents */
+ result->keys = zmalloc(numkeys * sizeof(keyReference));
+ if (result->numkeys)
+ memcpy(result->keys, result->keysbuf, result->numkeys * sizeof(keyReference));
+ }
+ result->size = numkeys;
+ }
+
+ return result->keys;
+}
+
+/* Returns a bitmask with all the flags found in any of the key specs of the command.
+ * The 'inv' argument means we'll return a mask with all flags that are missing in at least one spec.
+ * (i.e. with inv!=0 each spec contributes the complement of its flags.) */
+int64_t getAllKeySpecsFlags(struct redisCommand *cmd, int inv) {
+ int64_t flags = 0;
+ for (int j = 0; j < cmd->key_specs_num; j++) {
+ keySpec *spec = cmd->key_specs + j;
+ flags |= inv? ~spec->flags : spec->flags;
+ }
+ return flags;
+}
+
+/* Fetch the keys based on the provided key specs. Returns the number of keys found, or -1 on error.
+ * There are several flags that can be used to modify how this function finds keys in a command.
+ *
+ * GET_KEYSPEC_INCLUDE_NOT_KEYS: Return 'fake' keys as if they were keys.
+ * GET_KEYSPEC_RETURN_PARTIAL: Skips invalid and incomplete keyspecs but returns the keys
+ * found in other valid keyspecs.
+ *
+ * Key positions and their spec flags are appended to 'result'; the caller
+ * must pass a freshly initialized (or reset) result struct.
+ */
+int getKeysUsingKeySpecs(struct redisCommand *cmd, robj **argv, int argc, int search_flags, getKeysResult *result) {
+ int j, i, last, first, step;
+ keyReference *keys;
+ serverAssert(result->numkeys == 0); /* caller should initialize or reset it */
+
+ for (j = 0; j < cmd->key_specs_num; j++) {
+ keySpec *spec = cmd->key_specs + j;
+ serverAssert(spec->begin_search_type != KSPEC_BS_INVALID);
+ /* Skip specs that represent 'fake' keys */
+ if ((spec->flags & CMD_KEY_NOT_KEY) && !(search_flags & GET_KEYSPEC_INCLUDE_NOT_KEYS)) {
+ continue;
+ }
+
+ first = 0;
+ if (spec->begin_search_type == KSPEC_BS_INDEX) {
+ first = spec->bs.index.pos;
+ } else if (spec->begin_search_type == KSPEC_BS_KEYWORD) {
+ /* Scan forward (or backward when startfrom is negative) for the keyword. */
+ int start_index = spec->bs.keyword.startfrom > 0 ? spec->bs.keyword.startfrom : argc+spec->bs.keyword.startfrom;
+ int end_index = spec->bs.keyword.startfrom > 0 ? argc-1: 1;
+ for (i = start_index; i != end_index; i = start_index <= end_index ? i + 1 : i - 1) {
+ if (i >= argc || i < 1)
+ break;
+ if (!strcasecmp((char*)argv[i]->ptr,spec->bs.keyword.keyword)) {
+ first = i+1;
+ break;
+ }
+ }
+ /* keyword not found */
+ if (!first) {
+ continue;
+ }
+ } else {
+ /* unknown spec */
+ goto invalid_spec;
+ }
+
+ if (spec->find_keys_type == KSPEC_FK_RANGE) {
+ step = spec->fk.range.keystep;
+ if (spec->fk.range.lastkey >= 0) {
+ last = first + spec->fk.range.lastkey;
+ } else {
+ if (!spec->fk.range.limit) {
+ last = argc + spec->fk.range.lastkey;
+ } else {
+ serverAssert(spec->fk.range.lastkey == -1);
+ last = first + ((argc-first)/spec->fk.range.limit + spec->fk.range.lastkey);
+ }
+ }
+ } else if (spec->find_keys_type == KSPEC_FK_KEYNUM) {
+ step = spec->fk.keynum.keystep;
+ long long numkeys;
+ /* NOTE(review): the bound check below uses keynumidx alone while the
+ * access uses first+keynumidx — confirm this cannot read past argv. */
+ if (spec->fk.keynum.keynumidx >= argc)
+ goto invalid_spec;
+
+ sds keynum_str = argv[first + spec->fk.keynum.keynumidx]->ptr;
+ if (!string2ll(keynum_str,sdslen(keynum_str),&numkeys) || numkeys < 0) {
+ /* Unable to parse the numkeys argument or it was invalid */
+ goto invalid_spec;
+ }
+
+ first += spec->fk.keynum.firstkey;
+ last = first + (int)numkeys-1;
+ } else {
+ /* unknown spec */
+ goto invalid_spec;
+ }
+
+ int count = ((last - first)+1);
+ keys = getKeysPrepareResult(result, result->numkeys + count);
+
+ /* First or last is out of bounds, which indicates a syntax error */
+ if (last >= argc || last < first || first >= argc) {
+ goto invalid_spec;
+ }
+
+ for (i = first; i <= last; i += step) {
+ if (i >= argc || i < first) {
+ /* Modules commands, and standard commands with a not fixed number
+ * of arguments (negative arity parameter) do not have dispatch
+ * time arity checks, so we need to handle the case where the user
+ * passed an invalid number of arguments here. In this case we
+ * return no keys and expect the command implementation to report
+ * an arity or syntax error. */
+ if (cmd->flags & CMD_MODULE || cmd->arity < 0) {
+ continue;
+ } else {
+ serverPanic("Redis built-in command declared keys positions not matching the arity requirements.");
+ }
+ }
+ keys[result->numkeys].pos = i;
+ keys[result->numkeys].flags = spec->flags;
+ result->numkeys++;
+ }
+
+ /* Handle incomplete specs (only after we added the current spec
+ * to `keys`, just in case GET_KEYSPEC_RETURN_PARTIAL was given) */
+ if (spec->flags & CMD_KEY_INCOMPLETE) {
+ goto invalid_spec;
+ }
+
+ /* Done with this spec */
+ continue;
+
+invalid_spec:
+ if (search_flags & GET_KEYSPEC_RETURN_PARTIAL) {
+ continue;
+ } else {
+ result->numkeys = 0;
+ return -1;
+ }
+ }
+
+ return result->numkeys;
+}
+
+/* Return all the arguments that are keys in the command passed via argc / argv.
+ * This function will eventually replace getKeysFromCommand.
+ *
+ * The command returns the positions of all the key arguments inside the array,
+ * so the actual return value is a heap allocated array of integers. The
+ * length of the array is returned by reference into *numkeys.
+ *
+ * Along with the position, this command also returns the flags that are
+ * associated with how Redis will access the key.
+ *
+ * 'cmd' must point to the corresponding entry into the redisCommand
+ * table, according to the command name in argv[0]. */
+int getKeysFromCommandWithSpecs(struct redisCommand *cmd, robj **argv, int argc, int search_flags, getKeysResult *result) {
+ /* The command has at least one key-spec not marked as NOT_KEY */
+ int has_keyspec = (getAllKeySpecsFlags(cmd, 1) & CMD_KEY_NOT_KEY);
+ /* The command has at least one key-spec marked as VARIABLE_FLAGS */
+ int has_varflags = (getAllKeySpecsFlags(cmd, 0) & CMD_KEY_VARIABLE_FLAGS);
+
+ /* We prefer key-specs if there are any, and their flags are reliable. */
+ if (has_keyspec && !has_varflags) {
+ int ret = getKeysUsingKeySpecs(cmd,argv,argc,search_flags,result);
+ if (ret >= 0)
+ return ret;
+ /* If the specs returned with an error (probably an INVALID or INCOMPLETE spec),
+ * fallback to the callback method. */
+ }
+
+ /* Resort to getkeys callback methods. */
+ if (cmd->flags & CMD_MODULE_GETKEYS)
+ return moduleGetCommandKeysViaAPI(cmd,argv,argc,result);
+
+ /* We use native getkeys as a last resort, since not all these native getkeys provide
+ * flags properly (only the ones that correspond to INVALID, INCOMPLETE or VARIABLE_FLAGS do). */
+ if (cmd->getkeys_proc)
+ return cmd->getkeys_proc(cmd,argv,argc,result);
+ return 0;
+}
+
+/* This function returns a sanity check if the command may have keys:
+ * non-zero when any of the key-resolution mechanisms (getkeys callback,
+ * module GETKEYS, or a real key-spec) is present, zero otherwise. */
+int doesCommandHaveKeys(struct redisCommand *cmd) {
+ return cmd->getkeys_proc || /* has getkeys_proc (non modules) */
+ (cmd->flags & CMD_MODULE_GETKEYS) || /* module with GETKEYS */
+ (getAllKeySpecsFlags(cmd, 1) & CMD_KEY_NOT_KEY); /* has at least one key-spec not marked as NOT_KEY */
+}
+
+/* A simplified channel spec table that contains all of the redis commands
+ * and which channels they have and how they are accessed.
+ * One entry per pub/sub-related command; entries are matched by command
+ * procedure pointer (see commands_with_channels below). */
+typedef struct ChannelSpecs {
+ redisCommandProc *proc; /* Command procedure to match against */
+ uint64_t flags; /* CMD_CHANNEL_* flags for this command */
+ int start; /* The initial position of the first channel */
+ int count; /* The number of channels, or -1 if all remaining
+ * arguments are channels. */
+} ChannelSpecs;
+
+/* Table of all commands that take channel arguments. Iterators over this
+ * table must stop at the proc == NULL terminator entry. */
+ChannelSpecs commands_with_channels[] = {
+ {subscribeCommand, CMD_CHANNEL_SUBSCRIBE, 1, -1},
+ {ssubscribeCommand, CMD_CHANNEL_SUBSCRIBE, 1, -1},
+ {unsubscribeCommand, CMD_CHANNEL_UNSUBSCRIBE, 1, -1},
+ {sunsubscribeCommand, CMD_CHANNEL_UNSUBSCRIBE, 1, -1},
+ {psubscribeCommand, CMD_CHANNEL_PATTERN | CMD_CHANNEL_SUBSCRIBE, 1, -1},
+ {punsubscribeCommand, CMD_CHANNEL_PATTERN | CMD_CHANNEL_UNSUBSCRIBE, 1, -1},
+ {publishCommand, CMD_CHANNEL_PUBLISH, 1, 1},
+ {spublishCommand, CMD_CHANNEL_PUBLISH, 1, 1},
+ {NULL,0} /* Terminator. */
+};
+
+/* Returns 1 if the command may access any channels matched by the flags
+ * argument (a mask of CMD_CHANNEL_* bits), 0 otherwise. */
+int doesCommandHaveChannelsWithFlags(struct redisCommand *cmd, int flags) {
+ /* If a module declares get channels, we are just going to assume
+ * has channels. This API is allowed to return false positives. */
+ if (cmd->flags & CMD_MODULE_GETCHANNELS) {
+ return 1;
+ }
+ /* Scan the static table until the NULL-proc terminator. */
+ for (ChannelSpecs *spec = commands_with_channels; spec->proc != NULL; spec += 1) {
+ if (cmd->proc == spec->proc) {
+ return !!(spec->flags & flags);
+ }
+ }
+ return 0;
+}
+
+/* Return all the arguments that are channels in the command passed via argc / argv.
+ * This function behaves similar to getKeysFromCommandWithSpecs, but with channels
+ * instead of keys.
+ *
+ * The command returns the positions of all the channel arguments inside the array,
+ * so the actual return value is a heap allocated array of integers. The
+ * length of the array is returned by reference into *numkeys.
+ *
+ * Along with the position, this command also returns the flags that are
+ * associated with how Redis will access the channel.
+ *
+ * 'cmd' must point to the corresponding entry into the redisCommand
+ * table, according to the command name in argv[0]. */
+int getChannelsFromCommand(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ keyReference *keys;
+ /* If a module declares get channels, use that. */
+ if (cmd->flags & CMD_MODULE_GETCHANNELS) {
+ return moduleGetCommandChannelsViaAPI(cmd, argv, argc, result);
+ }
+ /* Otherwise check the channel spec table. Stop at the proc == NULL
+ * terminator entry: iterating on 'spec != NULL' would never terminate
+ * (the pointer itself is never NULL) and would read past the end of
+ * the table for commands that have no channels. */
+ for (ChannelSpecs *spec = commands_with_channels; spec->proc != NULL; spec += 1) {
+ if (cmd->proc == spec->proc) {
+ int start = spec->start;
+ int stop = (spec->count == -1) ? argc : start + spec->count;
+ if (stop > argc) stop = argc;
+ int count = 0;
+ keys = getKeysPrepareResult(result, stop - start);
+ for (int i = start; i < stop; i++ ) {
+ keys[count].pos = i;
+ keys[count++].flags = spec->flags;
+ }
+ result->numkeys = count;
+ return count;
+ }
+ }
+ return 0;
+}
+
+/* The base case is to use the keys position as given in the command table
+ * (firstkey, lastkey, step).
+ * This function works only on command with the legacy_range_key_spec,
+ * all other commands should be handled by getkeys_proc.
+ *
+ * If the commands keyspec is incomplete, no keys will be returned, and the provided
+ * keys function should be called instead.
+ *
+ * NOTE: This function does not guarantee populating the flags for
+ * the keys, in order to get flags you should use getKeysUsingKeySpecs. */
+int getKeysUsingLegacyRangeSpec(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ int j, i = 0, last, first, step;
+ keyReference *keys;
+ UNUSED(argv);
+
+ /* No legacy range spec declared: the command has no positional keys. */
+ if (cmd->legacy_range_key_spec.begin_search_type == KSPEC_BS_INVALID) {
+ result->numkeys = 0;
+ return 0;
+ }
+
+ first = cmd->legacy_range_key_spec.bs.index.pos;
+ last = cmd->legacy_range_key_spec.fk.range.lastkey;
+ if (last >= 0)
+ last += first;
+ step = cmd->legacy_range_key_spec.fk.range.keystep;
+
+ /* A negative lastkey counts from the end of argv. */
+ if (last < 0) last = argc+last;
+
+ int count = ((last - first)+1);
+ keys = getKeysPrepareResult(result, count);
+
+ for (j = first; j <= last; j += step) {
+ if (j >= argc || j < first) {
+ /* Modules commands, and standard commands with a not fixed number
+ * of arguments (negative arity parameter) do not have dispatch
+ * time arity checks, so we need to handle the case where the user
+ * passed an invalid number of arguments here. In this case we
+ * return no keys and expect the command implementation to report
+ * an arity or syntax error. */
+ if (cmd->flags & CMD_MODULE || cmd->arity < 0) {
+ result->numkeys = 0;
+ return 0;
+ } else {
+ serverPanic("Redis built-in command declared keys positions not matching the arity requirements.");
+ }
+ }
+ keys[i].pos = j;
+ /* Flags are omitted from legacy key specs */
+ keys[i++].flags = 0;
+ }
+ result->numkeys = i;
+ return i;
+}
+
+/* Return all the arguments that are keys in the command passed via argc / argv.
+ *
+ * The command returns the positions of all the key arguments inside the array,
+ * so the actual return value is a heap allocated array of integers. The
+ * length of the array is returned by reference into *numkeys.
+ *
+ * 'cmd' must point to the corresponding entry into the redisCommand
+ * table, according to the command name in argv[0].
+ *
+ * This function uses the command table if a command-specific helper function
+ * is not required, otherwise it calls the command-specific function. */
+int getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ if (cmd->flags & CMD_MODULE_GETKEYS) {
+ return moduleGetCommandKeysViaAPI(cmd,argv,argc,result);
+ } else if (cmd->getkeys_proc) {
+ return cmd->getkeys_proc(cmd,argv,argc,result);
+ } else {
+ return getKeysUsingLegacyRangeSpec(cmd,argv,argc,result);
+ }
+}
+
+/* Free the result of getKeysFromCommand. Only frees the heap array;
+ * when the result still points at the embedded keysbuf nothing was
+ * allocated so nothing is freed. Safe to call with result == NULL. */
+void getKeysFreeResult(getKeysResult *result) {
+ if (result && result->keys != result->keysbuf)
+ zfree(result->keys);
+}
+
+/* Helper function to extract keys from following commands:
+ * COMMAND [destkey] <num-keys> <key> [...] <key> [...] ... <options>
+ *
+ * eg:
+ * ZUNION <num-keys> <key> <key> ... <key> <options>
+ * ZUNIONSTORE <destkey> <num-keys> <key> <key> ... <key> <options>
+ *
+ * 'storeKeyOfs': destkey index, 0 means destkey not exists.
+ * 'keyCountOfs': num-keys index.
+ * 'firstKeyOfs': firstkey index.
+ * 'keyStep': the interval of each key, usually this value is 1.
+ *
+ * The commands using this function have a fully defined keyspec, so returning flags isn't needed. */
+int genericGetKeys(int storeKeyOfs, int keyCountOfs, int firstKeyOfs, int keyStep,
+ robj **argv, int argc, getKeysResult *result) {
+ int i, num;
+ keyReference *keys;
+
+ num = atoi(argv[keyCountOfs]->ptr);
+ /* Sanity check. Don't return any key if the command is going to
+ * reply with syntax error. (no input keys). */
+ if (num < 1 || num > (argc - firstKeyOfs)/keyStep) {
+ result->numkeys = 0;
+ return 0;
+ }
+
+ /* Reserve one extra slot for the optional destination key. */
+ int numkeys = storeKeyOfs ? num + 1 : num;
+ keys = getKeysPrepareResult(result, numkeys);
+ result->numkeys = numkeys;
+
+ /* Add all key positions for argv[firstKeyOfs...n] to keys[] */
+ for (i = 0; i < num; i++) {
+ keys[i].pos = firstKeyOfs+(i*keyStep);
+ keys[i].flags = 0;
+ }
+
+ if (storeKeyOfs) {
+ keys[num].pos = storeKeyOfs;
+ keys[num].flags = 0;
+ }
+ return result->numkeys;
+}
+
+/* SINTERCARD numkeys key [key ...] [LIMIT limit] — no destkey, numkeys at 1, keys from 2. */
+int sintercardGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ UNUSED(cmd);
+ return genericGetKeys(0, 1, 2, 1, argv, argc, result);
+}
+
+/* ZUNIONSTORE/ZINTERSTORE/ZDIFFSTORE destkey numkeys key [key ...] — destkey at 1, numkeys at 2. */
+int zunionInterDiffStoreGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ UNUSED(cmd);
+ return genericGetKeys(1, 2, 3, 1, argv, argc, result);
+}
+
+/* ZUNION/ZINTER/ZDIFF numkeys key [key ...] — no destkey, numkeys at 1, keys from 2. */
+int zunionInterDiffGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ UNUSED(cmd);
+ return genericGetKeys(0, 1, 2, 1, argv, argc, result);
+}
+
+/* EVAL/EVALSHA script numkeys key [key ...] — numkeys at 2, keys from 3. */
+int evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ UNUSED(cmd);
+ return genericGetKeys(0, 2, 3, 1, argv, argc, result);
+}
+
+/* FCALL/FCALL_RO function numkeys key [key ...] — numkeys at 2, keys from 3. */
+int functionGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ UNUSED(cmd);
+ return genericGetKeys(0, 2, 3, 1, argv, argc, result);
+}
+
+/* LMPOP numkeys key [key ...] <LEFT|RIGHT> [COUNT count] — numkeys at 1, keys from 2. */
+int lmpopGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ UNUSED(cmd);
+ return genericGetKeys(0, 1, 2, 1, argv, argc, result);
+}
+
+/* BLMPOP timeout numkeys key [key ...] ... — timeout shifts everything: numkeys at 2, keys from 3. */
+int blmpopGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ UNUSED(cmd);
+ return genericGetKeys(0, 2, 3, 1, argv, argc, result);
+}
+
+/* ZMPOP numkeys key [key ...] <MIN|MAX> [COUNT count] — numkeys at 1, keys from 2. */
+int zmpopGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ UNUSED(cmd);
+ return genericGetKeys(0, 1, 2, 1, argv, argc, result);
+}
+
+/* BZMPOP timeout numkeys key [key ...] ... — timeout shifts everything: numkeys at 2, keys from 3. */
+int bzmpopGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ UNUSED(cmd);
+ return genericGetKeys(0, 2, 3, 1, argv, argc, result);
+}
+
+/* Helper function to extract keys from the SORT RO command.
+ *
+ * SORT <sort-key>
+ *
+ * The second argument of SORT is always a key, however an arbitrary number of
+ * keys may be accessed while doing the sort (the BY and GET args), so the
+ * key-spec declares incomplete keys which is why we have to provide a concrete
+ * implementation to fetch the keys.
+ *
+ * This command declares incomplete keys, so the flags are correctly set for this function */
+int sortROGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ keyReference *keys;
+ UNUSED(cmd);
+ UNUSED(argv);
+ UNUSED(argc);
+
+ /* Read-only variant: only the sort key itself is reported. */
+ keys = getKeysPrepareResult(result, 1);
+ keys[0].pos = 1; /* <sort-key> is always present. */
+ keys[0].flags = CMD_KEY_RO | CMD_KEY_ACCESS;
+ result->numkeys = 1;
+ return result->numkeys;
+}
+
+/* Helper function to extract keys from the SORT command.
+ *
+ * SORT <sort-key> ... STORE <store-key> ...
+ *
+ * The first argument of SORT is always a key, however a list of options
+ * follow in SQL-alike style. Here we parse just the minimum in order to
+ * correctly identify keys in the "STORE" option.
+ *
+ * This command declares incomplete keys, so the flags are correctly set for this function */
+int sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ int i, j, num, found_store = 0;
+ keyReference *keys;
+ UNUSED(cmd);
+
+ num = 0;
+ keys = getKeysPrepareResult(result, 2); /* Alloc 2 places for the worst case. */
+ keys[num].pos = 1; /* <sort-key> is always present. */
+ keys[num++].flags = CMD_KEY_RO | CMD_KEY_ACCESS;
+
+ /* Search for STORE option. By default we consider options to don't
+ * have arguments, so if we find an unknown option name we scan the
+ * next. However there are options with 1 or 2 arguments, so we
+ * provide a list here in order to skip the right number of args. */
+ struct {
+ char *name;
+ int skip;
+ } skiplist[] = {
+ {"limit", 2},
+ {"get", 1},
+ {"by", 1},
+ {NULL, 0} /* End of elements. */
+ };
+
+ for (i = 2; i < argc; i++) {
+ for (j = 0; skiplist[j].name != NULL; j++) {
+ if (!strcasecmp(argv[i]->ptr,skiplist[j].name)) {
+ i += skiplist[j].skip;
+ break;
+ } else if (!strcasecmp(argv[i]->ptr,"store") && i+1 < argc) {
+ /* Note: we don't increment "num" here and continue the loop
+ * to be sure to process the *last* "STORE" option if multiple
+ * ones are provided. This is same behavior as SORT. */
+ found_store = 1;
+ keys[num].pos = i+1; /* <store-key> */
+ keys[num].flags = CMD_KEY_OW | CMD_KEY_UPDATE;
+ break;
+ }
+ }
+ }
+ /* keys[1] was only filled in when a STORE clause was seen. */
+ result->numkeys = num + found_store;
+ return result->numkeys;
+}
+
+/* Extract keys for MIGRATE. This command declares incomplete keys, so the
+ * flags are correctly set for this function.
+ *
+ * MIGRATE host port <key|""> dbid timeout [COPY] [REPLACE]
+ * [AUTH password | AUTH2 user password] [KEYS key [key ...]]
+ *
+ * Either argv[3] is the single key, or argv[3] is the empty string and
+ * the keys follow the KEYS keyword. */
+int migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ int i, j, num, first;
+ keyReference *keys;
+ UNUSED(cmd);
+
+ /* Assume the obvious form. */
+ first = 3;
+ num = 1;
+
+ /* But check for the extended one with the KEYS option. */
+ struct {
+ char* name;
+ int skip;
+ } skip_keywords[] = {
+ {"copy", 0},
+ {"replace", 0},
+ {"auth", 1},
+ {"auth2", 2},
+ {NULL, 0}
+ };
+ if (argc > 6) {
+ for (i = 6; i < argc; i++) {
+ if (!strcasecmp(argv[i]->ptr, "keys")) {
+ if (sdslen(argv[3]->ptr) > 0) {
+ /* This is a syntax error. So ignore the keys and leave
+ * the syntax error to be handled by migrateCommand. */
+ num = 0;
+ } else {
+ first = i + 1;
+ num = argc - first;
+ }
+ break;
+ }
+ /* Skip known option keywords together with their arguments so an
+ * argument value is never mistaken for the KEYS keyword. */
+ for (j = 0; skip_keywords[j].name != NULL; j++) {
+ if (!strcasecmp(argv[i]->ptr, skip_keywords[j].name)) {
+ i += skip_keywords[j].skip;
+ break;
+ }
+ }
+ }
+ }
+
+ keys = getKeysPrepareResult(result, num);
+ for (i = 0; i < num; i++) {
+ keys[i].pos = first+i;
+ keys[i].flags = CMD_KEY_RW | CMD_KEY_ACCESS | CMD_KEY_DELETE;
+ }
+ result->numkeys = num;
+ return num;
+}
+
+/* Helper function to extract keys from following commands:
+ * GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC]
+ * [COUNT count] [STORE key|STOREDIST key]
+ * GEORADIUSBYMEMBER key member radius unit ... options ...
+ *
+ * This command has a fully defined keyspec, so returning flags isn't needed. */
+int georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ int i, num;
+ keyReference *keys;
+ UNUSED(cmd);
+
+ /* Check for the presence of the stored key in the command */
+ int stored_key = -1;
+ for (i = 5; i < argc; i++) {
+ char *arg = argv[i]->ptr;
+ /* For the case when user specifies both "store" and "storedist" options, the
+ * second key specified would override the first key. This behavior is kept
+ * the same as in georadiusCommand method.
+ */
+ if ((!strcasecmp(arg, "store") || !strcasecmp(arg, "storedist")) && ((i+1) < argc)) {
+ stored_key = i+1;
+ i++;
+ }
+ }
+ num = 1 + (stored_key == -1 ? 0 : 1);
+
+ /* Keys in the command come from two places:
+ * argv[1] = key,
+ * argv[5...n] = stored key if present
+ */
+ keys = getKeysPrepareResult(result, num);
+
+ /* Add all key positions to keys[] */
+ keys[0].pos = 1;
+ keys[0].flags = 0;
+ if(num > 1) {
+ keys[1].pos = stored_key;
+ keys[1].flags = 0;
+ }
+ result->numkeys = num;
+ return num;
+}
+
+/* XREAD [BLOCK <milliseconds>] [COUNT <count>] [GROUP <groupname> <ttl>]
+ * STREAMS key_1 key_2 ... key_N ID_1 ID_2 ... ID_N
+ *
+ * This command has a fully defined keyspec, so returning flags isn't needed. */
+int xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ int i, num = 0;
+ keyReference *keys;
+ UNUSED(cmd);
+
+ /* We need to parse the options of the command in order to seek the first
+ * "STREAMS" string which is actually the option. This is needed because
+ * "STREAMS" could also be the name of the consumer group and even the
+ * name of the stream key. */
+ int streams_pos = -1;
+ for (i = 1; i < argc; i++) {
+ char *arg = argv[i]->ptr;
+ if (!strcasecmp(arg, "block")) {
+ i++; /* Skip option argument. */
+ } else if (!strcasecmp(arg, "count")) {
+ i++; /* Skip option argument. */
+ } else if (!strcasecmp(arg, "group")) {
+ i += 2; /* Skip option argument. */
+ } else if (!strcasecmp(arg, "noack")) {
+ /* Nothing to do. */
+ } else if (!strcasecmp(arg, "streams")) {
+ streams_pos = i;
+ break;
+ } else {
+ break; /* Syntax error. */
+ }
+ }
+ if (streams_pos != -1) num = argc - streams_pos - 1;
+
+ /* Syntax error. */
+ if (streams_pos == -1 || num == 0 || num % 2 != 0) {
+ result->numkeys = 0;
+ return 0;
+ }
+ num /= 2; /* We have half the keys as there are arguments because
+ there are also the IDs, one per key. */
+
+ /* The keys are the first half of the arguments after STREAMS. */
+ keys = getKeysPrepareResult(result, num);
+ for (i = streams_pos+1; i < argc-num; i++) {
+ keys[i-streams_pos-1].pos = i;
+ keys[i-streams_pos-1].flags = 0;
+ }
+ result->numkeys = num;
+ return num;
+}
+
+/* Helper function to extract keys from the SET command, which may have
+ * a read flag if the GET argument is passed in. */
+int setGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ keyReference *keys;
+ UNUSED(cmd);
+
+ keys = getKeysPrepareResult(result, 1);
+ keys[0].pos = 1; /* We always know the position */
+ result->numkeys = 1;
+
+ /* Manual case-insensitive match for the literal "GET" option
+ * (starting at argv[3], after SET <key> <value>). */
+ for (int i = 3; i < argc; i++) {
+ char *arg = argv[i]->ptr;
+ if ((arg[0] == 'g' || arg[0] == 'G') &&
+ (arg[1] == 'e' || arg[1] == 'E') &&
+ (arg[2] == 't' || arg[2] == 'T') && arg[3] == '\0')
+ {
+ keys[0].flags = CMD_KEY_RW | CMD_KEY_ACCESS | CMD_KEY_UPDATE;
+ return 1;
+ }
+ }
+
+ /* No GET option: plain SET overwrites without reading. */
+ keys[0].flags = CMD_KEY_OW | CMD_KEY_UPDATE;
+ return 1;
+}
+
+/* Helper function to extract keys from the BITFIELD command, which may be
+ * read-only if the BITFIELD GET subcommand is used. */
+int bitfieldGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+ keyReference *keys;
+ int readonly = 1;
+ UNUSED(cmd);
+
+ keys = getKeysPrepareResult(result, 1);
+ keys[0].pos = 1; /* We always know the position */
+ result->numkeys = 1;
+
+ /* Walk the subcommand list; any SET/INCRBY (or a syntax error)
+ * makes the key read-write. */
+ for (int i = 2; i < argc; i++) {
+ int remargs = argc - i - 1; /* Remaining args other than current. */
+ char *arg = argv[i]->ptr;
+ if (!strcasecmp(arg, "get") && remargs >= 2) {
+ i += 2;
+ } else if ((!strcasecmp(arg, "set") || !strcasecmp(arg, "incrby")) && remargs >= 3) {
+ readonly = 0;
+ i += 3;
+ break;
+ } else if (!strcasecmp(arg, "overflow") && remargs >= 1) {
+ i += 1;
+ } else {
+ readonly = 0; /* Syntax error. safer to assume non-RO. */
+ break;
+ }
+ }
+
+ if (readonly) {
+ keys[0].flags = CMD_KEY_RO | CMD_KEY_ACCESS;
+ } else {
+ keys[0].flags = CMD_KEY_RW | CMD_KEY_ACCESS | CMD_KEY_UPDATE;
+ }
+ return 1;
+}
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 0000000..684f692
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,2322 @@
+/*
+ * Copyright (c) 2009-2020, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2020, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "util.h"
+#include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
+#include "crc64.h"
+#include "bio.h"
+#include "quicklist.h"
+#include "fpconv_dtoa.h"
+#include "cluster.h"
+
+#include <arpa/inet.h>
+#include <signal.h>
+#include <dlfcn.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#ifdef HAVE_BACKTRACE
+#include <execinfo.h>
+#ifndef __OpenBSD__
+#include <ucontext.h>
+#else
+typedef ucontext_t sigcontext_t;
+#endif
+#endif /* HAVE_BACKTRACE */
+
+#ifdef __CYGWIN__
+#ifndef SA_ONSTACK
+#define SA_ONSTACK 0x08000000
+#endif
+#endif
+
+#if defined(__APPLE__) && defined(__arm64__)
+#include <mach/mach.h>
+#endif
+
+/* Globals */
+static int bug_report_start = 0; /* True if bug report header was already logged. */
+static pthread_mutex_t bug_report_start_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Forward declarations */
+void bugReportStart(void);
+void printCrashReport(void);
+void bugReportEnd(int killViaSignal, int sig);
+void logStackTrace(void *eip, int uplevel);
+
+/* ================================= Debugging ============================== */
+
+/* Compute the sha1 of string at 's' with 'len' bytes long.
+ * The SHA1 is then xored against the string pointed by digest.
+ * Since xor is commutative, this operation is used in order to
+ * "add" digests relative to unordered elements.
+ *
+ * So digest(a,b,c,d) will be the same of digest(b,a,c,d)
+ *
+ * 'digest' must point to at least 20 writable bytes (SHA1 size). */
+void xorDigest(unsigned char *digest, const void *ptr, size_t len) {
+ SHA1_CTX ctx;
+ unsigned char hash[20];
+ int j;
+
+ SHA1Init(&ctx);
+ SHA1Update(&ctx,ptr,len);
+ SHA1Final(hash,&ctx);
+
+ for (j = 0; j < 20; j++)
+ digest[j] ^= hash[j];
+}
+
+/* Like xorDigest() but takes a string robj, decoding it first if it is
+ * integer-encoded. Does not retain a reference to 'o'. */
+void xorStringObjectDigest(unsigned char *digest, robj *o) {
+ o = getDecodedObject(o);
+ xorDigest(digest,o->ptr,sdslen(o->ptr));
+ decrRefCount(o);
+}
+
+/* This function instead of just computing the SHA1 and xoring it
+ * against digest, also perform the digest of "digest" itself and
+ * replace the old value with the new one.
+ *
+ * So the final digest will be:
+ *
+ * digest = SHA1(digest xor SHA1(data))
+ *
+ * This function is used every time we want to preserve the order so
+ * that digest(a,b,c,d) will be different than digest(b,c,d,a)
+ *
+ * Also note that mixdigest("foo") followed by mixdigest("bar")
+ * will lead to a different digest compared to "fo", "obar".
+ *
+ * 'digest' must point to at least 20 writable bytes (SHA1 size). */
+void mixDigest(unsigned char *digest, const void *ptr, size_t len) {
+ SHA1_CTX ctx;
+
+ xorDigest(digest,ptr,len);
+ SHA1Init(&ctx);
+ SHA1Update(&ctx,digest,20);
+ SHA1Final(digest,&ctx);
+}
+
+/* Like mixDigest() but takes a string robj, decoding it first if it is
+ * integer-encoded. Does not retain a reference to 'o'. */
+void mixStringObjectDigest(unsigned char *digest, robj *o) {
+ o = getDecodedObject(o);
+ mixDigest(digest,o->ptr,sdslen(o->ptr));
+ decrRefCount(o);
+}
+
+/* This function computes the digest of a data structure stored in the
+ * object 'o'. It is the core of the DEBUG DIGEST command: when taking the
+ * digest of a whole dataset, we take the digest of the key and the value
+ * pair, and xor all those together.
+ *
+ * Note that this function does not reset the initial 'digest' passed, it
+ * will continue mixing this object digest to anything that was already
+ * present.
+ *
+ * Unordered containers (sets, hashes, zset elements) are folded in with
+ * xorDigest so element order does not matter; ordered data (lists,
+ * streams) uses mixDigest so order does matter. Panics on an unknown
+ * object type or sorted set encoding. */
+void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o) {
+ uint32_t aux = htonl(o->type);
+ mixDigest(digest,&aux,sizeof(aux));
+ long long expiretime = getExpire(db,keyobj);
+ char buf[128];
+
+ /* Save the key and associated value */
+ if (o->type == OBJ_STRING) {
+ mixStringObjectDigest(digest,o);
+ } else if (o->type == OBJ_LIST) {
+ listTypeIterator *li = listTypeInitIterator(o,0,LIST_TAIL);
+ listTypeEntry entry;
+ while(listTypeNext(li,&entry)) {
+ robj *eleobj = listTypeGet(&entry);
+ mixStringObjectDigest(digest,eleobj);
+ decrRefCount(eleobj);
+ }
+ listTypeReleaseIterator(li);
+ } else if (o->type == OBJ_SET) {
+ setTypeIterator *si = setTypeInitIterator(o);
+ sds sdsele;
+ while((sdsele = setTypeNextObject(si)) != NULL) {
+ xorDigest(digest,sdsele,sdslen(sdsele));
+ sdsfree(sdsele);
+ }
+ setTypeReleaseIterator(si);
+ } else if (o->type == OBJ_ZSET) {
+ unsigned char eledigest[20];
+
+ if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *zl = o->ptr;
+ unsigned char *eptr, *sptr;
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vll;
+ double score;
+
+ eptr = lpSeek(zl,0);
+ serverAssert(eptr != NULL);
+ sptr = lpNext(zl,eptr);
+ serverAssert(sptr != NULL);
+
+ while (eptr != NULL) {
+ vstr = lpGetValue(eptr,&vlen,&vll);
+ score = zzlGetScore(sptr);
+
+ /* Per-element digest: member mixed with score, then xored
+ * into the aggregate so element order is irrelevant. */
+ memset(eledigest,0,20);
+ if (vstr != NULL) {
+ mixDigest(eledigest,vstr,vlen);
+ } else {
+ ll2string(buf,sizeof(buf),vll);
+ mixDigest(eledigest,buf,strlen(buf));
+ }
+ const int len = fpconv_dtoa(score, buf);
+ buf[len] = '\0';
+ mixDigest(eledigest,buf,strlen(buf));
+ xorDigest(digest,eledigest,20);
+ zzlNext(zl,&eptr,&sptr);
+ }
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = o->ptr;
+ dictIterator *di = dictGetIterator(zs->dict);
+ dictEntry *de;
+
+ while((de = dictNext(di)) != NULL) {
+ sds sdsele = dictGetKey(de);
+ double *score = dictGetVal(de);
+ const int len = fpconv_dtoa(*score, buf);
+ buf[len] = '\0';
+ memset(eledigest,0,20);
+ mixDigest(eledigest,sdsele,sdslen(sdsele));
+ mixDigest(eledigest,buf,strlen(buf));
+ xorDigest(digest,eledigest,20);
+ }
+ dictReleaseIterator(di);
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ } else if (o->type == OBJ_HASH) {
+ hashTypeIterator *hi = hashTypeInitIterator(o);
+ while (hashTypeNext(hi) != C_ERR) {
+ unsigned char eledigest[20];
+ sds sdsele;
+
+ /* Field and value are mixed in order (pair identity), then the
+ * pair digest is xored into the aggregate. */
+ memset(eledigest,0,20);
+ sdsele = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
+ mixDigest(eledigest,sdsele,sdslen(sdsele));
+ sdsfree(sdsele);
+ sdsele = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
+ mixDigest(eledigest,sdsele,sdslen(sdsele));
+ sdsfree(sdsele);
+ xorDigest(digest,eledigest,20);
+ }
+ hashTypeReleaseIterator(hi);
+ } else if (o->type == OBJ_STREAM) {
+ streamIterator si;
+ streamIteratorStart(&si,o->ptr,NULL,NULL,0);
+ streamID id;
+ int64_t numfields;
+
+ while(streamIteratorGetID(&si,&id,&numfields)) {
+ sds itemid = sdscatfmt(sdsempty(),"%U.%U",id.ms,id.seq);
+ mixDigest(digest,itemid,sdslen(itemid));
+ sdsfree(itemid);
+
+ while(numfields--) {
+ unsigned char *field, *value;
+ int64_t field_len, value_len;
+ streamIteratorGetField(&si,&field,&value,
+ &field_len,&value_len);
+ mixDigest(digest,field,field_len);
+ mixDigest(digest,value,value_len);
+ }
+ }
+ streamIteratorStop(&si);
+ } else if (o->type == OBJ_MODULE) {
+ RedisModuleDigest md = {{0},{0},keyobj,db->id};
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+ moduleInitDigestContext(md);
+ /* Module types without a digest callback contribute nothing. */
+ if (mt->digest) {
+ mt->digest(&md,mv->value);
+ xorDigest(digest,md.x,sizeof(md.x));
+ }
+ } else {
+ serverPanic("Unknown object type");
+ }
+ /* If the key has an expire, add it to the mix */
+ if (expiretime != -1) xorDigest(digest,"!!expire!!",10);
+}
+
+/* Compute the dataset digest. Since keys, sets elements, hashes elements
+ * are not ordered, we use a trick: every aggregate digest is the xor
+ * of the digests of their elements. This way the order will not change
+ * the result. For list instead we use a feedback entering the output digest
+ * as input in order to ensure that a different ordered list will result in
+ * a different digest. */
+void computeDatasetDigest(unsigned char *final) {
+ unsigned char digest[20];
+ dictIterator *di = NULL;
+ dictEntry *de;
+ int j;
+ uint32_t aux;
+
+ memset(final,0,20); /* Start with a clean result */
+
+ for (j = 0; j < server.dbnum; j++) {
+ redisDb *db = server.db+j;
+
+ if (dictSize(db->dict) == 0) continue;
+ di = dictGetSafeIterator(db->dict);
+
+ /* hash the DB id, so the same dataset moved in a different
+ * DB will lead to a different digest */
+ aux = htonl(j);
+ mixDigest(final,&aux,sizeof(aux));
+
+ /* Iterate this DB writing every entry */
+ while((de = dictNext(di)) != NULL) {
+ sds key;
+ robj *keyobj, *o;
+
+ memset(digest,0,20); /* This key-val digest */
+ key = dictGetKey(de);
+ keyobj = createStringObject(key,sdslen(key));
+
+ mixDigest(digest,key,sdslen(key));
+
+ o = dictGetVal(de);
+ xorObjectDigest(db,keyobj,digest,o);
+
+ /* We can finally xor the key-val digest to the final digest */
+ xorDigest(final,digest,20);
+ decrRefCount(keyobj);
+ }
+ dictReleaseIterator(di);
+ }
+}
+
+#ifdef USE_JEMALLOC
/* DEBUG MALLCTL <key> [<val>]: read or write an integer jemalloc tuning
 * option via je_mallctl(). The width of the option is not known up front,
 * so we probe starting from the widest type and shrink on EINVAL. */
void mallctl_int(client *c, robj **argv, int argc) {
    int ret;
    /* start with the biggest size (int64), and if that fails, try smaller sizes (int32, bool) */
    int64_t old = 0, val;
    if (argc > 1) {
        long long ll;
        if (getLongLongFromObjectOrReply(c, argv[1], &ll, NULL) != C_OK)
            return;
        val = ll;
    }
    size_t sz = sizeof(old);
    while (sz > 0) {
        /* je_mallctl() may update the size it is handed, so pass a copy. */
        size_t zz = sz;
        if ((ret=je_mallctl(argv[0]->ptr, &old, &zz, argc > 1? &val: NULL, argc > 1?sz: 0))) {
            if (ret == EPERM && argc > 1) {
                /* if this option is write only, try just writing to it. */
                if (!(ret=je_mallctl(argv[0]->ptr, NULL, 0, &val, sz))) {
                    addReply(c, shared.ok);
                    return;
                }
            }
            if (ret==EINVAL) {
                /* size might be wrong, try a smaller one */
                sz /= 2;
#if BYTE_ORDER == BIG_ENDIAN
                /* NOTE(review): on big endian the low-order bytes of the
                 * value sit at the high addresses; shift so the narrower
                 * retry still passes the intended value — verify against
                 * jemalloc's expectations. */
                val <<= 8*sz;
#endif
                continue;
            }
            addReplyErrorFormat(c,"%s", strerror(ret));
            return;
        } else {
#if BYTE_ORDER == BIG_ENDIAN
            /* Undo the placement of a narrow result read into the high
             * bytes of the int64 on big endian hosts. */
            old >>= 64 - 8*sz;
#endif
            addReplyLongLong(c, old);
            return;
        }
    }
    /* All candidate widths failed: report EINVAL to the client. */
    addReplyErrorFormat(c,"%s", strerror(EINVAL));
}
+
/* DEBUG MALLCTL-STR <key> [<val>]: read or write a string-valued jemalloc
 * tuning option via je_mallctl(). */
void mallctl_string(client *c, robj **argv, int argc) {
    int rret, wret;
    char *old;
    size_t sz = sizeof(old);
    /* for strings, it seems we need to first get the old value, before overriding it. */
    if ((rret=je_mallctl(argv[0]->ptr, &old, &sz, NULL, 0))) {
        /* return error unless this option is write only. */
        if (!(rret == EPERM && argc > 1)) {
            addReplyErrorFormat(c,"%s", strerror(rret));
            return;
        }
    }
    if(argc > 1) {
        char *val = argv[1]->ptr;
        char **valref = &val;
        /* The literal "VOID" asks jemalloc to set the option to NULL. */
        if ((!strcmp(val,"VOID")))
            valref = NULL, sz = 0;
        wret = je_mallctl(argv[0]->ptr, NULL, 0, valref, sz);
    }
    /* 'wret' is only inspected when the read above failed (rret != 0),
     * which can only be the EPERM + argc > 1 path where the write was
     * attempted and 'wret' was assigned. */
    if (!rret)
        addReplyBulkCString(c, old);
    else if (wret)
        addReplyErrorFormat(c,"%s", strerror(wret));
    else
        addReply(c, shared.ok);
}
+#endif
+
+void debugCommand(client *c) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"AOF-FLUSH-SLEEP <microsec>",
+" Server will sleep before flushing the AOF, this is used for testing.",
+"ASSERT",
+" Crash by assertion failed.",
+"CHANGE-REPL-ID",
+" Change the replication IDs of the instance.",
+" Dangerous: should be used only for testing the replication subsystem.",
+"CONFIG-REWRITE-FORCE-ALL",
+" Like CONFIG REWRITE but writes all configuration options, including",
+" keywords not listed in original configuration file or default values.",
+"CRASH-AND-RECOVER [<milliseconds>]",
+" Hard crash and restart after a <milliseconds> delay (default 0).",
+"DIGEST",
+" Output a hex signature representing the current DB content.",
+"DIGEST-VALUE <key> [<key> ...]",
+" Output a hex signature of the values of all the specified keys.",
+"ERROR <string>",
+" Return a Redis protocol error with <string> as message. Useful for clients",
+" unit tests to simulate Redis errors.",
+"LEAK <string>",
+" Create a memory leak of the input string.",
+"LOG <message>",
+" Write <message> to the server log.",
+"HTSTATS <dbid> [full]",
+" Return hash table statistics of the specified Redis database.",
+"HTSTATS-KEY <key> [full]",
+" Like HTSTATS but for the hash table stored at <key>'s value.",
+"LOADAOF",
+" Flush the AOF buffers on disk and reload the AOF in memory.",
+"REPLICATE <string>",
+" Replicates the provided string to replicas, allowing data divergence.",
+#ifdef USE_JEMALLOC
+"MALLCTL <key> [<val>]",
+" Get or set a malloc tuning integer.",
+"MALLCTL-STR <key> [<val>]",
+" Get or set a malloc tuning string.",
+#endif
+"OBJECT <key>",
+" Show low level info about `key` and associated value.",
+"DROP-CLUSTER-PACKET-FILTER <packet-type>",
+" Drop all packets that match the filtered type. Set to -1 allow all packets.",
+"OOM",
+" Crash the server simulating an out-of-memory error.",
+"PANIC",
+" Crash the server simulating a panic.",
+"POPULATE <count> [<prefix>] [<size>]",
+" Create <count> string keys named key:<num>. If <prefix> is specified then",
+" it is used instead of the 'key' prefix. These are not propagated to",
+" replicas. Cluster slots are not respected so keys not belonging to the",
+" current node can be created in cluster mode.",
+"PROTOCOL <type>",
+" Reply with a test value of the specified type. <type> can be: string,",
+" integer, double, bignum, null, array, set, map, attrib, push, verbatim,",
+" true, false.",
+"RELOAD [option ...]",
+" Save the RDB on disk and reload it back to memory. Valid <option> values:",
+" * MERGE: conflicting keys will be loaded from RDB.",
+" * NOFLUSH: the existing database will not be removed before load, but",
+" conflicting keys will generate an exception and kill the server.",
+" * NOSAVE: the database will be loaded from an existing RDB file.",
+" Examples:",
+" * DEBUG RELOAD: verify that the server is able to persist, flush and reload",
+" the database.",
+" * DEBUG RELOAD NOSAVE: replace the current database with the contents of an",
+" existing RDB file.",
+" * DEBUG RELOAD NOSAVE NOFLUSH MERGE: add the contents of an existing RDB",
+" file to the database.",
+"RESTART [<milliseconds>]",
+" Graceful restart: save config, db, restart after a <milliseconds> delay (default 0).",
+"SDSLEN <key>",
+" Show low level SDS string info representing `key` and value.",
+"SEGFAULT",
+" Crash the server with sigsegv.",
+"SET-ACTIVE-EXPIRE <0|1>",
+" Setting it to 0 disables expiring keys in background when they are not",
+" accessed (otherwise the Redis behavior). Setting it to 1 reenables back the",
+" default.",
+"QUICKLIST-PACKED-THRESHOLD <size>",
+" Sets the threshold for elements to be inserted as plain vs packed nodes",
+" Default value is 1GB, allows values up to 4GB. Setting to 0 restores to default.",
+"SET-SKIP-CHECKSUM-VALIDATION <0|1>",
+" Enables or disables checksum checks for RDB files and RESTORE's payload.",
+"SLEEP <seconds>",
+" Stop the server for <seconds>. Decimals allowed.",
+"STRINGMATCH-TEST",
+" Run a fuzz tester against the stringmatchlen() function.",
+"STRUCTSIZE",
+" Return the size of different Redis core C structures.",
+"LISTPACK <key>",
+" Show low level info about the listpack encoding of <key>.",
+"QUICKLIST <key> [<0|1>]",
+" Show low level info about the quicklist encoding of <key>.",
+" The optional argument (0 by default) sets the level of detail",
+"CLIENT-EVICTION",
+" Show low level client eviction pools info (maxmemory-clients).",
+"PAUSE-CRON <0|1>",
+" Stop periodic cron job processing.",
+"REPLYBUFFER PEAK-RESET-TIME <NEVER||RESET|time>",
+" Sets the time (in milliseconds) to wait between client reply buffer peak resets.",
+" In case NEVER is provided the last observed peak will never be reset",
+" In case RESET is provided the peak reset time will be restored to the default value",
+"REPLYBUFFER RESIZING <0|1>",
+" Enable or disable the reply buffer resize cron job",
+"CLUSTERLINK KILL <to|from|all> <node-id>",
+" Kills the link based on the direction to/from (both) with the provided node." ,
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
+ /* Compiler gives warnings about writing to a random address
+ * e.g "*((char*)-1) = 'x';". As a workaround, we map a read-only area
+ * and try to write there to trigger segmentation fault. */
+ char* p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+ *p = 'x';
+ } else if (!strcasecmp(c->argv[1]->ptr,"panic")) {
+ serverPanic("DEBUG PANIC called at Unix time %lld", (long long)time(NULL));
+ } else if (!strcasecmp(c->argv[1]->ptr,"restart") ||
+ !strcasecmp(c->argv[1]->ptr,"crash-and-recover"))
+ {
+ long long delay = 0;
+ if (c->argc >= 3) {
+ if (getLongLongFromObjectOrReply(c, c->argv[2], &delay, NULL)
+ != C_OK) return;
+ if (delay < 0) delay = 0;
+ }
+ int flags = !strcasecmp(c->argv[1]->ptr,"restart") ?
+ (RESTART_SERVER_GRACEFULLY|RESTART_SERVER_CONFIG_REWRITE) :
+ RESTART_SERVER_NONE;
+ restartServer(flags,delay);
+ addReplyError(c,"failed to restart the server. Check server logs.");
+ } else if (!strcasecmp(c->argv[1]->ptr,"oom")) {
+ void *ptr = zmalloc(SIZE_MAX/2); /* Should trigger an out of memory. */
+ zfree(ptr);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"assert")) {
+ serverAssertWithInfo(c,c->argv[0],1 == 2);
+ } else if (!strcasecmp(c->argv[1]->ptr,"log") && c->argc == 3) {
+ serverLog(LL_WARNING, "DEBUG LOG: %s", (char*)c->argv[2]->ptr);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"leak") && c->argc == 3) {
+ sdsdup(c->argv[2]->ptr);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
+ int flush = 1, save = 1;
+ int flags = RDBFLAGS_NONE;
+
+ /* Parse the additional options that modify the RELOAD
+ * behavior. */
+ for (int j = 2; j < c->argc; j++) {
+ char *opt = c->argv[j]->ptr;
+ if (!strcasecmp(opt,"MERGE")) {
+ flags |= RDBFLAGS_ALLOW_DUP;
+ } else if (!strcasecmp(opt,"NOFLUSH")) {
+ flush = 0;
+ } else if (!strcasecmp(opt,"NOSAVE")) {
+ save = 0;
+ } else {
+ addReplyError(c,"DEBUG RELOAD only supports the "
+ "MERGE, NOFLUSH and NOSAVE options.");
+ return;
+ }
+ }
+
+ /* The default behavior is to save the RDB file before loading
+ * it back. */
+ if (save) {
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ if (rdbSave(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE) != C_OK) {
+ addReplyErrorObject(c,shared.err);
+ return;
+ }
+ }
+
+ /* The default behavior is to remove the current dataset from
+ * memory before loading the RDB file, however when MERGE is
+ * used together with NOFLUSH, we are able to merge two datasets. */
+ if (flush) emptyData(-1,EMPTYDB_NO_FLAGS,NULL);
+
+ protectClient(c);
+ int ret = rdbLoad(server.rdb_filename,NULL,flags);
+ unprotectClient(c);
+ if (ret != RDB_OK) {
+ addReplyError(c,"Error trying to load the RDB dump, check server logs.");
+ return;
+ }
+ serverLog(LL_NOTICE,"DB reloaded by DEBUG RELOAD");
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
+ if (server.aof_state != AOF_OFF) flushAppendOnlyFile(1);
+ emptyData(-1,EMPTYDB_NO_FLAGS,NULL);
+ protectClient(c);
+ if (server.aof_manifest) aofManifestFree(server.aof_manifest);
+ aofLoadManifestFromDisk();
+ aofDelHistoryFiles();
+ int ret = loadAppendOnlyFiles(server.aof_manifest);
+ unprotectClient(c);
+ if (ret != AOF_OK && ret != AOF_EMPTY) {
+ addReplyError(c, "Error trying to load the AOF files, check server logs.");
+ return;
+ }
+ server.dirty = 0; /* Prevent AOF / replication */
+ serverLog(LL_NOTICE,"Append Only File loaded by DEBUG LOADAOF");
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"drop-cluster-packet-filter") && c->argc == 3) {
+ long packet_type;
+ if (getLongFromObjectOrReply(c, c->argv[2], &packet_type, NULL) != C_OK)
+ return;
+ server.cluster_drop_packet_filter = packet_type;
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
+ dictEntry *de;
+ robj *val;
+ char *strenc;
+
+ if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) {
+ addReplyErrorObject(c,shared.nokeyerr);
+ return;
+ }
+ val = dictGetVal(de);
+ strenc = strEncoding(val->encoding);
+
+ char extra[138] = {0};
+ if (val->encoding == OBJ_ENCODING_QUICKLIST) {
+ char *nextra = extra;
+ int remaining = sizeof(extra);
+ quicklist *ql = val->ptr;
+ /* Add number of quicklist nodes */
+ int used = snprintf(nextra, remaining, " ql_nodes:%lu", ql->len);
+ nextra += used;
+ remaining -= used;
+ /* Add average quicklist fill factor */
+ double avg = (double)ql->count/ql->len;
+ used = snprintf(nextra, remaining, " ql_avg_node:%.2f", avg);
+ nextra += used;
+ remaining -= used;
+ /* Add quicklist fill level / max listpack size */
+ used = snprintf(nextra, remaining, " ql_listpack_max:%d", ql->fill);
+ nextra += used;
+ remaining -= used;
+ /* Add isCompressed? */
+ int compressed = ql->compress != 0;
+ used = snprintf(nextra, remaining, " ql_compressed:%d", compressed);
+ nextra += used;
+ remaining -= used;
+ /* Add total uncompressed size */
+ unsigned long sz = 0;
+ for (quicklistNode *node = ql->head; node; node = node->next) {
+ sz += node->sz;
+ }
+ used = snprintf(nextra, remaining, " ql_uncompressed_size:%lu", sz);
+ nextra += used;
+ remaining -= used;
+ }
+
+ addReplyStatusFormat(c,
+ "Value at:%p refcount:%d "
+ "encoding:%s serializedlength:%zu "
+ "lru:%d lru_seconds_idle:%llu%s",
+ (void*)val, val->refcount,
+ strenc, rdbSavedObjectLen(val, c->argv[2], c->db->id),
+ val->lru, estimateObjectIdleTime(val)/1000, extra);
+ } else if (!strcasecmp(c->argv[1]->ptr,"sdslen") && c->argc == 3) {
+ dictEntry *de;
+ robj *val;
+ sds key;
+
+ if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) {
+ addReplyErrorObject(c,shared.nokeyerr);
+ return;
+ }
+ val = dictGetVal(de);
+ key = dictGetKey(de);
+
+ if (val->type != OBJ_STRING || !sdsEncodedObject(val)) {
+ addReplyError(c,"Not an sds encoded string.");
+ } else {
+ addReplyStatusFormat(c,
+ "key_sds_len:%lld, key_sds_avail:%lld, key_zmalloc: %lld, "
+ "val_sds_len:%lld, val_sds_avail:%lld, val_zmalloc: %lld",
+ (long long) sdslen(key),
+ (long long) sdsavail(key),
+ (long long) sdsZmallocSize(key),
+ (long long) sdslen(val->ptr),
+ (long long) sdsavail(val->ptr),
+ (long long) getStringObjectSdsUsedMemory(val));
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"listpack") && c->argc == 3) {
+ robj *o;
+
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
+ == NULL) return;
+
+ if (o->encoding != OBJ_ENCODING_LISTPACK) {
+ addReplyError(c,"Not a listpack encoded object.");
+ } else {
+ lpRepr(o->ptr);
+ addReplyStatus(c,"Listpack structure printed on stdout");
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"quicklist") && (c->argc == 3 || c->argc == 4)) {
+ robj *o;
+
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
+ == NULL) return;
+
+ int full = 0;
+ if (c->argc == 4)
+ full = atoi(c->argv[3]->ptr);
+ if (o->encoding != OBJ_ENCODING_QUICKLIST) {
+ addReplyError(c,"Not a quicklist encoded object.");
+ } else {
+ quicklistRepr(o->ptr, full);
+ addReplyStatus(c,"Quicklist structure printed on stdout");
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"populate") &&
+ c->argc >= 3 && c->argc <= 5) {
+ long keys, j;
+ robj *key, *val;
+ char buf[128];
+
+ if (getPositiveLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != C_OK)
+ return;
+
+ if (dictTryExpand(c->db->dict, keys) != DICT_OK) {
+ addReplyError(c, "OOM in dictTryExpand");
+ return;
+ }
+ long valsize = 0;
+ if ( c->argc == 5 && getPositiveLongFromObjectOrReply(c, c->argv[4], &valsize, NULL) != C_OK )
+ return;
+
+ for (j = 0; j < keys; j++) {
+ snprintf(buf,sizeof(buf),"%s:%lu",
+ (c->argc == 3) ? "key" : (char*)c->argv[3]->ptr, j);
+ key = createStringObject(buf,strlen(buf));
+ if (lookupKeyWrite(c->db,key) != NULL) {
+ decrRefCount(key);
+ continue;
+ }
+ snprintf(buf,sizeof(buf),"value:%lu",j);
+ if (valsize==0)
+ val = createStringObject(buf,strlen(buf));
+ else {
+ int buflen = strlen(buf);
+ val = createStringObject(NULL,valsize);
+ memcpy(val->ptr, buf, valsize<=buflen? valsize: buflen);
+ }
+ dbAdd(c->db,key,val);
+ signalModifiedKey(c,c->db,key);
+ decrRefCount(key);
+ }
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"digest") && c->argc == 2) {
+ /* DEBUG DIGEST (form without keys specified) */
+ unsigned char digest[20];
+ sds d = sdsempty();
+
+ computeDatasetDigest(digest);
+ for (int i = 0; i < 20; i++) d = sdscatprintf(d, "%02x",digest[i]);
+ addReplyStatus(c,d);
+ sdsfree(d);
+ } else if (!strcasecmp(c->argv[1]->ptr,"digest-value") && c->argc >= 2) {
+ /* DEBUG DIGEST-VALUE key key key ... key. */
+ addReplyArrayLen(c,c->argc-2);
+ for (int j = 2; j < c->argc; j++) {
+ unsigned char digest[20];
+ memset(digest,0,20); /* Start with a clean result */
+
+ /* We don't use lookupKey because a debug command should
+ * work on logically expired keys */
+ dictEntry *de;
+ robj *o = ((de = dictFind(c->db->dict,c->argv[j]->ptr)) == NULL) ? NULL : dictGetVal(de);
+ if (o) xorObjectDigest(c->db,c->argv[j],digest,o);
+
+ sds d = sdsempty();
+ for (int i = 0; i < 20; i++) d = sdscatprintf(d, "%02x",digest[i]);
+ addReplyStatus(c,d);
+ sdsfree(d);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"protocol") && c->argc == 3) {
+ /* DEBUG PROTOCOL [string|integer|double|bignum|null|array|set|map|
+ * attrib|push|verbatim|true|false] */
+ char *name = c->argv[2]->ptr;
+ if (!strcasecmp(name,"string")) {
+ addReplyBulkCString(c,"Hello World");
+ } else if (!strcasecmp(name,"integer")) {
+ addReplyLongLong(c,12345);
+ } else if (!strcasecmp(name,"double")) {
+ addReplyDouble(c,3.141);
+ } else if (!strcasecmp(name,"bignum")) {
+ addReplyBigNum(c,"1234567999999999999999999999999999999",37);
+ } else if (!strcasecmp(name,"null")) {
+ addReplyNull(c);
+ } else if (!strcasecmp(name,"array")) {
+ addReplyArrayLen(c,3);
+ for (int j = 0; j < 3; j++) addReplyLongLong(c,j);
+ } else if (!strcasecmp(name,"set")) {
+ addReplySetLen(c,3);
+ for (int j = 0; j < 3; j++) addReplyLongLong(c,j);
+ } else if (!strcasecmp(name,"map")) {
+ addReplyMapLen(c,3);
+ for (int j = 0; j < 3; j++) {
+ addReplyLongLong(c,j);
+ addReplyBool(c, j == 1);
+ }
+ } else if (!strcasecmp(name,"attrib")) {
+ if (c->resp >= 3) {
+ addReplyAttributeLen(c,1);
+ addReplyBulkCString(c,"key-popularity");
+ addReplyArrayLen(c,2);
+ addReplyBulkCString(c,"key:123");
+ addReplyLongLong(c,90);
+ }
+ /* Attributes are not real replies, so a well formed reply should
+ * also have a normal reply type after the attribute. */
+ addReplyBulkCString(c,"Some real reply following the attribute");
+ } else if (!strcasecmp(name,"push")) {
+ if (c->resp < 3) {
+ addReplyError(c,"RESP2 is not supported by this command");
+ return;
+ }
+ uint64_t old_flags = c->flags;
+ c->flags |= CLIENT_PUSHING;
+ addReplyPushLen(c,2);
+ addReplyBulkCString(c,"server-cpu-usage");
+ addReplyLongLong(c,42);
+ if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+ /* Push replies are not synchronous replies, so we emit also a
+ * normal reply in order for blocking clients just discarding the
+ * push reply, to actually consume the reply and continue. */
+ addReplyBulkCString(c,"Some real reply following the push reply");
+ } else if (!strcasecmp(name,"true")) {
+ addReplyBool(c,1);
+ } else if (!strcasecmp(name,"false")) {
+ addReplyBool(c,0);
+ } else if (!strcasecmp(name,"verbatim")) {
+ addReplyVerbatim(c,"This is a verbatim\nstring",25,"txt");
+ } else {
+ addReplyError(c,"Wrong protocol type name. Please use one of the following: string|integer|double|bignum|null|array|set|map|attrib|push|verbatim|true|false");
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"sleep") && c->argc == 3) {
+ double dtime = strtod(c->argv[2]->ptr,NULL);
+ long long utime = dtime*1000000;
+ struct timespec tv;
+
+ tv.tv_sec = utime / 1000000;
+ tv.tv_nsec = (utime % 1000000) * 1000;
+ nanosleep(&tv, NULL);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"set-active-expire") &&
+ c->argc == 3)
+ {
+ server.active_expire_enabled = atoi(c->argv[2]->ptr);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"quicklist-packed-threshold") &&
+ c->argc == 3)
+ {
+ int memerr;
+ unsigned long long sz = memtoull((const char *)c->argv[2]->ptr, &memerr);
+ if (memerr || !quicklistisSetPackedThreshold(sz)) {
+ addReplyError(c, "argument must be a memory value bigger than 1 and smaller than 4gb");
+ } else {
+ addReply(c,shared.ok);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"set-skip-checksum-validation") &&
+ c->argc == 3)
+ {
+ server.skip_checksum_validation = atoi(c->argv[2]->ptr);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"aof-flush-sleep") &&
+ c->argc == 3)
+ {
+ server.aof_flush_sleep = atoi(c->argv[2]->ptr);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"replicate") && c->argc >= 3) {
+ replicationFeedSlaves(server.slaves, -1,
+ c->argv + 2, c->argc - 2);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"error") && c->argc == 3) {
+ sds errstr = sdsnewlen("-",1);
+
+ errstr = sdscatsds(errstr,c->argv[2]->ptr);
+ errstr = sdsmapchars(errstr,"\n\r"," ",2); /* no newlines in errors. */
+ errstr = sdscatlen(errstr,"\r\n",2);
+ addReplySds(c,errstr);
+ } else if (!strcasecmp(c->argv[1]->ptr,"structsize") && c->argc == 2) {
+ sds sizes = sdsempty();
+ sizes = sdscatprintf(sizes,"bits:%d ",(sizeof(void*) == 8)?64:32);
+ sizes = sdscatprintf(sizes,"robj:%d ",(int)sizeof(robj));
+ sizes = sdscatprintf(sizes,"dictentry:%d ",(int)dictEntryMemUsage());
+ sizes = sdscatprintf(sizes,"sdshdr5:%d ",(int)sizeof(struct sdshdr5));
+ sizes = sdscatprintf(sizes,"sdshdr8:%d ",(int)sizeof(struct sdshdr8));
+ sizes = sdscatprintf(sizes,"sdshdr16:%d ",(int)sizeof(struct sdshdr16));
+ sizes = sdscatprintf(sizes,"sdshdr32:%d ",(int)sizeof(struct sdshdr32));
+ sizes = sdscatprintf(sizes,"sdshdr64:%d ",(int)sizeof(struct sdshdr64));
+ addReplyBulkSds(c,sizes);
+ } else if (!strcasecmp(c->argv[1]->ptr,"htstats") && c->argc >= 3) {
+ long dbid;
+ sds stats = sdsempty();
+ char buf[4096];
+ int full = 0;
+
+ if (getLongFromObjectOrReply(c, c->argv[2], &dbid, NULL) != C_OK) {
+ sdsfree(stats);
+ return;
+ }
+ if (dbid < 0 || dbid >= server.dbnum) {
+ sdsfree(stats);
+ addReplyError(c,"Out of range database");
+ return;
+ }
+ if (c->argc >= 4 && !strcasecmp(c->argv[3]->ptr,"full"))
+ full = 1;
+
+ stats = sdscatprintf(stats,"[Dictionary HT]\n");
+ dictGetStats(buf,sizeof(buf),server.db[dbid].dict,full);
+ stats = sdscat(stats,buf);
+
+ stats = sdscatprintf(stats,"[Expires HT]\n");
+ dictGetStats(buf,sizeof(buf),server.db[dbid].expires,full);
+ stats = sdscat(stats,buf);
+
+ addReplyVerbatim(c,stats,sdslen(stats),"txt");
+ sdsfree(stats);
+ } else if (!strcasecmp(c->argv[1]->ptr,"htstats-key") && c->argc >= 3) {
+ robj *o;
+ dict *ht = NULL;
+ int full = 0;
+
+ if (c->argc >= 4 && !strcasecmp(c->argv[3]->ptr,"full"))
+ full = 1;
+
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
+ == NULL) return;
+
+ /* Get the hash table reference from the object, if possible. */
+ switch (o->encoding) {
+ case OBJ_ENCODING_SKIPLIST:
+ {
+ zset *zs = o->ptr;
+ ht = zs->dict;
+ }
+ break;
+ case OBJ_ENCODING_HT:
+ ht = o->ptr;
+ break;
+ }
+
+ if (ht == NULL) {
+ addReplyError(c,"The value stored at the specified key is not "
+ "represented using an hash table");
+ } else {
+ char buf[4096];
+ dictGetStats(buf,sizeof(buf),ht,full);
+ addReplyVerbatim(c,buf,strlen(buf),"txt");
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"change-repl-id") && c->argc == 2) {
+ serverLog(LL_NOTICE,"Changing replication IDs after receiving DEBUG change-repl-id");
+ changeReplicationId();
+ clearReplicationId2();
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"stringmatch-test") && c->argc == 2)
+ {
+ stringmatchlen_fuzz_test();
+ addReplyStatus(c,"Apparently Redis did not crash: test passed");
+ } else if (!strcasecmp(c->argv[1]->ptr,"set-disable-deny-scripts") && c->argc == 3)
+ {
+ server.script_disable_deny_script = atoi(c->argv[2]->ptr);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"config-rewrite-force-all") && c->argc == 2)
+ {
+ if (rewriteConfig(server.configfile, 1) == -1)
+ addReplyErrorFormat(c, "CONFIG-REWRITE-FORCE-ALL failed: %s", strerror(errno));
+ else
+ addReply(c, shared.ok);
+ } else if(!strcasecmp(c->argv[1]->ptr,"client-eviction") && c->argc == 2) {
+ if (!server.client_mem_usage_buckets) {
+ addReplyError(c,"maxmemory-clients is disabled.");
+ return;
+ }
+ sds bucket_info = sdsempty();
+ for (int j = 0; j < CLIENT_MEM_USAGE_BUCKETS; j++) {
+ if (j == 0)
+ bucket_info = sdscatprintf(bucket_info, "bucket 0");
+ else
+ bucket_info = sdscatprintf(bucket_info, "bucket %10zu", (size_t)1<<(j-1+CLIENT_MEM_USAGE_BUCKET_MIN_LOG));
+ if (j == CLIENT_MEM_USAGE_BUCKETS-1)
+ bucket_info = sdscatprintf(bucket_info, "+ : ");
+ else
+ bucket_info = sdscatprintf(bucket_info, " - %10zu: ", ((size_t)1<<(j+CLIENT_MEM_USAGE_BUCKET_MIN_LOG))-1);
+ bucket_info = sdscatprintf(bucket_info, "tot-mem: %10zu, clients: %lu\n",
+ server.client_mem_usage_buckets[j].mem_usage_sum,
+ server.client_mem_usage_buckets[j].clients->len);
+ }
+ addReplyVerbatim(c,bucket_info,sdslen(bucket_info),"txt");
+ sdsfree(bucket_info);
+#ifdef USE_JEMALLOC
+ } else if(!strcasecmp(c->argv[1]->ptr,"mallctl") && c->argc >= 3) {
+ mallctl_int(c, c->argv+2, c->argc-2);
+ return;
+ } else if(!strcasecmp(c->argv[1]->ptr,"mallctl-str") && c->argc >= 3) {
+ mallctl_string(c, c->argv+2, c->argc-2);
+ return;
+#endif
+ } else if (!strcasecmp(c->argv[1]->ptr,"pause-cron") && c->argc == 3)
+ {
+ server.pause_cron = atoi(c->argv[2]->ptr);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"replybuffer") && c->argc == 4 ) {
+ if(!strcasecmp(c->argv[2]->ptr, "peak-reset-time")) {
+ if (!strcasecmp(c->argv[3]->ptr, "never")) {
+ server.reply_buffer_peak_reset_time = -1;
+ } else if(!strcasecmp(c->argv[3]->ptr, "reset")) {
+ server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME;
+ } else {
+ if (getLongFromObjectOrReply(c, c->argv[3], &server.reply_buffer_peak_reset_time, NULL) != C_OK)
+ return;
+ }
+ } else if(!strcasecmp(c->argv[2]->ptr,"resizing")) {
+ server.reply_buffer_resizing_enabled = atoi(c->argv[3]->ptr);
+ } else {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+ addReply(c, shared.ok);
+ } else if(!strcasecmp(c->argv[1]->ptr,"CLUSTERLINK") &&
+ !strcasecmp(c->argv[2]->ptr,"KILL") &&
+ c->argc == 5) {
+ if (!server.cluster_enabled) {
+ addReplyError(c, "Debug option only available for cluster mode enabled setup!");
+ return;
+ }
+
+ /* Find the node. */
+ clusterNode *n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr));
+ if (!n) {
+ addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[4]->ptr);
+ return;
+ }
+
+ /* Terminate the link based on the direction or all. */
+ if (!strcasecmp(c->argv[3]->ptr,"from")) {
+ freeClusterLink(n->inbound_link);
+ } else if (!strcasecmp(c->argv[3]->ptr,"to")) {
+ freeClusterLink(n->link);
+ } else if (!strcasecmp(c->argv[3]->ptr,"all")) {
+ freeClusterLink(n->link);
+ freeClusterLink(n->inbound_link);
+ } else {
+ addReplyErrorFormat(c, "Unknown direction %s", (char*) c->argv[3]->ptr);
+ }
+ addReply(c,shared.ok);
+ } else {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+}
+
+/* =========================== Crash handling ============================== */
+
/* Low level assertion-failure handler invoked by the serverAssert() macros:
 * logs the failed expression with its source location, optionally dumps a
 * stack trace and crash report, then hands control to bugReportEnd(). */
void _serverAssert(const char *estr, const char *file, int line) {
    bugReportStart();
    serverLog(LL_WARNING,"=== ASSERTION FAILED ===");
    serverLog(LL_WARNING,"==> %s:%d '%s' is not true",file,line,estr);

    if (server.crashlog_enabled) {
#ifdef HAVE_BACKTRACE
        logStackTrace(NULL, 1);
#endif
        printCrashReport();
    }

    // remove the signal handler so on abort() we will output the crash report.
    removeSignalHandlers();
    bugReportEnd(0, 0);
}
+
+void _serverAssertPrintClientInfo(const client *c) {
+ int j;
+ char conninfo[CONN_INFO_LEN];
+
+ bugReportStart();
+ serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ===");
+ serverLog(LL_WARNING,"client->flags = %llu", (unsigned long long) c->flags);
+ serverLog(LL_WARNING,"client->conn = %s", connGetInfo(c->conn, conninfo, sizeof(conninfo)));
+ serverLog(LL_WARNING,"client->argc = %d", c->argc);
+ for (j=0; j < c->argc; j++) {
+ char buf[128];
+ char *arg;
+
+ if (c->argv[j]->type == OBJ_STRING && sdsEncodedObject(c->argv[j])) {
+ arg = (char*) c->argv[j]->ptr;
+ } else {
+ snprintf(buf,sizeof(buf),"Object type: %u, encoding: %u",
+ c->argv[j]->type, c->argv[j]->encoding);
+ arg = buf;
+ }
+ serverLog(LL_WARNING,"client->argv[%d] = \"%s\" (refcount: %d)",
+ j, arg, c->argv[j]->refcount);
+ }
+}
+
/* Log basic debug information (type, encoding, refcount) about the object
 * involved in a crash or assertion failure. Deep inspection of o->ptr is
 * compiled out by default, see the comment below. */
void serverLogObjectDebugInfo(const robj *o) {
    serverLog(LL_WARNING,"Object type: %u", o->type);
    serverLog(LL_WARNING,"Object encoding: %u", o->encoding);
    serverLog(LL_WARNING,"Object refcount: %d", o->refcount);
#if UNSAFE_CRASH_REPORT
    /* This code is now disabled. o->ptr may be unreliable to print. in some
     * cases a ziplist could have already been freed by realloc, but not yet
     * updated to o->ptr. in other cases the call to ziplistLen may need to
     * iterate on all the items in the list (and possibly crash again).
     * For some cases it may be ok to crash here again, but these could cause
     * invalid memory access which will bother valgrind and also possibly cause
     * random memory portion to be "leaked" into the logfile. */
    if (o->type == OBJ_STRING && sdsEncodedObject(o)) {
        serverLog(LL_WARNING,"Object raw string len: %zu", sdslen(o->ptr));
        if (sdslen(o->ptr) < 4096) {
            sds repr = sdscatrepr(sdsempty(),o->ptr,sdslen(o->ptr));
            serverLog(LL_WARNING,"Object raw string content: %s", repr);
            sdsfree(repr);
        }
    } else if (o->type == OBJ_LIST) {
        serverLog(LL_WARNING,"List length: %d", (int) listTypeLength(o));
    } else if (o->type == OBJ_SET) {
        serverLog(LL_WARNING,"Set size: %d", (int) setTypeSize(o));
    } else if (o->type == OBJ_HASH) {
        serverLog(LL_WARNING,"Hash size: %d", (int) hashTypeLength(o));
    } else if (o->type == OBJ_ZSET) {
        serverLog(LL_WARNING,"Sorted set size: %d", (int) zsetLength(o));
        if (o->encoding == OBJ_ENCODING_SKIPLIST)
            serverLog(LL_WARNING,"Skiplist level: %d", (int) ((const zset*)o->ptr)->zsl->level);
    } else if (o->type == OBJ_STREAM) {
        serverLog(LL_WARNING,"Stream size: %d", (int) streamLength(o));
    }
#endif
}
+
/* Open (or continue) the bug report section and log the debug info of the
 * object involved in a failed assertion. Called by _serverAssertWithInfo(). */
void _serverAssertPrintObject(const robj *o) {
    bugReportStart();
    serverLog(LL_WARNING,"=== ASSERTION FAILED OBJECT CONTEXT ===");
    serverLogObjectDebugInfo(o);
}
+
/* Assertion failure entry point carrying extra context: logs the client
 * and/or object involved (either may be NULL) before delegating to
 * _serverAssert(), which is expected not to return. */
void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line) {
    if (c) _serverAssertPrintClientInfo(c);
    if (o) _serverAssertPrintObject(o);
    _serverAssert(estr,file,line);
}
+
/* Handle a serverPanic() call: format the printf-style message, emit the
 * "Guru Meditation" banner, optionally dump the crash report, then let the
 * process terminate via bugReportEnd() (presumably aborts — behavior defined
 * elsewhere, not visible here). */
void _serverPanic(const char *file, int line, const char *msg, ...) {
    va_list ap;
    va_start(ap,msg);
    /* Message is truncated to 256 bytes; good enough for a panic reason. */
    char fmtmsg[256];
    vsnprintf(fmtmsg,sizeof(fmtmsg),msg,ap);
    va_end(ap);

    bugReportStart();
    serverLog(LL_WARNING,"------------------------------------------------");
    serverLog(LL_WARNING,"!!! Software Failure. Press left mouse button to continue");
    serverLog(LL_WARNING,"Guru Meditation: %s #%s:%d",fmtmsg,file,line);

    if (server.crashlog_enabled) {
#ifdef HAVE_BACKTRACE
        logStackTrace(NULL, 1);
#endif
        printCrashReport();
    }

    // remove the signal handler so on abort() we will output the crash report.
    removeSignalHandlers();
    bugReportEnd(0, 0);
}
+
/* Emit the "BUG REPORT START" banner exactly once per process, guarded by a
 * mutex so concurrent crashing threads don't duplicate it. Safe to call
 * repeatedly; later calls are no-ops. */
void bugReportStart(void) {
    pthread_mutex_lock(&bug_report_start_mutex);
    if (bug_report_start == 0) {
        serverLogRaw(LL_WARNING|LL_RAW,
        "\n\n=== REDIS BUG REPORT START: Cut & paste starting from here ===\n");
        bug_report_start = 1;
    }
    pthread_mutex_unlock(&bug_report_start_mutex);
}
+
+#ifdef HAVE_BACKTRACE
+
/* Returns the current eip and set it to the given new value (if its not NULL) */
/* "eip" here means the saved program counter inside the signal ucontext,
 * whatever the platform calls it (EIP/RIP/PC/srr0). Returns NULL on
 * unsupported OS/arch combinations. */
static void* getAndSetMcontextEip(ucontext_t *uc, void *eip) {
/* Fallback for platforms where we don't know the mcontext layout. */
#define NOT_SUPPORTED() do {\
    UNUSED(uc);\
    UNUSED(eip);\
    return NULL;\
} while(0)
/* Read target_var, optionally overwrite it with new_val, return old value.
 * The write goes through a void** temporary to sidestep type mismatches
 * between the various mcontext register field types. */
#define GET_SET_RETURN(target_var, new_val) do {\
    void *old_val = (void*)target_var; \
    if (new_val) { \
        void **temp = (void**)&target_var; \
        *temp = new_val; \
    } \
    return old_val; \
} while(0)
#if defined(__APPLE__) && !defined(MAC_OS_10_6_DETECTED)
    /* OSX < 10.6 */
    #if defined(__x86_64__)
    GET_SET_RETURN(uc->uc_mcontext->__ss.__rip, eip);
    #elif defined(__i386__)
    GET_SET_RETURN(uc->uc_mcontext->__ss.__eip, eip);
    #else
    GET_SET_RETURN(uc->uc_mcontext->__ss.__srr0, eip);
    #endif
#elif defined(__APPLE__) && defined(MAC_OS_10_6_DETECTED)
    /* OSX >= 10.6 */
    #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
    GET_SET_RETURN(uc->uc_mcontext->__ss.__rip, eip);
    #elif defined(__i386__)
    GET_SET_RETURN(uc->uc_mcontext->__ss.__eip, eip);
    #else
    /* OSX ARM64: pointer authentication requires the dedicated accessor
     * macros instead of touching __pc directly. */
    void *old_val = (void*)arm_thread_state64_get_pc(uc->uc_mcontext->__ss);
    if (eip) {
        arm_thread_state64_set_pc_fptr(uc->uc_mcontext->__ss, eip);
    }
    return old_val;
    #endif
#elif defined(__linux__)
    /* Linux */
    #if defined(__i386__) || ((defined(__X86_64__) || defined(__x86_64__)) && defined(__ILP32__))
    /* gregs[14] is REG_EIP on i386 (also used for x32). */
    GET_SET_RETURN(uc->uc_mcontext.gregs[14], eip);
    #elif defined(__X86_64__) || defined(__x86_64__)
    /* gregs[16] is REG_RIP on x86_64. */
    GET_SET_RETURN(uc->uc_mcontext.gregs[16], eip);
    #elif defined(__ia64__) /* Linux IA64 */
    GET_SET_RETURN(uc->uc_mcontext.sc_ip, eip);
    #elif defined(__riscv) /* Linux RISC-V */
    GET_SET_RETURN(uc->uc_mcontext.__gregs[REG_PC], eip);
    #elif defined(__arm__) /* Linux ARM */
    GET_SET_RETURN(uc->uc_mcontext.arm_pc, eip);
    #elif defined(__aarch64__) /* Linux AArch64 */
    GET_SET_RETURN(uc->uc_mcontext.pc, eip);
    #else
    NOT_SUPPORTED();
    #endif
#elif defined(__FreeBSD__)
    /* FreeBSD */
    #if defined(__i386__)
    GET_SET_RETURN(uc->uc_mcontext.mc_eip, eip);
    #elif defined(__x86_64__)
    GET_SET_RETURN(uc->uc_mcontext.mc_rip, eip);
    #else
    NOT_SUPPORTED();
    #endif
#elif defined(__OpenBSD__)
    /* OpenBSD */
    #if defined(__i386__)
    GET_SET_RETURN(uc->sc_eip, eip);
    #elif defined(__x86_64__)
    GET_SET_RETURN(uc->sc_rip, eip);
    #else
    NOT_SUPPORTED();
    #endif
#elif defined(__NetBSD__)
    #if defined(__i386__)
    GET_SET_RETURN(uc->uc_mcontext.__gregs[_REG_EIP], eip);
    #elif defined(__x86_64__)
    GET_SET_RETURN(uc->uc_mcontext.__gregs[_REG_RIP], eip);
    #else
    NOT_SUPPORTED();
    #endif
#elif defined(__DragonFly__)
    GET_SET_RETURN(uc->uc_mcontext.mc_rip, eip);
#elif defined(__sun) && defined(__x86_64__)
    GET_SET_RETURN(uc->uc_mcontext.gregs[REG_RIP], eip);
#else
    NOT_SUPPORTED();
#endif
#undef NOT_SUPPORTED
}
+
+REDIS_NO_SANITIZE("address")
+void logStackContent(void **sp) {
+ int i;
+ for (i = 15; i >= 0; i--) {
+ unsigned long addr = (unsigned long) sp+i;
+ unsigned long val = (unsigned long) sp[i];
+
+ if (sizeof(long) == 4)
+ serverLog(LL_WARNING, "(%08lx) -> %08lx", addr, val);
+ else
+ serverLog(LL_WARNING, "(%016lx) -> %016lx", addr, val);
+ }
+}
+
+/* Log dump of processor registers */
+void logRegisters(ucontext_t *uc) {
+ serverLog(LL_WARNING|LL_RAW, "\n------ REGISTERS ------\n");
+#define NOT_SUPPORTED() do {\
+ UNUSED(uc);\
+ serverLog(LL_WARNING,\
+ " Dumping of registers not supported for this OS/arch");\
+} while(0)
+
+/* OSX */
+#if defined(__APPLE__) && defined(MAC_OS_10_6_DETECTED)
+ /* OSX AMD64 */
+ #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
+ serverLog(LL_WARNING,
+ "\n"
+ "RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
+ "RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
+ "R8 :%016lx R9 :%016lx\nR10:%016lx R11:%016lx\n"
+ "R12:%016lx R13:%016lx\nR14:%016lx R15:%016lx\n"
+ "RIP:%016lx EFL:%016lx\nCS :%016lx FS:%016lx GS:%016lx",
+ (unsigned long) uc->uc_mcontext->__ss.__rax,
+ (unsigned long) uc->uc_mcontext->__ss.__rbx,
+ (unsigned long) uc->uc_mcontext->__ss.__rcx,
+ (unsigned long) uc->uc_mcontext->__ss.__rdx,
+ (unsigned long) uc->uc_mcontext->__ss.__rdi,
+ (unsigned long) uc->uc_mcontext->__ss.__rsi,
+ (unsigned long) uc->uc_mcontext->__ss.__rbp,
+ (unsigned long) uc->uc_mcontext->__ss.__rsp,
+ (unsigned long) uc->uc_mcontext->__ss.__r8,
+ (unsigned long) uc->uc_mcontext->__ss.__r9,
+ (unsigned long) uc->uc_mcontext->__ss.__r10,
+ (unsigned long) uc->uc_mcontext->__ss.__r11,
+ (unsigned long) uc->uc_mcontext->__ss.__r12,
+ (unsigned long) uc->uc_mcontext->__ss.__r13,
+ (unsigned long) uc->uc_mcontext->__ss.__r14,
+ (unsigned long) uc->uc_mcontext->__ss.__r15,
+ (unsigned long) uc->uc_mcontext->__ss.__rip,
+ (unsigned long) uc->uc_mcontext->__ss.__rflags,
+ (unsigned long) uc->uc_mcontext->__ss.__cs,
+ (unsigned long) uc->uc_mcontext->__ss.__fs,
+ (unsigned long) uc->uc_mcontext->__ss.__gs
+ );
+ logStackContent((void**)uc->uc_mcontext->__ss.__rsp);
+ #elif defined(__i386__)
+ /* OSX x86 */
+ serverLog(LL_WARNING,
+ "\n"
+ "EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"
+ "EDI:%08lx ESI:%08lx EBP:%08lx ESP:%08lx\n"
+ "SS:%08lx EFL:%08lx EIP:%08lx CS :%08lx\n"
+ "DS:%08lx ES:%08lx FS :%08lx GS :%08lx",
+ (unsigned long) uc->uc_mcontext->__ss.__eax,
+ (unsigned long) uc->uc_mcontext->__ss.__ebx,
+ (unsigned long) uc->uc_mcontext->__ss.__ecx,
+ (unsigned long) uc->uc_mcontext->__ss.__edx,
+ (unsigned long) uc->uc_mcontext->__ss.__edi,
+ (unsigned long) uc->uc_mcontext->__ss.__esi,
+ (unsigned long) uc->uc_mcontext->__ss.__ebp,
+ (unsigned long) uc->uc_mcontext->__ss.__esp,
+ (unsigned long) uc->uc_mcontext->__ss.__ss,
+ (unsigned long) uc->uc_mcontext->__ss.__eflags,
+ (unsigned long) uc->uc_mcontext->__ss.__eip,
+ (unsigned long) uc->uc_mcontext->__ss.__cs,
+ (unsigned long) uc->uc_mcontext->__ss.__ds,
+ (unsigned long) uc->uc_mcontext->__ss.__es,
+ (unsigned long) uc->uc_mcontext->__ss.__fs,
+ (unsigned long) uc->uc_mcontext->__ss.__gs
+ );
+ logStackContent((void**)uc->uc_mcontext->__ss.__esp);
+ #else
+ /* OSX ARM64 */
+ serverLog(LL_WARNING,
+ "\n"
+ "x0:%016lx x1:%016lx x2:%016lx x3:%016lx\n"
+ "x4:%016lx x5:%016lx x6:%016lx x7:%016lx\n"
+ "x8:%016lx x9:%016lx x10:%016lx x11:%016lx\n"
+ "x12:%016lx x13:%016lx x14:%016lx x15:%016lx\n"
+ "x16:%016lx x17:%016lx x18:%016lx x19:%016lx\n"
+ "x20:%016lx x21:%016lx x22:%016lx x23:%016lx\n"
+ "x24:%016lx x25:%016lx x26:%016lx x27:%016lx\n"
+ "x28:%016lx fp:%016lx lr:%016lx\n"
+ "sp:%016lx pc:%016lx cpsr:%08lx\n",
+ (unsigned long) uc->uc_mcontext->__ss.__x[0],
+ (unsigned long) uc->uc_mcontext->__ss.__x[1],
+ (unsigned long) uc->uc_mcontext->__ss.__x[2],
+ (unsigned long) uc->uc_mcontext->__ss.__x[3],
+ (unsigned long) uc->uc_mcontext->__ss.__x[4],
+ (unsigned long) uc->uc_mcontext->__ss.__x[5],
+ (unsigned long) uc->uc_mcontext->__ss.__x[6],
+ (unsigned long) uc->uc_mcontext->__ss.__x[7],
+ (unsigned long) uc->uc_mcontext->__ss.__x[8],
+ (unsigned long) uc->uc_mcontext->__ss.__x[9],
+ (unsigned long) uc->uc_mcontext->__ss.__x[10],
+ (unsigned long) uc->uc_mcontext->__ss.__x[11],
+ (unsigned long) uc->uc_mcontext->__ss.__x[12],
+ (unsigned long) uc->uc_mcontext->__ss.__x[13],
+ (unsigned long) uc->uc_mcontext->__ss.__x[14],
+ (unsigned long) uc->uc_mcontext->__ss.__x[15],
+ (unsigned long) uc->uc_mcontext->__ss.__x[16],
+ (unsigned long) uc->uc_mcontext->__ss.__x[17],
+ (unsigned long) uc->uc_mcontext->__ss.__x[18],
+ (unsigned long) uc->uc_mcontext->__ss.__x[19],
+ (unsigned long) uc->uc_mcontext->__ss.__x[20],
+ (unsigned long) uc->uc_mcontext->__ss.__x[21],
+ (unsigned long) uc->uc_mcontext->__ss.__x[22],
+ (unsigned long) uc->uc_mcontext->__ss.__x[23],
+ (unsigned long) uc->uc_mcontext->__ss.__x[24],
+ (unsigned long) uc->uc_mcontext->__ss.__x[25],
+ (unsigned long) uc->uc_mcontext->__ss.__x[26],
+ (unsigned long) uc->uc_mcontext->__ss.__x[27],
+ (unsigned long) uc->uc_mcontext->__ss.__x[28],
+ (unsigned long) arm_thread_state64_get_fp(uc->uc_mcontext->__ss),
+ (unsigned long) arm_thread_state64_get_lr(uc->uc_mcontext->__ss),
+ (unsigned long) arm_thread_state64_get_sp(uc->uc_mcontext->__ss),
+ (unsigned long) arm_thread_state64_get_pc(uc->uc_mcontext->__ss),
+ (unsigned long) uc->uc_mcontext->__ss.__cpsr
+ );
+ logStackContent((void**) arm_thread_state64_get_sp(uc->uc_mcontext->__ss));
+ #endif
+/* Linux */
+#elif defined(__linux__)
+ /* Linux x86 */
+ #if defined(__i386__) || ((defined(__X86_64__) || defined(__x86_64__)) && defined(__ILP32__))
+ serverLog(LL_WARNING,
+ "\n"
+ "EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"
+ "EDI:%08lx ESI:%08lx EBP:%08lx ESP:%08lx\n"
+ "SS :%08lx EFL:%08lx EIP:%08lx CS:%08lx\n"
+ "DS :%08lx ES :%08lx FS :%08lx GS:%08lx",
+ (unsigned long) uc->uc_mcontext.gregs[11],
+ (unsigned long) uc->uc_mcontext.gregs[8],
+ (unsigned long) uc->uc_mcontext.gregs[10],
+ (unsigned long) uc->uc_mcontext.gregs[9],
+ (unsigned long) uc->uc_mcontext.gregs[4],
+ (unsigned long) uc->uc_mcontext.gregs[5],
+ (unsigned long) uc->uc_mcontext.gregs[6],
+ (unsigned long) uc->uc_mcontext.gregs[7],
+ (unsigned long) uc->uc_mcontext.gregs[18],
+ (unsigned long) uc->uc_mcontext.gregs[17],
+ (unsigned long) uc->uc_mcontext.gregs[14],
+ (unsigned long) uc->uc_mcontext.gregs[15],
+ (unsigned long) uc->uc_mcontext.gregs[3],
+ (unsigned long) uc->uc_mcontext.gregs[2],
+ (unsigned long) uc->uc_mcontext.gregs[1],
+ (unsigned long) uc->uc_mcontext.gregs[0]
+ );
+ logStackContent((void**)uc->uc_mcontext.gregs[7]);
+ #elif defined(__X86_64__) || defined(__x86_64__)
+ /* Linux AMD64 */
+ serverLog(LL_WARNING,
+ "\n"
+ "RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
+ "RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
+ "R8 :%016lx R9 :%016lx\nR10:%016lx R11:%016lx\n"
+ "R12:%016lx R13:%016lx\nR14:%016lx R15:%016lx\n"
+ "RIP:%016lx EFL:%016lx\nCSGSFS:%016lx",
+ (unsigned long) uc->uc_mcontext.gregs[13],
+ (unsigned long) uc->uc_mcontext.gregs[11],
+ (unsigned long) uc->uc_mcontext.gregs[14],
+ (unsigned long) uc->uc_mcontext.gregs[12],
+ (unsigned long) uc->uc_mcontext.gregs[8],
+ (unsigned long) uc->uc_mcontext.gregs[9],
+ (unsigned long) uc->uc_mcontext.gregs[10],
+ (unsigned long) uc->uc_mcontext.gregs[15],
+ (unsigned long) uc->uc_mcontext.gregs[0],
+ (unsigned long) uc->uc_mcontext.gregs[1],
+ (unsigned long) uc->uc_mcontext.gregs[2],
+ (unsigned long) uc->uc_mcontext.gregs[3],
+ (unsigned long) uc->uc_mcontext.gregs[4],
+ (unsigned long) uc->uc_mcontext.gregs[5],
+ (unsigned long) uc->uc_mcontext.gregs[6],
+ (unsigned long) uc->uc_mcontext.gregs[7],
+ (unsigned long) uc->uc_mcontext.gregs[16],
+ (unsigned long) uc->uc_mcontext.gregs[17],
+ (unsigned long) uc->uc_mcontext.gregs[18]
+ );
+ logStackContent((void**)uc->uc_mcontext.gregs[15]);
+ #elif defined(__riscv) /* Linux RISC-V */
+ serverLog(LL_WARNING,
+ "\n"
+ "ra:%016lx gp:%016lx\ntp:%016lx t0:%016lx\n"
+ "t1:%016lx t2:%016lx\ns0:%016lx s1:%016lx\n"
+ "a0:%016lx a1:%016lx\na2:%016lx a3:%016lx\n"
+ "a4:%016lx a5:%016lx\na6:%016lx a7:%016lx\n"
+ "s2:%016lx s3:%016lx\ns4:%016lx s5:%016lx\n"
+ "s6:%016lx s7:%016lx\ns8:%016lx s9:%016lx\n"
+ "s10:%016lx s11:%016lx\nt3:%016lx t4:%016lx\n"
+ "t5:%016lx t6:%016lx\n",
+ (unsigned long) uc->uc_mcontext.__gregs[1],
+ (unsigned long) uc->uc_mcontext.__gregs[3],
+ (unsigned long) uc->uc_mcontext.__gregs[4],
+ (unsigned long) uc->uc_mcontext.__gregs[5],
+ (unsigned long) uc->uc_mcontext.__gregs[6],
+ (unsigned long) uc->uc_mcontext.__gregs[7],
+ (unsigned long) uc->uc_mcontext.__gregs[8],
+ (unsigned long) uc->uc_mcontext.__gregs[9],
+ (unsigned long) uc->uc_mcontext.__gregs[10],
+ (unsigned long) uc->uc_mcontext.__gregs[11],
+ (unsigned long) uc->uc_mcontext.__gregs[12],
+ (unsigned long) uc->uc_mcontext.__gregs[13],
+ (unsigned long) uc->uc_mcontext.__gregs[14],
+ (unsigned long) uc->uc_mcontext.__gregs[15],
+ (unsigned long) uc->uc_mcontext.__gregs[16],
+ (unsigned long) uc->uc_mcontext.__gregs[17],
+ (unsigned long) uc->uc_mcontext.__gregs[18],
+ (unsigned long) uc->uc_mcontext.__gregs[19],
+ (unsigned long) uc->uc_mcontext.__gregs[20],
+ (unsigned long) uc->uc_mcontext.__gregs[21],
+ (unsigned long) uc->uc_mcontext.__gregs[22],
+ (unsigned long) uc->uc_mcontext.__gregs[23],
+ (unsigned long) uc->uc_mcontext.__gregs[24],
+ (unsigned long) uc->uc_mcontext.__gregs[25],
+ (unsigned long) uc->uc_mcontext.__gregs[26],
+ (unsigned long) uc->uc_mcontext.__gregs[27],
+ (unsigned long) uc->uc_mcontext.__gregs[28],
+ (unsigned long) uc->uc_mcontext.__gregs[29],
+ (unsigned long) uc->uc_mcontext.__gregs[30],
+ (unsigned long) uc->uc_mcontext.__gregs[31]
+ );
+ logStackContent((void**)uc->uc_mcontext.__gregs[REG_SP]);
+ #elif defined(__aarch64__) /* Linux AArch64 */
+ serverLog(LL_WARNING,
+ "\n"
+ "X18:%016lx X19:%016lx\nX20:%016lx X21:%016lx\n"
+ "X22:%016lx X23:%016lx\nX24:%016lx X25:%016lx\n"
+ "X26:%016lx X27:%016lx\nX28:%016lx X29:%016lx\n"
+ "X30:%016lx\n"
+ "pc:%016lx sp:%016lx\npstate:%016lx fault_address:%016lx\n",
+ (unsigned long) uc->uc_mcontext.regs[18],
+ (unsigned long) uc->uc_mcontext.regs[19],
+ (unsigned long) uc->uc_mcontext.regs[20],
+ (unsigned long) uc->uc_mcontext.regs[21],
+ (unsigned long) uc->uc_mcontext.regs[22],
+ (unsigned long) uc->uc_mcontext.regs[23],
+ (unsigned long) uc->uc_mcontext.regs[24],
+ (unsigned long) uc->uc_mcontext.regs[25],
+ (unsigned long) uc->uc_mcontext.regs[26],
+ (unsigned long) uc->uc_mcontext.regs[27],
+ (unsigned long) uc->uc_mcontext.regs[28],
+ (unsigned long) uc->uc_mcontext.regs[29],
+ (unsigned long) uc->uc_mcontext.regs[30],
+ (unsigned long) uc->uc_mcontext.pc,
+ (unsigned long) uc->uc_mcontext.sp,
+ (unsigned long) uc->uc_mcontext.pstate,
+ (unsigned long) uc->uc_mcontext.fault_address
+ );
+ logStackContent((void**)uc->uc_mcontext.sp);
+ #elif defined(__arm__) /* Linux ARM */
+ serverLog(LL_WARNING,
+ "\n"
+ "R10:%016lx R9 :%016lx\nR8 :%016lx R7 :%016lx\n"
+ "R6 :%016lx R5 :%016lx\nR4 :%016lx R3 :%016lx\n"
+ "R2 :%016lx R1 :%016lx\nR0 :%016lx EC :%016lx\n"
+ "fp: %016lx ip:%016lx\n"
+ "pc:%016lx sp:%016lx\ncpsr:%016lx fault_address:%016lx\n",
+ (unsigned long) uc->uc_mcontext.arm_r10,
+ (unsigned long) uc->uc_mcontext.arm_r9,
+ (unsigned long) uc->uc_mcontext.arm_r8,
+ (unsigned long) uc->uc_mcontext.arm_r7,
+ (unsigned long) uc->uc_mcontext.arm_r6,
+ (unsigned long) uc->uc_mcontext.arm_r5,
+ (unsigned long) uc->uc_mcontext.arm_r4,
+ (unsigned long) uc->uc_mcontext.arm_r3,
+ (unsigned long) uc->uc_mcontext.arm_r2,
+ (unsigned long) uc->uc_mcontext.arm_r1,
+ (unsigned long) uc->uc_mcontext.arm_r0,
+ (unsigned long) uc->uc_mcontext.error_code,
+ (unsigned long) uc->uc_mcontext.arm_fp,
+ (unsigned long) uc->uc_mcontext.arm_ip,
+ (unsigned long) uc->uc_mcontext.arm_pc,
+ (unsigned long) uc->uc_mcontext.arm_sp,
+ (unsigned long) uc->uc_mcontext.arm_cpsr,
+ (unsigned long) uc->uc_mcontext.fault_address
+ );
+ logStackContent((void**)uc->uc_mcontext.arm_sp);
+ #else
+ NOT_SUPPORTED();
+ #endif
+#elif defined(__FreeBSD__)
+ #if defined(__x86_64__)
+ serverLog(LL_WARNING,
+ "\n"
+ "RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
+ "RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
+ "R8 :%016lx R9 :%016lx\nR10:%016lx R11:%016lx\n"
+ "R12:%016lx R13:%016lx\nR14:%016lx R15:%016lx\n"
+ "RIP:%016lx EFL:%016lx\nCSGSFS:%016lx",
+ (unsigned long) uc->uc_mcontext.mc_rax,
+ (unsigned long) uc->uc_mcontext.mc_rbx,
+ (unsigned long) uc->uc_mcontext.mc_rcx,
+ (unsigned long) uc->uc_mcontext.mc_rdx,
+ (unsigned long) uc->uc_mcontext.mc_rdi,
+ (unsigned long) uc->uc_mcontext.mc_rsi,
+ (unsigned long) uc->uc_mcontext.mc_rbp,
+ (unsigned long) uc->uc_mcontext.mc_rsp,
+ (unsigned long) uc->uc_mcontext.mc_r8,
+ (unsigned long) uc->uc_mcontext.mc_r9,
+ (unsigned long) uc->uc_mcontext.mc_r10,
+ (unsigned long) uc->uc_mcontext.mc_r11,
+ (unsigned long) uc->uc_mcontext.mc_r12,
+ (unsigned long) uc->uc_mcontext.mc_r13,
+ (unsigned long) uc->uc_mcontext.mc_r14,
+ (unsigned long) uc->uc_mcontext.mc_r15,
+ (unsigned long) uc->uc_mcontext.mc_rip,
+ (unsigned long) uc->uc_mcontext.mc_rflags,
+ (unsigned long) uc->uc_mcontext.mc_cs
+ );
+ logStackContent((void**)uc->uc_mcontext.mc_rsp);
+ #elif defined(__i386__)
+ serverLog(LL_WARNING,
+ "\n"
+ "EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"
+ "EDI:%08lx ESI:%08lx EBP:%08lx ESP:%08lx\n"
+ "SS :%08lx EFL:%08lx EIP:%08lx CS:%08lx\n"
+ "DS :%08lx ES :%08lx FS :%08lx GS:%08lx",
+ (unsigned long) uc->uc_mcontext.mc_eax,
+ (unsigned long) uc->uc_mcontext.mc_ebx,
+ (unsigned long) uc->uc_mcontext.mc_ebx,
+ (unsigned long) uc->uc_mcontext.mc_edx,
+ (unsigned long) uc->uc_mcontext.mc_edi,
+ (unsigned long) uc->uc_mcontext.mc_esi,
+ (unsigned long) uc->uc_mcontext.mc_ebp,
+ (unsigned long) uc->uc_mcontext.mc_esp,
+ (unsigned long) uc->uc_mcontext.mc_ss,
+ (unsigned long) uc->uc_mcontext.mc_eflags,
+ (unsigned long) uc->uc_mcontext.mc_eip,
+ (unsigned long) uc->uc_mcontext.mc_cs,
+ (unsigned long) uc->uc_mcontext.mc_es,
+ (unsigned long) uc->uc_mcontext.mc_fs,
+ (unsigned long) uc->uc_mcontext.mc_gs
+ );
+ logStackContent((void**)uc->uc_mcontext.mc_esp);
+ #else
+ NOT_SUPPORTED();
+ #endif
+#elif defined(__OpenBSD__)
+ #if defined(__x86_64__)
+ serverLog(LL_WARNING,
+ "\n"
+ "RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
+ "RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
+ "R8 :%016lx R9 :%016lx\nR10:%016lx R11:%016lx\n"
+ "R12:%016lx R13:%016lx\nR14:%016lx R15:%016lx\n"
+ "RIP:%016lx EFL:%016lx\nCSGSFS:%016lx",
+ (unsigned long) uc->sc_rax,
+ (unsigned long) uc->sc_rbx,
+ (unsigned long) uc->sc_rcx,
+ (unsigned long) uc->sc_rdx,
+ (unsigned long) uc->sc_rdi,
+ (unsigned long) uc->sc_rsi,
+ (unsigned long) uc->sc_rbp,
+ (unsigned long) uc->sc_rsp,
+ (unsigned long) uc->sc_r8,
+ (unsigned long) uc->sc_r9,
+ (unsigned long) uc->sc_r10,
+ (unsigned long) uc->sc_r11,
+ (unsigned long) uc->sc_r12,
+ (unsigned long) uc->sc_r13,
+ (unsigned long) uc->sc_r14,
+ (unsigned long) uc->sc_r15,
+ (unsigned long) uc->sc_rip,
+ (unsigned long) uc->sc_rflags,
+ (unsigned long) uc->sc_cs
+ );
+ logStackContent((void**)uc->sc_rsp);
+ #elif defined(__i386__)
+ serverLog(LL_WARNING,
+ "\n"
+ "EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"
+ "EDI:%08lx ESI:%08lx EBP:%08lx ESP:%08lx\n"
+ "SS :%08lx EFL:%08lx EIP:%08lx CS:%08lx\n"
+ "DS :%08lx ES :%08lx FS :%08lx GS:%08lx",
+ (unsigned long) uc->sc_eax,
+ (unsigned long) uc->sc_ebx,
+ (unsigned long) uc->sc_ebx,
+ (unsigned long) uc->sc_edx,
+ (unsigned long) uc->sc_edi,
+ (unsigned long) uc->sc_esi,
+ (unsigned long) uc->sc_ebp,
+ (unsigned long) uc->sc_esp,
+ (unsigned long) uc->sc_ss,
+ (unsigned long) uc->sc_eflags,
+ (unsigned long) uc->sc_eip,
+ (unsigned long) uc->sc_cs,
+ (unsigned long) uc->sc_es,
+ (unsigned long) uc->sc_fs,
+ (unsigned long) uc->sc_gs
+ );
+ logStackContent((void**)uc->sc_esp);
+ #else
+ NOT_SUPPORTED();
+ #endif
+#elif defined(__NetBSD__)
+ #if defined(__x86_64__)
+ serverLog(LL_WARNING,
+ "\n"
+ "RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
+ "RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
+ "R8 :%016lx R9 :%016lx\nR10:%016lx R11:%016lx\n"
+ "R12:%016lx R13:%016lx\nR14:%016lx R15:%016lx\n"
+ "RIP:%016lx EFL:%016lx\nCSGSFS:%016lx",
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RAX],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RBX],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RCX],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RDX],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RDI],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RSI],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RBP],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RSP],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_R8],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_R9],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_R10],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_R11],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_R12],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_R13],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_R14],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_R15],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RIP],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_RFLAGS],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_CS]
+ );
+ logStackContent((void**)uc->uc_mcontext.__gregs[_REG_RSP]);
+ #elif defined(__i386__)
+ serverLog(LL_WARNING,
+ "\n"
+ "EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"
+ "EDI:%08lx ESI:%08lx EBP:%08lx ESP:%08lx\n"
+ "SS :%08lx EFL:%08lx EIP:%08lx CS:%08lx\n"
+ "DS :%08lx ES :%08lx FS :%08lx GS:%08lx",
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_EAX],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_EBX],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_EDX],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_EDI],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_ESI],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_EBP],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_ESP],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_SS],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_EFLAGS],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_EIP],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_CS],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_ES],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_FS],
+ (unsigned long) uc->uc_mcontext.__gregs[_REG_GS]
+ );
+ #else
+ NOT_SUPPORTED();
+ #endif
+#elif defined(__DragonFly__)
+ serverLog(LL_WARNING,
+ "\n"
+ "RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
+ "RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
+ "R8 :%016lx R9 :%016lx\nR10:%016lx R11:%016lx\n"
+ "R12:%016lx R13:%016lx\nR14:%016lx R15:%016lx\n"
+ "RIP:%016lx EFL:%016lx\nCSGSFS:%016lx",
+ (unsigned long) uc->uc_mcontext.mc_rax,
+ (unsigned long) uc->uc_mcontext.mc_rbx,
+ (unsigned long) uc->uc_mcontext.mc_rcx,
+ (unsigned long) uc->uc_mcontext.mc_rdx,
+ (unsigned long) uc->uc_mcontext.mc_rdi,
+ (unsigned long) uc->uc_mcontext.mc_rsi,
+ (unsigned long) uc->uc_mcontext.mc_rbp,
+ (unsigned long) uc->uc_mcontext.mc_rsp,
+ (unsigned long) uc->uc_mcontext.mc_r8,
+ (unsigned long) uc->uc_mcontext.mc_r9,
+ (unsigned long) uc->uc_mcontext.mc_r10,
+ (unsigned long) uc->uc_mcontext.mc_r11,
+ (unsigned long) uc->uc_mcontext.mc_r12,
+ (unsigned long) uc->uc_mcontext.mc_r13,
+ (unsigned long) uc->uc_mcontext.mc_r14,
+ (unsigned long) uc->uc_mcontext.mc_r15,
+ (unsigned long) uc->uc_mcontext.mc_rip,
+ (unsigned long) uc->uc_mcontext.mc_rflags,
+ (unsigned long) uc->uc_mcontext.mc_cs
+ );
+ logStackContent((void**)uc->uc_mcontext.mc_rsp);
+#elif defined(__sun)
+ #if defined(__x86_64__)
+ serverLog(LL_WARNING,
+ "\n"
+ "RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
+ "RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
+ "R8 :%016lx R9 :%016lx\nR10:%016lx R11:%016lx\n"
+ "R12:%016lx R13:%016lx\nR14:%016lx R15:%016lx\n"
+ "RIP:%016lx EFL:%016lx\nCSGSFS:%016lx",
+ (unsigned long) uc->uc_mcontext.gregs[REG_RAX],
+ (unsigned long) uc->uc_mcontext.gregs[REG_RBX],
+ (unsigned long) uc->uc_mcontext.gregs[REG_RCX],
+ (unsigned long) uc->uc_mcontext.gregs[REG_RDX],
+ (unsigned long) uc->uc_mcontext.gregs[REG_RDI],
+ (unsigned long) uc->uc_mcontext.gregs[REG_RSI],
+ (unsigned long) uc->uc_mcontext.gregs[REG_RBP],
+ (unsigned long) uc->uc_mcontext.gregs[REG_RSP],
+ (unsigned long) uc->uc_mcontext.gregs[REG_R8],
+ (unsigned long) uc->uc_mcontext.gregs[REG_R9],
+ (unsigned long) uc->uc_mcontext.gregs[REG_R10],
+ (unsigned long) uc->uc_mcontext.gregs[REG_R11],
+ (unsigned long) uc->uc_mcontext.gregs[REG_R12],
+ (unsigned long) uc->uc_mcontext.gregs[REG_R13],
+ (unsigned long) uc->uc_mcontext.gregs[REG_R14],
+ (unsigned long) uc->uc_mcontext.gregs[REG_R15],
+ (unsigned long) uc->uc_mcontext.gregs[REG_RIP],
+ (unsigned long) uc->uc_mcontext.gregs[REG_RFL],
+ (unsigned long) uc->uc_mcontext.gregs[REG_CS]
+ );
+ logStackContent((void**)uc->uc_mcontext.gregs[REG_RSP]);
+ #endif
+#else
+ NOT_SUPPORTED();
+#endif
+#undef NOT_SUPPORTED
+}
+
+#endif /* HAVE_BACKTRACE */
+
+/* Return a file descriptor to write directly to the Redis log with the
+ * write(2) syscall, that can be used in critical sections of the code
+ * where the rest of Redis can't be trusted (for example during the memory
+ * test) or when an API call requires a raw fd.
+ *
+ * Close it with closeDirectLogFiledes(). */
+int openDirectLogFiledes(void) {
+ int log_to_stdout = server.logfile[0] == '\0';
+ int fd = log_to_stdout ?
+ STDOUT_FILENO :
+ open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644);
+ return fd;
+}
+
+/* Used to close what closeDirectLogFiledes() returns. */
+void closeDirectLogFiledes(int fd) {
+ int log_to_stdout = server.logfile[0] == '\0';
+ if (!log_to_stdout) close(fd);
+}
+
+#ifdef HAVE_BACKTRACE
+
/* Logs the stack trace using the backtrace() call. This function is designed
 * to be called from signal handlers safely: it writes with raw write(2)
 * through the direct log fd and only uses the async-signal-safe
 * backtrace_symbols_fd() variant (no malloc).
 * The eip argument is optional (can take NULL).
 * The uplevel argument indicates how many of the calling functions to skip.
 */
void logStackTrace(void *eip, int uplevel) {
    void *trace[100];
    int trace_size = 0, fd = openDirectLogFiledes();
    char *msg;
    uplevel++; /* skip this function */

    if (fd == -1) return; /* If we can't log there is nothing to do. */

    /* Get the stack trace first! */
    trace_size = backtrace(trace, 100);

    msg = "\n------ STACK TRACE ------\n";
    if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */};

    if (eip) {
        /* Write EIP to the log file*/
        msg = "EIP:\n";
        if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */};
        backtrace_symbols_fd(&eip, 1, fd);
    }

    /* Write symbols to log file */
    msg = "\nBacktrace:\n";
    if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */};
    backtrace_symbols_fd(trace+uplevel, trace_size-uplevel, fd);

    /* Cleanup */
    closeDirectLogFiledes(fd);
}
+
+#endif /* HAVE_BACKTRACE */
+
+sds genClusterDebugString(sds infostring) {
+ infostring = sdscatprintf(infostring, "\r\n# Cluster info\r\n");
+ infostring = sdscatsds(infostring, genClusterInfoString());
+ infostring = sdscatprintf(infostring, "\n------ CLUSTER NODES OUTPUT ------\n");
+ infostring = sdscatsds(infostring, clusterGenNodesDescription(NULL, 0, 0));
+
+ return infostring;
+}
+
+/* Log global server info */
+void logServerInfo(void) {
+ sds infostring, clients;
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ INFO OUTPUT ------\n");
+ int all = 0, everything = 0;
+ robj *argv[1];
+ argv[0] = createStringObject("all", strlen("all"));
+ dict *section_dict = genInfoSectionDict(argv, 1, NULL, &all, &everything);
+ infostring = genRedisInfoString(section_dict, all, everything);
+ if (server.cluster_enabled){
+ infostring = genClusterDebugString(infostring);
+ }
+ serverLogRaw(LL_WARNING|LL_RAW, infostring);
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n");
+ clients = getAllClientsInfoString(-1);
+ serverLogRaw(LL_WARNING|LL_RAW, clients);
+ sdsfree(infostring);
+ sdsfree(clients);
+ releaseInfoSectionDict(section_dict);
+ decrRefCount(argv[0]);
+}
+
+/* Log certain config values, which can be used for debugging */
+void logConfigDebugInfo(void) {
+ sds configstring;
+ configstring = getConfigDebugInfo();
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ CONFIG DEBUG OUTPUT ------\n");
+ serverLogRaw(LL_WARNING|LL_RAW, configstring);
+ sdsfree(configstring);
+}
+
+/* Log modules info. Something we wanna do last since we fear it may crash. */
+void logModulesInfo(void) {
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ MODULES INFO OUTPUT ------\n");
+ sds infostring = modulesCollectInfo(sdsempty(), NULL, 1, 0);
+ serverLogRaw(LL_WARNING|LL_RAW, infostring);
+ sdsfree(infostring);
+}
+
/* Log information about the "current" client, that is, the client that is
 * currently being served by Redis. May be NULL if Redis is not serving a
 * client right now. Logged data: the catClientInfoString() line, the decoded
 * command arguments (each truncated to 128 bytes) and, when the first
 * argument matches a key in the client's DB, debug info of its value. */
void logCurrentClient(client *cc, const char *title) {
    if (cc == NULL) return;

    sds client;
    int j;

    serverLog(LL_WARNING|LL_RAW, "\n------ %s CLIENT INFO ------\n", title);
    client = catClientInfoString(sdsempty(),cc);
    serverLog(LL_WARNING|LL_RAW,"%s\n", client);
    sdsfree(client);
    serverLog(LL_WARNING|LL_RAW,"argc: '%d'\n", cc->argc);
    for (j = 0; j < cc->argc; j++) {
        robj *decoded;
        decoded = getDecodedObject(cc->argv[j]);
        /* sdscatrepr() produces a printable, escaped representation. */
        sds repr = sdscatrepr(sdsempty(),decoded->ptr, min(sdslen(decoded->ptr), 128));
        serverLog(LL_WARNING|LL_RAW,"argv[%d]: '%s'\n", j, (char*)repr);
        /* Stop right after an AUTH / AUTH2 command name so that the
         * credentials that follow it are never written to the log. */
        if (!strcasecmp(decoded->ptr, "auth") || !strcasecmp(decoded->ptr, "auth2")) {
            sdsfree(repr);
            decrRefCount(decoded);
            break;
        }
        sdsfree(repr);
        decrRefCount(decoded);
    }
    /* Check if the first argument, usually a key, is found inside the
     * selected DB, and if so print info about the associated object. */
    if (cc->argc > 1) {
        robj *val, *key;
        dictEntry *de;

        key = getDecodedObject(cc->argv[1]);
        de = dictFind(cc->db->dict, key->ptr);
        if (de) {
            val = dictGetVal(de);
            serverLog(LL_WARNING,"key '%s' found in DB containing the following object:", (char*)key->ptr);
            serverLogObjectDebugInfo(val);
        }
        decrRefCount(key);
    }
}
+
+#if defined(HAVE_PROC_MAPS)
+
+#define MEMTEST_MAX_REGIONS 128
+
+/* A non destructive memory test executed during segfault. */
+int memtest_test_linux_anonymous_maps(void) {
+ FILE *fp;
+ char line[1024];
+ char logbuf[1024];
+ size_t start_addr, end_addr, size;
+ size_t start_vect[MEMTEST_MAX_REGIONS];
+ size_t size_vect[MEMTEST_MAX_REGIONS];
+ int regions = 0, j;
+
+ int fd = openDirectLogFiledes();
+ if (!fd) return 0;
+
+ fp = fopen("/proc/self/maps","r");
+ if (!fp) {
+ closeDirectLogFiledes(fd);
+ return 0;
+ }
+ while(fgets(line,sizeof(line),fp) != NULL) {
+ char *start, *end, *p = line;
+
+ start = p;
+ p = strchr(p,'-');
+ if (!p) continue;
+ *p++ = '\0';
+ end = p;
+ p = strchr(p,' ');
+ if (!p) continue;
+ *p++ = '\0';
+ if (strstr(p,"stack") ||
+ strstr(p,"vdso") ||
+ strstr(p,"vsyscall")) continue;
+ if (!strstr(p,"00:00")) continue;
+ if (!strstr(p,"rw")) continue;
+
+ start_addr = strtoul(start,NULL,16);
+ end_addr = strtoul(end,NULL,16);
+ size = end_addr-start_addr;
+
+ start_vect[regions] = start_addr;
+ size_vect[regions] = size;
+ snprintf(logbuf,sizeof(logbuf),
+ "*** Preparing to test memory region %lx (%lu bytes)\n",
+ (unsigned long) start_vect[regions],
+ (unsigned long) size_vect[regions]);
+ if (write(fd,logbuf,strlen(logbuf)) == -1) { /* Nothing to do. */ }
+ regions++;
+ }
+
+ int errors = 0;
+ for (j = 0; j < regions; j++) {
+ if (write(fd,".",1) == -1) { /* Nothing to do. */ }
+ errors += memtest_preserving_test((void*)start_vect[j],size_vect[j],1);
+ if (write(fd, errors ? "E" : "O",1) == -1) { /* Nothing to do. */ }
+ }
+ if (write(fd,"\n",1) == -1) { /* Nothing to do. */ }
+
+ /* NOTE: It is very important to close the file descriptor only now
+ * because closing it before may result into unmapping of some memory
+ * region that we are testing. */
+ fclose(fp);
+ closeDirectLogFiledes(fd);
+ return errors;
+}
+#endif /* HAVE_PROC_MAPS */
+
+static void killMainThread(void) {
+ int err;
+ if (pthread_self() != server.main_thread_id && pthread_cancel(server.main_thread_id) == 0) {
+ if ((err = pthread_join(server.main_thread_id,NULL)) != 0) {
+ serverLog(LL_WARNING, "main thread can not be joined: %s", strerror(err));
+ } else {
+ serverLog(LL_WARNING, "main thread terminated");
+ }
+ }
+}
+
/* Kill the running threads (other than current) in an unclean way. This function
 * should be used only when it's critical to stop the threads for some reason.
 * Currently Redis does this only on crash (for instance on SIGSEGV) in order
 * to perform a fast memory check without other threads messing with memory. */
void killThreads(void) {
    killMainThread();   /* Event loop thread (no-op if we are it). */
    bioKillThreads();   /* Background I/O (lazyfree, fsync, ...) threads. */
    killIOThreads();    /* Networking I/O threads. */
}
+
/* Run a fast, non destructive memory check after a crash, when enabled
 * via the memcheck-enabled config. Requires /proc/self/maps, hence it is
 * only compiled in under HAVE_PROC_MAPS. All other threads are killed
 * first so nothing mutates memory while it is being tested. */
void doFastMemoryTest(void) {
#if defined(HAVE_PROC_MAPS)
    if (!server.memcheck_enabled) return;

    serverLogRaw(LL_WARNING|LL_RAW, "\n------ FAST MEMORY TEST ------\n");
    killThreads();
    int broken = memtest_test_linux_anonymous_maps();
    serverLogRaw(LL_WARNING|LL_RAW, broken ?
        "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!\n" :
        "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible.\n");
#endif /* HAVE_PROC_MAPS */
}
+
/* Scans the (assumed) x86 code starting at addr, for a max of `len`
 * bytes, searching for E8 (callq) opcodes, and dumping the symbols
 * and the call offset if they appear to be valid. */
void dumpX86Calls(void *addr, size_t len) {
    size_t j;
    unsigned char *p = addr;
    Dl_info info;
    /* Hash table to best-effort avoid printing the same symbol
     * multiple times. Only one target per low-byte bucket is remembered,
     * so collisions may still produce duplicate lines. */
    unsigned long ht[256] = {0};

    if (len < 5) return;
    for (j = 0; j < len-4; j++) {
        if (p[j] != 0xE8) continue; /* Not an E8 CALL opcode. */
        /* E8 uses a rel32 displacement relative to the next instruction
         * (opcode + 4 byte immediate = 5 bytes). */
        unsigned long target = (unsigned long)addr+j+5;
        uint32_t tmp;
        /* memcpy avoids an unaligned (and strict-aliasing-violating)
         * 32 bit load from the instruction stream. */
        memcpy(&tmp, p+j+1, sizeof(tmp));
        target += tmp;
        if (dladdr((void*)target, &info) != 0 && info.dli_sname != NULL) {
            if (ht[target&0xff] != target) {
                /* NOTE(review): this writes to stdout via printf while the
                 * rest of the crash path uses serverLog — output may not
                 * reach the configured logfile; confirm this is intended. */
                printf("Function at 0x%lx is %s\n",target,info.dli_sname);
                ht[target&0xff] = target;
            }
            j += 4; /* Skip the 32 bit immediate. */
        }
    }
}
+
+void dumpCodeAroundEIP(void *eip) {
+ Dl_info info;
+ if (dladdr(eip, &info) != 0) {
+ serverLog(LL_WARNING|LL_RAW,
+ "\n------ DUMPING CODE AROUND EIP ------\n"
+ "Symbol: %s (base: %p)\n"
+ "Module: %s (base %p)\n"
+ "$ xxd -r -p /tmp/dump.hex /tmp/dump.bin\n"
+ "$ objdump --adjust-vma=%p -D -b binary -m i386:x86-64 /tmp/dump.bin\n"
+ "------\n",
+ info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase,
+ info.dli_saddr);
+ size_t len = (long)eip - (long)info.dli_saddr;
+ unsigned long sz = sysconf(_SC_PAGESIZE);
+ if (len < 1<<13) { /* we don't have functions over 8k (verified) */
+ /* Find the address of the next page, which is our "safety"
+ * limit when dumping. Then try to dump just 128 bytes more
+ * than EIP if there is room, or stop sooner. */
+ void *base = (void *)info.dli_saddr;
+ unsigned long next = ((unsigned long)eip + sz) & ~(sz-1);
+ unsigned long end = (unsigned long)eip + 128;
+ if (end > next) end = next;
+ len = end - (unsigned long)base;
+ serverLogHexDump(LL_WARNING, "dump of function",
+ base, len);
+ dumpX86Calls(base, len);
+ }
+ }
+}
+
/* Intentionally-empty function used as a safe landing point: when a crash
 * was apparently caused by calling through an invalid function pointer,
 * the saved EIP is temporarily redirected here so that backtrace() does
 * not fault again on the bad address (see sigsegvHandler). */
void invalidFunctionWasCalled(void) {}

/* Function-pointer type for the helper above; used to store it into a
 * void* without a direct function/object pointer cast warning. */
typedef void (*invalidFunctionWasCalledType)(void);
+
/* Fatal-signal handler (SIGSEGV/SIGBUS/...): produce a full crash report
 * with stack trace, registers, server state, and an optional memory test,
 * then re-raise the signal with the default disposition so the process
 * dies (and possibly dumps core) with the right signal. */
void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
    UNUSED(secret);
    UNUSED(info); /* NOTE(review): info is actually read below; these
                   * UNUSED() marks only silence non-backtrace builds. */

    bugReportStart();
    serverLog(LL_WARNING,
        "Redis %s crashed by signal: %d, si_code: %d", REDIS_VERSION, sig, info->si_code);
    if (sig == SIGSEGV || sig == SIGBUS) {
        serverLog(LL_WARNING,
        "Accessing address: %p", (void*)info->si_addr);
    }
    if (info->si_code == SI_USER && info->si_pid != -1) {
        /* The signal was sent by another process (e.g. kill(1)). */
        serverLog(LL_WARNING, "Killed by PID: %ld, UID: %d", (long) info->si_pid, info->si_uid);
    }

#ifdef HAVE_BACKTRACE
    ucontext_t *uc = (ucontext_t*) secret;
    void *eip = getAndSetMcontextEip(uc, NULL);
    if (eip != NULL) {
        serverLog(LL_WARNING,
        "Crashed running the instruction at: %p", eip);
    }

    if (eip == info->si_addr) {
        /* When eip matches the bad address, it's an indication that we crashed when calling a non-mapped
         * function pointer. In that case the call to backtrace will crash trying to access that address and we
         * won't get a crash report logged. Set it to a valid point to avoid that crash. */

        /* This trick allow to avoid compiler warning */
        void *ptr;
        invalidFunctionWasCalledType *ptr_ptr = (invalidFunctionWasCalledType*)&ptr;
        *ptr_ptr = invalidFunctionWasCalled;
        getAndSetMcontextEip(uc, ptr);
    }

    logStackTrace(eip, 1);

    if (eip == info->si_addr) {
        /* Restore old eip so registers/code dump report the real one. */
        getAndSetMcontextEip(uc, eip);
    }

    logRegisters(uc);
#endif

    printCrashReport();

#ifdef HAVE_BACKTRACE
    if (eip != NULL)
        dumpCodeAroundEIP(eip);
#endif

    /* Re-raise the original signal with SIG_DFL to terminate. */
    bugReportEnd(1, sig);
}
+
/* Emit the bulk of the crash report. The order is deliberate: cheap and
 * safe information first, the steps most likely to crash themselves
 * (module introspection, memory test) last. */
void printCrashReport(void) {
    /* Log INFO and CLIENT LIST */
    logServerInfo();

    /* Log the current client */
    logCurrentClient(server.current_client, "CURRENT");
    logCurrentClient(server.executing_client, "EXECUTING");

    /* Log modules info. Something we wanna do last since we fear it may crash. */
    logModulesInfo();

    /* Log debug config information, which are some values
     * which may be useful for debugging crashes. */
    logConfigDebugInfo();

    /* Run memory test in case the crash was triggered by memory corruption.
     * This kills the other threads, so nothing can come after it. */
    doFastMemoryTest();
}
+
/* Close the bug report. Depending on the arguments, either terminate via
 * exit/abort (killViaSignal == 0), or restore the default disposition for
 * `sig` and re-raise it so the process dies with the original signal
 * (allowing a core dump if enabled). */
void bugReportEnd(int killViaSignal, int sig) {
    struct sigaction act;

    serverLogRaw(LL_WARNING|LL_RAW,
"\n=== REDIS BUG REPORT END. Make sure to include from START to END. ===\n\n"
"       Please report the crash by opening an issue on github:\n\n"
"           http://github.com/redis/redis/issues\n\n"
"  If a Redis module was involved, please open in the module's repo instead.\n\n"
"  Suspect RAM error? Use redis-server --test-memory to verify it.\n\n"
"  Some other issues could be detected by redis-server --check-system\n"
);

    /* free(messages); Don't call free() with possibly corrupted memory. */
    if (server.daemonize && server.supervised == 0 && server.pidfile) unlink(server.pidfile);

    if (!killViaSignal) {
        /* To avoid issues with valgrind, we may wanna exit rather than generate a signal */
        if (server.use_exit_on_panic) {
            /* Using _exit to bypass false leak reports by gcc ASAN */
            fflush(stdout);
            _exit(1);
        }
        abort();
    }

    /* Make sure we exit with the right signal at the end. So for instance
     * the core will be dumped if enabled. SA_RESETHAND would also restore
     * SIG_DFL after delivery, but we set it explicitly for clarity. */
    sigemptyset (&act.sa_mask);
    act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND;
    act.sa_handler = SIG_DFL;
    sigaction (sig, &act, NULL);
    kill(getpid(),sig);
}
+
+/* ==================== Logging functions for debugging ===================== */
+
+void serverLogHexDump(int level, char *descr, void *value, size_t len) {
+ char buf[65], *b;
+ unsigned char *v = value;
+ char charset[] = "0123456789abcdef";
+
+ serverLog(level,"%s (hexdump of %zu bytes):", descr, len);
+ b = buf;
+ while(len) {
+ b[0] = charset[(*v)>>4];
+ b[1] = charset[(*v)&0xf];
+ b[2] = '\0';
+ b += 2;
+ len--;
+ v++;
+ if (b-buf == 64 || len == 0) {
+ serverLogRaw(level|LL_RAW,buf);
+ b = buf;
+ }
+ }
+ serverLogRaw(level|LL_RAW,"\n");
+}
+
+/* =========================== Software Watchdog ============================ */
+#include <sys/time.h>
+
/* SIGALRM handler of the software watchdog: logs that the timer expired
 * and, when backtrace support is available, the stack of the code that
 * was interrupted — which is what was blocking the event loop. Only the
 * async-signal-safe "FromHandler" logging variant is used here. */
void watchdogSignalHandler(int sig, siginfo_t *info, void *secret) {
#ifdef HAVE_BACKTRACE
    ucontext_t *uc = (ucontext_t*) secret;
#else
    (void)secret;
#endif
    UNUSED(info);
    UNUSED(sig);

    serverLogFromHandler(LL_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---");
#ifdef HAVE_BACKTRACE
    /* Read the interrupted EIP from the saved context and log the trace. */
    logStackTrace(getAndSetMcontextEip(uc, NULL), 1);
#else
    serverLogFromHandler(LL_WARNING,"Sorry: no support for backtrace().");
#endif
    serverLogFromHandler(LL_WARNING,"--------\n");
}
+
+/* Schedule a SIGALRM delivery after the specified period in milliseconds.
+ * If a timer is already scheduled, this function will re-schedule it to the
+ * specified time. If period is 0 the current timer is disabled. */
+void watchdogScheduleSignal(int period) {
+ struct itimerval it;
+
+ /* Will stop the timer if period is 0. */
+ it.it_value.tv_sec = period/1000;
+ it.it_value.tv_usec = (period%1000)*1000;
+ /* Don't automatically restart. */
+ it.it_interval.tv_sec = 0;
+ it.it_interval.tv_usec = 0;
+ setitimer(ITIMER_REAL, &it, NULL);
+}
+void applyWatchdogPeriod(void) {
+ struct sigaction act;
+
+ /* Disable watchdog when period is 0 */
+ if (server.watchdog_period == 0) {
+ watchdogScheduleSignal(0); /* Stop the current timer. */
+
+ /* Set the signal handler to SIG_IGN, this will also remove pending
+ * signals from the queue. */
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = SIG_IGN;
+ sigaction(SIGALRM, &act, NULL);
+ } else {
+ /* Setup the signal handler. */
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = watchdogSignalHandler;
+ sigaction(SIGALRM, &act, NULL);
+
+ /* If the configured period is smaller than twice the timer period, it is
+ * too short for the software watchdog to work reliably. Fix it now
+ * if needed. */
+ int min_period = (1000/server.hz)*2;
+ if (server.watchdog_period < min_period) server.watchdog_period = min_period;
+ watchdogScheduleSignal(server.watchdog_period); /* Adjust the current timer. */
+ }
+}
+
/* Positive input is sleep time in microseconds. Negative input is fractions
 * of microseconds, i.e. -10 means 100 nanoseconds. */
void debugDelay(int usec) {
    /* A sub-microsecond delay can't be slept directly (every sleep costs a
     * syscall + context switch), so it is emulated statistically: sleep a
     * whole microsecond once every -usec calls on average. */
    if (usec < 0)
        usec = (rand() % -usec) ? 0 : 1;
    if (usec)
        usleep(usec);
}
diff --git a/src/debugmacro.h b/src/debugmacro.h
new file mode 100644
index 0000000..dcd79a3
--- /dev/null
+++ b/src/debugmacro.h
@@ -0,0 +1,46 @@
+/* This file contains debugging macros to be used when investigating issues.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
#ifndef _REDIS_DEBUGMACRO_H_
#define _REDIS_DEBUGMACRO_H_

#include <stdio.h>
/* Quick-and-dirty trace macro for use while investigating issues: appends
 * a printf-style formatted line, prefixed with file:function:line, to
 * /tmp/log.txt. Not meant to remain in committed code paths.
 * Fix: guard against fopen() failure (unwritable /tmp, fd exhaustion...)
 * which previously caused a NULL pointer dereference in fprintf(). */
#define D(...) \
    do { \
        FILE *_dfp = fopen("/tmp/log.txt","a"); \
        if (_dfp) { \
            fprintf(_dfp,"%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
            fprintf(_dfp,__VA_ARGS__); \
            fprintf(_dfp,"\n"); \
            fclose(_dfp); \
        } \
    } while (0)

#endif /* _REDIS_DEBUGMACRO_H_ */
diff --git a/src/defrag.c b/src/defrag.c
new file mode 100644
index 0000000..ff63cf8
--- /dev/null
+++ b/src/defrag.c
@@ -0,0 +1,1079 @@
+/*
+ * Active memory defragmentation
+ * Try to find key / value allocations that need to be re-allocated in order
+ * to reduce external fragmentation.
+ * We do that by scanning the keyspace and for each pointer we have, we can try to
+ * ask the allocator if moving it to a new address will help reduce fragmentation.
+ *
+ * Copyright (c) 2020, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "cluster.h"
+#include <time.h>
+#include <assert.h>
+#include <stddef.h>
+
+#ifdef HAVE_DEFRAG
+
+/* this method was added to jemalloc in order to help us understand which
+ * pointers are worthwhile moving and which aren't */
+int je_get_defrag_hint(void* ptr);
+
+/* Defrag helper for generic allocations.
+ *
+ * returns NULL in case the allocation wasn't moved.
+ * when it returns a non-null value, the old pointer was already released
+ * and should NOT be accessed. */
+void* activeDefragAlloc(void *ptr) {
+ size_t size;
+ void *newptr;
+ if(!je_get_defrag_hint(ptr)) {
+ server.stat_active_defrag_misses++;
+ return NULL;
+ }
+ /* move this allocation to a new allocation.
+ * make sure not to use the thread cache. so that we don't get back the same
+ * pointers we try to free */
+ size = zmalloc_size(ptr);
+ newptr = zmalloc_no_tcache(size);
+ memcpy(newptr, ptr, size);
+ zfree_no_tcache(ptr);
+ server.stat_active_defrag_hits++;
+ return newptr;
+}
+
+/*Defrag helper for sds strings
+ *
+ * returns NULL in case the allocation wasn't moved.
+ * when it returns a non-null value, the old pointer was already released
+ * and should NOT be accessed. */
+sds activeDefragSds(sds sdsptr) {
+ void* ptr = sdsAllocPtr(sdsptr);
+ void* newptr = activeDefragAlloc(ptr);
+ if (newptr) {
+ size_t offset = sdsptr - (char*)ptr;
+ sdsptr = (char*)newptr + offset;
+ return sdsptr;
+ }
+ return NULL;
+}
+
/* Defrag helper for robj and/or string objects
 *
 * returns NULL in case the allocation wasn't moved.
 * when it returns a non-null value, the old pointer was already released
 * and should NOT be accessed. */
robj *activeDefragStringOb(robj* ob) {
    robj *ret = NULL;
    /* Shared objects (refcount > 1) can't be moved: other references
     * would be left dangling. */
    if (ob->refcount!=1)
        return NULL;

    /* try to defrag robj (only if not an EMBSTR type (handled below). */
    if (ob->type!=OBJ_STRING || ob->encoding!=OBJ_ENCODING_EMBSTR) {
        if ((ret = activeDefragAlloc(ob))) {
            ob = ret;
        }
    }

    /* try to defrag string object */
    if (ob->type == OBJ_STRING) {
        if(ob->encoding==OBJ_ENCODING_RAW) {
            /* RAW: the sds is a separate allocation, defrag it on its own. */
            sds newsds = activeDefragSds((sds)ob->ptr);
            if (newsds) {
                ob->ptr = newsds;
            }
        } else if (ob->encoding==OBJ_ENCODING_EMBSTR) {
            /* The sds is embedded in the object allocation, calculate the
             * offset and update the pointer in the new allocation. */
            long ofs = (intptr_t)ob->ptr - (intptr_t)ob;
            if ((ret = activeDefragAlloc(ob))) {
                ret->ptr = (void*)((intptr_t)ret + ofs);
            }
        } else if (ob->encoding!=OBJ_ENCODING_INT) {
            /* INT encoding stores the value inline: nothing to defrag. */
            serverPanic("Unknown string encoding");
        }
    }
    return ret;
}
+
+/* Defrag helper for lua scripts
+ *
+ * returns NULL in case the allocation wasn't moved.
+ * when it returns a non-null value, the old pointer was already released
+ * and should NOT be accessed. */
+luaScript *activeDefragLuaScript(luaScript *script) {
+ luaScript *ret = NULL;
+
+ /* try to defrag script struct */
+ if ((ret = activeDefragAlloc(script))) {
+ script = ret;
+ }
+
+ /* try to defrag actual script object */
+ robj *ob = activeDefragStringOb(script->body);
+ if (ob) script->body = ob;
+
+ return ret;
+}
+
/* Defrag helper for the dict hash-table allocations (the two bucket
 * arrays). The ht_table pointers are updated in place when moved.
 * NOTE(review): contrary to the previous comment, this receives the dict
 * pointer itself (not a dict**) and returns nothing; callers are expected
 * to defrag the dict struct separately (see defragHash/defragSet/...). */
void dictDefragTables(dict* d) {
    dictEntry **newtable;
    /* handle the first hash table */
    newtable = activeDefragAlloc(d->ht_table[0]);
    if (newtable)
        d->ht_table[0] = newtable;
    /* handle the second hash table (present only while rehashing) */
    if (d->ht_table[1]) {
        newtable = activeDefragAlloc(d->ht_table[1]);
        if (newtable)
            d->ht_table[1] = newtable;
    }
}
+
/* Internal function used by zslDefrag: after `oldnode` was re-allocated as
 * `newnode`, rewire every pointer in the skiplist that still references the
 * old (already freed) node. `update` holds, per level, the last node whose
 * forward pointer may point at oldnode (collected during the search). */
void zslUpdateNode(zskiplist *zsl, zskiplistNode *oldnode, zskiplistNode *newnode, zskiplistNode **update) {
    int i;
    for (i = 0; i < zsl->level; i++) {
        if (update[i]->level[i].forward == oldnode)
            update[i]->level[i].forward = newnode;
    }
    /* The header is never defragged this way, so it can't be oldnode. */
    serverAssert(zsl->header!=oldnode);
    if (newnode->level[0].forward) {
        /* Fix the backward pointer of the successor. */
        serverAssert(newnode->level[0].forward->backward==oldnode);
        newnode->level[0].forward->backward = newnode;
    } else {
        /* No successor: oldnode was the tail. */
        serverAssert(zsl->tail==oldnode);
        zsl->tail = newnode;
    }
}
+
/* Defrag helper for sorted set.
 * Update the robj pointer, defrag the skiplist struct and return the new score
 * reference. We may not access oldele pointer (not even the pointer stored in
 * the skiplist), as it was already freed. Newele may be null, in which case we
 * only need to defrag the skiplist, but not update the obj pointer.
 * When return value is non-NULL, it is the score reference that must be updated
 * in the dict record. */
double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx;
    int i;
    /* Comparisons must use a live string: the new one if the element moved. */
    sds ele = newele? newele: oldele;

    /* find the skiplist node referring to the object that was moved,
     * and all pointers that need to be updated if we'll end up moving the skiplist node. */
    x = zsl->header;
    for (i = zsl->level-1; i >= 0; i--) {
        while (x->level[i].forward &&
            x->level[i].forward->ele != oldele && /* make sure not to access the
                                                     ->obj pointer if it matches
                                                     oldele */
            (x->level[i].forward->score < score ||
                (x->level[i].forward->score == score &&
                sdscmp(x->level[i].forward->ele,ele) < 0)))
            x = x->level[i].forward;
        update[i] = x;
    }

    /* update the robj pointer inside the skip list record. */
    x = x->level[0].forward;
    serverAssert(x && score == x->score && x->ele==oldele);
    if (newele)
        x->ele = newele;

    /* try to defrag the skiplist record itself */
    newx = activeDefragAlloc(x);
    if (newx) {
        zslUpdateNode(zsl, x, newx, update);
        return &newx->score;
    }
    return NULL;
}
+
/* Defrag helper for sorted set.
 * Defrag a single dict entry key name, and corresponding skiplist struct */
void activeDefragZsetEntry(zset *zs, dictEntry *de) {
    sds newsds;
    double* newscore;
    sds sdsele = dictGetKey(de);
    if ((newsds = activeDefragSds(sdsele)))
        dictSetKey(zs->dict, de, newsds);
    /* Pass both the old pointer (freed — compared by identity only inside
     * zslDefrag, never dereferenced) and the new one, so the skiplist node
     * can be located and its ele pointer refreshed. */
    newscore = zslDefrag(zs->zsl, *(double*)dictGetVal(de), sdsele, newsds);
    if (newscore) {
        /* The dict value aliases the score field inside the skiplist node;
         * re-point it when the node was moved. */
        dictSetVal(zs->dict, de, newscore);
    }
}
+
/* Value kinds for activeDefragSdsDict(): select how the dict values are
 * defragged (keys are always sds). */
#define DEFRAG_SDS_DICT_NO_VAL 0
#define DEFRAG_SDS_DICT_VAL_IS_SDS 1
#define DEFRAG_SDS_DICT_VAL_IS_STROB 2
#define DEFRAG_SDS_DICT_VAL_VOID_PTR 3
#define DEFRAG_SDS_DICT_VAL_LUA_SCRIPT 4

/* Scan callback for activeDefragSdsDict(): intentionally empty — all the
 * work is performed by the dictDefragFunctions passed to dictScanDefrag(). */
void activeDefragSdsDictCallback(void *privdata, const dictEntry *de) {
    UNUSED(privdata);
    UNUSED(de);
}
+
+/* Defrag a dict with sds key and optional value (either ptr, sds or robj string) */
+void activeDefragSdsDict(dict* d, int val_type) {
+ unsigned long cursor = 0;
+ dictDefragFunctions defragfns = {
+ .defragAlloc = activeDefragAlloc,
+ .defragKey = (dictDefragAllocFunction *)activeDefragSds,
+ .defragVal = (val_type == DEFRAG_SDS_DICT_VAL_IS_SDS ? (dictDefragAllocFunction *)activeDefragSds :
+ val_type == DEFRAG_SDS_DICT_VAL_IS_STROB ? (dictDefragAllocFunction *)activeDefragStringOb :
+ val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR ? (dictDefragAllocFunction *)activeDefragAlloc :
+ val_type == DEFRAG_SDS_DICT_VAL_LUA_SCRIPT ? (dictDefragAllocFunction *)activeDefragLuaScript :
+ NULL)
+ };
+ do {
+ cursor = dictScanDefrag(d, cursor, activeDefragSdsDictCallback,
+ &defragfns, NULL);
+ } while (cursor != 0);
+}
+
/* Defrag a list of ptr, sds or robj string values */
void activeDefragList(list *l, int val_type) {
    listNode *ln, *newln;
    for (ln = l->head; ln; ln = ln->next) {
        /* Defrag the list node itself; on move, rewire its neighbors (or
         * the list head/tail when it was first/last). */
        if ((newln = activeDefragAlloc(ln))) {
            if (newln->prev)
                newln->prev->next = newln;
            else
                l->head = newln;
            if (newln->next)
                newln->next->prev = newln;
            else
                l->tail = newln;
            /* Continue iterating from the relocated node. */
            ln = newln;
        }
        /* Then defrag the node's value according to its declared kind. */
        if (val_type == DEFRAG_SDS_DICT_VAL_IS_SDS) {
            sds newsds, sdsele = ln->value;
            if ((newsds = activeDefragSds(sdsele)))
                ln->value = newsds;
        } else if (val_type == DEFRAG_SDS_DICT_VAL_IS_STROB) {
            robj *newele, *ele = ln->value;
            if ((newele = activeDefragStringOb(ele)))
                ln->value = newele;
        } else if (val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR) {
            void *newptr, *ptr = ln->value;
            if ((newptr = activeDefragAlloc(ptr)))
                ln->value = newptr;
        }
    }
}
+
/* Defrag a single quicklist node and its listpack payload. On node move,
 * rewires the neighbor links (or the quicklist head/tail) and updates
 * *node_ref so the caller keeps iterating from the live node. */
void activeDefragQuickListNode(quicklist *ql, quicklistNode **node_ref) {
    quicklistNode *newnode, *node = *node_ref;
    unsigned char *newzl;
    if ((newnode = activeDefragAlloc(node))) {
        if (newnode->prev)
            newnode->prev->next = newnode;
        else
            ql->head = newnode;
        if (newnode->next)
            newnode->next->prev = newnode;
        else
            ql->tail = newnode;
        *node_ref = node = newnode;
    }
    /* The node's entry buffer (listpack) is a separate allocation. */
    if ((newzl = activeDefragAlloc(node->entry)))
        node->entry = newzl;
}
+
+void activeDefragQuickListNodes(quicklist *ql) {
+ quicklistNode *node = ql->head;
+ while (node) {
+ activeDefragQuickListNode(ql, &node);
+ node = node->next;
+ }
+}
+
+/* when the value has lots of elements, we want to handle it later and not as
+ * part of the main dictionary scan. this is needed in order to prevent latency
+ * spikes when handling large items */
+void defragLater(redisDb *db, dictEntry *kde) {
+ sds key = sdsdup(dictGetKey(kde));
+ listAddNodeTail(db->defrag_later, key);
+}
+
/* returns 0 if no more work needs to be been done, and 1 if time is up and more work is needed. */
long scanLaterList(robj *ob, unsigned long *cursor, long long endtime) {
    quicklist *ql = ob->ptr;
    quicklistNode *node;
    long iterations = 0;
    int bookmark_failed = 0;
    /* The key may have been overwritten with a different type/encoding
     * since it was queued for later processing. */
    if (ob->type != OBJ_LIST || ob->encoding != OBJ_ENCODING_QUICKLIST)
        return 0;

    if (*cursor == 0) {
        /* if cursor is 0, we start new iteration */
        node = ql->head;
    } else {
        /* Resume after the "_AD" (active defrag) bookmark left by a
         * previous, time-limited call. */
        node = quicklistBookmarkFind(ql, "_AD");
        if (!node) {
            /* if the bookmark was deleted, it means we reached the end. */
            *cursor = 0;
            return 0;
        }
        node = node->next;
    }

    (*cursor)++;
    while (node) {
        activeDefragQuickListNode(ql, &node);
        server.stat_active_defrag_scanned++;
        /* Check the clock only every 128 nodes; once a bookmark creation
         * has failed, run to completion instead of losing our place. */
        if (++iterations > 128 && !bookmark_failed) {
            if (ustime() > endtime) {
                if (!quicklistBookmarkCreate(&ql, "_AD", node)) {
                    bookmark_failed = 1;
                } else {
                    ob->ptr = ql; /* bookmark creation may have re-allocated the quicklist */
                    return 1;
                }
            }
            iterations = 0;
        }
        node = node->next;
    }
    quicklistBookmarkDelete(ql, "_AD");
    *cursor = 0;
    return bookmark_failed? 1: 0;
}
+
/* Context passed to scanLaterZsetCallback() through dictScanDefrag(). */
typedef struct {
    zset *zs;
} scanLaterZsetData;

/* Per-entry callback of the deferred zset scan: defrags the entry's key
 * sds and the matching skiplist node, counting it as scanned. */
void scanLaterZsetCallback(void *privdata, const dictEntry *_de) {
    dictEntry *de = (dictEntry*)_de;
    scanLaterZsetData *data = privdata;
    activeDefragZsetEntry(data->zs, de);
    server.stat_active_defrag_scanned++;
}
+
+void scanLaterZset(robj *ob, unsigned long *cursor) {
+ if (ob->type != OBJ_ZSET || ob->encoding != OBJ_ENCODING_SKIPLIST)
+ return;
+ zset *zs = (zset*)ob->ptr;
+ dict *d = zs->dict;
+ scanLaterZsetData data = {zs};
+ dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc};
+ *cursor = dictScanDefrag(d, *cursor, scanLaterZsetCallback, &defragfns, &data);
+}
+
/* Used as scan callback when all the work is done in the dictDefragFunctions:
 * it only keeps the scanned-entries statistic up to date. */
void scanCallbackCountScanned(void *privdata, const dictEntry *de) {
    UNUSED(privdata);
    UNUSED(de);
    server.stat_active_defrag_scanned++;
}
+
+void scanLaterSet(robj *ob, unsigned long *cursor) {
+ if (ob->type != OBJ_SET || ob->encoding != OBJ_ENCODING_HT)
+ return;
+ dict *d = ob->ptr;
+ dictDefragFunctions defragfns = {
+ .defragAlloc = activeDefragAlloc,
+ .defragKey = (dictDefragAllocFunction *)activeDefragSds
+ };
+ *cursor = dictScanDefrag(d, *cursor, scanCallbackCountScanned, &defragfns, NULL);
+}
+
+void scanLaterHash(robj *ob, unsigned long *cursor) {
+ if (ob->type != OBJ_HASH || ob->encoding != OBJ_ENCODING_HT)
+ return;
+ dict *d = ob->ptr;
+ dictDefragFunctions defragfns = {
+ .defragAlloc = activeDefragAlloc,
+ .defragKey = (dictDefragAllocFunction *)activeDefragSds,
+ .defragVal = (dictDefragAllocFunction *)activeDefragSds
+ };
+ *cursor = dictScanDefrag(d, *cursor, scanCallbackCountScanned, &defragfns, NULL);
+}
+
+void defragQuicklist(redisDb *db, dictEntry *kde) {
+ robj *ob = dictGetVal(kde);
+ quicklist *ql = ob->ptr, *newql;
+ serverAssert(ob->type == OBJ_LIST && ob->encoding == OBJ_ENCODING_QUICKLIST);
+ if ((newql = activeDefragAlloc(ql)))
+ ob->ptr = ql = newql;
+ if (ql->len > server.active_defrag_max_scan_fields)
+ defragLater(db, kde);
+ else
+ activeDefragQuickListNodes(ql);
+}
+
/* Defrag a skiplist-encoded sorted set: the zset struct, the skiplist
 * struct and header node, each entry (inline when small, deferred when
 * large), and finally the dict struct with its hash tables. */
void defragZsetSkiplist(redisDb *db, dictEntry *kde) {
    robj *ob = dictGetVal(kde);
    zset *zs = (zset*)ob->ptr;
    zset *newzs;
    zskiplist *newzsl;
    dict *newdict;
    dictEntry *de;
    struct zskiplistNode *newheader;
    serverAssert(ob->type == OBJ_ZSET && ob->encoding == OBJ_ENCODING_SKIPLIST);
    if ((newzs = activeDefragAlloc(zs)))
        ob->ptr = zs = newzs;
    if ((newzsl = activeDefragAlloc(zs->zsl)))
        zs->zsl = newzsl;
    if ((newheader = activeDefragAlloc(zs->zsl->header)))
        zs->zsl->header = newheader;
    if (dictSize(zs->dict) > server.active_defrag_max_scan_fields)
        defragLater(db, kde);
    else {
        dictIterator *di = dictGetIterator(zs->dict);
        while((de = dictNext(di)) != NULL) {
            activeDefragZsetEntry(zs, de);
        }
        dictReleaseIterator(di);
    }
    /* handle the dict struct */
    if ((newdict = activeDefragAlloc(zs->dict)))
        zs->dict = newdict;
    /* defrag the dict tables */
    dictDefragTables(zs->dict);
}
+
+void defragHash(redisDb *db, dictEntry *kde) {
+ robj *ob = dictGetVal(kde);
+ dict *d, *newd;
+ serverAssert(ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HT);
+ d = ob->ptr;
+ if (dictSize(d) > server.active_defrag_max_scan_fields)
+ defragLater(db, kde);
+ else
+ activeDefragSdsDict(d, DEFRAG_SDS_DICT_VAL_IS_SDS);
+ /* handle the dict struct */
+ if ((newd = activeDefragAlloc(ob->ptr)))
+ ob->ptr = newd;
+ /* defrag the dict tables */
+ dictDefragTables(ob->ptr);
+}
+
+void defragSet(redisDb *db, dictEntry *kde) {
+ robj *ob = dictGetVal(kde);
+ dict *d, *newd;
+ serverAssert(ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HT);
+ d = ob->ptr;
+ if (dictSize(d) > server.active_defrag_max_scan_fields)
+ defragLater(db, kde);
+ else
+ activeDefragSdsDict(d, DEFRAG_SDS_DICT_NO_VAL);
+ /* handle the dict struct */
+ if ((newd = activeDefragAlloc(ob->ptr)))
+ ob->ptr = newd;
+ /* defrag the dict tables */
+ dictDefragTables(ob->ptr);
+}
+
+/* Defrag callback for radix tree iterator, called for each node,
+ * used in order to defrag the nodes allocations. */
+int defragRaxNode(raxNode **noderef) {
+ raxNode *newnode = activeDefragAlloc(*noderef);
+ if (newnode) {
+ *noderef = newnode;
+ return 1;
+ }
+ return 0;
+}
+
/* returns 0 if no more work needs to be been done, and 1 if time is up and more work is needed. */
int scanLaterStreamListpacks(robj *ob, unsigned long *cursor, long long endtime) {
    /* Resume point between calls. NOTE(review): being static, only one
     * deferred stream scan can be in flight at a time — which matches the
     * single later-scan queue, but verify if that ever changes. */
    static unsigned char last[sizeof(streamID)];
    raxIterator ri;
    long iterations = 0;
    /* The key may have been overwritten with a different type/encoding
     * since it was queued for later processing. */
    if (ob->type != OBJ_STREAM || ob->encoding != OBJ_ENCODING_STREAM) {
        *cursor = 0;
        return 0;
    }

    stream *s = ob->ptr;
    raxStart(&ri,s->rax);
    if (*cursor == 0) {
        /* if cursor is 0, we start new iteration */
        defragRaxNode(&s->rax->head);
        /* assign the iterator node callback before the seek, so that the
         * initial nodes that are processed till the first item are covered */
        ri.node_cb = defragRaxNode;
        raxSeek(&ri,"^",NULL,0);
    } else {
        /* if cursor is non-zero, we seek to the static 'last' */
        if (!raxSeek(&ri,">", last, sizeof(last))) {
            *cursor = 0;
            raxStop(&ri);
            return 0;
        }
        /* assign the iterator node callback after the seek, so that the
         * initial nodes that are processed till now aren't covered */
        ri.node_cb = defragRaxNode;
    }

    (*cursor)++;
    while (raxNext(&ri)) {
        /* Defrag the listpack payload of each rax entry. */
        void *newdata = activeDefragAlloc(ri.data);
        if (newdata)
            raxSetData(ri.node, ri.data=newdata);
        server.stat_active_defrag_scanned++;
        /* Check the clock only every 128 entries to keep the loop cheap. */
        if (++iterations > 128) {
            if (ustime() > endtime) {
                /* Remember where to resume on the next call. */
                serverAssert(ri.key_len==sizeof(last));
                memcpy(last,ri.key,ri.key_len);
                raxStop(&ri);
                return 1;
            }
            iterations = 0;
        }
    }
    raxStop(&ri);
    *cursor = 0;
    return 0;
}
+
+/* optional callback used defrag each rax element (not including the element pointer itself) */
+typedef void *(raxDefragFunction)(raxIterator *ri, void *privdata);
+
+/* defrag radix tree including:
+ * 1) rax struct
+ * 2) rax nodes
+ * 3) rax entry data (only if defrag_data is specified)
+ * 4) call a callback per element, and allow the callback to return a new pointer for the element */
+void defragRadixTree(rax **raxref, int defrag_data, raxDefragFunction *element_cb, void *element_cb_data) {
+ raxIterator ri;
+ rax* rax;
+ if ((rax = activeDefragAlloc(*raxref)))
+ *raxref = rax;
+ rax = *raxref;
+ raxStart(&ri,rax);
+ ri.node_cb = defragRaxNode;
+ defragRaxNode(&rax->head);
+ raxSeek(&ri,"^",NULL,0);
+ while (raxNext(&ri)) {
+ void *newdata = NULL;
+ if (element_cb)
+ newdata = element_cb(&ri, element_cb_data);
+ if (defrag_data && !newdata)
+ newdata = activeDefragAlloc(ri.data);
+ if (newdata)
+ raxSetData(ri.node, ri.data=newdata);
+ }
+ raxStop(&ri);
+}
+
/* Context for defragStreamConsumerPendingEntry: the consumer group and the
 * (already defragged) consumer owning the pending entries being walked. */
typedef struct {
    streamCG *cg;
    streamConsumer *c;
} PendingEntryContext;

/* Per-element callback for a consumer's PEL rax: refresh the NACK's
 * consumer pointer, defrag the NACK itself, and if it moved also update
 * the group-level PEL which shares the same NACK allocation. */
void* defragStreamConsumerPendingEntry(raxIterator *ri, void *privdata) {
    PendingEntryContext *ctx = privdata;
    streamNACK *nack = ri->data, *newnack;
    nack->consumer = ctx->c; /* update nack pointer to consumer */
    newnack = activeDefragAlloc(nack);
    if (newnack) {
        /* update consumer group pointer to the nack */
        void *prev;
        raxInsert(ctx->cg->pel, ri->key, ri->key_len, newnack, &prev);
        /* The group PEL must have pointed at the same (old) NACK. */
        serverAssert(prev==nack);
    }
    return newnack;
}
+
/* defragRadixTree callback for a group's consumers rax. Defrags the
 * streamConsumer struct, its name sds, and its pending-entries rax.
 * Returns the new consumer pointer, or NULL if it was not moved. */
void* defragStreamConsumer(raxIterator *ri, void *privdata) {
    streamConsumer *c = ri->data;
    streamCG *cg = privdata;
    void *newc = activeDefragAlloc(c);
    if (newc) {
        c = newc;
    }
    /* From here on 'c' is the live (possibly moved) consumer. */
    sds newsds = activeDefragSds(c->name);
    if (newsds)
        c->name = newsds;
    if (c->pel) {
        /* The PEL callback needs both the group and the (possibly moved)
         * consumer so every NACK can be re-pointed at the new consumer. */
        PendingEntryContext pel_ctx = {cg, c};
        defragRadixTree(&c->pel, 0, defragStreamConsumerPendingEntry, &pel_ctx);
    }
    return newc; /* returns NULL if c was not defragged */
}
+
+void* defragStreamConsumerGroup(raxIterator *ri, void *privdata) {
+ streamCG *cg = ri->data;
+ UNUSED(privdata);
+ if (cg->consumers)
+ defragRadixTree(&cg->consumers, 0, defragStreamConsumer, cg);
+ if (cg->pel)
+ defragRadixTree(&cg->pel, 0, NULL, NULL);
+ return NULL;
+}
+
/* Defrag a stream key: the stream struct, its entries rax, and (recursively)
 * its consumer groups. If the entries rax holds more elements than
 * active_defrag_max_scan_fields, only the rax struct is defragged now and
 * the rest of the work is scheduled for incremental processing. */
void defragStream(redisDb *db, dictEntry *kde) {
    robj *ob = dictGetVal(kde);
    serverAssert(ob->type == OBJ_STREAM && ob->encoding == OBJ_ENCODING_STREAM);
    stream *s = ob->ptr, *news;

    /* handle the main struct */
    if ((news = activeDefragAlloc(s)))
        ob->ptr = s = news;

    if (raxSize(s->rax) > server.active_defrag_max_scan_fields) {
        /* Too many entries to scan in one go: defrag only the rax struct
         * here and queue the key so the nodes/data are done incrementally. */
        rax *newrax = activeDefragAlloc(s->rax);
        if (newrax)
            s->rax = newrax;
        defragLater(db, kde);
    } else
        defragRadixTree(&s->rax, 1, NULL, NULL);

    if (s->cgroups)
        defragRadixTree(&s->cgroups, 1, defragStreamConsumerGroup, NULL);
}
+
/* Defrag a module key. This is either done immediately (when the module's
 * defrag callback finishes the whole value in one shot) or scheduled for
 * later incremental processing. */
void defragModule(redisDb *db, dictEntry *kde) {
    robj *obj = dictGetVal(kde);
    serverAssert(obj->type == OBJ_MODULE);

    /* A zero return presumably means the module didn't complete the defrag
     * in one call, so queue the key for defragLaterStep. */
    if (!moduleDefragValue(dictGetKey(kde), obj, db->id))
        defragLater(db, kde);
}
+
+/* for each key we scan in the main dict, this function will attempt to defrag
+ * all the various pointers it has. Returns a stat of how many pointers were
+ * moved. */
+void defragKey(redisDb *db, dictEntry *de) {
+ sds keysds = dictGetKey(de);
+ robj *newob, *ob;
+ unsigned char *newzl;
+ sds newsds;
+
+ /* Try to defrag the key name. */
+ newsds = activeDefragSds(keysds);
+ if (newsds) {
+ dictSetKey(db->dict, de, newsds);
+ if (dictSize(db->expires)) {
+ /* We can't search in db->expires for that key after we've released
+ * the pointer it holds, since it won't be able to do the string
+ * compare, but we can find the entry using key hash and pointer. */
+ uint64_t hash = dictGetHash(db->dict, newsds);
+ dictEntry *expire_de = dictFindEntryByPtrAndHash(db->expires, keysds, hash);
+ if (expire_de) dictSetKey(db->expires, expire_de, newsds);
+ }
+ }
+
+ /* Try to defrag robj and / or string value. */
+ ob = dictGetVal(de);
+ if ((newob = activeDefragStringOb(ob))) {
+ dictSetVal(db->dict, de, newob);
+ ob = newob;
+ }
+
+ if (ob->type == OBJ_STRING) {
+ /* Already handled in activeDefragStringOb. */
+ } else if (ob->type == OBJ_LIST) {
+ if (ob->encoding == OBJ_ENCODING_QUICKLIST) {
+ defragQuicklist(db, de);
+ } else if (ob->encoding == OBJ_ENCODING_LISTPACK) {
+ if ((newzl = activeDefragAlloc(ob->ptr)))
+ ob->ptr = newzl;
+ } else {
+ serverPanic("Unknown list encoding");
+ }
+ } else if (ob->type == OBJ_SET) {
+ if (ob->encoding == OBJ_ENCODING_HT) {
+ defragSet(db, de);
+ } else if (ob->encoding == OBJ_ENCODING_INTSET ||
+ ob->encoding == OBJ_ENCODING_LISTPACK)
+ {
+ void *newptr, *ptr = ob->ptr;
+ if ((newptr = activeDefragAlloc(ptr)))
+ ob->ptr = newptr;
+ } else {
+ serverPanic("Unknown set encoding");
+ }
+ } else if (ob->type == OBJ_ZSET) {
+ if (ob->encoding == OBJ_ENCODING_LISTPACK) {
+ if ((newzl = activeDefragAlloc(ob->ptr)))
+ ob->ptr = newzl;
+ } else if (ob->encoding == OBJ_ENCODING_SKIPLIST) {
+ defragZsetSkiplist(db, de);
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ } else if (ob->type == OBJ_HASH) {
+ if (ob->encoding == OBJ_ENCODING_LISTPACK) {
+ if ((newzl = activeDefragAlloc(ob->ptr)))
+ ob->ptr = newzl;
+ } else if (ob->encoding == OBJ_ENCODING_HT) {
+ defragHash(db, de);
+ } else {
+ serverPanic("Unknown hash encoding");
+ }
+ } else if (ob->type == OBJ_STREAM) {
+ defragStream(db, de);
+ } else if (ob->type == OBJ_MODULE) {
+ defragModule(db, de);
+ } else {
+ serverPanic("Unknown object type");
+ }
+}
+
+/* Defrag scan callback for the main db dictionary. */
+void defragScanCallback(void *privdata, const dictEntry *de) {
+ long long hits_before = server.stat_active_defrag_hits;
+ defragKey((redisDb*)privdata, (dictEntry*)de);
+ if (server.stat_active_defrag_hits != hits_before)
+ server.stat_active_defrag_key_hits++;
+ else
+ server.stat_active_defrag_key_misses++;
+ server.stat_active_defrag_scanned++;
+}
+
+/* Utility function to get the fragmentation ratio from jemalloc.
+ * It is critical to do that by comparing only heap maps that belong to
+ * jemalloc, and skip ones the jemalloc keeps as spare. Since we use this
+ * fragmentation ratio in order to decide if a defrag action should be taken
+ * or not, a false detection can cause the defragmenter to waste a lot of CPU
+ * without the possibility of getting any results. */
+float getAllocatorFragmentation(size_t *out_frag_bytes) {
+ size_t resident, active, allocated;
+ zmalloc_get_allocator_info(&allocated, &active, &resident);
+ float frag_pct = ((float)active / allocated)*100 - 100;
+ size_t frag_bytes = active - allocated;
+ float rss_pct = ((float)resident / allocated)*100 - 100;
+ size_t rss_bytes = resident - allocated;
+ if(out_frag_bytes)
+ *out_frag_bytes = frag_bytes;
+ serverLog(LL_DEBUG,
+ "allocated=%zu, active=%zu, resident=%zu, frag=%.0f%% (%.0f%% rss), frag_bytes=%zu (%zu rss)",
+ allocated, active, resident, frag_pct, rss_pct, frag_bytes, rss_bytes);
+ return frag_pct;
+}
+
/* We may need to defrag other globals, one small allocation can hold a full allocator run.
 * so although small, it is still important to defrag these */
void defragOtherGlobals(void) {

    /* there are many more pointers to defrag (e.g. client argv, output / aof buffers, etc.
     * but we assume most of these are short lived, we only need to defrag allocations
     * that remain static for a long time */
    /* sds values of the eval scripts dict (cached Lua script bodies). */
    activeDefragSdsDict(evalScriptsDict(), DEFRAG_SDS_DICT_VAL_LUA_SCRIPT);
    /* Give modules a chance to defrag their own global allocations. */
    moduleDefragGlobals();
}
+
+/* returns 0 more work may or may not be needed (see non-zero cursor),
+ * and 1 if time is up and more work is needed. */
+int defragLaterItem(dictEntry *de, unsigned long *cursor, long long endtime, int dbid) {
+ if (de) {
+ robj *ob = dictGetVal(de);
+ if (ob->type == OBJ_LIST) {
+ return scanLaterList(ob, cursor, endtime);
+ } else if (ob->type == OBJ_SET) {
+ scanLaterSet(ob, cursor);
+ } else if (ob->type == OBJ_ZSET) {
+ scanLaterZset(ob, cursor);
+ } else if (ob->type == OBJ_HASH) {
+ scanLaterHash(ob, cursor);
+ } else if (ob->type == OBJ_STREAM) {
+ return scanLaterStreamListpacks(ob, cursor, endtime);
+ } else if (ob->type == OBJ_MODULE) {
+ return moduleLateDefrag(dictGetKey(de), ob, cursor, endtime, dbid);
+ } else {
+ *cursor = 0; /* object type may have changed since we schedule it for later */
+ }
+ } else {
+ *cursor = 0; /* object may have been deleted already */
+ }
+ return 0;
+}
+
/* static variables serving defragLaterStep to continue scanning a key from where we stopped last time. */
+static sds defrag_later_current_key = NULL;
+static unsigned long defrag_later_cursor = 0;
+
/* returns 0 if no more work needs to be done, and 1 if time is up and more work is needed. */
int defragLaterStep(redisDb *db, long long endtime) {
    unsigned int iterations = 0;
    unsigned long long prev_defragged = server.stat_active_defrag_hits;
    unsigned long long prev_scanned = server.stat_active_defrag_scanned;
    long long key_defragged;

    do {
        /* if we're not continuing a scan from the last call or loop, start a new one */
        if (!defrag_later_cursor) {
            listNode *head = listFirst(db->defrag_later);

            /* Move on to next key: the key we just finished is expected to
             * still be at the head of the list, so pop it. */
            if (defrag_later_current_key) {
                serverAssert(defrag_later_current_key == head->value);
                listDelNode(db->defrag_later, head);
                defrag_later_cursor = 0;
                defrag_later_current_key = NULL;
            }

            /* stop if we reached the last one. */
            head = listFirst(db->defrag_later);
            if (!head)
                return 0;

            /* start a new key */
            defrag_later_current_key = head->value;
            defrag_later_cursor = 0;
        }

        /* each time we enter this function we need to fetch the key from the dict again (if it still exists) */
        dictEntry *de = dictFind(db->dict, defrag_later_current_key);
        key_defragged = server.stat_active_defrag_hits;
        do {
            int quit = 0;
            if (defragLaterItem(de, &defrag_later_cursor, endtime,db->id))
                quit = 1; /* time is up, we didn't finish all the work */

            /* Once in 16 scan iterations, 512 pointer reallocations, or 64 fields
             * (if we have a lot of pointers in one hash bucket, or rehashing),
             * check if we reached the time limit. */
            if (quit || (++iterations > 16 ||
                            server.stat_active_defrag_hits - prev_defragged > 512 ||
                            server.stat_active_defrag_scanned - prev_scanned > 64)) {
                if (quit || ustime() > endtime) {
                    /* Account the partially processed key as hit/miss before bailing out. */
                    if(key_defragged != server.stat_active_defrag_hits)
                        server.stat_active_defrag_key_hits++;
                    else
                        server.stat_active_defrag_key_misses++;
                    return 1;
                }
                iterations = 0;
                prev_defragged = server.stat_active_defrag_hits;
                prev_scanned = server.stat_active_defrag_scanned;
            }
        } while(defrag_later_cursor);
        /* Done with this key: count it as a hit if any of its pointers moved. */
        if(key_defragged != server.stat_active_defrag_hits)
            server.stat_active_defrag_key_hits++;
        else
            server.stat_active_defrag_key_misses++;
    } while(1);
}
+
/* Linear interpolation: the y corresponding to x on the line through
 * (x1,y1) and (x2,y2). NOTE: arguments are evaluated more than once, so
 * pass side-effect-free expressions only. */
#define INTERPOLATE(x, x1, x2, y1, y2) ( (y1) + ((x)-(x1)) * ((y2)-(y1)) / ((x2)-(x1)) )
/* Clamp y into the inclusive range [min, max]. All arguments are now
 * parenthesized in the expansion (the result positions of 'min'/'max' were
 * previously unparenthesized, a macro-hygiene hazard with compound
 * arguments). Same multiple-evaluation caveat as INTERPOLATE. */
#define LIMIT(y, min, max) ((y)<(min)? (min): ((y)>(max)? (max): (y)))
+
+/* decide if defrag is needed, and at what CPU effort to invest in it */
+void computeDefragCycles(void) {
+ size_t frag_bytes;
+ float frag_pct = getAllocatorFragmentation(&frag_bytes);
+ /* If we're not already running, and below the threshold, exit. */
+ if (!server.active_defrag_running) {
+ if(frag_pct < server.active_defrag_threshold_lower || frag_bytes < server.active_defrag_ignore_bytes)
+ return;
+ }
+
+ /* Calculate the adaptive aggressiveness of the defrag */
+ int cpu_pct = INTERPOLATE(frag_pct,
+ server.active_defrag_threshold_lower,
+ server.active_defrag_threshold_upper,
+ server.active_defrag_cycle_min,
+ server.active_defrag_cycle_max);
+ cpu_pct = LIMIT(cpu_pct,
+ server.active_defrag_cycle_min,
+ server.active_defrag_cycle_max);
+ /* We allow increasing the aggressiveness during a scan, but don't
+ * reduce it. */
+ if (cpu_pct > server.active_defrag_running) {
+ server.active_defrag_running = cpu_pct;
+ serverLog(LL_VERBOSE,
+ "Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%",
+ frag_pct, frag_bytes, cpu_pct);
+ }
+}
+
/* Perform incremental defragmentation work from the serverCron.
 * This works in a similar way to activeExpireCycle, in the sense that
 * we do incremental work across calls. */
void activeDefragCycle(void) {
    /* Scan state preserved across calls: the db being scanned and the scan
     * cursors into its keyspace and expires dicts. */
    static int current_db = -1;
    static unsigned long cursor = 0;
    static unsigned long expires_cursor = 0;
    static redisDb *db = NULL;
    static long long start_scan, start_stat;
    unsigned int iterations = 0;
    unsigned long long prev_defragged = server.stat_active_defrag_hits;
    unsigned long long prev_scanned = server.stat_active_defrag_scanned;
    long long start, timelimit, endtime;
    mstime_t latency;
    int quit = 0;

    if (!server.active_defrag_enabled) {
        if (server.active_defrag_running) {
            /* if active defrag was disabled mid-run, start from fresh next time. */
            server.active_defrag_running = 0;
            if (db)
                listEmpty(db->defrag_later);
            defrag_later_current_key = NULL;
            defrag_later_cursor = 0;
            current_db = -1;
            cursor = 0;
            db = NULL;
            goto update_metrics;
        }
        return;
    }

    if (hasActiveChildProcess())
        return; /* Defragging memory while there's a fork will just do damage. */

    /* Once a second, check if the fragmentation justifies starting a scan
     * or making it more aggressive. */
    run_with_period(1000) {
        computeDefragCycles();
    }
    if (!server.active_defrag_running)
        return;

    /* See activeExpireCycle for how timelimit is handled. */
    start = ustime();
    timelimit = 1000000*server.active_defrag_running/server.hz/100;
    if (timelimit <= 0) timelimit = 1;
    endtime = start + timelimit;
    latencyStartMonitor(latency);

    /* dictScanDefrag uses these to defrag the dict's own buckets/entries. */
    dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc};
    do {
        /* if we're not continuing a scan from the last call or loop, start a new one */
        if (!cursor && !expires_cursor) {
            /* finish any leftovers from previous db before moving to the next one */
            if (db && defragLaterStep(db, endtime)) {
                quit = 1; /* time is up, we didn't finish all the work */
                break; /* this will exit the function and we'll continue on the next cycle */
            }

            /* Move on to next database, and stop if we reached the last one. */
            if (++current_db >= server.dbnum) {
                /* defrag other items not part of the db / keys */
                defragOtherGlobals();

                long long now = ustime();
                size_t frag_bytes;
                float frag_pct = getAllocatorFragmentation(&frag_bytes);
                serverLog(LL_VERBOSE,
                    "Active defrag done in %dms, reallocated=%d, frag=%.0f%%, frag_bytes=%zu",
                    (int)((now - start_scan)/1000), (int)(server.stat_active_defrag_hits - start_stat), frag_pct, frag_bytes);

                start_scan = now;
                current_db = -1;
                cursor = 0;
                db = NULL;
                server.active_defrag_running = 0;

                computeDefragCycles(); /* if another scan is needed, start it right away */
                if (server.active_defrag_running != 0 && ustime() < endtime)
                    continue;
                break;
            }
            else if (current_db==0) {
                /* Start a scan from the first database. */
                start_scan = ustime();
                start_stat = server.stat_active_defrag_hits;
            }

            db = &server.db[current_db];
            cursor = 0;
        }

        do {
            /* before scanning the next bucket, see if we have big keys left from the previous bucket to scan */
            if (defragLaterStep(db, endtime)) {
                quit = 1; /* time is up, we didn't finish all the work */
                break; /* this will exit the function and we'll continue on the next cycle */
            }

            /* Scan the keyspace dict unless we're scanning the expire dict. */
            if (!expires_cursor)
                cursor = dictScanDefrag(db->dict, cursor, defragScanCallback,
                                        &defragfns, db);

            /* When done scanning the keyspace dict, we scan the expire dict. */
            if (!cursor)
                expires_cursor = dictScanDefrag(db->expires, expires_cursor,
                                                scanCallbackCountScanned,
                                                &defragfns, NULL);

            /* Once in 16 scan iterations, 512 pointer reallocations. or 64 keys
             * (if we have a lot of pointers in one hash bucket or rehashing),
             * check if we reached the time limit.
             * But regardless, don't start a new db in this loop, this is because after
             * the last db we call defragOtherGlobals, which must be done in one cycle */
            if (!(cursor || expires_cursor) ||
                ++iterations > 16 ||
                server.stat_active_defrag_hits - prev_defragged > 512 ||
                server.stat_active_defrag_scanned - prev_scanned > 64)
            {
                if (!cursor || ustime() > endtime) {
                    quit = 1;
                    break;
                }
                iterations = 0;
                prev_defragged = server.stat_active_defrag_hits;
                prev_scanned = server.stat_active_defrag_scanned;
            }
        } while((cursor || expires_cursor) && !quit);
    } while(!quit);

    latencyEndMonitor(latency);
    latencyAddSampleIfNeeded("active-defrag-cycle",latency);

update_metrics:
    /* Track current/total active-defrag wall-clock time for stats. */
    if (server.active_defrag_running > 0) {
        if (server.stat_last_active_defrag_time == 0)
            elapsedStart(&server.stat_last_active_defrag_time);
    } else if (server.stat_last_active_defrag_time != 0) {
        server.stat_total_active_defrag_time += elapsedUs(server.stat_last_active_defrag_time);
        server.stat_last_active_defrag_time = 0;
    }
}
+
+#else /* HAVE_DEFRAG */
+
/* Stubs used when the allocator has no defrag support (HAVE_DEFRAG not
 * defined): active defrag is a no-op. */
void activeDefragCycle(void) {
    /* Not implemented yet. */
}

/* Stub: never moves an allocation; NULL means "not reallocated". */
void *activeDefragAlloc(void *ptr) {
    UNUSED(ptr);
    return NULL;
}

/* Stub: never moves a string object. */
robj *activeDefragStringOb(robj *ob) {
    UNUSED(ob);
    return NULL;
}
+
+#endif
diff --git a/src/dict.c b/src/dict.c
new file mode 100644
index 0000000..e34fa02
--- /dev/null
+++ b/src/dict.c
@@ -0,0 +1,1749 @@
+/* Hash Tables Implementation.
+ *
+ * This file implements in memory hash tables with insert/del/replace/find/
+ * get-random-element operations. Hash tables will auto resize if needed
+ * tables of power of two in size are used, collisions are handled by
+ * chaining. See the source code for more information... :)
+ *
+ * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fmacros.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <sys/time.h>
+
+#include "dict.h"
+#include "zmalloc.h"
+#include "redisassert.h"
+
+/* Using dictEnableResize() / dictDisableResize() we make possible to disable
+ * resizing and rehashing of the hash table as needed. This is very important
+ * for Redis, as we use copy-on-write and don't want to move too much memory
+ * around when there is a child performing saving operations.
+ *
+ * Note that even when dict_can_resize is set to DICT_RESIZE_AVOID, not all
+ * resizes are prevented: a hash table is still allowed to grow if the ratio
+ * between the number of elements and the buckets > dict_force_resize_ratio. */
+static dictResizeEnable dict_can_resize = DICT_RESIZE_ENABLE;
+static unsigned int dict_force_resize_ratio = 5;
+
+/* -------------------------- types ----------------------------------------- */
+
/* An allocated hash table entry: key, value union, bucket-chain pointer,
 * and optional per-entry metadata. */
struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next; /* Next entry in the same hash bucket. */
    void *metadata[]; /* An arbitrary number of bytes (starting at a
                       * pointer-aligned address) of size as returned
                       * by dictType's dictEntryMetadataBytes(). */
};

/* Compact entry used by 'no_value' dict types: just key and chain pointer,
 * with no value union and no metadata. */
typedef struct {
    void *key;
    dictEntry *next;
} dictEntryNoValue;
+
+/* -------------------------- private prototypes ---------------------------- */
+
+static int _dictExpandIfNeeded(dict *d);
+static signed char _dictNextExp(unsigned long size);
+static int _dictInit(dict *d, dictType *type);
+static dictEntry *dictGetNext(const dictEntry *de);
+static dictEntry **dictGetNextRef(dictEntry *de);
+static void dictSetNext(dictEntry *de, dictEntry *next);
+
+/* -------------------------- hash functions -------------------------------- */
+
/* SipHash seed shared by all dicts in the process. */
static uint8_t dict_hash_function_seed[16];

/* Install a new 16-byte seed for the hash functions. */
void dictSetHashFunctionSeed(uint8_t *seed) {
    memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
}

/* Return a pointer to the current 16-byte hash seed. */
uint8_t *dictGetHashFunctionSeed(void) {
    return dict_hash_function_seed;
}
+
/* The default hashing function uses SipHash implementation
 * in siphash.c. */

uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);

/* Hash an arbitrary byte buffer with SipHash using the global seed. */
uint64_t dictGenHashFunction(const void *key, size_t len) {
    return siphash(key,len,dict_hash_function_seed);
}

/* Case-insensitive variant of the above. */
uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
    return siphash_nocase(buf,len,dict_hash_function_seed);
}
+
+/* --------------------- dictEntry pointer bit tricks ---------------------- */
+
+/* The 3 least significant bits in a pointer to a dictEntry determines what the
+ * pointer actually points to. If the least bit is set, it's a key. Otherwise,
+ * the bit pattern of the least 3 significant bits mark the kind of entry. */
+
+#define ENTRY_PTR_MASK 7 /* 111 */
+#define ENTRY_PTR_NORMAL 0 /* 000 */
+#define ENTRY_PTR_NO_VALUE 2 /* 010 */
+
+/* Returns 1 if the entry pointer is a pointer to a key, rather than to an
+ * allocated entry. Returns 0 otherwise. */
+static inline int entryIsKey(const dictEntry *de) {
+ return (uintptr_t)(void *)de & 1;
+}
+
+/* Returns 1 if the pointer is actually a pointer to a dictEntry struct. Returns
+ * 0 otherwise. */
+static inline int entryIsNormal(const dictEntry *de) {
+ return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_NORMAL;
+}
+
+/* Returns 1 if the entry is a special entry with key and next, but without
+ * value. Returns 0 otherwise. */
+static inline int entryIsNoValue(const dictEntry *de) {
+ return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_NO_VALUE;
+}
+
+/* Creates an entry without a value field. */
+static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) {
+ dictEntryNoValue *entry = zmalloc(sizeof(*entry));
+ entry->key = key;
+ entry->next = next;
+ return (dictEntry *)(void *)((uintptr_t)(void *)entry | ENTRY_PTR_NO_VALUE);
+}
+
+static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) {
+ assert(((uintptr_t)ptr & ENTRY_PTR_MASK) == 0);
+ return (dictEntry *)(void *)((uintptr_t)ptr | bits);
+}
+
+static inline void *decodeMaskedPtr(const dictEntry *de) {
+ assert(!entryIsKey(de));
+ return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK);
+}
+
+/* Decodes the pointer to an entry without value, when you know it is an entry
+ * without value. Hint: Use entryIsNoValue to check. */
+static inline dictEntryNoValue *decodeEntryNoValue(const dictEntry *de) {
+ return decodeMaskedPtr(de);
+}
+
/* Returns 1 if the entry has a value field and 0 otherwise. */
static inline int entryHasValue(const dictEntry *de) {
    /* Only normal (untagged) entries carry the value union. */
    return entryIsNormal(de);
}
+
+/* ----------------------------- API implementation ------------------------- */
+
+/* Reset hash table parameters already initialized with _dictInit()*/
+static void _dictReset(dict *d, int htidx)
+{
+ d->ht_table[htidx] = NULL;
+ d->ht_size_exp[htidx] = -1;
+ d->ht_used[htidx] = 0;
+}
+
+/* Create a new hash table */
+dict *dictCreate(dictType *type)
+{
+ size_t metasize = type->dictMetadataBytes ? type->dictMetadataBytes() : 0;
+ dict *d = zmalloc(sizeof(*d) + metasize);
+ if (metasize) {
+ memset(dictMetadata(d), 0, metasize);
+ }
+
+ _dictInit(d,type);
+ return d;
+}
+
+/* Initialize the hash table */
+int _dictInit(dict *d, dictType *type)
+{
+ _dictReset(d, 0);
+ _dictReset(d, 1);
+ d->type = type;
+ d->rehashidx = -1;
+ d->pauserehash = 0;
+ return DICT_OK;
+}
+
+/* Resize the table to the minimal size that contains all the elements,
+ * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
+int dictResize(dict *d)
+{
+ unsigned long minimal;
+
+ if (dict_can_resize != DICT_RESIZE_ENABLE || dictIsRehashing(d)) return DICT_ERR;
+ minimal = d->ht_used[0];
+ if (minimal < DICT_HT_INITIAL_SIZE)
+ minimal = DICT_HT_INITIAL_SIZE;
+ return dictExpand(d, minimal);
+}
+
/* Expand or create the hash table,
 * when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).
 * Returns DICT_OK if expand was performed, and DICT_ERR if skipped. */
int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
{
    if (malloc_failed) *malloc_failed = 0;

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht_used[0] > size)
        return DICT_ERR;

    /* the new hash table */
    dictEntry **new_ht_table;
    unsigned long new_ht_used;
    signed char new_ht_size_exp = _dictNextExp(size);

    /* Detect overflows: 1ul<<exp may wrap below 'size', and the bucket
     * array byte count may overflow size_t. */
    size_t newsize = 1ul<<new_ht_size_exp;
    if (newsize < size || newsize * sizeof(dictEntry*) < newsize)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (new_ht_size_exp == d->ht_size_exp[0]) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    if (malloc_failed) {
        new_ht_table = ztrycalloc(newsize*sizeof(dictEntry*));
        *malloc_failed = new_ht_table == NULL;
        if (*malloc_failed)
            return DICT_ERR;
    } else
        new_ht_table = zcalloc(newsize*sizeof(dictEntry*));

    new_ht_used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht_table[0] == NULL) {
        d->ht_size_exp[0] = new_ht_size_exp;
        d->ht_used[0] = new_ht_used;
        d->ht_table[0] = new_ht_table;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht_size_exp[1] = new_ht_size_exp;
    d->ht_used[1] = new_ht_used;
    d->ht_table[1] = new_ht_table;
    d->rehashidx = 0;
    return DICT_OK;
}
+
/* Expand (or create) the hash table, panicking on allocation failure.
 * Returns DICT_ERR if the expand was not performed, DICT_OK otherwise. */
int dictExpand(dict *d, unsigned long size) {
    return _dictExpand(d, size, NULL);
}
+
+/* return DICT_ERR if expand failed due to memory allocation failure */
+int dictTryExpand(dict *d, unsigned long size) {
+ int malloc_failed;
+ _dictExpand(d, size, &malloc_failed);
+ return malloc_failed? DICT_ERR : DICT_OK;
+}
+
/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
    unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]);
    if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0;
    /* In AVOID mode only keep rehashing when the table sizes differ by at
     * least dict_force_resize_ratio. */
    if (dict_can_resize == DICT_RESIZE_AVOID &&
        ((s1 > s0 && s1 / s0 < dict_force_resize_ratio) ||
         (s1 < s0 && s0 / s1 < dict_force_resize_ratio)))
    {
        return 0;
    }

    while(n-- && d->ht_used[0] != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
        /* Skip empty buckets, bailing out after visiting empty_visits of them. */
        while(d->ht_table[0][d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht_table[0][d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;

            nextde = dictGetNext(de);
            void *key = dictGetKey(de);
            /* Get the index in the new hash table */
            if (d->ht_size_exp[1] > d->ht_size_exp[0]) {
                h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
            } else {
                /* We're shrinking the table. The tables sizes are powers of
                 * two, so we simply mask the bucket index in the larger table
                 * to get the bucket index in the smaller table. */
                h = d->rehashidx & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
            }
            if (d->type->no_value) {
                if (d->type->keys_are_odd && !d->ht_table[1][h]) {
                    /* Destination bucket is empty and we can store the key
                     * directly without an allocated entry. Free the old entry
                     * if it's an allocated entry.
                     *
                     * TODO: Add a flag 'keys_are_even' and if set, we can use
                     * this optimization for these dicts too. We can set the LSB
                     * bit when stored as a dict entry and clear it again when
                     * we need the key back. */
                    assert(entryIsKey(key));
                    if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));
                    de = key;
                } else if (entryIsKey(de)) {
                    /* We don't have an allocated entry but we need one. */
                    de = createEntryNoValue(key, d->ht_table[1][h]);
                } else {
                    /* Just move the existing entry to the destination table and
                     * update the 'next' field. */
                    assert(entryIsNoValue(de));
                    dictSetNext(de, d->ht_table[1][h]);
                }
            } else {
                dictSetNext(de, d->ht_table[1][h]);
            }
            d->ht_table[1][h] = de;
            d->ht_used[0]--;
            d->ht_used[1]++;
            de = nextde;
        }
        d->ht_table[0][d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    if (d->ht_used[0] == 0) {
        zfree(d->ht_table[0]);
        /* Copy the new ht onto the old one */
        d->ht_table[0] = d->ht_table[1];
        d->ht_used[0] = d->ht_used[1];
        d->ht_size_exp[0] = d->ht_size_exp[1];
        _dictReset(d, 1);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}
+
+long long timeInMilliseconds(void) {
+ struct timeval tv;
+
+ gettimeofday(&tv,NULL);
+ return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);
+}
+
+/* Rehash in ms+"delta" milliseconds. The value of "delta" is larger
+ * than 0, and is smaller than 1 in most cases. The exact upper bound
+ * depends on the running time of dictRehash(d,100).*/
+int dictRehashMilliseconds(dict *d, int ms) {
+ if (d->pauserehash > 0) return 0;
+
+ long long start = timeInMilliseconds();
+ int rehashes = 0;
+
+ while(dictRehash(d,100)) {
+ rehashes += 100;
+ if (timeInMilliseconds()-start > ms) break;
+ }
+ return rehashes;
+}
+
+/* This function performs just a step of rehashing, and only if hashing has
+ * not been paused for our hash table. When we have iterators in the
+ * middle of a rehashing we can't mess with the two hash tables otherwise
+ * some elements can be missed or duplicated.
+ *
+ * This function is called by common lookup or update operations in the
+ * dictionary so that the hash table automatically migrates from H1 to H2
+ * while it is actively used. */
+static void _dictRehashStep(dict *d) {
+ if (d->pauserehash == 0) dictRehash(d,1);
+}
+
/* Return a pointer to the metadata section within the dict (sized by the
 * type's dictMetadataBytes(), zeroed at creation by dictCreate). */
void *dictMetadata(dict *d) {
    return &d->metadata;
}
+
+/* Add an element to the target hash table */
+int dictAdd(dict *d, void *key, void *val)
+{
+ dictEntry *entry = dictAddRaw(d,key,NULL);
+
+ if (!entry) return DICT_ERR;
+ if (!d->type->no_value) dictSetVal(d, entry, val);
+ return DICT_OK;
+}
+
+/* Low level add or find:
+ * This function adds the entry but instead of setting a value returns the
+ * dictEntry structure to the user, that will make sure to fill the value
+ * field as they wish.
+ *
+ * This function is also directly exposed to the user API to be called
+ * mainly in order to store non-pointers inside the hash value, example:
+ *
+ * entry = dictAddRaw(dict,mykey,NULL);
+ * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
+ *
+ * Return values:
+ *
+ * If key already exists NULL is returned, and "*existing" is populated
+ * with the existing entry if existing is not NULL.
+ *
+ * If key was added, the hash entry is returned to be manipulated by the caller.
+ */
+dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
+{
+ /* Get the position for the new key or NULL if the key already exists. */
+ void *position = dictFindPositionForInsert(d, key, existing);
+ if (!position) return NULL;
+
+ /* Dup the key if necessary. */
+ if (d->type->keyDup) key = d->type->keyDup(d, key);
+
+ return dictInsertAtPosition(d, key, position);
+}
+
/* Adds a key in the dict's hashtable at the position returned by a preceding
 * call to dictFindPositionForInsert. This is a low level function which allows
 * splitting dictAddRaw in two parts. Normally, dictAddRaw or dictAdd should be
 * used instead.
 *
 * 'position' is really a dictEntry** pointing at a bucket; the new entry is
 * linked at the head of that bucket's chain. The entry representation depends
 * on the dict type: a bare key pointer, a no-value entry, or a normal entry
 * (possibly followed by per-entry metadata). Returns the new entry. */
dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) {
    dictEntry **bucket = position; /* It's a bucket, but the API hides that. */
    dictEntry *entry;
    /* If rehashing is ongoing, we insert in table 1, otherwise in table 0.
     * Assert that the provided bucket is the right table. */
    int htidx = dictIsRehashing(d) ? 1 : 0;
    assert(bucket >= &d->ht_table[htidx][0] &&
           bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]);
    size_t metasize = dictEntryMetadataSize(d);
    if (d->type->no_value) {
        assert(!metasize); /* Entry metadata + no value not supported. */
        if (d->type->keys_are_odd && !*bucket) {
            /* We can store the key directly in the destination bucket without the
             * allocated entry (the odd key itself is distinguishable from real
             * entry pointers, which are even — see entryIsKey()).
             *
             * TODO: Add a flag 'keys_are_even' and if set, we can use this
             * optimization for these dicts too. We can set the LSB bit when
             * stored as a dict entry and clear it again when we need the key
             * back. */
            entry = key;
            assert(entryIsKey(entry));
        } else {
            /* Allocate an entry without value. */
            entry = createEntryNoValue(key, *bucket);
        }
    } else {
        /* Allocate the memory and store the new entry.
         * Insert the element in top, with the assumption that in a database
         * system it is more likely that recently added entries are accessed
         * more frequently. */
        entry = zmalloc(sizeof(*entry) + metasize);
        assert(entryIsNormal(entry)); /* Check alignment of allocation */
        if (metasize > 0) {
            memset(dictEntryMetadata(entry), 0, metasize);
        }
        entry->key = key;
        entry->next = *bucket;
    }
    *bucket = entry;
    d->ht_used[htidx]++;

    return entry;
}
+
+/* Add or Overwrite:
+ * Add an element, discarding the old value if the key already exists.
+ * Return 1 if the key was added from scratch, 0 if there was already an
+ * element with such key and dictReplace() just performed a value update
+ * operation. */
+int dictReplace(dict *d, void *key, void *val)
+{
+ dictEntry *entry, *existing;
+
+ /* Try to add the element. If the key
+ * does not exists dictAdd will succeed. */
+ entry = dictAddRaw(d,key,&existing);
+ if (entry) {
+ dictSetVal(d, entry, val);
+ return 1;
+ }
+
+ /* Set the new value and free the old one. Note that it is important
+ * to do that in this order, as the value may just be exactly the same
+ * as the previous one. In this context, think to reference counting,
+ * you want to increment (set), and then decrement (free), and not the
+ * reverse. */
+ void *oldval = dictGetVal(existing);
+ dictSetVal(d, existing, val);
+ if (d->type->valDestructor)
+ d->type->valDestructor(d, oldval);
+ return 0;
+}
+
+/* Add or Find:
+ * dictAddOrFind() is simply a version of dictAddRaw() that always
+ * returns the hash entry of the specified key, even if the key already
+ * exists and can't be added (in that case the entry of the already
+ * existing key is returned.)
+ *
+ * See dictAddRaw() for more information. */
+dictEntry *dictAddOrFind(dict *d, void *key) {
+ dictEntry *entry, *existing;
+ entry = dictAddRaw(d,key,&existing);
+ return entry ? entry : existing;
+}
+
/* Search and remove an element. This is a helper function for
 * dictDelete() and dictUnlink(), please check the top comment
 * of those functions.
 *
 * When 'nofree' is non-zero the entry is unlinked from its chain but the
 * key/value/allocation are left intact (the caller must later release it
 * via dictFreeUnlinkedEntry()). Returns the removed entry, or NULL when
 * the key is not present. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;

    /* dict is empty */
    if (dictSize(d) == 0) return NULL;

    /* Lookups piggyback one incremental rehash step. */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);

    for (table = 0; table <= 1; table++) {
        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        he = d->ht_table[table][idx];
        prevHe = NULL;
        while(he) {
            void *he_key = dictGetKey(he);
            /* Pointer equality short-circuits the full key comparison. */
            if (key == he_key || dictCompareKeys(d, key, he_key)) {
                /* Unlink the element from the list */
                if (prevHe)
                    dictSetNext(prevHe, dictGetNext(he));
                else
                    d->ht_table[table][idx] = dictGetNext(he);
                if (!nofree) {
                    dictFreeUnlinkedEntry(d, he);
                }
                d->ht_used[table]--;
                return he;
            }
            prevHe = he;
            he = dictGetNext(he);
        }
        /* The second table only holds entries while rehashing. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
}
+
+/* Remove an element, returning DICT_OK on success or DICT_ERR if the
+ * element was not found. */
+int dictDelete(dict *ht, const void *key) {
+ return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
+}
+
+/* Remove an element from the table, but without actually releasing
+ * the key, value and dictionary entry. The dictionary entry is returned
+ * if the element was found (and unlinked from the table), and the user
+ * should later call `dictFreeUnlinkedEntry()` with it in order to release it.
+ * Otherwise if the key is not found, NULL is returned.
+ *
+ * This function is useful when we want to remove something from the hash
+ * table but want to use its value before actually deleting the entry.
+ * Without this function the pattern would require two lookups:
+ *
+ * entry = dictFind(...);
+ * // Do something with entry
+ * dictDelete(dictionary,entry);
+ *
+ * Thanks to this function it is possible to avoid this, and use
+ * instead:
+ *
+ * entry = dictUnlink(dictionary,entry);
+ * // Do something with entry
+ * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.
+ */
+dictEntry *dictUnlink(dict *d, const void *key) {
+ return dictGenericDelete(d,key,1);
+}
+
+/* You need to call this function to really free the entry after a call
+ * to dictUnlink(). It's safe to call this function with 'he' = NULL. */
+void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
+ if (he == NULL) return;
+ dictFreeKey(d, he);
+ dictFreeVal(d, he);
+ if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
+}
+
/* Destroy one of the two internal hash tables ('htidx' is 0 or 1): free
 * every entry, release the bucket array and reset the table to its empty
 * state. When 'callback' is non-NULL it is invoked once every 65536
 * buckets, so that very long clears can perform periodic housekeeping. */
int _dictClear(dict *d, int htidx, void(callback)(dict*)) {
    unsigned long i;

    /* Free all the elements (stop early once 'used' drops to zero). */
    for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]) && d->ht_used[htidx] > 0; i++) {
        dictEntry *he, *nextHe;

        if (callback && (i & 65535) == 0) callback(d);

        if ((he = d->ht_table[htidx][i]) == NULL) continue;
        while(he) {
            nextHe = dictGetNext(he);
            dictFreeKey(d, he);
            dictFreeVal(d, he);
            /* Bare-key entries are not separately allocated. */
            if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
            d->ht_used[htidx]--;
            he = nextHe;
        }
    }
    /* Free the table and the allocated cache structure */
    zfree(d->ht_table[htidx]);
    /* Re-initialize the table */
    _dictReset(d, htidx);
    return DICT_OK; /* never fails */
}
+
+/* Clear & Release the hash table */
+void dictRelease(dict *d)
+{
+ _dictClear(d,0,NULL);
+ _dictClear(d,1,NULL);
+ zfree(d);
+}
+
+dictEntry *dictFind(dict *d, const void *key)
+{
+ dictEntry *he;
+ uint64_t h, idx, table;
+
+ if (dictSize(d) == 0) return NULL; /* dict is empty */
+ if (dictIsRehashing(d)) _dictRehashStep(d);
+ h = dictHashKey(d, key);
+ for (table = 0; table <= 1; table++) {
+ idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
+ he = d->ht_table[table][idx];
+ while(he) {
+ void *he_key = dictGetKey(he);
+ if (key == he_key || dictCompareKeys(d, key, he_key))
+ return he;
+ he = dictGetNext(he);
+ }
+ if (!dictIsRehashing(d)) return NULL;
+ }
+ return NULL;
+}
+
+void *dictFetchValue(dict *d, const void *key) {
+ dictEntry *he;
+
+ he = dictFind(d,key);
+ return he ? dictGetVal(he) : NULL;
+}
+
/* Find an element from the table, also get the plink of the entry. The entry
 * is returned if the element is found, and the user should later call
 * `dictTwoPhaseUnlinkFree` with it in order to unlink and release it. Otherwise if
 * the key is not found, NULL is returned. These two functions should be used in pair.
 * `dictTwoPhaseUnlinkFind` pauses rehash and `dictTwoPhaseUnlinkFree` resumes rehash.
 *
 * We can use like this:
 *
 * dictEntry *de = dictTwoPhaseUnlinkFind(db->dict,key->ptr,&plink, &table);
 * // Do something, but we can't modify the dict
 * dictTwoPhaseUnlinkFree(db->dict,de,plink,table); // We don't need to lookup again
 *
 * If we want to find an entry before delete this entry, this an optimization to avoid
 * dictFind followed by dictDelete. i.e. the first API is a find, and it gives some info
 * to the second one to avoid repeating the lookup
 *
 * Note: rehashing is paused ONLY when an entry is found — a NULL return
 * leaves the rehash state untouched, matching dictTwoPhaseUnlinkFree()
 * being a no-op for NULL entries. */
dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink, int *table_index) {
    uint64_t h, idx, table;

    if (dictSize(d) == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);

    for (table = 0; table <= 1; table++) {
        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        /* Walk the chain through the link pointers themselves, so the
         * position of the entry's own link can be reported via 'plink'. */
        dictEntry **ref = &d->ht_table[table][idx];
        while (ref && *ref) {
            void *de_key = dictGetKey(*ref);
            if (key == de_key || dictCompareKeys(d, key, de_key)) {
                *table_index = table;
                *plink = ref;
                dictPauseRehashing(d);
                return *ref;
            }
            ref = dictGetNextRef(*ref);
        }
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}
+
/* Second phase of the two-phase unlink: detach 'he' (whose in-chain link
 * pointer 'plink' and table index were produced by dictTwoPhaseUnlinkFind),
 * free it, and resume the rehashing that the find phase paused.
 * Calling this with 'he' = NULL is a no-op; that matches a find that
 * returned NULL, which never paused rehashing in the first place. */
void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index) {
    if (he == NULL) return;
    d->ht_used[table_index]--;
    /* Bypass 'he' in the chain before releasing it. */
    *plink = dictGetNext(he);
    dictFreeKey(d, he);
    dictFreeVal(d, he);
    /* Bare-key entries are not separately allocated. */
    if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
    dictResumeRehashing(d);
}
+
+void dictSetKey(dict *d, dictEntry* de, void *key) {
+ assert(!d->type->no_value);
+ if (d->type->keyDup)
+ de->key = d->type->keyDup(d, key);
+ else
+ de->key = key;
+}
+
/* Bind 'val' to the entry, duplicating it first when the dict type
 * defines valDup. Only valid for entries that carry a value field. */
void dictSetVal(dict *d, dictEntry *de, void *val) {
    assert(entryHasValue(de));
    de->v.val = d->type->valDup ? d->type->valDup(d, val) : val;
}
+
/* Store a signed 64-bit integer directly in the entry's value union. */
void dictSetSignedIntegerVal(dictEntry *de, int64_t val) {
    assert(entryHasValue(de));
    de->v.s64 = val;
}
+
/* Store an unsigned 64-bit integer directly in the entry's value union. */
void dictSetUnsignedIntegerVal(dictEntry *de, uint64_t val) {
    assert(entryHasValue(de));
    de->v.u64 = val;
}
+
/* Store a double directly in the entry's value union. */
void dictSetDoubleVal(dictEntry *de, double val) {
    assert(entryHasValue(de));
    de->v.d = val;
}
+
/* Add 'val' to the entry's signed integer value and return the new value.
 * NOTE(review): signed overflow here would be undefined behavior; callers
 * are presumed to keep the sum in range — confirm at call sites. */
int64_t dictIncrSignedIntegerVal(dictEntry *de, int64_t val) {
    assert(entryHasValue(de));
    return de->v.s64 += val;
}
+
/* Add 'val' to the entry's unsigned integer value and return the new
 * value (wraps modulo 2^64 on overflow, as unsigned arithmetic does). */
uint64_t dictIncrUnsignedIntegerVal(dictEntry *de, uint64_t val) {
    assert(entryHasValue(de));
    return de->v.u64 += val;
}
+
/* Add 'val' to the entry's double value and return the new value. */
double dictIncrDoubleVal(dictEntry *de, double val) {
    assert(entryHasValue(de));
    return de->v.d += val;
}
+
/* A pointer to the metadata section within the dict entry. Only normal
 * (value-carrying) entries have trailing metadata; see
 * dictInsertAtPosition(), which rejects metadata on no_value dicts. */
void *dictEntryMetadata(dictEntry *de) {
    assert(entryHasValue(de));
    return &de->metadata;
}
+
/* Return the key held by an entry, whichever representation it uses:
 * a bare key pointer, a masked no-value entry, or a normal entry. */
void *dictGetKey(const dictEntry *de) {
    if (entryIsKey(de)) return (void*)de;
    if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key;
    return de->key;
}
+
/* Return the entry's value pointer (normal, value-carrying entries only). */
void *dictGetVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return de->v.val;
}
+
/* Return the entry's value interpreted as a signed 64-bit integer. */
int64_t dictGetSignedIntegerVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return de->v.s64;
}
+
/* Return the entry's value interpreted as an unsigned 64-bit integer. */
uint64_t dictGetUnsignedIntegerVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return de->v.u64;
}
+
/* Return the entry's value interpreted as a double. */
double dictGetDoubleVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return de->v.d;
}
+
/* Returns a mutable reference to the value as a double within the entry,
 * letting the caller update it in place without a set call per change. */
double *dictGetDoubleValPtr(dictEntry *de) {
    assert(entryHasValue(de));
    return &de->v.d;
}
+
/* Returns the 'next' field of the entry or NULL if the entry doesn't have a
 * 'next' field (bare-key entries are chainless by construction: they are
 * only used when the destination bucket is empty). */
static dictEntry *dictGetNext(const dictEntry *de) {
    if (entryIsKey(de)) return NULL; /* there's no next */
    if (entryIsNoValue(de)) return decodeEntryNoValue(de)->next;
    return de->next;
}
+
/* Returns a pointer to the 'next' field in the entry or NULL if the entry
 * doesn't have a next field (bare-key representation). Used by callers
 * that need to rewrite chain links in place, e.g. the defrag walker and
 * the two-phase unlink. */
static dictEntry **dictGetNextRef(dictEntry *de) {
    if (entryIsKey(de)) return NULL;
    if (entryIsNoValue(de)) return &decodeEntryNoValue(de)->next;
    return &de->next;
}
+
/* Set the 'next' chain pointer of an entry. Bare-key entries have no such
 * field, hence the assertion; no-value entries need their masked pointer
 * decoded before the field can be reached. */
static void dictSetNext(dictEntry *de, dictEntry *next) {
    assert(!entryIsKey(de));
    if (entryIsNoValue(de)) {
        dictEntryNoValue *entry = decodeEntryNoValue(de);
        entry->next = next;
    } else {
        de->next = next;
    }
}
+
/* Returns the memory usage in bytes of the dict, excluding the size of the keys
 * and values. NOTE(review): this assumes one full-size dictEntry per element;
 * no_value dicts use smaller (or zero-size) entry representations, so the
 * figure is an over-estimate there — confirm whether callers need exactness. */
size_t dictMemUsage(const dict *d) {
    return dictSize(d) * sizeof(dictEntry) +
           dictSlots(d) * sizeof(dictEntry*);
}
+
/* Size in bytes of a single normal dict entry allocation (without
 * per-entry metadata). */
size_t dictEntryMemUsage(void) {
    return sizeof(dictEntry);
}
+
+/* A fingerprint is a 64 bit number that represents the state of the dictionary
+ * at a given time, it's just a few dict properties xored together.
+ * When an unsafe iterator is initialized, we get the dict fingerprint, and check
+ * the fingerprint again when the iterator is released.
+ * If the two fingerprints are different it means that the user of the iterator
+ * performed forbidden operations against the dictionary while iterating. */
+unsigned long long dictFingerprint(dict *d) {
+ unsigned long long integers[6], hash = 0;
+ int j;
+
+ integers[0] = (long) d->ht_table[0];
+ integers[1] = d->ht_size_exp[0];
+ integers[2] = d->ht_used[0];
+ integers[3] = (long) d->ht_table[1];
+ integers[4] = d->ht_size_exp[1];
+ integers[5] = d->ht_used[1];
+
+ /* We hash N integers by summing every successive integer with the integer
+ * hashing of the previous sum. Basically:
+ *
+ * Result = hash(hash(hash(int1)+int2)+int3) ...
+ *
+ * This way the same set of integers in a different order will (likely) hash
+ * to a different number. */
+ for (j = 0; j < 6; j++) {
+ hash += integers[j];
+ /* For the hashing step we use Tomas Wang's 64 bit integer hash. */
+ hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
+ hash = hash ^ (hash >> 24);
+ hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
+ hash = hash ^ (hash >> 14);
+ hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
+ hash = hash ^ (hash >> 28);
+ hash = hash + (hash << 31);
+ }
+ return hash;
+}
+
/* Initialize 'iter' as an unsafe iterator over 'd'. index == -1 together
 * with table == 0 marks a not-yet-started iteration; the fingerprint is
 * intentionally left unset here — it is captured lazily by the first
 * dictNext() call (unsafe iterators only). */
void dictInitIterator(dictIterator *iter, dict *d)
{
    iter->d = d;
    iter->table = 0;
    iter->index = -1;
    iter->safe = 0;
    iter->entry = NULL;
    iter->nextEntry = NULL;
}
+
/* Initialize 'iter' as a safe iterator over 'd': rehashing is paused for
 * the duration of the iteration (see dictNext()/dictResetIterator()), so
 * the dict may be modified while iterating. */
void dictInitSafeIterator(dictIterator *iter, dict *d)
{
    dictInitIterator(iter, d);
    iter->safe = 1;
}
+
+void dictResetIterator(dictIterator *iter)
+{
+ if (!(iter->index == -1 && iter->table == 0)) {
+ if (iter->safe)
+ dictResumeRehashing(iter->d);
+ else
+ assert(iter->fingerprint == dictFingerprint(iter->d));
+ }
+}
+
/* Allocate and return a new unsafe iterator over 'd'. The caller must
 * release it with dictReleaseIterator(), and must not modify the dict
 * while iterating (enforced via fingerprint in dictResetIterator()). */
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *iter = zmalloc(sizeof(*iter));
    dictInitIterator(iter, d);
    return iter;
}
+
+dictIterator *dictGetSafeIterator(dict *d) {
+ dictIterator *i = dictGetIterator(d);
+
+ i->safe = 1;
+ return i;
+}
+
/* Advance the iterator and return the next entry, or NULL when iteration
 * is complete. On the very first call the iterator either pauses rehashing
 * (safe iterator) or records the dict fingerprint (unsafe iterator, checked
 * on release). */
dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        if (iter->entry == NULL) {
            /* Chain exhausted (or first call): move to the next bucket. */
            if (iter->index == -1 && iter->table == 0) {
                if (iter->safe)
                    dictPauseRehashing(iter->d);
                else
                    iter->fingerprint = dictFingerprint(iter->d);
            }
            iter->index++;
            if (iter->index >= (long) DICTHT_SIZE(iter->d->ht_size_exp[iter->table])) {
                /* Table exhausted: while rehashing, continue into table 1;
                 * otherwise the iteration is over. */
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    iter->table++;
                    iter->index = 0;
                } else {
                    break;
                }
            }
            iter->entry = iter->d->ht_table[iter->table][iter->index];
        } else {
            iter->entry = iter->nextEntry;
        }
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = dictGetNext(iter->entry);
            return iter->entry;
        }
    }
    return NULL;
}
+
/* Release an iterator allocated with dictGetIterator()/dictGetSafeIterator():
 * undo its side effects (resume rehashing, or fingerprint check) and free it. */
void dictReleaseIterator(dictIterator *iter)
{
    dictResetIterator(iter);
    zfree(iter);
}
+
/* Return a random entry from the hash table. Useful to
 * implement randomized algorithms.
 *
 * Note the distribution is not perfectly uniform: a bucket is picked
 * uniformly first, then an element within its chain, so entries in long
 * chains are individually less likely to be selected than entries in
 * short ones. See dictGetFairRandomKey() for a smoother distribution. */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *he, *orighe;
    unsigned long h;
    int listlen, listele;

    if (dictSize(d) == 0) return NULL;
    if (dictIsRehashing(d)) _dictRehashStep(d);
    if (dictIsRehashing(d)) {
        unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
        do {
            /* We are sure there are no elements in indexes from 0
             * to rehashidx-1 */
            h = d->rehashidx + (randomULong() % (dictSlots(d) - d->rehashidx));
            /* Slots >= s0 address table 1; below s0 they address table 0. */
            he = (h >= s0) ? d->ht_table[1][h - s0] : d->ht_table[0][h];
        } while(he == NULL);
    } else {
        unsigned long m = DICTHT_SIZE_MASK(d->ht_size_exp[0]);
        do {
            h = randomULong() & m;
            he = d->ht_table[0][h];
        } while(he == NULL);
    }

    /* Now we found a non empty bucket, but it is a linked
     * list and we need to get a random element from the list.
     * The only sane way to do so is counting the elements and
     * select a random index. */
    listlen = 0;
    orighe = he;
    while(he) {
        he = dictGetNext(he);
        listlen++;
    }
    listele = random() % listlen;
    he = orighe;
    while(listele--) he = dictGetNext(he);
    return he;
}
+
/* This function samples the dictionary to return a few keys from random
 * locations.
 *
 * It does not guarantee to return all the keys specified in 'count', nor
 * it does guarantee to return non-duplicated elements, however it will make
 * some effort to do both things.
 *
 * Returned pointers to hash table entries are stored into 'des' that
 * points to an array of dictEntry pointers. The array must have room for
 * at least 'count' elements, that is the argument we pass to the function
 * to tell how many random elements we need.
 *
 * The function returns the number of items stored into 'des', that may
 * be less than 'count' if the hash table has less than 'count' elements
 * inside, or if not enough elements were found in a reasonable amount of
 * steps.
 *
 * Note that this function is not suitable when you need a good distribution
 * of the returned items, but only when you need to "sample" a given number
 * of continuous elements to run some kind of algorithm or to produce
 * statistics. However the function is much faster than dictGetRandomKey()
 * at producing N elements. */
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
    unsigned long j; /* internal hash table id, 0 or 1. */
    unsigned long tables; /* 1 or 2 tables? */
    unsigned long stored = 0, maxsizemask;
    unsigned long maxsteps;

    if (dictSize(d) < count) count = dictSize(d);
    maxsteps = count*10;

    /* Try to do a rehashing work proportional to 'count'. */
    for (j = 0; j < count; j++) {
        if (dictIsRehashing(d))
            _dictRehashStep(d);
        else
            break;
    }

    tables = dictIsRehashing(d) ? 2 : 1;
    maxsizemask = DICTHT_SIZE_MASK(d->ht_size_exp[0]);
    if (tables > 1 && maxsizemask < DICTHT_SIZE_MASK(d->ht_size_exp[1]))
        maxsizemask = DICTHT_SIZE_MASK(d->ht_size_exp[1]);

    /* Pick a random point inside the larger table. */
    unsigned long i = randomULong() & maxsizemask;
    unsigned long emptylen = 0; /* Continuous empty entries so far. */
    while(stored < count && maxsteps--) {
        for (j = 0; j < tables; j++) {
            /* Invariant of the dict.c rehashing: up to the indexes already
             * visited in ht[0] during the rehashing, there are no populated
             * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
            if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
                /* Moreover, if we are currently out of range in the second
                 * table, there will be no elements in both tables up to
                 * the current rehashing index, so we jump if possible.
                 * (this happens when going from big to small table). */
                if (i >= DICTHT_SIZE(d->ht_size_exp[1]))
                    i = d->rehashidx;
                else
                    continue;
            }
            if (i >= DICTHT_SIZE(d->ht_size_exp[j])) continue; /* Out of range for this table. */
            dictEntry *he = d->ht_table[j][i];

            /* Count contiguous empty buckets, and jump to other
             * locations if they reach 'count' (with a minimum of 5). */
            if (he == NULL) {
                emptylen++;
                if (emptylen >= 5 && emptylen > count) {
                    i = randomULong() & maxsizemask;
                    emptylen = 0;
                }
            } else {
                emptylen = 0;
                while (he) {
                    /* Collect all the elements of the buckets found non empty while iterating.
                     * To avoid the issue of being unable to sample the end of a long chain,
                     * we utilize the Reservoir Sampling algorithm to optimize the sampling process.
                     * This means that even when the maximum number of samples has been reached,
                     * we continue sampling until we reach the end of the chain.
                     * See https://en.wikipedia.org/wiki/Reservoir_sampling. */
                    if (stored < count) {
                        des[stored] = he;
                    } else {
                        /* 'stored' counts every element seen so far, so each
                         * element replaces a previous sample with probability
                         * count/(stored+1), as reservoir sampling requires. */
                        unsigned long r = randomULong() % (stored + 1);
                        if (r < count) des[r] = he;
                    }

                    he = dictGetNext(he);
                    stored++;
                }
                if (stored >= count) goto end;
            }
        }
        i = (i+1) & maxsizemask;
    }

end:
    /* 'stored' may exceed 'count' when the last chain overshot; only
     * 'count' slots of 'des' are ever written. */
    return stored > count ? count : stored;
}
+
+
/* Reallocate the dictEntry, key and value allocations in a bucket using the
 * provided allocation functions in order to defrag them. Each defrag callback
 * returns either a new pointer (the allocation moved) or NULL (left as is).
 *
 * NOTE(review): dictGetVal() asserts the entry carries a value, so the
 * 'defragVal' callback is presumably NULL for no_value dicts — confirm
 * at the call sites. */
static void dictDefragBucket(dict *d, dictEntry **bucketref, dictDefragFunctions *defragfns) {
    dictDefragAllocFunction *defragalloc = defragfns->defragAlloc;
    dictDefragAllocFunction *defragkey = defragfns->defragKey;
    dictDefragAllocFunction *defragval = defragfns->defragVal;
    while (bucketref && *bucketref) {
        dictEntry *de = *bucketref, *newde = NULL;
        void *newkey = defragkey ? defragkey(dictGetKey(de)) : NULL;
        void *newval = defragval ? defragval(dictGetVal(de)) : NULL;
        if (entryIsKey(de)) {
            /* Bare-key entry: the "entry" is the key itself; only relink. */
            if (newkey) *bucketref = newkey;
            assert(entryIsKey(*bucketref));
        } else if (entryIsNoValue(de)) {
            /* No-value entry: possibly move the entry struct and the key. */
            dictEntryNoValue *entry = decodeEntryNoValue(de), *newentry;
            if ((newentry = defragalloc(entry))) {
                newde = encodeMaskedPtr(newentry, ENTRY_PTR_NO_VALUE);
                entry = newentry;
            }
            if (newkey) entry->key = newkey;
        } else {
            /* Normal entry: possibly move entry, key and value. */
            assert(entryIsNormal(de));
            newde = defragalloc(de);
            if (newde) de = newde;
            if (newkey) de->key = newkey;
            if (newval) de->v.val = newval;
        }
        if (newde) {
            *bucketref = newde;
            if (d->type->afterReplaceEntry)
                d->type->afterReplaceEntry(d, newde);
        }
        bucketref = dictGetNextRef(*bucketref);
    }
}
+
+/* This is like dictGetRandomKey() from the POV of the API, but will do more
+ * work to ensure a better distribution of the returned element.
+ *
+ * This function improves the distribution because the dictGetRandomKey()
+ * problem is that it selects a random bucket, then it selects a random
+ * element from the chain in the bucket. However elements being in different
+ * chain lengths will have different probabilities of being reported. With
+ * this function instead what we do is to consider a "linear" range of the table
+ * that may be constituted of N buckets with chains of different lengths
+ * appearing one after the other. Then we report a random element in the range.
+ * In this way we smooth away the problem of different chain lengths. */
+#define GETFAIR_NUM_ENTRIES 15
+dictEntry *dictGetFairRandomKey(dict *d) {
+ dictEntry *entries[GETFAIR_NUM_ENTRIES];
+ unsigned int count = dictGetSomeKeys(d,entries,GETFAIR_NUM_ENTRIES);
+ /* Note that dictGetSomeKeys() may return zero elements in an unlucky
+ * run() even if there are actually elements inside the hash table. So
+ * when we get zero, we call the true dictGetRandomKey() that will always
+ * yield the element if the hash table has at least one. */
+ if (count == 0) return dictGetRandomKey(d);
+ unsigned int idx = rand() % count;
+ return entries[idx];
+}
+
/* Reverse the bits of a word (LSB becomes MSB and vice versa), used to
 * implement the reverse binary cursor of dictScan(). Algorithm from:
 * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
 * Swaps progressively smaller halves: word halves, then quarters, down
 * to adjacent bits; the word size must be a power of two. */
static unsigned long rev(unsigned long v) {
    unsigned long mask = ~0UL;
    for (unsigned long s = (CHAR_BIT * sizeof(v)) >> 1; s > 0; s >>= 1) {
        mask ^= mask << s;
        v = ((v >> s) & mask) | ((v << s) & ~mask);
    }
    return v;
}
+
+/* dictScan() is used to iterate over the elements of a dictionary.
+ *
+ * Iterating works the following way:
+ *
+ * 1) Initially you call the function using a cursor (v) value of 0.
+ * 2) The function performs one step of the iteration, and returns the
+ * new cursor value you must use in the next call.
+ * 3) When the returned cursor is 0, the iteration is complete.
+ *
+ * The function guarantees all elements present in the
+ * dictionary get returned between the start and end of the iteration.
+ * However it is possible some elements get returned multiple times.
+ *
+ * For every element returned, the callback argument 'fn' is
+ * called with 'privdata' as first argument and the dictionary entry
+ * 'de' as second argument.
+ *
+ * HOW IT WORKS.
+ *
+ * The iteration algorithm was designed by Pieter Noordhuis.
+ * The main idea is to increment a cursor starting from the higher order
+ * bits. That is, instead of incrementing the cursor normally, the bits
+ * of the cursor are reversed, then the cursor is incremented, and finally
+ * the bits are reversed again.
+ *
+ * This strategy is needed because the hash table may be resized between
+ * iteration calls.
+ *
+ * dict.c hash tables are always power of two in size, and they
+ * use chaining, so the position of an element in a given table is given
+ * by computing the bitwise AND between Hash(key) and SIZE-1
+ * (where SIZE-1 is always the mask that is equivalent to taking the rest
+ * of the division between the Hash of the key and SIZE).
+ *
+ * For example if the current hash table size is 16, the mask is
+ * (in binary) 1111. The position of a key in the hash table will always be
+ * the last four bits of the hash output, and so forth.
+ *
+ * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?
+ *
+ * If the hash table grows, elements can go anywhere in one multiple of
+ * the old bucket: for example let's say we already iterated with
+ * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16).
+ *
+ * If the hash table will be resized to 64 elements, then the new mask will
+ * be 111111. The new buckets you obtain by substituting in ??1100
+ * with either 0 or 1 can be targeted only by keys we already visited
+ * when scanning the bucket 1100 in the smaller hash table.
+ *
+ * By iterating the higher bits first, because of the inverted counter, the
+ * cursor does not need to restart if the table size gets bigger. It will
+ * continue iterating using cursors without '1100' at the end, and also
+ * without any other combination of the final 4 bits already explored.
+ *
+ * Similarly when the table size shrinks over time, for example going from
+ * 16 to 8, if a combination of the lower three bits (the mask for size 8
+ * is 111) were already completely explored, it would not be visited again
+ * because we are sure we tried, for example, both 0111 and 1111 (all the
+ * variations of the higher bit) so we don't need to test it again.
+ *
+ * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!
+ *
+ * Yes, this is true, but we always iterate the smaller table first, then
+ * we test all the expansions of the current cursor into the larger
+ * table. For example if the current cursor is 101 and we also have a
+ * larger table of size 16, we also test (0)101 and (1)101 inside the larger
+ * table. This reduces the problem back to having only one table, where
+ * the larger one, if it exists, is just an expansion of the smaller one.
+ *
+ * LIMITATIONS
+ *
+ * This iterator is completely stateless, and this is a huge advantage,
+ * including no additional memory used.
+ *
+ * The disadvantages resulting from this design are:
+ *
+ * 1) It is possible we return elements more than once. However this is usually
+ * easy to deal with in the application level.
+ * 2) The iterator must return multiple elements per call, as it needs to always
+ * return all the keys chained in a given bucket, and all the expansions, so
+ * we are sure we don't miss keys moving during rehashing.
+ * 3) The reverse cursor is somewhat hard to understand at first, but this
+ * comment is supposed to help.
+ */
+unsigned long dictScan(dict *d,
+ unsigned long v,
+ dictScanFunction *fn,
+ void *privdata)
+{
+ return dictScanDefrag(d, v, fn, NULL, privdata);
+}
+
/* Like dictScan, but additionally reallocates the memory used by the dict
 * entries using the provided allocation function. This feature was added for
 * the active defrag feature.
 *
 * The 'defragfns' callbacks are called with a pointer to memory that callback
 * can reallocate. The callbacks should return a new memory address or NULL,
 * where NULL means that no reallocation happened and the old memory is still
 * valid. */
unsigned long dictScanDefrag(dict *d,
                             unsigned long v,
                             dictScanFunction *fn,
                             dictDefragFunctions *defragfns,
                             void *privdata)
{
    int htidx0, htidx1;
    const dictEntry *de, *next;
    unsigned long m0, m1;

    if (dictSize(d) == 0) return 0;

    /* This is needed in case the scan callback tries to do dictFind or alike. */
    dictPauseRehashing(d);

    if (!dictIsRehashing(d)) {
        /* Single-table case: emit one bucket, then advance the reversed
         * cursor over the bits covered by this table's mask. */
        htidx0 = 0;
        m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);

        /* Emit entries at cursor */
        if (defragfns) {
            dictDefragBucket(d, &d->ht_table[htidx0][v & m0], defragfns);
        }
        de = d->ht_table[htidx0][v & m0];
        while (de) {
            /* Save 'next' first: 'fn' may delete the entry it receives. */
            next = dictGetNext(de);
            fn(privdata, de);
            de = next;
        }

        /* Set unmasked bits so incrementing the reversed cursor
         * operates on the masked bits */
        v |= ~m0;

        /* Increment the reverse cursor */
        v = rev(v);
        v++;
        v = rev(v);

    } else {
        htidx0 = 0;
        htidx1 = 1;

        /* Make sure t0 is the smaller and t1 is the bigger table */
        if (DICTHT_SIZE(d->ht_size_exp[htidx0]) > DICTHT_SIZE(d->ht_size_exp[htidx1])) {
            htidx0 = 1;
            htidx1 = 0;
        }

        m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);
        m1 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx1]);

        /* Emit entries at cursor */
        if (defragfns) {
            dictDefragBucket(d, &d->ht_table[htidx0][v & m0], defragfns);
        }
        de = d->ht_table[htidx0][v & m0];
        while (de) {
            next = dictGetNext(de);
            fn(privdata, de);
            de = next;
        }

        /* Iterate over indices in larger table that are the expansion
         * of the index pointed to by the cursor in the smaller table */
        do {
            /* Emit entries at cursor */
            if (defragfns) {
                dictDefragBucket(d, &d->ht_table[htidx1][v & m1], defragfns);
            }
            de = d->ht_table[htidx1][v & m1];
            while (de) {
                next = dictGetNext(de);
                fn(privdata, de);
                de = next;
            }

            /* Increment the reverse cursor not covered by the smaller mask.*/
            v |= ~m1;
            v = rev(v);
            v++;
            v = rev(v);

            /* Continue while bits covered by mask difference is non-zero */
        } while (v & (m0 ^ m1));
    }

    dictResumeRehashing(d);

    return v;
}
+
+/* ------------------------- private functions ------------------------------ */
+
+/* Because we may need to allocate huge memory chunk at once when dict
+ * expands, we will check this allocation is allowed or not if the dict
+ * type has expandAllowed member function. */
+static int dictTypeExpandAllowed(dict *d) {
+ if (d->type->expandAllowed == NULL) return 1;
+ return d->type->expandAllowed(
+ DICTHT_SIZE(_dictNextExp(d->ht_used[0] + 1)) * sizeof(dictEntry*),
+ (double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]));
+}
+
/* Expand the hash table if needed. Returns DICT_OK unless an actual
 * expansion attempt fails (it also returns DICT_OK when expansion is
 * simply not required or not allowed). */
static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    if ((dict_can_resize == DICT_RESIZE_ENABLE &&
         d->ht_used[0] >= DICTHT_SIZE(d->ht_size_exp[0])) ||
        (dict_can_resize != DICT_RESIZE_FORBID &&
         d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio))
    {
        /* The dict type may veto large allocations (see
         * dictTypeExpandAllowed); in that case keep the current table. */
        if (!dictTypeExpandAllowed(d))
            return DICT_OK;
        return dictExpand(d, d->ht_used[0] + 1);
    }
    return DICT_OK;
}
+
+/* Our hash table capability is a power of two */
+static signed char _dictNextExp(unsigned long size)
+{
+ if (size <= DICT_HT_INITIAL_SIZE) return DICT_HT_INITIAL_EXP;
+ if (size >= LONG_MAX) return (8*sizeof(long)-1);
+
+ return 8*sizeof(long) - __builtin_clzl(size-1);
+}
+
/* Finds and returns the position within the dict where the provided key should
 * be inserted using dictInsertAtPosition if the key does not already exist in
 * the dict. If the key exists in the dict, NULL is returned and the optional
 * 'existing' entry pointer is populated, if provided. */
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) {
    unsigned long idx, table;
    dictEntry *he;
    uint64_t hash = dictHashKey(d, key);
    if (existing) *existing = NULL;
    /* Piggy-back one incremental rehash step on this lookup. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return NULL;
    /* Probe both tables: during a rehash the key may live in either one. */
    for (table = 0; table <= 1; table++) {
        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        /* Search if this slot does not already contain the given key */
        he = d->ht_table[table][idx];
        while(he) {
            void *he_key = dictGetKey(he);
            /* Pointer identity short-circuits the (possibly costly) compare. */
            if (key == he_key || dictCompareKeys(d, key, he_key)) {
                if (existing) *existing = he;
                return NULL;
            }
            he = dictGetNext(he);
        }
        /* Not rehashing: table 1 is unused, stop after table 0. */
        if (!dictIsRehashing(d)) break;
    }

    /* If we are in the process of rehashing the hash table, the bucket is
     * always returned in the context of the second (new) hash table. */
    dictEntry **bucket = &d->ht_table[dictIsRehashing(d) ? 1 : 0][idx];
    return bucket;
}
+
/* Remove every entry from both hash tables, forwarding 'callback' (which may
 * be NULL) to _dictClear. The dict structure itself stays allocated and is
 * left in a pristine, reusable state. */
void dictEmpty(dict *d, void(callback)(dict*)) {
    _dictClear(d,0,callback);
    _dictClear(d,1,callback);
    d->rehashidx = -1;   /* No rehash in progress anymore. */
    d->pauserehash = 0;  /* Reset any pause nesting as well. */
}
+
/* Set the global policy for automatic hash table resizing
 * (enable / avoid / forbid — see dictResizeEnable). */
void dictSetResizeEnabled(dictResizeEnable enable) {
    dict_can_resize = enable;
}

/* Compute the hash of 'key' with this dict's hash function. Useful to
 * pre-compute a hash for dictFindEntryByPtrAndHash(). */
uint64_t dictGetHash(dict *d, const void *key) {
    return dictHashKey(d, key);
}
+
+/* Finds the dictEntry using pointer and pre-calculated hash.
+ * oldkey is a dead pointer and should not be accessed.
+ * the hash value should be provided using dictGetHash.
+ * no string / key comparison is performed.
+ * return value is a pointer to the dictEntry if found, or NULL if not found. */
+dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) {
+ dictEntry *he;
+ unsigned long idx, table;
+
+ if (dictSize(d) == 0) return NULL; /* dict is empty */
+ for (table = 0; table <= 1; table++) {
+ idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
+ he = d->ht_table[table][idx];
+ while(he) {
+ if (oldptr == dictGetKey(he))
+ return he;
+ he = dictGetNext(he);
+ }
+ if (!dictIsRehashing(d)) return NULL;
+ }
+ return NULL;
+}
+
+/* ------------------------------- Debugging ---------------------------------*/
+
+#define DICT_STATS_VECTLEN 50
+size_t _dictGetStatsHt(char *buf, size_t bufsize, dict *d, int htidx, int full) {
+ unsigned long i, slots = 0, chainlen, maxchainlen = 0;
+ unsigned long totchainlen = 0;
+ unsigned long clvector[DICT_STATS_VECTLEN];
+ size_t l = 0;
+
+ if (d->ht_used[htidx] == 0) {
+ return snprintf(buf,bufsize,
+ "Hash table %d stats (%s):\n"
+ "No stats available for empty dictionaries\n",
+ htidx, (htidx == 0) ? "main hash table" : "rehashing target");
+ }
+
+ if (!full) {
+ l += snprintf(buf+l,bufsize-l,
+ "Hash table %d stats (%s):\n"
+ " table size: %lu\n"
+ " number of elements: %lu\n",
+ htidx, (htidx == 0) ? "main hash table" : "rehashing target",
+ DICTHT_SIZE(d->ht_size_exp[htidx]), d->ht_used[htidx]);
+
+ /* Make sure there is a NULL term at the end. */
+ buf[bufsize-1] = '\0';
+ /* Unlike snprintf(), return the number of characters actually written. */
+ return strlen(buf);
+ }
+
+ /* Compute stats. */
+ for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
+ for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]); i++) {
+ dictEntry *he;
+
+ if (d->ht_table[htidx][i] == NULL) {
+ clvector[0]++;
+ continue;
+ }
+ slots++;
+ /* For each hash entry on this slot... */
+ chainlen = 0;
+ he = d->ht_table[htidx][i];
+ while(he) {
+ chainlen++;
+ he = dictGetNext(he);
+ }
+ clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
+ if (chainlen > maxchainlen) maxchainlen = chainlen;
+ totchainlen += chainlen;
+ }
+
+ /* Generate human readable stats. */
+ l += snprintf(buf+l,bufsize-l,
+ "Hash table %d stats (%s):\n"
+ " table size: %lu\n"
+ " number of elements: %lu\n"
+ " different slots: %lu\n"
+ " max chain length: %lu\n"
+ " avg chain length (counted): %.02f\n"
+ " avg chain length (computed): %.02f\n"
+ " Chain length distribution:\n",
+ htidx, (htidx == 0) ? "main hash table" : "rehashing target",
+ DICTHT_SIZE(d->ht_size_exp[htidx]), d->ht_used[htidx], slots, maxchainlen,
+ (float)totchainlen/slots, (float)d->ht_used[htidx]/slots);
+
+ for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
+ if (clvector[i] == 0) continue;
+ if (l >= bufsize) break;
+ l += snprintf(buf+l,bufsize-l,
+ " %ld: %ld (%.02f%%)\n",
+ i, clvector[i], ((float)clvector[i]/DICTHT_SIZE(d->ht_size_exp[htidx]))*100);
+ }
+
+ /* Make sure there is a NULL term at the end. */
+ buf[bufsize-1] = '\0';
+ /* Unlike snprintf(), return the number of characters actually written. */
+ return strlen(buf);
+}
+
+void dictGetStats(char *buf, size_t bufsize, dict *d, int full) {
+ size_t l;
+ char *orig_buf = buf;
+ size_t orig_bufsize = bufsize;
+
+ l = _dictGetStatsHt(buf,bufsize,d,0,full);
+ if (dictIsRehashing(d) && bufsize > l) {
+ buf += l;
+ bufsize -= l;
+ _dictGetStatsHt(buf,bufsize,d,1,full);
+ }
+ /* Make sure there is a NULL term at the end. */
+ orig_buf[orig_bufsize-1] = '\0';
+}
+
+/* ------------------------------- Benchmark ---------------------------------*/
+
+#ifdef REDIS_TEST
+#include "testhelp.h"
+
+#define UNUSED(V) ((void) V)
+
/* Benchmark dict type: hash a NUL-terminated string key by its contents. */
uint64_t hashCallback(const void *key) {
    return dictGenHashFunction((unsigned char*)key, strlen((char*)key));
}
+
+int compareCallback(dict *d, const void *key1, const void *key2) {
+ int l1,l2;
+ UNUSED(d);
+
+ l1 = strlen((char*)key1);
+ l2 = strlen((char*)key2);
+ if (l1 != l2) return 0;
+ return memcmp(key1, key2, l1) == 0;
+}
+
/* Benchmark dict type: destructor for keys allocated with zmalloc()
 * (wired as keyDestructor in BenchmarkDictType below). */
void freeCallback(dict *d, void *val) {
    UNUSED(d);

    zfree(val);
}
+
/* Build a heap-allocated (zmalloc) decimal string for 'value'.
 * Ownership of the returned buffer passes to the caller. */
char *stringFromLongLong(long long value) {
    char tmp[32];
    int n;

    n = snprintf(tmp,sizeof(tmp),"%lld",value);
    char *copy = zmalloc(n+1);
    memcpy(copy, tmp, n+1); /* n+1 copies the terminating NUL too. */
    return copy;
}
+
+dictType BenchmarkDictType = {
+ hashCallback,
+ NULL,
+ NULL,
+ compareCallback,
+ freeCallback,
+ NULL,
+ NULL
+};
+
/* Record the benchmark start time into the caller-declared 'start' (ms). */
#define start_benchmark() start = timeInMilliseconds()
/* Print 'msg' with the item count and elapsed milliseconds since
 * start_benchmark(). Relies on 'count' and 'elapsed' being in scope. */
#define end_benchmark(msg) do { \
    elapsed = timeInMilliseconds()-start; \
    printf(msg ": %ld items in %lld ms\n", count, elapsed); \
} while(0)
+
/* ./redis-server test dict [<count> | --accurate]
 *
 * Benchmark driver for the dict implementation: inserts 'count' string keys,
 * then times linear/random lookups, random-key sampling, misses, and a
 * delete+re-add pass. Returns 0 on success; asserts on any failure. */
int dictTest(int argc, char **argv, int flags) {
    long j;
    long long start, elapsed;
    dict *dict = dictCreate(&BenchmarkDictType);
    long count = 0;
    int accurate = (flags & REDIS_TEST_ACCURATE);

    /* Pick the workload size: a large fixed count in --accurate mode, the
     * explicit <count> argument when given, or a small default. */
    if (argc == 4) {
        if (accurate) {
            count = 5000000;
        } else {
            count = strtol(argv[3],NULL,10);
        }
    } else {
        count = 5000;
    }

    start_benchmark();
    for (j = 0; j < count; j++) {
        /* Values are the integers themselves stored as fake pointers. */
        int retval = dictAdd(dict,stringFromLongLong(j),(void*)j);
        assert(retval == DICT_OK);
    }
    end_benchmark("Inserting");
    assert((long)dictSize(dict) == count);

    /* Wait for rehashing. */
    while (dictIsRehashing(dict)) {
        dictRehashMilliseconds(dict,100);
    }

    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(j);
        dictEntry *de = dictFind(dict,key);
        assert(de != NULL);
        zfree(key);
    }
    end_benchmark("Linear access of existing elements");

    /* Second pass over the same keys, now on a stable (rehashed) table. */
    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(j);
        dictEntry *de = dictFind(dict,key);
        assert(de != NULL);
        zfree(key);
    }
    end_benchmark("Linear access of existing elements (2nd round)");

    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(rand() % count);
        dictEntry *de = dictFind(dict,key);
        assert(de != NULL);
        zfree(key);
    }
    end_benchmark("Random access of existing elements");

    start_benchmark();
    for (j = 0; j < count; j++) {
        dictEntry *de = dictGetRandomKey(dict);
        assert(de != NULL);
    }
    end_benchmark("Accessing random keys");

    start_benchmark();
    for (j = 0; j < count; j++) {
        /* Corrupt the first digit so the key cannot exist in the dict. */
        char *key = stringFromLongLong(rand() % count);
        key[0] = 'X';
        dictEntry *de = dictFind(dict,key);
        assert(de == NULL);
        zfree(key);
    }
    end_benchmark("Accessing missing");

    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(j);
        int retval = dictDelete(dict,key);
        assert(retval == DICT_OK);
        key[0] += 17; /* Change first number to letter. */
        retval = dictAdd(dict,key,(void*)j);
        assert(retval == DICT_OK);
    }
    end_benchmark("Removing and adding");
    dictRelease(dict);
    return 0;
}
+#endif
diff --git a/src/dict.h b/src/dict.h
new file mode 100644
index 0000000..e96cd44
--- /dev/null
+++ b/src/dict.h
@@ -0,0 +1,231 @@
+/* Hash Tables Implementation.
+ *
+ * This file implements in-memory hash tables with insert/del/replace/find/
+ * get-random-element operations. Hash tables will auto-resize if needed
+ * tables of power of two in size are used, collisions are handled by
+ * chaining. See the source code for more information... :)
+ *
+ * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DICT_H
+#define __DICT_H
+
+#include "mt19937-64.h"
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#define DICT_OK 0
+#define DICT_ERR 1
+
+typedef struct dictEntry dictEntry; /* opaque */
+
+typedef struct dict dict;
+
typedef struct dictType {
    /* Required: hash a key to a 64 bit value. */
    uint64_t (*hashFunction)(const void *key);
    /* Optional: deep-copy key/value on insertion; NULL stores the caller's
     * pointer as-is. */
    void *(*keyDup)(dict *d, const void *key);
    void *(*valDup)(dict *d, const void *obj);
    /* Optional: key equality predicate; NULL falls back to pointer
     * comparison (see the dictCompareKeys macro). */
    int (*keyCompare)(dict *d, const void *key1, const void *key2);
    /* Optional: destructors invoked via dictFreeKey/dictFreeVal. */
    void (*keyDestructor)(dict *d, void *key);
    void (*valDestructor)(dict *d, void *obj);
    /* Optional: veto a table expansion given the extra memory it would
     * allocate and the current fill ratio (see dictTypeExpandAllowed). */
    int (*expandAllowed)(size_t moreMem, double usedRatio);
    /* Flags */
    /* The 'no_value' flag, if set, indicates that values are not used, i.e. the
     * dict is a set. When this flag is set, it's not possible to access the
     * value of a dictEntry and it's also impossible to use dictSetKey(). Entry
     * metadata can also not be used. */
    unsigned int no_value:1;
    /* If no_value = 1 and all keys are odd (LSB=1), setting keys_are_odd = 1
     * enables one more optimization: to store a key without an allocated
     * dictEntry. */
    unsigned int keys_are_odd:1;
    /* TODO: Add a 'keys_are_even' flag and use a similar optimization if that
     * flag is set. */

    /* Allow each dict and dictEntry to carry extra caller-defined metadata. The
     * extra memory is initialized to 0 when allocated. */
    size_t (*dictEntryMetadataBytes)(dict *d);
    size_t (*dictMetadataBytes)(void);
    /* Optional callback called after an entry has been reallocated (due to
     * active defrag). Only called if the entry has metadata. */
    void (*afterReplaceEntry)(dict *d, dictEntry *entry);
} dictType;
+
+#define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1<<(exp))
+#define DICTHT_SIZE_MASK(exp) ((exp) == -1 ? 0 : (DICTHT_SIZE(exp))-1)
+
struct dict {
    dictType *type;          /* Callbacks and flags defining this dict's behavior. */

    dictEntry **ht_table[2]; /* Two tables so rehashing can happen incrementally. */
    unsigned long ht_used[2];/* Number of stored entries per table. */

    long rehashidx; /* rehashing not in progress if rehashidx == -1 */

    /* Keep small vars at end for optimal (minimal) struct padding */
    int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
    signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */

    void *metadata[]; /* An arbitrary number of bytes (starting at a
                       * pointer-aligned address) of size as returned
                       * by dictType's dictMetadataBytes(). */
};
+
/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
typedef struct dictIterator {
    dict *d;                      /* Dictionary being iterated. */
    long index;                   /* Current bucket index within the table. */
    int table, safe;              /* table: 0 or 1; safe: see comment above. */
    dictEntry *entry, *nextEntry; /* Current entry and its successor.
                                   * NOTE(review): successor is presumably
                                   * saved so the current entry can be
                                   * deleted mid-iteration; confirm in
                                   * dictNext() in dict.c. */
    /* unsafe iterator fingerprint for misuse detection. */
    unsigned long long fingerprint;
} dictIterator;
+
+typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
+typedef void *(dictDefragAllocFunction)(void *ptr);
+typedef struct {
+ dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */
+ dictDefragAllocFunction *defragKey; /* Defrag-realloc keys (optional) */
+ dictDefragAllocFunction *defragVal; /* Defrag-realloc values (optional) */
+} dictDefragFunctions;
+
+/* This is the initial size of every hash table */
+#define DICT_HT_INITIAL_EXP 2
+#define DICT_HT_INITIAL_SIZE (1<<(DICT_HT_INITIAL_EXP))
+
+/* ------------------------------- Macros ------------------------------------*/
/* Invoke the optional value destructor for 'entry'. Wrapped in
 * do/while(0) so the macro is a single statement at the call site. */
#define dictFreeVal(d, entry) do { \
    if ((d)->type->valDestructor) \
        (d)->type->valDestructor((d), dictGetVal(entry)); \
    } while(0)

/* Invoke the optional key destructor for 'entry'. Fix: wrapped in
 * do/while(0) like dictFreeVal above — the previous bare 'if' form could
 * silently capture a following 'else' at the call site (dangling-else). */
#define dictFreeKey(d, entry) do { \
    if ((d)->type->keyDestructor) \
        (d)->type->keyDestructor((d), dictGetKey(entry)); \
    } while(0)

/* Key equality: use the type's comparator when present, otherwise fall
 * back to plain pointer equality. */
#define dictCompareKeys(d, key1, key2) \
    (((d)->type->keyCompare) ? \
        (d)->type->keyCompare((d), key1, key2) : \
        (key1) == (key2))
+
/* Per-entry / per-dict metadata sizes as declared by the dict type
 * (0 when the corresponding callback is absent). */
#define dictEntryMetadataSize(d) ((d)->type->dictEntryMetadataBytes \
                                  ? (d)->type->dictEntryMetadataBytes(d) : 0)
#define dictMetadataSize(d) ((d)->type->dictMetadataBytes \
                             ? (d)->type->dictMetadataBytes() : 0)

#define dictHashKey(d, key) ((d)->type->hashFunction(key))
/* Total buckets / total stored entries across both tables. */
#define dictSlots(d) (DICTHT_SIZE((d)->ht_size_exp[0])+DICTHT_SIZE((d)->ht_size_exp[1]))
#define dictSize(d) ((d)->ht_used[0]+(d)->ht_used[1])
#define dictIsRehashing(d) ((d)->rehashidx != -1)
/* Pause/resume incremental rehashing; nestable (a counter, not a flag). */
#define dictPauseRehashing(d) ((d)->pauserehash++)
#define dictResumeRehashing(d) ((d)->pauserehash--)
+
+/* If our unsigned long type can store a 64 bit number, use a 64 bit PRNG. */
+#if ULONG_MAX >= 0xffffffffffffffff
+#define randomULong() ((unsigned long) genrand64_int64())
+#else
+#define randomULong() random()
+#endif
+
+typedef enum {
+ DICT_RESIZE_ENABLE,
+ DICT_RESIZE_AVOID,
+ DICT_RESIZE_FORBID,
+} dictResizeEnable;
+
+/* API */
+dict *dictCreate(dictType *type);
+int dictExpand(dict *d, unsigned long size);
+int dictTryExpand(dict *d, unsigned long size);
+void *dictMetadata(dict *d);
+int dictAdd(dict *d, void *key, void *val);
+dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
+void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing);
+dictEntry *dictInsertAtPosition(dict *d, void *key, void *position);
+dictEntry *dictAddOrFind(dict *d, void *key);
+int dictReplace(dict *d, void *key, void *val);
+int dictDelete(dict *d, const void *key);
+dictEntry *dictUnlink(dict *d, const void *key);
+void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
+dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink, int *table_index);
+void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index);
+void dictRelease(dict *d);
+dictEntry * dictFind(dict *d, const void *key);
+void *dictFetchValue(dict *d, const void *key);
+int dictResize(dict *d);
+void dictSetKey(dict *d, dictEntry* de, void *key);
+void dictSetVal(dict *d, dictEntry *de, void *val);
+void dictSetSignedIntegerVal(dictEntry *de, int64_t val);
+void dictSetUnsignedIntegerVal(dictEntry *de, uint64_t val);
+void dictSetDoubleVal(dictEntry *de, double val);
+int64_t dictIncrSignedIntegerVal(dictEntry *de, int64_t val);
+uint64_t dictIncrUnsignedIntegerVal(dictEntry *de, uint64_t val);
+double dictIncrDoubleVal(dictEntry *de, double val);
+void *dictEntryMetadata(dictEntry *de);
+void *dictGetKey(const dictEntry *de);
+void *dictGetVal(const dictEntry *de);
+int64_t dictGetSignedIntegerVal(const dictEntry *de);
+uint64_t dictGetUnsignedIntegerVal(const dictEntry *de);
+double dictGetDoubleVal(const dictEntry *de);
+double *dictGetDoubleValPtr(dictEntry *de);
+size_t dictMemUsage(const dict *d);
+size_t dictEntryMemUsage(void);
+dictIterator *dictGetIterator(dict *d);
+dictIterator *dictGetSafeIterator(dict *d);
+void dictInitIterator(dictIterator *iter, dict *d);
+void dictInitSafeIterator(dictIterator *iter, dict *d);
+void dictResetIterator(dictIterator *iter);
+dictEntry *dictNext(dictIterator *iter);
+void dictReleaseIterator(dictIterator *iter);
+dictEntry *dictGetRandomKey(dict *d);
+dictEntry *dictGetFairRandomKey(dict *d);
+unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
+void dictGetStats(char *buf, size_t bufsize, dict *d, int full);
+uint64_t dictGenHashFunction(const void *key, size_t len);
+uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len);
+void dictEmpty(dict *d, void(callback)(dict*));
+void dictSetResizeEnabled(dictResizeEnable enable);
+int dictRehash(dict *d, int n);
+int dictRehashMilliseconds(dict *d, int ms);
+void dictSetHashFunctionSeed(uint8_t *seed);
+uint8_t *dictGetHashFunctionSeed(void);
+unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);
+unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
+uint64_t dictGetHash(dict *d, const void *key);
+dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);
+
+#ifdef REDIS_TEST
+int dictTest(int argc, char *argv[], int flags);
+#endif
+
+#endif /* __DICT_H */
diff --git a/src/endianconv.c b/src/endianconv.c
new file mode 100644
index 0000000..8eb6b22
--- /dev/null
+++ b/src/endianconv.c
@@ -0,0 +1,129 @@
+/* endinconv.c -- Endian conversions utilities.
+ *
+ * This functions are never called directly, but always using the macros
+ * defined into endianconv.h, this way we define everything is a non-operation
+ * if the arch is already little endian.
+ *
+ * Redis tries to encode everything as little endian (but a few things that need
+ * to be backward compatible are still in big endian) because most of the
+ * production environments are little endian, and we have a lot of conversions
+ * in a few places because ziplists, intsets, zipmaps, need to be endian-neutral
+ * even in memory, since they are serialized on RDB files directly with a single
+ * write(2) without other additional steps.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2011-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdint.h>
+
/* Reverse in place the first 'len' bytes pointed by 'b'. Shared helper for
 * the fixed-width little<->big endian toggles below. */
static void memrevgeneric(unsigned char *b, unsigned int len) {
    unsigned int lo = 0, hi = len - 1;
    while (lo < hi) {
        unsigned char t = b[lo];
        b[lo] = b[hi];
        b[hi] = t;
        lo++;
        hi--;
    }
}

/* Toggle the 16 bit unsigned integer pointed by *p from little endian to
 * big endian */
void memrev16(void *p) {
    memrevgeneric(p, 2);
}

/* Toggle the 32 bit unsigned integer pointed by *p from little endian to
 * big endian */
void memrev32(void *p) {
    memrevgeneric(p, 4);
}

/* Toggle the 64 bit unsigned integer pointed by *p from little endian to
 * big endian */
void memrev64(void *p) {
    memrevgeneric(p, 8);
}

/* Value-based variants: byte-swap 'v' and return the result. */
uint16_t intrev16(uint16_t v) {
    memrev16(&v);
    return v;
}

uint32_t intrev32(uint32_t v) {
    memrev32(&v);
    return v;
}

uint64_t intrev64(uint64_t v) {
    memrev64(&v);
    return v;
}
+
+#ifdef REDIS_TEST
+#include <stdio.h>
+
+#define UNUSED(x) (void)(x)
/* REDIS_TEST-only smoke test: reverses the leading 2/4/8 bytes of the
 * string "ciaoroma" and prints each result so a human can eyeball the
 * swaps. Always returns 0 (success). */
int endianconvTest(int argc, char *argv[], int flags) {
    char buf[32];

    UNUSED(argc);
    UNUSED(argv);
    UNUSED(flags);

    /* memrev16 swaps only the first two bytes: "icaoroma". */
    snprintf(buf,sizeof(buf),"ciaoroma");
    memrev16(buf);
    printf("%s\n", buf);

    /* memrev32 reverses the first four bytes: "oaicroma". */
    snprintf(buf,sizeof(buf),"ciaoroma");
    memrev32(buf);
    printf("%s\n", buf);

    /* memrev64 reverses all eight bytes: "amoroaic". */
    snprintf(buf,sizeof(buf),"ciaoroma");
    memrev64(buf);
    printf("%s\n", buf);

    return 0;
}
+#endif
diff --git a/src/endianconv.h b/src/endianconv.h
new file mode 100644
index 0000000..bfe9b7d
--- /dev/null
+++ b/src/endianconv.h
@@ -0,0 +1,78 @@
+/* See endianconv.c top comments for more information
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2011-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ENDIANCONV_H
+#define __ENDIANCONV_H
+
+#include "config.h"
+#include <stdint.h>
+
+void memrev16(void *p);
+void memrev32(void *p);
+void memrev64(void *p);
+uint16_t intrev16(uint16_t v);
+uint32_t intrev32(uint32_t v);
+uint64_t intrev64(uint64_t v);
+
+/* variants of the function doing the actual conversion only if the target
+ * host is big endian */
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+#define memrev16ifbe(p) ((void)(0))
+#define memrev32ifbe(p) ((void)(0))
+#define memrev64ifbe(p) ((void)(0))
+#define intrev16ifbe(v) (v)
+#define intrev32ifbe(v) (v)
+#define intrev64ifbe(v) (v)
+#else
+#define memrev16ifbe(p) memrev16(p)
+#define memrev32ifbe(p) memrev32(p)
+#define memrev64ifbe(p) memrev64(p)
+#define intrev16ifbe(v) intrev16(v)
+#define intrev32ifbe(v) intrev32(v)
+#define intrev64ifbe(v) intrev64(v)
+#endif
+
+/* The functions htonu64() and ntohu64() convert the specified value to
+ * network byte ordering and back. In big endian systems they are no-ops. */
+#if (BYTE_ORDER == BIG_ENDIAN)
+#define htonu64(v) (v)
+#define ntohu64(v) (v)
+#else
+#define htonu64(v) intrev64(v)
+#define ntohu64(v) intrev64(v)
+#endif
+
+#ifdef REDIS_TEST
+int endianconvTest(int argc, char *argv[], int flags);
+#endif
+
+#endif
diff --git a/src/eval.c b/src/eval.c
new file mode 100644
index 0000000..eb4b529
--- /dev/null
+++ b/src/eval.c
@@ -0,0 +1,1667 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "sha1.h"
+#include "rand.h"
+#include "cluster.h"
+#include "monotonic.h"
+#include "resp_parser.h"
+#include "script_lua.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+#include <lualib.h>
+#include <ctype.h>
+#include <math.h>
+
+void ldbInit(void);
+void ldbDisable(client *c);
+void ldbEnable(client *c);
+void evalGenericCommandWithDebugging(client *c, int evalsha);
+sds ldbCatStackValue(sds s, lua_State *lua, int idx);
+
+static void dictLuaScriptDestructor(dict *d, void *val) {
+ UNUSED(d);
+ if (val == NULL) return; /* Lazy freeing will set value to NULL. */
+ decrRefCount(((luaScript*)val)->body);
+ zfree(val);
+}
+
/* Case-insensitive hash for NUL-terminated string keys (the cache keys
 * are hex SHA1 digests, so case must not matter). */
static uint64_t dictStrCaseHash(const void *key) {
    const char *s = key;
    return dictGenCaseHashFunction((unsigned char*)s, strlen(s));
}
+
+/* server.lua_scripts sha (as sds string) -> scripts (as luaScript) cache. */
/* server.lua_scripts sha (as sds string) -> scripts (as luaScript) cache.
 * Keys are 40-char hex SHA1 sds strings; hashing and comparison are
 * case-insensitive. The value destructor releases the cached script. */
dictType shaScriptObjectDictType = {
        dictStrCaseHash,            /* hash function */
        NULL,                       /* key dup */
        NULL,                       /* val dup */
        dictSdsKeyCaseCompare,      /* key compare */
        dictSdsDestructor,          /* key destructor */
        dictLuaScriptDestructor,    /* val destructor */
        NULL                        /* allow to expand */
};
+
+/* Lua context */
/* Lua context. A single global instance ('lctx') holds all the state of
 * the EVAL scripting engine. */
struct luaCtx {
    lua_State *lua; /* The Lua interpreter. We use just one for all clients */
    client *lua_client;   /* The "fake client" to query Redis from Lua */
    dict *lua_scripts;         /* A dictionary of SHA1 -> Lua scripts */
    unsigned long long lua_scripts_mem;  /* Cached scripts' memory + oh */
} lctx;
+
+/* Debugger shared state is stored inside this global structure. */
/* Debugger shared state is stored inside this global structure. */
#define LDB_BREAKPOINTS_MAX 64  /* Max number of breakpoints. */
#define LDB_MAX_LEN_DEFAULT 256 /* Default len limit for replies / var dumps. */
struct ldbState {
    connection *conn; /* Connection of the debugging client. */
    int active; /* Are we debugging EVAL right now? */
    int forked; /* Is this a fork()ed debugging session? */
    list *logs; /* List of messages to send to the client. */
    list *traces; /* Messages about Redis commands executed since last stop.*/
    list *children; /* All forked debugging sessions pids. */
    int bp[LDB_BREAKPOINTS_MAX]; /* An array of breakpoints line numbers. */
    int bpcount; /* Number of valid entries inside bp. */
    int step;   /* Stop at next line regardless of breakpoints. */
    int luabp;  /* Stop at next line because redis.breakpoint() was called. */
    sds *src;   /* Lua script source code split by line. */
    int lines;  /* Number of lines in 'src'. */
    int currentline;    /* Current line number. */
    sds cbuf;   /* Debugger client command buffer. */
    size_t maxlen;  /* Max var dump / reply length. */
    int maxlen_hint_sent; /* Did we already hint about "set maxlen"? */
} ldb;
+
+/* ---------------------------------------------------------------------------
+ * Utility functions.
+ * ------------------------------------------------------------------------- */
+
+/* Perform the SHA1 of the input string. We use this both for hashing script
+ * bodies in order to obtain the Lua function name, and in the implementation
+ * of redis.sha1().
+ *
+ * 'digest' should point to a 41 bytes buffer: 40 for SHA1 converted into an
+ * hexadecimal number, plus 1 byte for null term. */
+void sha1hex(char *digest, char *script, size_t len) {
+ SHA1_CTX ctx;
+ unsigned char hash[20];
+ char *cset = "0123456789abcdef";
+ int j;
+
+ SHA1Init(&ctx);
+ SHA1Update(&ctx,(unsigned char*)script,len);
+ SHA1Final(hash,&ctx);
+
+ for (j = 0; j < 20; j++) {
+ digest[j*2] = cset[((hash[j]&0xF0)>>4)];
+ digest[j*2+1] = cset[(hash[j]&0xF)];
+ }
+ digest[40] = '\0';
+}
+
+/* redis.breakpoint()
+ *
+ * Allows to stop execution during a debugging session from within
+ * the Lua code implementation, like if a breakpoint was set in the code
+ * immediately after the function. */
+int luaRedisBreakpointCommand(lua_State *lua) {
+ if (ldb.active) {
+ ldb.luabp = 1;
+ lua_pushboolean(lua,1);
+ } else {
+ lua_pushboolean(lua,0);
+ }
+ return 1;
+}
+
+/* redis.debug()
+ *
+ * Log a string message into the output console.
+ * Can take multiple arguments that will be separated by commas.
+ * Nothing is returned to the caller. */
+int luaRedisDebugCommand(lua_State *lua) {
+ if (!ldb.active) return 0;
+ int argc = lua_gettop(lua);
+ sds log = sdscatprintf(sdsempty(),"<debug> line %d: ", ldb.currentline);
+ while(argc--) {
+ log = ldbCatStackValue(log,lua,-1 - argc);
+ if (argc != 0) log = sdscatlen(log,", ",2);
+ }
+ ldbLog(log);
+ return 0;
+}
+
+/* redis.replicate_commands()
+ *
+ * DEPRECATED: Now do nothing and always return true.
+ * Turn on single commands replication if the script never called
+ * a write command so far, and returns true. Otherwise if the script
+ * already started to write, returns false and stick to whole scripts
+ * replication, which is our default. */
/* redis.replicate_commands()
 *
 * DEPRECATED: Now do nothing and always return true.
 * Turn on single commands replication if the script never called
 * a write command so far, and returns true. Otherwise if the script
 * already started to write, returns false and stick to whole scripts
 * replication, which is our default. */
int luaRedisReplicateCommandsCommand(lua_State *lua) {
    /* Kept only for backward compatibility: always report success. */
    lua_pushboolean(lua,1);
    return 1;
}
+
+/* Initialize the scripting environment.
+ *
+ * This function is called the first time at server startup with
+ * the 'setup' argument set to 1.
+ *
+ * It can be called again multiple times during the lifetime of the Redis
+ * process, with 'setup' set to 0, and following a scriptingRelease() call,
+ * in order to reset the Lua scripting environment.
+ *
+ * However it is simpler to just call scriptingReset() that does just that. */
/* Initialize the scripting environment.
 *
 * This function is called the first time at server startup with
 * the 'setup' argument set to 1.
 *
 * It can be called again multiple times during the lifetime of the Redis
 * process, with 'setup' set to 0, and following a scriptingRelease() call,
 * in order to reset the Lua scripting environment.
 *
 * However it is simpler to just call scriptingReset() that does just that. */
void scriptingInit(int setup) {
    lua_State *lua = lua_open();

    /* One-time initialization performed only at server startup. */
    if (setup) {
        lctx.lua_client = NULL;
        server.script_disable_deny_script = 0;
        ldbInit();
    }

    /* Initialize a dictionary we use to map SHAs to scripts.
     * This is useful for replication, as we need to replicate EVALSHA
     * as EVAL, so we need to remember the associated script. */
    lctx.lua_scripts = dictCreate(&shaScriptObjectDictType);
    lctx.lua_scripts_mem = 0;

    luaRegisterRedisAPI(lua);

    /* register debug commands */
    lua_getglobal(lua,"redis");

    /* redis.breakpoint */
    lua_pushstring(lua,"breakpoint");
    lua_pushcfunction(lua,luaRedisBreakpointCommand);
    lua_settable(lua,-3);

    /* redis.debug */
    lua_pushstring(lua,"debug");
    lua_pushcfunction(lua,luaRedisDebugCommand);
    lua_settable(lua,-3);

    /* redis.replicate_commands */
    lua_pushstring(lua, "replicate_commands");
    lua_pushcfunction(lua, luaRedisReplicateCommandsCommand);
    lua_settable(lua, -3);

    /* Re-publish the (modified) table as the 'redis' global. */
    lua_setglobal(lua,"redis");

    /* Add a helper function we use for pcall error reporting.
     * Note that when the error is in the C function we want to report the
     * information about the caller, that's what makes sense from the point
     * of view of the user debugging a script. */
    {
        char *errh_func =       "local dbg = debug\n"
                                "debug = nil\n"
                                "function __redis__err__handler(err)\n"
                                "  local i = dbg.getinfo(2,'nSl')\n"
                                "  if i and i.what == 'C' then\n"
                                "    i = dbg.getinfo(3,'nSl')\n"
                                "  end\n"
                                "  if type(err) ~= 'table' then\n"
                                "    err = {err='ERR ' .. tostring(err)}"
                                "  end"
                                "  if i then\n"
                                "    err['source'] = i.source\n"
                                "    err['line'] = i.currentline\n"
                                "  end"
                                "  return err\n"
                                "end\n";
        /* NOTE(review): the return values of luaL_loadbuffer()/lua_pcall()
         * are ignored here — presumably this fixed chunk cannot fail to
         * compile; confirm if hardening is desired. */
        luaL_loadbuffer(lua,errh_func,strlen(errh_func),"@err_handler_def");
        lua_pcall(lua,0,0,0);
    }

    /* Create the (non connected) client that we use to execute Redis commands
     * inside the Lua interpreter.
     * Note: there is no need to create it again when this function is called
     * by scriptingReset(). */
    if (lctx.lua_client == NULL) {
        lctx.lua_client = createClient(NULL);
        lctx.lua_client->flags |= CLIENT_SCRIPT;

        /* We do not want to allow blocking commands inside Lua */
        lctx.lua_client->flags |= CLIENT_DENY_BLOCKING;
    }

    /* Lock the global table from any changes */
    lua_pushvalue(lua, LUA_GLOBALSINDEX);
    luaSetErrorMetatable(lua);
    /* Recursively lock all tables that can be reached from the global table */
    luaSetTableProtectionRecursively(lua);
    lua_pop(lua, 1);

    lctx.lua = lua;
}
+
+/* Release resources related to Lua scripting.
+ * This function is used in order to reset the scripting environment. */
+void scriptingRelease(int async) {
+ if (async)
+ freeLuaScriptsAsync(lctx.lua_scripts);
+ else
+ dictRelease(lctx.lua_scripts);
+ lctx.lua_scripts_mem = 0;
+ lua_close(lctx.lua);
+}
+
/* Tear down and rebuild the whole scripting environment, flushing the
 * script cache ('async' selects lazy freeing) and creating a fresh
 * Lua interpreter. */
void scriptingReset(int async) {
    scriptingRelease(async);
    scriptingInit(0);
}
+
+/* ---------------------------------------------------------------------------
+ * EVAL and SCRIPT commands implementation
+ * ------------------------------------------------------------------------- */
+
+static void evalCalcFunctionName(int evalsha, sds script, char *out_funcname) {
+ /* We obtain the script SHA1, then check if this function is already
+ * defined into the Lua state */
+ out_funcname[0] = 'f';
+ out_funcname[1] = '_';
+ if (!evalsha) {
+ /* Hash the code if this is an EVAL call */
+ sha1hex(out_funcname+2,script,sdslen(script));
+ } else {
+ /* We already have the SHA if it is an EVALSHA */
+ int j;
+ char *sha = script;
+
+ /* Convert to lowercase. We don't use tolower since the function
+ * managed to always show up in the profiler output consuming
+ * a non trivial amount of time. */
+ for (j = 0; j < 40; j++)
+ out_funcname[j+2] = (sha[j] >= 'A' && sha[j] <= 'Z') ?
+ sha[j]+('a'-'A') : sha[j];
+ out_funcname[42] = '\0';
+ }
+}
+
+/* Helper function to try and extract shebang flags from the script body.
+ * If no shebang is found, return with success and COMPAT mode flag.
+ * The err arg is optional, can be used to get a detailed error string.
+ * The out_shebang_len arg is optional, can be used to trim the shebang from the script.
+ * Returns C_OK on success, and C_ERR on error. */
/* Helper function to try and extract shebang flags from the script body.
 * If no shebang is found, return with success and COMPAT mode flag.
 * The err arg is optional, can be used to get a detailed error string.
 * The out_shebang_len arg is optional, can be used to trim the shebang from the script.
 * Returns C_OK on success, and C_ERR on error.
 *
 * Expected shebang shape: "#!lua [flags=f1,f2,...]" on the first line.
 * On C_ERR, when 'err' is non-NULL it receives a newly allocated sds the
 * caller owns and must free (or hand off). */
int evalExtractShebangFlags(sds body, uint64_t *out_flags, ssize_t *out_shebang_len, sds *err) {
    ssize_t shebang_len = 0;
    uint64_t script_flags = SCRIPT_FLAG_EVAL_COMPAT_MODE;
    if (!strncmp(body, "#!", 2)) {
        int numparts,j;
        char *shebang_end = strchr(body, '\n');
        if (shebang_end == NULL) {
            if (err)
                *err = sdsnew("Invalid script shebang");
            return C_ERR;
        }
        shebang_len = shebang_end - body;
        sds shebang = sdsnewlen(body, shebang_len);
        /* Split "#!lua flags=..." into space-separated tokens. */
        sds *parts = sdssplitargs(shebang, &numparts);
        sdsfree(shebang);
        if (!parts || numparts == 0) {
            if (err)
                *err = sdsnew("Invalid engine in script shebang");
            sdsfreesplitres(parts, numparts);
            return C_ERR;
        }
        /* Verify lua interpreter was specified */
        if (strcmp(parts[0], "#!lua")) {
            if (err)
                *err = sdscatfmt(sdsempty(), "Unexpected engine in script shebang: %s", parts[0]);
            sdsfreesplitres(parts, numparts);
            return C_ERR;
        }
        /* A shebang is present: the script opted out of compat mode. */
        script_flags &= ~SCRIPT_FLAG_EVAL_COMPAT_MODE;
        for (j = 1; j < numparts; j++) {
            if (!strncmp(parts[j], "flags=", 6)) {
                /* Strip the "flags=" prefix, then split the comma list. */
                sdsrange(parts[j], 6, -1);
                int numflags, jj;
                sds *flags = sdssplitlen(parts[j], sdslen(parts[j]), ",", 1, &numflags);
                for (jj = 0; jj < numflags; jj++) {
                    scriptFlag *sf;
                    /* Linear scan of the known flag table; the table is
                     * terminated by an entry with a zero 'flag'. */
                    for (sf = scripts_flags_def; sf->flag; sf++) {
                        if (!strcmp(flags[jj], sf->str)) break;
                    }
                    if (!sf->flag) {
                        if (err)
                            *err = sdscatfmt(sdsempty(), "Unexpected flag in script shebang: %s", flags[jj]);
                        sdsfreesplitres(flags, numflags);
                        sdsfreesplitres(parts, numparts);
                        return C_ERR;
                    }
                    script_flags |= sf->flag;
                }
                sdsfreesplitres(flags, numflags);
            } else {
                /* We only support function flags options for lua scripts */
                if (err)
                    *err = sdscatfmt(sdsempty(), "Unknown lua shebang option: %s", parts[j]);
                sdsfreesplitres(parts, numparts);
                return C_ERR;
            }
        }
        sdsfreesplitres(parts, numparts);
    }
    if (out_shebang_len)
        *out_shebang_len = shebang_len;
    *out_flags = script_flags;
    return C_OK;
}
+
+/* Try to extract command flags if we can, returns the modified flags.
+ * Note that it does not guarantee the command arguments are right. */
/* Try to extract command flags if we can, returns the modified flags.
 * Note that it does not guarantee the command arguments are right.
 * Falls back to returning 'cmd_flags' unchanged whenever the script (or
 * its flags) cannot be determined. */
uint64_t evalGetCommandFlags(client *c, uint64_t cmd_flags) {
    char funcname[43];
    int evalsha = c->cmd->proc == evalShaCommand || c->cmd->proc == evalShaRoCommand;
    /* A malformed SHA can never match a cached script. */
    if (evalsha && sdslen(c->argv[1]->ptr) != 40)
        return cmd_flags;
    uint64_t script_flags;
    evalCalcFunctionName(evalsha, c->argv[1]->ptr, funcname);
    char *lua_cur_script = funcname + 2;
    /* Cache the dict entry on the client so that evalGenericCommand()
     * can reuse this lookup. */
    c->cur_script = dictFind(lctx.lua_scripts, lua_cur_script);
    if (!c->cur_script) {
        if (evalsha)
            return cmd_flags; /* Unknown SHA: nothing to derive flags from. */
        /* Not cached yet: parse the (optional) shebang of the EVAL body. */
        if (evalExtractShebangFlags(c->argv[1]->ptr, &script_flags, NULL, NULL) == C_ERR)
            return cmd_flags;
    } else {
        luaScript *l = dictGetVal(c->cur_script);
        script_flags = l->flags;
    }
    /* Legacy scripts (no shebang) keep the default command flags. */
    if (script_flags & SCRIPT_FLAG_EVAL_COMPAT_MODE)
        return cmd_flags;
    return scriptFlagsToCmdFlags(cmd_flags, script_flags);
}
+
+/* Define a Lua function with the specified body.
+ * The function name will be generated in the following form:
+ *
+ * f_<hex sha1 sum>
+ *
+ * The function increments the reference count of the 'body' object as a
+ * side effect of a successful call.
+ *
+ * On success a pointer to an SDS string representing the function SHA1 of the
+ * just added function is returned (and will be valid until the next call
+ * to scriptingReset() function), otherwise NULL is returned.
+ *
+ * The function handles the fact of being called with a script that already
+ * exists, and in such a case, it behaves like in the success case.
+ *
+ * If 'c' is not NULL, on error the client is informed with an appropriate
+ * error describing the nature of the problem and the Lua interpreter error. */
+sds luaCreateFunction(client *c, robj *body) {
+ char funcname[43];
+ dictEntry *de;
+ uint64_t script_flags;
+
+ funcname[0] = 'f';
+ funcname[1] = '_';
+ sha1hex(funcname+2,body->ptr,sdslen(body->ptr));
+
+ if ((de = dictFind(lctx.lua_scripts,funcname+2)) != NULL) {
+ return dictGetKey(de);
+ }
+
+ /* Handle shebang header in script code */
+ ssize_t shebang_len = 0;
+ sds err = NULL;
+ if (evalExtractShebangFlags(body->ptr, &script_flags, &shebang_len, &err) == C_ERR) {
+ addReplyErrorSds(c, err);
+ return NULL;
+ }
+
+ /* Note that in case of a shebang line we skip it but keep the line feed to conserve the user's line numbers */
+ if (luaL_loadbuffer(lctx.lua,(char*)body->ptr + shebang_len,sdslen(body->ptr) - shebang_len,"@user_script")) {
+ if (c != NULL) {
+ addReplyErrorFormat(c,
+ "Error compiling script (new function): %s",
+ lua_tostring(lctx.lua,-1));
+ }
+ lua_pop(lctx.lua,1);
+ return NULL;
+ }
+
+ serverAssert(lua_isfunction(lctx.lua, -1));
+
+ lua_setfield(lctx.lua, LUA_REGISTRYINDEX, funcname);
+
+ /* We also save a SHA1 -> Original script map in a dictionary
+ * so that we can replicate / write in the AOF all the
+ * EVALSHA commands as EVAL using the original script. */
+ luaScript *l = zcalloc(sizeof(luaScript));
+ l->body = body;
+ l->flags = script_flags;
+ sds sha = sdsnewlen(funcname+2,40);
+ int retval = dictAdd(lctx.lua_scripts,sha,l);
+ serverAssertWithInfo(c ? c : lctx.lua_client,NULL,retval == DICT_OK);
+ lctx.lua_scripts_mem += sdsZmallocSize(sha) + getStringObjectSdsUsedMemory(body);
+ incrRefCount(body);
+ return sha;
+}
+
/* Common implementation of EVAL / EVAL_RO / EVALSHA / EVALSHA_RO:
 * validates the key count, resolves (or compiles) the cached Lua
 * function, and runs it through the script runtime. Replies to the
 * client on every error path. */
void evalGenericCommand(client *c, int evalsha) {
    lua_State *lua = lctx.lua;
    char funcname[43];
    long long numkeys;

    /* Get the number of arguments that are keys */
    if (getLongLongFromObjectOrReply(c,c->argv[2],&numkeys,NULL) != C_OK)
        return;
    if (numkeys > (c->argc - 3)) {
        addReplyError(c,"Number of keys can't be greater than number of args");
        return;
    } else if (numkeys < 0) {
        addReplyError(c,"Number of keys can't be negative");
        return;
    }

    /* Reuse the lookup cached by evalGetCommandFlags() when available,
     * otherwise recompute the "f_<sha1>" name. */
    if (c->cur_script) {
        funcname[0] = 'f', funcname[1] = '_';
        memcpy(funcname+2, dictGetKey(c->cur_script), 40);
        funcname[42] = '\0';
    } else
        evalCalcFunctionName(evalsha, c->argv[1]->ptr, funcname);

    /* Push the pcall error handler function on the stack. */
    lua_getglobal(lua, "__redis__err__handler");

    /* Try to lookup the Lua function */
    lua_getfield(lua, LUA_REGISTRYINDEX, funcname);
    if (lua_isnil(lua,-1)) {
        lua_pop(lua,1); /* remove the nil from the stack */
        /* Function not defined... let's define it if we have the
         * body of the function. If this is an EVALSHA call we can just
         * return an error. */
        if (evalsha) {
            lua_pop(lua,1); /* remove the error handler from the stack. */
            addReplyErrorObject(c, shared.noscripterr);
            return;
        }
        if (luaCreateFunction(c,c->argv[1]) == NULL) {
            lua_pop(lua,1); /* remove the error handler from the stack. */
            /* The error is sent to the client by luaCreateFunction()
             * itself when it returns NULL. */
            return;
        }
        /* Now the following is guaranteed to return non nil */
        lua_getfield(lua, LUA_REGISTRYINDEX, funcname);
        serverAssert(!lua_isnil(lua,-1));
    }

    /* Fetch the cached luaScript to obtain the script flags. */
    char *lua_cur_script = funcname + 2;
    dictEntry *de = c->cur_script;
    if (!de)
        de = dictFind(lctx.lua_scripts, lua_cur_script);
    luaScript *l = dictGetVal(de);
    int ro = c->cmd->proc == evalRoCommand || c->cmd->proc == evalShaRoCommand;

    scriptRunCtx rctx;
    if (scriptPrepareForRun(&rctx, lctx.lua_client, c, lua_cur_script, l->flags, ro) != C_OK) {
        lua_pop(lua,2); /* Remove the function and error handler. */
        return;
    }
    rctx.flags |= SCRIPT_EVAL_MODE; /* mark the current run as EVAL (as opposed to FCALL) so we'll
                                      get appropriate error messages and logs */

    luaCallFunction(&rctx, lua, c->argv+3, numkeys, c->argv+3+numkeys, c->argc-3-numkeys, ldb.active);
    lua_pop(lua,1); /* Remove the error handler. */
    scriptResetRun(&rctx);
}
+
/* EVAL <script> <numkeys> [key ...] [arg ...] */
void evalCommand(client *c) {
    /* Explicitly feed monitor here so that lua commands appear after their
     * script command. */
    replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
    /* Clients that enabled SCRIPT DEBUG go through the debugger wrapper. */
    if (!(c->flags & CLIENT_LUA_DEBUG))
        evalGenericCommand(c,0);
    else
        evalGenericCommandWithDebugging(c,0);
}
+
/* EVAL_RO: same dispatch as EVAL; the read-only restriction is detected
 * inside evalGenericCommand() by inspecting c->cmd->proc. */
void evalRoCommand(client *c) {
    evalCommand(c);
}
+
+void evalShaCommand(client *c) {
+ /* Explicitly feed monitor here so that lua commands appear after their
+ * script command. */
+ replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
+ if (sdslen(c->argv[1]->ptr) != 40) {
+ /* We know that a match is not possible if the provided SHA is
+ * not the right length. So we return an error ASAP, this way
+ * evalGenericCommand() can be implemented without string length
+ * sanity check */
+ addReplyErrorObject(c, shared.noscripterr);
+ return;
+ }
+ if (!(c->flags & CLIENT_LUA_DEBUG))
+ evalGenericCommand(c,1);
+ else {
+ addReplyError(c,"Please use EVAL instead of EVALSHA for debugging");
+ return;
+ }
+}
+
/* EVALSHA_RO: same dispatch as EVALSHA; the read-only restriction is
 * detected inside evalGenericCommand() by inspecting c->cmd->proc. */
void evalShaRoCommand(client *c) {
    evalShaCommand(c);
}
+
/* SCRIPT subcommand dispatcher: HELP, FLUSH, EXISTS, LOAD, KILL, DEBUG. */
void scriptCommand(client *c) {
    if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
        const char *help[] = {
"DEBUG (YES|SYNC|NO)",
"    Set the debug mode for subsequent scripts executed.",
"EXISTS <sha1> [<sha1> ...]",
"    Return information about the existence of the scripts in the script cache.",
"FLUSH [ASYNC|SYNC]",
"    Flush the Lua scripts cache. Very dangerous on replicas.",
"    When called without the optional mode argument, the behavior is determined by the",
"    lazyfree-lazy-user-flush configuration directive. Valid modes are:",
"    * ASYNC: Asynchronously flush the scripts cache.",
"    * SYNC: Synchronously flush the scripts cache.",
"KILL",
"    Kill the currently executing Lua script.",
"LOAD <script>",
"    Load a script into the scripts cache without executing it.",
NULL
        };
        addReplyHelp(c, help);
    } else if (c->argc >= 2 && !strcasecmp(c->argv[1]->ptr,"flush")) {
        /* SCRIPT FLUSH [ASYNC|SYNC]: reset the whole scripting env. */
        int async = 0;
        if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr,"sync")) {
            async = 0;
        } else if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr,"async")) {
            async = 1;
        } else if (c->argc == 2) {
            /* No explicit mode: follow the server-wide lazyfree policy. */
            async = server.lazyfree_lazy_user_flush ? 1 : 0;
        } else {
            addReplyError(c,"SCRIPT FLUSH only support SYNC|ASYNC option");
            return;
        }
        scriptingReset(async);
        addReply(c,shared.ok);
    } else if (c->argc >= 2 && !strcasecmp(c->argv[1]->ptr,"exists")) {
        /* SCRIPT EXISTS: one 0/1 reply per requested SHA1. */
        int j;

        addReplyArrayLen(c, c->argc-2);
        for (j = 2; j < c->argc; j++) {
            if (dictFind(lctx.lua_scripts,c->argv[j]->ptr))
                addReply(c,shared.cone);
            else
                addReply(c,shared.czero);
        }
    } else if (c->argc == 3 && !strcasecmp(c->argv[1]->ptr,"load")) {
        /* SCRIPT LOAD: compile and cache without executing. */
        sds sha = luaCreateFunction(c,c->argv[2]);
        if (sha == NULL) return; /* The error was sent by luaCreateFunction(). */
        addReplyBulkCBuffer(c,sha,40);
    } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"kill")) {
        scriptKill(c, 1);
    } else if (c->argc == 3 && !strcasecmp(c->argv[1]->ptr,"debug")) {
        /* SCRIPT DEBUG: must be the only pending command for this client. */
        if (clientHasPendingReplies(c)) {
            addReplyError(c,"SCRIPT DEBUG must be called outside a pipeline");
            return;
        }
        if (!strcasecmp(c->argv[2]->ptr,"no")) {
            ldbDisable(c);
            addReply(c,shared.ok);
        } else if (!strcasecmp(c->argv[2]->ptr,"yes")) {
            ldbEnable(c);
            addReply(c,shared.ok);
        } else if (!strcasecmp(c->argv[2]->ptr,"sync")) {
            /* SYNC: debug in-process instead of in a fork()ed child. */
            ldbEnable(c);
            addReply(c,shared.ok);
            c->flags |= CLIENT_LUA_DEBUG_SYNC;
        } else {
            addReplyError(c,"Use SCRIPT DEBUG YES/SYNC/NO");
            return;
        }
    } else {
        addReplySubcommandSyntaxError(c);
    }
}
+
/* Return the memory used by the EVAL Lua interpreter, as reported by
 * luaMemory(). */
unsigned long evalMemory(void) {
    return luaMemory(lctx.lua);
}
+
/* Expose the SHA1 -> luaScript cache (read-only access for callers). */
dict* evalScriptsDict(void) {
    return lctx.lua_scripts;
}
+
/* Total memory attributed to the script cache: cached bodies/keys plus
 * the dict overhead and one luaScript struct per entry. */
unsigned long evalScriptsMemory(void) {
    return lctx.lua_scripts_mem +
            dictMemUsage(lctx.lua_scripts) +
            dictSize(lctx.lua_scripts) * sizeof(luaScript);
}
+
+/* ---------------------------------------------------------------------------
+ * LDB: Redis Lua debugging facilities
+ * ------------------------------------------------------------------------- */
+
+/* Initialize Lua debugger data structures. */
+void ldbInit(void) {
+ ldb.conn = NULL;
+ ldb.active = 0;
+ ldb.logs = listCreate();
+ listSetFreeMethod(ldb.logs,(void (*)(void*))sdsfree);
+ ldb.children = listCreate();
+ ldb.src = NULL;
+ ldb.lines = 0;
+ ldb.cbuf = sdsempty();
+}
+
+/* Remove all the pending messages in the specified list. */
+void ldbFlushLog(list *log) {
+ listNode *ln;
+
+ while((ln = listFirst(log)) != NULL)
+ listDelNode(log,ln);
+}
+
/* Return non-zero when a debugging session is active and currently set
 * to stop at the next line (stepping). */
int ldbIsEnabled(void){
    return ldb.active && ldb.step;
}
+
+/* Enable debug mode of Lua scripts for this client. */
/* Enable debug mode of Lua scripts for this client. */
void ldbEnable(client *c) {
    c->flags |= CLIENT_LUA_DEBUG;
    ldbFlushLog(ldb.logs);
    /* Capture the client's connection for direct debugger I/O. */
    ldb.conn = c->conn;
    ldb.step = 1;           /* Stop at the first line of the script. */
    ldb.bpcount = 0;        /* No breakpoints carried over. */
    ldb.luabp = 0;
    /* Reset the debugger command buffer. */
    sdsfree(ldb.cbuf);
    ldb.cbuf = sdsempty();
    ldb.maxlen = LDB_MAX_LEN_DEFAULT;
    ldb.maxlen_hint_sent = 0;
}
+
+/* Exit debugging mode from the POV of client. This function is not enough
+ * to properly shut down a client debugging session, see ldbEndSession()
+ * for more information. */
/* Exit debugging mode from the POV of client. This function is not enough
 * to properly shut down a client debugging session, see ldbEndSession()
 * for more information. */
void ldbDisable(client *c) {
    c->flags &= ~(CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC);
}
+
+/* Append a log entry to the specified LDB log. */
/* Append a log entry to the specified LDB log. The list takes ownership
 * of the 'entry' sds (freed by the list's free method). */
void ldbLog(sds entry) {
    listAddNodeTail(ldb.logs,entry);
}
+
+/* A version of ldbLog() which prevents producing logs greater than
+ * ldb.maxlen. The first time the limit is reached a hint is generated
+ * to inform the user that reply trimming can be disabled using the
+ * debugger "maxlen" command. */
+void ldbLogWithMaxLen(sds entry) {
+ int trimmed = 0;
+ if (ldb.maxlen && sdslen(entry) > ldb.maxlen) {
+ sdsrange(entry,0,ldb.maxlen-1);
+ entry = sdscatlen(entry," ...",4);
+ trimmed = 1;
+ }
+ ldbLog(entry);
+ if (trimmed && ldb.maxlen_hint_sent == 0) {
+ ldb.maxlen_hint_sent = 1;
+ ldbLog(sdsnew(
+ "<hint> The above reply was trimmed. Use 'maxlen 0' to disable trimming."));
+ }
+}
+
+/* Send ldb.logs to the debugging client as a multi-bulk reply
+ * consisting of simple strings. Log entries which include newlines have them
+ * replaced with spaces. The entries sent are also consumed. */
+void ldbSendLogs(void) {
+ sds proto = sdsempty();
+ proto = sdscatfmt(proto,"*%i\r\n", (int)listLength(ldb.logs));
+ while(listLength(ldb.logs)) {
+ listNode *ln = listFirst(ldb.logs);
+ proto = sdscatlen(proto,"+",1);
+ sdsmapchars(ln->value,"\r\n"," ",2);
+ proto = sdscatsds(proto,ln->value);
+ proto = sdscatlen(proto,"\r\n",2);
+ listDelNode(ldb.logs,ln);
+ }
+ if (connWrite(ldb.conn,proto,sdslen(proto)) == -1) {
+ /* Avoid warning. We don't check the return value of write()
+ * since the next read() will catch the I/O error and will
+ * close the debugging session. */
+ }
+ sdsfree(proto);
+}
+
+/* Start a debugging session before calling EVAL implementation.
+ * The technique we use is to capture the client socket file descriptor,
+ * in order to perform direct I/O with it from within Lua hooks. This
+ * way we don't have to re-enter Redis in order to handle I/O.
+ *
+ * The function returns 1 if the caller should proceed to call EVAL,
+ * and 0 if instead the caller should abort the operation (this happens
+ * for the parent in a forked session, since it's up to the children
+ * to continue, or when fork returned an error).
+ *
+ * The caller should call ldbEndSession() only if ldbStartSession()
+ * returned 1. */
/* Start a debugging session before calling EVAL implementation.
 * The technique we use is to capture the client socket file descriptor,
 * in order to perform direct I/O with it from within Lua hooks. This
 * way we don't have to re-enter Redis in order to handle I/O.
 *
 * The function returns 1 if the caller should proceed to call EVAL,
 * and 0 if instead the caller should abort the operation (this happens
 * for the parent in a forked session, since it's up to the children
 * to continue, or when fork returned an error).
 *
 * The caller should call ldbEndSession() only if ldbStartSession()
 * returned 1. */
int ldbStartSession(client *c) {
    /* Fork unless the client asked for a SYNC (in-process) session. */
    ldb.forked = (c->flags & CLIENT_LUA_DEBUG_SYNC) == 0;
    if (ldb.forked) {
        pid_t cp = redisFork(CHILD_TYPE_LDB);
        if (cp == -1) {
            addReplyErrorFormat(c,"Fork() failed: can't run EVAL in debugging mode: %s", strerror(errno));
            return 0;
        } else if (cp == 0) {
            /* Child. Let's ignore important signals handled by the parent. */
            struct sigaction act;
            sigemptyset(&act.sa_mask);
            act.sa_flags = 0;
            act.sa_handler = SIG_IGN;
            sigaction(SIGTERM, &act, NULL);
            sigaction(SIGINT, &act, NULL);

            /* Log the creation of the child and close the listening
             * socket to make sure if the parent crashes a reset is sent
             * to the clients. */
            serverLog(LL_NOTICE,"Redis forked for debugging eval");
        } else {
            /* Parent: remember the child pid and detach from the client,
             * which now belongs to the child process. */
            listAddNodeTail(ldb.children,(void*)(unsigned long)cp);
            freeClientAsync(c); /* Close the client in the parent side. */
            return 0;
        }
    } else {
        serverLog(LL_NOTICE,
            "Redis synchronous debugging eval session started");
    }

    /* Setup our debugging session: blocking socket with a write timeout. */
    connBlock(ldb.conn);
    connSendTimeout(ldb.conn,5000);
    ldb.active = 1;

    /* First argument of EVAL is the script itself. We split it into different
     * lines since this is the way the debugger accesses the source code. */
    sds srcstring = sdsdup(c->argv[1]->ptr);
    size_t srclen = sdslen(srcstring);
    /* Strip trailing newlines before splitting. */
    while(srclen && (srcstring[srclen-1] == '\n' ||
                     srcstring[srclen-1] == '\r'))
    {
        srcstring[--srclen] = '\0';
    }
    sdssetlen(srcstring,srclen);
    ldb.src = sdssplitlen(srcstring,sdslen(srcstring),"\n",1,&ldb.lines);
    sdsfree(srcstring);
    return 1;
}
+
+/* End a debugging session after the EVAL call with debugging enabled
+ * returned. */
/* End a debugging session after the EVAL call with debugging enabled
 * returned. */
void ldbEndSession(client *c) {
    /* Emit the remaining logs and an <endsession> mark. */
    ldbLog(sdsnew("<endsession>"));
    ldbSendLogs();

    /* If it's a fork()ed session, we just exit. */
    if (ldb.forked) {
        /* Flush the reply to the socket, then terminate the child:
         * exitFromChild() does not return. */
        writeToClient(c,0);
        serverLog(LL_NOTICE,"Lua debugging session child exiting");
        exitFromChild(0);
    } else {
        serverLog(LL_NOTICE,
            "Redis synchronous debugging eval session ended");
    }

    /* Otherwise let's restore client's state. */
    connNonBlock(ldb.conn);
    connSendTimeout(ldb.conn,0);

    /* Close the client connection after sending the final EVAL reply
     * in order to signal the end of the debugging session. */
    c->flags |= CLIENT_CLOSE_AFTER_REPLY;

    /* Cleanup. */
    sdsfreesplitres(ldb.src,ldb.lines);
    ldb.lines = 0;
    ldb.active = 0;
}
+
+/* If the specified pid is among the list of children spawned for
+ * forked debugging sessions, it is removed from the children list.
+ * If the pid was found non-zero is returned. */
+int ldbRemoveChild(pid_t pid) {
+ listNode *ln = listSearchKey(ldb.children,(void*)(unsigned long)pid);
+ if (ln) {
+ listDelNode(ldb.children,ln);
+ return 1;
+ }
+ return 0;
+}
+
+/* Return the number of children we still did not receive termination
+ * acknowledge via wait() in the parent process. */
/* Return the number of children we still did not receive termination
 * acknowledge via wait() in the parent process. */
int ldbPendingChildren(void) {
    return listLength(ldb.children);
}
+
+/* Kill all the forked sessions. */
+void ldbKillForkedSessions(void) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(ldb.children,&li);
+ while((ln = listNext(&li))) {
+ pid_t pid = (unsigned long) ln->value;
+ serverLog(LL_NOTICE,"Killing debugging session %ld",(long)pid);
+ kill(pid,SIGKILL);
+ }
+ listRelease(ldb.children);
+ ldb.children = listCreate();
+}
+
+/* Wrapper for EVAL / EVALSHA that enables debugging, and makes sure
+ * that when EVAL returns, whatever happened, the session is ended. */
+void evalGenericCommandWithDebugging(client *c, int evalsha) {
+ if (ldbStartSession(c)) {
+ evalGenericCommand(c,evalsha);
+ ldbEndSession(c);
+ } else {
+ ldbDisable(c);
+ }
+}
+
+/* Return a pointer to ldb.src source code line, considering line to be
+ * one-based, and returning a special string for out of range lines. */
+char *ldbGetSourceLine(int line) {
+ int idx = line-1;
+ if (idx < 0 || idx >= ldb.lines) return "<out of range source code line>";
+ return ldb.src[idx];
+}
+
+/* Return true if there is a breakpoint in the specified line. */
+int ldbIsBreakpoint(int line) {
+ int j;
+
+ for (j = 0; j < ldb.bpcount; j++)
+ if (ldb.bp[j] == line) return 1;
+ return 0;
+}
+
+/* Add the specified breakpoint. Ignore it if we already reached the max.
+ * Returns 1 if the breakpoint was added (or was already set). 0 if there is
+ * no space for the breakpoint or if the line is invalid. */
+int ldbAddBreakpoint(int line) {
+ if (line <= 0 || line > ldb.lines) return 0;
+ if (!ldbIsBreakpoint(line) && ldb.bpcount != LDB_BREAKPOINTS_MAX) {
+ ldb.bp[ldb.bpcount++] = line;
+ return 1;
+ }
+ return 0;
+}
+
+/* Remove the specified breakpoint, returning 1 if the operation was
+ * performed or 0 if there was no such breakpoint. */
+int ldbDelBreakpoint(int line) {
+ int j;
+
+ for (j = 0; j < ldb.bpcount; j++) {
+ if (ldb.bp[j] == line) {
+ ldb.bpcount--;
+ memmove(ldb.bp+j,ldb.bp+j+1,ldb.bpcount-j);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Expect a valid multi-bulk command in the debugging client query buffer.
+ * On success the command is parsed and returned as an array of SDS strings,
+ * otherwise NULL is returned and there is to read more buffer. */
/* Expect a valid multi-bulk command in the debugging client query buffer.
 * On success the command is parsed and returned as an array of SDS strings,
 * otherwise NULL is returned and there is to read more buffer.
 * On protocol error NULL is returned and '*err' is set to a static string
 * (do not free it). */
sds *ldbReplParseCommand(int *argcp, char** err) {
    static char* protocol_error = "protocol error";
    sds *argv = NULL;
    int argc = 0;
    if (sdslen(ldb.cbuf) == 0) return NULL;

    /* Working on a copy is simpler in this case. We can modify it freely
     * for the sake of simpler parsing. */
    sds copy = sdsdup(ldb.cbuf);
    char *p = copy;

    /* This Redis protocol parser is a joke... just the simplest thing that
     * works in this context. It is also very forgiving regarding broken
     * protocol. */

    /* Seek and parse *<count>\r\n. */
    p = strchr(p,'*'); if (!p) goto protoerr;
    char *plen = p+1; /* Multi bulk len pointer. */
    p = strstr(p,"\r\n"); if (!p) goto keep_reading;
    *p = '\0'; p += 2;
    /* atoi() is safe here: the token was bounds-checked right below. */
    *argcp = atoi(plen);
    if (*argcp <= 0 || *argcp > 1024) goto protoerr;

    /* Parse each argument. */
    argv = zmalloc(sizeof(sds)*(*argcp));
    argc = 0;
    while(argc < *argcp) {
        /* reached the end but there should be more data to read */
        if (*p == '\0') goto keep_reading;

        if (*p != '$') goto protoerr;
        plen = p+1; /* Bulk string len pointer. */
        p = strstr(p,"\r\n"); if (!p) goto keep_reading;
        *p = '\0'; p += 2;
        int slen = atoi(plen); /* Length of this arg. */
        if (slen <= 0 || slen > 1024) goto protoerr;
        /* Make sure the payload plus its trailing CRLF is fully buffered. */
        if ((size_t)(p + slen + 2 - copy) > sdslen(copy) ) goto keep_reading;
        argv[argc++] = sdsnewlen(p,slen);
        p += slen; /* Skip the already parsed argument. */
        if (p[0] != '\r' || p[1] != '\n') goto protoerr;
        p += 2; /* Skip \r\n. */
    }
    sdsfree(copy);
    return argv;

protoerr:
    *err = protocol_error;
    /* Fall through: protocol errors share the cleanup below. */
keep_reading:
    sdsfreesplitres(argv,argc);
    sdsfree(copy);
    return NULL;
}
+
+/* Log the specified line in the Lua debugger output. The line is prefixed
+ * with "->" when it is the current line and with "#" when a breakpoint is
+ * set on it. */
+void ldbLogSourceLine(int lnum) {
+ char *line = ldbGetSourceLine(lnum);
+ char *prefix;
+ int bp = ldbIsBreakpoint(lnum);
+ int current = ldb.currentline == lnum;
+
+ if (current && bp)
+ prefix = "->#";
+ else if (current)
+ prefix = "-> ";
+ else if (bp)
+ prefix = " #";
+ else
+ prefix = " ";
+ /* Ownership of 'thisline' is passed to ldbLog(): do not free it here. */
+ sds thisline = sdscatprintf(sdsempty(),"%s%-3d %s", prefix, lnum, line);
+ ldbLog(thisline);
+}
+
+/* Implement the "list" command of the Lua debugger. If around is 0
+ * the whole file is listed, otherwise only a small portion of the file
+ * around the specified line is shown. When a line number is specified
+ * the amount of context (lines before/after) is specified via the
+ * 'context' argument. */
+void ldbList(int around, int context) {
+ int j;
+
+ for (j = 1; j <= ldb.lines; j++) {
+ /* Skip lines outside the [around-context, around+context] window. */
+ if (around != 0 && abs(around-j) > context) continue;
+ ldbLogSourceLine(j);
+ }
+}
+
+/* Append a human readable representation of the Lua value at position 'idx'
+ * on the stack of the 'lua' state, to the SDS string passed as argument.
+ * The new SDS string with the represented value attached is returned.
+ * Used in order to implement ldbLogStackValue().
+ *
+ * The element is not automatically removed from the stack, nor it is
+ * converted to a different type.
+ *
+ * 'level' is the current recursion depth, used to stop on deeply nested
+ * (or self referencing) tables. */
+#define LDB_MAX_VALUES_DEPTH (LUA_MINSTACK/2)
+sds ldbCatStackValueRec(sds s, lua_State *lua, int idx, int level) {
+ int t = lua_type(lua,idx);
+
+ if (level++ == LDB_MAX_VALUES_DEPTH)
+ return sdscat(s,"<max recursion level reached! Nested table?>");
+
+ switch(t) {
+ case LUA_TSTRING:
+ {
+ size_t strl;
+ char *strp = (char*)lua_tolstring(lua,idx,&strl);
+ s = sdscatrepr(s,strp,strl); /* Quoted and escaped form. */
+ }
+ break;
+ case LUA_TBOOLEAN:
+ s = sdscat(s,lua_toboolean(lua,idx) ? "true" : "false");
+ break;
+ case LUA_TNUMBER:
+ s = sdscatprintf(s,"%g",(double)lua_tonumber(lua,idx));
+ break;
+ case LUA_TNIL:
+ s = sdscatlen(s,"nil",3);
+ break;
+ case LUA_TTABLE:
+ {
+ int expected_index = 1; /* First index we expect in an array. */
+ int is_array = 1; /* Will be set to 0 if the check fails. */
+ /* Note: we create two representations at the same time, one
+ * assuming the table is an array, one assuming it is not. At the
+ * end we know what is true and select the right one. */
+ sds repr1 = sdsempty();
+ sds repr2 = sdsempty();
+ lua_pushnil(lua); /* The first key to start the iteration is nil. */
+ /* NOTE(review): 'idx-1' compensates for the key just pushed so it
+ * still refers to the table; this only works for negative
+ * (relative) indexes, which is what all callers here pass. */
+ while (lua_next(lua,idx-1)) {
+ /* Test if so far the table looks like an array. */
+ if (is_array &&
+ (lua_type(lua,-2) != LUA_TNUMBER ||
+ lua_tonumber(lua,-2) != expected_index)) is_array = 0;
+ /* Stack now: table, key, value */
+ /* Array repr. */
+ repr1 = ldbCatStackValueRec(repr1,lua,-1,level);
+ repr1 = sdscatlen(repr1,"; ",2);
+ /* Full repr. */
+ repr2 = sdscatlen(repr2,"[",1);
+ repr2 = ldbCatStackValueRec(repr2,lua,-2,level);
+ repr2 = sdscatlen(repr2,"]=",2);
+ repr2 = ldbCatStackValueRec(repr2,lua,-1,level);
+ repr2 = sdscatlen(repr2,"; ",2);
+ lua_pop(lua,1); /* Stack: table, key. Ready for next iteration. */
+ expected_index++;
+ }
+ /* Strip the last "; " from both the representations. */
+ if (sdslen(repr1)) sdsrange(repr1,0,-3);
+ if (sdslen(repr2)) sdsrange(repr2,0,-3);
+ /* Select the right one and discard the other. */
+ s = sdscatlen(s,"{",1);
+ s = sdscatsds(s,is_array ? repr1 : repr2);
+ s = sdscatlen(s,"}",1);
+ sdsfree(repr1);
+ sdsfree(repr2);
+ }
+ break;
+ case LUA_TFUNCTION:
+ case LUA_TUSERDATA:
+ case LUA_TTHREAD:
+ case LUA_TLIGHTUSERDATA:
+ {
+ /* Opaque types: show the type name and the raw pointer. */
+ const void *p = lua_topointer(lua,idx);
+ char *typename = "unknown";
+ if (t == LUA_TFUNCTION) typename = "function";
+ else if (t == LUA_TUSERDATA) typename = "userdata";
+ else if (t == LUA_TTHREAD) typename = "thread";
+ else if (t == LUA_TLIGHTUSERDATA) typename = "light-userdata";
+ s = sdscatprintf(s,"\"%s@%p\"",typename,p);
+ }
+ break;
+ default:
+ s = sdscat(s,"\"<unknown-lua-type>\"");
+ break;
+ }
+ return s;
+}
+
+/* Higher level wrapper for ldbCatStackValueRec() that just uses an initial
+ * recursion level of '0'. See ldbCatStackValueRec() for the semantics of
+ * 's' and 'idx'. */
+sds ldbCatStackValue(sds s, lua_State *lua, int idx) {
+ return ldbCatStackValueRec(s,lua,idx,0);
+}
+
+/* Produce a debugger log entry representing the value of the Lua object
+ * currently on the top of the stack. The element is not popped nor modified.
+ * Check ldbCatStackValue() for the actual implementation. */
+void ldbLogStackValue(lua_State *lua, char *prefix) {
+ sds s = sdsnew(prefix);
+ s = ldbCatStackValue(s,lua,-1);
+ /* 's' ownership is passed to ldbLogWithMaxLen(), which also applies
+ * the ldb.maxlen truncation. */
+ ldbLogWithMaxLen(s);
+}
+
+char *ldbRedisProtocolToHuman_Int(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Bulk(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Status(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_MultiBulk(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Set(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Map(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Null(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Bool(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Double(sds *o, char *reply);
+
+/* Get Redis protocol from 'reply' and appends it in human readable form to
+ * the passed SDS string 'o'.
+ *
+ * Note that the SDS string is passed by reference (pointer of pointer to
+ * char*) so that we can return a modified pointer, as for SDS semantics.
+ *
+ * The returned pointer is the first byte past the consumed reply, so the
+ * aggregate handlers below can call this recursively element by element.
+ * If the type byte matches no known RESP type, 'reply' is returned
+ * unmodified and nothing is appended. Both '+' statuses and '-' errors
+ * are rendered by the same status handler. */
+char *ldbRedisProtocolToHuman(sds *o, char *reply) {
+ char *p = reply;
+ switch(*p) {
+ case ':': p = ldbRedisProtocolToHuman_Int(o,reply); break;
+ case '$': p = ldbRedisProtocolToHuman_Bulk(o,reply); break;
+ case '+': p = ldbRedisProtocolToHuman_Status(o,reply); break;
+ case '-': p = ldbRedisProtocolToHuman_Status(o,reply); break;
+ case '*': p = ldbRedisProtocolToHuman_MultiBulk(o,reply); break;
+ case '~': p = ldbRedisProtocolToHuman_Set(o,reply); break;
+ case '%': p = ldbRedisProtocolToHuman_Map(o,reply); break;
+ case '_': p = ldbRedisProtocolToHuman_Null(o,reply); break;
+ case '#': p = ldbRedisProtocolToHuman_Bool(o,reply); break;
+ case ',': p = ldbRedisProtocolToHuman_Double(o,reply); break;
+ }
+ return p;
+}
+
+/* The following functions are helpers for ldbRedisProtocolToHuman(), each
+ * take care of a given Redis return type. Note that the reply is trusted
+ * to be well formed (it is generated by Redis itself), so the strchr()
+ * results are not checked for NULL. */
+
+/* ":<int>\r\n" -> the integer digits verbatim. */
+char *ldbRedisProtocolToHuman_Int(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ *o = sdscatlen(*o,reply+1,p-reply-1);
+ return p+2;
+}
+
+/* "$<len>\r\n<payload>\r\n" -> quoted payload, or "NULL" for the RESP2
+ * null bulk ($-1). */
+char *ldbRedisProtocolToHuman_Bulk(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ long long bulklen;
+
+ string2ll(reply+1,p-reply-1,&bulklen);
+ if (bulklen == -1) {
+ *o = sdscatlen(*o,"NULL",4);
+ return p+2;
+ } else {
+ *o = sdscatrepr(*o,p+2,bulklen);
+ return p+2+bulklen+2; /* Skip header, payload and trailing CRLF. */
+ }
+}
+
+/* "+<status>\r\n" or "-<error>\r\n" -> the line quoted, including the
+ * leading '+' or '-' type byte so the user can tell them apart. */
+char *ldbRedisProtocolToHuman_Status(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+
+ *o = sdscatrepr(*o,reply,p-reply);
+ return p+2;
+}
+
+/* "*<n>\r\n<elem>..." -> "[e1,e2,...]" rendering each element recursively,
+ * or "NULL" for the RESP2 null array (*-1). */
+char *ldbRedisProtocolToHuman_MultiBulk(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ long long mbulklen;
+ int j = 0;
+
+ string2ll(reply+1,p-reply-1,&mbulklen);
+ p += 2;
+ if (mbulklen == -1) {
+ *o = sdscatlen(*o,"NULL",4);
+ return p;
+ }
+ *o = sdscatlen(*o,"[",1);
+ for (j = 0; j < mbulklen; j++) {
+ p = ldbRedisProtocolToHuman(o,p);
+ if (j != mbulklen-1) *o = sdscatlen(*o,",",1);
+ }
+ *o = sdscatlen(*o,"]",1);
+ return p;
+}
+
+/* "~<n>\r\n<elem>..." (RESP3 set) -> "~(e1,e2,...)". */
+char *ldbRedisProtocolToHuman_Set(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ long long mbulklen;
+ int j = 0;
+
+ string2ll(reply+1,p-reply-1,&mbulklen);
+ p += 2;
+ *o = sdscatlen(*o,"~(",2);
+ for (j = 0; j < mbulklen; j++) {
+ p = ldbRedisProtocolToHuman(o,p);
+ if (j != mbulklen-1) *o = sdscatlen(*o,",",1);
+ }
+ *o = sdscatlen(*o,")",1);
+ return p;
+}
+
+/* "%<n>\r\n<key><val>..." (RESP3 map) -> "{k1 => v1,...}". Note that 'n'
+ * counts pairs, so two elements are consumed per iteration. */
+char *ldbRedisProtocolToHuman_Map(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ long long mbulklen;
+ int j = 0;
+
+ string2ll(reply+1,p-reply-1,&mbulklen);
+ p += 2;
+ *o = sdscatlen(*o,"{",1);
+ for (j = 0; j < mbulklen; j++) {
+ p = ldbRedisProtocolToHuman(o,p);
+ *o = sdscatlen(*o," => ",4);
+ p = ldbRedisProtocolToHuman(o,p);
+ if (j != mbulklen-1) *o = sdscatlen(*o,",",1);
+ }
+ *o = sdscatlen(*o,"}",1);
+ return p;
+}
+
+/* "_\r\n" (RESP3 null) -> "(null)". */
+char *ldbRedisProtocolToHuman_Null(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ *o = sdscatlen(*o,"(null)",6);
+ return p+2;
+}
+
+/* "#t\r\n" / "#f\r\n" (RESP3 boolean) -> "#true" / "#false". */
+char *ldbRedisProtocolToHuman_Bool(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ if (reply[1] == 't')
+ *o = sdscatlen(*o,"#true",5);
+ else
+ *o = sdscatlen(*o,"#false",6);
+ return p+2;
+}
+
+/* ",<double>\r\n" (RESP3 double) -> "(double) <digits>" verbatim. */
+char *ldbRedisProtocolToHuman_Double(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ *o = sdscatlen(*o,"(double) ",9);
+ *o = sdscatlen(*o,reply+1,p-reply-1);
+ return p+2;
+}
+
+/* Log a Redis reply as debugger output, in a human readable format.
+ * The reply must be a well formed RESP encoded string. Truncation to
+ * ldb.maxlen bytes is performed by ldbLogWithMaxLen(), which also takes
+ * ownership of the log line. */
+void ldbLogRedisReply(char *reply) {
+ sds log = sdsnew("<reply> ");
+ ldbRedisProtocolToHuman(&log,reply);
+ ldbLogWithMaxLen(log);
+}
+
+/* Implements the "print <var>" command of the Lua debugger. It scans for Lua
+ * var "varname" starting from the current stack frame up to the top stack
+ * frame. The first matching variable is printed. */
+void ldbPrint(lua_State *lua, char *varname) {
+ lua_Debug ar;
+
+ int l = 0; /* Stack level. */
+ while (lua_getstack(lua,l,&ar) != 0) {
+ l++;
+ const char *name;
+ int i = 1; /* Variable index. */
+ /* lua_getlocal() pushes the local's value on the stack, so each
+ * branch below must pop it exactly once. */
+ while((name = lua_getlocal(lua,&ar,i)) != NULL) {
+ i++;
+ if (strcmp(varname,name) == 0) {
+ ldbLogStackValue(lua,"<value> ");
+ lua_pop(lua,1);
+ return;
+ } else {
+ lua_pop(lua,1); /* Discard the var value on the stack. */
+ }
+ }
+ }
+
+ /* Let's try with global vars in two selected cases */
+ if (!strcmp(varname,"ARGV") || !strcmp(varname,"KEYS")) {
+ lua_getglobal(lua, varname);
+ ldbLogStackValue(lua,"<value> ");
+ lua_pop(lua,1);
+ } else {
+ ldbLog(sdsnew("No such variable."));
+ }
+}
+
+/* Implements the "print" command (without arguments) of the Lua debugger.
+ * Prints all the variables in the current stack frame (level 0 only,
+ * unlike ldbPrint() which walks all levels). */
+void ldbPrintAll(lua_State *lua) {
+ lua_Debug ar;
+ int vars = 0;
+
+ if (lua_getstack(lua,0,&ar) != 0) {
+ const char *name;
+ int i = 1; /* Variable index. */
+ while((name = lua_getlocal(lua,&ar,i)) != NULL) {
+ i++;
+ /* Skip Lua internal temporaries, reported as "(*temporary)". */
+ if (!strstr(name,"(*temporary)")) {
+ sds prefix = sdscatprintf(sdsempty(),"<value> %s = ",name);
+ ldbLogStackValue(lua,prefix);
+ sdsfree(prefix);
+ vars++;
+ }
+ lua_pop(lua,1); /* Pop the value pushed by lua_getlocal(). */
+ }
+ }
+
+ if (vars == 0) {
+ ldbLog(sdsnew("No local variables in the current context."));
+ }
+}
+
+/* Implements the break command to list, add and remove breakpoints.
+ * With no arguments the current breakpoints are listed. Otherwise each
+ * argument is a line number: a positive number adds a breakpoint, a
+ * negative number removes the breakpoint at line '-number', and 0
+ * removes all the breakpoints at once. */
+void ldbBreak(sds *argv, int argc) {
+ if (argc == 1) {
+ if (ldb.bpcount == 0) {
+ ldbLog(sdsnew("No breakpoints set. Use 'b <line>' to add one."));
+ return;
+ } else {
+ ldbLog(sdscatfmt(sdsempty(),"%i breakpoints set:",ldb.bpcount));
+ int j;
+ for (j = 0; j < ldb.bpcount; j++)
+ ldbLogSourceLine(ldb.bp[j]);
+ }
+ } else {
+ int j;
+ for (j = 1; j < argc; j++) {
+ char *arg = argv[j];
+ long line;
+ if (!string2l(arg,sdslen(arg),&line)) {
+ ldbLog(sdscatfmt(sdsempty(),"Invalid argument:'%s'",arg));
+ } else {
+ if (line == 0) {
+ /* Zero means: remove all the breakpoints. */
+ ldb.bpcount = 0;
+ ldbLog(sdsnew("All breakpoints removed."));
+ } else if (line > 0) {
+ if (ldb.bpcount == LDB_BREAKPOINTS_MAX) {
+ ldbLog(sdsnew("Too many breakpoints set."));
+ } else if (ldbAddBreakpoint(line)) {
+ /* Echo the line with one line of context. */
+ ldbList(line,1);
+ } else {
+ ldbLog(sdsnew("Wrong line number."));
+ }
+ } else if (line < 0) {
+ if (ldbDelBreakpoint(-line))
+ ldbLog(sdsnew("Breakpoint removed."));
+ else
+ ldbLog(sdsnew("No breakpoint in the specified line."));
+ }
+ }
+ }
+ }
+}
+
+/* Implements the Lua debugger "eval" command. It just compiles the user
+ * passed fragment of code and executes it, showing the result left on
+ * the stack. The fragment is first tried as an expression (prepending
+ * "return ") and, on compilation failure, retried as a statement. */
+void ldbEval(lua_State *lua, sds *argv, int argc) {
+ /* Glue the script together if it is composed of multiple arguments. */
+ sds code = sdsjoinsds(argv+1,argc-1," ",1);
+ sds expr = sdscatsds(sdsnew("return "),code);
+
+ /* Try to compile it as an expression, prepending "return ". */
+ if (luaL_loadbuffer(lua,expr,sdslen(expr),"@ldb_eval")) {
+ lua_pop(lua,1); /* Discard the compile error of the first attempt. */
+ /* Failed? Try as a statement. */
+ if (luaL_loadbuffer(lua,code,sdslen(code),"@ldb_eval")) {
+ ldbLog(sdscatfmt(sdsempty(),"<error> %s",lua_tostring(lua,-1)));
+ lua_pop(lua,1);
+ sdsfree(code);
+ sdsfree(expr);
+ return;
+ }
+ }
+
+ /* Call it. */
+ sdsfree(code);
+ sdsfree(expr);
+ if (lua_pcall(lua,0,1,0)) {
+ ldbLog(sdscatfmt(sdsempty(),"<error> %s",lua_tostring(lua,-1)));
+ lua_pop(lua,1); /* Discard the error object. */
+ return;
+ }
+ ldbLogStackValue(lua,"<retval> ");
+ lua_pop(lua,1); /* Discard the returned value. */
+}
+
+/* Implement the debugger "redis" command. We use a trick in order to make
+ * the implementation very simple: we just call the Lua redis.call() command
+ * implementation, with ldb.step enabled, so as a side effect the Redis command
+ * and its reply are logged. */
+void ldbRedis(lua_State *lua, sds *argv, int argc) {
+ int j;
+
+ if (!lua_checkstack(lua, argc + 1)) {
+ /* Increase the Lua stack if needed to make sure there is enough room
+ * to push 'argc + 1' elements to the stack. On failure, return error.
+ * Notice that we need, in worst case, 'argc + 1' elements because we push all the arguments
+ * given by the user (without the first argument) and we also push the 'redis' global table and
+ * 'redis.call' function so:
+ * (1 (redis table)) + (1 (redis.call function)) + (argc - 1 (all arguments without the first)) = argc + 1
+ *
+ * Log the failure as plain text: ldbLogRedisReply() expects a RESP
+ * encoded reply and would emit an empty "<reply> " line for this
+ * message, losing it entirely. */
+ ldbLog(sdsnew("<error> max lua stack reached"));
+ return;
+ }
+
+ lua_getglobal(lua,"redis");
+ lua_pushstring(lua,"call");
+ lua_gettable(lua,-2); /* Stack: redis, redis.call */
+ for (j = 1; j < argc; j++)
+ lua_pushlstring(lua,argv[j],sdslen(argv[j]));
+ ldb.step = 1; /* Force redis.call() to log. */
+ lua_pcall(lua,argc-1,1,0); /* Stack: redis, result */
+ ldb.step = 0; /* Disable logging. */
+ lua_pop(lua,2); /* Discard the result and clean the stack. */
+}
+
+/* Implements "trace" command of the Lua debugger. It just prints a backtrace
+ * querying Lua starting from the current callframe back to the outer one. */
+void ldbTrace(lua_State *lua) {
+ lua_Debug ar;
+ int level = 0;
+
+ while(lua_getstack(lua,level,&ar)) {
+ lua_getinfo(lua,"Snl",&ar);
+ /* Only frames belonging to the user script are shown: internal
+ * wrappers would just confuse the user. */
+ if(strstr(ar.short_src,"user_script") != NULL) {
+ ldbLog(sdscatprintf(sdsempty(),"%s %s:",
+ (level == 0) ? "In" : "From",
+ ar.name ? ar.name : "top level"));
+ ldbLogSourceLine(ar.currentline);
+ }
+ level++;
+ }
+ if (level == 0) {
+ ldbLog(sdsnew("<error> Can't retrieve Lua stack."));
+ }
+}
+
+/* Implements the debugger "maxlen" command. It just queries or sets the
+ * ldb.maxlen variable. A value of 0 means unlimited; any other value
+ * smaller than 60 is raised to 60. */
+void ldbMaxlen(sds *argv, int argc) {
+ if (argc == 2) {
+ int newval = atoi(argv[1]);
+ ldb.maxlen_hint_sent = 1; /* User knows about this command. */
+ if (newval != 0 && newval <= 60) newval = 60; /* Enforce the minimum. */
+ ldb.maxlen = newval;
+ }
+ if (ldb.maxlen) {
+ ldbLog(sdscatprintf(sdsempty(),"<value> replies are truncated at %d bytes.",(int)ldb.maxlen));
+ } else {
+ ldbLog(sdscatprintf(sdsempty(),"<value> replies are unlimited."));
+ }
+}
+
+/* Read debugging commands from client.
+ * Return C_OK if the debugging session is continuing, otherwise
+ * C_ERR if the client closed the connection or is timing out.
+ *
+ * This is the debugger REPL: it blocks reading commands from the
+ * debugging client until a command that resumes the script ("step",
+ * "next" or "continue") is received. */
+int ldbRepl(lua_State *lua) {
+ sds *argv;
+ int argc;
+ char* err = NULL;
+
+ /* We continue processing commands until a command that should return
+ * to the Lua interpreter is found. */
+ while(1) {
+ while((argv = ldbReplParseCommand(&argc, &err)) == NULL) {
+ char buf[1024];
+ if (err) {
+ /* NOTE(review): luaError() presumably raises a Lua error and
+ * does not return here -- confirm, since otherwise we would
+ * keep reading after a protocol error. */
+ luaPushError(lua, err);
+ luaError(lua);
+ }
+ int nread = connRead(ldb.conn,buf,sizeof(buf));
+ if (nread <= 0) {
+ /* Make sure the script runs without user input since the
+ * client is no longer connected. */
+ ldb.step = 0;
+ ldb.bpcount = 0;
+ return C_ERR;
+ }
+ ldb.cbuf = sdscatlen(ldb.cbuf,buf,nread);
+ /* after 1M we will exit with an error
+ * so that the client will not blow the memory
+ */
+ if (sdslen(ldb.cbuf) > 1<<20) {
+ sdsfree(ldb.cbuf);
+ ldb.cbuf = sdsempty();
+ luaPushError(lua, "max client buffer reached");
+ luaError(lua);
+ }
+ }
+
+ /* Flush the old buffer. */
+ sdsfree(ldb.cbuf);
+ ldb.cbuf = sdsempty();
+
+ /* Execute the command. */
+ if (!strcasecmp(argv[0],"h") || !strcasecmp(argv[0],"help")) {
+ldbLog(sdsnew("Redis Lua debugger help:"));
+ldbLog(sdsnew("[h]elp Show this help."));
+ldbLog(sdsnew("[s]tep Run current line and stop again."));
+ldbLog(sdsnew("[n]ext Alias for step."));
+ldbLog(sdsnew("[c]ontinue Run till next breakpoint."));
+ldbLog(sdsnew("[l]ist List source code around current line."));
+ldbLog(sdsnew("[l]ist [line] List source code around [line]."));
+ldbLog(sdsnew(" line = 0 means: current position."));
+ldbLog(sdsnew("[l]ist [line] [ctx] In this form [ctx] specifies how many lines"));
+ldbLog(sdsnew(" to show before/after [line]."));
+ldbLog(sdsnew("[w]hole List all source code. Alias for 'list 1 1000000'."));
+ldbLog(sdsnew("[p]rint Show all the local variables."));
+ldbLog(sdsnew("[p]rint <var> Show the value of the specified variable."));
+ldbLog(sdsnew(" Can also show global vars KEYS and ARGV."));
+ldbLog(sdsnew("[b]reak Show all breakpoints."));
+ldbLog(sdsnew("[b]reak <line> Add a breakpoint to the specified line."));
+ldbLog(sdsnew("[b]reak -<line> Remove breakpoint from the specified line."));
+ldbLog(sdsnew("[b]reak 0 Remove all breakpoints."));
+ldbLog(sdsnew("[t]race Show a backtrace."));
+ldbLog(sdsnew("[e]val <code> Execute some Lua code (in a different callframe)."));
+ldbLog(sdsnew("[r]edis <cmd> Execute a Redis command."));
+ldbLog(sdsnew("[m]axlen [len] Trim logged Redis replies and Lua var dumps to len."));
+ldbLog(sdsnew(" Specifying zero as <len> means unlimited."));
+ldbLog(sdsnew("[a]bort Stop the execution of the script. In sync"));
+ldbLog(sdsnew(" mode dataset changes will be retained."));
+ldbLog(sdsnew(""));
+ldbLog(sdsnew("Debugger functions you can call from Lua scripts:"));
+ldbLog(sdsnew("redis.debug() Produce logs in the debugger console."));
+ldbLog(sdsnew("redis.breakpoint() Stop execution like if there was a breakpoint in the"));
+ldbLog(sdsnew(" next line of code."));
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"s") || !strcasecmp(argv[0],"step") ||
+ !strcasecmp(argv[0],"n") || !strcasecmp(argv[0],"next")) {
+ /* Resume the script for a single line. */
+ ldb.step = 1;
+ break;
+ } else if (!strcasecmp(argv[0],"c") || !strcasecmp(argv[0],"continue")){
+ /* Resume until the next breakpoint (or the end of the script). */
+ break;
+ } else if (!strcasecmp(argv[0],"t") || !strcasecmp(argv[0],"trace")) {
+ ldbTrace(lua);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"m") || !strcasecmp(argv[0],"maxlen")) {
+ ldbMaxlen(argv,argc);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"b") || !strcasecmp(argv[0],"break")) {
+ ldbBreak(argv,argc);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"e") || !strcasecmp(argv[0],"eval")) {
+ ldbEval(lua,argv,argc);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"a") || !strcasecmp(argv[0],"abort")) {
+ luaPushError(lua, "script aborted for user request");
+ luaError(lua);
+ } else if (argc > 1 &&
+ (!strcasecmp(argv[0],"r") || !strcasecmp(argv[0],"redis"))) {
+ ldbRedis(lua,argv,argc);
+ ldbSendLogs();
+ } else if ((!strcasecmp(argv[0],"p") || !strcasecmp(argv[0],"print"))) {
+ if (argc == 2)
+ ldbPrint(lua,argv[1]);
+ else
+ ldbPrintAll(lua);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"l") || !strcasecmp(argv[0],"list")){
+ int around = ldb.currentline, ctx = 5;
+ if (argc > 1) {
+ int num = atoi(argv[1]);
+ if (num > 0) around = num;
+ }
+ if (argc > 2) ctx = atoi(argv[2]);
+ ldbList(around,ctx);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"w") || !strcasecmp(argv[0],"whole")){
+ ldbList(1,1000000);
+ ldbSendLogs();
+ } else {
+ ldbLog(sdsnew("<error> Unknown Redis Lua debugger command or "
+ "wrong number of arguments."));
+ ldbSendLogs();
+ }
+
+ /* Free the command vector. */
+ sdsfreesplitres(argv,argc);
+ }
+
+ /* Free the current command argv if we break inside the while loop. */
+ sdsfreesplitres(argv,argc);
+ return C_OK;
+}
+
+/* This is the core of our Lua debugger, called each time Lua is about
+ * to start executing a new line (and, for the timeout check, on COUNT
+ * hook events). It decides whether to stop and enter the debugger REPL
+ * because of a step request, a breakpoint, or a timeout. */
+void luaLdbLineHook(lua_State *lua, lua_Debug *ar) {
+ scriptRunCtx* rctx = luaGetFromRegistry(lua, REGISTRY_RUN_CTX_NAME);
+ serverAssert(rctx); /* Only supported inside script invocation */
+ lua_getstack(lua,0,ar);
+ lua_getinfo(lua,"Sl",ar);
+ ldb.currentline = ar->currentline;
+
+ /* ldb.luabp is the flag set by the scripted redis.breakpoint() call. */
+ int bp = ldbIsBreakpoint(ldb.currentline) || ldb.luabp;
+ int timeout = 0;
+
+ /* Events outside our script are not interesting. */
+ if(strstr(ar->short_src,"user_script") == NULL) return;
+
+ /* Check if a timeout occurred. */
+ if (ar->event == LUA_HOOKCOUNT && ldb.step == 0 && bp == 0) {
+ mstime_t elapsed = elapsedMs(rctx->start_time);
+ mstime_t timelimit = server.busy_reply_threshold ?
+ server.busy_reply_threshold : 5000;
+ if (elapsed >= timelimit) {
+ timeout = 1;
+ ldb.step = 1; /* Force a stop at the next check below. */
+ } else {
+ return; /* No timeout, ignore the COUNT event. */
+ }
+ }
+
+ if (ldb.step || bp) {
+ char *reason = "step over";
+ if (bp) reason = ldb.luabp ? "redis.breakpoint() called" :
+ "break point";
+ else if (timeout) reason = "timeout reached, infinite loop?";
+ ldb.step = 0;
+ ldb.luabp = 0;
+ ldbLog(sdscatprintf(sdsempty(),
+ "* Stopped at %d, stop reason = %s",
+ ldb.currentline, reason));
+ ldbLogSourceLine(ldb.currentline);
+ ldbSendLogs();
+ if (ldbRepl(lua) == C_ERR && timeout) {
+ /* If the client closed the connection and we have a timeout
+ * connection, let's kill the script otherwise the process
+ * will remain blocked indefinitely. */
+ luaPushError(lua, "timeout during Lua debugging with client closing connection");
+ luaError(lua);
+ }
+ /* Reset the timeout clock: time spent in the REPL must not count
+ * against the script execution time limit. */
+ rctx->start_time = getMonotonicUs();
+ }
+}
diff --git a/src/evict.c b/src/evict.c
new file mode 100644
index 0000000..96a0fef
--- /dev/null
+++ b/src/evict.c
@@ -0,0 +1,757 @@
+/* Maxmemory directive handling (LRU eviction and other policies).
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "bio.h"
+#include "atomicvar.h"
+#include "script.h"
+#include <math.h>
+
+/* ----------------------------------------------------------------------------
+ * Data structures
+ * --------------------------------------------------------------------------*/
+
+/* To improve the quality of the LRU approximation we take a set of keys
+ * that are good candidate for eviction across performEvictions() calls.
+ *
+ * Entries inside the eviction pool are taken ordered by idle time, putting
+ * greater idle times to the right (ascending order).
+ *
+ * When an LFU policy is used instead, a reverse frequency indication is used
+ * instead of the idle time, so that we still evict by larger value (larger
+ * inverse frequency means to evict keys with the least frequent accesses).
+ *
+ * Empty entries have the key pointer set to NULL. */
+#define EVPOOL_SIZE 16
+#define EVPOOL_CACHED_SDS_SIZE 255
+struct evictionPoolEntry {
+ unsigned long long idle; /* Object idle time (inverse frequency for LFU) */
+ sds key; /* Key name. */
+ sds cached; /* Cached SDS object for key name: reused across
+ * insertions to avoid allocations in the hot path.
+ * 'key' points here when the name fits, otherwise
+ * 'key' is a separate sdsdup() allocation. */
+ int dbid; /* Key DB number. */
+};
+
+/* The single, lazily allocated pool (see evictionPoolAlloc()). */
+static struct evictionPoolEntry *EvictionPoolLRU;
+
+/* ----------------------------------------------------------------------------
+ * Implementation of eviction, aging and LRU
+ * --------------------------------------------------------------------------*/
+
+/* Return the LRU clock, based on the clock resolution. This is a time
+ * in a reduced-bits format that can be used to set and check the
+ * object->lru field of redisObject structures. The value is truncated
+ * to LRU_CLOCK_MAX bits, so it wraps around over time (callers handle
+ * the wrap, see estimateObjectIdleTime()). */
+unsigned int getLRUClock(void) {
+ return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
+}
+
+/* This function is used to obtain the current LRU clock.
+ * If the current resolution is lower than the frequency we refresh the
+ * LRU clock (as it should be in production servers) we return the
+ * precomputed value, otherwise we need to resort to a system call.
+ *
+ * NOTE(review): server.lruclock is assumed to be refreshed elsewhere at
+ * the server.hz frequency -- confirm against the cron implementation. */
+unsigned int LRU_CLOCK(void) {
+ unsigned int lruclock;
+ if (1000/server.hz <= LRU_CLOCK_RESOLUTION) {
+ lruclock = server.lruclock; /* Cached value is fresh enough. */
+ } else {
+ lruclock = getLRUClock(); /* Cached value would be too coarse. */
+ }
+ return lruclock;
+}
+
+/* Given an object returns the min number of milliseconds the object was never
+ * requested, using an approximated LRU algorithm. The second branch handles
+ * the case where the reduced-bits LRU clock wrapped around past o->lru. */
+unsigned long long estimateObjectIdleTime(robj *o) {
+ unsigned long long lruclock = LRU_CLOCK();
+ if (lruclock >= o->lru) {
+ return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
+ } else {
+ return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
+ LRU_CLOCK_RESOLUTION;
+ }
+}
+
+/* LRU approximation algorithm
+ *
+ * Redis uses an approximation of the LRU algorithm that runs in constant
+ * memory. Every time there is a key to expire, we sample N keys (with
+ * N very small, usually in around 5) to populate a pool of best keys to
+ * evict of M keys (the pool size is defined by EVPOOL_SIZE).
+ *
+ * The N keys sampled are added in the pool of good keys to expire (the one
+ * with an old access time) if they are better than one of the current keys
+ * in the pool.
+ *
+ * After the pool is populated, the best key we have in the pool is expired.
+ * However note that we don't remove keys from the pool when they are deleted
+ * so the pool may contain keys that no longer exist.
+ *
+ * When we try to evict a key, and all the entries in the pool don't exist
+ * we populate it again. This time we'll be sure that the pool has at least
+ * one key that can be evicted, if there is at least one key that can be
+ * evicted in the whole database. */
+
+/* Create a new eviction pool. Each entry preallocates a cached SDS string
+ * of EVPOOL_CACHED_SDS_SIZE bytes so that key names can usually be copied
+ * without allocating (see evictionPoolPopulate()). Called once at startup;
+ * the pool is never freed. */
+void evictionPoolAlloc(void) {
+ struct evictionPoolEntry *ep;
+ int j;
+
+ ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE);
+ for (j = 0; j < EVPOOL_SIZE; j++) {
+ ep[j].idle = 0;
+ ep[j].key = NULL; /* NULL marks an empty bucket. */
+ ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
+ ep[j].dbid = 0;
+ }
+ EvictionPoolLRU = ep;
+}
+
+/* This is a helper function for performEvictions(), it is used in order
+ * to populate the evictionPool with a few entries every time we want to
+ * expire a key. Keys with idle time bigger than one of the current
+ * keys are added. Keys are always added if there are free entries.
+ *
+ * We insert keys on place in ascending order, so keys with the smaller
+ * idle time are on the left, and keys with the higher idle time on the
+ * right. */
+
+void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
+ int j, k, count;
+ dictEntry *samples[server.maxmemory_samples];
+
+ count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
+ for (j = 0; j < count; j++) {
+ unsigned long long idle;
+ sds key;
+ robj *o;
+ dictEntry *de;
+
+ de = samples[j];
+ key = dictGetKey(de);
+
+ /* If the dictionary we are sampling from is not the main
+ * dictionary (but the expires one) we need to lookup the key
+ * again in the key dictionary to obtain the value object. */
+ if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
+ if (sampledict != keydict) de = dictFind(keydict, key);
+ o = dictGetVal(de);
+ }
+
+ /* Calculate the idle time according to the policy. This is called
+ * idle just because the code initially handled LRU, but is in fact
+ * just a score where an higher score means better candidate. */
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
+ idle = estimateObjectIdleTime(o);
+ } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ /* When we use an LRU policy, we sort the keys by idle time
+ * so that we expire keys starting from greater idle time.
+ * However when the policy is an LFU one, we have a frequency
+ * estimation, and we want to evict keys with lower frequency
+ * first. So inside the pool we put objects using the inverted
+ * frequency subtracting the actual frequency to the maximum
+ * frequency of 255. */
+ idle = 255-LFUDecrAndReturn(o);
+ } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
+ /* In this case the sooner the expire the better. */
+ idle = ULLONG_MAX - (long)dictGetVal(de);
+ } else {
+ serverPanic("Unknown eviction policy in evictionPoolPopulate()");
+ }
+
+ /* Insert the element inside the pool.
+ * First, find the first empty bucket or the first populated
+ * bucket that has an idle time smaller than our idle time. */
+ k = 0;
+ while (k < EVPOOL_SIZE &&
+ pool[k].key &&
+ pool[k].idle < idle) k++;
+ if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
+ /* Can't insert if the element is < the worst element we have
+ * and there are no empty buckets. */
+ continue;
+ } else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
+ /* Inserting into empty position. No setup needed before insert. */
+ } else {
+ /* Inserting in the middle. Now k points to the first element
+ * greater than the element to insert. */
+ if (pool[EVPOOL_SIZE-1].key == NULL) {
+ /* Free space on the right? Insert at k shifting
+ * all the elements from k to end to the right. */
+
+ /* Save SDS before overwriting. */
+ sds cached = pool[EVPOOL_SIZE-1].cached;
+ memmove(pool+k+1,pool+k,
+ sizeof(pool[0])*(EVPOOL_SIZE-k-1));
+ pool[k].cached = cached;
+ } else {
+ /* No free space on right? Insert at k-1 */
+ k--;
+ /* Shift all elements on the left of k (included) to the
+ * left, so we discard the element with smaller idle time. */
+ sds cached = pool[0].cached; /* Save SDS before overwriting. */
+ if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
+ memmove(pool,pool+1,sizeof(pool[0])*k);
+ pool[k].cached = cached;
+ }
+ }
+
+ /* Try to reuse the cached SDS string allocated in the pool entry,
+ * because allocating and deallocating this object is costly
+ * (according to the profiler, not my fantasy. Remember:
+         * premature optimization bla bla bla.) */
+ int klen = sdslen(key);
+ if (klen > EVPOOL_CACHED_SDS_SIZE) {
+ pool[k].key = sdsdup(key);
+ } else {
+ memcpy(pool[k].cached,key,klen+1);
+ sdssetlen(pool[k].cached,klen);
+ pool[k].key = pool[k].cached;
+ }
+ pool[k].idle = idle;
+ pool[k].dbid = dbid;
+ }
+}
+
+/* ----------------------------------------------------------------------------
+ * LFU (Least Frequently Used) implementation.
+ *
+ * We have 24 total bits of space in each object in order to implement
+ * an LFU (Least Frequently Used) eviction policy, since we re-use the
+ * LRU field for this purpose.
+ *
+ * We split the 24 bits into two fields:
+ *
+ * 16 bits 8 bits
+ * +----------------+--------+
+ * | Last decr time | LOG_C  |
+ * +----------------+--------+
+ *
+ * LOG_C is a logarithmic counter that provides an indication of the access
+ * frequency. However this field must also be decremented otherwise what used
+ * to be a frequently accessed key in the past, will remain ranked like that
+ * forever, while we want the algorithm to adapt to access pattern changes.
+ *
+ * So the remaining 16 bits are used in order to store the "decrement time",
+ * a reduced-precision Unix time (we take 16 bits of the time converted
+ * in minutes since we don't care about wrapping around) where the LOG_C
+ * counter is halved if it has an high value, or just decremented if it
+ * has a low value.
+ *
+ * New keys don't start at zero, in order to have the ability to collect
+ * some accesses before being trashed away, so they start at LFU_INIT_VAL.
+ * The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL
+ * when incrementing the key, so that keys starting at LFU_INIT_VAL
+ * (or having a smaller value) have a very high chance of being incremented
+ * on access.
+ *
+ * During decrement, the value of the logarithmic counter is halved if
+ * its current value is greater than two times the LFU_INIT_VAL, otherwise
+ * it is just decremented by one.
+ * --------------------------------------------------------------------------*/
+
+/* Return the current server time expressed in minutes, truncated to the
+ * least significant 16 bits. This reduced-precision timestamp is suitable
+ * to be stored as LDT (last decrement time) in the LFU implementation. */
+unsigned long LFUGetTimeInMinutes(void) {
+    unsigned long minutes = server.unixtime / 60;
+    return minutes & 65535;
+}
+
+/* Given 'ldt', an object's last decrement time in reduced 16 bit minute
+ * precision, compute the minimum number of minutes elapsed since then.
+ * When 'ldt' is ahead of the current 16 bit clock we assume the clock
+ * wrapped around exactly once and account for it. */
+unsigned long LFUTimeElapsed(unsigned long ldt) {
+    unsigned long now = LFUGetTimeInMinutes();
+    return (now >= ldt) ? (now - ldt) : (65535 - ldt + now);
+}
+
+/* Logarithmically increment an LFU counter: the higher the current value,
+ * the lower the probability that the counter is actually incremented.
+ * The counter saturates at 255. */
+uint8_t LFULogIncr(uint8_t counter) {
+    if (counter == 255) return 255;
+    /* Probability of incrementing decreases with the counter value above
+     * LFU_INIT_VAL, scaled by the configured log factor. */
+    double delta = (double)counter - LFU_INIT_VAL;
+    if (delta < 0) delta = 0;
+    double prob = 1.0 / (delta * server.lfu_log_factor + 1);
+    double roll = (double)rand() / RAND_MAX;
+    if (roll < prob) counter++;
+    return counter;
+}
+
+/* If the object's decrement period has been reached, return the LFU counter
+ * value after applying the decrement, but do NOT update the LFU fields of
+ * the object: the access time and counter are updated in an explicit way
+ * only when the object is really accessed.
+ *
+ * The counter is decremented by one unit for every elapsed decay period,
+ * where a period is server.lfu_decay_time minutes (a decay time of zero
+ * disables decay entirely). The result saturates at zero.
+ *
+ * This function is used in order to scan the dataset for the best object
+ * to fit: as we check for the candidate, we incrementally decrement the
+ * counter of the scanned objects if needed. */
+unsigned long LFUDecrAndReturn(robj *o) {
+    unsigned long ldt = o->lru >> 8;      /* High 16 bits: last decr time. */
+    unsigned long counter = o->lru & 255; /* Low 8 bits: log counter. */
+    unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0;
+    if (num_periods)
+        counter = (num_periods > counter) ? 0 : counter - num_periods;
+    return counter;
+}
+
+/* We don't want to count AOF buffers and slaves output buffers as
+ * used memory: the eviction should use mostly data size, because
+ * counting them can cause a feedback loop when we push DELs into them:
+ * more and more DELs make the buffers bigger, so we'd need to evict more
+ * keys, in turn generating more DELs, possibly causing a massive eviction
+ * loop even until all keys are evicted.
+ *
+ * This function returns the sum of AOF and replication buffer. */
+size_t freeMemoryGetNotCountedMemory(void) {
+    size_t overhead = 0;
+
+    /* Since all replicas and replication backlog share global replication
+     * buffer, we think only the part of exceeding backlog size is the extra
+     * separate consumption of replicas.
+     *
+     * Note that although the backlog is also initially incrementally grown
+     * (pushing DELs consumes memory), it'll eventually stop growing and
+     * remain constant in size, so even if its creation will cause some
+     * eviction, it's capped, and also here to stay (no resonance effect)
+     *
+     * Note that, because we trim backlog incrementally in the background,
+     * backlog size may exceed our setting if slow replicas that reference
+     * vast replication buffer blocks disconnect. To avoid a massive eviction
+     * loop, we don't count the delayed freed replication backlog into used
+     * memory even if there are no replicas, i.e. we still regard this memory
+     * as replicas'. */
+    if ((long long)server.repl_buffer_mem > server.repl_backlog_size) {
+        /* We use list structure to manage replication buffer blocks, so backlog
+         * also occupies some extra memory, we can't know exact blocks numbers,
+         * we only get approximate size according to per block size. */
+        size_t extra_approx_size =
+            (server.repl_backlog_size/PROTO_REPLY_CHUNK_BYTES + 1) *
+            (sizeof(replBufBlock)+sizeof(listNode));
+        size_t counted_mem = server.repl_backlog_size + extra_approx_size;
+        if (server.repl_buffer_mem > counted_mem) {
+            overhead += (server.repl_buffer_mem - counted_mem);
+        }
+    }
+
+    /* The AOF buffer is transient overhead as well: skip it when AOF is on. */
+    if (server.aof_state != AOF_OFF) {
+        overhead += sdsAllocSize(server.aof_buf);
+    }
+    return overhead;
+}
+
+/* Get the memory status from the point of view of the maxmemory directive:
+ * if the memory used is under the maxmemory setting then C_OK is returned.
+ * Otherwise, if we are over the memory limit, the function returns
+ * C_ERR.
+ *
+ * The function may return additional info via reference, only if the
+ * pointers to the respective arguments is not NULL. Certain fields are
+ * populated only when C_ERR is returned:
+ *
+ *  'total'     total amount of bytes used.
+ *              (Populated both for C_ERR and C_OK)
+ *
+ *  'logical'   the amount of memory used minus the slaves/AOF buffers.
+ *              (Populated when C_ERR is returned)
+ *
+ *  'tofree'    the amount of memory that should be released
+ *              in order to return back into the memory limits.
+ *              (Populated when C_ERR is returned)
+ *
+ *  'level'     this usually ranges from 0 to 1, and reports the amount of
+ *              memory currently used. May be > 1 if we are over the memory
+ *              limit.
+ *              (Populated both for C_ERR and C_OK)
+ */
+int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level) {
+    size_t mem_reported, mem_used, mem_tofree;
+
+    /* Check if we are over the memory usage limit. If we are not, no need
+     * to subtract the slaves output buffers. We can just return ASAP. */
+    mem_reported = zmalloc_used_memory();
+    if (total) *total = mem_reported;
+
+    /* We may return ASAP if there is no need to compute the level. */
+    if (!server.maxmemory) {
+        if (level) *level = 0;
+        return C_OK;
+    }
+    if (mem_reported <= server.maxmemory && !level) return C_OK;
+
+    /* Remove the size of slaves output buffers and AOF buffer from the
+     * count of used memory. */
+    mem_used = mem_reported;
+    size_t overhead = freeMemoryGetNotCountedMemory();
+    mem_used = (mem_used > overhead) ? mem_used-overhead : 0;
+
+    /* Compute the ratio of memory usage. Note: the level is computed on the
+     * logical usage, i.e. with the buffers overhead already subtracted. */
+    if (level) *level = (float)mem_used / (float)server.maxmemory;
+
+    /* If we got past the early exit above only to compute 'level', the
+     * reported memory may still be under the limit: return ASAP here. */
+    if (mem_reported <= server.maxmemory) return C_OK;
+
+    /* Check if we are still over the memory limit. */
+    if (mem_used <= server.maxmemory) return C_OK;
+
+    /* Compute how much memory we need to free. */
+    mem_tofree = mem_used - server.maxmemory;
+
+    if (logical) *logical = mem_used;
+    if (tofree) *tofree = mem_tofree;
+
+    return C_ERR;
+}
+
+/* Return 1 if used memory would be over maxmemory after allocating
+ * 'moremem' additional bytes, 0 otherwise. Redis may reject user requests
+ * or evict keys when used memory exceeds maxmemory, especially when
+ * allocating a large amount of memory at once. */
+int overMaxmemoryAfterAlloc(size_t moremem) {
+    /* No configured limit: never over. */
+    if (server.maxmemory == 0) return 0;
+
+    /* Fast path: even without subtracting buffers we are under the limit. */
+    size_t used = zmalloc_used_memory();
+    if (used + moremem <= server.maxmemory) return 0;
+
+    /* Slow path: subtract AOF / replication buffers before deciding. */
+    size_t not_counted = freeMemoryGetNotCountedMemory();
+    used = (used > not_counted) ? used - not_counted : 0;
+    return used + moremem > server.maxmemory;
+}
+
+/* The evictionTimeProc is started when "maxmemory" has been breached and
+ * could not immediately be resolved. This will spin the event loop with short
+ * eviction cycles until the "maxmemory" condition has resolved or there are no
+ * more evictable items. */
+static int isEvictionProcRunning = 0;
+static int evictionTimeProc(
+        struct aeEventLoop *eventLoop, long long id, void *clientData) {
+    UNUSED(eventLoop);
+    UNUSED(id);
+    UNUSED(clientData);
+
+    /* Returning 0 reschedules this timer to fire again immediately. */
+    if (performEvictions() == EVICT_RUNNING) return 0;  /* keep evicting */
+
+    /* For EVICT_OK - things are good, no need to keep evicting.
+     * For EVICT_FAIL - there is nothing left to evict.  */
+    isEvictionProcRunning = 0;
+    return AE_NOMORE;
+}
+
+/* Arrange for evictionTimeProc to run on the event loop, unless it is
+ * already scheduled. */
+void startEvictionTimeProc(void) {
+    if (isEvictionProcRunning) return;
+    isEvictionProcRunning = 1;
+    aeCreateTimeEvent(server.el, 0, evictionTimeProc, NULL, NULL);
+}
+
+/* Check if it's safe to perform evictions.
+ * Returns 1 if evictions can be performed,
+ * 0 if eviction processing should be skipped.
+ */
+static int isSafeToPerformEvictions(void) {
+    /* Evictions are unsafe while a script is in timeout condition, or
+     * while we are loading data. */
+    if (isInsideYieldingLongCommand() || server.loading) return 0;
+
+    /* By default replicas should ignore maxmemory
+     * and just be masters exact copies. */
+    if (server.masterhost && server.repl_slave_ignore_maxmemory) return 0;
+
+    /* Respect a pause of the 'evict' action, whatever the reason.
+     * (Checked last: isPausedActionsWithUpdate also refreshes pause state.) */
+    return isPausedActionsWithUpdate(PAUSE_ACTION_EVICT) ? 0 : 1;
+}
+
+/* Algorithm for converting tenacity (0-100) to a per-cycle time limit
+ * expressed in microseconds. */
+static unsigned long evictionTimeLimitUs(void) {
+    serverAssert(server.maxmemory_eviction_tenacity >= 0);
+    serverAssert(server.maxmemory_eviction_tenacity <= 100);
+
+    int tenacity = server.maxmemory_eviction_tenacity;
+    if (tenacity <= 10) {
+        /* A linear progression from 0..500us */
+        return 50uL * tenacity;
+    } else if (tenacity < 100) {
+        /* A 15% geometric progression, resulting in a limit of ~2 min at tenacity==99 */
+        return (unsigned long)(500.0 * pow(1.15, tenacity - 10.0));
+    } else {
+        return ULONG_MAX; /* No limit to eviction time */
+    }
+}
+
+/* Check that memory usage is within the current "maxmemory" limit. If over
+ * "maxmemory", attempt to free memory by evicting data (if it's safe to do so).
+ *
+ * It's possible for Redis to suddenly be significantly over the "maxmemory"
+ * setting. This can happen if there is a large allocation (like a hash table
+ * resize) or even if the "maxmemory" setting is manually adjusted. Because of
+ * this, it's important to evict for a managed period of time - otherwise Redis
+ * would become unresponsive while evicting.
+ *
+ * The goal of this function is to improve the memory situation - not to
+ * immediately resolve it. In the case that some items have been evicted but
+ * the "maxmemory" limit has not been achieved, an aeTimeProc will be started
+ * which will continue to evict items until memory limits are achieved or
+ * nothing more is evictable.
+ *
+ * This should be called before execution of commands. If EVICT_FAIL is
+ * returned, commands which will result in increased memory usage should be
+ * rejected.
+ *
+ * Returns:
+ *   EVICT_OK       - memory is OK or it's not possible to perform evictions now
+ *   EVICT_RUNNING  - memory is over the limit, but eviction is still processing
+ *   EVICT_FAIL     - memory is over the limit, and there's nothing to evict
+ * */
+int performEvictions(void) {
+    /* Note, we don't goto update_metrics here because this check skips eviction
+     * as if it wasn't triggered. it's a fake EVICT_OK. */
+    if (!isSafeToPerformEvictions()) return EVICT_OK;
+
+    int keys_freed = 0;
+    size_t mem_reported, mem_tofree;
+    long long mem_freed; /* May be negative */
+    mstime_t latency, eviction_latency;
+    long long delta;
+    int slaves = listLength(server.slaves);
+    int result = EVICT_FAIL;
+
+    /* 'mem_tofree' is only populated when we are over the limit (C_ERR). */
+    if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK) {
+        result = EVICT_OK;
+        goto update_metrics;
+    }
+
+    if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) {
+        result = EVICT_FAIL;  /* We need to free memory, but policy forbids. */
+        goto update_metrics;
+    }
+
+    unsigned long eviction_time_limit_us = evictionTimeLimitUs();
+
+    mem_freed = 0;
+
+    latencyStartMonitor(latency);
+
+    monotime evictionTimer;
+    elapsedStart(&evictionTimer);
+
+    /* Try to smoke-out bugs (server.also_propagate should be empty here) */
+    serverAssert(server.also_propagate.numops == 0);
+
+    /* Keep evicting until we have released at least 'mem_tofree' bytes,
+     * or we bail out early for time limit / nothing left to evict. */
+    while (mem_freed < (long long)mem_tofree) {
+        int j, k, i;
+        static unsigned int next_db = 0;
+        sds bestkey = NULL;
+        int bestdbid;
+        redisDb *db;
+        dict *dict;
+        dictEntry *de;
+
+        if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) ||
+            server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
+        {
+            struct evictionPoolEntry *pool = EvictionPoolLRU;
+
+            while (bestkey == NULL) {
+                unsigned long total_keys = 0, keys;
+
+                /* We don't want to make local-db choices when expiring keys,
+                 * so to start populate the eviction pool sampling keys from
+                 * every DB. */
+                for (i = 0; i < server.dbnum; i++) {
+                    db = server.db+i;
+                    dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
+                            db->dict : db->expires;
+                    if ((keys = dictSize(dict)) != 0) {
+                        evictionPoolPopulate(i, dict, db->dict, pool);
+                        total_keys += keys;
+                    }
+                }
+                if (!total_keys) break; /* No keys to evict. */
+
+                /* Go backward from best to worst element to evict. */
+                for (k = EVPOOL_SIZE-1; k >= 0; k--) {
+                    if (pool[k].key == NULL) continue;
+                    bestdbid = pool[k].dbid;
+
+                    if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
+                        de = dictFind(server.db[bestdbid].dict,
+                                      pool[k].key);
+                    } else {
+                        de = dictFind(server.db[bestdbid].expires,
+                                      pool[k].key);
+                    }
+
+                    /* Remove the entry from the pool. */
+                    if (pool[k].key != pool[k].cached)
+                        sdsfree(pool[k].key);
+                    pool[k].key = NULL;
+                    pool[k].idle = 0;
+
+                    /* If the key exists, is our pick. Otherwise it is
+                     * a ghost and we need to try the next element. */
+                    if (de) {
+                        bestkey = dictGetKey(de);
+                        break;
+                    } else {
+                        /* Ghost... Iterate again. */
+                    }
+                }
+            }
+        }
+
+        /* volatile-random and allkeys-random policy */
+        else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
+                 server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
+        {
+            /* When evicting a random key, we try to evict a key for
+             * each DB, so we use the static 'next_db' variable to
+             * incrementally visit all DBs. */
+            for (i = 0; i < server.dbnum; i++) {
+                j = (++next_db) % server.dbnum;
+                db = server.db+j;
+                dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
+                        db->dict : db->expires;
+                if (dictSize(dict) != 0) {
+                    de = dictGetRandomKey(dict);
+                    bestkey = dictGetKey(de);
+                    bestdbid = j;
+                    break;
+                }
+            }
+        }
+
+        /* Finally remove the selected key. */
+        if (bestkey) {
+            db = server.db+bestdbid;
+            robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
+            /* We compute the amount of memory freed by db*Delete() alone.
+             * It is possible that actually the memory needed to propagate
+             * the DEL in AOF and replication link is greater than the one
+             * we are freeing removing the key, but we can't account for
+             * that otherwise we would never exit the loop.
+             *
+             * Same for CSC invalidation messages generated by signalModifiedKey.
+             *
+             * AOF and Output buffer memory will be freed eventually so
+             * we only care about memory used by the key space. */
+            enterExecutionUnit(1, 0);
+            delta = (long long) zmalloc_used_memory();
+            latencyStartMonitor(eviction_latency);
+            dbGenericDelete(db,keyobj,server.lazyfree_lazy_eviction,DB_FLAG_KEY_EVICTED);
+            latencyEndMonitor(eviction_latency);
+            latencyAddSampleIfNeeded("eviction-del",eviction_latency);
+            delta -= (long long) zmalloc_used_memory();
+            mem_freed += delta;
+            server.stat_evictedkeys++;
+            signalModifiedKey(NULL,db,keyobj);
+            notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
+                                keyobj, db->id);
+            propagateDeletion(db,keyobj,server.lazyfree_lazy_eviction);
+            exitExecutionUnit();
+            postExecutionUnitOperations();
+            decrRefCount(keyobj);
+            keys_freed++;
+
+            /* Periodic housekeeping, once every 16 evicted keys. */
+            if (keys_freed % 16 == 0) {
+                /* When the memory to free starts to be big enough, we may
+                 * start spending so much time here that is impossible to
+                 * deliver data to the replicas fast enough, so we force the
+                 * transmission here inside the loop. */
+                if (slaves) flushSlavesOutputBuffers();
+
+                /* Normally our stop condition is the ability to release
+                 * a fixed, pre-computed amount of memory. However when we
+                 * are deleting objects in another thread, it's better to
+                 * check, from time to time, if we already reached our target
+                 * memory, since the "mem_freed" amount is computed only
+                 * across the dbAsyncDelete() call, while the thread can
+                 * release the memory all the time. */
+                if (server.lazyfree_lazy_eviction) {
+                    if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
+                        break;
+                    }
+                }
+
+                /* After some time, exit the loop early - even if memory limit
+                 * hasn't been reached. If we suddenly need to free a lot of
+                 * memory, don't want to spend too much time here. */
+                if (elapsedUs(evictionTimer) > eviction_time_limit_us) {
+                    // We still need to free memory - start eviction timer proc
+                    startEvictionTimeProc();
+                    break;
+                }
+            }
+        } else {
+            goto cant_free; /* nothing to free... */
+        }
+    }
+    /* at this point, the memory is OK, or we have reached the time limit */
+    result = (isEvictionProcRunning) ? EVICT_RUNNING : EVICT_OK;
+
+cant_free:
+    if (result == EVICT_FAIL) {
+        /* At this point, we have run out of evictable items. It's possible
+         * that some items are being freed in the lazyfree thread. Perform a
+         * short wait here if such jobs exist, but don't wait long. */
+        mstime_t lazyfree_latency;
+        latencyStartMonitor(lazyfree_latency);
+        while (bioPendingJobsOfType(BIO_LAZY_FREE) &&
+               elapsedUs(evictionTimer) < eviction_time_limit_us) {
+            if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
+                result = EVICT_OK;
+                break;
+            }
+            usleep(eviction_time_limit_us < 1000 ? eviction_time_limit_us : 1000);
+        }
+        latencyEndMonitor(lazyfree_latency);
+        latencyAddSampleIfNeeded("eviction-lazyfree",lazyfree_latency);
+    }
+
+    latencyEndMonitor(latency);
+    latencyAddSampleIfNeeded("eviction-cycle",latency);
+
+update_metrics:
+    /* Track how long the server has continuously been over the limit. */
+    if (result == EVICT_RUNNING || result == EVICT_FAIL) {
+        if (server.stat_last_eviction_exceeded_time == 0)
+            elapsedStart(&server.stat_last_eviction_exceeded_time);
+    } else if (result == EVICT_OK) {
+        if (server.stat_last_eviction_exceeded_time != 0) {
+            server.stat_total_eviction_exceeded_time += elapsedUs(server.stat_last_eviction_exceeded_time);
+            server.stat_last_eviction_exceeded_time = 0;
+        }
+    }
+    return result;
+}
diff --git a/src/expire.c b/src/expire.c
new file mode 100644
index 0000000..33c21c3
--- /dev/null
+++ b/src/expire.c
@@ -0,0 +1,754 @@
+/* Implementation of EXPIRE (keys with fixed time to live).
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/*-----------------------------------------------------------------------------
+ * Incremental collection of expired keys.
+ *
+ * When keys are accessed they are expired on-access. However we need a
+ * mechanism in order to ensure keys are eventually removed when expired even
+ * if no access is performed on them.
+ *----------------------------------------------------------------------------*/
+
+/* Helper function for the activeExpireCycle() function.
+ * This function will try to expire the key that is stored in the hash table
+ * entry 'de' of the 'expires' hash table of a Redis database.
+ *
+ * If the key is found to be expired, it is removed from the database and
+ * 1 is returned. Otherwise no operation is performed and 0 is returned.
+ *
+ * When a key is expired, server.stat_expiredkeys is incremented.
+ *
+ * The parameter 'now' is the current time in milliseconds as is passed
+ * to the function to avoid too many gettimeofday() syscalls. */
+int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
+    long long when = dictGetSignedIntegerVal(de);
+    if (now <= when) return 0; /* Not expired yet. */
+
+    enterExecutionUnit(1, 0);
+    sds key = dictGetKey(de);
+    robj *keyobj = createStringObject(key,sdslen(key));
+    deleteExpiredKeyAndPropagate(db,keyobj);
+    decrRefCount(keyobj);
+    exitExecutionUnit();
+    return 1;
+}
+
+/* Try to expire a few timed out keys. The algorithm used is adaptive and
+ * will use few CPU cycles if there are few expiring keys, otherwise
+ * it will get more aggressive to avoid that too much memory is used by
+ * keys that can be removed from the keyspace.
+ *
+ * Every expire cycle tests multiple databases: the next call will start
+ * again from the next db. No more than CRON_DBS_PER_CALL databases are
+ * tested at every iteration.
+ *
+ * The function can perform more or less work, depending on the "type"
+ * argument. It can execute a "fast cycle" or a "slow cycle". The slow
+ * cycle is the main way we collect expired keys: this happens with
+ * the "server.hz" frequency (usually 10 hertz).
+ *
+ * However the slow cycle can exit for timeout, since it used too much time.
+ * For this reason the function is also invoked to perform a fast cycle
+ * at every event loop cycle, in the beforeSleep() function. The fast cycle
+ * will try to perform less work, but will do it much more often.
+ *
+ * The following are the details of the two expire cycles and their stop
+ * conditions:
+ *
+ * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
+ * "fast" expire cycle that takes no longer than ACTIVE_EXPIRE_CYCLE_FAST_DURATION
+ * microseconds, and is not repeated again before the same amount of time.
+ * The cycle will also refuse to run at all if the latest slow cycle did not
+ * terminate because of a time limit condition.
+ *
+ * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is
+ * executed, where the time limit is a percentage of the REDIS_HZ period
+ * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. In the
+ * fast cycle, the check of every database is interrupted once the number
+ * of already expired keys in the database is estimated to be lower than
+ * a given percentage, in order to avoid doing too much work to gain too
+ * little memory.
+ *
+ * The configured expire "effort" will modify the baseline parameters in
+ * order to do more work in both the fast and slow expire cycles.
+ */
+
+#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20 /* Keys for each DB loop. */
+#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */
+#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */
+#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which
+ we do extra efforts. */
+
+/* Data used by the expire dict scan callback. */
+typedef struct {
+    redisDb *db;           /* DB being scanned. */
+    long long now;         /* Current unix time in milliseconds. */
+    unsigned long sampled; /* num keys checked */
+    unsigned long expired; /* num keys expired */
+    long long ttl_sum;     /* sum of ttl for key with ttl not yet expired */
+    int ttl_samples;       /* num keys with ttl not yet expired */
+} expireScanData;
+
+/* dictScan() callback used by activeExpireCycle(): samples one 'expires'
+ * entry, expiring it if needed, and accumulates sampling / TTL statistics
+ * into the expireScanData passed as 'privdata'. */
+void expireScanCallback(void *privdata, const dictEntry *const_de) {
+    dictEntry *de = (dictEntry *)const_de;
+    expireScanData *data = privdata;
+    /* Read the TTL before the expire attempt: expiring the key removes it
+     * from the dict, after which 'de' presumably must not be dereferenced. */
+    long long ttl = dictGetSignedIntegerVal(de) - data->now;
+    if (activeExpireCycleTryExpire(data->db, de, data->now)) {
+        data->expired++;
+        /* Propagate the DEL command */
+        postExecutionUnitOperations();
+    }
+    if (ttl > 0) {
+        /* We want the average TTL of keys yet not expired. */
+        data->ttl_sum += ttl;
+        data->ttl_samples++;
+    }
+    data->sampled++;
+}
+
+void activeExpireCycle(int type) {
+    /* Adjust the running parameters according to the configured expire
+     * effort. The default effort is 1, and the maximum configurable effort
+     * is 10. */
+    unsigned long
+    effort = server.active_expire_effort-1, /* Rescale from 0 to 9. */
+    config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP +
+                           ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4*effort,
+    config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION +
+                                 ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
+    config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC +
+                                  2*effort,
+    config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE-
+                                    effort;
+
+    /* This function has some global state in order to continue the work
+     * incrementally across calls. */
+    static unsigned int current_db = 0; /* Next DB to test. */
+    static int timelimit_exit = 0;      /* Time limit hit in previous call? */
+    static long long last_fast_cycle = 0; /* When last fast cycle ran. */
+
+    int j, iteration = 0;
+    int dbs_per_call = CRON_DBS_PER_CALL;
+    long long start = ustime(), timelimit, elapsed;
+
+    /* If 'expire' action is paused, for whatever reason, then don't expire any key.
+     * Typically, at the end of the pause we will properly expire the key OR we
+     * will have failed over and the new primary will send us the expire. */
+    if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return;
+
+    if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
+        /* Don't start a fast cycle if the previous cycle did not exit
+         * for time limit, unless the percentage of estimated stale keys is
+         * too high. Also never repeat a fast cycle for the same period
+         * as the fast cycle total duration itself. */
+        if (!timelimit_exit &&
+            server.stat_expired_stale_perc < config_cycle_acceptable_stale)
+            return;
+
+        if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2)
+            return;
+
+        last_fast_cycle = start;
+    }
+
+    /* We usually should test CRON_DBS_PER_CALL per iteration, with
+     * two exceptions:
+     *
+     * 1) Don't test more DBs than we have.
+     * 2) If last time we hit the time limit, we want to scan all DBs
+     * in this iteration, as there is work to do in some DB and we don't want
+     * expired keys to use memory for too much time. */
+    if (dbs_per_call > server.dbnum || timelimit_exit)
+        dbs_per_call = server.dbnum;
+
+    /* We can use at max 'config_cycle_slow_time_perc' percentage of CPU
+     * time per iteration. Since this function gets called with a frequency of
+     * server.hz times per second, the following is the max amount of
+     * microseconds we can spend in this function. */
+    timelimit = config_cycle_slow_time_perc*1000000/server.hz/100;
+    timelimit_exit = 0;
+    if (timelimit <= 0) timelimit = 1;
+
+    if (type == ACTIVE_EXPIRE_CYCLE_FAST)
+        timelimit = config_cycle_fast_duration; /* in microseconds. */
+
+    /* Accumulate some global stats as we expire keys, to have some idea
+     * about the number of keys that are already logically expired, but still
+     * existing inside the database. */
+    long total_sampled = 0;
+    long total_expired = 0;
+
+    /* Try to smoke-out bugs (server.also_propagate should be empty here) */
+    serverAssert(server.also_propagate.numops == 0);
+
+    for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
+        /* Scan callback data including expired and checked count per iteration. */
+        expireScanData data;
+
+        redisDb *db = server.db+(current_db % server.dbnum);
+        data.db = db;
+
+        /* Increment the DB now so we are sure if we run out of time
+         * in the current DB we'll restart from the next. This allows to
+         * distribute the time evenly across DBs. */
+        current_db++;
+
+        /* Continue to expire if at the end of the cycle there are still
+         * a big percentage of keys to expire, compared to the number of keys
+         * we scanned. The percentage, stored in config_cycle_acceptable_stale
+         * is not fixed, but depends on the Redis configured "expire effort". */
+        do {
+            unsigned long num, slots;
+            iteration++;
+
+            /* If there is nothing to expire try next DB ASAP. */
+            if ((num = dictSize(db->expires)) == 0) {
+                db->avg_ttl = 0;
+                break;
+            }
+            slots = dictSlots(db->expires);
+            /* Refresh 'now' each inner iteration: the callback compares
+             * entry expire times against it. */
+            data.now = mstime();
+
+            /* When there are less than 1% filled slots, sampling the key
+             * space is expensive, so stop here waiting for better times...
+             * The dictionary will be resized asap. */
+            if (slots > DICT_HT_INITIAL_SIZE &&
+                (num*100/slots < 1)) break;
+
+            /* The main collection cycle. Scan through keys among keys
+             * with an expire set, checking for expired ones. */
+            data.sampled = 0;
+            data.expired = 0;
+            data.ttl_sum = 0;
+            data.ttl_samples = 0;
+
+            if (num > config_keys_per_loop)
+                num = config_keys_per_loop;
+
+            /* Here we access the low level representation of the hash table
+             * for speed concerns: this makes this code coupled with dict.c,
+             * but it hardly changed in ten years.
+             *
+             * Note that certain places of the hash table may be empty,
+             * so we want also a stop condition about the number of
+             * buckets that we scanned. However scanning for free buckets
+             * is very fast: we are in the cache line scanning a sequential
+             * array of NULL pointers, so we can scan a lot more buckets
+             * than keys in the same time. */
+            long max_buckets = num*20;
+            long checked_buckets = 0;
+
+            while (data.sampled < num && checked_buckets < max_buckets) {
+                db->expires_cursor = dictScan(db->expires, db->expires_cursor,
+                                              expireScanCallback, &data);
+                checked_buckets++;
+            }
+            total_expired += data.expired;
+            total_sampled += data.sampled;
+
+            /* Update the average TTL stats for this database. */
+            if (data.ttl_samples) {
+                long long avg_ttl = data.ttl_sum / data.ttl_samples;
+
+                /* Do a simple running average with a few samples.
+                 * We just use the current estimate with a weight of 2%
+                 * and the previous estimate with a weight of 98%. */
+                if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
+                db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
+            }
+
+            /* We can't block forever here even if there are many keys to
+             * expire. So after a given amount of milliseconds return to the
+             * caller waiting for the other active expire cycle. */
+            if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
+                elapsed = ustime()-start;
+                if (elapsed > timelimit) {
+                    timelimit_exit = 1;
+                    server.stat_expired_time_cap_reached_count++;
+                    break;
+                }
+            }
+            /* We don't repeat the cycle for the current database if there are
+             * an acceptable amount of stale keys (logically expired but yet
+             * not reclaimed). */
+        } while (data.sampled == 0 ||
+                 (data.expired * 100 / data.sampled) > config_cycle_acceptable_stale);
+    }
+
+    elapsed = ustime()-start;
+    server.stat_expire_cycle_time_used += elapsed;
+    latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
+
+    /* Update our estimate of keys existing but yet to be expired.
+     * Running average with this sample accounting for 5%. */
+    double current_perc;
+    if (total_sampled) {
+        current_perc = (double)total_expired/total_sampled;
+    } else
+        current_perc = 0;
+    server.stat_expired_stale_perc = (current_perc*0.05)+
+                                     (server.stat_expired_stale_perc*0.95);
+}
+
+/*-----------------------------------------------------------------------------
+ * Expires of keys created in writable slaves
+ *
+ * Normally slaves do not process expires: they wait the masters to synthesize
+ * DEL operations in order to retain consistency. However writable slaves are
+ * an exception: if a key is created in the slave and an expire is assigned
+ * to it, we need a way to expire such a key, since the master does not know
+ * anything about such a key.
+ *
+ * In order to do so, we track keys created in the slave side with an expire
+ * set, and call the expireSlaveKeys() function from time to time in order to
+ * reclaim the keys if they already expired.
+ *
+ * Note that the use case we are trying to cover here, is a popular one where
+ * slaves are put in writable mode in order to compute slow operations in
+ * the slave side that are mostly useful to actually read data in a more
+ * processed way. Think at sets intersections in a tmp key, with an expire so
+ * that it is also used as a cache to avoid intersecting every time.
+ *
+ * This implementation is currently not perfect but a lot better than leaking
+ * the keys as implemented in 3.2.
+ *----------------------------------------------------------------------------*/
+
+/* The dictionary where we remember key names and database ID of keys we may
+ * want to expire from the slave. Since this function is not often used we
+ * don't even care to initialize the database at startup. We'll do it once
+ * the feature is used the first time, that is, when rememberSlaveKeyWithExpire()
+ * is called.
+ *
+ * The dictionary has an SDS string representing the key as the hash table
+ * key, while the value is a 64 bit unsigned integer with the bits corresponding
+ * to the DB where the keys may exist set to 1. Currently the keys created
+ * with a DB id > 63 are not expired, but a trivial fix is to set the bitmap
+ * to the max 64 bit unsigned value when we know there is a key with a DB
+ * ID greater than 63, and check all the configured DBs in such a case. */
+dict *slaveKeysWithExpire = NULL;
+
+/* Check the set of keys that received an expire while this instance was a
+ * writable slave (tracked in slaveKeysWithExpire, see the comment block
+ * above) and evict the ones that are logically expired. Best effort:
+ * samples random tracked keys under a ~1 ms time budget. */
+void expireSlaveKeys(void) {
+    if (slaveKeysWithExpire == NULL ||
+        dictSize(slaveKeysWithExpire) == 0) return;
+
+    int cycles = 0, noexpire = 0;
+    mstime_t start = mstime();
+    while(1) {
+        /* The dict is guaranteed non-empty here: checked above on entry
+         * and re-checked at the bottom of the loop after deletions. */
+        dictEntry *de = dictGetRandomKey(slaveKeysWithExpire);
+        sds keyname = dictGetKey(de);
+        uint64_t dbids = dictGetUnsignedIntegerVal(de);
+        uint64_t new_dbids = 0;
+
+        /* Check the key against every database corresponding to the
+         * bits set in the value bitmap. */
+        int dbid = 0;
+        while(dbids && dbid < server.dbnum) {
+            if ((dbids & 1) != 0) {
+                redisDb *db = server.db+dbid;
+                dictEntry *expire = dictFind(db->expires,keyname);
+                int expired = 0;
+
+                if (expire &&
+                    activeExpireCycleTryExpire(server.db+dbid,expire,start))
+                {
+                    expired = 1;
+                    /* Propagate the DEL (writable replicas do not propagate anything to other replicas,
+                     * but they might propagate to AOF) and trigger module hooks. */
+                    postExecutionUnitOperations();
+                }
+
+                /* If the key was not expired in this DB, we need to set the
+                 * corresponding bit in the new bitmap we set as value.
+                 * At the end of the loop if the bitmap is zero, it means we
+                 * no longer need to keep track of this key. */
+                if (expire && !expired) {
+                    noexpire++;
+                    new_dbids |= (uint64_t)1 << dbid;
+                }
+            }
+            dbid++;
+            dbids >>= 1;
+        }
+
+        /* Set the new bitmap as value of the key, in the dictionary
+         * of keys with an expire set directly in the writable slave. Otherwise
+         * if the bitmap is zero, we no longer need to keep track of it. */
+        if (new_dbids)
+            dictSetUnsignedIntegerVal(de,new_dbids);
+        else
+            dictDelete(slaveKeysWithExpire,keyname);
+
+        /* Stop conditions: found 3 keys we can't expire in a row or
+         * time limit was reached.
+         * NOTE(review): 'noexpire' is cumulative and never reset, so this
+         * actually stops after more than 3 non-expirable keys in total,
+         * not strictly "in a row". Harmless for a best-effort cycle. */
+        cycles++;
+        if (noexpire > 3) break;
+        if ((cycles % 64) == 0 && mstime()-start > 1) break;
+        if (dictSize(slaveKeysWithExpire) == 0) break;
+    }
+}
+
+/* Track keys that received an EXPIRE or similar command in the context
+ * of a writable slave, so that expireSlaveKeys() can later reclaim them.
+ * Only DBs with id <= 63 are tracked: the per-key value is a 64-bit
+ * bitmap of DB ids (see the comment above slaveKeysWithExpire). */
+void rememberSlaveKeyWithExpire(redisDb *db, robj *key) {
+    if (slaveKeysWithExpire == NULL) {
+        static dictType dt = {
+            dictSdsHash,                /* hash function */
+            NULL,                       /* key dup */
+            NULL,                       /* val dup */
+            dictSdsKeyCompare,          /* key compare */
+            dictSdsDestructor,          /* key destructor */
+            NULL,                       /* val destructor */
+            NULL                        /* allow to expand */
+        };
+        slaveKeysWithExpire = dictCreate(&dt);
+    }
+    /* Keys in DBs beyond the bitmap width are silently not tracked. */
+    if (db->id > 63) return;
+
+    dictEntry *de = dictAddOrFind(slaveKeysWithExpire,key->ptr);
+    /* If the entry was just created, set it to a copy of the SDS string
+     * representing the key: we don't want to need to take those keys
+     * in sync with the main DB. The keys will be removed by expireSlaveKeys()
+     * as it scans to find keys to remove. The copy is owned by the dict
+     * and freed by its key destructor (dictSdsDestructor above). */
+    if (dictGetKey(de) == key->ptr) {
+        dictSetKey(slaveKeysWithExpire, de, sdsdup(key->ptr));
+        dictSetUnsignedIntegerVal(de,0);
+    }
+
+    /* Merge this DB id into the key's DB bitmap. */
+    uint64_t dbids = dictGetUnsignedIntegerVal(de);
+    dbids |= (uint64_t)1 << db->id;
+    dictSetUnsignedIntegerVal(de,dbids);
+}
+
+/* Return the number of writable-slave keys with an expire that we are
+ * currently tracking (0 if the tracking dict was never created). */
+size_t getSlaveKeyWithExpireCount(void) {
+    if (slaveKeysWithExpire == NULL) return 0;
+    return dictSize(slaveKeysWithExpire);
+}
+
+/* Remove the keys in the hash table. We need to do that when data is
+ * flushed from the server. We may receive new keys from the master with
+ * the same name/db and it is no longer a good idea to expire them.
+ *
+ * Note: technically we should handle the case of a single DB being flushed
+ * but it is not worth it since anyway race conditions using the same set
+ * of key names in a writable slave and in its master will lead to
+ * inconsistencies. This is just a best-effort thing we do. */
+void flushSlaveKeysWithExpireList(void) {
+    if (slaveKeysWithExpire) {
+        /* The dict's key destructor frees the copied key names, so a
+         * plain release is enough; it is lazily recreated on next use. */
+        dictRelease(slaveKeysWithExpire);
+        slaveKeysWithExpire = NULL;
+    }
+}
+
+/* Return non-zero if an expire set to 'when' (unix time in milliseconds)
+ * is already in the past AND this instance is allowed to act on it (i.e.
+ * we are a master not loading data), so the key can be deleted at once. */
+int checkAlreadyExpired(long long when) {
+    /* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past
+     * should never be executed as a DEL when load the AOF or in the context
+     * of a slave instance.
+     *
+     * Instead we add the already expired key to the database with expire time
+     * (possibly in the past) and wait for an explicit DEL from the master. */
+    return (when <= commandTimeSnapshot() && !server.loading && !server.masterhost);
+}
+
+/* Bit flags for the optional EXPIRE/PEXPIRE/EXPIREAT/PEXPIREAT arguments. */
+#define EXPIRE_NX (1<<0)
+#define EXPIRE_XX (1<<1)
+#define EXPIRE_GT (1<<2)
+#define EXPIRE_LT (1<<3)
+
+/* Parse additional flags of expire commands
+ *
+ * Supported flags:
+ * - NX: set expiry only when the key has no expiry
+ * - XX: set expiry only when the key has an existing expiry
+ * - GT: set expiry only when the new expiry is greater than current one
+ * - LT: set expiry only when the new expiry is less than current one
+ *
+ * Options are read starting from argv[3]. On success the corresponding
+ * EXPIRE_* bits are OR-ed into '*flags' and C_OK is returned. On an
+ * unknown option or an incompatible combination, an error is replied to
+ * the client and C_ERR is returned. */
+int parseExtendedExpireArgumentsOrReply(client *c, int *flags) {
+    int nx = 0, xx = 0, gt = 0, lt = 0;
+
+    int j = 3;
+    while (j < c->argc) {
+        char *opt = c->argv[j]->ptr;
+        if (!strcasecmp(opt,"nx")) {
+            *flags |= EXPIRE_NX;
+            nx = 1;
+        } else if (!strcasecmp(opt,"xx")) {
+            *flags |= EXPIRE_XX;
+            xx = 1;
+        } else if (!strcasecmp(opt,"gt")) {
+            *flags |= EXPIRE_GT;
+            gt = 1;
+        } else if (!strcasecmp(opt,"lt")) {
+            *flags |= EXPIRE_LT;
+            lt = 1;
+        } else {
+            addReplyErrorFormat(c, "Unsupported option %s", opt);
+            return C_ERR;
+        }
+        j++;
+    }
+
+    /* NX is exclusive with every other flag. */
+    if ((nx && xx) || (nx && gt) || (nx && lt)) {
+        addReplyError(c, "NX and XX, GT or LT options at the same time are not compatible");
+        return C_ERR;
+    }
+
+    /* GT and LT are mutually exclusive. */
+    if (gt && lt) {
+        addReplyError(c, "GT and LT options at the same time are not compatible");
+        return C_ERR;
+    }
+
+    return C_OK;
+}
+
+/*-----------------------------------------------------------------------------
+ * Expires Commands
+ *----------------------------------------------------------------------------*/
+
+/* This is the generic command implementation for EXPIRE, PEXPIRE, EXPIREAT
+ * and PEXPIREAT. Because the command second argument may be relative or absolute
+ * the "basetime" argument is used to signal what the base time is (either 0
+ * for *AT variants of the command, or the current time for relative expires).
+ *
+ * unit is either UNIT_SECONDS or UNIT_MILLISECONDS, and is only used for
+ * the argv[2] parameter. The basetime is always specified in milliseconds.
+ *
+ * Additional flags are supported and parsed via parseExtendedExpireArguments.
+ *
+ * Replies :0 when the key is missing or a NX/XX/GT/LT precondition fails,
+ * :1 when the expire was set (or the key deleted because already expired). */
+void expireGenericCommand(client *c, long long basetime, int unit) {
+    robj *key = c->argv[1], *param = c->argv[2];
+    long long when; /* unix time in milliseconds when the key will expire. */
+    long long current_expire = -1;
+    int flag = 0;
+
+    /* checking optional flags */
+    if (parseExtendedExpireArgumentsOrReply(c, &flag) != C_OK) {
+        return;
+    }
+
+    if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK)
+        return;
+
+    /* EXPIRE allows negative numbers, but we can at least detect an
+     * overflow by either unit conversion or basetime addition. */
+    if (unit == UNIT_SECONDS) {
+        if (when > LLONG_MAX / 1000 || when < LLONG_MIN / 1000) {
+            addReplyErrorExpireTime(c);
+            return;
+        }
+        when *= 1000;
+    }
+
+    /* basetime is always >= 0, so only positive overflow is possible here. */
+    if (when > LLONG_MAX - basetime) {
+        addReplyErrorExpireTime(c);
+        return;
+    }
+    when += basetime;
+
+    /* No key, return zero. */
+    if (lookupKeyWrite(c->db,key) == NULL) {
+        addReply(c,shared.czero);
+        return;
+    }
+
+    /* Apply the NX/XX/GT/LT preconditions against the key's current
+     * expire (-1 means no expire set). */
+    if (flag) {
+        current_expire = getExpire(c->db, key);
+
+        /* NX option is set, check current expiry */
+        if (flag & EXPIRE_NX) {
+            if (current_expire != -1) {
+                addReply(c,shared.czero);
+                return;
+            }
+        }
+
+        /* XX option is set, check current expiry */
+        if (flag & EXPIRE_XX) {
+            if (current_expire == -1) {
+                /* reply 0 when the key has no expiry */
+                addReply(c,shared.czero);
+                return;
+            }
+        }
+
+        /* GT option is set, check current expiry */
+        if (flag & EXPIRE_GT) {
+            /* When current_expire is -1, we consider it as infinite TTL,
+             * so expire command with gt always fail the GT. */
+            if (when <= current_expire || current_expire == -1) {
+                /* reply 0 when the new expiry is not greater than current */
+                addReply(c,shared.czero);
+                return;
+            }
+        }
+
+        /* LT option is set, check current expiry */
+        if (flag & EXPIRE_LT) {
+            /* When current_expire -1, we consider it as infinite TTL,
+             * but 'when' can still be negative at this point, so if there is
+             * an expiry on the key and it's not less than current, we fail the LT. */
+            if (current_expire != -1 && when >= current_expire) {
+                /* reply 0 when the new expiry is not less than current */
+                addReply(c,shared.czero);
+                return;
+            }
+        }
+    }
+
+    if (checkAlreadyExpired(when)) {
+        /* The expire is in the past and we are a master: delete the key
+         * right away instead of storing a stale expire. */
+        robj *aux;
+
+        int deleted = dbGenericDelete(c->db,key,server.lazyfree_lazy_expire,DB_FLAG_KEY_EXPIRED);
+        serverAssertWithInfo(c,key,deleted);
+        server.dirty++;
+
+        /* Replicate/AOF this as an explicit DEL or UNLINK. */
+        aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
+        rewriteClientCommandVector(c,2,aux,key);
+        signalModifiedKey(c,c->db,key);
+        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
+        addReply(c, shared.cone);
+        return;
+    } else {
+        setExpire(c,c->db,key,when);
+        addReply(c,shared.cone);
+        /* Propagate as PEXPIREAT millisecond-timestamp
+         * Only rewrite the command arg if not already PEXPIREAT */
+        if (c->cmd->proc != pexpireatCommand) {
+            rewriteClientCommandArgument(c,0,shared.pexpireat);
+        }
+
+        /* Avoid creating a string object when it's the same as argv[2] parameter */
+        if (basetime != 0 || unit == UNIT_SECONDS) {
+            robj *when_obj = createStringObjectFromLongLong(when);
+            rewriteClientCommandArgument(c,2,when_obj);
+            decrRefCount(when_obj);
+        }
+
+        signalModifiedKey(c,c->db,key);
+        notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
+        server.dirty++;
+        return;
+    }
+}
+
+/* EXPIRE key seconds [ NX | XX | GT | LT]
+ * Relative expire in seconds: base time is the command time (milliseconds). */
+void expireCommand(client *c) {
+    expireGenericCommand(c,commandTimeSnapshot(),UNIT_SECONDS);
+}
+
+/* EXPIREAT key unix-time-seconds [ NX | XX | GT | LT]
+ * Absolute expire in seconds: base time is 0. */
+void expireatCommand(client *c) {
+    expireGenericCommand(c,0,UNIT_SECONDS);
+}
+
+/* PEXPIRE key milliseconds [ NX | XX | GT | LT]
+ * Relative expire in milliseconds: base time is the command time. */
+void pexpireCommand(client *c) {
+    expireGenericCommand(c,commandTimeSnapshot(),UNIT_MILLISECONDS);
+}
+
+/* PEXPIREAT key unix-time-milliseconds [ NX | XX | GT | LT]
+ * Absolute expire in milliseconds: base time is 0. */
+void pexpireatCommand(client *c) {
+    expireGenericCommand(c,0,UNIT_MILLISECONDS);
+}
+
+/* Implements TTL, PTTL, EXPIRETIME and PEXPIRETIME.
+ *
+ * output_ms:  when non-zero report milliseconds, otherwise seconds
+ *             (rounded to the nearest second, see the +500 below).
+ * output_abs: when non-zero report the absolute unix expire time instead
+ *             of the remaining time to live.
+ *
+ * Replies -2 if the key does not exist, -1 if it has no expire. */
+void ttlGenericCommand(client *c, int output_ms, int output_abs) {
+    long long expire, ttl = -1;
+
+    /* If the key does not exist at all, return -2 */
+    if (lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH) == NULL) {
+        addReplyLongLong(c,-2);
+        return;
+    }
+
+    /* The key exists. Return -1 if it has no expire, or the actual
+     * TTL value otherwise. */
+    expire = getExpire(c->db,c->argv[1]);
+    if (expire != -1) {
+        ttl = output_abs ? expire : expire-commandTimeSnapshot();
+        if (ttl < 0) ttl = 0;
+    }
+    if (ttl == -1) {
+        addReplyLongLong(c,-1);
+    } else {
+        /* Round to the nearest second when reporting in seconds. */
+        addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000));
+    }
+}
+
+/* TTL key -- remaining time to live in seconds (-1 no expire, -2 no key). */
+void ttlCommand(client *c) {
+    ttlGenericCommand(c, 0, 0);
+}
+
+/* PTTL key -- remaining time to live in milliseconds (-1 no expire, -2 no key). */
+void pttlCommand(client *c) {
+    ttlGenericCommand(c, 1, 0);
+}
+
+/* EXPIRETIME key -- absolute unix expire time in seconds (-1 no expire, -2 no key). */
+void expiretimeCommand(client *c) {
+    ttlGenericCommand(c, 0, 1);
+}
+
+/* PEXPIRETIME key -- absolute unix expire time in milliseconds (-1 no expire, -2 no key). */
+void pexpiretimeCommand(client *c) {
+    ttlGenericCommand(c, 1, 1);
+}
+
+/* PERSIST key
+ * Remove the expire from a key. Replies :1 if an expire was removed,
+ * :0 if the key does not exist or had no expire. */
+void persistCommand(client *c) {
+    if (lookupKeyWrite(c->db,c->argv[1])) {
+        if (removeExpire(c->db,c->argv[1])) {
+            signalModifiedKey(c,c->db,c->argv[1]);
+            notifyKeyspaceEvent(NOTIFY_GENERIC,"persist",c->argv[1],c->db->id);
+            addReply(c,shared.cone);
+            server.dirty++;
+        } else {
+            /* Key exists but has no associated expire. */
+            addReply(c,shared.czero);
+        }
+    } else {
+        addReply(c,shared.czero);
+    }
+}
+
+/* TOUCH key1 [key2 key3 ... keyN]
+ * Alter the last access time of the given keys (via the read lookup)
+ * and reply with the number of keys that exist. */
+void touchCommand(client *c) {
+    int touched = 0;
+    for (int j = 1; j < c->argc; j++)
+        if (lookupKeyRead(c->db,c->argv[j]) != NULL) touched++;
+    addReplyLongLong(c,touched);
+}
diff --git a/src/fmacros.h b/src/fmacros.h
new file mode 100644
index 0000000..c5da4b7
--- /dev/null
+++ b/src/fmacros.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _REDIS_FMACRO_H
+#define _REDIS_FMACRO_H
+
+/* Feature-test macro setup: this header must be included before any
+ * system header so the definitions below can influence what libc
+ * exposes (on Linux, features.h at the bottom consumes them). */
+#define _BSD_SOURCE
+
+#if defined(__linux__)
+#define _GNU_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
+#if defined(_AIX)
+#define _ALL_SOURCE
+#endif
+
+#if defined(__linux__) || defined(__OpenBSD__)
+#define _XOPEN_SOURCE 700
+/*
+ * On NetBSD, _XOPEN_SOURCE undefines _NETBSD_SOURCE and
+ * thus hides inet_aton etc.
+ */
+#elif !defined(__NetBSD__)
+#define _XOPEN_SOURCE
+#endif
+
+#if defined(__sun)
+#define _POSIX_C_SOURCE 199506L
+#endif
+
+/* Always use 64-bit file offsets, also on 32-bit platforms. */
+#define _LARGEFILE_SOURCE
+#define _FILE_OFFSET_BITS 64
+
+/* deprecate unsafe functions
+ *
+ * NOTE: We do not use the poison pragma since it
+ * will error on stdlib definitions in files as well*/
+#if (__GNUC__ && __GNUC__ >= 4) && !defined __APPLE__
+int sprintf(char *str, const char *format, ...) __attribute__((deprecated("please avoid use of unsafe C functions. prefer use of snprintf instead")));
+char *strcpy(char *restrict dest, const char *src) __attribute__((deprecated("please avoid use of unsafe C functions. prefer use of redis_strlcpy instead")));
+char *strcat(char *restrict dest, const char *restrict src) __attribute__((deprecated("please avoid use of unsafe C functions. prefer use of redis_strlcat instead")));
+#endif
+
+#ifdef __linux__
+/* features.h uses the defines above to set feature specific defines. */
+#include <features.h>
+#endif
+
+#endif
diff --git a/src/function_lua.c b/src/function_lua.c
new file mode 100644
index 0000000..be79dc1
--- /dev/null
+++ b/src/function_lua.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (c) 2021, Redis Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * function_lua.c unit provides the Lua engine functionality.
+ * Including registering the engine and implementing the engine
+ * callbacks:
+ * * Create a function from blob (usually text)
+ * * Invoke a function
+ * * Free function memory
+ * * Get memory usage
+ *
+ * Uses script_lua.c to run the Lua code.
+ */
+
+#include "functions.h"
+#include "script_lua.h"
+#include <lua.h>
+#include <lauxlib.h>
+#include <lualib.h>
+
+#define LUA_ENGINE_NAME "LUA"
+#define REGISTRY_ENGINE_CTX_NAME "__ENGINE_CTX__"
+#define REGISTRY_ERROR_HANDLER_NAME "__ERROR_HANDLER__"
+#define REGISTRY_LOAD_CTX_NAME "__LIBRARY_CTX__"
+#define LIBRARY_API_NAME "__LIBRARY_API__"
+#define GLOBALS_API_NAME "__GLOBALS_API__"
+
+/* Lua engine ctx */
+typedef struct luaEngineCtx {
+ lua_State *lua;
+} luaEngineCtx;
+
+/* Lua function ctx */
+typedef struct luaFunctionCtx {
+ /* Special ID that allows getting the Lua function object from the Lua registry */
+ int lua_function_ref;
+} luaFunctionCtx;
+
+typedef struct loadCtx {
+ functionLibInfo *li;
+ monotime start_time;
+ size_t timeout;
+} loadCtx;
+
+typedef struct registerFunctionArgs {
+ sds name;
+ sds desc;
+ luaFunctionCtx *lua_f_ctx;
+ uint64_t f_flags;
+} registerFunctionArgs;
+
+/* Hook for FUNCTION LOAD execution.
+ * Used to cancel the execution in case of a timeout (500ms).
+ * This execution should be fast and should only register
+ * functions so 500ms should be more than enough. */
+static void luaEngineLoadHook(lua_State *lua, lua_Debug *ar) {
+    UNUSED(ar);
+    loadCtx *load_ctx = luaGetFromRegistry(lua, REGISTRY_LOAD_CTX_NAME);
+    serverAssert(load_ctx); /* Only supported inside script invocation */
+    uint64_t duration = elapsedMs(load_ctx->start_time);
+    /* timeout == 0 means no limit. */
+    if (load_ctx->timeout > 0 && duration > load_ctx->timeout) {
+        /* NOTE(review): re-arming the hook per line presumably keeps
+         * raising the timeout error if the script traps it -- confirm. */
+        lua_sethook(lua, luaEngineLoadHook, LUA_MASKLINE, 0);
+
+        luaPushError(lua,"FUNCTION LOAD timeout");
+        luaError(lua);
+    }
+}
+
+/*
+ * Compile the given blob and run it so that it can register its functions
+ * (via redis.register_function, handled by luaRegisterFunction below).
+ * While running, the globals' __index is temporarily switched to the
+ * library API table so the load-time API is visible; it is restored on exit.
+ *
+ * Return C_OK on success. On compilation or runtime error, return C_ERR
+ * and set a description of the error into the 'err' out parameter.
+ */
+static int luaEngineCreate(void *engine_ctx, functionLibInfo *li, sds blob, size_t timeout, sds *err) {
+    int ret = C_ERR;
+    luaEngineCtx *lua_engine_ctx = engine_ctx;
+    lua_State *lua = lua_engine_ctx->lua;
+
+    /* set load library globals */
+    lua_getmetatable(lua, LUA_GLOBALSINDEX);
+    lua_enablereadonlytable(lua, -1, 0); /* disable global protection */
+    lua_getfield(lua, LUA_REGISTRYINDEX, LIBRARY_API_NAME);
+    lua_setfield(lua, -2, "__index");
+    lua_enablereadonlytable(lua, LUA_GLOBALSINDEX, 1); /* enable global protection */
+    lua_pop(lua, 1); /* pop the metatable */
+
+    /* compile the code */
+    if (luaL_loadbuffer(lua, blob, sdslen(blob), "@user_function")) {
+        *err = sdscatprintf(sdsempty(), "Error compiling function: %s", lua_tostring(lua, -1));
+        lua_pop(lua, 1); /* pops the error */
+        goto done;
+    }
+    serverAssert(lua_isfunction(lua, -1));
+
+    /* Make the load context reachable from the registry so that
+     * luaRegisterFunction and luaEngineLoadHook can find it. */
+    loadCtx load_ctx = {
+        .li = li,
+        .start_time = getMonotonicUs(),
+        .timeout = timeout,
+    };
+    luaSaveOnRegistry(lua, REGISTRY_LOAD_CTX_NAME, &load_ctx);
+
+    lua_sethook(lua,luaEngineLoadHook,LUA_MASKCOUNT,100000);
+    /* Run the compiled code to allow it to register functions */
+    if (lua_pcall(lua,0,0,0)) {
+        errorInfo err_info = {0};
+        luaExtractErrorInformation(lua, &err_info);
+        *err = sdscatprintf(sdsempty(), "Error registering functions: %s", err_info.msg);
+        lua_pop(lua, 1); /* pops the error */
+        luaErrorInformationDiscard(&err_info);
+        goto done;
+    }
+
+    ret = C_OK;
+
+done:
+    /* restore original globals */
+    lua_getmetatable(lua, LUA_GLOBALSINDEX);
+    lua_enablereadonlytable(lua, -1, 0); /* disable global protection */
+    lua_getfield(lua, LUA_REGISTRYINDEX, GLOBALS_API_NAME);
+    lua_setfield(lua, -2, "__index");
+    lua_enablereadonlytable(lua, LUA_GLOBALSINDEX, 1); /* enable global protection */
+    lua_pop(lua, 1); /* pop the metatable */
+
+    lua_sethook(lua,NULL,0,0); /* Disable hook */
+    luaSaveOnRegistry(lua, REGISTRY_LOAD_CTX_NAME, NULL);
+    return ret;
+}
+
+/*
+ * Invoke the given function with the given keys and args.
+ * The error handler saved in the registry is pushed below the function
+ * so luaCallFunction can use it for error reporting.
+ */
+static void luaEngineCall(scriptRunCtx *run_ctx,
+                          void *engine_ctx,
+                          void *compiled_function,
+                          robj **keys,
+                          size_t nkeys,
+                          robj **args,
+                          size_t nargs)
+{
+    luaEngineCtx *lua_engine_ctx = engine_ctx;
+    lua_State *lua = lua_engine_ctx->lua;
+    luaFunctionCtx *f_ctx = compiled_function;
+
+    /* Push error handler */
+    lua_pushstring(lua, REGISTRY_ERROR_HANDLER_NAME);
+    lua_gettable(lua, LUA_REGISTRYINDEX);
+
+    /* Fetch the function object saved under its registry reference. */
+    lua_rawgeti(lua, LUA_REGISTRYINDEX, f_ctx->lua_function_ref);
+
+    serverAssert(lua_isfunction(lua, -1));
+
+    luaCallFunction(run_ctx, lua, keys, nkeys, args, nargs, 0);
+    lua_pop(lua, 1); /* Pop error handler */
+}
+
+/* Engine callback: return the memory used by the Lua VM.
+ * (Name typo "Memoy" kept as-is: it is wired into the engine vtable.) */
+static size_t luaEngineGetUsedMemoy(void *engine_ctx) {
+    luaEngineCtx *lua_engine_ctx = engine_ctx;
+    return luaMemory(lua_engine_ctx->lua);
+}
+
+/* Engine callback: per-function allocation overhead (the luaFunctionCtx). */
+static size_t luaEngineFunctionMemoryOverhead(void *compiled_function) {
+    return zmalloc_size(compiled_function);
+}
+
+/* Engine callback: overhead of the engine context structure itself. */
+static size_t luaEngineMemoryOverhead(void *engine_ctx) {
+    luaEngineCtx *lua_engine_ctx = engine_ctx;
+    return zmalloc_size(lua_engine_ctx);
+}
+
+/* Engine callback: release a compiled function, dropping its registry
+ * reference (so the Lua GC can collect the closure) and freeing the ctx. */
+static void luaEngineFreeFunction(void *engine_ctx, void *compiled_function) {
+    luaEngineCtx *lua_engine_ctx = engine_ctx;
+    lua_State *lua = lua_engine_ctx->lua;
+    luaFunctionCtx *f_ctx = compiled_function;
+    lua_unref(lua, f_ctx->lua_function_ref);
+    zfree(f_ctx);
+}
+
+/* Fill 'register_f_args' with the given values. Ownership of 'name',
+ * 'desc' and 'lua_f_ctx' is transferred to the struct (see the matching
+ * luaRegisterFunctionArgsDispose below). */
+static void luaRegisterFunctionArgsInitialize(registerFunctionArgs *register_f_args,
+                                              sds name,
+                                              sds desc,
+                                              luaFunctionCtx *lua_f_ctx,
+                                              uint64_t flags)
+{
+    *register_f_args = (registerFunctionArgs){
+        .name = name,
+        .desc = desc,
+        .lua_f_ctx = lua_f_ctx,
+        .f_flags = flags,
+    };
+}
+
+/* Release everything owned by 'register_f_args': the name/desc sds
+ * strings, the Lua registry reference and the function ctx allocation. */
+static void luaRegisterFunctionArgsDispose(lua_State *lua, registerFunctionArgs *register_f_args) {
+    sdsfree(register_f_args->name);
+    if (register_f_args->desc) sdsfree(register_f_args->desc);
+    lua_unref(lua, register_f_args->lua_f_ctx->lua_function_ref);
+    zfree(register_f_args->lua_f_ctx);
+}
+
+/* Read function flags located on the top of the Lua stack (an array-style
+ * table of flag-name strings, matched case-insensitively against
+ * scripts_flags_def). The stack is left balanced.
+ * On success, return C_OK and set the flags to 'flags' out parameter.
+ * Return C_ERR if encounter an unknown or non-string flag. */
+static int luaRegisterFunctionReadFlags(lua_State *lua, uint64_t *flags) {
+    int j = 1;
+    int ret = C_ERR;
+    int f_flags = 0;
+    while(1) {
+        /* Fetch flags_table[j] (table is at -2 after the index push). */
+        lua_pushnumber(lua,j++);
+        lua_gettable(lua,-2);
+        int t = lua_type(lua,-1);
+        if (t == LUA_TNIL) {
+            /* nil marks the end of the array part. */
+            lua_pop(lua,1);
+            break;
+        }
+        if (!lua_isstring(lua, -1)) {
+            lua_pop(lua,1);
+            goto done;
+        }
+
+        const char *flag_str = lua_tostring(lua, -1);
+        int found = 0;
+        for (scriptFlag *flag = scripts_flags_def; flag->str ; ++flag) {
+            if (!strcasecmp(flag->str, flag_str)) {
+                f_flags |= flag->flag;
+                found = 1;
+                break;
+            }
+        }
+        /* pops the value to continue the iteration */
+        lua_pop(lua,1);
+        if (!found) {
+            /* flag not found */
+            goto done;
+        }
+    }
+
+    *flags = f_flags;
+    ret = C_OK;
+
+done:
+    return ret;
+}
+
+/* Parse the single-table (named arguments) form of redis.register_function:
+ * { function_name=..., callback=..., [description=...], [flags={...}] }.
+ * On success fill 'register_f_args' (which takes ownership of the parsed
+ * values) and return C_OK. On failure push an error on the Lua stack,
+ * free any partial results and return C_ERR. */
+static int luaRegisterFunctionReadNamedArgs(lua_State *lua, registerFunctionArgs *register_f_args) {
+    char *err = NULL;
+    sds name = NULL;
+    sds desc = NULL;
+    luaFunctionCtx *lua_f_ctx = NULL;
+    uint64_t flags = 0;
+    if (!lua_istable(lua, 1)) {
+        err = "calling redis.register_function with a single argument is only applicable to Lua table (representing named arguments).";
+        goto error;
+    }
+
+    /* Iterating on all the named arguments */
+    lua_pushnil(lua);
+    while (lua_next(lua, -2)) {
+        /* Stack now: table, key, value */
+        if (!lua_isstring(lua, -2)) {
+            err = "named argument key given to redis.register_function is not a string";
+            goto error;
+        }
+        const char *key = lua_tostring(lua, -2);
+        if (!strcasecmp(key, "function_name")) {
+            /* NOTE(review): if the same named argument appears twice the
+             * previously parsed sds (name/desc) leaks. Harmless in
+             * practice but worth tightening. */
+            if (!(name = luaGetStringSds(lua, -1))) {
+                err = "function_name argument given to redis.register_function must be a string";
+                goto error;
+            }
+        } else if (!strcasecmp(key, "description")) {
+            if (!(desc = luaGetStringSds(lua, -1))) {
+                err = "description argument given to redis.register_function must be a string";
+                goto error;
+            }
+        } else if (!strcasecmp(key, "callback")) {
+            if (!lua_isfunction(lua, -1)) {
+                err = "callback argument given to redis.register_function must be a function";
+                goto error;
+            }
+            /* luaL_ref pops the value and anchors it in the registry. */
+            int lua_function_ref = luaL_ref(lua, LUA_REGISTRYINDEX);
+
+            lua_f_ctx = zmalloc(sizeof(*lua_f_ctx));
+            lua_f_ctx->lua_function_ref = lua_function_ref;
+            continue; /* value was already popped, so no need to pop it out. */
+        } else if (!strcasecmp(key, "flags")) {
+            if (!lua_istable(lua, -1)) {
+                err = "flags argument to redis.register_function must be a table representing function flags";
+                goto error;
+            }
+            if (luaRegisterFunctionReadFlags(lua, &flags) != C_OK) {
+                err = "unknown flag given";
+                goto error;
+            }
+        } else {
+            /* unknown argument was given, raise an error */
+            err = "unknown argument given to redis.register_function";
+            goto error;
+        }
+        lua_pop(lua, 1); /* pop the value to continue the iteration */
+    }
+
+    if (!name) {
+        err = "redis.register_function must get a function name argument";
+        goto error;
+    }
+
+    if (!lua_f_ctx) {
+        err = "redis.register_function must get a callback argument";
+        goto error;
+    }
+
+    luaRegisterFunctionArgsInitialize(register_f_args, name, desc, lua_f_ctx, flags);
+
+    return C_OK;
+
+error:
+    if (name) sdsfree(name);
+    if (desc) sdsfree(desc);
+    if (lua_f_ctx) {
+        lua_unref(lua, lua_f_ctx->lua_function_ref);
+        zfree(lua_f_ctx);
+    }
+    luaPushError(lua, err);
+    return C_ERR;
+}
+
+/* Parse the two-argument (positional) form of redis.register_function:
+ * (function_name, callback). No description or flags in this form.
+ * On success fill 'register_f_args' and return C_OK; on failure push an
+ * error on the Lua stack and return C_ERR. */
+static int luaRegisterFunctionReadPositionalArgs(lua_State *lua, registerFunctionArgs *register_f_args) {
+    char *err = NULL;
+    sds name = NULL;
+    sds desc = NULL; /* never set in this form; kept for the shared error path */
+    luaFunctionCtx *lua_f_ctx = NULL;
+    if (!(name = luaGetStringSds(lua, 1))) {
+        err = "first argument to redis.register_function must be a string";
+        goto error;
+    }
+
+    if (!lua_isfunction(lua, 2)) {
+        err = "second argument to redis.register_function must be a function";
+        goto error;
+    }
+
+    /* luaL_ref pops the callback (top of stack) and anchors it. */
+    int lua_function_ref = luaL_ref(lua, LUA_REGISTRYINDEX);
+
+    lua_f_ctx = zmalloc(sizeof(*lua_f_ctx));
+    lua_f_ctx->lua_function_ref = lua_function_ref;
+
+    luaRegisterFunctionArgsInitialize(register_f_args, name, NULL, lua_f_ctx, 0);
+
+    return C_OK;
+
+error:
+    if (name) sdsfree(name);
+    if (desc) sdsfree(desc);
+    luaPushError(lua, err);
+    return C_ERR;
+}
+
+/* Dispatch redis.register_function argument parsing: one argument means
+ * the named-arguments table form, two means the positional form.
+ * Returns C_OK/C_ERR as the chosen parser does (error pushed on stack). */
+static int luaRegisterFunctionReadArgs(lua_State *lua, registerFunctionArgs *register_f_args) {
+    int argc = lua_gettop(lua);
+    if (argc < 1 || argc > 2) {
+        luaPushError(lua, "wrong number of arguments to redis.register_function");
+        return C_ERR;
+    }
+
+    if (argc == 1) {
+        return luaRegisterFunctionReadNamedArgs(lua, register_f_args);
+    } else {
+        return luaRegisterFunctionReadPositionalArgs(lua, register_f_args);
+    }
+}
+
+/* Lua C binding for redis.register_function. Only callable while a
+ * FUNCTION LOAD is in progress (detected via the load ctx saved in the
+ * registry by luaEngineCreate). Parses the arguments and registers the
+ * function into the library being loaded; raises a Lua error otherwise. */
+static int luaRegisterFunction(lua_State *lua) {
+    registerFunctionArgs register_f_args = {0};
+
+    loadCtx *load_ctx = luaGetFromRegistry(lua, REGISTRY_LOAD_CTX_NAME);
+    if (!load_ctx) {
+        luaPushError(lua, "redis.register_function can only be called on FUNCTION LOAD command");
+        return luaError(lua);
+    }
+
+    if (luaRegisterFunctionReadArgs(lua, &register_f_args) != C_OK) {
+        return luaError(lua);
+    }
+
+    sds err = NULL;
+    if (functionLibCreateFunction(register_f_args.name, register_f_args.lua_f_ctx, load_ctx->li, register_f_args.desc, register_f_args.f_flags, &err) != C_OK) {
+        /* On failure ownership was not transferred: free the parsed args. */
+        luaRegisterFunctionArgsDispose(lua, &register_f_args);
+        luaPushError(lua, err);
+        sdsfree(err);
+        return luaError(lua);
+    }
+
+    return 0;
+}
+
+/* Initialize Lua engine, should be called once on start.
+ * Sets up the Lua state, the load-time library API (register_function etc.),
+ * the error handler, protected globals, and registers the engine with the
+ * functions subsystem. Returns what functionsRegisterEngine returns. */
+int luaEngineInitEngine(void) {
+    luaEngineCtx *lua_engine_ctx = zmalloc(sizeof(*lua_engine_ctx));
+    lua_engine_ctx->lua = lua_open();
+
+    luaRegisterRedisAPI(lua_engine_ctx->lua);
+
+    /* Register the library commands table and fields and store it to registry */
+    lua_newtable(lua_engine_ctx->lua); /* load library globals */
+    lua_newtable(lua_engine_ctx->lua); /* load library `redis` table */
+
+    lua_pushstring(lua_engine_ctx->lua, "register_function");
+    lua_pushcfunction(lua_engine_ctx->lua, luaRegisterFunction);
+    lua_settable(lua_engine_ctx->lua, -3);
+
+    luaRegisterLogFunction(lua_engine_ctx->lua);
+    luaRegisterVersion(lua_engine_ctx->lua);
+
+    luaSetErrorMetatable(lua_engine_ctx->lua);
+    lua_setfield(lua_engine_ctx->lua, -2, REDIS_API_NAME);
+
+    luaSetErrorMetatable(lua_engine_ctx->lua);
+    luaSetTableProtectionRecursively(lua_engine_ctx->lua); /* protect load library globals */
+    lua_setfield(lua_engine_ctx->lua, LUA_REGISTRYINDEX, LIBRARY_API_NAME);
+
+    /* Save error handler to registry */
+    lua_pushstring(lua_engine_ctx->lua, REGISTRY_ERROR_HANDLER_NAME);
+    char *errh_func =       "local dbg = debug\n"
+                            "debug = nil\n"
+                            "local error_handler = function (err)\n"
+                            "  local i = dbg.getinfo(2,'nSl')\n"
+                            "  if i and i.what == 'C' then\n"
+                            "    i = dbg.getinfo(3,'nSl')\n"
+                            "  end\n"
+                            "  if type(err) ~= 'table' then\n"
+                            "    err = {err='ERR ' .. tostring(err)}"
+                            "  end"
+                            "  if i then\n"
+                            "    err['source'] = i.source\n"
+                            "    err['line'] = i.currentline\n"
+                            "  end"
+                            "  return err\n"
+                            "end\n"
+                            "return error_handler";
+    /* NOTE(review): the return values of luaL_loadbuffer/lua_pcall are
+     * not checked here; the handler source is a constant so failure is
+     * presumably impossible -- confirm or assert. */
+    luaL_loadbuffer(lua_engine_ctx->lua, errh_func, strlen(errh_func), "@err_handler_def");
+    lua_pcall(lua_engine_ctx->lua,0,1,0);
+    lua_settable(lua_engine_ctx->lua, LUA_REGISTRYINDEX);
+
+    lua_pushvalue(lua_engine_ctx->lua, LUA_GLOBALSINDEX);
+    luaSetErrorMetatable(lua_engine_ctx->lua);
+    luaSetTableProtectionRecursively(lua_engine_ctx->lua); /* protect globals */
+    lua_pop(lua_engine_ctx->lua, 1);
+
+    /* Save default globals to registry */
+    lua_pushvalue(lua_engine_ctx->lua, LUA_GLOBALSINDEX);
+    lua_setfield(lua_engine_ctx->lua, LUA_REGISTRYINDEX, GLOBALS_API_NAME);
+
+    /* save the engine_ctx on the registry so we can get it from the Lua interpreter */
+    luaSaveOnRegistry(lua_engine_ctx->lua, REGISTRY_ENGINE_CTX_NAME, lua_engine_ctx);
+
+    /* Create new empty table to be the new globals, we will be able to control the real globals
+     * using metatable */
+    lua_newtable(lua_engine_ctx->lua); /* new globals */
+    lua_newtable(lua_engine_ctx->lua); /* new globals metatable */
+    lua_pushvalue(lua_engine_ctx->lua, LUA_GLOBALSINDEX);
+    lua_setfield(lua_engine_ctx->lua, -2, "__index");
+    lua_enablereadonlytable(lua_engine_ctx->lua, -1, 1); /* protect the metatable */
+    lua_setmetatable(lua_engine_ctx->lua, -2);
+    lua_enablereadonlytable(lua_engine_ctx->lua, -1, 1); /* protect the new global table */
+    lua_replace(lua_engine_ctx->lua, LUA_GLOBALSINDEX); /* set new global table as the new globals */
+
+
+    /* Wire the engine vtable and hand it to the functions subsystem. */
+    engine *lua_engine = zmalloc(sizeof(*lua_engine));
+    *lua_engine = (engine) {
+        .engine_ctx = lua_engine_ctx,
+        .create = luaEngineCreate,
+        .call = luaEngineCall,
+        .get_used_memory = luaEngineGetUsedMemoy,
+        .get_function_memory_overhead = luaEngineFunctionMemoryOverhead,
+        .get_engine_memory_overhead = luaEngineMemoryOverhead,
+        .free_function = luaEngineFreeFunction,
+    };
+    return functionsRegisterEngine(LUA_ENGINE_NAME, lua_engine);
+}
diff --git a/src/functions.c b/src/functions.c
new file mode 100644
index 0000000..c858db9
--- /dev/null
+++ b/src/functions.c
@@ -0,0 +1,1139 @@
+/*
+ * Copyright (c) 2021, Redis Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "functions.h"
+#include "sds.h"
+#include "dict.h"
+#include "adlist.h"
+#include "atomicvar.h"
+
+/* Timeout (ms) handed to the engine when compiling a library on FUNCTION LOAD
+ * (set to 0, i.e. unlimited, for replication/AOF clients, see functionLoadCommand). */
+#define LOAD_TIMEOUT_MS 500
+
+/* Policy used by FUNCTION RESTORE to resolve collisions with existing
+ * libraries: flush everything first, append (abort on collision), or
+ * replace colliding libraries. */
+typedef enum {
+ restorePolicy_Flush, restorePolicy_Append, restorePolicy_Replace
+} restorePolicy;
+
+/* Memory (engineInfo structs, name sds, engine structs and engine-reported
+ * overhead) consumed by all registered engines, see functionsRegisterEngine(). */
+static size_t engine_cache_memory = 0;
+
+/* Forward declaration */
+static void engineFunctionDispose(dict *d, void *obj);
+static void engineStatsDispose(dict *d, void *obj);
+static void engineLibraryDispose(dict *d, void *obj);
+static int functionsVerifyName(sds name);
+
+/* Per-engine counters (libraries and functions loaded) kept inside a
+ * functionsLibCtx and reported by FUNCTION STATS. */
+typedef struct functionsLibEngineStats {
+ size_t n_lib;
+ size_t n_functions;
+} functionsLibEngineStats;
+
+struct functionsLibCtx {
+ dict *libraries; /* Library name -> Library object */
+ dict *functions; /* Function name -> Function object that can be used to run the function */
+ size_t cache_memory; /* Overhead memory (structs, dictionaries, ..) used by all the functions */
+ dict *engines_stats; /* Per engine statistics */
+};
+
+/* Metadata parsed from a library payload's "#!<engine> name=<name>" shebang
+ * line, see functionExtractLibMetaData().
+ * NOTE(review): "MataData" is a typo for "MetaData"; renaming would touch
+ * every user of the type, so it is only flagged here. */
+typedef struct functionsLibMataData {
+ sds engine;
+ sds name;
+ sds code;
+} functionsLibMataData;
+
+/* Engine name -> engineInfo. Engine names are looked up case-insensitively. */
+dictType engineDictType = {
+ dictSdsCaseHash, /* hash function */
+ dictSdsDup, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Function name -> functionInfo for a functionsLibCtx. Values are NOT freed
+ * here: the owning library's dict (libraryFunctionDictType) frees them. */
+dictType functionDictType = {
+ dictSdsCaseHash, /* hash function */
+ dictSdsDup, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare,/* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Engine name -> functionsLibEngineStats, owned by a functionsLibCtx. */
+dictType engineStatsDictType = {
+ dictSdsCaseHash, /* hash function */
+ dictSdsDup, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare,/* key compare */
+ dictSdsDestructor, /* key destructor */
+ engineStatsDispose, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Function name -> functionInfo inside a single library; owns the values. */
+dictType libraryFunctionDictType = {
+ dictSdsHash, /* hash function */
+ dictSdsDup, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ engineFunctionDispose,/* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Library name -> functionLibInfo; owns the library objects. */
+dictType librariesDictType = {
+ dictSdsHash, /* hash function */
+ dictSdsDup, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ engineLibraryDispose, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Dictionary of engines */
+static dict *engines = NULL;
+
+/* Libraries Ctx.
+ * Contains the dictionary that map a library name to library object,
+ * Contains the dictionary that map a function name to function object,
+ * and the cache memory used by all the functions */
+static functionsLibCtx *curr_functions_lib_ctx = NULL;
+
+/* Total heap footprint attributed to a single function: the functionInfo
+ * struct, its name/description sds, plus the engine-reported size of the
+ * compiled function object. Used for cache_memory accounting. */
+static size_t functionMallocSize(functionInfo *fi) {
+ return zmalloc_size(fi) + sdsZmallocSize(fi->name)
+ + (fi->desc ? sdsZmallocSize(fi->desc) : 0)
+ + fi->li->ei->engine->get_function_memory_overhead(fi->function);
+}
+
+/* Heap footprint of a library object: the struct plus its name and source
+ * code sds strings (the functions themselves are counted separately). */
+static size_t libraryMallocSize(functionLibInfo *li) {
+ return zmalloc_size(li) + sdsZmallocSize(li->name)
+ + sdsZmallocSize(li->code);
+}
+
+/* dict val destructor: release a per-engine stats record
+ * (engineStatsDictType). */
+static void engineStatsDispose(dict *d, void *obj) {
+ UNUSED(d);
+ zfree((functionsLibEngineStats *)obj);
+}
+
+/* dict val destructor: free a functionInfo, its sds fields, and the
+ * engine-side compiled function object it wraps. */
+static void engineFunctionDispose(dict *d, void *obj) {
+ UNUSED(d);
+ functionInfo *fi = obj;
+ if (fi == NULL) return;
+ sdsfree(fi->name);
+ if (fi->desc != NULL) sdsfree(fi->desc);
+ engine *e = fi->li->ei->engine;
+ e->free_function(e->engine_ctx, fi->function);
+ zfree(fi);
+}
+
+/* Free a library object: releasing its functions dict (which in turn frees
+ * each functionInfo), its name and code strings, and the struct itself.
+ * Accepts NULL as a no-op. */
+static void engineLibraryFree(functionLibInfo* lib) {
+ if (lib == NULL) return;
+ dictRelease(lib->functions);
+ sdsfree(lib->name);
+ sdsfree(lib->code);
+ zfree(lib);
+}
+
+/* dict val destructor wrapper around engineLibraryFree()
+ * (librariesDictType). */
+static void engineLibraryDispose(dict *d, void *obj) {
+ UNUSED(d);
+ engineLibraryFree((functionLibInfo *)obj);
+}
+
+/* Clear all the functions from the given library ctx.
+ *
+ * Empties the functions and libraries dictionaries (the libraries dict's
+ * val destructor frees the library objects and their functions), zeroes the
+ * per-engine statistics, and resets the ctx's cached memory accounting. */
+void functionsLibCtxClear(functionsLibCtx *lib_ctx) {
+ dictEmpty(lib_ctx->functions, NULL);
+ dictEmpty(lib_ctx->libraries, NULL);
+ dictIterator *iter = dictGetIterator(lib_ctx->engines_stats);
+ dictEntry *entry = NULL;
+ while ((entry = dictNext(iter))) {
+ functionsLibEngineStats *stats = dictGetVal(entry);
+ stats->n_functions = 0;
+ stats->n_lib = 0;
+ }
+ dictReleaseIterator(iter);
+ /* Fix: reset the accounting of the ctx we were given, not the global
+ * curr_functions_lib_ctx. The two differ when clearing a temporary ctx
+ * (e.g. libraryJoin / RDB loading), where the old code zeroed the global
+ * ctx's counter and left this ctx's counter stale. */
+ lib_ctx->cache_memory = 0;
+}
+
+/* Clear the current (global) functions ctx. With 'async' set, the old ctx is
+ * swapped out for a fresh one and handed to the lazy-free machinery instead
+ * of being cleared in place. */
+void functionsLibCtxClearCurrent(int async) {
+ if (!async) {
+ functionsLibCtxClear(curr_functions_lib_ctx);
+ return;
+ }
+ functionsLibCtx *old_l_ctx = curr_functions_lib_ctx;
+ curr_functions_lib_ctx = functionsLibCtxCreate();
+ freeFunctionsAsync(old_l_ctx);
+}
+
+/* Free the given functions ctx: clear all libraries/functions (releasing
+ * their memory via the dict destructors), then release the three
+ * dictionaries and the ctx struct itself. */
+void functionsLibCtxFree(functionsLibCtx *functions_lib_ctx) {
+ functionsLibCtxClear(functions_lib_ctx);
+ dictRelease(functions_lib_ctx->functions);
+ dictRelease(functions_lib_ctx->libraries);
+ dictRelease(functions_lib_ctx->engines_stats);
+ zfree(functions_lib_ctx);
+}
+
+/* Swap the current functions ctx with the given one.
+ * Frees the old (current) ctx and takes ownership of 'new_lib_ctx'. */
+void functionsLibCtxSwapWithCurrent(functionsLibCtx *new_lib_ctx) {
+ functionsLibCtxFree(curr_functions_lib_ctx);
+ curr_functions_lib_ctx = new_lib_ctx;
+}
+
+/* Return the current (global) functions ctx. */
+functionsLibCtx* functionsLibCtxGetCurrent(void) {
+ return curr_functions_lib_ctx;
+}
+
+/* Create a new, empty functions ctx with a zeroed stats record pre-inserted
+ * for every currently registered engine. */
+functionsLibCtx* functionsLibCtxCreate(void) {
+ functionsLibCtx *ret = zmalloc(sizeof(functionsLibCtx));
+ ret->libraries = dictCreate(&librariesDictType);
+ ret->functions = dictCreate(&functionDictType);
+ ret->engines_stats = dictCreate(&engineStatsDictType);
+ dictIterator *iter = dictGetIterator(engines);
+ dictEntry *entry = NULL;
+ while ((entry = dictNext(iter))) {
+ engineInfo *ei = dictGetVal(entry);
+ functionsLibEngineStats *stats = zcalloc(sizeof(*stats));
+ /* keys iterated from 'engines' are unique, so dictAdd cannot fail here */
+ dictAdd(ret->engines_stats, ei->name, stats);
+ }
+ dictReleaseIterator(iter);
+ ret->cache_memory = 0;
+ return ret;
+}
+
+/*
+ * Create a function inside the given library.
+ * On success, return C_OK.
+ * On error, return C_ERR and set the 'err' output parameter with a relevant
+ * error message.
+ *
+ * Note: the code assumes 'name' is NULL terminated but does not require it
+ * to be binary safe. The function verifies that the given name follows the
+ * naming format ([a-zA-Z0-9_]+) and returns an error if it does not.
+ *
+ * On success, ownership of 'name' and 'desc' passes to the library's
+ * functions dict (freed by engineFunctionDispose).
+ */
+int functionLibCreateFunction(sds name, void *function, functionLibInfo *li, sds desc, uint64_t f_flags, sds *err) {
+ if (functionsVerifyName(name) != C_OK) {
+ /* Fix: this validates a *function* name; the old message wrongly said
+ * "Library names" (copy-paste from the library-name check). */
+ *err = sdsnew("Function names can only contain letters, numbers, or underscores(_) and must be at least one character long");
+ return C_ERR;
+ }
+
+ if (dictFetchValue(li->functions, name)) {
+ *err = sdsnew("Function already exists in the library");
+ return C_ERR;
+ }
+
+ functionInfo *fi = zmalloc(sizeof(*fi));
+ *fi = (functionInfo) {
+ .name = name,
+ .function = function,
+ .li = li,
+ .desc = desc,
+ .f_flags = f_flags,
+ };
+
+ /* Uniqueness was checked above, so the add must succeed. */
+ int res = dictAdd(li->functions, fi->name, fi);
+ serverAssert(res == DICT_OK);
+
+ return C_OK;
+}
+
+/* Allocate a new, empty library object bound to the given engine,
+ * duplicating the caller's name and source code strings. */
+static functionLibInfo* engineLibraryCreate(sds name, engineInfo *ei, sds code) {
+ functionLibInfo *lib = zmalloc(sizeof(*lib));
+ *lib = (functionLibInfo) {
+ .ei = ei,
+ .name = sdsdup(name),
+ .code = sdsdup(code),
+ .functions = dictCreate(&libraryFunctionDictType),
+ };
+ return lib;
+}
+
+/* Detach a library (and all its functions) from the given lib ctx WITHOUT
+ * freeing the library object; ownership of 'li' passes to the caller.
+ * Also updates the ctx's memory accounting and per-engine stats. */
+static void libraryUnlink(functionsLibCtx *lib_ctx, functionLibInfo* li) {
+ dictIterator *iter = dictGetIterator(li->functions);
+ dictEntry *entry = NULL;
+ while ((entry = dictNext(iter))) {
+ functionInfo *fi = dictGetVal(entry);
+ int ret = dictDelete(lib_ctx->functions, fi->name);
+ serverAssert(ret == DICT_OK);
+ lib_ctx->cache_memory -= functionMallocSize(fi);
+ }
+ dictReleaseIterator(iter);
+ /* Unlink the entry and NULL its value before freeing it so the dict's
+ * val destructor (engineLibraryDispose) does not free the library we
+ * are handing back to the caller. */
+ entry = dictUnlink(lib_ctx->libraries, li->name);
+ dictSetVal(lib_ctx->libraries, entry, NULL);
+ dictFreeUnlinkedEntry(lib_ctx->libraries, entry);
+ lib_ctx->cache_memory -= libraryMallocSize(li);
+
+ /* update stats */
+ functionsLibEngineStats *stats = dictFetchValue(lib_ctx->engines_stats, li->ei->name);
+ serverAssert(stats);
+ stats->n_lib--;
+ stats->n_functions -= dictSize(li->functions);
+}
+
+/* Attach a library (and all its functions) to the given lib ctx; the ctx
+ * takes ownership of 'li'. Callers must have already verified there are no
+ * name collisions (the dictAdd return values are not checked here), and
+ * must update server.dirty / replication as appropriate. */
+static void libraryLink(functionsLibCtx *lib_ctx, functionLibInfo* li) {
+ dictIterator *iter = dictGetIterator(li->functions);
+ dictEntry *entry = NULL;
+ while ((entry = dictNext(iter))) {
+ functionInfo *fi = dictGetVal(entry);
+ dictAdd(lib_ctx->functions, fi->name, fi);
+ lib_ctx->cache_memory += functionMallocSize(fi);
+ }
+ dictReleaseIterator(iter);
+
+ dictAdd(lib_ctx->libraries, li->name, li);
+ lib_ctx->cache_memory += libraryMallocSize(li);
+
+ /* update stats */
+ functionsLibEngineStats *stats = dictFetchValue(lib_ctx->engines_stats, li->ei->name);
+ serverAssert(stats);
+ stats->n_lib++;
+ stats->n_functions += dictSize(li->functions);
+}
+
+/* Takes all libraries from lib_ctx_src and add to lib_ctx_dst.
+ * On collision, if 'replace' argument is true, replace the existing library with the new one.
+ * Otherwise abort and leave 'lib_ctx_dst' and 'lib_ctx_src' untouched.
+ * Return C_OK on success and C_ERR if aborted. If C_ERR is returned, set a relevant
+ * error message on the 'err' out parameter.
+ *
+ * Implementation note: replaced libraries are parked on 'old_libraries_list'
+ * so that a later failure (function-name collision) can re-link them,
+ * restoring 'lib_ctx_dst' to its original state.
+ * */
+static int libraryJoin(functionsLibCtx *functions_lib_ctx_dst, functionsLibCtx *functions_lib_ctx_src, int replace, sds *err) {
+ int ret = C_ERR;
+ dictIterator *iter = NULL;
+ /* Stores the libraries we need to replace in case a revert is required.
+ * Only initialized when needed */
+ list *old_libraries_list = NULL;
+ dictEntry *entry = NULL;
+ /* Pass 1: detect library-name collisions; under 'replace', unlink the old
+ * library and remember it for a possible revert. */
+ iter = dictGetIterator(functions_lib_ctx_src->libraries);
+ while ((entry = dictNext(iter))) {
+ functionLibInfo *li = dictGetVal(entry);
+ functionLibInfo *old_li = dictFetchValue(functions_lib_ctx_dst->libraries, li->name);
+ if (old_li) {
+ if (!replace) {
+ /* library already exists, failed the restore. */
+ *err = sdscatfmt(sdsempty(), "Library %s already exists", li->name);
+ goto done;
+ } else {
+ if (!old_libraries_list) {
+ old_libraries_list = listCreate();
+ listSetFreeMethod(old_libraries_list, (void (*)(void*))engineLibraryFree);
+ }
+ libraryUnlink(functions_lib_ctx_dst, old_li);
+ listAddNodeTail(old_libraries_list, old_li);
+ }
+ }
+ }
+ dictReleaseIterator(iter);
+ iter = NULL;
+
+ /* Make sure no functions collision */
+ iter = dictGetIterator(functions_lib_ctx_src->functions);
+ while ((entry = dictNext(iter))) {
+ functionInfo *fi = dictGetVal(entry);
+ if (dictFetchValue(functions_lib_ctx_dst->functions, fi->name)) {
+ *err = sdscatfmt(sdsempty(), "Function %s already exists", fi->name);
+ goto done;
+ }
+ }
+ dictReleaseIterator(iter);
+ iter = NULL;
+
+ /* No collision, it is safe to link all the new libraries. */
+ iter = dictGetIterator(functions_lib_ctx_src->libraries);
+ while ((entry = dictNext(iter))) {
+ functionLibInfo *li = dictGetVal(entry);
+ libraryLink(functions_lib_ctx_dst, li);
+ /* NULL the src entry's value so clearing src below will not free the
+ * library object now owned by dst. */
+ dictSetVal(functions_lib_ctx_src->libraries, entry, NULL);
+ }
+ dictReleaseIterator(iter);
+ iter = NULL;
+
+ functionsLibCtxClear(functions_lib_ctx_src);
+ if (old_libraries_list) {
+ /* Success: the replaced libraries are no longer needed, free them. */
+ listRelease(old_libraries_list);
+ old_libraries_list = NULL;
+ }
+ ret = C_OK;
+
+done:
+ if (iter) dictReleaseIterator(iter);
+ if (old_libraries_list) {
+ /* Failure path: link back all the libraries we unlinked above. */
+ while (listLength(old_libraries_list) > 0) {
+ listNode *head = listFirst(old_libraries_list);
+ functionLibInfo *li = listNodeValue(head);
+ listNodeValue(head) = NULL;
+ libraryLink(functions_lib_ctx_dst, li);
+ listDelNode(old_libraries_list, head);
+ }
+ listRelease(old_libraries_list);
+ }
+ return ret;
+}
+
+/* Register an engine, should be called once by the engine on startup and give the following:
+ *
+ * - engine_name - name of the engine to register
+ * - engine_ctx - the engine ctx that should be used by Redis to interact with the engine
+ *
+ * Returns C_OK on success, C_ERR if an engine with the same name was already
+ * registered. On success ownership of 'engine' passes to this module. */
+int functionsRegisterEngine(const char *engine_name, engine *engine) {
+ sds engine_name_sds = sdsnew(engine_name);
+ if (dictFetchValue(engines, engine_name_sds)) {
+ serverLog(LL_WARNING, "Same engine was registered twice");
+ sdsfree(engine_name_sds);
+ return C_ERR;
+ }
+
+ /* Dedicated fake client used as the execution context when running this
+ * engine's functions (passed to scriptPrepareForRun, see fcallCommandGeneric). */
+ client *c = createClient(NULL);
+ c->flags |= (CLIENT_DENY_BLOCKING | CLIENT_SCRIPT);
+ engineInfo *ei = zmalloc(sizeof(*ei));
+ *ei = (engineInfo ) { .name = engine_name_sds, .engine = engine, .c = c,};
+
+ dictAdd(engines, engine_name_sds, ei);
+
+ /* Account the engine's fixed overhead in the global counter reported by
+ * functionsMemoryOverhead(). */
+ engine_cache_memory += zmalloc_size(ei) + sdsZmallocSize(ei->name) +
+ zmalloc_size(engine) +
+ engine->get_engine_memory_overhead(engine->engine_ctx);
+
+ return C_OK;
+}
+
+/*
+ * FUNCTION STATS
+ *
+ * Reply with a map containing:
+ * - running_script: nil, or name/command/duration of the running function
+ * - engines: per-engine library and function counts
+ * Rejected while an EVAL script is running (stats describe functions only).
+ */
+void functionStatsCommand(client *c) {
+ if (scriptIsRunning() && scriptIsEval()) {
+ addReplyErrorObject(c, shared.slowevalerr);
+ return;
+ }
+
+ addReplyMapLen(c, 2);
+
+ addReplyBulkCString(c, "running_script");
+ if (!scriptIsRunning()) {
+ addReplyNull(c);
+ } else {
+ addReplyMapLen(c, 3);
+ addReplyBulkCString(c, "name");
+ addReplyBulkCString(c, scriptCurrFunction());
+ addReplyBulkCString(c, "command");
+ client *script_client = scriptGetCaller();
+ addReplyArrayLen(c, script_client->argc);
+ for (int i = 0 ; i < script_client->argc ; ++i) {
+ addReplyBulkCBuffer(c, script_client->argv[i]->ptr, sdslen(script_client->argv[i]->ptr));
+ }
+ addReplyBulkCString(c, "duration_ms");
+ addReplyLongLong(c, scriptRunDuration());
+ }
+
+ addReplyBulkCString(c, "engines");
+ addReplyMapLen(c, dictSize(engines));
+ dictIterator *iter = dictGetIterator(engines);
+ dictEntry *entry = NULL;
+ while ((entry = dictNext(iter))) {
+ engineInfo *ei = dictGetVal(entry);
+ addReplyBulkCString(c, ei->name);
+ addReplyMapLen(c, 2);
+ /* Every registered engine has a stats record in the current ctx,
+ * inserted by functionsLibCtxCreate(). */
+ functionsLibEngineStats *e_stats = dictFetchValue(curr_functions_lib_ctx->engines_stats, ei->name);
+ addReplyBulkCString(c, "libraries_count");
+ addReplyLongLong(c, e_stats->n_lib);
+ addReplyBulkCString(c, "functions_count");
+ addReplyLongLong(c, e_stats->n_functions);
+ }
+ dictReleaseIterator(iter);
+}
+
+/* Reply with the set of script flags declared on the given function
+ * (as status strings from the scripts_flags_def table). */
+static void functionListReplyFlags(client *c, functionInfo *fi) {
+ scriptFlag *flag;
+ int nflags = 0;
+
+ /* The reply is a set, so its length must be known up front. */
+ for (flag = scripts_flags_def; flag->str; ++flag) {
+ if (fi->f_flags & flag->flag) nflags++;
+ }
+
+ addReplySetLen(c, nflags);
+
+ for (flag = scripts_flags_def; flag->str; ++flag) {
+ if (fi->f_flags & flag->flag) addReplyStatus(c, flag->str);
+ }
+}
+
+/*
+ * FUNCTION LIST [LIBRARYNAME PATTERN] [WITHCODE]
+ *
+ * Return general information about all the libraries:
+ * * Library name
+ * * The engine used to run the Library
+ * * Library description
+ * * Functions list
+ * * Library code (if WITHCODE is given)
+ *
+ * It is also possible to give a library name pattern using the
+ * LIBRARYNAME argument; if given, return only libraries
+ * that match the given pattern.
+ */
+void functionListCommand(client *c) {
+ int with_code = 0;
+ sds library_name = NULL;
+ /* Parse the optional WITHCODE and LIBRARYNAME <pattern> arguments,
+ * in any order, each at most once. */
+ for (int i = 2 ; i < c->argc ; ++i) {
+ robj *next_arg = c->argv[i];
+ if (!with_code && !strcasecmp(next_arg->ptr, "withcode")) {
+ with_code = 1;
+ continue;
+ }
+ if (!library_name && !strcasecmp(next_arg->ptr, "libraryname")) {
+ if (i >= c->argc - 1) {
+ addReplyError(c, "library name argument was not given");
+ return;
+ }
+ library_name = c->argv[++i]->ptr;
+ continue;
+ }
+ addReplyErrorSds(c, sdscatfmt(sdsempty(), "Unknown argument %s", next_arg->ptr));
+ return;
+ }
+ size_t reply_len = 0;
+ void *len_ptr = NULL;
+ if (library_name) {
+ /* With a pattern, the number of matches is unknown until iteration
+ * completes, so use a deferred reply length. */
+ len_ptr = addReplyDeferredLen(c);
+ } else {
+ /* If no pattern is asked we know the reply len and we can just set it */
+ addReplyArrayLen(c, dictSize(curr_functions_lib_ctx->libraries));
+ }
+ dictIterator *iter = dictGetIterator(curr_functions_lib_ctx->libraries);
+ dictEntry *entry = NULL;
+ while ((entry = dictNext(iter))) {
+ functionLibInfo *li = dictGetVal(entry);
+ if (library_name) {
+ if (!stringmatchlen(library_name, sdslen(library_name), li->name, sdslen(li->name), 1)) {
+ continue;
+ }
+ }
+ ++reply_len;
+ addReplyMapLen(c, with_code? 4 : 3);
+ addReplyBulkCString(c, "library_name");
+ addReplyBulkCBuffer(c, li->name, sdslen(li->name));
+ addReplyBulkCString(c, "engine");
+ addReplyBulkCBuffer(c, li->ei->name, sdslen(li->ei->name));
+
+ addReplyBulkCString(c, "functions");
+ addReplyArrayLen(c, dictSize(li->functions));
+ dictIterator *functions_iter = dictGetIterator(li->functions);
+ dictEntry *function_entry = NULL;
+ while ((function_entry = dictNext(functions_iter))) {
+ functionInfo *fi = dictGetVal(function_entry);
+ addReplyMapLen(c, 3);
+ addReplyBulkCString(c, "name");
+ addReplyBulkCBuffer(c, fi->name, sdslen(fi->name));
+ addReplyBulkCString(c, "description");
+ if (fi->desc) {
+ addReplyBulkCBuffer(c, fi->desc, sdslen(fi->desc));
+ } else {
+ addReplyNull(c);
+ }
+ addReplyBulkCString(c, "flags");
+ functionListReplyFlags(c, fi);
+ }
+ dictReleaseIterator(functions_iter);
+
+ if (with_code) {
+ addReplyBulkCString(c, "library_code");
+ addReplyBulkCBuffer(c, li->code, sdslen(li->code));
+ }
+ }
+ dictReleaseIterator(iter);
+ if (len_ptr) {
+ setDeferredArrayLen(c, len_ptr, reply_len);
+ }
+}
+
+/*
+ * FUNCTION DELETE <LIBRARY NAME>
+ *
+ * Delete a library (and all the functions it registered) by name.
+ */
+void functionDeleteCommand(client *c) {
+ /* Note: despite the variable name, argv[2] is the *library* name. */
+ robj *function_name = c->argv[2];
+ functionLibInfo *li = dictFetchValue(curr_functions_lib_ctx->libraries, function_name->ptr);
+ if (!li) {
+ addReplyError(c, "Library not found");
+ return;
+ }
+
+ libraryUnlink(curr_functions_lib_ctx, li);
+ engineLibraryFree(li);
+ /* Indicate that the command changed the data so it will be replicated and
+ * counted as a data change (for persistence configuration) */
+ server.dirty++;
+ addReply(c, shared.ok);
+}
+
+/* FUNCTION KILL
+ *
+ * Kill the currently running function. NOTE(review): the 0 flag is presumably
+ * what distinguishes this from SCRIPT KILL -- confirm against scriptKill(). */
+void functionKillCommand(client *c) {
+ scriptKill(c, 0);
+}
+
+/* Try to extract command flags if we can, returns the modified flags.
+ * Note that it does not guarantee the command arguments are right.
+ * The looked-up dict entry is cached on c->cur_script so the subsequent
+ * fcallCommandGeneric() call can skip the lookup. */
+uint64_t fcallGetCommandFlags(client *c, uint64_t cmd_flags) {
+ robj *function_name = c->argv[1];
+ c->cur_script = dictFind(curr_functions_lib_ctx->functions, function_name->ptr);
+ if (!c->cur_script)
+ return cmd_flags;
+ functionInfo *fi = dictGetVal(c->cur_script);
+ uint64_t script_flags = fi->f_flags;
+ return scriptFlagsToCmdFlags(cmd_flags, script_flags);
+}
+
+/* Shared implementation of FCALL / FCALL_RO: look up the function, validate
+ * the numkeys argument, and hand execution to the owning engine. 'ro' is
+ * non-zero for the read-only variant (FCALL_RO). */
+static void fcallCommandGeneric(client *c, int ro) {
+ /* Functions need to be fed to monitors before the commands they execute. */
+ replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
+
+ robj *function_name = c->argv[1];
+ /* Reuse the dict entry cached by fcallGetCommandFlags() when available. */
+ dictEntry *de = c->cur_script;
+ if (!de)
+ de = dictFind(curr_functions_lib_ctx->functions, function_name->ptr);
+ if (!de) {
+ addReplyError(c, "Function not found");
+ return;
+ }
+ functionInfo *fi = dictGetVal(de);
+ engine *engine = fi->li->ei->engine;
+
+ long long numkeys;
+ /* Get the number of arguments that are keys */
+ if (getLongLongFromObject(c->argv[2], &numkeys) != C_OK) {
+ addReplyError(c, "Bad number of keys provided");
+ return;
+ }
+ if (numkeys > (c->argc - 3)) {
+ addReplyError(c, "Number of keys can't be greater than number of args");
+ return;
+ } else if (numkeys < 0) {
+ addReplyError(c, "Number of keys can't be negative");
+ return;
+ }
+
+ scriptRunCtx run_ctx;
+
+ /* Run on the engine's dedicated client (fi->li->ei->c) on behalf of 'c'. */
+ if (scriptPrepareForRun(&run_ctx, fi->li->ei->c, c, fi->name, fi->f_flags, ro) != C_OK)
+ return;
+
+ /* argv[3 .. 3+numkeys) are keys, the rest are regular arguments. */
+ engine->call(&run_ctx, engine->engine_ctx, fi->function, c->argv + 3, numkeys,
+ c->argv + 3 + numkeys, c->argc - 3 - numkeys);
+ scriptResetRun(&run_ctx);
+}
+
+/*
+ * FCALL <FUNCTION NAME> nkeys <key1 .. keyn> <arg1 .. argn>
+ *
+ * Invoke a function in read-write mode.
+ */
+void fcallCommand(client *c) {
+ fcallCommandGeneric(c, 0);
+}
+
+/*
+ * FCALL_RO <FUNCTION NAME> nkeys <key1 .. keyn> <arg1 .. argn>
+ *
+ * Invoke a function in read-only mode.
+ */
+void fcallroCommand(client *c) {
+ fcallCommandGeneric(c, 1);
+}
+
+/*
+ * FUNCTION DUMP
+ *
+ * Returns a binary payload representing all the libraries.
+ * Can be loaded using FUNCTION RESTORE
+ *
+ * The payload structure is the same as on RDB. Each library
+ * is saved separately with the following information:
+ * * Library name
+ * * Engine name
+ * * Library description
+ * * Library code
+ * RDB_OPCODE_FUNCTION2 is saved before each library to present
+ * that the payload is a library.
+ * RDB version and crc64 is saved at the end of the payload.
+ * The RDB version is saved for backward compatibility.
+ * crc64 is saved so we can verify the payload content.
+ */
+void functionDumpCommand(client *c) {
+ unsigned char buf[2];
+ uint64_t crc;
+ rio payload;
+ rioInitWithBuffer(&payload, sdsempty());
+
+ rdbSaveFunctions(&payload);
+
+ /* RDB version, appended as a 2-byte little-endian footer. */
+ buf[0] = RDB_VERSION & 0xff;
+ buf[1] = (RDB_VERSION >> 8) & 0xff;
+ payload.io.buffer.ptr = sdscatlen(payload.io.buffer.ptr, buf, 2);
+
+ /* CRC64 over everything written so far (including the version footer),
+ * stored little-endian -- same layout verifyDumpPayload() expects. */
+ crc = crc64(0, (unsigned char*) payload.io.buffer.ptr,
+ sdslen(payload.io.buffer.ptr));
+ memrev64ifbe(&crc);
+ payload.io.buffer.ptr = sdscatlen(payload.io.buffer.ptr, &crc, 8);
+
+ addReplyBulkSds(c, payload.io.buffer.ptr);
+}
+
+/*
+ * FUNCTION RESTORE <payload> [FLUSH|APPEND|REPLACE]
+ *
+ * Restore the libraries represented by the given payload (see FUNCTION DUMP).
+ * A restore policy can be given to control how to handle existing libraries
+ * (default APPEND):
+ * * FLUSH: delete all existing libraries.
+ * * APPEND: appends the restored libraries to the existing libraries. On collision, abort.
+ * * REPLACE: appends the restored libraries to the existing libraries.
+ * On collision, replace the old libraries with the new libraries.
+ */
+void functionRestoreCommand(client *c) {
+ if (c->argc > 4) {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+
+ /* Fix: variable renamed from the misspelled 'restore_replicy'. */
+ restorePolicy restore_policy = restorePolicy_Append; /* default policy: APPEND */
+ sds data = c->argv[2]->ptr;
+ size_t data_len = sdslen(data);
+ rio payload;
+ sds err = NULL;
+
+ if (c->argc == 4) {
+ const char *restore_policy_str = c->argv[3]->ptr;
+ if (!strcasecmp(restore_policy_str, "append")) {
+ restore_policy = restorePolicy_Append;
+ } else if (!strcasecmp(restore_policy_str, "replace")) {
+ restore_policy = restorePolicy_Replace;
+ } else if (!strcasecmp(restore_policy_str, "flush")) {
+ restore_policy = restorePolicy_Flush;
+ } else {
+ addReplyError(c, "Wrong restore policy given, value should be either FLUSH, APPEND or REPLACE.");
+ return;
+ }
+ }
+
+ /* Validate the 2-byte RDB version + 8-byte CRC64 footer before parsing. */
+ uint16_t rdbver;
+ if (verifyDumpPayload((unsigned char*)data, data_len, &rdbver) != C_OK) {
+ addReplyError(c, "DUMP payload version or checksum are wrong");
+ return;
+ }
+
+ /* Load into a temporary ctx first so a mid-payload failure never leaves
+ * the current ctx half-updated. */
+ functionsLibCtx *functions_lib_ctx = functionsLibCtxCreate();
+ rioInitWithBuffer(&payload, data);
+
+ /* Read until reaching last 10 bytes that should contain RDB version and checksum. */
+ while (data_len - payload.io.buffer.pos > 10) {
+ int type;
+ if ((type = rdbLoadType(&payload)) == -1) {
+ err = sdsnew("can not read data type");
+ goto load_error;
+ }
+ if (type == RDB_OPCODE_FUNCTION_PRE_GA) {
+ err = sdsnew("Pre-GA function format not supported");
+ goto load_error;
+ }
+ if (type != RDB_OPCODE_FUNCTION2) {
+ err = sdsnew("given type is not a function");
+ goto load_error;
+ }
+ if (rdbFunctionLoad(&payload, rdbver, functions_lib_ctx, RDBFLAGS_NONE, &err) != C_OK) {
+ if (!err) {
+ err = sdsnew("failed loading the given functions payload");
+ }
+ goto load_error;
+ }
+ }
+
+ if (restore_policy == restorePolicy_Flush) {
+ functionsLibCtxSwapWithCurrent(functions_lib_ctx);
+ functions_lib_ctx = NULL; /* avoid releasing the f_ctx in the end */
+ } else {
+ if (libraryJoin(curr_functions_lib_ctx, functions_lib_ctx, restore_policy == restorePolicy_Replace, &err) != C_OK) {
+ goto load_error;
+ }
+ }
+
+ /* Indicate that the command changed the data so it will be replicated and
+ * counted as a data change (for persistence configuration) */
+ server.dirty++;
+
+load_error:
+ if (err) {
+ addReplyErrorSds(c, err);
+ } else {
+ addReply(c, shared.ok);
+ }
+ if (functions_lib_ctx) {
+ functionsLibCtxFree(functions_lib_ctx);
+ }
+}
+
+/* FUNCTION FLUSH [ASYNC | SYNC]
+ *
+ * Delete all libraries. Without an explicit mode, the
+ * lazyfree-lazy-user-flush configuration decides sync vs async. */
+void functionFlushCommand(client *c) {
+ if (c->argc > 3) {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+
+ int async;
+ if (c->argc == 2) {
+ async = server.lazyfree_lazy_user_flush ? 1 : 0;
+ } else if (!strcasecmp(c->argv[2]->ptr,"sync")) {
+ async = 0;
+ } else if (!strcasecmp(c->argv[2]->ptr,"async")) {
+ async = 1;
+ } else {
+ addReplyError(c,"FUNCTION FLUSH only supports SYNC|ASYNC option");
+ return;
+ }
+
+ functionsLibCtxClearCurrent(async);
+
+ /* Indicate that the command changed the data so it will be replicated and
+ * counted as a data change (for persistence configuration) */
+ server.dirty++;
+ addReply(c,shared.ok);
+}
+
+/* FUNCTION HELP
+ *
+ * Reply with the usage help text for every FUNCTION subcommand. */
+void functionHelpCommand(client *c) {
+ const char *help[] = {
+"LOAD [REPLACE] <FUNCTION CODE>",
+" Create a new library with the given library name and code.",
+"DELETE <LIBRARY NAME>",
+" Delete the given library.",
+"LIST [LIBRARYNAME PATTERN] [WITHCODE]",
+" Return general information on all the libraries:",
+" * Library name",
+" * The engine used to run the Library",
+" * Library description",
+" * Functions list",
+" * Library code (if WITHCODE is given)",
+" It also possible to get only function that matches a pattern using LIBRARYNAME argument.",
+"STATS",
+" Return information about the current function running:",
+" * Function name",
+" * Command used to run the function",
+" * Duration in MS that the function is running",
+" If no function is running, return nil",
+" In addition, returns a list of available engines.",
+"KILL",
+" Kill the current running function.",
+"FLUSH [ASYNC|SYNC]",
+" Delete all the libraries.",
+" When called without the optional mode argument, the behavior is determined by the",
+" lazyfree-lazy-user-flush configuration directive. Valid modes are:",
+" * ASYNC: Asynchronously flush the libraries.",
+" * SYNC: Synchronously flush the libraries.",
+"DUMP",
+" Return a serialized payload representing the current libraries, can be restored using FUNCTION RESTORE command",
+"RESTORE <PAYLOAD> [FLUSH|APPEND|REPLACE]",
+" Restore the libraries represented by the given payload, it is possible to give a restore policy to",
+" control how to handle existing libraries (default APPEND):",
+" * FLUSH: delete all existing libraries.",
+" * APPEND: appends the restored libraries to the existing libraries. On collision, abort.",
+" * REPLACE: appends the restored libraries to the existing libraries, On collision, replace the old",
+" libraries with the new libraries (notice that even on this option there is a chance of failure",
+" in case of functions name collision with another library).",
+NULL };
+ addReplyHelp(c, help);
+}
+
+/* Verify that the name matches [a-zA-Z0-9_]+ i.e. it is non-empty and
+ * contains only letters, digits, or underscores. (The previous comment's
+ * regex "[a-zA-Z0-9_][a-zA-Z0-9_]?" wrongly implied a max length of 2; the
+ * code accepts any length >= 1.) Used for both library and function names.
+ * Returns C_OK if valid, C_ERR otherwise. */
+static int functionsVerifyName(sds name) {
+ if (sdslen(name) == 0) {
+ return C_ERR;
+ }
+ for (size_t i = 0 ; i < sdslen(name) ; ++i) {
+ char curr_char = name[i];
+ if ((curr_char >= 'a' && curr_char <= 'z') ||
+ (curr_char >= 'A' && curr_char <= 'Z') ||
+ (curr_char >= '0' && curr_char <= '9') ||
+ (curr_char == '_'))
+ {
+ continue;
+ }
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+/* Parse the "#!<engine> name=<library name>" shebang line at the start of a
+ * library payload into 'md' (engine, name, and the remaining code).
+ * On success return C_OK; the caller owns md's fields and must release them
+ * with functionFreeLibMetaData(). On failure return C_ERR and set 'err'. */
+int functionExtractLibMetaData(sds payload, functionsLibMataData *md, sds *err) {
+ sds name = NULL;
+ sds desc = NULL; /* NOTE(review): never assigned; vestigial, freed as NULL on error */
+ sds engine = NULL;
+ sds code = NULL; /* NOTE(review): never assigned; vestigial, freed as NULL on error */
+ if (strncmp(payload, "#!", 2) != 0) {
+ *err = sdsnew("Missing library metadata");
+ return C_ERR;
+ }
+ char *shebang_end = strchr(payload, '\n');
+ if (shebang_end == NULL) {
+ *err = sdsnew("Invalid library metadata");
+ return C_ERR;
+ }
+ size_t shebang_len = shebang_end - payload;
+ sds shebang = sdsnewlen(payload, shebang_len);
+ int numparts;
+ sds *parts = sdssplitargs(shebang, &numparts);
+ sdsfree(shebang);
+ if (!parts || numparts == 0) {
+ *err = sdsnew("Invalid library metadata");
+ sdsfreesplitres(parts, numparts);
+ return C_ERR;
+ }
+ /* parts[0] is "#!<engine>"; strip the leading "#!". */
+ engine = sdsdup(parts[0]);
+ sdsrange(engine, 2, -1);
+ for (int i = 1 ; i < numparts ; ++i) {
+ sds part = parts[i];
+ if (strncasecmp(part, "name=", 5) == 0) {
+ if (name) {
+ *err = sdscatfmt(sdsempty(), "Invalid metadata value, name argument was given multiple times");
+ goto error;
+ }
+ name = sdsdup(part);
+ sdsrange(name, 5, -1);
+ continue;
+ }
+ *err = sdscatfmt(sdsempty(), "Invalid metadata value given: %s", part);
+ goto error;
+ }
+
+ if (!name) {
+ *err = sdsnew("Library name was not given");
+ goto error;
+ }
+
+ sdsfreesplitres(parts, numparts);
+
+ md->name = name;
+ /* The code starts at shebang_end, i.e. it keeps the '\n' that terminated
+ * the shebang line. */
+ md->code = sdsnewlen(shebang_end, sdslen(payload) - shebang_len);
+ md->engine = engine;
+
+ return C_OK;
+
+error:
+ if (name) sdsfree(name);
+ if (desc) sdsfree(desc);
+ if (engine) sdsfree(engine);
+ if (code) sdsfree(code);
+ sdsfreesplitres(parts, numparts);
+ return C_ERR;
+}
+
+/* Release the sds fields of a parsed library metadata struct
+ * (counterpart of functionExtractLibMetaData). */
+void functionFreeLibMetaData(functionsLibMataData *md) {
+ if (md->code != NULL) sdsfree(md->code);
+ if (md->name != NULL) sdsfree(md->name);
+ if (md->engine != NULL) sdsfree(md->engine);
+}
+
+/* Compile and save the given library, return the loaded library name on success
+ * and NULL on failure. In case of failure the err out param is set with a relevant
+ * error message. 'timeout' (ms) bounds the engine compilation step (0 = unlimited).
+ * With 'replace' set, an existing library of the same name is swapped out and
+ * restored if anything later fails. */
+sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibCtx *lib_ctx, size_t timeout) {
+ dictIterator *iter = NULL;
+ dictEntry *entry = NULL;
+ functionLibInfo *new_li = NULL;
+ functionLibInfo *old_li = NULL;
+ functionsLibMataData md = {0};
+ if (functionExtractLibMetaData(code, &md, err) != C_OK) {
+ return NULL;
+ }
+
+ /* Relies on C_OK == 0: any nonzero return (C_ERR) means invalid name. */
+ if (functionsVerifyName(md.name)) {
+ *err = sdsnew("Library names can only contain letters, numbers, or underscores(_) and must be at least one character long");
+ goto error;
+ }
+
+ engineInfo *ei = dictFetchValue(engines, md.engine);
+ if (!ei) {
+ *err = sdscatfmt(sdsempty(), "Engine '%S' not found", md.engine);
+ goto error;
+ }
+ engine *engine = ei->engine;
+
+ old_li = dictFetchValue(lib_ctx->libraries, md.name);
+ if (old_li && !replace) {
+ /* NULL old_li so the error path will not re-link a library we never unlinked. */
+ old_li = NULL;
+ *err = sdscatfmt(sdsempty(), "Library '%S' already exists", md.name);
+ goto error;
+ }
+
+ if (old_li) {
+ libraryUnlink(lib_ctx, old_li);
+ }
+
+ /* The engine compiles md.code; registered functions land in new_li->functions. */
+ new_li = engineLibraryCreate(md.name, ei, code);
+ if (engine->create(engine->engine_ctx, new_li, md.code, timeout, err) != C_OK) {
+ goto error;
+ }
+
+ if (dictSize(new_li->functions) == 0) {
+ *err = sdsnew("No functions registered");
+ goto error;
+ }
+
+ /* Verify no duplicate functions */
+ iter = dictGetIterator(new_li->functions);
+ while ((entry = dictNext(iter))) {
+ functionInfo *fi = dictGetVal(entry);
+ if (dictFetchValue(lib_ctx->functions, fi->name)) {
+ /* functions name collision, abort. */
+ *err = sdscatfmt(sdsempty(), "Function %s already exists", fi->name);
+ goto error;
+ }
+ }
+ dictReleaseIterator(iter);
+ iter = NULL;
+
+ libraryLink(lib_ctx, new_li);
+
+ if (old_li) {
+ engineLibraryFree(old_li);
+ }
+
+ /* Hand md.name to the caller; NULL it so functionFreeLibMetaData won't free it. */
+ sds loaded_lib_name = md.name;
+ md.name = NULL;
+ functionFreeLibMetaData(&md);
+
+ return loaded_lib_name;
+
+error:
+ if (iter) dictReleaseIterator(iter);
+ if (new_li) engineLibraryFree(new_li);
+ /* Revert: restore the replaced library if we unlinked one. */
+ if (old_li) libraryLink(lib_ctx, old_li);
+ functionFreeLibMetaData(&md);
+ return NULL;
+}
+
+/*
+ * FUNCTION LOAD [REPLACE] <LIBRARY CODE>
+ * REPLACE - optional, replace existing library
+ * LIBRARY CODE - library code to pass to the engine
+ *
+ * On success replies with the loaded library's name.
+ */
+void functionLoadCommand(client *c) {
+ int replace = 0;
+ int argc_pos = 2;
+ /* Everything before the last argument must be a known option; the last
+ * argument is the library code itself. */
+ while (argc_pos < c->argc - 1) {
+ robj *next_arg = c->argv[argc_pos++];
+ if (!strcasecmp(next_arg->ptr, "replace")) {
+ replace = 1;
+ continue;
+ }
+ addReplyErrorFormat(c, "Unknown option given: %s", (char*)next_arg->ptr);
+ return;
+ }
+
+ if (argc_pos >= c->argc) {
+ addReplyError(c, "Function code is missing");
+ return;
+ }
+
+ robj *code = c->argv[argc_pos];
+ sds err = NULL;
+ sds library_name = NULL;
+ size_t timeout = LOAD_TIMEOUT_MS;
+ /* Replication / AOF loading must not be interrupted by the compile
+ * timeout, so disable it (0 = unlimited). */
+ if (mustObeyClient(c)) {
+ timeout = 0;
+ }
+ if (!(library_name = functionsCreateWithLibraryCtx(code->ptr, replace, &err, curr_functions_lib_ctx, timeout)))
+ {
+ addReplyErrorSds(c, err);
+ return;
+ }
+ /* Indicate that the command changed the data so it will be replicated and
+ * counted as a data change (for persistence configuration) */
+ server.dirty++;
+ addReplyBulkSds(c, library_name);
+}
+
+/* Return memory usage of all the engines combine */
+unsigned long functionsMemory(void) {
+ dictIterator *iter = dictGetIterator(engines);
+ dictEntry *entry = NULL;
+ size_t engines_nemory = 0;
+ while ((entry = dictNext(iter))) {
+ engineInfo *ei = dictGetVal(entry);
+ engine *engine = ei->engine;
+ engines_nemory += engine->get_used_memory(engine->engine_ctx);
+ }
+ dictReleaseIterator(iter);
+
+ return engines_nemory;
+}
+
+/* Return memory overhead of all the engines combined: the engines dict, the
+ * current ctx's functions dict and struct, the cached per-function/library
+ * accounting, and the engines' fixed overhead. */
+unsigned long functionsMemoryOverhead(void) {
+ size_t memory_overhead = dictMemUsage(engines);
+ memory_overhead += dictMemUsage(curr_functions_lib_ctx->functions);
+ memory_overhead += sizeof(functionsLibCtx);
+ memory_overhead += curr_functions_lib_ctx->cache_memory;
+ memory_overhead += engine_cache_memory;
+
+ return memory_overhead;
+}
+
+/* Returns the number of functions in the current ctx. */
+unsigned long functionsNum(void) {
+ return dictSize(curr_functions_lib_ctx->functions);
+}
+
+/* Returns the number of libraries in the current ctx. */
+unsigned long functionsLibNum(void) {
+ return dictSize(curr_functions_lib_ctx->libraries);
+}
+
+/* Returns the current ctx's libraries dict (library name -> functionLibInfo). */
+dict* functionsLibGet(void) {
+ return curr_functions_lib_ctx->libraries;
+}
+
+/* Returns the number of functions registered in the given ctx. */
+size_t functionsLibCtxfunctionsLen(functionsLibCtx *functions_ctx) {
+ return dictSize(functions_ctx->functions);
+}
+
+/* Initialize engine data structures.
+ * Should be called once on server initialization.
+ * Returns C_OK on success, C_ERR if the Lua engine failed to initialize. */
+int functionsInit(void) {
+ engines = dictCreate(&engineDictType);
+
+ if (luaEngineInitEngine() != C_OK) {
+ return C_ERR;
+ }
+
+ /* Must be initialized after engines initialization
+ * (functionsLibCtxCreate seeds one stats record per registered engine). */
+ curr_functions_lib_ctx = functionsLibCtxCreate();
+
+ return C_OK;
+}
diff --git a/src/functions.h b/src/functions.h
new file mode 100644
index 0000000..22af139
--- /dev/null
+++ b/src/functions.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021, Redis Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __FUNCTIONS_H_
+#define __FUNCTIONS_H_
+
+/*
+ * functions.c unit provides the Redis Functions API:
+ * * FUNCTION CREATE
+ * * FUNCTION CALL
+ * * FUNCTION DELETE
+ * * FUNCTION KILL
+ * * FUNCTION INFO
+ *
+ * Also contains implementation for:
+ * * Save/Load function from rdb
+ * * Register engines
+ */
+
+#include "server.h"
+#include "script.h"
+#include "redismodule.h"
+
+typedef struct functionLibInfo functionLibInfo;
+
+typedef struct engine {
+    /* Opaque engine-specific context, created on engine initialization. */
+    void *engine_ctx;
+
+    /* Create-library callback.
+     * engine_ctx - opaque struct that was created on engine initialization
+     * li - library information the engine populates when adding functions
+     * code - the library source code
+     * timeout - timeout for the library creation (0 for no timeout)
+     * err - on failure, set to an sds describing the error.
+     * Returns an int status code (the callback returns int, so on failure
+     * the caller must rely on the return value and *err, not on NULL). */
+    int (*create)(void *engine_ctx, functionLibInfo *li, sds code, size_t timeout, sds *err);
+
+    /* Invoke a compiled function. r_ctx is an opaque object (from the
+     * engine's point of view) that the engine uses to interact with Redis:
+     * running commands, setting the RESP version, or setting the
+     * replication mode.
+     */
+    void (*call)(scriptRunCtx *r_ctx, void *engine_ctx, void *compiled_function,
+            robj **keys, size_t nkeys, robj **args, size_t nargs);
+
+    /* Return the memory currently used by the engine's context. */
+    size_t (*get_used_memory)(void *engine_ctx);
+
+    /* Return memory overhead for a given compiled function; such memory
+     * is not counted as engine memory but as general struct memory that
+     * holds different information. */
+    size_t (*get_function_memory_overhead)(void *compiled_function);
+
+    /* Return memory overhead for the engine (size of the struct holding
+     * the engine). */
+    size_t (*get_engine_memory_overhead)(void *engine_ctx);
+
+    /* Free a compiled function previously created by this engine. */
+    void (*free_function)(void *engine_ctx, void *compiled_function);
+} engine;
+
+/* Hold information about a registered engine.
+ * Used in rdb.c so it must be declared here (in the header). */
+typedef struct engineInfo {
+    sds name;       /* Name of the engine */
+    engine *engine; /* Engine callbacks that allow interacting with the engine */
+    client *c;      /* Dedicated client used by the engine to run commands */
+} engineInfo;
+
+/* Hold information about a specific function.
+ * Used in rdb.c so it must be declared here (in the header). */
+typedef struct functionInfo {
+    sds name;            /* Function name */
+    void *function;      /* Opaque object set by the function's engine that allows it
+                            to run the function; usually the function's compiled code. */
+    functionLibInfo* li; /* Pointer to the library that created the function */
+    sds desc;            /* Function description */
+    uint64_t f_flags;    /* Function flags */
+} functionInfo;
+
+/* Hold information about a specific library.
+ * Used in rdb.c so it must be declared here (in the header). */
+struct functionLibInfo {
+    sds name;        /* Library name */
+    dict *functions; /* Dictionary of the functions the library defines */
+    engineInfo *ei;  /* Pointer to the engine this library runs on */
+    sds code;        /* Library source code */
+};
+
+/* Engine registration and library creation. */
+int functionsRegisterEngine(const char *engine_name, engine *engine_ctx);
+sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibCtx *lib_ctx, size_t timeout);
+/* Memory accounting and counters. */
+unsigned long functionsMemory(void);
+unsigned long functionsMemoryOverhead(void);
+unsigned long functionsNum(void);
+unsigned long functionsLibNum(void);
+dict* functionsLibGet(void);
+size_t functionsLibCtxfunctionsLen(functionsLibCtx *functions_ctx);
+/* Library context lifecycle management. */
+functionsLibCtx* functionsLibCtxGetCurrent(void);
+functionsLibCtx* functionsLibCtxCreate(void);
+void functionsLibCtxClearCurrent(int async);
+void functionsLibCtxFree(functionsLibCtx *lib_ctx);
+void functionsLibCtxClear(functionsLibCtx *lib_ctx);
+void functionsLibCtxSwapWithCurrent(functionsLibCtx *lib_ctx);
+
+/* Called by an engine's create callback to register a single function. */
+int functionLibCreateFunction(sds name, void *function, functionLibInfo *li, sds desc, uint64_t f_flags, sds *err);
+
+/* Initialization entry points. */
+int luaEngineInitEngine(void);
+int functionsInit(void);
+
+#endif /* __FUNCTIONS_H_ */
diff --git a/src/geo.c b/src/geo.c
new file mode 100644
index 0000000..ac25a20
--- /dev/null
+++ b/src/geo.c
@@ -0,0 +1,1005 @@
+/*
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "geo.h"
+#include "geohash_helper.h"
+#include "debugmacro.h"
+#include "pqsort.h"
+
+/* Things exported from t_zset.c only for geo.c, since it is the only other
+ * part of Redis that requires close zset introspection. */
+unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range);
+int zslValueLteMax(double value, zrangespec *spec);
+
+/* ====================================================================
+ * This file implements the following commands:
+ *
+ * - geoadd - add coordinates for value to geoset
+ * - georadius - search radius by coordinates in geoset
+ * - georadiusbymember - search radius based on geoset member position
+ * ==================================================================== */
+
+/* ====================================================================
+ * geoArray implementation
+ * ==================================================================== */
+
+/* Allocate and return an empty array of geoPoints. */
+geoArray *geoArrayCreate(void) {
+    geoArray *ga = zmalloc(sizeof(*ga));
+    ga->array = NULL; /* Lazily allocated on the first geoArrayAppend() call. */
+    ga->buckets = 0;
+    ga->used = 0;
+    return ga;
+}
+
+/* Append a new entry to the geoArray and populate it with the given data,
+ * growing the backing array geometrically (starting at 8 buckets) when full.
+ * Ownership of 'member' is transferred to the array. */
+geoPoint *geoArrayAppend(geoArray *ga, double *xy, double dist,
+                         double score, char *member)
+{
+    if (ga->used == ga->buckets) {
+        ga->buckets = ga->buckets ? ga->buckets * 2 : 8;
+        ga->array = zrealloc(ga->array, sizeof(geoPoint) * ga->buckets);
+    }
+    geoPoint *gp = &ga->array[ga->used++];
+    gp->longitude = xy[0];
+    gp->latitude = xy[1];
+    gp->dist = dist;
+    gp->member = member;
+    gp->score = score;
+    return gp;
+}
+
+/* Release a geoArray created with geoArrayCreate(), including the member
+ * strings still owned by each point. */
+void geoArrayFree(geoArray *ga) {
+    for (size_t i = 0; i < ga->used; i++)
+        sdsfree(ga->array[i].member);
+    zfree(ga->array);
+    zfree(ga);
+}
+
+/* ====================================================================
+ * Helpers
+ * ==================================================================== */
+/* Decode a geohash stored as a sorted set score (a 52 bit integer held in
+ * a double) into its longitude/latitude pair (xy[0] = long, xy[1] = lat).
+ * Callers treat a zero return as decoding failure. */
+int decodeGeohash(double bits, double *xy) {
+    GeoHashBits hash = { .bits = (uint64_t)bits, .step = GEO_STEP_MAX };
+    return geohashDecodeToLongLatWGS84(hash, xy);
+}
+
+/* Input Argument Helper */
+/* Parse argv[0] as longitude and argv[1] as latitude into xy[0]/xy[1],
+ * and validate them against the legal WGS84 coordinate ranges.
+ * On parse or range error an error reply is sent and C_ERR is returned,
+ * otherwise C_OK. */
+int extractLongLatOrReply(client *c, robj **argv, double *xy) {
+    int i;
+    for (i = 0; i < 2; i++) {
+        if (getDoubleFromObjectOrReply(c, argv[i], xy + i, NULL) !=
+            C_OK) {
+            return C_ERR;
+        }
+    }
+    /* Reject coordinates outside the representable longitude/latitude box. */
+    if (xy[0] < GEO_LONG_MIN || xy[0] > GEO_LONG_MAX ||
+        xy[1] < GEO_LAT_MIN || xy[1] > GEO_LAT_MAX) {
+        /* NOTE(review): the format string embeds a "-ERR" prefix and a
+         * "\r\n" trailer; presumably addReplyErrorFormat tolerates or
+         * strips these -- confirm against its implementation. */
+        addReplyErrorFormat(c,
+            "-ERR invalid longitude,latitude pair %f,%f\r\n",xy[0],xy[1]);
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* Input Argument Helper */
+/* Decode the long/lat pair of a zset member from its geohash score.
+ * Returns C_OK on successful decoding, otherwise C_ERR (member missing
+ * or its score cannot be decoded as a geohash). */
+int longLatFromMember(robj *zobj, robj *member, double *xy) {
+    double score = 0;
+
+    if (zsetScore(zobj, member->ptr, &score) == C_ERR ||
+        !decodeGeohash(score, xy))
+    {
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* Check that the unit argument matches one of the known units and return
+ * the conversion factor to meters (divide meters by this factor to convert
+ * into the requested unit).
+ *
+ * If the unit is not valid, an error is reported to the client and a value
+ * less than zero is returned. */
+double extractUnitOrReply(client *c, robj *unit) {
+    const char *u = unit->ptr;
+
+    if (strcasecmp(u, "m") == 0) return 1;
+    if (strcasecmp(u, "km") == 0) return 1000;
+    if (strcasecmp(u, "ft") == 0) return 0.3048;
+    if (strcasecmp(u, "mi") == 0) return 1609.34;
+
+    addReplyError(c,
+        "unsupported unit provided. please use M, KM, FT, MI");
+    return -1;
+}
+
+/* Input Argument Helper.
+ * Extract the distance from the specified two arguments starting at 'argv',
+ * in the form: <number> <unit>. Returns C_OK on success or C_ERR on failure
+ * (in which case an error reply has already been sent).
+ * *conversion is populated with the coefficient to use in order to convert
+ * meters to the unit; *radius with the raw distance in the given unit. */
+int extractDistanceOrReply(client *c, robj **argv,
+                           double *conversion, double *radius) {
+    double distance;
+    if (getDoubleFromObjectOrReply(c, argv[0], &distance,
+                                   "need numeric radius") != C_OK) {
+        return C_ERR;
+    }
+
+    /* A negative radius never matches anything and is treated as an error. */
+    if (distance < 0) {
+        addReplyError(c,"radius cannot be negative");
+        return C_ERR;
+    }
+    if (radius) *radius = distance;
+
+    /* extractUnitOrReply() sends the error reply itself on bad unit. */
+    double to_meters = extractUnitOrReply(c,argv[1]);
+    if (to_meters < 0) {
+        return C_ERR;
+    }
+
+    if (conversion) *conversion = to_meters;
+    return C_OK;
+}
+
+/* Input Argument Helper.
+ * Extract height and width from the specified three arguments starting at
+ * 'argv', in the form: <width> <height> <unit>. Returns C_OK on success or
+ * C_ERR on failure (an error reply has already been sent).
+ * *conversion is populated with the coefficient to use in order to convert
+ * meters to the unit; *width and *height with the raw box dimensions. */
+int extractBoxOrReply(client *c, robj **argv, double *conversion,
+                      double *width, double *height) {
+    double h, w;
+    if ((getDoubleFromObjectOrReply(c, argv[0], &w, "need numeric width") != C_OK) ||
+        (getDoubleFromObjectOrReply(c, argv[1], &h, "need numeric height") != C_OK)) {
+        return C_ERR;
+    }
+
+    /* Negative box dimensions never match anything; reject them. */
+    if (h < 0 || w < 0) {
+        addReplyError(c, "height or width cannot be negative");
+        return C_ERR;
+    }
+    if (height) *height = h;
+    if (width) *width = w;
+
+    /* extractUnitOrReply() sends the error reply itself on bad unit. */
+    double to_meters = extractUnitOrReply(c,argv[2]);
+    if (to_meters < 0) {
+        return C_ERR;
+    }
+
+    if (conversion) *conversion = to_meters;
+    return C_OK;
+}
+
+/* The default addReplyDouble has too much accuracy. We use this for
+ * returning location distances: "5.2145 meters away" is nicer than
+ * "5.2144992818115 meters away". Four digits after the dot keep the value
+ * decently accurate even when the unit is the kilometer. */
+void addReplyDoubleDistance(client *c, double d) {
+    char buf[128];
+    int len = fixedpoint_d2string(buf, sizeof(buf), d, 4);
+    addReplyBulkCBuffer(c, buf, len);
+}
+
+/* Helper function for geoGetPointsInRange(): given a sorted set score
+ * representing a point, and a GeoShape, checks if the point is within the
+ * search area.
+ *
+ * shape: the search area (circle or rectangle, see shape->type)
+ * score: the encoded version of lat,long
+ * xy: output variable, the decoded lat,long
+ * distance: output variable, the distance between the center of the shape
+ *           and the point
+ *
+ * Return values:
+ *
+ * C_OK if the point is within the search area, C_ERR if it is outside
+ * (or if the score cannot be decoded).
+ * "*xy" is populated with the decoded lat,long.
+ * "*distance" is populated with the distance between the center of the
+ * shape and the point.
+ */
+int geoWithinShape(GeoShape *shape, double score, double *xy, double *distance) {
+    if (!decodeGeohash(score,xy)) return C_ERR; /* Can't decode. */
+    /* Note that geohashGetDistanceIfInRadiusWGS84() takes arguments in
+     * reverse order: longitude first, latitude later. */
+    if (shape->type == CIRCULAR_TYPE) {
+        if (!geohashGetDistanceIfInRadiusWGS84(shape->xy[0], shape->xy[1], xy[0], xy[1],
+                                               shape->t.radius*shape->conversion, distance))
+            return C_ERR;
+    } else if (shape->type == RECTANGLE_TYPE) {
+        if (!geohashGetDistanceIfInRectangle(shape->t.r.width * shape->conversion,
+                                             shape->t.r.height * shape->conversion,
+                                             shape->xy[0], shape->xy[1], xy[0], xy[1], distance))
+            return C_ERR;
+    }
+    /* NOTE(review): an unknown shape->type falls through and returns C_OK
+     * with *distance untouched -- presumably unreachable; confirm. */
+    return C_OK;
+}
+
+/* Query a Redis sorted set to extract all the elements between 'min' and
+ * 'max', appending them into the array of geoPoint structures 'geoArray'.
+ * The function returns the number of elements added to the array.
+ *
+ * Elements which fall outside 'shape' (as judged by geoWithinShape())
+ * are not included.
+ *
+ * If 'limit' is non-zero, the scan stops as soon as the array holds at
+ * least 'limit' elements (used by the ANY option).
+ *
+ * The ability of this function to append to an existing set of points is
+ * important for good performance, because querying by radius is performed
+ * using multiple queries to the sorted set, that we later need to sort
+ * via qsort. Similarly we need to be able to reject points outside the
+ * search radius area ASAP in order to allocate and process no more points
+ * than needed. */
+int geoGetPointsInRange(robj *zobj, double min, double max, GeoShape *shape, geoArray *ga, unsigned long limit) {
+    /* minex 0 = include min in range; maxex 1 = exclude max in range */
+    /* That's: min <= val < max */
+    zrangespec range = { .min = min, .max = max, .minex = 0, .maxex = 1 };
+    size_t origincount = ga->used; /* To compute how many we appended. */
+    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *zl = zobj->ptr;
+        unsigned char *eptr, *sptr;
+        unsigned char *vstr = NULL;
+        unsigned int vlen = 0;
+        long long vlong = 0;
+        double score = 0;
+
+        if ((eptr = zzlFirstInRange(zl, &range)) == NULL) {
+            /* Nothing exists starting at our min. No results. */
+            return 0;
+        }
+
+        /* eptr points at the member, sptr at the following score entry. */
+        sptr = lpNext(zl, eptr);
+        while (eptr) {
+            double xy[2];
+            double distance = 0;
+            score = zzlGetScore(sptr);
+
+            /* If we fell out of range, break. */
+            if (!zslValueLteMax(score, &range))
+                break;
+
+            vstr = lpGetValue(eptr, &vlen, &vlong);
+            if (geoWithinShape(shape, score, xy, &distance) == C_OK) {
+                /* Append the new element; members stored as integers in
+                 * the listpack are converted back to strings. */
+                char *member = (vstr == NULL) ? sdsfromlonglong(vlong) : sdsnewlen(vstr, vlen);
+                geoArrayAppend(ga, xy, distance, score, member);
+            }
+            /* Stop early once the ANY-style limit is reached. */
+            if (ga->used && limit && ga->used >= limit) break;
+            zzlNext(zl, &eptr, &sptr);
+        }
+    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+        zset *zs = zobj->ptr;
+        zskiplist *zsl = zs->zsl;
+        zskiplistNode *ln;
+
+        if ((ln = zslFirstInRange(zsl, &range)) == NULL) {
+            /* Nothing exists starting at our min. No results. */
+            return 0;
+        }
+
+        while (ln) {
+            double xy[2];
+            double distance = 0;
+            /* Abort when the node is no longer in range. */
+            if (!zslValueLteMax(ln->score, &range))
+                break;
+            if (geoWithinShape(shape, ln->score, xy, &distance) == C_OK) {
+                /* Append the new element (the member sds is duplicated,
+                 * since the array takes ownership of its members). */
+                geoArrayAppend(ga, xy, distance, ln->score, sdsdup(ln->ele));
+            }
+            /* Stop early once the ANY-style limit is reached. */
+            if (ga->used && limit && ga->used >= limit) break;
+            ln = ln->level[0].forward;
+        }
+    }
+    return ga->used - origincount;
+}
+
+/* Compute the sorted set scores min (inclusive), max (exclusive) we should
+ * query in order to retrieve all the elements inside the specified area
+ * 'hash'. The two scores are returned by reference in *min and *max. */
+void scoresOfGeoHashBox(GeoHashBits hash, GeoHashFix52Bits *min, GeoHashFix52Bits *max) {
+    /* We want to compute the sorted set scores that will include all the
+     * elements inside the specified Geohash 'hash', which has as many
+     * bits as specified by hash.step * 2.
+     *
+     * So if step is, for example, 3, and the hash value in binary
+     * is 101010, since our score is 52 bits we want every element which
+     * is in binary: 101010?????????????????????????????????????????????
+     * Where ? can be 0 or 1.
+     *
+     * To get the min score we just use the initial hash value left
+     * shifted enough to get the 52 bit value. Later we increment the
+     * 6 bit prefix (see the hash.bits++ statement), and get the new
+     * prefix: 101011, which we align again to 52 bits to get the maximum
+     * value (which is excluded from the search). So we get everything
+     * between the two following scores (represented in binary):
+     *
+     * 1010100000000000000000000000000000000000000000000000 (included)
+     * and
+     * 1010110000000000000000000000000000000000000000000000 (excluded).
+     *
+     * Note: 'hash' is passed by value, so incrementing hash.bits below
+     * does not affect the caller's copy. */
+    *min = geohashAlign52Bits(hash);
+    hash.bits++;
+    *max = geohashAlign52Bits(hash);
+}
+
+/* Obtain all members between the min/max scores of this geohash bounding
+ * box by delegating to geoGetPointsInRange().
+ * Return the number of points added to the array. */
+int membersOfGeoHashBox(robj *zobj, GeoHashBits hash, geoArray *ga, GeoShape *shape, unsigned long limit) {
+    GeoHashFix52Bits min_score, max_score;
+
+    scoresOfGeoHashBox(hash, &min_score, &max_score);
+    return geoGetPointsInRange(zobj, min_score, max_score, shape, ga, limit);
+}
+
+/* Search all eight neighbor geohash boxes plus the center box itself,
+ * appending matching members to 'ga'. Returns the total number of points
+ * added across all the boxes processed. */
+int membersOfAllNeighbors(robj *zobj, const GeoHashRadius *n, GeoShape *shape, geoArray *ga, unsigned long limit) {
+    GeoHashBits neighbors[9];
+    unsigned int i, count = 0, last_processed = 0;
+    int debugmsg = 0; /* Set to 1 to enable the D() debug dumps below. */
+
+    neighbors[0] = n->hash;
+    neighbors[1] = n->neighbors.north;
+    neighbors[2] = n->neighbors.south;
+    neighbors[3] = n->neighbors.east;
+    neighbors[4] = n->neighbors.west;
+    neighbors[5] = n->neighbors.north_east;
+    neighbors[6] = n->neighbors.north_west;
+    neighbors[7] = n->neighbors.south_east;
+    neighbors[8] = n->neighbors.south_west;
+
+    /* For each neighbor (*and* our own hashbox), get all the matching
+     * members and add them to the potential result list. */
+    for (i = 0; i < sizeof(neighbors) / sizeof(*neighbors); i++) {
+        if (HASHISZERO(neighbors[i])) {
+            if (debugmsg) D("neighbors[%d] is zero",i);
+            continue;
+        }
+
+        /* Debugging info. */
+        if (debugmsg) {
+            GeoHashRange long_range, lat_range;
+            geohashGetCoordRange(&long_range,&lat_range);
+            GeoHashArea myarea = {{0}};
+            geohashDecode(long_range, lat_range, neighbors[i], &myarea);
+
+            /* Dump center square. */
+            D("neighbors[%d]:\n",i);
+            D("area.longitude.min: %f\n", myarea.longitude.min);
+            D("area.longitude.max: %f\n", myarea.longitude.max);
+            D("area.latitude.min: %f\n", myarea.latitude.min);
+            D("area.latitude.max: %f\n", myarea.latitude.max);
+            D("\n");
+        }
+
+        /* When a huge Radius (in the 5000 km range or more) is used,
+         * adjacent neighbors can be the same, leading to duplicated
+         * elements. Skip every range which is the same as the one
+         * processed previously.
+         * Note: last_processed == 0 disables the check, so the center box
+         * (index 0) is never treated as "the previously processed box". */
+        if (last_processed &&
+            neighbors[i].bits == neighbors[last_processed].bits &&
+            neighbors[i].step == neighbors[last_processed].step)
+        {
+            if (debugmsg)
+                D("Skipping processing of %d, same as previous\n",i);
+            continue;
+        }
+        /* Stop scanning boxes once the ANY-style limit is reached. */
+        if (ga->used && limit && ga->used >= limit) break;
+        count += membersOfGeoHashBox(zobj, neighbors[i], ga, shape, limit);
+        last_processed = i;
+    }
+    return count;
+}
+
+/* Sort comparators for qsort() */
+/* Ascending order by distance. We can't simply return the difference of
+ * the two dist fields, because they are doubles while the comparator
+ * must return an int. */
+static int sort_gp_asc(const void *a, const void *b) {
+    const struct geoPoint *pa = a, *pb = b;
+    if (pa->dist > pb->dist) return 1;
+    else if (pa->dist == pb->dist) return 0;
+    else return -1;
+}
+
+/* Descending order by distance: the ascending comparator with the
+ * operands swapped. */
+static int sort_gp_desc(const void *a, const void *b) {
+    return sort_gp_asc(b, a);
+}
+
+/* ====================================================================
+ * Commands
+ * ==================================================================== */
+
+/* GEOADD key [CH] [NX|XX] long lat name [long2 lat2 name2 ... longN latN nameN]
+ *
+ * Encodes each long/lat pair into a 52 bit geohash score and delegates the
+ * actual insertion to ZADD by rewriting the client's command vector. */
+void geoaddCommand(client *c) {
+    int xx = 0, nx = 0, longidx = 2;
+    int i;
+
+    /* Parse options. At the end 'longidx' is set to the argument position
+     * of the longitude of the first element. */
+    while (longidx < c->argc) {
+        char *opt = c->argv[longidx]->ptr;
+        if (!strcasecmp(opt,"nx")) nx = 1;
+        else if (!strcasecmp(opt,"xx")) xx = 1;
+        else if (!strcasecmp(opt,"ch")) { /* Handle in zaddCommand. */ }
+        else break;
+        longidx++;
+    }
+
+    /* Elements come in triples (long, lat, name); NX and XX are mutually
+     * exclusive, exactly as in ZADD. */
+    if ((c->argc - longidx) % 3 || (xx && nx)) {
+        /* Need an odd number of arguments if we got this far... */
+        addReplyErrorObject(c,shared.syntaxerr);
+        return;
+    }
+
+    /* Set up the vector for calling ZADD. zcalloc() zeroes the vector so
+     * the error path below can safely skip never-filled slots. */
+    int elements = (c->argc - longidx) / 3;
+    int argc = longidx+elements*2; /* ZADD key [CH] [NX|XX] score ele ... */
+    robj **argv = zcalloc(argc*sizeof(robj*));
+    argv[0] = createRawStringObject("zadd",4);
+    for (i = 1; i < longidx; i++) {
+        /* Copy the key and option arguments verbatim (shared with c->argv,
+         * so bump their refcount). */
+        argv[i] = c->argv[i];
+        incrRefCount(argv[i]);
+    }
+
+    /* Create the argument vector to call ZADD in order to add all
+     * the score,value pairs to the requested zset, where score is actually
+     * an encoded version of lat,long. */
+    for (i = 0; i < elements; i++) {
+        double xy[2];
+
+        if (extractLongLatOrReply(c, (c->argv+longidx)+(i*3),xy) == C_ERR) {
+            /* Error reply already sent; release whatever we built so far.
+             * Note 'i' is deliberately reused for the cleanup scan. */
+            for (i = 0; i < argc; i++)
+                if (argv[i]) decrRefCount(argv[i]);
+            zfree(argv);
+            return;
+        }
+
+        /* Turn the coordinates into the score of the element. */
+        GeoHashBits hash;
+        geohashEncodeWGS84(xy[0], xy[1], GEO_STEP_MAX, &hash);
+        GeoHashFix52Bits bits = geohashAlign52Bits(hash);
+        robj *score = createStringObjectFromLongLongWithSds(bits);
+        robj *val = c->argv[longidx + i * 3 + 2];
+        argv[longidx+i*2] = score;
+        argv[longidx+1+i*2] = val;
+        incrRefCount(val);
+    }
+
+    /* Finally call ZADD that will do the work for us. */
+    replaceClientCommandVector(c,argc,argv);
+    zaddCommand(c);
+}
+
+/* Sorting orders for radius/search results. */
+#define SORT_NONE 0
+#define SORT_ASC 1
+#define SORT_DESC 2
+
+/* Flags describing the command variant served by georadiusGeneric(). */
+#define RADIUS_COORDS (1<<0)    /* Search around coordinates. */
+#define RADIUS_MEMBER (1<<1)    /* Search around member. */
+#define RADIUS_NOSTORE (1<<2)   /* Do not accept STORE/STOREDIST option. */
+#define GEOSEARCH (1<<3)        /* GEOSEARCH command variant (different arguments supported) */
+#define GEOSEARCHSTORE (1<<4)   /* GEOSEARCHSTORE just accept STOREDIST option */
+
+/* GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC]
+ * [COUNT count [ANY]] [STORE key|STOREDIST key]
+ * GEORADIUSBYMEMBER key member radius unit ... options ...
+ * GEOSEARCH key [FROMMEMBER member] [FROMLONLAT long lat] [BYRADIUS radius unit]
+ * [BYBOX width height unit] [WITHCOORD] [WITHDIST] [WITHASH] [COUNT count [ANY]] [ASC|DESC]
+ * GEOSEARCHSTORE dest_key src_key [FROMMEMBER member] [FROMLONLAT long lat] [BYRADIUS radius unit]
+ * [BYBOX width height unit] [COUNT count [ANY]] [ASC|DESC] [STOREDIST]
+ * */
+void georadiusGeneric(client *c, int srcKeyIndex, int flags) {
+ robj *storekey = NULL;
+ int storedist = 0; /* 0 for STORE, 1 for STOREDIST. */
+
+ /* Look up the requested zset */
+ robj *zobj = lookupKeyRead(c->db, c->argv[srcKeyIndex]);
+ if (checkType(c, zobj, OBJ_ZSET)) return;
+
+ /* Find long/lat to use for radius or box search based on inquiry type */
+ int base_args;
+ GeoShape shape = {0};
+ if (flags & RADIUS_COORDS) {
+ /* GEORADIUS or GEORADIUS_RO */
+ base_args = 6;
+ shape.type = CIRCULAR_TYPE;
+ if (extractLongLatOrReply(c, c->argv + 2, shape.xy) == C_ERR) return;
+ if (extractDistanceOrReply(c, c->argv+base_args-2, &shape.conversion, &shape.t.radius) != C_OK) return;
+ } else if ((flags & RADIUS_MEMBER) && !zobj) {
+ /* We don't have a source key, but we need to proceed with argument
+ * parsing, so we know which reply to use depending on the STORE flag. */
+ base_args = 5;
+ } else if (flags & RADIUS_MEMBER) {
+ /* GEORADIUSBYMEMBER or GEORADIUSBYMEMBER_RO */
+ base_args = 5;
+ shape.type = CIRCULAR_TYPE;
+ robj *member = c->argv[2];
+ if (longLatFromMember(zobj, member, shape.xy) == C_ERR) {
+ addReplyError(c, "could not decode requested zset member");
+ return;
+ }
+ if (extractDistanceOrReply(c, c->argv+base_args-2, &shape.conversion, &shape.t.radius) != C_OK) return;
+ } else if (flags & GEOSEARCH) {
+ /* GEOSEARCH or GEOSEARCHSTORE */
+ base_args = 2;
+ if (flags & GEOSEARCHSTORE) {
+ base_args = 3;
+ storekey = c->argv[1];
+ }
+ } else {
+ addReplyError(c, "Unknown georadius search type");
+ return;
+ }
+
+ /* Discover and populate all optional parameters. */
+ int withdist = 0, withhash = 0, withcoords = 0;
+ int frommember = 0, fromloc = 0, byradius = 0, bybox = 0;
+ int sort = SORT_NONE;
+ int any = 0; /* any=1 means a limited search, stop as soon as enough results were found. */
+ long long count = 0; /* Max number of results to return. 0 means unlimited. */
+ if (c->argc > base_args) {
+ int remaining = c->argc - base_args;
+ for (int i = 0; i < remaining; i++) {
+ char *arg = c->argv[base_args + i]->ptr;
+ if (!strcasecmp(arg, "withdist")) {
+ withdist = 1;
+ } else if (!strcasecmp(arg, "withhash")) {
+ withhash = 1;
+ } else if (!strcasecmp(arg, "withcoord")) {
+ withcoords = 1;
+ } else if (!strcasecmp(arg, "any")) {
+ any = 1;
+ } else if (!strcasecmp(arg, "asc")) {
+ sort = SORT_ASC;
+ } else if (!strcasecmp(arg, "desc")) {
+ sort = SORT_DESC;
+ } else if (!strcasecmp(arg, "count") && (i+1) < remaining) {
+ if (getLongLongFromObjectOrReply(c, c->argv[base_args+i+1],
+ &count, NULL) != C_OK) return;
+ if (count <= 0) {
+ addReplyError(c,"COUNT must be > 0");
+ return;
+ }
+ i++;
+ } else if (!strcasecmp(arg, "store") &&
+ (i+1) < remaining &&
+ !(flags & RADIUS_NOSTORE) &&
+ !(flags & GEOSEARCH))
+ {
+ storekey = c->argv[base_args+i+1];
+ storedist = 0;
+ i++;
+ } else if (!strcasecmp(arg, "storedist") &&
+ (i+1) < remaining &&
+ !(flags & RADIUS_NOSTORE) &&
+ !(flags & GEOSEARCH))
+ {
+ storekey = c->argv[base_args+i+1];
+ storedist = 1;
+ i++;
+ } else if (!strcasecmp(arg, "storedist") &&
+ (flags & GEOSEARCH) &&
+ (flags & GEOSEARCHSTORE))
+ {
+ storedist = 1;
+ } else if (!strcasecmp(arg, "frommember") &&
+ (i+1) < remaining &&
+ flags & GEOSEARCH &&
+ !fromloc)
+ {
+ /* No source key, proceed with argument parsing and return an error when done. */
+ if (zobj == NULL) {
+ frommember = 1;
+ i++;
+ continue;
+ }
+
+ if (longLatFromMember(zobj, c->argv[base_args+i+1], shape.xy) == C_ERR) {
+ addReplyError(c, "could not decode requested zset member");
+ return;
+ }
+ frommember = 1;
+ i++;
+ } else if (!strcasecmp(arg, "fromlonlat") &&
+ (i+2) < remaining &&
+ flags & GEOSEARCH &&
+ !frommember)
+ {
+ if (extractLongLatOrReply(c, c->argv+base_args+i+1, shape.xy) == C_ERR) return;
+ fromloc = 1;
+ i += 2;
+ } else if (!strcasecmp(arg, "byradius") &&
+ (i+2) < remaining &&
+ flags & GEOSEARCH &&
+ !bybox)
+ {
+ if (extractDistanceOrReply(c, c->argv+base_args+i+1, &shape.conversion, &shape.t.radius) != C_OK)
+ return;
+ shape.type = CIRCULAR_TYPE;
+ byradius = 1;
+ i += 2;
+ } else if (!strcasecmp(arg, "bybox") &&
+ (i+3) < remaining &&
+ flags & GEOSEARCH &&
+ !byradius)
+ {
+ if (extractBoxOrReply(c, c->argv+base_args+i+1, &shape.conversion, &shape.t.r.width,
+ &shape.t.r.height) != C_OK) return;
+ shape.type = RECTANGLE_TYPE;
+ bybox = 1;
+ i += 3;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+ }
+
+ /* Trap options not compatible with STORE and STOREDIST. */
+ if (storekey && (withdist || withhash || withcoords)) {
+ addReplyErrorFormat(c,
+ "%s is not compatible with WITHDIST, WITHHASH and WITHCOORD options",
+ flags & GEOSEARCHSTORE? "GEOSEARCHSTORE": "STORE option in GEORADIUS");
+ return;
+ }
+
+ if ((flags & GEOSEARCH) && !(frommember || fromloc)) {
+ addReplyErrorFormat(c,
+ "exactly one of FROMMEMBER or FROMLONLAT can be specified for %s",
+ (char *)c->argv[0]->ptr);
+ return;
+ }
+
+ if ((flags & GEOSEARCH) && !(byradius || bybox)) {
+ addReplyErrorFormat(c,
+ "exactly one of BYRADIUS and BYBOX can be specified for %s",
+ (char *)c->argv[0]->ptr);
+ return;
+ }
+
+ if (any && !count) {
+ addReplyErrorFormat(c, "the ANY argument requires COUNT argument");
+ return;
+ }
+
+ /* Return ASAP when src key does not exist. */
+ if (zobj == NULL) {
+ if (storekey) {
+ /* store key is not NULL, try to delete it and return 0. */
+ if (dbDelete(c->db, storekey)) {
+ signalModifiedKey(c, c->db, storekey);
+ notifyKeyspaceEvent(NOTIFY_GENERIC, "del", storekey, c->db->id);
+ server.dirty++;
+ }
+ addReply(c, shared.czero);
+ } else {
+ /* Otherwise we return an empty array. */
+ addReply(c, shared.emptyarray);
+ }
+ return;
+ }
+
+ /* COUNT without ordering does not make much sense (we need to
+ * sort in order to return the closest N entries),
+ * force ASC ordering if COUNT was specified but no sorting was
+ * requested. Note that this is not needed for ANY option. */
+ if (count != 0 && sort == SORT_NONE && !any) sort = SORT_ASC;
+
+ /* Get all neighbor geohash boxes for our radius search */
+ GeoHashRadius georadius = geohashCalculateAreasByShapeWGS84(&shape);
+
+ /* Search the zset for all matching points */
+ geoArray *ga = geoArrayCreate();
+ membersOfAllNeighbors(zobj, &georadius, &shape, ga, any ? count : 0);
+
+ /* If no matching results, the user gets an empty reply. */
+ if (ga->used == 0 && storekey == NULL) {
+ addReply(c,shared.emptyarray);
+ geoArrayFree(ga);
+ return;
+ }
+
+ long result_length = ga->used;
+ long returned_items = (count == 0 || result_length < count) ?
+ result_length : count;
+ long option_length = 0;
+
+ /* Process [optional] requested sorting */
+ if (sort != SORT_NONE) {
+ int (*sort_gp_callback)(const void *a, const void *b) = NULL;
+ if (sort == SORT_ASC) {
+ sort_gp_callback = sort_gp_asc;
+ } else if (sort == SORT_DESC) {
+ sort_gp_callback = sort_gp_desc;
+ }
+
+ if (returned_items == result_length) {
+ qsort(ga->array, result_length, sizeof(geoPoint), sort_gp_callback);
+ } else {
+ pqsort(ga->array, result_length, sizeof(geoPoint), sort_gp_callback,
+ 0, (returned_items - 1));
+ }
+ }
+
+ if (storekey == NULL) {
+ /* No target key, return results to user. */
+
+ /* Our options are self-contained nested multibulk replies, so we
+ * only need to track how many of those nested replies we return. */
+ if (withdist)
+ option_length++;
+
+ if (withcoords)
+ option_length++;
+
+ if (withhash)
+ option_length++;
+
+ /* The array len we send is exactly result_length. The result is
+ * either all strings of just zset members *or* a nested multi-bulk
+ * reply containing the zset member string _and_ all the additional
+ * options the user enabled for this request. */
+ addReplyArrayLen(c, returned_items);
+
+ /* Finally send results back to the caller */
+ int i;
+ for (i = 0; i < returned_items; i++) {
+ geoPoint *gp = ga->array+i;
+ gp->dist /= shape.conversion; /* Fix according to unit. */
+
+ /* If we have options in option_length, return each sub-result
+ * as a nested multi-bulk. Add 1 to account for result value
+ * itself. */
+ if (option_length)
+ addReplyArrayLen(c, option_length + 1);
+
+ addReplyBulkSds(c,gp->member);
+ gp->member = NULL;
+
+ if (withdist)
+ addReplyDoubleDistance(c, gp->dist);
+
+ if (withhash)
+ addReplyLongLong(c, gp->score);
+
+ if (withcoords) {
+ addReplyArrayLen(c, 2);
+ addReplyHumanLongDouble(c, gp->longitude);
+ addReplyHumanLongDouble(c, gp->latitude);
+ }
+ }
+ } else {
+ /* Target key, create a sorted set with the results. */
+ robj *zobj;
+ zset *zs;
+ int i;
+ size_t maxelelen = 0, totelelen = 0;
+
+ if (returned_items) {
+ zobj = createZsetObject();
+ zs = zobj->ptr;
+ }
+
+ for (i = 0; i < returned_items; i++) {
+ zskiplistNode *znode;
+ geoPoint *gp = ga->array+i;
+ gp->dist /= shape.conversion; /* Fix according to unit. */
+ double score = storedist ? gp->dist : gp->score;
+ size_t elelen = sdslen(gp->member);
+
+ if (maxelelen < elelen) maxelelen = elelen;
+ totelelen += elelen;
+ znode = zslInsert(zs->zsl,score,gp->member);
+ serverAssert(dictAdd(zs->dict,gp->member,&znode->score) == DICT_OK);
+ gp->member = NULL;
+ }
+
+ if (returned_items) {
+ zsetConvertToListpackIfNeeded(zobj,maxelelen,totelelen);
+ setKey(c,c->db,storekey,zobj,0);
+ decrRefCount(zobj);
+ notifyKeyspaceEvent(NOTIFY_ZSET,flags & GEOSEARCH ? "geosearchstore" : "georadiusstore",storekey,
+ c->db->id);
+ server.dirty += returned_items;
+ } else if (dbDelete(c->db,storekey)) {
+ signalModifiedKey(c,c->db,storekey);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",storekey,c->db->id);
+ server.dirty++;
+ }
+ addReplyLongLong(c, returned_items);
+ }
+ geoArrayFree(ga);
+}
+
+/* GEORADIUS wrapper function.
+ * NOTE(review): the second argument appears to be the argv index of the
+ * source key (1 here; geosearchstore passes 2 since argv[1] is the
+ * destination) — confirm against georadiusGeneric's signature. */
+void georadiusCommand(client *c) {
+    georadiusGeneric(c, 1, RADIUS_COORDS);
+}
+
+/* GEORADIUSBYMEMBER wrapper function: like GEORADIUS but the search center
+ * is taken from an existing sorted-set member instead of explicit coords. */
+void georadiusbymemberCommand(client *c) {
+    georadiusGeneric(c, 1, RADIUS_MEMBER);
+}
+
+/* GEORADIUS_RO wrapper function: read-only variant, the NOSTORE flag
+ * rejects the STORE/STOREDIST options. */
+void georadiusroCommand(client *c) {
+    georadiusGeneric(c, 1, RADIUS_COORDS|RADIUS_NOSTORE);
+}
+
+/* GEORADIUSBYMEMBER_RO wrapper function: read-only variant of
+ * GEORADIUSBYMEMBER (NOSTORE forbids the STORE/STOREDIST options). */
+void georadiusbymemberroCommand(client *c) {
+    georadiusGeneric(c, 1, RADIUS_MEMBER|RADIUS_NOSTORE);
+}
+
+/* GEOSEARCH wrapper function: unified search-by-circle/box API. */
+void geosearchCommand(client *c) {
+    georadiusGeneric(c, 1, GEOSEARCH);
+}
+
+/* GEOSEARCHSTORE wrapper function: like GEOSEARCH but argv[1] is the
+ * destination key, so the source key index is 2. */
+void geosearchstoreCommand(client *c) {
+    georadiusGeneric(c, 2, GEOSEARCH|GEOSEARCHSTORE);
+}
+
+/* GEOHASH key ele1 ele2 ... eleN
+ *
+ * Returns an array with an 11 characters geohash representation of the
+ * position of the specified elements. Missing elements (or a missing key)
+ * produce a null bulk reply in the corresponding array slot. */
+void geohashCommand(client *c) {
+    /* Standard base32 geohash alphabet. */
+    char *geoalphabet= "0123456789bcdefghjkmnpqrstuvwxyz";
+    int j;
+
+    /* Look up the requested zset */
+    robj *zobj = lookupKeyRead(c->db, c->argv[1]);
+    if (checkType(c, zobj, OBJ_ZSET)) return;
+
+    /* Geohash elements one after the other, using a null bulk reply for
+     * missing elements. */
+    addReplyArrayLen(c,c->argc-2);
+    for (j = 2; j < c->argc; j++) {
+        double score;
+        if (!zobj || zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) {
+            addReplyNull(c);
+        } else {
+            /* The internal format we use for geocoding is a bit different
+             * than the standard, since we use as initial latitude range
+             * -85,85, while the normal geohashing algorithm uses -90,90.
+             * So we have to decode our position and re-encode using the
+             * standard ranges in order to output a valid geohash string. */
+
+            /* Decode... */
+            double xy[2];
+            if (!decodeGeohash(score,xy)) {
+                addReplyNull(c);
+                continue;
+            }
+
+            /* Re-encode using the full standard lat/long ranges. */
+            GeoHashRange r[2];
+            GeoHashBits hash;
+            r[0].min = -180;
+            r[0].max = 180;
+            r[1].min = -90;
+            r[1].max = 90;
+            geohashEncode(&r[0],&r[1],xy[0],xy[1],26,&hash);
+
+            /* 26 steps = 52 bits = 10 full base32 chars + 2 bits. */
+            char buf[12];
+            int i;
+            for (i = 0; i < 11; i++) {
+                int idx;
+                if (i == 10) {
+                    /* We have just 52 bits, but the API used to output
+                     * an 11 bytes geohash. For compatibility we assume
+                     * zero. */
+                    idx = 0;
+                } else {
+                    /* Take the next 5 most-significant bits of the hash. */
+                    idx = (hash.bits >> (52-((i+1)*5))) & 0x1f;
+                }
+                buf[i] = geoalphabet[idx];
+            }
+            buf[11] = '\0';
+            addReplyBulkCBuffer(c,buf,11);
+        }
+    }
+}
+
+/* GEOPOS key ele1 ele2 ... eleN
+ *
+ * Returns an array of two-items arrays representing the x,y position of each
+ * element specified in the arguments. For missing elements NULL is returned.
+ * xy[0] is the longitude, xy[1] the latitude (see decodeGeohash usage). */
+void geoposCommand(client *c) {
+    int j;
+
+    /* Look up the requested zset */
+    robj *zobj = lookupKeyRead(c->db, c->argv[1]);
+    if (checkType(c, zobj, OBJ_ZSET)) return;
+
+    /* Report elements one after the other, using a null bulk reply for
+     * missing elements. */
+    addReplyArrayLen(c,c->argc-2);
+    for (j = 2; j < c->argc; j++) {
+        double score;
+        if (!zobj || zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) {
+            addReplyNullArray(c);
+        } else {
+            /* Decode the 52-bit geohash stored as the zset score. */
+            double xy[2];
+            if (!decodeGeohash(score,xy)) {
+                addReplyNullArray(c);
+                continue;
+            }
+            addReplyArrayLen(c,2);
+            addReplyHumanLongDouble(c,xy[0]);
+            addReplyHumanLongDouble(c,xy[1]);
+        }
+    }
+}
+
+/* GEODIST key ele1 ele2 [unit]
+ *
+ * Return the distance, in meters by default, otherwise according to "unit",
+ * between points ele1 and ele2. If one or more elements are missing NULL
+ * is returned. */
+void geodistCommand(client *c) {
+    double to_meter = 1;
+
+    /* Check if there is the unit to extract, otherwise assume meters.
+     * A negative return from extractUnitOrReply means the error reply
+     * was already sent to the client. */
+    if (c->argc == 5) {
+        to_meter = extractUnitOrReply(c,c->argv[4]);
+        if (to_meter < 0) return;
+    } else if (c->argc > 5) {
+        addReplyErrorObject(c,shared.syntaxerr);
+        return;
+    }
+
+    /* Look up the requested zset */
+    robj *zobj = NULL;
+    if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp]))
+        == NULL || checkType(c, zobj, OBJ_ZSET)) return;
+
+    /* Get the scores. We need both otherwise NULL is returned. */
+    double score1, score2, xyxy[4];
+    if (zsetScore(zobj, c->argv[2]->ptr, &score1) == C_ERR ||
+        zsetScore(zobj, c->argv[3]->ptr, &score2) == C_ERR)
+    {
+        addReplyNull(c);
+        return;
+    }
+
+    /* Decode & compute the distance. xyxy holds lon1,lat1,lon2,lat2. */
+    if (!decodeGeohash(score1,xyxy) || !decodeGeohash(score2,xyxy+2))
+        addReplyNull(c);
+    else
+        addReplyDoubleDistance(c,
+            geohashGetDistance(xyxy[0],xyxy[1],xyxy[2],xyxy[3]) / to_meter);
+}
diff --git a/src/geo.h b/src/geo.h
new file mode 100644
index 0000000..79d0a6a
--- /dev/null
+++ b/src/geo.h
@@ -0,0 +1,22 @@
+#ifndef __GEO_H__
+#define __GEO_H__
+
+#include "server.h"
+
+/* Structures used inside geo.c in order to represent points and array of
+ * points on the earth. */
+typedef struct geoPoint {
+    double longitude;
+    double latitude;
+    double dist;       /* Distance from the search center; geo.c divides this
+                        * by shape.conversion to express it in the user unit. */
+    double score;      /* The sorted-set score, i.e. the 52-bit geohash. */
+    char *member;      /* Member name (sds string — see sdslen()/
+                        * addReplyBulkSds() usage in geo.c). */
+} geoPoint;
+
+typedef struct geoArray {
+    struct geoPoint *array;
+    size_t buckets;    /* Allocated capacity. */
+    size_t used;       /* Number of populated entries. */
+} geoArray;
+
+#endif
diff --git a/src/geohash.c b/src/geohash.c
new file mode 100644
index 0000000..2cbcf28
--- /dev/null
+++ b/src/geohash.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "geohash.h"
+
+/**
+ * Hashing works like this:
+ * Divide the world into 4 buckets. Label each one as such:
+ * -----------------
+ * | | |
+ * | | |
+ * | 0,1 | 1,1 |
+ * -----------------
+ * | | |
+ * | | |
+ * | 0,0 | 1,0 |
+ * -----------------
+ */
+
+/* Interleave lower bits of x and y, so the bits of x
+ * are in the even positions and bits from y in the odd;
+ * x and y must initially be less than 2**32 (4294967296).
+ * From: https://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
+ */
+static inline uint64_t interleave64(uint32_t xlo, uint32_t ylo) {
+    /* Magic masks/shifts for the classic bit-spreading technique: each
+     * round doubles the gap between the original bits. */
+    static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,
+                                 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+                                 0x0000FFFF0000FFFFULL};
+    static const unsigned int S[] = {1, 2, 4, 8, 16};
+
+    uint64_t x = xlo;
+    uint64_t y = ylo;
+
+    x = (x | (x << S[4])) & B[4];
+    y = (y | (y << S[4])) & B[4];
+
+    x = (x | (x << S[3])) & B[3];
+    y = (y | (y << S[3])) & B[3];
+
+    x = (x | (x << S[2])) & B[2];
+    y = (y | (y << S[2])) & B[2];
+
+    x = (x | (x << S[1])) & B[1];
+    y = (y | (y << S[1])) & B[1];
+
+    x = (x | (x << S[0])) & B[0];
+    y = (y | (y << S[0])) & B[0];
+
+    /* x occupies the even bit positions, y the odd ones. */
+    return x | (y << 1);
+}
+
+/* Reverse the interleave process: extract the even bits into the low
+ * 32 bits of the result and the odd bits into the high 32 bits.
+ * Derived from http://stackoverflow.com/questions/4909263
+ */
+static inline uint64_t deinterleave64(uint64_t interleaved) {
+    static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,
+                                 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+                                 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
+    static const unsigned int S[] = {0, 1, 2, 4, 8, 16};
+
+    uint64_t x = interleaved;      /* even positions */
+    uint64_t y = interleaved >> 1; /* odd positions */
+
+    x = (x | (x >> S[0])) & B[0];
+    y = (y | (y >> S[0])) & B[0];
+
+    x = (x | (x >> S[1])) & B[1];
+    y = (y | (y >> S[1])) & B[1];
+
+    x = (x | (x >> S[2])) & B[2];
+    y = (y | (y >> S[2])) & B[2];
+
+    x = (x | (x >> S[3])) & B[3];
+    y = (y | (y >> S[3])) & B[3];
+
+    x = (x | (x >> S[4])) & B[4];
+    y = (y | (y >> S[4])) & B[4];
+
+    x = (x | (x >> S[5])) & B[5];
+    y = (y | (y >> S[5])) & B[5];
+
+    return x | (y << 32);
+}
+
+/* Fill the given ranges with the longitude/latitude limits this
+ * implementation can index. */
+void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range) {
+    /* These are constraints from EPSG:900913 / EPSG:3785 / OSGEO:41001 */
+    /* We can't geocode at the north/south pole. */
+    long_range->max = GEO_LONG_MAX;
+    long_range->min = GEO_LONG_MIN;
+    lat_range->max = GEO_LAT_MAX;
+    lat_range->min = GEO_LAT_MIN;
+}
+
+/* Encode (longitude,latitude) into an interleaved geohash of 'step' bits
+ * per coordinate, relative to the given ranges.
+ * Returns 1 on success, 0 on invalid arguments or out-of-range position. */
+int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
+                  double longitude, double latitude, uint8_t step,
+                  GeoHashBits *hash) {
+    /* Check basic arguments sanity. */
+    if (hash == NULL || step > 32 || step == 0 ||
+        RANGEPISZERO(lat_range) || RANGEPISZERO(long_range)) return 0;
+
+    /* Return an error when trying to index outside the supported
+     * constraints. */
+    if (longitude > GEO_LONG_MAX || longitude < GEO_LONG_MIN ||
+        latitude > GEO_LAT_MAX || latitude < GEO_LAT_MIN) return 0;
+
+    hash->bits = 0;
+    hash->step = step;
+
+    if (latitude < lat_range->min || latitude > lat_range->max ||
+        longitude < long_range->min || longitude > long_range->max) {
+        return 0;
+    }
+
+    /* Normalize the coordinates into [0,1) within their range. */
+    double lat_offset =
+        (latitude - lat_range->min) / (lat_range->max - lat_range->min);
+    double long_offset =
+        (longitude - long_range->min) / (long_range->max - long_range->min);
+
+    /* convert to fixed point based on the step size */
+    lat_offset *= (1ULL << step);
+    long_offset *= (1ULL << step);
+    hash->bits = interleave64(lat_offset, long_offset);
+    return 1;
+}
+
+/* Encode using the implementation-wide coordinate limits
+ * (see geohashGetCoordRange). Same return values as geohashEncode. */
+int geohashEncodeType(double longitude, double latitude, uint8_t step, GeoHashBits *hash) {
+    GeoHashRange r[2] = {{0}};
+    geohashGetCoordRange(&r[0], &r[1]);
+    return geohashEncode(&r[0], &r[1], longitude, latitude, step, hash);
+}
+
+/* WGS84 alias for geohashEncodeType: the default ranges are already the
+ * Web-Mercator-constrained WGS84 limits. */
+int geohashEncodeWGS84(double longitude, double latitude, uint8_t step,
+                       GeoHashBits *hash) {
+    return geohashEncodeType(longitude, latitude, step, hash);
+}
+
+/* Decode an interleaved geohash back into the rectangular area (min/max
+ * longitude and latitude) it represents within the given ranges.
+ * Returns 1 on success, 0 on invalid arguments. */
+int geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,
+                  const GeoHashBits hash, GeoHashArea *area) {
+    if (HASHISZERO(hash) || NULL == area || RANGEISZERO(lat_range) ||
+        RANGEISZERO(long_range)) {
+        return 0;
+    }
+
+    area->hash = hash;
+    uint8_t step = hash.step;
+    uint64_t hash_sep = deinterleave64(hash.bits); /* hash = [LAT][LONG] */
+
+    double lat_scale = lat_range.max - lat_range.min;
+    double long_scale = long_range.max - long_range.min;
+
+    uint32_t ilato = hash_sep;       /* get lat part of deinterleaved hash */
+    uint32_t ilono = hash_sep >> 32; /* shift over to get long part of hash */
+
+    /* divide by 2**step.
+     * Then, for 0-1 coordinate, multiply times scale and add
+       to the min to get the absolute coordinate. */
+    area->latitude.min =
+        lat_range.min + (ilato * 1.0 / (1ull << step)) * lat_scale;
+    area->latitude.max =
+        lat_range.min + ((ilato + 1) * 1.0 / (1ull << step)) * lat_scale;
+    area->longitude.min =
+        long_range.min + (ilono * 1.0 / (1ull << step)) * long_scale;
+    area->longitude.max =
+        long_range.min + ((ilono + 1) * 1.0 / (1ull << step)) * long_scale;
+
+    return 1;
+}
+
+/* Decode using the implementation-wide coordinate limits.
+ * Same return values as geohashDecode. */
+int geohashDecodeType(const GeoHashBits hash, GeoHashArea *area) {
+    GeoHashRange r[2] = {{0}};
+    geohashGetCoordRange(&r[0], &r[1]);
+    return geohashDecode(r[0], r[1], hash, area);
+}
+
+/* WGS84 alias for geohashDecodeType (default ranges are the WGS84 limits). */
+int geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area) {
+    return geohashDecodeType(hash, area);
+}
+
+/* Collapse an area to its center point: xy[0] = longitude, xy[1] = latitude,
+ * clamped to the supported limits. Returns 1 on success, 0 if xy is NULL. */
+int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy) {
+    if (!xy) return 0;
+    xy[0] = (area->longitude.min + area->longitude.max) / 2;
+    if (xy[0] > GEO_LONG_MAX) xy[0] = GEO_LONG_MAX;
+    if (xy[0] < GEO_LONG_MIN) xy[0] = GEO_LONG_MIN;
+    xy[1] = (area->latitude.min + area->latitude.max) / 2;
+    if (xy[1] > GEO_LAT_MAX) xy[1] = GEO_LAT_MAX;
+    if (xy[1] < GEO_LAT_MIN) xy[1] = GEO_LAT_MIN;
+    return 1;
+}
+
+/* Decode a hash straight to its center longitude/latitude pair.
+ * Returns 1 on success, 0 on failure. */
+int geohashDecodeToLongLatType(const GeoHashBits hash, double *xy) {
+    GeoHashArea area = {{0}};
+    if (!xy || !geohashDecodeType(hash, &area))
+        return 0;
+    return geohashDecodeAreaToLongLat(&area, xy);
+}
+
+/* WGS84 alias for geohashDecodeToLongLatType. */
+int geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy) {
+    return geohashDecodeToLongLatType(hash, xy);
+}
+
+/* Move the hash one cell along the longitude axis: d > 0 moves east,
+ * d < 0 moves west, d == 0 is a no-op. Works by incrementing/decrementing
+ * only the even (x) interleaved bits, leaving the y bits untouched. */
+static void geohash_move_x(GeoHashBits *hash, int8_t d) {
+    if (d == 0)
+        return;
+
+    uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL; /* odd mask: x bits */
+    uint64_t y = hash->bits & 0x5555555555555555ULL; /* even mask: y bits */
+
+    /* All the y-bit positions set, limited to the used 2*step bits. */
+    uint64_t zz = 0x5555555555555555ULL >> (64 - hash->step * 2);
+
+    if (d > 0) {
+        /* Adding zz+1 carries across the interleaved gaps, i.e. x+1. */
+        x = x + (zz + 1);
+    } else {
+        /* Fill the gaps first so the borrow propagates, i.e. x-1. */
+        x = x | zz;
+        x = x - (zz + 1);
+    }
+
+    /* Mask away any overflow beyond the used bits (wrap-around). */
+    x &= (0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2));
+    hash->bits = (x | y);
+}
+
+/* Move the hash one cell along the latitude axis: d > 0 moves north,
+ * d < 0 moves south, d == 0 is a no-op. Mirror of geohash_move_x but
+ * operating on the odd (y) interleaved bits. */
+static void geohash_move_y(GeoHashBits *hash, int8_t d) {
+    if (d == 0)
+        return;
+
+    uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL;
+    uint64_t y = hash->bits & 0x5555555555555555ULL;
+
+    uint64_t zz = 0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2);
+    if (d > 0) {
+        y = y + (zz + 1);
+    } else {
+        y = y | zz;
+        y = y - (zz + 1);
+    }
+    y &= (0x5555555555555555ULL >> (64 - hash->step * 2));
+    hash->bits = (x | y);
+}
+
+/* Compute the 8 geohash cells surrounding 'hash' (at the same step) by
+ * moving one cell in each compass direction. */
+void geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors) {
+    /* Start every neighbor from the center cell... */
+    neighbors->east = *hash;
+    neighbors->west = *hash;
+    neighbors->north = *hash;
+    neighbors->south = *hash;
+    neighbors->south_east = *hash;
+    neighbors->south_west = *hash;
+    neighbors->north_east = *hash;
+    neighbors->north_west = *hash;
+
+    /* ...then shift along x (longitude) and/or y (latitude). */
+    geohash_move_x(&neighbors->east, 1);
+    geohash_move_y(&neighbors->east, 0);
+
+    geohash_move_x(&neighbors->west, -1);
+    geohash_move_y(&neighbors->west, 0);
+
+    geohash_move_x(&neighbors->south, 0);
+    geohash_move_y(&neighbors->south, -1);
+
+    geohash_move_x(&neighbors->north, 0);
+    geohash_move_y(&neighbors->north, 1);
+
+    geohash_move_x(&neighbors->north_west, -1);
+    geohash_move_y(&neighbors->north_west, 1);
+
+    geohash_move_x(&neighbors->north_east, 1);
+    geohash_move_y(&neighbors->north_east, 1);
+
+    geohash_move_x(&neighbors->south_east, 1);
+    geohash_move_y(&neighbors->south_east, -1);
+
+    geohash_move_x(&neighbors->south_west, -1);
+    geohash_move_y(&neighbors->south_west, -1);
+}
diff --git a/src/geohash.h b/src/geohash.h
new file mode 100644
index 0000000..4befb93
--- /dev/null
+++ b/src/geohash.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GEOHASH_H_
+#define GEOHASH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define HASHISZERO(r) (!(r).bits && !(r).step)
+#define RANGEISZERO(r) (!(r).max && !(r).min)
+#define RANGEPISZERO(r) (r == NULL || RANGEISZERO(*r))
+
+#define GEO_STEP_MAX 26 /* 26*2 = 52 bits. */
+
+/* Limits from EPSG:900913 / EPSG:3785 / OSGEO:41001 */
+#define GEO_LAT_MIN -85.05112878
+#define GEO_LAT_MAX 85.05112878
+#define GEO_LONG_MIN -180
+#define GEO_LONG_MAX 180
+
+/* NOTE(review): NORT_WEST/NORT_EAST look like typos for NORTH_*, but the
+ * names are part of the public enum so they are left as-is. */
+typedef enum {
+    GEOHASH_NORTH = 0,
+    GEOHASH_EAST,
+    GEOHASH_WEST,
+    GEOHASH_SOUTH,
+    GEOHASH_SOUTH_WEST,
+    GEOHASH_SOUTH_EAST,
+    GEOHASH_NORT_WEST,
+    GEOHASH_NORT_EAST
+} GeoDirection;
+
+/* Interleaved geohash: 'step' bits per coordinate, 2*step bits total. */
+typedef struct {
+    uint64_t bits;
+    uint8_t step;
+} GeoHashBits;
+
+typedef struct {
+    double min;
+    double max;
+} GeoHashRange;
+
+/* Rectangular area covered by a hash cell. */
+typedef struct {
+    GeoHashBits hash;
+    GeoHashRange longitude;
+    GeoHashRange latitude;
+} GeoHashArea;
+
+typedef struct {
+    GeoHashBits north;
+    GeoHashBits east;
+    GeoHashBits west;
+    GeoHashBits south;
+    GeoHashBits north_east;
+    GeoHashBits south_east;
+    GeoHashBits north_west;
+    GeoHashBits south_west;
+} GeoHashNeighbors;
+
+#define CIRCULAR_TYPE 1
+#define RECTANGLE_TYPE 2
+typedef struct {
+    int type; /* search type */
+    double xy[2]; /* search center point, xy[0]: lon, xy[1]: lat */
+    double conversion; /* km: 1000 */
+    double bounds[4]; /* bounds[0]: min_lon, bounds[1]: min_lat
+                       * bounds[2]: max_lon, bounds[3]: max_lat */
+    union {
+        /* CIRCULAR_TYPE */
+        double radius;
+        /* RECTANGLE_TYPE */
+        struct {
+            double height;
+            double width;
+        } r;
+    } t;
+} GeoShape;
+
+/*
+ * The encode/decode functions below return 1 on success and 0 on
+ * failure (invalid arguments or out-of-range coordinates).
+ */
+void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range);
+int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
+                  double longitude, double latitude, uint8_t step,
+                  GeoHashBits *hash);
+int geohashEncodeType(double longitude, double latitude,
+                      uint8_t step, GeoHashBits *hash);
+int geohashEncodeWGS84(double longitude, double latitude, uint8_t step,
+                       GeoHashBits *hash);
+int geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,
+                  const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeType(const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy);
+int geohashDecodeToLongLatType(const GeoHashBits hash, double *xy);
+int geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy);
+void geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors);
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* GEOHASH_H_ */
diff --git a/src/geohash_helper.c b/src/geohash_helper.c
new file mode 100644
index 0000000..a3816fb
--- /dev/null
+++ b/src/geohash_helper.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This is a C++ to C conversion from the ardb project.
+ * This file started out as:
+ * https://github.com/yinqiwen/ardb/blob/d42503/src/geo/geohash_helper.cpp
+ */
+
+#include "fmacros.h"
+#include "geohash_helper.h"
+#include "debugmacro.h"
+#include <math.h>
+
+#define D_R (M_PI / 180.0)
+#define R_MAJOR 6378137.0
+#define R_MINOR 6356752.3142
+#define RATIO (R_MINOR / R_MAJOR)
+#define ECCENT (sqrt(1.0 - (RATIO *RATIO)))
+#define COM (0.5 * ECCENT)
+
+/// @brief The usual PI/180 constant
+const double DEG_TO_RAD = 0.017453292519943295769236907684886;
+/// @brief Earth's quadratic mean radius for WGS-84
+const double EARTH_RADIUS_IN_METERS = 6372797.560856;
+
+const double MERCATOR_MAX = 20037726.37;
+const double MERCATOR_MIN = -20037726.37;
+
+/* Degrees <-> radians helpers. */
+static inline double deg_rad(double ang) { return ang * D_R; }
+static inline double rad_deg(double ang) { return ang / D_R; }
+
+/* This function is used in order to estimate the step (bits precision)
+ * of the 9 search area boxes during radius queries.
+ * Returns a step in [1,26]: the smallest number of bits per coordinate
+ * such that a cell at that precision still covers 'range_meters'. */
+uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) {
+    if (range_meters == 0) return 26;
+    int step = 1;
+    /* Double the radius until it exceeds the Mercator extent: each
+     * doubling corresponds to one extra bit of precision. */
+    while (range_meters < MERCATOR_MAX) {
+        range_meters *= 2;
+        step++;
+    }
+    step -= 2; /* Make sure range is included in most of the base cases. */
+
+    /* Wider range towards the poles... Note: it is possible to do better
+     * than this approximation by computing the distance between meridians
+     * at this latitude, but this does the trick for now. */
+    if (lat > 66 || lat < -66) {
+        step--;
+        if (lat > 80 || lat < -80) step--;
+    }
+
+    /* Frame to valid range. */
+    if (step < 1) step = 1;
+    if (step > 26) step = 26;
+    return step;
+}
+
+/* Return the bounding box of the search area by shape (see geohash.h GeoShape)
+ * bounds[0] - bounds[2] is the minimum and maximum longitude
+ * while bounds[1] - bounds[3] is the minimum and maximum latitude.
+ * Returns 1 on success, 0 if 'bounds' is NULL.
+ * since the higher the latitude, the shorter the arc length, the box shape is as follows
+ * (left and right edges are actually bent), as shown in the following diagram:
+ *
+ *    \-----------------/          --------               \-----------------/
+ *     \               /         /          \              \               /
+ *      \ (long,lat)  /         / (long,lat) \              \ (long,lat)  /
+ *       \           /         /              \              /           \
+ *         ---------          /----------------\            /---------------\
+ *  Northern Hemisphere       Southern Hemisphere         Around the equator
+ */
+int geohashBoundingBox(GeoShape *shape, double *bounds) {
+    if (!bounds) return 0;
+    double longitude = shape->xy[0];
+    double latitude = shape->xy[1];
+    /* Half-extents in meters: the radius for circles, half the box
+     * height/width for rectangles ('conversion' scales to meters). */
+    double height = shape->conversion * (shape->type == CIRCULAR_TYPE ? shape->t.radius : shape->t.r.height/2);
+    double width = shape->conversion * (shape->type == CIRCULAR_TYPE ? shape->t.radius : shape->t.r.width/2);
+
+    const double lat_delta = rad_deg(height/EARTH_RADIUS_IN_METERS);
+    const double long_delta_top = rad_deg(width/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude+lat_delta)));
+    const double long_delta_bottom = rad_deg(width/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude-lat_delta)));
+    /* The directions of the northern and southern hemispheres
+     * are opposite, so we choose different points as min/max long/lat */
+    int southern_hemisphere = latitude < 0 ? 1 : 0;
+    bounds[0] = southern_hemisphere ? longitude-long_delta_bottom : longitude-long_delta_top;
+    bounds[2] = southern_hemisphere ? longitude+long_delta_bottom : longitude+long_delta_top;
+    bounds[1] = latitude - lat_delta;
+    bounds[3] = latitude + lat_delta;
+    return 1;
+}
+
+/* Calculate a set of areas (center + 8) that are able to cover a range query
+ * for the specified position and shape (see geohash.h GeoShape).
+ * the bounding box saved in shape.bounds */
+GeoHashRadius geohashCalculateAreasByShapeWGS84(GeoShape *shape) {
+    GeoHashRange long_range, lat_range;
+    GeoHashRadius radius;
+    GeoHashBits hash;
+    GeoHashNeighbors neighbors;
+    GeoHashArea area;
+    double min_lon, max_lon, min_lat, max_lat;
+    int steps;
+
+    /* Compute the search bounding box and cache it inside the shape. */
+    geohashBoundingBox(shape, shape->bounds);
+    min_lon = shape->bounds[0];
+    min_lat = shape->bounds[1];
+    max_lon = shape->bounds[2];
+    max_lat = shape->bounds[3];
+
+    double longitude = shape->xy[0];
+    double latitude = shape->xy[1];
+    /* radius_meters is calculated differently in different search types:
+     * 1) CIRCULAR_TYPE, just use radius.
+     * 2) RECTANGLE_TYPE, we use sqrt((width/2)^2 + (height/2)^2) to
+     * calculate the distance from the center point to the corner */
+    double radius_meters = shape->type == CIRCULAR_TYPE ? shape->t.radius :
+        sqrt((shape->t.r.width/2)*(shape->t.r.width/2) + (shape->t.r.height/2)*(shape->t.r.height/2));
+    radius_meters *= shape->conversion;
+
+    steps = geohashEstimateStepsByRadius(radius_meters,latitude);
+
+    /* Center cell and its 8 neighbors at the estimated precision. */
+    geohashGetCoordRange(&long_range,&lat_range);
+    geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);
+    geohashNeighbors(&hash,&neighbors);
+    geohashDecode(long_range,lat_range,hash,&area);
+
+    /* Check if the step is enough at the limits of the covered area.
+     * Sometimes when the search area is near an edge of the
+     * area, the estimated step is not small enough, since one of the
+     * north / south / west / east square is too near to the search area
+     * to cover everything. */
+    int decrease_step = 0;
+    {
+        GeoHashArea north, south, east, west;
+
+        geohashDecode(long_range, lat_range, neighbors.north, &north);
+        geohashDecode(long_range, lat_range, neighbors.south, &south);
+        geohashDecode(long_range, lat_range, neighbors.east, &east);
+        geohashDecode(long_range, lat_range, neighbors.west, &west);
+
+        if (north.latitude.max < max_lat)
+            decrease_step = 1;
+        if (south.latitude.min > min_lat)
+            decrease_step = 1;
+        if (east.longitude.max < max_lon)
+            decrease_step = 1;
+        if (west.longitude.min > min_lon)
+            decrease_step = 1;
+    }
+
+    /* One less step means 4x larger cells; recompute everything. */
+    if (steps > 1 && decrease_step) {
+        steps--;
+        geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);
+        geohashNeighbors(&hash,&neighbors);
+        geohashDecode(long_range,lat_range,hash,&area);
+    }
+
+    /* Exclude the search areas that are useless. */
+    if (steps >= 2) {
+        if (area.latitude.min < min_lat) {
+            GZERO(neighbors.south);
+            GZERO(neighbors.south_west);
+            GZERO(neighbors.south_east);
+        }
+        if (area.latitude.max > max_lat) {
+            GZERO(neighbors.north);
+            GZERO(neighbors.north_east);
+            GZERO(neighbors.north_west);
+        }
+        if (area.longitude.min < min_lon) {
+            GZERO(neighbors.west);
+            GZERO(neighbors.south_west);
+            GZERO(neighbors.north_west);
+        }
+        if (area.longitude.max > max_lon) {
+            GZERO(neighbors.east);
+            GZERO(neighbors.south_east);
+            GZERO(neighbors.north_east);
+        }
+    }
+    radius.hash = hash;
+    radius.neighbors = neighbors;
+    radius.area = area;
+    return radius;
+}
+
+/* Left-align a variable-precision hash into the fixed 52-bit representation
+ * used as sorted-set score. */
+GeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash) {
+    uint64_t bits = hash.bits;
+    bits <<= (52 - hash.step * 2);
+    return bits;
+}
+
+/* Calculate distance using simplified haversine great circle distance formula.
+ * Given longitude diff is 0 the asin(sqrt(a)) on the haversine is asin(sin(abs(u))).
+ * arcsin(sin(x)) equal to x when x ∈[−𝜋/2,𝜋/2]. Given latitude is between [−𝜋/2,𝜋/2]
+ * we can simplify arcsin(sin(x)) to x.
+ * Result is in meters.
+ */
+double geohashGetLatDistance(double lat1d, double lat2d) {
+    return EARTH_RADIUS_IN_METERS * fabs(deg_rad(lat2d) - deg_rad(lat1d));
+}
+
+/* Calculate distance using haversine great circle distance formula.
+ * Arguments are (lon1, lat1, lon2, lat2) in degrees; result is in meters. */
+double geohashGetDistance(double lon1d, double lat1d, double lon2d, double lat2d) {
+    double lat1r, lon1r, lat2r, lon2r, u, v, a;
+    lon1r = deg_rad(lon1d);
+    lon2r = deg_rad(lon2d);
+    v = sin((lon2r - lon1r) / 2);
+    /* if v == 0 we can avoid doing expensive math when lons are practically the same */
+    if (v == 0.0)
+        return geohashGetLatDistance(lat1d, lat2d);
+    lat1r = deg_rad(lat1d);
+    lat2r = deg_rad(lat2d);
+    u = sin((lat2r - lat1r) / 2);
+    a = u * u + cos(lat1r) * cos(lat2r) * v * v;
+    return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(a));
+}
+
+/* Compute the distance between (x1,y1) and (x2,y2) into *distance and
+ * return 1 if it is within 'radius' meters, 0 otherwise. Note *distance
+ * is written in both cases. */
+int geohashGetDistanceIfInRadius(double x1, double y1,
+                                 double x2, double y2, double radius,
+                                 double *distance) {
+    *distance = geohashGetDistance(x1, y1, x2, y2);
+    if (*distance > radius) return 0;
+    return 1;
+}
+
+/* WGS84 alias for geohashGetDistanceIfInRadius. */
+int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
+                                      double y2, double radius,
+                                      double *distance) {
+    return geohashGetDistanceIfInRadius(x1, y1, x2, y2, radius, distance);
+}
+
+/* Judge whether a point is in the axis-aligned rectangle, when the distance
+ * between a searched point and the center point is less than or equal to
+ * height/2 or width/2 in height and width, the point is in the rectangle.
+ *
+ * width_m, height_m: the rectangle
+ * x1, y1 : the center of the box
+ * x2, y2 : the point to be searched
+ *
+ * Returns 1 and fills *distance (center-to-point meters) when inside;
+ * returns 0 without touching *distance when outside.
+ */
+int geohashGetDistanceIfInRectangle(double width_m, double height_m, double x1, double y1,
+                                    double x2, double y2, double *distance) {
+    /* latitude distance is less expensive to compute than longitude distance
+     * so we check first for the latitude condition */
+    double lat_distance = geohashGetLatDistance(y2, y1);
+    if (lat_distance > height_m/2) {
+        return 0;
+    }
+    /* Longitude check along the searched point's own parallel (y2). */
+    double lon_distance = geohashGetDistance(x2, y2, x1, y2);
+    if (lon_distance > width_m/2) {
+        return 0;
+    }
+    *distance = geohashGetDistance(x1, y1, x2, y2);
+    return 1;
+}
diff --git a/src/geohash_helper.h b/src/geohash_helper.h
new file mode 100644
index 0000000..56c731f
--- /dev/null
+++ b/src/geohash_helper.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GEOHASH_HELPER_HPP_
+#define GEOHASH_HELPER_HPP_
+
+#include "geohash.h"
+
+/* Reset / test helpers for GeoHashBits values. Note 's' is used as a bare
+ * lvalue struct (not a pointer), so callers pass the struct itself. */
+#define GZERO(s) s.bits = s.step = 0;
+#define GISZERO(s) (!s.bits && !s.step)
+#define GISNOTZERO(s) (s.bits || s.step)
+
+/* 52-bit fixed-precision geohash score (fits in a double mantissa). */
+typedef uint64_t GeoHashFix52Bits;
+typedef uint64_t GeoHashVarBits;
+
+/* A search area: the center cell hash, its decoded lon/lat bounds, and
+ * the eight neighboring cells around it. */
+typedef struct {
+ GeoHashBits hash;
+ GeoHashArea area;
+ GeoHashNeighbors neighbors;
+} GeoHashRadius;
+
+uint8_t geohashEstimateStepsByRadius(double range_meters, double lat);
+int geohashBoundingBox(GeoShape *shape, double *bounds);
+GeoHashRadius geohashCalculateAreasByShapeWGS84(GeoShape *shape);
+GeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash);
+double geohashGetDistance(double lon1d, double lat1d,
+ double lon2d, double lat2d);
+int geohashGetDistanceIfInRadius(double x1, double y1,
+ double x2, double y2, double radius,
+ double *distance);
+int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
+ double y2, double radius,
+ double *distance);
+int geohashGetDistanceIfInRectangle(double width_m, double height_m, double x1, double y1,
+ double x2, double y2, double *distance);
+
+#endif /* GEOHASH_HELPER_HPP_ */
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
new file mode 100644
index 0000000..1a74f47
--- /dev/null
+++ b/src/hyperloglog.c
@@ -0,0 +1,1618 @@
+/* hyperloglog.c - Redis HyperLogLog probabilistic cardinality approximation.
+ * This file implements the algorithm and the exported Redis commands.
+ *
+ * Copyright (c) 2014, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+#include <stdint.h>
+#include <math.h>
+
+/* The Redis HyperLogLog implementation is based on the following ideas:
+ *
+ * * The use of a 64 bit hash function as proposed in [1], in order to estimate
+ * cardinalities larger than 10^9, at the cost of just 1 additional bit per
+ * register.
+ * * The use of 16384 6-bit registers for a great level of accuracy, using
+ * a total of 12k per key.
+ * * The use of the Redis string data type. No new type is introduced.
+ * * No attempt is made to compress the data structure as in [1]. Also the
+ * algorithm used is the original HyperLogLog Algorithm as in [2], with
+ * the only difference that a 64 bit hash function is used, so no correction
+ * is performed for values near 2^32 as in [1].
+ *
+ * [1] Heule, Nunkesser, Hall: HyperLogLog in Practice: Algorithmic
+ * Engineering of a State of The Art Cardinality Estimation Algorithm.
+ *
+ * [2] P. Flajolet, Éric Fusy, O. Gandouet, and F. Meunier. Hyperloglog: The
+ * analysis of a near-optimal cardinality estimation algorithm.
+ *
+ * Redis uses two representations:
+ *
+ * 1) A "dense" representation where every entry is represented by
+ * a 6-bit integer.
+ * 2) A "sparse" representation using run length compression suitable
+ * for representing HyperLogLogs with many registers set to 0 in
+ * a memory efficient way.
+ *
+ *
+ * HLL header
+ * ===
+ *
+ * Both the dense and sparse representation have a 16 byte header as follows:
+ *
+ * +------+---+-----+----------+
+ * | HYLL | E | N/U | Cardin. |
+ * +------+---+-----+----------+
+ *
+ * The first 4 bytes are a magic string set to the bytes "HYLL".
+ * "E" is one byte encoding, currently set to HLL_DENSE or
+ * HLL_SPARSE. N/U are three not used bytes.
+ *
+ * The "Cardin." field is a 64 bit integer stored in little endian format
+ * with the latest cardinality computed that can be reused if the data
+ * structure was not modified since the last computation (this is useful
+ * because there are high probabilities that HLLADD operations don't
+ * modify the actual data structure and hence the approximated cardinality).
+ *
+ * When the most significant bit in the most significant byte of the cached
+ * cardinality is set, it means that the data structure was modified and
+ * we can't reuse the cached value that must be recomputed.
+ *
+ * Dense representation
+ * ===
+ *
+ * The dense representation used by Redis is the following:
+ *
+ * +--------+--------+--------+------// //--+
+ * |11000000|22221111|33333322|55444444 .... |
+ * +--------+--------+--------+------// //--+
+ *
+ * The 6 bits counters are encoded one after the other starting from the
+ * LSB to the MSB, and using the next bytes as needed.
+ *
+ * Sparse representation
+ * ===
+ *
+ * The sparse representation encodes registers using a run length
+ * encoding composed of three opcodes, two using one byte, and one using
+ * of two bytes. The opcodes are called ZERO, XZERO and VAL.
+ *
+ * ZERO opcode is represented as 00xxxxxx. The 6-bit integer represented
+ * by the six bits 'xxxxxx', plus 1, means that there are N registers set
+ * to 0. This opcode can represent from 1 to 64 contiguous registers set
+ * to the value of 0.
+ *
+ * XZERO opcode is represented by two bytes 01xxxxxx yyyyyyyy. The 14-bit
+ * integer represented by the bits 'xxxxxx' as most significant bits and
+ * 'yyyyyyyy' as least significant bits, plus 1, means that there are N
+ * registers set to 0. This opcode can represent from 0 to 16384 contiguous
+ * registers set to the value of 0.
+ *
+ * VAL opcode is represented as 1vvvvvxx. It contains a 5-bit integer
+ * representing the value of a register, and a 2-bit integer representing
+ * the number of contiguous registers set to that value 'vvvvv'.
+ * To obtain the value and run length, the integers vvvvv and xx must be
+ * incremented by one. This opcode can represent values from 1 to 32,
+ * repeated from 1 to 4 times.
+ *
+ * The sparse representation can't represent registers with a value greater
+ * than 32, however it is very unlikely that we find such a register in an
+ * HLL with a cardinality where the sparse representation is still more
+ * memory efficient than the dense representation. When this happens the
+ * HLL is converted to the dense representation.
+ *
+ * The sparse representation is purely positional. For example a sparse
+ * representation of an empty HLL is just: XZERO:16384.
+ *
+ * An HLL having only 3 non-zero registers at position 1000, 1020, 1021
+ * respectively set to 2, 3, 3, is represented by the following three
+ * opcodes:
+ *
+ * XZERO:1000 (Registers 0-999 are set to 0)
+ * VAL:2,1 (1 register set to value 2, that is register 1000)
+ * ZERO:19 (Registers 1001-1019 set to 0)
+ * VAL:3,2 (2 registers set to value 3, that is registers 1020,1021)
+ * XZERO:15362 (Registers 1022-16383 set to 0)
+ *
+ * In the example the sparse representation used just 7 bytes instead
+ * of 12k in order to represent the HLL registers. In general for low
+ * cardinality there is a big win in terms of space efficiency, traded
+ * with CPU time since the sparse representation is slower to access.
+ *
+ * The following table shows average cardinality vs bytes used, 100
+ * samples per cardinality (when the set was not representable because
+ * of registers with too big value, the dense representation size was used
+ * as a sample).
+ *
+ * 100 267
+ * 200 485
+ * 300 678
+ * 400 859
+ * 500 1033
+ * 600 1205
+ * 700 1375
+ * 800 1544
+ * 900 1713
+ * 1000 1882
+ * 2000 3480
+ * 3000 4879
+ * 4000 6089
+ * 5000 7138
+ * 6000 8042
+ * 7000 8823
+ * 8000 9500
+ * 9000 10088
+ * 10000 10591
+ *
+ * The dense representation uses 12288 bytes, so there is a big win up to
+ * a cardinality of ~2000-3000. For bigger cardinalities the constant times
+ * involved in updating the sparse representation is not justified by the
+ * memory savings. The exact maximum length of the sparse representation
+ * when this implementation switches to the dense representation is
+ * configured via the define server.hll_sparse_max_bytes.
+ */
+
+/* 16 byte header shared by the dense and sparse representations; the
+ * variable-length register payload follows (see the layout notes above). */
+struct hllhdr {
+ char magic[4]; /* "HYLL" */
+ uint8_t encoding; /* HLL_DENSE or HLL_SPARSE. */
+ uint8_t notused[3]; /* Reserved for future use, must be zero. */
+ uint8_t card[8]; /* Cached cardinality, little endian. */
+ uint8_t registers[]; /* Data bytes. */
+};
+
+/* The cached cardinality MSB is used to signal validity of the cached value. */
+#define HLL_INVALIDATE_CACHE(hdr) (hdr)->card[7] |= (1<<7)
+#define HLL_VALID_CACHE(hdr) (((hdr)->card[7] & (1<<7)) == 0)
+
+#define HLL_P 14 /* The greater is P, the smaller the error. */
+#define HLL_Q (64-HLL_P) /* The number of bits of the hash value used for
+ determining the number of leading zeros. */
+#define HLL_REGISTERS (1<<HLL_P) /* With P=14, 16384 registers. */
+#define HLL_P_MASK (HLL_REGISTERS-1) /* Mask to index register. */
+#define HLL_BITS 6 /* Enough to count up to 63 leading zeroes. */
+#define HLL_REGISTER_MAX ((1<<HLL_BITS)-1)
+#define HLL_HDR_SIZE sizeof(struct hllhdr)
+#define HLL_DENSE_SIZE (HLL_HDR_SIZE+((HLL_REGISTERS*HLL_BITS+7)/8))
+#define HLL_DENSE 0 /* Dense encoding. */
+#define HLL_SPARSE 1 /* Sparse encoding. */
+#define HLL_RAW 255 /* Only used internally, never exposed. */
+#define HLL_MAX_ENCODING 1
+
+static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected";
+
+/* =========================== Low level bit macros ========================= */
+
+/* Macros to access the dense representation.
+ *
+ * We need to get and set 6 bit counters in an array of 8 bit bytes.
+ * We use macros to make sure the code is inlined since speed is critical
+ * especially in order to compute the approximated cardinality in
+ * HLLCOUNT where we need to access all the registers at once.
+ * For the same reason we also want to avoid conditionals in this code path.
+ *
+ * +--------+--------+--------+------//
+ * |11000000|22221111|33333322|55444444
+ * +--------+--------+--------+------//
+ *
+ * Note: in the above representation the most significant bit (MSB)
+ * of every byte is on the left. We start using bits from the LSB to MSB,
+ * and so forth passing to the next byte.
+ *
+ * Example, we want to access to counter at pos = 1 ("111111" in the
+ * illustration above).
+ *
+ * The index of the first byte b0 containing our data is:
+ *
+ * b0 = 6 * pos / 8 = 0
+ *
+ * +--------+
+ * |11000000| <- Our byte at b0
+ * +--------+
+ *
+ * The position of the first bit (counting from the LSB = 0) in the byte
+ * is given by:
+ *
+ * fb = 6 * pos % 8 -> 6
+ *
+ * Right shift b0 of 'fb' bits.
+ *
+ * +--------+
+ * |11000000| <- Initial value of b0
+ * |00000011| <- After right shift of 6 pos.
+ * +--------+
+ *
+ * Left shift b1 of bits 8-fb bits (2 bits)
+ *
+ * +--------+
+ * |22221111| <- Initial value of b1
+ * |22111100| <- After left shift of 2 bits.
+ * +--------+
+ *
+ * OR the two bits, and finally AND with 111111 (63 in decimal) to
+ * clean the higher order bits we are not interested in:
+ *
+ * +--------+
+ * |00000011| <- b0 right shifted
+ * |22111100| <- b1 left shifted
+ * |22111111| <- b0 OR b1
+ * | 111111| <- (b0 OR b1) AND 63, our value.
+ * +--------+
+ *
+ * We can try with a different example, like pos = 0. In this case
+ * the 6-bit counter is actually contained in a single byte.
+ *
+ * b0 = 6 * pos / 8 = 0
+ *
+ * +--------+
+ * |11000000| <- Our byte at b0
+ * +--------+
+ *
+ * fb = 6 * pos % 8 = 0
+ *
+ * So we right shift of 0 bits (no shift in practice) and
+ * left shift the next byte of 8 bits, even if we don't use it,
+ * but this has the effect of clearing the bits so the result
+ * will not be affected after the OR.
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Setting the register is a bit more complex, let's assume that 'val'
+ * is the value we want to set, already in the right range.
+ *
+ * We need two steps, in one we need to clear the bits, and in the other
+ * we need to bitwise-OR the new bits.
+ *
+ * Let's try with 'pos' = 1, so our first byte at 'b' is 0,
+ *
+ * "fb" is 6 in this case.
+ *
+ * +--------+
+ * |11000000| <- Our byte at b0
+ * +--------+
+ *
+ * To create an AND-mask to clear the bits about this position, we just
+ * initialize the mask with the value 63, left shift it of "fs" bits,
+ * and finally invert the result.
+ *
+ * +--------+
+ * |00111111| <- "mask" starts at 63
+ * |11000000| <- "mask" after left shift of "ls" bits.
+ * |00111111| <- "mask" after invert.
+ * +--------+
+ *
+ * Now we can bitwise-AND the byte at "b" with the mask, and bitwise-OR
+ * it with "val" left-shifted of "ls" bits to set the new bits.
+ *
+ * Now let's focus on the next byte b1:
+ *
+ * +--------+
+ * |22221111| <- Initial value of b1
+ * +--------+
+ *
+ * To build the AND mask we start again with the 63 value, right shift
+ * it by 8-fb bits, and invert it.
+ *
+ * +--------+
+ * |00111111| <- "mask" set at 2&6-1
+ * |00001111| <- "mask" after the right shift by 8-fb = 2 bits
+ * |11110000| <- "mask" after bitwise not.
+ * +--------+
+ *
+ * Now we can mask it with b+1 to clear the old bits, and bitwise-OR
+ * with "val" left-shifted by "rs" bits to set the new value.
+ */
+
+/* Note: if we access the last counter, we will also access the b+1 byte
+ * that is out of the array, but sds strings always have an implicit null
+ * term, so the byte exists, and we can skip the conditional (or the need
+ * to allocate 1 byte more explicitly). */
+
+/* Store the value of the register at position 'regnum' into variable 'target'.
+ * 'p' is an array of unsigned bytes. */
+#define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \
+ uint8_t *_p = (uint8_t*) p; \
+ unsigned long _byte = regnum*HLL_BITS/8; \
+ unsigned long _fb = regnum*HLL_BITS&7; \
+ unsigned long _fb8 = 8 - _fb; \
+ unsigned long b0 = _p[_byte]; \
+ unsigned long b1 = _p[_byte+1]; \
+ target = ((b0 >> _fb) | (b1 << _fb8)) & HLL_REGISTER_MAX; \
+} while(0)
+
+/* Set the value of the register at position 'regnum' to 'val'.
+ * 'p' is an array of unsigned bytes. */
+#define HLL_DENSE_SET_REGISTER(p,regnum,val) do { \
+ uint8_t *_p = (uint8_t*) p; \
+ unsigned long _byte = (regnum)*HLL_BITS/8; \
+ unsigned long _fb = (regnum)*HLL_BITS&7; \
+ unsigned long _fb8 = 8 - _fb; \
+ unsigned long _v = (val); \
+ _p[_byte] &= ~(HLL_REGISTER_MAX << _fb); \
+ _p[_byte] |= _v << _fb; \
+ _p[_byte+1] &= ~(HLL_REGISTER_MAX >> _fb8); \
+ _p[_byte+1] |= _v >> _fb8; \
+} while(0)
+
+/* Macros to access the sparse representation.
+ * The macros parameter is expected to be an uint8_t pointer. */
+#define HLL_SPARSE_XZERO_BIT 0x40 /* 01xxxxxx */
+#define HLL_SPARSE_VAL_BIT 0x80 /* 1vvvvvxx */
+#define HLL_SPARSE_IS_ZERO(p) (((*(p)) & 0xc0) == 0) /* 00xxxxxx */
+#define HLL_SPARSE_IS_XZERO(p) (((*(p)) & 0xc0) == HLL_SPARSE_XZERO_BIT)
+#define HLL_SPARSE_IS_VAL(p) ((*(p)) & HLL_SPARSE_VAL_BIT)
+#define HLL_SPARSE_ZERO_LEN(p) (((*(p)) & 0x3f)+1)
+#define HLL_SPARSE_XZERO_LEN(p) (((((*(p)) & 0x3f) << 8) | (*((p)+1)))+1)
+#define HLL_SPARSE_VAL_VALUE(p) ((((*(p)) >> 2) & 0x1f)+1)
+#define HLL_SPARSE_VAL_LEN(p) (((*(p)) & 0x3)+1)
+#define HLL_SPARSE_VAL_MAX_VALUE 32
+#define HLL_SPARSE_VAL_MAX_LEN 4
+#define HLL_SPARSE_ZERO_MAX_LEN 64
+#define HLL_SPARSE_XZERO_MAX_LEN 16384
+#define HLL_SPARSE_VAL_SET(p,val,len) do { \
+ *(p) = (((val)-1)<<2|((len)-1))|HLL_SPARSE_VAL_BIT; \
+} while(0)
+#define HLL_SPARSE_ZERO_SET(p,len) do { \
+ *(p) = (len)-1; \
+} while(0)
+#define HLL_SPARSE_XZERO_SET(p,len) do { \
+ int _l = (len)-1; \
+ *(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; \
+ *((p)+1) = (_l&0xff); \
+} while(0)
+#define HLL_ALPHA_INF 0.721347520444481703680 /* constant for 0.5/ln(2) */
+
+/* ========================= HyperLogLog algorithm ========================= */
+
+/* Our hash function is MurmurHash2, 64 bit version.
+ * It was modified for Redis in order to provide the same result in
+ * big and little endian archs (endian neutral). */
+REDIS_NO_SANITIZE("alignment")
+uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
+ const uint64_t m = 0xc6a4a7935bd1e995;
+ const int r = 47;
+ uint64_t h = seed ^ (len * m);
+ const uint8_t *data = (const uint8_t *)key;
+ const uint8_t *end = data + (len-(len&7)); /* end of the 8-byte-multiple part */
+
+ /* Mix the input eight bytes at a time. */
+ while(data != end) {
+ uint64_t k;
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+ #ifdef USE_ALIGNED_ACCESS
+ memcpy(&k,data,sizeof(uint64_t));
+ #else
+ k = *((uint64_t*)data);
+ #endif
+#else
+ /* Big endian: load byte-by-byte so the value matches little endian. */
+ k = (uint64_t) data[0];
+ k |= (uint64_t) data[1] << 8;
+ k |= (uint64_t) data[2] << 16;
+ k |= (uint64_t) data[3] << 24;
+ k |= (uint64_t) data[4] << 32;
+ k |= (uint64_t) data[5] << 40;
+ k |= (uint64_t) data[6] << 48;
+ k |= (uint64_t) data[7] << 56;
+#endif
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+ h ^= k;
+ h *= m;
+ data += 8;
+ }
+
+ /* Mix the 0..7 trailing bytes left over from the loop above. */
+ switch(len & 7) {
+ case 7: h ^= (uint64_t)data[6] << 48; /* fall-thru */
+ case 6: h ^= (uint64_t)data[5] << 40; /* fall-thru */
+ case 5: h ^= (uint64_t)data[4] << 32; /* fall-thru */
+ case 4: h ^= (uint64_t)data[3] << 24; /* fall-thru */
+ case 3: h ^= (uint64_t)data[2] << 16; /* fall-thru */
+ case 2: h ^= (uint64_t)data[1] << 8; /* fall-thru */
+ case 1: h ^= (uint64_t)data[0];
+ h *= m; /* fall-thru */
+ };
+
+ /* Final avalanche. */
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
+ return h;
+}
+
+/* Given a string element to add to the HyperLogLog, returns the length
+ * of the pattern 000..1 of the element hash. As a side effect 'regp' is
+ * set to the register index this element hashes to. The returned count
+ * is always in the range [1, HLL_Q+1]. */
+int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
+ uint64_t hash, bit, index;
+ int count;
+
+ /* Count the number of zeroes starting from bit HLL_REGISTERS
+ * (that is a power of two corresponding to the first bit we don't use
+ * as index). The max run can be 64-P+1 = Q+1 bits.
+ *
+ * Note that the final "1" ending the sequence of zeroes must be
+ * included in the count, so if we find "001" the count is 3, and
+ * the smallest count possible is no zeroes at all, just a 1 bit
+ * at the first position, that is a count of 1.
+ *
+ * This may sound inefficient, but actually in the average case
+ * there are high probabilities to find a 1 after a few iterations. */
+ hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
+ index = hash & HLL_P_MASK; /* Register index. */
+ hash >>= HLL_P; /* Remove bits used to address the register. */
+ hash |= ((uint64_t)1<<HLL_Q); /* Make sure the loop terminates
+ and count will be <= Q+1. */
+ bit = 1;
+ count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
+ while((hash & bit) == 0) {
+ count++;
+ bit <<= 1;
+ }
+ *regp = (int) index;
+ return count;
+}
+
+/* ================== Dense representation implementation ================== */
+
+/* Low level function to set the dense HLL register at 'index' to the
+ * specified value if the current value is smaller than 'count'.
+ *
+ * 'registers' is expected to have room for HLL_REGISTERS plus an
+ * additional byte on the right. This requirement is met by sds strings
+ * automatically since they are implicitly null terminated.
+ *
+ * The function always succeeds, however if as a result of the operation
+ * the approximated cardinality changed, 1 is returned. Otherwise 0
+ * is returned. */
+int hllDenseSet(uint8_t *registers, long index, uint8_t count) {
+ uint8_t oldcount;
+
+ HLL_DENSE_GET_REGISTER(oldcount,registers,index);
+ /* Registers are monotonic: only ever grow toward the max run length. */
+ if (count > oldcount) {
+ HLL_DENSE_SET_REGISTER(registers,index,count);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* "Add" the element in the dense hyperloglog data structure.
+ * Actually nothing is added, but the max 0 pattern counter of the subset
+ * the element belongs to is incremented if needed.
+ *
+ * This is just a wrapper to hllDenseSet(), performing the hashing of the
+ * element in order to retrieve the index and zero-run count.
+ * Returns 1 if the approximated cardinality changed, otherwise 0. */
+int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
+ long index;
+ uint8_t count = hllPatLen(ele,elesize,&index);
+ /* Update the register if this element produced a longer run of zeroes. */
+ return hllDenseSet(registers,index,count);
+}
+
+/* Compute the register histogram in the dense representation:
+ * reghisto[v] is incremented for every register holding value 'v'.
+ * The caller provides a 'reghisto' array large enough for all possible
+ * register values (HLL_Q+2 entries in the callers of this function). */
+void hllDenseRegHisto(uint8_t *registers, int* reghisto) {
+ int j;
+
+ /* Redis default is to use 16384 registers 6 bits each. The code works
+ * with other values by modifying the defines, but for our target value
+ * we take a faster path with unrolled loops. */
+ if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
+ uint8_t *r = registers;
+ unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9,
+ r10, r11, r12, r13, r14, r15;
+ for (j = 0; j < 1024; j++) {
+ /* Handle 16 registers per iteration: 16 registers * 6 bits
+ * pack exactly into 12 bytes, hence the fixed shift pattern
+ * below and the 'r += 12' advance at the end. */
+ r0 = r[0] & 63;
+ r1 = (r[0] >> 6 | r[1] << 2) & 63;
+ r2 = (r[1] >> 4 | r[2] << 4) & 63;
+ r3 = (r[2] >> 2) & 63;
+ r4 = r[3] & 63;
+ r5 = (r[3] >> 6 | r[4] << 2) & 63;
+ r6 = (r[4] >> 4 | r[5] << 4) & 63;
+ r7 = (r[5] >> 2) & 63;
+ r8 = r[6] & 63;
+ r9 = (r[6] >> 6 | r[7] << 2) & 63;
+ r10 = (r[7] >> 4 | r[8] << 4) & 63;
+ r11 = (r[8] >> 2) & 63;
+ r12 = r[9] & 63;
+ r13 = (r[9] >> 6 | r[10] << 2) & 63;
+ r14 = (r[10] >> 4 | r[11] << 4) & 63;
+ r15 = (r[11] >> 2) & 63;
+
+ reghisto[r0]++;
+ reghisto[r1]++;
+ reghisto[r2]++;
+ reghisto[r3]++;
+ reghisto[r4]++;
+ reghisto[r5]++;
+ reghisto[r6]++;
+ reghisto[r7]++;
+ reghisto[r8]++;
+ reghisto[r9]++;
+ reghisto[r10]++;
+ reghisto[r11]++;
+ reghisto[r12]++;
+ reghisto[r13]++;
+ reghisto[r14]++;
+ reghisto[r15]++;
+
+ r += 12;
+ }
+ } else {
+ /* Generic path for non-default HLL_REGISTERS / HLL_BITS builds. */
+ for(j = 0; j < HLL_REGISTERS; j++) {
+ unsigned long reg;
+ HLL_DENSE_GET_REGISTER(reg,registers,j);
+ reghisto[reg]++;
+ }
+ }
+}
+
+/* ================== Sparse representation implementation ================= */
+
+/* Convert the HLL with sparse representation given as input in its dense
+ * representation. Both representations are represented by SDS strings, and
+ * the input representation is freed as a side effect.
+ *
+ * The function returns C_OK if the sparse representation was valid,
+ * otherwise C_ERR is returned if the representation was corrupted. */
+int hllSparseToDense(robj *o) {
+ sds sparse = o->ptr, dense;
+ struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;
+ int idx = 0, runlen, regval;
+ uint8_t *p = (uint8_t*)sparse, *end = p+sdslen(sparse);
+
+ /* If the representation is already the right one return ASAP. */
+ hdr = (struct hllhdr*) sparse;
+ if (hdr->encoding == HLL_DENSE) return C_OK;
+
+ /* Create a string of the right size filled with zero bytes.
+ * Note that the cached cardinality is set to 0 as a side effect
+ * that is exactly the cardinality of an empty HLL. */
+ dense = sdsnewlen(NULL,HLL_DENSE_SIZE);
+ hdr = (struct hllhdr*) dense;
+ *hdr = *oldhdr; /* This will copy the magic and cached cardinality. */
+ hdr->encoding = HLL_DENSE;
+
+ /* Now read the sparse representation and set non-zero registers
+ * accordingly. */
+ p += HLL_HDR_SIZE;
+ while(p < end) {
+ if (HLL_SPARSE_IS_ZERO(p)) {
+ runlen = HLL_SPARSE_ZERO_LEN(p);
+ idx += runlen;
+ p++;
+ } else if (HLL_SPARSE_IS_XZERO(p)) {
+ runlen = HLL_SPARSE_XZERO_LEN(p);
+ idx += runlen;
+ p += 2;
+ } else {
+ runlen = HLL_SPARSE_VAL_LEN(p);
+ regval = HLL_SPARSE_VAL_VALUE(p);
+ /* Corrupted input: a VAL run would write past the registers
+ * array, so stop and let the idx check below report C_ERR. */
+ if ((runlen + idx) > HLL_REGISTERS) break; /* Overflow. */
+ while(runlen--) {
+ HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval);
+ idx++;
+ }
+ p++;
+ }
+ }
+
+ /* If the sparse representation was valid, we expect to find idx
+ * set to HLL_REGISTERS. */
+ if (idx != HLL_REGISTERS) {
+ sdsfree(dense);
+ return C_ERR;
+ }
+
+ /* Free the old representation and set the new one. */
+ sdsfree(o->ptr);
+ o->ptr = dense;
+ return C_OK;
+}
+
+/* Low level function to set the sparse HLL register at 'index' to the
+ * specified value if the current value is smaller than 'count'.
+ *
+ * The object 'o' is the String object holding the HLL. The function requires
+ * a reference to the object in order to be able to enlarge the string if
+ * needed.
+ *
+ * On success, the function returns 1 if the cardinality changed, or 0
+ * if the register for this element was not updated.
+ * On error (if the representation is invalid) -1 is returned.
+ *
+ * As a side effect the function may promote the HLL representation from
+ * sparse to dense: this happens when a register requires to be set to a value
+ * not representable with the sparse representation, or when the resulting
+ * size would be greater than server.hll_sparse_max_bytes. */
+int hllSparseSet(robj *o, long index, uint8_t count) {
+ struct hllhdr *hdr;
+ uint8_t oldcount, *sparse, *end, *p, *prev, *next;
+ long first, span;
+ long is_zero = 0, is_xzero = 0, is_val = 0, runlen = 0;
+
+ /* If the count is too big to be representable by the sparse representation
+ * switch to dense representation. */
+ if (count > HLL_SPARSE_VAL_MAX_VALUE) goto promote;
+
+ /* When updating a sparse representation, sometimes we may need to enlarge the
+ * buffer for up to 3 bytes in the worst case (XZERO split into XZERO-VAL-XZERO),
+ * and the following code does the enlarge job.
+ * Actually, we use a greedy strategy, enlarge more than 3 bytes to avoid the need
+ * for future reallocates on incremental growth. But we do not allocate more than
+ * 'server.hll_sparse_max_bytes' bytes for the sparse representation.
+ * If the available size of hyperloglog sds string is not enough for the increment
+ * we need, we promote the hyperloglog to dense representation in 'step 3'.
+ */
+ if (sdsalloc(o->ptr) < server.hll_sparse_max_bytes && sdsavail(o->ptr) < 3) {
+ size_t newlen = sdslen(o->ptr) + 3;
+ newlen += min(newlen, 300); /* Greediness: double 'newlen' if it is smaller than 300, or add 300 to it when it exceeds 300 */
+ if (newlen > server.hll_sparse_max_bytes)
+ newlen = server.hll_sparse_max_bytes;
+ o->ptr = sdsResize(o->ptr, newlen, 1);
+ }
+
+ /* Step 1: we need to locate the opcode we need to modify to check
+ * if a value update is actually needed. */
+ sparse = p = ((uint8_t*)o->ptr) + HLL_HDR_SIZE;
+ end = p + sdslen(o->ptr) - HLL_HDR_SIZE;
+
+ first = 0;
+ prev = NULL; /* Points to previous opcode at the end of the loop. */
+ next = NULL; /* Points to the next opcode at the end of the loop. */
+ span = 0;
+ while(p < end) {
+ long oplen;
+
+ /* Set span to the number of registers covered by this opcode.
+ *
+ * This is the most performance critical loop of the sparse
+ * representation. Sorting the conditionals from the most to the
+ * least frequent opcode in many-bytes sparse HLLs is faster. */
+ oplen = 1;
+ if (HLL_SPARSE_IS_ZERO(p)) {
+ span = HLL_SPARSE_ZERO_LEN(p);
+ } else if (HLL_SPARSE_IS_VAL(p)) {
+ span = HLL_SPARSE_VAL_LEN(p);
+ } else { /* XZERO. */
+ span = HLL_SPARSE_XZERO_LEN(p);
+ oplen = 2;
+ }
+ /* Break if this opcode covers the register as 'index'. */
+ if (index <= first+span-1) break;
+ prev = p;
+ p += oplen;
+ first += span;
+ }
+ if (span == 0 || p >= end) return -1; /* Invalid format. */
+
+ next = HLL_SPARSE_IS_XZERO(p) ? p+2 : p+1;
+ if (next >= end) next = NULL;
+
+ /* Cache current opcode type to avoid using the macro again and
+ * again for something that will not change.
+ * Also cache the run-length of the opcode. */
+ if (HLL_SPARSE_IS_ZERO(p)) {
+ is_zero = 1;
+ runlen = HLL_SPARSE_ZERO_LEN(p);
+ } else if (HLL_SPARSE_IS_XZERO(p)) {
+ is_xzero = 1;
+ runlen = HLL_SPARSE_XZERO_LEN(p);
+ } else {
+ is_val = 1;
+ runlen = HLL_SPARSE_VAL_LEN(p);
+ }
+
+ /* Step 2: After the loop:
+ *
+ * 'first' stores to the index of the first register covered
+ * by the current opcode, which is pointed by 'p'.
+ *
+ * 'next' and 'prev' store respectively the next and previous opcode,
+ * or NULL if the opcode at 'p' is respectively the last or first.
+ *
+ * 'span' is set to the number of registers covered by the current
+ * opcode.
+ *
+ * There are different cases in order to update the data structure
+ * in place without generating it from scratch:
+ *
+ * A) If it is a VAL opcode already set to a value >= our 'count'
+ * no update is needed, regardless of the VAL run-length field.
+ * In this case PFADD returns 0 since no changes are performed.
+ *
+ * B) If it is a VAL opcode with len = 1 (representing only our
+ * register) and the value is less than 'count', we just update it
+ * since this is a trivial case. */
+ if (is_val) {
+ oldcount = HLL_SPARSE_VAL_VALUE(p);
+ /* Case A. */
+ if (oldcount >= count) return 0;
+
+ /* Case B. */
+ if (runlen == 1) {
+ HLL_SPARSE_VAL_SET(p,count,1);
+ goto updated;
+ }
+ }
+
+ /* C) Another trivial to handle case is a ZERO opcode with a len of 1.
+ * We can just replace it with a VAL opcode with our value and len of 1. */
+ if (is_zero && runlen == 1) {
+ HLL_SPARSE_VAL_SET(p,count,1);
+ goto updated;
+ }
+
+ /* D) General case.
+ *
+ * The other cases are more complex: our register requires to be updated
+ * and is either currently represented by a VAL opcode with len > 1,
+ * by a ZERO opcode with len > 1, or by an XZERO opcode.
+ *
+ * In those cases the original opcode must be split into multiple
+ * opcodes. The worst case is an XZERO split in the middle resulting into
+ * XZERO - VAL - XZERO, so the resulting sequence max length is
+ * 5 bytes.
+ *
+ * We perform the split writing the new sequence into the 'new' buffer
+ * with 'newlen' as length. Later the new sequence is inserted in place
+ * of the old one, possibly moving what is on the right a few bytes
+ * if the new sequence is longer than the older one. */
+ uint8_t seq[5], *n = seq;
+ int last = first+span-1; /* Last register covered by the sequence. */
+ int len;
+
+ if (is_zero || is_xzero) {
+ /* Handle splitting of ZERO / XZERO. */
+ if (index != first) {
+ len = index-first;
+ if (len > HLL_SPARSE_ZERO_MAX_LEN) {
+ HLL_SPARSE_XZERO_SET(n,len);
+ n += 2;
+ } else {
+ HLL_SPARSE_ZERO_SET(n,len);
+ n++;
+ }
+ }
+ HLL_SPARSE_VAL_SET(n,count,1);
+ n++;
+ if (index != last) {
+ len = last-index;
+ if (len > HLL_SPARSE_ZERO_MAX_LEN) {
+ HLL_SPARSE_XZERO_SET(n,len);
+ n += 2;
+ } else {
+ HLL_SPARSE_ZERO_SET(n,len);
+ n++;
+ }
+ }
+ } else {
+ /* Handle splitting of VAL. */
+ int curval = HLL_SPARSE_VAL_VALUE(p);
+
+ if (index != first) {
+ len = index-first;
+ HLL_SPARSE_VAL_SET(n,curval,len);
+ n++;
+ }
+ HLL_SPARSE_VAL_SET(n,count,1);
+ n++;
+ if (index != last) {
+ len = last-index;
+ HLL_SPARSE_VAL_SET(n,curval,len);
+ n++;
+ }
+ }
+
+ /* Step 3: substitute the new sequence with the old one.
+ *
+ * Note that we already allocated space on the sds string
+ * calling sdsResize(). */
+ int seqlen = n-seq;
+ int oldlen = is_xzero ? 2 : 1;
+ int deltalen = seqlen-oldlen;
+
+ if (deltalen > 0 &&
+ sdslen(o->ptr) + deltalen > server.hll_sparse_max_bytes) goto promote;
+ serverAssert(sdslen(o->ptr) + deltalen <= sdsalloc(o->ptr));
+ if (deltalen && next) memmove(next+deltalen,next,end-next);
+ sdsIncrLen(o->ptr,deltalen);
+ memcpy(p,seq,seqlen);
+ end += deltalen;
+
+updated:
+ /* Step 4: Merge adjacent values if possible.
+ *
+ * The representation was updated, however the resulting representation
+ * may not be optimal: adjacent VAL opcodes can sometimes be merged into
+ * a single one. */
+ p = prev ? prev : sparse;
+ int scanlen = 5; /* Scan up to 5 upcodes starting from prev. */
+ while (p < end && scanlen--) {
+ if (HLL_SPARSE_IS_XZERO(p)) {
+ p += 2;
+ continue;
+ } else if (HLL_SPARSE_IS_ZERO(p)) {
+ p++;
+ continue;
+ }
+ /* We need two adjacent VAL opcodes to try a merge, having
+ * the same value, and a len that fits the VAL opcode max len. */
+ if (p+1 < end && HLL_SPARSE_IS_VAL(p+1)) {
+ int v1 = HLL_SPARSE_VAL_VALUE(p);
+ int v2 = HLL_SPARSE_VAL_VALUE(p+1);
+ if (v1 == v2) {
+ int len = HLL_SPARSE_VAL_LEN(p)+HLL_SPARSE_VAL_LEN(p+1);
+ if (len <= HLL_SPARSE_VAL_MAX_LEN) {
+ HLL_SPARSE_VAL_SET(p+1,v1,len);
+ memmove(p,p+1,end-p);
+ sdsIncrLen(o->ptr,-1);
+ end--;
+ /* After a merge we reiterate without incrementing 'p'
+ * in order to try to merge the just merged value with
+ * a value on its right. */
+ continue;
+ }
+ }
+ }
+ p++;
+ }
+
+ /* Invalidate the cached cardinality. */
+ hdr = o->ptr;
+ HLL_INVALIDATE_CACHE(hdr);
+ return 1;
+
+promote: /* Promote to dense representation. */
+ if (hllSparseToDense(o) == C_ERR) return -1; /* Corrupted HLL. */
+ hdr = o->ptr;
+
+ /* We need to call hllDenseAdd() to perform the operation after the
+ * conversion. However the result must be 1, since if we need to
+ * convert from sparse to dense a register requires to be updated.
+ *
+ * Note that this in turn means that PFADD will make sure the command
+ * is propagated to slaves / AOF, so if there is a sparse -> dense
+ * conversion, it will be performed in all the slaves as well. */
+ int dense_retval = hllDenseSet(hdr->registers,index,count);
+ serverAssert(dense_retval == 1);
+ return dense_retval;
+}
+
+/* "Add" the element in the sparse hyperloglog data structure.
+ * Actually nothing is added, but the max 0 pattern counter of the subset
+ * the element belongs to is incremented if needed.
+ *
+ * This is a thin wrapper around hllSparseSet(): it only hashes the
+ * element to obtain the register index and the length of the run of
+ * zeroes, then delegates the actual register update. */
+int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
+    long idx;
+    uint8_t zerorun = hllPatLen(ele,elesize,&idx);
+    /* The register is only updated if this element produced a longer
+     * run of zeroes than the one currently stored. */
+    return hllSparseSet(o,idx,zerorun);
+}
+
+/* Compute the register histogram of the sparse representation.
+ *
+ * 'reghisto' is updated so that reghisto[v] accumulates the number of
+ * registers whose value is exactly 'v'. If the opcodes do not cover
+ * exactly HLL_REGISTERS registers, the representation is corrupted and
+ * the integer pointed by 'invalid' is set to 1 (when not NULL). */
+void hllSparseRegHisto(uint8_t *sparse, int sparselen, int *invalid, int* reghisto) {
+    uint8_t *p = sparse, *end = sparse+sparselen;
+    int covered = 0;
+
+    while(p < end) {
+        int runlen;
+
+        if (HLL_SPARSE_IS_VAL(p)) {
+            runlen = HLL_SPARSE_VAL_LEN(p);
+            reghisto[HLL_SPARSE_VAL_VALUE(p)] += runlen;
+            p++;
+        } else if (HLL_SPARSE_IS_ZERO(p)) {
+            runlen = HLL_SPARSE_ZERO_LEN(p);
+            reghisto[0] += runlen;
+            p++;
+        } else {
+            /* XZERO: two bytes opcode, run of zero registers. */
+            runlen = HLL_SPARSE_XZERO_LEN(p);
+            reghisto[0] += runlen;
+            p += 2;
+        }
+        covered += runlen;
+    }
+    if (covered != HLL_REGISTERS && invalid) *invalid = 1;
+}
+
+/* ========================= HyperLogLog Count ==============================
+ * This is the core of the algorithm where the approximated count is computed.
+ * The function uses the lower level hllDenseRegHisto() and hllSparseRegHisto()
+ * functions as helpers to compute histogram of register values part of the
+ * computation, which is representation-specific, while all the rest is common. */
+
+/* Register histogram computation for the HLL_RAW encoding, where every
+ * register is stored in a full uint8_t. This encoding is only used
+ * internally as a speedup for PFCOUNT with multiple keys. */
+void hllRawRegHisto(uint8_t *registers, int* reghisto) {
+    uint64_t *word = (uint64_t*) registers;
+    int j;
+
+    /* Scan the registers eight at a time: a zero 64 bit word means eight
+     * zero registers, which is the common case for sparse data sets. */
+    for (j = 0; j < HLL_REGISTERS/8; j++, word++) {
+        if (*word == 0) {
+            reghisto[0] += 8;
+        } else {
+            uint8_t *bytes = (uint8_t*) word;
+            int i;
+            for (i = 0; i < 8; i++) reghisto[bytes[i]]++;
+        }
+    }
+}
+
+/* Helper function sigma as defined in
+ * "New cardinality estimation algorithms for HyperLogLog sketches"
+ * Otmar Ertl, arXiv:1702.01284.
+ *
+ * The series is iterated until it converges in double precision. The
+ * function diverges at x == 1, where +INF is returned directly. */
+double hllSigma(double x) {
+    if (x == 1.) return INFINITY;
+    double acc = x, weight = 1, prev;
+    do {
+        x *= x;
+        prev = acc;
+        acc += x * weight;
+        weight += weight;
+    } while(prev != acc);
+    return acc;
+}
+
+/* Helper function tau as defined in
+ * "New cardinality estimation algorithms for HyperLogLog sketches"
+ * Otmar Ertl, arXiv:1702.01284.
+ *
+ * The series is iterated until it converges in double precision.
+ * Both endpoints x == 0 and x == 1 are special cased to return 0. */
+double hllTau(double x) {
+    if (x == 0. || x == 1.) return 0.;
+    double acc = 1 - x, weight = 1.0, prev;
+    do {
+        x = sqrt(x);
+        prev = acc;
+        weight *= 0.5;
+        acc -= pow(1 - x, 2)*weight;
+    } while(prev != acc);
+    return acc / 3;
+}
+
+/* Return the approximated cardinality of the set based on the harmonic
+ * mean of the registers values. 'hdr' points to the start of the SDS
+ * representing the String object holding the HLL representation.
+ *
+ * If the sparse representation of the HLL object is not valid, the integer
+ * pointed by 'invalid' is set to non-zero, otherwise it is left untouched.
+ *
+ * hllCount() supports a special internal-only encoding of HLL_RAW, that
+ * is, hdr->registers will point to an uint8_t array of HLL_REGISTERS element.
+ * This is useful in order to speedup PFCOUNT when called against multiple
+ * keys (no need to work with 6-bit integers encoding). */
+uint64_t hllCount(struct hllhdr *hdr, int *invalid) {
+    double m = HLL_REGISTERS;
+    double E;
+    int j;
+    /* Note that reghisto size could be just HLL_Q+2, because HLL_Q+1 is
+     * the maximum frequency of the "000...1" sequence the hash function is
+     * able to return. However it is slow to check for sanity of the
+     * input: instead we size the histogram array to a safe value, so that
+     * out of range register values will just write data to wrong, but
+     * correctly allocated, places. */
+    int reghisto[64] = {0};
+
+    /* Compute register histogram */
+    if (hdr->encoding == HLL_DENSE) {
+        hllDenseRegHisto(hdr->registers,reghisto);
+    } else if (hdr->encoding == HLL_SPARSE) {
+        hllSparseRegHisto(hdr->registers,
+                          sdslen((sds)hdr)-HLL_HDR_SIZE,invalid,reghisto);
+    } else if (hdr->encoding == HLL_RAW) {
+        hllRawRegHisto(hdr->registers,reghisto);
+    } else {
+        serverPanic("Unknown HyperLogLog encoding in hllCount()");
+    }
+
+    /* Estimate cardinality from register histogram. See:
+     * "New cardinality estimation algorithms for HyperLogLog sketches"
+     * Otmar Ertl, arXiv:1702.01284 */
+    double z = m * hllTau((m-reghisto[HLL_Q+1])/(double)m);
+    for (j = HLL_Q; j >= 1; --j) {
+        z += reghisto[j];
+        z *= 0.5;
+    }
+    z += m * hllSigma(reghisto[0]/(double)m);
+    /* Round the raw estimate to the nearest integer before returning. */
+    E = llroundl(HLL_ALPHA_INF*m*m/z);
+
+    return (uint64_t) E;
+}
+
+/* Dispatch the add operation to hllDenseAdd() or hllSparseAdd() according
+ * to the current encoding of the HLL object. Returns 1 if a register was
+ * updated, 0 if nothing changed, -1 on invalid representation. */
+int hllAdd(robj *o, unsigned char *ele, size_t elesize) {
+    struct hllhdr *hdr = o->ptr;
+    if (hdr->encoding == HLL_DENSE)
+        return hllDenseAdd(hdr->registers,ele,elesize);
+    if (hdr->encoding == HLL_SPARSE)
+        return hllSparseAdd(o,ele,elesize);
+    return -1; /* Invalid representation. */
+}
+
+/* Merge by computing MAX(registers[i],hll[i]) the HyperLogLog 'hll'
+ * with an array of uint8_t HLL_REGISTERS registers pointed by 'max'.
+ *
+ * The hll object must be already validated via isHLLObjectOrReply()
+ * or in some other way.
+ *
+ * If the HyperLogLog is sparse and is found to be invalid, C_ERR
+ * is returned, otherwise the function always succeeds. */
+int hllMerge(uint8_t *max, robj *hll) {
+    struct hllhdr *hdr = hll->ptr;
+    int i;
+
+    if (hdr->encoding == HLL_DENSE) {
+        uint8_t val;
+
+        /* Dense: read every 6-bit register and retain the maximum. */
+        for (i = 0; i < HLL_REGISTERS; i++) {
+            HLL_DENSE_GET_REGISTER(val,hdr->registers,i);
+            if (val > max[i]) max[i] = val;
+        }
+    } else {
+        /* Sparse: walk the opcodes. Only VAL opcodes can update 'max',
+         * since zero runs can never exceed the current maximum. */
+        uint8_t *p = hll->ptr, *end = p + sdslen(hll->ptr);
+        long runlen, regval;
+
+        p += HLL_HDR_SIZE;
+        i = 0;
+        while(p < end) {
+            if (HLL_SPARSE_IS_ZERO(p)) {
+                runlen = HLL_SPARSE_ZERO_LEN(p);
+                i += runlen;
+                p++;
+            } else if (HLL_SPARSE_IS_XZERO(p)) {
+                runlen = HLL_SPARSE_XZERO_LEN(p);
+                i += runlen;
+                p += 2;
+            } else {
+                runlen = HLL_SPARSE_VAL_LEN(p);
+                regval = HLL_SPARSE_VAL_VALUE(p);
+                /* Bound check before writing into max[]: a corrupted
+                 * encoding could otherwise overflow the destination. */
+                if ((runlen + i) > HLL_REGISTERS) break; /* Overflow. */
+                while(runlen--) {
+                    if (regval > max[i]) max[i] = regval;
+                    i++;
+                }
+                p++;
+            }
+        }
+        /* A valid sparse string covers exactly HLL_REGISTERS registers:
+         * anything else means the representation is corrupted. */
+        if (i != HLL_REGISTERS) return C_ERR;
+    }
+    return C_OK;
+}
+
+/* ========================== HyperLogLog commands ========================== */
+
+/* Create an HLL object. We always create the HLL using sparse encoding.
+ * This will be upgraded to the dense representation as needed. */
+robj *createHLLObject(void) {
+    robj *o;
+    struct hllhdr *hdr;
+    sds s;
+    uint8_t *p;
+    /* Header size plus one two-byte XZERO opcode for every
+     * HLL_SPARSE_XZERO_MAX_LEN registers (rounded up). */
+    int sparselen = HLL_HDR_SIZE +
+                    (((HLL_REGISTERS+(HLL_SPARSE_XZERO_MAX_LEN-1)) /
+                     HLL_SPARSE_XZERO_MAX_LEN)*2);
+    int aux;
+
+    /* Populate the sparse representation with as many XZERO opcodes as
+     * needed to represent all the registers. */
+    aux = HLL_REGISTERS;
+    s = sdsnewlen(NULL,sparselen);
+    p = (uint8_t*)s + HLL_HDR_SIZE;
+    while(aux) {
+        int xzero = HLL_SPARSE_XZERO_MAX_LEN;
+        if (xzero > aux) xzero = aux;
+        HLL_SPARSE_XZERO_SET(p,xzero);
+        p += 2;
+        aux -= xzero;
+    }
+    /* All the opcodes must fill the buffer exactly. */
+    serverAssert((p-(uint8_t*)s) == sparselen);
+
+    /* Create the actual object. */
+    o = createObject(OBJ_STRING,s);
+    hdr = o->ptr;
+    memcpy(hdr->magic,"HYLL",4);
+    hdr->encoding = HLL_SPARSE;
+    return o;
+}
+
+/* Check if the object is a String with a valid HLL representation.
+ * Return C_OK if this is true, otherwise reply to the client
+ * with an error and return C_ERR. */
+int isHLLObjectOrReply(client *c, robj *o) {
+    struct hllhdr *hdr;
+
+    /* Key exists, check type */
+    if (checkType(c,o,OBJ_STRING))
+        return C_ERR; /* Error already sent. */
+
+    /* The value must be an SDS-backed string and at least as large as
+     * the HLL header before we can inspect it further. */
+    if (!sdsEncodedObject(o)) goto invalid;
+    if (stringObjectLen(o) < sizeof(*hdr)) goto invalid;
+    hdr = o->ptr;
+
+    /* Magic should be "HYLL". */
+    if (hdr->magic[0] != 'H' || hdr->magic[1] != 'Y' ||
+        hdr->magic[2] != 'L' || hdr->magic[3] != 'L') goto invalid;
+
+    if (hdr->encoding > HLL_MAX_ENCODING) goto invalid;
+
+    /* Dense representation string length should match exactly. */
+    if (hdr->encoding == HLL_DENSE &&
+        stringObjectLen(o) != HLL_DENSE_SIZE) goto invalid;
+
+    /* All tests passed. */
+    return C_OK;
+
+invalid:
+    addReplyError(c,"-WRONGTYPE Key is not a valid "
+                    "HyperLogLog string value.");
+    return C_ERR;
+}
+
+/* PFADD var ele ele ele ... ele => :0 or :1
+ *
+ * Replies 1 if at least one internal register was updated (or the key
+ * was created as a side effect), otherwise 0. */
+void pfaddCommand(client *c) {
+    robj *o = lookupKeyWrite(c->db,c->argv[1]);
+    struct hllhdr *hdr;
+    int updated = 0, j;
+
+    if (o == NULL) {
+        /* Create the key with a string value of the exact length to
+         * hold our HLL data structure. sdsnewlen() when NULL is passed
+         * is guaranteed to return bytes initialized to zero. */
+        o = createHLLObject();
+        dbAdd(c->db,c->argv[1],o);
+        updated++;
+    } else {
+        if (isHLLObjectOrReply(c,o) != C_OK) return;
+        /* Unshare the value since we are going to modify it in place. */
+        o = dbUnshareStringValue(c->db,c->argv[1],o);
+    }
+    /* Perform the low level ADD operation for every element. */
+    for (j = 2; j < c->argc; j++) {
+        int retval = hllAdd(o, (unsigned char*)c->argv[j]->ptr,
+                               sdslen(c->argv[j]->ptr));
+        switch(retval) {
+        case 1:
+            updated++;
+            break;
+        case -1:
+            /* hllAdd() returns -1 on a corrupted representation. */
+            addReplyError(c,invalid_hll_err);
+            return;
+        }
+    }
+    hdr = o->ptr;
+    if (updated) {
+        /* Propagate the write and drop the cached cardinality. */
+        signalModifiedKey(c,c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
+        server.dirty += updated;
+        HLL_INVALIDATE_CACHE(hdr);
+    }
+    addReply(c, updated ? shared.cone : shared.czero);
+}
+
+/* PFCOUNT var -> approximated cardinality of set. */
+void pfcountCommand(client *c) {
+    robj *o;
+    struct hllhdr *hdr;
+    uint64_t card;
+
+    /* Case 1: multi-key keys, cardinality of the union.
+     *
+     * When multiple keys are specified, PFCOUNT actually computes
+     * the cardinality of the merge of the N HLLs specified. */
+    if (c->argc > 2) {
+        /* Stack-allocated HLL using the internal HLL_RAW encoding:
+         * one full byte per register. */
+        uint8_t max[HLL_HDR_SIZE+HLL_REGISTERS], *registers;
+        int j;
+
+        /* Compute an HLL with M[i] = MAX(M[i]_j). */
+        memset(max,0,sizeof(max));
+        hdr = (struct hllhdr*) max;
+        hdr->encoding = HLL_RAW; /* Special internal-only encoding. */
+        registers = max + HLL_HDR_SIZE;
+        for (j = 1; j < c->argc; j++) {
+            /* Check type and size. */
+            robj *o = lookupKeyRead(c->db,c->argv[j]);
+            if (o == NULL) continue; /* Assume empty HLL for non existing var.*/
+            if (isHLLObjectOrReply(c,o) != C_OK) return;
+
+            /* Merge with this HLL with our 'max' HLL by setting max[i]
+             * to MAX(max[i],hll[i]). */
+            if (hllMerge(registers,o) == C_ERR) {
+                addReplyError(c,invalid_hll_err);
+                return;
+            }
+        }
+
+        /* Compute cardinality of the resulting set. */
+        addReplyLongLong(c,hllCount(hdr,NULL));
+        return;
+    }
+
+    /* Case 2: cardinality of the single HLL.
+     *
+     * The user specified a single key. Either return the cached value
+     * or compute one and update the cache.
+     *
+     * Since a HLL is a regular Redis string type value, updating the cache does
+     * modify the value. We do a lookupKeyRead anyway since this is flagged as a
+     * read-only command. The difference is that with lookupKeyWrite, a
+     * logically expired key on a replica is deleted, while with lookupKeyRead
+     * it isn't, but the lookup returns NULL either way if the key is logically
+     * expired, which is what matters here. */
+    o = lookupKeyRead(c->db,c->argv[1]);
+    if (o == NULL) {
+        /* No key? Cardinality is zero since no element was added, otherwise
+         * we would have a key as HLLADD creates it as a side effect. */
+        addReply(c,shared.czero);
+    } else {
+        if (isHLLObjectOrReply(c,o) != C_OK) return;
+        o = dbUnshareStringValue(c->db,c->argv[1],o);
+
+        /* Check if the cached cardinality is valid. */
+        hdr = o->ptr;
+        if (HLL_VALID_CACHE(hdr)) {
+            /* Just return the cached value: the cardinality is stored in
+             * the header as 8 bytes, least significant byte first. */
+            card = (uint64_t)hdr->card[0];
+            card |= (uint64_t)hdr->card[1] << 8;
+            card |= (uint64_t)hdr->card[2] << 16;
+            card |= (uint64_t)hdr->card[3] << 24;
+            card |= (uint64_t)hdr->card[4] << 32;
+            card |= (uint64_t)hdr->card[5] << 40;
+            card |= (uint64_t)hdr->card[6] << 48;
+            card |= (uint64_t)hdr->card[7] << 56;
+        } else {
+            int invalid = 0;
+            /* Recompute it and update the cached value. */
+            card = hllCount(hdr,&invalid);
+            if (invalid) {
+                addReplyError(c,invalid_hll_err);
+                return;
+            }
+            hdr->card[0] = card & 0xff;
+            hdr->card[1] = (card >> 8) & 0xff;
+            hdr->card[2] = (card >> 16) & 0xff;
+            hdr->card[3] = (card >> 24) & 0xff;
+            hdr->card[4] = (card >> 32) & 0xff;
+            hdr->card[5] = (card >> 40) & 0xff;
+            hdr->card[6] = (card >> 48) & 0xff;
+            hdr->card[7] = (card >> 56) & 0xff;
+            /* This is considered a read-only command even if the cached value
+             * may be modified and given that the HLL is a Redis string
+             * we need to propagate the change. */
+            signalModifiedKey(c,c->db,c->argv[1]);
+            server.dirty++;
+        }
+        addReplyLongLong(c,card);
+    }
+}
+
+/* PFMERGE dest src1 src2 src3 ... srcN => OK
+ *
+ * Note that the destination key is also a source: merging a set of keys
+ * into one of them is a documented use case of the command. */
+void pfmergeCommand(client *c) {
+    uint8_t max[HLL_REGISTERS];
+    struct hllhdr *hdr;
+    int j;
+    int use_dense = 0; /* Use dense representation as target? */
+
+    /* Compute an HLL with M[i] = MAX(M[i]_j).
+     * We store the maximum into the max array of registers. We'll write
+     * it to the target variable later. */
+    memset(max,0,sizeof(max));
+    for (j = 1; j < c->argc; j++) {
+        /* Check type and size. */
+        robj *o = lookupKeyRead(c->db,c->argv[j]);
+        if (o == NULL) continue; /* Assume empty HLL for non existing var. */
+        if (isHLLObjectOrReply(c,o) != C_OK) return;
+
+        /* If at least one involved HLL is dense, use the dense representation
+         * as target ASAP to save time and avoid the conversion step. */
+        hdr = o->ptr;
+        if (hdr->encoding == HLL_DENSE) use_dense = 1;
+
+        /* Merge with this HLL with our 'max' HLL by setting max[i]
+         * to MAX(max[i],hll[i]). */
+        if (hllMerge(max,o) == C_ERR) {
+            addReplyError(c,invalid_hll_err);
+            return;
+        }
+    }
+
+    /* Create / unshare the destination key's value if needed. */
+    robj *o = lookupKeyWrite(c->db,c->argv[1]);
+    if (o == NULL) {
+        /* Create the key with a string value of the exact length to
+         * hold our HLL data structure. sdsnewlen() when NULL is passed
+         * is guaranteed to return bytes initialized to zero. */
+        o = createHLLObject();
+        dbAdd(c->db,c->argv[1],o);
+    } else {
+        /* If key exists we are sure it's of the right type/size
+         * since we checked when merging the different HLLs, so we
+         * don't check again. */
+        o = dbUnshareStringValue(c->db,c->argv[1],o);
+    }
+
+    /* Convert the destination object to dense representation if at least
+     * one of the inputs was dense. */
+    if (use_dense && hllSparseToDense(o) == C_ERR) {
+        addReplyError(c,invalid_hll_err);
+        return;
+    }
+
+    /* Write the resulting HLL to the destination HLL registers and
+     * invalidate the cached value. Zero registers are skipped since
+     * the destination can never hold a smaller value. */
+    for (j = 0; j < HLL_REGISTERS; j++) {
+        if (max[j] == 0) continue;
+        hdr = o->ptr;
+        switch(hdr->encoding) {
+        case HLL_DENSE: hllDenseSet(hdr->registers,j,max[j]); break;
+        case HLL_SPARSE: hllSparseSet(o,j,max[j]); break;
+        }
+    }
+    hdr = o->ptr; /* o->ptr may be different now, as a side effect of
+                     last hllSparseSet() call. */
+    HLL_INVALIDATE_CACHE(hdr);
+
+    signalModifiedKey(c,c->db,c->argv[1]);
+    /* We generate a PFADD event for PFMERGE for semantical simplicity
+     * since in theory this is a mass-add of elements. */
+    notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
+    server.dirty++;
+    addReply(c,shared.ok);
+}
+
+/* ========================== Testing / Debugging ========================== */
+
+/* PFSELFTEST
+ * This command performs a self-test of the HLL registers implementation.
+ * Something that is not easy to test from within the outside. */
+#define HLL_TEST_CYCLES 1000
+void pfselftestCommand(client *c) {
+    unsigned int j, i;
+    sds bitcounters = sdsnewlen(NULL,HLL_DENSE_SIZE);
+    struct hllhdr *hdr = (struct hllhdr*) bitcounters, *hdr2;
+    robj *o = NULL;
+    uint8_t bytecounters[HLL_REGISTERS];
+
+    /* Test 1: access registers.
+     * The test is conceived to test that the different counters of our data
+     * structure are accessible and that setting their values both result in
+     * the correct value to be retained and not affect adjacent values. */
+    for (j = 0; j < HLL_TEST_CYCLES; j++) {
+        /* Set the HLL counters and an array of unsigned bytes of the
+         * same size to the same set of random values. */
+        for (i = 0; i < HLL_REGISTERS; i++) {
+            unsigned int r = rand() & HLL_REGISTER_MAX;
+
+            bytecounters[i] = r;
+            HLL_DENSE_SET_REGISTER(hdr->registers,i,r);
+        }
+        /* Check that we are able to retrieve the same values. */
+        for (i = 0; i < HLL_REGISTERS; i++) {
+            unsigned int val;
+
+            HLL_DENSE_GET_REGISTER(val,hdr->registers,i);
+            if (val != bytecounters[i]) {
+                addReplyErrorFormat(c,
+                    "TESTFAILED Register %d should be %d but is %d",
+                    i, (int) bytecounters[i], (int) val);
+                goto cleanup;
+            }
+        }
+    }
+
+    /* Test 2: approximation error.
+     * The test adds unique elements and checks that the estimated value
+     * is always within reasonable bounds.
+     *
+     * We check that the error is smaller than a few times the expected
+     * standard error, to make it very unlikely for the test to fail because
+     * of a "bad" run.
+     *
+     * The test is performed with both dense and sparse HLLs at the same
+     * time also verifying that the computed cardinality is the same. */
+    memset(hdr->registers,0,HLL_DENSE_SIZE-HLL_HDR_SIZE);
+    o = createHLLObject();
+    double relerr = 1.04/sqrt(HLL_REGISTERS);
+    int64_t checkpoint = 1;
+    uint64_t seed = (uint64_t)rand() | (uint64_t)rand() << 32;
+    uint64_t ele;
+    for (j = 1; j <= 10000000; j++) {
+        /* XOR with a random seed so every run adds different elements. */
+        ele = j ^ seed;
+        hllDenseAdd(hdr->registers,(unsigned char*)&ele,sizeof(ele));
+        hllAdd(o,(unsigned char*)&ele,sizeof(ele));
+
+        /* Make sure that for small cardinalities we use sparse
+         * encoding. */
+        if (j == checkpoint && j < server.hll_sparse_max_bytes/2) {
+            hdr2 = o->ptr;
+            if (hdr2->encoding != HLL_SPARSE) {
+                addReplyError(c, "TESTFAILED sparse encoding not used");
+                goto cleanup;
+            }
+        }
+
+        /* Check that dense and sparse representations agree. */
+        if (j == checkpoint && hllCount(hdr,NULL) != hllCount(o->ptr,NULL)) {
+            addReplyError(c, "TESTFAILED dense/sparse disagree");
+            goto cleanup;
+        }
+
+        /* Check error. */
+        if (j == checkpoint) {
+            int64_t abserr = checkpoint - (int64_t)hllCount(hdr,NULL);
+            uint64_t maxerr = ceil(relerr*6*checkpoint);
+
+            /* Adjust the max error we expect for cardinality 10
+             * since from time to time it is statistically likely to get
+             * much higher error due to collision, resulting into a false
+             * positive. */
+            if (j == 10) maxerr = 1;
+
+            if (abserr < 0) abserr = -abserr;
+            if (abserr > (int64_t)maxerr) {
+                addReplyErrorFormat(c,
+                    "TESTFAILED Too big error. card:%llu abserr:%llu",
+                    (unsigned long long) checkpoint,
+                    (unsigned long long) abserr);
+                goto cleanup;
+            }
+            /* Test at power-of-ten checkpoints only, to keep it fast. */
+            checkpoint *= 10;
+        }
+    }
+
+    /* Success! */
+    addReply(c,shared.ok);
+
+cleanup:
+    sdsfree(bitcounters);
+    if (o) decrRefCount(o);
+}
+
+/* Different debugging related operations about the HLL implementation.
+ *
+ * PFDEBUG GETREG <key>      -- dump all registers (converts to dense).
+ * PFDEBUG DECODE <key>      -- human readable dump of the sparse opcodes.
+ * PFDEBUG ENCODING <key>    -- current encoding as a status reply.
+ * PFDEBUG TODENSE <key>     -- force conversion to the dense encoding.
+ */
+void pfdebugCommand(client *c) {
+    char *cmd = c->argv[1]->ptr;
+    struct hllhdr *hdr;
+    robj *o;
+    int j;
+
+    o = lookupKeyWrite(c->db,c->argv[2]);
+    if (o == NULL) {
+        addReplyError(c,"The specified key does not exist");
+        return;
+    }
+    if (isHLLObjectOrReply(c,o) != C_OK) return;
+    /* Unshare since subcommands may modify the value in place. */
+    o = dbUnshareStringValue(c->db,c->argv[2],o);
+    hdr = o->ptr;
+
+    /* PFDEBUG GETREG <key> */
+    if (!strcasecmp(cmd,"getreg")) {
+        if (c->argc != 3) goto arityerr;
+
+        if (hdr->encoding == HLL_SPARSE) {
+            if (hllSparseToDense(o) == C_ERR) {
+                addReplyError(c,invalid_hll_err);
+                return;
+            }
+            server.dirty++; /* Force propagation on encoding change. */
+        }
+
+        hdr = o->ptr;
+        addReplyArrayLen(c,HLL_REGISTERS);
+        for (j = 0; j < HLL_REGISTERS; j++) {
+            uint8_t val;
+
+            HLL_DENSE_GET_REGISTER(val,hdr->registers,j);
+            addReplyLongLong(c,val);
+        }
+    }
+    /* PFDEBUG DECODE <key> */
+    else if (!strcasecmp(cmd,"decode")) {
+        if (c->argc != 3) goto arityerr;
+
+        uint8_t *p = o->ptr, *end = p+sdslen(o->ptr);
+        sds decoded = sdsempty();
+
+        if (hdr->encoding != HLL_SPARSE) {
+            sdsfree(decoded);
+            addReplyError(c,"HLL encoding is not sparse");
+            return;
+        }
+
+        /* Emit one token per opcode: z = ZERO, Z = XZERO, v = VAL. */
+        p += HLL_HDR_SIZE;
+        while(p < end) {
+            int runlen, regval;
+
+            if (HLL_SPARSE_IS_ZERO(p)) {
+                runlen = HLL_SPARSE_ZERO_LEN(p);
+                p++;
+                decoded = sdscatprintf(decoded,"z:%d ",runlen);
+            } else if (HLL_SPARSE_IS_XZERO(p)) {
+                runlen = HLL_SPARSE_XZERO_LEN(p);
+                p += 2;
+                decoded = sdscatprintf(decoded,"Z:%d ",runlen);
+            } else {
+                runlen = HLL_SPARSE_VAL_LEN(p);
+                regval = HLL_SPARSE_VAL_VALUE(p);
+                p++;
+                decoded = sdscatprintf(decoded,"v:%d,%d ",regval,runlen);
+            }
+        }
+        decoded = sdstrim(decoded," ");
+        addReplyBulkCBuffer(c,decoded,sdslen(decoded));
+        sdsfree(decoded);
+    }
+    /* PFDEBUG ENCODING <key> */
+    else if (!strcasecmp(cmd,"encoding")) {
+        char *encodingstr[2] = {"dense","sparse"};
+        if (c->argc != 3) goto arityerr;
+
+        /* NOTE(review): indexes the table with hdr->encoding directly;
+         * assumes HLL_DENSE == 0 and HLL_SPARSE == 1 -- confirm against
+         * the encoding constants in the header. */
+        addReplyStatus(c,encodingstr[hdr->encoding]);
+    }
+    /* PFDEBUG TODENSE <key> */
+    else if (!strcasecmp(cmd,"todense")) {
+        int conv = 0;
+        if (c->argc != 3) goto arityerr;
+
+        if (hdr->encoding == HLL_SPARSE) {
+            if (hllSparseToDense(o) == C_ERR) {
+                addReplyError(c,invalid_hll_err);
+                return;
+            }
+            conv = 1;
+            server.dirty++; /* Force propagation on encoding change. */
+        }
+        addReply(c,conv ? shared.cone : shared.czero);
+    } else {
+        addReplyErrorFormat(c,"Unknown PFDEBUG subcommand '%s'", cmd);
+    }
+    return;
+
+arityerr:
+    addReplyErrorFormat(c,
+        "Wrong number of arguments for the '%s' subcommand",cmd);
+}
+
diff --git a/src/intset.c b/src/intset.c
new file mode 100644
index 0000000..621a742
--- /dev/null
+++ b/src/intset.c
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "intset.h"
+#include "zmalloc.h"
+#include "endianconv.h"
+#include "redisassert.h"
+
+/* Note that these encodings are ordered, so:
+ * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
+#define INTSET_ENC_INT16 (sizeof(int16_t))
+#define INTSET_ENC_INT32 (sizeof(int32_t))
+#define INTSET_ENC_INT64 (sizeof(int64_t))
+
+/* Return the smallest encoding able to hold the provided value. */
+static uint8_t _intsetValueEncoding(int64_t v) {
+    if (v >= INT16_MIN && v <= INT16_MAX)
+        return INTSET_ENC_INT16;
+    if (v >= INT32_MIN && v <= INT32_MAX)
+        return INTSET_ENC_INT32;
+    return INTSET_ENC_INT64;
+}
+
+/* Return the value at pos, interpreting the contents array with the
+ * given encoding 'enc' (which may differ from the intset's current one
+ * during an encoding upgrade). */
+static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
+    if (enc == INTSET_ENC_INT64) {
+        int64_t v;
+        memcpy(&v,((int64_t*)is->contents)+pos,sizeof(v));
+        memrev64ifbe(&v);
+        return v;
+    }
+    if (enc == INTSET_ENC_INT32) {
+        int32_t v;
+        memcpy(&v,((int32_t*)is->contents)+pos,sizeof(v));
+        memrev32ifbe(&v);
+        return v;
+    }
+    int16_t v;
+    memcpy(&v,((int16_t*)is->contents)+pos,sizeof(v));
+    memrev16ifbe(&v);
+    return v;
+}
+
+/* Return the value at pos, using the intset's current encoding. */
+static int64_t _intsetGet(intset *is, int pos) {
+    uint8_t enc = intrev32ifbe(is->encoding);
+    return _intsetGetEncoded(is,pos,enc);
+}
+
+/* Set the value at pos, using the configured encoding. The value is
+ * stored in a host-independent byte order: memrev*ifbe() swaps the
+ * bytes in place on big endian hosts after the store. */
+static void _intsetSet(intset *is, int pos, int64_t value) {
+    uint32_t encoding = intrev32ifbe(is->encoding);
+
+    if (encoding == INTSET_ENC_INT64) {
+        ((int64_t*)is->contents)[pos] = value;
+        memrev64ifbe(((int64_t*)is->contents)+pos);
+    } else if (encoding == INTSET_ENC_INT32) {
+        ((int32_t*)is->contents)[pos] = value;
+        memrev32ifbe(((int32_t*)is->contents)+pos);
+    } else {
+        ((int16_t*)is->contents)[pos] = value;
+        memrev16ifbe(((int16_t*)is->contents)+pos);
+    }
+}
+
+/* Create an empty intset, starting with the smallest (16 bit) encoding.
+ * The encoding will be upgraded on demand by intsetAdd(). */
+intset *intsetNew(void) {
+    intset *set = zmalloc(sizeof(*set));
+    set->encoding = intrev32ifbe(INTSET_ENC_INT16);
+    set->length = 0;
+    return set;
+}
+
+/* Resize the intset to hold 'len' elements of the current encoding.
+ * Aborts via assert() if the resulting allocation size would overflow
+ * SIZE_MAX once the header is accounted for. */
+static intset *intsetResize(intset *is, uint32_t len) {
+    uint64_t size = (uint64_t)len*intrev32ifbe(is->encoding);
+    assert(size <= SIZE_MAX - sizeof(intset));
+    is = zrealloc(is,sizeof(intset)+size);
+    return is;
+}
+
+/* Search for the position of "value". Return 1 when the value was found and
+ * sets "pos" to the position of the value within the intset. Return 0 when
+ * the value is not present in the intset and sets "pos" to the position
+ * where "value" can be inserted. */
+static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
+    int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
+    int64_t cur = -1;
+
+    /* The value can never be found when the set is empty */
+    if (intrev32ifbe(is->length) == 0) {
+        if (pos) *pos = 0;
+        return 0;
+    } else {
+        /* Check for the case where we know we cannot find the value,
+         * but do know the insert position. */
+        if (value > _intsetGet(is,max)) {
+            if (pos) *pos = intrev32ifbe(is->length);
+            return 0;
+        } else if (value < _intsetGet(is,0)) {
+            if (pos) *pos = 0;
+            return 0;
+        }
+    }
+
+    /* Standard binary search on the sorted contents. The unsigned
+     * midpoint computation avoids signed overflow for very large sets. */
+    while(max >= min) {
+        mid = ((unsigned int)min + (unsigned int)max) >> 1;
+        cur = _intsetGet(is,mid);
+        if (value > cur) {
+            min = mid+1;
+        } else if (value < cur) {
+            max = mid-1;
+        } else {
+            break;
+        }
+    }
+
+    if (value == cur) {
+        if (pos) *pos = mid;
+        return 1;
+    } else {
+        /* Not found: 'min' is the first position holding a larger value. */
+        if (pos) *pos = min;
+        return 0;
+    }
+}
+
+/* Upgrades the intset to a larger encoding and inserts the given integer.
+ *
+ * Since the new value required an upgrade, it necessarily lies outside
+ * the representable range of the old encoding, so it is either smaller
+ * than every current element (negative) or greater than all of them. */
+static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
+    uint8_t curenc = intrev32ifbe(is->encoding);
+    uint8_t newenc = _intsetValueEncoding(value);
+    int length = intrev32ifbe(is->length);
+    int prepend = value < 0 ? 1 : 0;
+
+    /* First set new encoding and resize */
+    is->encoding = intrev32ifbe(newenc);
+    is = intsetResize(is,intrev32ifbe(is->length)+1);
+
+    /* Upgrade back-to-front so we don't overwrite values.
+     * Note that the "prepend" variable is used to make sure we have an empty
+     * space at either the beginning or the end of the intset. */
+    while(length--)
+        _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
+
+    /* Set the value at the beginning or the end. */
+    if (prepend)
+        _intsetSet(is,0,value);
+    else
+        _intsetSet(is,intrev32ifbe(is->length),value);
+    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
+    return is;
+}
+
+/* Move the tail of the set -- the elements from position 'from' to the
+ * end -- so that it starts at position 'to'. Used to open a hole before
+ * insertion (to > from) or close one after deletion (to < from). The
+ * source and destination regions may overlap, hence memmove(). */
+static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
+    void *src, *dst;
+    uint32_t bytes = intrev32ifbe(is->length)-from;
+    uint32_t encoding = intrev32ifbe(is->encoding);
+
+    if (encoding == INTSET_ENC_INT64) {
+        src = (int64_t*)is->contents+from;
+        dst = (int64_t*)is->contents+to;
+        bytes *= sizeof(int64_t);
+    } else if (encoding == INTSET_ENC_INT32) {
+        src = (int32_t*)is->contents+from;
+        dst = (int32_t*)is->contents+to;
+        bytes *= sizeof(int32_t);
+    } else {
+        src = (int16_t*)is->contents+from;
+        dst = (int16_t*)is->contents+to;
+        bytes *= sizeof(int16_t);
+    }
+    memmove(dst,src,bytes);
+}
+
+/* Insert an integer in the intset. If 'success' is not NULL it is set to
+ * 1 when the value was inserted and 0 when it was already present. The
+ * returned pointer must replace the caller's: the set may be reallocated
+ * (resize or encoding upgrade). */
+intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
+    uint8_t valenc = _intsetValueEncoding(value);
+    uint32_t pos;
+    if (success) *success = 1;
+
+    /* Upgrade encoding if necessary. If we need to upgrade, we know that
+     * this value should be either appended (if > 0) or prepended (if < 0),
+     * because it lies outside the range of existing values. */
+    if (valenc > intrev32ifbe(is->encoding)) {
+        /* This always succeeds, so we don't need to curry *success. */
+        return intsetUpgradeAndAdd(is,value);
+    } else {
+        /* Abort if the value is already present in the set.
+         * This call will populate "pos" with the right position to insert
+         * the value when it cannot be found. */
+        if (intsetSearch(is,value,&pos)) {
+            if (success) *success = 0;
+            return is;
+        }
+
+        /* Grow by one slot, then shift the tail right to keep the array
+         * sorted around the insertion point. */
+        is = intsetResize(is,intrev32ifbe(is->length)+1);
+        if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
+    }
+
+    _intsetSet(is,pos,value);
+    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
+    return is;
+}
+
+/* Delete integer from intset. If 'success' is not NULL it is set to 1
+ * only when the value was found and removed. The returned pointer must
+ * replace the caller's (the blob shrinks in place via realloc).
+ * NOTE(review): 'success' is int* here but uint8_t* in intsetAdd; the
+ * asymmetry is part of the public API, so it is left alone. */
+intset *intsetRemove(intset *is, int64_t value, int *success) {
+    uint8_t valenc = _intsetValueEncoding(value);
+    uint32_t pos;
+    if (success) *success = 0;
+
+    /* A value needing a wider encoding than the set's cannot be stored
+     * in it, so the search can be skipped entirely. */
+    if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {
+        uint32_t len = intrev32ifbe(is->length);
+
+        /* We know we can delete */
+        if (success) *success = 1;
+
+        /* Overwrite value with tail and update length.
+         * The tail must be moved before the resize truncates it. */
+        if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);
+        is = intsetResize(is,len-1);
+        is->length = intrev32ifbe(len-1);
+    }
+    return is;
+}
+
+/* Determine whether a value belongs to this set. Returns 1 when present,
+ * 0 otherwise. A value whose required encoding is wider than the set's
+ * current encoding cannot possibly be stored in it, so in that case the
+ * binary search is skipped entirely. */
+uint8_t intsetFind(intset *is, int64_t value) {
+    if (_intsetValueEncoding(value) > intrev32ifbe(is->encoding))
+        return 0;
+    return intsetSearch(is,value,NULL) ? 1 : 0;
+}
+
+/* Return random member. The caller must guarantee the set is non-empty;
+ * the assert guards the modulo below against a zero length. */
+int64_t intsetRandom(intset *is) {
+    uint32_t len = intrev32ifbe(is->length);
+    assert(len); /* avoid division by zero on corrupt intset payload. */
+    return _intsetGet(is,rand()%len);
+}
+
+/* Return the largest member. Elements are kept in ascending order, so the
+ * maximum lives in the last slot. Undefined on an empty set (len-1 wraps). */
+int64_t intsetMax(intset *is) {
+    uint32_t len = intrev32ifbe(is->length);
+    return _intsetGet(is, len - 1);
+}
+
+/* Return the smallest member. Elements are kept in ascending order, so the
+ * minimum is at index 0. Undefined on an empty set. */
+int64_t intsetMin(intset *is) {
+    return _intsetGet(is, 0);
+}
+
+/* Get the value at the given position into '*value'. When this position is
+ * out of range the function returns 0 and '*value' is untouched, when in
+ * range it returns 1. */
+uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
+    if (pos < intrev32ifbe(is->length)) {
+        *value = _intsetGet(is,pos);
+        return 1;
+    }
+    return 0;
+}
+
+/* Return intset length (number of members), decoding the byte-swapped
+ * on-disk/in-memory representation on big-endian hosts. */
+uint32_t intsetLen(const intset *is) {
+    return intrev32ifbe(is->length);
+}
+
+/* Return intset blob size in bytes: header plus payload. The encoding
+ * value doubles as the per-element size in bytes, so length*encoding is
+ * the payload size. */
+size_t intsetBlobLen(intset *is) {
+    return sizeof(intset)+(size_t)intrev32ifbe(is->length)*intrev32ifbe(is->encoding);
+}
+
+/* Validate the integrity of the data structure.
+ * Returns 1 when 'p' looks like a well formed intset of 'size' bytes,
+ * 0 otherwise.
+ * when `deep` is 0, only the integrity of the header is validated.
+ * when `deep` is 1, we make sure there are no duplicate or out of order records. */
+int intsetValidateIntegrity(const unsigned char *p, size_t size, int deep) {
+    intset *is = (intset *)p;
+    /* check that we can actually read the header. */
+    if (size < sizeof(*is))
+        return 0;
+
+    uint32_t encoding = intrev32ifbe(is->encoding);
+
+    /* The encoding constant doubles as the per-record size in bytes. */
+    size_t record_size;
+    if (encoding == INTSET_ENC_INT64) {
+        record_size = INTSET_ENC_INT64;
+    } else if (encoding == INTSET_ENC_INT32) {
+        record_size = INTSET_ENC_INT32;
+    } else if (encoding == INTSET_ENC_INT16){
+        record_size = INTSET_ENC_INT16;
+    } else {
+        return 0;
+    }
+
+    /* check that the size matches (all records are inside the buffer).
+     * Divide rather than multiply: on 32-bit builds count*record_size
+     * could overflow size_t, letting a corrupt/hostile header with a huge
+     * 'length' field spoof this check. */
+    uint32_t count = intrev32ifbe(is->length);
+    size_t payload = size - sizeof(*is);
+    if (payload % record_size != 0 || payload / record_size != count)
+        return 0;
+
+    /* check that the set is not empty. */
+    if (count==0)
+        return 0;
+
+    if (!deep)
+        return 1;
+
+    /* check that there are no dup or out of order records (the set must
+     * be strictly ascending for the binary search to work). */
+    int64_t prev = _intsetGet(is,0);
+    for (uint32_t i=1; i<count; i++) {
+        int64_t cur = _intsetGet(is,i);
+        if (cur <= prev)
+            return 0;
+        prev = cur;
+    }
+
+    return 1;
+}
+
+#ifdef REDIS_TEST
+#include <sys/time.h>
+#include <time.h>
+
+#if 0
+/* Debug helper: dump all members, one per line. (Compiled out.) */
+static void intsetRepr(intset *is) {
+    for (uint32_t i = 0; i < intrev32ifbe(is->length); i++) {
+        /* Cast to long long (not uint64_t): %lld requires a signed
+         * long long argument; mismatching the type is undefined. */
+        printf("%lld\n", (long long)_intsetGet(is,i));
+    }
+    printf("\n");
+}
+
+/* Print an error message and abort the test run. */
+static void error(char *err) {
+    printf("%s\n", err);
+    exit(1);
+}
+#endif
+
+/* Print the per-test success marker. */
+static void ok(void) {
+    printf("OK\n");
+}
+
+/* Wall-clock time in microseconds, used to time the stress benchmarks. */
+static long long usec(void) {
+    struct timeval tv;
+    gettimeofday(&tv,NULL);
+    return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
+}
+
+/* Build an intset with 'size' pseudo-random members, each limited to
+ * 'bits' significant bits. Test helper for the stress tests below. */
+static intset *createSet(int bits, int size) {
+    /* 1ULL, not 1: shifting a plain int by >= 31 bits is undefined, and
+     * this helper advertises support for bits > 32. */
+    uint64_t mask = (1ULL<<bits)-1;
+    uint64_t value;
+    intset *is = intsetNew();
+
+    for (int i = 0; i < size; i++) {
+        if (bits > 32) {
+            /* Widen before multiplying: rand()*rand() as int overflows
+             * (UB) long before the mask is applied. */
+            value = ((uint64_t)rand()*rand()) & mask;
+        } else {
+            value = rand() & mask;
+        }
+        is = intsetAdd(is,value,NULL);
+    }
+    return is;
+}
+
+/* Assert that the members are in strictly ascending order (sorted and
+ * duplicate free), whatever the current encoding is. */
+static void checkConsistency(intset *is) {
+    uint32_t len = intrev32ifbe(is->length);
+    /* The encoding is loop invariant; read it once. */
+    uint32_t encoding = intrev32ifbe(is->encoding);
+
+    /* With an unsigned length, 'len-1' would wrap to UINT32_MAX for an
+     * empty set and the loop would read far out of bounds. Sets with
+     * fewer than two members are trivially consistent. */
+    if (len < 2) return;
+
+    for (uint32_t i = 0; i < len-1; i++) {
+        if (encoding == INTSET_ENC_INT16) {
+            int16_t *i16 = (int16_t*)is->contents;
+            assert(i16[i] < i16[i+1]);
+        } else if (encoding == INTSET_ENC_INT32) {
+            int32_t *i32 = (int32_t*)is->contents;
+            assert(i32[i] < i32[i+1]);
+        } else {
+            int64_t *i64 = (int64_t*)is->contents;
+            assert(i64[i] < i64[i+1]);
+        }
+    }
+}
+
+#define UNUSED(x) (void)(x)
+/* Entry point of the intset unit/stress tests (built under REDIS_TEST).
+ * Returns 0; failures abort via assert(). */
+int intsetTest(int argc, char **argv, int flags) {
+    uint8_t success;
+    int i;
+    intset *is;
+    srand(time(NULL));
+
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    printf("Value encodings: "); {
+        assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
+        assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
+        assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
+        assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
+        assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
+        assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
+        assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
+        assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
+        /* Use the <stdint.h> limit macros: negating the unsigned literal
+         * 9223372036854775808ull and converting the result back to
+         * int64_t relied on implementation-defined behavior. */
+        assert(_intsetValueEncoding(INT64_MIN) == INTSET_ENC_INT64);
+        assert(_intsetValueEncoding(INT64_MAX) == INTSET_ENC_INT64);
+        ok();
+    }
+
+    printf("Basic adding: "); {
+        is = intsetNew();
+        is = intsetAdd(is,5,&success); assert(success);
+        is = intsetAdd(is,6,&success); assert(success);
+        is = intsetAdd(is,4,&success); assert(success);
+        is = intsetAdd(is,4,&success); assert(!success);
+        assert(6 == intsetMax(is));
+        assert(4 == intsetMin(is));
+        ok();
+        zfree(is);
+    }
+
+    printf("Large number of random adds: "); {
+        uint32_t inserts = 0;
+        is = intsetNew();
+        for (i = 0; i < 1024; i++) {
+            is = intsetAdd(is,rand()%0x800,&success);
+            if (success) inserts++;
+        }
+        assert(intrev32ifbe(is->length) == inserts);
+        checkConsistency(is);
+        ok();
+        zfree(is);
+    }
+
+    printf("Upgrade from int16 to int32: "); {
+        is = intsetNew();
+        is = intsetAdd(is,32,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
+        is = intsetAdd(is,65535,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
+        assert(intsetFind(is,32));
+        assert(intsetFind(is,65535));
+        checkConsistency(is);
+        zfree(is);
+
+        is = intsetNew();
+        is = intsetAdd(is,32,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
+        is = intsetAdd(is,-65535,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
+        assert(intsetFind(is,32));
+        assert(intsetFind(is,-65535));
+        checkConsistency(is);
+        ok();
+        zfree(is);
+    }
+
+    printf("Upgrade from int16 to int64: "); {
+        is = intsetNew();
+        is = intsetAdd(is,32,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
+        is = intsetAdd(is,4294967295,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
+        assert(intsetFind(is,32));
+        assert(intsetFind(is,4294967295));
+        checkConsistency(is);
+        zfree(is);
+
+        is = intsetNew();
+        is = intsetAdd(is,32,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
+        is = intsetAdd(is,-4294967295,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
+        assert(intsetFind(is,32));
+        assert(intsetFind(is,-4294967295));
+        checkConsistency(is);
+        ok();
+        zfree(is);
+    }
+
+    printf("Upgrade from int32 to int64: "); {
+        is = intsetNew();
+        is = intsetAdd(is,65535,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
+        is = intsetAdd(is,4294967295,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
+        assert(intsetFind(is,65535));
+        assert(intsetFind(is,4294967295));
+        checkConsistency(is);
+        zfree(is);
+
+        is = intsetNew();
+        is = intsetAdd(is,65535,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
+        is = intsetAdd(is,-4294967295,NULL);
+        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
+        assert(intsetFind(is,65535));
+        assert(intsetFind(is,-4294967295));
+        checkConsistency(is);
+        ok();
+        zfree(is);
+    }
+
+    printf("Stress lookups: "); {
+        long num = 100000, size = 10000;
+        int i, bits = 20;
+        long long start;
+        is = createSet(bits,size);
+        checkConsistency(is);
+
+        start = usec();
+        for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
+        printf("%ld lookups, %ld element set, %lldusec\n",
+               num,size,usec()-start);
+        zfree(is);
+    }
+
+    printf("Stress add+delete: "); {
+        int i, v1, v2;
+        is = intsetNew();
+        for (i = 0; i < 0xffff; i++) {
+            v1 = rand() % 0xfff;
+            is = intsetAdd(is,v1,NULL);
+            assert(intsetFind(is,v1));
+
+            v2 = rand() % 0xfff;
+            is = intsetRemove(is,v2,NULL);
+            assert(!intsetFind(is,v2));
+        }
+        checkConsistency(is);
+        ok();
+        zfree(is);
+    }
+
+    return 0;
+}
+#endif
diff --git a/src/intset.h b/src/intset.h
new file mode 100644
index 0000000..41cc7b8
--- /dev/null
+++ b/src/intset.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INTSET_H
+#define __INTSET_H
+#include <stdint.h>
+
+typedef struct intset {
+    uint32_t encoding; /* Bytes per element: 2, 4 or 8 (INTSET_ENC_*); byte-swapped on big-endian hosts. */
+    uint32_t length;   /* Number of elements; byte-swapped on big-endian hosts. */
+    int8_t contents[]; /* 'length' integers of 'encoding' bytes each, in ascending order. */
+} intset;
+
+intset *intsetNew(void);
+intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
+intset *intsetRemove(intset *is, int64_t value, int *success);
+uint8_t intsetFind(intset *is, int64_t value);
+int64_t intsetRandom(intset *is);
+int64_t intsetMax(intset *is);
+int64_t intsetMin(intset *is);
+uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
+uint32_t intsetLen(const intset *is);
+size_t intsetBlobLen(intset *is);
+int intsetValidateIntegrity(const unsigned char *is, size_t size, int deep);
+
+#ifdef REDIS_TEST
+int intsetTest(int argc, char *argv[], int flags);
+#endif
+
+#endif // __INTSET_H
diff --git a/src/latency.c b/src/latency.c
new file mode 100644
index 0000000..d46890e
--- /dev/null
+++ b/src/latency.c
@@ -0,0 +1,739 @@
+/* The latency monitor allows to easily observe the sources of latency
+ * in a Redis instance using the LATENCY command. Different latency
+ * sources are monitored, like disk I/O, execution of commands, fork
+ * system call, and so forth.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2014, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "hdr_histogram.h"
+
+/* Dictionary type for latency events: keys are NUL-terminated C strings.
+ * Returns non-zero when the two keys are equal, as the dict API expects. */
+int dictStringKeyCompare(dict *d, const void *key1, const void *key2) {
+    UNUSED(d);
+    /* strcmp() yields an ordering; the dict only wants "equal or not". */
+    return !strcmp(key1, key2);
+}
+
+/* Hash a NUL-terminated string key with the dict's generic hash function. */
+uint64_t dictStringHash(const void *key) {
+    return dictGenHashFunction(key, strlen(key));
+}
+
+void dictVanillaFree(dict *d, void *val);
+
+/* vtable for the event-name -> latencyTimeSeries map. Both keys (zstrdup'd
+ * names) and values (zmalloc'd series) are owned by the dict and released
+ * by dictVanillaFree — presumably a plain zfree wrapper; defined elsewhere. */
+dictType latencyTimeSeriesDictType = {
+    dictStringHash,             /* hash function */
+    NULL,                       /* key dup */
+    NULL,                       /* val dup */
+    dictStringKeyCompare,       /* key compare */
+    dictVanillaFree,            /* key destructor */
+    dictVanillaFree,            /* val destructor */
+    NULL                        /* allow to expand */
+};
+
+/* ------------------------- Utility functions ------------------------------ */
+
+/* Report the amount of AnonHugePages in smap, in bytes. If the return
+ * value of the function is non-zero, the process is being targeted by
+ * THP support, and is likely to have memory usage / latency issues.
+ * NOTE(review): the return type is int while the helper name suggests a
+ * byte count that could exceed INT_MAX — confirm truncation is harmless
+ * for the >0 checks done by callers. */
+int THPGetAnonHugePagesSize(void) {
+    return zmalloc_get_smap_bytes_by_field("AnonHugePages:",-1);
+}
+
+/* ---------------------------- Latency API --------------------------------- */
+
+/* Latency monitor initialization. We just need to create the dictionary
+ * of time series, each time series is created on demand in order to avoid
+ * having a fixed list to maintain. */
+void latencyMonitorInit(void) {
+    /* Values are struct latencyTimeSeries, lazily allocated by
+     * latencyAddSample() the first time an event fires. */
+    server.latency_events = dictCreate(&latencyTimeSeriesDictType);
+}
+
+/* Add the specified sample to the specified time series "event".
+ * This function is usually called via latencyAddSampleIfNeeded(), that
+ * is a macro that only adds the sample if the latency is higher than
+ * server.latency_monitor_threshold.
+ * Samples are kept in a fixed ring of LATENCY_TS_LEN (time,latency)
+ * pairs; 'max' is an all-time high that survives ring wrap-around. */
+void latencyAddSample(const char *event, mstime_t latency) {
+    struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);
+    time_t now = time(NULL);
+    int prev;
+
+    /* Create the time series if it does not exist. */
+    if (ts == NULL) {
+        ts = zmalloc(sizeof(*ts));
+        ts->idx = 0;
+        ts->max = 0;
+        memset(ts->samples,0,sizeof(ts->samples));
+        /* The dict owns both the duplicated key and the series. */
+        dictAdd(server.latency_events,zstrdup(event),ts);
+    }
+
+    if (latency > ts->max) ts->max = latency;
+
+    /* If the previous sample is in the same second, we update our old sample
+     * if this latency is > of the old one, or just return. */
+    prev = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;
+    if (ts->samples[prev].time == now) {
+        if (latency > ts->samples[prev].latency)
+            ts->samples[prev].latency = latency;
+        return;
+    }
+
+    ts->samples[ts->idx].time = now;
+    ts->samples[ts->idx].latency = latency;
+
+    /* Advance the ring cursor, wrapping back to slot 0. */
+    ts->idx++;
+    if (ts->idx == LATENCY_TS_LEN) ts->idx = 0;
+}
+
+/* Reset data for the specified event, or all the events data if 'event' is
+ * NULL. Returns the number of time series that were deleted.
+ *
+ * Note: this is O(N) even when event_to_reset is not NULL because makes
+ * the code simpler and we have a small fixed max number of events. */
+int latencyResetEvent(char *event_to_reset) {
+    dictIterator *di;
+    dictEntry *de;
+    int resets = 0;
+
+    /* A *safe* iterator is required: entries are deleted while iterating. */
+    di = dictGetSafeIterator(server.latency_events);
+    while((de = dictNext(di)) != NULL) {
+        char *event = dictGetKey(de);
+
+        if (event_to_reset == NULL || strcasecmp(event,event_to_reset) == 0) {
+            dictDelete(server.latency_events, event);
+            resets++;
+        }
+    }
+    dictReleaseIterator(di);
+    return resets;
+}
+
+/* ------------------------ Latency reporting (doctor) ---------------------- */
+
+/* Analyze the samples available for a given event and return a structure
+ * populate with different metrics, average, MAD, min, max, and so forth.
+ * Check latency.h definition of struct latencyStats for more info.
+ * If the specified event has no elements the structure is populate with
+ * zero values. */
+void analyzeLatencyForEvent(char *event, struct latencyStats *ls) {
+    struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);
+    int j;
+    uint64_t sum;
+
+    /* Zero everything first so a missing series yields all-zero stats. */
+    ls->all_time_high = ts ? ts->max : 0;
+    ls->avg = 0;
+    ls->min = 0;
+    ls->max = 0;
+    ls->mad = 0;
+    ls->samples = 0;
+    ls->period = 0;
+    if (!ts) return;
+
+    /* First pass, populate everything but the MAD.
+     * Slots with time == 0 are empty ring entries and are skipped. */
+    sum = 0;
+    for (j = 0; j < LATENCY_TS_LEN; j++) {
+        if (ts->samples[j].time == 0) continue;
+        ls->samples++;
+        if (ls->samples == 1) {
+            ls->min = ls->max = ts->samples[j].latency;
+        } else {
+            if (ls->min > ts->samples[j].latency)
+                ls->min = ts->samples[j].latency;
+            if (ls->max < ts->samples[j].latency)
+                ls->max = ts->samples[j].latency;
+        }
+        sum += ts->samples[j].latency;
+
+        /* Track the oldest event time in ls->period. */
+        if (ls->period == 0 || ts->samples[j].time < ls->period)
+            ls->period = ts->samples[j].time;
+    }
+
+    /* So far avg is actually the sum of the latencies, and period is
+     * the oldest event time. We need to make the first an average and
+     * the second a range of seconds. */
+    if (ls->samples) {
+        ls->avg = sum / ls->samples;
+        ls->period = time(NULL) - ls->period;
+        if (ls->period == 0) ls->period = 1;
+    }
+
+    /* Second pass, compute MAD (mean absolute deviation from the avg). */
+    sum = 0;
+    for (j = 0; j < LATENCY_TS_LEN; j++) {
+        int64_t delta;
+
+        if (ts->samples[j].time == 0) continue;
+        delta = (int64_t)ls->avg - ts->samples[j].latency;
+        if (delta < 0) delta = -delta;
+        sum += delta;
+    }
+    if (ls->samples) ls->mad = sum / ls->samples;
+}
+
+/* Create a human readable report of latency events for this Redis instance.
+ * The returned sds string is owned by the caller. The function scans every
+ * recorded event, prints its stats, raises per-event advice flags, then
+ * renders the accumulated advice at the end. */
+sds createLatencyReport(void) {
+    sds report = sdsempty();
+    /* Advice flags: set while scanning events, rendered after the loop.
+     * 'advices' counts how many were raised, to pick the right closing
+     * message. */
+    int advise_better_vm = 0;       /* Better virtual machines. */
+    int advise_slowlog_enabled = 0; /* Enable slowlog. */
+    int advise_slowlog_tuning = 0;  /* Reconfigure slowlog. */
+    int advise_slowlog_inspect = 0; /* Check your slowlog. */
+    int advise_disk_contention = 0; /* Try to lower disk contention. */
+    int advise_scheduler = 0;       /* Intrinsic latency. */
+    int advise_data_writeback = 0;  /* data=writeback. */
+    int advise_no_appendfsync = 0;  /* don't fsync during rewrites. */
+    int advise_local_disk = 0;      /* Avoid remote disks. */
+    int advise_ssd = 0;             /* Use an SSD drive. */
+    int advise_write_load_info = 0; /* Print info about AOF and write load. */
+    int advise_hz = 0;              /* Use higher HZ. */
+    int advise_large_objects = 0;   /* Deletion of large objects. */
+    int advise_mass_eviction = 0;   /* Avoid mass eviction of keys. */
+    int advise_relax_fsync_policy = 0; /* appendfsync always is slow. */
+    int advise_disable_thp = 0;     /* AnonHugePages detected. */
+    int advices = 0;
+
+    /* Return ASAP if the latency engine is disabled and it looks like it
+     * was never enabled so far. */
+    if (dictSize(server.latency_events) == 0 &&
+        server.latency_monitor_threshold == 0)
+    {
+        report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may use \"CONFIG SET latency-monitor-threshold <milliseconds>.\" in order to enable it. If we weren't in a deep space mission I'd suggest to take a look at https://redis.io/topics/latency-monitor.\n");
+        return report;
+    }
+
+    /* Show all the events stats and add for each event some event-related
+     * comment depending on the values. */
+    dictIterator *di;
+    dictEntry *de;
+    int eventnum = 0;
+
+    di = dictGetSafeIterator(server.latency_events);
+    while((de = dictNext(di)) != NULL) {
+        char *event = dictGetKey(de);
+        struct latencyTimeSeries *ts = dictGetVal(de);
+        struct latencyStats ls;
+
+        if (ts == NULL) continue;
+        eventnum++;
+        if (eventnum == 1) {
+            report = sdscat(report,"Dave, I have observed latency spikes in this Redis instance. You don't mind talking about it, do you Dave?\n\n");
+        }
+        analyzeLatencyForEvent(event,&ls);
+
+        report = sdscatprintf(report,
+            "%d. %s: %d latency spikes (average %lums, mean deviation %lums, period %.2f sec). Worst all time event %lums.",
+            eventnum, event,
+            ls.samples,
+            (unsigned long) ls.avg,
+            (unsigned long) ls.mad,
+            (double) ls.period/ls.samples,
+            (unsigned long) ts->max);
+
+        /* Fork */
+        if (!strcasecmp(event,"fork")) {
+            char *fork_quality;
+            if (server.stat_fork_rate < 10) {
+                fork_quality = "terrible";
+                advise_better_vm = 1;
+                advices++;
+            } else if (server.stat_fork_rate < 25) {
+                fork_quality = "poor";
+                advise_better_vm = 1;
+                advices++;
+            } else if (server.stat_fork_rate < 100) {
+                fork_quality = "good";
+            } else {
+                fork_quality = "excellent";
+            }
+            report = sdscatprintf(report,
+                " Fork rate is %.2f GB/sec (%s).", server.stat_fork_rate,
+                fork_quality);
+        }
+
+        /* Potentially commands. */
+        if (!strcasecmp(event,"command")) {
+            if (server.slowlog_log_slower_than < 0) {
+                advise_slowlog_enabled = 1;
+                advices++;
+            } else if (server.slowlog_log_slower_than/1000 >
+                       server.latency_monitor_threshold)
+            {
+                advise_slowlog_tuning = 1;
+                advices++;
+            }
+            advise_slowlog_inspect = 1;
+            advise_large_objects = 1;
+            advices += 2;
+        }
+
+        /* fast-command. */
+        if (!strcasecmp(event,"fast-command")) {
+            advise_scheduler = 1;
+            advices++;
+        }
+
+        /* AOF and I/O. */
+        if (!strcasecmp(event,"aof-write-pending-fsync")) {
+            advise_local_disk = 1;
+            advise_disk_contention = 1;
+            advise_ssd = 1;
+            advise_data_writeback = 1;
+            advices += 4;
+        }
+
+        if (!strcasecmp(event,"aof-write-active-child")) {
+            advise_no_appendfsync = 1;
+            advise_data_writeback = 1;
+            advise_ssd = 1;
+            advices += 3;
+        }
+
+        if (!strcasecmp(event,"aof-write-alone")) {
+            advise_local_disk = 1;
+            advise_data_writeback = 1;
+            advise_ssd = 1;
+            advices += 3;
+        }
+
+        if (!strcasecmp(event,"aof-fsync-always")) {
+            advise_relax_fsync_policy = 1;
+            advices++;
+        }
+
+        if (!strcasecmp(event,"aof-fstat") ||
+            !strcasecmp(event,"rdb-unlink-temp-file")) {
+            advise_disk_contention = 1;
+            advise_local_disk = 1;
+            advices += 2;
+        }
+
+        if (!strcasecmp(event,"aof-rewrite-diff-write") ||
+            !strcasecmp(event,"aof-rename")) {
+            advise_write_load_info = 1;
+            advise_data_writeback = 1;
+            advise_ssd = 1;
+            advise_local_disk = 1;
+            advices += 4;
+        }
+
+        /* Expire cycle. */
+        if (!strcasecmp(event,"expire-cycle")) {
+            advise_hz = 1;
+            advise_large_objects = 1;
+            advices += 2;
+        }
+
+        /* Eviction cycle. */
+        if (!strcasecmp(event,"eviction-del")) {
+            advise_large_objects = 1;
+            advices++;
+        }
+
+        if (!strcasecmp(event,"eviction-cycle")) {
+            advise_mass_eviction = 1;
+            advices++;
+        }
+
+        report = sdscatlen(report,"\n",1);
+    }
+    dictReleaseIterator(di);
+
+    /* Add non event based advices. */
+    if (THPGetAnonHugePagesSize() > 0) {
+        advise_disable_thp = 1;
+        advices++;
+    }
+
+    if (eventnum == 0 && advices == 0) {
+        report = sdscat(report,"Dave, no latency spike was observed during the lifetime of this Redis instance, not in the slightest bit. I honestly think you ought to sit down calmly, take a stress pill, and think things over.\n");
+    } else if (eventnum > 0 && advices == 0) {
+        report = sdscat(report,"\nWhile there are latency events logged, I'm not able to suggest any easy fix. Please use the Redis community to get some help, providing this report in your help request.\n");
+    } else {
+        /* Add all the suggestions accumulated so far. */
+
+        /* Better VM. */
+        report = sdscat(report,"\nI have a few advices for you:\n\n");
+        if (advise_better_vm) {
+            report = sdscat(report,"- If you are using a virtual machine, consider upgrading it with a faster one using a hypervisior that provides less latency during fork() calls. Xen is known to have poor fork() performance. Even in the context of the same VM provider, certain kinds of instances can execute fork faster than others.\n");
+        }
+
+        /* Slow log. */
+        if (advise_slowlog_enabled) {
+            report = sdscatprintf(report,"- There are latency issues with potentially slow commands you are using. Try to enable the Slow Log Redis feature using the command 'CONFIG SET slowlog-log-slower-than %llu'. If the Slow log is disabled Redis is not able to log slow commands execution for you.\n", (unsigned long long)server.latency_monitor_threshold*1000);
+        }
+
+        if (advise_slowlog_tuning) {
+            report = sdscatprintf(report,"- Your current Slow Log configuration only logs events that are slower than your configured latency monitor threshold. Please use 'CONFIG SET slowlog-log-slower-than %llu'.\n", (unsigned long long)server.latency_monitor_threshold*1000);
+        }
+
+        if (advise_slowlog_inspect) {
+            report = sdscat(report,"- Check your Slow Log to understand what are the commands you are running which are too slow to execute. Please check https://redis.io/commands/slowlog for more information.\n");
+        }
+
+        /* Intrinsic latency. */
+        if (advise_scheduler) {
+            report = sdscat(report,"- The system is slow to execute Redis code paths not containing system calls. This usually means the system does not provide Redis CPU time to run for long periods. You should try to:\n"
+            "  1) Lower the system load.\n"
+            "  2) Use a computer / VM just for Redis if you are running other software in the same system.\n"
+            "  3) Check if you have a \"noisy neighbour\" problem.\n"
+            "  4) Check with 'redis-cli --intrinsic-latency 100' what is the intrinsic latency in your system.\n"
+            "  5) Check if the problem is allocator-related by recompiling Redis with MALLOC=libc, if you are using Jemalloc. However this may create fragmentation problems.\n");
+        }
+
+        /* AOF / Disk latency. */
+        if (advise_local_disk) {
+            report = sdscat(report,"- It is strongly advised to use local disks for persistence, especially if you are using AOF. Remote disks provided by platform-as-a-service providers are known to be slow.\n");
+        }
+
+        if (advise_ssd) {
+            report = sdscat(report,"- SSD disks are able to reduce fsync latency, and total time needed for snapshotting and AOF log rewriting (resulting in smaller memory usage). With extremely high write load SSD disks can be a good option. However Redis should perform reasonably with high load using normal disks. Use this advice as a last resort.\n");
+        }
+
+        if (advise_data_writeback) {
+            report = sdscat(report,"- Mounting ext3/4 filesystems with data=writeback can provide a performance boost compared to data=ordered, however this mode of operation provides less guarantees, and sometimes it can happen that after a hard crash the AOF file will have a half-written command at the end and will require to be repaired before Redis restarts.\n");
+        }
+
+        if (advise_disk_contention) {
+            report = sdscat(report,"- Try to lower the disk contention. This is often caused by other disk intensive processes running in the same computer (including other Redis instances).\n");
+        }
+
+        if (advise_no_appendfsync) {
+            report = sdscat(report,"- Assuming from the point of view of data safety this is viable in your environment, you could try to enable the 'no-appendfsync-on-rewrite' option, so that fsync will not be performed while there is a child rewriting the AOF file or producing an RDB file (the moment where there is high disk contention).\n");
+        }
+
+        if (advise_relax_fsync_policy && server.aof_fsync == AOF_FSYNC_ALWAYS) {
+            report = sdscat(report,"- Your fsync policy is set to 'always'. It is very hard to get good performances with such a setup, if possible try to relax the fsync policy to 'onesec'.\n");
+        }
+
+        if (advise_write_load_info) {
+            report = sdscat(report,"- Latency during the AOF atomic rename operation or when the final difference is flushed to the AOF file at the end of the rewrite, sometimes is caused by very high write load, causing the AOF buffer to get very large. If possible try to send less commands to accomplish the same work, or use Lua scripts to group multiple operations into a single EVALSHA call.\n");
+        }
+
+        if (advise_hz && server.hz < 100) {
+            report = sdscat(report,"- In order to make the Redis keys expiring process more incremental, try to set the 'hz' configuration parameter to 100 using 'CONFIG SET hz 100'.\n");
+        }
+
+        if (advise_large_objects) {
+            report = sdscat(report,"- Deleting, expiring or evicting (because of maxmemory policy) large objects is a blocking operation. If you have very large objects that are often deleted, expired, or evicted, try to fragment those objects into multiple smaller objects.\n");
+        }
+
+        if (advise_mass_eviction) {
+            report = sdscat(report,"- Sudden changes to the 'maxmemory' setting via 'CONFIG SET', or allocation of large objects via sets or sorted sets intersections, STORE option of SORT, Redis Cluster large keys migrations (RESTORE command), may create sudden memory pressure forcing the server to block trying to evict keys. \n");
+        }
+
+        if (advise_disable_thp) {
+            report = sdscat(report,"- I detected a non zero amount of anonymous huge pages used by your process. This creates very serious latency events in different conditions, especially when Redis is persisting on disk. To disable THP support use the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled', make sure to also add it into /etc/rc.local so that the command will be executed again after a reboot. Note that even if you have already disabled THP, you still need to restart the Redis process to get rid of the huge pages already created.\n");
+        }
+    }
+
+    return report;
+}
+
+/* ---------------------- Latency command implementation -------------------- */
+
+/* latencyCommand() helper to produce a map of time buckets,
+ * each representing a latency range,
+ * between 1 nanosecond and roughly 1 second.
+ * Each bucket covers twice the previous bucket's range.
+ * Empty buckets are not printed.
+ * Everything above 1 sec is considered +Inf.
+ * At max there will be log2(1000000000)=30 buckets */
+void fillCommandCDF(client *c, struct hdr_histogram* histogram) {
+    addReplyMapLen(c,2);
+    addReplyBulkCString(c,"calls");
+    addReplyLongLong(c,(long long) histogram->total_count);
+    addReplyBulkCString(c,"histogram_usec");
+    /* The bucket count is only known after iterating, so the map length
+     * is emitted as a deferred reply and patched at the end. */
+    void *replylen = addReplyDeferredLen(c);
+    int samples = 0;
+    struct hdr_iter iter;
+    /* Logarithmic iteration: first bucket at 1024, doubling each step;
+     * presumably histogram values are recorded in nanoseconds — confirm
+     * against the recording site. */
+    hdr_iter_log_init(&iter,histogram,1024,2);
+    int64_t previous_count = 0;
+    while (hdr_iter_next(&iter)) {
+        const int64_t micros = iter.highest_equivalent_value / 1000;
+        const int64_t cumulative_count = iter.cumulative_count;
+        /* Only emit buckets that actually gained samples. */
+        if(cumulative_count > previous_count){
+            addReplyLongLong(c,(long long) micros);
+            addReplyLongLong(c,(long long) cumulative_count);
+            samples++;
+        }
+        previous_count = cumulative_count;
+    }
+    setDeferredMapLen(c,replylen,samples);
+}
+
+/* latencyCommand() helper to produce for all commands,
+ * a per command cumulative distribution of latencies. */
+void latencyAllCommandsFillCDF(client *c, dict *commands, int *command_with_data) {
+    dictIterator *di = dictGetSafeIterator(commands);
+    dictEntry *de;
+    struct redisCommand *cmd;
+
+    while((de = dictNext(di)) != NULL) {
+        cmd = (struct redisCommand *) dictGetVal(de);
+        if (cmd->latency_histogram) {
+            addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
+            fillCommandCDF(c, cmd->latency_histogram);
+            (*command_with_data)++;
+        }
+
+        /* Recurse into subcommands. Guard on the dict we actually
+         * dereference (not on 'subcommands') so a NULL dict can never be
+         * passed down; this also matches the check used by
+         * latencySpecificCommandsFillCDF(). */
+        if (cmd->subcommands_dict) {
+            latencyAllCommandsFillCDF(c, cmd->subcommands_dict, command_with_data);
+        }
+    }
+    dictReleaseIterator(di);
+}
+
+/* latencyCommand() helper to produce, for the specific command names given
+ * as arguments (argv[2..]), a per command cumulative distribution of
+ * latencies. Unknown command names are silently skipped. */
+void latencySpecificCommandsFillCDF(client *c) {
+    void *replylen = addReplyDeferredLen(c);
+    int reported = 0;
+
+    for (int arg = 2; arg < c->argc; arg++) {
+        struct redisCommand *cmd = lookupCommandBySds(c->argv[arg]->ptr);
+        /* If the command does not exist we skip the reply. */
+        if (cmd == NULL) continue;
+
+        if (cmd->latency_histogram) {
+            addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
+            fillCommandCDF(c, cmd->latency_histogram);
+            reported++;
+        }
+
+        /* A container command also reports every subcommand that has
+         * latency data attached. */
+        if (cmd->subcommands_dict) {
+            dictIterator *di = dictGetSafeIterator(cmd->subcommands_dict);
+            dictEntry *de;
+
+            while ((de = dictNext(di)) != NULL) {
+                struct redisCommand *sub = dictGetVal(de);
+                if (sub->latency_histogram) {
+                    addReplyBulkCBuffer(c, sub->fullname, sdslen(sub->fullname));
+                    fillCommandCDF(c, sub->latency_histogram);
+                    reported++;
+                }
+            }
+            dictReleaseIterator(di);
+        }
+    }
+    setDeferredMapLen(c,replylen,reported);
+}
+
+/* latencyCommand() helper to produce a time-delay reply for all the samples
+ * in memory for the specified time series. */
+void latencyCommandReplyWithSamples(client *c, struct latencyTimeSeries *ts) {
+    void *replylen = addReplyDeferredLen(c);
+    int emitted = 0;
+
+    /* Walk the circular buffer oldest to newest, skipping slots that were
+     * never filled (time == 0). */
+    for (int j = 0; j < LATENCY_TS_LEN; j++) {
+        int slot = (ts->idx + j) % LATENCY_TS_LEN;
+
+        if (ts->samples[slot].time == 0) continue;
+        addReplyArrayLen(c,2);
+        addReplyLongLong(c,ts->samples[slot].time);
+        addReplyLongLong(c,ts->samples[slot].latency);
+        emitted++;
+    }
+    setDeferredArrayLen(c,replylen,emitted);
+}
+
+/* latencyCommand() helper to produce the reply for the LATEST subcommand,
+ * listing the last latency sample for every event type registered so far. */
+void latencyCommandReplyWithLatestEvents(client *c) {
+    addReplyArrayLen(c,dictSize(server.latency_events));
+    dictIterator *di = dictGetIterator(server.latency_events);
+    dictEntry *de;
+
+    while((de = dictNext(di)) != NULL) {
+        char *event = dictGetKey(de);
+        struct latencyTimeSeries *ts = dictGetVal(de);
+        /* ts->idx points at the next slot to write, so the most recent
+         * sample lives one slot behind it (modulo the ring size). */
+        int newest = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;
+
+        addReplyArrayLen(c,4);
+        addReplyBulkCString(c,event);
+        addReplyLongLong(c,ts->samples[newest].time);
+        addReplyLongLong(c,ts->samples[newest].latency);
+        addReplyLongLong(c,ts->max);
+    }
+    dictReleaseIterator(di);
+}
+
+#define LATENCY_GRAPH_COLS 80
+/* Render an ASCII sparkline graph (LATENCY GRAPH output) for the given
+ * event time series. Each recorded sample becomes one column, labeled with
+ * how long ago it was collected (seconds/minutes/hours/days). Returns a
+ * newly allocated sds string; the caller is responsible for freeing it. */
+sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
+    int j;
+    struct sequence *seq = createSparklineSequence();
+    sds graph = sdsempty();
+    uint32_t min = 0, max = 0;
+
+    for (j = 0; j < LATENCY_TS_LEN; j++) {
+        int i = (ts->idx + j) % LATENCY_TS_LEN;
+        int elapsed;
+        char buf[64];
+
+        /* Slots never written have time == 0 and are skipped. */
+        if (ts->samples[i].time == 0) continue;
+        /* Update min and max. Both are seeded from the first kept sample
+         * (seq->length == 0) so the initial zeros never leak into them. */
+        if (seq->length == 0) {
+            min = max = ts->samples[i].latency;
+        } else {
+            if (ts->samples[i].latency > max) max = ts->samples[i].latency;
+            if (ts->samples[i].latency < min) min = ts->samples[i].latency;
+        }
+        /* Use as label the number of seconds / minutes / hours / days
+         * ago the event happened. */
+        elapsed = time(NULL) - ts->samples[i].time;
+        if (elapsed < 60)
+            snprintf(buf,sizeof(buf),"%ds",elapsed);
+        else if (elapsed < 3600)
+            snprintf(buf,sizeof(buf),"%dm",elapsed/60);
+        else if (elapsed < 3600*24)
+            snprintf(buf,sizeof(buf),"%dh",elapsed/3600);
+        else
+            snprintf(buf,sizeof(buf),"%dd",elapsed/(3600*24));
+        sparklineSequenceAddSample(seq,ts->samples[i].latency,buf);
+    }
+
+    /* Header line with window high/low and the all-time high, followed by
+     * a separator and the rendered 4-row sparkline. */
+    graph = sdscatprintf(graph,
+        "%s - high %lu ms, low %lu ms (all time high %lu ms)\n", event,
+        (unsigned long) max, (unsigned long) min, (unsigned long) ts->max);
+    for (j = 0; j < LATENCY_GRAPH_COLS; j++)
+        graph = sdscatlen(graph,"-",1);
+    graph = sdscatlen(graph,"\n",1);
+    graph = sparklineRender(graph,seq,LATENCY_GRAPH_COLS,4,SPARKLINE_FILL);
+    freeSparklineSequence(seq);
+    return graph;
+}
+
+/* LATENCY command implementations.
+ *
+ * LATENCY HISTORY: return time-latency samples for the specified event.
+ * LATENCY LATEST: return the latest latency for all the events classes.
+ * LATENCY DOCTOR: returns a human readable analysis of instance latency.
+ * LATENCY GRAPH: provide an ASCII graph of the latency of the specified event.
+ * LATENCY RESET: reset data of a specified event or all the data if no event provided.
+ * LATENCY HISTOGRAM: return a cumulative distribution of latencies in the format of an histogram for the specified command names.
+ */
+void latencyCommand(client *c) {
+    struct latencyTimeSeries *ts;
+
+    if (!strcasecmp(c->argv[1]->ptr,"history") && c->argc == 3) {
+        /* LATENCY HISTORY <event> */
+        /* An unknown event is not an error here: reply with an empty array. */
+        ts = dictFetchValue(server.latency_events,c->argv[2]->ptr);
+        if (ts == NULL) {
+            addReplyArrayLen(c,0);
+        } else {
+            latencyCommandReplyWithSamples(c,ts);
+        }
+    } else if (!strcasecmp(c->argv[1]->ptr,"graph") && c->argc == 3) {
+        /* LATENCY GRAPH <event> */
+        sds graph;
+        dictEntry *de;
+        char *event;
+
+        /* Unlike HISTORY, GRAPH replies with an error for unknown events. */
+        de = dictFind(server.latency_events,c->argv[2]->ptr);
+        if (de == NULL) goto nodataerr;
+        ts = dictGetVal(de);
+        event = dictGetKey(de);
+
+        graph = latencyCommandGenSparkeline(event,ts);
+        addReplyVerbatim(c,graph,sdslen(graph),"txt");
+        sdsfree(graph);
+    } else if (!strcasecmp(c->argv[1]->ptr,"latest") && c->argc == 2) {
+        /* LATENCY LATEST */
+        latencyCommandReplyWithLatestEvents(c);
+    } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
+        /* LATENCY DOCTOR */
+        sds report = createLatencyReport();
+
+        addReplyVerbatim(c,report,sdslen(report),"txt");
+        sdsfree(report);
+    } else if (!strcasecmp(c->argv[1]->ptr,"reset") && c->argc >= 2) {
+        /* LATENCY RESET */
+        /* With no extra argument reset every event; otherwise reset only
+         * the listed events, replying with the number of series reset. */
+        if (c->argc == 2) {
+            addReplyLongLong(c,latencyResetEvent(NULL));
+        } else {
+            int j, resets = 0;
+
+            for (j = 2; j < c->argc; j++)
+                resets += latencyResetEvent(c->argv[j]->ptr);
+            addReplyLongLong(c,resets);
+        }
+    } else if (!strcasecmp(c->argv[1]->ptr,"histogram") && c->argc >= 2) {
+        /* LATENCY HISTOGRAM*/
+        /* Without arguments report every command that has latency data,
+         * otherwise only the commands named in argv[2..]. */
+        if (c->argc == 2) {
+            int command_with_data = 0;
+            void *replylen = addReplyDeferredLen(c);
+            latencyAllCommandsFillCDF(c, server.commands, &command_with_data);
+            setDeferredMapLen(c, replylen, command_with_data);
+        } else {
+            latencySpecificCommandsFillCDF(c);
+        }
+    } else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
+        const char *help[] = {
+"DOCTOR",
+"    Return a human readable latency analysis report.",
+"GRAPH <event>",
+"    Return an ASCII latency graph for the <event> class.",
+"HISTORY <event>",
+"    Return time-latency samples for the <event> class.",
+"LATEST",
+"    Return the latest latency samples for all events.",
+"RESET [<event> ...]",
+"    Reset latency data of one or more <event> classes.",
+"    (default: reset all data for all event classes)",
+"HISTOGRAM [COMMAND ...]",
+"    Return a cumulative distribution of latencies in the format of a histogram for the specified command names.",
+"    If no commands are specified then all histograms are replied.",
+NULL
+        };
+        addReplyHelp(c, help);
+    } else {
+        addReplySubcommandSyntaxError(c);
+    }
+    return;
+
+nodataerr:
+    /* Common error when the user asks for an event we have no latency
+     * information about. */
+    addReplyErrorFormat(c,
+        "No samples available for event '%s'", (char*) c->argv[2]->ptr);
+}
+
+/* Accumulate one duration sample (count, sum, max) into the per-type
+ * event loop duration stats. Out-of-range types are ignored: 'type' is a
+ * plain int, so reject negative values too instead of indexing before
+ * the duration_stats array. */
+void durationAddSample(int type, monotime duration) {
+    if (type < 0 || type >= EL_DURATION_TYPE_NUM) {
+        return;
+    }
+    durationStats* ds = &server.duration_stats[type];
+    ds->cnt++;
+    ds->sum += duration;
+    if (duration > ds->max) {
+        ds->max = duration;
+    }
+}
diff --git a/src/latency.h b/src/latency.h
new file mode 100644
index 0000000..13503d5
--- /dev/null
+++ b/src/latency.h
@@ -0,0 +1,108 @@
+/* latency.h -- latency monitor API header file
+ * See latency.c for more information.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2014, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __LATENCY_H
+#define __LATENCY_H
+
+#define LATENCY_TS_LEN 160 /* History length for every monitored event. */
+
+/* Representation of a latency sample: the sampling time and the latency
+ * observed in milliseconds. */
+struct latencySample {
+    int32_t time; /* We don't use time_t to force 4 bytes usage everywhere. */
+    uint32_t latency; /* Latency in milliseconds. */
+};
+
+/* The latency time series for a given event. Implemented as a fixed-size
+ * circular buffer: 'idx' is the next write position, so slot (idx-1) holds
+ * the newest sample and unused slots keep time == 0. */
+struct latencyTimeSeries {
+    int idx; /* Index of the next sample to store. */
+    uint32_t max; /* Max latency observed for this event. */
+    struct latencySample samples[LATENCY_TS_LEN]; /* Latest history. */
+};
+
+/* Latency statistics structure. */
+struct latencyStats {
+    uint32_t all_time_high; /* Absolute max observed since latest reset. */
+    uint32_t avg; /* Average of current samples. */
+    uint32_t min; /* Min of current samples. */
+    uint32_t max; /* Max of current samples. */
+    uint32_t mad; /* Mean absolute deviation. */
+    uint32_t samples; /* Number of non-zero samples. */
+    time_t period; /* Number of seconds since first event and now. */
+};
+
+void latencyMonitorInit(void);
+void latencyAddSample(const char *event, mstime_t latency);
+
+/* Latency monitoring macros. */
+
+/* Start monitoring an event. We just set the current time.
+ * NOTE(review): this expands to a bare if/else statement, so call sites
+ * must use it as a standalone statement (never as the unbraced body of an
+ * outer if) to avoid dangling-else surprises. */
+#define latencyStartMonitor(var) if (server.latency_monitor_threshold) { \
+    var = mstime(); \
+} else { \
+    var = 0; \
+}
+
+/* End monitoring an event, compute the difference with the current time
+ * to check the amount of time elapsed. */
+#define latencyEndMonitor(var) if (server.latency_monitor_threshold) { \
+    var = mstime() - var; \
+}
+
+/* Add the sample only if the elapsed time is >= to the configured threshold.
+ * NOTE(review): 'var' is evaluated twice, so pass a plain variable rather
+ * than an expression with side effects. */
+#define latencyAddSampleIfNeeded(event,var)                     \
+    if (server.latency_monitor_threshold &&                     \
+        (var) >= server.latency_monitor_threshold)              \
+          latencyAddSample((event),(var));
+
+/* Remove time from a nested event. */
+#define latencyRemoveNestedEvent(event_var,nested_var)          \
+    event_var += nested_var;
+
+/* Accumulator for event loop duration metrics: number of samples, total
+ * time and single worst observation, per DurationType. */
+typedef struct durationStats {
+    unsigned long long cnt;
+    unsigned long long sum;
+    unsigned long long max;
+} durationStats;
+
+typedef enum {
+    EL_DURATION_TYPE_EL = 0, // cumulative time duration metric of the whole eventloop
+    EL_DURATION_TYPE_CMD,    // cumulative time duration metric of executing commands
+    EL_DURATION_TYPE_AOF,    // cumulative time duration metric of flushing AOF in eventloop
+    EL_DURATION_TYPE_CRON,   // cumulative time duration metric of cron (serverCron and beforeSleep, but excluding IO and AOF)
+    EL_DURATION_TYPE_NUM
+} DurationType;
+
+void durationAddSample(int type, monotime duration);
+
+#endif /* __LATENCY_H */
diff --git a/src/lazyfree.c b/src/lazyfree.c
new file mode 100644
index 0000000..8ac55f7
--- /dev/null
+++ b/src/lazyfree.c
@@ -0,0 +1,227 @@
+#include "server.h"
+#include "bio.h"
+#include "atomicvar.h"
+#include "functions.h"
+
+/* Counters shared between the main thread and the bio lazy-free thread:
+ * objects still pending release, and objects released so far. */
+static redisAtomic size_t lazyfree_objects = 0;
+static redisAtomic size_t lazyfreed_objects = 0;
+
+/* Bio-thread callback: drop one reference to the object scheduled for
+ * async release (just a decrRefCount()) and keep the lazyfree counters
+ * in sync. */
+void lazyfreeFreeObject(void *args[]) {
+    robj *obj = (robj *) args[0];
+
+    decrRefCount(obj);
+    atomicDecr(lazyfree_objects,1);
+    atomicIncr(lazyfreed_objects,1);
+}
+
+/* Bio-thread callback releasing a whole database. The main thread already
+ * swapped fresh tables into the redisDb; here we only free the detached
+ * keyspace and expires dictionaries and account for the released keys. */
+void lazyfreeFreeDatabase(void *args[]) {
+    dict *keys = (dict *) args[0];
+    dict *expires = (dict *) args[1];
+    size_t released = dictSize(keys);
+
+    dictRelease(keys);
+    dictRelease(expires);
+    atomicDecr(lazyfree_objects,released);
+    atomicIncr(lazyfreed_objects,released);
+}
+
+/* Bio-thread callback releasing the client keys tracking radix tree. */
+void lazyFreeTrackingTable(void *args[]) {
+    rax *tracking = args[0];
+    size_t keys = tracking->numele;
+
+    freeTrackingRadixTree(tracking);
+    atomicDecr(lazyfree_objects,keys);
+    atomicIncr(lazyfreed_objects,keys);
+}
+
+/* Bio-thread callback releasing the lua_scripts dict. */
+void lazyFreeLuaScripts(void *args[]) {
+    dict *scripts = args[0];
+    long long count = dictSize(scripts);
+
+    dictRelease(scripts);
+    atomicDecr(lazyfree_objects,count);
+    atomicIncr(lazyfreed_objects,count);
+}
+
+/* Bio-thread callback releasing the functions library context. */
+void lazyFreeFunctionsCtx(void *args[]) {
+    functionsLibCtx *lib_ctx = args[0];
+    size_t count = functionsLibCtxfunctionsLen(lib_ctx);
+
+    functionsLibCtxFree(lib_ctx);
+    atomicDecr(lazyfree_objects,count);
+    atomicIncr(lazyfreed_objects,count);
+}
+
+/* Bio-thread callback releasing the replication backlog buffer block list
+ * together with its rax index. */
+void lazyFreeReplicationBacklogRefMem(void *args[]) {
+    list *blocks = args[0];
+    rax *index = args[1];
+    long long items = listLength(blocks) + raxSize(index);
+
+    listRelease(blocks);
+    raxFree(index);
+    atomicDecr(lazyfree_objects,items);
+    atomicIncr(lazyfreed_objects,items);
+}
+
+/* Return the number of currently pending objects to free. */
+size_t lazyfreeGetPendingObjectsCount(void) {
+    size_t pending;
+    atomicGet(lazyfree_objects,pending);
+    return pending;
+}
+
+/* Return the number of objects that have been freed so far. */
+size_t lazyfreeGetFreedObjectsCount(void) {
+    size_t freed;
+    atomicGet(lazyfreed_objects,freed);
+    return freed;
+}
+
+/* Reset the freed-objects counter (the pending counter is left untouched
+ * since jobs may still be in flight). */
+void lazyfreeResetStats(void) {
+    atomicSet(lazyfreed_objects,0);
+}
+
+/* Return the amount of work needed in order to free an object.
+ * The return value is not always the actual number of allocations the
+ * object is composed of, but a number proportional to it.
+ *
+ * For strings the function always returns 1.
+ *
+ * For aggregated objects represented by hash tables or other data structures
+ * the function just returns the number of elements the object is composed of.
+ *
+ * Objects composed of single allocations are always reported as having a
+ * single item even if they are actually logical composed of multiple
+ * elements.
+ *
+ * For lists the function returns the number of elements in the quicklist
+ * representing the list. */
+size_t lazyfreeGetFreeEffort(robj *key, robj *obj, int dbid) {
+    if (obj->type == OBJ_LIST && obj->encoding == OBJ_ENCODING_QUICKLIST) {
+        /* One effort unit per quicklist node. */
+        quicklist *ql = obj->ptr;
+        return ql->len;
+    } else if (obj->type == OBJ_SET && obj->encoding == OBJ_ENCODING_HT) {
+        dict *ht = obj->ptr;
+        return dictSize(ht);
+    } else if (obj->type == OBJ_ZSET && obj->encoding == OBJ_ENCODING_SKIPLIST){
+        zset *zs = obj->ptr;
+        return zs->zsl->length;
+    } else if (obj->type == OBJ_HASH && obj->encoding == OBJ_ENCODING_HT) {
+        dict *ht = obj->ptr;
+        return dictSize(ht);
+    } else if (obj->type == OBJ_STREAM) {
+        size_t effort = 0;
+        stream *s = obj->ptr;
+
+        /* Make a best effort estimate to maintain constant runtime. Every macro
+         * node in the Stream is one allocation. */
+        effort += s->rax->numnodes;
+
+        /* Every consumer group is an allocation and so are the entries in its
+         * PEL. We use size of the first group's PEL as an estimate for all
+         * others. */
+        if (s->cgroups && raxSize(s->cgroups)) {
+            raxIterator ri;
+            streamCG *cg;
+            raxStart(&ri,s->cgroups);
+            raxSeek(&ri,"^",NULL,0);
+            /* There must be at least one group so the following should always
+             * work. */
+            serverAssert(raxNext(&ri));
+            cg = ri.data;
+            effort += raxSize(s->cgroups)*(1+raxSize(cg->pel));
+            raxStop(&ri);
+        }
+        return effort;
+    } else if (obj->type == OBJ_MODULE) {
+        size_t effort = moduleGetFreeEffort(key, obj, dbid);
+        /* If the module's free_effort returns 0, we will use asynchronous free
+         * memory by default. ULONG_MAX guarantees the threshold test passes. */
+        return effort == 0 ? ULONG_MAX : effort;
+    } else {
+        return 1; /* Everything else is a single allocation. */
+    }
+}
+
+/* If there are enough allocations to free the value object asynchronously, it
+ * may be put into a lazy free list instead of being freed synchronously. The
+ * lazy free list will be reclaimed in a different bio.c thread. If the value is
+ * composed of a few allocations, to free in a lazy way is actually just
+ * slower... So under a certain limit we just free the object synchronously. */
+#define LAZYFREE_THRESHOLD 64
+
+/* Free an object, if the object is huge enough, free it in async way. */
+void freeObjAsync(robj *key, robj *obj, int dbid) {
+    size_t effort = lazyfreeGetFreeEffort(key,obj,dbid);
+    /* A shared object (refcount > 1) cannot be reclaimed here: parts of
+     * the Redis core may incrRefCount() an object to protect it and then
+     * call dbDelete(). Only hand exclusively owned, expensive objects to
+     * the lazy free thread. */
+    if (effort > LAZYFREE_THRESHOLD && obj->refcount == 1) {
+        atomicIncr(lazyfree_objects,1);
+        bioCreateLazyFreeJob(lazyfreeFreeObject,1,obj);
+    } else {
+        decrRefCount(obj);
+    }
+}
+
+/* Empty a Redis DB asynchronously: install fresh empty hash tables on the
+ * db and hand the old ones over to the lazy free thread. */
+void emptyDbAsync(redisDb *db) {
+    dict *old_keys = db->dict;
+    dict *old_expires = db->expires;
+
+    db->dict = dictCreate(&dbDictType);
+    db->expires = dictCreate(&dbExpiresDictType);
+    atomicIncr(lazyfree_objects,dictSize(old_keys));
+    bioCreateLazyFreeJob(lazyfreeFreeDatabase,2,old_keys,old_expires);
+}
+
+/* Free the key tracking table, asynchronously when it is large enough. */
+void freeTrackingRadixTreeAsync(rax *tracking) {
+    /* This rax stores only keys (no values), so the node count is used as
+     * the measure of the work required to release it. */
+    if (tracking->numnodes <= LAZYFREE_THRESHOLD) {
+        freeTrackingRadixTree(tracking);
+    } else {
+        atomicIncr(lazyfree_objects,tracking->numele);
+        bioCreateLazyFreeJob(lazyFreeTrackingTable,1,tracking);
+    }
+}
+
+/* Free the lua_scripts dict, asynchronously when it is large enough. */
+void freeLuaScriptsAsync(dict *lua_scripts) {
+    if (dictSize(lua_scripts) <= LAZYFREE_THRESHOLD) {
+        dictRelease(lua_scripts);
+    } else {
+        atomicIncr(lazyfree_objects,dictSize(lua_scripts));
+        bioCreateLazyFreeJob(lazyFreeLuaScripts,1,lua_scripts);
+    }
+}
+
+/* Free the functions ctx, asynchronously when it holds enough functions. */
+void freeFunctionsAsync(functionsLibCtx *functions_lib_ctx) {
+    size_t num_functions = functionsLibCtxfunctionsLen(functions_lib_ctx);
+
+    if (num_functions > LAZYFREE_THRESHOLD) {
+        atomicIncr(lazyfree_objects,num_functions);
+        bioCreateLazyFreeJob(lazyFreeFunctionsCtx,1,functions_lib_ctx);
+    } else {
+        functionsLibCtxFree(functions_lib_ctx);
+    }
+}
+
+/* Free replication backlog referencing buffer blocks and rax index,
+ * asynchronously when either structure is large. */
+void freeReplicationBacklogRefMemAsync(list *blocks, rax *index) {
+    if (listLength(blocks) <= LAZYFREE_THRESHOLD &&
+        raxSize(index) <= LAZYFREE_THRESHOLD)
+    {
+        listRelease(blocks);
+        raxFree(index);
+    } else {
+        atomicIncr(lazyfree_objects,listLength(blocks)+raxSize(index));
+        bioCreateLazyFreeJob(lazyFreeReplicationBacklogRefMem,2,blocks,index);
+    }
+}
diff --git a/src/listpack.c b/src/listpack.c
new file mode 100644
index 0000000..ecc7e9f
--- /dev/null
+++ b/src/listpack.c
@@ -0,0 +1,2660 @@
+/* Listpack -- A lists of strings serialization format
+ *
+ * This file implements the specification you can find at:
+ *
+ * https://github.com/antirez/listpack
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2020, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "listpack.h"
+#include "listpack_malloc.h"
+#include "redisassert.h"
+#include "util.h"
+
+#define LP_HDR_SIZE 6 /* 32 bit total len + 16 bit number of elements. */
+#define LP_HDR_NUMELE_UNKNOWN UINT16_MAX
+#define LP_MAX_INT_ENCODING_LEN 9
+#define LP_MAX_BACKLEN_SIZE 5
+#define LP_ENCODING_INT 0
+#define LP_ENCODING_STRING 1
+
+#define LP_ENCODING_7BIT_UINT 0
+#define LP_ENCODING_7BIT_UINT_MASK 0x80
+#define LP_ENCODING_IS_7BIT_UINT(byte) (((byte)&LP_ENCODING_7BIT_UINT_MASK)==LP_ENCODING_7BIT_UINT)
+#define LP_ENCODING_7BIT_UINT_ENTRY_SIZE 2
+
+#define LP_ENCODING_6BIT_STR 0x80
+#define LP_ENCODING_6BIT_STR_MASK 0xC0
+#define LP_ENCODING_IS_6BIT_STR(byte) (((byte)&LP_ENCODING_6BIT_STR_MASK)==LP_ENCODING_6BIT_STR)
+
+#define LP_ENCODING_13BIT_INT 0xC0
+#define LP_ENCODING_13BIT_INT_MASK 0xE0
+#define LP_ENCODING_IS_13BIT_INT(byte) (((byte)&LP_ENCODING_13BIT_INT_MASK)==LP_ENCODING_13BIT_INT)
+#define LP_ENCODING_13BIT_INT_ENTRY_SIZE 3
+
+#define LP_ENCODING_12BIT_STR 0xE0
+#define LP_ENCODING_12BIT_STR_MASK 0xF0
+#define LP_ENCODING_IS_12BIT_STR(byte) (((byte)&LP_ENCODING_12BIT_STR_MASK)==LP_ENCODING_12BIT_STR)
+
+#define LP_ENCODING_16BIT_INT 0xF1
+#define LP_ENCODING_16BIT_INT_MASK 0xFF
+#define LP_ENCODING_IS_16BIT_INT(byte) (((byte)&LP_ENCODING_16BIT_INT_MASK)==LP_ENCODING_16BIT_INT)
+#define LP_ENCODING_16BIT_INT_ENTRY_SIZE 4
+
+#define LP_ENCODING_24BIT_INT 0xF2
+#define LP_ENCODING_24BIT_INT_MASK 0xFF
+#define LP_ENCODING_IS_24BIT_INT(byte) (((byte)&LP_ENCODING_24BIT_INT_MASK)==LP_ENCODING_24BIT_INT)
+#define LP_ENCODING_24BIT_INT_ENTRY_SIZE 5
+
+#define LP_ENCODING_32BIT_INT 0xF3
+#define LP_ENCODING_32BIT_INT_MASK 0xFF
+#define LP_ENCODING_IS_32BIT_INT(byte) (((byte)&LP_ENCODING_32BIT_INT_MASK)==LP_ENCODING_32BIT_INT)
+#define LP_ENCODING_32BIT_INT_ENTRY_SIZE 6
+
+#define LP_ENCODING_64BIT_INT 0xF4
+#define LP_ENCODING_64BIT_INT_MASK 0xFF
+#define LP_ENCODING_IS_64BIT_INT(byte) (((byte)&LP_ENCODING_64BIT_INT_MASK)==LP_ENCODING_64BIT_INT)
+#define LP_ENCODING_64BIT_INT_ENTRY_SIZE 10
+
+#define LP_ENCODING_32BIT_STR 0xF0
+#define LP_ENCODING_32BIT_STR_MASK 0xFF
+#define LP_ENCODING_IS_32BIT_STR(byte) (((byte)&LP_ENCODING_32BIT_STR_MASK)==LP_ENCODING_32BIT_STR)
+
+#define LP_EOF 0xFF
+
+#define LP_ENCODING_6BIT_STR_LEN(p) ((p)[0] & 0x3F)
+#define LP_ENCODING_12BIT_STR_LEN(p) ((((p)[0] & 0xF) << 8) | (p)[1])
+#define LP_ENCODING_32BIT_STR_LEN(p) (((uint32_t)(p)[1]<<0) | \
+ ((uint32_t)(p)[2]<<8) | \
+ ((uint32_t)(p)[3]<<16) | \
+ ((uint32_t)(p)[4]<<24))
+
+#define lpGetTotalBytes(p) (((uint32_t)(p)[0]<<0) | \
+ ((uint32_t)(p)[1]<<8) | \
+ ((uint32_t)(p)[2]<<16) | \
+ ((uint32_t)(p)[3]<<24))
+
+#define lpGetNumElements(p) (((uint32_t)(p)[4]<<0) | \
+ ((uint32_t)(p)[5]<<8))
+#define lpSetTotalBytes(p,v) do { \
+ (p)[0] = (v)&0xff; \
+ (p)[1] = ((v)>>8)&0xff; \
+ (p)[2] = ((v)>>16)&0xff; \
+ (p)[3] = ((v)>>24)&0xff; \
+} while(0)
+
+#define lpSetNumElements(p,v) do { \
+ (p)[4] = (v)&0xff; \
+ (p)[5] = ((v)>>8)&0xff; \
+} while(0)
+
+/* Validates that 'p' is not outside the listpack.
+ * All function that return a pointer to an element in the listpack will assert
+ * that this element is valid, so it can be freely used.
+ * Generally functions such lpNext and lpDelete assume the input pointer is
+ * already validated (since it's the return value of another function). */
+#define ASSERT_INTEGRITY(lp, p) do { \
+ assert((p) >= (lp)+LP_HDR_SIZE && (p) < (lp)+lpGetTotalBytes((lp))); \
+} while (0)
+
+/* Similar to the above, but validates the entire element length rather than just
+ * it's pointer. */
+#define ASSERT_INTEGRITY_LEN(lp, p, len) do { \
+ assert((p) >= (lp)+LP_HDR_SIZE && (p)+(len) < (lp)+lpGetTotalBytes((lp))); \
+} while (0)
+
+static inline void lpAssertValidEntry(unsigned char* lp, size_t lpbytes, unsigned char *p);
+
+/* Don't let listpacks grow over 1GB in any case, don't wanna risk overflow in
+ * Total Bytes header field */
+#define LISTPACK_MAX_SAFETY_SIZE (1<<30)
+
+/* Return 1 if growing 'lp' by 'add' bytes keeps it under the 1GB safety
+ * limit, 0 otherwise. A NULL listpack counts as zero current bytes. */
+int lpSafeToAdd(unsigned char* lp, size_t add) {
+    size_t cur = lp ? lpGetTotalBytes(lp) : 0;
+    return (cur + add <= LISTPACK_MAX_SAFETY_SIZE) ? 1 : 0;
+}
+
+/* Convert a string into a signed 64 bit integer.
+ * The function returns 1 if the string could be parsed into a (non-overflowing)
+ * signed 64 bit int, 0 otherwise. The 'value' will be set to the parsed value
+ * when the function returns success.
+ *
+ * Note that this function demands that the string strictly represents
+ * a int64 value: no spaces or other characters before or after the string
+ * representing the number are accepted, nor zeroes at the start if not
+ * for the string "0" representing the zero number.
+ *
+ * Because of its strictness, it is safe to use this function to check if
+ * you can convert a string into a long long, and obtain back the string
+ * from the number without any loss in the string representation. *
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Credits: this function was adapted from the Redis source code, file
+ * "utils.c", function string2ll(), and is copyright:
+ *
+ * Copyright(C) 2011, Pieter Noordhuis
+ * Copyright(C) 2011, Salvatore Sanfilippo
+ *
+ * The function is released under the BSD 3-clause license.
+ */
+int lpStringToInt64(const char *s, unsigned long slen, int64_t *value) {
+    const char *p = s;
+    unsigned long plen = 0;
+    int negative = 0;
+    /* Accumulate the magnitude as unsigned so the INT64_MIN edge case can
+     * be checked without signed overflow. */
+    uint64_t v;
+
+    /* Abort if length indicates this cannot possibly be an int */
+    if (slen == 0 || slen >= LONG_STR_SIZE)
+        return 0;
+
+    /* Special case: first and only digit is 0. */
+    if (slen == 1 && p[0] == '0') {
+        if (value != NULL) *value = 0;
+        return 1;
+    }
+
+    if (p[0] == '-') {
+        negative = 1;
+        p++; plen++;
+
+        /* Abort on only a negative sign. */
+        if (plen == slen)
+            return 0;
+    }
+
+    /* First digit should be 1-9, otherwise the string should just be 0. */
+    if (p[0] >= '1' && p[0] <= '9') {
+        v = p[0]-'0';
+        p++; plen++;
+    } else {
+        return 0;
+    }
+
+    /* Both checks are performed BEFORE the multiply/add so 'v' can never
+     * wrap around silently. */
+    while (plen < slen && p[0] >= '0' && p[0] <= '9') {
+        if (v > (UINT64_MAX / 10)) /* Overflow. */
+            return 0;
+        v *= 10;
+
+        if (v > (UINT64_MAX - (p[0]-'0'))) /* Overflow. */
+            return 0;
+        v += p[0]-'0';
+
+        p++; plen++;
+    }
+
+    /* Return if not all bytes were used. */
+    if (plen < slen)
+        return 0;
+
+    if (negative) {
+        /* (uint64_t)(-(INT64_MIN+1))+1 == 2^63, the magnitude of INT64_MIN,
+         * computed without overflowing a signed type. */
+        if (v > ((uint64_t)(-(INT64_MIN+1))+1) ) /* Overflow. */
+            return 0;
+        if (value != NULL) *value = -v;
+    } else {
+        if (v > INT64_MAX) /* Overflow. */
+            return 0;
+        if (value != NULL) *value = v;
+    }
+    return 1;
+}
+
+/* Create a new, empty listpack able to hold at least 'capacity' bytes
+ * (never less than header + EOF terminator). Returns NULL if the
+ * allocator fails; over-allocated room can be released later with
+ * lpShrinkToFit(). */
+unsigned char *lpNew(size_t capacity) {
+    const size_t min_size = LP_HDR_SIZE+1;
+    unsigned char *lp = lp_malloc(capacity > min_size ? capacity : min_size);
+
+    if (lp == NULL) return NULL;
+    lpSetTotalBytes(lp,min_size);
+    lpSetNumElements(lp,0);
+    lp[LP_HDR_SIZE] = LP_EOF;
+    return lp;
+}
+
+/* Free the specified listpack, allocated via the lp_malloc()/lp_realloc()
+ * wrappers. */
+void lpFree(unsigned char *lp) {
+    lp_free(lp);
+}
+
+/* Shrink the listpack allocation down to the bytes actually used, when the
+ * allocator gave us more. Returns the (possibly moved) listpack pointer. */
+unsigned char* lpShrinkToFit(unsigned char *lp) {
+    size_t used = lpGetTotalBytes(lp);
+
+    if (used >= lp_malloc_size(lp)) return lp;
+    return lp_realloc(lp, used);
+}
+
+/* Stores the integer encoded representation of 'v' in the 'intenc' buffer,
+ * picking the smallest of the 7/13/16/24/32/64 bit listpack integer
+ * encodings able to represent it, and reports the number of bytes written
+ * (1 to 9) via 'enclen'. Negative values are stored as unsigned two's
+ * complement within the chosen bit width; multi-byte payloads are little
+ * endian. */
+static inline void lpEncodeIntegerGetType(int64_t v, unsigned char *intenc, uint64_t *enclen) {
+    if (v >= 0 && v <= 127) {
+        /* Single byte 0-127 integer. */
+        intenc[0] = v;
+        *enclen = 1;
+    } else if (v >= -4096 && v <= 4095) {
+        /* 13 bit integer. */
+        if (v < 0) v = ((int64_t)1<<13)+v; /* Two's complement in 13 bits. */
+        intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT;
+        intenc[1] = v&0xff;
+        *enclen = 2;
+    } else if (v >= -32768 && v <= 32767) {
+        /* 16 bit integer. */
+        if (v < 0) v = ((int64_t)1<<16)+v;
+        intenc[0] = LP_ENCODING_16BIT_INT;
+        intenc[1] = v&0xff;
+        intenc[2] = v>>8;
+        *enclen = 3;
+    } else if (v >= -8388608 && v <= 8388607) {
+        /* 24 bit integer. */
+        if (v < 0) v = ((int64_t)1<<24)+v;
+        intenc[0] = LP_ENCODING_24BIT_INT;
+        intenc[1] = v&0xff;
+        intenc[2] = (v>>8)&0xff;
+        intenc[3] = v>>16;
+        *enclen = 4;
+    } else if (v >= -2147483648 && v <= 2147483647) {
+        /* 32 bit integer. */
+        if (v < 0) v = ((int64_t)1<<32)+v;
+        intenc[0] = LP_ENCODING_32BIT_INT;
+        intenc[1] = v&0xff;
+        intenc[2] = (v>>8)&0xff;
+        intenc[3] = (v>>16)&0xff;
+        intenc[4] = v>>24;
+        *enclen = 5;
+    } else {
+        /* 64 bit integer: reinterpret as unsigned so the shifts below are
+         * well defined for negative values too. */
+        uint64_t uv = v;
+        intenc[0] = LP_ENCODING_64BIT_INT;
+        intenc[1] = uv&0xff;
+        intenc[2] = (uv>>8)&0xff;
+        intenc[3] = (uv>>16)&0xff;
+        intenc[4] = (uv>>24)&0xff;
+        intenc[5] = (uv>>32)&0xff;
+        intenc[6] = (uv>>40)&0xff;
+        intenc[7] = (uv>>48)&0xff;
+        intenc[8] = uv>>56;
+        *enclen = 9;
+    }
+}
+
+/* Given an element 'ele' of size 'size', determine if the element can be
+ * represented inside the listpack encoded as integer, and returns
+ * LP_ENCODING_INT if so. Otherwise returns LP_ENCODING_STR if no integer
+ * encoding is possible.
+ *
+ * If the LP_ENCODING_INT is returned, the function stores the integer encoded
+ * representation of the element in the 'intenc' buffer.
+ *
+ * Regardless of the returned encoding, 'enclen' is populated by reference to
+ * the number of bytes that the string or integer encoded element will require
+ * in order to be represented. */
+static inline int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, uint64_t *enclen) {
+ int64_t v;
+ if (lpStringToInt64((const char*)ele, size, &v)) {
+ lpEncodeIntegerGetType(v, intenc, enclen);
+ return LP_ENCODING_INT;
+ } else {
+ if (size < 64) *enclen = 1+size;
+ else if (size < 4096) *enclen = 2+size;
+ else *enclen = 5+(uint64_t)size;
+ return LP_ENCODING_STRING;
+ }
+}
+
/* Store a reverse-encoded variable length field, representing the length
 * of the previous element of size 'l', in the target buffer 'buf'.
 * The function returns the number of bytes used to encode it, from
 * 1 to 5. If 'buf' is NULL the function just returns the number of bytes
 * needed in order to encode the backlen.
 *
 * The encoding stores the most significant 7-bit group first; every byte
 * except the first has its high bit set, so the field can be parsed
 * right-to-left (see lpDecodeBacklen) when walking the listpack backward. */
static inline unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {
    if (l <= 127) {
        /* 7 bits: single byte, high bit clear. */
        if (buf) buf[0] = l;
        return 1;
    } else if (l < 16383) {
        /* 14 bits, two bytes. */
        if (buf) {
            buf[0] = l>>7;
            buf[1] = (l&127)|128;
        }
        return 2;
    } else if (l < 2097151) {
        /* 21 bits, three bytes. */
        if (buf) {
            buf[0] = l>>14;
            buf[1] = ((l>>7)&127)|128;
            buf[2] = (l&127)|128;
        }
        return 3;
    } else if (l < 268435455) {
        /* 28 bits, four bytes. */
        if (buf) {
            buf[0] = l>>21;
            buf[1] = ((l>>14)&127)|128;
            buf[2] = ((l>>7)&127)|128;
            buf[3] = (l&127)|128;
        }
        return 4;
    } else {
        /* 35 bits, five bytes: enough for any length up to UINT32_MAX. */
        if (buf) {
            buf[0] = l>>28;
            buf[1] = ((l>>21)&127)|128;
            buf[2] = ((l>>14)&127)|128;
            buf[3] = ((l>>7)&127)|128;
            buf[4] = (l&127)|128;
        }
        return 5;
    }
}
+
/* Decode the backlen and returns it. If the encoding looks invalid (more than
 * 5 bytes are used), UINT64_MAX is returned to report the problem.
 *
 * 'p' must point to the LAST (rightmost) byte of the backlen field: that
 * byte holds the least significant 7 bits, and decoding proceeds leftward
 * until a byte with the high (continuation) bit clear is found. */
static inline uint64_t lpDecodeBacklen(unsigned char *p) {
    uint64_t val = 0;
    uint64_t shift = 0;
    do {
        val |= (uint64_t)(p[0] & 127) << shift;
        if (!(p[0] & 128)) break; /* High bit clear: first byte reached. */
        shift += 7;
        p--; /* Backlen bytes are consumed right-to-left. */
        if (shift > 28) return UINT64_MAX; /* More than 5 bytes: corrupt. */
    } while(1);
    return val;
}
+
/* Encode the string element pointed by 's' of size 'len' in the target
 * buffer 'buf'. The function should be called with 'buf' having always enough
 * space for encoding the string. This is done by calling lpEncodeGetType()
 * before calling this function. */
static inline void lpEncodeString(unsigned char *buf, unsigned char *s, uint32_t len) {
    if (len < 64) {
        /* 6 bit length stored directly in the encoding byte. */
        buf[0] = len | LP_ENCODING_6BIT_STR;
        memcpy(buf+1,s,len);
    } else if (len < 4096) {
        /* 12 bit length: high 4 bits in the encoding byte, low 8 after. */
        buf[0] = (len >> 8) | LP_ENCODING_12BIT_STR;
        buf[1] = len & 0xff;
        memcpy(buf+2,s,len);
    } else {
        /* 32 bit length stored little endian after the encoding byte. */
        buf[0] = LP_ENCODING_32BIT_STR;
        buf[1] = len & 0xff;
        buf[2] = (len >> 8) & 0xff;
        buf[3] = (len >> 16) & 0xff;
        buf[4] = (len >> 24) & 0xff;
        memcpy(buf+5,s,len);
    }
}
+
/* Return the encoded length of the listpack element pointed by 'p'.
 * This includes the encoding byte, length bytes, and the element data itself,
 * but NOT the trailing backlen field.
 * If the element encoding is wrong then 0 is returned.
 * Note that this method may access additional bytes (in case of 12 and 32 bit
 * str), so should only be called when we know 'p' was already validated by
 * lpCurrentEncodedSizeBytes or ASSERT_INTEGRITY_LEN (possibly since 'p' is
 * a return value of another function that validated its return). */
static inline uint32_t lpCurrentEncodedSizeUnsafe(unsigned char *p) {
    if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;
    if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1+LP_ENCODING_6BIT_STR_LEN(p);
    if (LP_ENCODING_IS_13BIT_INT(p[0])) return 2;
    if (LP_ENCODING_IS_16BIT_INT(p[0])) return 3;
    if (LP_ENCODING_IS_24BIT_INT(p[0])) return 4;
    if (LP_ENCODING_IS_32BIT_INT(p[0])) return 5;
    if (LP_ENCODING_IS_64BIT_INT(p[0])) return 9;
    if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2+LP_ENCODING_12BIT_STR_LEN(p);
    if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5+LP_ENCODING_32BIT_STR_LEN(p);
    if (p[0] == LP_EOF) return 1;
    return 0; /* Unknown encoding byte: corrupt data. */
}
+
+/* Return bytes needed to encode the length of the listpack element pointed by 'p'.
+ * This includes just the encoding byte, and the bytes needed to encode the length
+ * of the element (excluding the element data itself)
+ * If the element encoding is wrong then 0 is returned. */
+static inline uint32_t lpCurrentEncodedSizeBytes(unsigned char *p) {
+ if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;
+ if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1;
+ if (LP_ENCODING_IS_13BIT_INT(p[0])) return 1;
+ if (LP_ENCODING_IS_16BIT_INT(p[0])) return 1;
+ if (LP_ENCODING_IS_24BIT_INT(p[0])) return 1;
+ if (LP_ENCODING_IS_32BIT_INT(p[0])) return 1;
+ if (LP_ENCODING_IS_64BIT_INT(p[0])) return 1;
+ if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2;
+ if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5;
+ if (p[0] == LP_EOF) return 1;
+ return 0;
+}
+
+/* Skip the current entry returning the next. It is invalid to call this
+ * function if the current element is the EOF element at the end of the
+ * listpack, however, while this function is used to implement lpNext(),
+ * it does not return NULL when the EOF element is encountered. */
+unsigned char *lpSkip(unsigned char *p) {
+ unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);
+ entrylen += lpEncodeBacklen(NULL,entrylen);
+ p += entrylen;
+ return p;
+}
+
+/* If 'p' points to an element of the listpack, calling lpNext() will return
+ * the pointer to the next element (the one on the right), or NULL if 'p'
+ * already pointed to the last element of the listpack. */
+unsigned char *lpNext(unsigned char *lp, unsigned char *p) {
+ assert(p);
+ p = lpSkip(p);
+ if (p[0] == LP_EOF) return NULL;
+ lpAssertValidEntry(lp, lpBytes(lp), p);
+ return p;
+}
+
/* If 'p' points to an element of the listpack, calling lpPrev() will return
 * the pointer to the previous element (the one on the left), or NULL if 'p'
 * already pointed to the first element of the listpack. */
unsigned char *lpPrev(unsigned char *lp, unsigned char *p) {
    assert(p);
    /* The first element starts right after the header: nothing before it. */
    if (p-lp == LP_HDR_SIZE) return NULL;
    p--; /* Seek the last backlen byte of the previous element. */
    uint64_t prevlen = lpDecodeBacklen(p);
    /* Add the size of the backlen field itself to the entry length. */
    prevlen += lpEncodeBacklen(NULL,prevlen);
    p -= prevlen-1; /* Seek the first byte of the previous entry. */
    lpAssertValidEntry(lp, lpBytes(lp), p);
    return p;
}
+
+/* Return a pointer to the first element of the listpack, or NULL if the
+ * listpack has no elements. */
+unsigned char *lpFirst(unsigned char *lp) {
+ unsigned char *p = lp + LP_HDR_SIZE; /* Skip the header. */
+ if (p[0] == LP_EOF) return NULL;
+ lpAssertValidEntry(lp, lpBytes(lp), p);
+ return p;
+}
+
/* Return a pointer to the last element of the listpack, or NULL if the
 * listpack has no elements. */
unsigned char *lpLast(unsigned char *lp) {
    /* Position on the EOF marker, then step back one entry. */
    unsigned char *eof = lp+lpGetTotalBytes(lp)-1;
    return lpPrev(lp,eof); /* NULL when EOF is the only element. */
}
+
+/* Return the number of elements inside the listpack. This function attempts
+ * to use the cached value when within range, otherwise a full scan is
+ * needed. As a side effect of calling this function, the listpack header
+ * could be modified, because if the count is found to be already within
+ * the 'numele' header field range, the new value is set. */
+unsigned long lpLength(unsigned char *lp) {
+ uint32_t numele = lpGetNumElements(lp);
+ if (numele != LP_HDR_NUMELE_UNKNOWN) return numele;
+
+ /* Too many elements inside the listpack. We need to scan in order
+ * to get the total number. */
+ uint32_t count = 0;
+ unsigned char *p = lpFirst(lp);
+ while(p) {
+ count++;
+ p = lpNext(lp,p);
+ }
+
+ /* If the count is again within range of the header numele field,
+ * set it. */
+ if (count < LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp,count);
+ return count;
+}
+
/* Return the listpack element pointed by 'p'.
 *
 * The function changes behavior depending on the passed 'intbuf' value.
 * Specifically, if 'intbuf' is NULL:
 *
 * If the element is internally encoded as an integer, the function returns
 * NULL and populates the integer value by reference in 'count'. Otherwise if
 * the element is encoded as a string a pointer to the string (pointing inside
 * the listpack itself) is returned, and 'count' is set to the length of the
 * string.
 *
 * If instead 'intbuf' points to a buffer passed by the caller, that must be
 * at least LP_INTBUF_SIZE bytes, the function always returns the element as
 * it was a string (returning the pointer to the string and setting the
 * 'count' argument to the string length by reference). However if the element
 * is encoded as an integer, the 'intbuf' buffer is used in order to store
 * the string representation.
 *
 * The user should use one or the other form depending on what the value will
 * be used for. If there is immediate usage for an integer value returned
 * by the function, then to pass a buffer (and convert it back to a number)
 * is of course useless.
 *
 * If 'entry_size' is not NULL, *entry_size is set to the entry length of the
 * listpack element pointed by 'p'. This includes the encoding bytes, length
 * bytes, the element data itself, and the backlen bytes.
 *
 * If the function is called against a badly encoded listpack, so that there
 * is no valid way to parse it, the function returns like if there was an
 * integer encoded with value 12345678900000000 + <unrecognized byte>, this may
 * be an hint to understand that something is wrong. To crash in this case is
 * not sensible because of the different requirements of the application using
 * this lib.
 *
 * Similarly, there is no error returned since the listpack normally can be
 * assumed to be valid, so that would be a very high API cost. */
static inline unsigned char *lpGetWithSize(unsigned char *p, int64_t *count, unsigned char *intbuf, uint64_t *entry_size) {
    int64_t val;
    /* For integer encodings the raw value is decoded into 'uval';
     * 'negstart'/'negmax' describe where the negative range begins for the
     * encoding width, driving the two's complement conversion below. */
    uint64_t uval, negstart, negmax;

    assert(p); /* assertion for valgrind (avoid NPD) */
    if (LP_ENCODING_IS_7BIT_UINT(p[0])) {
        negstart = UINT64_MAX; /* 7 bit ints are always positive. */
        negmax = 0;
        uval = p[0] & 0x7f;
        if (entry_size) *entry_size = LP_ENCODING_7BIT_UINT_ENTRY_SIZE;
    } else if (LP_ENCODING_IS_6BIT_STR(p[0])) {
        /* String encodings return directly: payload starts past the header. */
        *count = LP_ENCODING_6BIT_STR_LEN(p);
        if (entry_size) *entry_size = 1 + *count + lpEncodeBacklen(NULL, *count + 1);
        return p+1;
    } else if (LP_ENCODING_IS_13BIT_INT(p[0])) {
        uval = ((p[0]&0x1f)<<8) | p[1];
        negstart = (uint64_t)1<<12;
        negmax = 8191;
        if (entry_size) *entry_size = LP_ENCODING_13BIT_INT_ENTRY_SIZE;
    } else if (LP_ENCODING_IS_16BIT_INT(p[0])) {
        /* Multi-byte integers are stored little endian. */
        uval = (uint64_t)p[1] |
               (uint64_t)p[2]<<8;
        negstart = (uint64_t)1<<15;
        negmax = UINT16_MAX;
        if (entry_size) *entry_size = LP_ENCODING_16BIT_INT_ENTRY_SIZE;
    } else if (LP_ENCODING_IS_24BIT_INT(p[0])) {
        uval = (uint64_t)p[1] |
               (uint64_t)p[2]<<8 |
               (uint64_t)p[3]<<16;
        negstart = (uint64_t)1<<23;
        negmax = UINT32_MAX>>8;
        if (entry_size) *entry_size = LP_ENCODING_24BIT_INT_ENTRY_SIZE;
    } else if (LP_ENCODING_IS_32BIT_INT(p[0])) {
        uval = (uint64_t)p[1] |
               (uint64_t)p[2]<<8 |
               (uint64_t)p[3]<<16 |
               (uint64_t)p[4]<<24;
        negstart = (uint64_t)1<<31;
        negmax = UINT32_MAX;
        if (entry_size) *entry_size = LP_ENCODING_32BIT_INT_ENTRY_SIZE;
    } else if (LP_ENCODING_IS_64BIT_INT(p[0])) {
        uval = (uint64_t)p[1] |
               (uint64_t)p[2]<<8 |
               (uint64_t)p[3]<<16 |
               (uint64_t)p[4]<<24 |
               (uint64_t)p[5]<<32 |
               (uint64_t)p[6]<<40 |
               (uint64_t)p[7]<<48 |
               (uint64_t)p[8]<<56;
        negstart = (uint64_t)1<<63;
        negmax = UINT64_MAX;
        if (entry_size) *entry_size = LP_ENCODING_64BIT_INT_ENTRY_SIZE;
    } else if (LP_ENCODING_IS_12BIT_STR(p[0])) {
        *count = LP_ENCODING_12BIT_STR_LEN(p);
        if (entry_size) *entry_size = 2 + *count + lpEncodeBacklen(NULL, *count + 2);
        return p+2;
    } else if (LP_ENCODING_IS_32BIT_STR(p[0])) {
        *count = LP_ENCODING_32BIT_STR_LEN(p);
        if (entry_size) *entry_size = 5 + *count + lpEncodeBacklen(NULL, *count + 5);
        return p+5;
    } else {
        /* Unknown encoding byte: return the recognizable sentinel value
         * documented above instead of crashing. */
        uval = 12345678900000000ULL + p[0];
        negstart = UINT64_MAX;
        negmax = 0;
    }

    /* We reach this code path only for integer encodings.
     * Convert the unsigned value to the signed one using two's complement
     * rule. */
    if (uval >= negstart) {
        /* This three steps conversion should avoid undefined behaviors
         * in the unsigned -> signed conversion. */
        uval = negmax-uval;
        val = uval;
        val = -val-1;
    } else {
        val = uval;
    }

    /* Return the string representation of the integer or the value itself
     * depending on intbuf being NULL or not. */
    if (intbuf) {
        *count = ll2string((char*)intbuf,LP_INTBUF_SIZE,(long long)val);
        return intbuf;
    } else {
        *count = val;
        return NULL;
    }
}
+
/* Like lpGetWithSize() but without reporting the total entry size to the
 * caller. See lpGetWithSize() for the full contract. */
unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
    return lpGetWithSize(p, count, intbuf, NULL);
}
+
+/* This is just a wrapper to lpGet() that is able to get entry value directly.
+ * When the function returns NULL, it populates the integer value by reference in 'lval'.
+ * Otherwise if the element is encoded as a string a pointer to the string (pointing
+ * inside the listpack itself) is returned, and 'slen' is set to the length of the
+ * string. */
+unsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval) {
+ unsigned char *vstr;
+ int64_t ele_len;
+
+ vstr = lpGet(p, &ele_len, NULL);
+ if (vstr) {
+ *slen = ele_len;
+ } else {
+ *lval = ele_len;
+ }
+ return vstr;
+}
+
/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries
 * between every comparison. Returns NULL when the field could not be found.
 *
 * 'p' is the entry to start the search from (usually lpFirst() or a field
 * entry when scanning hash-like listpacks with skip=1). */
unsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s,
                      uint32_t slen, unsigned int skip) {
    int skipcnt = 0;
    /* Lazily computed integer encoding of 's': 0 = not tried yet,
     * 1 = 's' encodes as integer 'vll', UCHAR_MAX = not representable. */
    unsigned char vencoding = 0;
    unsigned char *value;
    int64_t ll, vll;
    uint64_t entry_size = 123456789; /* initialized to avoid warning. */
    uint32_t lp_bytes = lpBytes(lp);

    assert(p);
    while (p) {
        if (skipcnt == 0) {
            value = lpGetWithSize(p, &ll, NULL, &entry_size);
            if (value) {
                /* check the value doesn't reach outside the listpack before accessing it */
                assert(p >= lp + LP_HDR_SIZE && p + entry_size < lp + lp_bytes);
                if (slen == ll && memcmp(value, s, slen) == 0) {
                    return p;
                }
            } else {
                /* Find out if the searched field can be encoded. Note that
                 * we do it only the first time, once done vencoding is set
                 * to non-zero and vll is set to the integer value. */
                if (vencoding == 0) {
                    /* If the entry can be encoded as integer we set it to
                     * 1, else set it to UCHAR_MAX, so that we don't retry
                     * again the next time. */
                    if (slen >= 32 || slen == 0 || !lpStringToInt64((const char*)s, slen, &vll)) {
                        vencoding = UCHAR_MAX;
                    } else {
                        vencoding = 1;
                    }
                }

                /* Compare current entry with specified entry, do it only
                 * if vencoding != UCHAR_MAX because if there is no encoding
                 * possible for the field it can't be a valid integer. */
                if (vencoding != UCHAR_MAX && ll == vll) {
                    return p;
                }
            }

            /* Reset skip count */
            skipcnt = skip;
            /* 'entry_size' already includes the backlen: this advances
             * straight to the next entry. */
            p += entry_size;
        } else {
            /* Skip entry */
            skipcnt--;

            /* Move to next entry, avoid use `lpNext` due to `lpAssertValidEntry` in
             * `lpNext` will call `lpBytes`, will cause performance degradation */
            p = lpSkip(p);
        }

        /* The next call to lpGetWithSize could read at most 8 bytes past `p`
         * We use the slower validation call only when necessary. */
        if (p + 8 >= lp + lp_bytes)
            lpAssertValidEntry(lp, lp_bytes, p);
        else
            assert(p >= lp + LP_HDR_SIZE && p < lp + lp_bytes);
        if (p[0] == LP_EOF) break;
    }

    return NULL;
}
+
/* Insert, delete or replace the specified string element 'elestr' of length
 * 'size' or integer element 'eleint' at the specified position 'p', with 'p'
 * being a listpack element pointer obtained with lpFirst(), lpLast(), lpNext(),
 * lpPrev() or lpSeek().
 *
 * The element is inserted before, after, or replaces the element pointed
 * by 'p' depending on the 'where' argument, that can be LP_BEFORE, LP_AFTER
 * or LP_REPLACE.
 *
 * If both 'elestr' and `eleint` are NULL, the function removes the element
 * pointed by 'p' instead of inserting one.
 * If `eleint` is non-NULL, 'size' is the length of 'eleint', the function
 * inserts or replaces with a 64 bit integer, which is stored in the 'eleint'
 * buffer.
 * If 'elestr` is non-NULL, 'size' is the length of 'elestr', the function
 * inserts or replaces with a string, which is stored in the 'elestr' buffer.
 *
 * Returns NULL on out of memory or when the listpack total length would exceed
 * the max allowed size of 2^32-1, otherwise the new pointer to the listpack
 * holding the new element is returned (and the old pointer passed is no longer
 * considered valid)
 *
 * If 'newp' is not NULL, at the end of a successful call '*newp' will be set
 * to the address of the element just added, so that it will be possible to
 * continue an interaction with lpNext() and lpPrev().
 *
 * For deletion operations (both 'elestr' and 'eleint' set to NULL) 'newp' is
 * set to the next element, on the right of the deleted one, or to NULL if the
 * deleted element was the last one. */
unsigned char *lpInsert(unsigned char *lp, unsigned char *elestr, unsigned char *eleint,
                        uint32_t size, unsigned char *p, int where, unsigned char **newp)
{
    unsigned char intenc[LP_MAX_INT_ENCODING_LEN];
    unsigned char backlen[LP_MAX_BACKLEN_SIZE];

    uint64_t enclen; /* The length of the encoded element. */
    int delete = (elestr == NULL && eleint == NULL);

    /* when deletion, it is conceptually replacing the element with a
     * zero-length element. So whatever we get passed as 'where', set
     * it to LP_REPLACE. */
    if (delete) where = LP_REPLACE;

    /* If we need to insert after the current element, we just jump to the
     * next element (that could be the EOF one) and handle the case of
     * inserting before. So the function will actually deal with just two
     * cases: LP_BEFORE and LP_REPLACE. */
    if (where == LP_AFTER) {
        p = lpSkip(p);
        where = LP_BEFORE;
        ASSERT_INTEGRITY(lp, p);
    }

    /* Store the offset of the element 'p', so that we can obtain its
     * address again after a reallocation. */
    unsigned long poff = p-lp;

    int enctype;
    if (elestr) {
        /* Calling lpEncodeGetType() results into the encoded version of the
         * element to be stored into 'intenc' in case it is representable as
         * an integer: in that case, the function returns LP_ENCODING_INT.
         * Otherwise if LP_ENCODING_STR is returned, we'll have to call
         * lpEncodeString() to actually write the encoded string on place later.
         *
         * Whatever the returned encoding is, 'enclen' is populated with the
         * length of the encoded element. */
        enctype = lpEncodeGetType(elestr,size,intenc,&enclen);
        if (enctype == LP_ENCODING_INT) eleint = intenc;
    } else if (eleint) {
        enctype = LP_ENCODING_INT;
        enclen = size; /* 'size' is the length of the encoded integer element. */
    } else {
        /* Deletion: nothing to encode. */
        enctype = -1;
        enclen = 0;
    }

    /* We need to also encode the backward-parsable length of the element
     * and append it to the end: this allows to traverse the listpack from
     * the end to the start. */
    unsigned long backlen_size = (!delete) ? lpEncodeBacklen(backlen,enclen) : 0;
    uint64_t old_listpack_bytes = lpGetTotalBytes(lp);
    uint32_t replaced_len  = 0;
    if (where == LP_REPLACE) {
        replaced_len = lpCurrentEncodedSizeUnsafe(p);
        replaced_len += lpEncodeBacklen(NULL,replaced_len);
        ASSERT_INTEGRITY_LEN(lp, p, replaced_len);
    }

    uint64_t new_listpack_bytes = old_listpack_bytes + enclen + backlen_size
                                  - replaced_len;
    if (new_listpack_bytes > UINT32_MAX) return NULL;

    /* We now need to reallocate in order to make space or shrink the
     * allocation (in case 'where' value is LP_REPLACE and the new element is
     * smaller). However we do that before memmoving the memory to
     * make room for the new element if the final allocation will get
     * larger, or we do it after if the final allocation will get smaller. */

    unsigned char *dst = lp + poff; /* May be updated after reallocation. */

    /* Realloc before: we need more room. */
    if (new_listpack_bytes > old_listpack_bytes &&
        new_listpack_bytes > lp_malloc_size(lp)) {
        if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
        dst = lp + poff;
    }

    /* Setup the listpack relocating the elements to make the exact room
     * we need to store the new one. */
    if (where == LP_BEFORE) {
        memmove(dst+enclen+backlen_size,dst,old_listpack_bytes-poff);
    } else { /* LP_REPLACE. */
        memmove(dst+enclen+backlen_size,
                dst+replaced_len,
                old_listpack_bytes-poff-replaced_len);
    }

    /* Realloc after: we need to free space. */
    if (new_listpack_bytes < old_listpack_bytes) {
        if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
        dst = lp + poff;
    }

    /* Store the entry. */
    if (newp) {
        *newp = dst;
        /* In case of deletion, set 'newp' to NULL if the next element is
         * the EOF element. */
        if (delete && dst[0] == LP_EOF) *newp = NULL;
    }
    if (!delete) {
        if (enctype == LP_ENCODING_INT) {
            memcpy(dst,eleint,enclen);
        } else if (elestr) {
            lpEncodeString(dst,elestr,size);
        } else {
            redis_unreachable();
        }
        dst += enclen;
        memcpy(dst,backlen,backlen_size);
        dst += backlen_size;
    }

    /* Update header. Only insertions and deletions change the element
     * count; a plain replace does not. */
    if (where != LP_REPLACE || delete) {
        uint32_t num_elements = lpGetNumElements(lp);
        if (num_elements != LP_HDR_NUMELE_UNKNOWN) {
            if (!delete)
                lpSetNumElements(lp,num_elements+1);
            else
                lpSetNumElements(lp,num_elements-1);
        }
    }
    lpSetTotalBytes(lp,new_listpack_bytes);

#if 0
    /* This code path is normally disabled: what it does is to force listpack
     * to return *always* a new pointer after performing some modification to
     * the listpack, even if the previous allocation was enough. This is useful
     * in order to spot bugs in code using listpacks: by doing so we can find
     * if the caller forgets to set the new pointer where the listpack reference
     * is stored, after an update. */
    unsigned char *oldlp = lp;
    lp = lp_malloc(new_listpack_bytes);
    memcpy(lp,oldlp,new_listpack_bytes);
    if (newp) {
        unsigned long offset = (*newp)-oldlp;
        *newp = lp + offset;
    }
    /* Make sure the old allocation contains garbage. */
    memset(oldlp,'A',new_listpack_bytes);
    lp_free(oldlp);
#endif

    return lp;
}
+
/* This is just a wrapper for lpInsert() to directly use a string.
 * See lpInsert() for the meaning of 'p', 'where' and 'newp'. */
unsigned char *lpInsertString(unsigned char *lp, unsigned char *s, uint32_t slen,
                              unsigned char *p, int where, unsigned char **newp)
{
    return lpInsert(lp, s, NULL, slen, p, where, newp);
}
+
/* This is just a wrapper for lpInsert() to directly use a 64 bit integer
 * instead of a string. The integer is pre-encoded on the stack and passed
 * to lpInsert() as an already-encoded element. */
unsigned char *lpInsertInteger(unsigned char *lp, long long lval, unsigned char *p, int where, unsigned char **newp) {
    uint64_t enclen; /* The length of the encoded element. */
    unsigned char intenc[LP_MAX_INT_ENCODING_LEN];

    lpEncodeIntegerGetType(lval, intenc, &enclen);
    return lpInsert(lp, NULL, intenc, enclen, p, where, newp);
}
+
/* Prepend the specified element 's' of length 'slen' at the head of the
 * listpack. Returns the new listpack pointer (see lpInsert()). */
unsigned char *lpPrepend(unsigned char *lp, unsigned char *s, uint32_t slen) {
    unsigned char *p = lpFirst(lp);
    /* Empty listpack: prepending and appending are the same operation. */
    if (!p) return lpAppend(lp, s, slen);
    return lpInsert(lp, s, NULL, slen, p, LP_BEFORE, NULL);
}
+
/* Prepend the specified integer element 'lval' at the head of the listpack.
 * Returns the new listpack pointer (see lpInsert()). */
unsigned char *lpPrependInteger(unsigned char *lp, long long lval) {
    unsigned char *p = lpFirst(lp);
    /* Empty listpack: prepending and appending are the same operation. */
    if (!p) return lpAppendInteger(lp, lval);
    return lpInsertInteger(lp, lval, p, LP_BEFORE, NULL);
}
+
+/* Append the specified element 'ele' of length 'size' at the end of the
+ * listpack. It is implemented in terms of lpInsert(), so the return value is
+ * the same as lpInsert(). */
+unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size) {
+ uint64_t listpack_bytes = lpGetTotalBytes(lp);
+ unsigned char *eofptr = lp + listpack_bytes - 1;
+ return lpInsert(lp,ele,NULL,size,eofptr,LP_BEFORE,NULL);
+}
+
+/* Append the specified integer element 'lval' at the end of the listpack. */
+unsigned char *lpAppendInteger(unsigned char *lp, long long lval) {
+ uint64_t listpack_bytes = lpGetTotalBytes(lp);
+ unsigned char *eofptr = lp + listpack_bytes - 1;
+ return lpInsertInteger(lp, lval, eofptr, LP_BEFORE, NULL);
+}
+
/* This is just a wrapper for lpInsert() to directly use a string to replace
 * the current element. The function returns the new listpack as return
 * value, and also updates the current cursor by updating '*p'. */
unsigned char *lpReplace(unsigned char *lp, unsigned char **p, unsigned char *s, uint32_t slen) {
    return lpInsert(lp, s, NULL, slen, *p, LP_REPLACE, p);
}
+
/* This is just a wrapper for lpInsertInteger() to directly use a 64 bit
 * integer instead of a string to replace the current element. The function
 * returns the new listpack as return value, and also updates the current
 * cursor by updating '*p'. */
unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long lval) {
    return lpInsertInteger(lp, lval, *p, LP_REPLACE, p);
}
+
/* Remove the element pointed by 'p', and return the resulting listpack.
 * If 'newp' is not NULL, the next element pointer (to the right of the
 * deleted one) is returned by reference. If the deleted element was the
 * last one, '*newp' is set to NULL.
 * Deletion is implemented as replacing the element with nothing. */
unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp) {
    return lpInsert(lp,NULL,NULL,0,p,LP_REPLACE,newp);
}
+
/* Delete a range of up to 'num' entries from the listpack, starting with the
 * element pointed by '*p'. On return '*p' points to the element that followed
 * the deleted range, or is set to NULL if the range reached the end of the
 * listpack. Returns the (possibly reallocated) listpack. */
unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num) {
    size_t bytes = lpBytes(lp);
    unsigned long deleted = 0;
    unsigned char *eofptr = lp + bytes - 1;
    unsigned char *first, *tail;
    first = tail = *p;

    if (num == 0) return lp;  /* Nothing to delete, return ASAP. */

    /* Find the next entry to the last entry that needs to be deleted.
     * lpLength may be unreliable due to corrupt data, so we cannot
     * treat 'num' as the number of elements to be deleted. */
    while (num--) {
        deleted++;
        tail = lpSkip(tail);
        if (tail[0] == LP_EOF) break; /* Reached the end: stop early. */
        lpAssertValidEntry(lp, bytes, tail);
    }

    /* Store the offset of the element 'first', so that we can obtain its
     * address again after a reallocation. */
    unsigned long poff = first-lp;

    /* Move tail to the front of the listpack (also moves the EOF marker). */
    memmove(first, tail, eofptr - tail + 1);
    lpSetTotalBytes(lp, bytes - (tail - first));
    uint32_t numele = lpGetNumElements(lp);
    if (numele != LP_HDR_NUMELE_UNKNOWN)
        lpSetNumElements(lp, numele-deleted);
    lp = lpShrinkToFit(lp);

    /* Store the entry. */
    *p = lp+poff;
    if ((*p)[0] == LP_EOF) *p = NULL;

    return lp;
}
+
/* Delete a range of 'num' entries from the listpack starting at 'index'
 * (negative indexes count from the tail, as in lpSeek()). Returns the
 * (possibly reallocated) listpack. */
unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num) {
    unsigned char *p;
    uint32_t numele = lpGetNumElements(lp);

    if (num == 0) return lp;  /* Nothing to delete, return ASAP. */
    if ((p = lpSeek(lp, index)) == NULL) return lp;

    /* If we know we're gonna delete beyond the end of the listpack, we can just move
     * the EOF marker, and there's no need to iterate through the entries,
     * but if we can't be sure how many entries there are, we rather avoid calling lpLength
     * since that means an additional iteration on all elements.
     *
     * Note that index could overflow, but we use the value after seek, so when we
     * use it no overflow happens. */
    if (numele != LP_HDR_NUMELE_UNKNOWN && index < 0) index = (long)numele + index;
    if (numele != LP_HDR_NUMELE_UNKNOWN && (numele - (unsigned long)index) <= num) {
        /* Fast path: truncate at 'p' by writing a new EOF marker. */
        p[0] = LP_EOF;
        lpSetTotalBytes(lp, p - lp + 1);
        lpSetNumElements(lp, index);
        lp = lpShrinkToFit(lp);
    } else {
        /* Slow path: delete entry by entry starting from 'p'. */
        lp = lpDeleteRangeWithEntry(lp, &p, num);
    }

    return lp;
}
+
/* Delete the elements 'ps' passed as an array of 'count' element pointers and
 * return the resulting listpack. The elements must be given in the same order
 * as they appear in the listpack. */
unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count) {
    if (count == 0) return lp;
    unsigned char *dst = ps[0]; /* Everything before ps[0] is kept in place. */
    size_t total_bytes = lpGetTotalBytes(lp);
    unsigned char *lp_end = lp + total_bytes; /* After the EOF element. */
    assert(lp_end[-1] == LP_EOF);
    /*
     * ----+--------+-----------+--------+---------+-----+---+
     * ... | Delete |    Keep   | Delete |   Keep  | ... |EOF|
     * ... |xxxxxxxx|           |xxxxxxxx|         | ... |   |
     * ----+--------+-----------+--------+---------+-----+---+
     *     ^        ^           ^                            ^
     *     |        |           |                            |
     *     ps[i]    |           ps[i+1]                      |
     *     skip     keep_start  keep_end                     lp_end
     *
     * The loop memmoves the bytes between keep_start and keep_end to dst.
     */
    for (unsigned long i = 0; i < count; i++) {
        unsigned char *skip = ps[i];
        assert(skip != NULL && skip[0] != LP_EOF);
        unsigned char *keep_start = lpSkip(skip);
        unsigned char *keep_end;
        if (i + 1 < count) {
            keep_end = ps[i + 1];
            /* Deleting consecutive elements. Nothing to keep between them. */
            if (keep_start == keep_end) continue;
        } else {
            /* Keep the rest of the listpack including the EOF marker. */
            keep_end = lp_end;
        }
        assert(keep_end > keep_start);
        size_t bytes_to_keep = keep_end - keep_start;
        memmove(dst, keep_start, bytes_to_keep);
        dst += bytes_to_keep;
    }
    /* Update total size and num elements. */
    size_t deleted_bytes = lp_end - dst;
    total_bytes -= deleted_bytes;
    assert(lp[total_bytes - 1] == LP_EOF);
    lpSetTotalBytes(lp, total_bytes);
    uint32_t numele = lpGetNumElements(lp);
    if (numele != LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp, numele - count);
    return lpShrinkToFit(lp);
}
+
+/* Merge listpacks 'first' and 'second' by appending 'second' to 'first'.
+ *
+ * NOTE: The larger listpack is reallocated to contain the new merged listpack.
+ * Either 'first' or 'second' can be used for the result. The parameter not
+ * used will be free'd and set to NULL.
+ *
+ * After calling this function, the input parameters are no longer valid since
+ * they are changed and free'd in-place.
+ *
+ * The result listpack is the contents of 'first' followed by 'second'.
+ *
+ * On failure: returns NULL if the merge is impossible.
+ * On success: returns the merged listpack (which is expanded version of either
+ * 'first' or 'second', also frees the other unused input listpack, and sets the
+ * input listpack argument equal to newly reallocated listpack return value. */
+unsigned char *lpMerge(unsigned char **first, unsigned char **second) {
+    /* If any params are null, we can't merge, so NULL. */
+    if (first == NULL || *first == NULL || second == NULL || *second == NULL)
+        return NULL;
+
+    /* Can't merge same list into itself. */
+    if (*first == *second)
+        return NULL;
+
+    size_t first_bytes = lpBytes(*first);
+    unsigned long first_len = lpLength(*first);
+
+    size_t second_bytes = lpBytes(*second);
+    unsigned long second_len = lpLength(*second);
+
+    int append;
+    unsigned char *source, *target;
+    size_t target_bytes, source_bytes;
+    /* Pick the largest listpack so we can resize easily in-place.
+     * We must also track if we are now appending or prepending to
+     * the target listpack. */
+    if (first_bytes >= second_bytes) {
+        /* retain first, append second to first. */
+        target = *first;
+        target_bytes = first_bytes;
+        source = *second;
+        source_bytes = second_bytes;
+        append = 1;
+    } else {
+        /* else, retain second, prepend first to second. */
+        target = *second;
+        target_bytes = second_bytes;
+        source = *first;
+        source_bytes = first_bytes;
+        append = 0;
+    }
+
+    /* Calculate final bytes (subtract one pair of metadata): the merged
+     * result keeps a single header and a single EOF terminator, so one
+     * LP_HDR_SIZE and one terminator byte are dropped from the sum. */
+    unsigned long long lpbytes = (unsigned long long)first_bytes + second_bytes - LP_HDR_SIZE - 1;
+    assert(lpbytes < UINT32_MAX); /* larger values can't be stored */
+    unsigned long lplength = first_len + second_len;
+
+    /* Combined lp length should be limited within UINT16_MAX: larger counts
+     * saturate (presumably to the "unknown" count marker compared against
+     * LP_HDR_NUMELE_UNKNOWN elsewhere -- confirm against the header docs). */
+    lplength = lplength < UINT16_MAX ? lplength : UINT16_MAX;
+
+    /* Extend target to new lpbytes then append or prepend source. */
+    target = lp_realloc(target, lpbytes);
+    if (append) {
+        /* append == appending to target */
+        /* Copy source after target (copying over original [END]):
+         *   [TARGET - END, SOURCE - HEADER] */
+        memcpy(target + target_bytes - 1,
+               source + LP_HDR_SIZE,
+               source_bytes - LP_HDR_SIZE);
+    } else {
+        /* !append == prepending to target */
+        /* Move target *contents* exactly size of (source - [END]),
+         * then copy source into vacated space (source - [END]):
+         *   [SOURCE - END, TARGET - HEADER] */
+        memmove(target + source_bytes - 1,
+                target + LP_HDR_SIZE,
+                target_bytes - LP_HDR_SIZE);
+        memcpy(target, source, source_bytes - 1);
+    }
+
+    /* Stamp the merged header with the new total size and element count. */
+    lpSetNumElements(target, lplength);
+    lpSetTotalBytes(target, lpbytes);
+
+    /* Now free and NULL out what we didn't realloc */
+    if (append) {
+        lp_free(*second);
+        *second = NULL;
+        *first = target;
+    } else {
+        lp_free(*first);
+        *first = NULL;
+        *second = target;
+    }
+
+    return target;
+}
+
+/* Return a heap-allocated clone of 'lp': a flat byte copy of the whole
+ * allocation, which the caller owns and must free. */
+unsigned char *lpDup(unsigned char *lp) {
+    size_t total = lpBytes(lp);
+    unsigned char *copy = lp_malloc(total);
+    memcpy(copy, lp, total);
+    return copy;
+}
+
+/* Return the total number of bytes the listpack is composed of. */
+size_t lpBytes(unsigned char *lp) {
+    /* The size is stored in the header, so this is O(1). */
+    size_t total = lpGetTotalBytes(lp);
+    return total;
+}
+
+/* Returns the size of a listpack consisting of an integer repeated 'rep' times. */
+size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep) {
+    uint64_t entry_bytes;
+    unsigned char encbuf[LP_MAX_INT_ENCODING_LEN];
+
+    /* Ask the encoder how many bytes the integer payload needs, then add
+     * the backlen that trails every entry. */
+    lpEncodeIntegerGetType(lval, encbuf, &entry_bytes);
+    unsigned long back_bytes = lpEncodeBacklen(NULL, entry_bytes);
+
+    /* Header + 'rep' identical entries + the terminator byte. */
+    return LP_HDR_SIZE + (entry_bytes + back_bytes) * rep + 1;
+}
+
+/* Seek the specified element and returns the pointer to the seeked element.
+ * Positive indexes specify the zero-based element to seek from the head to
+ * the tail, negative indexes specify elements starting from the tail, where
+ * -1 means the last element, -2 the penultimate and so forth. If the index
+ * is out of range, NULL is returned. */
+unsigned char *lpSeek(unsigned char *lp, long index) {
+    int scan_backwards = 0;
+    uint32_t numele = lpGetNumElements(lp);
+
+    if (numele == LP_HDR_NUMELE_UNKNOWN) {
+        /* Length unknown: negative indexes can only be resolved by walking
+         * backwards from the tail; otherwise walk forward. */
+        if (index < 0) scan_backwards = 1;
+    } else {
+        /* Normalize negative indexes, then reject anything out of range. */
+        if (index < 0) index += (long)numele;
+        if (index < 0 || index >= (long)numele) return NULL;
+        /* Scan from whichever end is closer to the target element. */
+        if (index > (long)numele/2) {
+            scan_backwards = 1;
+            /* Backward scanning always works on a negative index. */
+            index -= numele;
+        }
+    }
+
+    if (scan_backwards) {
+        unsigned char *ele = lpLast(lp);
+        while (ele && index < -1) {
+            ele = lpPrev(lp,ele);
+            index++;
+        }
+        return ele;
+    }
+
+    unsigned char *ele = lpFirst(lp);
+    while (ele && index > 0) {
+        ele = lpNext(lp,ele);
+        index--;
+    }
+    return ele;
+}
+
+/* Same as lpFirst but without validation assert, to be used right before lpValidateNext. */
+unsigned char *lpValidateFirst(unsigned char *lp) {
+    unsigned char *first = lp + LP_HDR_SIZE;
+    /* An empty listpack has the terminator right after the header. */
+    return (first[0] == LP_EOF) ? NULL : first;
+}
+
+/* Validate the integrity of a single listpack entry and move to the next one.
+ * The input argument 'pp' is a reference to the current record and is advanced on exit.
+ * Returns 1 if valid, 0 if invalid. */
+int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {
+/* A pointer is in range only between the first byte after the header and the
+ * last byte (the terminator) of the 'lpbytes'-long buffer. */
+#define OUT_OF_RANGE(p) ( \
+        (p) < lp + LP_HDR_SIZE || \
+        (p) > lp + lpbytes - 1)
+    unsigned char *p = *pp;
+    if (!p)
+        return 0;
+
+    /* Before accessing p, make sure it's valid. */
+    if (OUT_OF_RANGE(p))
+        return 0;
+
+    /* Reaching the terminator is a successful end of iteration: signal it
+     * to the caller by setting the cursor to NULL. */
+    if (*p == LP_EOF) {
+        *pp = NULL;
+        return 1;
+    }
+
+    /* check that we can read the encoded size */
+    uint32_t lenbytes = lpCurrentEncodedSizeBytes(p);
+    if (!lenbytes)
+        return 0;
+
+    /* make sure the encoded entry length doesn't reach outside the edge of the listpack */
+    if (OUT_OF_RANGE(p + lenbytes))
+        return 0;
+
+    /* get the entry length and encoded backlen. */
+    unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);
+    unsigned long encodedBacklen = lpEncodeBacklen(NULL,entrylen);
+    entrylen += encodedBacklen;
+
+    /* make sure the entry doesn't reach outside the edge of the listpack */
+    if (OUT_OF_RANGE(p + entrylen))
+        return 0;
+
+    /* move to the next entry */
+    p += entrylen;
+
+    /* make sure the encoded length at the end patches the one at the beginning:
+     * the backlen stored after the payload must agree with the forward-decoded
+     * size, otherwise backward traversal would land on garbage. */
+    uint64_t prevlen = lpDecodeBacklen(p-1);
+    if (prevlen + encodedBacklen != entrylen)
+        return 0;
+
+    *pp = p;
+    return 1;
+#undef OUT_OF_RANGE
+}
+
+/* Validate that the entry doesn't reach outside the listpack allocation.
+ * NOTE(review): the validation call runs inside assert(), so it has a side
+ * effect (advancing the local cursor) -- this relies on the project's assert
+ * being active in all builds; confirm it is not the standard <assert.h>
+ * macro that compiles away under NDEBUG. */
+static inline void lpAssertValidEntry(unsigned char* lp, size_t lpbytes, unsigned char *p) {
+    assert(lpValidateNext(lp, &p, lpbytes));
+}
+
+/* Validate the integrity of the data structure.
+ * when `deep` is 0, only the integrity of the header is validated.
+ * when `deep` is 1, we scan all the entries one by one.
+ * 'entry_cb' (may be NULL) is invoked for each entry with 'cb_userdata' and
+ * can veto validation by returning 0.
+ * Returns 1 if the listpack is valid, 0 otherwise. */
+int lpValidateIntegrity(unsigned char *lp, size_t size, int deep,
+                        listpackValidateEntryCB entry_cb, void *cb_userdata) {
+    /* Check that we can actually read the header. (and EOF) */
+    if (size < LP_HDR_SIZE + 1)
+        return 0;
+
+    /* Check that the encoded size in the header must match the allocated size. */
+    size_t bytes = lpGetTotalBytes(lp);
+    if (bytes != size)
+        return 0;
+
+    /* The last byte must be the terminator. */
+    if (lp[size-1] != LP_EOF)
+        return 0;
+
+    if (!deep)
+        return 1;
+
+    /* Validate the individual entries. */
+    uint32_t count = 0;
+    uint32_t numele = lpGetNumElements(lp);
+    unsigned char *p = lp + LP_HDR_SIZE;
+    while(p && p[0] != LP_EOF) {
+        unsigned char *prev = p;
+
+        /* Validate this entry and move to the next entry in advance
+         * to avoid callback crash due to corrupt listpack. */
+        if (!lpValidateNext(lp, &p, bytes))
+            return 0;
+
+        /* Optionally let the caller validate the entry too. */
+        if (entry_cb && !entry_cb(prev, numele, cb_userdata))
+            return 0;
+
+        count++;
+    }
+
+    /* Make sure 'p' really does point to the end of the listpack. */
+    if (p != lp + size - 1)
+        return 0;
+
+    /* Check that the count in the header is correct */
+    if (numele != LP_HDR_NUMELE_UNKNOWN && numele != count)
+        return 0;
+
+    return 1;
+}
+
+/* Compare entry pointer to by 'p' with string 's' of length 'slen'.
+ * Return 1 if equal. */
+unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen) {
+    int64_t entry_val;
+    unsigned char *entry_str;
+
+    /* The terminator never matches anything. */
+    if (p[0] == LP_EOF) return 0;
+
+    entry_str = lpGet(p, &entry_val, NULL);
+    if (entry_str != NULL) {
+        /* String-encoded entry: lengths must match, then the bytes. */
+        if ((int64_t)slen != entry_val) return 0;
+        return memcmp(entry_str, s, slen) == 0;
+    }
+
+    /* Integer-encoded entry: parse 's' as an integer and compare numbers,
+     * which is much faster than rendering the entry as a string. */
+    int64_t parsed;
+    if (lpStringToInt64((const char*)s, slen, &parsed))
+        return entry_val == parsed;
+
+    return 0;
+}
+
+/* uint compare for qsort.
+ * Note: returning the plain subtraction of the two unsigned values is wrong:
+ * the unsigned difference wraps and then converts to int, so operands that
+ * differ by more than INT_MAX can produce a result with the wrong sign,
+ * violating the qsort comparator contract. Compare explicitly instead. */
+static int uintCompare(const void *a, const void *b) {
+    unsigned int ua = *(const unsigned int *) a;
+    unsigned int ub = *(const unsigned int *) b;
+    return (ua > ub) - (ua < ub);
+}
+
+/* Helper method to store a string into from val or lval into dest.
+ * All three fields are copied; consumers inspect 'sval' to decide whether
+ * the entry was a string (sval != NULL) or an integer (use 'lval'). */
+static inline void lpSaveValue(unsigned char *val, unsigned int len, int64_t lval, listpackEntry *dest) {
+    dest->slen = len;
+    dest->lval = lval;
+    dest->sval = val;
+}
+
+/* Randomly select a pair of key and value.
+ * total_count is a pre-computed length/2 of the listpack (to avoid calls to lpLength)
+ * 'key' and 'val' are used to store the result key value pair.
+ * 'val' can be NULL if the value is not needed. */
+void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val) {
+    unsigned char *p;
+
+    /* Avoid div by zero on corrupt listpack */
+    assert(total_count);
+
+    /* Generate even numbers, because listpack saved K-V pair:
+     * keys live at even indexes, each value immediately follows its key. */
+    int r = (rand() % total_count) * 2;
+    assert((p = lpSeek(lp, r)));
+    key->sval = lpGetValue(p, &(key->slen), &(key->lval));
+
+    if (!val)
+        return;
+    /* The value is the entry right after the key. */
+    assert((p = lpNext(lp, p)));
+    val->sval = lpGetValue(p, &(val->slen), &(val->lval));
+}
+
+/* Randomly select 'count' entries and store them in the 'entries' array, which
+ * needs to have space for 'count' listpackEntry structs. The order is random
+ * and duplicates are possible. */
+void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries) {
+    /* 'index' must be the first member so the array can be sorted with
+     * uintCompare below. */
+    struct pick {
+        unsigned int index; /* position inside the listpack */
+        unsigned int order; /* slot in the output 'entries' array */
+    } *picks = lp_malloc(count * sizeof(struct pick));
+    unsigned int total_size = lpLength(lp);
+    assert(total_size);
+    for (unsigned int i = 0; i < count; i++) {
+        picks[i].index = rand() % total_size;
+        picks[i].order = i;
+    }
+
+    /* Sort by index so the listpack only needs one front-to-back walk. */
+    qsort(picks, count, sizeof(struct pick), uintCompare);
+
+    /* Iterate over listpack in index order and store the values in the entries
+     * array respecting the original order. */
+    unsigned char *p = lpFirst(lp);
+    unsigned int j = 0; /* index in listpack */
+    for (unsigned int i = 0; i < count; i++) {
+        /* Advance listpack pointer to until we reach 'index' listpack. */
+        while (j < picks[i].index) {
+            p = lpNext(lp, p);
+            j++;
+        }
+        int storeorder = picks[i].order;
+        unsigned int len = 0;
+        long long llval = 0;
+        unsigned char *str = lpGetValue(p, &len, &llval);
+        lpSaveValue(str, len, llval, &entries[storeorder]);
+    }
+    lp_free(picks);
+}
+
+/* Randomly select count of key value pairs and store into 'keys' and
+ * 'vals' args. The order of the picked entries is random, and the selections
+ * are non-unique (repetitions are possible).
+ * The 'vals' arg can be NULL in which case we skip these.
+ * NOTE(review): 'picks[0].index' is read unconditionally below, so callers
+ * appear to be required to pass count >= 1 -- confirm at call sites. */
+void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals) {
+    unsigned char *p, *key, *value;
+    unsigned int klen = 0, vlen = 0;
+    long long klval = 0, vlval = 0;
+
+    /* Notice: the index member must be first due to the use in uintCompare */
+    typedef struct {
+        unsigned int index;
+        unsigned int order;
+    } rand_pick;
+    rand_pick *picks = lp_malloc(sizeof(rand_pick)*count);
+    unsigned int total_size = lpLength(lp)/2;
+
+    /* Avoid div by zero on corrupt listpack */
+    assert(total_size);
+
+    /* create a pool of random indexes (some may be duplicate). */
+    for (unsigned int i = 0; i < count; i++) {
+        picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
+        /* keep track of the order we picked them */
+        picks[i].order = i;
+    }
+
+    /* sort by indexes so the listpack is walked front-to-back only once. */
+    qsort(picks, count, sizeof(rand_pick), uintCompare);
+
+    /* fetch the elements form the listpack into a output array respecting the original order. */
+    unsigned int lpindex = picks[0].index, pickindex = 0;
+    p = lpSeek(lp, lpindex);
+    while (p && pickindex < count) {
+        key = lpGetValue(p, &klen, &klval);
+        assert((p = lpNext(lp, p)));
+        value = lpGetValue(p, &vlen, &vlval);
+        /* One pair may satisfy several duplicate picks of the same index. */
+        while (pickindex < count && lpindex == picks[pickindex].index) {
+            int storeorder = picks[pickindex].order;
+            lpSaveValue(key, klen, klval, &keys[storeorder]);
+            if (vals)
+                lpSaveValue(value, vlen, vlval, &vals[storeorder]);
+            pickindex++;
+        }
+        lpindex += 2;
+        p = lpNext(lp, p);
+    }
+
+    lp_free(picks);
+}
+
+/* Randomly select count of key value pairs and store into 'keys' and
+ * 'vals' args. The selections are unique (no repetitions), and the order of
+ * the picked entries is NOT-random.
+ * The 'vals' arg can be NULL in which case we skip these.
+ * The return value is the number of items picked which can be lower than the
+ * requested count if the listpack doesn't hold enough pairs. */
+unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals) {
+    unsigned char *p, *key;
+    unsigned int klen = 0;
+    long long klval = 0;
+    unsigned int total_size = lpLength(lp)/2; /* number of key-value pairs */
+    unsigned int index = 0;
+    /* Never try to pick more pairs than the listpack holds. */
+    if (count > total_size)
+        count = total_size;
+
+    p = lpFirst(lp);
+    unsigned int picked = 0, remaining = count;
+    while (picked < count && p) {
+        /* Jump to the next randomly chosen key (even index, even_only=1). */
+        assert((p = lpNextRandom(lp, p, &index, remaining, 1)));
+        key = lpGetValue(p, &klen, &klval);
+        lpSaveValue(key, klen, klval, &keys[picked]);
+        /* The value entry always follows its key. */
+        assert((p = lpNext(lp, p)));
+        index++;
+        if (vals) {
+            key = lpGetValue(p, &klen, &klval);
+            lpSaveValue(key, klen, klval, &vals[picked]);
+        }
+        p = lpNext(lp, p);
+        remaining--;
+        picked++;
+        index++; /* step past the value entry as well */
+    }
+    return picked;
+}
+
+/* Iterates forward to the "next random" element, given we are yet to pick
+ * 'remaining' unique elements between the starting element 'p' (inclusive) and
+ * the end of the list. The 'index' needs to be initialized according to the
+ * current zero-based index matching the position of the starting element 'p'
+ * and is updated to match the returned element's zero-based index. If
+ * 'even_only' is nonzero, an element with an even index is picked, which is
+ * useful if the listpack represents a key-value pair sequence.
+ *
+ * Note that this function can return p. In order to skip the previously
+ * returned element, you need to call lpNext() or lpDelete() after each call to
+ * lpNextRandom(). Idea:
+ *
+ * assert(remaining <= lpLength(lp));
+ * p = lpFirst(lp);
+ * i = 0;
+ * while (remaining > 0) {
+ * p = lpNextRandom(lp, p, &i, remaining--, 0);
+ *
+ * // ... Do stuff with p ...
+ *
+ * p = lpNext(lp, p);
+ * i++;
+ * }
+ */
+unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
+                            unsigned int remaining, int even_only)
+{
+    /* To only iterate once, every time we try to pick a member, the probability
+     * we pick it is the quotient of the count left we want to pick and the
+     * count still we haven't visited. This way, we could make every member be
+     * equally likely to be picked. */
+    unsigned int i = *index;
+    unsigned int total_size = lpLength(lp);
+    while (i < total_size && p != NULL) {
+        /* In even_only mode, odd positions are never candidates. */
+        if (even_only && i % 2 != 0) {
+            p = lpNext(lp, p);
+            i++;
+            continue;
+        }
+
+        /* Do we pick this element? Pick with probability remaining/available,
+         * where 'available' is the number of candidates not yet visited. */
+        unsigned int available = total_size - i;
+        if (even_only) available /= 2;
+        double randomDouble = ((double)rand()) / RAND_MAX;
+        double threshold = ((double)remaining) / available;
+        if (randomDouble <= threshold) {
+            *index = i;
+            return p;
+        }
+
+        p = lpNext(lp, p);
+        i++;
+    }
+
+    return NULL;
+}
+
+/* Print info of listpack which is used in debugCommand:
+ * dumps the header totals, then one record per entry with its address,
+ * offset, encoded sizes and raw bytes. */
+void lpRepr(unsigned char *lp) {
+    unsigned char *p, *vstr;
+    int64_t vlen;
+    unsigned char intbuf[LP_INTBUF_SIZE];
+    int index = 0;
+
+    printf("{total bytes %zu} {num entries %lu}\n", lpBytes(lp), lpLength(lp));
+
+    p = lpFirst(lp);
+    while(p) {
+        uint32_t encoded_size_bytes = lpCurrentEncodedSizeBytes(p);
+        uint32_t encoded_size = lpCurrentEncodedSizeUnsafe(p);
+        unsigned long back_len = lpEncodeBacklen(NULL, encoded_size);
+        /* NOTE(review): casting a pointer to 'long unsigned' truncates on
+         * LLP64 platforms (e.g. 64-bit Windows); %p would be portable.
+         * Debug-only output, so flagged rather than changed. */
+        printf(
+            "{\n"
+                "\taddr: 0x%08lx,\n"
+                "\tindex: %2d,\n"
+                "\toffset: %1lu,\n"
+                "\thdr+entrylen+backlen: %2lu,\n"
+                "\thdrlen: %3u,\n"
+                "\tbacklen: %2lu,\n"
+                "\tpayload: %1u\n",
+            (long unsigned)p,
+            index,
+            (unsigned long) (p-lp),
+            encoded_size + back_len,
+            encoded_size_bytes,
+            back_len,
+            encoded_size - encoded_size_bytes);
+        printf("\tbytes: ");
+        for (unsigned int i = 0; i < (encoded_size + back_len); i++) {
+            printf("%02x|",p[i]);
+        }
+        printf("\n");
+
+        vstr = lpGet(p, &vlen, intbuf);
+        printf("\t[str]");
+        /* Long payloads are truncated to their first 40 bytes. */
+        if (vlen > 40) {
+            if (fwrite(vstr, 40, 1, stdout) == 0) perror("fwrite");
+            printf("...");
+        } else {
+            if (fwrite(vstr, vlen, 1, stdout) == 0) perror("fwrite");
+        }
+        printf("\n}\n");
+        index++;
+        p = lpNext(lp, p);
+    }
+    printf("{end}\n\n");
+}
+
+#ifdef REDIS_TEST
+
+#include <sys/time.h>
+#include "adlist.h"
+#include "sds.h"
+#include "testhelp.h"
+
+#define UNUSED(x) (void)(x)
+#define TEST(name) printf("test — %s\n", name);
+
+/* Shared test fixtures: a mixed string/integer list, and a list of integers
+ * with two non-integer strings at the tail. */
+char *mixlist[] = {"hello", "foo", "quux", "1024"};
+char *intlist[] = {"4294967296", "-100", "100", "128000",
+                   "non integer", "much much longer non integer"};
+
+/* Build the canonical 4-element fixture {"hello","foo","quux","1024"},
+ * mixing appends with one prepend so both insertion paths get exercised. */
+static unsigned char *createList(void) {
+    unsigned char *result = lpNew(0);
+    result = lpAppend(result, (unsigned char*)mixlist[1], strlen(mixlist[1]));
+    result = lpAppend(result, (unsigned char*)mixlist[2], strlen(mixlist[2]));
+    result = lpPrepend(result, (unsigned char*)mixlist[0], strlen(mixlist[0]));
+    result = lpAppend(result, (unsigned char*)mixlist[3], strlen(mixlist[3]));
+    return result;
+}
+
+/* Build the 6-element fixture from 'intlist', again mixing prepends and
+ * appends so the final order matches the array order. */
+static unsigned char *createIntList(void) {
+    unsigned char *result = lpNew(0);
+    result = lpAppend(result, (unsigned char*)intlist[2], strlen(intlist[2]));
+    result = lpAppend(result, (unsigned char*)intlist[3], strlen(intlist[3]));
+    result = lpPrepend(result, (unsigned char*)intlist[1], strlen(intlist[1]));
+    result = lpPrepend(result, (unsigned char*)intlist[0], strlen(intlist[0]));
+    result = lpAppend(result, (unsigned char*)intlist[4], strlen(intlist[4]));
+    result = lpAppend(result, (unsigned char*)intlist[5], strlen(intlist[5]));
+    return result;
+}
+
+/* Wall-clock time in microseconds, used by the stress benchmarks. */
+static long long usec(void) {
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    long long micros = (long long)now.tv_sec * 1000000;
+    return micros + now.tv_usec;
+}
+
+/* Benchmark helper: for list sizes growing by 'dnum' up to 'maxsize', time
+ * 'num' push+pop operations at the head (pos==0) or tail (pos!=0) and print
+ * the per-round timings. */
+static void stress(int pos, int num, int maxsize, int dnum) {
+    int i, j, k;
+    unsigned char *lp;
+    char posstr[2][5] = { "HEAD", "TAIL" };
+    long long start;
+    for (i = 0; i < maxsize; i+=dnum) {
+        lp = lpNew(0);
+        /* Pre-fill the list with 'i' elements. */
+        for (j = 0; j < i; j++) {
+            lp = lpAppend(lp, (unsigned char*)"quux", 4);
+        }
+
+        /* Do num times a push+pop from pos */
+        start = usec();
+        for (k = 0; k < num; k++) {
+            if (pos == 0) {
+                lp = lpPrepend(lp, (unsigned char*)"quux", 4);
+            } else {
+                lp = lpAppend(lp, (unsigned char*)"quux", 4);
+
+            }
+            lp = lpDelete(lp, lpFirst(lp), NULL);
+        }
+        printf("List size: %8d, bytes: %8zu, %dx push+pop (%s): %6lld usec\n",
+               i, lpBytes(lp), num, posstr[pos], usec()-start);
+        lpFree(lp);
+    }
+}
+
+/* Pop one element from the head (where==0) or the tail of 'lp': print its
+ * value to stdout, delete it, and return the resulting listpack. */
+static unsigned char *pop(unsigned char *lp, int where) {
+    unsigned char *p, *vstr;
+    int64_t vlen;
+
+    p = lpSeek(lp, where == 0 ? 0 : -1);
+    vstr = lpGet(p, &vlen, NULL);
+    if (where == 0)
+        printf("Pop head: ");
+    else
+        printf("Pop tail: ");
+
+    /* String entries are written raw; integer entries print the value. */
+    if (vstr) {
+        if (vlen && fwrite(vstr, vlen, 1, stdout) == 0) perror("fwrite");
+    } else {
+        printf("%lld", (long long)vlen);
+    }
+
+    printf("\n");
+    return lpDelete(lp, p, &p);
+}
+
+/* Fill 'target' with a random string of length uniformly chosen in
+ * [min, max] and return that length. No NUL terminator is written and the
+ * caller must provide at least 'max' bytes of space. The alphabet is chosen
+ * at random per call: arbitrary bytes, ASCII '0'..'z', or digits '0'..'4'. */
+static int randstring(char *target, unsigned int min, unsigned int max) {
+    int p = 0;
+    int len = min+rand()%(max-min+1);
+    int minval, maxval;
+    switch(rand() % 3) {
+    case 0: /* any byte value */
+        minval = 0;
+        maxval = 255;
+        break;
+    case 1: /* ASCII 48..122: digits, letters and some punctuation */
+        minval = 48;
+        maxval = 122;
+        break;
+    case 2: /* digits '0'..'4' only: many collisions */
+        minval = 48;
+        maxval = 52;
+        break;
+    default:
+        assert(NULL); /* unreachable: rand()%3 is always 0..2 */
+    }
+
+    while(p < len)
+        target[p++] = minval+rand()%(maxval-minval+1);
+    return len;
+}
+
+/* Assert that the entry at 'p' holds exactly the string 's' of length 'slen'. */
+static void verifyEntry(unsigned char *p, unsigned char *s, size_t slen) {
+    int matches = lpCompare(p, s, slen);
+    assert(matches);
+}
+
+/* lpValidateIntegrity() callback used by the tests: checks that the entries
+ * appear in the same order as 'mixlist', advancing the counter passed via
+ * 'userdata'. Returns 1 if the entry matches, 0 otherwise.
+ * Note: the original marked 'p' with UNUSED() even though it is used in the
+ * lpCompare() call below; the misleading macro invocation was removed. */
+static int lpValidation(unsigned char *p, unsigned int head_count, void *userdata) {
+    UNUSED(head_count);
+
+    int ret;
+    long *count = userdata;
+    ret = lpCompare(p, (unsigned char *)mixlist[*count], strlen(mixlist[*count]));
+    (*count)++;
+    return ret;
+}
+
+int listpackTest(int argc, char *argv[], int flags) {
+ UNUSED(argc);
+ UNUSED(argv);
+
+ int i;
+ unsigned char *lp, *p, *vstr;
+ int64_t vlen;
+ unsigned char intbuf[LP_INTBUF_SIZE];
+ int accurate = (flags & REDIS_TEST_ACCURATE);
+
+ TEST("Create int list") {
+ lp = createIntList();
+ assert(lpLength(lp) == 6);
+ lpFree(lp);
+ }
+
+ TEST("Create list") {
+ lp = createList();
+ assert(lpLength(lp) == 4);
+ lpFree(lp);
+ }
+
+ TEST("Test lpPrepend") {
+ lp = lpNew(0);
+ lp = lpPrepend(lp, (unsigned char*)"abc", 3);
+ lp = lpPrepend(lp, (unsigned char*)"1024", 4);
+ verifyEntry(lpSeek(lp, 0), (unsigned char*)"1024", 4);
+ verifyEntry(lpSeek(lp, 1), (unsigned char*)"abc", 3);
+ lpFree(lp);
+ }
+
+ TEST("Test lpPrependInteger") {
+ lp = lpNew(0);
+ lp = lpPrependInteger(lp, 127);
+ lp = lpPrependInteger(lp, 4095);
+ lp = lpPrependInteger(lp, 32767);
+ lp = lpPrependInteger(lp, 8388607);
+ lp = lpPrependInteger(lp, 2147483647);
+ lp = lpPrependInteger(lp, 9223372036854775807);
+ verifyEntry(lpSeek(lp, 0), (unsigned char*)"9223372036854775807", 19);
+ verifyEntry(lpSeek(lp, -1), (unsigned char*)"127", 3);
+ lpFree(lp);
+ }
+
+ TEST("Get element at index") {
+ lp = createList();
+ verifyEntry(lpSeek(lp, 0), (unsigned char*)"hello", 5);
+ verifyEntry(lpSeek(lp, 3), (unsigned char*)"1024", 4);
+ verifyEntry(lpSeek(lp, -1), (unsigned char*)"1024", 4);
+ verifyEntry(lpSeek(lp, -4), (unsigned char*)"hello", 5);
+ assert(lpSeek(lp, 4) == NULL);
+ assert(lpSeek(lp, -5) == NULL);
+ lpFree(lp);
+ }
+
+ TEST("Pop list") {
+ lp = createList();
+ lp = pop(lp, 1);
+ lp = pop(lp, 0);
+ lp = pop(lp, 1);
+ lp = pop(lp, 1);
+ lpFree(lp);
+ }
+
+ TEST("Get element at index") {
+ lp = createList();
+ verifyEntry(lpSeek(lp, 0), (unsigned char*)"hello", 5);
+ verifyEntry(lpSeek(lp, 3), (unsigned char*)"1024", 4);
+ verifyEntry(lpSeek(lp, -1), (unsigned char*)"1024", 4);
+ verifyEntry(lpSeek(lp, -4), (unsigned char*)"hello", 5);
+ assert(lpSeek(lp, 4) == NULL);
+ assert(lpSeek(lp, -5) == NULL);
+ lpFree(lp);
+ }
+
+ TEST("Iterate list from 0 to end") {
+ lp = createList();
+ p = lpFirst(lp);
+ i = 0;
+ while (p) {
+ verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));
+ p = lpNext(lp, p);
+ i++;
+ }
+ lpFree(lp);
+ }
+
+ TEST("Iterate list from 1 to end") {
+ lp = createList();
+ i = 1;
+ p = lpSeek(lp, i);
+ while (p) {
+ verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));
+ p = lpNext(lp, p);
+ i++;
+ }
+ lpFree(lp);
+ }
+
+ TEST("Iterate list from 2 to end") {
+ lp = createList();
+ i = 2;
+ p = lpSeek(lp, i);
+ while (p) {
+ verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));
+ p = lpNext(lp, p);
+ i++;
+ }
+ lpFree(lp);
+ }
+
+ TEST("Iterate from back to front") {
+ lp = createList();
+ p = lpLast(lp);
+ i = 3;
+ while (p) {
+ verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));
+ p = lpPrev(lp, p);
+ i--;
+ }
+ lpFree(lp);
+ }
+
+ TEST("Iterate from back to front, deleting all items") {
+ lp = createList();
+ p = lpLast(lp);
+ i = 3;
+ while ((p = lpLast(lp))) {
+ verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));
+ lp = lpDelete(lp, p, &p);
+ assert(p == NULL);
+ i--;
+ }
+ lpFree(lp);
+ }
+
+ TEST("Delete whole listpack when num == -1");
+ {
+ lp = createList();
+ lp = lpDeleteRange(lp, 0, -1);
+ assert(lpLength(lp) == 0);
+ assert(lp[LP_HDR_SIZE] == LP_EOF);
+ assert(lpBytes(lp) == (LP_HDR_SIZE + 1));
+ zfree(lp);
+
+ lp = createList();
+ unsigned char *ptr = lpFirst(lp);
+ lp = lpDeleteRangeWithEntry(lp, &ptr, -1);
+ assert(lpLength(lp) == 0);
+ assert(lp[LP_HDR_SIZE] == LP_EOF);
+ assert(lpBytes(lp) == (LP_HDR_SIZE + 1));
+ zfree(lp);
+ }
+
+ TEST("Delete whole listpack with negative index");
+ {
+ lp = createList();
+ lp = lpDeleteRange(lp, -4, 4);
+ assert(lpLength(lp) == 0);
+ assert(lp[LP_HDR_SIZE] == LP_EOF);
+ assert(lpBytes(lp) == (LP_HDR_SIZE + 1));
+ zfree(lp);
+
+ lp = createList();
+ unsigned char *ptr = lpSeek(lp, -4);
+ lp = lpDeleteRangeWithEntry(lp, &ptr, 4);
+ assert(lpLength(lp) == 0);
+ assert(lp[LP_HDR_SIZE] == LP_EOF);
+ assert(lpBytes(lp) == (LP_HDR_SIZE + 1));
+ zfree(lp);
+ }
+
+ TEST("Delete inclusive range 0,0");
+ {
+ lp = createList();
+ lp = lpDeleteRange(lp, 0, 1);
+ assert(lpLength(lp) == 3);
+ assert(lpSkip(lpLast(lp))[0] == LP_EOF); /* check set LP_EOF correctly */
+ zfree(lp);
+
+ lp = createList();
+ unsigned char *ptr = lpFirst(lp);
+ lp = lpDeleteRangeWithEntry(lp, &ptr, 1);
+ assert(lpLength(lp) == 3);
+ assert(lpSkip(lpLast(lp))[0] == LP_EOF); /* check set LP_EOF correctly */
+ zfree(lp);
+ }
+
+ TEST("Delete inclusive range 0,1");
+ {
+ lp = createList();
+ lp = lpDeleteRange(lp, 0, 2);
+ assert(lpLength(lp) == 2);
+ verifyEntry(lpFirst(lp), (unsigned char*)mixlist[2], strlen(mixlist[2]));
+ zfree(lp);
+
+ lp = createList();
+ unsigned char *ptr = lpFirst(lp);
+ lp = lpDeleteRangeWithEntry(lp, &ptr, 2);
+ assert(lpLength(lp) == 2);
+ verifyEntry(lpFirst(lp), (unsigned char*)mixlist[2], strlen(mixlist[2]));
+ zfree(lp);
+ }
+
+ TEST("Delete inclusive range 1,2");
+ {
+ lp = createList();
+ lp = lpDeleteRange(lp, 1, 2);
+ assert(lpLength(lp) == 2);
+ verifyEntry(lpFirst(lp), (unsigned char*)mixlist[0], strlen(mixlist[0]));
+ zfree(lp);
+
+ lp = createList();
+ unsigned char *ptr = lpSeek(lp, 1);
+ lp = lpDeleteRangeWithEntry(lp, &ptr, 2);
+ assert(lpLength(lp) == 2);
+ verifyEntry(lpFirst(lp), (unsigned char*)mixlist[0], strlen(mixlist[0]));
+ zfree(lp);
+ }
+
+ TEST("Delete with start index out of range");
+ {
+ lp = createList();
+ lp = lpDeleteRange(lp, 5, 1);
+ assert(lpLength(lp) == 4);
+ zfree(lp);
+ }
+
+ TEST("Delete with num overflow");
+ {
+ lp = createList();
+ lp = lpDeleteRange(lp, 1, 5);
+ assert(lpLength(lp) == 1);
+ verifyEntry(lpFirst(lp), (unsigned char*)mixlist[0], strlen(mixlist[0]));
+ zfree(lp);
+
+ lp = createList();
+ unsigned char *ptr = lpSeek(lp, 1);
+ lp = lpDeleteRangeWithEntry(lp, &ptr, 5);
+ assert(lpLength(lp) == 1);
+ verifyEntry(lpFirst(lp), (unsigned char*)mixlist[0], strlen(mixlist[0]));
+ zfree(lp);
+ }
+
+ TEST("Batch delete") {
+ unsigned char *lp = createList(); /* char *mixlist[] = {"hello", "foo", "quux", "1024"} */
+ assert(lpLength(lp) == 4); /* Pre-condition */
+ unsigned char *p0 = lpFirst(lp),
+ *p1 = lpNext(lp, p0),
+ *p2 = lpNext(lp, p1),
+ *p3 = lpNext(lp, p2);
+ unsigned char *ps[] = {p0, p1, p3};
+ lp = lpBatchDelete(lp, ps, 3);
+ assert(lpLength(lp) == 1);
+ verifyEntry(lpFirst(lp), (unsigned char*)mixlist[2], strlen(mixlist[2]));
+ assert(lpValidateIntegrity(lp, lpBytes(lp), 1, NULL, NULL) == 1);
+ lpFree(lp);
+ }
+
+ TEST("Delete foo while iterating") {
+ lp = createList();
+ p = lpFirst(lp);
+ while (p) {
+ if (lpCompare(p, (unsigned char*)"foo", 3)) {
+ lp = lpDelete(lp, p, &p);
+ } else {
+ p = lpNext(lp, p);
+ }
+ }
+ lpFree(lp);
+ }
+
+ TEST("Replace with same size") {
+ lp = createList(); /* "hello", "foo", "quux", "1024" */
+ unsigned char *orig_lp = lp;
+ p = lpSeek(lp, 0);
+ lp = lpReplace(lp, &p, (unsigned char*)"zoink", 5);
+ p = lpSeek(lp, 3);
+ lp = lpReplace(lp, &p, (unsigned char*)"y", 1);
+ p = lpSeek(lp, 1);
+ lp = lpReplace(lp, &p, (unsigned char*)"65536", 5);
+ p = lpSeek(lp, 0);
+ assert(!memcmp((char*)p,
+ "\x85zoink\x06"
+ "\xf2\x00\x00\x01\x04" /* 65536 as int24 */
+ "\x84quux\05" "\x81y\x02" "\xff",
+ 22));
+ assert(lp == orig_lp); /* no reallocations have happened */
+ lpFree(lp);
+ }
+
+ TEST("Replace with different size") {
+ lp = createList(); /* "hello", "foo", "quux", "1024" */
+ p = lpSeek(lp, 1);
+ lp = lpReplace(lp, &p, (unsigned char*)"squirrel", 8);
+ p = lpSeek(lp, 0);
+ assert(!strncmp((char*)p,
+ "\x85hello\x06" "\x88squirrel\x09" "\x84quux\x05"
+ "\xc4\x00\x02" "\xff",
+ 27));
+ lpFree(lp);
+ }
+
+ TEST("Regression test for >255 byte strings") {
+ char v1[257] = {0}, v2[257] = {0};
+ memset(v1,'x',256);
+ memset(v2,'y',256);
+ lp = lpNew(0);
+ lp = lpAppend(lp, (unsigned char*)v1 ,strlen(v1));
+ lp = lpAppend(lp, (unsigned char*)v2 ,strlen(v2));
+
+ /* Pop values again and compare their value. */
+ p = lpFirst(lp);
+ vstr = lpGet(p, &vlen, NULL);
+ assert(strncmp(v1, (char*)vstr, vlen) == 0);
+ p = lpSeek(lp, 1);
+ vstr = lpGet(p, &vlen, NULL);
+ assert(strncmp(v2, (char*)vstr, vlen) == 0);
+ lpFree(lp);
+ }
+
+ TEST("Create long list and check indices") {
+ lp = lpNew(0);
+ char buf[32];
+ int i,len;
+ for (i = 0; i < 1000; i++) {
+ len = snprintf(buf, sizeof(buf), "%d", i);
+ lp = lpAppend(lp, (unsigned char*)buf, len);
+ }
+ for (i = 0; i < 1000; i++) {
+ p = lpSeek(lp, i);
+ vstr = lpGet(p, &vlen, NULL);
+ assert(i == vlen);
+
+ p = lpSeek(lp, -i-1);
+ vstr = lpGet(p, &vlen, NULL);
+ assert(999-i == vlen);
+ }
+ lpFree(lp);
+ }
+
+ TEST("Compare strings with listpack entries") {
+ lp = createList();
+ p = lpSeek(lp,0);
+ assert(lpCompare(p,(unsigned char*)"hello",5));
+ assert(!lpCompare(p,(unsigned char*)"hella",5));
+
+ p = lpSeek(lp,3);
+ assert(lpCompare(p,(unsigned char*)"1024",4));
+ assert(!lpCompare(p,(unsigned char*)"1025",4));
+ lpFree(lp);
+ }
+
+ TEST("lpMerge two empty listpacks") {
+ unsigned char *lp1 = lpNew(0);
+ unsigned char *lp2 = lpNew(0);
+
+ /* Merge two empty listpacks, get empty result back. */
+ lp1 = lpMerge(&lp1, &lp2);
+ assert(lpLength(lp1) == 0);
+ zfree(lp1);
+ }
+
+ TEST("lpMerge two listpacks - first larger than second") {
+ unsigned char *lp1 = createIntList();
+ unsigned char *lp2 = createList();
+
+ size_t lp1_bytes = lpBytes(lp1);
+ size_t lp2_bytes = lpBytes(lp2);
+ unsigned long lp1_len = lpLength(lp1);
+ unsigned long lp2_len = lpLength(lp2);
+
+ unsigned char *lp3 = lpMerge(&lp1, &lp2);
+ assert(lp3 == lp1);
+ assert(lp2 == NULL);
+ assert(lpLength(lp3) == (lp1_len + lp2_len));
+ assert(lpBytes(lp3) == (lp1_bytes + lp2_bytes - LP_HDR_SIZE - 1));
+ verifyEntry(lpSeek(lp3, 0), (unsigned char*)"4294967296", 10);
+ verifyEntry(lpSeek(lp3, 5), (unsigned char*)"much much longer non integer", 28);
+ verifyEntry(lpSeek(lp3, 6), (unsigned char*)"hello", 5);
+ verifyEntry(lpSeek(lp3, -1), (unsigned char*)"1024", 4);
+ zfree(lp3);
+ }
+
+ TEST("lpMerge two listpacks - second larger than first") {
+ unsigned char *lp1 = createList();
+ unsigned char *lp2 = createIntList();
+
+ size_t lp1_bytes = lpBytes(lp1);
+ size_t lp2_bytes = lpBytes(lp2);
+ unsigned long lp1_len = lpLength(lp1);
+ unsigned long lp2_len = lpLength(lp2);
+
+ unsigned char *lp3 = lpMerge(&lp1, &lp2);
+ assert(lp3 == lp2);
+ assert(lp1 == NULL);
+ assert(lpLength(lp3) == (lp1_len + lp2_len));
+ assert(lpBytes(lp3) == (lp1_bytes + lp2_bytes - LP_HDR_SIZE - 1));
+ verifyEntry(lpSeek(lp3, 0), (unsigned char*)"hello", 5);
+ verifyEntry(lpSeek(lp3, 3), (unsigned char*)"1024", 4);
+ verifyEntry(lpSeek(lp3, 4), (unsigned char*)"4294967296", 10);
+ verifyEntry(lpSeek(lp3, -1), (unsigned char*)"much much longer non integer", 28);
+ zfree(lp3);
+ }
+
+ TEST("lpNextRandom normal usage") {
+ /* Create some data */
+ unsigned char *lp = lpNew(0);
+ unsigned char buf[100] = "asdf";
+ unsigned int size = 100;
+ for (size_t i = 0; i < size; i++) {
+ lp = lpAppend(lp, buf, i);
+ }
+ assert(lpLength(lp) == size);
+
+ /* Pick a subset of the elements of every possible subset size */
+ for (unsigned int count = 0; count <= size; count++) {
+ unsigned int remaining = count;
+ unsigned char *p = lpFirst(lp);
+ unsigned char *prev = NULL;
+ unsigned index = 0;
+ while (remaining > 0) {
+ assert(p != NULL);
+ p = lpNextRandom(lp, p, &index, remaining--, 0);
+ assert(p != NULL);
+ assert(p != prev);
+ prev = p;
+ p = lpNext(lp, p);
+ index++;
+ }
+ }
+ lpFree(lp);
+ }
+
+ TEST("lpNextRandom corner cases") {
+ unsigned char *lp = lpNew(0);
+ unsigned i = 0;
+
+ /* Pick from empty listpack returns NULL. */
+ assert(lpNextRandom(lp, NULL, &i, 2, 0) == NULL);
+
+ /* Add some elements and find their pointers within the listpack. */
+ lp = lpAppend(lp, (unsigned char *)"abc", 3);
+ lp = lpAppend(lp, (unsigned char *)"def", 3);
+ lp = lpAppend(lp, (unsigned char *)"ghi", 3);
+ assert(lpLength(lp) == 3);
+ unsigned char *p0 = lpFirst(lp);
+ unsigned char *p1 = lpNext(lp, p0);
+ unsigned char *p2 = lpNext(lp, p1);
+ assert(lpNext(lp, p2) == NULL);
+
+ /* Pick zero elements returns NULL. */
+ i = 0; assert(lpNextRandom(lp, lpFirst(lp), &i, 0, 0) == NULL);
+
+ /* Pick all returns all. */
+ i = 0; assert(lpNextRandom(lp, p0, &i, 3, 0) == p0 && i == 0);
+ i = 1; assert(lpNextRandom(lp, p1, &i, 2, 0) == p1 && i == 1);
+ i = 2; assert(lpNextRandom(lp, p2, &i, 1, 0) == p2 && i == 2);
+
+ /* Pick more than one when there's only one left returns the last one. */
+ i = 2; assert(lpNextRandom(lp, p2, &i, 42, 0) == p2 && i == 2);
+
+ /* Pick all even elements returns p0 and p2. */
+ i = 0; assert(lpNextRandom(lp, p0, &i, 10, 1) == p0 && i == 0);
+ i = 1; assert(lpNextRandom(lp, p1, &i, 10, 1) == p2 && i == 2);
+
+ /* Don't crash even for bad index. */
+ for (int j = 0; j < 100; j++) {
+ unsigned char *p;
+ switch (j % 4) {
+ case 0: p = p0; break;
+ case 1: p = p1; break;
+ case 2: p = p2; break;
+ case 3: p = NULL; break;
+ }
+ i = j % 7;
+ unsigned int remaining = j % 5;
+ p = lpNextRandom(lp, p, &i, remaining, 0);
+ assert(p == p0 || p == p1 || p == p2 || p == NULL);
+ }
+ lpFree(lp);
+ }
+
+ TEST("Random pair with one element") {
+ listpackEntry key, val;
+ unsigned char *lp = lpNew(0);
+ lp = lpAppend(lp, (unsigned char*)"abc", 3);
+ lp = lpAppend(lp, (unsigned char*)"123", 3);
+ lpRandomPair(lp, 1, &key, &val);
+ assert(memcmp(key.sval, "abc", key.slen) == 0);
+ assert(val.lval == 123);
+ lpFree(lp);
+ }
+
+ TEST("Random pair with many elements") {
+ listpackEntry key, val;
+ unsigned char *lp = lpNew(0);
+ lp = lpAppend(lp, (unsigned char*)"abc", 3);
+ lp = lpAppend(lp, (unsigned char*)"123", 3);
+ lp = lpAppend(lp, (unsigned char*)"456", 3);
+ lp = lpAppend(lp, (unsigned char*)"def", 3);
+ lpRandomPair(lp, 2, &key, &val);
+ if (key.sval) {
+ assert(!memcmp(key.sval, "abc", key.slen));
+ assert(key.slen == 3);
+ assert(val.lval == 123);
+ }
+ if (!key.sval) {
+ assert(key.lval == 456);
+ assert(!memcmp(val.sval, "def", val.slen));
+ }
+ lpFree(lp);
+ }
+
+ TEST("Random pairs with one element") {
+ int count = 5;
+ unsigned char *lp = lpNew(0);
+ listpackEntry *keys = zmalloc(sizeof(listpackEntry) * count);
+ listpackEntry *vals = zmalloc(sizeof(listpackEntry) * count);
+
+ lp = lpAppend(lp, (unsigned char*)"abc", 3);
+ lp = lpAppend(lp, (unsigned char*)"123", 3);
+ lpRandomPairs(lp, count, keys, vals);
+ assert(memcmp(keys[4].sval, "abc", keys[4].slen) == 0);
+ assert(vals[4].lval == 123);
+ zfree(keys);
+ zfree(vals);
+ lpFree(lp);
+ }
+
+ TEST("Random pairs with many elements") {
+ int count = 5;
+ lp = lpNew(0);
+ listpackEntry *keys = zmalloc(sizeof(listpackEntry) * count);
+ listpackEntry *vals = zmalloc(sizeof(listpackEntry) * count);
+
+ lp = lpAppend(lp, (unsigned char*)"abc", 3);
+ lp = lpAppend(lp, (unsigned char*)"123", 3);
+ lp = lpAppend(lp, (unsigned char*)"456", 3);
+ lp = lpAppend(lp, (unsigned char*)"def", 3);
+ lpRandomPairs(lp, count, keys, vals);
+ for (int i = 0; i < count; i++) {
+ if (keys[i].sval) {
+ assert(!memcmp(keys[i].sval, "abc", keys[i].slen));
+ assert(keys[i].slen == 3);
+ assert(vals[i].lval == 123);
+ }
+ if (!keys[i].sval) {
+ assert(keys[i].lval == 456);
+ assert(!memcmp(vals[i].sval, "def", vals[i].slen));
+ }
+ }
+ zfree(keys);
+ zfree(vals);
+ lpFree(lp);
+ }
+
+ TEST("Random pairs unique with one element") {
+ unsigned picked;
+ int count = 5;
+ lp = lpNew(0);
+ listpackEntry *keys = zmalloc(sizeof(listpackEntry) * count);
+ listpackEntry *vals = zmalloc(sizeof(listpackEntry) * count);
+
+ lp = lpAppend(lp, (unsigned char*)"abc", 3);
+ lp = lpAppend(lp, (unsigned char*)"123", 3);
+ picked = lpRandomPairsUnique(lp, count, keys, vals);
+ assert(picked == 1);
+ assert(memcmp(keys[0].sval, "abc", keys[0].slen) == 0);
+ assert(vals[0].lval == 123);
+ zfree(keys);
+ zfree(vals);
+ lpFree(lp);
+ }
+
+ TEST("Random pairs unique with many elements") {
+ unsigned picked;
+ int count = 5;
+ lp = lpNew(0);
+ listpackEntry *keys = zmalloc(sizeof(listpackEntry) * count);
+ listpackEntry *vals = zmalloc(sizeof(listpackEntry) * count);
+
+ lp = lpAppend(lp, (unsigned char*)"abc", 3);
+ lp = lpAppend(lp, (unsigned char*)"123", 3);
+ lp = lpAppend(lp, (unsigned char*)"456", 3);
+ lp = lpAppend(lp, (unsigned char*)"def", 3);
+ picked = lpRandomPairsUnique(lp, count, keys, vals);
+ assert(picked == 2);
+ for (int i = 0; i < 2; i++) {
+ if (keys[i].sval) {
+ assert(!memcmp(keys[i].sval, "abc", keys[i].slen));
+ assert(keys[i].slen == 3);
+ assert(vals[i].lval == 123);
+ }
+ if (!keys[i].sval) {
+ assert(keys[i].lval == 456);
+ assert(!memcmp(vals[i].sval, "def", vals[i].slen));
+ }
+ }
+ zfree(keys);
+ zfree(vals);
+ lpFree(lp);
+ }
+
+ TEST("push various encodings") {
+ lp = lpNew(0);
+
+ /* Push integer encode element using lpAppend */
+ lp = lpAppend(lp, (unsigned char*)"127", 3);
+ assert(LP_ENCODING_IS_7BIT_UINT(lpLast(lp)[0]));
+ lp = lpAppend(lp, (unsigned char*)"4095", 4);
+ assert(LP_ENCODING_IS_13BIT_INT(lpLast(lp)[0]));
+ lp = lpAppend(lp, (unsigned char*)"32767", 5);
+ assert(LP_ENCODING_IS_16BIT_INT(lpLast(lp)[0]));
+ lp = lpAppend(lp, (unsigned char*)"8388607", 7);
+ assert(LP_ENCODING_IS_24BIT_INT(lpLast(lp)[0]));
+ lp = lpAppend(lp, (unsigned char*)"2147483647", 10);
+ assert(LP_ENCODING_IS_32BIT_INT(lpLast(lp)[0]));
+ lp = lpAppend(lp, (unsigned char*)"9223372036854775807", 19);
+ assert(LP_ENCODING_IS_64BIT_INT(lpLast(lp)[0]));
+
+ /* Push integer encode element using lpAppendInteger */
+ lp = lpAppendInteger(lp, 127);
+ assert(LP_ENCODING_IS_7BIT_UINT(lpLast(lp)[0]));
+ verifyEntry(lpLast(lp), (unsigned char*)"127", 3);
+ lp = lpAppendInteger(lp, 4095);
+ verifyEntry(lpLast(lp), (unsigned char*)"4095", 4);
+ assert(LP_ENCODING_IS_13BIT_INT(lpLast(lp)[0]));
+ lp = lpAppendInteger(lp, 32767);
+ verifyEntry(lpLast(lp), (unsigned char*)"32767", 5);
+ assert(LP_ENCODING_IS_16BIT_INT(lpLast(lp)[0]));
+ lp = lpAppendInteger(lp, 8388607);
+ verifyEntry(lpLast(lp), (unsigned char*)"8388607", 7);
+ assert(LP_ENCODING_IS_24BIT_INT(lpLast(lp)[0]));
+ lp = lpAppendInteger(lp, 2147483647);
+ verifyEntry(lpLast(lp), (unsigned char*)"2147483647", 10);
+ assert(LP_ENCODING_IS_32BIT_INT(lpLast(lp)[0]));
+ lp = lpAppendInteger(lp, 9223372036854775807);
+ verifyEntry(lpLast(lp), (unsigned char*)"9223372036854775807", 19);
+ assert(LP_ENCODING_IS_64BIT_INT(lpLast(lp)[0]));
+
+ /* string encode */
+ unsigned char *str = zmalloc(65535);
+ memset(str, 0, 65535);
+ lp = lpAppend(lp, (unsigned char*)str, 63);
+ assert(LP_ENCODING_IS_6BIT_STR(lpLast(lp)[0]));
+ lp = lpAppend(lp, (unsigned char*)str, 4095);
+ assert(LP_ENCODING_IS_12BIT_STR(lpLast(lp)[0]));
+ lp = lpAppend(lp, (unsigned char*)str, 65535);
+ assert(LP_ENCODING_IS_32BIT_STR(lpLast(lp)[0]));
+ zfree(str);
+ lpFree(lp);
+ }
+
+ TEST("Test lpFind") {
+ lp = createList();
+ assert(lpFind(lp, lpFirst(lp), (unsigned char*)"abc", 3, 0) == NULL);
+ verifyEntry(lpFind(lp, lpFirst(lp), (unsigned char*)"hello", 5, 0), (unsigned char*)"hello", 5);
+ verifyEntry(lpFind(lp, lpFirst(lp), (unsigned char*)"1024", 4, 0), (unsigned char*)"1024", 4);
+ lpFree(lp);
+ }
+
+ TEST("Test lpValidateIntegrity") {
+ lp = createList();
+ long count = 0;
+ assert(lpValidateIntegrity(lp, lpBytes(lp), 1, lpValidation, &count) == 1);
+ lpFree(lp);
+ }
+
+ TEST("Test number of elements exceeds LP_HDR_NUMELE_UNKNOWN") {
+ lp = lpNew(0);
+ for (int i = 0; i < LP_HDR_NUMELE_UNKNOWN + 1; i++)
+ lp = lpAppend(lp, (unsigned char*)"1", 1);
+
+ assert(lpGetNumElements(lp) == LP_HDR_NUMELE_UNKNOWN);
+ assert(lpLength(lp) == LP_HDR_NUMELE_UNKNOWN+1);
+
+ lp = lpDeleteRange(lp, -2, 2);
+ assert(lpGetNumElements(lp) == LP_HDR_NUMELE_UNKNOWN);
+ assert(lpLength(lp) == LP_HDR_NUMELE_UNKNOWN-1);
+ assert(lpGetNumElements(lp) == LP_HDR_NUMELE_UNKNOWN-1); /* update length after lpLength */
+ lpFree(lp);
+ }
+
+ TEST("Stress with random payloads of different encoding") {
+ unsigned long long start = usec();
+ int i,j,len,where;
+ unsigned char *p;
+ char buf[1024];
+ int buflen;
+ list *ref;
+ listNode *refnode;
+
+ int iteration = accurate ? 20000 : 20;
+ for (i = 0; i < iteration; i++) {
+ lp = lpNew(0);
+ ref = listCreate();
+ listSetFreeMethod(ref,(void (*)(void*))sdsfree);
+ len = rand() % 256;
+
+ /* Create lists */
+ for (j = 0; j < len; j++) {
+ where = (rand() & 1) ? 0 : 1;
+ if (rand() % 2) {
+ buflen = randstring(buf,1,sizeof(buf)-1);
+ } else {
+ switch(rand() % 3) {
+ case 0:
+ buflen = snprintf(buf,sizeof(buf),"%lld",(0LL + rand()) >> 20);
+ break;
+ case 1:
+ buflen = snprintf(buf,sizeof(buf),"%lld",(0LL + rand()));
+ break;
+ case 2:
+ buflen = snprintf(buf,sizeof(buf),"%lld",(0LL + rand()) << 20);
+ break;
+ default:
+ assert(NULL);
+ }
+ }
+
+ /* Add to listpack */
+ if (where == 0) {
+ lp = lpPrepend(lp, (unsigned char*)buf, buflen);
+ } else {
+ lp = lpAppend(lp, (unsigned char*)buf, buflen);
+ }
+
+ /* Add to reference list */
+ if (where == 0) {
+ listAddNodeHead(ref,sdsnewlen(buf, buflen));
+ } else if (where == 1) {
+ listAddNodeTail(ref,sdsnewlen(buf, buflen));
+ } else {
+ assert(NULL);
+ }
+ }
+
+ assert(listLength(ref) == lpLength(lp));
+ for (j = 0; j < len; j++) {
+ /* Naive way to get elements, but similar to the stresser
+ * executed from the Tcl test suite. */
+ p = lpSeek(lp,j);
+ refnode = listIndex(ref,j);
+
+ vstr = lpGet(p, &vlen, intbuf);
+ assert(memcmp(vstr,listNodeValue(refnode),vlen) == 0);
+ }
+ lpFree(lp);
+ listRelease(ref);
+ }
+ printf("Done. usec=%lld\n\n", usec()-start);
+ }
+
+ TEST("Stress with variable listpack size") {
+ unsigned long long start = usec();
+ int maxsize = accurate ? 16384 : 16;
+ stress(0,100000,maxsize,256);
+ stress(1,100000,maxsize,256);
+ printf("Done. usec=%lld\n\n", usec()-start);
+ }
+
+ /* Benchmarks */
+ {
+ int iteration = accurate ? 100000 : 100;
+ lp = lpNew(0);
+ TEST("Benchmark lpAppend") {
+ unsigned long long start = usec();
+ for (int i=0; i<iteration; i++) {
+ char buf[4096] = "asdf";
+ lp = lpAppend(lp, (unsigned char*)buf, 4);
+ lp = lpAppend(lp, (unsigned char*)buf, 40);
+ lp = lpAppend(lp, (unsigned char*)buf, 400);
+ lp = lpAppend(lp, (unsigned char*)buf, 4000);
+ lp = lpAppend(lp, (unsigned char*)"1", 1);
+ lp = lpAppend(lp, (unsigned char*)"10", 2);
+ lp = lpAppend(lp, (unsigned char*)"100", 3);
+ lp = lpAppend(lp, (unsigned char*)"1000", 4);
+ lp = lpAppend(lp, (unsigned char*)"10000", 5);
+ lp = lpAppend(lp, (unsigned char*)"100000", 6);
+ }
+ printf("Done. usec=%lld\n", usec()-start);
+ }
+
+ TEST("Benchmark lpFind string") {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ unsigned char *fptr = lpFirst(lp);
+ fptr = lpFind(lp, fptr, (unsigned char*)"nothing", 7, 1);
+ }
+ printf("Done. usec=%lld\n", usec()-start);
+ }
+
+ TEST("Benchmark lpFind number") {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ unsigned char *fptr = lpFirst(lp);
+ fptr = lpFind(lp, fptr, (unsigned char*)"99999", 5, 1);
+ }
+ printf("Done. usec=%lld\n", usec()-start);
+ }
+
+ TEST("Benchmark lpSeek") {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ lpSeek(lp, 99999);
+ }
+ printf("Done. usec=%lld\n", usec()-start);
+ }
+
+ TEST("Benchmark lpValidateIntegrity") {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ lpValidateIntegrity(lp, lpBytes(lp), 1, NULL, NULL);
+ }
+ printf("Done. usec=%lld\n", usec()-start);
+ }
+
+ TEST("Benchmark lpCompare with string") {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ unsigned char *eptr = lpSeek(lp,0);
+ while (eptr != NULL) {
+ lpCompare(eptr,(unsigned char*)"nothing",7);
+ eptr = lpNext(lp,eptr);
+ }
+ }
+ printf("Done. usec=%lld\n", usec()-start);
+ }
+
+ TEST("Benchmark lpCompare with number") {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ unsigned char *eptr = lpSeek(lp,0);
+ while (eptr != NULL) {
+ /* Compare each entry against the number. Passing 'lp' here
+ * would compare the listpack header, not the current element
+ * (see the string benchmark above, which uses 'eptr'). */
+ lpCompare(eptr, (unsigned char*)"99999", 5);
+ eptr = lpNext(lp,eptr);
+ }
+ }
+ printf("Done. usec=%lld\n", usec()-start);
+ }
+
+ lpFree(lp);
+ }
+
+ return 0;
+}
+
+#endif
diff --git a/src/listpack.h b/src/listpack.h
new file mode 100644
index 0000000..a60f089
--- /dev/null
+++ b/src/listpack.h
@@ -0,0 +1,106 @@
+/* Listpack -- A lists of strings serialization format
+ *
+ * This file implements the specification you can find at:
+ *
+ * https://github.com/antirez/listpack
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __LISTPACK_H
+#define __LISTPACK_H
+
+#include <stdlib.h>
+#include <stdint.h>
+
+#define LP_INTBUF_SIZE 21 /* 20 digits of -2^63 + 1 null term = 21. */
+
+/* lpInsert() where argument possible values: */
+#define LP_BEFORE 0
+#define LP_AFTER 1
+#define LP_REPLACE 2
+
+/* Each entry in the listpack is either a string or an integer. */
+typedef struct {
+ /* When string is used, it is provided with the length (slen). */
+ unsigned char *sval;
+ uint32_t slen;
+ /* When integer is used, 'sval' is NULL, and lval holds the value. */
+ long long lval;
+} listpackEntry;
+
+unsigned char *lpNew(size_t capacity);
+void lpFree(unsigned char *lp);
+unsigned char* lpShrinkToFit(unsigned char *lp);
+unsigned char *lpInsertString(unsigned char *lp, unsigned char *s, uint32_t slen,
+ unsigned char *p, int where, unsigned char **newp);
+unsigned char *lpInsertInteger(unsigned char *lp, long long lval,
+ unsigned char *p, int where, unsigned char **newp);
+unsigned char *lpPrepend(unsigned char *lp, unsigned char *s, uint32_t slen);
+unsigned char *lpPrependInteger(unsigned char *lp, long long lval);
+unsigned char *lpAppend(unsigned char *lp, unsigned char *s, uint32_t slen);
+unsigned char *lpAppendInteger(unsigned char *lp, long long lval);
+unsigned char *lpReplace(unsigned char *lp, unsigned char **p, unsigned char *s, uint32_t slen);
+unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long lval);
+unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp);
+unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num);
+unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num);
+unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count);
+unsigned char *lpMerge(unsigned char **first, unsigned char **second);
+unsigned char *lpDup(unsigned char *lp);
+unsigned long lpLength(unsigned char *lp);
+unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);
+unsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval);
+unsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, uint32_t slen, unsigned int skip);
+unsigned char *lpFirst(unsigned char *lp);
+unsigned char *lpLast(unsigned char *lp);
+unsigned char *lpNext(unsigned char *lp, unsigned char *p);
+unsigned char *lpPrev(unsigned char *lp, unsigned char *p);
+size_t lpBytes(unsigned char *lp);
+size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep);
+unsigned char *lpSeek(unsigned char *lp, long index);
+typedef int (*listpackValidateEntryCB)(unsigned char *p, unsigned int head_count, void *userdata);
+int lpValidateIntegrity(unsigned char *lp, size_t size, int deep,
+ listpackValidateEntryCB entry_cb, void *cb_userdata);
+unsigned char *lpValidateFirst(unsigned char *lp);
+int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes);
+unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen);
+void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val);
+void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
+unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
+void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries);
+unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
+ unsigned int remaining, int even_only);
+int lpSafeToAdd(unsigned char* lp, size_t add);
+void lpRepr(unsigned char *lp);
+
+#ifdef REDIS_TEST
+int listpackTest(int argc, char *argv[], int flags);
+#endif
+
+#endif
diff --git a/src/listpack_malloc.h b/src/listpack_malloc.h
new file mode 100644
index 0000000..a8a81c3
--- /dev/null
+++ b/src/listpack_malloc.h
@@ -0,0 +1,49 @@
+/* Listpack -- A lists of strings serialization format
+ * https://github.com/antirez/listpack
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Allocator selection.
+ *
+ * This file is used in order to change the listpack allocator at compile
+ * time. Just define the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#ifndef LISTPACK_ALLOC_H
+#define LISTPACK_ALLOC_H
+#include "zmalloc.h"
+/* We use zmalloc_usable/zrealloc_usable instead of zmalloc/zrealloc
+ * to ensure the safe invocation of 'zmalloc_usable_size()'.
+ * See comment in zmalloc_usable_size(). */
+#define lp_malloc(sz) zmalloc_usable(sz,NULL)
+#define lp_realloc(ptr,sz) zrealloc_usable(ptr,sz,NULL)
+#define lp_free zfree
+#define lp_malloc_size zmalloc_usable_size
+#endif
diff --git a/src/localtime.c b/src/localtime.c
new file mode 100644
index 0000000..1cefdfa
--- /dev/null
+++ b/src/localtime.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <time.h>
+
+/* This is a safe version of localtime() which contains no locks and is
+ * fork() friendly. Even the _r version of localtime() cannot be used safely
+ * in Redis. Another thread may be calling localtime() while the main thread
+ * forks(). Later when the child process calls localtime() again, for instance
+ * in order to log something to the Redis log, it may deadlock: in the copy
+ * of the address space of the forked process the lock will never be released.
+ *
+ * This function takes the timezone 'tz' as argument, and the 'dst' flag is
+ * used to check if daylight saving time is currently in effect. The caller
+ * of this function should obtain such information calling tzset() ASAP in the
+ * main() function to obtain the timezone offset from the 'timezone' global
+ * variable. To obtain the daylight information, if it is currently active or not,
+ * one trick is to call localtime() in main() ASAP as well, and get the
+ * information from the tm_isdst field of the tm structure. However the daylight
+ * time may switch in the future for long running processes, so this information
+ * should be refreshed at safe times.
+ *
+ * Note that this function does not work for dates < 1/1/1970, it is solely
+ * designed to work with what time(NULL) may return, and to support Redis
+ * logging of the dates, it's not really a complete implementation. */
/* Return 1 if 'year' is a leap year in the Gregorian calendar, 0 otherwise.
 * A leap year is a multiple of 4, except century years, which are leap
 * only when also divisible by 400. */
static int is_leap_year(time_t year) {
    if (year % 400 == 0) return 1; /* Divisible by 400: always leap. */
    if (year % 100 == 0) return 0; /* Other century years are not leap. */
    if (year % 4 == 0) return 1;   /* Remaining multiples of 4 are leap. */
    return 0;                      /* Everything else is a common year. */
}
+
/* Convert the UNIX time 't' into a broken down time stored in 'tmp',
 * without taking locks and without calling into the libc (fork safe).
 * 'tz' is the timezone offset in seconds and 'dst' tells whether daylight
 * saving time is currently active (adds one hour). See the comment above
 * for the full rationale. Only works for t >= 0 (dates since 1/1/1970). */
void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst) {
    const time_t secs_per_min = 60;
    const time_t secs_per_hour = 60 * 60;
    const time_t secs_per_day = 24 * 60 * 60;

    /* Move from UTC to local time. */
    t = (t - tz) + 3600 * dst;
    time_t days = t / secs_per_day; /* Whole days since the epoch. */
    time_t secs = t % secs_per_day; /* Seconds within the current day. */

    tmp->tm_isdst = dst;
    tmp->tm_hour = secs / secs_per_hour;
    tmp->tm_min = (secs % secs_per_hour) / secs_per_min;
    tmp->tm_sec = secs % secs_per_min;

    /* The epoch day (1/1/1970) was a Thursday, which is day 4 from the
     * POV of the tm structure, where Sunday = 0. */
    tmp->tm_wday = (days + 4) % 7;

    /* Consume whole years (leap years have one day more) until the
     * remaining days fall inside the current year. */
    int year = 1970;
    for (;;) {
        time_t year_len = 365 + is_leap_year(year);
        if (days < year_len) break;
        days -= year_len;
        year++;
    }
    tmp->tm_yday = days; /* Zero-based day of the current year. */

    /* Consume whole months (February gets the leap day) to locate the
     * current month and the day inside it. */
    int mdays[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
    mdays[1] += is_leap_year(year);
    int month = 0;
    while (days >= mdays[month]) {
        days -= mdays[month];
        month++;
    }
    tmp->tm_mon = month;

    tmp->tm_mday = days + 1;    /* tm_mday is one-based. */
    tmp->tm_year = year - 1900; /* Surprisingly tm_year is year-1900. */
}
+
+#ifdef LOCALTIME_TEST_MAIN
+#include <stdio.h>
+
+/* Minimal manual test: obtain the timezone/daylight data from the libc,
+ * then print the formatted local time produced by nolocks_localtime(). */
+int main(void) {
+ /* Obtain timezone and daylight info. */
+ tzset(); /* Now 'timezone' global is populated. */
+ time_t t = time(NULL);
+ struct tm *aux = localtime(&t); /* Safe here: single-threaded test program. */
+ int daylight_active = aux->tm_isdst;
+
+ struct tm tm;
+ char buf[1024];
+
+ nolocks_localtime(&tm,t,timezone,daylight_active);
+ strftime(buf,sizeof(buf),"%d %b %H:%M:%S",&tm);
+ printf("[timezone: %d, dl: %d] %s\n", (int)timezone, (int)daylight_active, buf);
+}
+#endif
diff --git a/src/logreqres.c b/src/logreqres.c
new file mode 100644
index 0000000..6e7621d
--- /dev/null
+++ b/src/logreqres.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2021, Redis Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This file implements the interface of logging clients' requests and
+ * responses into a file.
+ * This feature needs the LOG_REQ_RES macro to be defined at compile time
+ * and is turned on by the req-res-logfile config.
+ *
+ * Some examples:
+ *
+ * PING:
+ *
+ * 4
+ * ping
+ * 12
+ * __argv_end__
+ * +PONG
+ *
+ * LRANGE:
+ *
+ * 6
+ * lrange
+ * 4
+ * list
+ * 1
+ * 0
+ * 2
+ * -1
+ * 12
+ * __argv_end__
+ * *1
+ * $3
+ * ele
+ *
+ * The request is everything up until the __argv_end__ marker.
+ * The format is:
+ * <number of characters>
+ * <the argument>
+ *
+ * After __argv_end__ the response appears, and the format is
+ * RESP (2 or 3, depending on what the client has configured)
+ */
+
+#include "server.h"
+#include <ctype.h>
+
+#ifdef LOG_REQ_RES
+
+/* ----- Helpers ----- */
+
+/* Return 1 if request/response logging should be performed for client 'c',
+ * 0 otherwise. */
+static int reqresShouldLog(client *c) {
+ if (!server.req_res_logfile)
+ return 0;
+
+ /* Ignore client with streaming non-standard response */
+ if (c->flags & (CLIENT_PUBSUB|CLIENT_MONITOR|CLIENT_SLAVE))
+ return 0;
+
+ /* We only work on masters (didn't implement reqresAppendResponse to work on shared slave buffers) */
+ if (getClientType(c) == CLIENT_TYPE_MASTER)
+ return 0;
+
+ return 1;
+}
+
+/* Append 'len' bytes from 'buf' to the client's reqres log buffer, growing
+ * the buffer as needed. Returns the number of bytes appended (always 'len'). */
+static size_t reqresAppendBuffer(client *c, void *buf, size_t len) {
+ if (!c->reqres.buf) {
+ c->reqres.capacity = max(len, 1024); /* Initial allocation of at least 1KB. */
+ c->reqres.buf = zmalloc(c->reqres.capacity);
+ } else if (c->reqres.capacity - c->reqres.used < len) {
+ c->reqres.capacity += len; /* Grow by exactly 'len': no exponential growth. */
+ c->reqres.buf = zrealloc(c->reqres.buf, c->reqres.capacity);
+ }
+
+ memcpy(c->reqres.buf + c->reqres.used, buf, len);
+ c->reqres.used += len;
+ return len;
+}
+
+/* Functions for requests */
+
+/* Append a single request argument to the reqres buffer in the
+ * "<length>\r\n<argument>\r\n" format described in the header comment.
+ * Returns the total number of bytes appended. */
+static size_t reqresAppendArg(client *c, char *arg, size_t arg_len) {
+ char argv_len_buf[LONG_STR_SIZE];
+ size_t argv_len_buf_len = ll2string(argv_len_buf,sizeof(argv_len_buf),(long)arg_len);
+ size_t ret = reqresAppendBuffer(c, argv_len_buf, argv_len_buf_len);
+ ret += reqresAppendBuffer(c, "\r\n", 2);
+ ret += reqresAppendBuffer(c, arg, arg_len);
+ ret += reqresAppendBuffer(c, "\r\n", 2);
+ return ret;
+}
+
+/* ----- API ----- */
+
+
+/* Zero out the clientReqResInfo struct inside the client, freeing the
+ * log buffer first only when 'free_buf' is non-zero. */
+void reqresReset(client *c, int free_buf) {
+ if (free_buf && c->reqres.buf)
+ zfree(c->reqres.buf);
+ memset(&c->reqres, 0, sizeof(c->reqres));
+}
+
+/* Save the offset of the reply buffer (or the reply list).
+ * Should be called when adding a reply (but it will only save the offset
+ * on the very first time it's called, because of c->reqres.offset.saved)
+ * The idea is:
+ * 1. When a client is executing a command, we save the reply offset.
+ * 2. During the execution, the reply offset may grow, as addReply* functions are called.
+ * 3. When client is done with the command (commandProcessed), reqresAppendResponse
+ * is called.
+ * 4. reqresAppendResponse will append the diff between the current offset and the one from step (1)
+ * 5. When client is reset before the next command, we clear c->reqres.offset.saved and start again
+ *
+ * We cannot rely on c->sentlen to keep track because it depends on the network
+ * (reqresAppendResponse will always write the whole buffer, unlike writeToClient)
+ *
+ * Ideally, we would just have this code inside reqresAppendRequest, which is called
+ * from processCommand, but we cannot save the reply offset inside processCommand
+ * because of the following pipe-lining scenario:
+ * set rd [redis_deferring_client]
+ * set buf ""
+ * append buf "SET key vale\r\n"
+ * append buf "BLPOP mylist 0\r\n"
+ * $rd write $buf
+ * $rd flush
+ *
+ * Let's assume we save the reply offset in processCommand
+ * When BLPOP is processed the offset is 5 (+OK\r\n from the SET)
+ * Then beforeSleep is called, the +OK is written to network, and bufpos is 0
+ * When the client is finally unblocked, the cached offset is 5, but bufpos is already
+ * 0, so we would miss the first 5 bytes of the reply.
+ **/
+void reqresSaveClientReplyOffset(client *c) {
+ if (!reqresShouldLog(c))
+ return;
+
+ if (c->reqres.offset.saved) /* Only the first call per command saves (see above). */
+ return;
+
+ c->reqres.offset.saved = 1;
+
+ c->reqres.offset.bufpos = c->bufpos; /* Offset into the static reply buffer. */
+ if (listLength(c->reply) && listNodeValue(listLast(c->reply))) {
+ /* Remember where the last (possibly partially filled) reply list node ends. */
+ c->reqres.offset.last_node.index = listLength(c->reply) - 1;
+ c->reqres.offset.last_node.used = ((clientReplyBlock *)listNodeValue(listLast(c->reply)))->used;
+ } else {
+ c->reqres.offset.last_node.index = 0;
+ c->reqres.offset.last_node.used = 0;
+ }
+}
+
+/* Log the client's current command (c->argv) to the reqres buffer, one
+ * "<length>\r\n<argument>\r\n" pair per argument, terminated by the
+ * __argv_end__ marker. Returns the number of bytes appended, or 0 if
+ * logging was skipped for this command. */
+size_t reqresAppendRequest(client *c) {
+ robj **argv = c->argv;
+ int argc = c->argc;
+
+ serverAssert(argc);
+
+ if (!reqresShouldLog(c))
+ return 0;
+
+ /* Ignore commands that have streaming non-standard response */
+ sds cmd = argv[0]->ptr;
+ if (!strcasecmp(cmd,"debug") || /* because of DEBUG SEGFAULT */
+ !strcasecmp(cmd,"sync") ||
+ !strcasecmp(cmd,"psync") ||
+ !strcasecmp(cmd,"monitor") ||
+ !strcasecmp(cmd,"subscribe") ||
+ !strcasecmp(cmd,"unsubscribe") ||
+ !strcasecmp(cmd,"ssubscribe") ||
+ !strcasecmp(cmd,"sunsubscribe") ||
+ !strcasecmp(cmd,"psubscribe") ||
+ !strcasecmp(cmd,"punsubscribe"))
+ {
+ return 0;
+ }
+
+ c->reqres.argv_logged = 1; /* Tells reqresAppendResponse a reply should be logged. */
+
+ size_t ret = 0;
+ for (int i = 0; i < argc; i++) {
+ if (sdsEncodedObject(argv[i])) {
+ ret += reqresAppendArg(c, argv[i]->ptr, sdslen(argv[i]->ptr));
+ } else if (argv[i]->encoding == OBJ_ENCODING_INT) {
+ /* Integer-encoded objects store the value in the pointer itself. */
+ char buf[LONG_STR_SIZE];
+ size_t len = ll2string(buf,sizeof(buf),(long)argv[i]->ptr);
+ ret += reqresAppendArg(c, buf, len);
+ } else {
+ serverPanic("Wrong encoding in reqresAppendRequest()");
+ }
+ }
+ return ret + reqresAppendArg(c, "__argv_end__", 12);
+}
+
+/* Append the response bytes generated by the current command (everything
+ * added to the reply buffer/list since reqresSaveClientReplyOffset was
+ * called) to the reqres buffer, then flush the accumulated request and
+ * response to the req-res-logfile. Returns the number of response bytes
+ * appended, or 0 if logging does not apply to this command. */
+size_t reqresAppendResponse(client *c) {
+ size_t ret = 0;
+
+ if (!reqresShouldLog(c))
+ return 0;
+
+ if (!c->reqres.argv_logged) /* Example: UNSUBSCRIBE */
+ return 0;
+
+ if (!c->reqres.offset.saved) /* Example: module client blocked on keys + CLIENT KILL */
+ return 0;
+
+ /* First append the static reply buffer */
+ if (c->bufpos > c->reqres.offset.bufpos) {
+ size_t written = reqresAppendBuffer(c, c->buf + c->reqres.offset.bufpos, c->bufpos - c->reqres.offset.bufpos);
+ ret += written;
+ }
+
+ /* Find the current end position of the reply list. */
+ int curr_index = 0;
+ size_t curr_used = 0;
+ if (listLength(c->reply)) {
+ curr_index = listLength(c->reply) - 1;
+ curr_used = ((clientReplyBlock *)listNodeValue(listLast(c->reply)))->used;
+ }
+
+ /* Now, append reply bytes from the reply list */
+ if (curr_index > c->reqres.offset.last_node.index ||
+ curr_used > c->reqres.offset.last_node.used)
+ {
+ int i = 0;
+ listIter iter;
+ listNode *curr;
+ clientReplyBlock *o;
+ listRewind(c->reply, &iter);
+ while ((curr = listNext(&iter)) != NULL) {
+ size_t written;
+
+ /* Skip nodes we had already processed */
+ if (i < c->reqres.offset.last_node.index) {
+ i++;
+ continue;
+ }
+ o = listNodeValue(curr);
+ if (o->used == 0) {
+ i++;
+ continue;
+ }
+ if (i == c->reqres.offset.last_node.index) {
+ /* Write the potentially incomplete node, which had data from
+ * before the current command started */
+ written = reqresAppendBuffer(c,
+ o->buf + c->reqres.offset.last_node.used,
+ o->used - c->reqres.offset.last_node.used);
+ } else {
+ /* New node */
+ written = reqresAppendBuffer(c, o->buf, o->used);
+ }
+ ret += written;
+ i++;
+ }
+ }
+ serverAssert(ret); /* A logged request must produce at least one reply byte. */
+
+ /* Flush both request and response to file */
+ FILE *fp = fopen(server.req_res_logfile, "a");
+ serverAssert(fp);
+ fwrite(c->reqres.buf, c->reqres.used, 1, fp); /* NOTE(review): result unchecked; logging is best-effort. */
+ fclose(fp);
+
+ return ret;
+}
+
+#else /* #ifdef LOG_REQ_RES */
+
+/* Just mimic the API without doing anything */
+
+void reqresReset(client *c, int free_buf) {
+ UNUSED(c);
+ UNUSED(free_buf);
+}
+
+inline void reqresSaveClientReplyOffset(client *c) {
+ UNUSED(c);
+}
+
+inline size_t reqresAppendRequest(client *c) {
+ UNUSED(c);
+ return 0; /* Nothing is ever logged when LOG_REQ_RES is not compiled in. */
+}
+
+inline size_t reqresAppendResponse(client *c) {
+ UNUSED(c);
+ return 0; /* Nothing is ever logged when LOG_REQ_RES is not compiled in. */
+}
+
+#endif /* #ifdef LOG_REQ_RES */
diff --git a/src/lolwut.c b/src/lolwut.c
new file mode 100644
index 0000000..c014840
--- /dev/null
+++ b/src/lolwut.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This file implements the LOLWUT command. The command should do something
+ * fun and interesting, and should be replaced by a new implementation at
+ * each new version of Redis.
+ */
+
+#include "server.h"
+#include "lolwut.h"
+#include <math.h>
+
+void lolwut5Command(client *c);
+void lolwut6Command(client *c);
+
+/* Fallback LOLWUT output, used when no version-specific implementation
+ * matches the requested version. This is what unstable versions of Redis
+ * display: just the version banner. */
+void lolwutUnstableCommand(client *c) {
+    sds banner = sdscatprintf(sdsempty(),"Redis ver. %s\n",REDIS_VERSION);
+    addReplyVerbatim(c,banner,sdslen(banner),"txt");
+    sdsfree(banner);
+}
+
+/* LOLWUT [VERSION <version>] [... version specific arguments ...] */
+/* LOLWUT [VERSION <version>] [... version specific arguments ...]
+ *
+ * Dispatch to the LOLWUT implementation matching the requested (or the
+ * compiled-in) Redis version, falling back to a plain banner for
+ * unknown/unstable versions. */
+void lolwutCommand(client *c) {
+    char *v = REDIS_VERSION;
+    char verstr[64];
+
+    if (c->argc >= 3 && !strcasecmp(c->argv[1]->ptr,"version")) {
+        long ver;
+        if (getLongFromObjectOrReply(c,c->argv[2],&ver,NULL) != C_OK) return;
+        /* NOTE(review): a negative <version> wraps through this unsigned
+         * cast and ends up selecting the fallback output — presumably
+         * harmless, but confirm it is intended. */
+        snprintf(verstr,sizeof(verstr),"%u.0.0",(unsigned int)ver);
+        v = verstr;
+
+        /* Adjust argv/argc to filter the "VERSION ..." option, since the
+         * specific LOLWUT version implementations don't know about it
+         * and expect their arguments. */
+        c->argv += 2;
+        c->argc -= 2;
+    }
+
+    /* Version x.9 maps to the next major's art (4.9 -> LOLWUT 5,
+     * 5.9 -> LOLWUT 6), presumably because x.9 is a pre-release of the
+     * next major version. */
+    if ((v[0] == '5' && v[1] == '.' && v[2] != '9') ||
+        (v[0] == '4' && v[1] == '.' && v[2] == '9'))
+        lolwut5Command(c);
+    else if ((v[0] == '6' && v[1] == '.' && v[2] != '9') ||
+             (v[0] == '5' && v[1] == '.' && v[2] == '9'))
+        lolwut6Command(c);
+    else
+        lolwutUnstableCommand(c);
+
+    /* Fix back argc/argv in case of VERSION argument. */
+    if (v == verstr) {
+        c->argv -= 2;
+        c->argc += 2;
+    }
+}
+
+/* ========================== LOLWUT Canvas ===============================
+ * Many LOLWUT versions will likely print some computer art to the screen.
+ * This is the case with LOLWUT 5 and LOLWUT 6, so here there is a generic
+ * canvas implementation that can be reused. */
+
+/* Allocate and return a new canvas of the given size, with every pixel
+ * initialized to 'bgcolor'. Free it with lwFreeCanvas(). */
+lwCanvas *lwCreateCanvas(int width, int height, int bgcolor) {
+    size_t npixels = (size_t)width*height;
+    lwCanvas *c = zmalloc(sizeof(*c));
+    c->width = width;
+    c->height = height;
+    c->pixels = zmalloc(npixels);
+    memset(c->pixels,bgcolor,npixels);
+    return c;
+}
+
+/* Free the canvas created by lwCreateCanvas(), including its pixel
+ * array. */
+void lwFreeCanvas(lwCanvas *canvas) {
+    zfree(canvas->pixels);
+    zfree(canvas);
+}
+
+/* Set a pixel to the specified color (0 = no dot, 1 = dot). The origin
+ * 0,0 is the top-left corner; writes outside the canvas bounds are
+ * silently ignored, so callers may draw out of range safely. */
+void lwDrawPixel(lwCanvas *canvas, int x, int y, int color) {
+    int inside = (x >= 0 && x < canvas->width &&
+                  y >= 0 && y < canvas->height);
+    if (!inside) return;
+    canvas->pixels[y*canvas->width + x] = color;
+}
+
+/* Return the value of the specified pixel, or 0 when the coordinates
+ * fall outside the canvas. */
+int lwGetPixel(lwCanvas *canvas, int x, int y) {
+    int inside = (x >= 0 && x < canvas->width &&
+                  y >= 0 && y < canvas->height);
+    if (!inside) return 0;
+    return canvas->pixels[y*canvas->width + x];
+}
+
+/* Draw a line from x1,y1 to x2,y2 using the Bresenham algorithm
+ * (integer error-accumulation form; no floating point). */
+void lwDrawLine(lwCanvas *canvas, int x1, int y1, int x2, int y2, int color) {
+    int dx = abs(x2-x1);
+    int dy = abs(y2-y1);
+    int sx = (x1 < x2) ? 1 : -1;    /* Step direction along x. */
+    int sy = (y1 < y2) ? 1 : -1;    /* Step direction along y. */
+    int err = dx-dy, e2;            /* Error term and its doubled value. */
+
+    while(1) {
+        lwDrawPixel(canvas,x1,y1,color);
+        /* Plot the endpoint before stopping, so both ends are drawn. */
+        if (x1 == x2 && y1 == y2) break;
+        e2 = err*2;
+        if (e2 > -dy) {
+            err -= dy;
+            x1 += sx;
+        }
+        if (e2 < dx) {
+            err += dx;
+            y1 += sy;
+        }
+    }
+}
+
+/* Draw a square centered at the specified x,y coordinates, with the specified
+ * rotation angle and size. In order to write a rotated square, we use the
+ * trivial fact that the parametric equation:
+ *
+ * x = sin(k)
+ * y = cos(k)
+ *
+ * Describes a circle for values going from 0 to 2*PI. So basically if we start
+ * at 45 degrees, that is k = PI/4, with the first point, and then we find
+ * the other three points incrementing K by PI/2 (90 degrees), we'll have the
+ * points of the square. In order to rotate the square, we just start with
+ * k = PI/4 + rotation_angle, and we are done.
+ *
+ * Of course the vanilla equations above will describe the square inside a
+ * circle of radius 1, so in order to draw larger squares we'll have to
+ * multiply the obtained coordinates, and then translate them. However this
+ * is much simpler than implementing the abstract concept of 2D shape and then
+ * performing the rotation/translation transformation, so for LOLWUT it's
+ * a good approach. */
+void lwDrawSquare(lwCanvas *canvas, int x, int y, float size, float angle, int color) {
+ int px[4], py[4];
+
+ /* Adjust the desired size according to the fact that the square inscribed
+ * into a circle of radius 1 has the side of length SQRT(2). This way
+ * size becomes a simple multiplication factor we can use with our
+ * coordinates to magnify them. */
+ size /= 1.4142135623;
+ size = round(size);
+
+ /* Compute the four points. */
+ float k = M_PI/4 + angle;
+ for (int j = 0; j < 4; j++) {
+ px[j] = round(sin(k) * size + x);
+ py[j] = round(cos(k) * size + y);
+ k += M_PI/2;
+ }
+
+ /* Draw the square. */
+ for (int j = 0; j < 4; j++)
+ lwDrawLine(canvas,px[j],py[j],px[(j+1)%4],py[(j+1)%4],color);
+}
diff --git a/src/lolwut.h b/src/lolwut.h
new file mode 100644
index 0000000..682d005
--- /dev/null
+++ b/src/lolwut.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018-2019, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This structure represents our canvas. Drawing functions will take a pointer
+ * to a canvas to write to it. Later the canvas can be rendered to a string
+ * suitable to be printed on the screen, using unicode Braille characters. */
+
+/* This represents a very simple generic canvas in order to draw stuff.
+ * It's up to each LOLWUT versions to translate what they draw to the
+ * screen, depending on the result to accomplish. */
+
+#ifndef __LOLWUT_H
+#define __LOLWUT_H
+
+typedef struct lwCanvas {
+ int width;
+ int height;
+ char *pixels;
+} lwCanvas;
+
+/* Drawing functions implemented inside lolwut.c. */
+lwCanvas *lwCreateCanvas(int width, int height, int bgcolor);
+void lwFreeCanvas(lwCanvas *canvas);
+void lwDrawPixel(lwCanvas *canvas, int x, int y, int color);
+int lwGetPixel(lwCanvas *canvas, int x, int y);
+void lwDrawLine(lwCanvas *canvas, int x1, int y1, int x2, int y2, int color);
+void lwDrawSquare(lwCanvas *canvas, int x, int y, float size, float angle, int color);
+
+#endif
diff --git a/src/lolwut5.c b/src/lolwut5.c
new file mode 100644
index 0000000..1240168
--- /dev/null
+++ b/src/lolwut5.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This file implements the LOLWUT command. The command should do something
+ * fun and interesting, and should be replaced by a new implementation at
+ * each new version of Redis.
+ */
+
+#include "server.h"
+#include "lolwut.h"
+#include <math.h>
+
+/* Translate a group of 8 pixels (2x4 vertical rectangle) to the corresponding
+ * braille character. The byte should correspond to the pixels arranged as
+ * follows, where 0 is the least significant bit, and 7 the most significant
+ * bit:
+ *
+ * 0 3
+ * 1 4
+ * 2 5
+ * 6 7
+ *
+ * The corresponding utf8 encoded character is set into the three bytes
+ * pointed by 'output'.
+ */
+/* NOTE(review): this mid-file include looks like a leftover from
+ * debugging; nothing in this function uses stdio — confirm and remove. */
+#include <stdio.h>
+void lwTranslatePixelsGroup(int byte, char *output) {
+    int code = 0x2800 + byte;   /* U+2800 is the base of the Braille block. */
+    /* Convert to unicode. This is in the U0800-UFFFF range, so we need to
+     * emit it like this in three bytes:
+     * 1110xxxx 10xxxxxx 10xxxxxx. */
+    output[0] = 0xE0 | (code >> 12); /* 1110-xxxx */
+    output[1] = 0x80 | ((code >> 6) & 0x3F); /* 10-xxxxxx */
+    output[2] = 0x80 | (code & 0x3F); /* 10-xxxxxx */
+}
+
+/* Schotter, the output of LOLWUT of Redis 5, is a computer graphic art piece
+ * generated by Georg Nees in the 60s. It explores the relationship between
+ * chaos and order.
+ *
+ * The function creates the canvas itself, depending on the columns available
+ * in the output display and the number of squares per row and per column
+ * requested by the caller. */
+lwCanvas *lwDrawSchotter(int console_cols, int squares_per_row, int squares_per_col) {
+    /* Calculate the canvas size. Each text column maps to 2 pixels
+     * (renderCanvas() consumes the canvas in 2x4 Braille cells). */
+    int canvas_width = console_cols*2;
+    int padding = canvas_width > 4 ? 2 : 0;
+    float square_side = (float)(canvas_width-padding*2) / squares_per_row;
+    int canvas_height = square_side * squares_per_col + padding*2;
+    lwCanvas *canvas = lwCreateCanvas(canvas_width, canvas_height, 0);
+
+    for (int y = 0; y < squares_per_col; y++) {
+        for (int x = 0; x < squares_per_row; x++) {
+            int sx = x * square_side + square_side/2 + padding;
+            int sy = y * square_side + square_side/2 + padding;
+            /* Rotate and translate randomly as we go down to lower
+             * rows: r1 perturbs the angle, r2/r3 the x/y position, all
+             * scaled by the row index y so disorder grows downwards. */
+            float angle = 0;
+            if (y > 1) {
+                float r1 = (float)rand() / (float) RAND_MAX / squares_per_col * y;
+                float r2 = (float)rand() / (float) RAND_MAX / squares_per_col * y;
+                float r3 = (float)rand() / (float) RAND_MAX / squares_per_col * y;
+                if (rand() % 2) r1 = -r1;
+                if (rand() % 2) r2 = -r2;
+                if (rand() % 2) r3 = -r3;
+                angle = r1;
+                sx += r2*square_side/3;
+                sy += r3*square_side/3;
+            }
+            lwDrawSquare(canvas,sx,sy,square_side,angle,1);
+        }
+    }
+
+    return canvas;
+}
+
+/* Converts the canvas to an SDS string representing the UTF8 characters to
+ * print to the terminal in order to obtain a graphical representation of the
+ * logical canvas. The actual returned string will require a terminal that is
+ * width/2 large and height/4 tall in order to hold the whole image without
+ * overflowing or scrolling, since each Braille character is 2x4. */
+/* Convert the canvas to an sds string of UTF-8 Braille characters; each
+ * output character encodes a 2x4 block of pixels. Caller must sdsfree()
+ * the returned string. */
+static sds renderCanvas(lwCanvas *canvas) {
+    sds text = sdsempty();
+    for (int y = 0; y < canvas->height; y += 4) {
+        for (int x = 0; x < canvas->width; x += 2) {
+            /* We need to emit groups of 8 bits according to a specific
+             * arrangement. See lwTranslatePixelsGroup() for more info. */
+            int byte = 0;
+            if (lwGetPixel(canvas,x,y)) byte |= (1<<0);
+            if (lwGetPixel(canvas,x,y+1)) byte |= (1<<1);
+            if (lwGetPixel(canvas,x,y+2)) byte |= (1<<2);
+            if (lwGetPixel(canvas,x+1,y)) byte |= (1<<3);
+            if (lwGetPixel(canvas,x+1,y+1)) byte |= (1<<4);
+            if (lwGetPixel(canvas,x+1,y+2)) byte |= (1<<5);
+            if (lwGetPixel(canvas,x,y+3)) byte |= (1<<6);
+            if (lwGetPixel(canvas,x+1,y+3)) byte |= (1<<7);
+            char unicode[3];    /* 3-byte UTF-8 sequence, not NUL terminated. */
+            lwTranslatePixelsGroup(byte,unicode);
+            text = sdscatlen(text,unicode,3);
+        }
+        if (y != canvas->height-1) text = sdscatlen(text,"\n",1);
+    }
+    return text;
+}
+
+/* The LOLWUT command:
+ *
+ * LOLWUT [terminal columns] [squares-per-row] [squares-per-col]
+ *
+ * By default the command uses 66 columns, 8 squares per row, 12 squares
+ * per column.
+ */
+void lolwut5Command(client *c) {
+    /* Defaults: 66 terminal columns, 8 squares per row, 12 per column. */
+    long term_cols = 66;
+    long sq_per_row = 8;
+    long sq_per_col = 12;
+
+    /* Each optional argument, when present, must parse as an integer. */
+    if (c->argc > 1 &&
+        getLongFromObjectOrReply(c,c->argv[1],&term_cols,NULL) != C_OK)
+        return;
+    if (c->argc > 2 &&
+        getLongFromObjectOrReply(c,c->argv[2],&sq_per_row,NULL) != C_OK)
+        return;
+    if (c->argc > 3 &&
+        getLongFromObjectOrReply(c,c->argv[3],&sq_per_col,NULL) != C_OK)
+        return;
+
+    /* Clamp every parameter so LOLWUT stays reasonably fast and cheap
+     * to execute, bounding columns, rows and output resolution. */
+    if (term_cols < 1) term_cols = 1;
+    else if (term_cols > 1000) term_cols = 1000;
+    if (sq_per_row < 1) sq_per_row = 1;
+    else if (sq_per_row > 200) sq_per_row = 200;
+    if (sq_per_col < 1) sq_per_col = 1;
+    else if (sq_per_col > 200) sq_per_col = 200;
+
+    /* Generate some computer art and reply. */
+    lwCanvas *canvas = lwDrawSchotter(term_cols,sq_per_row,sq_per_col);
+    sds rendered = renderCanvas(canvas);
+    rendered = sdscat(rendered,
+        "\nGeorg Nees - schotter, plotter on paper, 1968. Redis ver. ");
+    rendered = sdscat(rendered,REDIS_VERSION);
+    rendered = sdscatlen(rendered,"\n",1);
+    addReplyVerbatim(c,rendered,sdslen(rendered),"txt");
+    sdsfree(rendered);
+    lwFreeCanvas(canvas);
+}
diff --git a/src/lolwut6.c b/src/lolwut6.c
new file mode 100644
index 0000000..1ba111c
--- /dev/null
+++ b/src/lolwut6.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2019, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This file implements the LOLWUT command. The command should do something
+ * fun and interesting, and should be replaced by a new implementation at
+ * each new version of Redis.
+ *
+ * Thanks to Michele Hiki Falcone for the original image that inspired
+ * the image, part of his game, Plaguemon.
+ *
+ * Thanks to the Shhh computer art collective for the help in tuning the
+ * output to have a better artistic effect.
+ */
+
+#include "server.h"
+#include "lolwut.h"
+
+/* Render the canvas using the four gray levels of the standard color
+ * terminal: they match very well to the grayscale display of the gameboy.
+ * Returns a freshly allocated sds string the caller must sdsfree(). */
+static sds renderCanvas(lwCanvas *canvas) {
+    sds text = sdsempty();
+    for (int y = 0; y < canvas->height; y++) {
+        for (int x = 0; x < canvas->width; x++) {
+            int color = lwGetPixel(canvas,x,y);
+            /* Color escape sequence. Const-qualified: these point at
+             * string literals, which must never be written through. */
+            const char *ce;
+
+            /* Note that we set both the foreground and background color.
+             * This way we are able to get a more consistent result among
+             * different terminals implementations. */
+            switch(color) {
+            case 0: ce = "0;30;40m"; break;  /* Black */
+            case 1: ce = "0;90;100m"; break; /* Gray 1 */
+            case 2: ce = "0;37;47m"; break;  /* Gray 2 */
+            case 3: ce = "0;97;107m"; break; /* White */
+            default: ce = "0;30;40m"; break; /* Just for safety. */
+            }
+            text = sdscatprintf(text,"\033[%s \033[0m",ce);
+        }
+        if (y != canvas->height-1) text = sdscatlen(text,"\n",1);
+    }
+    return text;
+}
+
+/* Parameters describing a single skyscraper to draw; consumed by
+ * generateSkyscraper() below, which draws it on the canvas. Window
+ * colors are random and are always one of the two grays. */
+struct skyscraper {
+    int xoff;    /* X offset. */
+    int width;   /* Pixels width. */
+    int height;  /* Pixels height. */
+    int windows; /* Draw windows if true. */
+    int color;   /* Color of the skyscraper. */
+};
+
+/* Draw the single skyscraper described by 'si' onto the canvas, growing
+ * upward from the bottom row, with an inset roof and optional windows. */
+void generateSkyscraper(lwCanvas *canvas, struct skyscraper *si) {
+    int starty = canvas->height-1;       /* Ground line (bottom row). */
+    int endy = starty - si->height + 1;  /* Roof line (top row). */
+    for (int y = starty; y >= endy; y--) {
+        for (int x = si->xoff; x < si->xoff+si->width; x++) {
+            /* The roof is four pixels less wide. */
+            if (y == endy && (x <= si->xoff+1 || x >= si->xoff+si->width-2))
+                continue;
+            int color = si->color;
+            /* Alter the color if this is a place where we want to
+             * draw a window. We check that we are in the inner part of the
+             * skyscraper, so that windows are far from the borders. */
+            if (si->windows &&
+                x > si->xoff+1 &&
+                x < si->xoff+si->width-2 &&
+                y > endy+1 &&
+                y < starty-1)
+            {
+                /* Calculate the x,y position relative to the start of
+                 * the window area. */
+                int relx = x - (si->xoff+1);
+                int rely = y - (endy+1);
+
+                /* Note that we want the windows to be two pixels wide
+                 * but just one pixel tall, because terminal "pixels"
+                 * (characters) are not square. */
+                if (relx/2 % 2 && rely % 2) {
+                    do {
+                        color = 1 + rand() % 2;
+                    } while (color == si->color);
+                    /* Except we want adjacent pixels creating the same
+                     * window to be the same color. */
+                    if (relx % 2) color = lwGetPixel(canvas,x-1,y);
+                }
+            }
+            lwDrawPixel(canvas,x,y,color);
+        }
+    }
+}
+
+/* Generate a skyline inspired by the parallax backgrounds of 8 bit games:
+ * two background passes of windowless gray skyscrapers, then a black
+ * foreground pass with windows. */
+void generateSkyline(lwCanvas *canvas) {
+    struct skyscraper si;
+
+    /* First draw the background skyscraper without windows, using the
+     * two different grays. We use two passes to make sure that the lighter
+     * ones are always in the background. */
+    for (int color = 2; color >= 1; color--) {
+        si.color = color;
+        for (int offset = -10; offset < canvas->width;) {
+            offset += rand() % 8;
+            si.xoff = offset;
+            si.width = 10 + rand()%9;
+            /* NOTE(review): '%' and '/' associate left-to-right, so the
+             * expressions below compute (rand()%height)/N, not
+             * rand()%(height/N) — confirm this jitter range is intended. */
+            if (color == 2)
+                si.height = canvas->height/2 + rand()%canvas->height/2;
+            else
+                si.height = canvas->height/2 + rand()%canvas->height/3;
+            si.windows = 0;
+            generateSkyscraper(canvas, &si);
+            /* Lighter (more distant) buildings may overlap more densely. */
+            if (color == 2)
+                offset += si.width/2;
+            else
+                offset += si.width+1;
+        }
+    }
+
+    /* Now draw the foreground skyscraper with the windows. */
+    si.color = 0;
+    for (int offset = -10; offset < canvas->width;) {
+        offset += rand() % 8;
+        si.xoff = offset;
+        si.width = 5 + rand()%14;
+        /* Width tweak — presumably to vary the silhouettes; confirm. */
+        if (si.width % 4) si.width += (si.width % 3);
+        si.height = canvas->height/3 + rand()%canvas->height/2;
+        si.windows = 1;
+        generateSkyscraper(canvas, &si);
+        offset += si.width+5;
+    }
+}
+
+/* The LOLWUT 6 command:
+ *
+ * LOLWUT [columns] [rows]
+ *
+ * By default the command uses 80 columns and 20 rows.
+ */
+void lolwut6Command(client *c) {
+    long cols = 80;
+    long rows = 20;
+
+    /* Parse the optional arguments if any. */
+    if (c->argc > 1 &&
+        getLongFromObjectOrReply(c,c->argv[1],&cols,NULL) != C_OK)
+        return;
+
+    if (c->argc > 2 &&
+        getLongFromObjectOrReply(c,c->argv[2],&rows,NULL) != C_OK)
+        return;
+
+    /* Limits. We want LOLWUT to be always reasonably fast and cheap to execute
+     * so we have maximum number of columns, rows, and output resolution. */
+    if (cols < 1) cols = 1;
+    if (cols > 1000) cols = 1000;
+    if (rows < 1) rows = 1;
+    if (rows > 1000) rows = 1000;
+
+    /* Generate the city skyline and reply. Background color 3 (white)
+     * is the empty sky. */
+    lwCanvas *canvas = lwCreateCanvas(cols,rows,3);
+    generateSkyline(canvas);
+    sds rendered = renderCanvas(canvas);
+    rendered = sdscat(rendered,
+        "\nDedicated to the 8 bit game developers of past and present.\n"
+        "Original 8 bit image from Plaguemon by hikikomori. Redis ver. ");
+    rendered = sdscat(rendered,REDIS_VERSION);
+    rendered = sdscatlen(rendered,"\n",1);
+    addReplyVerbatim(c,rendered,sdslen(rendered),"txt");
+    sdsfree(rendered);
+    lwFreeCanvas(canvas);
+}
diff --git a/src/lzf.h b/src/lzf.h
new file mode 100644
index 0000000..45ddfa8
--- /dev/null
+++ b/src/lzf.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifndef LZF_H
+#define LZF_H
+
+/***********************************************************************
+**
+** lzf -- an extremely fast/free compression/decompression-method
+** http://liblzf.plan9.de/
+**
+** This algorithm is believed to be patent-free.
+**
+***********************************************************************/
+
+#define LZF_VERSION 0x0105 /* 1.5, API version */
+
+/*
+ * Compress in_len bytes stored at the memory block starting at
+ * in_data and write the result to out_data, up to a maximum length
+ * of out_len bytes.
+ *
+ * If the output buffer is not large enough or any error occurs return 0,
+ * otherwise return the number of bytes used, which might be considerably
+ * more than in_len (but less than 104% of the original size), so it
+ * makes sense to always use out_len == in_len - 1, to ensure _some_
+ * compression, and store the data uncompressed otherwise (with a flag, of
+ * course).
+ *
+ * lzf_compress might use different algorithms on different systems and
+ * even different runs, thus might result in different compressed strings
+ * depending on the phase of the moon or similar factors. However, all
+ * these strings are architecture-independent and will result in the
+ * original data when decompressed using lzf_decompress.
+ *
+ * The buffers must not be overlapping.
+ *
+ * If the option LZF_STATE_ARG is enabled, an extra argument must be
+ * supplied which is not reflected in this header file. Refer to lzfP.h
+ * and lzf_c.c.
+ *
+ */
+size_t
+lzf_compress (const void *const in_data, size_t in_len,
+ void *out_data, size_t out_len);
+
+/*
+ * Decompress data compressed with some version of the lzf_compress
+ * function and stored at location in_data and length in_len. The result
+ * will be stored at out_data up to a maximum of out_len characters.
+ *
+ * If the output buffer is not large enough to hold the decompressed
+ * data, a 0 is returned and errno is set to E2BIG. Otherwise the number
+ * of decompressed bytes (i.e. the original length of the data) is
+ * returned.
+ *
+ * If an error in the compressed data is detected, a zero is returned and
+ * errno is set to EINVAL.
+ *
+ * This function is very fast, about as fast as a copying loop.
+ */
+size_t
+lzf_decompress (const void *const in_data, size_t in_len,
+ void *out_data, size_t out_len);
+
+#endif
+
diff --git a/src/lzfP.h b/src/lzfP.h
new file mode 100644
index 0000000..567f5a2
--- /dev/null
+++ b/src/lzfP.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifndef LZFP_h
+#define LZFP_h
+
+#define STANDALONE 1 /* at the moment, this is ok. */
+
+#ifndef STANDALONE
+# include "lzf.h"
+#endif
+
+/*
+ * Size of hashtable is (1 << HLOG) * sizeof (char *)
+ * decompression is independent of the hash table size
+ * the difference between 15 and 14 is very small
+ * for small blocks (and 14 is usually a bit faster).
+ * For a low-memory/faster configuration, use HLOG == 13;
+ * For best compression, use 15 or 16 (or more, up to 22).
+ */
+#ifndef HLOG
+# define HLOG 16
+#endif
+
+/*
+ * Sacrifice very little compression quality in favour of compression speed.
+ * This gives almost the same compression as the default code, and is
+ * (very roughly) 15% faster. This is the preferred mode of operation.
+ */
+#ifndef VERY_FAST
+# define VERY_FAST 1
+#endif
+
+/*
+ * Sacrifice some more compression quality in favour of compression speed.
+ * (roughly 1-2% worse compression for large blocks and
+ * 9-10% for small, redundant, blocks and >>20% better speed in both cases)
+ * In short: when in need for speed, enable this for binary data,
+ * possibly disable this for text data.
+ */
+#ifndef ULTRA_FAST
+# define ULTRA_FAST 0
+#endif
+
+/*
+ * Unconditionally aligning does not cost very much, so do it if unsure
+ */
+#ifndef STRICT_ALIGN
+# if !(defined(__i386) || defined (__amd64))
+# define STRICT_ALIGN 1
+# else
+# define STRICT_ALIGN 0
+# endif
+#endif
+
+/*
+ * You may choose to pre-set the hash table (might be faster on some
+ * modern cpus and large (>>64k) blocks, and also makes compression
+ * deterministic/repeatable when the configuration otherwise is the same).
+ */
+#ifndef INIT_HTAB
+# define INIT_HTAB 0
+#endif
+
+/*
+ * Avoid assigning values to errno variable? for some embedding purposes
+ * (linux kernel for example), this is necessary. NOTE: this breaks
+ * the documentation in lzf.h. Avoiding errno has no speed impact.
+ */
+#ifndef AVOID_ERRNO
+# define AVOID_ERRNO 0
+#endif
+
+/*
+ * Whether to pass the LZF_STATE variable as argument, or allocate it
+ * on the stack. For small-stack environments, define this to 1.
+ * NOTE: this breaks the prototype in lzf.h.
+ */
+#ifndef LZF_STATE_ARG
+# define LZF_STATE_ARG 0
+#endif
+
+/*
+ * Whether to add extra checks for input validity in lzf_decompress
+ * and return EINVAL if the input stream has been corrupted. This
+ * only shields against overflowing the input buffer and will not
+ * detect most corrupted streams.
+ * This check is not normally noticeable on modern hardware
+ * (<1% slowdown), but might slow down older cpus considerably.
+ */
+#ifndef CHECK_INPUT
+# define CHECK_INPUT 1
+#endif
+
+/*
+ * Whether to store pointers or offsets inside the hash table. On
+ * 64 bit architectures, pointers take up twice as much space,
+ * and might also be slower. Default is to autodetect.
+ * Notice: Don't set this value to 1, it will result in 'LZF_HSLOT'
+ * not being able to store offset above UINT32_MAX in 64bit. */
+#define LZF_USE_OFFSETS 0
+
+/*****************************************************************************/
+/* nothing should be changed below */
+
+#ifdef __cplusplus
+# include <cstring>
+# include <climits>
+using namespace std;
+#else
+# include <string.h>
+# include <limits.h>
+#endif
+
+#ifndef LZF_USE_OFFSETS
+# if defined (WIN32)
+# define LZF_USE_OFFSETS defined(_M_X64)
+# else
+# if __cplusplus > 199711L
+# include <cstdint>
+# else
+# include <stdint.h>
+# endif
+# define LZF_USE_OFFSETS (UINTPTR_MAX > 0xffffffffU)
+# endif
+#endif
+
+typedef unsigned char u8;
+
+#if LZF_USE_OFFSETS
+# define LZF_HSLOT_BIAS ((const u8 *)in_data)
+ typedef unsigned int LZF_HSLOT;
+#else
+# define LZF_HSLOT_BIAS 0
+ typedef const u8 *LZF_HSLOT;
+#endif
+
+typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
+
+#if !STRICT_ALIGN
+/* for unaligned accesses we need a 16 bit datatype. */
+# if USHRT_MAX == 65535
+ typedef unsigned short u16;
+# elif UINT_MAX == 65535
+ typedef unsigned int u16;
+# else
+# undef STRICT_ALIGN
+# define STRICT_ALIGN 1
+# endif
+#endif
+
+#if ULTRA_FAST
+# undef VERY_FAST
+#endif
+
+#endif
+
diff --git a/src/lzf_c.c b/src/lzf_c.c
new file mode 100644
index 0000000..7cbbc82
--- /dev/null
+++ b/src/lzf_c.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include "lzfP.h"
+
+#define HSIZE (1 << (HLOG))
+
+/*
+ * don't play with this unless you benchmark!
+ * the data format is not dependent on the hash function.
+ * the hash function might seem strange, just believe me,
+ * it works ;)
+ */
+#ifndef FRST
+# define FRST(p) (((p[0]) << 8) | p[1])
+# define NEXT(v,p) (((v) << 8) | p[2])
+# if ULTRA_FAST
+# define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1))
+# elif VERY_FAST
+# define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# else
+# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# endif
+#endif
+/*
+ * IDX works because it is very similar to a multiplicative hash, e.g.
+ * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))
+ * the latter is also quite fast on newer CPUs, and compresses similarly.
+ *
+ * the next one is also quite good, albeit slow ;)
+ * (int)(cos(h & 0xffffff) * 1e6)
+ */
+
+#if 0
+/* original lzv-like hash function, much worse and thus slower */
+# define FRST(p) (p[0] << 5) ^ p[1]
+# define NEXT(v,p) ((v) << 5) ^ p[2]
+# define IDX(h) ((h) & (HSIZE - 1))
+#endif
+
+#define MAX_LIT (1 << 5)
+#define MAX_OFF (1 << 13)
+#define MAX_REF ((1 << 8) + (1 << 3))
+
+#if __GNUC__ >= 3
+# define expect(expr,value) __builtin_expect ((expr),(value))
+# define inline inline
+#else
+# define expect(expr,value) (expr)
+# define inline static
+#endif
+
+#define expect_false(expr) expect ((expr) != 0, 0)
+#define expect_true(expr) expect ((expr) != 0, 1)
+
+#if defined(__has_attribute)
+# if __has_attribute(no_sanitize)
+# define NO_SANITIZE(sanitizer) __attribute__((no_sanitize(sanitizer)))
+# endif
+#endif
+
+#if !defined(NO_SANITIZE)
+# define NO_SANITIZE(sanitizer)
+#endif
+
+/*
+ * compressed format
+ *
+ * 000LLLLL <L+1> ; literal, L+1=1..33 octets
+ * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset
+ * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset
+ *
+ */
+/* Compress in_len bytes starting at in_data into out_data, writing at most
+ * out_len bytes. Returns the number of output bytes used, or 0 when the
+ * input is empty, out_len is zero, or the output did not fit. See the
+ * format description above and the full contract in lzf.h. */
+NO_SANITIZE("alignment")
+size_t
+lzf_compress (const void *const in_data, size_t in_len,
+ void *out_data, size_t out_len
+#if LZF_STATE_ARG
+ , LZF_STATE htab
+#endif
+ )
+{
+#if !LZF_STATE_ARG
+ LZF_STATE htab;
+#endif
+ const u8 *ip = (const u8 *)in_data;
+ u8 *op = (u8 *)out_data;
+ const u8 *in_end = ip + in_len;
+ u8 *out_end = op + out_len;
+ const u8 *ref;
+
+ /* off requires a type wide enough to hold a general pointer difference.
+ * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
+ * works for differences within a single object). We also assume that no
+ * no bit pattern traps. Since the only platform that is both non-POSIX
+ * and fails to support both assumptions is windows 64 bit, we make a
+ * special workaround for it.
+ */
+#if defined (WIN32) && defined (_M_X64)
+ unsigned _int64 off; /* workaround for missing POSIX compliance */
+#else
+ size_t off;
+#endif
+ unsigned int hval;
+ int lit;
+
+ if (!in_len || !out_len)
+ return 0;
+
+#if INIT_HTAB
+ memset (htab, 0, sizeof (htab));
+#endif
+
+ /* Reserve one byte for the length header of the first literal run;
+ * op[-lit-1] is patched with the run length when the run ends. */
+ lit = 0; op++; /* start run */
+
+ hval = FRST (ip);
+ while (ip < in_end - 2)
+ {
+ LZF_HSLOT *hslot;
+
+ hval = NEXT (hval, ip);
+ hslot = htab + IDX (hval);
+ ref = *hslot ? (*hslot + LZF_HSLOT_BIAS) : NULL; /* avoid applying zero offset to null pointer */
+ *hslot = ip - LZF_HSLOT_BIAS;
+
+ /* A usable match: the hashed candidate lies within MAX_OFF bytes
+ * behind ip and its first three bytes equal ip's. */
+ if (1
+#if INIT_HTAB
+ && ref < ip /* the next test will actually take care of this, but this is faster */
+#endif
+ && (off = ip - ref - 1) < MAX_OFF
+ && ref > (u8 *)in_data
+ && ref[2] == ip[2]
+#if STRICT_ALIGN
+ && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0])
+#else
+ && *(u16 *)ref == *(u16 *)ip
+#endif
+ )
+ {
+ /* match found at *ref++ */
+ unsigned int len = 2;
+ size_t maxlen = in_end - ip - len;
+ maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
+
+ if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
+ if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
+ return 0;
+
+ op [- lit - 1] = lit - 1; /* stop run */
+ op -= !lit; /* undo run if length is zero */
+
+ /* Extend the match as far as possible: unrolled scan while more
+ * than 16 bytes remain, then a byte-at-a-time loop. */
+ for (;;)
+ {
+ if (expect_true (maxlen > 16))
+ {
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ }
+
+ do
+ len++;
+ while (len < maxlen && ref[len] == ip[len]);
+
+ break;
+ }
+
+ len -= 2; /* len is now #octets - 1 */
+ ip++;
+
+ /* Emit the back reference: short 2-byte form when len < 7, long
+ * 3-byte form with an explicit length byte otherwise (see the
+ * format description above). */
+ if (len < 7)
+ {
+ *op++ = (off >> 8) + (len << 5);
+ }
+ else
+ {
+ *op++ = (off >> 8) + ( 7 << 5);
+ *op++ = len - 7;
+ }
+
+ *op++ = off;
+
+ lit = 0; op++; /* start run */
+
+ ip += len + 1;
+
+ if (expect_false (ip >= in_end - 2))
+ break;
+
+#if ULTRA_FAST || VERY_FAST
+ --ip;
+# if VERY_FAST && !ULTRA_FAST
+ --ip;
+# endif
+ hval = FRST (ip);
+
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
+ ip++;
+
+# if VERY_FAST && !ULTRA_FAST
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
+ ip++;
+# endif
+#else
+ ip -= len + 1;
+
+ do
+ {
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
+ ip++;
+ }
+ while (len--);
+#endif
+ }
+ else
+ {
+ /* one more literal byte we must copy */
+ if (expect_false (op >= out_end))
+ return 0;
+
+ lit++; *op++ = *ip++;
+
+ if (expect_false (lit == MAX_LIT))
+ {
+ op [- lit - 1] = lit - 1; /* stop run */
+ lit = 0; op++; /* start run */
+ }
+ }
+ }
+
+ if (op + 3 > out_end) /* at most 3 bytes can be missing here */
+ return 0;
+
+ /* Copy the final input tail as literals. */
+ while (ip < in_end)
+ {
+ lit++; *op++ = *ip++;
+
+ if (expect_false (lit == MAX_LIT))
+ {
+ op [- lit - 1] = lit - 1; /* stop run */
+ lit = 0; op++; /* start run */
+ }
+ }
+
+ op [- lit - 1] = lit - 1; /* end run */
+ op -= !lit; /* undo run if length is zero */
+
+ return op - (u8 *)out_data;
+}
+
diff --git a/src/lzf_d.c b/src/lzf_d.c
new file mode 100644
index 0000000..ff32be8
--- /dev/null
+++ b/src/lzf_d.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include "lzfP.h"
+
+#if AVOID_ERRNO
+# define SET_ERRNO(n)
+#else
+# include <errno.h>
+# define SET_ERRNO(n) errno = (n)
+#endif
+
+#if USE_REP_MOVSB /* small win on amd, big loss on intel */
+#if (__i386 || __amd64) && __GNUC__ >= 3
+# define lzf_movsb(dst, src, len) \
+ asm ("rep movsb" \
+ : "=D" (dst), "=S" (src), "=c" (len) \
+ : "0" (dst), "1" (src), "2" (len));
+#endif
+#endif
+
+#if defined(__GNUC__) && __GNUC__ >= 7
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+#endif
+/* Decompress the LZF stream at in_data (in_len bytes) into out_data,
+ * writing at most out_len bytes. Returns the number of decompressed
+ * bytes, or 0 with errno set to E2BIG (output buffer too small) or
+ * EINVAL (corrupt input; only when CHECK_INPUT is enabled). */
+size_t
+lzf_decompress (const void *const in_data, size_t in_len,
+ void *out_data, size_t out_len)
+{
+ u8 const *ip = (const u8 *)in_data;
+ u8 *op = (u8 *)out_data;
+ u8 const *const in_end = ip + in_len;
+ u8 *const out_end = op + out_len;
+
+ while (ip < in_end)
+ {
+ unsigned int ctrl;
+ ctrl = *ip++;
+
+ if (ctrl < (1 << 5)) /* literal run */
+ {
+ ctrl++; /* stored value is length - 1 */
+
+ if (op + ctrl > out_end)
+ {
+ SET_ERRNO (E2BIG);
+ return 0;
+ }
+
+#if CHECK_INPUT
+ if (ip + ctrl > in_end)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+#endif
+
+#ifdef lzf_movsb
+ lzf_movsb (op, ip, ctrl);
+#else
+ /* Unrolled copy: enter at case 'ctrl' and fall through to 1. */
+ switch (ctrl)
+ {
+ case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;
+ case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++;
+ case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++;
+ case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++;
+ case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++;
+ case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case 9: *op++ = *ip++;
+ case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++;
+ case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++;
+ }
+#endif
+ }
+ else /* back reference */
+ {
+ unsigned int len = ctrl >> 5; /* length code: 1..6, or 7 = long form */
+
+ u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; /* high 5 bits of the offset */
+
+#if CHECK_INPUT
+ if (ip >= in_end)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+#endif
+ if (len == 7) /* long form: an extra length byte follows */
+ {
+ len += *ip++;
+#if CHECK_INPUT
+ if (ip >= in_end)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+#endif
+ }
+
+ ref -= *ip++; /* low 8 bits of the offset */
+
+ if (op + len + 2 > out_end)
+ {
+ SET_ERRNO (E2BIG);
+ return 0;
+ }
+
+ /* Reject references pointing before the start of the output. */
+ if (ref < (u8 *)out_data)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+
+#ifdef lzf_movsb
+ len += 2;
+ lzf_movsb (op, ref, len);
+#else
+ switch (len)
+ {
+ default:
+ len += 2; /* a back reference always covers len + 2 octets */
+
+ if (op >= ref + len)
+ {
+ /* disjunct areas */
+ memcpy (op, ref, len);
+ op += len;
+ }
+ else
+ {
+ /* overlapping, use octet by octet copying */
+ do
+ *op++ = *ref++;
+ while (--len);
+ }
+
+ break;
+
+ case 9: *op++ = *ref++; /* fall-thru */
+ case 8: *op++ = *ref++; /* fall-thru */
+ case 7: *op++ = *ref++; /* fall-thru */
+ case 6: *op++ = *ref++; /* fall-thru */
+ case 5: *op++ = *ref++; /* fall-thru */
+ case 4: *op++ = *ref++; /* fall-thru */
+ case 3: *op++ = *ref++; /* fall-thru */
+ case 2: *op++ = *ref++; /* fall-thru */
+ case 1: *op++ = *ref++; /* fall-thru */
+ case 0: *op++ = *ref++; /* two octets more */
+ *op++ = *ref++; /* fall-thru */
+ }
+#endif
+ }
+ }
+
+ return op - (u8 *)out_data;
+}
+/* Must use the same guard as the matching 'push' above (>= 7, since
+ * -Wimplicit-fallthrough only exists in GCC 7+); otherwise GCC 5/6
+ * would execute a 'pop' with no matching 'push'. */
+#if defined(__GNUC__) && __GNUC__ >= 7
+#pragma GCC diagnostic pop
+#endif
diff --git a/src/memtest.c b/src/memtest.c
new file mode 100644
index 0000000..1ca4b82
--- /dev/null
+++ b/src/memtest.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+#include <errno.h>
+#include <termios.h>
+#include <sys/ioctl.h>
+#if defined(__sun)
+#include <stropts.h>
+#endif
+#include "config.h"
+
+#if (ULONG_MAX == 4294967295UL)
+#define MEMTEST_32BIT
+#elif (ULONG_MAX == 18446744073709551615ULL)
+#define MEMTEST_64BIT
+#else
+#error "ULONG_MAX value not supported."
+#endif
+
+#ifdef MEMTEST_32BIT
+#define ULONG_ONEZERO 0xaaaaaaaaUL
+#define ULONG_ZEROONE 0x55555555UL
+#else
+#define ULONG_ONEZERO 0xaaaaaaaaaaaaaaaaUL
+#define ULONG_ZEROONE 0x5555555555555555UL
+#endif
+
+#if defined(__has_attribute)
+#if __has_attribute(no_sanitize)
+#define NO_SANITIZE(sanitizer) __attribute__((no_sanitize(sanitizer)))
+#endif
+#endif
+
+#if !defined(NO_SANITIZE)
+#define NO_SANITIZE(sanitizer)
+#endif
+
+static struct winsize ws;
+size_t progress_printed; /* Printed chars in screen-wide progress bar. */
+size_t progress_full; /* How many chars to write to fill the progress bar. */
+
+/* Set up the full-screen progress display for one test stage: clear the
+ * screen, pre-fill it with dots, print the advisory footer and the stage
+ * title with the pass number. progress_full is the number of characters
+ * that represents 100% progress for memtest_progress_step(). */
+void memtest_progress_start(char *title, int pass) {
+ int j;
+
+ printf("\x1b[H\x1b[2J"); /* Cursor home, clear screen. */
+ /* Fill with dots. */
+ for (j = 0; j < ws.ws_col*(ws.ws_row-2); j++) printf(".");
+ printf("Please keep the test running several minutes per GB of memory.\n");
+ printf("Also check http://www.memtest86.com/ and http://pyropus.ca/software/memtester/");
+ printf("\x1b[H\x1b[2K"); /* Cursor home, clear current line. */
+ printf("%s [%d]\n", title, pass); /* Print title. */
+ progress_printed = 0;
+ progress_full = (size_t)ws.ws_col*(ws.ws_row-3);
+ fflush(stdout);
+}
+
+/* Clear the screen once the current test stage is complete. */
+void memtest_progress_end(void) {
+ printf("\x1b[H\x1b[2J"); /* Cursor home, clear screen. */
+}
+
+/* Advance the progress bar: print character 'c' once for every bar cell
+ * newly covered by the curr/size completion ratio.
+ * NOTE(review): assumes 'curr' never decreases within a stage, otherwise
+ * chars-progress_printed would underflow (both are size_t) — confirm. */
+void memtest_progress_step(size_t curr, size_t size, char c) {
+ size_t chars = ((unsigned long long)curr*progress_full)/size, j;
+
+ for (j = 0; j < chars-progress_printed; j++) printf("%c",c);
+ progress_printed = chars;
+ fflush(stdout);
+}
+
+/* Test that addressing is fine. Every location is populated with its own
+ * address, and finally verified. This test is very fast but may detect
+ * ASAP big issues with the memory subsystem. */
+int memtest_addressing(unsigned long *l, size_t bytes, int interactive) {
+ unsigned long words = bytes/sizeof(unsigned long);
+ unsigned long j, *p;
+
+ /* Fill: every word receives its own address as value. */
+ p = l;
+ for (j = 0; j < words; j++) {
+ *p = (unsigned long)p;
+ p++;
+ if ((j & 0xffff) == 0 && interactive)
+ memtest_progress_step(j,words*2,'A');
+ }
+ /* Test: every word must still contain its own address. */
+ p = l;
+ for (j = 0; j < words; j++) {
+ if (*p != (unsigned long)p) {
+ if (interactive) {
+ printf("\n*** MEMORY ADDRESSING ERROR: %p contains %lu\n",
+ (void*) p, *p);
+ exit(1);
+ }
+ return 1; /* Non-interactive: report the failure to the caller. */
+ }
+ p++;
+ if ((j & 0xffff) == 0 && interactive)
+ memtest_progress_step(j+words,words*2,'A');
+ }
+ return 0; /* No errors detected. */
+}
+
+/* Fill words stepping a single page at every write, so we continue to
+ * touch all the pages in the smallest amount of time reducing the
+ * effectiveness of caches, and making it hard for the OS to transfer
+ * pages on the swap.
+ *
+ * In this test we can't call rand() since the system may be completely
+ * unable to handle library calls, so we have to resort to our own
+ * PRNG that only uses local state. We use an xorshift* PRNG. */
+#define xorshift64star_next() do { \
+ rseed ^= rseed >> 12; \
+ rseed ^= rseed << 25; \
+ rseed ^= rseed >> 27; \
+ rout = rseed * UINT64_C(2685821657736338717); \
+} while(0)
+
+void memtest_fill_random(unsigned long *l, size_t bytes, int interactive) {
+ unsigned long step = 4096/sizeof(unsigned long); /* One 4k page per stride. */
+ unsigned long words = bytes/sizeof(unsigned long)/2; /* Half: region is mirrored. */
+ unsigned long iwords = words/step; /* words per iteration */
+ unsigned long off, w, *l1, *l2;
+ uint64_t rseed = UINT64_C(0xd13133de9afdb566); /* Just a random seed. */
+ uint64_t rout = 0;
+
+ assert((bytes & 4095) == 0); /* Region size must be a multiple of 4k. */
+ for (off = 0; off < step; off++) {
+ l1 = l+off;
+ /* l2 mirrors l1 in the second half of the region: each random word
+ * is stored twice so that memtest_compare() can verify both copies. */
+ l2 = l1+words;
+ for (w = 0; w < iwords; w++) {
+ xorshift64star_next();
+ *l1 = *l2 = (unsigned long) rout;
+ l1 += step;
+ l2 += step;
+ if ((w & 0xffff) == 0 && interactive)
+ memtest_progress_step(w+iwords*off,words,'R');
+ }
+ }
+}
+
+/* Like memtest_fill_random() but uses the two specified values to fill
+ * memory, in an alternated way (v1|v2|v1|v2|...) */
+void memtest_fill_value(unsigned long *l, size_t bytes, unsigned long v1,
+ unsigned long v2, char sym, int interactive)
+{
+ unsigned long step = 4096/sizeof(unsigned long); /* One 4k page per stride. */
+ unsigned long words = bytes/sizeof(unsigned long)/2; /* Half: region is mirrored. */
+ unsigned long iwords = words/step; /* words per iteration */
+ unsigned long off, w, *l1, *l2, v;
+
+ assert((bytes & 4095) == 0); /* Region size must be a multiple of 4k. */
+ for (off = 0; off < step; off++) {
+ l1 = l+off;
+ l2 = l1+words; /* Mirror half, verified later by memtest_compare(). */
+ v = (off & 1) ? v2 : v1; /* Alternate v1/v2 between even/odd word offsets. */
+ for (w = 0; w < iwords; w++) {
+ /* OR the value with shifted copies of itself; for the full-width
+ * patterns used by the callers (0, ~0, ULONG_ONEZERO/ZEROONE)
+ * this leaves the pattern unchanged. */
+#ifdef MEMTEST_32BIT
+ *l1 = *l2 = ((unsigned long) v) |
+ (((unsigned long) v) << 16);
+#else
+ *l1 = *l2 = ((unsigned long) v) |
+ (((unsigned long) v) << 16) |
+ (((unsigned long) v) << 32) |
+ (((unsigned long) v) << 48);
+#endif
+ l1 += step;
+ l2 += step;
+ if ((w & 0xffff) == 0 && interactive)
+ memtest_progress_step(w+iwords*off,words,sym);
+ }
+ }
+}
+
+/* Verify that the two mirrored halves of the region (as written by
+ * memtest_fill_random() / memtest_fill_value()) still hold identical
+ * data. Returns 0 when the halves match; on mismatch returns 1 in
+ * non-interactive mode, or prints the details and exits otherwise. */
+int memtest_compare(unsigned long *l, size_t bytes, int interactive) {
+ unsigned long words = bytes/sizeof(unsigned long)/2;
+ unsigned long w, *l1, *l2;
+
+ assert((bytes & 4095) == 0); /* Region size must be a multiple of 4k. */
+ l1 = l;
+ l2 = l1+words;
+ for (w = 0; w < words; w++) {
+ if (*l1 != *l2) {
+ if (interactive) {
+ printf("\n*** MEMORY ERROR DETECTED: %p != %p (%lu vs %lu)\n",
+ (void*)l1, (void*)l2, *l1, *l2);
+ exit(1);
+ }
+ return 1;
+ }
+ l1 ++;
+ l2 ++;
+ if ((w & 0xffff) == 0 && interactive)
+ memtest_progress_step(w,words,'=');
+ }
+ return 0;
+}
+
+/* Run memtest_compare() 'times' times over the same region, returning the
+ * number of failed runs. 'pass' is only used for the progress title. */
+int memtest_compare_times(unsigned long *m, size_t bytes, int pass, int times,
+ int interactive)
+{
+ int j;
+ int errors = 0;
+
+ for (j = 0; j < times; j++) {
+ if (interactive) memtest_progress_start("Compare",pass);
+ errors += memtest_compare(m,bytes,interactive);
+ if (interactive) memtest_progress_end();
+ }
+ return errors;
+}
+
+/* Test the specified memory. The number of bytes must be multiple of 4096.
+ * If interactive is true the program exits with an error and prints
+ * ASCII art to show progress. Instead when interactive is 0, it can
+ * be used as an API call, and returns 1 if memory errors were found or
+ * 0 if there were no errors detected. */
+int memtest_test(unsigned long *m, size_t bytes, int passes, int interactive) {
+ int pass = 0;
+ int errors = 0;
+
+ while (pass != passes) {
+ pass++;
+
+ if (interactive) memtest_progress_start("Addressing test",pass);
+ errors += memtest_addressing(m,bytes,interactive);
+ if (interactive) memtest_progress_end();
+
+ /* Each fill pattern below is verified with 4 compare runs. */
+ if (interactive) memtest_progress_start("Random fill",pass);
+ memtest_fill_random(m,bytes,interactive);
+ if (interactive) memtest_progress_end();
+ errors += memtest_compare_times(m,bytes,pass,4,interactive);
+
+ if (interactive) memtest_progress_start("Solid fill",pass);
+ memtest_fill_value(m,bytes,0,(unsigned long)-1,'S',interactive);
+ if (interactive) memtest_progress_end();
+ errors += memtest_compare_times(m,bytes,pass,4,interactive);
+
+ if (interactive) memtest_progress_start("Checkerboard fill",pass);
+ memtest_fill_value(m,bytes,ULONG_ONEZERO,ULONG_ZEROONE,'C',interactive);
+ if (interactive) memtest_progress_end();
+ errors += memtest_compare_times(m,bytes,pass,4,interactive);
+ }
+ return errors; /* Total number of failed addressing/compare runs. */
+}
+
+/* A version of memtest_test() that tests memory in small pieces
+ * in order to restore the memory content at exit.
+ *
+ * One problem we have with this approach, is that the cache can avoid
+ * real memory accesses, and we can't test big chunks of memory at the
+ * same time, because we need to backup them on the stack (the allocator
+ * may not be usable or we may be already in an out of memory condition).
+ * So what we do is to try to trash the cache with useless memory accesses
+ * between the fill and compare cycles. */
+#define MEMTEST_BACKUP_WORDS (1024*(1024/sizeof(long)))
+/* Random accesses of MEMTEST_DECACHE_SIZE are performed at the start and
+ * end of the region between fill and compare cycles in order to trash
+ * the cache. */
+#define MEMTEST_DECACHE_SIZE (1024*8)
+
+NO_SANITIZE("undefined")
+int memtest_preserving_test(unsigned long *m, size_t bytes, int passes) {
+ unsigned long backup[MEMTEST_BACKUP_WORDS]; /* On-stack copy of the chunk under test. */
+ unsigned long *p = m;
+ unsigned long *end = (unsigned long*) (((unsigned char*)m)+(bytes-MEMTEST_DECACHE_SIZE));
+ size_t left = bytes;
+ int errors = 0;
+
+ if (bytes & 4095) return 0; /* Can't test across 4k page boundaries. */
+ if (bytes < 4096*2) return 0; /* Can't test a single page. */
+
+ while(left) {
+ /* If we have to test a single final page, go back a single page
+ * so that we can test two pages, since the code can't test a single
+ * page but at least two. */
+ if (left == 4096) {
+ left += 4096;
+ p -= 4096/sizeof(unsigned long);
+ }
+
+ int pass = 0;
+ size_t len = (left > sizeof(backup)) ? sizeof(backup) : left;
+
+ /* Always test an even number of pages. */
+ if (len/4096 % 2) len -= 4096;
+
+ memcpy(backup,p,len); /* Backup. */
+ while(pass != passes) {
+ pass++;
+ errors += memtest_addressing(p,len,0);
+ memtest_fill_random(p,len,0);
+ /* Touch the start and the end of the whole region between fill
+ * and compare to trash the CPU cache (see comment above). */
+ if (bytes >= MEMTEST_DECACHE_SIZE) {
+ memtest_compare_times(m,MEMTEST_DECACHE_SIZE,pass,1,0);
+ memtest_compare_times(end,MEMTEST_DECACHE_SIZE,pass,1,0);
+ }
+ errors += memtest_compare_times(p,len,pass,4,0);
+ memtest_fill_value(p,len,0,(unsigned long)-1,'S',0);
+ if (bytes >= MEMTEST_DECACHE_SIZE) {
+ memtest_compare_times(m,MEMTEST_DECACHE_SIZE,pass,1,0);
+ memtest_compare_times(end,MEMTEST_DECACHE_SIZE,pass,1,0);
+ }
+ errors += memtest_compare_times(p,len,pass,4,0);
+ memtest_fill_value(p,len,ULONG_ONEZERO,ULONG_ZEROONE,'C',0);
+ if (bytes >= MEMTEST_DECACHE_SIZE) {
+ memtest_compare_times(m,MEMTEST_DECACHE_SIZE,pass,1,0);
+ memtest_compare_times(end,MEMTEST_DECACHE_SIZE,pass,1,0);
+ }
+ errors += memtest_compare_times(p,len,pass,4,0);
+ }
+ memcpy(p,backup,len); /* Restore. */
+ left -= len;
+ p += len/sizeof(unsigned long);
+ }
+ return errors;
+}
+
+/* Perform an interactive test allocating the specified number of megabytes. */
+void memtest_alloc_and_test(size_t megabytes, int passes) {
+ size_t bytes = megabytes*1024*1024;
+ unsigned long *m = malloc(bytes);
+
+ if (m == NULL) {
+ fprintf(stderr,"Unable to allocate %zu megabytes: %s",
+ megabytes, strerror(errno));
+ exit(1); /* Abort: the test is pointless without the buffer. */
+ }
+ memtest_test(m,bytes,passes,1); /* Interactive mode: exits on error. */
+ free(m);
+}
+
+/* Top level entry point of the interactive memory test: detect the
+ * terminal size (falling back to 80x20 when unavailable or on Haiku),
+ * run 'passes' passes over 'megabytes' MBs of freshly allocated memory,
+ * then exit the process with status 0 on success. */
+void memtest(size_t megabytes, int passes) {
+#if !defined(__HAIKU__)
+ if (ioctl(1, TIOCGWINSZ, &ws) == -1) {
+ ws.ws_col = 80;
+ ws.ws_row = 20;
+ }
+#else
+ ws.ws_col = 80;
+ ws.ws_row = 20;
+#endif
+ memtest_alloc_and_test(megabytes,passes);
+ printf("\nYour memory passed this test.\n");
+ printf("Please if you are still in doubt use the following two tools:\n");
+ printf("1) memtest86: http://www.memtest86.com/\n");
+ printf("2) memtester: http://pyropus.ca/software/memtester/\n");
+ exit(0);
+}
diff --git a/src/mkreleasehdr.sh b/src/mkreleasehdr.sh
new file mode 100755
index 0000000..117b9e8
--- /dev/null
+++ b/src/mkreleasehdr.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Regenerate release.h with the current git SHA1, a dirty-tree flag and a
+# build id, then touch release.c so it is recompiled when anything changed.
+GIT_SHA1=`(git show-ref --head --hash=8 2> /dev/null || echo 00000000) | head -n1`
+# Strip whitespace from the wc output: BSD wc pads the count with leading
+# spaces, which previously leaked into release.h and broke the quoted greps.
+GIT_DIRTY=`git diff --no-ext-diff 2> /dev/null | wc -l | tr -d '[:space:]'`
+BUILD_ID=`uname -n`"-"`date +%s`
+if [ -n "$SOURCE_DATE_EPOCH" ]; then
+  # Reproducible builds: derive the build id from SOURCE_DATE_EPOCH.
+  # GNU date uses -d, BSD date uses -r; try both before falling back.
+  BUILD_ID=$(date -u -d "@$SOURCE_DATE_EPOCH" +%s 2>/dev/null || date -u -r "$SOURCE_DATE_EPOCH" +%s 2>/dev/null || date -u +%s)
+fi
+test -f release.h || touch release.h
+# Skip the rewrite when release.h is already current. Quote the expansions so
+# grep always receives exactly one pattern, drop the useless cat, and silence
+# the matched lines so they do not pollute the build output.
+(grep SHA1 release.h | grep "$GIT_SHA1") > /dev/null && \
+(grep DIRTY release.h | grep "$GIT_DIRTY") > /dev/null && exit 0 # Already up-to-date
+echo "#define REDIS_GIT_SHA1 \"$GIT_SHA1\"" > release.h
+echo "#define REDIS_GIT_DIRTY \"$GIT_DIRTY\"" >> release.h
+echo "#define REDIS_BUILD_ID \"$BUILD_ID\"" >> release.h
+echo "#include \"version.h\"" >> release.h
+echo "#define REDIS_BUILD_ID_RAW REDIS_VERSION REDIS_BUILD_ID REDIS_GIT_DIRTY REDIS_GIT_SHA1" >> release.h
+touch release.c # Force recompile of release.c
diff --git a/src/module.c b/src/module.c
new file mode 100644
index 0000000..ac6cbbb
--- /dev/null
+++ b/src/module.c
@@ -0,0 +1,13846 @@
+/*
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* --------------------------------------------------------------------------
+ * Modules API documentation information
+ *
+ * The comments in this file are used to generate the API documentation on the
+ * Redis website.
+ *
+ * Each function starting with RM_ and preceded by a block comment is included
+ * in the API documentation. To hide an RM_ function, put a blank line between
+ * the comment and the function definition or put the comment inside the
+ * function body.
+ *
+ * The functions are divided into sections. Each section is preceded by a
+ * documentation block, which is comment block starting with a markdown level 2
+ * heading, i.e. a line starting with ##, on the first line of the comment block
+ * (with the exception of a ----- line which can appear first). Other comment
+ * blocks, which are not intended for the modules API user, such as this comment
+ * block, do NOT start with a markdown level 2 heading, so they are included in
+ * the generated a API documentation.
+ *
+ * The documentation comments may contain markdown formatting. Some automatic
+ * replacements are done, such as the replacement of RM with RedisModule in
+ * function names. For details, see the script src/modules/gendoc.rb.
+ * -------------------------------------------------------------------------- */
+
+#include "server.h"
+#include "cluster.h"
+#include "slowlog.h"
+#include "rdb.h"
+#include "monotonic.h"
+#include "script.h"
+#include "call_reply.h"
+#include "hdr_histogram.h"
+#include <dlfcn.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <string.h>
+
+/* --------------------------------------------------------------------------
+ * Private data structures used by the modules system. Those are data
+ * structures that are never exposed to Redis Modules, if not as void
+ * pointers that have an API the module can call with them)
+ * -------------------------------------------------------------------------- */
+
+struct RedisModuleInfoCtx {
+ struct RedisModule *module;
+ dict *requested_sections;
+ sds info; /* info string we collected so far */
+ int sections; /* number of sections we collected so far */
+ int in_section; /* indication if we're in an active section or not */
+ int in_dict_field; /* indication that we're currently appending to a dict */
+};
+
+/* This represents a shared API. Shared APIs will be used to populate
+ * the server.sharedapi dictionary, mapping names of APIs exported by
+ * modules for other modules to use, to their structure specifying the
+ * function pointer that can be called. */
+struct RedisModuleSharedAPI {
+ void *func;
+ RedisModule *module;
+};
+typedef struct RedisModuleSharedAPI RedisModuleSharedAPI;
+
+dict *modules; /* Hash table of modules. SDS -> RedisModule ptr.*/
+
+/* Entries in the context->amqueue array, representing objects to free
+ * when the callback returns. */
+struct AutoMemEntry {
+ void *ptr;
+ int type;
+};
+
+/* AutoMemEntry type field values. */
+#define REDISMODULE_AM_KEY 0
+#define REDISMODULE_AM_STRING 1
+#define REDISMODULE_AM_REPLY 2
+#define REDISMODULE_AM_FREED 3 /* Explicitly freed by user already. */
+#define REDISMODULE_AM_DICT 4
+#define REDISMODULE_AM_INFO 5
+
+/* The pool allocator block. Redis Modules can allocate memory via this special
+ * allocator that will automatically release it all once the callback returns.
+ * This means that it can only be used for ephemeral allocations. However
+ * there are two advantages for modules to use this API:
+ *
+ * 1) The memory is automatically released when the callback returns.
+ * 2) This allocator is faster for many small allocations since whole blocks
+ * are allocated, and small pieces returned to the caller just advancing
+ * the index of the allocation.
+ *
+ * Allocations are always rounded to the size of the void pointer in order
+ * to always return aligned memory chunks. */
+
+#define REDISMODULE_POOL_ALLOC_MIN_SIZE (1024*8)
+#define REDISMODULE_POOL_ALLOC_ALIGN (sizeof(void*))
+
+typedef struct RedisModulePoolAllocBlock {
+ uint32_t size;
+ uint32_t used;
+ struct RedisModulePoolAllocBlock *next;
+ char memory[];
+} RedisModulePoolAllocBlock;
+
+/* This structure represents the context in which Redis modules operate.
+ * Most APIs module can access, get a pointer to the context, so that the API
+ * implementation can hold state across calls, or remember what to free after
+ * the call and so forth.
+ *
+ * Note that not all the context structure is always filled with actual values
+ * but only the fields needed in a given context. */
+
+struct RedisModuleBlockedClient;
+struct RedisModuleUser;
+
+struct RedisModuleCtx {
+ void *getapifuncptr; /* NOTE: Must be the first field. */
+ struct RedisModule *module; /* Module reference. */
+ client *client; /* Client calling a command. */
+ struct RedisModuleBlockedClient *blocked_client; /* Blocked client for
+ thread safe context. */
+ struct AutoMemEntry *amqueue; /* Auto memory queue of objects to free. */
+ int amqueue_len; /* Number of slots in amqueue. */
+ int amqueue_used; /* Number of used slots in amqueue. */
+ int flags; /* REDISMODULE_CTX_... flags. */
+ void **postponed_arrays; /* To set with RM_ReplySetArrayLength(). */
+ int postponed_arrays_count; /* Number of entries in postponed_arrays. */
+ void *blocked_privdata; /* Privdata set when unblocking a client. */
+ RedisModuleString *blocked_ready_key; /* Key ready when the reply callback
+ gets called for clients blocked
+ on keys. */
+
+ /* Used if there is the REDISMODULE_CTX_KEYS_POS_REQUEST or
+ * REDISMODULE_CTX_CHANNEL_POS_REQUEST flag set. */
+ getKeysResult *keys_result;
+
+ struct RedisModulePoolAllocBlock *pa_head;
+ long long next_yield_time;
+
+ const struct RedisModuleUser *user; /* RedisModuleUser commands executed via
+ RM_Call should be executed as, if set */
+};
+typedef struct RedisModuleCtx RedisModuleCtx;
+
+#define REDISMODULE_CTX_NONE (0)
+#define REDISMODULE_CTX_AUTO_MEMORY (1<<0)
+#define REDISMODULE_CTX_KEYS_POS_REQUEST (1<<1)
+#define REDISMODULE_CTX_BLOCKED_REPLY (1<<2)
+#define REDISMODULE_CTX_BLOCKED_TIMEOUT (1<<3)
+#define REDISMODULE_CTX_THREAD_SAFE (1<<4)
+#define REDISMODULE_CTX_BLOCKED_DISCONNECTED (1<<5)
+#define REDISMODULE_CTX_TEMP_CLIENT (1<<6) /* Return client object to the pool
+ when the context is destroyed */
+#define REDISMODULE_CTX_NEW_CLIENT (1<<7) /* Free client object when the
+ context is destroyed */
+#define REDISMODULE_CTX_CHANNELS_POS_REQUEST (1<<8)
+#define REDISMODULE_CTX_COMMAND (1<<9) /* Context created to serve a command from call() or AOF (which calls cmd->proc directly) */
+
+
+/* This represents a Redis key opened with RM_OpenKey(). */
+struct RedisModuleKey {
+ RedisModuleCtx *ctx;
+ redisDb *db;
+ robj *key; /* Key name object. */
+ robj *value; /* Value object, or NULL if the key was not found. */
+ void *iter; /* Iterator. */
+ int mode; /* Opening mode. */
+
+ union {
+ struct {
+ /* List, use only if value->type == OBJ_LIST */
+ listTypeEntry entry; /* Current entry in iteration. */
+ long index; /* Current 0-based index in iteration. */
+ } list;
+ struct {
+ /* Zset iterator, use only if value->type == OBJ_ZSET */
+ uint32_t type; /* REDISMODULE_ZSET_RANGE_* */
+ zrangespec rs; /* Score range. */
+ zlexrangespec lrs; /* Lex range. */
+ uint32_t start; /* Start pos for positional ranges. */
+ uint32_t end; /* End pos for positional ranges. */
+ void *current; /* Zset iterator current node. */
+ int er; /* Zset iterator end reached flag
+ (true if end was reached). */
+ } zset;
+ struct {
+ /* Stream, use only if value->type == OBJ_STREAM */
+ streamID currentid; /* Current entry while iterating. */
+ int64_t numfieldsleft; /* Fields left to fetch for current entry. */
+ int signalready; /* Flag that signalKeyAsReady() is needed. */
+ } stream;
+ } u;
+};
+
+/* RedisModuleKey 'ztype' values. */
+#define REDISMODULE_ZSET_RANGE_NONE 0 /* This must always be 0. */
+#define REDISMODULE_ZSET_RANGE_LEX 1
+#define REDISMODULE_ZSET_RANGE_SCORE 2
+#define REDISMODULE_ZSET_RANGE_POS 3
+
+/* Function pointer type of a function representing a command inside
+ * a Redis module. */
+struct RedisModuleBlockedClient;
+typedef int (*RedisModuleCmdFunc) (RedisModuleCtx *ctx, void **argv, int argc);
+typedef int (*RedisModuleAuthCallback)(RedisModuleCtx *ctx, void *username, void *password, RedisModuleString **err);
+typedef void (*RedisModuleDisconnectFunc) (RedisModuleCtx *ctx, struct RedisModuleBlockedClient *bc);
+
+/* This struct holds the information about a command registered by a module.*/
+struct RedisModuleCommand {
+ struct RedisModule *module;
+ RedisModuleCmdFunc func;
+ struct redisCommand *rediscmd;
+};
+typedef struct RedisModuleCommand RedisModuleCommand;
+
+#define REDISMODULE_REPLYFLAG_NONE 0
+#define REDISMODULE_REPLYFLAG_TOPARSE (1<<0) /* Protocol must be parsed. */
+#define REDISMODULE_REPLYFLAG_NESTED (1<<1) /* Nested reply object. No proto
+ or struct free. */
+
+/* Reply of RM_Call() function. The function is filled in a lazy
+ * way depending on the function called on the reply structure. By default
+ * only the type, proto and protolen are filled. */
+typedef struct CallReply RedisModuleCallReply;
+
+/* Structure to hold the module auth callback & the Module implementing it. */
+typedef struct RedisModuleAuthCtx {
+ struct RedisModule *module;
+ RedisModuleAuthCallback auth_cb;
+} RedisModuleAuthCtx;
+
+/* Structure representing a blocked client. We get a pointer to such
+ * an object when blocking from modules. */
+typedef struct RedisModuleBlockedClient {
+ client *client; /* Pointer to the blocked client. or NULL if the client
+ was destroyed during the life of this object. */
+ RedisModule *module; /* Module blocking the client. */
+ RedisModuleCmdFunc reply_callback; /* Reply callback on normal completion.*/
+ RedisModuleAuthCallback auth_reply_cb; /* Reply callback on completing blocking
+ module authentication. */
+ RedisModuleCmdFunc timeout_callback; /* Reply callback on timeout. */
+ RedisModuleDisconnectFunc disconnect_callback; /* Called on disconnection.*/
+ void (*free_privdata)(RedisModuleCtx*,void*);/* privdata cleanup callback.*/
+ void *privdata; /* Module private data that may be used by the reply
+ or timeout callback. It is set via the
+ RedisModule_UnblockClient() API. */
+ client *thread_safe_ctx_client; /* Fake client to be used for thread safe
+ context so that no lock is required. */
+ client *reply_client; /* Fake client used to accumulate replies
+ in thread safe contexts. */
+ int dbid; /* Database number selected by the original client. */
+ int blocked_on_keys; /* If blocked via RM_BlockClientOnKeys(). */
+ int unblocked; /* Already on the moduleUnblocked list. */
+ monotime background_timer; /* Timer tracking the start of background work */
+ uint64_t background_duration; /* Current command background time duration.
+ Used for measuring latency of blocking cmds */
+} RedisModuleBlockedClient;
+
+/* This is a list of Module Auth Contexts. Each time a Module registers a callback, a new ctx is
+ * added to this list. Multiple modules can register auth callbacks and the same Module can have
+ * multiple auth callbacks. */
+static list *moduleAuthCallbacks;
+
+static pthread_mutex_t moduleUnblockedClientsMutex = PTHREAD_MUTEX_INITIALIZER;
+static list *moduleUnblockedClients;
+
+/* Pool for temporary client objects. Creating and destroying a client object is
+ * costly. We manage a pool of clients to avoid this cost. Pool expands when
+ * more clients are needed and shrinks when unused. Please see modulesCron()
+ * for more details. */
+static client **moduleTempClients;
+static size_t moduleTempClientCap = 0;
+static size_t moduleTempClientCount = 0; /* Client count in pool */
+static size_t moduleTempClientMinCount = 0; /* Min client count in pool since
+ the last cron. */
+
+/* We need a mutex that is unlocked / relocked in beforeSleep() in order to
+ * allow thread safe contexts to execute commands at a safe moment. */
+static pthread_mutex_t moduleGIL = PTHREAD_MUTEX_INITIALIZER;
+
+
+/* Function pointer type for keyspace event notification subscriptions from modules. */
+typedef int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key);
+
+/* Function pointer type for post jobs */
+typedef void (*RedisModulePostNotificationJobFunc) (RedisModuleCtx *ctx, void *pd);
+
+/* Keyspace notification subscriber information.
+ * See RM_SubscribeToKeyspaceEvents() for more information. */
+typedef struct RedisModuleKeyspaceSubscriber {
+ /* The module subscribed to the event */
+ RedisModule *module;
+ /* Notification callback in the module*/
+ RedisModuleNotificationFunc notify_callback;
+ /* A bit mask of the events the module is interested in */
+ int event_mask;
+ /* Active flag set on entry, to avoid reentrant subscribers
+ * calling themselves */
+ int active;
+} RedisModuleKeyspaceSubscriber;
+
+typedef struct RedisModulePostExecUnitJob {
+ /* The module subscribed to the event */
+ RedisModule *module;
+ RedisModulePostNotificationJobFunc callback;
+ void *pd;
+ void (*free_pd)(void*);
+ int dbid;
+} RedisModulePostExecUnitJob;
+
+/* The module keyspace notification subscribers list */
+static list *moduleKeyspaceSubscribers;
+
+/* The module post keyspace jobs list */
+static list *modulePostExecUnitJobs;
+
+/* Data structures related to the exported dictionary data structure. */
+typedef struct RedisModuleDict {
+ rax *rax; /* The radix tree. */
+} RedisModuleDict;
+
+typedef struct RedisModuleDictIter {
+ RedisModuleDict *dict;
+ raxIterator ri;
+} RedisModuleDictIter;
+
+/* Argument vector handed to command-filter callbacks, letting a module
+ * inspect (and possibly rewrite) a command before it executes. */
+typedef struct RedisModuleCommandFilterCtx {
+    RedisModuleString **argv; /* Command name + arguments. */
+    int argv_len;             /* NOTE(review): appears to be the allocated
+                               * capacity of argv, with argc the in-use
+                               * count — confirm against the filter code. */
+    int argc;                 /* Number of entries of argv in use. */
+    client *c;                /* Client that issued the command. */
+} RedisModuleCommandFilterCtx;
+
+typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCommandFilterCtx *filter);
+
+typedef struct RedisModuleCommandFilter {
+ /* The module that registered the filter */
+ RedisModule *module;
+ /* Filter callback function */
+ RedisModuleCommandFilterFunc callback;
+ /* REDISMODULE_CMDFILTER_* flags */
+ int flags;
+} RedisModuleCommandFilter;
+
+/* Registered filters */
+static list *moduleCommandFilters;
+
+typedef void (*RedisModuleForkDoneHandler) (int exitcode, int bysignal, void *user_data);
+
+static struct RedisModuleForkInfo {
+ RedisModuleForkDoneHandler done_handler;
+ void* done_handler_user_data;
+} moduleForkInfo = {0};
+
+typedef struct RedisModuleServerInfoData {
+ rax *rax; /* parsed info data. */
+} RedisModuleServerInfoData;
+
+/* Flags for moduleCreateArgvFromUserFormat(). */
+#define REDISMODULE_ARGV_REPLICATE (1<<0)
+#define REDISMODULE_ARGV_NO_AOF (1<<1)
+#define REDISMODULE_ARGV_NO_REPLICAS (1<<2)
+#define REDISMODULE_ARGV_RESP_3 (1<<3)
+#define REDISMODULE_ARGV_RESP_AUTO (1<<4)
+#define REDISMODULE_ARGV_RUN_AS_USER (1<<5)
+#define REDISMODULE_ARGV_SCRIPT_MODE (1<<6)
+#define REDISMODULE_ARGV_NO_WRITES (1<<7)
+#define REDISMODULE_ARGV_CALL_REPLIES_AS_ERRORS (1<<8)
+#define REDISMODULE_ARGV_RESPECT_DENY_OOM (1<<9)
+#define REDISMODULE_ARGV_DRY_RUN (1<<10)
+#define REDISMODULE_ARGV_ALLOW_BLOCK (1<<11)
+
+/* Determine whether Redis should signalModifiedKey implicitly.
+ * In case 'ctx' has no 'module' member (and therefore no module->options),
+ * we assume default behavior, that is, Redis signals.
+ * (see RM_GetThreadSafeContext) */
+#define SHOULD_SIGNAL_MODIFIED_KEYS(ctx) \
+ ((ctx)->module? !((ctx)->module->options & REDISMODULE_OPTION_NO_IMPLICIT_SIGNAL_MODIFIED) : 1)
+
+/* Server events hooks data structures and defines: this modules API
+ * allow modules to subscribe to certain events in Redis, such as
+ * the start and end of an RDB or AOF save, the change of role in replication,
+ * and similar other events. */
+
+typedef struct RedisModuleEventListener {
+ RedisModule *module;
+ RedisModuleEvent event;
+ RedisModuleEventCallback callback;
+} RedisModuleEventListener;
+
+list *RedisModule_EventListeners; /* Global list of all the active events. */
+
+/* Data structures related to the redis module users */
+
+/* This is the object returned by RM_CreateModuleUser(). The module API is
+ * able to create users, set ACLs to such users, and later authenticate
+ * clients using such newly created users. */
+typedef struct RedisModuleUser {
+ user *user; /* Reference to the real redis user */
+ int free_user; /* Indicates that user should also be freed when this object is freed */
+} RedisModuleUser;
+
+/* This is a structure used to export some meta-information such as dbid to the module. */
+typedef struct RedisModuleKeyOptCtx {
+ struct redisObject *from_key, *to_key; /* Optional name of key processed, NULL when unknown.
+ In most cases, only 'from_key' is valid, but in callbacks
+ such as `copy2`, both 'from_key' and 'to_key' are valid. */
+ int from_dbid, to_dbid; /* The dbid of the key being processed, -1 when unknown.
+ In most cases, only 'from_dbid' is valid, but in callbacks such
+ as `copy2`, 'from_dbid' and 'to_dbid' are both valid. */
+} RedisModuleKeyOptCtx;
+
+/* Data structures related to redis module configurations */
+/* The function signatures for module config get callbacks. These are identical to the ones exposed in redismodule.h. */
+typedef RedisModuleString * (*RedisModuleConfigGetStringFunc)(const char *name, void *privdata);
+typedef long long (*RedisModuleConfigGetNumericFunc)(const char *name, void *privdata);
+typedef int (*RedisModuleConfigGetBoolFunc)(const char *name, void *privdata);
+typedef int (*RedisModuleConfigGetEnumFunc)(const char *name, void *privdata);
+/* The function signatures for module config set callbacks. These are identical to the ones exposed in redismodule.h. */
+typedef int (*RedisModuleConfigSetStringFunc)(const char *name, RedisModuleString *val, void *privdata, RedisModuleString **err);
+typedef int (*RedisModuleConfigSetNumericFunc)(const char *name, long long val, void *privdata, RedisModuleString **err);
+typedef int (*RedisModuleConfigSetBoolFunc)(const char *name, int val, void *privdata, RedisModuleString **err);
+typedef int (*RedisModuleConfigSetEnumFunc)(const char *name, int val, void *privdata, RedisModuleString **err);
+/* Apply signature, identical to redismodule.h */
+typedef int (*RedisModuleConfigApplyFunc)(RedisModuleCtx *ctx, void *privdata, RedisModuleString **err);
+
+/* Struct representing a module config. These are stored in a list in the module struct */
+struct ModuleConfig {
+ sds name; /* Name of config without the module name appended to the front */
+ void *privdata; /* Optional data passed into the module config callbacks */
+ union get_fn { /* The get callback specified by the module */
+ RedisModuleConfigGetStringFunc get_string;
+ RedisModuleConfigGetNumericFunc get_numeric;
+ RedisModuleConfigGetBoolFunc get_bool;
+ RedisModuleConfigGetEnumFunc get_enum;
+ } get_fn;
+ union set_fn { /* The set callback specified by the module */
+ RedisModuleConfigSetStringFunc set_string;
+ RedisModuleConfigSetNumericFunc set_numeric;
+ RedisModuleConfigSetBoolFunc set_bool;
+ RedisModuleConfigSetEnumFunc set_enum;
+ } set_fn;
+ RedisModuleConfigApplyFunc apply_fn;
+ RedisModule *module;
+};
+
+/* Handle for an asynchronous (non-blocking) RM_Call() reply. The structure
+ * is reference counted and released by freeRedisModuleAsyncRMCallPromise()
+ * once ref_count drops to zero, at which point no client may still be
+ * attached (moduleReleaseTempClient() detaches 'c' before dropping its
+ * reference). */
+typedef struct RedisModuleAsyncRMCallPromise{
+    size_t ref_count;    /* Owners of this promise; freed when it reaches 0. */
+    void *private_data;  /* Opaque module data for the unblock callback. */
+    RedisModule *module; /* Module that performed the async call. */
+    RedisModuleOnUnblocked on_unblocked; /* Callback invoked on unblock —
+                                          * presumably when the reply is
+                                          * ready; confirm at the call site. */
+    client *c;           /* Executing client, or NULL once detached. */
+    RedisModuleCtx *ctx; /* Context the call originated from. */
+} RedisModuleAsyncRMCallPromise;
+
+/* --------------------------------------------------------------------------
+ * Prototypes
+ * -------------------------------------------------------------------------- */
+
+void RM_FreeCallReply(RedisModuleCallReply *reply);
+void RM_CloseKey(RedisModuleKey *key);
+void autoMemoryCollect(RedisModuleCtx *ctx);
+robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int *argcp, int *flags, va_list ap);
+void RM_ZsetRangeStop(RedisModuleKey *kp);
+static void zsetKeyReset(RedisModuleKey *key);
+static void moduleInitKeyTypeSpecific(RedisModuleKey *key);
+void RM_FreeDict(RedisModuleCtx *ctx, RedisModuleDict *d);
+void RM_FreeServerInfo(RedisModuleCtx *ctx, RedisModuleServerInfoData *data);
+
+/* Helpers for RM_SetCommandInfo. */
+static int moduleValidateCommandInfo(const RedisModuleCommandInfo *info);
+static int64_t moduleConvertKeySpecsFlags(int64_t flags, int from_api);
+static int moduleValidateCommandArgs(RedisModuleCommandArg *args,
+ const RedisModuleCommandInfoVersion *version);
+static struct redisCommandArg *moduleCopyCommandArgs(RedisModuleCommandArg *args,
+ const RedisModuleCommandInfoVersion *version);
+static redisCommandArgType moduleConvertArgType(RedisModuleCommandArgType type, int *error);
+static int moduleConvertArgFlags(int flags);
+void moduleCreateContext(RedisModuleCtx *out_ctx, RedisModule *module, int ctx_flags);
+/* --------------------------------------------------------------------------
+ * ## Heap allocation raw functions
+ *
+ * Memory allocated with these functions are taken into account by Redis key
+ * eviction algorithms and are reported in Redis memory usage information.
+ * -------------------------------------------------------------------------- */
+
+/* Use like malloc(). Memory allocated with this function is reported in
+ * Redis INFO memory, used for keys eviction according to maxmemory settings
+ * and in general is taken into account as memory allocated by Redis.
+ * You should avoid using malloc().
+ * This function panics if unable to allocate enough memory; see
+ * RM_TryAlloc() for a variant that returns NULL on failure instead. */
+void *RM_Alloc(size_t bytes) {
+    /* Use 'zmalloc_usable()' instead of 'zmalloc()' to allow the compiler
+     * to recognize the additional memory size, which means that modules can
+     * use the memory reported by 'RM_MallocUsableSize()' safely. In theory this
+     * isn't really needed since this API can't be inlined (not even for embedded
+     * modules like TLS (we use function pointers for module APIs), and the API doesn't
+     * have the malloc_size attribute, but it's hard to predict how smart future compilers
+     * will be, so better safe than sorry. */
+    return zmalloc_usable(bytes,NULL);
+}
+
+/* Similar to RM_Alloc, but returns NULL in case of allocation failure, instead
+ * of panicking. Memory obtained this way is accounted exactly like RM_Alloc()
+ * memory and must be released with RM_Free(). */
+void *RM_TryAlloc(size_t bytes) {
+    return ztrymalloc_usable(bytes,NULL);
+}
+
+/* Use like calloc(). Memory allocated with this function is reported in
+ * Redis INFO memory, used for keys eviction according to maxmemory settings
+ * and in general is taken into account as memory allocated by Redis.
+ * You should avoid using calloc() directly.
+ * This function panics if unable to allocate enough memory. */
+void *RM_Calloc(size_t nmemb, size_t size) {
+    /* Unlike calloc(), zcalloc_usable() takes a single pre-multiplied size,
+     * so the nmemb*size product must be checked here: on overflow a tiny
+     * block would be returned and the module would write past its end. */
+    if (size != 0 && nmemb > SIZE_MAX / size)
+        serverPanic("RM_Calloc(): allocation size overflow");
+    return zcalloc_usable(nmemb*size,NULL);
+}
+
+/* Use like realloc() for memory obtained with RedisModule_Alloc(). Like
+ * RM_Alloc() this uses the non-"try" allocator, so it panics rather than
+ * returning NULL when the reallocation cannot be satisfied. */
+void* RM_Realloc(void *ptr, size_t bytes) {
+    return zrealloc_usable(ptr,bytes,NULL);
+}
+
+/* Use like free() for memory obtained by RedisModule_Alloc() and
+ * RedisModule_Realloc(). However you should never try to free with
+ * RedisModule_Free() memory allocated with malloc() inside your module:
+ * the two allocators do not share bookkeeping. */
+void RM_Free(void *ptr) {
+    zfree(ptr);
+}
+
+/* Like strdup() but returns memory allocated with RedisModule_Alloc(),
+ * so the copy must be released with RM_Free(), not free(). */
+char *RM_Strdup(const char *str) {
+    return zstrdup(str);
+}
+
+/* --------------------------------------------------------------------------
+ * Pool allocator
+ * -------------------------------------------------------------------------- */
+
+/* Free every block in the context's pool-allocator chain and clear the
+ * head pointer, leaving the context ready for fresh pool allocations. */
+void poolAllocRelease(RedisModuleCtx *ctx) {
+    RedisModulePoolAllocBlock *block = ctx->pa_head;
+
+    ctx->pa_head = NULL;
+    while (block) {
+        RedisModulePoolAllocBlock *tofree = block;
+        block = block->next;
+        zfree(tofree);
+    }
+}
+
+/* Return heap allocated memory that will be freed automatically when the
+ * module callback function returns. Mostly suitable for small allocations
+ * that are short living and must be released when the callback returns
+ * anyway. The returned memory is aligned to the architecture word size
+ * if at least word size bytes are requested, otherwise it is just
+ * aligned to the next power of two, so for example a 3 bytes request is
+ * 4 bytes aligned while a 2 bytes request is 2 bytes aligned.
+ *
+ * There is no realloc style function since when this is needed to use the
+ * pool allocator is not a good idea.
+ *
+ * The function returns NULL if `bytes` is 0. */
+void *RM_PoolAlloc(RedisModuleCtx *ctx, size_t bytes) {
+    if (bytes == 0) return NULL;
+    RedisModulePoolAllocBlock *b = ctx->pa_head;
+    size_t left = b ? b->size - b->used : 0;
+
+    /* Fix alignment: shrink the alignment to the largest power of two not
+     * larger than the request, then round 'used' up to it. The rounding can
+     * push 'used' past 'size', in which case the block has no space left. */
+    if (left >= bytes) {
+        size_t alignment = REDISMODULE_POOL_ALLOC_ALIGN;
+        while (bytes < alignment && alignment/2 >= bytes) alignment /= 2;
+        if (b->used % alignment)
+            b->used += alignment - (b->used % alignment);
+        left = (b->used > b->size) ? 0 : b->size - b->used;
+    }
+
+    /* Create a new block if needed: the minimum block size, or exactly
+     * 'bytes' for oversized requests. New blocks are pushed on the head of
+     * the chain, so ctx->pa_head is always the block being filled. */
+    if (left < bytes) {
+        size_t blocksize = REDISMODULE_POOL_ALLOC_MIN_SIZE;
+        if (blocksize < bytes) blocksize = bytes;
+        b = zmalloc(sizeof(*b) + blocksize);
+        b->size = blocksize;
+        b->used = 0;
+        b->next = ctx->pa_head;
+        ctx->pa_head = b;
+    }
+
+    /* Hand out the next 'bytes' of the current block (fresh blocks start
+     * at offset 0, which is trivially aligned). */
+    char *retval = b->memory + b->used;
+    b->used += bytes;
+    return retval;
+}
+
+/* --------------------------------------------------------------------------
+ * Helpers for modules API implementation
+ * -------------------------------------------------------------------------- */
+
+/* Borrow a client from the temp client pool, creating a fresh module
+ * client when the pool is empty. The pool's low-watermark is tracked so
+ * the cron job can later shrink an over-grown pool. */
+client *moduleAllocTempClient(void) {
+    if (moduleTempClientCount == 0) {
+        /* Pool exhausted: build a brand new fake client owned by modules. */
+        client *fresh = createClient(NULL);
+        fresh->flags |= CLIENT_MODULE;
+        fresh->user = NULL; /* Root user */
+        return fresh;
+    }
+    client *pooled = moduleTempClients[--moduleTempClientCount];
+    if (moduleTempClientCount < moduleTempClientMinCount)
+        moduleTempClientMinCount = moduleTempClientCount;
+    return pooled;
+}
+
+/* Drop one reference from the promise, freeing it when the count reaches
+ * zero. A promise being destroyed must have no client attached: either
+ * releasing the client or RM_CallReplyPromiseAbort already detached it. */
+static void freeRedisModuleAsyncRMCallPromise(RedisModuleAsyncRMCallPromise *promise) {
+    promise->ref_count--;
+    if (promise->ref_count == 0) {
+        serverAssert(!promise->c);
+        zfree(promise);
+    }
+}
+
+/* Return a temp client to the pool after wiping all per-command state, so
+ * the next moduleAllocTempClient() caller receives a pristine client. The
+ * pool array grows on demand, doubling from an initial 32 slots. */
+void moduleReleaseTempClient(client *c) {
+    if (moduleTempClientCount == moduleTempClientCap) {
+        moduleTempClientCap = moduleTempClientCap ? moduleTempClientCap*2 : 32;
+        moduleTempClients = zrealloc(moduleTempClients, sizeof(c)*moduleTempClientCap);
+    }
+    /* Reset connection-level state first, then command-level state; the
+     * remaining fields are cleared explicitly below. */
+    clearClientConnectionState(c);
+    listEmpty(c->reply);
+    c->reply_bytes = 0;
+    c->duration = 0;
+    resetClient(c);
+    c->bufpos = 0;
+    c->flags = CLIENT_MODULE; /* Drop any flags picked up while in use. */
+    c->user = NULL; /* Root user */
+    c->cmd = c->lastcmd = c->realcmd = NULL;
+    if (c->bstate.async_rm_call_handle) {
+        /* Detach a pending async RM_Call promise and drop our reference. */
+        RedisModuleAsyncRMCallPromise *promise = c->bstate.async_rm_call_handle;
+        promise->c = NULL; /* Remove the client from the promise so it will no longer be possible to abort it. */
+        freeRedisModuleAsyncRMCallPromise(promise);
+        c->bstate.async_rm_call_handle = NULL;
+    }
+    moduleTempClients[moduleTempClientCount++] = c;
+}
+
+/* Create an empty key of the specified type. `key` must point to a key object
+ * opened for writing where the `.value` member is set to NULL because the
+ * key was found to be non existing.
+ *
+ * On success REDISMODULE_OK is returned and the key is populated with
+ * the value of the specified type. The function fails and returns
+ * REDISMODULE_ERR if:
+ *
+ * 1. The key is not open for writing.
+ * 2. The key is not empty.
+ * 3. The specified type is unknown.
+ */
+int moduleCreateEmptyKey(RedisModuleKey *key, int type) {
+    /* Writing mode is required, and the key must not already hold a value. */
+    if (!(key->mode & REDISMODULE_WRITE) || key->value)
+        return REDISMODULE_ERR;
+
+    /* Build an empty value object of the requested type. */
+    robj *obj;
+    if (type == REDISMODULE_KEYTYPE_LIST) {
+        obj = createListListpackObject();
+    } else if (type == REDISMODULE_KEYTYPE_ZSET) {
+        obj = createZsetListpackObject();
+    } else if (type == REDISMODULE_KEYTYPE_HASH) {
+        obj = createHashObject();
+    } else if (type == REDISMODULE_KEYTYPE_STREAM) {
+        obj = createStreamObject();
+    } else {
+        return REDISMODULE_ERR; /* Unknown or unsupported type. */
+    }
+
+    dbAdd(key->db,key->key,obj);
+    key->value = obj;
+    moduleInitKeyTypeSpecific(key);
+    return REDISMODULE_OK;
+}
+
+/* Frees key->iter and sets it to NULL. Only list and stream values carry
+ * an iterator; calling this with any other value type is a bug. */
+static void moduleFreeKeyIterator(RedisModuleKey *key) {
+    serverAssert(key->iter != NULL);
+    int valtype = key->value->type;
+    if (valtype == OBJ_LIST) {
+        listTypeReleaseIterator(key->iter);
+    } else if (valtype == OBJ_STREAM) {
+        streamIteratorStop(key->iter);
+        zfree(key->iter);
+    } else {
+        serverAssert(0); /* No key->iter for other types. */
+    }
+    key->iter = NULL;
+}
+
+/* Callback for listTypeTryConversion(): a list encoding conversion
+ * invalidates any open iterator, so release it here.
+ * Frees list iterator and sets it to NULL (no-op if none is open). */
+static void moduleFreeListIterator(void *data) {
+    RedisModuleKey *key = (RedisModuleKey*)data;
+    serverAssert(key->value->type == OBJ_LIST);
+    if (key->iter) moduleFreeKeyIterator(key);
+}
+
+/* This function is called in low-level API implementation functions in order
+ * to check if the value associated with the key remained empty after an
+ * operation that removed elements from an aggregate data type.
+ *
+ * If this happens, the key is deleted from the DB and the key object state
+ * is set to the right one in order to be targeted again by write operations
+ * possibly recreating the key if needed.
+ *
+ * The function returns 1 if the key value object is found empty and is
+ * deleted, otherwise 0 is returned. */
+int moduleDelKeyIfEmpty(RedisModuleKey *key) {
+    if (!(key->mode & REDISMODULE_WRITE) || key->value == NULL) return 0;
+
+    /* Only aggregate types can become empty; anything else is kept. */
+    robj *o = key->value;
+    int isempty = 0;
+    if (o->type == OBJ_LIST)        isempty = (listTypeLength(o) == 0);
+    else if (o->type == OBJ_SET)    isempty = (setTypeSize(o) == 0);
+    else if (o->type == OBJ_ZSET)   isempty = (zsetLength(o) == 0);
+    else if (o->type == OBJ_HASH)   isempty = (hashTypeLength(o) == 0);
+    else if (o->type == OBJ_STREAM) isempty = (streamLength(o) == 0);
+
+    if (!isempty) return 0;
+
+    /* Release any open iterator before the value goes away, then drop
+     * the key and mark the handle as pointing at a non-existing key. */
+    if (key->iter) moduleFreeKeyIterator(key);
+    dbDelete(key->db,key->key);
+    key->value = NULL;
+    return 1;
+}
+
+/* --------------------------------------------------------------------------
+ * Service API exported to modules
+ *
+ * Note that all the exported APIs are called RM_<funcname> in the core
+ * and RedisModule_<funcname> in the module side (defined as function
+ * pointers in redismodule.h). In this way the dynamic linker does not
+ * mess with our global function pointers, overriding it with the symbols
+ * defined in the main executable having the same names.
+ * -------------------------------------------------------------------------- */
+
+int RM_GetApi(const char *funcname, void **targetPtrPtr) {
+    /* Lookup the requested module API and store the function pointer into the
+     * target pointer. The function returns REDISMODULE_ERR if there is no such
+     * named API, otherwise REDISMODULE_OK.
+     *
+     * This function is not meant to be used by modules developer, it is only
+     * used implicitly by including redismodule.h. */
+    dictEntry *he = dictFind(server.moduleapi, funcname);
+    if (!he) return REDISMODULE_ERR; /* Unknown API name. */
+    *targetPtrPtr = dictGetVal(he);
+    return REDISMODULE_OK;
+}
+
+/* Operations to perform once an execution unit has completely finished.
+ * A no-op while still nested inside another execution unit. */
+void modulePostExecutionUnitOperations(void) {
+    if (server.execution_nesting)
+        return;
+
+    if (server.busy_module_yield_flags) {
+        /* A slow module command was yielding: end the busy state, restore
+         * the current client, and let postponed clients run again. */
+        blockingOperationEnds();
+        server.busy_module_yield_flags = BUSY_MODULE_YIELD_NONE;
+        if (server.current_client)
+            unprotectClient(server.current_client);
+        unblockPostponedClients();
+    }
+}
+
+/* Free the context after the user function was called.
+ * Releases auto-memory, pooled allocations and, depending on how the
+ * context was created, its associated client (see comments below). */
+void moduleFreeContext(RedisModuleCtx *ctx) {
+    /* See comment in moduleCreateContext: the nesting counter was only
+     * bumped for contexts that are neither thread-safe nor command ones. */
+    if (!(ctx->flags & (REDISMODULE_CTX_THREAD_SAFE|REDISMODULE_CTX_COMMAND))) {
+        exitExecutionUnit();
+        postExecutionUnitOperations();
+    }
+    autoMemoryCollect(ctx);
+    poolAllocRelease(ctx);
+    if (ctx->postponed_arrays) {
+        /* Leftover postponed arrays indicate the module forgot to close
+         * deferred-length replies: free them and log the API misuse. */
+        zfree(ctx->postponed_arrays);
+        ctx->postponed_arrays_count = 0;
+        serverLog(LL_WARNING,
+            "API misuse detected in module %s: "
+            "RedisModule_ReplyWith*(REDISMODULE_POSTPONED_LEN) "
+            "not matched by the same number of RedisModule_SetReply*Len() "
+            "calls.",
+            ctx->module->name);
+    }
+    /* If this context has a temp client, we return it back to the pool.
+     * If this context created a new client (e.g detached context), we free it.
+     * If the client is assigned manually, e.g ctx->client = someClientInstance,
+     * none of these flags will be set and we do not attempt to free it. */
+    if (ctx->flags & REDISMODULE_CTX_TEMP_CLIENT)
+        moduleReleaseTempClient(ctx->client);
+    else if (ctx->flags & REDISMODULE_CTX_NEW_CLIENT)
+        freeClient(ctx->client);
+}
+
+/* Drain the reply accumulated in client 'c' (static buffer plus reply
+ * list) into a single protocol string and wrap it in a CallReply.
+ * Ownership of any deferred reply errors moves to the returned object. */
+static CallReply *moduleParseReply(client *c, RedisModuleCtx *ctx) {
+    /* Convert the result of the Redis command into a module reply. */
+    sds proto = sdsnewlen(c->buf,c->bufpos);
+    c->bufpos = 0;
+    while(listLength(c->reply)) {
+        clientReplyBlock *o = listNodeValue(listFirst(c->reply));
+
+        proto = sdscatlen(proto,o->buf,o->used);
+        listDelNode(c->reply,listFirst(c->reply));
+    }
+    CallReply *reply = callReplyCreate(proto, c->deferred_reply_errors, ctx);
+    c->deferred_reply_errors = NULL; /* now the responsibility of the reply object. */
+    return reply;
+}
+
+/* Handler run when a blocked async RM_Call client is unblocked: parses the
+ * accumulated reply and invokes the module's on_unblocked callback, if any,
+ * then releases the temp client. */
+void moduleCallCommandUnblockedHandler(client *c) {
+    RedisModuleCtx ctx;
+    RedisModuleAsyncRMCallPromise *promise = c->bstate.async_rm_call_handle;
+    serverAssert(promise);
+    RedisModule *module = promise->module;
+    if (!promise->on_unblocked) {
+        moduleReleaseTempClient(c);
+        return; /* module did not set any unblock callback. */
+    }
+    moduleCreateContext(&ctx, module, REDISMODULE_CTX_TEMP_CLIENT);
+    /* Run the callback against the same DB the blocked client selected. */
+    selectDb(ctx.client, c->db->id);
+
+    /* NOTE(review): the CallReply is not freed here — presumably ownership
+     * passes to the on_unblocked callback; confirm against the API docs. */
+    CallReply *reply = moduleParseReply(c, NULL);
+    module->in_call++;
+    promise->on_unblocked(&ctx, reply, promise->private_data);
+    module->in_call--;
+
+    moduleFreeContext(&ctx);
+    moduleReleaseTempClient(c);
+}
+
+/* Create a module ctx and keep track of the nesting level.
+ *
+ * Note: When creating ctx for threads (RM_GetThreadSafeContext and
+ * RM_GetDetachedThreadSafeContext) we do not bump up the nesting level
+ * because we only need to track of nesting level in the main thread
+ * (only the main thread uses propagatePendingCommands) */
+void moduleCreateContext(RedisModuleCtx *out_ctx, RedisModule *module, int ctx_flags) {
+    memset(out_ctx, 0 ,sizeof(RedisModuleCtx));
+    /* NOTE(review): the cast goes through 'unsigned long'; on targets where
+     * pointers are wider than unsigned long this would truncate — confirm
+     * the set of supported platforms. */
+    out_ctx->getapifuncptr = (void*)(unsigned long)&RM_GetApi;
+    out_ctx->module = module;
+    out_ctx->flags = ctx_flags;
+    if (ctx_flags & REDISMODULE_CTX_TEMP_CLIENT)
+        out_ctx->client = moduleAllocTempClient();
+    else if (ctx_flags & REDISMODULE_CTX_NEW_CLIENT)
+        out_ctx->client = createClient(NULL);
+
+    /* Calculate the initial yield time for long blocked contexts.
+     * in loading we depend on the server hz, but in other cases we also wait
+     * for busy_reply_threshold.
+     * Note that in theory we could have started processing BUSY_MODULE_YIELD_EVENTS
+     * sooner, and only delay the processing for clients till the busy_reply_threshold,
+     * but this carries some overheads of frequently marking clients with BLOCKED_POSTPONE
+     * and releasing them, i.e. if modules only block for short periods. */
+    if (server.loading)
+        out_ctx->next_yield_time = getMonotonicUs() + 1000000 / server.hz;
+    else
+        out_ctx->next_yield_time = getMonotonicUs() + server.busy_reply_threshold * 1000;
+
+    /* Increment the execution_nesting counter (module is about to execute some code),
+     * except in the following cases:
+     * 1. We came here from cmd->proc (either call() or AOF load).
+     *    In the former, the counter has been already incremented from within
+     *    call() and in the latter we don't care about execution_nesting
+     * 2. If we are running in a thread (execution_nesting will be dealt with
+     *    when locking/unlocking the GIL) */
+    if (!(ctx_flags & (REDISMODULE_CTX_THREAD_SAFE|REDISMODULE_CTX_COMMAND))) {
+        enterExecutionUnit(1, 0);
+    }
+}
+
+/* This Redis command binds the normal Redis command invocation with commands
+ * exported by modules: it builds a command context around the calling
+ * client, invokes the module's function pointer, then tears the context
+ * down. */
+void RedisModuleCommandDispatcher(client *c) {
+    RedisModuleCommand *cp = c->cmd->module_cmd;
+    RedisModuleCtx ctx;
+    moduleCreateContext(&ctx, cp->module, REDISMODULE_CTX_COMMAND);
+
+    /* The real calling client is assigned manually, so moduleFreeContext()
+     * will not attempt to free it. */
+    ctx.client = c;
+    cp->func(&ctx,(void**)c->argv,c->argc);
+    moduleFreeContext(&ctx);
+
+    /* In some cases processMultibulkBuffer uses sdsMakeRoomFor to
+     * expand the query buffer, and in order to avoid a big object copy
+     * the query buffer SDS may be used directly as the SDS string backing
+     * the client argument vectors: sometimes this will result in the SDS
+     * string having unused space at the end. Later if a module takes ownership
+     * of the RedisString, such space will be wasted forever. Inside the
+     * Redis core this is not a problem because tryObjectEncoding() is called
+     * before storing strings in the key space. Here we need to do it
+     * for the module. */
+    for (int i = 0; i < c->argc; i++) {
+        /* Only do the work if the module took ownership of the object:
+         * in that case the refcount is no longer 1. */
+        if (c->argv[i]->refcount > 1)
+            trimStringObjectIfNeeded(c->argv[i], 0);
+    }
+}
+
+/* This function returns the list of keys, with the same interface as the
+ * 'getkeys' function of the native commands, for module commands that exported
+ * the "getkeys-api" flag during the registration. This is done when the
+ * list of keys are not at fixed positions, so that first/last/step cannot
+ * be used.
+ *
+ * In order to accomplish its work, the module command is called, flagging
+ * the context in a way that the command can recognize this is a special
+ * "get keys" call by calling RedisModule_IsKeysPositionRequest(ctx). */
+int moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+    RedisModuleCommand *cp = cmd->module_cmd;
+    RedisModuleCtx ctx;
+    moduleCreateContext(&ctx, cp->module, REDISMODULE_CTX_KEYS_POS_REQUEST);
+
+    /* Initialize getKeysResult */
+    getKeysPrepareResult(result, MAX_KEYS_BUFFER);
+    ctx.keys_result = result;
+
+    /* The command fills 'result' via RM_KeyAtPos()/RM_KeyAtPosWithFlags()
+     * instead of executing its normal logic. */
+    cp->func(&ctx,(void**)argv,argc);
+    /* We currently always use the array allocated by RM_KeyAtPos() and don't try
+     * to optimize for the pre-allocated buffer.
+     */
+    moduleFreeContext(&ctx);
+    return result->numkeys;
+}
+
+/* This function returns the list of channels, with the same interface as
+ * moduleGetCommandKeysViaAPI, for modules that declare "getchannels-api"
+ * during registration. Unlike keys, this is the only way to declare channels. */
+int moduleGetCommandChannelsViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) {
+    RedisModuleCommand *cp = cmd->module_cmd;
+    RedisModuleCtx ctx;
+    moduleCreateContext(&ctx, cp->module, REDISMODULE_CTX_CHANNELS_POS_REQUEST);
+
+    /* Initialize getKeysResult */
+    getKeysPrepareResult(result, MAX_KEYS_BUFFER);
+    ctx.keys_result = result;
+
+    /* The command reports channels via RM_ChannelAtPosWithFlags() instead
+     * of executing its normal logic. */
+    cp->func(&ctx,(void**)argv,argc);
+    /* We currently always use the array allocated by RM_ChannelAtPosWithFlags() and don't try
+     * to optimize for the pre-allocated buffer. */
+    moduleFreeContext(&ctx);
+    return result->numkeys;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Commands API
+ *
+ * These functions are used to implement custom Redis commands.
+ *
+ * For examples, see https://redis.io/topics/modules-intro.
+ * -------------------------------------------------------------------------- */
+
+/* Return non-zero if a module command, that was declared with the
+ * flag "getkeys-api", is called in a special way to get the keys positions
+ * and not to get executed. Otherwise zero is returned. */
+int RM_IsKeysPositionRequest(RedisModuleCtx *ctx) {
+    if (ctx->flags & REDISMODULE_CTX_KEYS_POS_REQUEST) return 1;
+    return 0;
+}
+
+/* When a module command is called in order to obtain the position of
+ * keys, since it was flagged as "getkeys-api" during the registration,
+ * the command implementation checks for this special call using the
+ * RedisModule_IsKeysPositionRequest() API and uses this function in
+ * order to report keys.
+ *
+ * The supported flags are the ones used by RM_SetCommandInfo, see REDISMODULE_CMD_KEY_*.
+ *
+ *
+ * The following is an example of how it could be used:
+ *
+ * if (RedisModule_IsKeysPositionRequest(ctx)) {
+ * RedisModule_KeyAtPosWithFlags(ctx, 2, REDISMODULE_CMD_KEY_RO | REDISMODULE_CMD_KEY_ACCESS);
+ * RedisModule_KeyAtPosWithFlags(ctx, 1, REDISMODULE_CMD_KEY_RW | REDISMODULE_CMD_KEY_UPDATE | REDISMODULE_CMD_KEY_ACCESS);
+ * }
+ *
+ * Note: in the example above the get keys API could have been handled by key-specs (preferred).
+ * Implementing the getkeys-api is required only when is it not possible to declare key-specs that cover all keys.
+ *
+ */
+void RM_KeyAtPosWithFlags(RedisModuleCtx *ctx, int pos, int flags) {
+    /* Only meaningful during a "get keys" call; silently ignore otherwise. */
+    if (!(ctx->flags & REDISMODULE_CTX_KEYS_POS_REQUEST) || !ctx->keys_result) return;
+    if (pos <= 0) return; /* Position 0 is the command name itself. */
+
+    getKeysResult *res = ctx->keys_result;
+
+    /* Check overflow: double the array, capping growth at 8192 per step. */
+    if (res->numkeys == res->size) {
+        int newsize = res->size + (res->size > 8192 ? 8192 : res->size);
+        getKeysPrepareResult(res, newsize);
+    }
+
+    /* Record the key position with the module flags translated to the
+     * core's key-spec flag representation. */
+    res->keys[res->numkeys].pos = pos;
+    res->keys[res->numkeys].flags = moduleConvertKeySpecsFlags(flags, 1);
+    res->numkeys++;
+}
+
+/* Legacy variant of RM_KeyAtPosWithFlags(), kept for compatibility with
+ * modules written before key-specs and key flags were introduced.
+ * Reports the key with full-access semantics. */
+void RM_KeyAtPos(RedisModuleCtx *ctx, int pos) {
+    /* Translate the core "full access" flags into module key flags and
+     * delegate to the flag-aware API. */
+    RM_KeyAtPosWithFlags(ctx, pos, moduleConvertKeySpecsFlags(CMD_KEY_FULL_ACCESS, 0));
+}
+
+/* Return non-zero if a module command, that was declared with the
+ * flag "getchannels-api", is called in a special way to get the channel positions
+ * and not to get executed. Otherwise zero is returned. */
+int RM_IsChannelsPositionRequest(RedisModuleCtx *ctx) {
+    if (ctx->flags & REDISMODULE_CTX_CHANNELS_POS_REQUEST) return 1;
+    return 0;
+}
+
+/* When a module command is called in order to obtain the position of
+ * channels, since it was flagged as "getchannels-api" during the
+ * registration, the command implementation checks for this special call
+ * using the RedisModule_IsChannelsPositionRequest() API and uses this
+ * function in order to report the channels.
+ *
+ * The supported flags are:
+ * * REDISMODULE_CMD_CHANNEL_SUBSCRIBE: This command will subscribe to the channel.
+ * * REDISMODULE_CMD_CHANNEL_UNSUBSCRIBE: This command will unsubscribe from this channel.
+ * * REDISMODULE_CMD_CHANNEL_PUBLISH: This command will publish to this channel.
+ * * REDISMODULE_CMD_CHANNEL_PATTERN: Instead of acting on a specific channel, will act on any
+ * channel specified by the pattern. This is the same access
+ * used by the PSUBSCRIBE and PUNSUBSCRIBE commands available
+ * in Redis. Not intended to be used with PUBLISH permissions.
+ *
+ * The following is an example of how it could be used:
+ *
+ * if (RedisModule_IsChannelsPositionRequest(ctx)) {
+ * RedisModule_ChannelAtPosWithFlags(ctx, 1, REDISMODULE_CMD_CHANNEL_SUBSCRIBE | REDISMODULE_CMD_CHANNEL_PATTERN);
+ * RedisModule_ChannelAtPosWithFlags(ctx, 1, REDISMODULE_CMD_CHANNEL_PUBLISH);
+ * }
+ *
+ * Note: One usage of declaring channels is for evaluating ACL permissions. In this context,
+ * unsubscribing is always allowed, so commands will only be checked against subscribe and
+ * publish permissions. This is preferred over using RM_ACLCheckChannelPermissions, since
+ * it allows the ACLs to be checked before the command is executed. */
+void RM_ChannelAtPosWithFlags(RedisModuleCtx *ctx, int pos, int flags) {
+    /* Only meaningful during a "get channels" call; ignore otherwise. */
+    if (!(ctx->flags & REDISMODULE_CTX_CHANNELS_POS_REQUEST) || !ctx->keys_result) return;
+    if (pos <= 0) return; /* Position 0 is the command name itself. */
+
+    getKeysResult *res = ctx->keys_result;
+
+    /* Check overflow: double the array, capping growth at 8192 per step. */
+    if (res->numkeys == res->size) {
+        int newsize = res->size + (res->size > 8192 ? 8192 : res->size);
+        getKeysPrepareResult(res, newsize);
+    }
+
+    /* Map the REDISMODULE_CMD_CHANNEL_* flags to the core CMD_CHANNEL_*
+     * flags one by one. */
+    int new_flags = 0;
+    if (flags & REDISMODULE_CMD_CHANNEL_SUBSCRIBE) new_flags |= CMD_CHANNEL_SUBSCRIBE;
+    if (flags & REDISMODULE_CMD_CHANNEL_UNSUBSCRIBE) new_flags |= CMD_CHANNEL_UNSUBSCRIBE;
+    if (flags & REDISMODULE_CMD_CHANNEL_PUBLISH) new_flags |= CMD_CHANNEL_PUBLISH;
+    if (flags & REDISMODULE_CMD_CHANNEL_PATTERN) new_flags |= CMD_CHANNEL_PATTERN;
+
+    res->keys[res->numkeys].pos = pos;
+    res->keys[res->numkeys].flags = new_flags;
+    res->numkeys++;
+}
+
+/* Returns 1 if name is valid, otherwise returns 0.
+ *
+ * Some characters are rejected in module command names because they are
+ * known to break things:
+ *
+ *   ' ' (space)   - issues with old inline protocol.
+ *   '\r', '\n'    - can mess up the protocol on acl error replies.
+ *   '|'           - sub-commands.
+ *   '@'           - ACL categories.
+ *   '=', ','      - info and client list fields
+ *                   (':' handled by getSafeInfoString).
+ */
+int isCommandNameValid(const char *name) {
+    static const char forbidden[] = " \r\n|@=,";
+    return strpbrk(name, forbidden) == NULL;
+}
+
+/* Helper for RM_CreateCommand(). Turns a string representing command
+ * flags into the command flags used by the Redis core.
+ *
+ * It returns the set of flags, or -1 if unknown flags are found. */
+int64_t commandFlagsFromString(char *s) {
+    int count, j;
+    int64_t flags = 0;
+    /* Tokenize the space separated flag string. */
+    sds *tokens = sdssplitlen(s,strlen(s)," ",1,&count);
+    for (j = 0; j < count; j++) {
+        char *t = tokens[j];
+        if (!strcasecmp(t,"write")) flags |= CMD_WRITE;
+        else if (!strcasecmp(t,"readonly")) flags |= CMD_READONLY;
+        else if (!strcasecmp(t,"admin")) flags |= CMD_ADMIN;
+        else if (!strcasecmp(t,"deny-oom")) flags |= CMD_DENYOOM;
+        else if (!strcasecmp(t,"deny-script")) flags |= CMD_NOSCRIPT;
+        else if (!strcasecmp(t,"allow-loading")) flags |= CMD_LOADING;
+        else if (!strcasecmp(t,"pubsub")) flags |= CMD_PUBSUB;
+        else if (!strcasecmp(t,"random")) { /* Deprecated. Silently ignore. */ }
+        else if (!strcasecmp(t,"blocking")) flags |= CMD_BLOCKING;
+        else if (!strcasecmp(t,"allow-stale")) flags |= CMD_STALE;
+        else if (!strcasecmp(t,"no-monitor")) flags |= CMD_SKIP_MONITOR;
+        else if (!strcasecmp(t,"no-slowlog")) flags |= CMD_SKIP_SLOWLOG;
+        else if (!strcasecmp(t,"fast")) flags |= CMD_FAST;
+        else if (!strcasecmp(t,"no-auth")) flags |= CMD_NO_AUTH;
+        else if (!strcasecmp(t,"may-replicate")) flags |= CMD_MAY_REPLICATE;
+        else if (!strcasecmp(t,"getkeys-api")) flags |= CMD_MODULE_GETKEYS;
+        else if (!strcasecmp(t,"getchannels-api")) flags |= CMD_MODULE_GETCHANNELS;
+        else if (!strcasecmp(t,"no-cluster")) flags |= CMD_MODULE_NO_CLUSTER;
+        else if (!strcasecmp(t,"no-mandatory-keys")) flags |= CMD_NO_MANDATORY_KEYS;
+        else if (!strcasecmp(t,"allow-busy")) flags |= CMD_ALLOW_BUSY;
+        else break; /* Unknown token: stop so the check below reports it. */
+    }
+    sdsfreesplitres(tokens,count);
+    if (j != count) return -1; /* Some token not processed correctly. */
+    return flags;
+}
+
+RedisModuleCommand *moduleCreateCommandProxy(struct RedisModule *module, sds declared_name, sds fullname, RedisModuleCmdFunc cmdfunc, int64_t flags, int firstkey, int lastkey, int keystep);
+
+/* Register a new command in the Redis server, that will be handled by
+ * calling the function pointer 'cmdfunc' using the RedisModule calling
+ * convention.
+ *
+ * The function returns REDISMODULE_ERR in these cases:
+ * - If creation of module command is called outside the RedisModule_OnLoad.
+ * - The specified command is already busy.
+ * - The command name contains some chars that are not allowed.
+ * - A set of invalid flags were passed.
+ *
+ * Otherwise REDISMODULE_OK is returned and the new command is registered.
+ *
+ * This function must be called during the initialization of the module
+ * inside the RedisModule_OnLoad() function. Calling this function outside
+ * of the initialization function is not defined.
+ *
+ * The command function type is the following:
+ *
+ * int MyCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc);
+ *
+ * And is supposed to always return REDISMODULE_OK.
+ *
+ * The set of flags 'strflags' specify the behavior of the command, and should
+ * be passed as a C string composed of space separated words, like for
+ * example "write deny-oom". The set of flags are:
+ *
+ * * **"write"**: The command may modify the data set (it may also read
+ * from it).
+ * * **"readonly"**: The command returns data from keys but never writes.
+ * * **"admin"**: The command is an administrative command (may change
+ * replication or perform similar tasks).
+ * * **"deny-oom"**: The command may use additional memory and should be
+ * denied during out of memory conditions.
+ * * **"deny-script"**: Don't allow this command in Lua scripts.
+ * * **"allow-loading"**: Allow this command while the server is loading data.
+ * Only commands not interacting with the data set
+ * should be allowed to run in this mode. If not sure
+ * don't use this flag.
+ * * **"pubsub"**: The command publishes things on Pub/Sub channels.
+ * * **"random"**: The command may have different outputs even starting
+ * from the same input arguments and key values.
+ * Starting from Redis 7.0 this flag has been deprecated.
+ * Declaring a command as "random" can be done using
+ * command tips, see https://redis.io/topics/command-tips.
+ * * **"allow-stale"**: The command is allowed to run on slaves that don't
+ * serve stale data. Don't use if you don't know what
+ * this means.
+ * * **"no-monitor"**: Don't propagate the command on monitor. Use this if
+ * the command has sensitive data among the arguments.
+ * * **"no-slowlog"**: Don't log this command in the slowlog. Use this if
+ * the command has sensitive data among the arguments.
+ * * **"fast"**: The command time complexity is not greater
+ * than O(log(N)) where N is the size of the collection or
+ * anything else representing the normal scalability
+ * issue with the command.
+ * * **"getkeys-api"**: The command implements the interface to return
+ * the arguments that are keys. Used when start/stop/step
+ * is not enough because of the command syntax.
+ * * **"no-cluster"**: The command should not register in Redis Cluster
+ * since is not designed to work with it because, for
+ * example, is unable to report the position of the
+ * keys, programmatically creates key names, or any
+ * other reason.
+ * * **"no-auth"**: This command can be run by an un-authenticated client.
+ * Normally this is used by a command that is used
+ * to authenticate a client.
+ * * **"may-replicate"**: This command may generate replication traffic, even
+ * though it's not a write command.
+ * * **"no-mandatory-keys"**: All the keys this command may take are optional
+ * * **"blocking"**: The command has the potential to block the client.
+ * * **"allow-busy"**: Permit the command while the server is blocked either by
+ * a script or by a slow module command, see
+ * RM_Yield.
+ * * **"getchannels-api"**: The command implements the interface to return
+ * the arguments that are channels.
+ *
+ * The last three parameters specify which arguments of the new command are
+ * Redis keys. See https://redis.io/commands/command for more information.
+ *
+ * * `firstkey`: One-based index of the first argument that's a key.
+ * Position 0 is always the command name itself.
+ * 0 for commands with no keys.
+ * * `lastkey`: One-based index of the last argument that's a key.
+ * Negative numbers refer to counting backwards from the last
+ * argument (-1 means the last argument provided)
+ * 0 for commands with no keys.
+ * * `keystep`: Step between first and last key indexes.
+ * 0 for commands with no keys.
+ *
+ * This information is used by ACL, Cluster and the `COMMAND` command.
+ *
+ * NOTE: The scheme described above serves a limited purpose and can
+ * only be used to find keys that exist at constant indices.
+ * For non-trivial key arguments, you may pass 0,0,0 and use
+ * RedisModule_SetCommandInfo to set key specs using a more advanced scheme and use
+ * RedisModule_SetCommandACLCategories to set Redis ACL categories of the commands. */
+int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) {
+    /* Command registration is only permitted inside RedisModule_OnLoad. */
+    if (!ctx->module->onload)
+        return REDISMODULE_ERR;
+    int64_t flags = strflags ? commandFlagsFromString((char*)strflags) : 0;
+    if (flags == -1) return REDISMODULE_ERR; /* Unknown flag token. */
+    if ((flags & CMD_MODULE_NO_CLUSTER) && server.cluster_enabled)
+        return REDISMODULE_ERR;
+
+    /* Check if the command name is valid. */
+    if (!isCommandNameValid(name))
+        return REDISMODULE_ERR;
+
+    /* Check if the command name is busy. */
+    if (lookupCommandByCString(name) != NULL)
+        return REDISMODULE_ERR;
+
+    /* The proxy takes ownership of both SDS strings passed to it. */
+    sds declared_name = sdsnew(name);
+    RedisModuleCommand *cp = moduleCreateCommandProxy(ctx->module, declared_name, sdsdup(declared_name), cmdfunc, flags, firstkey, lastkey, keystep);
+    cp->rediscmd->arity = cmdfunc ? -1 : -2; /* Default value, can be changed later via dedicated API */
+
+    /* Register in both the live and the original command tables. */
+    serverAssert(dictAdd(server.commands, sdsdup(declared_name), cp->rediscmd) == DICT_OK);
+    serverAssert(dictAdd(server.orig_commands, sdsdup(declared_name), cp->rediscmd) == DICT_OK);
+    cp->rediscmd->id = ACLGetCommandID(declared_name); /* ID used for ACL. */
+    return REDISMODULE_OK;
+}
+
+/* A proxy that help create a module command / subcommand.
+ *
+ * 'declared_name': it contains the sub_name, which is just the fullname for non-subcommands.
+ * 'fullname': sds string representing the command fullname.
+ *
+ * Function will take the ownership of both 'declared_name' and 'fullname' SDS.
+ */
+RedisModuleCommand *moduleCreateCommandProxy(struct RedisModule *module, sds declared_name, sds fullname, RedisModuleCmdFunc cmdfunc, int64_t flags, int firstkey, int lastkey, int keystep) {
+    struct redisCommand *rediscmd;
+    RedisModuleCommand *cp;
+
+    /* Create a command "proxy", which is a structure that is referenced
+     * in the command table, so that the generic command that works as
+     * binding between modules and Redis, can know what function to call
+     * and what the module is. */
+    cp = zcalloc(sizeof(*cp));
+    cp->module = module;
+    cp->func = cmdfunc;
+    cp->rediscmd = zcalloc(sizeof(*rediscmd));
+    cp->rediscmd->declared_name = declared_name; /* SDS for module commands */
+    cp->rediscmd->fullname = fullname;
+    cp->rediscmd->group = COMMAND_GROUP_MODULE;
+    cp->rediscmd->proc = RedisModuleCommandDispatcher;
+    cp->rediscmd->flags = flags | CMD_MODULE;
+    cp->rediscmd->module_cmd = cp;
+    if (firstkey != 0) {
+        /* Describe the legacy first/last/step key scheme as a single
+         * index/range key spec. */
+        cp->rediscmd->key_specs_num = 1;
+        cp->rediscmd->key_specs = zcalloc(sizeof(keySpec));
+        cp->rediscmd->key_specs[0].flags = CMD_KEY_FULL_ACCESS;
+        if (flags & CMD_MODULE_GETKEYS)
+            cp->rediscmd->key_specs[0].flags |= CMD_KEY_VARIABLE_FLAGS;
+        cp->rediscmd->key_specs[0].begin_search_type = KSPEC_BS_INDEX;
+        cp->rediscmd->key_specs[0].bs.index.pos = firstkey;
+        cp->rediscmd->key_specs[0].find_keys_type = KSPEC_FK_RANGE;
+        /* In key specs, lastkey is relative to firstkey (negative values
+         * keep their count-from-the-end meaning). */
+        cp->rediscmd->key_specs[0].fk.range.lastkey = lastkey < 0 ? lastkey : (lastkey-firstkey);
+        cp->rediscmd->key_specs[0].fk.range.keystep = keystep;
+        cp->rediscmd->key_specs[0].fk.range.limit = 0;
+    } else {
+        cp->rediscmd->key_specs_num = 0;
+        cp->rediscmd->key_specs = NULL;
+    }
+    populateCommandLegacyRangeSpec(cp->rediscmd);
+    /* Zero the command stats. */
+    cp->rediscmd->microseconds = 0;
+    cp->rediscmd->calls = 0;
+    cp->rediscmd->rejected_calls = 0;
+    cp->rediscmd->failed_calls = 0;
+    return cp;
+}
+
+/* Get an opaque structure, representing a module command, by command name.
+ * This structure is used in some of the command-related APIs.
+ *
+ * NULL is returned in case of the following errors:
+ *
+ * * Command not found
+ * * The command is not a module command
+ * * The command doesn't belong to the calling module
+ */
+RedisModuleCommand *RM_GetCommand(RedisModuleCtx *ctx, const char *name) {
+    struct redisCommand *cmd = lookupCommandByCString(name);
+    if (cmd == NULL) return NULL;                 /* Command not found. */
+    if (!(cmd->flags & CMD_MODULE)) return NULL;  /* Not a module command. */
+
+    /* Modules may only look up their own commands. */
+    RedisModuleCommand *cp = cmd->module_cmd;
+    return (cp->module == ctx->module) ? cp : NULL;
+}
+
+/* Very similar to RedisModule_CreateCommand except that it is used to create
+ * a subcommand, associated with another, container, command.
+ *
+ * Example: If a module has a configuration command, MODULE.CONFIG, then
+ * GET and SET should be individual subcommands, while MODULE.CONFIG is
+ * a command, but should not be registered with a valid `funcptr`:
+ *
+ *      if (RedisModule_CreateCommand(ctx,"module.config",NULL,"",0,0,0) == REDISMODULE_ERR)
+ *          return REDISMODULE_ERR;
+ *
+ *      RedisModuleCommand *parent = RedisModule_GetCommand(ctx, "module.config");
+ *
+ *      if (RedisModule_CreateSubcommand(parent,"set",cmd_config_set,"",0,0,0) == REDISMODULE_ERR)
+ *          return REDISMODULE_ERR;
+ *
+ *      if (RedisModule_CreateSubcommand(parent,"get",cmd_config_get,"",0,0,0) == REDISMODULE_ERR)
+ *          return REDISMODULE_ERR;
+ *
+ * Returns REDISMODULE_OK on success and REDISMODULE_ERR in case of the following errors:
+ *
+ * * Error while parsing `strflags`
+ * * Command is marked as `no-cluster` but cluster mode is enabled
+ * * `parent` is already a subcommand (we do not allow more than one level of command nesting)
+ * * `parent` is a command with an implementation (RedisModuleCmdFunc) (A parent command should be a pure container of subcommands)
+ * * `parent` already has a subcommand called `name`
+ * * Creating a subcommand is called outside of RedisModule_OnLoad.
+ */
+int RM_CreateSubcommand(RedisModuleCommand *parent, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) {
+    /* Subcommand registration is only permitted inside RedisModule_OnLoad. */
+    if (!parent->module->onload)
+        return REDISMODULE_ERR;
+    int64_t flags = strflags ? commandFlagsFromString((char*)strflags) : 0;
+    if (flags == -1) return REDISMODULE_ERR; /* Unknown flag token. */
+    if ((flags & CMD_MODULE_NO_CLUSTER) && server.cluster_enabled)
+        return REDISMODULE_ERR;
+
+    struct redisCommand *parent_cmd = parent->rediscmd;
+
+    if (parent_cmd->parent)
+        return REDISMODULE_ERR; /* We don't allow more than one level of subcommands */
+
+    RedisModuleCommand *parent_cp = parent_cmd->module_cmd;
+    if (parent_cp->func)
+        return REDISMODULE_ERR; /* A parent command should be a pure container of subcommands */
+
+    /* Check if the command name is valid. */
+    if (!isCommandNameValid(name))
+        return REDISMODULE_ERR;
+
+    /* Check if the command name is busy within the parent command. */
+    sds declared_name = sdsnew(name);
+    if (parent_cmd->subcommands_dict && lookupSubcommand(parent_cmd, declared_name) != NULL) {
+        sdsfree(declared_name);
+        return REDISMODULE_ERR;
+    }
+
+    /* The proxy takes ownership of 'declared_name' and 'fullname'. */
+    sds fullname = catSubCommandFullname(parent_cmd->fullname, name);
+    RedisModuleCommand *cp = moduleCreateCommandProxy(parent->module, declared_name, fullname, cmdfunc, flags, firstkey, lastkey, keystep);
+    cp->rediscmd->arity = -2;
+
+    commandAddSubcommand(parent_cmd, cp->rediscmd, name);
+    return REDISMODULE_OK;
+}
+
+/* Accessors of array elements of structs where the element size is stored
+ * separately in the version struct. Using the caller-provided element size
+ * keeps these structs forward compatible with older module binaries. */
+static RedisModuleCommandHistoryEntry *
+moduleCmdHistoryEntryAt(const RedisModuleCommandInfoVersion *version,
+                        RedisModuleCommandHistoryEntry *entries, int index) {
+    char *base = (char *)entries;
+    off_t offset = index * version->sizeof_historyentry;
+    return (RedisModuleCommandHistoryEntry *)(base + offset);
+}
+static RedisModuleCommandKeySpec *
+moduleCmdKeySpecAt(const RedisModuleCommandInfoVersion *version,
+                   RedisModuleCommandKeySpec *keyspecs, int index) {
+    char *base = (char *)keyspecs;
+    off_t offset = index * version->sizeof_keyspec;
+    return (RedisModuleCommandKeySpec *)(base + offset);
+}
+static RedisModuleCommandArg *
+moduleCmdArgAt(const RedisModuleCommandInfoVersion *version,
+               const RedisModuleCommandArg *args, int index) {
+    char *base = (char *)args;
+    off_t offset = index * version->sizeof_arg;
+    return (RedisModuleCommandArg *)(base + offset);
+}
+
+/* Recursively populate the args structure (setting num_args to the number
+ * of subargs) and return the number of args. The array is terminated by an
+ * entry with a NULL name. */
+int populateArgsStructure(struct redisCommandArg *args) {
+    if (args == NULL)
+        return 0;
+    int count = 0;
+    for (struct redisCommandArg *cur = args; cur->name; cur++) {
+        serverAssert(count < INT_MAX);
+        /* Recurse into the subargs of each argument. */
+        cur->num_args = populateArgsStructure(cur->subargs);
+        count++;
+    }
+    return count;
+}
+
+/* Helper for categoryFlagsFromString(). Attempts to find an acl flag
+ * representing the provided flag string and adds that flag to
+ * acl_categories_flags if a match is found.
+ *
+ * Returns '1' if acl category flag is recognized or
+ * returns '0' if not recognized */
+int matchAclCategoryFlag(char *flag, int64_t *acl_categories_flags) {
+    uint64_t found = ACLGetCommandCategoryFlagByName(flag);
+    if (!found) return 0; /* Unrecognized category name. */
+    *acl_categories_flags |= (int64_t)found;
+    return 1;
+}
+
+/* Helper for RM_SetCommandACLCategories(). Turns a string representing acl category
+ * flags into the acl category flags used by Redis ACL which allows users to access
+ * the module commands by acl categories.
+ *
+ * It returns the set of acl flags, or -1 if unknown flags are found. */
+int64_t categoryFlagsFromString(char *aclflags) {
+    int count, j;
+    int64_t acl_categories_flags = 0;
+    /* Tokenize the space separated category string. */
+    sds *tokens = sdssplitlen(aclflags,strlen(aclflags)," ",1,&count);
+    for (j = 0; j < count; j++) {
+        char *t = tokens[j];
+        if (!matchAclCategoryFlag(t, &acl_categories_flags)) {
+            /* Unknown category: log it and stop so the check below
+             * reports the failure. */
+            serverLog(LL_WARNING,"Unrecognized categories flag %s on module load", t);
+            break;
+        }
+    }
+    sdsfreesplitres(tokens,count);
+    if (j != count) return -1; /* Some token not processed correctly. */
+    return acl_categories_flags;
+}
+
+/* RedisModule_SetCommandACLCategories can be used to set ACL categories to module
+ * commands and subcommands. The set of ACL categories should be passed as
+ * a space separated C string 'aclflags'.
+ *
+ * Example, the acl flags 'write slow' marks the command as part of the write and
+ * slow ACL categories.
+ *
+ * On success REDISMODULE_OK is returned. On error REDISMODULE_ERR is returned.
+ *
+ * This function can only be called during the RedisModule_OnLoad function. If called
+ * outside of this function, an error is returned.
+ */
+int RM_SetCommandACLCategories(RedisModuleCommand *command, const char *aclflags) {
+ if (!command || !command->module || !command->module->onload) return REDISMODULE_ERR;
+ int64_t categories_flags = aclflags ? categoryFlagsFromString((char*)aclflags) : 0;
+ if (categories_flags == -1) return REDISMODULE_ERR;
+ struct redisCommand *rcmd = command->rediscmd;
+ rcmd->acl_categories = categories_flags; /* ACL categories flags for module command */
+ command->module->num_commands_with_acl_categories++;
+ return REDISMODULE_OK;
+}
+
+/* Set additional command information.
+ *
+ * Affects the output of `COMMAND`, `COMMAND INFO` and `COMMAND DOCS`, Cluster,
+ * ACL and is used to filter commands with the wrong number of arguments before
+ * the call reaches the module code.
+ *
+ * This function can be called after creating a command using RM_CreateCommand
+ * and fetching the command pointer using RM_GetCommand. The information can
+ * only be set once for each command and has the following structure:
+ *
+ * typedef struct RedisModuleCommandInfo {
+ * const RedisModuleCommandInfoVersion *version;
+ * const char *summary;
+ * const char *complexity;
+ * const char *since;
+ * RedisModuleCommandHistoryEntry *history;
+ * const char *tips;
+ * int arity;
+ * RedisModuleCommandKeySpec *key_specs;
+ * RedisModuleCommandArg *args;
+ * } RedisModuleCommandInfo;
+ *
+ * All fields except `version` are optional. Explanation of the fields:
+ *
+ * - `version`: This field enables compatibility with different Redis versions.
+ * Always set this field to REDISMODULE_COMMAND_INFO_VERSION.
+ *
+ * - `summary`: A short description of the command (optional).
+ *
+ * - `complexity`: Complexity description (optional).
+ *
+ * - `since`: The version where the command was introduced (optional).
+ * Note: The version specified should be the module's, not Redis version.
+ *
+ * - `history`: An array of RedisModuleCommandHistoryEntry (optional), which is
+ * a struct with the following fields:
+ *
+ * const char *since;
+ * const char *changes;
+ *
+ * `since` is a version string and `changes` is a string describing the
+ * changes. The array is terminated by a zeroed entry, i.e. an entry with
+ * both strings set to NULL.
+ *
+ * - `tips`: A string of space-separated tips regarding this command, meant for
+ * clients and proxies. See https://redis.io/topics/command-tips.
+ *
+ * - `arity`: Number of arguments, including the command name itself. A positive
+ * number specifies an exact number of arguments and a negative number
+ * specifies a minimum number of arguments, so use -N to say >= N. Redis
+ * validates a call before passing it to a module, so this can replace an
+ * arity check inside the module command implementation. A value of 0 (or an
+ * omitted arity field) is equivalent to -2 if the command has sub commands
+ * and -1 otherwise.
+ *
+ * - `key_specs`: An array of RedisModuleCommandKeySpec, terminated by an
+ * element memset to zero. This is a scheme that tries to describe the
+ * positions of key arguments better than the old RM_CreateCommand arguments
+ * `firstkey`, `lastkey`, `keystep` and is needed if those three are not
 * enough to describe the key positions. There are two steps to retrieve key
 * positions: *begin search* (BS), which finds the index at which the key
 * arguments start, and *find keys* (FK) which, relative to the output of BS,
 * describes how we can tell which arguments are keys. Additionally, there
 * are key specific flags.
+ *
+ * Key-specs cause the triplet (firstkey, lastkey, keystep) given in
+ * RM_CreateCommand to be recomputed, but it is still useful to provide
+ * these three parameters in RM_CreateCommand, to better support old Redis
+ * versions where RM_SetCommandInfo is not available.
+ *
+ * Note that key-specs don't fully replace the "getkeys-api" (see
+ * RM_CreateCommand, RM_IsKeysPositionRequest and RM_KeyAtPosWithFlags) so
+ * it may be a good idea to supply both key-specs and implement the
+ * getkeys-api.
+ *
+ * A key-spec has the following structure:
+ *
+ * typedef struct RedisModuleCommandKeySpec {
+ * const char *notes;
+ * uint64_t flags;
+ * RedisModuleKeySpecBeginSearchType begin_search_type;
+ * union {
+ * struct {
+ * int pos;
+ * } index;
+ * struct {
+ * const char *keyword;
+ * int startfrom;
+ * } keyword;
+ * } bs;
+ * RedisModuleKeySpecFindKeysType find_keys_type;
+ * union {
+ * struct {
+ * int lastkey;
+ * int keystep;
+ * int limit;
+ * } range;
+ * struct {
+ * int keynumidx;
+ * int firstkey;
+ * int keystep;
+ * } keynum;
+ * } fk;
+ * } RedisModuleCommandKeySpec;
+ *
+ * Explanation of the fields of RedisModuleCommandKeySpec:
+ *
+ * * `notes`: Optional notes or clarifications about this key spec.
+ *
+ * * `flags`: A bitwise or of key-spec flags described below.
+ *
+ * * `begin_search_type`: This describes how the first key is discovered.
+ * There are two ways to determine the first key:
+ *
+ * * `REDISMODULE_KSPEC_BS_UNKNOWN`: There is no way to tell where the
+ * key args start.
+ * * `REDISMODULE_KSPEC_BS_INDEX`: Key args start at a constant index.
+ * * `REDISMODULE_KSPEC_BS_KEYWORD`: Key args start just after a
+ * specific keyword.
+ *
+ * * `bs`: This is a union in which the `index` or `keyword` branch is used
+ * depending on the value of the `begin_search_type` field.
+ *
+ * * `bs.index.pos`: The index from which we start the search for keys.
+ * (`REDISMODULE_KSPEC_BS_INDEX` only.)
+ *
+ * * `bs.keyword.keyword`: The keyword (string) that indicates the
+ * beginning of key arguments. (`REDISMODULE_KSPEC_BS_KEYWORD` only.)
+ *
+ * * `bs.keyword.startfrom`: An index in argv from which to start
+ * searching. Can be negative, which means start search from the end,
+ * in reverse. Example: -2 means to start in reverse from the
+ * penultimate argument. (`REDISMODULE_KSPEC_BS_KEYWORD` only.)
+ *
+ * * `find_keys_type`: After the "begin search", this describes which
+ * arguments are keys. The strategies are:
+ *
 *     * `REDISMODULE_KSPEC_FK_UNKNOWN`: There is no way to tell where the
+ * key args are located.
+ * * `REDISMODULE_KSPEC_FK_RANGE`: Keys end at a specific index (or
+ * relative to the last argument).
+ * * `REDISMODULE_KSPEC_FK_KEYNUM`: There's an argument that contains
+ * the number of key args somewhere before the keys themselves.
+ *
+ * `find_keys_type` and `fk` can be omitted if this keyspec describes
+ * exactly one key.
+ *
+ * * `fk`: This is a union in which the `range` or `keynum` branch is used
+ * depending on the value of the `find_keys_type` field.
+ *
+ * * `fk.range` (for `REDISMODULE_KSPEC_FK_RANGE`): A struct with the
+ * following fields:
+ *
+ * * `lastkey`: Index of the last key relative to the result of the
+ * begin search step. Can be negative, in which case it's not
+ * relative. -1 indicates the last argument, -2 one before the
+ * last and so on.
+ *
+ * * `keystep`: How many arguments should we skip after finding a
+ * key, in order to find the next one?
+ *
+ * * `limit`: If `lastkey` is -1, we use `limit` to stop the search
+ * by a factor. 0 and 1 mean no limit. 2 means 1/2 of the
+ * remaining args, 3 means 1/3, and so on.
+ *
+ * * `fk.keynum` (for `REDISMODULE_KSPEC_FK_KEYNUM`): A struct with the
+ * following fields:
+ *
+ * * `keynumidx`: Index of the argument containing the number of
+ * keys to come, relative to the result of the begin search step.
+ *
 *         * `firstkey`: Index of the first key relative to the result of the
+ * begin search step. (Usually it's just after `keynumidx`, in
+ * which case it should be set to `keynumidx + 1`.)
+ *
+ * * `keystep`: How many arguments should we skip after finding a
+ * key, in order to find the next one?
+ *
+ * Key-spec flags:
+ *
+ * The first four refer to what the command actually does with the *value or
+ * metadata of the key*, and not necessarily the user data or how it affects
 *   it. Each key-spec must have exactly one of these. Any operation
+ * that's not distinctly deletion, overwrite or read-only would be marked as
+ * RW.
+ *
+ * * `REDISMODULE_CMD_KEY_RO`: Read-Only. Reads the value of the key, but
+ * doesn't necessarily return it.
+ *
+ * * `REDISMODULE_CMD_KEY_RW`: Read-Write. Modifies the data stored in the
+ * value of the key or its metadata.
+ *
+ * * `REDISMODULE_CMD_KEY_OW`: Overwrite. Overwrites the data stored in the
+ * value of the key.
+ *
+ * * `REDISMODULE_CMD_KEY_RM`: Deletes the key.
+ *
+ * The next four refer to *user data inside the value of the key*, not the
+ * metadata like LRU, type, cardinality. It refers to the logical operation
+ * on the user's data (actual input strings or TTL), being
+ * used/returned/copied/changed. It doesn't refer to modification or
+ * returning of metadata (like type, count, presence of data). ACCESS can be
+ * combined with one of the write operations INSERT, DELETE or UPDATE. Any
+ * write that's not an INSERT or a DELETE would be UPDATE.
+ *
+ * * `REDISMODULE_CMD_KEY_ACCESS`: Returns, copies or uses the user data
+ * from the value of the key.
+ *
+ * * `REDISMODULE_CMD_KEY_UPDATE`: Updates data to the value, new value may
+ * depend on the old value.
+ *
+ * * `REDISMODULE_CMD_KEY_INSERT`: Adds data to the value with no chance of
+ * modification or deletion of existing data.
+ *
+ * * `REDISMODULE_CMD_KEY_DELETE`: Explicitly deletes some content from the
+ * value of the key.
+ *
+ * Other flags:
+ *
+ * * `REDISMODULE_CMD_KEY_NOT_KEY`: The key is not actually a key, but
+ * should be routed in cluster mode as if it was a key.
+ *
+ * * `REDISMODULE_CMD_KEY_INCOMPLETE`: The keyspec might not point out all
+ * the keys it should cover.
+ *
+ * * `REDISMODULE_CMD_KEY_VARIABLE_FLAGS`: Some keys might have different
+ * flags depending on arguments.
+ *
+ * - `args`: An array of RedisModuleCommandArg, terminated by an element memset
 *   to zero. RedisModuleCommandArg is a structure with the fields described
+ * below.
+ *
+ * typedef struct RedisModuleCommandArg {
+ * const char *name;
+ * RedisModuleCommandArgType type;
+ * int key_spec_index;
+ * const char *token;
+ * const char *summary;
+ * const char *since;
+ * int flags;
+ * struct RedisModuleCommandArg *subargs;
+ * } RedisModuleCommandArg;
+ *
+ * Explanation of the fields:
+ *
+ * * `name`: Name of the argument.
+ *
+ * * `type`: The type of the argument. See below for details. The types
+ * `REDISMODULE_ARG_TYPE_ONEOF` and `REDISMODULE_ARG_TYPE_BLOCK` require
+ * an argument to have sub-arguments, i.e. `subargs`.
+ *
+ * * `key_spec_index`: If the `type` is `REDISMODULE_ARG_TYPE_KEY` you must
+ * provide the index of the key-spec associated with this argument. See
+ * `key_specs` above. If the argument is not a key, you may specify -1.
+ *
+ * * `token`: The token preceding the argument (optional). Example: the
+ * argument `seconds` in `SET` has a token `EX`. If the argument consists
+ * of only a token (for example `NX` in `SET`) the type should be
+ * `REDISMODULE_ARG_TYPE_PURE_TOKEN` and `value` should be NULL.
+ *
+ * * `summary`: A short description of the argument (optional).
+ *
+ * * `since`: The first version which included this argument (optional).
+ *
+ * * `flags`: A bitwise or of the macros `REDISMODULE_CMD_ARG_*`. See below.
+ *
+ * * `value`: The display-value of the argument. This string is what should
+ * be displayed when creating the command syntax from the output of
+ * `COMMAND`. If `token` is not NULL, it should also be displayed.
+ *
+ * Explanation of `RedisModuleCommandArgType`:
+ *
+ * * `REDISMODULE_ARG_TYPE_STRING`: String argument.
+ * * `REDISMODULE_ARG_TYPE_INTEGER`: Integer argument.
+ * * `REDISMODULE_ARG_TYPE_DOUBLE`: Double-precision float argument.
+ * * `REDISMODULE_ARG_TYPE_KEY`: String argument representing a keyname.
+ * * `REDISMODULE_ARG_TYPE_PATTERN`: String, but regex pattern.
+ * * `REDISMODULE_ARG_TYPE_UNIX_TIME`: Integer, but Unix timestamp.
+ * * `REDISMODULE_ARG_TYPE_PURE_TOKEN`: Argument doesn't have a placeholder.
+ * It's just a token without a value. Example: the `KEEPTTL` option of the
+ * `SET` command.
+ * * `REDISMODULE_ARG_TYPE_ONEOF`: Used when the user can choose only one of
+ * a few sub-arguments. Requires `subargs`. Example: the `NX` and `XX`
+ * options of `SET`.
+ * * `REDISMODULE_ARG_TYPE_BLOCK`: Used when one wants to group together
+ * several sub-arguments, usually to apply something on all of them, like
+ * making the entire group "optional". Requires `subargs`. Example: the
+ * `LIMIT offset count` parameters in `ZRANGE`.
+ *
+ * Explanation of the command argument flags:
+ *
+ * * `REDISMODULE_CMD_ARG_OPTIONAL`: The argument is optional (like GET in
+ * the SET command).
+ * * `REDISMODULE_CMD_ARG_MULTIPLE`: The argument may repeat itself (like
+ * key in DEL).
+ * * `REDISMODULE_CMD_ARG_MULTIPLE_TOKEN`: The argument may repeat itself,
+ * and so does its token (like `GET pattern` in SORT).
+ *
+ * On success REDISMODULE_OK is returned. On error REDISMODULE_ERR is returned
+ * and `errno` is set to EINVAL if invalid info was provided or EEXIST if info
+ * has already been set. If the info is invalid, a warning is logged explaining
+ * which part of the info is invalid and why. */
/* Implementation of RedisModule_SetCommandInfo, documented by the large
 * comment above. Note: the variable-sized arrays coming from the module
 * (history, key_specs, args) are accessed via the moduleCmd*At() helpers,
 * which use the element sizes recorded in info->version rather than this
 * server's struct sizes. */
int RM_SetCommandInfo(RedisModuleCommand *command, const RedisModuleCommandInfo *info) {
    /* Validate everything first so the command is never left half-updated. */
    if (!moduleValidateCommandInfo(info)) {
        errno = EINVAL;
        return REDISMODULE_ERR;
    }

    struct redisCommand *cmd = command->rediscmd;

    /* Check if any info has already been set. Overwriting info involves freeing
     * the old info, which is not implemented. */
    if (cmd->summary || cmd->complexity || cmd->since || cmd->history ||
        cmd->tips || cmd->args ||
        !(cmd->key_specs_num == 0 ||
          /* Allow key spec populated from legacy (first,last,step) to exist. */
          (cmd->key_specs_num == 1 &&
           cmd->key_specs[0].begin_search_type == KSPEC_BS_INDEX &&
           cmd->key_specs[0].find_keys_type == KSPEC_FK_RANGE))) {
        errno = EEXIST;
        return REDISMODULE_ERR;
    }

    if (info->summary) cmd->summary = zstrdup(info->summary);
    if (info->complexity) cmd->complexity = zstrdup(info->complexity);
    if (info->since) cmd->since = zstrdup(info->since);

    const RedisModuleCommandInfoVersion *version = info->version;
    if (info->history) {
        /* The module's history array is terminated by an entry whose 'since'
         * is NULL; the copy is NULL-terminated the same way. */
        size_t count = 0;
        while (moduleCmdHistoryEntryAt(version, info->history, count)->since)
            count++;
        serverAssert(count < SIZE_MAX / sizeof(commandHistory));
        cmd->history = zmalloc(sizeof(commandHistory) * (count + 1));
        for (size_t j = 0; j < count; j++) {
            RedisModuleCommandHistoryEntry *entry =
                moduleCmdHistoryEntryAt(version, info->history, j);
            cmd->history[j].since = zstrdup(entry->since);
            cmd->history[j].changes = zstrdup(entry->changes);
        }
        cmd->history[count].since = NULL;
        cmd->history[count].changes = NULL;
        cmd->num_history = count;
    }

    if (info->tips) {
        /* Tips are stored as a NULL-terminated array of copied strings. */
        int count;
        sds *tokens = sdssplitlen(info->tips, strlen(info->tips), " ", 1, &count);
        if (tokens) {
            cmd->tips = zmalloc(sizeof(char *) * (count + 1));
            for (int j = 0; j < count; j++) {
                cmd->tips[j] = zstrdup(tokens[j]);
            }
            cmd->tips[count] = NULL;
            cmd->num_tips = count;
            sdsfreesplitres(tokens, count);
        }
    }

    if (info->arity) cmd->arity = info->arity;

    if (info->key_specs) {
        /* Count and allocate the key specs. The module's array is terminated
         * by an entry whose begin_search_type is zero. */
        size_t count = 0;
        while (moduleCmdKeySpecAt(version, info->key_specs, count)->begin_search_type)
            count++;
        serverAssert(count < INT_MAX);
        zfree(cmd->key_specs);
        cmd->key_specs = zmalloc(sizeof(keySpec) * count);

        /* Copy the contents of the RedisModuleCommandKeySpec array. */
        cmd->key_specs_num = count;
        for (size_t j = 0; j < count; j++) {
            RedisModuleCommandKeySpec *spec =
                moduleCmdKeySpecAt(version, info->key_specs, j);
            cmd->key_specs[j].notes = spec->notes ? zstrdup(spec->notes) : NULL;
            cmd->key_specs[j].flags = moduleConvertKeySpecsFlags(spec->flags, 1);
            switch (spec->begin_search_type) {
            case REDISMODULE_KSPEC_BS_UNKNOWN:
                cmd->key_specs[j].begin_search_type = KSPEC_BS_UNKNOWN;
                break;
            case REDISMODULE_KSPEC_BS_INDEX:
                cmd->key_specs[j].begin_search_type = KSPEC_BS_INDEX;
                cmd->key_specs[j].bs.index.pos = spec->bs.index.pos;
                break;
            case REDISMODULE_KSPEC_BS_KEYWORD:
                cmd->key_specs[j].begin_search_type = KSPEC_BS_KEYWORD;
                cmd->key_specs[j].bs.keyword.keyword = zstrdup(spec->bs.keyword.keyword);
                cmd->key_specs[j].bs.keyword.startfrom = spec->bs.keyword.startfrom;
                break;
            default:
                /* Can't happen; stopped in moduleValidateCommandInfo(). */
                serverPanic("Unknown begin_search_type");
            }

            switch (spec->find_keys_type) {
            case REDISMODULE_KSPEC_FK_OMITTED:
                /* Omitted field is shorthand to say that it's a single key. */
                cmd->key_specs[j].find_keys_type = KSPEC_FK_RANGE;
                cmd->key_specs[j].fk.range.lastkey = 0;
                cmd->key_specs[j].fk.range.keystep = 1;
                cmd->key_specs[j].fk.range.limit = 0;
                break;
            case REDISMODULE_KSPEC_FK_UNKNOWN:
                cmd->key_specs[j].find_keys_type = KSPEC_FK_UNKNOWN;
                break;
            case REDISMODULE_KSPEC_FK_RANGE:
                cmd->key_specs[j].find_keys_type = KSPEC_FK_RANGE;
                cmd->key_specs[j].fk.range.lastkey = spec->fk.range.lastkey;
                cmd->key_specs[j].fk.range.keystep = spec->fk.range.keystep;
                cmd->key_specs[j].fk.range.limit = spec->fk.range.limit;
                break;
            case REDISMODULE_KSPEC_FK_KEYNUM:
                cmd->key_specs[j].find_keys_type = KSPEC_FK_KEYNUM;
                cmd->key_specs[j].fk.keynum.keynumidx = spec->fk.keynum.keynumidx;
                cmd->key_specs[j].fk.keynum.firstkey = spec->fk.keynum.firstkey;
                cmd->key_specs[j].fk.keynum.keystep = spec->fk.keynum.keystep;
                break;
            default:
                /* Can't happen; stopped in moduleValidateCommandInfo(). */
                serverPanic("Unknown find_keys_type");
            }
        }

        /* Update the legacy (first,last,step) spec and "movablekeys" flag used by the COMMAND command,
         * by trying to "glue" consecutive range key specs. */
        populateCommandLegacyRangeSpec(cmd);
    }

    if (info->args) {
        cmd->args = moduleCopyCommandArgs(info->args, version);
        /* Populate arg.num_args with the number of subargs, recursively */
        cmd->num_args = populateArgsStructure(cmd->args);
    }

    /* Fields added in future versions to be added here, under conditions like
     * `if (info->version >= 2) { access version 2 fields here }` */

    return REDISMODULE_OK;
}
+
/* Returns 1 if v is a power of two, 0 otherwise (zero is not a power of two). */
static inline int isPowerOfTwo(uint64_t v) {
    if (v == 0) return 0;
    return (v & (v - 1)) == 0;
}
+
+/* Returns 1 if the command info is valid and 0 otherwise. */
+static int moduleValidateCommandInfo(const RedisModuleCommandInfo *info) {
+ const RedisModuleCommandInfoVersion *version = info->version;
+ if (!version) {
+ serverLog(LL_WARNING, "Invalid command info: version missing");
+ return 0;
+ }
+
+ /* No validation for the fields summary, complexity, since, tips (strings or
+ * NULL) and arity (any integer). */
+
+ /* History: If since is set, changes must also be set. */
+ if (info->history) {
+ for (size_t j = 0;
+ moduleCmdHistoryEntryAt(version, info->history, j)->since;
+ j++)
+ {
+ if (!moduleCmdHistoryEntryAt(version, info->history, j)->changes) {
+ serverLog(LL_WARNING, "Invalid command info: history[%zd].changes missing", j);
+ return 0;
+ }
+ }
+ }
+
+ /* Key specs. */
+ if (info->key_specs) {
+ for (size_t j = 0;
+ moduleCmdKeySpecAt(version, info->key_specs, j)->begin_search_type;
+ j++)
+ {
+ RedisModuleCommandKeySpec *spec =
+ moduleCmdKeySpecAt(version, info->key_specs, j);
+ if (j >= INT_MAX) {
+ serverLog(LL_WARNING, "Invalid command info: Too many key specs");
+ return 0; /* redisCommand.key_specs_num is an int. */
+ }
+
+ /* Flags. Exactly one flag in a group is set if and only if the
+ * masked bits is a power of two. */
+ uint64_t key_flags =
+ REDISMODULE_CMD_KEY_RO | REDISMODULE_CMD_KEY_RW |
+ REDISMODULE_CMD_KEY_OW | REDISMODULE_CMD_KEY_RM;
+ uint64_t write_flags =
+ REDISMODULE_CMD_KEY_INSERT | REDISMODULE_CMD_KEY_DELETE |
+ REDISMODULE_CMD_KEY_UPDATE;
+ if (!isPowerOfTwo(spec->flags & key_flags)) {
+ serverLog(LL_WARNING,
+ "Invalid command info: key_specs[%zd].flags: "
+ "Exactly one of the flags RO, RW, OW, RM required", j);
+ return 0;
+ }
+ if ((spec->flags & write_flags) != 0 &&
+ !isPowerOfTwo(spec->flags & write_flags))
+ {
+ serverLog(LL_WARNING,
+ "Invalid command info: key_specs[%zd].flags: "
+ "INSERT, DELETE and UPDATE are mutually exclusive", j);
+ return 0;
+ }
+
+ switch (spec->begin_search_type) {
+ case REDISMODULE_KSPEC_BS_UNKNOWN: break;
+ case REDISMODULE_KSPEC_BS_INDEX: break;
+ case REDISMODULE_KSPEC_BS_KEYWORD:
+ if (spec->bs.keyword.keyword == NULL) {
+ serverLog(LL_WARNING,
+ "Invalid command info: key_specs[%zd].bs.keyword.keyword "
+ "required when begin_search_type is KEYWORD", j);
+ return 0;
+ }
+ break;
+ default:
+ serverLog(LL_WARNING,
+ "Invalid command info: key_specs[%zd].begin_search_type: "
+ "Invalid value %d", j, spec->begin_search_type);
+ return 0;
+ }
+
+ /* Validate find_keys_type. */
+ switch (spec->find_keys_type) {
+ case REDISMODULE_KSPEC_FK_OMITTED: break; /* short for RANGE {0,1,0} */
+ case REDISMODULE_KSPEC_FK_UNKNOWN: break;
+ case REDISMODULE_KSPEC_FK_RANGE: break;
+ case REDISMODULE_KSPEC_FK_KEYNUM: break;
+ default:
+ serverLog(LL_WARNING,
+ "Invalid command info: key_specs[%zd].find_keys_type: "
+ "Invalid value %d", j, spec->find_keys_type);
+ return 0;
+ }
+ }
+ }
+
+ /* Args, subargs (recursive) */
+ return moduleValidateCommandArgs(info->args, version);
+}
+
+/* When from_api is true, converts from REDISMODULE_CMD_KEY_* flags to CMD_KEY_* flags.
+ * When from_api is false, converts from CMD_KEY_* flags to REDISMODULE_CMD_KEY_* flags. */
+static int64_t moduleConvertKeySpecsFlags(int64_t flags, int from_api) {
+ int64_t out = 0;
+ int64_t map[][2] = {
+ {REDISMODULE_CMD_KEY_RO, CMD_KEY_RO},
+ {REDISMODULE_CMD_KEY_RW, CMD_KEY_RW},
+ {REDISMODULE_CMD_KEY_OW, CMD_KEY_OW},
+ {REDISMODULE_CMD_KEY_RM, CMD_KEY_RM},
+ {REDISMODULE_CMD_KEY_ACCESS, CMD_KEY_ACCESS},
+ {REDISMODULE_CMD_KEY_INSERT, CMD_KEY_INSERT},
+ {REDISMODULE_CMD_KEY_UPDATE, CMD_KEY_UPDATE},
+ {REDISMODULE_CMD_KEY_DELETE, CMD_KEY_DELETE},
+ {REDISMODULE_CMD_KEY_NOT_KEY, CMD_KEY_NOT_KEY},
+ {REDISMODULE_CMD_KEY_INCOMPLETE, CMD_KEY_INCOMPLETE},
+ {REDISMODULE_CMD_KEY_VARIABLE_FLAGS, CMD_KEY_VARIABLE_FLAGS},
+ {0,0}};
+
+ int from_idx = from_api ? 0 : 1, to_idx = !from_idx;
+ for (int i=0; map[i][0]; i++)
+ if (flags & map[i][from_idx]) out |= map[i][to_idx];
+ return out;
+}
+
+/* Validates an array of RedisModuleCommandArg. Returns 1 if it's valid and 0 if
+ * it's invalid. */
+static int moduleValidateCommandArgs(RedisModuleCommandArg *args,
+ const RedisModuleCommandInfoVersion *version) {
+ if (args == NULL) return 1; /* Missing args is OK. */
+ for (size_t j = 0; moduleCmdArgAt(version, args, j)->name != NULL; j++) {
+ RedisModuleCommandArg *arg = moduleCmdArgAt(version, args, j);
+ int arg_type_error = 0;
+ moduleConvertArgType(arg->type, &arg_type_error);
+ if (arg_type_error) {
+ serverLog(LL_WARNING,
+ "Invalid command info: Argument \"%s\": Undefined type %d",
+ arg->name, arg->type);
+ return 0;
+ }
+ if (arg->type == REDISMODULE_ARG_TYPE_PURE_TOKEN && !arg->token) {
+ serverLog(LL_WARNING,
+ "Invalid command info: Argument \"%s\": "
+ "token required when type is PURE_TOKEN", args[j].name);
+ return 0;
+ }
+
+ if (arg->type == REDISMODULE_ARG_TYPE_KEY) {
+ if (arg->key_spec_index < 0) {
+ serverLog(LL_WARNING,
+ "Invalid command info: Argument \"%s\": "
+ "key_spec_index required when type is KEY",
+ arg->name);
+ return 0;
+ }
+ } else if (arg->key_spec_index != -1 && arg->key_spec_index != 0) {
+ /* 0 is allowed for convenience, to allow it to be omitted in
+ * compound struct literals on the form `.field = value`. */
+ serverLog(LL_WARNING,
+ "Invalid command info: Argument \"%s\": "
+ "key_spec_index specified but type isn't KEY",
+ arg->name);
+ return 0;
+ }
+
+ if (arg->flags & ~(_REDISMODULE_CMD_ARG_NEXT - 1)) {
+ serverLog(LL_WARNING,
+ "Invalid command info: Argument \"%s\": Invalid flags",
+ arg->name);
+ return 0;
+ }
+
+ if (arg->type == REDISMODULE_ARG_TYPE_ONEOF ||
+ arg->type == REDISMODULE_ARG_TYPE_BLOCK)
+ {
+ if (arg->subargs == NULL) {
+ serverLog(LL_WARNING,
+ "Invalid command info: Argument \"%s\": "
+ "subargs required when type is ONEOF or BLOCK",
+ arg->name);
+ return 0;
+ }
+ if (!moduleValidateCommandArgs(arg->subargs, version)) return 0;
+ } else {
+ if (arg->subargs != NULL) {
+ serverLog(LL_WARNING,
+ "Invalid command info: Argument \"%s\": "
+ "subargs specified but type isn't ONEOF nor BLOCK",
+ arg->name);
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+/* Converts an array of RedisModuleCommandArg into a freshly allocated array of
+ * struct redisCommandArg. */
+static struct redisCommandArg *moduleCopyCommandArgs(RedisModuleCommandArg *args,
+ const RedisModuleCommandInfoVersion *version) {
+ size_t count = 0;
+ while (moduleCmdArgAt(version, args, count)->name) count++;
+ serverAssert(count < SIZE_MAX / sizeof(struct redisCommandArg));
+ struct redisCommandArg *realargs = zcalloc((count+1) * sizeof(redisCommandArg));
+
+ for (size_t j = 0; j < count; j++) {
+ RedisModuleCommandArg *arg = moduleCmdArgAt(version, args, j);
+ realargs[j].name = zstrdup(arg->name);
+ realargs[j].type = moduleConvertArgType(arg->type, NULL);
+ if (arg->type == REDISMODULE_ARG_TYPE_KEY)
+ realargs[j].key_spec_index = arg->key_spec_index;
+ else
+ realargs[j].key_spec_index = -1;
+ if (arg->token) realargs[j].token = zstrdup(arg->token);
+ if (arg->summary) realargs[j].summary = zstrdup(arg->summary);
+ if (arg->since) realargs[j].since = zstrdup(arg->since);
+ if (arg->deprecated_since) realargs[j].deprecated_since = zstrdup(arg->deprecated_since);
+ if (arg->display_text) realargs[j].display_text = zstrdup(arg->display_text);
+ realargs[j].flags = moduleConvertArgFlags(arg->flags);
+ if (arg->subargs) realargs[j].subargs = moduleCopyCommandArgs(arg->subargs, version);
+ }
+ return realargs;
+}
+
+static redisCommandArgType moduleConvertArgType(RedisModuleCommandArgType type, int *error) {
+ if (error) *error = 0;
+ switch (type) {
+ case REDISMODULE_ARG_TYPE_STRING: return ARG_TYPE_STRING;
+ case REDISMODULE_ARG_TYPE_INTEGER: return ARG_TYPE_INTEGER;
+ case REDISMODULE_ARG_TYPE_DOUBLE: return ARG_TYPE_DOUBLE;
+ case REDISMODULE_ARG_TYPE_KEY: return ARG_TYPE_KEY;
+ case REDISMODULE_ARG_TYPE_PATTERN: return ARG_TYPE_PATTERN;
+ case REDISMODULE_ARG_TYPE_UNIX_TIME: return ARG_TYPE_UNIX_TIME;
+ case REDISMODULE_ARG_TYPE_PURE_TOKEN: return ARG_TYPE_PURE_TOKEN;
+ case REDISMODULE_ARG_TYPE_ONEOF: return ARG_TYPE_ONEOF;
+ case REDISMODULE_ARG_TYPE_BLOCK: return ARG_TYPE_BLOCK;
+ default:
+ if (error) *error = 1;
+ return -1;
+ }
+}
+
+static int moduleConvertArgFlags(int flags) {
+ int realflags = 0;
+ if (flags & REDISMODULE_CMD_ARG_OPTIONAL) realflags |= CMD_ARG_OPTIONAL;
+ if (flags & REDISMODULE_CMD_ARG_MULTIPLE) realflags |= CMD_ARG_MULTIPLE;
+ if (flags & REDISMODULE_CMD_ARG_MULTIPLE_TOKEN) realflags |= CMD_ARG_MULTIPLE_TOKEN;
+ return realflags;
+}
+
/* Return `struct RedisModule *` as `void *` to avoid exposing it outside of module.c.
 * Returns NULL if no module with the given name is registered in the global
 * 'modules' dict. */
void *moduleGetHandleByName(char *modulename) {
    return dictFetchValue(modules,modulename);
}
+
+/* Returns 1 if `cmd` is a command of the module `modulename`. 0 otherwise. */
+int moduleIsModuleCommand(void *module_handle, struct redisCommand *cmd) {
+ if (cmd->proc != RedisModuleCommandDispatcher)
+ return 0;
+ if (module_handle == NULL)
+ return 0;
+ RedisModuleCommand *cp = cmd->module_cmd;
+ return (cp->module == module_handle);
+}
+
+/* --------------------------------------------------------------------------
+ * ## Module information and time measurement
+ * -------------------------------------------------------------------------- */
+
+int moduleListConfigMatch(void *config, void *name) {
+ return strcasecmp(((ModuleConfig *) config)->name, (char *) name) == 0;
+}
+
+void moduleListFree(void *config) {
+ ModuleConfig *module_config = (ModuleConfig *) config;
+ sdsfree(module_config->name);
+ zfree(config);
+}
+
void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int apiver) {
    /* Called by RM_Init() to setup the `ctx->module` structure.
     *
     * This is an internal function, Redis modules developers don't need
     * to use it. */
    RedisModule *module;

    if (ctx->module != NULL) return; /* Already initialized: keep the first registration. */
    module = zmalloc(sizeof(*module));
    module->name = sdsnew(name);
    module->ver = ver;
    module->apiver = apiver;
    module->types = listCreate();
    module->usedby = listCreate();
    module->using = listCreate();
    module->filters = listCreate();
    /* Configs are looked up by name (case-insensitive) and their entries are
     * freed together with the list. */
    module->module_configs = listCreate();
    listSetMatchMethod(module->module_configs, moduleListConfigMatch);
    listSetFreeMethod(module->module_configs, moduleListFree);
    module->in_call = 0;
    module->configs_initialized = 0;
    module->in_hook = 0;
    module->options = 0;
    module->info_cb = 0;
    module->defrag_cb = 0;
    module->loadmod = NULL;
    module->num_commands_with_acl_categories = 0;
    /* Mark the module as inside OnLoad: APIs restricted to OnLoad (e.g.
     * RM_SetCommandACLCategories) check this flag. */
    module->onload = 1;
    ctx->module = module;
}
+
+/* Return non-zero if the module name is busy.
+ * Otherwise zero is returned. */
+int RM_IsModuleNameBusy(const char *name) {
+ sds modulename = sdsnew(name);
+ dictEntry *de = dictFind(modules,modulename);
+ sdsfree(modulename);
+ return de != NULL;
+}
+
/* Return the current UNIX (wall clock) time in milliseconds. */
mstime_t RM_Milliseconds(void) {
    return mstime();
}
+
/* Return counter of micro-seconds relative to an arbitrary point in time.
 * Being monotonic, it is suitable for measuring elapsed time, unlike the
 * wall-clock based RM_Microseconds(). */
uint64_t RM_MonotonicMicroseconds(void) {
    return getMonotonicUs();
}
+
/* Return the current UNIX (wall clock) time in microseconds. */
ustime_t RM_Microseconds(void) {
    return ustime();
}
+
/* Return the cached UNIX time in microseconds.
 * It is updated in the server cron job and before executing a command.
 * It is useful for complex call stacks, such as a command causing a
 * key space notification, causing a module to execute a RedisModule_Call,
 * causing another notification, etc.
 * It makes sense that all this callbacks would use the same clock. */
ustime_t RM_CachedMicroseconds(void) {
    return server.ustime;
}
+
/* Mark a point in time that will be used as the start time to calculate
 * the elapsed execution time when RM_BlockedClientMeasureTimeEnd() is called.
 * Within the same command, you can call multiple times
 * RM_BlockedClientMeasureTimeStart() and RM_BlockedClientMeasureTimeEnd()
 * to accumulate independent time intervals to the background duration.
 * This method always return REDISMODULE_OK.
 *
 * The start timestamp is stored in bc->background_timer; a later Start call
 * simply overwrites it. */
int RM_BlockedClientMeasureTimeStart(RedisModuleBlockedClient *bc) {
    elapsedStart(&(bc->background_timer));
    return REDISMODULE_OK;
}
+
/* Mark a point in time that will be used as the end time
 * to calculate the elapsed execution time.
 * On success REDISMODULE_OK is returned.
 * This method only returns REDISMODULE_ERR if no start time was
 * previously defined ( meaning RM_BlockedClientMeasureTimeStart was not called ). */
int RM_BlockedClientMeasureTimeEnd(RedisModuleBlockedClient *bc) {
    /* A zero timer means RM_BlockedClientMeasureTimeStart was never called
     * (this assumes elapsedStart() never records 0 — see monotonic timer
     * helpers). */
    if (!bc->background_timer)
        return REDISMODULE_ERR;
    bc->background_duration += elapsedUs(bc->background_timer);
    return REDISMODULE_OK;
}
+
/* This API allows modules to let Redis process background tasks, and some
 * commands during long blocking execution of a module command.
 * The module can call this API periodically.
 * The flags is a bit mask of these:
 *
 * - `REDISMODULE_YIELD_FLAG_NONE`: No special flags, can perform some background
 *   operations, but not process client commands.
 * - `REDISMODULE_YIELD_FLAG_CLIENTS`: Redis can also process client commands.
 *
 * The `busy_reply` argument is optional, and can be used to control the verbose
 * error string after the `-BUSY` error code.
 *
 * When the `REDISMODULE_YIELD_FLAG_CLIENTS` is used, Redis will only start
 * processing client commands after the time defined by the
 * `busy-reply-threshold` config, in which case Redis will start rejecting most
 * commands with `-BUSY` error, but allow the ones marked with the `allow-busy`
 * flag to be executed.
 * This API can also be used in thread safe context (while locked), and during
 * loading (in the `rdb_load` callback, in which case it'll reject commands with
 * the -LOADING error)
 */
void RM_Yield(RedisModuleCtx *ctx, int flags, const char *busy_reply) {
    /* Avoid nested calls to RM_Yield: processEventsWhileBlocked() below may
     * run code (e.g. another module command) that calls RM_Yield again. */
    static int yield_nesting = 0;
    if (yield_nesting)
        return;
    yield_nesting++;

    /* Throttle: only actually yield when enough time has passed since the
     * previous yield on this context (see next_yield_time update below). */
    long long now = getMonotonicUs();
    if (now >= ctx->next_yield_time) {
        /* In loading mode, there's no need to handle busy_module_yield_reply,
         * and busy_module_yield_flags, since redis is anyway rejecting all
         * commands with -LOADING. */
        if (server.loading) {
            /* Let redis process events */
            processEventsWhileBlocked();
        } else {
            const char *prev_busy_module_yield_reply = server.busy_module_yield_reply;
            server.busy_module_yield_reply = busy_reply;
            /* start the blocking operation if not already started. */
            if (!server.busy_module_yield_flags) {
                server.busy_module_yield_flags = BUSY_MODULE_YIELD_EVENTS;
                blockingOperationStarts();
                if (server.current_client)
                    protectClient(server.current_client);
            }
            if (flags & REDISMODULE_YIELD_FLAG_CLIENTS)
                server.busy_module_yield_flags |= BUSY_MODULE_YIELD_CLIENTS;

            /* Let redis process events */
            processEventsWhileBlocked();

            server.busy_module_yield_reply = prev_busy_module_yield_reply;
            /* Possibly restore the previous flags in case of two nested contexts
             * that use this API with different flags, but keep the first bit
             * (PROCESS_EVENTS) set, so we know to call blockingOperationEnds on time. */
            server.busy_module_yield_flags &= ~BUSY_MODULE_YIELD_CLIENTS;
        }

        /* decide when the next event should fire: one yield per cron tick
         * (server.hz ticks per second). */
        ctx->next_yield_time = now + 1000000 / server.hz;
    }
    yield_nesting--;
}
+
/* Set flags defining capabilities or behavior bit flags.
 *
 * REDISMODULE_OPTIONS_HANDLE_IO_ERRORS:
 * Generally, modules don't need to bother with this, as the process will just
 * terminate if a read error happens, however, setting this flag would allow
 * repl-diskless-load to work if enabled.
 * The module should use RedisModule_IsIOError after reads, before using the
 * data that was read, and in case of error, propagate it upwards, and also be
 * able to release the partially populated value and all its allocations.
 *
 * REDISMODULE_OPTION_NO_IMPLICIT_SIGNAL_MODIFIED:
 * See RM_SignalModifiedKey().
 *
 * REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD:
 * Setting this flag indicates module awareness of diskless async replication (repl-diskless-load=swapdb)
 * and that redis could be serving reads during replication instead of blocking with LOADING status.
 *
 * REDISMODULE_OPTIONS_ALLOW_NESTED_KEYSPACE_NOTIFICATIONS:
 * Declare that the module wants to get nested key-space notifications.
 * By default, Redis will not fire key-space notifications that happened inside
 * a key-space notification callback. This flag allows to change this behavior
 * and fire nested key-space notifications. Notice: if enabled, the module
 * should protect itself from infinite recursion.
 *
 * Note: the new options replace (do not OR into) any previously set options. */
void RM_SetModuleOptions(RedisModuleCtx *ctx, int options) {
    ctx->module->options = options;
}
+
/* Signals that the key is modified from user's perspective (i.e. invalidate WATCH
 * and client side caching).
 *
 * This is done automatically when a key opened for writing is closed, unless
 * the option REDISMODULE_OPTION_NO_IMPLICIT_SIGNAL_MODIFIED has been set using
 * RM_SetModuleOptions().
 *
 * The function always returns REDISMODULE_OK.
*/
int RM_SignalModifiedKey(RedisModuleCtx *ctx, RedisModuleString *keyname) {
    signalModifiedKey(ctx->client,ctx->client->db,keyname);
    return REDISMODULE_OK;
}
+
+/* --------------------------------------------------------------------------
+ * ## Automatic memory management for modules
+ * -------------------------------------------------------------------------- */
+
/* Enable automatic memory management.
 *
 * The function must be called as the first function of a command implementation
 * that wants to use automatic memory.
 *
 * When enabled, automatic memory management tracks and automatically frees
 * keys, call replies and Redis string objects once the command returns. In most
 * cases this eliminates the need of calling the following functions:
 *
 * 1. RedisModule_CloseKey()
 * 2. RedisModule_FreeCallReply()
 * 3. RedisModule_FreeString()
 *
 * These functions can still be used with automatic memory management enabled,
 * to optimize loops that make numerous allocations for example. */
void RM_AutoMemory(RedisModuleCtx *ctx) {
    ctx->flags |= REDISMODULE_CTX_AUTO_MEMORY;
}
+
/* Add a new object to release automatically when the callback returns.
 * 'type' is one of the REDISMODULE_AM_* constants; 'ptr' is the object.
 * No-op if the context has not enabled automatic memory management. */
void autoMemoryAdd(RedisModuleCtx *ctx, int type, void *ptr) {
    if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return;
    /* Grow the queue geometrically (doubling, minimum 16 slots) when full. */
    if (ctx->amqueue_used == ctx->amqueue_len) {
        ctx->amqueue_len *= 2;
        if (ctx->amqueue_len < 16) ctx->amqueue_len = 16;
        ctx->amqueue = zrealloc(ctx->amqueue,sizeof(struct AutoMemEntry)*ctx->amqueue_len);
    }
    ctx->amqueue[ctx->amqueue_used].type = type;
    ctx->amqueue[ctx->amqueue_used].ptr = ptr;
    ctx->amqueue_used++;
}
+
/* Mark an object as freed in the auto release queue, so that users can still
 * free things manually if they want.
 *
 * The function returns 1 if the object was actually found in the auto memory
 * pool, otherwise 0 is returned. */
int autoMemoryFreed(RedisModuleCtx *ctx, int type, void *ptr) {
    if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return 0;

    /* Scan from both ends toward the middle: recently added objects (at the
     * end) are the most likely to be freed manually, so checking the tail
     * first keeps the common case cheap. */
    int count = (ctx->amqueue_used+1)/2;
    for (int j = 0; j < count; j++) {
        for (int side = 0; side < 2; side++) {
            /* For side = 0 check right side of the array, for
             * side = 1 check the left side instead (zig-zag scanning). */
            int i = (side == 0) ? (ctx->amqueue_used - 1 - j) : j;
            if (ctx->amqueue[i].type == type &&
                ctx->amqueue[i].ptr == ptr)
            {
                ctx->amqueue[i].type = REDISMODULE_AM_FREED;

                /* Switch the freed element and the last element, to avoid growing
                 * the queue unnecessarily if we allocate/free in a loop */
                if (i != ctx->amqueue_used-1) {
                    ctx->amqueue[i] = ctx->amqueue[ctx->amqueue_used-1];
                }

                /* Reduce the size of the queue because we either moved the top
                 * element elsewhere or freed it */
                ctx->amqueue_used--;
                return 1;
            }
        }
    }
    return 0;
}
+
/* Release all the objects in queue. Called when the module callback that
 * owns the context returns. */
void autoMemoryCollect(RedisModuleCtx *ctx) {
    if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return;
    /* Clear the AUTO_MEMORY flag from the context, otherwise the functions
     * we call to free the resources, will try to scan the auto release
     * queue to mark the entries as freed. */
    ctx->flags &= ~REDISMODULE_CTX_AUTO_MEMORY;
    int j;
    for (j = 0; j < ctx->amqueue_used; j++) {
        void *ptr = ctx->amqueue[j].ptr;
        /* Entries marked REDISMODULE_AM_FREED were already released manually
         * and match no case below, so they are skipped. */
        switch(ctx->amqueue[j].type) {
        case REDISMODULE_AM_STRING: decrRefCount(ptr); break;
        case REDISMODULE_AM_REPLY: RM_FreeCallReply(ptr); break;
        case REDISMODULE_AM_KEY: RM_CloseKey(ptr); break;
        case REDISMODULE_AM_DICT: RM_FreeDict(NULL,ptr); break;
        case REDISMODULE_AM_INFO: RM_FreeServerInfo(NULL,ptr); break;
        }
    }
    /* Restore the flag and reset the queue to its empty state. */
    ctx->flags |= REDISMODULE_CTX_AUTO_MEMORY;
    zfree(ctx->amqueue);
    ctx->amqueue = NULL;
    ctx->amqueue_len = 0;
    ctx->amqueue_used = 0;
}
+
+/* --------------------------------------------------------------------------
+ * ## String objects APIs
+ * -------------------------------------------------------------------------- */
+
+/* Create a new module string object. The returned string must be freed
+ * with RedisModule_FreeString(), unless automatic memory is enabled.
+ *
+ * The string is created by copying the `len` bytes starting
+ * at `ptr`. No reference is retained to the passed buffer.
+ *
+ * The module context 'ctx' is optional and may be NULL if you want to create
+ * a string out of the context scope. However in that case, the automatic
+ * memory management will not be available, and the string memory must be
+ * managed manually. */
+RedisModuleString *RM_CreateString(RedisModuleCtx *ctx, const char *ptr, size_t len) {
+ RedisModuleString *o = createStringObject(ptr,len);
+ if (ctx != NULL) autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+ return o;
+}
+
+/* Create a new module string object from a printf format and arguments.
+ * The returned string must be freed with RedisModule_FreeString(), unless
+ * automatic memory is enabled.
+ *
+ * The string is created using the sds formatter function sdscatvprintf().
+ *
+ * The passed context 'ctx' may be NULL if necessary, see the
+ * RedisModule_CreateString() documentation for more info. */
+RedisModuleString *RM_CreateStringPrintf(RedisModuleCtx *ctx, const char *fmt, ...) {
+ sds s = sdsempty();
+
+ va_list ap;
+ va_start(ap, fmt);
+ s = sdscatvprintf(s, fmt, ap);
+ va_end(ap);
+
+ RedisModuleString *o = createObject(OBJ_STRING, s);
+ if (ctx != NULL) autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+
+ return o;
+}
+
+
+/* Like RedisModule_CreateString(), but creates a string starting from a `long long`
+ * integer instead of taking a buffer and its length.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management.
+ *
+ * The passed context 'ctx' may be NULL if necessary, see the
+ * RedisModule_CreateString() documentation for more info. */
+RedisModuleString *RM_CreateStringFromLongLong(RedisModuleCtx *ctx, long long ll) {
+ char buf[LONG_STR_SIZE];
+ size_t len = ll2string(buf,sizeof(buf),ll);
+ return RM_CreateString(ctx,buf,len);
+}
+
+/* Like RedisModule_CreateString(), but creates a string starting from a `unsigned long long`
+ * integer instead of taking a buffer and its length.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management.
+ *
+ * The passed context 'ctx' may be NULL if necessary, see the
+ * RedisModule_CreateString() documentation for more info. */
+RedisModuleString *RM_CreateStringFromULongLong(RedisModuleCtx *ctx, unsigned long long ull) {
+ char buf[LONG_STR_SIZE];
+ size_t len = ull2string(buf,sizeof(buf),ull);
+ return RM_CreateString(ctx,buf,len);
+}
+
+/* Like RedisModule_CreateString(), but creates a string starting from a double
+ * instead of taking a buffer and its length.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management. */
+RedisModuleString *RM_CreateStringFromDouble(RedisModuleCtx *ctx, double d) {
+ char buf[MAX_D2STRING_CHARS];
+ size_t len = d2string(buf,sizeof(buf),d);
+ return RM_CreateString(ctx,buf,len);
+}
+
+/* Like RedisModule_CreateString(), but creates a string starting from a long
+ * double.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management.
+ *
+ * The passed context 'ctx' may be NULL if necessary, see the
+ * RedisModule_CreateString() documentation for more info. */
+RedisModuleString *RM_CreateStringFromLongDouble(RedisModuleCtx *ctx, long double ld, int humanfriendly) {
+ char buf[MAX_LONG_DOUBLE_CHARS];
+ size_t len = ld2string(buf,sizeof(buf),ld,
+ (humanfriendly ? LD_STR_HUMAN : LD_STR_AUTO));
+ return RM_CreateString(ctx,buf,len);
+}
+
+/* Like RedisModule_CreateString(), but creates a string starting from another
+ * RedisModuleString.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management.
+ *
+ * The passed context 'ctx' may be NULL if necessary, see the
+ * RedisModule_CreateString() documentation for more info. */
+RedisModuleString *RM_CreateStringFromString(RedisModuleCtx *ctx, const RedisModuleString *str) {
+ RedisModuleString *o = dupStringObject(str);
+ if (ctx != NULL) autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+ return o;
+}
+
+/* Creates a string from a stream ID. The returned string must be released with
+ * RedisModule_FreeString(), unless automatic memory is enabled.
+ *
+ * The passed context `ctx` may be NULL if necessary. See the
+ * RedisModule_CreateString() documentation for more info. */
+RedisModuleString *RM_CreateStringFromStreamID(RedisModuleCtx *ctx, const RedisModuleStreamID *id) {
+ streamID streamid = {id->ms, id->seq};
+ RedisModuleString *o = createObjectFromStreamID(&streamid);
+ if (ctx != NULL) autoMemoryAdd(ctx, REDISMODULE_AM_STRING, o);
+ return o;
+}
+
/* Free a module string object obtained with one of the Redis modules API calls
 * that return new string objects.
 *
 * It is possible to call this function even when automatic memory management
 * is enabled. In that case the string will be released ASAP and removed
 * from the pool of string to release at the end.
 *
 * If the string was created with a NULL context 'ctx', it is also possible to
 * pass ctx as NULL when releasing the string (but passing a context will not
 * create any issue). Strings created with a context should be freed also passing
 * the context, so if you want to free a string out of context later, make sure
 * to create it using a NULL context. */
void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) {
    decrRefCount(str);
    /* Remove the string from the auto release queue, if tracked there, so
     * that autoMemoryCollect() will not decrement the refcount again. */
    if (ctx != NULL) autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str);
}
+
/* Every call to this function, will make the string 'str' requiring
 * an additional call to RedisModule_FreeString() in order to really
 * free the string. Note that the automatic freeing of the string obtained
 * enabling modules automatic memory management counts for one
 * RedisModule_FreeString() call (it is just executed automatically).
 *
 * Normally you want to call this function when, at the same time
 * the following conditions are true:
 *
 * 1. You have automatic memory management enabled.
 * 2. You want to create string objects.
 * 3. Those string objects you create need to live *after* the callback
 *    function (for example a command implementation) creating them returns.
 *
 * Usually you want this in order to store the created string object
 * into your own data structure, for example when implementing a new data
 * type.
 *
 * Note that when memory management is turned off, you don't need
 * any call to RetainString() since creating a string will always result
 * into a string that lives after the callback function returns, if
 * no FreeString() call is performed.
 *
 * It is possible to call this function with a NULL context.
 *
 * When strings are going to be retained for an extended duration, it is good
 * practice to also call RedisModule_TrimStringAllocation() in order to
 * optimize memory usage.
 *
 * Threaded modules that reference retained strings from other threads *must*
 * explicitly trim the allocation as soon as the string is retained. Not doing
 * so may result with automatic trimming which is not thread safe. */
void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) {
    if (ctx == NULL || !autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str)) {
        /* Increment the string reference counting only if we can't
         * just remove the object from the list of objects that should
         * be reclaimed. Why we do that, instead of just incrementing
         * the refcount in any case, and let the automatic FreeString()
         * call at the end to bring the refcount back at the desired
         * value? Because this way we ensure that the object refcount
         * value is 1 (instead of going to 2 to be dropped later to 1)
         * after the call to this function. This is needed for functions
         * like RedisModule_StringAppendBuffer() to work. */
        incrRefCount(str);
    }
}
+
/**
* This function can be used instead of RedisModule_RetainString().
* The main difference between the two is that this function will always
* succeed, whereas RedisModule_RetainString() may fail because of an
* assertion.
*
* The function returns a pointer to RedisModuleString, which is owned
* by the caller. It requires a call to RedisModule_FreeString() to free
* the string when automatic memory management is disabled for the context.
* When automatic memory management is enabled, you can either call
* RedisModule_FreeString() or let the automation free it.
*
* This function is more efficient than RedisModule_CreateStringFromString()
* because whenever possible, it avoids copying the underlying
* RedisModuleString. The disadvantage of using this function is that it
* might not be possible to use RedisModule_StringAppendBuffer() on the
* returned RedisModuleString.
*
* It is possible to call this function with a NULL context.
*
 * When strings are going to be held for an extended duration, it is good
 * practice to also call RedisModule_TrimStringAllocation() in order to
 * optimize memory usage.
 *
 * Threaded modules that reference held strings from other threads *must*
 * explicitly trim the allocation as soon as the string is held. Not doing
 * so may result with automatic trimming which is not thread safe. */
RedisModuleString* RM_HoldString(RedisModuleCtx *ctx, RedisModuleString *str) {
    /* Static objects can not be reference counted, so a real copy is the
     * only way to hand ownership to the caller. */
    if (str->refcount == OBJ_STATIC_REFCOUNT) {
        return RM_CreateStringFromString(ctx, str);
    }

    incrRefCount(str);
    if (ctx != NULL) {
        /*
         * Put the str in the auto memory management of the ctx.
         * It might already be there, in this case, the ref count will
         * be 2 and we will decrease the ref count twice and free the
         * object in the auto memory free function.
         *
         * Why we can not do the same trick of just remove the object
         * from the auto memory (like in RM_RetainString)?
         * This code shows the issue:
         *
         * RM_AutoMemory(ctx);
         * str1 = RM_CreateString(ctx, "test", 4);
         * str2 = RM_HoldString(ctx, str1);
         * RM_FreeString(str1);
         * RM_FreeString(str2);
         *
         * If after the RM_HoldString we would just remove the string from
         * the auto memory, this example will cause access to a freed memory
         * on 'RM_FreeString(str2);' because the String will be freed
         * on 'RM_FreeString(str1);'.
         *
         * So it's safer to just increase the ref count
         * and add the String to auto memory again.
         *
         * The limitation is that it is not possible to use RedisModule_StringAppendBuffer
         * on the String.
         */
        autoMemoryAdd(ctx,REDISMODULE_AM_STRING,str);
    }
    return str;
}
+
+/* Given a string module object, this function returns the string pointer
+ * and length of the string. The returned pointer and length should only
+ * be used for read only accesses and never modified. */
+const char *RM_StringPtrLen(const RedisModuleString *str, size_t *len) {
+ if (str == NULL) {
+ const char *errmsg = "(NULL string reply referenced in module)";
+ if (len) *len = strlen(errmsg);
+ return errmsg;
+ }
+ if (len) *len = sdslen(str->ptr);
+ return str->ptr;
+}
+
+/* --------------------------------------------------------------------------
+ * Higher level string operations
+ * ------------------------------------------------------------------------- */
+
+/* Convert the string into a `long long` integer, storing it at `*ll`.
+ * Returns REDISMODULE_OK on success. If the string can't be parsed
+ * as a valid, strict `long long` (no spaces before/after), REDISMODULE_ERR
+ * is returned. */
+int RM_StringToLongLong(const RedisModuleString *str, long long *ll) {
+ return string2ll(str->ptr,sdslen(str->ptr),ll) ? REDISMODULE_OK :
+ REDISMODULE_ERR;
+}
+
+/* Convert the string into a `unsigned long long` integer, storing it at `*ull`.
+ * Returns REDISMODULE_OK on success. If the string can't be parsed
+ * as a valid, strict `unsigned long long` (no spaces before/after), REDISMODULE_ERR
+ * is returned. */
+int RM_StringToULongLong(const RedisModuleString *str, unsigned long long *ull) {
+ return string2ull(str->ptr,ull) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Convert the string into a double, storing it at `*d`.
+ * Returns REDISMODULE_OK on success or REDISMODULE_ERR if the string is
+ * not a valid string representation of a double value. */
+int RM_StringToDouble(const RedisModuleString *str, double *d) {
+ int retval = getDoubleFromObject(str,d);
+ return (retval == C_OK) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Convert the string into a long double, storing it at `*ld`.
+ * Returns REDISMODULE_OK on success or REDISMODULE_ERR if the string is
+ * not a valid string representation of a double value. */
+int RM_StringToLongDouble(const RedisModuleString *str, long double *ld) {
+ int retval = string2ld(str->ptr,sdslen(str->ptr),ld);
+ return retval ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Convert the string into a stream ID, storing it at `*id`.
+ * Returns REDISMODULE_OK on success and returns REDISMODULE_ERR if the string
+ * is not a valid string representation of a stream ID. The special IDs "+" and
+ * "-" are allowed.
+ */
+int RM_StringToStreamID(const RedisModuleString *str, RedisModuleStreamID *id) {
+ streamID streamid;
+ if (streamParseID(str, &streamid) == C_OK) {
+ id->ms = streamid.ms;
+ id->seq = streamid.seq;
+ return REDISMODULE_OK;
+ } else {
+ return REDISMODULE_ERR;
+ }
+}
+
/* Compare two string objects, returning -1, 0 or 1 respectively if
 * a < b, a == b, a > b. Strings are compared byte by byte as two
 * binary blobs without any encoding care / collation attempt. */
int RM_StringCompare(const RedisModuleString *a, const RedisModuleString *b) {
    return compareStringObjects(a,b);
}
+
/* Return the (possibly modified in encoding) input 'str' object if
 * the string is unshared, otherwise NULL is returned.
 *
 * On success the returned object is guaranteed to use OBJ_ENCODING_RAW,
 * converting from EMBSTR or INT encoding in place if needed, so that the
 * caller can safely modify the underlying sds buffer. */
RedisModuleString *moduleAssertUnsharedString(RedisModuleString *str) {
    if (str->refcount != 1) {
        serverLog(LL_WARNING,
            "Module attempted to use an in-place string modify operation "
            "with a string referenced multiple times. Please check the code "
            "for API usage correctness.");
        return NULL;
    }
    if (str->encoding == OBJ_ENCODING_EMBSTR) {
        /* Note: here we "leak" the additional allocation that was
         * used in order to store the embedded string in the object. */
        str->ptr = sdsnewlen(str->ptr,sdslen(str->ptr));
        str->encoding = OBJ_ENCODING_RAW;
    } else if (str->encoding == OBJ_ENCODING_INT) {
        /* Convert the string from integer to raw encoding. With INT
         * encoding the integer value is stored directly in the ptr field. */
        str->ptr = sdsfromlonglong((long)str->ptr);
        str->encoding = OBJ_ENCODING_RAW;
    }
    return str;
}
+
/* Append the specified buffer to the string 'str'. The string must be a
 * string created by the user that is referenced only a single time, otherwise
 * REDISMODULE_ERR is returned and the operation is not performed. */
int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len) {
    UNUSED(ctx);
    /* Refuse shared strings; this also converts the string to RAW encoding
     * so the sds buffer below can be extended in place. */
    str = moduleAssertUnsharedString(str);
    if (str == NULL) return REDISMODULE_ERR;
    str->ptr = sdscatlen(str->ptr,buf,len);
    return REDISMODULE_OK;
}
+
/* Trim possible excess memory allocated for a RedisModuleString.
 *
 * Sometimes a RedisModuleString may have more memory allocated for
 * it than required, typically for argv arguments that were constructed
 * from network buffers. This function optimizes such strings by reallocating
 * their memory, which is useful for strings that are not short lived but
 * retained for an extended duration.
 *
 * This operation is *not thread safe* and should only be called when
 * no concurrent access to the string is guaranteed. Using it for an argv
 * string in a module command before the string is potentially available
 * to other threads is generally safe.
 *
 * Currently, Redis may also automatically trim retained strings when a
 * module command returns. However, doing this explicitly should still be
 * a preferred option:
 *
 * 1. Future versions of Redis may abandon auto-trimming.
 * 2. Auto-trimming as currently implemented is *not thread safe*.
 *    A background thread manipulating a recently retained string may end up
 *    in a race condition with the auto-trim, which could result with
 *    data corruption.
 */
void RM_TrimStringAllocation(RedisModuleString *str) {
    if (!str) return;
    trimStringObjectIfNeeded(str, 1);
}
+
+/* --------------------------------------------------------------------------
+ * ## Reply APIs
+ *
+ * These functions are used for sending replies to the client.
+ *
+ * Most functions always return REDISMODULE_OK so you can use it with
+ * 'return' in order to return from the command implementation with:
+ *
+ * if (... some condition ...)
+ * return RedisModule_ReplyWithLongLong(ctx,mycount);
+ *
+ * ### Reply with collection functions
+ *
+ * After starting a collection reply, the module must make calls to other
+ * `ReplyWith*` style functions in order to emit the elements of the collection.
+ * Collection types include: Array, Map, Set and Attribute.
+ *
+ * When producing collections with a number of elements that is not known
+ * beforehand, the function can be called with a special flag
+ * REDISMODULE_POSTPONED_LEN (REDISMODULE_POSTPONED_ARRAY_LEN in the past),
+ * and the actual number of elements can be later set with RM_ReplySet*Length()
+ * call (which will set the latest "open" count if there are multiple ones).
+ * -------------------------------------------------------------------------- */
+
/* Send an error about the number of arguments given to the command,
 * citing the command name in the error message. Returns REDISMODULE_OK.
 *
 * Example:
 *
 *     if (argc != 3) return RedisModule_WrongArity(ctx);
 */
int RM_WrongArity(RedisModuleCtx *ctx) {
    addReplyErrorArity(ctx->client);
    return REDISMODULE_OK;
}
+
+/* Return the client object the `RM_Reply*` functions should target.
+ * Normally this is just `ctx->client`, that is the client that called
+ * the module command, however in the case of thread safe contexts there
+ * is no directly associated client (since it would not be safe to access
+ * the client from a thread), so instead the blocked client object referenced
+ * in the thread safe context, has a fake client that we just use to accumulate
+ * the replies. Later, when the client is unblocked, the accumulated replies
+ * are appended to the actual client.
+ *
+ * The function returns the client pointer depending on the context, or
+ * NULL if there is no potential client. This happens when we are in the
+ * context of a thread safe context that was not initialized with a blocked
+ * client object. Other contexts without associated clients are the ones
+ * initialized to run the timers callbacks. */
+client *moduleGetReplyClient(RedisModuleCtx *ctx) {
+ if (ctx->flags & REDISMODULE_CTX_THREAD_SAFE) {
+ if (ctx->blocked_client)
+ return ctx->blocked_client->reply_client;
+ else
+ return NULL;
+ } else {
+ /* If this is a non thread safe context, just return the client
+ * that is running the command if any. This may be NULL as well
+ * in the case of contexts that are not executed with associated
+ * clients, like timer contexts. */
+ return ctx->client;
+ }
+}
+
/* Send an integer reply to the client, with the specified `long long` value.
 * The function always returns REDISMODULE_OK. */
int RM_ReplyWithLongLong(RedisModuleCtx *ctx, long long ll) {
    client *c = moduleGetReplyClient(ctx);
    /* Contexts without a reply client (e.g. timers) silently succeed. */
    if (c == NULL) return REDISMODULE_OK;
    addReplyLongLong(c,ll);
    return REDISMODULE_OK;
}
+
/* Reply with the error 'err'.
 *
 * Note that 'err' must contain all the error, including
 * the initial error code. The function only provides the initial "-", so
 * the usage is, for example:
 *
 *     RedisModule_ReplyWithError(ctx,"ERR Wrong Type");
 *
 * and not just:
 *
 *     RedisModule_ReplyWithError(ctx,"Wrong Type");
 *
 * The function always returns REDISMODULE_OK.
 */
int RM_ReplyWithError(RedisModuleCtx *ctx, const char *err) {
    client *c = moduleGetReplyClient(ctx);
    if (c == NULL) return REDISMODULE_OK;
    /* 'err' is passed as a "%s" argument, not as the format string, so any
     * '%' characters it contains are emitted verbatim. */
    addReplyErrorFormat(c,"-%s",err);
    return REDISMODULE_OK;
}
+
/* Reply with the error created from a printf format and arguments.
 *
 * Note that 'fmt' must contain all the error, including
 * the initial error code. The function only provides the initial "-", so
 * the usage is, for example:
 *
 *     RedisModule_ReplyWithErrorFormat(ctx,"ERR Wrong Type: %s",type);
 *
 * and not just:
 *
 *     RedisModule_ReplyWithErrorFormat(ctx,"Wrong Type: %s",type);
 *
 * The function always returns REDISMODULE_OK.
 */
int RM_ReplyWithErrorFormat(RedisModuleCtx *ctx, const char *fmt, ...) {
    client *c = moduleGetReplyClient(ctx);
    if (c == NULL) return REDISMODULE_OK;

    /* Build a copy of the format string with the leading hyphen prepended,
     * since the varargs must be applied to the full format at once. */
    int len = strlen(fmt) + 2; /* 1 for the \0 and 1 for the hyphen */
    char *hyphenfmt = zmalloc(len);
    snprintf(hyphenfmt, len, "-%s", fmt);

    va_list ap;
    va_start(ap, fmt);
    addReplyErrorFormatInternal(c, 0, hyphenfmt, ap);
    va_end(ap);

    zfree(hyphenfmt);

    return REDISMODULE_OK;
}
+
/* Reply with a simple string (`+... \r\n` in RESP protocol). This replies
 * are suitable only when sending a small non-binary string with small
 * overhead, like "OK" or similar replies.
 *
 * The function always returns REDISMODULE_OK. */
int RM_ReplyWithSimpleString(RedisModuleCtx *ctx, const char *msg) {
    client *c = moduleGetReplyClient(ctx);
    if (c == NULL) return REDISMODULE_OK;
    /* Emit the RESP simple-string framing around the raw message. */
    addReplyProto(c,"+",1);
    addReplyProto(c,msg,strlen(msg));
    addReplyProto(c,"\r\n",2);
    return REDISMODULE_OK;
}
+
/* Collection types handled by moduleReplyWithCollection(). */
#define COLLECTION_REPLY_ARRAY 1
#define COLLECTION_REPLY_MAP 2
#define COLLECTION_REPLY_SET 3
#define COLLECTION_REPLY_ATTRIBUTE 4

/* Low level implementation behind RM_ReplyWithArray(), RM_ReplyWithMap(),
 * RM_ReplyWithSet() and RM_ReplyWithAttribute(): emit the header of a
 * collection reply of the given 'type' and 'len' elements.
 *
 * A 'len' of REDISMODULE_POSTPONED_LEN records a deferred length node in
 * ctx->postponed_arrays, to be resolved later by a RM_ReplySet*Length()
 * call. Always returns REDISMODULE_OK. */
int moduleReplyWithCollection(RedisModuleCtx *ctx, long len, int type) {
    client *c = moduleGetReplyClient(ctx);
    if (c == NULL) return REDISMODULE_OK;
    if (len == REDISMODULE_POSTPONED_LEN) {
        /* Remember the deferred length node so the length can be set later. */
        ctx->postponed_arrays = zrealloc(ctx->postponed_arrays,sizeof(void*)*
            (ctx->postponed_arrays_count+1));
        ctx->postponed_arrays[ctx->postponed_arrays_count] =
            addReplyDeferredLen(c);
        ctx->postponed_arrays_count++;
    } else if (len == 0) {
        /* Empty collections use the pre-built shared reply objects. */
        switch (type) {
        case COLLECTION_REPLY_ARRAY:
            addReply(c, shared.emptyarray);
            break;
        case COLLECTION_REPLY_MAP:
            addReply(c, shared.emptymap[c->resp]);
            break;
        case COLLECTION_REPLY_SET:
            addReply(c, shared.emptyset[c->resp]);
            break;
        case COLLECTION_REPLY_ATTRIBUTE:
            addReplyAttributeLen(c,len);
            break;
        default:
            serverPanic("Invalid module empty reply type %d", type);
        }
    } else {
        switch (type) {
        case COLLECTION_REPLY_ARRAY:
            addReplyArrayLen(c,len);
            break;
        case COLLECTION_REPLY_MAP:
            addReplyMapLen(c,len);
            break;
        case COLLECTION_REPLY_SET:
            addReplySetLen(c,len);
            break;
        case COLLECTION_REPLY_ATTRIBUTE:
            addReplyAttributeLen(c,len);
            break;
        default:
            serverPanic("Invalid module reply type %d", type);
        }
    }
    return REDISMODULE_OK;
}
+
/* Reply with an array type of 'len' elements.
 *
 * After starting an array reply, the module must make `len` calls to other
 * `ReplyWith*` style functions in order to emit the elements of the array.
 * See Reply APIs section for more details.
 *
 * Use RM_ReplySetArrayLength() to set deferred length.
 *
 * The function always returns REDISMODULE_OK. */
int RM_ReplyWithArray(RedisModuleCtx *ctx, long len) {
    return moduleReplyWithCollection(ctx, len, COLLECTION_REPLY_ARRAY);
}
+
+/* Reply with a RESP3 Map type of 'len' pairs.
+ * Visit https://github.com/antirez/RESP3/blob/master/spec.md for more info about RESP3.
+ *
+ * After starting a map reply, the module must make `len*2` calls to other
+ * `ReplyWith*` style functions in order to emit the elements of the map.
+ * See Reply APIs section for more details.
+ *
+ * If the connected client is using RESP2, the reply will be converted to a flat
+ * array.
+ *
+ * Use RM_ReplySetMapLength() to set deferred length.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithMap(RedisModuleCtx *ctx, long len) {
+ /* 'len' counts pairs; may be REDISMODULE_POSTPONED_LEN to defer. */
+ return moduleReplyWithCollection(ctx, len, COLLECTION_REPLY_MAP);
+}
+
+/* Reply with a RESP3 Set type of 'len' elements.
+ * Visit https://github.com/antirez/RESP3/blob/master/spec.md for more info about RESP3.
+ *
+ * After starting a set reply, the module must make `len` calls to other
+ * `ReplyWith*` style functions in order to emit the elements of the set.
+ * See Reply APIs section for more details.
+ *
+ * If the connected client is using RESP2, the reply will be converted to an
+ * array type.
+ *
+ * Use RM_ReplySetSetLength() to set deferred length.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithSet(RedisModuleCtx *ctx, long len) {
+ /* 'len' may be REDISMODULE_POSTPONED_LEN to defer the length. */
+ return moduleReplyWithCollection(ctx, len, COLLECTION_REPLY_SET);
+}
+
+
+/* Add attributes (metadata) to the reply. Should be done before adding the
+ * actual reply. see https://github.com/antirez/RESP3/blob/master/spec.md#attribute-type
+ *
+ * After starting an attribute's reply, the module must make `len*2` calls to other
+ * `ReplyWith*` style functions in order to emit the elements of the attribute map.
+ * See Reply APIs section for more details.
+ *
+ * Use RM_ReplySetAttributeLength() to set deferred length.
+ *
+ * Not supported by RESP2 and will return REDISMODULE_ERR, otherwise
+ * the function always returns REDISMODULE_OK. */
+int RM_ReplyWithAttribute(RedisModuleCtx *ctx, long len) {
+ /* NOTE(review): unlike the other reply helpers this dereferences
+ * ctx->client directly rather than going through moduleGetReplyClient()
+ * -- presumably attributes are never emitted from contexts without an
+ * attached client; confirm. */
+ if (ctx->client->resp == 2) return REDISMODULE_ERR;
+
+ return moduleReplyWithCollection(ctx, len, COLLECTION_REPLY_ATTRIBUTE);
+}
+
+/* Reply to the client with a null array, simply null in RESP3,
+ * null array in RESP2.
+ *
+ * Note: In RESP3 there's no difference between Null reply and
+ * NullArray reply, so to prevent ambiguity it's better to avoid
+ * using this API and use RedisModule_ReplyWithNull instead.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithNullArray(RedisModuleCtx *ctx) {
+ /* Without a reply client (detached/background context) this is a no-op. */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyNullArray(target);
+ return REDISMODULE_OK;
+}
+
+/* Reply to the client with an empty array.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithEmptyArray(RedisModuleCtx *ctx) {
+ /* Emit the shared preallocated empty-array reply object. */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReply(target,shared.emptyarray);
+ return REDISMODULE_OK;
+}
+
+/* Resolve one pending deferred-length node of collection 'type', setting its
+ * final length to 'len'. Pending nodes are consumed in LIFO order: the most
+ * recently postponed collection is resolved first. */
+void moduleReplySetCollectionLength(RedisModuleCtx *ctx, long len, int type) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return;
+ /* Calling this without a matching postponed ReplyWith*() is a module
+ * bug: log it and bail out instead of underflowing the stack. */
+ if (ctx->postponed_arrays_count == 0) {
+ serverLog(LL_WARNING,
+ "API misuse detected in module %s: "
+ "RedisModule_ReplySet*Length() called without previous "
+ "RedisModule_ReplyWith*(ctx,REDISMODULE_POSTPONED_LEN) "
+ "call.", ctx->module->name);
+ return;
+ }
+ ctx->postponed_arrays_count--;
+ switch(type) {
+ case COLLECTION_REPLY_ARRAY:
+ setDeferredArrayLen(c,ctx->postponed_arrays[ctx->postponed_arrays_count],len);
+ break;
+ case COLLECTION_REPLY_MAP:
+ setDeferredMapLen(c,ctx->postponed_arrays[ctx->postponed_arrays_count],len);
+ break;
+ case COLLECTION_REPLY_SET:
+ setDeferredSetLen(c,ctx->postponed_arrays[ctx->postponed_arrays_count],len);
+ break;
+ case COLLECTION_REPLY_ATTRIBUTE:
+ setDeferredAttributeLen(c,ctx->postponed_arrays[ctx->postponed_arrays_count],len);
+ break;
+ default:
+ serverPanic("Invalid module reply type %d", type);
+ }
+ /* Free the stack once the last pending node has been resolved. */
+ if (ctx->postponed_arrays_count == 0) {
+ zfree(ctx->postponed_arrays);
+ ctx->postponed_arrays = NULL;
+ }
+}
+
+/* When RedisModule_ReplyWithArray() is used with the argument
+ * REDISMODULE_POSTPONED_LEN, because we don't know beforehand the number
+ * of items we are going to output as elements of the array, this function
+ * will take care to set the array length.
+ *
+ * Since it is possible to have multiple array replies pending with unknown
+ * length, this function guarantees to always set the latest array length
+ * that was created in a postponed way.
+ *
+ * For example in order to output an array like [1,[10,20,30]] we
+ * could write:
+ *
+ * RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_LEN);
+ * RedisModule_ReplyWithLongLong(ctx,1);
+ * RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_LEN);
+ * RedisModule_ReplyWithLongLong(ctx,10);
+ * RedisModule_ReplyWithLongLong(ctx,20);
+ * RedisModule_ReplyWithLongLong(ctx,30);
+ * RedisModule_ReplySetArrayLength(ctx,3); // Set len of 10,20,30 array.
+ * RedisModule_ReplySetArrayLength(ctx,2); // Set len of top array
+ *
+ * Note that in the above example there is no reason to postpone the array
+ * length, since we produce a fixed number of elements, but in the practice
+ * the code may use an iterator or other ways of creating the output so
+ * that is not easy to calculate in advance the number of elements.
+ */
+void RM_ReplySetArrayLength(RedisModuleCtx *ctx, long len) {
+ /* Resolves the most recently postponed array length (LIFO). */
+ moduleReplySetCollectionLength(ctx, len, COLLECTION_REPLY_ARRAY);
+}
+
+/* Very similar to RedisModule_ReplySetArrayLength except `len` should
+ * be exactly half of the number of `ReplyWith*` functions called in the
+ * context of the map (each pair is one key plus one value).
+ * Visit https://github.com/antirez/RESP3/blob/master/spec.md for more info about RESP3. */
+void RM_ReplySetMapLength(RedisModuleCtx *ctx, long len) {
+ moduleReplySetCollectionLength(ctx, len, COLLECTION_REPLY_MAP);
+}
+
+/* Very similar to RedisModule_ReplySetArrayLength, but for RESP3 Set replies.
+ * Visit https://github.com/antirez/RESP3/blob/master/spec.md for more info about RESP3. */
+void RM_ReplySetSetLength(RedisModuleCtx *ctx, long len) {
+ moduleReplySetCollectionLength(ctx, len, COLLECTION_REPLY_SET);
+}
+
+/* Very similar to RedisModule_ReplySetMapLength
+ * Visit https://github.com/antirez/RESP3/blob/master/spec.md for more info about RESP3.
+ *
+ * Must not be called if RM_ReplyWithAttribute returned an error. */
+void RM_ReplySetAttributeLength(RedisModuleCtx *ctx, long len) {
+ /* Attributes are RESP3-only: mirror the RESP2 guard in
+ * RM_ReplyWithAttribute, which never pushed a deferred node for RESP2. */
+ if (ctx->client->resp == 2) return;
+ moduleReplySetCollectionLength(ctx, len, COLLECTION_REPLY_ATTRIBUTE);
+}
+
+/* Reply with a bulk string, taking in input a C buffer pointer and length.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len) {
+ /* Binary safe: exactly 'len' bytes are emitted as a bulk string. */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyBulkCBuffer(target,(char*)buf,len);
+ return REDISMODULE_OK;
+}
+
+/* Reply with a bulk string, taking in input a C buffer pointer that is
+ * assumed to be null-terminated.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithCString(RedisModuleCtx *ctx, const char *buf) {
+ /* 'buf' must be NUL-terminated; emitted as a bulk string. */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyBulkCString(target,(char*)buf);
+ return REDISMODULE_OK;
+}
+
+/* Reply with a bulk string, taking in input a RedisModuleString object.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithString(RedisModuleCtx *ctx, RedisModuleString *str) {
+ /* Emit the RedisModuleString as a bulk reply. */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyBulk(target,str);
+ return REDISMODULE_OK;
+}
+
+/* Reply with an empty string.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithEmptyString(RedisModuleCtx *ctx) {
+ /* Emit the shared preallocated empty bulk-string object. */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReply(target,shared.emptybulk);
+ return REDISMODULE_OK;
+}
+
+/* Reply with a binary safe string, which should not be escaped or filtered
+ * taking in input a C buffer pointer, length and a 3 character type/extension.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithVerbatimStringType(RedisModuleCtx *ctx, const char *buf, size_t len, const char *ext) {
+ /* 'ext' is the 3-character verbatim string type tag (e.g. "txt"). */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyVerbatim(target, buf, len, ext);
+ return REDISMODULE_OK;
+}
+
+/* Reply with a binary safe string, which should not be escaped or filtered
+ * taking in input a C buffer pointer and length.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithVerbatimString(RedisModuleCtx *ctx, const char *buf, size_t len) {
+ /* "txt" is the default verbatim string type/extension tag. */
+ return RM_ReplyWithVerbatimStringType(ctx, buf, len, "txt");
+}
+
+/* Reply to the client with a NULL.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithNull(RedisModuleCtx *ctx) {
+ /* Protocol-appropriate null (RESP2 null bulk / RESP3 null). */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyNull(target);
+ return REDISMODULE_OK;
+}
+
+/* Reply with a RESP3 Boolean type.
+ * Visit https://github.com/antirez/RESP3/blob/master/spec.md for more info about RESP3.
+ *
+ * In RESP3, this is boolean type
+ * In RESP2, it's a string response of "1" and "0" for true and false respectively.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithBool(RedisModuleCtx *ctx, int b) {
+ /* Nonzero 'b' is true; protocol conversion happens in addReplyBool(). */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyBool(target,b);
+ return REDISMODULE_OK;
+}
+
+/* Reply exactly what a Redis command returned us with RedisModule_Call().
+ * This function is useful when we use RedisModule_Call() in order to
+ * execute some command, as we want to reply to the client exactly the
+ * same reply we obtained by the command.
+ *
+ * Return:
+ * - REDISMODULE_OK on success.
+ * - REDISMODULE_ERR if the given reply is in RESP3 format but the client expects RESP2.
+ * In case of an error, it's the module writer responsibility to translate the reply
+ * to RESP2 (or handle it differently by returning an error). Notice that for
+ * module writer convenience, it is possible to pass `0` as a parameter to the fmt
+ * argument of `RM_Call` so that the RedisModuleCallReply will return in the same
+ * protocol (RESP2 or RESP3) as set in the current client's context. */
+int RM_ReplyWithCallReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ if (c->resp == 2 && callReplyIsResp3(reply)) {
+ /* The reply is in RESP3 format and the client is RESP2,
+ * so it isn't possible to send this reply to the client. */
+ return REDISMODULE_ERR;
+ }
+ /* Copy the raw protocol bytes of the call reply verbatim to the client. */
+ size_t proto_len;
+ const char *proto = callReplyGetProto(reply, &proto_len);
+ addReplyProto(c, proto, proto_len);
+ /* Propagate the error list from that reply to the other client, to do some
+ * post error reply handling, like statistics.
+ * Note that if the original reply had an array with errors, and the module
+ * replied with just a portion of the original reply, and not the entire
+ * reply, the errors are currently not propagated and the errors stats
+ * will not get propagated. */
+ list *errors = callReplyDeferredErrorList(reply);
+ if (errors)
+ deferredAfterErrorReply(c, errors);
+ return REDISMODULE_OK;
+}
+
+/* Reply with a RESP3 Double type.
+ * Visit https://github.com/antirez/RESP3/blob/master/spec.md for more info about RESP3.
+ *
+ * Send a string reply obtained converting the double 'd' into a bulk string.
+ * This function is basically equivalent to converting a double into
+ * a string into a C buffer, and then calling the function
+ * RedisModule_ReplyWithStringBuffer() with the buffer and length.
+ *
+ * In RESP3 the string is tagged as a double, while in RESP2 it's just a plain string
+ * that the user will have to parse.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithDouble(RedisModuleCtx *ctx, double d) {
+ /* RESP3 double type; plain string under RESP2. */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyDouble(target,d);
+ return REDISMODULE_OK;
+}
+
+/* Reply with a RESP3 BigNumber type.
+ * Visit https://github.com/antirez/RESP3/blob/master/spec.md for more info about RESP3.
+ *
+ * In RESP3, this is a string of length `len` that is tagged as a BigNumber,
+ * however, it's up to the caller to ensure that it's a valid BigNumber.
+ * In RESP2, this is just a plain bulk string response.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithBigNumber(RedisModuleCtx *ctx, const char *bignum, size_t len) {
+ /* Caller is responsible for 'bignum' being a valid big-number string. */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyBigNum(target, bignum, len);
+ return REDISMODULE_OK;
+}
+
+/* Send a string reply obtained converting the long double 'ld' into a bulk
+ * string. This function is basically equivalent to converting a long double
+ * into a string into a C buffer, and then calling the function
+ * RedisModule_ReplyWithStringBuffer() with the buffer and length.
+ * The double string uses human readable formatting (see
+ * `addReplyHumanLongDouble` in networking.c).
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithLongDouble(RedisModuleCtx *ctx, long double ld) {
+ /* Human-readable long double formatting, as a bulk string. */
+ client *target = moduleGetReplyClient(ctx);
+ if (target != NULL) addReplyHumanLongDouble(target, ld);
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Commands replication API
+ * -------------------------------------------------------------------------- */
+
+/* Replicate the specified command and arguments to slaves and AOF, as effect
+ * of execution of the calling command implementation.
+ *
+ * The replicated commands are always wrapped into the MULTI/EXEC that
+ * contains all the commands replicated in a given module command
+ * execution. However the commands replicated with RedisModule_Call()
+ * are the first items, the ones replicated with RedisModule_Replicate()
+ * will all follow before the EXEC.
+ *
+ * Modules should try to use one interface or the other.
+ *
+ * This command follows exactly the same interface of RedisModule_Call(),
+ * so a set of format specifiers must be passed, followed by arguments
+ * matching the provided format specifiers.
+ *
+ * Please refer to RedisModule_Call() for more information.
+ *
+ * Using the special "A" and "R" modifiers, the caller can exclude either
+ * the AOF or the replicas from the propagation of the specified command.
+ * Otherwise, by default, the command will be propagated in both channels.
+ *
+ * #### Note about calling this function from a thread safe context:
+ *
+ * Normally when you call this function from the callback implementing a
+ * module command, or any other callback provided by the Redis Module API,
+ * Redis will accumulate all the calls to this function in the context of
+ * the callback, and will propagate all the commands wrapped in a MULTI/EXEC
+ * transaction. However when calling this function from a thread safe context
+ * that can live for an undefined amount of time, and can be locked/unlocked
+ * at will, the behavior is different: the MULTI/EXEC wrapper is not emitted
+ * and the specified command is inserted into the AOF and replication stream
+ * immediately.
+ *
+ * #### Return value
+ *
+ * The command returns REDISMODULE_ERR if the format specifiers are invalid
+ * or the command name does not belong to a known command. */
+int RM_Replicate(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) {
+ struct redisCommand *cmd;
+ robj **argv = NULL;
+ int argc = 0, flags = 0, j;
+ va_list ap;
+
+ /* Unknown command names are rejected up front. */
+ cmd = lookupCommandByCString((char*)cmdname);
+ if (!cmd) return REDISMODULE_ERR;
+
+ /* Create the client and dispatch the command. */
+ va_start(ap, fmt);
+ argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
+ va_end(ap);
+ if (argv == NULL) return REDISMODULE_ERR;
+
+ /* Select the propagation target. Usually is AOF + replicas, however
+ * the caller can exclude one or the other using the "A" or "R"
+ * modifiers. */
+ int target = 0;
+ if (!(flags & REDISMODULE_ARGV_NO_AOF)) target |= PROPAGATE_AOF;
+ if (!(flags & REDISMODULE_ARGV_NO_REPLICAS)) target |= PROPAGATE_REPL;
+
+ alsoPropagate(ctx->client->db->id,argv,argc,target);
+
+ /* Release the argv. */
+ for (j = 0; j < argc; j++) decrRefCount(argv[j]);
+ zfree(argv);
+ /* Mark the dataset dirty so persistence knows something changed. */
+ server.dirty++;
+ return REDISMODULE_OK;
+}
+
+/* This function will replicate the command exactly as it was invoked
+ * by the client. Note that this function will not wrap the command into
+ * a MULTI/EXEC stanza, so it should not be mixed with other replication
+ * commands.
+ *
+ * Basically this form of replication is useful when you want to propagate
+ * the command to the slaves and AOF file exactly as it was called, since
+ * the command can just be re-executed to deterministically re-create the
+ * new state starting from the old one.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplicateVerbatim(RedisModuleCtx *ctx) {
+ /* Propagate the client's original argv unmodified to both channels. */
+ int target = PROPAGATE_AOF|PROPAGATE_REPL;
+ alsoPropagate(ctx->client->db->id,
+ ctx->client->argv,ctx->client->argc,
+ target);
+ server.dirty++;
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * ## DB and Key APIs -- Generic API
+ * -------------------------------------------------------------------------- */
+
+/* Return the ID of the current client calling the currently active module
+ * command. The returned ID has a few guarantees:
+ *
+ * 1. The ID is different for each different client, so if the same client
+ * executes a module command multiple times, it can be recognized as
+ * having the same ID, otherwise the ID will be different.
+ * 2. The ID increases monotonically. Clients connecting to the server later
+ * are guaranteed to get IDs greater than any past ID previously seen.
+ *
+ * Valid IDs are from 1 to 2^64 - 1. If 0 is returned it means there is no way
+ * to fetch the ID in the context the function was currently called.
+ *
+ * After obtaining the ID, it is possible to check if the command execution
+ * is actually happening in the context of AOF loading, using this macro:
+ *
+ * if (RedisModule_IsAOFClient(RedisModule_GetClientId(ctx)) {
+ * // Handle it differently.
+ * }
+ */
+unsigned long long RM_GetClientId(RedisModuleCtx *ctx) {
+ /* Contexts with no attached client have no ID to report: return 0. */
+ return (ctx->client == NULL) ? 0 : ctx->client->id;
+}
+
+/* Return the ACL user name used by the client with the specified client ID.
+ * Client ID can be obtained with RM_GetClientId() API. If the client does not
+ * exist, NULL is returned and errno is set to ENOENT. If the client isn't
+ * using an ACL user, NULL is returned and errno is set to ENOTSUP */
+RedisModuleString *RM_GetClientUserNameById(RedisModuleCtx *ctx, uint64_t id) {
+ client *client = lookupClientByID(id);
+ if (client == NULL) {
+ errno = ENOENT;
+ return NULL;
+ }
+
+ if (client->user == NULL) {
+ errno = ENOTSUP;
+ return NULL;
+ }
+
+ /* Copy the user name into a fresh string object and register it with
+ * the context's auto memory so it is released automatically. */
+ sds name = sdsnew(client->user->name);
+ robj *str = createObject(OBJ_STRING, name);
+ autoMemoryAdd(ctx, REDISMODULE_AM_STRING, str);
+ return str;
+}
+
+/* This is a helper for RM_GetClientInfoById() and other functions: given
+ * a client, it populates the client info structure with the appropriate
+ * fields depending on the version provided. If the version is not valid
+ * then REDISMODULE_ERR is returned. Otherwise the function returns
+ * REDISMODULE_OK and the structure pointed by 'ci' gets populated. */
+
+int modulePopulateClientInfoStructure(void *ci, client *client, int structver) {
+ /* Only struct version 1 is currently supported. */
+ if (structver != 1) return REDISMODULE_ERR;
+
+ RedisModuleClientInfoV1 *ci1 = ci;
+ memset(ci1,0,sizeof(*ci1));
+ ci1->version = structver;
+ if (client->flags & CLIENT_MULTI)
+ ci1->flags |= REDISMODULE_CLIENTINFO_FLAG_MULTI;
+ if (client->flags & CLIENT_PUBSUB)
+ ci1->flags |= REDISMODULE_CLIENTINFO_FLAG_PUBSUB;
+ if (client->flags & CLIENT_UNIX_SOCKET)
+ ci1->flags |= REDISMODULE_CLIENTINFO_FLAG_UNIXSOCKET;
+ if (client->flags & CLIENT_TRACKING)
+ ci1->flags |= REDISMODULE_CLIENTINFO_FLAG_TRACKING;
+ if (client->flags & CLIENT_BLOCKED)
+ ci1->flags |= REDISMODULE_CLIENTINFO_FLAG_BLOCKED;
+ if (client->conn->type == connectionTypeTls())
+ ci1->flags |= REDISMODULE_CLIENTINFO_FLAG_SSL;
+
+ int port;
+ /* NOTE(review): connAddrPeerName() return value is not checked;
+ * presumably on failure addr stays zeroed from the memset above and
+ * port content is defined by the callee -- confirm. */
+ connAddrPeerName(client->conn,ci1->addr,sizeof(ci1->addr),&port);
+ ci1->port = port;
+ ci1->db = client->db->id;
+ ci1->id = client->id;
+ return REDISMODULE_OK;
+}
+
+/* This is a helper for moduleFireServerEvent() and other functions:
+ * It populates the replication info structure with the appropriate
+ * fields depending on the version provided. If the version is not valid
+ * then REDISMODULE_ERR is returned. Otherwise the function returns
+ * REDISMODULE_OK and the structure pointed by 'ri' gets populated. */
+int modulePopulateReplicationInfoStructure(void *ri, int structver) {
+ /* Only struct version 1 is currently supported. */
+ if (structver != 1) return REDISMODULE_ERR;
+
+ RedisModuleReplicationInfoV1 *ri1 = ri;
+ memset(ri1,0,sizeof(*ri1));
+ ri1->version = structver;
+ /* An instance is considered master when no master host is configured. */
+ ri1->master = server.masterhost==NULL;
+ ri1->masterhost = server.masterhost? server.masterhost: "";
+ ri1->masterport = server.masterport;
+ ri1->replid1 = server.replid;
+ ri1->replid2 = server.replid2;
+ ri1->repl1_offset = server.master_repl_offset;
+ ri1->repl2_offset = server.second_replid_offset;
+ return REDISMODULE_OK;
+}
+
+/* Return information about the client with the specified ID (that was
+ * previously obtained via the RedisModule_GetClientId() API). If the
+ * client exists, REDISMODULE_OK is returned, otherwise REDISMODULE_ERR
+ * is returned.
+ *
+ * When the client exists and the `ci` pointer is not NULL, but points to
+ * a structure of type RedisModuleClientInfoV1, previously initialized with
+ * the correct REDISMODULE_CLIENTINFO_INITIALIZER_V1, the structure is populated
+ * with the following fields:
+ *
+ * uint64_t flags; // REDISMODULE_CLIENTINFO_FLAG_*
+ * uint64_t id; // Client ID
+ * char addr[46]; // IPv4 or IPv6 address.
+ * uint16_t port; // TCP port.
+ * uint16_t db; // Selected DB.
+ *
+ * Note: the client ID is useless in the context of this call, since we
+ * already know, however the same structure could be used in other
+ * contexts where we don't know the client ID, yet the same structure
+ * is returned.
+ *
+ * With flags having the following meaning:
+ *
+ * REDISMODULE_CLIENTINFO_FLAG_SSL Client using SSL connection.
+ * REDISMODULE_CLIENTINFO_FLAG_PUBSUB Client in Pub/Sub mode.
+ * REDISMODULE_CLIENTINFO_FLAG_BLOCKED Client blocked in command.
+ * REDISMODULE_CLIENTINFO_FLAG_TRACKING Client with keys tracking on.
+ * REDISMODULE_CLIENTINFO_FLAG_UNIXSOCKET Client using unix domain socket.
+ * REDISMODULE_CLIENTINFO_FLAG_MULTI Client in MULTI state.
+ *
+ * However passing NULL is a way to just check if the client exists in case
+ * we are not interested in any additional information.
+ *
+ * This is the correct usage when we want the client info structure
+ * returned:
+ *
+ * RedisModuleClientInfo ci = REDISMODULE_CLIENTINFO_INITIALIZER;
+ * int retval = RedisModule_GetClientInfoById(&ci,client_id);
+ * if (retval == REDISMODULE_OK) {
+ * printf("Address: %s\n", ci.addr);
+ * }
+ */
+int RM_GetClientInfoById(void *ci, uint64_t id) {
+ client *client = lookupClientByID(id);
+ if (client == NULL) return REDISMODULE_ERR;
+ /* A NULL 'ci' means the caller only wanted an existence check. */
+ if (ci == NULL) return REDISMODULE_OK;
+
+ /* Fill the info structure if passed. */
+ /* The initializer macro stores the struct version in the first
+ * 64-bit word of the structure. */
+ uint64_t structver = ((uint64_t*)ci)[0];
+ return modulePopulateClientInfoStructure(ci,client,structver);
+}
+
+/* Returns the name of the client connection with the given ID.
+ *
+ * If the client ID does not exist or if the client has no name associated with
+ * it, NULL is returned. */
+RedisModuleString *RM_GetClientNameById(RedisModuleCtx *ctx, uint64_t id) {
+ client *client = lookupClientByID(id);
+ if (client == NULL || client->name == NULL) return NULL;
+ robj *name = client->name;
+ /* Share the client's name object: bump its refcount and register it
+ * with the context's auto memory for automatic release. */
+ incrRefCount(name);
+ autoMemoryAdd(ctx, REDISMODULE_AM_STRING, name);
+ return name;
+}
+
+/* Sets the name of the client with the given ID. This is equivalent to the client calling
+ * `CLIENT SETNAME name`.
+ *
+ * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned
+ * and errno is set as follows:
+ *
+ * - ENOENT if the client does not exist
+ * - EINVAL if the name contains invalid characters */
+int RM_SetClientNameById(uint64_t id, RedisModuleString *name) {
+ client *client = lookupClientByID(id);
+ if (client == NULL) {
+ errno = ENOENT;
+ return REDISMODULE_ERR;
+ }
+ /* clientSetName() validates the name; rejection maps to EINVAL. */
+ if (clientSetName(client, name, NULL) == C_ERR) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ }
+ return REDISMODULE_OK;
+}
+
+/* Publish a message to subscribers (see PUBLISH command). */
+int RM_PublishMessage(RedisModuleCtx *ctx, RedisModuleString *channel, RedisModuleString *message) {
+ UNUSED(ctx);
+ /* Final argument 0 = regular (non-shard) pub/sub channel. */
+ return pubsubPublishMessageAndPropagateToCluster(channel, message, 0);
+}
+
+/* Publish a message to shard-subscribers (see SPUBLISH command). */
+int RM_PublishMessageShard(RedisModuleCtx *ctx, RedisModuleString *channel, RedisModuleString *message) {
+ UNUSED(ctx);
+ /* Final argument 1 = shard pub/sub channel (SPUBLISH semantics). */
+ return pubsubPublishMessageAndPropagateToCluster(channel, message, 1);
+}
+
+/* Return the currently selected DB. */
+int RM_GetSelectedDb(RedisModuleCtx *ctx) {
+ /* NOTE(review): assumes ctx->client is non-NULL -- presumably not
+ * usable from a context without an attached client; confirm. */
+ return ctx->client->db->id;
+}
+
+
+/* Return the current context's flags. The flags provide information on the
+ * current request context (whether the client is a Lua script or in a MULTI),
+ * and about the Redis instance in general, i.e replication and persistence.
+ *
+ * It is possible to call this function even with a NULL context, however
+ * in this case the following flags will not be reported:
+ *
+ * * LUA, MULTI, REPLICATED, DIRTY (see below for more info).
+ *
+ * Available flags and their meaning:
+ *
+ * * REDISMODULE_CTX_FLAGS_LUA: The command is running in a Lua script
+ *
+ * * REDISMODULE_CTX_FLAGS_MULTI: The command is running inside a transaction
+ *
+ * * REDISMODULE_CTX_FLAGS_REPLICATED: The command was sent over the replication
+ * link by the MASTER
+ *
+ * * REDISMODULE_CTX_FLAGS_MASTER: The Redis instance is a master
+ *
+ * * REDISMODULE_CTX_FLAGS_SLAVE: The Redis instance is a slave
+ *
+ * * REDISMODULE_CTX_FLAGS_READONLY: The Redis instance is read-only
+ *
+ * * REDISMODULE_CTX_FLAGS_CLUSTER: The Redis instance is in cluster mode
+ *
+ * * REDISMODULE_CTX_FLAGS_AOF: The Redis instance has AOF enabled
+ *
+ * * REDISMODULE_CTX_FLAGS_RDB: The instance has RDB enabled
+ *
+ * * REDISMODULE_CTX_FLAGS_MAXMEMORY: The instance has Maxmemory set
+ *
+ * * REDISMODULE_CTX_FLAGS_EVICT: Maxmemory is set and has an eviction
+ * policy that may delete keys
+ *
+ * * REDISMODULE_CTX_FLAGS_OOM: Redis is out of memory according to the
+ * maxmemory setting.
+ *
+ * * REDISMODULE_CTX_FLAGS_OOM_WARNING: Less than 25% of memory remains before
+ * reaching the maxmemory level.
+ *
+ * * REDISMODULE_CTX_FLAGS_LOADING: Server is loading RDB/AOF
+ *
+ * * REDISMODULE_CTX_FLAGS_REPLICA_IS_STALE: No active link with the master.
+ *
+ * * REDISMODULE_CTX_FLAGS_REPLICA_IS_CONNECTING: The replica is trying to
+ * connect with the master.
+ *
+ * * REDISMODULE_CTX_FLAGS_REPLICA_IS_TRANSFERRING: Master -> Replica RDB
+ * transfer is in progress.
+ *
+ * * REDISMODULE_CTX_FLAGS_REPLICA_IS_ONLINE: The replica has an active link
+ * with its master. This is the
+ * contrary of STALE state.
+ *
+ * * REDISMODULE_CTX_FLAGS_ACTIVE_CHILD: There is currently some background
+ * process active (RDB, AUX or module).
+ *
+ * * REDISMODULE_CTX_FLAGS_MULTI_DIRTY: The next EXEC will fail due to dirty
+ * CAS (touched keys).
+ *
+ * * REDISMODULE_CTX_FLAGS_IS_CHILD: Redis is currently running inside
+ * background child process.
+ *
+ * * REDISMODULE_CTX_FLAGS_RESP3: Indicate the that client attached to this
+ * context is using RESP3.
+ *
+ * * REDISMODULE_CTX_FLAGS_SERVER_STARTUP: The Redis instance is starting
+ */
+int RM_GetContextFlags(RedisModuleCtx *ctx) {
+ int flags = 0;
+
+ /* Client specific flags */
+ if (ctx) {
+ if (ctx->client) {
+ if (ctx->client->flags & CLIENT_DENY_BLOCKING)
+ flags |= REDISMODULE_CTX_FLAGS_DENY_BLOCKING;
+ /* Module command received from MASTER, is replicated. */
+ if (ctx->client->flags & CLIENT_MASTER)
+ flags |= REDISMODULE_CTX_FLAGS_REPLICATED;
+ if (ctx->client->resp == 3) {
+ flags |= REDISMODULE_CTX_FLAGS_RESP3;
+ }
+ }
+
+ /* For DIRTY flags, we need the blocked client if used */
+ client *c = ctx->blocked_client ? ctx->blocked_client->client : ctx->client;
+ if (c && (c->flags & (CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC))) {
+ flags |= REDISMODULE_CTX_FLAGS_MULTI_DIRTY;
+ }
+ }
+
+ if (scriptIsRunning())
+ flags |= REDISMODULE_CTX_FLAGS_LUA;
+
+ if (server.in_exec)
+ flags |= REDISMODULE_CTX_FLAGS_MULTI;
+
+ if (server.cluster_enabled)
+ flags |= REDISMODULE_CTX_FLAGS_CLUSTER;
+
+ /* Async loading takes precedence over the generic loading flag. */
+ if (server.async_loading)
+ flags |= REDISMODULE_CTX_FLAGS_ASYNC_LOADING;
+ else if (server.loading)
+ flags |= REDISMODULE_CTX_FLAGS_LOADING;
+
+ /* Maxmemory and eviction policy */
+ if (server.maxmemory > 0 && (!server.masterhost || !server.repl_slave_ignore_maxmemory)) {
+ flags |= REDISMODULE_CTX_FLAGS_MAXMEMORY;
+
+ if (server.maxmemory_policy != MAXMEMORY_NO_EVICTION)
+ flags |= REDISMODULE_CTX_FLAGS_EVICT;
+ }
+
+ /* Persistence flags */
+ if (server.aof_state != AOF_OFF)
+ flags |= REDISMODULE_CTX_FLAGS_AOF;
+ if (server.saveparamslen > 0)
+ flags |= REDISMODULE_CTX_FLAGS_RDB;
+
+ /* Replication flags */
+ if (server.masterhost == NULL) {
+ flags |= REDISMODULE_CTX_FLAGS_MASTER;
+ } else {
+ flags |= REDISMODULE_CTX_FLAGS_SLAVE;
+ if (server.repl_slave_ro)
+ flags |= REDISMODULE_CTX_FLAGS_READONLY;
+
+ /* Replica state flags. */
+ if (server.repl_state == REPL_STATE_CONNECT ||
+ server.repl_state == REPL_STATE_CONNECTING)
+ {
+ flags |= REDISMODULE_CTX_FLAGS_REPLICA_IS_CONNECTING;
+ } else if (server.repl_state == REPL_STATE_TRANSFER) {
+ flags |= REDISMODULE_CTX_FLAGS_REPLICA_IS_TRANSFERRING;
+ } else if (server.repl_state == REPL_STATE_CONNECTED) {
+ flags |= REDISMODULE_CTX_FLAGS_REPLICA_IS_ONLINE;
+ }
+
+ /* STALE is the complement of CONNECTED, so it can coexist with
+ * the CONNECTING/TRANSFERRING flags above. */
+ if (server.repl_state != REPL_STATE_CONNECTED)
+ flags |= REDISMODULE_CTX_FLAGS_REPLICA_IS_STALE;
+ }
+
+ /* OOM flag. */
+ float level;
+ int retval = getMaxmemoryState(NULL,NULL,NULL,&level);
+ /* NOTE(review): 'level' is assumed to always be written by
+ * getMaxmemoryState(), including when it returns C_ERR -- confirm. */
+ if (retval == C_ERR) flags |= REDISMODULE_CTX_FLAGS_OOM;
+ if (level > 0.75) flags |= REDISMODULE_CTX_FLAGS_OOM_WARNING;
+
+ /* Presence of children processes. */
+ if (hasActiveChildProcess()) flags |= REDISMODULE_CTX_FLAGS_ACTIVE_CHILD;
+ if (server.in_fork_child) flags |= REDISMODULE_CTX_FLAGS_IS_CHILD;
+
+ /* Non-empty server.loadmodule_queue means that Redis is starting. */
+ if (listLength(server.loadmodule_queue) > 0)
+ flags |= REDISMODULE_CTX_FLAGS_SERVER_STARTUP;
+
+ return flags;
+}
+
+/* Returns true if a client sent the CLIENT PAUSE command to the server or
+ * if Redis Cluster does a manual failover, pausing the clients.
+ * This is needed when we have a master with replicas, and want to write,
+ * without adding further data to the replication channel, that the replicas
+ * replication offset, match the one of the master. When this happens, it is
+ * safe to failover the master without data loss.
+ *
+ * However modules may generate traffic by calling RedisModule_Call() with
+ * the "!" flag, or by calling RedisModule_Replicate(), in a context outside
+ * commands execution, for instance in timeout callbacks, threads safe
+ * contexts, and so forth. When modules will generate too much traffic, it
+ * will be hard for the master and replicas offset to match, because there
+ * is more data to send in the replication channel.
+ *
+ * So modules may want to try to avoid very heavy background work that has
+ * the effect of creating data to the replication channel, when this function
+ * returns true. This is mostly useful for modules that have background
+ * garbage collection tasks, or that do writes and replicate such writes
+ * periodically in timer callbacks or other periodic callbacks.
+ */
+int RM_AvoidReplicaTraffic(void) {
+ /* Double negation normalizes the paused-actions bitmask to 0/1. */
+ return !!(isPausedActionsWithUpdate(PAUSE_ACTION_REPLICA));
+}
+
+/* Change the currently selected DB. Returns an error if the id
+ * is out of range.
+ *
+ * Note that the client will retain the currently selected DB even after
+ * the Redis command implemented by the module calling this function
+ * returns.
+ *
+ * If the module command wishes to change something in a different DB and
+ * returns back to the original one, it should call RedisModule_GetSelectedDb()
+ * before in order to restore the old DB number before returning. */
+int RM_SelectDb(RedisModuleCtx *ctx, int newid) {
+ int retval = selectDb(ctx->client,newid);
+ return (retval == C_OK) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Check if a key exists, without affecting its last access time.
+ *
+ * This is equivalent to calling RM_OpenKey with the mode REDISMODULE_READ |
+ * REDISMODULE_OPEN_KEY_NOTOUCH, then checking if NULL was returned and, if not,
+ * calling RM_CloseKey on the opened key.
+ */
+int RM_KeyExists(RedisModuleCtx *ctx, robj *keyname) {
+ robj *value = lookupKeyReadWithFlags(ctx->client->db, keyname, LOOKUP_NOTOUCH);
+ return (value != NULL);
+}
+
+/* Initialize a RedisModuleKey struct */
+static void moduleInitKey(RedisModuleKey *kp, RedisModuleCtx *ctx, robj *keyname, robj *value, int mode){
+ kp->ctx = ctx;
+ kp->db = ctx->client->db;
+ kp->key = keyname;
+ incrRefCount(keyname);
+ kp->value = value;
+ kp->iter = NULL;
+ kp->mode = mode;
+ if (kp->value) moduleInitKeyTypeSpecific(kp);
+}
+
+/* Initialize the type-specific part of the key. Only when key has a value. */
+static void moduleInitKeyTypeSpecific(RedisModuleKey *key) {
+ switch (key->value->type) {
+ case OBJ_ZSET: zsetKeyReset(key); break;
+ case OBJ_STREAM: key->u.stream.signalready = 0; break;
+ }
+}
+
+/* Return a handle representing a Redis key, so that it is possible
+ * to call other APIs with the key handle as argument to perform
+ * operations on the key.
+ *
+ * The return value is the handle representing the key, that must be
+ * closed with RM_CloseKey().
+ *
+ * If the key does not exist and REDISMODULE_WRITE mode is requested, the handle
+ * is still returned, since it is possible to perform operations on
+ * a yet not existing key (that will be created, for example, after
+ * a list push operation). If the mode is just REDISMODULE_READ instead, and the
+ * key does not exist, NULL is returned. However it is still safe to
+ * call RedisModule_CloseKey() and RedisModule_KeyType() on a NULL
+ * value.
+ *
+ * Extra flags that can be pass to the API under the mode argument:
+ * * REDISMODULE_OPEN_KEY_NOTOUCH - Avoid touching the LRU/LFU of the key when opened.
+ * * REDISMODULE_OPEN_KEY_NONOTIFY - Don't trigger keyspace event on key misses.
+ * * REDISMODULE_OPEN_KEY_NOSTATS - Don't update keyspace hits/misses counters.
+ * * REDISMODULE_OPEN_KEY_NOEXPIRE - Avoid deleting lazy expired keys.
+ * * REDISMODULE_OPEN_KEY_NOEFFECTS - Avoid any effects from fetching the key. */
+RedisModuleKey *RM_OpenKey(RedisModuleCtx *ctx, robj *keyname, int mode) {
+ RedisModuleKey *kp;
+ robj *value;
+ int flags = 0;
+ flags |= (mode & REDISMODULE_OPEN_KEY_NOTOUCH? LOOKUP_NOTOUCH: 0);
+ flags |= (mode & REDISMODULE_OPEN_KEY_NONOTIFY? LOOKUP_NONOTIFY: 0);
+ flags |= (mode & REDISMODULE_OPEN_KEY_NOSTATS? LOOKUP_NOSTATS: 0);
+ flags |= (mode & REDISMODULE_OPEN_KEY_NOEXPIRE? LOOKUP_NOEXPIRE: 0);
+ flags |= (mode & REDISMODULE_OPEN_KEY_NOEFFECTS? LOOKUP_NOEFFECTS: 0);
+
+ if (mode & REDISMODULE_WRITE) {
+ value = lookupKeyWriteWithFlags(ctx->client->db,keyname, flags);
+ } else {
+ value = lookupKeyReadWithFlags(ctx->client->db,keyname, flags);
+ if (value == NULL) {
+ return NULL;
+ }
+ }
+
+ /* Setup the key handle. */
+ kp = zmalloc(sizeof(*kp));
+ moduleInitKey(kp, ctx, keyname, value, mode);
+ autoMemoryAdd(ctx,REDISMODULE_AM_KEY,kp);
+ return kp;
+}
+
+/**
+ * Returns the full OpenKey modes mask, using the return value
+ * the module can check if a certain set of OpenKey modes are supported
+ * by the redis server version in use.
+ * Example:
+ *
+ * int supportedMode = RM_GetOpenKeyModesAll();
+ * if (supportedMode & REDISMODULE_OPEN_KEY_NOTOUCH) {
+ * // REDISMODULE_OPEN_KEY_NOTOUCH is supported
+ * } else{
+ * // REDISMODULE_OPEN_KEY_NOTOUCH is not supported
+ * }
+ */
+int RM_GetOpenKeyModesAll(void) {
+ return _REDISMODULE_OPEN_KEY_ALL;
+}
+
+/* Destroy a RedisModuleKey struct (freeing is the responsibility of the caller). */
+static void moduleCloseKey(RedisModuleKey *key) {
+ int signal = SHOULD_SIGNAL_MODIFIED_KEYS(key->ctx);
+ if ((key->mode & REDISMODULE_WRITE) && signal)
+ signalModifiedKey(key->ctx->client,key->db,key->key);
+ if (key->value) {
+ if (key->iter) moduleFreeKeyIterator(key);
+ switch (key->value->type) {
+ case OBJ_ZSET:
+ RM_ZsetRangeStop(key);
+ break;
+ case OBJ_STREAM:
+ if (key->u.stream.signalready)
+ /* One or more RM_StreamAdd() have been done. */
+ signalKeyAsReady(key->db, key->key, OBJ_STREAM);
+ break;
+ }
+ }
+ serverAssert(key->iter == NULL);
+ decrRefCount(key->key);
+}
+
+/* Close a key handle. */
+void RM_CloseKey(RedisModuleKey *key) {
+ if (key == NULL) return;
+ moduleCloseKey(key);
+ autoMemoryFreed(key->ctx,REDISMODULE_AM_KEY,key);
+ zfree(key);
+}
+
+/* Return the type of the key. If the key pointer is NULL then
+ * REDISMODULE_KEYTYPE_EMPTY is returned. */
+int RM_KeyType(RedisModuleKey *key) {
+ if (key == NULL || key->value == NULL) return REDISMODULE_KEYTYPE_EMPTY;
+ /* We map between defines so that we are free to change the internal
+ * defines as desired. */
+ switch(key->value->type) {
+ case OBJ_STRING: return REDISMODULE_KEYTYPE_STRING;
+ case OBJ_LIST: return REDISMODULE_KEYTYPE_LIST;
+ case OBJ_SET: return REDISMODULE_KEYTYPE_SET;
+ case OBJ_ZSET: return REDISMODULE_KEYTYPE_ZSET;
+ case OBJ_HASH: return REDISMODULE_KEYTYPE_HASH;
+ case OBJ_MODULE: return REDISMODULE_KEYTYPE_MODULE;
+ case OBJ_STREAM: return REDISMODULE_KEYTYPE_STREAM;
+ default: return REDISMODULE_KEYTYPE_EMPTY;
+ }
+}
+
+/* Return the length of the value associated with the key.
+ * For strings this is the length of the string. For all the other types
+ * is the number of elements (just counting keys for hashes).
+ *
+ * If the key pointer is NULL or the key is empty, zero is returned. */
+size_t RM_ValueLength(RedisModuleKey *key) {
+ if (key == NULL || key->value == NULL) return 0;
+ switch(key->value->type) {
+ case OBJ_STRING: return stringObjectLen(key->value);
+ case OBJ_LIST: return listTypeLength(key->value);
+ case OBJ_SET: return setTypeSize(key->value);
+ case OBJ_ZSET: return zsetLength(key->value);
+ case OBJ_HASH: return hashTypeLength(key->value);
+ case OBJ_STREAM: return streamLength(key->value);
+ default: return 0;
+ }
+}
+
+/* If the key is open for writing, remove it, and setup the key to
+ * accept new writes as an empty key (that will be created on demand).
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing REDISMODULE_ERR is returned. */
+int RM_DeleteKey(RedisModuleKey *key) {
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value) {
+ dbDelete(key->db,key->key);
+ key->value = NULL;
+ }
+ return REDISMODULE_OK;
+}
+
+/* If the key is open for writing, unlink it (that is delete it in a
+ * non-blocking way, not reclaiming memory immediately) and setup the key to
+ * accept new writes as an empty key (that will be created on demand).
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing REDISMODULE_ERR is returned. */
+int RM_UnlinkKey(RedisModuleKey *key) {
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value) {
+ dbAsyncDelete(key->db,key->key);
+ key->value = NULL;
+ }
+ return REDISMODULE_OK;
+}
+
+/* Return the key expire value, as milliseconds of remaining TTL.
+ * If no TTL is associated with the key or if the key is empty,
+ * REDISMODULE_NO_EXPIRE is returned. */
+mstime_t RM_GetExpire(RedisModuleKey *key) {
+ mstime_t expire = getExpire(key->db,key->key);
+ if (expire == -1 || key->value == NULL)
+ return REDISMODULE_NO_EXPIRE;
+ expire -= commandTimeSnapshot();
+ return expire >= 0 ? expire : 0;
+}
+
+/* Set a new expire for the key. If the special expire
+ * REDISMODULE_NO_EXPIRE is set, the expire is cancelled if there was
+ * one (the same as the PERSIST command).
+ *
+ * Note that the expire must be provided as a positive integer representing
+ * the number of milliseconds of TTL the key should have.
+ *
+ * The function returns REDISMODULE_OK on success or REDISMODULE_ERR if
+ * the key was not open for writing or is an empty key. */
+int RM_SetExpire(RedisModuleKey *key, mstime_t expire) {
+ if (!(key->mode & REDISMODULE_WRITE) || key->value == NULL || (expire < 0 && expire != REDISMODULE_NO_EXPIRE))
+ return REDISMODULE_ERR;
+ if (expire != REDISMODULE_NO_EXPIRE) {
+ expire += commandTimeSnapshot();
+ setExpire(key->ctx->client,key->db,key->key,expire);
+ } else {
+ removeExpire(key->db,key->key);
+ }
+ return REDISMODULE_OK;
+}
+
+/* Return the key expire value, as absolute Unix timestamp.
+ * If no TTL is associated with the key or if the key is empty,
+ * REDISMODULE_NO_EXPIRE is returned. */
+mstime_t RM_GetAbsExpire(RedisModuleKey *key) {
+ mstime_t expire = getExpire(key->db,key->key);
+ if (expire == -1 || key->value == NULL)
+ return REDISMODULE_NO_EXPIRE;
+ return expire;
+}
+
+/* Set a new expire for the key. If the special expire
+ * REDISMODULE_NO_EXPIRE is set, the expire is cancelled if there was
+ * one (the same as the PERSIST command).
+ *
+ * Note that the expire must be provided as a positive integer representing
+ * the absolute Unix timestamp the key should have.
+ *
+ * The function returns REDISMODULE_OK on success or REDISMODULE_ERR if
+ * the key was not open for writing or is an empty key. */
+int RM_SetAbsExpire(RedisModuleKey *key, mstime_t expire) {
+ if (!(key->mode & REDISMODULE_WRITE) || key->value == NULL || (expire < 0 && expire != REDISMODULE_NO_EXPIRE))
+ return REDISMODULE_ERR;
+ if (expire != REDISMODULE_NO_EXPIRE) {
+ setExpire(key->ctx->client,key->db,key->key,expire);
+ } else {
+ removeExpire(key->db,key->key);
+ }
+ return REDISMODULE_OK;
+}
+
+/* Performs similar operation to FLUSHALL, and optionally start a new AOF file (if enabled)
+ * If restart_aof is true, you must make sure the command that triggered this call is not
+ * propagated to the AOF file.
+ * When async is set to true, db contents will be freed by a background thread. */
+void RM_ResetDataset(int restart_aof, int async) {
+ if (restart_aof && server.aof_state != AOF_OFF) stopAppendOnly();
+ flushAllDataAndResetRDB((async? EMPTYDB_ASYNC: EMPTYDB_NO_FLAGS) | EMPTYDB_NOFUNCTIONS);
+ if (server.aof_enabled && restart_aof) restartAOFAfterSYNC();
+}
+
+/* Returns the number of keys in the current db. */
+unsigned long long RM_DbSize(RedisModuleCtx *ctx) {
+ return dictSize(ctx->client->db->dict);
+}
+
+/* Returns a name of a random key, or NULL if current db is empty. */
+RedisModuleString *RM_RandomKey(RedisModuleCtx *ctx) {
+ robj *key = dbRandomKey(ctx->client->db);
+ autoMemoryAdd(ctx,REDISMODULE_AM_STRING,key);
+ return key;
+}
+
/* Returns the name of the key currently being processed.
 * The returned string is owned by the core: the module must not free it.
 * NOTE(review): presumably only valid for the duration of the callback
 * that received this RedisModuleKeyOptCtx — confirm against callers. */
const RedisModuleString *RM_GetKeyNameFromOptCtx(RedisModuleKeyOptCtx *ctx) {
    return ctx->from_key;
}
+
/* Returns the name of the target key currently being processed (e.g. the
 * destination of a two-key operation). Owned by the core: do not free. */
const RedisModuleString *RM_GetToKeyNameFromOptCtx(RedisModuleKeyOptCtx *ctx) {
    return ctx->to_key;
}
+
/* Returns the database id the source key currently being processed
 * belongs to. */
int RM_GetDbIdFromOptCtx(RedisModuleKeyOptCtx *ctx) {
    return ctx->from_dbid;
}
+
/* Returns the database id of the target key currently being processed. */
int RM_GetToDbIdFromOptCtx(RedisModuleKeyOptCtx *ctx) {
    return ctx->to_dbid;
}
+/* --------------------------------------------------------------------------
+ * ## Key API for String type
+ *
+ * See also RM_ValueLength(), which returns the length of a string.
+ * -------------------------------------------------------------------------- */
+
+/* If the key is open for writing, set the specified string 'str' as the
+ * value of the key, deleting the old value if any.
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing or there is an active iterator, REDISMODULE_ERR is returned. */
+int RM_StringSet(RedisModuleKey *key, RedisModuleString *str) {
+ if (!(key->mode & REDISMODULE_WRITE) || key->iter) return REDISMODULE_ERR;
+ RM_DeleteKey(key);
+ setKey(key->ctx->client,key->db,key->key,str,SETKEY_NO_SIGNAL);
+ key->value = str;
+ return REDISMODULE_OK;
+}
+
+/* Prepare the key associated string value for DMA access, and returns
+ * a pointer and size (by reference), that the user can use to read or
+ * modify the string in-place accessing it directly via pointer.
+ *
+ * The 'mode' is composed by bitwise OR-ing the following flags:
+ *
+ * REDISMODULE_READ -- Read access
+ * REDISMODULE_WRITE -- Write access
+ *
+ * If the DMA is not requested for writing, the pointer returned should
+ * only be accessed in a read-only fashion.
+ *
+ * On error (wrong type) NULL is returned.
+ *
+ * DMA access rules:
+ *
+ * 1. No other key writing function should be called since the moment
+ * the pointer is obtained, for all the time we want to use DMA access
+ * to read or modify the string.
+ *
+ * 2. Each time RM_StringTruncate() is called, to continue with the DMA
+ * access, RM_StringDMA() should be called again to re-obtain
+ * a new pointer and length.
+ *
+ * 3. If the returned pointer is not NULL, but the length is zero, no
+ * byte can be touched (the string is empty, or the key itself is empty)
+ * so a RM_StringTruncate() call should be used if there is to enlarge
+ * the string, and later call StringDMA() again to get the pointer.
+ */
+char *RM_StringDMA(RedisModuleKey *key, size_t *len, int mode) {
+ /* We need to return *some* pointer for empty keys, we just return
+ * a string literal pointer, that is the advantage to be mapped into
+ * a read only memory page, so the module will segfault if a write
+ * attempt is performed. */
+ char *emptystring = "<dma-empty-string>";
+ if (key->value == NULL) {
+ *len = 0;
+ return emptystring;
+ }
+
+ if (key->value->type != OBJ_STRING) return NULL;
+
+ /* For write access, and even for read access if the object is encoded,
+ * we unshare the string (that has the side effect of decoding it). */
+ if ((mode & REDISMODULE_WRITE) || key->value->encoding != OBJ_ENCODING_RAW)
+ key->value = dbUnshareStringValue(key->db, key->key, key->value);
+
+ *len = sdslen(key->value->ptr);
+ return key->value->ptr;
+}
+
/* If the key is open for writing and is of string type, resize it, padding
 * with zero bytes if the new length is greater than the old one.
 *
 * After this call, RM_StringDMA() must be called again to continue
 * DMA access with the new pointer.
 *
 * The function returns REDISMODULE_OK on success, and REDISMODULE_ERR on
 * error, that is, the key is not open for writing, is not a string
 * or resizing for more than 512 MB is requested.
 *
 * If the key is empty, a string key is created with the new string value
 * unless the new length value requested is zero. */
int RM_StringTruncate(RedisModuleKey *key, size_t newlen) {
    if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
    if (key->value && key->value->type != OBJ_STRING) return REDISMODULE_ERR;
    /* 512 MB is the hard limit for Redis string values. */
    if (newlen > 512*1024*1024) return REDISMODULE_ERR;

    /* Empty key and new len set to 0. Just return REDISMODULE_OK without
     * doing anything. */
    if (key->value == NULL && newlen == 0) return REDISMODULE_OK;

    if (key->value == NULL) {
        /* Empty key: create it with the new size (zero-filled sds). */
        robj *o = createObject(OBJ_STRING,sdsnewlen(NULL, newlen));
        setKey(key->ctx->client,key->db,key->key,o,SETKEY_NO_SIGNAL);
        key->value = o;
        decrRefCount(o); /* setKey() retained its own reference. */
    } else {
        /* Unshare and resize. Unsharing guarantees we own a raw sds we
         * are allowed to mutate in place. */
        key->value = dbUnshareStringValue(key->db, key->key, key->value);
        size_t curlen = sdslen(key->value->ptr);
        if (newlen > curlen) {
            /* Grow: the new tail bytes are zeroed. */
            key->value->ptr = sdsgrowzero(key->value->ptr,newlen);
        } else if (newlen < curlen) {
            /* Shrink in place, then reclaim the allocation if the string
             * now wastes more than half of it. */
            sdssubstr(key->value->ptr,0,newlen);
            /* If the string is too wasteful, reallocate it. */
            if (sdslen(key->value->ptr) < sdsavail(key->value->ptr))
                key->value->ptr = sdsRemoveFreeSpace(key->value->ptr, 0);
        }
    }
    return REDISMODULE_OK;
}
+
+/* --------------------------------------------------------------------------
+ * ## Key API for List type
+ *
+ * Many of the list functions access elements by index. Since a list is in
+ * essence a doubly-linked list, accessing elements by index is generally an
+ * O(N) operation. However, if elements are accessed sequentially or with
+ * indices close together, the functions are optimized to seek the index from
+ * the previous index, rather than seeking from the ends of the list.
+ *
+ * This enables iteration to be done efficiently using a simple for loop:
+ *
+ * long n = RM_ValueLength(key);
+ * for (long i = 0; i < n; i++) {
+ * RedisModuleString *elem = RedisModule_ListGet(key, i);
+ * // Do stuff...
+ * }
+ *
+ * Note that after modifying a list using RM_ListPop, RM_ListSet or
+ * RM_ListInsert, the internal iterator is invalidated so the next operation
+ * will require a linear seek.
+ *
+ * Modifying a list in any other way, for example using RM_Call(), while a key
+ * is open will confuse the internal iterator and may cause trouble if the key
+ * is used after such modifications. The key must be reopened in this case.
+ *
+ * See also RM_ValueLength(), which returns the length of a list.
+ * -------------------------------------------------------------------------- */
+
+/* Seeks the key's internal list iterator to the given index. On success, 1 is
+ * returned and key->iter, key->u.list.entry and key->u.list.index are set. On
+ * failure, 0 is returned and errno is set as required by the list API
+ * functions. */
+int moduleListIteratorSeek(RedisModuleKey *key, long index, int mode) {
+ if (!key) {
+ errno = EINVAL;
+ return 0;
+ } else if (!key->value || key->value->type != OBJ_LIST) {
+ errno = ENOTSUP;
+ return 0;
+ } if (!(key->mode & mode)) {
+ errno = EBADF;
+ return 0;
+ }
+
+ long length = listTypeLength(key->value);
+ if (index < -length || index >= length) {
+ errno = EDOM; /* Invalid index */
+ return 0;
+ }
+
+ if (key->iter == NULL) {
+ /* No existing iterator. Create one. */
+ key->iter = listTypeInitIterator(key->value, index, LIST_TAIL);
+ serverAssert(key->iter != NULL);
+ serverAssert(listTypeNext(key->iter, &key->u.list.entry));
+ key->u.list.index = index;
+ return 1;
+ }
+
+ /* There's an existing iterator. Make sure the requested index has the same
+ * sign as the iterator's index. */
+ if (index < 0 && key->u.list.index >= 0) index += length;
+ else if (index >= 0 && key->u.list.index < 0) index -= length;
+
+ if (index == key->u.list.index) return 1; /* We're done. */
+
+ /* Seek the iterator to the requested index. */
+ unsigned char dir = key->u.list.index < index ? LIST_TAIL : LIST_HEAD;
+ listTypeSetIteratorDirection(key->iter, &key->u.list.entry, dir);
+ while (key->u.list.index != index) {
+ serverAssert(listTypeNext(key->iter, &key->u.list.entry));
+ key->u.list.index += dir == LIST_HEAD ? -1 : 1;
+ }
+ return 1;
+}
+
+/* Push an element into a list, on head or tail depending on 'where' argument
+ * (REDISMODULE_LIST_HEAD or REDISMODULE_LIST_TAIL). If the key refers to an
+ * empty key opened for writing, the key is created. On success, REDISMODULE_OK
+ * is returned. On failure, REDISMODULE_ERR is returned and `errno` is set as
+ * follows:
+ *
+ * - EINVAL if key or ele is NULL.
+ * - ENOTSUP if the key is of another type than list.
+ * - EBADF if the key is not opened for writing.
+ *
+ * Note: Before Redis 7.0, `errno` was not set by this function. */
+int RM_ListPush(RedisModuleKey *key, int where, RedisModuleString *ele) {
+ if (!key || !ele) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ } else if (key->value != NULL && key->value->type != OBJ_LIST) {
+ errno = ENOTSUP;
+ return REDISMODULE_ERR;
+ } if (!(key->mode & REDISMODULE_WRITE)) {
+ errno = EBADF;
+ return REDISMODULE_ERR;
+ }
+
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value && key->value->type != OBJ_LIST) return REDISMODULE_ERR;
+ if (key->iter) moduleFreeKeyIterator(key);
+ if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_LIST);
+ listTypeTryConversionAppend(key->value, &ele, 0, 0, moduleFreeListIterator, key);
+ listTypePush(key->value, ele,
+ (where == REDISMODULE_LIST_HEAD) ? LIST_HEAD : LIST_TAIL);
+ return REDISMODULE_OK;
+}
+
+/* Pop an element from the list, and returns it as a module string object
+ * that the user should be free with RM_FreeString() or by enabling
+ * automatic memory. The `where` argument specifies if the element should be
+ * popped from the beginning or the end of the list (REDISMODULE_LIST_HEAD or
+ * REDISMODULE_LIST_TAIL). On failure, the command returns NULL and sets
+ * `errno` as follows:
+ *
+ * - EINVAL if key is NULL.
+ * - ENOTSUP if the key is empty or of another type than list.
+ * - EBADF if the key is not opened for writing.
+ *
+ * Note: Before Redis 7.0, `errno` was not set by this function. */
+RedisModuleString *RM_ListPop(RedisModuleKey *key, int where) {
+ if (!key) {
+ errno = EINVAL;
+ return NULL;
+ } else if (key->value == NULL || key->value->type != OBJ_LIST) {
+ errno = ENOTSUP;
+ return NULL;
+ } else if (!(key->mode & REDISMODULE_WRITE)) {
+ errno = EBADF;
+ return NULL;
+ }
+ if (key->iter) moduleFreeKeyIterator(key);
+ robj *ele = listTypePop(key->value,
+ (where == REDISMODULE_LIST_HEAD) ? LIST_HEAD : LIST_TAIL);
+ robj *decoded = getDecodedObject(ele);
+ decrRefCount(ele);
+ if (!moduleDelKeyIfEmpty(key))
+ listTypeTryConversion(key->value, LIST_CONV_SHRINKING, moduleFreeListIterator, key);
+ autoMemoryAdd(key->ctx,REDISMODULE_AM_STRING,decoded);
+ return decoded;
+}
+
+/* Returns the element at index `index` in the list stored at `key`, like the
+ * LINDEX command. The element should be free'd using RM_FreeString() or using
+ * automatic memory management.
+ *
+ * The index is zero-based, so 0 means the first element, 1 the second element
+ * and so on. Negative indices can be used to designate elements starting at the
+ * tail of the list. Here, -1 means the last element, -2 means the penultimate
+ * and so forth.
+ *
+ * When no value is found at the given key and index, NULL is returned and
+ * `errno` is set as follows:
+ *
+ * - EINVAL if key is NULL.
+ * - ENOTSUP if the key is not a list.
+ * - EBADF if the key is not opened for reading.
+ * - EDOM if the index is not a valid index in the list.
+ */
+RedisModuleString *RM_ListGet(RedisModuleKey *key, long index) {
+ if (moduleListIteratorSeek(key, index, REDISMODULE_READ)) {
+ robj *elem = listTypeGet(&key->u.list.entry);
+ robj *decoded = getDecodedObject(elem);
+ decrRefCount(elem);
+ autoMemoryAdd(key->ctx, REDISMODULE_AM_STRING, decoded);
+ return decoded;
+ } else {
+ return NULL;
+ }
+}
+
+/* Replaces the element at index `index` in the list stored at `key`.
+ *
+ * The index is zero-based, so 0 means the first element, 1 the second element
+ * and so on. Negative indices can be used to designate elements starting at the
+ * tail of the list. Here, -1 means the last element, -2 means the penultimate
+ * and so forth.
+ *
+ * On success, REDISMODULE_OK is returned. On failure, REDISMODULE_ERR is
+ * returned and `errno` is set as follows:
+ *
+ * - EINVAL if key or value is NULL.
+ * - ENOTSUP if the key is not a list.
+ * - EBADF if the key is not opened for writing.
+ * - EDOM if the index is not a valid index in the list.
+ */
+int RM_ListSet(RedisModuleKey *key, long index, RedisModuleString *value) {
+ if (!value) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ }
+ if (!key->value || key->value->type != OBJ_LIST) {
+ errno = ENOTSUP;
+ return REDISMODULE_ERR;
+ }
+ listTypeTryConversionAppend(key->value, &value, 0, 0, moduleFreeListIterator, key);
+ if (moduleListIteratorSeek(key, index, REDISMODULE_WRITE)) {
+ listTypeReplace(&key->u.list.entry, value);
+ /* A note in quicklist.c forbids use of iterator after insert, so
+ * probably also after replace. */
+ moduleFreeKeyIterator(key);
+ return REDISMODULE_OK;
+ } else {
+ return REDISMODULE_ERR;
+ }
+}
+
+/* Inserts an element at the given index.
+ *
+ * The index is zero-based, so 0 means the first element, 1 the second element
+ * and so on. Negative indices can be used to designate elements starting at the
+ * tail of the list. Here, -1 means the last element, -2 means the penultimate
+ * and so forth. The index is the element's index after inserting it.
+ *
+ * On success, REDISMODULE_OK is returned. On failure, REDISMODULE_ERR is
+ * returned and `errno` is set as follows:
+ *
+ * - EINVAL if key or value is NULL.
+ * - ENOTSUP if the key of another type than list.
+ * - EBADF if the key is not opened for writing.
+ * - EDOM if the index is not a valid index in the list.
+ */
+int RM_ListInsert(RedisModuleKey *key, long index, RedisModuleString *value) {
+ if (!value) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ } else if (key != NULL && key->value == NULL &&
+ (index == 0 || index == -1)) {
+ /* Insert in empty key => push. */
+ return RM_ListPush(key, REDISMODULE_LIST_TAIL, value);
+ } else if (key != NULL && key->value != NULL &&
+ key->value->type == OBJ_LIST &&
+ (index == (long)listTypeLength(key->value) || index == -1)) {
+ /* Insert after the last element => push tail. */
+ return RM_ListPush(key, REDISMODULE_LIST_TAIL, value);
+ } else if (key != NULL && key->value != NULL &&
+ key->value->type == OBJ_LIST &&
+ (index == 0 || index == -(long)listTypeLength(key->value) - 1)) {
+ /* Insert before the first element => push head. */
+ return RM_ListPush(key, REDISMODULE_LIST_HEAD, value);
+ }
+ listTypeTryConversionAppend(key->value, &value, 0, 0, moduleFreeListIterator, key);
+ if (moduleListIteratorSeek(key, index, REDISMODULE_WRITE)) {
+ int where = index < 0 ? LIST_TAIL : LIST_HEAD;
+ listTypeInsert(&key->u.list.entry, value, where);
+ /* A note in quicklist.c forbids use of iterator after insert. */
+ moduleFreeKeyIterator(key);
+ return REDISMODULE_OK;
+ } else {
+ return REDISMODULE_ERR;
+ }
+}
+
/* Removes an element at the given index. The index is 0-based. A negative index
 * can also be used, counting from the end of the list.
 *
 * On success, REDISMODULE_OK is returned. On failure, REDISMODULE_ERR is
 * returned and `errno` is set as follows:
 *
 * - EINVAL if key or value is NULL.
 * - ENOTSUP if the key is not a list.
 * - EBADF if the key is not opened for writing.
 * - EDOM if the index is not a valid index in the list.
 */
int RM_ListDelete(RedisModuleKey *key, long index) {
    /* moduleListIteratorSeek() validates the key, checks the range and
     * positions key->iter on the entry to delete; it sets errno on failure. */
    if (moduleListIteratorSeek(key, index, REDISMODULE_WRITE)) {
        listTypeDelete(key->iter, &key->u.list.entry);
        /* Deleting the last element removes the key entirely. */
        if (moduleDelKeyIfEmpty(key)) return REDISMODULE_OK;
        listTypeTryConversion(key->value, LIST_CONV_SHRINKING, moduleFreeListIterator, key);
        if (!key->iter) return REDISMODULE_OK; /* Return ASAP if iterator has been freed */
        if (listTypeNext(key->iter, &key->u.list.entry)) {
            /* After delete entry at position 'index', we need to update
             * 'key->u.list.index' according to the following cases:
             * 1) [1, 2, 3] => dir: forward, index: 0 => [2, 3] => index: still 0
             * 2) [1, 2, 3] => dir: forward, index: -3 => [2, 3] => index: -2
             * 3) [1, 2, 3] => dir: reverse, index: 2 => [1, 2] => index: 1
             * 4) [1, 2, 3] => dir: reverse, index: -1 => [1, 2] => index: still -1 */
            listTypeIterator *li = key->iter;
            int reverse = li->direction == LIST_HEAD;
            if (key->u.list.index < 0)
                key->u.list.index += reverse ? 0 : 1;
            else
                key->u.list.index += reverse ? -1 : 0;
        } else {
            /* Reset list iterator if the next entry doesn't exist. */
            moduleFreeKeyIterator(key);
        }
        return REDISMODULE_OK;
    } else {
        return REDISMODULE_ERR;
    }
}
+
+/* --------------------------------------------------------------------------
+ * ## Key API for Sorted Set type
+ *
+ * See also RM_ValueLength(), which returns the length of a sorted set.
+ * -------------------------------------------------------------------------- */
+
+/* Conversion from/to public flags of the Modules API and our private flags,
+ * so that we have everything decoupled. */
+int moduleZsetAddFlagsToCoreFlags(int flags) {
+ int retflags = 0;
+ if (flags & REDISMODULE_ZADD_XX) retflags |= ZADD_IN_XX;
+ if (flags & REDISMODULE_ZADD_NX) retflags |= ZADD_IN_NX;
+ if (flags & REDISMODULE_ZADD_GT) retflags |= ZADD_IN_GT;
+ if (flags & REDISMODULE_ZADD_LT) retflags |= ZADD_IN_LT;
+ return retflags;
+}
+
+/* See previous function comment. */
+int moduleZsetAddFlagsFromCoreFlags(int flags) {
+ int retflags = 0;
+ if (flags & ZADD_OUT_ADDED) retflags |= REDISMODULE_ZADD_ADDED;
+ if (flags & ZADD_OUT_UPDATED) retflags |= REDISMODULE_ZADD_UPDATED;
+ if (flags & ZADD_OUT_NOP) retflags |= REDISMODULE_ZADD_NOP;
+ return retflags;
+}
+
+/* Add a new element into a sorted set, with the specified 'score'.
+ * If the element already exists, the score is updated.
+ *
+ * A new sorted set is created at value if the key is an empty open key
+ * setup for writing.
+ *
+ * Additional flags can be passed to the function via a pointer, the flags
+ * are both used to receive input and to communicate state when the function
+ * returns. 'flagsptr' can be NULL if no special flags are used.
+ *
+ * The input flags are:
+ *
+ * REDISMODULE_ZADD_XX: Element must already exist. Do nothing otherwise.
+ * REDISMODULE_ZADD_NX: Element must not exist. Do nothing otherwise.
+ * REDISMODULE_ZADD_GT: If element exists, new score must be greater than the current score.
+ * Do nothing otherwise. Can optionally be combined with XX.
+ * REDISMODULE_ZADD_LT: If element exists, new score must be less than the current score.
+ * Do nothing otherwise. Can optionally be combined with XX.
+ *
+ * The output flags are:
+ *
+ * REDISMODULE_ZADD_ADDED: The new element was added to the sorted set.
+ * REDISMODULE_ZADD_UPDATED: The score of the element was updated.
+ * REDISMODULE_ZADD_NOP: No operation was performed because XX or NX flags.
+ *
+ * On success the function returns REDISMODULE_OK. On the following errors
+ * REDISMODULE_ERR is returned:
+ *
+ * * The key was not opened for writing.
+ * * The key is of the wrong type.
+ * * 'score' double value is not a number (NaN).
+ */
+int RM_ZsetAdd(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr) {
+ int in_flags = 0, out_flags = 0;
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+ if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET);
+ if (flagsptr) in_flags = moduleZsetAddFlagsToCoreFlags(*flagsptr);
+ if (zsetAdd(key->value,score,ele->ptr,in_flags,&out_flags,NULL) == 0) {
+ if (flagsptr) *flagsptr = 0;
+ moduleDelKeyIfEmpty(key);
+ return REDISMODULE_ERR;
+ }
+ if (flagsptr) *flagsptr = moduleZsetAddFlagsFromCoreFlags(out_flags);
+ return REDISMODULE_OK;
+}
+
+/* Increment the score of the sorted set element 'ele', behaving otherwise
+ * like RM_ZsetAdd(). If the element does not exist it is added as if its
+ * previous score was zero.
+ *
+ * The input/output flags and the return value have the same meaning as in
+ * RM_ZsetAdd(), with one extra failure mode: REDISMODULE_ERR is returned
+ * also when 'score' is a valid double but adding it to the existing score
+ * results into a NaN (not a number) condition.
+ *
+ * The additional 'newscore' argument, if not NULL, is filled with the new
+ * score of the element after the increment, when no error is returned. */
+int RM_ZsetIncrby(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr, double *newscore) {
+    int core_in = 0, core_out = 0;
+    if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+    if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+    if (!key->value) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET);
+    if (flagsptr) core_in = moduleZsetAddFlagsToCoreFlags(*flagsptr);
+    core_in |= ZADD_IN_INCR; /* Turn the add into an increment. */
+    if (!zsetAdd(key->value,score,ele->ptr,core_in,&core_out,newscore)) {
+        /* On failure report no output flags and remove the key again if
+         * we just created it empty. */
+        if (flagsptr) *flagsptr = 0;
+        moduleDelKeyIfEmpty(key);
+        return REDISMODULE_ERR;
+    }
+    if (flagsptr) *flagsptr = moduleZsetAddFlagsFromCoreFlags(core_out);
+    return REDISMODULE_OK;
+}
+
+/* Remove the element 'ele' from the sorted set stored at 'key'.
+ *
+ * Returns REDISMODULE_OK if the operation could be executed, and
+ * REDISMODULE_ERR on one of the following conditions:
+ *
+ * * The key was not opened for writing.
+ * * The key is of the wrong type.
+ *
+ * Note that a REDISMODULE_OK return does NOT mean the element actually
+ * existed: to learn whether it was really removed, pass a non-NULL
+ * 'deleted' pointer, which is populated with 1 if the element was
+ * deleted and 0 otherwise. 'deleted' can be NULL if the caller does not
+ * care.
+ *
+ * Empty (open but valueless) keys are handled correctly by doing
+ * nothing. */
+int RM_ZsetRem(RedisModuleKey *key, RedisModuleString *ele, int *deleted) {
+    if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+    if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+    /* zsetDel() is only attempted when a value exists (short circuit). */
+    int removed = key->value != NULL && zsetDel(key->value,ele->ptr);
+    if (deleted) *deleted = removed;
+    if (removed) moduleDelKeyIfEmpty(key);
+    return REDISMODULE_OK;
+}
+
+/* On success retrieve into '*score' the double score associated with the
+ * sorted set element 'ele' and return REDISMODULE_OK. Otherwise
+ * REDISMODULE_ERR is returned to signal one of the following conditions:
+ *
+ * * There is no such element 'ele' in the sorted set.
+ * * The key is not a sorted set.
+ * * The key is an open empty key.
+ */
+int RM_ZsetScore(RedisModuleKey *key, RedisModuleString *ele, double *score) {
+    /* An empty open key or a value of a different type cannot contain
+     * the element. */
+    if (key->value == NULL || key->value->type != OBJ_ZSET)
+        return REDISMODULE_ERR;
+    return zsetScore(key->value,ele->ptr,score) == C_ERR ?
+           REDISMODULE_ERR : REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Key API for Sorted Set iterator
+ * -------------------------------------------------------------------------- */
+
+/* Reset the sorted set iterator state embedded in 'key' to the "no active
+ * iterator" state: no range type, no current node, and the end-of-range
+ * flag raised so that iteration calls report termination. */
+void zsetKeyReset(RedisModuleKey *key) {
+    key->u.zset.type = REDISMODULE_ZSET_RANGE_NONE;
+    key->u.zset.current = NULL;
+    key->u.zset.er = 1;
+}
+
+/* Stop a sorted set iteration. Safe to call when no iteration is active:
+ * on a non-zset or empty key it is a no-op. */
+void RM_ZsetRangeStop(RedisModuleKey *key) {
+    if (!key->value || key->value->type != OBJ_ZSET) return;
+    /* Free resources if needed. Only the lex range type has resources to
+     * release here (the parsed zlexrangespec). */
+    if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX)
+        zslFreeLexRange(&key->u.zset.lrs);
+    /* Setup sensible values so that misused iteration API calls when an
+     * iterator is not active will result into something more sensible
+     * than crashing. */
+    zsetKeyReset(key);
+}
+
+/* Return the "End of range" flag value to signal the end of the iteration. */
+int RM_ZsetRangeEndReached(RedisModuleKey *key) {
+    /* A non-zset or empty key reports the iteration as already over. */
+    if (!key->value || key->value->type != OBJ_ZSET) return 1;
+    return key->u.zset.er;
+}
+
+/* Helper function for RM_ZsetFirstInScoreRange() and RM_ZsetLastInScoreRange().
+ * Setup the sorted set iteration according to the specified score range
+ * (see the functions calling it for more info). If 'first' is true the
+ * first element in the range is used as a starting point for the iterator
+ * otherwise the last. Return REDISMODULE_OK on success otherwise
+ * REDISMODULE_ERR. */
+int zsetInitScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex, int first) {
+    if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+
+    /* Stop (and free the resources of) any previously active iteration
+     * before setting up the new one. */
+    RM_ZsetRangeStop(key);
+    key->u.zset.type = REDISMODULE_ZSET_RANGE_SCORE;
+    key->u.zset.er = 0;
+
+    /* Setup the range structure used by the sorted set core implementation
+     * in order to seek at the specified element. */
+    zrangespec *zrs = &key->u.zset.rs;
+    zrs->min = min;
+    zrs->max = max;
+    zrs->minex = minex;
+    zrs->maxex = maxex;
+
+    /* Seek the starting element according to the key encoding. */
+    if (key->value->encoding == OBJ_ENCODING_LISTPACK) {
+        key->u.zset.current = first ? zzlFirstInRange(key->value->ptr,zrs) :
+                                      zzlLastInRange(key->value->ptr,zrs);
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        zset *zs = key->value->ptr;
+        zskiplist *zsl = zs->zsl;
+        key->u.zset.current = first ? zslFirstInRange(zsl,zrs) :
+                                      zslLastInRange(zsl,zrs);
+    } else {
+        serverPanic("Unsupported zset encoding");
+    }
+    /* No element falls inside the range: flag the iterator as finished. */
+    if (key->u.zset.current == NULL) key->u.zset.er = 1;
+    return REDISMODULE_OK;
+}
+
+/* Setup a sorted set iterator seeking the first element in the specified
+ * range. Returns REDISMODULE_OK if the iterator was correctly initialized
+ * otherwise REDISMODULE_ERR is returned in the following conditions:
+ *
+ * 1. The value stored at key is not a sorted set or the key is empty.
+ *
+ * The range is specified according to the two double values 'min' and 'max'.
+ * Both can be infinite using the following two macros:
+ *
+ * * REDISMODULE_POSITIVE_INFINITE for positive infinite value
+ * * REDISMODULE_NEGATIVE_INFINITE for negative infinite value
+ *
+ * 'minex' and 'maxex' parameters, if true, respectively setup a range
+ * where the min and max value are exclusive (not included) instead of
+ * inclusive. */
+int RM_ZsetFirstInScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex) {
+    /* 'first' = 1: start from the lowest-scored element of the range. */
+    return zsetInitScoreRange(key,min,max,minex,maxex,1);
+}
+
+/* Exactly like RedisModule_ZsetFirstInScoreRange() but the last element of
+ * the range is selected for the start of the iteration instead. */
+int RM_ZsetLastInScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex) {
+    /* 'first' = 0: start from the highest-scored element of the range. */
+    return zsetInitScoreRange(key,min,max,minex,maxex,0);
+}
+
+/* Helper function for RM_ZsetFirstInLexRange() and RM_ZsetLastInLexRange().
+ * Setup the sorted set iteration according to the specified lexicographical
+ * range (see the functions calling it for more info). If 'first' is true the
+ * first element in the range is used as a starting point for the iterator
+ * otherwise the last. Return REDISMODULE_OK on success otherwise
+ * REDISMODULE_ERR.
+ *
+ * Note that this function takes 'min' and 'max' in the same form of the
+ * Redis ZRANGEBYLEX command. */
+int zsetInitLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max, int first) {
+    if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+
+    /* Stop (and free the resources of) any previously active iteration
+     * before setting up the new one. */
+    RM_ZsetRangeStop(key);
+    key->u.zset.er = 0;
+
+    /* Setup the range structure used by the sorted set core implementation
+     * in order to seek at the specified element. */
+    zlexrangespec *zlrs = &key->u.zset.lrs;
+    if (zslParseLexRange(min, max, zlrs) == C_ERR) return REDISMODULE_ERR;
+
+    /* Set the range type to lex only after successfully parsing the range,
+     * otherwise we don't want the zlexrangespec to be freed. */
+    key->u.zset.type = REDISMODULE_ZSET_RANGE_LEX;
+
+    /* Seek the starting element according to the key encoding. */
+    if (key->value->encoding == OBJ_ENCODING_LISTPACK) {
+        key->u.zset.current = first ? zzlFirstInLexRange(key->value->ptr,zlrs) :
+                                      zzlLastInLexRange(key->value->ptr,zlrs);
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        zset *zs = key->value->ptr;
+        zskiplist *zsl = zs->zsl;
+        key->u.zset.current = first ? zslFirstInLexRange(zsl,zlrs) :
+                                      zslLastInLexRange(zsl,zlrs);
+    } else {
+        serverPanic("Unsupported zset encoding");
+    }
+    /* No element falls inside the range: flag the iterator as finished. */
+    if (key->u.zset.current == NULL) key->u.zset.er = 1;
+
+    return REDISMODULE_OK;
+}
+
+/* Setup a sorted set iterator seeking the first element in the specified
+ * lexicographical range. Returns REDISMODULE_OK if the iterator was correctly
+ * initialized otherwise REDISMODULE_ERR is returned in the
+ * following conditions:
+ *
+ * 1. The value stored at key is not a sorted set or the key is empty.
+ * 2. The lexicographical range 'min' and 'max' format is invalid.
+ *
+ * 'min' and 'max' should be provided as two RedisModuleString objects
+ * in the same format as the parameters passed to the ZRANGEBYLEX command.
+ * The function does not take ownership of the objects, so they can be released
+ * ASAP after the iterator is setup. */
+int RM_ZsetFirstInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) {
+    /* 'first' = 1: start from the lexicographically smallest element. */
+    return zsetInitLexRange(key,min,max,1);
+}
+
+/* Exactly like RedisModule_ZsetFirstInLexRange() but the last element of
+ * the range is selected for the start of the iteration instead. */
+int RM_ZsetLastInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) {
+    /* 'first' = 0: start from the lexicographically greatest element. */
+    return zsetInitLexRange(key,min,max,0);
+}
+
+/* Return the current sorted set element of an active sorted set iterator
+ * or NULL if the range specified in the iterator does not include any
+ * element. If 'score' is not NULL it is filled with the element score. */
+RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score) {
+    RedisModuleString *str;
+
+    if (!key->value || key->value->type != OBJ_ZSET) return NULL;
+    if (key->u.zset.current == NULL) return NULL;
+    if (key->value->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *eptr, *sptr;
+        eptr = key->u.zset.current;
+        sds ele = lpGetObject(eptr);
+        if (score) {
+            /* In the listpack encoding the score entry immediately
+             * follows the element entry. */
+            sptr = lpNext(key->value->ptr,eptr);
+            *score = zzlGetScore(sptr);
+        }
+        /* createObject() takes ownership of the 'ele' sds string. */
+        str = createObject(OBJ_STRING,ele);
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        zskiplistNode *ln = key->u.zset.current;
+        if (score) *score = ln->score;
+        /* The skiplist node owns its sds: copy it into a new string. */
+        str = createStringObject(ln->ele,sdslen(ln->ele));
+    } else {
+        serverPanic("Unsupported zset encoding");
+    }
+    /* Track the returned string in the context's automatic memory pool. */
+    autoMemoryAdd(key->ctx,REDISMODULE_AM_STRING,str);
+    return str;
+}
+
+/* Go to the next element of the sorted set iterator. Returns 1 if there was
+ * a next element, 0 if we are already at the latest element or the range
+ * does not include any item at all. */
+int RM_ZsetRangeNext(RedisModuleKey *key) {
+    if (!key->value || key->value->type != OBJ_ZSET) return 0;
+    if (!key->u.zset.type || !key->u.zset.current) return 0; /* No active iterator. */
+
+    if (key->value->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *zl = key->value->ptr;
+        unsigned char *eptr = key->u.zset.current;
+        unsigned char *next;
+        next = lpNext(zl,eptr); /* Skip element. */
+        if (next) next = lpNext(zl,next); /* Skip score. */
+        if (next == NULL) {
+            key->u.zset.er = 1;
+            return 0;
+        } else {
+            /* Are we still within the range? */
+            if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE) {
+                /* Fetch the next element score for the
+                 * range check. */
+                unsigned char *saved_next = next;
+                next = lpNext(zl,next); /* Skip next element. */
+                double score = zzlGetScore(next); /* Obtain the next score. */
+                if (!zslValueLteMax(score,&key->u.zset.rs)) {
+                    key->u.zset.er = 1;
+                    return 0;
+                }
+                /* Restore 'next' to the element entry: 'current' must
+                 * always point at an element, never at a score. */
+                next = saved_next;
+            } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) {
+                if (!zzlLexValueLteMax(next,&key->u.zset.lrs)) {
+                    key->u.zset.er = 1;
+                    return 0;
+                }
+            }
+            key->u.zset.current = next;
+            return 1;
+        }
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        /* In the skiplist encoding "next" is the level-0 forward link. */
+        zskiplistNode *ln = key->u.zset.current, *next = ln->level[0].forward;
+        if (next == NULL) {
+            key->u.zset.er = 1;
+            return 0;
+        } else {
+            /* Are we still within the range? */
+            if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE &&
+                !zslValueLteMax(next->score,&key->u.zset.rs))
+            {
+                key->u.zset.er = 1;
+                return 0;
+            } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) {
+                if (!zslLexValueLteMax(next->ele,&key->u.zset.lrs)) {
+                    key->u.zset.er = 1;
+                    return 0;
+                }
+            }
+            key->u.zset.current = next;
+            return 1;
+        }
+    } else {
+        serverPanic("Unsupported zset encoding");
+    }
+}
+
+/* Go to the previous element of the sorted set iterator. Returns 1 if there was
+ * a previous element, 0 if we are already at the first element or the range
+ * does not include any item at all. */
+int RM_ZsetRangePrev(RedisModuleKey *key) {
+    if (!key->value || key->value->type != OBJ_ZSET) return 0;
+    if (!key->u.zset.type || !key->u.zset.current) return 0; /* No active iterator. */
+
+    if (key->value->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *zl = key->value->ptr;
+        unsigned char *eptr = key->u.zset.current;
+        unsigned char *prev;
+        prev = lpPrev(zl,eptr); /* Go back to previous score. */
+        if (prev) prev = lpPrev(zl,prev); /* Back to previous ele. */
+        if (prev == NULL) {
+            key->u.zset.er = 1;
+            return 0;
+        } else {
+            /* Are we still within the range? */
+            if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE) {
+                /* Fetch the previous element score for the
+                 * range check. */
+                unsigned char *saved_prev = prev;
+                prev = lpNext(zl,prev); /* Skip element to get the score.*/
+                double score = zzlGetScore(prev); /* Obtain the prev score. */
+                if (!zslValueGteMin(score,&key->u.zset.rs)) {
+                    key->u.zset.er = 1;
+                    return 0;
+                }
+                /* Restore 'prev' to the element entry: 'current' must
+                 * always point at an element, never at a score. */
+                prev = saved_prev;
+            } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) {
+                if (!zzlLexValueGteMin(prev,&key->u.zset.lrs)) {
+                    key->u.zset.er = 1;
+                    return 0;
+                }
+            }
+            key->u.zset.current = prev;
+            return 1;
+        }
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        /* In the skiplist encoding "previous" is the backward link. */
+        zskiplistNode *ln = key->u.zset.current, *prev = ln->backward;
+        if (prev == NULL) {
+            key->u.zset.er = 1;
+            return 0;
+        } else {
+            /* Are we still within the range? */
+            if (key->u.zset.type == REDISMODULE_ZSET_RANGE_SCORE &&
+                !zslValueGteMin(prev->score,&key->u.zset.rs))
+            {
+                key->u.zset.er = 1;
+                return 0;
+            } else if (key->u.zset.type == REDISMODULE_ZSET_RANGE_LEX) {
+                if (!zslLexValueGteMin(prev->ele,&key->u.zset.lrs)) {
+                    key->u.zset.er = 1;
+                    return 0;
+                }
+            }
+            key->u.zset.current = prev;
+            return 1;
+        }
+    } else {
+        serverPanic("Unsupported zset encoding");
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * ## Key API for Hash type
+ *
+ * See also RM_ValueLength(), which returns the number of fields in a hash.
+ * -------------------------------------------------------------------------- */
+
+/* Set the field of the specified hash field to the specified value.
+ * If the key is an empty key open for writing, it is created with an empty
+ * hash value, in order to set the specified field.
+ *
+ * The function is variadic and the user must specify pairs of field
+ * names and values, both as RedisModuleString pointers (unless the
+ * CFIELD option is set, see later). At the end of the field/value-ptr pairs,
+ * NULL must be specified as last argument to signal the end of the arguments
+ * in the variadic function.
+ *
+ * Example to set the hash argv[1] to the value argv[2]:
+ *
+ * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1],argv[2],NULL);
+ *
+ * The function can also be used in order to delete fields (if they exist)
+ * by setting them to the specified value of REDISMODULE_HASH_DELETE:
+ *
+ * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1],
+ * REDISMODULE_HASH_DELETE,NULL);
+ *
+ * The behavior of the command changes with the specified flags, that can be
+ * set to REDISMODULE_HASH_NONE if no special behavior is needed.
+ *
+ * REDISMODULE_HASH_NX: The operation is performed only if the field was not
+ * already existing in the hash.
+ * REDISMODULE_HASH_XX: The operation is performed only if the field was
+ * already existing, so that a new value could be
+ *                           associated to an existing field, but no new fields
+ * are created.
+ * REDISMODULE_HASH_CFIELDS: The field names passed are null terminated C
+ * strings instead of RedisModuleString objects.
+ * REDISMODULE_HASH_COUNT_ALL: Include the number of inserted fields in the
+ * returned number, in addition to the number of
+ * updated and deleted fields. (Added in Redis
+ * 6.2.)
+ *
+ * Unless NX is specified, the command overwrites the old field value with
+ * the new one.
+ *
+ * When using REDISMODULE_HASH_CFIELDS, field names are reported using
+ * normal C strings, so for example to delete the field "foo" the following
+ * code can be used:
+ *
+ * RedisModule_HashSet(key,REDISMODULE_HASH_CFIELDS,"foo",
+ * REDISMODULE_HASH_DELETE,NULL);
+ *
+ * Return value:
+ *
+ * The number of fields existing in the hash prior to the call, which have been
+ * updated (its old value has been replaced by a new value) or deleted. If the
+ * flag REDISMODULE_HASH_COUNT_ALL is set, inserted fields not previously
+ * existing in the hash are also counted.
+ *
+ * If the return value is zero, `errno` is set (since Redis 6.2) as follows:
+ *
+ * - EINVAL if any unknown flags are set or if key is NULL.
+ * - ENOTSUP if the key is associated with a non Hash value.
+ * - EBADF if the key was not opened for writing.
+ * - ENOENT if no fields were counted as described under Return value above.
+ * This is not actually an error. The return value can be zero if all fields
+ * were just created and the COUNT_ALL flag was unset, or if changes were held
+ * back due to the NX and XX flags.
+ *
+ * NOTICE: The return value semantics of this function are very different
+ * between Redis 6.2 and older versions. Modules that use it should determine
+ * the Redis version and handle it accordingly.
+ */
+int RM_HashSet(RedisModuleKey *key, int flags, ...) {
+    va_list ap;
+    /* Validate arguments before touching the key. errno values follow the
+     * contract documented above: EINVAL (bad flags / NULL key), ENOTSUP
+     * (wrong type), EBADF (not opened for writing). */
+    if (!key || (flags & ~(REDISMODULE_HASH_NX |
+                           REDISMODULE_HASH_XX |
+                           REDISMODULE_HASH_CFIELDS |
+                           REDISMODULE_HASH_COUNT_ALL))) {
+        errno = EINVAL;
+        return 0;
+    } else if (key->value && key->value->type != OBJ_HASH) {
+        errno = ENOTSUP;
+        return 0;
+    } else if (!(key->mode & REDISMODULE_WRITE)) {
+        errno = EBADF;
+        return 0;
+    }
+    /* Lazily create the hash if the key is open but empty. */
+    if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_HASH);
+
+    int count = 0; /* Fields updated/deleted (plus inserted w/ COUNT_ALL). */
+    va_start(ap, flags);
+    while(1) {
+        RedisModuleString *field, *value;
+        /* Get the field and value objects. With CFIELDS the field is a C
+         * string that we wrap into a temporary robj we own. */
+        if (flags & REDISMODULE_HASH_CFIELDS) {
+            char *cfield = va_arg(ap,char*);
+            if (cfield == NULL) break;
+            field = createRawStringObject(cfield,strlen(cfield));
+        } else {
+            field = va_arg(ap,RedisModuleString*);
+            if (field == NULL) break;
+        }
+        value = va_arg(ap,RedisModuleString*);
+
+        /* Handle XX and NX: skip the pair when the existence precondition
+         * is not satisfied. */
+        if (flags & (REDISMODULE_HASH_XX|REDISMODULE_HASH_NX)) {
+            int exists = hashTypeExists(key->value, field->ptr);
+            if (((flags & REDISMODULE_HASH_XX) && !exists) ||
+                ((flags & REDISMODULE_HASH_NX) && exists))
+            {
+                if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field);
+                continue;
+            }
+        }
+
+        /* Handle deletion if value is REDISMODULE_HASH_DELETE. */
+        if (value == REDISMODULE_HASH_DELETE) {
+            count += hashTypeDelete(key->value, field->ptr);
+            if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field);
+            continue;
+        }
+
+        int low_flags = HASH_SET_COPY;
+        /* If CFIELDS is active, we can pass the ownership of the
+         * SDS object to the low level function that sets the field
+         * to avoid a useless copy. */
+        if (flags & REDISMODULE_HASH_CFIELDS)
+            low_flags |= HASH_SET_TAKE_FIELD;
+
+        /* Possibly convert the listpack-encoded hash before inserting. */
+        robj *argv[2] = {field,value};
+        hashTypeTryConversion(key->value,argv,0,1);
+        int updated = hashTypeSet(key->value, field->ptr, value->ptr, low_flags);
+        count += (flags & REDISMODULE_HASH_COUNT_ALL) ? 1 : updated;
+
+        /* If CFIELDS is active, SDS string ownership is now of hashTypeSet(),
+         * however we still have to release the 'field' object shell. */
+        if (flags & REDISMODULE_HASH_CFIELDS) {
+            field->ptr = NULL; /* Prevent the SDS string from being freed. */
+            decrRefCount(field);
+        }
+    }
+    va_end(ap);
+    /* If everything was deleted, remove the now-empty hash key. */
+    moduleDelKeyIfEmpty(key);
+    /* Zero changes is reported via ENOENT (not necessarily an error,
+     * see the function documentation). */
+    if (count == 0) errno = ENOENT;
+    return count;
+}
+
+/* Get fields from a hash value. This function is called using a variable
+ * number of arguments, alternating a field name (as a RedisModuleString
+ * pointer) with a pointer to a RedisModuleString pointer, that is set to the
+ * value of the field if the field exists, or NULL if the field does not exist.
+ * At the end of the field/value-ptr pairs, NULL must be specified as last
+ * argument to signal the end of the arguments in the variadic function.
+ *
+ * This is an example usage:
+ *
+ * RedisModuleString *first, *second;
+ * RedisModule_HashGet(mykey,REDISMODULE_HASH_NONE,argv[1],&first,
+ * argv[2],&second,NULL);
+ *
+ * As with RedisModule_HashSet() the behavior of the command can be specified
+ * passing flags different than REDISMODULE_HASH_NONE:
+ *
+ * REDISMODULE_HASH_CFIELDS: field names as null terminated C strings.
+ *
+ * REDISMODULE_HASH_EXISTS: instead of setting the value of the field
+ * expecting a RedisModuleString pointer to pointer, the function just
+ * reports if the field exists or not and expects an integer pointer
+ * as the second element of each pair.
+ *
+ * Example of REDISMODULE_HASH_CFIELDS:
+ *
+ * RedisModuleString *username, *hashedpass;
+ * RedisModule_HashGet(mykey,REDISMODULE_HASH_CFIELDS,"username",&username,"hp",&hashedpass, NULL);
+ *
+ * Example of REDISMODULE_HASH_EXISTS:
+ *
+ * int exists;
+ * RedisModule_HashGet(mykey,REDISMODULE_HASH_EXISTS,argv[1],&exists,NULL);
+ *
+ * The function returns REDISMODULE_OK on success and REDISMODULE_ERR if
+ * the key is not a hash value.
+ *
+ * Memory management:
+ *
+ * The returned RedisModuleString objects should be released with
+ * RedisModule_FreeString(), or by enabling automatic memory management.
+ */
+int RM_HashGet(RedisModuleKey *key, int flags, ...) {
+    va_list ap;
+    /* Only the value type is validated: an open empty key is fine and
+     * simply yields NULL values / zero existence flags below. */
+    if (key->value && key->value->type != OBJ_HASH) return REDISMODULE_ERR;
+
+    va_start(ap, flags);
+    while(1) {
+        RedisModuleString *field, **valueptr;
+        int *existsptr;
+        /* Get the field object and the value pointer to pointer. */
+        if (flags & REDISMODULE_HASH_CFIELDS) {
+            char *cfield = va_arg(ap,char*);
+            if (cfield == NULL) break;
+            field = createRawStringObject(cfield,strlen(cfield));
+        } else {
+            field = va_arg(ap,RedisModuleString*);
+            if (field == NULL) break;
+        }
+
+        /* Query the hash for existence or value object. */
+        if (flags & REDISMODULE_HASH_EXISTS) {
+            existsptr = va_arg(ap,int*);
+            if (key->value)
+                *existsptr = hashTypeExists(key->value,field->ptr);
+            else
+                *existsptr = 0;
+        } else {
+            valueptr = va_arg(ap,RedisModuleString**);
+            if (key->value) {
+                *valueptr = hashTypeGetValueObject(key->value,field->ptr);
+                if (*valueptr) {
+                    /* Normalize to a raw (decoded) string object so the
+                     * caller always receives a plain string. */
+                    robj *decoded = getDecodedObject(*valueptr);
+                    decrRefCount(*valueptr);
+                    *valueptr = decoded;
+                }
+                /* Returned values participate in automatic memory
+                 * management. */
+                if (*valueptr)
+                    autoMemoryAdd(key->ctx,REDISMODULE_AM_STRING,*valueptr);
+            } else {
+                *valueptr = NULL;
+            }
+        }
+
+        /* Cleanup: with CFIELDS we own the temporary field object. */
+        if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field);
+    }
+    va_end(ap);
+    return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Key API for Stream type
+ *
+ * For an introduction to streams, see https://redis.io/topics/streams-intro.
+ *
+ * The type RedisModuleStreamID, which is used in stream functions, is a struct
+ * with two 64-bit fields and is defined as
+ *
+ * typedef struct RedisModuleStreamID {
+ * uint64_t ms;
+ * uint64_t seq;
+ * } RedisModuleStreamID;
+ *
+ * See also RM_ValueLength(), which returns the length of a stream, and the
+ * conversion functions RM_StringToStreamID() and RM_CreateStringFromStreamID().
+ * -------------------------------------------------------------------------- */
+
+/* Adds an entry to a stream. Like XADD without trimming.
+ *
+ * - `key`: The key where the stream is (or will be) stored
+ * - `flags`: A bit field of
+ * - `REDISMODULE_STREAM_ADD_AUTOID`: Assign a stream ID automatically, like
+ * `*` in the XADD command.
+ * - `id`: If the `AUTOID` flag is set, this is where the assigned ID is
+ * returned. Can be NULL if `AUTOID` is set, if you don't care to receive the
+ * ID. If `AUTOID` is not set, this is the requested ID.
+ * - `argv`: A pointer to an array of size `numfields * 2` containing the
+ * fields and values.
+ * - `numfields`: The number of field-value pairs in `argv`.
+ *
+ * Returns REDISMODULE_OK if an entry has been added. On failure,
+ * REDISMODULE_ERR is returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key refers to a value of a type other than stream
+ * - EBADF if the key was not opened for writing
+ * - EDOM if the given ID was 0-0 or not greater than all other IDs in the
+ * stream (only if the AUTOID flag is unset)
+ * - EFBIG if the stream has reached the last possible ID
+ * - ERANGE if the elements are too large to be stored.
+ */
+int RM_StreamAdd(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, long numfields) {
+    /* Validate args. errno values follow the contract documented above. */
+    if (!key || (numfields != 0 && !argv) ||       /* invalid key or argv */
+        (flags & ~(REDISMODULE_STREAM_ADD_AUTOID)) || /* invalid flags */
+        (!(flags & REDISMODULE_STREAM_ADD_AUTOID) && !id)) { /* id required */
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    } else if (key->value && key->value->type != OBJ_STREAM) {
+        errno = ENOTSUP;                            /* wrong type */
+        return REDISMODULE_ERR;
+    } else if (!(key->mode & REDISMODULE_WRITE)) {
+        errno = EBADF;                              /* key not open for writing */
+        return REDISMODULE_ERR;
+    } else if (!(flags & REDISMODULE_STREAM_ADD_AUTOID) &&
+               id->ms == 0 && id->seq == 0) {
+        errno = EDOM;                               /* ID out of range */
+        return REDISMODULE_ERR;
+    }
+
+    /* Create key if necessary. Remember it so that on failure we can
+     * delete the stream we created rather than leaving it empty. */
+    int created = 0;
+    if (key->value == NULL) {
+        moduleCreateEmptyKey(key, REDISMODULE_KEYTYPE_STREAM);
+        created = 1;
+    }
+
+    stream *s = key->value->ptr;
+    if (s->last_id.ms == UINT64_MAX && s->last_id.seq == UINT64_MAX) {
+        /* The stream has reached the last possible ID */
+        errno = EFBIG;
+        return REDISMODULE_ERR;
+    }
+
+    /* Convert the module-level ID into the core streamID type; with
+     * AUTOID the core assigns the next ID itself (use_id_ptr == NULL). */
+    streamID added_id;
+    streamID use_id;
+    streamID *use_id_ptr = NULL;
+    if (!(flags & REDISMODULE_STREAM_ADD_AUTOID)) {
+        use_id.ms = id->ms;
+        use_id.seq = id->seq;
+        use_id_ptr = &use_id;
+    }
+
+    if (streamAppendItem(s,argv,numfields,&added_id,use_id_ptr,1) == C_ERR) {
+        /* Either the ID is not greater than all existing IDs in the
+         * stream, or the elements are too large to be stored. Either way,
+         * errno has already been set by streamAppendItem(). */
+        if (created) moduleDelKeyIfEmpty(key);
+        return REDISMODULE_ERR;
+    }
+    /* Postponed signalKeyAsReady(). Done implicitly by moduleCreateEmptyKey()
+     * so not needed if the stream has just been created. */
+    if (!created) key->u.stream.signalready = 1;
+
+    /* Report the ID actually used (relevant with AUTOID). */
+    if (id != NULL) {
+        id->ms = added_id.ms;
+        id->seq = added_id.seq;
+    }
+
+    return REDISMODULE_OK;
+}
+
+/* Deletes an entry from a stream.
+ *
+ * - `key`: A key opened for writing, with no stream iterator started.
+ * - `id`: The stream ID of the entry to delete.
+ *
+ * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned
+ * and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ * key is empty
+ * - EBADF if the key was not opened for writing or if a stream iterator is
+ * associated with the key
+ * - ENOENT if no entry with the given stream ID exists
+ *
+ * See also RM_StreamIteratorDelete() for deleting the current entry while
+ * iterating using a stream iterator.
+ */
+int RM_StreamDelete(RedisModuleKey *key, RedisModuleStreamID *id) {
+    /* Argument and key-state validation, matching the errno contract
+     * described in the comment above. */
+    if (!key || !id) {
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    }
+    if (!key->value || key->value->type != OBJ_STREAM) {
+        errno = ENOTSUP; /* wrong type or empty key */
+        return REDISMODULE_ERR;
+    }
+    if (!(key->mode & REDISMODULE_WRITE) || key->iter != NULL) {
+        errno = EBADF; /* key not opened for writing or iterator started */
+        return REDISMODULE_ERR;
+    }
+    /* Convert the module ID to the core streamID and try the deletion. */
+    streamID target = {id->ms, id->seq};
+    stream *s = key->value->ptr;
+    if (!streamDeleteItem(s, &target)) {
+        errno = ENOENT; /* no entry with this id */
+        return REDISMODULE_ERR;
+    }
+    return REDISMODULE_OK;
+}
+
+/* Sets up a stream iterator.
+ *
+ * - `key`: The stream key opened for reading using RedisModule_OpenKey().
+ * - `flags`:
+ * - `REDISMODULE_STREAM_ITERATOR_EXCLUSIVE`: Don't include `start` and `end`
+ * in the iterated range.
+ * - `REDISMODULE_STREAM_ITERATOR_REVERSE`: Iterate in reverse order, starting
+ * from the `end` of the range.
+ * - `start`: The lower bound of the range. Use NULL for the beginning of the
+ * stream.
+ * - `end`: The upper bound of the range. Use NULL for the end of the stream.
+ *
+ * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned
+ * and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ * key is empty
+ * - EBADF if the key was not opened for writing or if a stream iterator is
+ * already associated with the key
+ * - EDOM if `start` or `end` is outside the valid range
+ *
+ * Returns REDISMODULE_OK on success and REDISMODULE_ERR if the key doesn't
+ * refer to a stream or if invalid arguments were given.
+ *
+ * The stream IDs are retrieved using RedisModule_StreamIteratorNextID() and
+ * for each stream ID, the fields and values are retrieved using
+ * RedisModule_StreamIteratorNextField(). The iterator is freed by calling
+ * RedisModule_StreamIteratorStop().
+ *
+ * Example (error handling omitted):
+ *
+ * RedisModule_StreamIteratorStart(key, 0, startid_ptr, endid_ptr);
+ * RedisModuleStreamID id;
+ * long numfields;
+ * while (RedisModule_StreamIteratorNextID(key, &id, &numfields) ==
+ * REDISMODULE_OK) {
+ * RedisModuleString *field, *value;
+ * while (RedisModule_StreamIteratorNextField(key, &field, &value) ==
+ * REDISMODULE_OK) {
+ * //
+ * // ... Do stuff ...
+ * //
+ * RedisModule_FreeString(ctx, field);
+ * RedisModule_FreeString(ctx, value);
+ * }
+ * }
+ * RedisModule_StreamIteratorStop(key);
+ */
+int RM_StreamIteratorStart(RedisModuleKey *key, int flags, RedisModuleStreamID *start, RedisModuleStreamID *end) {
+    /* check args */
+    if (!key ||
+        (flags & ~(REDISMODULE_STREAM_ITERATOR_EXCLUSIVE |
+                   REDISMODULE_STREAM_ITERATOR_REVERSE))) {
+        errno = EINVAL; /* key missing or invalid flags */
+        return REDISMODULE_ERR;
+    } else if (!key->value || key->value->type != OBJ_STREAM) {
+        errno = ENOTSUP;
+        return REDISMODULE_ERR; /* not a stream */
+    } else if (key->iter) {
+        errno = EBADF; /* iterator already started */
+        return REDISMODULE_ERR;
+    }
+
+    /* define range for streamIteratorStart(). 'lower'/'upper' are only
+     * initialized (and later only read) when 'start'/'end' are given; a
+     * NULL bound means "from the beginning" / "to the end". */
+    streamID lower, upper;
+    if (start) lower = (streamID){start->ms, start->seq};
+    if (end) upper = (streamID){end->ms, end->seq};
+    if (flags & REDISMODULE_STREAM_ITERATOR_EXCLUSIVE) {
+        /* An exclusive range is implemented by shrinking the bounds by
+         * one ID on each provided side. */
+        if ((start && streamIncrID(&lower) != C_OK) ||
+            (end && streamDecrID(&upper) != C_OK)) {
+            errno = EDOM; /* end is 0-0 or start is MAX-MAX? */
+            return REDISMODULE_ERR;
+        }
+    }
+
+    /* create iterator */
+    stream *s = key->value->ptr;
+    int rev = flags & REDISMODULE_STREAM_ITERATOR_REVERSE;
+    streamIterator *si = zmalloc(sizeof(*si));
+    streamIteratorStart(si, s, start ? &lower : NULL, end ? &upper : NULL, rev);
+    key->iter = si;
+    key->u.stream.currentid.ms = 0;  /* for RM_StreamIteratorDelete() */
+    key->u.stream.currentid.seq = 0;
+    key->u.stream.numfieldsleft = 0; /* for RM_StreamIteratorNextField() */
+    return REDISMODULE_OK;
+}
+
+/* Stops a stream iterator created using RedisModule_StreamIteratorStart() and
+ * reclaims its memory.
+ *
+ * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned
+ * and `errno` is set as follows:
+ *
+ * - EINVAL if called with a NULL key
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ * key is empty
+ * - EBADF if the key was not opened for writing or if no stream iterator is
+ * associated with the key
+ */
+int RM_StreamIteratorStop(RedisModuleKey *key) {
+    /* Validate key, value type and the presence of an active iterator,
+     * reporting errors via errno as documented above. */
+    if (!key) {
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    }
+    if (!key->value || key->value->type != OBJ_STREAM) {
+        errno = ENOTSUP;
+        return REDISMODULE_ERR;
+    }
+    streamIterator *it = key->iter;
+    if (it == NULL) {
+        errno = EBADF;
+        return REDISMODULE_ERR;
+    }
+    /* Detach the iterator from the key, then release it. */
+    key->iter = NULL;
+    streamIteratorStop(it);
+    zfree(it);
+    return REDISMODULE_OK;
+}
+
+/* Finds the next stream entry and returns its stream ID and the number of
+ * fields.
+ *
+ * - `key`: Key for which a stream iterator has been started using
+ *   RedisModule_StreamIteratorStart().
+ * - `id`: The stream ID returned. NULL if you don't care.
+ * - `numfields`: The number of fields in the found stream entry. NULL if you
+ *   don't care.
+ *
+ * Returns REDISMODULE_OK and sets `*id` and `*numfields` if an entry was found.
+ * On failure, REDISMODULE_ERR is returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with a NULL key
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ *   key is empty
+ * - EBADF if no stream iterator is associated with the key
+ * - ENOENT if there are no more entries in the range of the iterator
+ *
+ * In practice, if RM_StreamIteratorNextID() is called after a successful call
+ * to RM_StreamIteratorStart() and with the same key, it is safe to assume that
+ * a REDISMODULE_ERR return value means that there are no more entries.
+ *
+ * Use RedisModule_StreamIteratorNextField() to retrieve the fields and values.
+ * See the example at RedisModule_StreamIteratorStart().
+ */
+int RM_StreamIteratorNextID(RedisModuleKey *key, RedisModuleStreamID *id, long *numfields) {
+    if (!key) {
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    }
+    if (!key->value || key->value->type != OBJ_STREAM) {
+        errno = ENOTSUP;
+        return REDISMODULE_ERR;
+    }
+    if (!key->iter) {
+        errno = EBADF;
+        return REDISMODULE_ERR;
+    }
+    /* The current entry's ID and remaining field count are cached on the key
+     * so that NextField() and Delete() can operate on it later. */
+    streamID *cur = &key->u.stream.currentid;
+    int64_t *fields_left = &key->u.stream.numfieldsleft;
+    if (!streamIteratorGetID(key->iter, cur, fields_left)) {
+        /* Range exhausted: reset the cached entry state so that
+         * RM_StreamIteratorDelete() and RM_StreamIteratorNextField() fail. */
+        cur->ms = 0;
+        cur->seq = 0;
+        *fields_left = 0;
+        errno = ENOENT;
+        return REDISMODULE_ERR;
+    }
+    if (id) {
+        id->ms = cur->ms;
+        id->seq = cur->seq;
+    }
+    if (numfields) *numfields = *fields_left;
+    return REDISMODULE_OK;
+}
+
+/* Retrieves the next field of the current stream ID and its corresponding value
+ * in a stream iteration. This function should be called repeatedly after calling
+ * RedisModule_StreamIteratorNextID() to fetch each field-value pair.
+ *
+ * - `key`: Key where a stream iterator has been started.
+ * - `field_ptr`: This is where the field is returned.
+ * - `value_ptr`: This is where the value is returned.
+ *
+ * Returns REDISMODULE_OK and points `*field_ptr` and `*value_ptr` to freshly
+ * allocated RedisModuleString objects. The string objects are freed
+ * automatically when the callback finishes if automatic memory is enabled. On
+ * failure, REDISMODULE_ERR is returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with a NULL key
+ * - ENOTSUP if the key refers to a value of a type other than stream or if the
+ *   key is empty
+ * - EBADF if no stream iterator is associated with the key
+ * - ENOENT if there are no more fields in the current stream entry
+ *
+ * In practice, if RM_StreamIteratorNextField() is called after a successful
+ * call to RM_StreamIteratorNextID() and with the same key, it is safe to assume
+ * that a REDISMODULE_ERR return value means that there are no more fields.
+ *
+ * See the example at RedisModule_StreamIteratorStart().
+ */
+int RM_StreamIteratorNextField(RedisModuleKey *key, RedisModuleString **field_ptr, RedisModuleString **value_ptr) {
+    if (!key) {
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    }
+    if (!key->value || key->value->type != OBJ_STREAM) {
+        errno = ENOTSUP;
+        return REDISMODULE_ERR;
+    }
+    if (!key->iter) {
+        errno = EBADF;
+        return REDISMODULE_ERR;
+    }
+    if (key->u.stream.numfieldsleft <= 0) {
+        errno = ENOENT;
+        return REDISMODULE_ERR;
+    }
+    /* Fetch the raw field/value buffers from the low-level iterator and wrap
+     * the requested ones in fresh string objects tracked by auto-memory. */
+    unsigned char *raw_field, *raw_value;
+    int64_t raw_field_len, raw_value_len;
+    streamIteratorGetField(key->iter, &raw_field, &raw_value,
+                           &raw_field_len, &raw_value_len);
+    if (field_ptr) {
+        *field_ptr = createRawStringObject((char *)raw_field, raw_field_len);
+        autoMemoryAdd(key->ctx, REDISMODULE_AM_STRING, *field_ptr);
+    }
+    if (value_ptr) {
+        *value_ptr = createRawStringObject((char *)raw_value, raw_value_len);
+        autoMemoryAdd(key->ctx, REDISMODULE_AM_STRING, *value_ptr);
+    }
+    key->u.stream.numfieldsleft--;
+    return REDISMODULE_OK;
+}
+
+/* Deletes the current stream entry while iterating.
+ *
+ * This function can be called after RM_StreamIteratorNextID() or after any
+ * calls to RM_StreamIteratorNextField().
+ *
+ * Returns REDISMODULE_OK on success. On failure, REDISMODULE_ERR is returned
+ * and `errno` is set as follows:
+ *
+ * - EINVAL if key is NULL
+ * - ENOTSUP if the key is empty or is of another type than stream
+ * - EBADF if the key is not opened for writing, if no iterator has been started
+ * - ENOENT if the iterator has no current stream entry
+ */
+int RM_StreamIteratorDelete(RedisModuleKey *key) {
+    if (!key) {
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    }
+    if (!key->value || key->value->type != OBJ_STREAM) {
+        errno = ENOTSUP;
+        return REDISMODULE_ERR;
+    }
+    if (!(key->mode & REDISMODULE_WRITE) || !key->iter) {
+        errno = EBADF;
+        return REDISMODULE_ERR;
+    }
+    streamID *cur = &key->u.stream.currentid;
+    /* A zero ID means no current entry (never advanced, or already deleted). */
+    if (cur->ms == 0 && cur->seq == 0) {
+        errno = ENOENT;
+        return REDISMODULE_ERR;
+    }
+    streamIteratorRemoveEntry(key->iter, cur);
+    /* Reset the cached entry state so a repeated Delete() or a NextField()
+     * call without a new NextID() fails with ENOENT. */
+    cur->ms = 0;
+    cur->seq = 0;
+    key->u.stream.numfieldsleft = 0;
+    return REDISMODULE_OK;
+}
+
+/* Trim a stream by length, similar to XTRIM with MAXLEN.
+ *
+ * - `key`: Key opened for writing.
+ * - `flags`: A bitfield of
+ *   - `REDISMODULE_STREAM_TRIM_APPROX`: Trim less if it improves performance,
+ *     like XTRIM with `~`.
+ * - `length`: The number of stream entries to keep after trimming.
+ *
+ * Returns the number of entries deleted. On failure, a negative value is
+ * returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key is empty or of a type other than stream
+ * - EBADF if the key is not opened for writing
+ */
+long long RM_StreamTrimByLength(RedisModuleKey *key, int flags, long long length) {
+    /* Reject unknown flag bits and negative target lengths up front. */
+    if (!key || (flags & ~(REDISMODULE_STREAM_TRIM_APPROX)) || length < 0) {
+        errno = EINVAL;
+        return -1;
+    }
+    if (!key->value || key->value->type != OBJ_STREAM) {
+        errno = ENOTSUP;
+        return -1;
+    }
+    if (!(key->mode & REDISMODULE_WRITE)) {
+        errno = EBADF;
+        return -1;
+    }
+    int approx = (flags & REDISMODULE_STREAM_TRIM_APPROX) != 0;
+    return streamTrimByLength((stream *)key->value->ptr, length, approx);
+}
+
+/* Trim a stream by ID, similar to XTRIM with MINID.
+ *
+ * - `key`: Key opened for writing.
+ * - `flags`: A bitfield of
+ *   - `REDISMODULE_STREAM_TRIM_APPROX`: Trim less if it improves performance,
+ *     like XTRIM with `~`.
+ * - `id`: The smallest stream ID to keep after trimming.
+ *
+ * Returns the number of entries deleted. On failure, a negative value is
+ * returned and `errno` is set as follows:
+ *
+ * - EINVAL if called with invalid arguments
+ * - ENOTSUP if the key is empty or of a type other than stream
+ * - EBADF if the key is not opened for writing
+ */
+long long RM_StreamTrimByID(RedisModuleKey *key, int flags, RedisModuleStreamID *id) {
+    /* Reject unknown flag bits and a missing ID up front. */
+    if (!key || (flags & ~(REDISMODULE_STREAM_TRIM_APPROX)) || !id) {
+        errno = EINVAL;
+        return -1;
+    }
+    if (!key->value || key->value->type != OBJ_STREAM) {
+        errno = ENOTSUP;
+        return -1;
+    }
+    if (!(key->mode & REDISMODULE_WRITE)) {
+        errno = EBADF;
+        return -1;
+    }
+    int approx = (flags & REDISMODULE_STREAM_TRIM_APPROX) != 0;
+    /* Convert the module-level ID into the internal streamID type. */
+    streamID minid = { .ms = id->ms, .seq = id->seq };
+    return streamTrimByID((stream *)key->value->ptr, minid, approx);
+}
+
+/* --------------------------------------------------------------------------
+ * ## Calling Redis commands from modules
+ *
+ * RM_Call() sends a command to Redis. The remaining functions handle the reply.
+ * -------------------------------------------------------------------------- */
+
+
+void moduleParseCallReply_Int(RedisModuleCallReply *reply);
+void moduleParseCallReply_BulkString(RedisModuleCallReply *reply);
+void moduleParseCallReply_SimpleString(RedisModuleCallReply *reply);
+void moduleParseCallReply_Array(RedisModuleCallReply *reply);
+
+
+
+
+/* Free a Call reply and all the nested replies it contains if it's an
+ * array. */
+void RM_FreeCallReply(RedisModuleCallReply *reply) {
+    /* This is a wrapper for the recursive free reply function. This is needed
+     * in order to have the first level function to return on nested replies,
+     * but only if called by the module API. */
+
+    /* Recover the context that owns this reply so its auto-memory bookkeeping
+     * can be updated after the reply is released. For promise replies the
+     * context hangs off the promise stored as the reply's private data. */
+    RedisModuleCtx *ctx = NULL;
+    if(callReplyType(reply) == REDISMODULE_REPLY_PROMISE) {
+        RedisModuleAsyncRMCallPromise *promise = callReplyGetPrivateData(reply);
+        ctx = promise->ctx;
+        freeRedisModuleAsyncRMCallPromise(promise);
+    } else {
+        ctx = callReplyGetPrivateData(reply);
+    }
+
+    freeCallReply(reply);
+    if (ctx) {
+        /* NOTE(review): 'reply' has already been freed at this point;
+         * presumably autoMemoryFreed() only compares the pointer value and
+         * never dereferences it — confirm before reordering these calls. */
+        autoMemoryFreed(ctx,REDISMODULE_AM_REPLY,reply);
+    }
+}
+
+/* Return the reply type as one of the following:
+ *
+ * - REDISMODULE_REPLY_UNKNOWN
+ * - REDISMODULE_REPLY_STRING
+ * - REDISMODULE_REPLY_ERROR
+ * - REDISMODULE_REPLY_INTEGER
+ * - REDISMODULE_REPLY_ARRAY
+ * - REDISMODULE_REPLY_NULL
+ * - REDISMODULE_REPLY_MAP
+ * - REDISMODULE_REPLY_SET
+ * - REDISMODULE_REPLY_BOOL
+ * - REDISMODULE_REPLY_DOUBLE
+ * - REDISMODULE_REPLY_BIG_NUMBER
+ * - REDISMODULE_REPLY_VERBATIM_STRING
+ * - REDISMODULE_REPLY_ATTRIBUTE
+ * - REDISMODULE_REPLY_PROMISE */
+int RM_CallReplyType(RedisModuleCallReply *reply) {
+    /* Thin wrapper over the call-reply abstraction. */
+    int type = callReplyType(reply);
+    return type;
+}
+
+/* Return the length of the reply, for the reply types that have one. */
+size_t RM_CallReplyLength(RedisModuleCallReply *reply) {
+    size_t len = callReplyGetLen(reply);
+    return len;
+}
+
+/* Return the 'idx'-th nested call reply element of an array reply. NULL is
+ * returned if the reply is not an array or the index is out of range. */
+RedisModuleCallReply *RM_CallReplyArrayElement(RedisModuleCallReply *reply, size_t idx) {
+    RedisModuleCallReply *elem = callReplyGetArrayElement(reply, idx);
+    return elem;
+}
+
+/* Return the `long long` value of an integer reply. */
+long long RM_CallReplyInteger(RedisModuleCallReply *reply) {
+    long long val = callReplyGetLongLong(reply);
+    return val;
+}
+
+/* Return the `double` value of a double reply. */
+double RM_CallReplyDouble(RedisModuleCallReply *reply) {
+    double val = callReplyGetDouble(reply);
+    return val;
+}
+
+/* Return the big number value of a big number reply, as a string, and its
+ * length via 'len'. */
+const char *RM_CallReplyBigNumber(RedisModuleCallReply *reply, size_t *len) {
+    const char *bignum = callReplyGetBigNumber(reply, len);
+    return bignum;
+}
+
+/* Return the value of a verbatim string reply and its length via 'len'.
+ * An optional 'format' output argument can be given to also receive the
+ * verbatim reply format. */
+const char *RM_CallReplyVerbatim(RedisModuleCallReply *reply, size_t *len, const char **format) {
+    const char *str = callReplyGetVerbatim(reply, len, format);
+    return str;
+}
+
+/* Return the Boolean value of a Boolean reply. */
+int RM_CallReplyBool(RedisModuleCallReply *reply) {
+    int val = callReplyGetBool(reply);
+    return val;
+}
+
+/* Return the 'idx'-th nested call reply element of a set reply. NULL is
+ * returned if the reply is not a set or the index is out of range. */
+RedisModuleCallReply *RM_CallReplySetElement(RedisModuleCallReply *reply, size_t idx) {
+    RedisModuleCallReply *elem = callReplyGetSetElement(reply, idx);
+    return elem;
+}
+
+/* Retrieve the 'idx'-th key and value of a map reply.
+ *
+ * Returns:
+ * - REDISMODULE_OK on success.
+ * - REDISMODULE_ERR if idx out of range or if the reply type is wrong.
+ *
+ * The `key` and `value` arguments are used to return by reference, and may be
+ * NULL if not required. */
+int RM_CallReplyMapElement(RedisModuleCallReply *reply, size_t idx, RedisModuleCallReply **key, RedisModuleCallReply **val) {
+    /* Translate the internal C_OK/C_ERR convention to the module API one. */
+    return (callReplyGetMapElement(reply, idx, key, val) == C_OK) ?
+           REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Return the attribute attached to the given reply, or NULL if the reply has
+ * no attribute. */
+RedisModuleCallReply *RM_CallReplyAttribute(RedisModuleCallReply *reply) {
+    RedisModuleCallReply *attr = callReplyGetAttribute(reply);
+    return attr;
+}
+
+/* Retrieve the 'idx'-th key and value of an attribute reply.
+ *
+ * Returns:
+ * - REDISMODULE_OK on success.
+ * - REDISMODULE_ERR if idx out of range or if the reply type is wrong.
+ *
+ * The `key` and `value` arguments are used to return by reference, and may be
+ * NULL if not required. */
+int RM_CallReplyAttributeElement(RedisModuleCallReply *reply, size_t idx, RedisModuleCallReply **key, RedisModuleCallReply **val) {
+    /* Translate the internal C_OK/C_ERR convention to the module API one. */
+    return (callReplyGetAttributeElement(reply, idx, key, val) == C_OK) ?
+           REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Set an unblock handler (callback and private data) on the given promise
+ * RedisModuleCallReply. The given reply must be of promise type
+ * (REDISMODULE_REPLY_PROMISE). */
+void RM_CallReplyPromiseSetUnblockHandler(RedisModuleCallReply *reply, RedisModuleOnUnblocked on_unblock, void *private_data) {
+    RedisModuleAsyncRMCallPromise *p = callReplyGetPrivateData(reply);
+    p->private_data = private_data;
+    p->on_unblocked = on_unblock;
+}
+
+/* Abort the execution of a given promise RedisModuleCallReply.
+ * Returns REDISMODULE_OK in case the abort was done successfully and REDISMODULE_ERR
+ * if it is not possible to abort the execution (execution already finished).
+ * In case the execution was aborted (REDISMODULE_OK was returned), the private_data out parameter
+ * will be set with the value of the private data that was given on 'RM_CallReplyPromiseSetUnblockHandler'
+ * so the caller will be able to release the private data.
+ *
+ * If the execution was aborted successfully, it is promised that the unblock handler will not be called.
+ * That said, it is possible that the abort operation will succeed but the operation will still continue.
+ * This can happen if, for example, a module implements some blocking command and does not respect the
+ * disconnect callback. For pure Redis commands this can not happen. */
+int RM_CallReplyPromiseAbort(RedisModuleCallReply *reply, void **private_data) {
+    RedisModuleAsyncRMCallPromise *promise = callReplyGetPrivateData(reply);
+    if (!promise->c) return REDISMODULE_ERR; /* Promise can not be aborted, either already aborted or already finished. */
+    if (!(promise->c->flags & CLIENT_BLOCKED)) return REDISMODULE_ERR; /* Client is not blocked anymore, can not abort it. */
+
+    /* Client is still blocked, remove it from any blocking state and release it. */
+    if (private_data) *private_data = promise->private_data;
+    promise->private_data = NULL;
+    promise->on_unblocked = NULL;
+    unblockClient(promise->c, 0);
+    moduleReleaseTempClient(promise->c);
+    return REDISMODULE_OK;
+}
+
+/* Return the pointer and length of a string or error reply. */
+const char *RM_CallReplyStringPtr(RedisModuleCallReply *reply, size_t *len) {
+ size_t private_len;
+ if (!len) len = &private_len;
+ return callReplyGetString(reply, len);
+}
+
+/* Return a new string object from a call reply of type string, error or
+ * integer. Otherwise (wrong reply type) return NULL.
+ *
+ * The returned string is created via RM_CreateString() on the reply's owning
+ * context, so it follows the usual auto-memory rules of that context. */
+RedisModuleString *RM_CreateStringFromCallReply(RedisModuleCallReply *reply) {
+    RedisModuleCtx* ctx = callReplyGetPrivateData(reply);
+    size_t len;
+    const char *str;
+    switch(callReplyType(reply)) {
+    case REDISMODULE_REPLY_STRING:
+    case REDISMODULE_REPLY_ERROR:
+        str = callReplyGetString(reply, &len);
+        return RM_CreateString(ctx, str, len);
+    case REDISMODULE_REPLY_INTEGER: {
+        /* Render the integer as a decimal string. Use a distinct variable
+         * name so the outer 'len' is not shadowed (keeps -Wshadow clean). */
+        char buf[64];
+        int buflen = ll2string(buf,sizeof(buf),callReplyGetLongLong(reply));
+        return RM_CreateString(ctx,buf,buflen);
+    }
+    default:
+        return NULL;
+    }
+}
+
+/* Modifies the user that RM_Call will use (e.g. for ACL checks) */
+void RM_SetContextUser(RedisModuleCtx *ctx, const RedisModuleUser *user) {
+    /* Only the pointer is stored; presumably the caller must keep 'user'
+     * alive for as long as the context can issue commands — TODO confirm. */
+    ctx->user = user;
+}
+
+/* Returns an array of robj pointers, by parsing the format specifier "fmt" as described for
+ * the RM_Call(), RM_Replicate() and other module APIs. Populates *argcp with the number of
+ * items (which equals to the length of the allocated argv).
+ *
+ * The integer pointed by 'flags' is populated with flags according
+ * to special modifiers in "fmt".
+ *
+ *     "!" -> REDISMODULE_ARGV_REPLICATE
+ *     "A" -> REDISMODULE_ARGV_NO_AOF
+ *     "R" -> REDISMODULE_ARGV_NO_REPLICAS
+ *     "3" -> REDISMODULE_ARGV_RESP_3
+ *     "0" -> REDISMODULE_ARGV_RESP_AUTO
+ *     "C" -> REDISMODULE_ARGV_RUN_AS_USER
+ *     "S" -> REDISMODULE_ARGV_SCRIPT_MODE
+ *     "W" -> REDISMODULE_ARGV_NO_WRITES
+ *     "M" -> REDISMODULE_ARGV_RESPECT_DENY_OOM
+ *     "E" -> REDISMODULE_ARGV_CALL_REPLIES_AS_ERRORS
+ *     "D" -> REDISMODULE_ARGV_DRY_RUN | REDISMODULE_ARGV_CALL_REPLIES_AS_ERRORS
+ *     "K" -> REDISMODULE_ARGV_ALLOW_BLOCK
+ *
+ * On error (format specifier error) NULL is returned and nothing is
+ * allocated. On success the argument vector is returned. */
+robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int *argcp, int *flags, va_list ap) {
+    int argc = 0, argv_size, j;
+    robj **argv = NULL;
+
+    /* As a first guess to avoid useless reallocations, size argv to
+     * hold one argument for each char specifier in 'fmt'. */
+    argv_size = strlen(fmt)+1; /* +1 because of the command name. */
+    argv = zrealloc(argv,sizeof(robj*)*argv_size);
+
+    /* Build the arguments vector based on the format specifier. */
+    argv[0] = createStringObject(cmdname,strlen(cmdname));
+    argc++;
+
+    /* Create the client and dispatch the command. */
+    const char *p = fmt;
+    while(*p) {
+        if (*p == 'c') {
+            /* 'c': NUL-terminated C string. */
+            char *cstr = va_arg(ap,char*);
+            argv[argc++] = createStringObject(cstr,strlen(cstr));
+        } else if (*p == 's') {
+            /* 's': an existing string robj. Static objects are copied,
+             * others are shared via reference counting. */
+            robj *obj = va_arg(ap,void*);
+            if (obj->refcount == OBJ_STATIC_REFCOUNT)
+                obj = createStringObject(obj->ptr,sdslen(obj->ptr));
+            else
+                incrRefCount(obj);
+            argv[argc++] = obj;
+        } else if (*p == 'b') {
+            /* 'b': binary buffer followed by its length. */
+            char *buf = va_arg(ap,char*);
+            size_t len = va_arg(ap,size_t);
+            argv[argc++] = createStringObject(buf,len);
+        } else if (*p == 'l') {
+            /* 'l': long long integer, converted to a string object. */
+            long long ll = va_arg(ap,long long);
+            argv[argc++] = createStringObjectFromLongLongWithSds(ll);
+        } else if (*p == 'v') {
+            /* A vector of strings */
+            robj **v = va_arg(ap, void*);
+            size_t vlen = va_arg(ap, size_t);
+
+            /* We need to grow argv to hold the vector's elements.
+             * We resize by vector_len-1 elements, because we held
+             * one element in argv for the vector already */
+            argv_size += vlen-1;
+            argv = zrealloc(argv,sizeof(robj*)*argv_size);
+
+            size_t i = 0;
+            for (i = 0; i < vlen; i++) {
+                incrRefCount(v[i]);
+                argv[argc++] = v[i];
+            }
+        } else if (*p == '!') {
+            /* Flag-only modifiers: they consume no va_arg and only set bits
+             * in *flags (see the table in the header comment). */
+            if (flags) (*flags) |= REDISMODULE_ARGV_REPLICATE;
+        } else if (*p == 'A') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_NO_AOF;
+        } else if (*p == 'R') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_NO_REPLICAS;
+        } else if (*p == '3') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_RESP_3;
+        } else if (*p == '0') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_RESP_AUTO;
+        } else if (*p == 'C') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_RUN_AS_USER;
+        } else if (*p == 'S') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_SCRIPT_MODE;
+        } else if (*p == 'W') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_NO_WRITES;
+        } else if (*p == 'M') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_RESPECT_DENY_OOM;
+        } else if (*p == 'E') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_CALL_REPLIES_AS_ERRORS;
+        } else if (*p == 'D') {
+            /* 'D' (dry run) implies returning errors as call replies. */
+            if (flags) (*flags) |= (REDISMODULE_ARGV_DRY_RUN | REDISMODULE_ARGV_CALL_REPLIES_AS_ERRORS);
+        } else if (*p == 'K') {
+            if (flags) (*flags) |= REDISMODULE_ARGV_ALLOW_BLOCK;
+        } else {
+            goto fmterr;
+        }
+        p++;
+    }
+    if (argcp) *argcp = argc;
+    return argv;
+
+fmterr:
+    /* Unknown specifier: release everything built so far. */
+    for (j = 0; j < argc; j++)
+        decrRefCount(argv[j]);
+    zfree(argv);
+    return NULL;
+}
+
+/* Exported API to call any Redis command from modules.
+ *
+ * * **cmdname**: The Redis command to call.
+ * * **fmt**: A format specifier string for the command's arguments. Each
+ * of the arguments should be specified by a valid type specification. The
+ * format specifier can also contain the modifiers `!`, `A`, `3` and `R` which
+ * don't have a corresponding argument.
+ *
+ * * `b` -- The argument is a buffer and is immediately followed by another
+ * argument that is the buffer's length.
+ * * `c` -- The argument is a pointer to a plain C string (null-terminated).
+ * * `l` -- The argument is a `long long` integer.
+ * * `s` -- The argument is a RedisModuleString.
+ * * `v` -- The argument(s) is a vector of RedisModuleString.
+ * * `!` -- Sends the Redis command and its arguments to replicas and AOF.
+ * * `A` -- Suppress AOF propagation, send only to replicas (requires `!`).
+ * * `R` -- Suppress replicas propagation, send only to AOF (requires `!`).
+ * * `3` -- Return a RESP3 reply. This will change the command reply.
+ * e.g., HGETALL returns a map instead of a flat array.
+ * * `0` -- Return the reply in auto mode, i.e. the reply format will be the
+ * same as the client attached to the given RedisModuleCtx. This will
+ *            probably be used when you want to pass the reply directly to the client.
+ * * `C` -- Run a command as the user attached to the context.
+ * User is either attached automatically via the client that directly
+ * issued the command and created the context or via RM_SetContextUser.
+ * If the context is not directly created by an issued command (such as a
+ * background context and no user was set on it via RM_SetContextUser,
+ * RM_Call will fail.
+ * Checks if the command can be executed according to ACL rules and causes
+ * the command to run as the determined user, so that any future user
+ * dependent activity, such as ACL checks within scripts will proceed as
+ * expected.
+ * Otherwise, the command will run as the Redis unrestricted user.
+ * * `S` -- Run the command in a script mode, this means that it will raise
+ *       an error if a command which is not allowed inside a script
+ * (flagged with the `deny-script` flag) is invoked (like SHUTDOWN).
+ * In addition, on script mode, write commands are not allowed if there are
+ * not enough good replicas (as configured with `min-replicas-to-write`)
+ * or when the server is unable to persist to the disk.
+ * * `W` -- Do not allow to run any write command (flagged with the `write` flag).
+ * * `M` -- Do not allow `deny-oom` flagged commands when over the memory limit.
+ * * `E` -- Return error as RedisModuleCallReply. If there is an error before
+ * invoking the command, the error is returned using errno mechanism.
+ * This flag allows to get the error also as an error CallReply with
+ * relevant error message.
+ * * 'D' -- A "Dry Run" mode. Return before executing the underlying call().
+ * If everything succeeded, it will return with a NULL, otherwise it will
+ * return with a CallReply object denoting the error, as if it was called with
+ * the 'E' code.
+ * * 'K' -- Allow running blocking commands. If enabled and the command gets blocked, a
+ * special REDISMODULE_REPLY_PROMISE will be returned. This reply type
+ * indicates that the command was blocked and the reply will be given asynchronously.
+ * The module can use this reply object to set a handler which will be called when
+ * the command gets unblocked using RedisModule_CallReplyPromiseSetUnblockHandler.
+ * The handler must be set immediately after the command invocation (without releasing
+ * the Redis lock in between). If the handler is not set, the blocking command will
+ * still continue its execution but the reply will be ignored (fire and forget),
+ * notice that this is dangerous in case of role change, as explained below.
+ * The module can use RedisModule_CallReplyPromiseAbort to abort the command invocation
+ * if it was not yet finished (see RedisModule_CallReplyPromiseAbort documentation for more
+ * details). It is also the module's responsibility to abort the execution on role change, either by using
+ * server event (to get notified when the instance becomes a replica) or relying on the disconnect
+ * callback of the original client. Failing to do so can result in a write operation on a replica.
+ * Unlike other call replies, promise call reply **must** be freed while the Redis GIL is locked.
+ * Notice that on unblocking, the only promise is that the unblock handler will be called,
+ * If the blocking RM_Call caused the module to also block some real client (using RM_BlockClient),
+ * it is the module responsibility to unblock this client on the unblock handler.
+ * On the unblock handler it is only allowed to perform the following:
+ * * Calling additional Redis commands using RM_Call
+ * * Open keys using RM_OpenKey
+ * * Replicate data to the replica or AOF
+ *
+ * Specifically, it is not allowed to call any Redis module API which are client related such as:
+ * * RM_Reply* API's
+ * * RM_BlockClient
+ * * RM_GetCurrentUserName
+ *
+ * * **...**: The actual arguments to the Redis command.
+ *
+ * On success a RedisModuleCallReply object is returned, otherwise
+ * NULL is returned and errno is set to the following values:
+ *
+ * * EBADF: wrong format specifier.
+ * * EINVAL: wrong command arity.
+ * * ENOENT: command does not exist.
+ * * EPERM: operation in Cluster instance with key in non local slot.
+ * * EROFS: operation in Cluster instance when a write command is sent
+ * in a readonly state.
+ * * ENETDOWN: operation in Cluster instance when cluster is down.
+ * * ENOTSUP: No ACL user for the specified module context
+ * * EACCES: Command cannot be executed, according to ACL rules
+ * * ENOSPC: Write or deny-oom command is not allowed
+ * * ESPIPE: Command not allowed on script mode
+ *
+ * Example code fragment:
+ *
+ * reply = RedisModule_Call(ctx,"INCRBY","sc",argv[1],"10");
+ * if (RedisModule_CallReplyType(reply) == REDISMODULE_REPLY_INTEGER) {
+ * long long myval = RedisModule_CallReplyInteger(reply);
+ * // Do something with myval.
+ * }
+ *
+ * This API is documented here: https://redis.io/topics/modules-intro
+ */
+RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) {
+ client *c = NULL;
+ robj **argv = NULL;
+ int argc = 0, flags = 0;
+ va_list ap;
+ RedisModuleCallReply *reply = NULL;
+ int replicate = 0; /* Replicate this command? */
+ int error_as_call_replies = 0; /* return errors as RedisModuleCallReply object */
+ uint64_t cmd_flags;
+
+ /* Handle arguments. */
+ va_start(ap, fmt);
+ argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
+ replicate = flags & REDISMODULE_ARGV_REPLICATE;
+ error_as_call_replies = flags & REDISMODULE_ARGV_CALL_REPLIES_AS_ERRORS;
+ va_end(ap);
+
+ c = moduleAllocTempClient();
+
+ if (!(flags & REDISMODULE_ARGV_ALLOW_BLOCK)) {
+ /* We do not want to allow block, the module do not expect it */
+ c->flags |= CLIENT_DENY_BLOCKING;
+ }
+ c->db = ctx->client->db;
+ c->argv = argv;
+ /* We have to assign argv_len, which is equal to argc in that case (RM_Call)
+ * because we may be calling a command that uses rewriteClientCommandArgument */
+ c->argc = c->argv_len = argc;
+ c->resp = 2;
+ if (flags & REDISMODULE_ARGV_RESP_3) {
+ c->resp = 3;
+ } else if (flags & REDISMODULE_ARGV_RESP_AUTO) {
+ /* Auto mode means to take the same protocol as the ctx client. */
+ c->resp = ctx->client->resp;
+ }
+ if (ctx->module) ctx->module->in_call++;
+
+ user *user = NULL;
+ if (flags & REDISMODULE_ARGV_RUN_AS_USER) {
+ user = ctx->user ? ctx->user->user : ctx->client->user;
+ if (!user) {
+ errno = ENOTSUP;
+ if (error_as_call_replies) {
+ sds msg = sdsnew("cannot run as user, no user directly attached to context or context's client");
+ reply = callReplyCreateError(msg, ctx);
+ }
+ goto cleanup;
+ }
+ c->user = user;
+ }
+
+ /* We handle the above format error only when the client is setup so that
+ * we can free it normally. */
+ if (argv == NULL) {
+ /* We do not return a call reply here this is an error that should only
+ * be catch by the module indicating wrong fmt was given, the module should
+ * handle this error and decide how to continue. It is not an error that
+ * should be propagated to the user. */
+ errno = EBADF;
+ goto cleanup;
+ }
+
+ /* Call command filters */
+ moduleCallCommandFilters(c);
+
+ /* Lookup command now, after filters had a chance to make modifications
+ * if necessary.
+ */
+ c->cmd = c->lastcmd = c->realcmd = lookupCommand(c->argv,c->argc);
+ sds err;
+ if (!commandCheckExistence(c, error_as_call_replies? &err : NULL)) {
+ errno = ENOENT;
+ if (error_as_call_replies)
+ reply = callReplyCreateError(err, ctx);
+ goto cleanup;
+ }
+ if (!commandCheckArity(c, error_as_call_replies? &err : NULL)) {
+ errno = EINVAL;
+ if (error_as_call_replies)
+ reply = callReplyCreateError(err, ctx);
+ goto cleanup;
+ }
+
+ cmd_flags = getCommandFlags(c);
+
+ if (flags & REDISMODULE_ARGV_SCRIPT_MODE) {
+ /* Basically on script mode we want to only allow commands that can
+ * be executed on scripts (CMD_NOSCRIPT is not set on the command flags) */
+ if (cmd_flags & CMD_NOSCRIPT) {
+ errno = ESPIPE;
+ if (error_as_call_replies) {
+ sds msg = sdscatfmt(sdsempty(), "command '%S' is not allowed on script mode", c->cmd->fullname);
+ reply = callReplyCreateError(msg, ctx);
+ }
+ goto cleanup;
+ }
+ }
+
+ if (flags & REDISMODULE_ARGV_RESPECT_DENY_OOM && server.maxmemory) {
+ if (cmd_flags & CMD_DENYOOM) {
+ int oom_state;
+ if (ctx->flags & REDISMODULE_CTX_THREAD_SAFE) {
+ /* On background thread we can not count on server.pre_command_oom_state.
+ * Because it is only set on the main thread, in such case we will check
+ * the actual memory usage. */
+ oom_state = (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_ERR);
+ } else {
+ oom_state = server.pre_command_oom_state;
+ }
+ if (oom_state) {
+ errno = ENOSPC;
+ if (error_as_call_replies) {
+ sds msg = sdsdup(shared.oomerr->ptr);
+ reply = callReplyCreateError(msg, ctx);
+ }
+ goto cleanup;
+ }
+ }
+ } else {
+ /* if we aren't OOM checking in RM_Call, we want further executions from this client to also not fail on OOM */
+ c->flags |= CLIENT_ALLOW_OOM;
+ }
+
+ if (flags & REDISMODULE_ARGV_NO_WRITES) {
+ if (cmd_flags & CMD_WRITE) {
+ errno = ENOSPC;
+ if (error_as_call_replies) {
+ sds msg = sdscatfmt(sdsempty(), "Write command '%S' was "
+ "called while write is not allowed.", c->cmd->fullname);
+ reply = callReplyCreateError(msg, ctx);
+ }
+ goto cleanup;
+ }
+ }
+
+ /* Script mode tests */
+ if (flags & REDISMODULE_ARGV_SCRIPT_MODE) {
+ if (cmd_flags & CMD_WRITE) {
+ /* on script mode, if a command is a write command,
+ * We will not run it if we encounter disk error
+ * or we do not have enough replicas */
+
+ if (!checkGoodReplicasStatus()) {
+ errno = ESPIPE;
+ if (error_as_call_replies) {
+ sds msg = sdsdup(shared.noreplicaserr->ptr);
+ reply = callReplyCreateError(msg, ctx);
+ }
+ goto cleanup;
+ }
+
+ int deny_write_type = writeCommandsDeniedByDiskError();
+ int obey_client = (server.current_client && mustObeyClient(server.current_client));
+
+ if (deny_write_type != DISK_ERROR_TYPE_NONE && !obey_client) {
+ errno = ESPIPE;
+ if (error_as_call_replies) {
+ sds msg = writeCommandsGetDiskErrorMessage(deny_write_type);
+ reply = callReplyCreateError(msg, ctx);
+ }
+ goto cleanup;
+ }
+
+ if (server.masterhost && server.repl_slave_ro && !obey_client) {
+ errno = ESPIPE;
+ if (error_as_call_replies) {
+ sds msg = sdsdup(shared.roslaveerr->ptr);
+ reply = callReplyCreateError(msg, ctx);
+ }
+ goto cleanup;
+ }
+ }
+
+ if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED &&
+ server.repl_serve_stale_data == 0 && !(cmd_flags & CMD_STALE)) {
+ errno = ESPIPE;
+ if (error_as_call_replies) {
+ sds msg = sdsdup(shared.masterdownerr->ptr);
+ reply = callReplyCreateError(msg, ctx);
+ }
+ goto cleanup;
+ }
+ }
+
+ /* Check if the user can run this command according to the current
+ * ACLs.
+ *
+ * If RM_SetContextUser has set a user, that user is used, otherwise
+ * use the attached client's user. If there is no attached client user and no manually
+ * set user, an error will be returned */
+ if (flags & REDISMODULE_ARGV_RUN_AS_USER) {
+ int acl_errpos;
+ int acl_retval;
+
+ acl_retval = ACLCheckAllUserCommandPerm(user,c->cmd,c->argv,c->argc,&acl_errpos);
+ if (acl_retval != ACL_OK) {
+ sds object = (acl_retval == ACL_DENIED_CMD) ? sdsdup(c->cmd->fullname) : sdsdup(c->argv[acl_errpos]->ptr);
+ addACLLogEntry(ctx->client, acl_retval, ACL_LOG_CTX_MODULE, -1, c->user->name, object);
+ if (error_as_call_replies) {
+ /* verbosity should be same as processCommand() in server.c */
+ sds acl_msg = getAclErrorMessage(acl_retval, c->user, c->cmd, c->argv[acl_errpos]->ptr, 0);
+ sds msg = sdscatfmt(sdsempty(), "-NOPERM %S\r\n", acl_msg);
+ sdsfree(acl_msg);
+ reply = callReplyCreateError(msg, ctx);
+ }
+ errno = EACCES;
+ goto cleanup;
+ }
+ }
+
+ /* If this is a Redis Cluster node, we need to make sure the module is not
+ * trying to access non-local keys, with the exception of commands
+ * received from our master. */
+ if (server.cluster_enabled && !mustObeyClient(ctx->client)) {
+ int error_code;
+ /* Duplicate relevant flags in the module client. */
+ c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING);
+ c->flags |= ctx->client->flags & (CLIENT_READONLY|CLIENT_ASKING);
+ if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,&error_code) !=
+ server.cluster->myself)
+ {
+ sds msg = NULL;
+ if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) {
+ if (error_as_call_replies) {
+ msg = sdscatfmt(sdsempty(), "Can not execute a write command '%S' while the cluster is down and readonly", c->cmd->fullname);
+ }
+ errno = EROFS;
+ } else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
+ if (error_as_call_replies) {
+ msg = sdscatfmt(sdsempty(), "Can not execute a command '%S' while the cluster is down", c->cmd->fullname);
+ }
+ errno = ENETDOWN;
+ } else {
+ if (error_as_call_replies) {
+ msg = sdsnew("Attempted to access a non local key in a cluster node");
+ }
+ errno = EPERM;
+ }
+ if (msg) {
+ reply = callReplyCreateError(msg, ctx);
+ }
+ goto cleanup;
+ }
+ }
+
+ if (flags & REDISMODULE_ARGV_DRY_RUN) {
+ goto cleanup;
+ }
+
+ /* We need to use a global replication_allowed flag in order to prevent
+ * replication of nested RM_Calls. Example:
+ * 1. module1.foo does RM_Call of module2.bar without replication (i.e. no '!')
+ * 2. module2.bar internally calls RM_Call of INCR with '!'
+ * 3. at the end of module1.foo we call RM_ReplicateVerbatim
+ * We want the replica/AOF to see only module1.foo and not the INCR from module2.bar */
+ int prev_replication_allowed = server.replication_allowed;
+ server.replication_allowed = replicate && server.replication_allowed;
+
+ /* Run the command */
+ int call_flags = CMD_CALL_FROM_MODULE;
+ if (replicate) {
+ if (!(flags & REDISMODULE_ARGV_NO_AOF))
+ call_flags |= CMD_CALL_PROPAGATE_AOF;
+ if (!(flags & REDISMODULE_ARGV_NO_REPLICAS))
+ call_flags |= CMD_CALL_PROPAGATE_REPL;
+ }
+ call(c,call_flags);
+ server.replication_allowed = prev_replication_allowed;
+
+ if (c->flags & CLIENT_BLOCKED) {
+ serverAssert(flags & REDISMODULE_ARGV_ALLOW_BLOCK);
+ serverAssert(ctx->module);
+ RedisModuleAsyncRMCallPromise *promise = zmalloc(sizeof(RedisModuleAsyncRMCallPromise));
+ *promise = (RedisModuleAsyncRMCallPromise) {
+ /* We start with ref_count value of 2 because this object is held
+ * by the promise CallReply and the fake client that was used to execute the command. */
+ .ref_count = 2,
+ .module = ctx->module,
+ .on_unblocked = NULL,
+ .private_data = NULL,
+ .c = c,
+ .ctx = (ctx->flags & REDISMODULE_CTX_AUTO_MEMORY) ? ctx : NULL,
+ };
+ reply = callReplyCreatePromise(promise);
+ c->bstate.async_rm_call_handle = promise;
+ if (!(call_flags & CMD_CALL_PROPAGATE_AOF)) {
+ /* No need for AOF propagation, set the relevant flags of the client */
+ c->flags |= CLIENT_MODULE_PREVENT_AOF_PROP;
+ }
+ if (!(call_flags & CMD_CALL_PROPAGATE_REPL)) {
+ /* No need for replication propagation, set the relevant flags of the client */
+ c->flags |= CLIENT_MODULE_PREVENT_REPL_PROP;
+ }
+ c = NULL; /* Make sure not to free the client */
+ } else {
+ reply = moduleParseReply(c, (ctx->flags & REDISMODULE_CTX_AUTO_MEMORY) ? ctx : NULL);
+ }
+
+cleanup:
+ if (reply) autoMemoryAdd(ctx,REDISMODULE_AM_REPLY,reply);
+ if (ctx->module) ctx->module->in_call--;
+ if (c) moduleReleaseTempClient(c);
+ return reply;
+}
+
/* Return a pointer, and a length, to the protocol (RESP) buffer returned by
 * the command that produced the reply object. Thin wrapper over
 * callReplyGetProto(); 'len' receives the buffer length. */
const char *RM_CallReplyProto(RedisModuleCallReply *reply, size_t *len) {
    return callReplyGetProto(reply, len);
}
+
+/* --------------------------------------------------------------------------
+ * ## Modules data types
+ *
+ * When String DMA or using existing data structures is not enough, it is
+ * possible to create new data types from scratch and export them to
+ * Redis. The module must provide a set of callbacks for handling the
+ * new values exported (for example in order to provide RDB saving/loading,
+ * AOF rewrite, and so forth). In this section we define this API.
+ * -------------------------------------------------------------------------- */
+
+/* Turn a 9 chars name in the specified charset and a 10 bit encver into
+ * a single 64 bit unsigned integer that represents this exact module name
+ * and version. This final number is called a "type ID" and is used when
+ * writing module exported values to RDB files, in order to re-associate the
+ * value to the right module to load them during RDB loading.
+ *
+ * If the string is not of the right length or the charset is wrong, or
+ * if encver is outside the unsigned 10 bit integer range, 0 is returned,
+ * otherwise the function returns the right type ID.
+ *
+ * The resulting 64 bit integer is composed as follows:
+ *
+ * (high order bits) 6|6|6|6|6|6|6|6|6|10 (low order bits)
+ *
+ * The first 6 bits value is the first character, name[0], while the last
+ * 6 bits value, immediately before the 10 bits integer, is name[8].
+ * The last 10 bits are the encoding version.
+ *
+ * Note that a name and encver combo of "AAAAAAAAA" and 0, will produce
+ * zero as return value, that is the same we use to signal errors, thus
+ * this combination is invalid, and also useless since type names should
+ * try to be vary to avoid collisions. */
+
/* The 64 symbols allowed in module type names: each character maps to
 * exactly 6 bits of the encoded type ID. */
const char *ModuleTypeNameCharSet =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-_";

/* Encode a 9 character type name plus a 10 bit encoding version into a
 * single 64 bit type ID: nine 6-bit character codes (name[0] in the highest
 * bits) followed by the 10 bit encver in the lowest bits.
 *
 * Returns 0 on any validation failure: wrong name length, a character
 * outside the charset, or encver outside the [0,1023] range. */
uint64_t moduleTypeEncodeId(const char *name, int encver) {
    const char *cset = ModuleTypeNameCharSet;

    if (strlen(name) != 9) return 0;
    if (encver < 0 || encver > 1023) return 0;

    uint64_t id = 0;
    for (int i = 0; i < 9; i++) {
        const char *match = strchr(cset, name[i]);
        if (match == NULL) return 0;
        id = (id << 6) | (uint64_t)(match - cset);
    }
    return (id << 10) | (uint64_t)encver;
}
+
+/* Search, in the list of exported data types of all the modules registered,
+ * a type with the same name as the one given. Returns the moduleType
+ * structure pointer if such a module is found, or NULL otherwise. */
+moduleType *moduleTypeLookupModuleByNameInternal(const char *name, int ignore_case) {
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ while ((de = dictNext(di)) != NULL) {
+ struct RedisModule *module = dictGetVal(de);
+ listIter li;
+ listNode *ln;
+
+ listRewind(module->types,&li);
+ while((ln = listNext(&li))) {
+ moduleType *mt = ln->value;
+ if ((!ignore_case && memcmp(name,mt->name,sizeof(mt->name)) == 0)
+ || (ignore_case && !strcasecmp(name, mt->name)))
+ {
+ dictReleaseIterator(di);
+ return mt;
+ }
+ }
+ }
+ dictReleaseIterator(di);
+ return NULL;
+}
/* Case sensitive lookup of a registered module data type by its 9 char name. */
moduleType *moduleTypeLookupModuleByName(const char *name) {
    return moduleTypeLookupModuleByNameInternal(name, 0);
}

/* Case insensitive variant of moduleTypeLookupModuleByName(). */
moduleType *moduleTypeLookupModuleByNameIgnoreCase(const char *name) {
    return moduleTypeLookupModuleByNameInternal(name, 1);
}
+
/* Lookup a module by ID, with caching. This function is used during RDB
 * loading. Modules exporting data types should never be able to unload, so
 * our cache does not need to expire. */
#define MODULE_LOOKUP_CACHE_SIZE 3

moduleType *moduleTypeLookupModuleByID(uint64_t id) {
    /* Static storage zero-initializes the cache, so a NULL 'mt' marks an
     * unused slot. */
    static struct {
        uint64_t id;
        moduleType *mt;
    } cache[MODULE_LOOKUP_CACHE_SIZE];

    /* Search in cache to start. */
    int j;
    for (j = 0; j < MODULE_LOOKUP_CACHE_SIZE && cache[j].mt != NULL; j++)
        if (cache[j].id == id) return cache[j].mt;

    /* Slow module by module lookup. */
    moduleType *mt = NULL;
    dictIterator *di = dictGetIterator(modules);
    dictEntry *de;

    while ((de = dictNext(di)) != NULL && mt == NULL) {
        struct RedisModule *module = dictGetVal(de);
        listIter li;
        listNode *ln;

        listRewind(module->types,&li);
        while((ln = listNext(&li))) {
            moduleType *this_mt = ln->value;
            /* Compare only the 54 bit module identifier and not the
             * encoding version stored in the low 10 bits. */
            if (this_mt->id >> 10 == id >> 10) {
                mt = this_mt;
                break;
            }
        }
    }
    dictReleaseIterator(di);

    /* Add to cache if possible: after the scan above 'j' indexes the first
     * free slot, or equals the cache size when the cache is full. */
    if (mt && j < MODULE_LOOKUP_CACHE_SIZE) {
        cache[j].id = id;
        cache[j].mt = mt;
    }
    return mt;
}
+
+/* Turn an (unresolved) module ID into a type name, to show the user an
+ * error when RDB files contain module data we can't load.
+ * The buffer pointed by 'name' must be 10 bytes at least. The function will
+ * fill it with a null terminated module name. */
+void moduleTypeNameByID(char *name, uint64_t moduleid) {
+ const char *cset = ModuleTypeNameCharSet;
+
+ name[9] = '\0';
+ char *p = name+8;
+ moduleid >>= 10;
+ for (int j = 0; j < 9; j++) {
+ *p-- = cset[moduleid & 63];
+ moduleid >>= 6;
+ }
+}
+
/* Return the name of the module that owns the specified moduleType, or
 * NULL when 'mt' is NULL or not attached to a module. */
const char *moduleTypeModuleName(moduleType *mt) {
    if (!mt || !mt->module) return NULL;
    return mt->module->name;
}

/* Return the module name from a module command. The command must really be
 * a module command (asserted via its dispatcher proc). */
const char *moduleNameFromCommand(struct redisCommand *cmd) {
    serverAssert(cmd->proc == RedisModuleCommandDispatcher);

    RedisModuleCommand *cp = cmd->module_cmd;
    return cp->module->name;
}
+
+/* Create a copy of a module type value using the copy callback. If failed
+ * or not supported, produce an error reply and return NULL.
+ */
+robj *moduleTypeDupOrReply(client *c, robj *fromkey, robj *tokey, int todb, robj *value) {
+ moduleValue *mv = value->ptr;
+ moduleType *mt = mv->type;
+ if (!mt->copy && !mt->copy2) {
+ addReplyError(c, "not supported for this module key");
+ return NULL;
+ }
+ void *newval = NULL;
+ if (mt->copy2 != NULL) {
+ RedisModuleKeyOptCtx ctx = {fromkey, tokey, c->db->id, todb};
+ newval = mt->copy2(&ctx, mv->value);
+ } else {
+ newval = mt->copy(fromkey, tokey, mv->value);
+ }
+
+ if (!newval) {
+ addReplyError(c, "module key failed to copy");
+ return NULL;
+ }
+ return createModuleObject(mt, newval);
+}
+
+/* Register a new data type exported by the module. The parameters are the
+ * following. Please for in depth documentation check the modules API
+ * documentation, especially https://redis.io/topics/modules-native-types.
+ *
+ * * **name**: A 9 characters data type name that MUST be unique in the Redis
+ * Modules ecosystem. Be creative... and there will be no collisions. Use
+ * the charset A-Z a-z 9-0, plus the two "-_" characters. A good
+ * idea is to use, for example `<typename>-<vendor>`. For example
+ * "tree-AntZ" may mean "Tree data structure by @antirez". To use both
+ * lower case and upper case letters helps in order to prevent collisions.
+ * * **encver**: Encoding version, which is, the version of the serialization
+ * that a module used in order to persist data. As long as the "name"
+ * matches, the RDB loading will be dispatched to the type callbacks
+ * whatever 'encver' is used, however the module can understand if
+ * the encoding it must load are of an older version of the module.
+ * For example the module "tree-AntZ" initially used encver=0. Later
+ * after an upgrade, it started to serialize data in a different format
+ * and to register the type with encver=1. However this module may
+ * still load old data produced by an older version if the rdb_load
+ * callback is able to check the encver value and act accordingly.
+ * The encver must be a positive value between 0 and 1023.
+ *
+ * * **typemethods_ptr** is a pointer to a RedisModuleTypeMethods structure
+ * that should be populated with the methods callbacks and structure
+ * version, like in the following example:
+ *
+ * RedisModuleTypeMethods tm = {
+ * .version = REDISMODULE_TYPE_METHOD_VERSION,
+ * .rdb_load = myType_RDBLoadCallBack,
+ * .rdb_save = myType_RDBSaveCallBack,
+ * .aof_rewrite = myType_AOFRewriteCallBack,
+ * .free = myType_FreeCallBack,
+ *
+ * // Optional fields
+ * .digest = myType_DigestCallBack,
+ * .mem_usage = myType_MemUsageCallBack,
+ * .aux_load = myType_AuxRDBLoadCallBack,
+ * .aux_save = myType_AuxRDBSaveCallBack,
+ * .free_effort = myType_FreeEffortCallBack,
+ * .unlink = myType_UnlinkCallBack,
+ * .copy = myType_CopyCallback,
+ * .defrag = myType_DefragCallback
+ *
+ * // Enhanced optional fields
+ * .mem_usage2 = myType_MemUsageCallBack2,
+ * .free_effort2 = myType_FreeEffortCallBack2,
+ * .unlink2 = myType_UnlinkCallBack2,
+ * .copy2 = myType_CopyCallback2,
+ * }
+ *
+ * * **rdb_load**: A callback function pointer that loads data from RDB files.
+ * * **rdb_save**: A callback function pointer that saves data to RDB files.
+ * * **aof_rewrite**: A callback function pointer that rewrites data as commands.
+ * * **digest**: A callback function pointer that is used for `DEBUG DIGEST`.
+ * * **free**: A callback function pointer that can free a type value.
+ * * **aux_save**: A callback function pointer that saves out of keyspace data to RDB files.
+ * 'when' argument is either REDISMODULE_AUX_BEFORE_RDB or REDISMODULE_AUX_AFTER_RDB.
+ * * **aux_load**: A callback function pointer that loads out of keyspace data from RDB files.
+ * Similar to aux_save, returns REDISMODULE_OK on success, and ERR otherwise.
+ * * **free_effort**: A callback function pointer that used to determine whether the module's
+ * memory needs to be lazy reclaimed. The module should return the complexity involved by
+ * freeing the value. for example: how many pointers are gonna be freed. Note that if it
+ * returns 0, we'll always do an async free.
+ * * **unlink**: A callback function pointer that used to notifies the module that the key has
+ * been removed from the DB by redis, and may soon be freed by a background thread. Note that
+ * it won't be called on FLUSHALL/FLUSHDB (both sync and async), and the module can use the
+ * RedisModuleEvent_FlushDB to hook into that.
+ * * **copy**: A callback function pointer that is used to make a copy of the specified key.
+ * The module is expected to perform a deep copy of the specified value and return it.
+ * In addition, hints about the names of the source and destination keys is provided.
+ * A NULL return value is considered an error and the copy operation fails.
+ * Note: if the target key exists and is being overwritten, the copy callback will be
+ * called first, followed by a free callback to the value that is being replaced.
+ *
+ * * **defrag**: A callback function pointer that is used to request the module to defrag
+ * a key. The module should then iterate pointers and call the relevant RM_Defrag*()
+ * functions to defragment pointers or complex types. The module should continue
+ * iterating as long as RM_DefragShouldStop() returns a zero value, and return a
+ * zero value if finished or non-zero value if more work is left to be done. If more work
+ * needs to be done, RM_DefragCursorSet() and RM_DefragCursorGet() can be used to track
+ * this work across different calls.
+ * Normally, the defrag mechanism invokes the callback without a time limit, so
+ * RM_DefragShouldStop() always returns zero. The "late defrag" mechanism which has
+ * a time limit and provides cursor support is used only for keys that are determined
+ * to have significant internal complexity. To determine this, the defrag mechanism
+ * uses the free_effort callback and the 'active-defrag-max-scan-fields' config directive.
+ * NOTE: The value is passed as a `void**` and the function is expected to update the
+ * pointer if the top-level value pointer is defragmented and consequently changes.
+ *
+ * * **mem_usage2**: Similar to `mem_usage`, but provides the `RedisModuleKeyOptCtx` parameter
+ * so that meta information such as key name and db id can be obtained, and
+ * the `sample_size` for size estimation (see MEMORY USAGE command).
+ * * **free_effort2**: Similar to `free_effort`, but provides the `RedisModuleKeyOptCtx` parameter
+ * so that meta information such as key name and db id can be obtained.
+ * * **unlink2**: Similar to `unlink`, but provides the `RedisModuleKeyOptCtx` parameter
+ * so that meta information such as key name and db id can be obtained.
+ * * **copy2**: Similar to `copy`, but provides the `RedisModuleKeyOptCtx` parameter
+ * so that meta information such as key names and db ids can be obtained.
+ * * **aux_save2**: Similar to `aux_save`, but with small semantic change, if the module
+ * saves nothing on this callback then no data about this aux field will be written to the
+ * RDB and it will be possible to load the RDB even if the module is not loaded.
+ *
+ * Note: the module name "AAAAAAAAA" is reserved and produces an error, it
+ * happens to be pretty lame as well.
+ *
+ * If RedisModule_CreateDataType() is called outside of RedisModule_OnLoad() function,
+ * there is already a module registering a type with the same name,
+ * or if the module name or encver is invalid, NULL is returned.
+ * Otherwise the new type is registered into Redis, and a reference of
+ * type RedisModuleType is returned: the caller of the function should store
+ * this reference into a global variable to make future use of it in the
+ * modules type API, since a single module may register multiple types.
+ * Example code fragment:
+ *
+ * static RedisModuleType *BalancedTreeType;
+ *
+ * int RedisModule_OnLoad(RedisModuleCtx *ctx) {
+ * // some code here ...
+ * BalancedTreeType = RM_CreateDataType(...);
+ * }
+ */
moduleType *RM_CreateDataType(RedisModuleCtx *ctx, const char *name, int encver, void *typemethods_ptr) {
    /* Types may only be registered from within RedisModule_OnLoad(). */
    if (!ctx->module->onload)
        return NULL;
    uint64_t id = moduleTypeEncodeId(name,encver);
    if (id == 0) return NULL; /* Bad name length/charset or encver out of range. */
    if (moduleTypeLookupModuleByName(name) != NULL) return NULL; /* Name taken. */

    /* The version is always the first field of RedisModuleTypeMethods, so it
     * can be read before the full layout is known. */
    long typemethods_version = ((long*)typemethods_ptr)[0];
    if (typemethods_version == 0) return NULL;

    /* Internal mirror of RedisModuleTypeMethods. Fields are grouped by the
     * struct version that introduced them (v2..v5), so a module compiled
     * against an older header passes a shorter struct and we only read the
     * groups its declared version guarantees to exist. */
    struct typemethods {
        uint64_t version;
        moduleTypeLoadFunc rdb_load;
        moduleTypeSaveFunc rdb_save;
        moduleTypeRewriteFunc aof_rewrite;
        moduleTypeMemUsageFunc mem_usage;
        moduleTypeDigestFunc digest;
        moduleTypeFreeFunc free;
        struct {
            moduleTypeAuxLoadFunc aux_load;
            moduleTypeAuxSaveFunc aux_save;
            int aux_save_triggers;
        } v2;
        struct {
            moduleTypeFreeEffortFunc free_effort;
            moduleTypeUnlinkFunc unlink;
            moduleTypeCopyFunc copy;
            moduleTypeDefragFunc defrag;
        } v3;
        struct {
            moduleTypeMemUsageFunc2 mem_usage2;
            moduleTypeFreeEffortFunc2 free_effort2;
            moduleTypeUnlinkFunc2 unlink2;
            moduleTypeCopyFunc2 copy2;
        } v4;
        struct {
            moduleTypeAuxSaveFunc aux_save2;
        } v5;
    } *tms = (struct typemethods*) typemethods_ptr;

    /* zcalloc: any callback not copied below stays NULL (i.e. unsupported). */
    moduleType *mt = zcalloc(sizeof(*mt));
    mt->id = id;
    mt->module = ctx->module;
    mt->rdb_load = tms->rdb_load;
    mt->rdb_save = tms->rdb_save;
    mt->aof_rewrite = tms->aof_rewrite;
    mt->mem_usage = tms->mem_usage;
    mt->digest = tms->digest;
    mt->free = tms->free;
    if (tms->version >= 2) {
        mt->aux_load = tms->v2.aux_load;
        mt->aux_save = tms->v2.aux_save;
        mt->aux_save_triggers = tms->v2.aux_save_triggers;
    }
    if (tms->version >= 3) {
        mt->free_effort = tms->v3.free_effort;
        mt->unlink = tms->v3.unlink;
        mt->copy = tms->v3.copy;
        mt->defrag = tms->v3.defrag;
    }
    if (tms->version >= 4) {
        mt->mem_usage2 = tms->v4.mem_usage2;
        mt->unlink2 = tms->v4.unlink2;
        mt->free_effort2 = tms->v4.free_effort2;
        mt->copy2 = tms->v4.copy2;
    }
    if (tms->version >= 5) {
        mt->aux_save2 = tms->v5.aux_save2;
    }
    /* mt->name is a fixed 9 byte array (not NUL terminated): length was
     * already validated by moduleTypeEncodeId(). */
    memcpy(mt->name,name,sizeof(mt->name));
    listAddNodeTail(ctx->module->types,mt);
    return mt;
}
+
/* If the key is open for writing, set the specified module type object
 * as the value of the key, deleting the old value if any.
 * On success REDISMODULE_OK is returned. If the key is not open for
 * writing or there is an active iterator, REDISMODULE_ERR is returned. */
int RM_ModuleTypeSetValue(RedisModuleKey *key, moduleType *mt, void *value) {
    if (!(key->mode & REDISMODULE_WRITE) || key->iter) return REDISMODULE_ERR;
    RM_DeleteKey(key);
    robj *o = createModuleObject(mt,value);
    setKey(key->ctx->client,key->db,key->key,o,SETKEY_NO_SIGNAL);
    /* NOTE(review): assumes setKey() retains its own reference to 'o', so
     * dropping ours leaves the database as the owner and caching the raw
     * pointer in key->value is safe — confirm against setKey's contract. */
    decrRefCount(o);
    key->value = o;
    return REDISMODULE_OK;
}
+
+/* Assuming RedisModule_KeyType() returned REDISMODULE_KEYTYPE_MODULE on
+ * the key, returns the module type pointer of the value stored at key.
+ *
+ * If the key is NULL, is not associated with a module type, or is empty,
+ * then NULL is returned instead. */
+moduleType *RM_ModuleTypeGetType(RedisModuleKey *key) {
+ if (key == NULL ||
+ key->value == NULL ||
+ RM_KeyType(key) != REDISMODULE_KEYTYPE_MODULE) return NULL;
+ moduleValue *mv = key->value->ptr;
+ return mv->type;
+}
+
+/* Assuming RedisModule_KeyType() returned REDISMODULE_KEYTYPE_MODULE on
+ * the key, returns the module type low-level value stored at key, as
+ * it was set by the user via RedisModule_ModuleTypeSetValue().
+ *
+ * If the key is NULL, is not associated with a module type, or is empty,
+ * then NULL is returned instead. */
+void *RM_ModuleTypeGetValue(RedisModuleKey *key) {
+ if (key == NULL ||
+ key->value == NULL ||
+ RM_KeyType(key) != REDISMODULE_KEYTYPE_MODULE) return NULL;
+ moduleValue *mv = key->value->ptr;
+ return mv->value;
+}
+
+/* --------------------------------------------------------------------------
+ * ## RDB loading and saving functions
+ * -------------------------------------------------------------------------- */
+
/* Called when there is a load error in the context of a module. On some
 * modules this cannot be recovered, but if the module declared capability
 * to handle errors, we'll raise a flag rather than exiting.
 *
 * When the module did NOT declare REDISMODULE_OPTIONS_HANDLE_IO_ERRORS,
 * this function does not return: serverPanic() aborts the process. */
void moduleRDBLoadError(RedisModuleIO *io) {
    if (io->type->module->options & REDISMODULE_OPTIONS_HANDLE_IO_ERRORS) {
        /* Sticky flag: subsequent Load* calls on this io become no-ops. */
        io->error = 1;
        return;
    }
    serverPanic(
        "Error loading data from RDB (short read or EOF). "
        "Read performed by module '%s' about type '%s' "
        "after reading '%llu' bytes of a value "
        "for key named: '%s'.",
        io->type->module->name,
        io->type->name,
        (unsigned long long)io->bytes,
        io->key? (char*)io->key->ptr: "(null)");
}
+
+/* Returns 0 if there's at least one registered data type that did not declare
+ * REDISMODULE_OPTIONS_HANDLE_IO_ERRORS, in which case diskless loading should
+ * be avoided since it could cause data loss. */
+int moduleAllDatatypesHandleErrors(void) {
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ while ((de = dictNext(di)) != NULL) {
+ struct RedisModule *module = dictGetVal(de);
+ if (listLength(module->types) &&
+ !(module->options & REDISMODULE_OPTIONS_HANDLE_IO_ERRORS))
+ {
+ dictReleaseIterator(di);
+ return 0;
+ }
+ }
+ dictReleaseIterator(di);
+ return 1;
+}
+
+/* Returns 0 if module did not declare REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD, in which case
+ * diskless async loading should be avoided because module doesn't know there can be traffic during
+ * database full resynchronization. */
+int moduleAllModulesHandleReplAsyncLoad(void) {
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ while ((de = dictNext(di)) != NULL) {
+ struct RedisModule *module = dictGetVal(de);
+ if (!(module->options & REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD)) {
+ dictReleaseIterator(di);
+ return 0;
+ }
+ }
+ dictReleaseIterator(di);
+ return 1;
+}
+
/* Returns true if any previous IO API failed.
 * For the `Load*` APIs the REDISMODULE_OPTIONS_HANDLE_IO_ERRORS flag must be
 * set with RedisModule_SetModuleOptions first (otherwise a load error panics
 * instead of setting this flag). */
int RM_IsIOError(RedisModuleIO *io) {
    return io->error;
}
+
+static int flushRedisModuleIOBuffer(RedisModuleIO *io) {
+ if (!io->pre_flush_buffer) return 0;
+
+ /* We have data that must be flushed before saving the current data.
+ * Lets flush it. */
+ sds pre_flush_buffer = io->pre_flush_buffer;
+ io->pre_flush_buffer = NULL;
+ ssize_t retval = rdbWriteRaw(io->rio, pre_flush_buffer, sdslen(pre_flush_buffer));
+ sdsfree(pre_flush_buffer);
+ if (retval >= 0) io->bytes += retval;
+ return retval;
+}
+
/* Save an unsigned 64 bit value into the RDB file. This function should only
 * be called in the context of the rdb_save method of modules implementing new
 * data types. On write failure io->error is set, making all further Save*
 * calls on this io context no-ops. */
void RM_SaveUnsigned(RedisModuleIO *io, uint64_t value) {
    if (io->error) return; /* Sticky error: never write after a failure. */
    if (flushRedisModuleIOBuffer(io) == -1) goto saveerr;
    /* Save opcode. */
    int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_UINT);
    if (retval == -1) goto saveerr;
    io->bytes += retval;
    /* Save value. */
    retval = rdbSaveLen(io->rio, value);
    if (retval == -1) goto saveerr;
    io->bytes += retval;
    return;

saveerr:
    io->error = 1;
}
+
+/* Load an unsigned 64 bit value from the RDB file. This function should only
+ * be called in the context of the `rdb_load` method of modules implementing
+ * new data types. */
+uint64_t RM_LoadUnsigned(RedisModuleIO *io) {
+ if (io->error) return 0;
+ uint64_t opcode = rdbLoadLen(io->rio,NULL);
+ if (opcode != RDB_MODULE_OPCODE_UINT) goto loaderr;
+ uint64_t value;
+ int retval = rdbLoadLenByRef(io->rio, NULL, &value);
+ if (retval == -1) goto loaderr;
+ return value;
+
+loaderr:
+ moduleRDBLoadError(io);
+ return 0;
+}
+
/* Like RedisModule_SaveUnsigned() but for signed 64 bit values.
 * The union performs a bit-exact reinterpretation of the signed value as
 * unsigned (well defined for type punning in C, unlike a pointer cast). */
void RM_SaveSigned(RedisModuleIO *io, int64_t value) {
    union {uint64_t u; int64_t i;} conv;
    conv.i = value;
    RM_SaveUnsigned(io,conv.u);
}

/* Like RedisModule_LoadUnsigned() but for signed 64 bit values.
 * Inverse of the bit-cast done by RM_SaveSigned(). */
int64_t RM_LoadSigned(RedisModuleIO *io) {
    union {uint64_t u; int64_t i;} conv;
    conv.u = RM_LoadUnsigned(io);
    return conv.i;
}
+
/* In the context of the rdb_save method of a module type, saves a
 * string into the RDB file taking as input a RedisModuleString.
 *
 * The string can be later loaded with RedisModule_LoadString() or
 * other Load family functions expecting a serialized string inside
 * the RDB file. On write failure io->error is set and further Save*
 * calls become no-ops. */
void RM_SaveString(RedisModuleIO *io, RedisModuleString *s) {
    if (io->error) return; /* Sticky error: never write after a failure. */
    if (flushRedisModuleIOBuffer(io) == -1) goto saveerr;
    /* Save opcode. */
    ssize_t retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING);
    if (retval == -1) goto saveerr;
    io->bytes += retval;
    /* Save value. */
    retval = rdbSaveStringObject(io->rio, s);
    if (retval == -1) goto saveerr;
    io->bytes += retval;
    return;

saveerr:
    io->error = 1;
}
+
+/* Like RedisModule_SaveString() but takes a raw C pointer and length
+ * as input. */
+void RM_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len) {
+ if (io->error) return;
+ if (flushRedisModuleIOBuffer(io) == -1) goto saveerr;
+ /* Save opcode. */
+ ssize_t retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ /* Save value. */
+ retval = rdbSaveRawString(io->rio, (unsigned char*)str,len);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ return;
+
+saveerr:
+ io->error = 1;
+}
+
+/* Implements RM_LoadString() and RM_LoadStringBuffer() */
+void *moduleLoadString(RedisModuleIO *io, int plain, size_t *lenptr) {
+ if (io->error) return NULL;
+ uint64_t opcode = rdbLoadLen(io->rio,NULL);
+ if (opcode != RDB_MODULE_OPCODE_STRING) goto loaderr;
+ void *s = rdbGenericLoadStringObject(io->rio,
+ plain ? RDB_LOAD_PLAIN : RDB_LOAD_NONE, lenptr);
+ if (s == NULL) goto loaderr;
+ return s;
+
+loaderr:
+ moduleRDBLoadError(io);
+ return NULL;
+}
+
/* In the context of the rdb_load method of a module data type, loads a string
 * from the RDB file, that was previously saved with RedisModule_SaveString()
 * functions family.
 *
 * The returned string is a newly allocated RedisModuleString object, and
 * the user should at some point free it with a call to RedisModule_FreeString().
 *
 * If the data structure does not store strings as RedisModuleString objects,
 * the similar function RedisModule_LoadStringBuffer() could be used instead.
 * Returns NULL on IO error. */
RedisModuleString *RM_LoadString(RedisModuleIO *io) {
    return moduleLoadString(io,0,NULL);
}

/* Like RedisModule_LoadString() but returns a heap allocated string that
 * was allocated with RedisModule_Alloc(), and can be resized or freed with
 * RedisModule_Realloc() or RedisModule_Free().
 *
 * The size of the string is stored at '*lenptr' if not NULL.
 * The returned string is not automatically NULL terminated, it is loaded
 * exactly as it was stored inside the RDB file. */
char *RM_LoadStringBuffer(RedisModuleIO *io, size_t *lenptr) {
    return moduleLoadString(io,1,lenptr);
}
+
/* In the context of the rdb_save method of a module data type, saves a double
 * value to the RDB file. The double can be a valid number, a NaN or infinity.
 * It is possible to load back the value with RedisModule_LoadDouble().
 * On write failure io->error is set and further Save* calls become no-ops. */
void RM_SaveDouble(RedisModuleIO *io, double value) {
    if (io->error) return; /* Sticky error: never write after a failure. */
    if (flushRedisModuleIOBuffer(io) == -1) goto saveerr;
    /* Save opcode. */
    int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_DOUBLE);
    if (retval == -1) goto saveerr;
    io->bytes += retval;
    /* Save value. */
    retval = rdbSaveBinaryDoubleValue(io->rio, value);
    if (retval == -1) goto saveerr;
    io->bytes += retval;
    return;

saveerr:
    io->error = 1;
}
+
+/* In the context of the rdb_save method of a module data type, loads back the
+ * double value saved by RedisModule_SaveDouble(). */
+double RM_LoadDouble(RedisModuleIO *io) {
+ if (io->error) return 0;
+ uint64_t opcode = rdbLoadLen(io->rio,NULL);
+ if (opcode != RDB_MODULE_OPCODE_DOUBLE) goto loaderr;
+ double value;
+ int retval = rdbLoadBinaryDoubleValue(io->rio, &value);
+ if (retval == -1) goto loaderr;
+ return value;
+
+loaderr:
+ moduleRDBLoadError(io);
+ return 0;
+}
+
/* In the context of the rdb_save method of a module data type, saves a float
 * value to the RDB file. The float can be a valid number, a NaN or infinity.
 * It is possible to load back the value with RedisModule_LoadFloat().
 * On write failure io->error is set and further Save* calls become no-ops. */
void RM_SaveFloat(RedisModuleIO *io, float value) {
    if (io->error) return; /* Sticky error: never write after a failure. */
    if (flushRedisModuleIOBuffer(io) == -1) goto saveerr;
    /* Save opcode. */
    int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_FLOAT);
    if (retval == -1) goto saveerr;
    io->bytes += retval;
    /* Save value. */
    retval = rdbSaveBinaryFloatValue(io->rio, value);
    if (retval == -1) goto saveerr;
    io->bytes += retval;
    return;

saveerr:
    io->error = 1;
}
+
+/* In the context of the rdb_save method of a module data type, loads back the
+ * float value saved by RedisModule_SaveFloat(). */
+float RM_LoadFloat(RedisModuleIO *io) {
+ if (io->error) return 0;
+ uint64_t opcode = rdbLoadLen(io->rio,NULL);
+ if (opcode != RDB_MODULE_OPCODE_FLOAT) goto loaderr;
+ float value;
+ int retval = rdbLoadBinaryFloatValue(io->rio, &value);
+ if (retval == -1) goto loaderr;
+ return value;
+
+loaderr:
+ moduleRDBLoadError(io);
+ return 0;
+}
+
/* In the context of the rdb_save method of a module data type, saves a long double
 * value to the RDB file. The double can be a valid number, a NaN or infinity.
 * It is possible to load back the value with RedisModule_LoadLongDouble(). */
void RM_SaveLongDouble(RedisModuleIO *io, long double value) {
    if (io->error) return;
    char buf[MAX_LONG_DOUBLE_CHARS];
    /* Long double has different number of bits in different platforms, so we
     * save it as a string type (hex format for a lossless round-trip). */
    size_t len = ld2string(buf,sizeof(buf),value,LD_STR_HEX);
    RM_SaveStringBuffer(io,buf,len);
}
+
+/* In the context of the rdb_save method of a module data type, loads back the
+ * long double value saved by RedisModule_SaveLongDouble(). */
+long double RM_LoadLongDouble(RedisModuleIO *io) {
+ if (io->error) return 0;
+ long double value;
+ size_t len;
+ char* str = RM_LoadStringBuffer(io,&len);
+ if (!str) return 0;
+ string2ld(str,len,&value);
+ RM_Free(str);
+ return value;
+}
+
+/* Iterate over modules, and trigger rdb aux saving for the ones modules types
+ * who asked for it. */
+ssize_t rdbSaveModulesAux(rio *rdb, int when) {
+ size_t total_written = 0;
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ while ((de = dictNext(di)) != NULL) {
+ struct RedisModule *module = dictGetVal(de);
+ listIter li;
+ listNode *ln;
+
+ listRewind(module->types,&li);
+ while((ln = listNext(&li))) {
+ moduleType *mt = ln->value;
+ if ((!mt->aux_save && !mt->aux_save2) || !(mt->aux_save_triggers & when))
+ continue;
+ ssize_t ret = rdbSaveSingleModuleAux(rdb, when, mt);
+ if (ret==-1) {
+ dictReleaseIterator(di);
+ return -1;
+ }
+ total_written += ret;
+ }
+ }
+
+ dictReleaseIterator(di);
+ return total_written;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Key digest API (DEBUG DIGEST interface for modules types)
+ * -------------------------------------------------------------------------- */
+
+/* Add a new element to the digest. This function can be called multiple times
+ * one element after the other, for all the elements that constitute a given
+ * data structure. The function call must be followed by the call to
+ * `RedisModule_DigestEndSequence` eventually, when all the elements that are
+ * always in a given order are added. See the Redis Modules data types
+ * documentation for more info. However this is a quick example that uses Redis
+ * data types as an example.
+ *
+ * To add a sequence of unordered elements (for example in the case of a Redis
+ * Set), the pattern to use is:
+ *
+ * foreach element {
+ * AddElement(element);
+ * EndSequence();
+ * }
+ *
+ * Because Sets are not ordered, so every element added has a position that
+ * does not depend from the other. However if instead our elements are
+ * ordered in pairs, like field-value pairs of a Hash, then one should
+ * use:
+ *
+ * foreach key,value {
+ * AddElement(key);
+ * AddElement(value);
+ * EndSequence();
+ * }
+ *
+ * Because the key and value will be always in the above order, while instead
+ * the single key-value pairs, can appear in any position into a Redis hash.
+ *
+ * A list of ordered elements would be implemented with:
+ *
+ * foreach element {
+ * AddElement(element);
+ * }
+ * EndSequence();
+ *
+ */
+void RM_DigestAddStringBuffer(RedisModuleDigest *md, const char *ele, size_t len) {
+ mixDigest(md->o,ele,len);
+}
+
+/* Like `RedisModule_DigestAddStringBuffer()` but takes a `long long` as input
+ * that gets converted into a string before adding it to the digest. */
+void RM_DigestAddLongLong(RedisModuleDigest *md, long long ll) {
+ char buf[LONG_STR_SIZE];
+ size_t len = ll2string(buf,sizeof(buf),ll);
+ mixDigest(md->o,buf,len);
+}
+
+/* See the documentation for `RedisModule_DigestAddElement()`. */
+void RM_DigestEndSequence(RedisModuleDigest *md) {
+ xorDigest(md->x,md->o,sizeof(md->o));
+ memset(md->o,0,sizeof(md->o));
+}
+
+/* Decode a serialized representation of a module data type 'mt', in a specific encoding version 'encver'
+ * from string 'str' and return a newly allocated value, or NULL if decoding failed.
+ *
+ * This call basically reuses the 'rdb_load' callback which module data types
+ * implement in order to allow a module to arbitrarily serialize/de-serialize
+ * keys, similar to how the Redis 'DUMP' and 'RESTORE' commands are implemented.
+ *
+ * Modules should generally use the REDISMODULE_OPTIONS_HANDLE_IO_ERRORS flag and
+ * make sure the de-serialization code properly checks and handles IO errors
+ * (freeing allocated buffers and returning a NULL).
+ *
+ * If this is NOT done, Redis will handle corrupted (or just truncated) serialized
+ * data by producing an error message and terminating the process.
+ */
+void *RM_LoadDataTypeFromStringEncver(const RedisModuleString *str, const moduleType *mt, int encver) {
+ rio payload;
+ RedisModuleIO io;
+ void *ret;
+
+ rioInitWithBuffer(&payload, str->ptr);
+ moduleInitIOContext(io,(moduleType *)mt,&payload,NULL,-1);
+
+ /* All RM_Save*() calls always write a version 2 compatible format, so we
+ * need to make sure we read the same.
+ */
+ ret = mt->rdb_load(&io,encver);
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+ return ret;
+}
+
+/* Similar to RM_LoadDataTypeFromStringEncver, original version of the API, kept
+ * for backward compatibility.
+ */
+void *RM_LoadDataTypeFromString(const RedisModuleString *str, const moduleType *mt) {
+ return RM_LoadDataTypeFromStringEncver(str, mt, 0);
+}
+
+/* Encode a module data type 'mt' value 'data' into serialized form, and return it
+ * as a newly allocated RedisModuleString.
+ *
+ * This call basically reuses the 'rdb_save' callback which module data types
+ * implement in order to allow a module to arbitrarily serialize/de-serialize
+ * keys, similar to how the Redis 'DUMP' and 'RESTORE' commands are implemented.
+ */
+RedisModuleString *RM_SaveDataTypeToString(RedisModuleCtx *ctx, void *data, const moduleType *mt) {
+ rio payload;
+ RedisModuleIO io;
+
+ rioInitWithBuffer(&payload,sdsempty());
+ moduleInitIOContext(io,(moduleType *)mt,&payload,NULL,-1);
+ mt->rdb_save(&io,data);
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+ if (io.error) {
+ return NULL;
+ } else {
+ robj *str = createObject(OBJ_STRING,payload.io.buffer.ptr);
+ if (ctx != NULL) autoMemoryAdd(ctx,REDISMODULE_AM_STRING,str);
+ return str;
+ }
+}
+
+/* Returns the name of the key currently being processed. */
+const RedisModuleString *RM_GetKeyNameFromDigest(RedisModuleDigest *dig) {
+ return dig->key;
+}
+
+/* Returns the database id of the key currently being processed. */
+int RM_GetDbIdFromDigest(RedisModuleDigest *dig) {
+ return dig->dbid;
+}
+/* --------------------------------------------------------------------------
+ * ## AOF API for modules data types
+ * -------------------------------------------------------------------------- */
+
/* Emits a command into the AOF during the AOF rewriting process. This function
 * is only called in the context of the aof_rewrite method of data types exported
 * by a module. The command works exactly like RedisModule_Call() in the way
 * the parameters are passed, but it does not return anything as the error
 * handling is performed by Redis itself.
 *
 * On failure (unknown command, bad format specifiers, or rio write error)
 * io->error is set to 1; for the first two cases errno is set to EINVAL. */
void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) {
    if (io->error) return;
    struct redisCommand *cmd;
    robj **argv = NULL;
    int argc = 0, flags = 0, j;
    va_list ap;

    /* The command must exist in the command table, otherwise the rewritten
     * AOF would not be loadable. */
    cmd = lookupCommandByCString((char*)cmdname);
    if (!cmd) {
        serverLog(LL_WARNING,
            "Fatal: AOF method for module data type '%s' tried to "
            "emit unknown command '%s'",
            io->type->name, cmdname);
        io->error = 1;
        errno = EINVAL;
        return;
    }

    /* Emit the arguments into the AOF in Redis protocol format. */
    va_start(ap, fmt);
    argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
    va_end(ap);
    if (argv == NULL) {
        serverLog(LL_WARNING,
            "Fatal: AOF method for module data type '%s' tried to "
            "call RedisModule_EmitAOF() with wrong format specifiers '%s'",
            io->type->name, fmt);
        io->error = 1;
        errno = EINVAL;
        return;
    }

    /* Bulk count. */
    if (!io->error && rioWriteBulkCount(io->rio,'*',argc) == 0)
        io->error = 1;

    /* Arguments. Note that even after an error we keep looping, since every
     * argv[] object must have its refcount dropped exactly once. */
    for (j = 0; j < argc; j++) {
        if (!io->error && rioWriteBulkObject(io->rio,argv[j]) == 0)
            io->error = 1;
        decrRefCount(argv[j]);
    }
    zfree(argv);
    return;
}
+
+/* --------------------------------------------------------------------------
+ * ## IO context handling
+ * -------------------------------------------------------------------------- */
+
+RedisModuleCtx *RM_GetContextFromIO(RedisModuleIO *io) {
+ if (io->ctx) return io->ctx; /* Can't have more than one... */
+ io->ctx = zmalloc(sizeof(RedisModuleCtx));
+ moduleCreateContext(io->ctx, io->type->module, REDISMODULE_CTX_NONE);
+ return io->ctx;
+}
+
+/* Returns the name of the key currently being processed.
+ * There is no guarantee that the key name is always available, so this may return NULL.
+ */
+const RedisModuleString *RM_GetKeyNameFromIO(RedisModuleIO *io) {
+ return io->key;
+}
+
+/* Returns a RedisModuleString with the name of the key from RedisModuleKey. */
+const RedisModuleString *RM_GetKeyNameFromModuleKey(RedisModuleKey *key) {
+ return key ? key->key : NULL;
+}
+
+/* Returns a database id of the key from RedisModuleKey. */
+int RM_GetDbIdFromModuleKey(RedisModuleKey *key) {
+ return key ? key->db->id : -1;
+}
+
+/* Returns the database id of the key currently being processed.
+ * There is no guarantee that this info is always available, so this may return -1.
+ */
+int RM_GetDbIdFromIO(RedisModuleIO *io) {
+ return io->dbid;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Logging
+ * -------------------------------------------------------------------------- */
+
+/* This is the low level function implementing both:
+ *
+ * RM_Log()
+ * RM_LogIOError()
+ *
+ */
+void moduleLogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap) {
+ char msg[LOG_MAX_LEN];
+ size_t name_len;
+ int level;
+
+ if (!strcasecmp(levelstr,"debug")) level = LL_DEBUG;
+ else if (!strcasecmp(levelstr,"verbose")) level = LL_VERBOSE;
+ else if (!strcasecmp(levelstr,"notice")) level = LL_NOTICE;
+ else if (!strcasecmp(levelstr,"warning")) level = LL_WARNING;
+ else level = LL_VERBOSE; /* Default. */
+
+ if (level < server.verbosity) return;
+
+ name_len = snprintf(msg, sizeof(msg),"<%s> ", module? module->name: "module");
+ vsnprintf(msg + name_len, sizeof(msg) - name_len, fmt, ap);
+ serverLogRaw(level,msg);
+}
+
+/* Produces a log message to the standard Redis log, the format accepts
+ * printf-alike specifiers, while level is a string describing the log
+ * level to use when emitting the log, and must be one of the following:
+ *
+ * * "debug" (`REDISMODULE_LOGLEVEL_DEBUG`)
+ * * "verbose" (`REDISMODULE_LOGLEVEL_VERBOSE`)
+ * * "notice" (`REDISMODULE_LOGLEVEL_NOTICE`)
+ * * "warning" (`REDISMODULE_LOGLEVEL_WARNING`)
+ *
+ * If the specified log level is invalid, verbose is used by default.
+ * There is a fixed limit to the length of the log line this function is able
+ * to emit, this limit is not specified but is guaranteed to be more than
+ * a few lines of text.
+ *
+ * The ctx argument may be NULL if cannot be provided in the context of the
+ * caller for instance threads or callbacks, in which case a generic "module"
+ * will be used instead of the module name.
+ */
+void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ moduleLogRaw(ctx? ctx->module: NULL,levelstr,fmt,ap);
+ va_end(ap);
+}
+
+/* Log errors from RDB / AOF serialization callbacks.
+ *
+ * This function should be used when a callback is returning a critical
+ * error to the caller since cannot load or save the data for some
+ * critical reason. */
+void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ moduleLogRaw(io->type->module,levelstr,fmt,ap);
+ va_end(ap);
+}
+
/* Redis-like assert function.
 *
 * The macro `RedisModule_Assert(expression)` is recommended, rather than
 * calling this function directly.
 *
 * A failed assertion will shut down the server and produce logging information
 * that looks identical to information generated by Redis itself. Delegates
 * directly to the server's internal assertion handler. */
void RM__Assert(const char *estr, const char *file, int line) {
    _serverAssert(estr, file, line);
}
+
+/* Allows adding event to the latency monitor to be observed by the LATENCY
+ * command. The call is skipped if the latency is smaller than the configured
+ * latency-monitor-threshold. */
+void RM_LatencyAddSample(const char *event, mstime_t latency) {
+ if (latency >= server.latency_monitor_threshold)
+ latencyAddSample(event, latency);
+}
+
+/* --------------------------------------------------------------------------
+ * ## Blocking clients from modules
+ *
+ * For a guide about blocking commands in modules, see
+ * https://redis.io/topics/modules-blocking-ops.
+ * -------------------------------------------------------------------------- */
+
/* This is called from blocked.c in order to unblock a client: may be called
 * for multiple reasons while the client is in the middle of being blocked
 * because the client is terminated, but is also called for cleanup when a
 * client is unblocked in a clean way after replaying.
 *
 * What we do here is just to set the client to NULL in the redis module
 * blocked client handle. This way if the client is terminated while there
 * is a pending threaded operation involving the blocked client, we'll know
 * that the client no longer exists and no reply callback should be called.
 *
 * The structure RedisModuleBlockedClient will be always deallocated when
 * running the list of clients blocked by a module that need to be unblocked. */
void unblockClientFromModule(client *c) {
    RedisModuleBlockedClient *bc = c->bstate.module_blocked_handle;

    /* Call the disconnection callback if any. Note that
     * bc->disconnect_callback is set to NULL if the client gets disconnected
     * by the module itself or because of a timeout, so the callback will NOT
     * get called if this is not an actual disconnection event. */
    if (bc->disconnect_callback) {
        /* A short-lived context is built on the stack just for the callback
         * invocation, exposing the blocked privdata and the client. */
        RedisModuleCtx ctx;
        moduleCreateContext(&ctx, bc->module, REDISMODULE_CTX_NONE);
        ctx.blocked_privdata = bc->privdata;
        ctx.client = bc->client;
        bc->disconnect_callback(&ctx,bc);
        moduleFreeContext(&ctx);
    }

    /* If we made it here and client is still blocked it means that the command
     * timed-out, client was killed or disconnected and disconnect_callback was
     * not implemented (or it was, but RM_UnblockClient was not called from
     * within it, as it should).
     * We must call moduleUnblockClient in order to free privdata and
     * RedisModuleBlockedClient.
     *
     * Note that we only do that for clients that are blocked on keys, for which
     * the contract is that the module should not call RM_UnblockClient under
     * normal circumstances.
     * Clients implementing threads and working with private data should be
     * aware that calling RM_UnblockClient for every blocked client is their
     * responsibility, and if they fail to do so memory may leak. Ideally they
     * should implement the disconnect and timeout callbacks and call
     * RM_UnblockClient, but any other way is also acceptable. */
    if (bc->blocked_on_keys && !bc->unblocked)
        moduleUnblockClient(c);

    /* Mark the handle as orphaned: pending threaded work must not try to
     * reply to this client anymore. */
    bc->client = NULL;
}
+
/* Block a client in the context of a module: this function implements both
 * RM_BlockClient() and RM_BlockClientOnKeys() depending on the fact the
 * keys are passed or not.
 *
 * When not blocking for keys, the keys, numkeys, and privdata parameters are
 * not needed. The privdata in that case must be NULL, since later is
 * RM_UnblockClient() that will provide some private data that the reply
 * callback will receive.
 *
 * Instead when blocking for keys, normally RM_UnblockClient() will not be
 * called (because the client will unblock when the key is modified), so
 * 'privdata' should be provided in that case, so that once the client is
 * unlocked and the reply callback is called, it will receive its associated
 * private data.
 *
 * Even when blocking on keys, RM_UnblockClient() can be called however, but
 * in that case the privdata argument is disregarded, because we pass the
 * reply callback the privdata that is set here while blocking.
 *
 * Returns the newly allocated RedisModuleBlockedClient handle in all cases,
 * even when the client could not actually be blocked (Lua / MULTI / reply
 * context / timeout overflow): in those cases the handle is "aborted"
 * (bc->client is NULL) and an error reply is added to the client instead.
 */
RedisModuleBlockedClient *moduleBlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback,
                                            RedisModuleAuthCallback auth_reply_callback,
                                            RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*),
                                            long long timeout_ms, RedisModuleString **keys, int numkeys, void *privdata,
                                            int flags) {
    client *c = ctx->client;
    int islua = scriptIsRunning();
    int ismulti = server.in_exec;

    c->bstate.module_blocked_handle = zmalloc(sizeof(RedisModuleBlockedClient));
    RedisModuleBlockedClient *bc = c->bstate.module_blocked_handle;
    /* Accounted per-module, decremented when the handle is freed. */
    ctx->module->blocked_clients++;

    /* We need to handle the invalid operation of calling modules blocking
     * commands from Lua or MULTI. We actually create an already aborted
     * (client set to NULL) blocked client handle, and actually reply with
     * an error. */
    bc->client = (islua || ismulti) ? NULL : c;
    bc->module = ctx->module;
    bc->reply_callback = reply_callback;
    bc->auth_reply_cb = auth_reply_callback;
    bc->timeout_callback = timeout_callback;
    bc->disconnect_callback = NULL; /* Set by RM_SetDisconnectCallback() */
    bc->free_privdata = free_privdata;
    bc->privdata = privdata;
    /* Two detached clients: one to accumulate replies from threads, one to
     * back thread-safe contexts. */
    bc->reply_client = moduleAllocTempClient();
    bc->thread_safe_ctx_client = moduleAllocTempClient();
    if (bc->client)
        bc->reply_client->resp = bc->client->resp; /* Match the RESP version. */
    bc->dbid = c->db->id;
    bc->blocked_on_keys = keys != NULL;
    bc->unblocked = 0;
    bc->background_timer = 0;
    bc->background_duration = 0;

    c->bstate.timeout = 0;
    if (timeout_ms) {
        mstime_t now = mstime();
        /* Reject timeouts whose absolute deadline would not fit in a
         * long long; the handle is returned aborted in that case. */
        if (timeout_ms > LLONG_MAX - now) {
            c->bstate.module_blocked_handle = NULL;
            addReplyError(c, "timeout is out of range"); /* 'timeout_ms+now' would overflow */
            return bc;
        }
        c->bstate.timeout = timeout_ms + now;
    }

    if (islua || ismulti) {
        c->bstate.module_blocked_handle = NULL;
        addReplyError(c, islua ?
            "Blocking module command called from Lua script" :
            "Blocking module command called from transaction");
    } else if (ctx->flags & REDISMODULE_CTX_BLOCKED_REPLY) {
        c->bstate.module_blocked_handle = NULL;
        addReplyError(c, "Blocking module command called from a Reply callback context");
    } else if (!auth_reply_callback && clientHasModuleAuthInProgress(c)) {
        c->bstate.module_blocked_handle = NULL;
        addReplyError(c, "Clients undergoing module based authentication can only be blocked on auth");
    } else {
        /* Actually block: either on the given keys, or unconditionally. */
        if (keys) {
            blockForKeys(c,BLOCKED_MODULE,keys,numkeys,c->bstate.timeout,flags&REDISMODULE_BLOCK_UNBLOCK_DELETED);
        } else {
            blockClient(c,BLOCKED_MODULE);
        }
    }
    return bc;
}
+
+/* This API registers a callback to execute in addition to normal password based authentication.
+ * Multiple callbacks can be registered across different modules. When a Module is unloaded, all the
+ * auth callbacks registered by it are unregistered.
+ * The callbacks are attempted (in the order of most recently registered first) when the AUTH/HELLO
+ * (with AUTH field provided) commands are called.
+ * The callbacks will be called with a module context along with a username and a password, and are
+ * expected to take one of the following actions:
+ * (1) Authenticate - Use the RM_AuthenticateClient* API and return REDISMODULE_AUTH_HANDLED.
+ * This will immediately end the auth chain as successful and add the OK reply.
+ * (2) Deny Authentication - Return REDISMODULE_AUTH_HANDLED without authenticating or blocking the
+ * client. Optionally, `err` can be set to a custom error message and `err` will be automatically
+ * freed by the server.
+ * This will immediately end the auth chain as unsuccessful and add the ERR reply.
+ * (3) Block a client on authentication - Use the RM_BlockClientOnAuth API and return
+ * REDISMODULE_AUTH_HANDLED. Here, the client will be blocked until the RM_UnblockClient API is used
+ * which will trigger the auth reply callback (provided through the RM_BlockClientOnAuth).
+ * In this reply callback, the Module should authenticate, deny or skip handling authentication.
+ * (4) Skip handling Authentication - Return REDISMODULE_AUTH_NOT_HANDLED without blocking the
+ * client. This will allow the engine to attempt the next module auth callback.
+ * If none of the callbacks authenticate or deny auth, then password based auth is attempted and
+ * will authenticate or add failure logs and reply to the clients accordingly.
+ *
+ * Note: If a client is disconnected while it was in the middle of blocking module auth, that
+ * occurrence of the AUTH or HELLO command will not be tracked in the INFO command stats.
+ *
+ * The following is an example of how non-blocking module based authentication can be used:
+ *
+ * int auth_cb(RedisModuleCtx *ctx, RedisModuleString *username, RedisModuleString *password, RedisModuleString **err) {
+ * const char *user = RedisModule_StringPtrLen(username, NULL);
+ * const char *pwd = RedisModule_StringPtrLen(password, NULL);
+ * if (!strcmp(user,"foo") && !strcmp(pwd,"valid_password")) {
+ * RedisModule_AuthenticateClientWithACLUser(ctx, "foo", 3, NULL, NULL, NULL);
+ * return REDISMODULE_AUTH_HANDLED;
+ * }
+ *
+ * else if (!strcmp(user,"foo") && !strcmp(pwd,"wrong_password")) {
+ * RedisModuleString *log = RedisModule_CreateString(ctx, "Module Auth", 11);
+ * RedisModule_ACLAddLogEntryByUserName(ctx, username, log, REDISMODULE_ACL_LOG_AUTH);
+ * RedisModule_FreeString(ctx, log);
+ * const char *err_msg = "Auth denied by Misc Module.";
+ * *err = RedisModule_CreateString(ctx, err_msg, strlen(err_msg));
+ * return REDISMODULE_AUTH_HANDLED;
+ * }
+ * return REDISMODULE_AUTH_NOT_HANDLED;
+ * }
+ *
+ * int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ * if (RedisModule_Init(ctx,"authmodule",1,REDISMODULE_APIVER_1)== REDISMODULE_ERR)
+ * return REDISMODULE_ERR;
+ * RedisModule_RegisterAuthCallback(ctx, auth_cb);
+ * return REDISMODULE_OK;
+ * }
+ */
+void RM_RegisterAuthCallback(RedisModuleCtx *ctx, RedisModuleAuthCallback cb) {
+ RedisModuleAuthCtx *auth_ctx = zmalloc(sizeof(RedisModuleAuthCtx));
+ auth_ctx->module = ctx->module;
+ auth_ctx->auth_cb = cb;
+ listAddNodeHead(moduleAuthCallbacks, auth_ctx);
+}
+
+/* Helper function to invoke the free private data callback of a Module blocked client. */
+void moduleInvokeFreePrivDataCallback(client *c, RedisModuleBlockedClient *bc) {
+ if (bc->privdata && bc->free_privdata) {
+ RedisModuleCtx ctx;
+ int ctx_flags = c == NULL ? REDISMODULE_CTX_BLOCKED_DISCONNECTED : REDISMODULE_CTX_NONE;
+ moduleCreateContext(&ctx, bc->module, ctx_flags);
+ ctx.blocked_privdata = bc->privdata;
+ ctx.client = bc->client;
+ bc->free_privdata(&ctx,bc->privdata);
+ moduleFreeContext(&ctx);
+ }
+}
+
+/* Unregisters all the module auth callbacks that have been registered by this Module. */
+void moduleUnregisterAuthCBs(RedisModule *module) {
+ listIter li;
+ listNode *ln;
+ listRewind(moduleAuthCallbacks, &li);
+ while ((ln = listNext(&li))) {
+ RedisModuleAuthCtx *ctx = listNodeValue(ln);
+ if (ctx->module == module) {
+ listDelNode(moduleAuthCallbacks, ln);
+ zfree(ctx);
+ }
+ }
+}
+
/* Search for & attempt next module auth callback after skipping the ones already attempted.
 * Returns the result of the module auth callback.
 *
 * c->module_auth_ctx records the last attempted callback context: when NULL
 * we start from the head of the list, otherwise we skip forward until we
 * pass that context and resume with the one after it. */
int attemptNextAuthCb(client *c, robj *username, robj *password, robj **err) {
    /* If no callback was attempted yet, start handling from the first one. */
    int handle_next_callback = c->module_auth_ctx == NULL;
    RedisModuleAuthCtx *cur_auth_ctx = NULL;
    listNode *ln;
    listIter li;
    listRewind(moduleAuthCallbacks, &li);
    int result = REDISMODULE_AUTH_NOT_HANDLED;
    while((ln = listNext(&li))) {
        cur_auth_ctx = listNodeValue(ln);
        /* Skip over the previously attempted auth contexts. */
        if (!handle_next_callback) {
            /* Once we see the last-attempted context, the NEXT iteration
             * is the first candidate. */
            handle_next_callback = cur_auth_ctx == c->module_auth_ctx;
            continue;
        }
        /* Remove the module auth complete flag before we attempt the next cb. */
        c->flags &= ~CLIENT_MODULE_AUTH_HAS_RESULT;
        RedisModuleCtx ctx;
        moduleCreateContext(&ctx, cur_auth_ctx->module, REDISMODULE_CTX_NONE);
        ctx.client = c;
        *err = NULL;
        /* Remember which context we attempted, so a later call resumes
         * from the one after it. */
        c->module_auth_ctx = cur_auth_ctx;
        result = cur_auth_ctx->auth_cb(&ctx, username, password, err);
        moduleFreeContext(&ctx);
        if (result == REDISMODULE_AUTH_HANDLED) break;
        /* If Auth was not handled (allowed/denied/blocked) by the Module, try the next auth cb. */
    }
    return result;
}
+
/* Helper function to handle a reprocessed unblocked auth client.
 * Returns REDISMODULE_AUTH_NOT_HANDLED if the client was not reprocessed after a blocking module
 * auth operation.
 * Otherwise, we attempt the auth reply callback & the free priv data callback, update fields and
 * return the result of the reply callback. */
int attemptBlockedAuthReplyCallback(client *c, robj *username, robj *password, robj **err) {
    int result = REDISMODULE_AUTH_NOT_HANDLED;
    /* Only relevant when this client went through blocking module auth. */
    if (!c->module_blocked_client) return result;
    RedisModuleBlockedClient *bc = (RedisModuleBlockedClient *) c->module_blocked_client;
    /* Re-attach the (possibly cleared) client to the handle for the reply. */
    bc->client = c;
    if (bc->auth_reply_cb) {
        RedisModuleCtx ctx;
        moduleCreateContext(&ctx, bc->module, REDISMODULE_CTX_BLOCKED_REPLY);
        ctx.blocked_privdata = bc->privdata;
        ctx.blocked_ready_key = NULL;
        ctx.client = bc->client;
        ctx.blocked_client = bc;
        result = bc->auth_reply_cb(&ctx, username, password, err);
        moduleFreeContext(&ctx);
    }
    moduleInvokeFreePrivDataCallback(c, bc);
    c->module_blocked_client = NULL;
    /* Charge the background time to the command's duration stats.
     * NOTE(review): this dereferences c->lastcmd unconditionally — assumes
     * a client reprocessed after blocking auth always has a last command
     * set; confirm against the unblocking path. */
    c->lastcmd->microseconds += bc->background_duration;
    bc->module->blocked_clients--;
    /* All bc fields are read before the handle is released. */
    zfree(bc);
    return result;
}
+
/* Helper function to attempt Module based authentication through module auth callbacks.
 * Here, the Module is expected to authenticate the client using the RedisModule APIs and to add ACL
 * logs in case of errors.
 * Returns one of the following codes:
 * AUTH_OK - Indicates that a module handled and authenticated the client.
 * AUTH_ERR - Indicates that a module handled and denied authentication for this client.
 * AUTH_NOT_HANDLED - Indicates that authentication was not handled by any Module and that
 * normal password based authentication can be attempted next.
 * AUTH_BLOCKED - Indicates module authentication is in progress through a blocking implementation.
 * In this case, authentication is handled here again after the client is unblocked / reprocessed. */
int checkModuleAuthentication(client *c, robj *username, robj *password, robj **err) {
    /* Fast path: no module registered any auth callback. */
    if (!listLength(moduleAuthCallbacks)) return AUTH_NOT_HANDLED;
    /* First, finish a pending blocked-auth reply if this is a reprocess. */
    int result = attemptBlockedAuthReplyCallback(c, username, password, err);
    if (result == REDISMODULE_AUTH_NOT_HANDLED) {
        result = attemptNextAuthCb(c, username, password, err);
    }
    if (c->flags & CLIENT_BLOCKED) {
        /* Modules are expected to return REDISMODULE_AUTH_HANDLED when blocking clients. */
        serverAssert(result == REDISMODULE_AUTH_HANDLED);
        /* Keep module_auth_ctx so we resume from the right callback later. */
        return AUTH_BLOCKED;
    }
    /* Auth chain finished for this command: reset the resume cursor. */
    c->module_auth_ctx = NULL;
    if (result == REDISMODULE_AUTH_NOT_HANDLED) {
        c->flags &= ~CLIENT_MODULE_AUTH_HAS_RESULT;
        return AUTH_NOT_HANDLED;
    }
    if (c->flags & CLIENT_MODULE_AUTH_HAS_RESULT) {
        c->flags &= ~CLIENT_MODULE_AUTH_HAS_RESULT;
        /* The module called an RM_AuthenticateClient* API; success depends
         * on whether the client ended up authenticated. */
        if (c->authenticated) return AUTH_OK;
    }
    return AUTH_ERR;
}
+
+/* This function is called from module.c in order to check if a module
+ * blocked for BLOCKED_MODULE and subtype 'on keys' (bc->blocked_on_keys true)
+ * can really be unblocked, since the module was able to serve the client.
+ * If the callback returns REDISMODULE_OK, then the client can be unblocked,
+ * otherwise the client remains blocked and we'll retry again when one of
+ * the keys it blocked for becomes "ready" again.
+ * This function returns 1 if client was served (and should be unblocked) */
+int moduleTryServeClientBlockedOnKey(client *c, robj *key) {
+ int served = 0;
+ RedisModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+
+ /* Protect against re-processing: don't serve clients that are already
+ * in the unblocking list for any reason (including RM_UnblockClient()
+ * explicit call). See #6798. */
+ if (bc->unblocked) return 0;
+
+ RedisModuleCtx ctx;
+ moduleCreateContext(&ctx, bc->module, REDISMODULE_CTX_BLOCKED_REPLY);
+ ctx.blocked_ready_key = key;
+ ctx.blocked_privdata = bc->privdata;
+ ctx.client = bc->client;
+ ctx.blocked_client = bc;
+ if (bc->reply_callback(&ctx,(void**)c->argv,c->argc) == REDISMODULE_OK)
+ served = 1;
+ moduleFreeContext(&ctx);
+ return served;
+}
+
+/* Block a client in the context of a blocking command, returning a handle
+ * which will be used, later, in order to unblock the client with a call to
+ * RedisModule_UnblockClient(). The arguments specify callback functions
+ * and a timeout after which the client is unblocked.
+ *
+ * The callbacks are called in the following contexts:
+ *
+ * reply_callback: called after a successful RedisModule_UnblockClient()
+ * call in order to reply to the client and unblock it.
+ *
+ * timeout_callback: called when the timeout is reached or if `CLIENT UNBLOCK`
+ * is invoked, in order to send an error to the client.
+ *
+ * free_privdata: called in order to free the private data that is passed
+ * by RedisModule_UnblockClient() call.
+ *
+ * Note: RedisModule_UnblockClient should be called for every blocked client,
+ * even if client was killed, timed-out or disconnected. Failing to do so
+ * will result in memory leaks.
+ *
+ * There are some cases where RedisModule_BlockClient() cannot be used:
+ *
+ * 1. If the client is a Lua script.
+ * 2. If the client is executing a MULTI block.
+ *
+ * In these cases, a call to RedisModule_BlockClient() will **not** block the
+ * client, but instead produce a specific error reply.
+ *
+ * A module that registers a timeout_callback function can also be unblocked
+ * using the `CLIENT UNBLOCK` command, which will trigger the timeout callback.
+ * If a callback function is not registered, then the blocked client will be
+ * treated as if it is not in a blocked state and `CLIENT UNBLOCK` will return
+ * a zero value.
+ *
+ * Measuring background time: By default the time spent in the blocked command
+ * is not account for the total command duration. To include such time you should
+ * use RM_BlockedClientMeasureTimeStart() and RM_BlockedClientMeasureTimeEnd() one,
+ * or multiple times within the blocking command background work.
+ */
+RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback,
+ RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*),
+ long long timeout_ms) {
+ return moduleBlockClient(ctx,reply_callback,NULL,timeout_callback,free_privdata,timeout_ms, NULL,0,NULL,0);
+}
+
+/* Block the current client for module authentication in the background. If module auth is not in
+ * progress on the client, the API returns NULL. Otherwise, the client is blocked and the RM_BlockedClient
+ * is returned similar to the RM_BlockClient API.
+ * Note: Only use this API from the context of a module auth callback. */
+RedisModuleBlockedClient *RM_BlockClientOnAuth(RedisModuleCtx *ctx, RedisModuleAuthCallback reply_callback,
+ void (*free_privdata)(RedisModuleCtx*,void*)) {
+ if (!clientHasModuleAuthInProgress(ctx->client)) {
+ addReplyError(ctx->client, "Module blocking client on auth when not currently undergoing module authentication");
+ return NULL;
+ }
+ RedisModuleBlockedClient *bc = moduleBlockClient(ctx,NULL,reply_callback,NULL,free_privdata,0, NULL,0,NULL,0);
+ if (ctx->client->flags & CLIENT_BLOCKED) {
+ ctx->client->flags |= CLIENT_PENDING_COMMAND;
+ }
+ return bc;
+}
+
+/* Get the private data that was previusely set on a blocked client */
+void *RM_BlockClientGetPrivateData(RedisModuleBlockedClient *blocked_client) {
+ return blocked_client->privdata;
+}
+
+/* Set private data on a blocked client */
+void RM_BlockClientSetPrivateData(RedisModuleBlockedClient *blocked_client, void *private_data) {
+ blocked_client->privdata = private_data;
+}
+
+/* This call is similar to RedisModule_BlockClient(), however in this case we
+ * don't just block the client, but also ask Redis to unblock it automatically
+ * once certain keys become "ready", that is, contain more data.
+ *
+ * Basically this is similar to what a typical Redis command usually does,
+ * like BLPOP or BZPOPMAX: the client blocks if it cannot be served ASAP,
+ * and later when the key receives new data (a list push for instance), the
+ * client is unblocked and served.
+ *
+ * However in the case of this module API, when is the client unblocked?
+ *
+ * 1. If you block on a key of a type that has blocking operations associated,
+ *    like a list, a sorted set, a stream, and so forth, the client may be
+ *    unblocked once the relevant key is targeted by an operation that normally
+ *    unblocks the native blocking operations for that type. So if we block
+ *    on a list key, an RPUSH command may unblock our client and so forth.
+ * 2. If you are implementing your native data type, or if you want to add new
+ *    unblocking conditions in addition to "1", you can call the modules API
+ *    RedisModule_SignalKeyAsReady().
+ *
+ * Anyway we can't be sure if the client should be unblocked just because the
+ * key is signaled as ready: for instance a successive operation may change the
+ * key, or a client in queue before this one can be served, modifying the key
+ * as well and making it empty again. So when a client is blocked with
+ * RedisModule_BlockClientOnKeys() the reply callback is not called after
+ * RM_UnblockClient() is called, but every time a key is signaled as ready:
+ * if the reply callback can serve the client, it returns REDISMODULE_OK
+ * and the client is unblocked, otherwise it will return REDISMODULE_ERR
+ * and we'll try again later.
+ *
+ * The reply callback can access the key that was signaled as ready by
+ * calling the API RedisModule_GetBlockedClientReadyKey(), that returns
+ * just the string name of the key as a RedisModuleString object.
+ *
+ * Thanks to this system we can set up complex blocking scenarios, like
+ * unblocking a client only if a list contains at least 5 items or other
+ * more fancy logic.
+ *
+ * Note that another difference with RedisModule_BlockClient(), is that here
+ * we pass the private data directly when blocking the client: it will
+ * be accessible later in the reply callback. Normally when blocking with
+ * RedisModule_BlockClient() the private data to reply to the client is
+ * passed when calling RedisModule_UnblockClient() but here the unblocking
+ * is performed by Redis itself, so we need to have some private data
+ * beforehand. The private data is used to store any information about the
+ * specific unblocking operation that you are implementing. Such information
+ * will be freed using the free_privdata callback provided by the user.
+ *
+ * However the reply callback will be able to access the argument vector of
+ * the command, so the private data is often not needed.
+ *
+ * Note: Under normal circumstances RedisModule_UnblockClient should not be
+ *       called for clients that are blocked on keys (Either the key will
+ *       become ready or a timeout will occur). If for some reason you do want
+ *       to call RedisModule_UnblockClient it is possible: Client will be
+ *       handled as if it were timed-out (You must implement the timeout
+ *       callback in that case).
+ */
+RedisModuleBlockedClient *RM_BlockClientOnKeys(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback,
+ RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*),
+ long long timeout_ms, RedisModuleString **keys, int numkeys, void *privdata) {
+ return moduleBlockClient(ctx,reply_callback,NULL,timeout_callback,free_privdata,timeout_ms, keys,numkeys,privdata,0);
+}
+
+/* Same as RedisModule_BlockClientOnKeys, but can take REDISMODULE_BLOCK_* flags
+ * Can be either REDISMODULE_BLOCK_UNBLOCK_DEFAULT, which means default behavior (same
+ * as calling RedisModule_BlockClientOnKeys)
+ *
+ * The flags is a bit mask of these:
+ *
+ * - `REDISMODULE_BLOCK_UNBLOCK_DELETED`: The clients should be awakened in case any of `keys` are deleted.
+ *                                        Mostly useful for commands that require the key to exist (like XREADGROUP)
+ */
+RedisModuleBlockedClient *RM_BlockClientOnKeysWithFlags(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback,
+ RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*),
+ long long timeout_ms, RedisModuleString **keys, int numkeys, void *privdata,
+ int flags) {
+ return moduleBlockClient(ctx,reply_callback,NULL,timeout_callback,free_privdata,timeout_ms, keys,numkeys,privdata,flags);
+}
+
+/* This function is used in order to potentially unblock a client blocked
+ * on keys with RedisModule_BlockClientOnKeys(). When this function is called,
+ * all the clients blocked for this key will get their reply_callback called.
+ * The key is signaled in the DB currently selected by the calling client. */
+void RM_SignalKeyAsReady(RedisModuleCtx *ctx, RedisModuleString *key) {
+ signalKeyAsReady(ctx->client->db, key, OBJ_MODULE);
+}
+
+/* Implements RM_UnblockClient() and moduleUnblockClient().
+ * Appends the blocked client handle to the moduleUnblockedClients queue
+ * (consumed by moduleHandleBlockedClients() in the main thread) under the
+ * queue mutex, so it is safe to call from module-spawned threads. */
+int moduleUnblockClientByHandle(RedisModuleBlockedClient *bc, void *privdata) {
+ pthread_mutex_lock(&moduleUnblockedClientsMutex);
+ /* For keys-blocked clients the privdata was provided at blocking time
+ * and must not be overwritten here. */
+ if (!bc->blocked_on_keys) bc->privdata = privdata;
+ bc->unblocked = 1;
+ /* Wake up the event loop via the module pipe, but only on the empty->non
+ * empty transition, to avoid redundant writes. */
+ if (listLength(moduleUnblockedClients) == 0) {
+ if (write(server.module_pipe[1],"A",1) != 1) {
+ /* Ignore the error, this is best-effort. */
+ }
+ }
+ listAddNodeTail(moduleUnblockedClients,bc);
+ pthread_mutex_unlock(&moduleUnblockedClientsMutex);
+ return REDISMODULE_OK;
+}
+
+/* This API is used by the Redis core to unblock a client that was blocked
+ * by a module: it simply queues the client's module handle with no private
+ * data attached. */
+void moduleUnblockClient(client *c) {
+ moduleUnblockClientByHandle(c->bstate.module_blocked_handle, NULL);
+}
+
+/* Return true if the client 'c' was blocked by a module using
+ * RM_BlockClientOnKeys(). */
+int moduleClientIsBlockedOnKeys(client *c) {
+ RedisModuleBlockedClient *handle = c->bstate.module_blocked_handle;
+ return handle->blocked_on_keys;
+}
+
+/* Unblock a client blocked by `RedisModule_BlockedClient`. This will trigger
+ * the reply callbacks to be called in order to reply to the client.
+ * The 'privdata' argument will be accessible by the reply callback, so
+ * the caller of this function can pass any value that is needed in order to
+ * actually reply to the client.
+ *
+ * A common usage for 'privdata' is a thread that computes something that
+ * needs to be passed to the client, included but not limited some slow
+ * to compute reply or some reply obtained via networking.
+ *
+ * Note 1: this function can be called from threads spawned by the module.
+ *
+ * Note 2: when we unblock a client that is blocked for keys using the API
+ * RedisModule_BlockClientOnKeys(), the privdata argument here is not used.
+ * Unblocking a client that was blocked for keys using this API will still
+ * require the client to get some reply, so the function will use the
+ * "timeout" handler in order to do so (The privdata provided in
+ * RedisModule_BlockClientOnKeys() is accessible from the timeout
+ * callback via RM_GetBlockedClientPrivateData). */
+int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata) {
+ if (bc->blocked_on_keys) {
+ /* In theory the user should always pass the timeout handler as an
+ * argument, but better to be safe than sorry. */
+ if (bc->timeout_callback == NULL) return REDISMODULE_ERR;
+ /* Already queued for unblocking: nothing more to do. */
+ if (bc->unblocked) return REDISMODULE_OK;
+ /* Serve the keys-blocked client through its timeout path. */
+ if (bc->client) moduleBlockedClientTimedOut(bc->client);
+ }
+ moduleUnblockClientByHandle(bc,privdata);
+ return REDISMODULE_OK;
+}
+
+/* Abort a blocked client blocking operation: the client will be unblocked
+ * without firing any callback. Clearing every callback first guarantees
+ * that the unblocking path has nothing to invoke. */
+int RM_AbortBlock(RedisModuleBlockedClient *bc) {
+ bc->auth_reply_cb = NULL;
+ bc->reply_callback = NULL;
+ bc->disconnect_callback = NULL;
+ return RM_UnblockClient(bc, NULL);
+}
+
+/* Set a callback that will be called if a blocked client disconnects
+ * before the module has a chance to call RedisModule_UnblockClient()
+ *
+ * Usually what you want to do there, is to cleanup your module state
+ * so that you can call RedisModule_UnblockClient() safely, otherwise
+ * the client will remain blocked forever if the timeout is large.
+ *
+ * Notes:
+ *
+ * 1. It is not safe to call Reply* family functions here, it is also
+ *    useless since the client is gone.
+ *
+ * 2. This callback is not called if the client disconnects because of
+ *    a timeout. In such a case, the client is unblocked automatically
+ *    and the timeout callback is called.
+ */
+void RM_SetDisconnectCallback(RedisModuleBlockedClient *bc, RedisModuleDisconnectFunc callback) {
+ bc->disconnect_callback = callback;
+}
+
+/* This function will check the moduleUnblockedClients queue in order to
+ * call the reply callback and really unblock the client.
+ *
+ * Clients end into this list because of calls to RM_UnblockClient(),
+ * however it is possible that while the module was doing work for the
+ * blocked client, it was terminated by Redis (for timeout or other reasons).
+ * When this happens the RedisModuleBlockedClient structure in the queue
+ * will have the 'client' field set to NULL.
+ *
+ * Note the locking discipline: the queue mutex is dropped for the body of
+ * every iteration (after the node has been detached) and re-acquired before
+ * testing the loop condition again, so callbacks never run under the lock. */
+void moduleHandleBlockedClients(void) {
+ listNode *ln;
+ RedisModuleBlockedClient *bc;
+
+ pthread_mutex_lock(&moduleUnblockedClientsMutex);
+ while (listLength(moduleUnblockedClients)) {
+ ln = listFirst(moduleUnblockedClients);
+ bc = ln->value;
+ client *c = bc->client;
+ listDelNode(moduleUnblockedClients,ln);
+ pthread_mutex_unlock(&moduleUnblockedClientsMutex);
+
+ /* Release the lock during the loop, as long as we don't
+ * touch the shared list. */
+
+ /* Call the reply callback if the client is valid and we have
+ * any callback. However the callback is not called if the client
+ * was blocked on keys (RM_BlockClientOnKeys()), because we already
+ * called such callback in moduleTryServeClientBlockedOnKey() when
+ * the key was signaled as ready. */
+ long long prev_error_replies = server.stat_total_error_replies;
+ uint64_t reply_us = 0;
+ if (c && !bc->blocked_on_keys && bc->reply_callback) {
+ RedisModuleCtx ctx;
+ moduleCreateContext(&ctx, bc->module, REDISMODULE_CTX_BLOCKED_REPLY);
+ ctx.blocked_privdata = bc->privdata;
+ ctx.blocked_ready_key = NULL;
+ ctx.client = bc->client;
+ ctx.blocked_client = bc;
+ monotime replyTimer;
+ elapsedStart(&replyTimer);
+ bc->reply_callback(&ctx,(void**)c->argv,c->argc);
+ reply_us = elapsedUs(replyTimer);
+ moduleFreeContext(&ctx);
+ }
+ /* Hold onto the blocked client if module auth is in progress. The reply callback is invoked
+ * when the client is reprocessed. */
+ if (c && clientHasModuleAuthInProgress(c)) {
+ c->module_blocked_client = bc;
+ } else {
+ /* Free privdata if any. */
+ moduleInvokeFreePrivDataCallback(c, bc);
+ }
+
+ /* It is possible that this blocked client object accumulated
+ * replies to send to the client in a thread safe context.
+ * We need to glue such replies to the client output buffer and
+ * free the temporary client we just used for the replies. */
+ if (c) AddReplyFromClient(c, bc->reply_client);
+ moduleReleaseTempClient(bc->reply_client);
+ moduleReleaseTempClient(bc->thread_safe_ctx_client);
+
+ /* Update stats now that we've finished the blocking operation.
+ * This needs to be out of the reply callback above given that a
+ * module might not define any callback and still do blocking ops.
+ */
+ if (c && !clientHasModuleAuthInProgress(c) && !bc->blocked_on_keys) {
+ updateStatsOnUnblock(c, bc->background_duration, reply_us, server.stat_total_error_replies != prev_error_replies);
+ }
+
+ if (c != NULL) {
+ /* Before unblocking the client, set the disconnect callback
+ * to NULL, because if we reached this point, the client was
+ * properly unblocked by the module. */
+ bc->disconnect_callback = NULL;
+ unblockClient(c, 1);
+
+ /* Update the wait offset, we don't know if this blocked client propagated anything,
+ * currently we rather not add any API for that, so we just assume it did. */
+ c->woff = server.master_repl_offset;
+
+ /* Put the client in the list of clients that need to write
+ * if there are pending replies here. This is needed since
+ * during a non blocking command the client may receive output. */
+ if (!clientHasModuleAuthInProgress(c) && clientHasPendingReplies(c) &&
+ !(c->flags & CLIENT_PENDING_WRITE))
+ {
+ c->flags |= CLIENT_PENDING_WRITE;
+ listLinkNodeHead(server.clients_pending_write, &c->clients_pending_write_node);
+ }
+ }
+
+ /* Free 'bc' only after unblocking the client, since it is
+ * referenced in the client blocking context, and must be valid
+ * when calling unblockClient(). */
+ if (!(c && clientHasModuleAuthInProgress(c))) {
+ bc->module->blocked_clients--;
+ zfree(bc);
+ }
+
+ /* Lock again before to iterate the loop. */
+ pthread_mutex_lock(&moduleUnblockedClientsMutex);
+ }
+ pthread_mutex_unlock(&moduleUnblockedClientsMutex);
+}
+
+/* Check if the specified client can be safely timed out using
+ * moduleBlockedClientTimedOut(). Non-module blocked clients can always be
+ * timed out; module-blocked clients only if a timeout callback exists. */
+int moduleBlockedClientMayTimeout(client *c) {
+ if (c->bstate.btype == BLOCKED_MODULE) {
+ RedisModuleBlockedClient *handle = c->bstate.module_blocked_handle;
+ return handle != NULL && handle->timeout_callback != NULL;
+ }
+ return 1;
+}
+
+/* Called when our client timed out. After this function unblockClient()
+ * is called, and it will invalidate the blocked client. So this function
+ * does not need to do any cleanup. Eventually the module will call the
+ * API to unblock the client and the memory will be released. */
+void moduleBlockedClientTimedOut(client *c) {
+ RedisModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+
+ /* Protect against re-processing: don't serve clients that are already
+ * in the unblocking list for any reason (including RM_UnblockClient()
+ * explicit call). See #6798. */
+ if (bc->unblocked) return;
+
+ RedisModuleCtx ctx;
+ moduleCreateContext(&ctx, bc->module, REDISMODULE_CTX_BLOCKED_TIMEOUT);
+ ctx.client = bc->client;
+ ctx.blocked_client = bc;
+ ctx.blocked_privdata = bc->privdata;
+ /* Track error replies produced by the timeout callback for stats. */
+ long long prev_error_replies = server.stat_total_error_replies;
+ bc->timeout_callback(&ctx,(void**)c->argv,c->argc);
+ moduleFreeContext(&ctx);
+ updateStatsOnUnblock(c, bc->background_duration, 0, server.stat_total_error_replies != prev_error_replies);
+
+ /* For timeout events, we do not want to call the disconnect callback,
+ * because the blocked client will be automatically disconnected in
+ * this case, and the user can still hook using the timeout callback. */
+ bc->disconnect_callback = NULL;
+}
+
+/* Return non-zero if a module command was called in order to fill the
+ * reply for a blocked client. */
+int RM_IsBlockedReplyRequest(RedisModuleCtx *ctx) {
+ return !!(ctx->flags & REDISMODULE_CTX_BLOCKED_REPLY);
+}
+
+/* Return non-zero if a module command was called in order to fill the
+ * reply for a blocked client that timed out. */
+int RM_IsBlockedTimeoutRequest(RedisModuleCtx *ctx) {
+ return !!(ctx->flags & REDISMODULE_CTX_BLOCKED_TIMEOUT);
+}
+
+/* Get the private data set by RedisModule_UnblockClient(), as stored on the
+ * context when the reply/timeout callback was invoked. */
+void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx) {
+ return ctx->blocked_privdata;
+}
+
+/* Get the key that is ready when the reply callback is called in the context
+ * of a client blocked by RedisModule_BlockClientOnKeys(). May be NULL when
+ * not called from such a reply callback. */
+RedisModuleString *RM_GetBlockedClientReadyKey(RedisModuleCtx *ctx) {
+ return ctx->blocked_ready_key;
+}
+
+/* Get the blocked client associated with a given context.
+ * This is useful in the reply and timeout callbacks of blocked clients,
+ * because sometimes the module has the blocked client handle references
+ * around, and wants to clean it up. */
+RedisModuleBlockedClient *RM_GetBlockedClientHandle(RedisModuleCtx *ctx) {
+ return ctx->blocked_client;
+}
+
+/* Return true if when the free callback of a blocked client is called,
+ * the reason for the client to be unblocked is that it disconnected
+ * while it was blocked. */
+int RM_BlockedClientDisconnected(RedisModuleCtx *ctx) {
+ return !!(ctx->flags & REDISMODULE_CTX_BLOCKED_DISCONNECTED);
+}
+
+/* --------------------------------------------------------------------------
+ * ## Thread Safe Contexts
+ * -------------------------------------------------------------------------- */
+
+/* Return a context which can be used inside threads to make Redis context
+ * calls with certain modules APIs. If 'bc' is not NULL then the module will
+ * be bound to a blocked client, and it will be possible to use the
+ * `RedisModule_Reply*` family of functions to accumulate a reply for when the
+ * client will be unblocked. Otherwise the thread safe context will be
+ * detached from any specific client.
+ *
+ * To call non-reply APIs, the thread safe context must be prepared with:
+ *
+ *     RedisModule_ThreadSafeContextLock(ctx);
+ *     ... make your call here ...
+ *     RedisModule_ThreadSafeContextUnlock(ctx);
+ *
+ * This is not needed when using `RedisModule_Reply*` functions, assuming
+ * that a blocked client was used when the context was created, otherwise
+ * no RedisModule_Reply* call should be made at all.
+ *
+ * NOTE: If you're creating a detached thread safe context (bc is NULL),
+ * consider using `RM_GetDetachedThreadSafeContext` which will also retain
+ * the module ID and thus be more useful for logging. */
+RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) {
+ RedisModuleCtx *ctx = zmalloc(sizeof(*ctx));
+ RedisModule *module = bc ? bc->module : NULL;
+ int flags = REDISMODULE_CTX_THREAD_SAFE;
+
+ /* Creating a new client object is costly. To avoid that, we have an
+ * internal pool of client objects. In blockClient(), a client object is
+ * assigned to bc->thread_safe_ctx_client to be used for the thread safe
+ * context.
+ * For detached thread safe contexts, we create a new client object.
+ * Otherwise, as this function can be called from different threads, we
+ * would need to synchronize access to internal pool of client objects.
+ * Assuming creating detached context is rare and not that performance
+ * critical, we avoid synchronizing access to the client pool by creating
+ * a new client */
+ if (!bc) flags |= REDISMODULE_CTX_NEW_CLIENT;
+ moduleCreateContext(ctx, module, flags);
+ /* Even when the context is associated with a blocked client, we can't
+ * access it safely from another thread, so we use a fake client here
+ * in order to keep things like the currently selected database and similar
+ * things. */
+ if (bc) {
+ ctx->blocked_client = bc;
+ ctx->client = bc->thread_safe_ctx_client;
+ selectDb(ctx->client,bc->dbid);
+ /* Mirror id/protocol of the real client onto the fake one, when the
+ * real client is still connected. */
+ if (bc->client) {
+ ctx->client->id = bc->client->id;
+ ctx->client->resp = bc->client->resp;
+ }
+ }
+ return ctx;
+}
+
+/* Return a detached thread safe context that is not associated with any
+ * specific blocked client, but is associated with the module's context.
+ *
+ * This is useful for modules that wish to hold a global context over
+ * a long term, for purposes such as logging. */
+RedisModuleCtx *RM_GetDetachedThreadSafeContext(RedisModuleCtx *ctx) {
+ /* Detached contexts always get a freshly created client object.
+ * See RM_GetThreadSafeContext() for more information */
+ int ctx_flags = REDISMODULE_CTX_THREAD_SAFE | REDISMODULE_CTX_NEW_CLIENT;
+ RedisModuleCtx *detached = zmalloc(sizeof(*detached));
+ moduleCreateContext(detached, ctx->module, ctx_flags);
+ return detached;
+}
+
+/* Release a thread safe context previously obtained with
+ * RM_GetThreadSafeContext() or RM_GetDetachedThreadSafeContext(). */
+void RM_FreeThreadSafeContext(RedisModuleCtx *ctx) {
+ moduleFreeContext(ctx);
+ zfree(ctx);
+}
+
+/* Bookkeeping performed right after the module GIL is acquired. */
+void moduleGILAfterLock(void) {
+ /* We should never get here if we already inside a module
+ * code block which already opened a context. */
+ serverAssert(server.execution_nesting == 0);
+ /* Bump up the nesting level to prevent immediate propagation
+ * of possible RM_Call from the thread */
+ enterExecutionUnit(1, 0);
+}
+
+/* Acquire the server lock before executing a thread safe API call.
+ * This is not needed for `RedisModule_Reply*` calls when there is
+ * a blocked client connected to the thread safe context.
+ * Blocks until the lock is available. */
+void RM_ThreadSafeContextLock(RedisModuleCtx *ctx) {
+ UNUSED(ctx);
+ moduleAcquireGIL();
+ moduleGILAfterLock();
+}
+
+/* Similar to RM_ThreadSafeContextLock but this function
+ * would not block if the server lock is already acquired.
+ *
+ * If successful (lock acquired) REDISMODULE_OK is returned,
+ * otherwise REDISMODULE_ERR is returned and errno is set
+ * accordingly. */
+int RM_ThreadSafeContextTryLock(RedisModuleCtx *ctx) {
+ UNUSED(ctx);
+
+ int acquire_err = moduleTryAcquireGIL();
+ if (acquire_err == 0) {
+ moduleGILAfterLock();
+ return REDISMODULE_OK;
+ }
+ errno = acquire_err;
+ return REDISMODULE_ERR;
+}
+
+/* Bookkeeping performed right before the module GIL is released. */
+void moduleGILBeforeUnlock(void) {
+ /* We should never get here if we already inside a module
+ * code block which already opened a context, except
+ * the bump-up from moduleGILAcquired. */
+ serverAssert(server.execution_nesting == 1);
+ /* Restore nesting level and propagate pending commands
+ * (because it's unclear when thread safe contexts are
+ * released we have to propagate here). */
+ exitExecutionUnit();
+ postExecutionUnitOperations();
+}
+
+/* Release the server lock after a thread safe API call was executed.
+ * Pending operations are propagated before the lock is dropped. */
+void RM_ThreadSafeContextUnlock(RedisModuleCtx *ctx) {
+ UNUSED(ctx);
+ moduleGILBeforeUnlock();
+ moduleReleaseGIL();
+}
+
+/* Block until the module GIL is acquired. */
+void moduleAcquireGIL(void) {
+ pthread_mutex_lock(&moduleGIL);
+}
+
+/* Try to acquire the module GIL without blocking. Returns 0 on success,
+ * otherwise the error number reported by pthread_mutex_trylock(). */
+int moduleTryAcquireGIL(void) {
+ return pthread_mutex_trylock(&moduleGIL);
+}
+
+/* Release the module GIL. */
+void moduleReleaseGIL(void) {
+ pthread_mutex_unlock(&moduleGIL);
+}
+
+
+/* --------------------------------------------------------------------------
+ * ## Module Keyspace Notifications API
+ * -------------------------------------------------------------------------- */
+
+/* Subscribe to keyspace notifications. This is a low-level version of the
+ * keyspace-notifications API. A module can register callbacks to be notified
+ * when keyspace events occur.
+ *
+ * Notification events are filtered by their type (string events, set events,
+ * etc), and the subscriber callback receives only events that match a specific
+ * mask of event types.
+ *
+ * When subscribing to notifications with RedisModule_SubscribeToKeyspaceEvents
+ * the module must provide an event type-mask, denoting the events the subscriber
+ * is interested in. This can be an ORed mask of any of the following flags:
+ *
+ *  - REDISMODULE_NOTIFY_GENERIC: Generic commands like DEL, EXPIRE, RENAME
+ *  - REDISMODULE_NOTIFY_STRING: String events
+ *  - REDISMODULE_NOTIFY_LIST: List events
+ *  - REDISMODULE_NOTIFY_SET: Set events
+ *  - REDISMODULE_NOTIFY_HASH: Hash events
+ *  - REDISMODULE_NOTIFY_ZSET: Sorted Set events
+ *  - REDISMODULE_NOTIFY_EXPIRED: Expiration events
+ *  - REDISMODULE_NOTIFY_EVICTED: Eviction events
+ *  - REDISMODULE_NOTIFY_STREAM: Stream events
+ *  - REDISMODULE_NOTIFY_MODULE: Module types events
+ *  - REDISMODULE_NOTIFY_KEYMISS: Key-miss events
+ *                                Notice, key-miss event is the only type
+ *                                of event that is fired from within a read command.
+ *                                Performing RM_Call with a write command from within
+ *                                this notification is wrong and discouraged. It will
+ *                                cause the read command that triggered the event to be
+ *                                replicated to the AOF/Replica.
+ *  - REDISMODULE_NOTIFY_ALL: All events (Excluding REDISMODULE_NOTIFY_KEYMISS)
+ *  - REDISMODULE_NOTIFY_LOADED: A special notification available only for modules,
+ *                               indicates that the key was loaded from persistence.
+ *                               Notice, when this event fires, the given key
+ *                               can not be retained, use RM_CreateStringFromString
+ *                               instead.
+ *
+ * We do not distinguish between key events and keyspace events, and it is up
+ * to the module to filter the actions taken based on the key.
+ *
+ * The subscriber signature is:
+ *
+ *     int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type,
+ *                                         const char *event,
+ *                                         RedisModuleString *key);
+ *
+ * `type` is the event type bit, that must match the mask given at registration
+ * time. The event string is the actual command being executed, and key is the
+ * relevant Redis key.
+ *
+ * Notification callback gets executed with a redis context that can not be
+ * used to send anything to the client, and has the db number where the event
+ * occurred as its selected db number.
+ *
+ * Notice that it is not necessary to enable notifications in redis.conf for
+ * module notifications to work.
+ *
+ * Warning: the notification callbacks are performed in a synchronous manner,
+ * so notification callbacks must be fast, or they would slow Redis down.
+ * If you need to take long actions, use threads to offload them.
+ *
+ * Moreover, the fact that the notification is executed synchronously means
+ * that the notification code will be executed in the middle of Redis logic
+ * (commands logic, eviction, expire). Changing the key space while the logic
+ * runs is dangerous and discouraged. In order to react to key space events with
+ * write actions, please refer to `RM_AddPostNotificationJob`.
+ *
+ * See https://redis.io/topics/notifications for more information.
+ */
+int RM_SubscribeToKeyspaceEvents(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc callback) {
+ RedisModuleKeyspaceSubscriber *sub = zmalloc(sizeof(*sub));
+ sub->module = ctx->module;
+ sub->event_mask = types;
+ sub->notify_callback = callback;
+ /* 'active' guards against reentrant notifications, see moduleNotifyKeyspaceEvent(). */
+ sub->active = 0;
+
+ listAddNodeTail(moduleKeyspaceSubscribers, sub);
+ return REDISMODULE_OK;
+}
+
+/* Run and drain all the jobs queued with RM_AddPostNotificationJob().
+ * Each job runs in its own temporary-client context, selected on the DB
+ * recorded at registration time. Jobs may enqueue further jobs; the while
+ * loop keeps draining until the queue is empty. */
+void firePostExecutionUnitJobs(void) {
+ /* Avoid propagation of commands.
+ * In that way, postExecutionUnitOperations will prevent
+ * recursive calls to firePostExecutionUnitJobs.
+ * This is a special case where we need to increase 'execution_nesting'
+ * but we do not want to update the cached time */
+ enterExecutionUnit(0, 0);
+ while (listLength(modulePostExecUnitJobs) > 0) {
+ listNode *ln = listFirst(modulePostExecUnitJobs);
+ RedisModulePostExecUnitJob *job = listNodeValue(ln);
+ listDelNode(modulePostExecUnitJobs, ln);
+
+ RedisModuleCtx ctx;
+ moduleCreateContext(&ctx, job->module, REDISMODULE_CTX_TEMP_CLIENT);
+ selectDb(ctx.client, job->dbid);
+
+ job->callback(&ctx, job->pd);
+ if (job->free_pd) job->free_pd(job->pd);
+
+ moduleFreeContext(&ctx);
+ zfree(job);
+ }
+ exitExecutionUnit();
+}
+
+/* When running inside a key space notification callback, it is dangerous and highly discouraged to perform any write
+ * operation (See `RM_SubscribeToKeyspaceEvents`). In order to still perform write actions in this scenario,
+ * Redis provides `RM_AddPostNotificationJob` API. The API allows to register a job callback which Redis will call
+ * when the following conditions are promised to be fulfilled:
+ * 1. It is safe to perform any write operation.
+ * 2. The job will be called atomically alongside the key space notification.
+ *
+ * Notice, one job might trigger key space notifications that will trigger more jobs.
+ * This raises a concern of entering an infinite loop, we consider infinite loops
+ * as a logical bug that needs to be fixed in the module, an attempt to protect against
+ * infinite loops by halting the execution could result in violation of the feature correctness
+ * and so Redis will make no attempt to protect the module from infinite loops.
+ *
+ * 'free_pd' can be NULL and in such case will not be used.
+ *
+ * Return REDISMODULE_OK on success and REDISMODULE_ERR if it was called while loading data from disk (AOF or RDB) or
+ * if the instance is a readonly replica. */
+int RM_AddPostNotificationJob(RedisModuleCtx *ctx, RedisModulePostNotificationJobFunc callback, void *privdata, void (*free_privdata)(void*)) {
+ if (server.loading|| (server.masterhost && server.repl_slave_ro)) {
+ return REDISMODULE_ERR;
+ }
+ RedisModulePostExecUnitJob *job = zmalloc(sizeof(*job));
+ job->module = ctx->module;
+ job->callback = callback;
+ job->pd = privdata;
+ job->free_pd = free_privdata;
+ /* Remember the DB selected now; the job may run with a different client. */
+ job->dbid = ctx->client->db->id;
+
+ listAddNodeTail(modulePostExecUnitJobs, job);
+ return REDISMODULE_OK;
+}
+
+/* Get the configured bitmap of notify-keyspace-events (Could be used
+ * for additional filtering in RedisModuleNotificationFunc) */
+int RM_GetNotifyKeyspaceEvents(void) {
+ return server.notify_keyspace_events;
+}
+
+/* Expose notifyKeyspaceEvent to modules: fire a keyspace notification of the
+ * given type/event for 'key' in the context client's current DB. Returns
+ * REDISMODULE_ERR when no usable context/client is available. */
+int RM_NotifyKeyspaceEvent(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key) {
+ if (ctx == NULL || ctx->client == NULL) return REDISMODULE_ERR;
+ notifyKeyspaceEvent(type, (char *)event, key, ctx->client->db->id);
+ return REDISMODULE_OK;
+}
+
+/* Dispatcher for keyspace notifications to module subscriber functions.
+ * This gets called only if at least one module requested to be notified on
+ * keyspace notifications */
+void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) {
+ /* Don't do anything if there aren't any subscribers */
+ if (listLength(moduleKeyspaceSubscribers) == 0) return;
+
+ /* Ugly hack to handle modules which use write commands from within
+ * notify_callback, which they should NOT do!
+ * Modules should use RedisModules_AddPostNotificationJob instead.
+ *
+ * Anyway, we want any propagated commands from within notify_callback
+ * to be propagated inside a MULTI/EXEC together with the original
+ * command that caused the KSN.
+ * Note that it's only relevant for KSNs which are not generated from within
+ * call(), for example active-expiry and eviction (because anyway
+ * execution_nesting is incremented from within call())
+ *
+ * In order to do that we increment the execution_nesting counter, thus
+ * preventing postExecutionUnitOperations (from within moduleFreeContext)
+ * from propagating commands from CB.
+ *
+ * This is a special case where we need to increase 'execution_nesting'
+ * but we do not want to update the cached time */
+ enterExecutionUnit(0, 0);
+
+ listIter li;
+ listNode *ln;
+ listRewind(moduleKeyspaceSubscribers,&li);
+
+ /* Remove irrelevant flags from the type mask */
+ type &= ~(NOTIFY_KEYEVENT | NOTIFY_KEYSPACE);
+
+ while((ln = listNext(&li))) {
+ RedisModuleKeyspaceSubscriber *sub = ln->value;
+ /* Only notify subscribers on events matching the registration,
+ * and avoid subscribers triggering themselves */
+ if ((sub->event_mask & type) &&
+ (sub->active == 0 || (sub->module->options & REDISMODULE_OPTIONS_ALLOW_NESTED_KEYSPACE_NOTIFICATIONS))) {
+ RedisModuleCtx ctx;
+ moduleCreateContext(&ctx, sub->module, REDISMODULE_CTX_TEMP_CLIENT);
+ selectDb(ctx.client, dbid);
+
+ /* mark the handler as active to avoid reentrant loops.
+ * If the subscriber performs an action triggering itself,
+ * it will not be notified about it. */
+ sub->active = 1;
+ /* Disable lazy-expire while the callback runs, so it does not
+ * trigger nested notifications. */
+ server.lazy_expire_disabled++;
+ sub->notify_callback(&ctx, type, event, key);
+ server.lazy_expire_disabled--;
+ sub->active = 0;
+ moduleFreeContext(&ctx);
+ }
+ }
+
+ exitExecutionUnit();
+}
+
+/* Unsubscribe any notification subscribers this module has upon unloading.
+ * Deleting the current node while iterating is safe here because listNext()
+ * already advanced the iterator past it. */
+void moduleUnsubscribeNotifications(RedisModule *module) {
+ listIter li;
+ listNode *ln;
+ listRewind(moduleKeyspaceSubscribers,&li);
+ while((ln = listNext(&li))) {
+ RedisModuleKeyspaceSubscriber *sub = ln->value;
+ if (sub->module == module) {
+ listDelNode(moduleKeyspaceSubscribers, ln);
+ zfree(sub);
+ }
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * ## Modules Cluster API
+ * -------------------------------------------------------------------------- */
+
+/* The Cluster message callback function pointer type. */
+typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len);
+
+/* This structure identifies a registered caller: it must match a given module
+ * ID, for a given message type. The callback function is just the function
+ * that was registered as receiver. */
+typedef struct moduleClusterReceiver {
+ uint64_t module_id;
+ RedisModuleClusterMessageReceiver callback;
+ struct RedisModule *module;
+ struct moduleClusterReceiver *next;
+} moduleClusterReceiver;
+
+typedef struct moduleClusterNodeInfo {
+ int flags;
+ char ip[NET_IP_STR_LEN];
+ int port;
+ char master_id[40]; /* Only if flags & REDISMODULE_NODE_MASTER is true. */
+} mdouleClusterNodeInfo; /* NOTE(review): typedef name is misspelled
+ * ("mdoule"); renaming would touch any user of the
+ * alias elsewhere in the file — confirm before fixing. */
+
+/* We have an array of message types: each bucket is a linked list of
+ * configured receivers. */
+static moduleClusterReceiver *clusterReceivers[UINT8_MAX];
+
+/* Dispatch an incoming cluster message of the given type to the receiver
+ * that the matching module registered, if one exists. At most one receiver
+ * is invoked per (module_id, type) pair. */
+void moduleCallClusterReceivers(const char *sender_id, uint64_t module_id, uint8_t type, const unsigned char *payload, uint32_t len) {
+ for (moduleClusterReceiver *recv = clusterReceivers[type]; recv != NULL; recv = recv->next) {
+ if (recv->module_id != module_id) continue;
+ RedisModuleCtx ctx;
+ moduleCreateContext(&ctx, recv->module, REDISMODULE_CTX_TEMP_CLIENT);
+ recv->callback(&ctx,sender_id,type,payload,len);
+ moduleFreeContext(&ctx);
+ return;
+ }
+}
+
+/* Register a callback receiver for cluster messages of type 'type'. If there
+ * was already a registered callback, this will replace the callback function
+ * with the one provided, otherwise if the callback is set to NULL and there
+ * is already a callback for this function, the callback is unregistered
+ * (so this API call is also used in order to delete the receiver). */
+void RM_RegisterClusterMessageReceiver(RedisModuleCtx *ctx, uint8_t type, RedisModuleClusterMessageReceiver callback) {
+    if (!server.cluster_enabled) return;
+
+    uint64_t module_id = moduleTypeEncodeId(ctx->module->name,0);
+    moduleClusterReceiver *r = clusterReceivers[type], *prev = NULL;
+    while(r) {
+        if (r->module_id == module_id) {
+            /* Found! Set or delete. */
+            if (callback) {
+                r->callback = callback;
+            } else {
+                /* Delete the receiver entry if the user is setting
+                 * it to NULL. Just unlink the receiver node from the
+                 * linked list. */
+                if (prev)
+                    prev->next = r->next;
+                else
+                    /* BUGFIX: 'r' is the list head here, so the bucket head
+                     * pointer itself must be advanced. The previous code
+                     * ("clusterReceivers[type]->next = r->next") was a
+                     * self-assignment that left the freed node reachable
+                     * from the list (use-after-free on next dispatch). */
+                    clusterReceivers[type] = r->next;
+                zfree(r);
+            }
+            return;
+        }
+        prev = r;
+        r = r->next;
+    }
+
+    /* Not found, let's add it (only if a callback was actually given). */
+    if (callback) {
+        r = zmalloc(sizeof(*r));
+        r->module_id = module_id;
+        r->module = ctx->module;
+        r->callback = callback;
+        r->next = clusterReceivers[type];
+        clusterReceivers[type] = r;
+    }
+}
+
+/* Send a message to all the nodes in the cluster if `target` is NULL,
+ * otherwise to the specified target, which is a REDISMODULE_NODE_ID_LEN
+ * bytes node ID, as returned by the receiver callback or by the nodes
+ * iteration functions.
+ *
+ * Returns REDISMODULE_OK if the message was successfully sent, otherwise
+ * (node not connected, or unknown node ID) REDISMODULE_ERR is returned. */
+int RM_SendClusterMessage(RedisModuleCtx *ctx, const char *target_id, uint8_t type, const char *msg, uint32_t len) {
+    if (!server.cluster_enabled) return REDISMODULE_ERR;
+    uint64_t module_id = moduleTypeEncodeId(ctx->module->name,0);
+    int rc = clusterSendModuleMessageToTarget(target_id,module_id,type,msg,len);
+    return (rc == C_OK) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Return an array of string pointers, each string pointer points to a cluster
+ * node ID of exactly REDISMODULE_NODE_ID_LEN bytes (without any null term).
+ * The number of returned node IDs is stored into `*numnodes`.
+ * However if this function is called by a module not running on a Redis
+ * instance with Redis Cluster enabled, NULL is returned instead.
+ *
+ * The IDs returned can be used with RedisModule_GetClusterNodeInfo() in order
+ * to get more information about single node.
+ *
+ * The array returned by this function must be freed using the function
+ * RedisModule_FreeClusterNodesList().
+ *
+ * Example:
+ *
+ *     size_t count, j;
+ *     char **ids = RedisModule_GetClusterNodesList(ctx,&count);
+ *     for (j = 0; j < count; j++) {
+ *         RedisModule_Log(ctx,"notice","Node %.*s",
+ *             REDISMODULE_NODE_ID_LEN,ids[j]);
+ *     }
+ *     RedisModule_FreeClusterNodesList(ids);
+ */
+char **RM_GetClusterNodesList(RedisModuleCtx *ctx, size_t *numnodes) {
+    UNUSED(ctx);
+
+    if (!server.cluster_enabled) return NULL;
+    size_t count = dictSize(server.cluster->nodes);
+    /* Fix: this is an array of 'count+1' char* pointers, so size it with
+     * sizeof(char*). The previous code multiplied by REDISMODULE_NODE_ID_LEN,
+     * which only worked by over-allocating. */
+    char **ids = zmalloc((count+1)*sizeof(char*));
+    dictIterator *di = dictGetIterator(server.cluster->nodes);
+    dictEntry *de;
+    int j = 0;
+    while((de = dictNext(di)) != NULL) {
+        clusterNode *node = dictGetVal(de);
+        /* Nodes with no address or still in handshake are not reported. */
+        if (node->flags & (CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE)) continue;
+        ids[j] = zmalloc(REDISMODULE_NODE_ID_LEN);
+        memcpy(ids[j],node->name,REDISMODULE_NODE_ID_LEN);
+        j++;
+    }
+    *numnodes = j;
+    ids[j] = NULL; /* Null term so that FreeClusterNodesList does not need
+                    * to also get the count argument. */
+    dictReleaseIterator(di);
+    return ids;
+}
+
+/* Free the NULL-terminated node ID array obtained with
+ * RedisModule_GetClusterNodesList(). A NULL argument is a no-op. */
+void RM_FreeClusterNodesList(char **ids) {
+    if (!ids) return;
+    size_t j = 0;
+    while (ids[j] != NULL) zfree(ids[j++]);
+    zfree(ids);
+}
+
+/* Return this node's ID (REDISMODULE_CLUSTER_ID_LEN bytes), or NULL when
+ * the instance is not running in cluster mode. */
+const char *RM_GetMyClusterID(void) {
+    return server.cluster_enabled ? server.cluster->myself->name : NULL;
+}
+
+/* Return the total number of known cluster nodes, regardless of their state
+ * (handshake, no-address, ...), so the number of *usable* nodes may be
+ * smaller but never greater. Returns zero when not in cluster mode. */
+size_t RM_GetClusterSize(void) {
+    if (server.cluster_enabled == 0) return 0;
+    return dictSize(server.cluster->nodes);
+}
+
+/* Populate the specified info for the node having as ID the specified 'id',
+ * then returns REDISMODULE_OK. Otherwise if the format of node ID is invalid
+ * or the node ID does not exist from the POV of this local node, REDISMODULE_ERR
+ * is returned.
+ *
+ * The arguments `ip`, `master_id`, `port` and `flags` can be NULL in case we don't
+ * need to populate back certain info. If an `ip` and `master_id` (only populated
+ * if the instance is a slave) are specified, they point to buffers holding
+ * at least REDISMODULE_NODE_ID_LEN bytes. The strings written back as `ip`
+ * and `master_id` are not null terminated.
+ *
+ * NOTE(review): the `ip` buffer is actually written with up to NET_IP_STR_LEN
+ * bytes (see redis_strlcpy below), which may exceed REDISMODULE_NODE_ID_LEN —
+ * confirm the intended caller-side buffer size against redismodule.h.
+ *
+ * The list of flags reported is the following:
+ *
+ * * REDISMODULE_NODE_MYSELF:     This node
+ * * REDISMODULE_NODE_MASTER:     The node is a master
+ * * REDISMODULE_NODE_SLAVE:      The node is a replica
+ * * REDISMODULE_NODE_PFAIL:      We see the node as failing
+ * * REDISMODULE_NODE_FAIL:       The cluster agrees the node is failing
+ * * REDISMODULE_NODE_NOFAILOVER: The slave is configured to never failover
+ */
+int RM_GetClusterNodeInfo(RedisModuleCtx *ctx, const char *id, char *ip, char *master_id, int *port, int *flags) {
+    UNUSED(ctx);
+
+    /* Nodes without an address or still in handshake are treated as unknown. */
+    clusterNode *node = clusterLookupNode(id, strlen(id));
+    if (node == NULL ||
+        node->flags & (CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE))
+    {
+        return REDISMODULE_ERR;
+    }
+
+    if (ip) redis_strlcpy(ip,node->ip,NET_IP_STR_LEN);
+
+    if (master_id) {
+        /* If the information is not available, the function will set the
+         * field to zero bytes, so that when the field can't be populated the
+         * function kinda remains predictable. */
+        if (node->flags & CLUSTER_NODE_SLAVE && node->slaveof)
+            memcpy(master_id,node->slaveof->name,REDISMODULE_NODE_ID_LEN);
+        else
+            memset(master_id,0,REDISMODULE_NODE_ID_LEN);
+    }
+    if (port) *port = getNodeDefaultClientPort(node);
+
+    /* As usually we have to remap flags for modules, in order to ensure
+     * we can provide binary compatibility. */
+    if (flags) {
+        *flags = 0;
+        if (node->flags & CLUSTER_NODE_MYSELF) *flags |= REDISMODULE_NODE_MYSELF;
+        if (node->flags & CLUSTER_NODE_MASTER) *flags |= REDISMODULE_NODE_MASTER;
+        if (node->flags & CLUSTER_NODE_SLAVE) *flags |= REDISMODULE_NODE_SLAVE;
+        if (node->flags & CLUSTER_NODE_PFAIL) *flags |= REDISMODULE_NODE_PFAIL;
+        if (node->flags & CLUSTER_NODE_FAIL) *flags |= REDISMODULE_NODE_FAIL;
+        if (node->flags & CLUSTER_NODE_NOFAILOVER) *flags |= REDISMODULE_NODE_NOFAILOVER;
+    }
+    return REDISMODULE_OK;
+}
+
+/* Set Redis Cluster flags in order to change the normal behavior of
+ * Redis Cluster, especially with the goal of disabling certain functions.
+ * This is useful for modules that use the Cluster API in order to create
+ * a different distributed system, but still want to use the Redis Cluster
+ * message bus. Flags that can be set:
+ *
+ * * CLUSTER_MODULE_FLAG_NO_FAILOVER
+ * * CLUSTER_MODULE_FLAG_NO_REDIRECTION
+ *
+ * With the following effects:
+ *
+ * * NO_FAILOVER: prevent Redis Cluster slaves from failing over a dead master.
+ *                Also disables the replica migration feature.
+ *
+ * * NO_REDIRECTION: Every node will accept any key, without trying to perform
+ *                   partitioning according to the Redis Cluster algorithm.
+ *                   Slots information will still be propagated across the
+ *                   cluster, but without effect. */
+void RM_SetClusterFlags(RedisModuleCtx *ctx, uint64_t flags) {
+    UNUSED(ctx);
+    /* Translate the module-facing flag bits into the internal ones, then
+     * OR them in: flags can only be set, never cleared, by this API. */
+    int internal = 0;
+    if (flags & REDISMODULE_CLUSTER_FLAG_NO_FAILOVER)
+        internal |= CLUSTER_MODULE_FLAG_NO_FAILOVER;
+    if (flags & REDISMODULE_CLUSTER_FLAG_NO_REDIRECTION)
+        internal |= CLUSTER_MODULE_FLAG_NO_REDIRECTION;
+    server.cluster_module_flags |= internal;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Modules Timers API
+ *
+ * Module timers are a high precision "green timers" abstraction where
+ * every module can register even millions of timers without problems, even if
+ * the actual event loop will just have a single timer that is used to awake the
+ * module timers subsystem in order to process the next event.
+ *
+ * All the timers are stored into a radix tree, ordered by expire time, when
+ * the main Redis event loop timer callback is called, we try to process all
+ * the timers already expired one after the other. Then we re-enter the event
+ * loop registering a timer that will expire when the next to process module
+ * timer will expire.
+ *
+ * Every time the list of active timers drops to zero, we unregister the
+ * main event loop timer, so that there is no overhead when such feature is
+ * not used.
+ * -------------------------------------------------------------------------- */
+
+/* Keys are 8-byte big-endian expire times (see htonu64 in RM_CreateTimer),
+ * so a lexicographic rax iteration visits timers in chronological order.
+ * NOTE(review): initialization of this rax is not visible in this chunk —
+ * presumably done at module-system startup. */
+static rax *Timers;     /* The radix tree of all the timers sorted by expire. */
+long long aeTimer = -1; /* Main event loop (ae.c) timer identifier;
+                         * -1 means no ae timer is currently installed. */
+
+/* Callback invoked when a module timer fires. */
+typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data);
+
+/* The timer descriptor, stored as value in the radix tree. */
+typedef struct RedisModuleTimer {
+    RedisModule *module;                /* Module reference. */
+    RedisModuleTimerProc callback;      /* The callback to invoke on expire. */
+    void *data;                         /* Private data for the callback. */
+    int dbid;                           /* Database number selected by the original client. */
+} RedisModuleTimer;
+
+/* This is the timer handler that is called by the main event loop. We schedule
+ * this timer to be called when the nearest of our module timers will expire.
+ *
+ * Returns the number of milliseconds until the next pending module timer, or
+ * AE_NOMORE when no module timer remains (and clears 'aeTimer'). */
+int moduleTimerHandler(struct aeEventLoop *eventLoop, long long id, void *clientData) {
+    UNUSED(eventLoop);
+    UNUSED(id);
+    UNUSED(clientData);
+
+    /* To start let's try to fire all the timers already expired. */
+    raxIterator ri;
+    raxStart(&ri,Timers);
+    uint64_t now = ustime();
+    long long next_period = 0;
+    while(1) {
+        /* Re-seek to the smallest key on every iteration: raxRemove() below
+         * invalidates the iterator position, and keys are big-endian expire
+         * times so "^" always lands on the soonest timer. */
+        raxSeek(&ri,"^",NULL,0);
+        if (!raxNext(&ri)) break;
+        uint64_t expiretime;
+        memcpy(&expiretime,ri.key,sizeof(expiretime));
+        expiretime = ntohu64(expiretime);
+        if (now >= expiretime) {
+            /* Fire the timer in a temporary-client context, selecting the DB
+             * the registering client had selected. */
+            RedisModuleTimer *timer = ri.data;
+            RedisModuleCtx ctx;
+            moduleCreateContext(&ctx,timer->module,REDISMODULE_CTX_TEMP_CLIENT);
+            selectDb(ctx.client, timer->dbid);
+            timer->callback(&ctx,timer->data);
+            moduleFreeContext(&ctx);
+            raxRemove(Timers,(unsigned char*)ri.key,ri.key_len,NULL);
+            zfree(timer);
+        } else {
+            /* We call ustime() again instead of using the cached 'now' so that
+             * 'next_period' isn't affected by the time it took to execute
+             * previous calls to 'callback'.
+             * We need to cast 'expiretime' so that the compiler will not treat
+             * the difference as unsigned (Causing next_period to be huge) in
+             * case expiretime < ustime() */
+            next_period = ((long long)expiretime-ustime())/1000; /* Scale to milliseconds. */
+            break;
+        }
+    }
+    raxStop(&ri);
+
+    /* Reschedule the next timer or cancel it. */
+    if (next_period <= 0) next_period = 1;
+    if (raxSize(Timers) > 0) {
+        return next_period;
+    } else {
+        /* No timers left: tell ae to drop this event; RM_CreateTimer will
+         * install a fresh one when needed. */
+        aeTimer = -1;
+        return AE_NOMORE;
+    }
+}
+
+/* Create a new timer that will fire after `period` milliseconds, and will call
+ * the specified function using `data` as argument. The returned timer ID can be
+ * used to get information from the timer or to stop it before it fires.
+ * Note that for the common use case of a repeating timer (Re-registration
+ * of the timer inside the RedisModuleTimerProc callback) it matters when
+ * this API is called:
+ * If it is called at the beginning of 'callback' it means
+ * the event will triggered every 'period'.
+ * If it is called at the end of 'callback' it means
+ * there will 'period' milliseconds gaps between events.
+ * (If the time it takes to execute 'callback' is negligible the two
+ * statements above mean the same) */
+RedisModuleTimerID RM_CreateTimer(RedisModuleCtx *ctx, mstime_t period, RedisModuleTimerProc callback, void *data) {
+    RedisModuleTimer *timer = zmalloc(sizeof(*timer));
+    timer->module = ctx->module;
+    timer->callback = callback;
+    timer->data = data;
+    /* Remember the DB of the calling client (0 when there is no client),
+     * so the callback runs against the same database. */
+    timer->dbid = ctx->client ? ctx->client->db->id : 0;
+    uint64_t expiretime = ustime()+period*1000;
+    uint64_t key;
+
+    /* The big-endian expire time is the rax key (and the timer ID). On a
+     * microsecond collision, bump the expire time by 1us until the key is
+     * unique: keys must be distinct since they identify the timer. */
+    while(1) {
+        key = htonu64(expiretime);
+        if (raxFind(Timers, (unsigned char*)&key,sizeof(key)) == raxNotFound) {
+            raxInsert(Timers,(unsigned char*)&key,sizeof(key),timer,NULL);
+            break;
+        } else {
+            expiretime++;
+        }
+    }
+
+    /* We need to install the main event loop timer if it's not already
+     * installed, or we may need to refresh its period if we just installed
+     * a timer that will expire sooner than any other else (i.e. the timer
+     * we just installed is the first timer in the Timers rax). */
+    if (aeTimer != -1) {
+        raxIterator ri;
+        raxStart(&ri,Timers);
+        raxSeek(&ri,"^",NULL,0);
+        raxNext(&ri);
+        if (memcmp(ri.key,&key,sizeof(key)) == 0) {
+            /* This is the first key, we need to re-install the timer according
+             * to the just added event. */
+            aeDeleteTimeEvent(server.el,aeTimer);
+            aeTimer = -1;
+        }
+        raxStop(&ri);
+    }
+
+    /* If we have no main timer (the old one was invalidated, or this is the
+     * first module timer we have), install one. */
+    if (aeTimer == -1)
+        aeTimer = aeCreateTimeEvent(server.el,period,moduleTimerHandler,NULL,NULL);
+
+    return key;
+}
+
+/* Stop a timer. Returns REDISMODULE_OK if the timer existed, belonged to the
+ * calling module and was removed; REDISMODULE_ERR otherwise. When 'data' is
+ * not NULL it receives the private data pointer the timer was created with,
+ * so the caller can free it. */
+int RM_StopTimer(RedisModuleCtx *ctx, RedisModuleTimerID id, void **data) {
+    RedisModuleTimer *timer = raxFind(Timers,(unsigned char*)&id,sizeof(id));
+    if (timer == raxNotFound) return REDISMODULE_ERR;
+    /* A module may only stop its own timers. */
+    if (timer->module != ctx->module) return REDISMODULE_ERR;
+    if (data != NULL) *data = timer->data;
+    raxRemove(Timers,(unsigned char*)&id,sizeof(id),NULL);
+    zfree(timer);
+    return REDISMODULE_OK;
+}
+
+/* Obtain information about a timer: its remaining time before firing
+ * (in milliseconds), and the private data pointer associated with the timer.
+ * If the timer does not exist or belongs to a different module,
+ * REDISMODULE_ERR is returned and nothing is written back; otherwise
+ * REDISMODULE_OK is returned. 'remaining' and 'data' may be NULL when the
+ * caller is not interested in that piece of information. */
+int RM_GetTimerInfo(RedisModuleCtx *ctx, RedisModuleTimerID id, uint64_t *remaining, void **data) {
+    RedisModuleTimer *timer = raxFind(Timers,(unsigned char*)&id,sizeof(id));
+    if (timer == raxNotFound) return REDISMODULE_ERR;
+    if (timer->module != ctx->module) return REDISMODULE_ERR;
+    if (remaining != NULL) {
+        /* The timer ID is its big-endian expire time in microseconds;
+         * clamp to zero when already expired, then scale to ms. */
+        int64_t rem = ntohu64(id)-ustime();
+        *remaining = (rem < 0) ? 0 : rem/1000;
+    }
+    if (data != NULL) *data = timer->data;
+    return REDISMODULE_OK;
+}
+
+/* Scan the timers rax looking for at least one timer owned by 'module'.
+ * Returns 1 when one is found, 0 otherwise. Used to refuse unloading a
+ * module that still has pending timers. */
+int moduleHoldsTimer(struct RedisModule *module) {
+    int found = 0;
+    raxIterator it;
+    raxStart(&it,Timers);
+    raxSeek(&it,"^",NULL,0);
+    while (!found && raxNext(&it)) {
+        RedisModuleTimer *timer = it.data;
+        found = (timer->module == module);
+    }
+    raxStop(&it);
+    return found;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Modules EventLoop API
+ * --------------------------------------------------------------------------*/
+
+/* Per-fd bookkeeping for module event loop registrations: the user's read
+ * and/or write callbacks plus the opaque pointer passed back to them. One
+ * instance is shared by both directions of the same fd (see RM_EventLoopAdd). */
+typedef struct EventLoopData {
+    RedisModuleEventLoopFunc rFunc;  /* Callback for readable events. */
+    RedisModuleEventLoopFunc wFunc;  /* Callback for writable events. */
+    void *user_data;                 /* Opaque pointer handed to the callbacks. */
+} EventLoopData;
+
+/* A one-shot job queued from another thread to run once on the main thread. */
+typedef struct EventLoopOneShot {
+    RedisModuleEventLoopOneShotFunc func;  /* Function to invoke on main thread. */
+    void *user_data;                       /* Opaque argument for 'func'. */
+} EventLoopOneShot;
+
+/* Queue of pending one-shot jobs; lazily created on first use and always
+ * accessed under 'moduleEventLoopMutex' since producers may be other threads. */
+list *moduleEventLoopOneShots;
+static pthread_mutex_t moduleEventLoopMutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Translate a module REDISMODULE_EVENTLOOP_* mask into the equivalent
+ * ae.c AE_* mask (kept separate for binary compatibility). */
+static int eventLoopToAeMask(int mask) {
+    int ae_mask = 0;
+    ae_mask |= (mask & REDISMODULE_EVENTLOOP_READABLE) ? AE_READABLE : 0;
+    ae_mask |= (mask & REDISMODULE_EVENTLOOP_WRITABLE) ? AE_WRITABLE : 0;
+    return ae_mask;
+}
+
+/* Translate an ae.c AE_* mask back into the module-facing
+ * REDISMODULE_EVENTLOOP_* mask. Inverse of eventLoopToAeMask(). */
+static int eventLoopFromAeMask(int ae_mask) {
+    int mask = 0;
+    mask |= (ae_mask & AE_READABLE) ? REDISMODULE_EVENTLOOP_READABLE : 0;
+    mask |= (ae_mask & AE_WRITABLE) ? REDISMODULE_EVENTLOOP_WRITABLE : 0;
+    return mask;
+}
+
+/* ae-side stub for readable events: unwraps the EventLoopData envelope and
+ * forwards to the module's read callback with a remapped mask. */
+static void eventLoopCbReadable(struct aeEventLoop *ae, int fd, void *user_data, int ae_mask) {
+    UNUSED(ae);
+    EventLoopData *ed = user_data;
+    ed->rFunc(fd, ed->user_data, eventLoopFromAeMask(ae_mask));
+}
+
+/* ae-side stub for writable events: unwraps the EventLoopData envelope and
+ * forwards to the module's write callback with a remapped mask. */
+static void eventLoopCbWritable(struct aeEventLoop *ae, int fd, void *user_data, int ae_mask) {
+    UNUSED(ae);
+    EventLoopData *ed = user_data;
+    ed->wFunc(fd, ed->user_data, eventLoopFromAeMask(ae_mask));
+}
+
+/* Add a pipe / socket event to the event loop.
+ *
+ * * `mask` must be one of the following values:
+ *
+ *     * `REDISMODULE_EVENTLOOP_READABLE`
+ *     * `REDISMODULE_EVENTLOOP_WRITABLE`
+ *     * `REDISMODULE_EVENTLOOP_READABLE | REDISMODULE_EVENTLOOP_WRITABLE`
+ *
+ * On success REDISMODULE_OK is returned, otherwise
+ * REDISMODULE_ERR is returned and errno is set to the following values:
+ *
+ * * ERANGE: `fd` is negative or higher than `maxclients` Redis config.
+ * * EINVAL: `callback` is NULL or `mask` value is invalid.
+ *
+ * `errno` might take other values in case of an internal error.
+ *
+ * Example:
+ *
+ *     void onReadable(int fd, void *user_data, int mask) {
+ *         char buf[32];
+ *         int bytes = read(fd,buf,sizeof(buf));
+ *         printf("Read %d bytes \n", bytes);
+ *     }
+ *     RM_EventLoopAdd(fd, REDISMODULE_EVENTLOOP_READABLE, onReadable, NULL);
+ */
+int RM_EventLoopAdd(int fd, int mask, RedisModuleEventLoopFunc func, void *user_data) {
+    /* The ae set size is derived from maxclients, hence the ERANGE wording. */
+    if (fd < 0 || fd >= aeGetSetSize(server.el)) {
+        errno = ERANGE;
+        return REDISMODULE_ERR;
+    }
+
+    /* Reject a NULL callback or any bit outside the two supported ones. */
+    if (!func || mask & ~(REDISMODULE_EVENTLOOP_READABLE |
+                          REDISMODULE_EVENTLOOP_WRITABLE)) {
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    }
+
+    /* We are going to register stub callbacks to 'ae' for two reasons:
+     *
+     * - The "ae" callback signature is different from
+     *   RedisModuleEventLoopCallback; the stub callbacks adapt it.
+     * - We need to remap 'mask' value to provide binary compatibility.
+     *
+     * For the stub callbacks, we save the user 'callback' and 'user_data' in
+     * an EventLoopData object and pass it to ae; later the stubs extract
+     * 'callback' and 'user_data' from it.
+     */
+    /* Reuse the EventLoopData already attached to this fd (if the other
+     * direction was registered earlier), otherwise allocate a fresh one. */
+    EventLoopData *data = aeGetFileClientData(server.el, fd);
+    if (!data)
+        data = zcalloc(sizeof(*data));
+
+    aeFileProc *aeProc;
+    if (mask & REDISMODULE_EVENTLOOP_READABLE)
+        aeProc = eventLoopCbReadable;
+    else
+        aeProc = eventLoopCbWritable;
+
+    int aeMask = eventLoopToAeMask(mask);
+
+    if (aeCreateFileEvent(server.el, fd, aeMask, aeProc, data) != AE_OK) {
+        /* Only free 'data' if nothing else is registered on this fd,
+         * otherwise it is still owned by the earlier registration. */
+        if (aeGetFileEvents(server.el, fd) == AE_NONE)
+            zfree(data);
+        return REDISMODULE_ERR;
+    }
+
+    /* Registration succeeded: record the callbacks for the direction(s)
+     * just added. Note: 'user_data' is shared per-fd, so registering the
+     * second direction overwrites the pointer stored by the first. */
+    data->user_data = user_data;
+    if (mask & REDISMODULE_EVENTLOOP_READABLE)
+        data->rFunc = func;
+    if (mask & REDISMODULE_EVENTLOOP_WRITABLE)
+        data->wFunc = func;
+
+    errno = 0;
+    return REDISMODULE_OK;
+}
+
+/* Delete a pipe / socket event from the event loop.
+ *
+ * * `mask` must be one of the following values:
+ *
+ *     * `REDISMODULE_EVENTLOOP_READABLE`
+ *     * `REDISMODULE_EVENTLOOP_WRITABLE`
+ *     * `REDISMODULE_EVENTLOOP_READABLE | REDISMODULE_EVENTLOOP_WRITABLE`
+ *
+ * On success REDISMODULE_OK is returned, otherwise
+ * REDISMODULE_ERR is returned and errno is set to the following values:
+ *
+ * * ERANGE: `fd` is negative or higher than `maxclients` Redis config.
+ * * EINVAL: `mask` value is invalid.
+ */
+int RM_EventLoopDel(int fd, int mask) {
+    if (fd < 0 || fd >= aeGetSetSize(server.el)) {
+        errno = ERANGE;
+        return REDISMODULE_ERR;
+    }
+
+    if (mask & ~(REDISMODULE_EVENTLOOP_READABLE |
+                 REDISMODULE_EVENTLOOP_WRITABLE)) {
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    }
+
+    /* After deleting the event, if fd does not have any registered event
+     * anymore, we can free the EventLoopData object. The pointer must be
+     * fetched *before* aeDeleteFileEvent(), while it is still attached. */
+    EventLoopData *data = aeGetFileClientData(server.el, fd);
+    aeDeleteFileEvent(server.el, fd, eventLoopToAeMask(mask));
+    if (aeGetFileEvents(server.el, fd) == AE_NONE)
+        zfree(data);
+
+    errno = 0;
+    return REDISMODULE_OK;
+}
+
+/* Queue a callback to run once on the Redis main thread. Safe to call from
+ * other threads. Returns REDISMODULE_OK on success; if `func` is NULL,
+ * REDISMODULE_ERR is returned and errno is set to EINVAL. */
+int RM_EventLoopAddOneShot(RedisModuleEventLoopOneShotFunc func, void *user_data) {
+    if (func == NULL) {
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    }
+
+    EventLoopOneShot *event = zmalloc(sizeof(*event));
+    event->func = func;
+    event->user_data = user_data;
+
+    /* Append under the mutex: producers may live on arbitrary threads. The
+     * queue itself is created lazily on first use. */
+    pthread_mutex_lock(&moduleEventLoopMutex);
+    if (moduleEventLoopOneShots == NULL) moduleEventLoopOneShots = listCreate();
+    listAddNodeTail(moduleEventLoopOneShots, event);
+    pthread_mutex_unlock(&moduleEventLoopMutex);
+
+    /* Poke the main thread through the module pipe. */
+    if (write(server.module_pipe[1],"A",1) != 1) {
+        /* Pipe is non-blocking, write() may fail if it's full. */
+    }
+
+    errno = 0;
+    return REDISMODULE_OK;
+}
+
+/* This function will check the moduleEventLoopOneShots queue in order to
+ * call the callback for the registered oneshot events. Runs on the main
+ * thread; drains the whole queue before returning. */
+static void eventLoopHandleOneShotEvents(void) {
+    pthread_mutex_lock(&moduleEventLoopMutex);
+    if (moduleEventLoopOneShots) {
+        while (listLength(moduleEventLoopOneShots)) {
+            /* Detach the head entry while still holding the lock. */
+            listNode *ln = listFirst(moduleEventLoopOneShots);
+            EventLoopOneShot *oneshot = ln->value;
+            listDelNode(moduleEventLoopOneShots, ln);
+            /* Unlock mutex before the callback. Another oneshot event can be
+             * added in the callback, it will need to lock the mutex. */
+            pthread_mutex_unlock(&moduleEventLoopMutex);
+            oneshot->func(oneshot->user_data);
+            zfree(oneshot);
+            /* Lock again for the next iteration */
+            pthread_mutex_lock(&moduleEventLoopMutex);
+        }
+    }
+    pthread_mutex_unlock(&moduleEventLoopMutex);
+}
+
+/* --------------------------------------------------------------------------
+ * ## Modules ACL API
+ *
+ * Implements a hook into the authentication and authorization within Redis.
+ * --------------------------------------------------------------------------*/
+
+/* This function is called when a client's user has changed and invokes the
+ * client's user-changed callback if one was set. The callback is expected to
+ * clean up any module state tracked for this client.
+ *
+ * A client's user can be changed through the AUTH command, module
+ * authentication, and when a client is freed. */
+void moduleNotifyUserChanged(client *c) {
+    if (c->auth_callback == NULL) return;
+
+    c->auth_callback(c->id, c->auth_callback_privdata);
+
+    /* The callback fires at most once, even if the user ends up unchanged.
+     * It is expected to completely clean up the module's state, so every
+     * reference is dropped here. */
+    c->auth_callback = NULL;
+    c->auth_callback_privdata = NULL;
+    c->auth_module = NULL;
+}
+
+/* Reset the client back to the unauthenticated default user, firing the
+ * module's user-changed callback first (see below for why it is done now
+ * rather than at client-free time). The client is then scheduled for
+ * closing: deferred if it is the client currently being served, async
+ * otherwise. */
+void revokeClientAuthentication(client *c) {
+    /* Freeing the client would result in moduleNotifyUserChanged() to be
+     * called later, however since we use revokeClientAuthentication() also
+     * in moduleFreeAuthenticatedClients() to implement module unloading, we
+     * do this action ASAP: this way if the module is unloaded, when the client
+     * is eventually freed we don't rely on the module to still exist. */
+    moduleNotifyUserChanged(c);
+
+    c->user = DefaultUser;
+    c->authenticated = 0;
+    /* We will write replies to this client later, so we can't close it
+     * directly even if async. */
+    if (c == server.current_client) {
+        c->flags |= CLIENT_CLOSE_AFTER_COMMAND;
+    } else {
+        freeClientAsync(c);
+    }
+}
+
+/* Revoke authentication for every connected client that was authenticated
+ * by 'module'. Called from module unload so that no client keeps referring
+ * to a module that is going away. */
+static void moduleFreeAuthenticatedClients(RedisModule *module) {
+    listIter iter;
+    listNode *node;
+    listRewind(server.clients,&iter);
+    while ((node = listNext(&iter)) != NULL) {
+        client *c = listNodeValue(node);
+        /* Skip clients not authenticated via a module at all. */
+        if (c->auth_module == NULL) continue;
+        if ((RedisModule *) c->auth_module == module)
+            revokeClientAuthentication(c);
+    }
+}
+
+/* Creates a Redis ACL user that the module can use to authenticate a client.
+ * After obtaining the user, the module should set what such user can do
+ * using the RM_SetUserACL() function. Once configured, the user
+ * can be used in order to authenticate a connection, with the specified
+ * ACL rules, using the RedisModule_AuthClientWithUser() function.
+ *
+ * Note that:
+ *
+ * * Users created here are not listed by the ACL command.
+ * * Users created here are not checked for duplicated name, so it's up to
+ *   the module calling this function to take care of not creating users
+ *   with the same name.
+ * * The created user can be used to authenticate multiple Redis connections.
+ *
+ * The caller can later free the user using the function
+ * RM_FreeModuleUser(). When this function is called, if there are
+ * still clients authenticated with this user, they are disconnected.
+ * The function to free the user should only be used when the caller really
+ * wants to invalidate the user to define a new one with different
+ * capabilities. */
+RedisModuleUser *RM_CreateModuleUser(const char *name) {
+    RedisModuleUser *u = zmalloc(sizeof(*u));
+    u->user = ACLCreateUnlinkedUser();
+    u->free_user = 1; /* Owned by the module: RM_FreeModuleUser releases it. */
+
+    /* Drop the temporary name assigned by ACLCreateUnlinkedUser() and
+     * install the caller-provided one. */
+    sdsfree(u->user->name);
+    u->user->name = sdsnew(name);
+    return u;
+}
+
+/* Frees a given user and disconnects all of the clients that have been
+ * authenticated with it. See RM_CreateModuleUser for detailed usage.
+ * Users obtained via RM_GetModuleUserFromUserName (free_user == 0) are
+ * owned by the ACL subsystem and only the wrapper is released. */
+int RM_FreeModuleUser(RedisModuleUser *user) {
+    if (user->free_user) ACLFreeUserAndKillClients(user->user);
+    zfree(user);
+    return REDISMODULE_OK;
+}
+
+/* Sets the permissions of a user created through the redis module
+ * interface. The syntax is the same as ACL SETUSER, so refer to the
+ * documentation in acl.c for more information. See RM_CreateModuleUser
+ * for detailed usage. Note: 'acl' is a single ACL operation; for a full
+ * ACL string use RM_SetModuleUserACLString instead.
+ *
+ * Returns REDISMODULE_OK on success and REDISMODULE_ERR on failure
+ * and will set an errno describing why the operation failed. */
+int RM_SetModuleUserACL(RedisModuleUser *user, const char* acl) {
+    /* -1 means 'acl' is a NUL-terminated string; ACLSetUser sets errno. */
+    return ACLSetUser(user->user, acl, -1);
+}
+
+/* Sets the permission of a user with a complete ACL string, such as one
+ * would use on the redis ACL SETUSER command line API. This differs from
+ * RM_SetModuleUserACL, which only takes single ACL operations at a time.
+ *
+ * Returns REDISMODULE_OK on success and REDISMODULE_ERR on failure;
+ * on failure, if `error` is non-NULL it receives a string describing the
+ * problem (attached to the context's auto-memory when `ctx` is given). */
+int RM_SetModuleUserACLString(RedisModuleCtx *ctx, RedisModuleUser *user, const char *acl, RedisModuleString **error) {
+    serverAssert(user != NULL);
+
+    /* Tokenize the ACL string the same way the command line would. */
+    int argc;
+    sds *argv = sdssplitargs(acl, &argc);
+    sds err = ACLStringSetUser(user->user, NULL, argv, argc);
+    sdsfreesplitres(argv, argc);
+
+    if (err == NULL) return REDISMODULE_OK;
+
+    if (error == NULL) {
+        /* Caller is not interested in the message: drop it. */
+        sdsfree(err);
+    } else {
+        /* Hand the error message to the caller; ownership of 'err' moves
+         * into the created string object. */
+        *error = createObject(OBJ_STRING, err);
+        if (ctx != NULL) autoMemoryAdd(ctx, REDISMODULE_AM_STRING, *error);
+    }
+    return REDISMODULE_ERR;
+}
+
+/* Get the ACL string for a given user, as produced by ACLDescribeUser()
+ * (same format shown by ACL GETUSER).
+ * Returns a RedisModuleString owned by the caller.
+ */
+RedisModuleString *RM_GetModuleUserACLString(RedisModuleUser *user) {
+    serverAssert(user != NULL);
+
+    return ACLDescribeUser(user->user);
+}
+
+/* Retrieve the user name of the client connection behind the current context.
+ * The user name can be used later, in order to get a RedisModuleUser.
+ * See more information in RM_GetModuleUserFromUserName.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management. */
+RedisModuleString *RM_GetCurrentUserName(RedisModuleCtx *ctx) {
+    sds username = ctx->client->user->name;
+    return RM_CreateString(ctx,username,sdslen(username));
+}
+
+/* A RedisModuleUser can be used to check if command, key or channel can be executed or
+ * accessed according to the ACLs rules associated with that user.
+ * When a Module wants to do ACL checks on a general ACL user (not created by RM_CreateModuleUser),
+ * it can get the RedisModuleUser from this API, based on the user name retrieved by RM_GetCurrentUserName.
+ *
+ * Since a general ACL user can be deleted at any time, this RedisModuleUser should be used only in the context
+ * where this function was called. In order to do ACL checks out of that context, the Module can store the user name,
+ * and call this API at any other context.
+ *
+ * Returns NULL if the user is disabled or the user does not exist.
+ * The caller should later free the user using the function RM_FreeModuleUser().*/
+RedisModuleUser *RM_GetModuleUserFromUserName(RedisModuleString *name) {
+    /* Verify the user exists before allocating the wrapper. */
+    user *acl_user = ACLGetUserByName(name->ptr, sdslen(name->ptr));
+    if (acl_user == NULL) return NULL;
+
+    RedisModuleUser *wrapper = zmalloc(sizeof(*wrapper));
+    wrapper->user = acl_user;
+    /* Not owned: the ACL subsystem owns this user, so RM_FreeModuleUser
+     * must free only the wrapper. */
+    wrapper->free_user = 0;
+    return wrapper;
+}
+
+/* Checks if the command can be executed by the user, according to the ACLs associated with it.
+ *
+ * On success a REDISMODULE_OK is returned, otherwise
+ * REDISMODULE_ERR is returned and errno is set to the following values:
+ *
+ * * ENOENT: Specified command does not exist.
+ * * EACCES: Command cannot be executed, according to ACL rules
+ */
+int RM_ACLCheckCommandPermissions(RedisModuleUser *user, RedisModuleString **argv, int argc) {
+    /* Resolve the command from argv[0] (and subcommand, if any). */
+    struct redisCommand *cmd = lookupCommand(argv, argc);
+    if (cmd == NULL) {
+        errno = ENOENT;
+        return REDISMODULE_ERR;
+    }
+
+    int keyidxptr; /* Index of the offending key; unused by this API. */
+    if (ACLCheckAllUserCommandPerm(user->user, cmd, argv, argc, &keyidxptr) != ACL_OK) {
+        errno = EACCES;
+        return REDISMODULE_ERR;
+    }
+
+    return REDISMODULE_OK;
+}
+
+/* Check if the key can be accessed by the user according to the ACLs attached to the user
+ * and the flags representing the key access. The flags are the same that are used in the
+ * keyspec for logical operations. These flags are documented in RedisModule_SetCommandInfo as
+ * the REDISMODULE_CMD_KEY_ACCESS, REDISMODULE_CMD_KEY_UPDATE, REDISMODULE_CMD_KEY_INSERT,
+ * and REDISMODULE_CMD_KEY_DELETE flags.
+ *
+ * If no flags are supplied, the user is still required to have some access to the key for
+ * this command to return successfully.
+ *
+ * If the user is able to access the key then REDISMODULE_OK is returned, otherwise
+ * REDISMODULE_ERR is returned and errno is set to one of the following values:
+ *
+ * * EINVAL: The provided flags are invalid.
+ * * EACCESS: The user does not have permission to access the key.
+ */
+int RM_ACLCheckKeyPermissions(RedisModuleUser *user, RedisModuleString *key, int flags) {
+    const int valid_flags = (REDISMODULE_CMD_KEY_ACCESS
+                             | REDISMODULE_CMD_KEY_INSERT
+                             | REDISMODULE_CMD_KEY_DELETE
+                             | REDISMODULE_CMD_KEY_UPDATE);
+
+    /* Any bit outside the four key-access flags is invalid. */
+    if ((flags & valid_flags) != flags) {
+        errno = EINVAL;
+        return REDISMODULE_ERR;
+    }
+
+    /* Remap module flags to internal keyspec flags, then delegate. */
+    int keyspec_flags = moduleConvertKeySpecsFlags(flags, 0);
+    if (ACLUserCheckKeyPerm(user->user, key->ptr, sdslen(key->ptr), keyspec_flags) != ACL_OK) {
+        errno = EACCES;
+        return REDISMODULE_ERR;
+    }
+
+    return REDISMODULE_OK;
+}
+
+/* Check if the pubsub channel can be accessed by the user based off of the given
+ * access flags. See RM_ChannelAtPosWithFlags for more information about the
+ * possible flags that can be passed in.
+ *
+ * If the user is able to access the pubsub channel then REDISMODULE_OK is returned, otherwise
+ * REDISMODULE_ERR is returned and errno is set to one of the following values:
+ *
+ * * EINVAL: The provided flags are invalid.
+ * * EACCESS: The user does not have permission to access the pubsub channel.
+ */
+int RM_ACLCheckChannelPermissions(RedisModuleUser *user, RedisModuleString *ch, int flags) {
+ const int allow_mask = (REDISMODULE_CMD_CHANNEL_PUBLISH
+ | REDISMODULE_CMD_CHANNEL_SUBSCRIBE
+ | REDISMODULE_CMD_CHANNEL_UNSUBSCRIBE
+ | REDISMODULE_CMD_CHANNEL_PATTERN);
+
+ if ((flags & allow_mask) != flags) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ }
+
+ /* Unsubscribe permissions are currently always allowed. */
+ if (flags & REDISMODULE_CMD_CHANNEL_UNSUBSCRIBE){
+ return REDISMODULE_OK;
+ }
+
+ int is_pattern = flags & REDISMODULE_CMD_CHANNEL_PATTERN;
+ if (ACLUserCheckChannelPerm(user->user, ch->ptr, is_pattern) != ACL_OK)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
+
+/* Helper function to map a RedisModuleACLLogEntryReason to ACL Log entry reason. */
+int moduleGetACLLogEntryReason(RedisModuleACLLogEntryReason reason) {
+ int acl_reason = 0;
+ switch (reason) {
+ case REDISMODULE_ACL_LOG_AUTH: acl_reason = ACL_DENIED_AUTH; break;
+ case REDISMODULE_ACL_LOG_KEY: acl_reason = ACL_DENIED_KEY; break;
+ case REDISMODULE_ACL_LOG_CHANNEL: acl_reason = ACL_DENIED_CHANNEL; break;
+ case REDISMODULE_ACL_LOG_CMD: acl_reason = ACL_DENIED_CMD; break;
+ default: break;
+ }
+ return acl_reason;
+}
+
/* Adds a new entry in the ACL log.
 * Returns REDISMODULE_OK on success and REDISMODULE_ERR on error
 * (i.e. when 'reason' is not a recognized RedisModuleACLLogEntryReason).
 *
 * The 'object' string is duplicated; the caller retains ownership of it.
 *
 * For more information about ACL log, please refer to https://redis.io/commands/acl-log */
int RM_ACLAddLogEntry(RedisModuleCtx *ctx, RedisModuleUser *user, RedisModuleString *object, RedisModuleACLLogEntryReason reason) {
    int acl_reason = moduleGetACLLogEntryReason(reason);
    if (!acl_reason) return REDISMODULE_ERR;
    addACLLogEntry(ctx->client, acl_reason, ACL_LOG_CTX_MODULE, -1, user->user->name, sdsdup(object->ptr));
    return REDISMODULE_OK;
}
+
/* Adds a new entry in the ACL log with the `username` RedisModuleString provided.
 * Returns REDISMODULE_OK on success and REDISMODULE_ERR on error
 * (i.e. when 'reason' is not a recognized RedisModuleACLLogEntryReason).
 *
 * The 'object' string is duplicated; the caller retains ownership of it.
 *
 * For more information about ACL log, please refer to https://redis.io/commands/acl-log */
int RM_ACLAddLogEntryByUserName(RedisModuleCtx *ctx, RedisModuleString *username, RedisModuleString *object, RedisModuleACLLogEntryReason reason) {
    int acl_reason = moduleGetACLLogEntryReason(reason);
    if (!acl_reason) return REDISMODULE_ERR;
    addACLLogEntry(ctx->client, acl_reason, ACL_LOG_CTX_MODULE, -1, username->ptr, sdsdup(object->ptr));
    return REDISMODULE_OK;
}
+
/* Authenticate the client associated with the context with
 * the provided user. Returns REDISMODULE_OK on success and
 * REDISMODULE_ERR on error (user disabled, no client attached to the
 * context, or the client is a fake module client).
 *
 * This authentication can be tracked with the optional callback and private
 * data fields. The callback will be called whenever the user of the client
 * changes. This callback should be used to cleanup any state that is being
 * kept in the module related to the client authentication. It will only be
 * called once, even when the user hasn't changed, in order to allow for a
 * new callback to be specified. If this authentication does not need to be
 * tracked, pass in NULL for the callback and privdata.
 *
 * If client_id is not NULL, it will be filled with the id of the client
 * that was authenticated. This can be used with the
 * RM_DeauthenticateAndCloseClient() API in order to deauthenticate a
 * previously authenticated client if the authentication is no longer valid.
 *
 * For expensive authentication operations, it is recommended to block the
 * client and do the authentication in the background and then attach the user
 * to the client in a threadsafe context. */
static int authenticateClientWithUser(RedisModuleCtx *ctx, user *user, RedisModuleUserChangedFunc callback, void *privdata, uint64_t *client_id) {
    if (user->flags & USER_FLAG_DISABLED) {
        return REDISMODULE_ERR;
    }

    /* Avoid settings which are meaningless and will be lost */
    if (!ctx->client || (ctx->client->flags & CLIENT_MODULE)) {
        return REDISMODULE_ERR;
    }

    /* Notify about the outgoing user *before* installing the new one, so a
     * previously registered auth callback can clean up its state. */
    moduleNotifyUserChanged(ctx->client);

    ctx->client->user = user;
    ctx->client->authenticated = 1;

    /* If a module-auth flow is in flight for this client, record that it now
     * has a result so the auth state machine can make progress. */
    if (clientHasModuleAuthInProgress(ctx->client)) {
        ctx->client->flags |= CLIENT_MODULE_AUTH_HAS_RESULT;
    }

    /* Install the (new) change-tracking callback, owned by this module. */
    if (callback) {
        ctx->client->auth_callback = callback;
        ctx->client->auth_callback_privdata = privdata;
        ctx->client->auth_module = ctx->module;
    }

    if (client_id) {
        *client_id = ctx->client->id;
    }

    return REDISMODULE_OK;
}
+
+
+/* Authenticate the current context's user with the provided redis acl user.
+ * Returns REDISMODULE_ERR if the user is disabled.
+ *
+ * See authenticateClientWithUser for information about callback, client_id,
+ * and general usage for authentication. */
+int RM_AuthenticateClientWithUser(RedisModuleCtx *ctx, RedisModuleUser *module_user, RedisModuleUserChangedFunc callback, void *privdata, uint64_t *client_id) {
+ return authenticateClientWithUser(ctx, module_user->user, callback, privdata, client_id);
+}
+
+/* Authenticate the current context's user with the provided redis acl user.
+ * Returns REDISMODULE_ERR if the user is disabled or the user does not exist.
+ *
+ * See authenticateClientWithUser for information about callback, client_id,
+ * and general usage for authentication. */
+int RM_AuthenticateClientWithACLUser(RedisModuleCtx *ctx, const char *name, size_t len, RedisModuleUserChangedFunc callback, void *privdata, uint64_t *client_id) {
+ user *acl_user = ACLGetUserByName(name, len);
+
+ if (!acl_user) {
+ return REDISMODULE_ERR;
+ }
+ return authenticateClientWithUser(ctx, acl_user, callback, privdata, client_id);
+}
+
+/* Deauthenticate and close the client. The client resources will not be
+ * immediately freed, but will be cleaned up in a background job. This is
+ * the recommended way to deauthenticate a client since most clients can't
+ * handle users becoming deauthenticated. Returns REDISMODULE_ERR when the
+ * client doesn't exist and REDISMODULE_OK when the operation was successful.
+ *
+ * The client ID is returned from the RM_AuthenticateClientWithUser and
+ * RM_AuthenticateClientWithACLUser APIs, but can be obtained through
+ * the CLIENT api or through server events.
+ *
+ * This function is not thread safe, and must be executed within the context
+ * of a command or thread safe context. */
+int RM_DeauthenticateAndCloseClient(RedisModuleCtx *ctx, uint64_t client_id) {
+ UNUSED(ctx);
+ client *c = lookupClientByID(client_id);
+ if (c == NULL) return REDISMODULE_ERR;
+
+ /* Revoke also marks client to be closed ASAP */
+ revokeClientAuthentication(c);
+ return REDISMODULE_OK;
+}
+
+/* Redact the client command argument specified at the given position. Redacted arguments
+ * are obfuscated in user facing commands such as SLOWLOG or MONITOR, as well as
+ * never being written to server logs. This command may be called multiple times on the
+ * same position.
+ *
+ * Note that the command name, position 0, can not be redacted.
+ *
+ * Returns REDISMODULE_OK if the argument was redacted and REDISMODULE_ERR if there
+ * was an invalid parameter passed in or the position is outside the client
+ * argument range. */
+int RM_RedactClientCommandArgument(RedisModuleCtx *ctx, int pos) {
+ if (!ctx || !ctx->client || pos <= 0 || ctx->client->argc <= pos) {
+ return REDISMODULE_ERR;
+ }
+ redactClientCommandArgument(ctx->client, pos);
+ return REDISMODULE_OK;
+}
+
+/* Return the X.509 client-side certificate used by the client to authenticate
+ * this connection.
+ *
+ * The return value is an allocated RedisModuleString that is a X.509 certificate
+ * encoded in PEM (Base64) format. It should be freed (or auto-freed) by the caller.
+ *
+ * A NULL value is returned in the following conditions:
+ *
+ * - Connection ID does not exist
+ * - Connection is not a TLS connection
+ * - Connection is a TLS connection but no client certificate was used
+ */
+RedisModuleString *RM_GetClientCertificate(RedisModuleCtx *ctx, uint64_t client_id) {
+ client *c = lookupClientByID(client_id);
+ if (c == NULL) return NULL;
+
+ sds cert = connGetPeerCert(c->conn);
+ if (!cert) return NULL;
+
+ RedisModuleString *s = createObject(OBJ_STRING, cert);
+ if (ctx != NULL) autoMemoryAdd(ctx, REDISMODULE_AM_STRING, s);
+
+ return s;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Modules Dictionary API
+ *
+ * Implements a sorted dictionary (actually backed by a radix tree) with
+ * the usual get / set / del / num-items API, together with an iterator
+ * capable of going back and forth.
+ * -------------------------------------------------------------------------- */
+
+/* Create a new dictionary. The 'ctx' pointer can be the current module context
+ * or NULL, depending on what you want. Please follow the following rules:
+ *
+ * 1. Use a NULL context if you plan to retain a reference to this dictionary
+ * that will survive the time of the module callback where you created it.
+ * 2. Use a NULL context if no context is available at the time you are creating
+ * the dictionary (of course...).
+ * 3. However use the current callback context as 'ctx' argument if the
+ * dictionary time to live is just limited to the callback scope. In this
+ * case, if enabled, you can enjoy the automatic memory management that will
+ * reclaim the dictionary memory, as well as the strings returned by the
+ * Next / Prev dictionary iterator calls.
+ */
+RedisModuleDict *RM_CreateDict(RedisModuleCtx *ctx) {
+ struct RedisModuleDict *d = zmalloc(sizeof(*d));
+ d->rax = raxNew();
+ if (ctx != NULL) autoMemoryAdd(ctx,REDISMODULE_AM_DICT,d);
+ return d;
+}
+
+/* Free a dictionary created with RM_CreateDict(). You need to pass the
+ * context pointer 'ctx' only if the dictionary was created using the
+ * context instead of passing NULL. */
+void RM_FreeDict(RedisModuleCtx *ctx, RedisModuleDict *d) {
+ if (ctx != NULL) autoMemoryFreed(ctx,REDISMODULE_AM_DICT,d);
+ raxFree(d->rax);
+ zfree(d);
+}
+
/* Return the size of the dictionary (number of keys). */
uint64_t RM_DictSize(RedisModuleDict *d) {
    return raxSize(d->rax);
}
+
+/* Store the specified key into the dictionary, setting its value to the
+ * pointer 'ptr'. If the key was added with success, since it did not
+ * already exist, REDISMODULE_OK is returned. Otherwise if the key already
+ * exists the function returns REDISMODULE_ERR. */
+int RM_DictSetC(RedisModuleDict *d, void *key, size_t keylen, void *ptr) {
+ int retval = raxTryInsert(d->rax,key,keylen,ptr,NULL);
+ return (retval == 1) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Like RedisModule_DictSetC() but will replace the key with the new
+ * value if the key already exists. */
+int RM_DictReplaceC(RedisModuleDict *d, void *key, size_t keylen, void *ptr) {
+ int retval = raxInsert(d->rax,key,keylen,ptr,NULL);
+ return (retval == 1) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Like RedisModule_DictSetC() but takes the key as a RedisModuleString. */
+int RM_DictSet(RedisModuleDict *d, RedisModuleString *key, void *ptr) {
+ return RM_DictSetC(d,key->ptr,sdslen(key->ptr),ptr);
+}
+
+/* Like RedisModule_DictReplaceC() but takes the key as a RedisModuleString. */
+int RM_DictReplace(RedisModuleDict *d, RedisModuleString *key, void *ptr) {
+ return RM_DictReplaceC(d,key->ptr,sdslen(key->ptr),ptr);
+}
+
+/* Return the value stored at the specified key. The function returns NULL
+ * both in the case the key does not exist, or if you actually stored
+ * NULL at key. So, optionally, if the 'nokey' pointer is not NULL, it will
+ * be set by reference to 1 if the key does not exist, or to 0 if the key
+ * exists. */
+void *RM_DictGetC(RedisModuleDict *d, void *key, size_t keylen, int *nokey) {
+ void *res = raxFind(d->rax,key,keylen);
+ if (nokey) *nokey = (res == raxNotFound);
+ return (res == raxNotFound) ? NULL : res;
+}
+
+/* Like RedisModule_DictGetC() but takes the key as a RedisModuleString. */
+void *RM_DictGet(RedisModuleDict *d, RedisModuleString *key, int *nokey) {
+ return RM_DictGetC(d,key->ptr,sdslen(key->ptr),nokey);
+}
+
+/* Remove the specified key from the dictionary, returning REDISMODULE_OK if
+ * the key was found and deleted, or REDISMODULE_ERR if instead there was
+ * no such key in the dictionary. When the operation is successful, if
+ * 'oldval' is not NULL, then '*oldval' is set to the value stored at the
+ * key before it was deleted. Using this feature it is possible to get
+ * a pointer to the value (for instance in order to release it), without
+ * having to call RedisModule_DictGet() before deleting the key. */
+int RM_DictDelC(RedisModuleDict *d, void *key, size_t keylen, void *oldval) {
+ int retval = raxRemove(d->rax,key,keylen,oldval);
+ return retval ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Like RedisModule_DictDelC() but gets the key as a RedisModuleString. */
+int RM_DictDel(RedisModuleDict *d, RedisModuleString *key, void *oldval) {
+ return RM_DictDelC(d,key->ptr,sdslen(key->ptr),oldval);
+}
+
+/* Return an iterator, setup in order to start iterating from the specified
+ * key by applying the operator 'op', which is just a string specifying the
+ * comparison operator to use in order to seek the first element. The
+ * operators available are:
+ *
+ * * `^` -- Seek the first (lexicographically smaller) key.
+ * * `$` -- Seek the last (lexicographically bigger) key.
+ * * `>` -- Seek the first element greater than the specified key.
+ * * `>=` -- Seek the first element greater or equal than the specified key.
+ * * `<` -- Seek the first element smaller than the specified key.
+ * * `<=` -- Seek the first element smaller or equal than the specified key.
+ * * `==` -- Seek the first element matching exactly the specified key.
+ *
+ * Note that for `^` and `$` the passed key is not used, and the user may
+ * just pass NULL with a length of 0.
+ *
+ * If the element to start the iteration cannot be seeked based on the
+ * key and operator passed, RedisModule_DictNext() / Prev() will just return
+ * REDISMODULE_ERR at the first call, otherwise they'll produce elements.
+ */
+RedisModuleDictIter *RM_DictIteratorStartC(RedisModuleDict *d, const char *op, void *key, size_t keylen) {
+ RedisModuleDictIter *di = zmalloc(sizeof(*di));
+ di->dict = d;
+ raxStart(&di->ri,d->rax);
+ raxSeek(&di->ri,op,key,keylen);
+ return di;
+}
+
+/* Exactly like RedisModule_DictIteratorStartC, but the key is passed as a
+ * RedisModuleString. */
+RedisModuleDictIter *RM_DictIteratorStart(RedisModuleDict *d, const char *op, RedisModuleString *key) {
+ return RM_DictIteratorStartC(d,op,key->ptr,sdslen(key->ptr));
+}
+
/* Release the iterator created with RedisModule_DictIteratorStart(). This call
 * is mandatory otherwise a memory leak is introduced in the module. */
void RM_DictIteratorStop(RedisModuleDictIter *di) {
    /* Release the underlying rax iterator state before the wrapper itself. */
    raxStop(&di->ri);
    zfree(di);
}
+
/* After its creation with RedisModule_DictIteratorStart(), it is possible to
 * change the currently selected element of the iterator by using this
 * API call. The result based on the operator and key is exactly like
 * the function RedisModule_DictIteratorStart(), however in this case the
 * return value is just REDISMODULE_OK in case the seeked element was found,
 * or REDISMODULE_ERR in case it was not possible to seek the specified
 * element. It is possible to reseek an iterator as many times as you want. */
int RM_DictIteratorReseekC(RedisModuleDictIter *di, const char *op, void *key, size_t keylen) {
    /* raxSeek() conveniently returns 1/0 matching REDISMODULE_OK/ERR. */
    return raxSeek(&di->ri,op,key,keylen);
}
+
+/* Like RedisModule_DictIteratorReseekC() but takes the key as a
+ * RedisModuleString. */
+int RM_DictIteratorReseek(RedisModuleDictIter *di, const char *op, RedisModuleString *key) {
+ return RM_DictIteratorReseekC(di,op,key->ptr,sdslen(key->ptr));
+}
+
+/* Return the current item of the dictionary iterator `di` and steps to the
+ * next element. If the iterator already yield the last element and there
+ * are no other elements to return, NULL is returned, otherwise a pointer
+ * to a string representing the key is provided, and the `*keylen` length
+ * is set by reference (if keylen is not NULL). The `*dataptr`, if not NULL
+ * is set to the value of the pointer stored at the returned key as auxiliary
+ * data (as set by the RedisModule_DictSet API).
+ *
+ * Usage example:
+ *
+ * ... create the iterator here ...
+ * char *key;
+ * void *data;
+ * while((key = RedisModule_DictNextC(iter,&keylen,&data)) != NULL) {
+ * printf("%.*s %p\n", (int)keylen, key, data);
+ * }
+ *
+ * The returned pointer is of type void because sometimes it makes sense
+ * to cast it to a `char*` sometimes to an unsigned `char*` depending on the
+ * fact it contains or not binary data, so this API ends being more
+ * comfortable to use.
+ *
+ * The validity of the returned pointer is until the next call to the
+ * next/prev iterator step. Also the pointer is no longer valid once the
+ * iterator is released. */
+void *RM_DictNextC(RedisModuleDictIter *di, size_t *keylen, void **dataptr) {
+ if (!raxNext(&di->ri)) return NULL;
+ if (keylen) *keylen = di->ri.key_len;
+ if (dataptr) *dataptr = di->ri.data;
+ return di->ri.key;
+}
+
+/* This function is exactly like RedisModule_DictNext() but after returning
+ * the currently selected element in the iterator, it selects the previous
+ * element (lexicographically smaller) instead of the next one. */
+void *RM_DictPrevC(RedisModuleDictIter *di, size_t *keylen, void **dataptr) {
+ if (!raxPrev(&di->ri)) return NULL;
+ if (keylen) *keylen = di->ri.key_len;
+ if (dataptr) *dataptr = di->ri.data;
+ return di->ri.key;
+}
+
+/* Like RedisModuleNextC(), but instead of returning an internally allocated
+ * buffer and key length, it returns directly a module string object allocated
+ * in the specified context 'ctx' (that may be NULL exactly like for the main
+ * API RedisModule_CreateString).
+ *
+ * The returned string object should be deallocated after use, either manually
+ * or by using a context that has automatic memory management active. */
+RedisModuleString *RM_DictNext(RedisModuleCtx *ctx, RedisModuleDictIter *di, void **dataptr) {
+ size_t keylen;
+ void *key = RM_DictNextC(di,&keylen,dataptr);
+ if (key == NULL) return NULL;
+ return RM_CreateString(ctx,key,keylen);
+}
+
+/* Like RedisModule_DictNext() but after returning the currently selected
+ * element in the iterator, it selects the previous element (lexicographically
+ * smaller) instead of the next one. */
+RedisModuleString *RM_DictPrev(RedisModuleCtx *ctx, RedisModuleDictIter *di, void **dataptr) {
+ size_t keylen;
+ void *key = RM_DictPrevC(di,&keylen,dataptr);
+ if (key == NULL) return NULL;
+ return RM_CreateString(ctx,key,keylen);
+}
+
+/* Compare the element currently pointed by the iterator to the specified
+ * element given by key/keylen, according to the operator 'op' (the set of
+ * valid operators are the same valid for RedisModule_DictIteratorStart).
+ * If the comparison is successful the command returns REDISMODULE_OK
+ * otherwise REDISMODULE_ERR is returned.
+ *
+ * This is useful when we want to just emit a lexicographical range, so
+ * in the loop, as we iterate elements, we can also check if we are still
+ * on range.
+ *
+ * The function return REDISMODULE_ERR if the iterator reached the
+ * end of elements condition as well. */
+int RM_DictCompareC(RedisModuleDictIter *di, const char *op, void *key, size_t keylen) {
+ if (raxEOF(&di->ri)) return REDISMODULE_ERR;
+ int res = raxCompare(&di->ri,op,key,keylen);
+ return res ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Like RedisModule_DictCompareC but gets the key to compare with the current
+ * iterator key as a RedisModuleString. */
+int RM_DictCompare(RedisModuleDictIter *di, const char *op, RedisModuleString *key) {
+ if (raxEOF(&di->ri)) return REDISMODULE_ERR;
+ int res = raxCompare(&di->ri,op,key->ptr,sdslen(key->ptr));
+ return res ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+
+
+
+/* --------------------------------------------------------------------------
+ * ## Modules Info fields
+ * -------------------------------------------------------------------------- */
+
+int RM_InfoEndDictField(RedisModuleInfoCtx *ctx);
+
/* Used to start a new section, before adding any fields. the section name will
 * be prefixed by `<modulename>_` and must only include A-Z,a-z,0-9.
 * NULL or empty string indicates the default section (only `<modulename>`) is used.
 * When return value is REDISMODULE_ERR, the section should and will be skipped. */
int RM_InfoAddSection(RedisModuleInfoCtx *ctx, const char *name) {
    sds full_name = sdsdup(ctx->module->name);
    if (name != NULL && strlen(name) > 0)
        full_name = sdscatfmt(full_name, "_%s", name);

    /* Implicitly end dicts, instead of returning an error which is likely unchecked. */
    if (ctx->in_dict_field)
        RM_InfoEndDictField(ctx);

    /* proceed only if:
     * 1) no section was requested (emit all)
     * 2) the module name was requested (emit all)
     * 3) this specific section was requested. */
    if (ctx->requested_sections) {
        if ((!full_name || !dictFind(ctx->requested_sections, full_name)) &&
            (!dictFind(ctx->requested_sections, ctx->module->name)))
        {
            sdsfree(full_name);
            /* Mark no section active so subsequent AddField calls fail. */
            ctx->in_section = 0;
            return REDISMODULE_ERR;
        }
    }
    /* Separate from any previously emitted section with a blank line. */
    if (ctx->sections++) ctx->info = sdscat(ctx->info,"\r\n");
    ctx->info = sdscatfmt(ctx->info, "# %S\r\n", full_name);
    ctx->in_section = 1;
    sdsfree(full_name);
    return REDISMODULE_OK;
}
+
/* Starts a dict field, similar to the ones in INFO KEYSPACE. Use normal
 * RedisModule_InfoAddField* functions to add the items to this field, and
 * terminate with RedisModule_InfoEndDictField.
 * Returns REDISMODULE_ERR when no section was started first. */
int RM_InfoBeginDictField(RedisModuleInfoCtx *ctx, const char *name) {
    if (!ctx->in_section)
        return REDISMODULE_ERR;
    /* Implicitly end dicts, instead of returning an error which is likely unchecked. */
    if (ctx->in_dict_field)
        RM_InfoEndDictField(ctx);
    /* getSafeInfoString() may allocate a sanitized copy via its out
     * parameter; free those copies after formatting. */
    char *tmpmodname, *tmpname;
    ctx->info = sdscatfmt(ctx->info,
        "%s_%s:",
        getSafeInfoString(ctx->module->name, strlen(ctx->module->name), &tmpmodname),
        getSafeInfoString(name, strlen(name), &tmpname));
    if (tmpmodname != NULL) zfree(tmpmodname);
    if (tmpname != NULL) zfree(tmpname);
    ctx->in_dict_field = 1;
    return REDISMODULE_OK;
}
+
/* Ends a dict field, see RedisModule_InfoBeginDictField.
 * Returns REDISMODULE_ERR when no dict field is currently open. */
int RM_InfoEndDictField(RedisModuleInfoCtx *ctx) {
    if (!ctx->in_dict_field)
        return REDISMODULE_ERR;
    /* trim the last ',' if found. */
    if (ctx->info[sdslen(ctx->info)-1]==',')
        /* sdsIncrLen with a negative delta shortens the string in place. */
        sdsIncrLen(ctx->info, -1);
    ctx->info = sdscat(ctx->info, "\r\n");
    ctx->in_dict_field = 0;
    return REDISMODULE_OK;
}
+
+/* Used by RedisModuleInfoFunc to add info fields.
+ * Each field will be automatically prefixed by `<modulename>_`.
+ * Field names or values must not include `\r\n` or `:`. */
+int RM_InfoAddFieldString(RedisModuleInfoCtx *ctx, const char *field, RedisModuleString *value) {
+ if (!ctx->in_section)
+ return REDISMODULE_ERR;
+ if (ctx->in_dict_field) {
+ ctx->info = sdscatfmt(ctx->info,
+ "%s=%S,",
+ field,
+ (sds)value->ptr);
+ return REDISMODULE_OK;
+ }
+ ctx->info = sdscatfmt(ctx->info,
+ "%s_%s:%S\r\n",
+ ctx->module->name,
+ field,
+ (sds)value->ptr);
+ return REDISMODULE_OK;
+}
+
+/* See RedisModule_InfoAddFieldString(). */
+int RM_InfoAddFieldCString(RedisModuleInfoCtx *ctx, const char *field, const char *value) {
+ if (!ctx->in_section)
+ return REDISMODULE_ERR;
+ if (ctx->in_dict_field) {
+ ctx->info = sdscatfmt(ctx->info,
+ "%s=%s,",
+ field,
+ value);
+ return REDISMODULE_OK;
+ }
+ ctx->info = sdscatfmt(ctx->info,
+ "%s_%s:%s\r\n",
+ ctx->module->name,
+ field,
+ value);
+ return REDISMODULE_OK;
+}
+
+/* See RedisModule_InfoAddFieldString(). */
+int RM_InfoAddFieldDouble(RedisModuleInfoCtx *ctx, const char *field, double value) {
+ if (!ctx->in_section)
+ return REDISMODULE_ERR;
+ if (ctx->in_dict_field) {
+ ctx->info = sdscatprintf(ctx->info,
+ "%s=%.17g,",
+ field,
+ value);
+ return REDISMODULE_OK;
+ }
+ ctx->info = sdscatprintf(ctx->info,
+ "%s_%s:%.17g\r\n",
+ ctx->module->name,
+ field,
+ value);
+ return REDISMODULE_OK;
+}
+
+/* See RedisModule_InfoAddFieldString(). */
+int RM_InfoAddFieldLongLong(RedisModuleInfoCtx *ctx, const char *field, long long value) {
+ if (!ctx->in_section)
+ return REDISMODULE_ERR;
+ if (ctx->in_dict_field) {
+ ctx->info = sdscatfmt(ctx->info,
+ "%s=%I,",
+ field,
+ value);
+ return REDISMODULE_OK;
+ }
+ ctx->info = sdscatfmt(ctx->info,
+ "%s_%s:%I\r\n",
+ ctx->module->name,
+ field,
+ value);
+ return REDISMODULE_OK;
+}
+
+/* See RedisModule_InfoAddFieldString(). */
+int RM_InfoAddFieldULongLong(RedisModuleInfoCtx *ctx, const char *field, unsigned long long value) {
+ if (!ctx->in_section)
+ return REDISMODULE_ERR;
+ if (ctx->in_dict_field) {
+ ctx->info = sdscatfmt(ctx->info,
+ "%s=%U,",
+ field,
+ value);
+ return REDISMODULE_OK;
+ }
+ ctx->info = sdscatfmt(ctx->info,
+ "%s_%s:%U\r\n",
+ ctx->module->name,
+ field,
+ value);
+ return REDISMODULE_OK;
+}
+
/* Registers callback for the INFO command. The callback should add INFO fields
 * by calling the `RedisModule_InfoAddField*()` functions.
 * Always returns REDISMODULE_OK; re-registering replaces any previous callback. */
int RM_RegisterInfoFunc(RedisModuleCtx *ctx, RedisModuleInfoFunc cb) {
    ctx->module->info_cb = cb;
    return REDISMODULE_OK;
}
+
/* Append the INFO output of every loaded module that registered an info
 * callback to 'info' and return the (possibly reallocated) sds.
 * 'sections_dict' holds the requested section names (NULL/empty means all),
 * 'for_crash_report' is forwarded to each callback, and 'sections' is the
 * running count of sections already emitted (used for blank-line spacing). */
sds modulesCollectInfo(sds info, dict *sections_dict, int for_crash_report, int sections) {
    dictIterator *di = dictGetIterator(modules);
    dictEntry *de;

    while ((de = dictNext(di)) != NULL) {
        struct RedisModule *module = dictGetVal(de);
        if (!module->info_cb)
            continue;
        /* Positional initializer: fields must match RedisModuleInfoCtx
         * layout (module, requested_sections, info, sections,
         * in_section, in_dict_field). */
        RedisModuleInfoCtx info_ctx = {module, sections_dict, info, sections, 0, 0};
        module->info_cb(&info_ctx, for_crash_report);
        /* Implicitly end dicts (no way to handle errors, and we must add the newline). */
        if (info_ctx.in_dict_field)
            RM_InfoEndDictField(&info_ctx);
        /* The callback may have reallocated the sds; read back the state. */
        info = info_ctx.info;
        sections = info_ctx.sections;
    }
    dictReleaseIterator(di);
    return info;
}
+
/* Get information about the server similar to the one that returns from the
 * INFO command. This function takes an optional 'section' argument that may
 * be NULL. The return value holds the output and can be used with
 * RedisModule_ServerInfoGetField and alike to get the individual fields.
 * When done, it needs to be freed with RedisModule_FreeServerInfo or with the
 * automatic memory management mechanism if enabled. */
RedisModuleServerInfoData *RM_GetServerInfo(RedisModuleCtx *ctx, const char *section) {
    struct RedisModuleServerInfoData *d = zmalloc(sizeof(*d));
    d->rax = raxNew();
    if (ctx != NULL) autoMemoryAdd(ctx,REDISMODULE_AM_INFO,d);
    int all = 0, everything = 0;
    robj *argv[1];
    argv[0] = section ? createStringObject(section, strlen(section)) : NULL;
    dict *section_dict = genInfoSectionDict(argv, section ? 1 : 0, NULL, &all, &everything);
    sds info = genRedisInfoString(section_dict, all, everything);
    int totlines, i;
    /* Parse the INFO payload line by line, indexing "key:value" pairs
     * into the rax for fast field lookup. */
    sds *lines = sdssplitlen(info, sdslen(info), "\r\n", 2, &totlines);
    for(i=0; i<totlines; i++) {
        sds line = lines[i];
        /* Skip section headers ("# Name"). */
        if (line[0]=='#') continue;
        char *sep = strchr(line, ':');
        if (!sep) continue;
        unsigned char *key = (unsigned char*)line;
        size_t keylen = (intptr_t)sep-(intptr_t)line;
        /* Value is everything after the ':' up to the end of the line. */
        sds val = sdsnewlen(sep+1,sdslen(line)-((intptr_t)sep-(intptr_t)line)-1);
        /* On duplicate keys keep the first value and drop the new copy. */
        if (!raxTryInsert(d->rax,key,keylen,val,NULL))
            sdsfree(val);
    }
    sdsfree(info);
    sdsfreesplitres(lines,totlines);
    releaseInfoSectionDict(section_dict);
    if(argv[0]) decrRefCount(argv[0]);
    return d;
}
+
+/* Free data created with RM_GetServerInfo(). You need to pass the
+ * context pointer 'ctx' only if the dictionary was created using the
+ * context instead of passing NULL. */
+void RM_FreeServerInfo(RedisModuleCtx *ctx, RedisModuleServerInfoData *data) {
+ if (ctx != NULL) autoMemoryFreed(ctx,REDISMODULE_AM_INFO,data);
+ raxFreeWithCallback(data->rax, (void(*)(void*))sdsfree);
+ zfree(data);
+}
+
+/* Get the value of a field from data collected with RM_GetServerInfo(). You
+ * need to pass the context pointer 'ctx' only if you want to use auto memory
+ * mechanism to release the returned string. Return value will be NULL if the
+ * field was not found. */
+RedisModuleString *RM_ServerInfoGetField(RedisModuleCtx *ctx, RedisModuleServerInfoData *data, const char* field) {
+ sds val = raxFind(data->rax, (unsigned char *)field, strlen(field));
+ if (val == raxNotFound) return NULL;
+ RedisModuleString *o = createStringObject(val,sdslen(val));
+ if (ctx != NULL) autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+ return o;
+}
+
/* Similar to RM_ServerInfoGetField, but returns a char* which should not be freed by the caller.
 * The returned pointer is owned by 'data' and is valid until RM_FreeServerInfo(). */
const char *RM_ServerInfoGetFieldC(RedisModuleServerInfoData *data, const char* field) {
    sds val = raxFind(data->rax, (unsigned char *)field, strlen(field));
    if (val == raxNotFound) return NULL;
    return val;
}
+
+/* Get the value of a field from data collected with RM_GetServerInfo(). If the
+ * field is not found, or is not numerical or out of range, return value will be
+ * 0, and the optional out_err argument will be set to REDISMODULE_ERR. */
+long long RM_ServerInfoGetFieldSigned(RedisModuleServerInfoData *data, const char* field, int *out_err) {
+ long long ll;
+ sds val = raxFind(data->rax, (unsigned char *)field, strlen(field));
+ if (val == raxNotFound) {
+ if (out_err) *out_err = REDISMODULE_ERR;
+ return 0;
+ }
+ if (!string2ll(val,sdslen(val),&ll)) {
+ if (out_err) *out_err = REDISMODULE_ERR;
+ return 0;
+ }
+ if (out_err) *out_err = REDISMODULE_OK;
+ return ll;
+}
+
+/* Get the value of a field from data collected with RM_GetServerInfo(). If the
+ * field is not found, or is not numerical or out of range, return value will be
+ * 0, and the optional out_err argument will be set to REDISMODULE_ERR. */
+unsigned long long RM_ServerInfoGetFieldUnsigned(RedisModuleServerInfoData *data, const char* field, int *out_err) {
+ unsigned long long ll;
+ sds val = raxFind(data->rax, (unsigned char *)field, strlen(field));
+ if (val == raxNotFound) {
+ if (out_err) *out_err = REDISMODULE_ERR;
+ return 0;
+ }
+ if (!string2ull(val,&ll)) {
+ if (out_err) *out_err = REDISMODULE_ERR;
+ return 0;
+ }
+ if (out_err) *out_err = REDISMODULE_OK;
+ return ll;
+}
+
+/* Get the value of a field from data collected with RM_GetServerInfo(). If the
+ * field is not found, or is not a double, return value will be 0, and the
+ * optional out_err argument will be set to REDISMODULE_ERR. */
+double RM_ServerInfoGetFieldDouble(RedisModuleServerInfoData *data, const char* field, int *out_err) {
+ double dbl;
+ sds val = raxFind(data->rax, (unsigned char *)field, strlen(field));
+ if (val == raxNotFound) {
+ if (out_err) *out_err = REDISMODULE_ERR;
+ return 0;
+ }
+ if (!string2d(val,sdslen(val),&dbl)) {
+ if (out_err) *out_err = REDISMODULE_ERR;
+ return 0;
+ }
+ if (out_err) *out_err = REDISMODULE_OK;
+ return dbl;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Modules utility APIs
+ * -------------------------------------------------------------------------- */
+
/* Return random bytes using SHA1 in counter mode with a /dev/urandom
 * initialized seed. This function is fast so can be used to generate
 * many bytes without any effect on the operating system entropy pool.
 * Currently this function is not thread safe. */
void RM_GetRandomBytes(unsigned char *dst, size_t len) {
    getRandomBytes(dst,len);
}
+
/* Like RedisModule_GetRandomBytes() but instead of setting the string to
 * random bytes the string is set to random characters in the
 * hex charset [0-9a-f]. */
void RM_GetRandomHexChars(char *dst, size_t len) {
    getRandomHexChars(dst,len);
}
+
+/* --------------------------------------------------------------------------
+ * ## Modules API exporting / importing
+ * -------------------------------------------------------------------------- */
+
+/* This function is called by a module in order to export some API with a
+ * given name. Other modules will be able to use this API by calling the
+ * symmetrical function RM_GetSharedAPI() and casting the return value to
+ * the right function pointer.
+ *
+ * The function will return REDISMODULE_OK if the name is not already taken,
+ * otherwise REDISMODULE_ERR will be returned and no operation will be
+ * performed.
+ *
+ * IMPORTANT: the apiname argument should be a string literal with static
+ * lifetime. The API relies on the fact that it will always be valid in
+ * the future. */
+int RM_ExportSharedAPI(RedisModuleCtx *ctx, const char *apiname, void *func) {
+ RedisModuleSharedAPI *sapi = zmalloc(sizeof(*sapi));
+ sapi->module = ctx->module;
+ sapi->func = func;
+ if (dictAdd(server.sharedapi, (char*)apiname, sapi) != DICT_OK) {
+ zfree(sapi);
+ return REDISMODULE_ERR;
+ }
+ return REDISMODULE_OK;
+}
+
+/* Request an exported API pointer. The return value is just a void pointer
+ * that the caller of this function will be required to cast to the right
+ * function pointer, so this is a private contract between modules.
+ *
+ * If the requested API is not available then NULL is returned. Because
+ * modules can be loaded at different times with different order, this
+ * function calls should be put inside some module generic API registering
+ * step, that is called every time a module attempts to execute a
+ * command that requires external APIs: if some API cannot be resolved, the
+ * command should return an error.
+ *
+ * Here is an example:
+ *
+ * int ... myCommandImplementation(void) {
+ * if (getExternalAPIs() == 0) {
+ * reply with an error here if we cannot have the APIs
+ * }
+ * // Use the API:
+ * myFunctionPointer(foo);
+ * }
+ *
+ * And the function registerAPI() is:
+ *
+ * int getExternalAPIs(void) {
+ * static int api_loaded = 0;
+ * if (api_loaded != 0) return 1; // APIs already resolved.
+ *
+ * myFunctionPointer = RedisModule_GetSharedAPI("...");
+ * if (myFunctionPointer == NULL) return 0;
+ *
+ * return 1;
+ * }
+ */
+void *RM_GetSharedAPI(RedisModuleCtx *ctx, const char *apiname) {
+ dictEntry *de = dictFind(server.sharedapi, apiname);
+ if (de == NULL) return NULL;
+ RedisModuleSharedAPI *sapi = dictGetVal(de);
+ if (listSearchKey(sapi->module->usedby,ctx->module) == NULL) {
+ listAddNodeTail(sapi->module->usedby,ctx->module);
+ listAddNodeTail(ctx->module->using,sapi->module);
+ }
+ return sapi->func;
+}
+
+/* Remove all the APIs registered by the specified module. Usually you
+ * want this when the module is going to be unloaded. This function
+ * assumes that's caller responsibility to make sure the APIs are not
+ * used by other modules.
+ *
+ * The number of unregistered APIs is returned. */
+int moduleUnregisterSharedAPI(RedisModule *module) {
+ int count = 0;
+ dictIterator *di = dictGetSafeIterator(server.sharedapi);
+ dictEntry *de;
+ while ((de = dictNext(di)) != NULL) {
+ const char *apiname = dictGetKey(de);
+ RedisModuleSharedAPI *sapi = dictGetVal(de);
+ if (sapi->module == module) {
+ dictDelete(server.sharedapi,apiname);
+ zfree(sapi);
+ count++;
+ }
+ }
+ dictReleaseIterator(di);
+ return count;
+}
+
+/* Remove the specified module as an user of APIs of ever other module.
+ * This is usually called when a module is unloaded.
+ *
+ * Returns the number of modules this module was using APIs from. */
+int moduleUnregisterUsedAPI(RedisModule *module) {
+ listIter li;
+ listNode *ln;
+ int count = 0;
+
+ listRewind(module->using,&li);
+ while((ln = listNext(&li))) {
+ RedisModule *used = ln->value;
+ listNode *ln = listSearchKey(used->usedby,module);
+ if (ln) {
+ listDelNode(used->usedby,ln);
+ count++;
+ }
+ }
+ return count;
+}
+
+/* Unregister all filters registered by a module.
+ * This is called when a module is being unloaded.
+ *
+ * Returns the number of filters unregistered. */
+int moduleUnregisterFilters(RedisModule *module) {
+ listIter li;
+ listNode *ln;
+ int count = 0;
+
+ listRewind(module->filters,&li);
+ while((ln = listNext(&li))) {
+ RedisModuleCommandFilter *filter = ln->value;
+ listNode *ln = listSearchKey(moduleCommandFilters,filter);
+ if (ln) {
+ listDelNode(moduleCommandFilters,ln);
+ count++;
+ }
+ zfree(filter);
+ }
+ return count;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Module Command Filter API
+ * -------------------------------------------------------------------------- */
+
+/* Register a new command filter function.
+ *
+ * Command filtering makes it possible for modules to extend Redis by plugging
+ * into the execution flow of all commands.
+ *
+ * A registered filter gets called before Redis executes *any* command. This
+ * includes both core Redis commands and commands registered by any module. The
+ * filter applies in all execution paths including:
+ *
+ * 1. Invocation by a client.
+ * 2. Invocation through `RedisModule_Call()` by any module.
+ * 3. Invocation through Lua `redis.call()`.
+ * 4. Replication of a command from a master.
+ *
+ * The filter executes in a special filter context, which is different and more
+ * limited than a RedisModuleCtx. Because the filter affects any command, it
+ * must be implemented in a very efficient way to reduce the performance impact
+ * on Redis. All Redis Module API calls that require a valid context (such as
+ * `RedisModule_Call()`, `RedisModule_OpenKey()`, etc.) are not supported in a
+ * filter context.
+ *
+ * The `RedisModuleCommandFilterCtx` can be used to inspect or modify the
+ * executed command and its arguments. As the filter executes before Redis
+ * begins processing the command, any change will affect the way the command is
+ * processed. For example, a module can override Redis commands this way:
+ *
+ * 1. Register a `MODULE.SET` command which implements an extended version of
+ * the Redis `SET` command.
+ * 2. Register a command filter which detects invocation of `SET` on a specific
+ * pattern of keys. Once detected, the filter will replace the first
+ * argument from `SET` to `MODULE.SET`.
+ * 3. When filter execution is complete, Redis considers the new command name
+ * and therefore executes the module's own command.
+ *
+ * Note that in the above use case, if `MODULE.SET` itself uses
+ * `RedisModule_Call()` the filter will be applied on that call as well. If
+ * that is not desired, the `REDISMODULE_CMDFILTER_NOSELF` flag can be set when
+ * registering the filter.
+ *
+ * The `REDISMODULE_CMDFILTER_NOSELF` flag prevents execution flows that
+ * originate from the module's own `RM_Call()` from reaching the filter. This
+ * flag is effective for all execution flows, including nested ones, as long as
+ * the execution begins from the module's command context or a thread-safe
+ * context that is associated with a blocking command.
+ *
+ * Detached thread-safe contexts are *not* associated with the module and cannot
+ * be protected by this flag.
+ *
+ * If multiple filters are registered (by the same or different modules), they
+ * are executed in the order of registration.
+ */
+RedisModuleCommandFilter *RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback, int flags) {
+ RedisModuleCommandFilter *filter = zmalloc(sizeof(*filter));
+ filter->module = ctx->module;
+ filter->callback = callback;
+ filter->flags = flags;
+
+ listAddNodeTail(moduleCommandFilters, filter);
+ listAddNodeTail(ctx->module->filters, filter);
+ return filter;
+}
+
+/* Unregister a command filter.
+ */
+int RM_UnregisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilter *filter) {
+ listNode *ln;
+
+ /* A module can only remove its own filters */
+ if (filter->module != ctx->module) return REDISMODULE_ERR;
+
+ ln = listSearchKey(moduleCommandFilters,filter);
+ if (!ln) return REDISMODULE_ERR;
+ listDelNode(moduleCommandFilters,ln);
+
+ ln = listSearchKey(ctx->module->filters,filter);
+ if (!ln) return REDISMODULE_ERR; /* Shouldn't happen */
+ listDelNode(ctx->module->filters,ln);
+
+ zfree(filter);
+
+ return REDISMODULE_OK;
+}
+
+void moduleCallCommandFilters(client *c) {
+ if (listLength(moduleCommandFilters) == 0) return;
+
+ listIter li;
+ listNode *ln;
+ listRewind(moduleCommandFilters,&li);
+
+ RedisModuleCommandFilterCtx filter = {
+ .argv = c->argv,
+ .argv_len = c->argv_len,
+ .argc = c->argc,
+ .c = c
+ };
+
+ while((ln = listNext(&li))) {
+ RedisModuleCommandFilter *f = ln->value;
+
+ /* Skip filter if REDISMODULE_CMDFILTER_NOSELF is set and module is
+ * currently processing a command.
+ */
+ if ((f->flags & REDISMODULE_CMDFILTER_NOSELF) && f->module->in_call) continue;
+
+ /* Call filter */
+ f->callback(&filter);
+ }
+
+ c->argv = filter.argv;
+ c->argv_len = filter.argv_len;
+ c->argc = filter.argc;
+}
+
+/* Return the number of arguments a filtered command has. The number of
+ * arguments include the command itself.
+ */
+int RM_CommandFilterArgsCount(RedisModuleCommandFilterCtx *fctx)
+{
+ return fctx->argc;
+}
+
+/* Return the specified command argument. The first argument (position 0) is
+ * the command itself, and the rest are user-provided args.
+ */
+RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *fctx, int pos)
+{
+ if (pos < 0 || pos >= fctx->argc) return NULL;
+ return fctx->argv[pos];
+}
+
+/* Modify the filtered command by inserting a new argument at the specified
+ * position. The specified RedisModuleString argument may be used by Redis
+ * after the filter context is destroyed, so it must not be auto-memory
+ * allocated, freed or used elsewhere.
+ */
+int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg)
+{
+ int i;
+
+ if (pos < 0 || pos > fctx->argc) return REDISMODULE_ERR;
+
+ if (fctx->argv_len < fctx->argc+1) {
+ fctx->argv_len = fctx->argc+1;
+ fctx->argv = zrealloc(fctx->argv, fctx->argv_len*sizeof(RedisModuleString *));
+ }
+ for (i = fctx->argc; i > pos; i--) {
+ fctx->argv[i] = fctx->argv[i-1];
+ }
+ fctx->argv[pos] = arg;
+ fctx->argc++;
+
+ return REDISMODULE_OK;
+}
+
+/* Modify the filtered command by replacing an existing argument with a new one.
+ * The specified RedisModuleString argument may be used by Redis after the
+ * filter context is destroyed, so it must not be auto-memory allocated, freed
+ * or used elsewhere.
+ */
+int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg)
+{
+ if (pos < 0 || pos >= fctx->argc) return REDISMODULE_ERR;
+
+ decrRefCount(fctx->argv[pos]);
+ fctx->argv[pos] = arg;
+
+ return REDISMODULE_OK;
+}
+
+/* Modify the filtered command by deleting an argument at the specified
+ * position.
+ */
+int RM_CommandFilterArgDelete(RedisModuleCommandFilterCtx *fctx, int pos)
+{
+ int i;
+ if (pos < 0 || pos >= fctx->argc) return REDISMODULE_ERR;
+
+ decrRefCount(fctx->argv[pos]);
+ for (i = pos; i < fctx->argc-1; i++) {
+ fctx->argv[i] = fctx->argv[i+1];
+ }
+ fctx->argc--;
+
+ return REDISMODULE_OK;
+}
+
/* Get Client ID for the client that issued the command we are filtering. */
unsigned long long RM_CommandFilterGetClientId(RedisModuleCommandFilterCtx *fctx) {
    return fctx->c->id;
}
+
/* For a given pointer allocated via RedisModule_Alloc() or
 * RedisModule_Realloc(), return the amount of memory allocated for it.
 * Note that this may be different (larger) than the memory we allocated
 * with the allocation calls, since sometimes the underlying allocator
 * will allocate more memory.
 */
size_t RM_MallocSize(void* ptr) {
    return zmalloc_size(ptr);
}
+
/* Similar to RM_MallocSize, the difference is that RM_MallocUsableSize
 * returns the usable size of memory by the module. */
size_t RM_MallocUsableSize(void *ptr) {
    /* It is safe to use 'zmalloc_usable_size()' to manipulate additional
     * memory space, as we guarantee that the compiler can recognize this
     * after 'RM_Alloc', 'RM_TryAlloc', 'RM_Realloc', or 'RM_Calloc'. */
    return zmalloc_usable_size(ptr);
}
+
/* Same as RM_MallocSize, except it works on RedisModuleString pointers.
 */
size_t RM_MallocSizeString(RedisModuleString* str) {
    serverAssert(str->type == OBJ_STRING);
    /* Account for the robj header itself plus the sds payload. */
    return sizeof(*str) + getStringObjectSdsUsedMemory(str);
}
+
/* Same as RM_MallocSize, except it works on RedisModuleDict pointers.
 * Note that the returned value is only the overhead of the underlying structures,
 * it does not include the allocation size of the keys and values.
 */
size_t RM_MallocSizeDict(RedisModuleDict* dict) {
    size_t size = sizeof(RedisModuleDict) + sizeof(rax);
    size += dict->rax->numnodes * sizeof(raxNode);
    /* For more info about this weird line, see streamRadixTreeMemoryUsage */
    size += dict->rax->numnodes * sizeof(long)*30;
    return size;
}
+
/* Return a number between 0 and 1 indicating the amount of memory
 * currently used, relative to the Redis "maxmemory" configuration.
 *
 * * 0 - No memory limit configured.
 * * Between 0 and 1 - The percentage of the memory used normalized in 0-1 range.
 * * Exactly 1 - Memory limit reached.
 * * Greater than 1 - More memory used than the configured limit.
 */
float RM_GetUsedMemoryRatio(void){
    float level;
    getMaxmemoryState(NULL, NULL, NULL, &level);
    return level;
}
+
+/* --------------------------------------------------------------------------
+ * ## Scanning keyspace and hashes
+ * -------------------------------------------------------------------------- */
+
/* Callback type invoked for every key visited by RM_Scan(). */
typedef void (*RedisModuleScanCB)(RedisModuleCtx *ctx, RedisModuleString *keyname, RedisModuleKey *key, void *privdata);
/* Glue passed as 'privdata' to dictScan(): carries the module context, the
 * user callback and the user's opaque pointer. */
typedef struct {
    RedisModuleCtx *ctx;
    void* user_data;      /* Forwarded untouched to 'fn'. */
    RedisModuleScanCB fn;
} ScanCBData;

/* Opaque cursor handed to modules: wraps the dictScan() cursor plus a flag
 * remembering that a full iteration completed. */
typedef struct RedisModuleScanCursor{
    unsigned long cursor;
    int done;
}RedisModuleScanCursor;
+
/* dictScan() bridge for RM_Scan(): wraps each visited dict entry into a
 * RedisModuleString key name and a stack-allocated read-only key handle,
 * then invokes the module-supplied callback. */
static void moduleScanCallback(void *privdata, const dictEntry *de) {
    ScanCBData *data = privdata;
    sds key = dictGetKey(de);
    robj* val = dictGetVal(de);
    RedisModuleString *keyname = createObject(OBJ_STRING,sdsdup(key));

    /* Setup the key handle. */
    RedisModuleKey kp = {0};
    moduleInitKey(&kp, data->ctx, keyname, val, REDISMODULE_READ);

    data->fn(data->ctx, keyname, &kp, data->user_data);

    /* Release the handle and our reference to the key name; the callback
     * must have retained 'keyname' if it needs it after returning. */
    moduleCloseKey(&kp);
    decrRefCount(keyname);
}
+
+/* Create a new cursor to be used with RedisModule_Scan */
+RedisModuleScanCursor *RM_ScanCursorCreate(void) {
+ RedisModuleScanCursor* cursor = zmalloc(sizeof(*cursor));
+ cursor->cursor = 0;
+ cursor->done = 0;
+ return cursor;
+}
+
+/* Restart an existing cursor. The keys will be rescanned. */
+void RM_ScanCursorRestart(RedisModuleScanCursor *cursor) {
+ cursor->cursor = 0;
+ cursor->done = 0;
+}
+
/* Destroy the cursor struct (allocated by RM_ScanCursorCreate()). */
void RM_ScanCursorDestroy(RedisModuleScanCursor *cursor) {
    zfree(cursor);
}
+
+/* Scan API that allows a module to scan all the keys and value in
+ * the selected db.
+ *
+ * Callback for scan implementation.
+ *
+ * void scan_callback(RedisModuleCtx *ctx, RedisModuleString *keyname,
+ * RedisModuleKey *key, void *privdata);
+ *
+ * - `ctx`: the redis module context provided to for the scan.
+ * - `keyname`: owned by the caller and need to be retained if used after this
+ * function.
+ * - `key`: holds info on the key and value, it is provided as best effort, in
+ * some cases it might be NULL, in which case the user should (can) use
+ * RedisModule_OpenKey() (and CloseKey too).
+ * when it is provided, it is owned by the caller and will be free when the
+ * callback returns.
+ * - `privdata`: the user data provided to RedisModule_Scan().
+ *
+ * The way it should be used:
+ *
+ * RedisModuleScanCursor *c = RedisModule_ScanCursorCreate();
+ * while(RedisModule_Scan(ctx, c, callback, privateData));
+ * RedisModule_ScanCursorDestroy(c);
+ *
+ * It is also possible to use this API from another thread while the lock
+ * is acquired during the actual call to RM_Scan:
+ *
+ * RedisModuleScanCursor *c = RedisModule_ScanCursorCreate();
+ * RedisModule_ThreadSafeContextLock(ctx);
+ * while(RedisModule_Scan(ctx, c, callback, privateData)){
+ * RedisModule_ThreadSafeContextUnlock(ctx);
+ * // do some background job
+ * RedisModule_ThreadSafeContextLock(ctx);
+ * }
+ * RedisModule_ScanCursorDestroy(c);
+ *
+ * The function will return 1 if there are more elements to scan and
+ * 0 otherwise, possibly setting errno if the call failed.
+ *
+ * It is also possible to restart an existing cursor using RM_ScanCursorRestart.
+ *
+ * IMPORTANT: This API is very similar to the Redis SCAN command from the
+ * point of view of the guarantees it provides. This means that the API
+ * may report duplicated keys, but guarantees to report at least one time
+ * every key that was there from the start to the end of the scanning process.
+ *
+ * NOTE: If you do database changes within the callback, you should be aware
+ * that the internal state of the database may change. For instance it is safe
+ * to delete or modify the current key, but may not be safe to delete any
+ * other key.
+ * Moreover playing with the Redis keyspace while iterating may have the
+ * effect of returning more duplicates. A safe pattern is to store the keys
+ * names you want to modify elsewhere, and perform the actions on the keys
+ * later when the iteration is complete. However this can cost a lot of
+ * memory, so it may make sense to just operate on the current key when
+ * possible during the iteration, given that this is safe. */
+int RM_Scan(RedisModuleCtx *ctx, RedisModuleScanCursor *cursor, RedisModuleScanCB fn, void *privdata) {
+ if (cursor->done) {
+ errno = ENOENT;
+ return 0;
+ }
+ int ret = 1;
+ ScanCBData data = { ctx, privdata, fn };
+ cursor->cursor = dictScan(ctx->client->db->dict, cursor->cursor, moduleScanCallback, &data);
+ if (cursor->cursor == 0) {
+ cursor->done = 1;
+ ret = 0;
+ }
+ errno = 0;
+ return ret;
+}
+
/* Callback type invoked for every element visited by RM_ScanKey(). */
typedef void (*RedisModuleScanKeyCB)(RedisModuleKey *key, RedisModuleString *field, RedisModuleString *value, void *privdata);
/* Glue passed as 'privdata' to dictScan(): carries the open key handle,
 * the user callback and the user's opaque pointer. */
typedef struct {
    RedisModuleKey *key;
    void* user_data;         /* Forwarded untouched to 'fn'. */
    RedisModuleScanKeyCB fn;
} ScanKeyCBData;
+
/* dictScan() bridge for RM_ScanKey() when the key is backed by a real hash
 * table (HT-encoded set/hash, or skiplist-encoded zset dict). Converts the
 * entry into field/value module strings and invokes the user callback. */
static void moduleScanKeyCallback(void *privdata, const dictEntry *de) {
    ScanKeyCBData *data = privdata;
    sds key = dictGetKey(de);
    robj *o = data->key->value;
    robj *field = createStringObject(key, sdslen(key));
    robj *value = NULL;
    if (o->type == OBJ_SET) {
        /* Set members carry no associated value. */
        value = NULL;
    } else if (o->type == OBJ_HASH) {
        sds val = dictGetVal(de);
        value = createStringObject(val, sdslen(val));
    } else if (o->type == OBJ_ZSET) {
        /* For zsets the dict value points to the element's score. */
        double *val = (double*)dictGetVal(de);
        value = createStringObjectFromLongDouble(*val, 0);
    }

    /* 'field' and 'value' are only valid for the duration of the callback;
     * the callback must retain them if needed later. */
    data->fn(data->key, field, value, data->user_data);
    decrRefCount(field);
    if (value) decrRefCount(value);
}
+
/* Scan API that allows a module to scan the elements in a hash, set or sorted set key
 *
 * Callback for scan implementation.
 *
 *     void scan_callback(RedisModuleKey *key, RedisModuleString* field, RedisModuleString* value, void *privdata);
 *
 * - key - the redis key context provided to for the scan.
 * - field - field name, owned by the caller and need to be retained if used
 *   after this function.
 * - value - value string or NULL for set type, owned by the caller and need to
 *   be retained if used after this function.
 * - privdata - the user data provided to RedisModule_ScanKey.
 *
 * The way it should be used:
 *
 *      RedisModuleScanCursor *c = RedisModule_ScanCursorCreate();
 *      RedisModuleKey *key = RedisModule_OpenKey(...)
 *      while(RedisModule_ScanKey(key, c, callback, privateData));
 *      RedisModule_CloseKey(key);
 *      RedisModule_ScanCursorDestroy(c);
 *
 * It is also possible to use this API from another thread while the lock is acquired during
 * the actual call to RM_ScanKey, and re-opening the key each time:
 *
 *      RedisModuleScanCursor *c = RedisModule_ScanCursorCreate();
 *      RedisModule_ThreadSafeContextLock(ctx);
 *      RedisModuleKey *key = RedisModule_OpenKey(...)
 *      while(RedisModule_ScanKey(ctx, c, callback, privateData)){
 *          RedisModule_CloseKey(key);
 *          RedisModule_ThreadSafeContextUnlock(ctx);
 *          // do some background job
 *          RedisModule_ThreadSafeContextLock(ctx);
 *          RedisModuleKey *key = RedisModule_OpenKey(...)
 *      }
 *      RedisModule_CloseKey(key);
 *      RedisModule_ScanCursorDestroy(c);
 *
 * The function will return 1 if there are more elements to scan and 0 otherwise,
 * possibly setting errno if the call failed.
 * It is also possible to restart an existing cursor using RM_ScanCursorRestart.
 *
 * NOTE: Certain operations are unsafe while iterating the object. For instance
 * while the API guarantees to return at least one time all the elements that
 * are present in the data structure consistently from the start to the end
 * of the iteration (see HSCAN and similar commands documentation), the more
 * you play with the elements, the more duplicates you may get. In general
 * deleting the current element of the data structure is safe, while removing
 * the key you are iterating is not safe. */
int RM_ScanKey(RedisModuleKey *key, RedisModuleScanCursor *cursor, RedisModuleScanKeyCB fn, void *privdata) {
    if (key == NULL || key->value == NULL) {
        errno = EINVAL;
        return 0;
    }
    /* Select the underlying hash table if the encoding has one; compact
     * encodings (intset/listpack) are handled in a single pass below. */
    dict *ht = NULL;
    robj *o = key->value;
    if (o->type == OBJ_SET) {
        if (o->encoding == OBJ_ENCODING_HT)
            ht = o->ptr;
    } else if (o->type == OBJ_HASH) {
        if (o->encoding == OBJ_ENCODING_HT)
            ht = o->ptr;
    } else if (o->type == OBJ_ZSET) {
        if (o->encoding == OBJ_ENCODING_SKIPLIST)
            ht = ((zset *)o->ptr)->dict;
    } else {
        /* Only set, hash and zset keys are scannable. */
        errno = EINVAL;
        return 0;
    }
    if (cursor->done) {
        errno = ENOENT;
        return 0;
    }
    int ret = 1;
    if (ht) {
        /* Incremental scan over the real hash table, resumable via the
         * cursor exactly like HSCAN. */
        ScanKeyCBData data = { key, privdata, fn };
        cursor->cursor = dictScan(ht, cursor->cursor, moduleScanKeyCallback, &data);
        if (cursor->cursor == 0) {
            cursor->done = 1;
            ret = 0;
        }
    } else if (o->type == OBJ_SET) {
        /* Compactly encoded set: emit every member in one call, NULL value. */
        setTypeIterator *si = setTypeInitIterator(o);
        sds sdsele;
        while ((sdsele = setTypeNextObject(si)) != NULL) {
            robj *field = createObject(OBJ_STRING, sdsele);
            fn(key, field, NULL, privdata);
            decrRefCount(field);
        }
        setTypeReleaseIterator(si);
        cursor->cursor = 1;
        cursor->done = 1;
        ret = 0;
    } else if (o->type == OBJ_ZSET || o->type == OBJ_HASH) {
        /* Listpack-encoded zset/hash: entries are stored as alternating
         * field/value pairs, so each loop iteration consumes two listpack
         * elements. Emitted in one call. */
        unsigned char *p = lpSeek(o->ptr,0);
        unsigned char *vstr;
        unsigned int vlen;
        long long vll;
        while(p) {
            vstr = lpGetValue(p,&vlen,&vll);
            robj *field = (vstr != NULL) ?
                createStringObject((char*)vstr,vlen) :
                createStringObjectFromLongLongWithSds(vll);
            p = lpNext(o->ptr,p);
            vstr = lpGetValue(p,&vlen,&vll);
            robj *value = (vstr != NULL) ?
                createStringObject((char*)vstr,vlen) :
                createStringObjectFromLongLongWithSds(vll);
            fn(key, field, value, privdata);
            p = lpNext(o->ptr,p);
            decrRefCount(field);
            decrRefCount(value);
        }
        cursor->cursor = 1;
        cursor->done = 1;
        ret = 0;
    }
    errno = 0;
    return ret;
}
+
+
+/* --------------------------------------------------------------------------
+ * ## Module fork API
+ * -------------------------------------------------------------------------- */
+
/* Create a background child process with the current frozen snapshot of the
 * main process where you can do some processing in the background without
 * affecting / freezing the traffic and no need for threads and GIL locking.
 * Note that Redis allows for only one concurrent fork.
 * When the child wants to exit, it should call RedisModule_ExitFromChild.
 * If the parent wants to kill the child it should call RedisModule_KillForkChild
 * The done handler callback will be executed on the parent process when the
 * child exited (but not when killed)
 * Return: -1 on failure, on success the parent process will get a positive PID
 * of the child, and the child process will get 0.
 */
int RM_Fork(RedisModuleForkDoneHandler cb, void *user_data) {
    pid_t childpid;

    if ((childpid = redisFork(CHILD_TYPE_MODULE)) == 0) {
        /* Child */
        redisSetProcTitle("redis-module-fork");
    } else if (childpid == -1) {
        serverLog(LL_WARNING,"Can't fork for module: %s", strerror(errno));
    } else {
        /* Parent: remember the completion callback so that
         * ModuleForkDoneHandler() can invoke it when the child exits. */
        moduleForkInfo.done_handler = cb;
        moduleForkInfo.done_handler_user_data = user_data;
        serverLog(LL_VERBOSE, "Module fork started pid: %ld ", (long) childpid);
    }
    return childpid;
}
+
/* The module is advised to call this function from the fork child once in a while,
 * so that it can report progress and COW memory to the parent which will be
 * reported in INFO.
 * The `progress` argument should be between 0 and 1, or -1 when not available. */
void RM_SendChildHeartbeat(double progress) {
    sendChildInfoGeneric(CHILD_INFO_TYPE_CURRENT_INFO, 0, progress, "Module fork");
}
+
/* Call from the child process when you want to terminate it.
 * retcode will be provided to the done handler executed on the parent process.
 */
int RM_ExitFromChild(int retcode) {
    /* Report final COW stats to the parent before terminating. */
    sendChildCowInfo(CHILD_INFO_TYPE_MODULE_COW_SIZE, "Module fork");
    exitFromChild(retcode);
    /* Not reached: exitFromChild() terminates the process. */
    return REDISMODULE_OK;
}
+
/* Kill the active module forked child, if there is one active and the
 * pid matches, and returns C_OK. Otherwise if there is no active module
 * child or the pid does not match, return C_ERR without doing anything. */
int TerminateModuleForkChild(int child_pid, int wait) {
    /* Module child should be active and pid should match. */
    if (server.child_type != CHILD_TYPE_MODULE ||
        server.child_pid != child_pid) return C_ERR;

    int statloc;
    serverLog(LL_VERBOSE,"Killing running module fork child: %ld",
        (long) server.child_pid);
    /* SIGUSR1 asks the child to terminate; when 'wait' is set, block until
     * the child is reaped so its pid slot is free for the next fork. */
    if (kill(server.child_pid,SIGUSR1) != -1 && wait) {
        while(waitpid(server.child_pid, &statloc, 0) !=
              server.child_pid);
    }
    /* Reset the buffer accumulating changes while the child saves. */
    resetChildState();
    /* A killed child never runs the done handler, so clear it here. */
    moduleForkInfo.done_handler = NULL;
    moduleForkInfo.done_handler_user_data = NULL;
    return C_OK;
}
+
+/* Can be used to kill the forked child process from the parent process.
+ * child_pid would be the return value of RedisModule_Fork. */
+int RM_KillForkChild(int child_pid) {
+ /* Kill module child, wait for child exit. */
+ if (TerminateModuleForkChild(child_pid,1) == C_OK)
+ return REDISMODULE_OK;
+ else
+ return REDISMODULE_ERR;
+}
+
+void ModuleForkDoneHandler(int exitcode, int bysignal) {
+ serverLog(LL_NOTICE,
+ "Module fork exited pid: %ld, retcode: %d, bysignal: %d",
+ (long) server.child_pid, exitcode, bysignal);
+ if (moduleForkInfo.done_handler) {
+ moduleForkInfo.done_handler(exitcode, bysignal,
+ moduleForkInfo.done_handler_user_data);
+ }
+
+ moduleForkInfo.done_handler = NULL;
+ moduleForkInfo.done_handler_user_data = NULL;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Server hooks implementation
+ * -------------------------------------------------------------------------- */
+
/* This must be synced with REDISMODULE_EVENT_*
 * We use -1 (MAX_UINT64) to denote that this event doesn't have
 * a data structure associated with it. We use MAX_UINT64 on purpose,
 * in order to pass the check in RedisModule_SubscribeToServerEvent.
 * The array is indexed by the REDISMODULE_EVENT_* identifier. */
static uint64_t moduleEventVersions[] = {
    REDISMODULE_REPLICATIONINFO_VERSION, /* REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED */
    -1, /* REDISMODULE_EVENT_PERSISTENCE */
    REDISMODULE_FLUSHINFO_VERSION, /* REDISMODULE_EVENT_FLUSHDB */
    -1, /* REDISMODULE_EVENT_LOADING */
    REDISMODULE_CLIENTINFO_VERSION, /* REDISMODULE_EVENT_CLIENT_CHANGE */
    -1, /* REDISMODULE_EVENT_SHUTDOWN */
    -1, /* REDISMODULE_EVENT_REPLICA_CHANGE */
    -1, /* REDISMODULE_EVENT_MASTER_LINK_CHANGE */
    REDISMODULE_CRON_LOOP_VERSION, /* REDISMODULE_EVENT_CRON_LOOP */
    REDISMODULE_MODULE_CHANGE_VERSION, /* REDISMODULE_EVENT_MODULE_CHANGE */
    REDISMODULE_LOADING_PROGRESS_VERSION, /* REDISMODULE_EVENT_LOADING_PROGRESS */
    REDISMODULE_SWAPDBINFO_VERSION, /* REDISMODULE_EVENT_SWAPDB */
    -1, /* REDISMODULE_EVENT_REPL_BACKUP */
    -1, /* REDISMODULE_EVENT_FORK_CHILD */
    -1, /* REDISMODULE_EVENT_REPL_ASYNC_LOAD */
    -1, /* REDISMODULE_EVENT_EVENTLOOP */
    -1, /* REDISMODULE_EVENT_CONFIG */
    REDISMODULE_KEYINFO_VERSION, /* REDISMODULE_EVENT_KEY */
};
+
+/* Register to be notified, via a callback, when the specified server event
+ * happens. The callback is called with the event as argument, and an additional
 * argument which is a void pointer and should be cast to a specific type
+ * that is event-specific (but many events will just use NULL since they do not
+ * have additional information to pass to the callback).
+ *
+ * If the callback is NULL and there was a previous subscription, the module
+ * will be unsubscribed. If there was a previous subscription and the callback
+ * is not null, the old callback will be replaced with the new one.
+ *
+ * The callback must be of this type:
+ *
+ * int (*RedisModuleEventCallback)(RedisModuleCtx *ctx,
+ * RedisModuleEvent eid,
+ * uint64_t subevent,
+ * void *data);
+ *
+ * The 'ctx' is a normal Redis module context that the callback can use in
+ * order to call other modules APIs. The 'eid' is the event itself, this
+ * is only useful in the case the module subscribed to multiple events: using
+ * the 'id' field of this structure it is possible to check if the event
+ * is one of the events we registered with this callback. The 'subevent' field
+ * depends on the event that fired.
+ *
+ * Finally the 'data' pointer may be populated, only for certain events, with
+ * more relevant data.
+ *
+ * Here is a list of events you can use as 'eid' and related sub events:
+ *
+ * * RedisModuleEvent_ReplicationRoleChanged:
+ *
+ * This event is called when the instance switches from master
+ * to replica or the other way around, however the event is
+ * also called when the replica remains a replica but starts to
+ * replicate with a different master.
+ *
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_MASTER`
+ * * `REDISMODULE_SUBEVENT_REPLROLECHANGED_NOW_REPLICA`
+ *
+ * The 'data' field can be casted by the callback to a
+ * `RedisModuleReplicationInfo` structure with the following fields:
+ *
+ * int master; // true if master, false if replica
+ * char *masterhost; // master instance hostname for NOW_REPLICA
+ * int masterport; // master instance port for NOW_REPLICA
+ * char *replid1; // Main replication ID
+ * char *replid2; // Secondary replication ID
+ * uint64_t repl1_offset; // Main replication offset
+ * uint64_t repl2_offset; // Offset of replid2 validity
+ *
+ * * RedisModuleEvent_Persistence
+ *
+ * This event is called when RDB saving or AOF rewriting starts
+ * and ends. The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_RDB_START`
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_AOF_START`
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_RDB_START`
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_AOF_START`
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_ENDED`
+ * * `REDISMODULE_SUBEVENT_PERSISTENCE_FAILED`
+ *
+ * The above events are triggered not just when the user calls the
+ * relevant commands like BGSAVE, but also when a saving operation
+ * or AOF rewriting occurs because of internal server triggers.
+ * The SYNC_RDB_START sub events are happening in the foreground due to
+ * SAVE command, FLUSHALL, or server shutdown, and the other RDB and
+ * AOF sub events are executed in a background fork child, so any
+ * action the module takes can only affect the generated AOF or RDB,
+ * but will not be reflected in the parent process and affect connected
+ * clients and commands. Also note that the AOF_START sub event may end
+ * up saving RDB content in case of an AOF with rdb-preamble.
+ *
+ * * RedisModuleEvent_FlushDB
+ *
+ * The FLUSHALL, FLUSHDB or an internal flush (for instance
+ * because of replication, after the replica synchronization)
+ * happened. The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_FLUSHDB_START`
+ * * `REDISMODULE_SUBEVENT_FLUSHDB_END`
+ *
+ * The data pointer can be casted to a RedisModuleFlushInfo
+ * structure with the following fields:
+ *
+ * int32_t async; // True if the flush is done in a thread.
+ * // See for instance FLUSHALL ASYNC.
+ * // In this case the END callback is invoked
+ * // immediately after the database is put
+ * // in the free list of the thread.
+ * int32_t dbnum; // Flushed database number, -1 for all the DBs
+ * // in the case of the FLUSHALL operation.
+ *
+ * The start event is called *before* the operation is initiated, thus
+ * allowing the callback to call DBSIZE or other operation on the
+ * yet-to-free keyspace.
+ *
+ * * RedisModuleEvent_Loading
+ *
+ * Called on loading operations: at startup when the server is
+ * started, but also after a first synchronization when the
+ * replica is loading the RDB file from the master.
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_LOADING_RDB_START`
+ * * `REDISMODULE_SUBEVENT_LOADING_AOF_START`
+ * * `REDISMODULE_SUBEVENT_LOADING_REPL_START`
+ * * `REDISMODULE_SUBEVENT_LOADING_ENDED`
+ * * `REDISMODULE_SUBEVENT_LOADING_FAILED`
+ *
+ * Note that AOF loading may start with an RDB data in case of
+ * rdb-preamble, in which case you'll only receive an AOF_START event.
+ *
+ * * RedisModuleEvent_ClientChange
+ *
+ * Called when a client connects or disconnects.
+ * The data pointer can be casted to a RedisModuleClientInfo
+ * structure, documented in RedisModule_GetClientInfoById().
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED`
+ * * `REDISMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED`
+ *
+ * * RedisModuleEvent_Shutdown
+ *
+ * The server is shutting down. No subevents are available.
+ *
+ * * RedisModuleEvent_ReplicaChange
+ *
+ * This event is called when the instance (which can be either a
+ * master or a replica) gets a new online replica, or loses a
+ * replica because it got disconnected.
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_REPLICA_CHANGE_ONLINE`
+ * * `REDISMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE`
+ *
+ * No additional information is available so far: future versions
+ * of Redis will have an API in order to enumerate the replicas
+ * connected and their state.
+ *
+ * * RedisModuleEvent_CronLoop
+ *
+ * This event is called every time Redis calls the serverCron()
+ * function in order to do certain bookkeeping. Modules that are
+ * required to do operations from time to time may use this callback.
+ * Normally Redis calls this function 10 times per second, but
+ * this changes depending on the "hz" configuration.
+ * No sub events are available.
+ *
+ * The data pointer can be casted to a RedisModuleCronLoop
+ * structure with the following fields:
+ *
+ * int32_t hz; // Approximate number of events per second.
+ *
+ * * RedisModuleEvent_MasterLinkChange
+ *
+ * This is called for replicas in order to notify when the
+ * replication link becomes functional (up) with our master,
+ * or when it goes down. Note that the link is not considered
+ * up when we just connected to the master, but only if the
+ * replication is happening correctly.
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_MASTER_LINK_UP`
+ * * `REDISMODULE_SUBEVENT_MASTER_LINK_DOWN`
+ *
+ * * RedisModuleEvent_ModuleChange
+ *
+ * This event is called when a new module is loaded or one is unloaded.
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_MODULE_LOADED`
+ * * `REDISMODULE_SUBEVENT_MODULE_UNLOADED`
+ *
+ * The data pointer can be casted to a RedisModuleModuleChange
+ * structure with the following fields:
+ *
+ * const char* module_name; // Name of module loaded or unloaded.
+ * int32_t module_version; // Module version.
+ *
+ * * RedisModuleEvent_LoadingProgress
+ *
+ * This event is called repeatedly while an RDB or AOF file
+ * is being loaded.
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_LOADING_PROGRESS_RDB`
+ * * `REDISMODULE_SUBEVENT_LOADING_PROGRESS_AOF`
+ *
+ * The data pointer can be casted to a RedisModuleLoadingProgress
+ * structure with the following fields:
+ *
+ * int32_t hz; // Approximate number of events per second.
+ * int32_t progress; // Approximate progress between 0 and 1024,
+ * // or -1 if unknown.
+ *
+ * * RedisModuleEvent_SwapDB
+ *
+ * This event is called when a SWAPDB command has been successfully
+ * executed.
+ * For this event there are currently no subevents available.
+ *
+ * The data pointer can be casted to a RedisModuleSwapDbInfo
+ * structure with the following fields:
+ *
+ * int32_t dbnum_first; // Swap Db first dbnum
+ * int32_t dbnum_second; // Swap Db second dbnum
+ *
+ * * RedisModuleEvent_ReplBackup
+ *
+ * WARNING: Replication Backup events are deprecated since Redis 7.0 and are never fired.
+ * See RedisModuleEvent_ReplAsyncLoad for understanding how Async Replication Loading events
+ * are now triggered when repl-diskless-load is set to swapdb.
+ *
+ * Called when repl-diskless-load config is set to swapdb,
+ * And redis needs to backup the current database for the
+ * possibility to be restored later. A module with global data and
+ * maybe with aux_load and aux_save callbacks may need to use this
+ * notification to backup / restore / discard its globals.
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_REPL_BACKUP_CREATE`
+ * * `REDISMODULE_SUBEVENT_REPL_BACKUP_RESTORE`
+ * * `REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD`
+ *
+ * * RedisModuleEvent_ReplAsyncLoad
+ *
+ * Called when repl-diskless-load config is set to swapdb and a replication with a master of same
+ * data set history (matching replication ID) occurs.
+ * In which case redis serves current data set while loading new database in memory from socket.
+ * Modules must have declared they support this mechanism in order to activate it, through
+ * REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD flag.
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_STARTED`
+ * * `REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_ABORTED`
+ * * `REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_COMPLETED`
+ *
+ * * RedisModuleEvent_ForkChild
+ *
+ * Called when a fork child (AOFRW, RDBSAVE, module fork...) is born/dies
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_FORK_CHILD_BORN`
+ * * `REDISMODULE_SUBEVENT_FORK_CHILD_DIED`
+ *
+ * * RedisModuleEvent_EventLoop
+ *
+ * Called on each event loop iteration, once just before the event loop goes
+ * to sleep or just after it wakes up.
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_EVENTLOOP_BEFORE_SLEEP`
+ * * `REDISMODULE_SUBEVENT_EVENTLOOP_AFTER_SLEEP`
+ *
+ * * RedisModule_Event_Config
+ *
+ * Called when a configuration event happens
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_CONFIG_CHANGE`
+ *
+ * The data pointer can be casted to a RedisModuleConfigChange
+ * structure with the following fields:
+ *
+ * const char **config_names; // An array of C string pointers containing the
+ * // name of each modified configuration item
+ * uint32_t num_changes; // The number of elements in the config_names array
+ *
+ * * RedisModule_Event_Key
+ *
+ * Called when a key is removed from the keyspace. We can't modify any key in
+ * the event.
+ * The following sub events are available:
+ *
+ * * `REDISMODULE_SUBEVENT_KEY_DELETED`
+ * * `REDISMODULE_SUBEVENT_KEY_EXPIRED`
+ * * `REDISMODULE_SUBEVENT_KEY_EVICTED`
+ * * `REDISMODULE_SUBEVENT_KEY_OVERWRITTEN`
+ *
+ * The data pointer can be casted to a RedisModuleKeyInfo
+ * structure with the following fields:
+ *
+ * RedisModuleKey *key; // Key name
+ *
+ * The function returns REDISMODULE_OK if the module was successfully subscribed
+ * for the specified event. If the API is called from a wrong context or unsupported event
+ * is given then REDISMODULE_ERR is returned. */
+int RM_SubscribeToServerEvent(RedisModuleCtx *ctx, RedisModuleEvent event, RedisModuleEventCallback callback) {
+    /* Reject calls from contexts without a module reference, unknown
+     * event IDs, and event struct versions newer than this server knows
+     * about (module compiled against a newer redismodule.h). */
+    if (ctx->module == NULL) return REDISMODULE_ERR;
+    if (event.id >= _REDISMODULE_EVENT_NEXT) return REDISMODULE_ERR;
+    if (event.dataver > moduleEventVersions[event.id]) return REDISMODULE_ERR;
+
+    /* Look for an existing listener of this module for this event ID. */
+    RedisModuleEventListener *el = NULL;
+    listNode *found = NULL;
+    listIter li;
+    listNode *ln;
+    listRewind(RedisModule_EventListeners,&li);
+    while (found == NULL && (ln = listNext(&li)) != NULL) {
+        RedisModuleEventListener *cur = ln->value;
+        if (cur->module == ctx->module && cur->event.id == event.id) {
+            found = ln;
+            el = cur;
+        }
+    }
+
+    /* Existing listener: a NULL callback unsubscribes, otherwise the
+     * callback is simply replaced. */
+    if (found != NULL) {
+        if (callback == NULL) {
+            listDelNode(RedisModule_EventListeners,found);
+            zfree(el);
+        } else {
+            el->callback = callback;
+        }
+        return REDISMODULE_OK;
+    }
+
+    /* First subscription of this module for this event ID. */
+    el = zmalloc(sizeof(*el));
+    el->module = ctx->module;
+    el->event = event;
+    el->callback = callback;
+    listAddNodeTail(RedisModule_EventListeners,el);
+    return REDISMODULE_OK;
+}
+
+/**
+ * For a given server event and subevent, return zero if the
+ * subevent is not supported and non-zero otherwise.
+ *
+ * Each case compares the subevent against the corresponding
+ * `_..._NEXT` sentinel of that event's subevent enum.
+ */
+int RM_IsSubEventSupported(RedisModuleEvent event, int64_t subevent) {
+    switch (event.id) {
+    case REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED:
+        return subevent < _REDISMODULE_EVENT_REPLROLECHANGED_NEXT;
+    case REDISMODULE_EVENT_PERSISTENCE:
+        return subevent < _REDISMODULE_SUBEVENT_PERSISTENCE_NEXT;
+    case REDISMODULE_EVENT_FLUSHDB:
+        return subevent < _REDISMODULE_SUBEVENT_FLUSHDB_NEXT;
+    case REDISMODULE_EVENT_LOADING:
+        return subevent < _REDISMODULE_SUBEVENT_LOADING_NEXT;
+    case REDISMODULE_EVENT_CLIENT_CHANGE:
+        return subevent < _REDISMODULE_SUBEVENT_CLIENT_CHANGE_NEXT;
+    case REDISMODULE_EVENT_SHUTDOWN:
+        return subevent < _REDISMODULE_SUBEVENT_SHUTDOWN_NEXT;
+    case REDISMODULE_EVENT_REPLICA_CHANGE:
+        /* Fix: compare against the replica-change sentinel rather than
+         * the role-changed one. The two sentinels happen to share the
+         * same numeric value, but relying on that is fragile if either
+         * enum gains new subevents. */
+        return subevent < _REDISMODULE_SUBEVENT_REPLICA_CHANGE_NEXT;
+    case REDISMODULE_EVENT_MASTER_LINK_CHANGE:
+        return subevent < _REDISMODULE_SUBEVENT_MASTER_NEXT;
+    case REDISMODULE_EVENT_CRON_LOOP:
+        return subevent < _REDISMODULE_SUBEVENT_CRON_LOOP_NEXT;
+    case REDISMODULE_EVENT_MODULE_CHANGE:
+        return subevent < _REDISMODULE_SUBEVENT_MODULE_NEXT;
+    case REDISMODULE_EVENT_LOADING_PROGRESS:
+        return subevent < _REDISMODULE_SUBEVENT_LOADING_PROGRESS_NEXT;
+    case REDISMODULE_EVENT_SWAPDB:
+        return subevent < _REDISMODULE_SUBEVENT_SWAPDB_NEXT;
+    case REDISMODULE_EVENT_REPL_ASYNC_LOAD:
+        return subevent < _REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_NEXT;
+    case REDISMODULE_EVENT_FORK_CHILD:
+        return subevent < _REDISMODULE_SUBEVENT_FORK_CHILD_NEXT;
+    case REDISMODULE_EVENT_EVENTLOOP:
+        return subevent < _REDISMODULE_SUBEVENT_EVENTLOOP_NEXT;
+    case REDISMODULE_EVENT_CONFIG:
+        return subevent < _REDISMODULE_SUBEVENT_CONFIG_NEXT;
+    case REDISMODULE_EVENT_KEY:
+        return subevent < _REDISMODULE_SUBEVENT_KEY_NEXT;
+    default:
+        break;
+    }
+    return 0;
+}
+
+/* Internal descriptor passed as the 'data' pointer when firing
+ * REDISMODULE_EVENT_KEY events (see moduleNotifyKeyUnlink() and
+ * moduleFireServerEvent()). */
+typedef struct KeyInfo {
+    int32_t dbnum;          /* Database the key lives in. */
+    RedisModuleString *key; /* Key name. */
+    robj *value;            /* Value object about to be released. */
+    int mode;               /* Open mode used for the module key handle. */
+} KeyInfo;
+
+/* This is called by the Redis internals every time we want to fire an
+ * event that can be intercepted by some module. The pointer 'data' is useful
+ * in order to populate the event-specific structure when needed, in order
+ * to return the structure with more information to the callback.
+ *
+ * 'eid' and 'subid' are just the main event ID and the sub event associated
+ * with the event, depending on what exactly happened. */
+void moduleFireServerEvent(uint64_t eid, int subid, void *data) {
+    /* Fast path to return ASAP if there is nothing to do, avoiding to
+     * setup the iterator and so forth: we want this call to be extremely
+     * cheap if there are no registered modules. */
+    if (listLength(RedisModule_EventListeners) == 0) return;
+
+    listIter li;
+    listNode *ln;
+    listRewind(RedisModule_EventListeners,&li);
+    while((ln = listNext(&li))) {
+        RedisModuleEventListener *el = ln->value;
+        if (el->event.id == eid) {
+            /* A fresh module context is created per listener invocation. */
+            RedisModuleCtx ctx;
+            if (eid == REDISMODULE_EVENT_CLIENT_CHANGE) {
+                /* In the case of client changes, we're pushing the real client
+                 * so the event handler can mutate it if needed. For example,
+                 * to change its authentication state in a way that does not
+                 * depend on specific commands executed later.
+                 */
+                moduleCreateContext(&ctx,el->module,REDISMODULE_CTX_NONE);
+                ctx.client = (client *) data;
+            } else {
+                moduleCreateContext(&ctx,el->module,REDISMODULE_CTX_TEMP_CLIENT);
+            }
+
+            /* Stack storage for the per-event structures the 'data' pointer
+             * may be translated into before reaching the callback. */
+            void *moduledata = NULL;
+            RedisModuleClientInfoV1 civ1;
+            RedisModuleReplicationInfoV1 riv1;
+            RedisModuleModuleChangeV1 mcv1;
+            RedisModuleKey key;
+            RedisModuleKeyInfoV1 ki = {REDISMODULE_KEYINFO_VERSION, &key};
+
+            /* Event specific context and data pointer setup. */
+            if (eid == REDISMODULE_EVENT_CLIENT_CHANGE) {
+                serverAssert(modulePopulateClientInfoStructure(&civ1,data, el->event.dataver) == REDISMODULE_OK);
+                moduledata = &civ1;
+            } else if (eid == REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED) {
+                serverAssert(modulePopulateReplicationInfoStructure(&riv1,el->event.dataver) == REDISMODULE_OK);
+                moduledata = &riv1;
+            } else if (eid == REDISMODULE_EVENT_FLUSHDB) {
+                /* Select the DB being flushed so the callback can inspect
+                 * the yet-to-free keyspace (unless all DBs are flushed). */
+                moduledata = data;
+                RedisModuleFlushInfoV1 *fi = data;
+                if (fi->dbnum != -1)
+                    selectDb(ctx.client, fi->dbnum);
+            } else if (eid == REDISMODULE_EVENT_MODULE_CHANGE) {
+                /* A module is not notified about its own load/unload. */
+                RedisModule *m = data;
+                if (m == el->module) {
+                    moduleFreeContext(&ctx);
+                    continue;
+                }
+                mcv1.version = REDISMODULE_MODULE_CHANGE_VERSION;
+                mcv1.module_name = m->name;
+                mcv1.module_version = m->ver;
+                moduledata = &mcv1;
+            } else if (eid == REDISMODULE_EVENT_LOADING_PROGRESS) {
+                moduledata = data;
+            } else if (eid == REDISMODULE_EVENT_CRON_LOOP) {
+                moduledata = data;
+            } else if (eid == REDISMODULE_EVENT_SWAPDB) {
+                moduledata = data;
+            } else if (eid == REDISMODULE_EVENT_CONFIG) {
+                moduledata = data;
+            } else if (eid == REDISMODULE_EVENT_KEY) {
+                /* Open a module key handle on the key being removed; it is
+                 * closed right after the callback returns. */
+                KeyInfo *info = data;
+                selectDb(ctx.client, info->dbnum);
+                moduleInitKey(&key, &ctx, info->key, info->value, info->mode);
+                moduledata = &ki;
+            }
+
+            /* Track that the module is running inside a server-event hook
+             * for the duration of the callback. */
+            el->module->in_hook++;
+            el->callback(&ctx,el->event,subid,moduledata);
+            el->module->in_hook--;
+
+            if (eid == REDISMODULE_EVENT_KEY) {
+                moduleCloseKey(&key);
+            }
+
+            moduleFreeContext(&ctx);
+        }
+    }
+}
+
+/* Remove all the listeners for this module: this is used before unloading
+ * a module. */
+void moduleUnsubscribeAllServerEvents(RedisModule *module) {
+    listIter iter;
+    listNode *node;
+
+    listRewind(RedisModule_EventListeners,&iter);
+    while((node = listNext(&iter)) != NULL) {
+        RedisModuleEventListener *listener = node->value;
+        if (listener->module != module) continue;
+        listDelNode(RedisModule_EventListeners,node);
+        zfree(listener);
+    }
+}
+
+/* Fire a LOADING_PROGRESS server event, rate limited to at most
+ * server.hz events per second. 'is_aof' selects the AOF or RDB subevent. */
+void processModuleLoadingProgressEvent(int is_aof) {
+    static long long next_event = 0;
+    long long now = server.ustime;
+
+    /* Rate limit: do nothing until the next scheduled firing time. */
+    if (now < next_event) return;
+
+    /* Progress is scaled to 0..1024, or -1 when the total size is
+     * unknown. */
+    int progress = -1;
+    if (server.loading_total_bytes)
+        progress = (server.loading_loaded_bytes<<10) / server.loading_total_bytes;
+    RedisModuleLoadingProgressV1 fi = {REDISMODULE_LOADING_PROGRESS_VERSION,
+                                       server.hz,
+                                       progress};
+    moduleFireServerEvent(REDISMODULE_EVENT_LOADING_PROGRESS,
+                          is_aof ? REDISMODULE_SUBEVENT_LOADING_PROGRESS_AOF :
+                                   REDISMODULE_SUBEVENT_LOADING_PROGRESS_RDB,
+                          &fi);
+    /* Schedule the next firing. */
+    next_event = now + 1000000 / server.hz;
+}
+
+/* When a key is deleted (in dbAsyncDelete/dbSyncDelete/setKey), it
+* will be called to tell the module which key is about to be released. */
+void moduleNotifyKeyUnlink(robj *key, robj *val, int dbid, int flags) {
+    /* Disable lazy expiry while the event fires — NOTE(review): presumably
+     * to avoid re-entrant key deletions from within the module callback;
+     * confirm against other server.lazy_expire_disabled users. */
+    server.lazy_expire_disabled++;
+    /* Map the DB flags to the most specific KEY subevent; plain deletion
+     * is the default. */
+    int subevent = REDISMODULE_SUBEVENT_KEY_DELETED;
+    if (flags & DB_FLAG_KEY_EXPIRED) {
+        subevent = REDISMODULE_SUBEVENT_KEY_EXPIRED;
+    } else if (flags & DB_FLAG_KEY_EVICTED) {
+        subevent = REDISMODULE_SUBEVENT_KEY_EVICTED;
+    } else if (flags & DB_FLAG_KEY_OVERWRITE) {
+        subevent = REDISMODULE_SUBEVENT_KEY_OVERWRITTEN;
+    }
+    /* The key is exposed to listeners read-only. */
+    KeyInfo info = {dbid, key, val, REDISMODULE_READ};
+    moduleFireServerEvent(REDISMODULE_EVENT_KEY, subevent, &info);
+
+    /* For module-type values, additionally invoke the type's unlink
+     * callback so the owning module can react to the value release. */
+    if (val->type == OBJ_MODULE) {
+        moduleValue *mv = val->ptr;
+        moduleType *mt = mv->type;
+        /* We prefer to use the enhanced version. */
+        if (mt->unlink2 != NULL) {
+            RedisModuleKeyOptCtx ctx = {key, NULL, dbid, -1};
+            mt->unlink2(&ctx,mv->value);
+        } else if (mt->unlink != NULL) {
+            mt->unlink(key,mv->value);
+        }
+    }
+    server.lazy_expire_disabled--;
+}
+
+/* Return the free_effort of the module, it will automatically choose to call
+ * `free_effort` or `free_effort2`, and the default return value is 1.
+ * value of 0 means very high effort (always asynchronous freeing). */
+size_t moduleGetFreeEffort(robj *key, robj *val, int dbid) {
+    moduleValue *mv = val->ptr;
+    moduleType *mt = mv->type;
+
+    /* The enhanced (context-taking) callback takes precedence. */
+    if (mt->free_effort2 != NULL) {
+        RedisModuleKeyOptCtx ctx = {key, NULL, dbid, -1};
+        return mt->free_effort2(&ctx,mv->value);
+    }
+    if (mt->free_effort != NULL)
+        return mt->free_effort(key,mv->value);
+    return 1; /* Default effort when the type defines no callback. */
+}
+
+/* Return the memory usage of the module, it will automatically choose to call
+ * `mem_usage` or `mem_usage2`, and the default return value is 0. */
+size_t moduleGetMemUsage(robj *key, robj *val, size_t sample_size, int dbid) {
+    moduleValue *mv = val->ptr;
+    moduleType *mt = mv->type;
+
+    /* The enhanced (context-taking) callback takes precedence. */
+    if (mt->mem_usage2 != NULL) {
+        RedisModuleKeyOptCtx ctx = {key, NULL, dbid, -1};
+        return mt->mem_usage2(&ctx, mv->value, sample_size);
+    }
+    if (mt->mem_usage != NULL)
+        return mt->mem_usage(mv->value);
+    return 0; /* Unknown size when the type defines no callback. */
+}
+
+/* --------------------------------------------------------------------------
+ * Modules API internals
+ * -------------------------------------------------------------------------- */
+
+/* server.moduleapi dictionary type. Only uses plain C strings since
+ * this gets queries from modules. */
+
+/* Hash a NUL-terminated C string key. */
+uint64_t dictCStringKeyHash(const void *key) {
+    const char *s = key;
+    return dictGenHashFunction((unsigned char*)s, strlen(s));
+}
+
+/* Case-sensitive equality of two C string keys: non-zero when equal. */
+int dictCStringKeyCompare(dict *d, const void *key1, const void *key2) {
+    UNUSED(d);
+    return !strcmp(key1,key2);
+}
+
+/* Keys are static C strings and values are function pointers, so no
+ * dup/destructor callbacks are needed. */
+dictType moduleAPIDictType = {
+    dictCStringKeyHash,        /* hash function */
+    NULL,                      /* key dup */
+    NULL,                      /* val dup */
+    dictCStringKeyCompare,     /* key compare */
+    NULL,                      /* key destructor */
+    NULL,                      /* val destructor */
+    NULL                       /* allow to expand */
+};
+
+/* Store 'funcptr' in server.moduleapi under the plain C string 'funcname'.
+ * Returns the dictAdd() result (an error when the name already exists). */
+int moduleRegisterApi(const char *funcname, void *funcptr) {
+    return dictAdd(server.moduleapi, (char*)funcname, funcptr);
+}
+
+/* Register the internal RM_<name> implementation under the exported
+ * "RedisModule_<name>" string. */
+#define REGISTER_API(name) \
+    moduleRegisterApi("RedisModule_" #name, (void *)(unsigned long)RM_ ## name)
+
+/* Global initialization at Redis startup. */
+void moduleRegisterCoreAPI(void);
+
+/* Currently, this function is just a placeholder for the module system
+ * initialization steps that need to be run after server initialization.
+ * A previous issue, selectDb() in createClient() requires that server.db has
+ * been initialized, see #7323. */
+void moduleInitModulesSystemLast(void) {
+    /* Intentionally empty for now. */
+}
+
+
+/* Dict type for sds-key / sds-value maps with case-insensitive keys; both
+ * keys and values are owned (and freed) by the dict. Used for
+ * server.module_configs_queue. */
+dictType sdsKeyValueHashDictType = {
+    dictSdsCaseHash,            /* hash function */
+    NULL,                       /* key dup */
+    NULL,                       /* val dup */
+    dictSdsKeyCaseCompare,      /* key compare */
+    dictSdsDestructor,          /* key destructor */
+    dictSdsDestructor,          /* val destructor */
+    NULL                        /* allow to expand */
+};
+
+/* One-time initialization of the module system at server startup: global
+ * lists/dicts, core API registration, the module wake-up pipe, the timers
+ * radix tree, the server-event listener list and the module GIL. */
+void moduleInitModulesSystem(void) {
+    /* Global lists/dicts backing the module system state. */
+    moduleUnblockedClients = listCreate();
+    server.loadmodule_queue = listCreate();
+    server.module_configs_queue = dictCreate(&sdsKeyValueHashDictType);
+    modules = dictCreate(&modulesDictType);
+    moduleAuthCallbacks = listCreate();
+
+    /* Set up the keyspace notification subscriber list and static client */
+    moduleKeyspaceSubscribers = listCreate();
+
+    /* Set up the post-execution-unit job list. */
+    modulePostExecUnitJobs = listCreate();
+
+    /* Set up filter list */
+    moduleCommandFilters = listCreate();
+
+    /* Register the core RedisModule_* API functions. */
+    moduleRegisterCoreAPI();
+
+    /* Create a pipe for module threads to be able to wake up the redis main thread.
+     * Make the pipe non blocking. This is just a best effort aware mechanism
+     * and we do not want to block not in the read nor in the write half.
+     * Enable close-on-exec flag on pipes in case of the fork-exec system calls in
+     * sentinels or redis servers. */
+    if (anetPipe(server.module_pipe, O_CLOEXEC|O_NONBLOCK, O_CLOEXEC|O_NONBLOCK) == -1) {
+        serverLog(LL_WARNING,
+            "Can't create the pipe for module threads: %s", strerror(errno));
+        exit(1);
+    }
+
+    /* Create the timers radix tree. */
+    Timers = raxNew();
+
+    /* Setup the event listeners data structures. */
+    RedisModule_EventListeners = listCreate();
+
+    /* Making sure moduleEventVersions is synced with the number of events. */
+    serverAssert(sizeof(moduleEventVersions)/sizeof(moduleEventVersions[0]) == _REDISMODULE_EVENT_NEXT);
+
+    /* Our thread-safe contexts GIL must start with already locked:
+     * it is just unlocked when it's safe. */
+    pthread_mutex_lock(&moduleGIL);
+}
+
+void modulesCron(void) {
+    /* moduleTempClientMinCount is the minimum number of pooled temporary
+     * clients observed since the last cron run, i.e. how many clients sat
+     * unused for a whole cron period. Free those, but keep a small reserve
+     * (to avoid re-allocation costs after an idle period) and bound the
+     * work per call to avoid latency spikes. */
+    const unsigned int reserve = 8;
+    int budget = 50;
+    while (budget > 0 && moduleTempClientCount > 0 &&
+           moduleTempClientMinCount > reserve)
+    {
+        freeClient(moduleTempClients[--moduleTempClientCount]);
+        moduleTempClientMinCount--;
+        budget--;
+    }
+    moduleTempClientMinCount = moduleTempClientCount;
+
+    /* Shrink the pool array itself when it is mostly empty. */
+    if (moduleTempClientCap > 32 && moduleTempClientCap > moduleTempClientCount * 4) {
+        moduleTempClientCap /= 4;
+        moduleTempClients = zrealloc(moduleTempClients,sizeof(client*)*moduleTempClientCap);
+    }
+}
+
+/* Release a load-queue entry: the module path, the retained argument
+ * objects, the argument vector and the entry itself. NULL is a no-op. */
+void moduleLoadQueueEntryFree(struct moduleLoadQueueEntry *loadmod) {
+    if (loadmod == NULL) return;
+    sdsfree(loadmod->path);
+    for (int j = 0; j < loadmod->argc; j++)
+        decrRefCount(loadmod->argv[j]);
+    zfree(loadmod->argv);
+    zfree(loadmod);
+}
+
+/* Remove Module Configs from standardConfig array in config.c */
+void moduleRemoveConfigs(RedisModule *module) {
+    listIter iter;
+    listNode *node;
+
+    listRewind(module->module_configs, &iter);
+    while ((node = listNext(&iter)) != NULL) {
+        ModuleConfig *config = listNodeValue(node);
+        /* Configs are registered under "ModuleName.ConfigName". */
+        sds full_name = sdscatfmt(sdsempty(), "%s.%s", module->name, config->name);
+        removeConfig(full_name);
+        sdsfree(full_name);
+    }
+}
+
+/* Load all the modules in the server.loadmodule_queue list, which is
+ * populated by `loadmodule` directives in the configuration file.
+ * We can't load modules directly when processing the configuration file
+ * because the server must be fully initialized before loading modules.
+ *
+ * The function aborts the server on errors, since to start with missing
+ * modules is not considered sane: clients may rely on the existence of
+ * given commands, loading AOF also may need some modules to exist, and
+ * if this instance is a slave, it must understand commands from master. */
+void moduleLoadFromQueue(void) {
+    listIter li;
+    listNode *ln;
+
+    listRewind(server.loadmodule_queue,&li);
+    while((ln = listNext(&li))) {
+        struct moduleLoadQueueEntry *loadmod = ln->value;
+        if (moduleLoad(loadmod->path,(void **)loadmod->argv,loadmod->argc, 0)
+            == C_ERR)
+        {
+            serverLog(LL_WARNING,
+                "Can't load module from %s: server aborting",
+                loadmod->path);
+            exit(1);
+        }
+        /* Deleting the node just returned is safe because listNext()
+         * already advanced past it. */
+        moduleLoadQueueEntryFree(loadmod);
+        listDelNode(server.loadmodule_queue, ln);
+    }
+    /* Leftover entries in the config queue mean a module CONFIG was given
+     * without a matching loadmodule directive, or the module never applied
+     * its configs. */
+    if (dictSize(server.module_configs_queue)) {
+        serverLog(LL_WARNING, "Module Configuration detected without loadmodule directive or no ApplyConfig call: aborting");
+        exit(1);
+    }
+}
+
+/* Free the RedisModule structure and the resources it directly owns.
+ * Note: this does not unregister commands, callbacks or listeners — see
+ * moduleUnregisterCleanup() for that. */
+void moduleFreeModuleStructure(struct RedisModule *module) {
+    listRelease(module->types);
+    listRelease(module->filters);
+    listRelease(module->usedby);
+    listRelease(module->using);
+    listRelease(module->module_configs);
+    sdsfree(module->name);
+    moduleLoadQueueEntryFree(module->loadmod);
+    zfree(module);
+}
+
+/* Recursively free an array of 'num_args' command argument descriptors,
+ * including every owned string field and any nested subargs, and finally
+ * the array itself. */
+void moduleFreeArgs(struct redisCommandArg *args, int num_args) {
+    for (int i = 0; i < num_args; i++) {
+        struct redisCommandArg *arg = &args[i];
+        zfree((char *)arg->name);
+        zfree((char *)arg->token);
+        zfree((char *)arg->summary);
+        zfree((char *)arg->since);
+        zfree((char *)arg->deprecated_since);
+        zfree((char *)arg->display_text);
+        if (arg->subargs)
+            moduleFreeArgs(arg->subargs, arg->num_args);
+    }
+    zfree(args);
+}
+
+/* Free the command registered with the specified module.
+ * On success C_OK is returned, otherwise C_ERR is returned.
+ *
+ * Note that caller needs to handle the deletion of the command table dict,
+ * and after that needs to free the command->fullname and the command itself.
+ */
+int moduleFreeCommand(struct RedisModule *module, struct redisCommand *cmd) {
+    /* Only commands dispatched through the module dispatcher and owned by
+     * this specific module are freed here. */
+    if (cmd->proc != RedisModuleCommandDispatcher)
+        return C_ERR;
+
+    RedisModuleCommand *cp = cmd->module_cmd;
+    if (cp->module != module)
+        return C_ERR;
+
+    /* Free everything except cmd->fullname and cmd itself. */
+    for (int j = 0; j < cmd->key_specs_num; j++) {
+        if (cmd->key_specs[j].notes)
+            zfree((char *)cmd->key_specs[j].notes);
+        if (cmd->key_specs[j].begin_search_type == KSPEC_BS_KEYWORD)
+            zfree((char *)cmd->key_specs[j].bs.keyword.keyword);
+    }
+    zfree(cmd->key_specs);
+    /* Tips and history are NULL-terminated arrays of owned strings. */
+    for (int j = 0; cmd->tips && cmd->tips[j]; j++)
+        zfree((char *)cmd->tips[j]);
+    zfree(cmd->tips);
+    for (int j = 0; cmd->history && cmd->history[j].since; j++) {
+        zfree((char *)cmd->history[j].since);
+        zfree((char *)cmd->history[j].changes);
+    }
+    zfree(cmd->history);
+    zfree((char *)cmd->summary);
+    zfree((char *)cmd->since);
+    zfree((char *)cmd->deprecated_since);
+    zfree((char *)cmd->complexity);
+    if (cmd->latency_histogram) {
+        hdr_close(cmd->latency_histogram);
+        cmd->latency_histogram = NULL;
+    }
+    moduleFreeArgs(cmd->args, cmd->num_args);
+    zfree(cp);
+
+    /* Recursively free the subcommands owned by this module, removing them
+     * from the subcommands dict as we go (the safe iterator permits
+     * deletion during iteration). */
+    if (cmd->subcommands_dict) {
+        dictEntry *de;
+        dictIterator *di = dictGetSafeIterator(cmd->subcommands_dict);
+        while ((de = dictNext(di)) != NULL) {
+            struct redisCommand *sub = dictGetVal(de);
+            if (moduleFreeCommand(module, sub) != C_OK) continue;
+
+            serverAssert(dictDelete(cmd->subcommands_dict, sub->declared_name) == DICT_OK);
+            sdsfree((sds)sub->declared_name);
+            sdsfree(sub->fullname);
+            zfree(sub);
+        }
+        dictReleaseIterator(di);
+        dictRelease(cmd->subcommands_dict);
+    }
+
+    return C_OK;
+}
+
+/* Remove from the command tables every command registered by 'module'.
+ * Commands not owned by the module are skipped (moduleFreeCommand()
+ * returns C_ERR for them). */
+void moduleUnregisterCommands(struct RedisModule *module) {
+    /* Unregister all the commands registered by this module. */
+    dictIterator *di = dictGetSafeIterator(server.commands);
+    dictEntry *de;
+    while ((de = dictNext(di)) != NULL) {
+        struct redisCommand *cmd = dictGetVal(de);
+        if (moduleFreeCommand(module, cmd) != C_OK) continue;
+
+        /* The safe iterator allows deleting entries while iterating. */
+        serverAssert(dictDelete(server.commands, cmd->fullname) == DICT_OK);
+        serverAssert(dictDelete(server.orig_commands, cmd->fullname) == DICT_OK);
+        sdsfree((sds)cmd->declared_name);
+        sdsfree(cmd->fullname);
+        zfree(cmd);
+    }
+    dictReleaseIterator(di);
+}
+
+/* We parse argv to add sds "NAME VALUE" pairs to the server.module_configs_queue list of configs.
+ * We also increment the module_argv pointer to just after ARGS if there are args, otherwise
+ * we set it to NULL.
+ * Returns REDISMODULE_OK on success, REDISMODULE_ERR on a syntax error
+ * (a CONFIG without both name and value, or an unknown token). */
+int parseLoadexArguments(RedisModuleString ***module_argv, int *module_argc) {
+    int args_specified = 0;
+    RedisModuleString **argv = *module_argv;
+    int argc = *module_argc;
+    for (int i = 0; i < argc; i++) {
+        char *arg_val = argv[i]->ptr;
+        if (!strcasecmp(arg_val, "CONFIG")) {
+            /* CONFIG requires both a name and a value token after it. */
+            if (i + 2 >= argc) {
+                serverLog(LL_NOTICE, "CONFIG specified without name value pair");
+                return REDISMODULE_ERR;
+            }
+            sds name = sdsdup(argv[i + 1]->ptr);
+            sds value = sdsdup(argv[i + 2]->ptr);
+            /* dictReplace() returns 0 when the key already existed; the
+             * duplicated name is not stored in that case and must be freed
+             * here (the value replaces the old one either way). */
+            if (!dictReplace(server.module_configs_queue, name, value)) sdsfree(name);
+            i += 2;
+        } else if (!strcasecmp(arg_val, "ARGS")) {
+            /* Everything after ARGS is handed verbatim to the module. */
+            args_specified = 1;
+            i++;
+            if (i >= argc) {
+                *module_argv = NULL;
+                *module_argc = 0;
+            } else {
+                *module_argv = argv + i;
+                *module_argc = argc - i;
+            }
+            break;
+        } else {
+            serverLog(LL_NOTICE, "Syntax Error from arguments to loadex around %s.", arg_val);
+            return REDISMODULE_ERR;
+        }
+    }
+    if (!args_specified) {
+        *module_argv = NULL;
+        *module_argc = 0;
+    }
+    return REDISMODULE_OK;
+}
+
+/* Unregister module-related things, called when moduleLoad fails or moduleUnload.
+ * Each helper removes one category of registrations owned by the module:
+ * authenticated clients, commands, keyspace notifications, shared/used
+ * APIs, command filters, server-event listeners, configs and auth
+ * callbacks. */
+void moduleUnregisterCleanup(RedisModule *module) {
+    moduleFreeAuthenticatedClients(module);
+    moduleUnregisterCommands(module);
+    moduleUnsubscribeNotifications(module);
+    moduleUnregisterSharedAPI(module);
+    moduleUnregisterUsedAPI(module);
+    moduleUnregisterFilters(module);
+    moduleUnsubscribeAllServerEvents(module);
+    moduleRemoveConfigs(module);
+    moduleUnregisterAuthCBs(module);
+}
+
+/* Load a module and initialize it. On success C_OK is returned, otherwise
+ * C_ERR is returned.
+ *
+ * 'path' is the shared object path; 'module_argv'/'module_argc' are the
+ * arguments retained on the module structure; 'is_loadex' enables the
+ * extra MODULE LOADEX config-queue validation. */
+int moduleLoad(const char *path, void **module_argv, int module_argc, int is_loadex) {
+    int (*onload)(void *, void **, int);
+    void *handle;
+
+    /* Refuse files without any execute permission bit set. */
+    struct stat st;
+    if (stat(path, &st) == 0) {
+        /* This check is best effort */
+        if (!(st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
+            serverLog(LL_WARNING, "Module %s failed to load: It does not have execute permissions.", path);
+            return C_ERR;
+        }
+    }
+
+    /* Load the shared object and resolve its OnLoad entry point. */
+    handle = dlopen(path,RTLD_NOW|RTLD_LOCAL);
+    if (handle == NULL) {
+        serverLog(LL_WARNING, "Module %s failed to load: %s", path, dlerror());
+        return C_ERR;
+    }
+    onload = (int (*)(void *, void **, int))(unsigned long) dlsym(handle,"RedisModule_OnLoad");
+    if (onload == NULL) {
+        dlclose(handle);
+        serverLog(LL_WARNING,
+            "Module %s does not export RedisModule_OnLoad() "
+            "symbol. Module not loaded.",path);
+        return C_ERR;
+    }
+    RedisModuleCtx ctx;
+    moduleCreateContext(&ctx, NULL, REDISMODULE_CTX_TEMP_CLIENT); /* We pass NULL since we don't have a module yet. */
+    if (onload((void*)&ctx,module_argv,module_argc) == REDISMODULE_ERR) {
+        serverLog(LL_WARNING,
+            "Module %s initialization failed. Module not loaded",path);
+        /* OnLoad may have registered things before failing: undo them
+         * and free the half-built module structure. */
+        if (ctx.module) {
+            moduleUnregisterCleanup(ctx.module);
+            moduleFreeModuleStructure(ctx.module);
+        }
+        moduleFreeContext(&ctx);
+        dlclose(handle);
+        return C_ERR;
+    }
+
+    /* Redis module loaded! Register it. */
+    dictAdd(modules,ctx.module->name,ctx.module);
+    ctx.module->blocked_clients = 0;
+    ctx.module->handle = handle;
+    /* Record the load path and arguments on the module structure. */
+    ctx.module->loadmod = zmalloc(sizeof(struct moduleLoadQueueEntry));
+    ctx.module->loadmod->path = sdsnew(path);
+    ctx.module->loadmod->argv = module_argc ? zmalloc(sizeof(robj*)*module_argc) : NULL;
+    ctx.module->loadmod->argc = module_argc;
+    for (int i = 0; i < module_argc; i++) {
+        ctx.module->loadmod->argv[i] = module_argv[i];
+        incrRefCount(ctx.module->loadmod->argv[i]);
+    }
+
+    /* If module commands have ACL categories, recompute command bits
+     * for all existing users once the modules has been registered. */
+    if (ctx.module->num_commands_with_acl_categories) {
+        ACLRecomputeCommandBitsFromCommandRulesAllUsers();
+    }
+    serverLog(LL_NOTICE,"Module '%s' loaded from %s",ctx.module->name,path);
+    ctx.module->onload = 0;
+
+    /* Post-load validation: declared configs must have been initialized,
+     * and for LOADEX the config queue must have been fully consumed. */
+    int post_load_err = 0;
+    if (listLength(ctx.module->module_configs) && !ctx.module->configs_initialized) {
+        serverLogRaw(LL_WARNING, "Module Configurations were not set, likely a missing LoadConfigs call. Unloading the module.");
+        post_load_err = 1;
+    }
+
+    if (is_loadex && dictSize(server.module_configs_queue)) {
+        serverLogRaw(LL_WARNING, "Loadex configurations were not applied, likely due to invalid arguments. Unloading the module.");
+        post_load_err = 1;
+    }
+
+    if (post_load_err) {
+        moduleUnload(ctx.module->name, NULL);
+        moduleFreeContext(&ctx);
+        return C_ERR;
+    }
+
+    /* Fire the loaded modules event. */
+    moduleFireServerEvent(REDISMODULE_EVENT_MODULE_CHANGE,
+                          REDISMODULE_SUBEVENT_MODULE_LOADED,
+                          ctx.module);
+
+    moduleFreeContext(&ctx);
+    return C_OK;
+}
+
+/* Unload the module registered with the specified name. On success
+ * C_OK is returned, otherwise C_ERR is returned and errmsg is set
+ * with an appropriate message. */
+int moduleUnload(sds name, const char **errmsg) {
+ struct RedisModule *module = dictFetchValue(modules,name);
+
+ if (module == NULL) {
+ *errmsg = "no such module with that name";
+ return C_ERR;
+ } else if (listLength(module->types)) {
+ *errmsg = "the module exports one or more module-side data "
+ "types, can't unload";
+ return C_ERR;
+ } else if (listLength(module->usedby)) {
+ *errmsg = "the module exports APIs used by other modules. "
+ "Please unload them first and try again";
+ return C_ERR;
+ } else if (module->blocked_clients) {
+ *errmsg = "the module has blocked clients. "
+ "Please wait for them to be unblocked and try again";
+ return C_ERR;
+ } else if (moduleHoldsTimer(module)) {
+ *errmsg = "the module holds timer that is not fired. "
+ "Please stop the timer or wait until it fires.";
+ return C_ERR;
+ }
+
+ /* Give module a chance to clean up. */
+ int (*onunload)(void *);
+ onunload = (int (*)(void *))(unsigned long) dlsym(module->handle, "RedisModule_OnUnload");
+ if (onunload) {
+ RedisModuleCtx ctx;
+ moduleCreateContext(&ctx, module, REDISMODULE_CTX_TEMP_CLIENT);
+ int unload_status = onunload((void*)&ctx);
+ moduleFreeContext(&ctx);
+
+ if (unload_status == REDISMODULE_ERR) {
+ serverLog(LL_WARNING, "Module %s OnUnload failed. Unload canceled.", name);
+ errno = ECANCELED;
+ return C_ERR;
+ }
+ }
+
+ moduleUnregisterCleanup(module);
+
+ /* Unload the dynamic library. */
+ if (dlclose(module->handle) == -1) {
+ char *error = dlerror();
+ if (error == NULL) error = "Unknown error";
+ serverLog(LL_WARNING,"Error when trying to close the %s module: %s",
+ module->name, error);
+ }
+
+ /* Fire the unloaded modules event. */
+ moduleFireServerEvent(REDISMODULE_EVENT_MODULE_CHANGE,
+ REDISMODULE_SUBEVENT_MODULE_UNLOADED,
+ module);
+
+ /* Remove from list of modules. */
+ serverLog(LL_NOTICE,"Module %s unloaded",module->name);
+ dictDelete(modules,module->name);
+ module->name = NULL; /* The name was already freed by dictDelete(). */
+ moduleFreeModuleStructure(module);
+
+ /* Recompute command bits for all users once the modules has been completely unloaded. */
+ ACLRecomputeCommandBitsFromCommandRulesAllUsers();
+ return C_OK;
+}
+
+void modulePipeReadable(aeEventLoop *el, int fd, void *privdata, int mask) {
+ UNUSED(el);
+ UNUSED(fd);
+ UNUSED(mask);
+ UNUSED(privdata);
+
+ char buf[128];
+ while (read(fd, buf, sizeof(buf)) == sizeof(buf));
+
+ /* Handle event loop events if pipe was written from event loop API */
+ eventLoopHandleOneShotEvents();
+}
+
+/* Helper function for the MODULE and HELLO command: send the list of the
+ * loaded modules to the client. */
+void addReplyLoadedModules(client *c) {
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ addReplyArrayLen(c,dictSize(modules));
+ while ((de = dictNext(di)) != NULL) {
+ sds name = dictGetKey(de);
+ struct RedisModule *module = dictGetVal(de);
+ sds path = module->loadmod->path;
+ addReplyMapLen(c,4);
+ addReplyBulkCString(c,"name");
+ addReplyBulkCBuffer(c,name,sdslen(name));
+ addReplyBulkCString(c,"ver");
+ addReplyLongLong(c,module->ver);
+ addReplyBulkCString(c,"path");
+ addReplyBulkCBuffer(c,path,sdslen(path));
+ addReplyBulkCString(c,"args");
+ addReplyArrayLen(c,module->loadmod->argc);
+ for (int i = 0; i < module->loadmod->argc; i++) {
+ addReplyBulk(c,module->loadmod->argv[i]);
+ }
+ }
+ dictReleaseIterator(di);
+}
+
+/* Helper for genModulesInfoString(): given a list of modules, return
+ * an SDS string in the form "[modulename|modulename2|...]" */
+sds genModulesInfoStringRenderModulesList(list *l) {
+ listIter li;
+ listNode *ln;
+ listRewind(l,&li);
+ sds output = sdsnew("[");
+ while((ln = listNext(&li))) {
+ RedisModule *module = ln->value;
+ output = sdscat(output,module->name);
+ if (ln != listLast(l))
+ output = sdscat(output,"|");
+ }
+ output = sdscat(output,"]");
+ return output;
+}
+
+/* Helper for genModulesInfoString(): render module options as an SDS string. */
+sds genModulesInfoStringRenderModuleOptions(struct RedisModule *module) {
+ sds output = sdsnew("[");
+ if (module->options & REDISMODULE_OPTIONS_HANDLE_IO_ERRORS)
+ output = sdscat(output,"handle-io-errors|");
+ if (module->options & REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD)
+ output = sdscat(output,"handle-repl-async-load|");
+ if (module->options & REDISMODULE_OPTION_NO_IMPLICIT_SIGNAL_MODIFIED)
+ output = sdscat(output,"no-implicit-signal-modified|");
+ output = sdstrim(output,"|");
+ output = sdscat(output,"]");
+ return output;
+}
+
+
+/* Helper function for the INFO command: adds loaded modules as to info's
+ * output.
+ *
+ * After the call, the passed sds info string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds genModulesInfoString(sds info) {
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ while ((de = dictNext(di)) != NULL) {
+ sds name = dictGetKey(de);
+ struct RedisModule *module = dictGetVal(de);
+
+ sds usedby = genModulesInfoStringRenderModulesList(module->usedby);
+ sds using = genModulesInfoStringRenderModulesList(module->using);
+ sds options = genModulesInfoStringRenderModuleOptions(module);
+ info = sdscatfmt(info,
+ "module:name=%S,ver=%i,api=%i,filters=%i,"
+ "usedby=%S,using=%S,options=%S\r\n",
+ name, module->ver, module->apiver,
+ (int)listLength(module->filters), usedby, using, options);
+ sdsfree(usedby);
+ sdsfree(using);
+ sdsfree(options);
+ }
+ dictReleaseIterator(di);
+ return info;
+}
+
+/* --------------------------------------------------------------------------
+ * Module Configurations API internals
+ * -------------------------------------------------------------------------- */
+
+/* Check if the configuration name is already registered */
+int isModuleConfigNameRegistered(RedisModule *module, sds name) {
+ listNode *match = listSearchKey(module->module_configs, (void *) name);
+ return match != NULL;
+}
+
+/* Assert that the flags passed into the RM_RegisterConfig Suite are valid */
+int moduleVerifyConfigFlags(unsigned int flags, configType type) {
+ if ((flags & ~(REDISMODULE_CONFIG_DEFAULT
+ | REDISMODULE_CONFIG_IMMUTABLE
+ | REDISMODULE_CONFIG_SENSITIVE
+ | REDISMODULE_CONFIG_HIDDEN
+ | REDISMODULE_CONFIG_PROTECTED
+ | REDISMODULE_CONFIG_DENY_LOADING
+ | REDISMODULE_CONFIG_BITFLAGS
+ | REDISMODULE_CONFIG_MEMORY))) {
+ serverLogRaw(LL_WARNING, "Invalid flag(s) for configuration");
+ return REDISMODULE_ERR;
+ }
+ if (type != NUMERIC_CONFIG && flags & REDISMODULE_CONFIG_MEMORY) {
+ serverLogRaw(LL_WARNING, "Numeric flag provided for non-numeric configuration.");
+ return REDISMODULE_ERR;
+ }
+ if (type != ENUM_CONFIG && flags & REDISMODULE_CONFIG_BITFLAGS) {
+ serverLogRaw(LL_WARNING, "Enum flag provided for non-enum configuration.");
+ return REDISMODULE_ERR;
+ }
+ return REDISMODULE_OK;
+}
+
+int moduleVerifyConfigName(sds name) {
+ if (sdslen(name) == 0) {
+ serverLogRaw(LL_WARNING, "Module config names cannot be an empty string.");
+ return REDISMODULE_ERR;
+ }
+ for (size_t i = 0 ; i < sdslen(name) ; ++i) {
+ char curr_char = name[i];
+ if ((curr_char >= 'a' && curr_char <= 'z') ||
+ (curr_char >= 'A' && curr_char <= 'Z') ||
+ (curr_char >= '0' && curr_char <= '9') ||
+ (curr_char == '_') || (curr_char == '-'))
+ {
+ continue;
+ }
+ serverLog(LL_WARNING, "Invalid character %c in Module Config name %s.", curr_char, name);
+ return REDISMODULE_ERR;
+ }
+ return REDISMODULE_OK;
+}
+
/* This is a series of set functions for each type that act as dispatchers for
 * config.c to call module set callbacks. */
#define CONFIG_ERR_SIZE 256
/* Static scratch buffer holding the last module-reported config error. */
static char configerr[CONFIG_ERR_SIZE];
/* Copy a module-supplied error string into the static 'configerr' buffer,
 * release the module's reference, and point *err at the buffer so config.c
 * can report it. No-op when err_in is NULL (module set no error).
 * NOTE: the buffer is static, so the message is only valid until the next
 * failing callback overwrites it — presumably safe on the single-threaded
 * config path; confirm if used elsewhere. */
static void propagateErrorString(RedisModuleString *err_in, const char **err) {
    if (err_in) {
        /* Truncates at CONFIG_ERR_SIZE-1 characters; always NUL-terminated. */
        redis_strlcpy(configerr, err_in->ptr, CONFIG_ERR_SIZE);
        decrRefCount(err_in);
        *err = configerr;
    }
}
+
+int setModuleBoolConfig(ModuleConfig *config, int val, const char **err) {
+ RedisModuleString *error = NULL;
+ int return_code = config->set_fn.set_bool(config->name, val, config->privdata, &error);
+ propagateErrorString(error, err);
+ return return_code == REDISMODULE_OK ? 1 : 0;
+}
+
+int setModuleStringConfig(ModuleConfig *config, sds strval, const char **err) {
+ RedisModuleString *error = NULL;
+ RedisModuleString *new = createStringObject(strval, sdslen(strval));
+ int return_code = config->set_fn.set_string(config->name, new, config->privdata, &error);
+ propagateErrorString(error, err);
+ decrRefCount(new);
+ return return_code == REDISMODULE_OK ? 1 : 0;
+}
+
+int setModuleEnumConfig(ModuleConfig *config, int val, const char **err) {
+ RedisModuleString *error = NULL;
+ int return_code = config->set_fn.set_enum(config->name, val, config->privdata, &error);
+ propagateErrorString(error, err);
+ return return_code == REDISMODULE_OK ? 1 : 0;
+}
+
+int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err) {
+ RedisModuleString *error = NULL;
+ int return_code = config->set_fn.set_numeric(config->name, val, config->privdata, &error);
+ propagateErrorString(error, err);
+ return return_code == REDISMODULE_OK ? 1 : 0;
+}
+
+/* This is a series of get functions for each type that act as dispatchers for
+ * config.c to call module set callbacks. */
+int getModuleBoolConfig(ModuleConfig *module_config) {
+ return module_config->get_fn.get_bool(module_config->name, module_config->privdata);
+}
+
+sds getModuleStringConfig(ModuleConfig *module_config) {
+ RedisModuleString *val = module_config->get_fn.get_string(module_config->name, module_config->privdata);
+ return val ? sdsdup(val->ptr) : NULL;
+}
+
+int getModuleEnumConfig(ModuleConfig *module_config) {
+ return module_config->get_fn.get_enum(module_config->name, module_config->privdata);
+}
+
+long long getModuleNumericConfig(ModuleConfig *module_config) {
+ return module_config->get_fn.get_numeric(module_config->name, module_config->privdata);
+}
+
/* This function takes a module and a list of configs stored as sds NAME VALUE pairs.
 * It attempts to call set on each of these configs.
 *
 * For every config the module registered: if a value was queued (from the
 * config file or MODULE LOADEX) under "<module>.<name>", apply it; otherwise
 * apply the registered default. On any failure the whole queue is discarded
 * and REDISMODULE_ERR is returned; on success the applied entry is removed
 * from the queue and configs_initialized is set. */
int loadModuleConfigs(RedisModule *module) {
    listIter li;
    listNode *ln;
    const char *err = NULL;
    listRewind(module->module_configs, &li);
    while ((ln = listNext(&li))) {
        ModuleConfig *module_config = listNodeValue(ln);
        /* Queue keys are fully qualified: "<module>.<config>". */
        sds config_name = sdscatfmt(sdsempty(), "%s.%s", module->name, module_config->name);
        dictEntry *config_argument = dictFind(server.module_configs_queue, config_name);
        if (config_argument) {
            /* A value was provided for this config: try to set it. */
            if (!performModuleConfigSetFromName(dictGetKey(config_argument), dictGetVal(config_argument), &err)) {
                serverLog(LL_WARNING, "Issue during loading of configuration %s : %s", (sds) dictGetKey(config_argument), err);
                sdsfree(config_name);
                /* Drop all remaining queued values: the load is aborted. */
                dictEmpty(server.module_configs_queue, NULL);
                return REDISMODULE_ERR;
            }
        } else {
            /* No queued value: fall back to the registered default. */
            if (!performModuleConfigSetDefaultFromName(config_name, &err)) {
                serverLog(LL_WARNING, "Issue attempting to set default value of configuration %s : %s", module_config->name, err);
                sdsfree(config_name);
                dictEmpty(server.module_configs_queue, NULL);
                return REDISMODULE_ERR;
            }
        }
        /* Consume the queue entry (no-op when the default path was taken). */
        dictDelete(server.module_configs_queue, config_name);
        sdsfree(config_name);
    }
    /* Mark configs as initialized so moduleLoad() won't unload the module. */
    module->configs_initialized = 1;
    return REDISMODULE_OK;
}
+
+/* Add module_config to the list if the apply and privdata do not match one already in it. */
+void addModuleConfigApply(list *module_configs, ModuleConfig *module_config) {
+ if (!module_config->apply_fn) return;
+ listIter li;
+ listNode *ln;
+ ModuleConfig *pending_apply;
+ listRewind(module_configs, &li);
+ while ((ln = listNext(&li))) {
+ pending_apply = listNodeValue(ln);
+ if (pending_apply->apply_fn == module_config->apply_fn && pending_apply->privdata == module_config->privdata) {
+ return;
+ }
+ }
+ listAddNodeTail(module_configs, module_config);
+}
+
+/* Call apply on all module configs specified in set, if an apply function was specified at registration time. */
+int moduleConfigApplyConfig(list *module_configs, const char **err, const char **err_arg_name) {
+ if (!listLength(module_configs)) return 1;
+ listIter li;
+ listNode *ln;
+ ModuleConfig *module_config;
+ RedisModuleString *error = NULL;
+ RedisModuleCtx ctx;
+
+ listRewind(module_configs, &li);
+ while ((ln = listNext(&li))) {
+ module_config = listNodeValue(ln);
+ moduleCreateContext(&ctx, module_config->module, REDISMODULE_CTX_NONE);
+ if (module_config->apply_fn(&ctx, module_config->privdata, &error)) {
+ if (err_arg_name) *err_arg_name = module_config->name;
+ propagateErrorString(error, err);
+ moduleFreeContext(&ctx);
+ return 0;
+ }
+ moduleFreeContext(&ctx);
+ }
+ return 1;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Module Configurations API
+ * -------------------------------------------------------------------------- */
+
+/* Create a module config object. */
+ModuleConfig *createModuleConfig(sds name, RedisModuleConfigApplyFunc apply_fn, void *privdata, RedisModule *module) {
+ ModuleConfig *new_config = zmalloc(sizeof(ModuleConfig));
+ new_config->name = sdsdup(name);
+ new_config->apply_fn = apply_fn;
+ new_config->privdata = privdata;
+ new_config->module = module;
+ return new_config;
+}
+
+int moduleConfigValidityCheck(RedisModule *module, sds name, unsigned int flags, configType type) {
+ if (!module->onload) {
+ errno = EBUSY;
+ return REDISMODULE_ERR;
+ }
+ if (moduleVerifyConfigFlags(flags, type) || moduleVerifyConfigName(name)) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ }
+ if (isModuleConfigNameRegistered(module, name)) {
+ serverLog(LL_WARNING, "Configuration by the name: %s already registered", name);
+ errno = EALREADY;
+ return REDISMODULE_ERR;
+ }
+ return REDISMODULE_OK;
+}
+
+unsigned int maskModuleConfigFlags(unsigned int flags) {
+ unsigned int new_flags = 0;
+ if (flags & REDISMODULE_CONFIG_DEFAULT) new_flags |= MODIFIABLE_CONFIG;
+ if (flags & REDISMODULE_CONFIG_IMMUTABLE) new_flags |= IMMUTABLE_CONFIG;
+ if (flags & REDISMODULE_CONFIG_HIDDEN) new_flags |= HIDDEN_CONFIG;
+ if (flags & REDISMODULE_CONFIG_PROTECTED) new_flags |= PROTECTED_CONFIG;
+ if (flags & REDISMODULE_CONFIG_DENY_LOADING) new_flags |= DENY_LOADING_CONFIG;
+ return new_flags;
+}
+
+unsigned int maskModuleNumericConfigFlags(unsigned int flags) {
+ unsigned int new_flags = 0;
+ if (flags & REDISMODULE_CONFIG_MEMORY) new_flags |= MEMORY_CONFIG;
+ return new_flags;
+}
+
+unsigned int maskModuleEnumConfigFlags(unsigned int flags) {
+ unsigned int new_flags = 0;
+ if (flags & REDISMODULE_CONFIG_BITFLAGS) new_flags |= MULTI_ARG_CONFIG;
+ return new_flags;
+}
+
+/* Create a string config that Redis users can interact with via the Redis config file,
+ * `CONFIG SET`, `CONFIG GET`, and `CONFIG REWRITE` commands.
+ *
+ * The actual config value is owned by the module, and the `getfn`, `setfn` and optional
+ * `applyfn` callbacks that are provided to Redis in order to access or manipulate the
+ * value. The `getfn` callback retrieves the value from the module, while the `setfn`
+ * callback provides a value to be stored into the module config.
+ * The optional `applyfn` callback is called after a `CONFIG SET` command modified one or
+ * more configs using the `setfn` callback and can be used to atomically apply a config
+ * after several configs were changed together.
+ * If there are multiple configs with `applyfn` callbacks set by a single `CONFIG SET`
+ * command, they will be deduplicated if their `applyfn` function and `privdata` pointers
+ * are identical, and the callback will only be run once.
+ * Both the `setfn` and `applyfn` can return an error if the provided value is invalid or
+ * cannot be used.
+ * The config also declares a type for the value that is validated by Redis and
+ * provided to the module. The config system provides the following types:
+ *
+ * * Redis String: Binary safe string data.
+ * * Enum: One of a finite number of string tokens, provided during registration.
+ * * Numeric: 64 bit signed integer, which also supports min and max values.
+ * * Bool: Yes or no value.
+ *
+ * The `setfn` callback is expected to return REDISMODULE_OK when the value is successfully
+ * applied. It can also return REDISMODULE_ERR if the value can't be applied, and the
+ * *err pointer can be set with a RedisModuleString error message to provide to the client.
+ * This RedisModuleString will be freed by redis after returning from the set callback.
+ *
+ * All configs are registered with a name, a type, a default value, private data that is made
+ * available in the callbacks, as well as several flags that modify the behavior of the config.
 * The name must only contain alphanumeric characters, underscores, or dashes. The supported flags are:
+ *
+ * * REDISMODULE_CONFIG_DEFAULT: The default flags for a config. This creates a config that can be modified after startup.
 * * REDISMODULE_CONFIG_IMMUTABLE: This config can only be provided at load time.
+ * * REDISMODULE_CONFIG_SENSITIVE: The value stored in this config is redacted from all logging.
+ * * REDISMODULE_CONFIG_HIDDEN: The name is hidden from `CONFIG GET` with pattern matching.
+ * * REDISMODULE_CONFIG_PROTECTED: This config will be only be modifiable based off the value of enable-protected-configs.
+ * * REDISMODULE_CONFIG_DENY_LOADING: This config is not modifiable while the server is loading data.
+ * * REDISMODULE_CONFIG_MEMORY: For numeric configs, this config will convert data unit notations into their byte equivalent.
+ * * REDISMODULE_CONFIG_BITFLAGS: For enum configs, this config will allow multiple entries to be combined as bit flags.
+ *
+ * Default values are used on startup to set the value if it is not provided via the config file
+ * or command line. Default values are also used to compare to on a config rewrite.
+ *
+ * Notes:
+ *
+ * 1. On string config sets that the string passed to the set callback will be freed after execution and the module must retain it.
+ * 2. On string config gets the string will not be consumed and will be valid after execution.
+ *
+ * Example implementation:
+ *
+ * RedisModuleString *strval;
+ * int adjustable = 1;
+ * RedisModuleString *getStringConfigCommand(const char *name, void *privdata) {
+ * return strval;
+ * }
+ *
+ * int setStringConfigCommand(const char *name, RedisModuleString *new, void *privdata, RedisModuleString **err) {
+ * if (adjustable) {
+ * RedisModule_Free(strval);
+ * RedisModule_RetainString(NULL, new);
+ * strval = new;
+ * return REDISMODULE_OK;
+ * }
+ * *err = RedisModule_CreateString(NULL, "Not adjustable.", 15);
+ * return REDISMODULE_ERR;
+ * }
+ * ...
+ * RedisModule_RegisterStringConfig(ctx, "string", NULL, REDISMODULE_CONFIG_DEFAULT, getStringConfigCommand, setStringConfigCommand, NULL, NULL);
+ *
+ * If the registration fails, REDISMODULE_ERR is returned and one of the following
+ * errno is set:
+ * * EBUSY: Registering the Config outside of RedisModule_OnLoad.
+ * * EINVAL: The provided flags are invalid for the registration or the name of the config contains invalid characters.
+ * * EALREADY: The provided configuration name is already used. */
+int RM_RegisterStringConfig(RedisModuleCtx *ctx, const char *name, const char *default_val, unsigned int flags, RedisModuleConfigGetStringFunc getfn, RedisModuleConfigSetStringFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) {
+ RedisModule *module = ctx->module;
+ sds config_name = sdsnew(name);
+ if (moduleConfigValidityCheck(module, config_name, flags, NUMERIC_CONFIG)) {
+ sdsfree(config_name);
+ return REDISMODULE_ERR;
+ }
+ ModuleConfig *new_config = createModuleConfig(config_name, applyfn, privdata, module);
+ sdsfree(config_name);
+ new_config->get_fn.get_string = getfn;
+ new_config->set_fn.set_string = setfn;
+ listAddNodeTail(module->module_configs, new_config);
+ flags = maskModuleConfigFlags(flags);
+ addModuleStringConfig(module->name, name, flags, new_config, default_val ? sdsnew(default_val) : NULL);
+ return REDISMODULE_OK;
+}
+
+/* Create a bool config that server clients can interact with via the
+ * `CONFIG SET`, `CONFIG GET`, and `CONFIG REWRITE` commands. See
+ * RedisModule_RegisterStringConfig for detailed information about configs. */
+int RM_RegisterBoolConfig(RedisModuleCtx *ctx, const char *name, int default_val, unsigned int flags, RedisModuleConfigGetBoolFunc getfn, RedisModuleConfigSetBoolFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) {
+ RedisModule *module = ctx->module;
+ sds config_name = sdsnew(name);
+ if (moduleConfigValidityCheck(module, config_name, flags, BOOL_CONFIG)) {
+ sdsfree(config_name);
+ return REDISMODULE_ERR;
+ }
+ ModuleConfig *new_config = createModuleConfig(config_name, applyfn, privdata, module);
+ sdsfree(config_name);
+ new_config->get_fn.get_bool = getfn;
+ new_config->set_fn.set_bool = setfn;
+ listAddNodeTail(module->module_configs, new_config);
+ flags = maskModuleConfigFlags(flags);
+ addModuleBoolConfig(module->name, name, flags, new_config, default_val);
+ return REDISMODULE_OK;
+}
+
+/*
+ * Create an enum config that server clients can interact with via the
+ * `CONFIG SET`, `CONFIG GET`, and `CONFIG REWRITE` commands.
+ * Enum configs are a set of string tokens to corresponding integer values, where
 * the string value is exposed to Redis clients but the value passed between Redis and the
+ * module is the integer value. These values are defined in enum_values, an array
+ * of null-terminated c strings, and int_vals, an array of enum values who has an
+ * index partner in enum_values.
+ * Example Implementation:
+ * const char *enum_vals[3] = {"first", "second", "third"};
+ * const int int_vals[3] = {0, 2, 4};
+ * int enum_val = 0;
+ *
+ * int getEnumConfigCommand(const char *name, void *privdata) {
+ * return enum_val;
+ * }
+ *
+ * int setEnumConfigCommand(const char *name, int val, void *privdata, const char **err) {
+ * enum_val = val;
+ * return REDISMODULE_OK;
+ * }
+ * ...
+ * RedisModule_RegisterEnumConfig(ctx, "enum", 0, REDISMODULE_CONFIG_DEFAULT, enum_vals, int_vals, 3, getEnumConfigCommand, setEnumConfigCommand, NULL, NULL);
+ *
+ * Note that you can use REDISMODULE_CONFIG_BITFLAGS so that multiple enum string
+ * can be combined into one integer as bit flags, in which case you may want to
+ * sort your enums so that the preferred combinations are present first.
+ *
+ * See RedisModule_RegisterStringConfig for detailed general information about configs. */
+int RM_RegisterEnumConfig(RedisModuleCtx *ctx, const char *name, int default_val, unsigned int flags, const char **enum_values, const int *int_values, int num_enum_vals, RedisModuleConfigGetEnumFunc getfn, RedisModuleConfigSetEnumFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) {
+ RedisModule *module = ctx->module;
+ sds config_name = sdsnew(name);
+ if (moduleConfigValidityCheck(module, config_name, flags, ENUM_CONFIG)) {
+ sdsfree(config_name);
+ return REDISMODULE_ERR;
+ }
+ ModuleConfig *new_config = createModuleConfig(config_name, applyfn, privdata, module);
+ sdsfree(config_name);
+ new_config->get_fn.get_enum = getfn;
+ new_config->set_fn.set_enum = setfn;
+ configEnum *enum_vals = zmalloc((num_enum_vals + 1) * sizeof(configEnum));
+ for (int i = 0; i < num_enum_vals; i++) {
+ enum_vals[i].name = zstrdup(enum_values[i]);
+ enum_vals[i].val = int_values[i];
+ }
+ enum_vals[num_enum_vals].name = NULL;
+ enum_vals[num_enum_vals].val = 0;
+ listAddNodeTail(module->module_configs, new_config);
+ flags = maskModuleConfigFlags(flags) | maskModuleEnumConfigFlags(flags);
+ addModuleEnumConfig(module->name, name, flags, new_config, default_val, enum_vals);
+ return REDISMODULE_OK;
+}
+
+/*
+ * Create an integer config that server clients can interact with via the
+ * `CONFIG SET`, `CONFIG GET`, and `CONFIG REWRITE` commands. See
+ * RedisModule_RegisterStringConfig for detailed information about configs. */
+int RM_RegisterNumericConfig(RedisModuleCtx *ctx, const char *name, long long default_val, unsigned int flags, long long min, long long max, RedisModuleConfigGetNumericFunc getfn, RedisModuleConfigSetNumericFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) {
+ RedisModule *module = ctx->module;
+ sds config_name = sdsnew(name);
+ if (moduleConfigValidityCheck(module, config_name, flags, NUMERIC_CONFIG)) {
+ sdsfree(config_name);
+ return REDISMODULE_ERR;
+ }
+ ModuleConfig *new_config = createModuleConfig(config_name, applyfn, privdata, module);
+ sdsfree(config_name);
+ new_config->get_fn.get_numeric = getfn;
+ new_config->set_fn.set_numeric = setfn;
+ listAddNodeTail(module->module_configs, new_config);
+ unsigned int numeric_flags = maskModuleNumericConfigFlags(flags);
+ flags = maskModuleConfigFlags(flags);
+ addModuleNumericConfig(module->name, name, flags, new_config, default_val, numeric_flags, min, max);
+ return REDISMODULE_OK;
+}
+
+/* Applies all pending configurations on the module load. This should be called
+ * after all of the configurations have been registered for the module inside of RedisModule_OnLoad.
+ * This will return REDISMODULE_ERR if it is called outside RedisModule_OnLoad.
+ * This API needs to be called when configurations are provided in either `MODULE LOADEX`
+ * or provided as startup arguments. */
+int RM_LoadConfigs(RedisModuleCtx *ctx) {
+ if (!ctx || !ctx->module || !ctx->module->onload) {
+ return REDISMODULE_ERR;
+ }
+ RedisModule *module = ctx->module;
+ /* Load configs from conf file or arguments from loadex */
+ if (loadModuleConfigs(module)) return REDISMODULE_ERR;
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * ## RDB load/save API
+ * -------------------------------------------------------------------------- */
+
/* Stream kinds for RedisModuleRdbStream.type. */
#define REDISMODULE_RDB_STREAM_FILE 1

/* Descriptor telling RM_RdbLoad()/RM_RdbSave() where RDB data is read
 * from / written to. Only file-backed streams exist today; 'type'
 * selects the active member of 'data'. */
typedef struct RedisModuleRdbStream {
    int type;           /* One of REDISMODULE_RDB_STREAM_*. */

    union {
        char *filename; /* REDISMODULE_RDB_STREAM_FILE: owned heap copy of the path. */
    } data;
} RedisModuleRdbStream;
+
+/* Create a stream object to save/load RDB to/from a file.
+ *
+ * This function returns a pointer to RedisModuleRdbStream which is owned
+ * by the caller. It requires a call to RM_RdbStreamFree() to free
+ * the object. */
+RedisModuleRdbStream *RM_RdbStreamCreateFromFile(const char *filename) {
+ RedisModuleRdbStream *stream = zmalloc(sizeof(*stream));
+ stream->type = REDISMODULE_RDB_STREAM_FILE;
+ stream->data.filename = zstrdup(filename);
+ return stream;
+}
+
+/* Release an RDB stream object. */
+void RM_RdbStreamFree(RedisModuleRdbStream *stream) {
+ switch (stream->type) {
+ case REDISMODULE_RDB_STREAM_FILE:
+ zfree(stream->data.filename);
+ break;
+ default:
+ serverAssert(0);
+ break;
+ }
+ zfree(stream);
+}
+
/* Load RDB file from the `stream`. Dataset will be cleared first and then RDB
 * file will be loaded.
 *
 * `flags` must be zero. This parameter is for future use.
 *
 * On success REDISMODULE_OK is returned, otherwise REDISMODULE_ERR is returned
 * and errno is set accordingly (EINVAL: bad arguments, ENOTSUP: called on a
 * replica, ENOENT: file missing, EIO: load failed).
 *
 * Example:
 *
 *     RedisModuleRdbStream *s = RedisModule_RdbStreamCreateFromFile("exp.rdb");
 *     RedisModule_RdbLoad(ctx, s, 0);
 *     RedisModule_RdbStreamFree(s);
 */
int RM_RdbLoad(RedisModuleCtx *ctx, RedisModuleRdbStream *stream, int flags) {
    UNUSED(ctx);

    /* 'flags' is reserved for future use and must currently be zero. */
    if (!stream || flags != 0) {
        errno = EINVAL;
        return REDISMODULE_ERR;
    }

    /* Not allowed on replicas: masterhost set means we are replicating. */
    if (server.masterhost != NULL) {
        errno = ENOTSUP;
        return REDISMODULE_ERR;
    }

    /* Drop replicas if exist, along with the replication backlog: the
     * dataset is about to be replaced wholesale. */
    disconnectSlaves();
    freeReplicationBacklog();

    /* Suspend AOF while replacing the dataset; re-enabled below. */
    if (server.aof_state != AOF_OFF) stopAppendOnly();

    /* Kill existing RDB fork as it is saving outdated data. Also killing it
     * will prevent COW memory issue. */
    if (server.child_type == CHILD_TYPE_RDB) killRDBChild();

    /* Clear the whole dataset before loading the new one. */
    emptyData(-1,EMPTYDB_NO_FLAGS,NULL);

    /* rdbLoad() can go back to the networking and process network events. If
     * RM_RdbLoad() is called inside a command callback, we don't want to
     * process the current client. Otherwise, we may free the client or try to
     * process next message while we are already in the command callback. */
    if (server.current_client) protectClient(server.current_client);

    serverAssert(stream->type == REDISMODULE_RDB_STREAM_FILE);
    int ret = rdbLoad(stream->data.filename,NULL,RDBFLAGS_NONE);

    /* Undo the temporary state changes made above, in reverse order. */
    if (server.current_client) unprotectClient(server.current_client);
    if (server.aof_state != AOF_OFF) startAppendOnly();

    if (ret != RDB_OK) {
        errno = (ret == RDB_NOT_EXIST) ? ENOENT : EIO;
        return REDISMODULE_ERR;
    }

    errno = 0;
    return REDISMODULE_OK;
}
+
+/* Save dataset to the RDB stream.
+ *
+ * `flags` must be zero. This parameter is for future use.
+ *
+ * On success REDISMODULE_OK is returned, otherwise REDISMODULE_ERR is returned
+ * and errno is set accordingly.
+ *
+ * Example:
+ *
+ * RedisModuleRdbStream *s = RedisModule_RdbStreamCreateFromFile("exp.rdb");
+ * RedisModule_RdbSave(ctx, s, 0);
+ * RedisModule_RdbStreamFree(s);
+ */
+int RM_RdbSave(RedisModuleCtx *ctx, RedisModuleRdbStream *stream, int flags) {
+ UNUSED(ctx);
+
+ if (!stream || flags != 0) {
+ errno = EINVAL;
+ return REDISMODULE_ERR;
+ }
+
+ serverAssert(stream->type == REDISMODULE_RDB_STREAM_FILE);
+
+ if (rdbSaveToFile(stream->data.filename) != C_OK) {
+ return REDISMODULE_ERR;
+ }
+
+ errno = 0;
+ return REDISMODULE_OK;
+}
+
+/* Redis MODULE command.
+ *
+ * MODULE LIST
+ * MODULE LOAD <path> [args...]
+ * MODULE LOADEX <path> [[CONFIG NAME VALUE] [CONFIG NAME VALUE]] [ARGS ...]
+ * MODULE UNLOAD <name>
+ *
+ * Dispatches on the subcommand, checks arity, and replies +OK on success
+ * or an error string on failure. LOAD/LOADEX delegate to moduleLoad(),
+ * UNLOAD to moduleUnload(). */
+void moduleCommand(client *c) {
+    char *subcmd = c->argv[1]->ptr;
+
+    if (c->argc == 2 && !strcasecmp(subcmd,"help")) {
+        const char *help[] = {
+"LIST",
+"    Return a list of loaded modules.",
+"LOAD <path> [<arg> ...]",
+"    Load a module library from <path>, passing to it any optional arguments.",
+"LOADEX <path> [[CONFIG NAME VALUE] [CONFIG NAME VALUE]] [ARGS ...]",
+"    Load a module library from <path>, while passing it module configurations and optional arguments.",
+"UNLOAD <name>",
+"    Unload a module.",
+NULL
+        };
+        addReplyHelp(c, help);
+    } else if (!strcasecmp(subcmd,"load") && c->argc >= 3) {
+        robj **argv = NULL;
+        int argc = 0;
+
+        /* Any arguments after the path are forwarded to the module. */
+        if (c->argc > 3) {
+            argc = c->argc - 3;
+            argv = &c->argv[3];
+        }
+
+        if (moduleLoad(c->argv[2]->ptr,(void **)argv,argc, 0) == C_OK)
+            addReply(c,shared.ok);
+        else
+            addReplyError(c,
+                "Error loading the extension. Please check the server logs.");
+    } else if (!strcasecmp(subcmd,"loadex") && c->argc >= 3) {
+        robj **argv = NULL;
+        int argc = 0;
+
+        if (c->argc > 3) {
+            argc = c->argc - 3;
+            argv = &c->argv[3];
+        }
+        /* If this is a loadex command we want to populate server.module_configs_queue with
+         * sds NAME VALUE pairs. We also want to increment argv to just after ARGS, if supplied. */
+        if (parseLoadexArguments((RedisModuleString ***) &argv, &argc) == REDISMODULE_OK &&
+            moduleLoad(c->argv[2]->ptr, (void **)argv, argc, 1) == C_OK)
+            addReply(c,shared.ok);
+        else {
+            /* On failure, drop any configs queued by parseLoadexArguments(). */
+            dictEmpty(server.module_configs_queue, NULL);
+            addReplyError(c,
+                "Error loading the extension. Please check the server logs.");
+        }
+
+    } else if (!strcasecmp(subcmd,"unload") && c->argc == 3) {
+        const char *errmsg = NULL;
+        if (moduleUnload(c->argv[2]->ptr, &errmsg) == C_OK)
+            addReply(c,shared.ok);
+        else {
+            if (errmsg == NULL) errmsg = "operation not possible.";
+            addReplyErrorFormat(c, "Error unloading module: %s", errmsg);
+            serverLog(LL_WARNING, "Error unloading module %s: %s", (sds) c->argv[2]->ptr, errmsg);
+        }
+    } else if (!strcasecmp(subcmd,"list") && c->argc == 2) {
+        addReplyLoadedModules(c);
+    } else {
+        addReplySubcommandSyntaxError(c);
+        return;
+    }
+}
+
+/* Return the number of registered modules, i.e. the number of entries in
+ * the global 'modules' dictionary. */
+size_t moduleCount(void) {
+    return dictSize(modules);
+}
+
+/* --------------------------------------------------------------------------
+ * ## Key eviction API
+ * -------------------------------------------------------------------------- */
+
+/* Set the key last access time for LRU based eviction. Not relevant if the
+ * server's maxmemory policy is LFU based. Value is idle time in milliseconds.
+ * Returns REDISMODULE_OK if the LRU was updated, REDISMODULE_ERR otherwise
+ * (e.g. when the key holds no value). */
+int RM_SetLRU(RedisModuleKey *key, mstime_t lru_idle) {
+    if (!key->value)
+        return REDISMODULE_ERR;
+    if (objectSetLRUOrLFU(key->value, -1, lru_idle, lru_idle>=0 ? LRU_CLOCK() : 0, 1))
+        return REDISMODULE_OK;
+    return REDISMODULE_ERR;
+}
+
+/* Gets the key last access time.
+ * Value is idle time in milliseconds, or -1 if the server's eviction policy
+ * is LFU based.
+ * Returns REDISMODULE_OK when the key is valid, REDISMODULE_ERR otherwise. */
+int RM_GetLRU(RedisModuleKey *key, mstime_t *lru_idle) {
+    *lru_idle = -1;
+    if (!key->value)
+        return REDISMODULE_ERR;
+    if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU)
+        return REDISMODULE_OK; /* LFU mode: idle time is not tracked, leave -1. */
+    *lru_idle = estimateObjectIdleTime(key->value);
+    return REDISMODULE_OK;
+}
+
+/* Set the key access frequency. Only relevant if the server's maxmemory
+ * policy is LFU based.
+ * The frequency is a logarithmic counter that provides an indication of
+ * the access frequency (must be <= 255).
+ * Returns REDISMODULE_OK if the LFU was updated, REDISMODULE_ERR otherwise. */
+int RM_SetLFU(RedisModuleKey *key, long long lfu_freq) {
+    if (!key->value)
+        return REDISMODULE_ERR;
+    if (objectSetLRUOrLFU(key->value, lfu_freq, -1, 0, 1))
+        return REDISMODULE_OK;
+    return REDISMODULE_ERR;
+}
+
+/* Gets the key access frequency, or -1 if the server's eviction policy is
+ * not LFU based.
+ * Returns REDISMODULE_OK when the key is valid, REDISMODULE_ERR otherwise. */
+int RM_GetLFU(RedisModuleKey *key, long long *lfu_freq) {
+    *lfu_freq = -1;
+    if (!key->value)
+        return REDISMODULE_ERR;
+    if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU)
+        *lfu_freq = LFUDecrAndReturn(key->value);
+    return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Miscellaneous APIs
+ * -------------------------------------------------------------------------- */
+
+/**
+ * Returns the full module options flags mask; using the return value
+ * the module can check if a certain set of module options are supported
+ * by the redis server version in use.
+ * Example:
+ *
+ *        int supportedFlags = RM_GetModuleOptionsAll();
+ *        if (supportedFlags & REDISMODULE_OPTIONS_ALLOW_NESTED_KEYSPACE_NOTIFICATIONS) {
+ *              // REDISMODULE_OPTIONS_ALLOW_NESTED_KEYSPACE_NOTIFICATIONS is supported
+ *        } else {
+ *              // REDISMODULE_OPTIONS_ALLOW_NESTED_KEYSPACE_NOTIFICATIONS is not supported
+ *        }
+ */
+int RM_GetModuleOptionsAll(void) {
+    /* Every defined flag bit lies below the _NEXT sentinel, so this sets them all. */
+    return _REDISMODULE_OPTIONS_FLAGS_NEXT - 1;
+}
+
+/**
+ * Returns the full ContextFlags mask; using the return value
+ * the module can check if a certain set of flags are supported
+ * by the redis server version in use.
+ * Example:
+ *
+ *        int supportedFlags = RM_GetContextFlagsAll();
+ *        if (supportedFlags & REDISMODULE_CTX_FLAGS_MULTI) {
+ *              // REDISMODULE_CTX_FLAGS_MULTI is supported
+ *        } else {
+ *              // REDISMODULE_CTX_FLAGS_MULTI is not supported
+ *        }
+ */
+int RM_GetContextFlagsAll(void) {
+    /* Every defined flag bit lies below the _NEXT sentinel, so this sets them all. */
+    return _REDISMODULE_CTX_FLAGS_NEXT - 1;
+}
+
+/**
+ * Returns the full KeyspaceNotification mask; using the return value
+ * the module can check if a certain set of flags are supported
+ * by the redis server version in use.
+ * Example:
+ *
+ *        int supportedFlags = RM_GetKeyspaceNotificationFlagsAll();
+ *        if (supportedFlags & REDISMODULE_NOTIFY_LOADED) {
+ *              // REDISMODULE_NOTIFY_LOADED is supported
+ *        } else {
+ *              // REDISMODULE_NOTIFY_LOADED is not supported
+ *        }
+ */
+int RM_GetKeyspaceNotificationFlagsAll(void) {
+    /* Every defined flag bit lies below the _NEXT sentinel, so this sets them all. */
+    return _REDISMODULE_NOTIFY_NEXT - 1;
+}
+
+/**
+ * Return the redis version in format of 0x00MMmmpp.
+ * Example: for 6.0.7 the return value will be 0x00060007.
+ */
+int RM_GetServerVersion(void) {
+    return REDIS_VERSION_NUM;
+}
+
+/**
+ * Return the current redis-server runtime value of REDISMODULE_TYPE_METHOD_VERSION.
+ * You can use that when calling RM_CreateDataType to know which fields of
+ * RedisModuleTypeMethods are going to be supported and which will be ignored.
+ */
+int RM_GetTypeMethodVersion(void) {
+    return REDISMODULE_TYPE_METHOD_VERSION;
+}
+
+/* Replace the value assigned to a module type.
+ *
+ * The key must be open for writing, have an existing value, and have a moduleType
+ * that matches the one specified by the caller.
+ *
+ * Unlike RM_ModuleTypeSetValue() which will free the old value, this function
+ * simply swaps the old value with the new value.
+ *
+ * The function returns REDISMODULE_OK on success, REDISMODULE_ERR on errors
+ * such as:
+ *
+ * 1. Key is not opened for writing.
+ * 2. Key is not a module data type key.
+ * 3. Key is a module datatype other than 'mt'.
+ *
+ * If old_value is non-NULL, the old value is returned by reference.
+ */
+int RM_ModuleTypeReplaceValue(RedisModuleKey *key, moduleType *mt, void *new_value, void **old_value) {
+    /* The key must be writable and not in the middle of an iteration. */
+    if (!(key->mode & REDISMODULE_WRITE) || key->iter)
+        return REDISMODULE_ERR;
+    /* It must currently hold a module-type value. */
+    if (!key->value || key->value->type != OBJ_MODULE)
+        return REDISMODULE_ERR;
+
+    moduleValue *modval = key->value->ptr;
+    /* The stored value must belong to the caller's module type. */
+    if (modval->type != mt)
+        return REDISMODULE_ERR;
+
+    if (old_value != NULL)
+        *old_value = modval->value;
+    modval->value = new_value;
+    return REDISMODULE_OK;
+}
+
+/* For a specified command, parse its arguments and return an array that
+ * contains the indexes of all key name arguments. This function is
+ * essentially a more efficient way to do `COMMAND GETKEYS`.
+ *
+ * The out_flags argument is optional, and can be set to NULL.
+ * When provided it is filled with REDISMODULE_CMD_KEY_ flags in matching
+ * indexes with the key indexes of the returned array.
+ *
+ * A NULL return value indicates the specified command has no keys, or
+ * an error condition. Error conditions are indicated by setting errno
+ * as follows:
+ *
+ * * ENOENT: Specified command does not exist.
+ * * EINVAL: Invalid command arity specified.
+ *
+ * NOTE: The returned array is not a Redis Module object so it does not
+ * get automatically freed even when auto-memory is used. The caller
+ * must explicitly call RM_Free() to free it, same as the out_flags pointer if
+ * used.
+ */
+int *RM_GetCommandKeysWithFlags(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, int *num_keys, int **out_flags) {
+    UNUSED(ctx);
+    struct redisCommand *cmd;
+    int *res = NULL;
+
+    /* Find command */
+    if ((cmd = lookupCommand(argv,argc)) == NULL) {
+        errno = ENOENT;
+        return NULL;
+    }
+
+    /* Bail out if command has no keys */
+    if (!doesCommandHaveKeys(cmd)) {
+        errno = 0;
+        return NULL;
+    }
+
+    /* Validate arity: exact match for fixed-arity commands, a minimum
+     * argument count for variadic (negative arity) ones. */
+    if ((cmd->arity > 0 && cmd->arity != argc) || (argc < -cmd->arity)) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    getKeysResult result = GETKEYS_RESULT_INIT;
+    getKeysFromCommand(cmd, argv, argc, &result);
+
+    *num_keys = result.numkeys;
+    if (!result.numkeys) {
+        errno = 0;
+        getKeysFreeResult(&result);
+        return NULL;
+    }
+
+    /* The return value here expects an array of key positions */
+    unsigned long int size = sizeof(int) * result.numkeys;
+    res = zmalloc(size);
+    if (out_flags)
+        *out_flags = zmalloc(size);
+    for (int i = 0; i < result.numkeys; i++) {
+        res[i] = result.keys[i].pos;
+        if (out_flags)
+            (*out_flags)[i] = moduleConvertKeySpecsFlags(result.keys[i].flags, 0);
+    }
+
+    /* Fix: the getKeysResult must also be released on the success path,
+     * otherwise the keys array it allocated is leaked. */
+    getKeysFreeResult(&result);
+    return res;
+}
+
+/* Identical to RM_GetCommandKeysWithFlags when flags are not needed.
+ * See that function for the errno semantics and for ownership of the
+ * returned array (caller must RM_Free() it). */
+int *RM_GetCommandKeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, int *num_keys) {
+    return RM_GetCommandKeysWithFlags(ctx, argv, argc, num_keys, NULL);
+}
+
+/* Return the name of the command currently running, or NULL when the
+ * context has no associated client or command. */
+const char *RM_GetCurrentCommandName(RedisModuleCtx *ctx) {
+    if (ctx == NULL || ctx->client == NULL || ctx->client->cmd == NULL)
+        return NULL;
+    return (const char *)ctx->client->cmd->fullname;
+}
+
+/* --------------------------------------------------------------------------
+ * ## Defrag API
+ * -------------------------------------------------------------------------- */
+
+/* The defrag context, used to manage state during calls to the data type
+ * defrag callback.
+ */
+struct RedisModuleDefragCtx {
+    long long int endtime;   /* ustime() deadline after which the callback should stop; 0 = no limit. */
+    unsigned long *cursor;   /* Resume cursor for late defrag; NULL outside late-defrag mode. */
+    struct redisObject *key; /* Optional name of key processed, NULL when unknown. */
+    int dbid;                /* The dbid of the key being processed, -1 when unknown. */
+};
+
+/* Register a defrag callback for global data, i.e. anything that the module
+ * may allocate that is not tied to a specific data type.
+ * Always returns REDISMODULE_OK; calling it again replaces any previously
+ * registered callback for this module. */
+int RM_RegisterDefragFunc(RedisModuleCtx *ctx, RedisModuleDefragFunc cb) {
+    ctx->module->defrag_cb = cb;
+    return REDISMODULE_OK;
+}
+
+/* When the data type defrag callback iterates complex structures, this
+ * function should be called periodically. A zero (false) return value means
+ * the callback may continue its work; a non-zero (true) value means it
+ * should stop.
+ *
+ * When stopped, the callback may use RM_DefragCursorSet() to store its
+ * position so it can later use RM_DefragCursorGet() to resume defragging.
+ * When stopped with more work left, the callback should return 1;
+ * otherwise it should return 0.
+ *
+ * NOTE: Modules should consider the frequency in which this function is
+ * called, so it generally makes sense to do small batches of work in
+ * between calls. */
+int RM_DefragShouldStop(RedisModuleDefragCtx *ctx) {
+    if (ctx->endtime == 0)
+        return 0; /* No deadline configured for this defrag run. */
+    return ctx->endtime < ustime();
+}
+
+/* Store an arbitrary cursor value for future re-use.
+ *
+ * This should only be called if RM_DefragShouldStop() has returned a non-zero
+ * value and the defrag callback is about to exit without fully iterating its
+ * data type.
+ *
+ * This behavior is reserved to cases where late defrag is performed. Late
+ * defrag is selected for keys that implement the `free_effort` callback and
+ * return a `free_effort` value that is larger than the defrag
+ * 'active-defrag-max-scan-fields' configuration directive.
+ *
+ * Smaller keys, keys that do not implement `free_effort` or the global
+ * defrag callback are not called in late-defrag mode. In those cases, a
+ * call to this function will return REDISMODULE_ERR.
+ *
+ * The cursor may be used by the module to represent some progress into the
+ * module's data type. Modules may also store additional cursor-related
+ * information locally and use the cursor as a flag that indicates when
+ * traversal of a new key begins. This is possible because the API makes
+ * a guarantee that concurrent defragmentation of multiple keys will
+ * not be performed.
+ */
+int RM_DefragCursorSet(RedisModuleDefragCtx *ctx, unsigned long cursor) {
+    /* A NULL cursor pointer means we are not in late-defrag mode. */
+    if (!ctx->cursor)
+        return REDISMODULE_ERR;
+
+    *ctx->cursor = cursor;
+    return REDISMODULE_OK;
+}
+
+/* Fetch a cursor value that has been previously stored using
+ * RM_DefragCursorSet().
+ *
+ * Outside of a late-defrag operation there is no cursor: REDISMODULE_ERR is
+ * returned and the output value should be ignored. See RM_DefragCursorSet()
+ * for more details on defrag cursors. */
+int RM_DefragCursorGet(RedisModuleDefragCtx *ctx, unsigned long *cursor) {
+    if (ctx->cursor == NULL)
+        return REDISMODULE_ERR;
+    *cursor = *ctx->cursor;
+    return REDISMODULE_OK;
+}
+
+/* Defrag a memory allocation previously allocated by RM_Alloc, RM_Calloc, etc.
+ * The defragmentation process involves allocating a new memory block and copying
+ * the contents to it, like realloc().
+ *
+ * If defragmentation was not necessary, NULL is returned and the operation has
+ * no other effect.
+ *
+ * If a non-NULL value is returned, the caller should use the new pointer instead
+ * of the old one and update any reference to the old pointer, which must not
+ * be used again.
+ */
+void *RM_DefragAlloc(RedisModuleDefragCtx *ctx, void *ptr) {
+    UNUSED(ctx); /* Defragging a plain allocation needs no per-key context. */
+    return activeDefragAlloc(ptr);
+}
+
+/* Defrag a RedisModuleString previously allocated by RM_Alloc, RM_Calloc, etc.
+ * See RM_DefragAlloc() for more information on how the defragmentation process
+ * works.
+ *
+ * NOTE: It is only possible to defrag strings that have a single reference.
+ * Typically this means strings retained with RM_RetainString or RM_HoldString
+ * may not be defragmentable. One exception is command argvs which, if retained
+ * by the module, will end up with a single reference (because the reference
+ * on the Redis side is dropped as soon as the command callback returns).
+ */
+RedisModuleString *RM_DefragRedisModuleString(RedisModuleDefragCtx *ctx, RedisModuleString *str) {
+    UNUSED(ctx); /* Defragging a string object needs no per-key context. */
+    return activeDefragStringOb(str);
+}
+
+
+/* Perform a late defrag of a module datatype key.
+ *
+ * Returns zero (and resets the cursor) when no more work is needed, or a
+ * non-zero value when the callback asked to be resumed later. */
+int moduleLateDefrag(robj *key, robj *value, unsigned long *cursor, long long endtime, int dbid) {
+    moduleValue *mv = value->ptr;
+    moduleType *mt = mv->type;
+    RedisModuleDefragCtx defrag_ctx = { endtime, cursor, key, dbid };
+
+    /* The callback may be missing if the key has been replaced with a
+     * different type since our last visit. */
+    int more_work = 0;
+    if (mt->defrag)
+        more_work = mt->defrag(&defrag_ctx, key, &mv->value);
+
+    if (more_work)
+        return 1;
+
+    *cursor = 0; /* Done with this key: reset the cursor. */
+    return 0;
+}
+
+/* Attempt to defrag a module data type value. Depending on complexity,
+ * the operation may happen immediately or be scheduled for later.
+ *
+ * Returns 1 if the operation has been completed or 0 if it needs to
+ * be scheduled for late defrag.
+ */
+int moduleDefragValue(robj *key, robj *value, int dbid) {
+    moduleValue *mv = value->ptr;
+    moduleType *mt = mv->type;
+
+    /* Try to defrag moduleValue itself regardless of whether or not
+     * defrag callbacks are provided.
+     */
+    moduleValue *newmv = activeDefragAlloc(mv);
+    if (newmv) {
+        value->ptr = mv = newmv;
+    }
+
+    if (!mt->defrag)
+        return 1; /* No callback: nothing more we can do for this type. */
+
+    /* Use free_effort to determine complexity of module value, and if
+     * necessary schedule it for defragLater instead of quick immediate
+     * defrag.
+     */
+    size_t effort = moduleGetFreeEffort(key, value, dbid);
+    if (!effort)
+        effort = SIZE_MAX; /* Zero effort is promoted to the maximum, forcing late defrag. */
+    if (effort > server.active_defrag_max_scan_fields) {
+        return 0; /* Defrag later */
+    }
+
+    /* Immediate defrag: no deadline (endtime 0) and no resume cursor, so
+     * the cursor APIs will return REDISMODULE_ERR to the callback. */
+    RedisModuleDefragCtx defrag_ctx = { 0, NULL, key, dbid };
+    mt->defrag(&defrag_ctx, key, &mv->value);
+    return 1;
+}
+
+/* Invoke the global defrag callback of every loaded module that registered
+ * one via RM_RegisterDefragFunc(). */
+void moduleDefragGlobals(void) {
+    dictIterator *iter = dictGetIterator(modules);
+    dictEntry *entry;
+
+    while ((entry = dictNext(iter)) != NULL) {
+        struct RedisModule *mod = dictGetVal(entry);
+        if (mod->defrag_cb == NULL)
+            continue;
+        RedisModuleDefragCtx defrag_ctx = { 0, NULL, NULL, -1 };
+        mod->defrag_cb(&defrag_ctx);
+    }
+    dictReleaseIterator(iter);
+}
+
+/* Returns the name of the key currently being processed.
+ * There is no guarantee that the key name is always available (e.g. during
+ * global defrag), so this may return NULL.
+ */
+const RedisModuleString *RM_GetKeyNameFromDefragCtx(RedisModuleDefragCtx *ctx) {
+    return ctx->key;
+}
+
+/* Returns the database id of the key currently being processed.
+ * There is no guarantee that this info is always available, so this may
+ * return -1.
+ */
+int RM_GetDbIdFromDefragCtx(RedisModuleDefragCtx *ctx) {
+    return ctx->dbid;
+}
+
+/* Register all the APIs we export. Keep this function at the end of the
+ * file so that's easy to seek it to add new entries.
+ *
+ * Each REGISTER_API(Name) entry publishes the corresponding RM_Name
+ * implementation in server.moduleapi so modules can resolve it at load
+ * time (presumably as RedisModule_Name -- see the REGISTER_API macro). */
+void moduleRegisterCoreAPI(void) {
+    server.moduleapi = dictCreate(&moduleAPIDictType);
+    server.sharedapi = dictCreate(&moduleAPIDictType);
+    REGISTER_API(Alloc);
+    REGISTER_API(TryAlloc);
+    REGISTER_API(Calloc);
+    REGISTER_API(Realloc);
+    REGISTER_API(Free);
+    REGISTER_API(Strdup);
+    REGISTER_API(CreateCommand);
+    REGISTER_API(GetCommand);
+    REGISTER_API(CreateSubcommand);
+    REGISTER_API(SetCommandInfo);
+    REGISTER_API(SetCommandACLCategories);
+    REGISTER_API(SetModuleAttribs);
+    REGISTER_API(IsModuleNameBusy);
+    REGISTER_API(WrongArity);
+    REGISTER_API(ReplyWithLongLong);
+    REGISTER_API(ReplyWithError);
+    REGISTER_API(ReplyWithErrorFormat);
+    REGISTER_API(ReplyWithSimpleString);
+    REGISTER_API(ReplyWithArray);
+    REGISTER_API(ReplyWithMap);
+    REGISTER_API(ReplyWithSet);
+    REGISTER_API(ReplyWithAttribute);
+    REGISTER_API(ReplyWithNullArray);
+    REGISTER_API(ReplyWithEmptyArray);
+    REGISTER_API(ReplySetArrayLength);
+    REGISTER_API(ReplySetMapLength);
+    REGISTER_API(ReplySetSetLength);
+    REGISTER_API(ReplySetAttributeLength);
+    REGISTER_API(ReplyWithString);
+    REGISTER_API(ReplyWithEmptyString);
+    REGISTER_API(ReplyWithVerbatimString);
+    REGISTER_API(ReplyWithVerbatimStringType);
+    REGISTER_API(ReplyWithStringBuffer);
+    REGISTER_API(ReplyWithCString);
+    REGISTER_API(ReplyWithNull);
+    REGISTER_API(ReplyWithBool);
+    REGISTER_API(ReplyWithCallReply);
+    REGISTER_API(ReplyWithDouble);
+    REGISTER_API(ReplyWithBigNumber);
+    REGISTER_API(ReplyWithLongDouble);
+    REGISTER_API(GetSelectedDb);
+    REGISTER_API(SelectDb);
+    REGISTER_API(KeyExists);
+    REGISTER_API(OpenKey);
+    REGISTER_API(GetOpenKeyModesAll);
+    REGISTER_API(CloseKey);
+    REGISTER_API(KeyType);
+    REGISTER_API(ValueLength);
+    REGISTER_API(ListPush);
+    REGISTER_API(ListPop);
+    REGISTER_API(ListGet);
+    REGISTER_API(ListSet);
+    REGISTER_API(ListInsert);
+    REGISTER_API(ListDelete);
+    REGISTER_API(StringToLongLong);
+    REGISTER_API(StringToULongLong);
+    REGISTER_API(StringToDouble);
+    REGISTER_API(StringToLongDouble);
+    REGISTER_API(StringToStreamID);
+    REGISTER_API(Call);
+    REGISTER_API(CallReplyProto);
+    REGISTER_API(FreeCallReply);
+    REGISTER_API(CallReplyInteger);
+    REGISTER_API(CallReplyDouble);
+    REGISTER_API(CallReplyBigNumber);
+    REGISTER_API(CallReplyVerbatim);
+    REGISTER_API(CallReplyBool);
+    REGISTER_API(CallReplySetElement);
+    REGISTER_API(CallReplyMapElement);
+    REGISTER_API(CallReplyAttributeElement);
+    REGISTER_API(CallReplyPromiseSetUnblockHandler);
+    REGISTER_API(CallReplyPromiseAbort);
+    REGISTER_API(CallReplyAttribute);
+    REGISTER_API(CallReplyType);
+    REGISTER_API(CallReplyLength);
+    REGISTER_API(CallReplyArrayElement);
+    REGISTER_API(CallReplyStringPtr);
+    REGISTER_API(CreateStringFromCallReply);
+    REGISTER_API(CreateString);
+    REGISTER_API(CreateStringFromLongLong);
+    REGISTER_API(CreateStringFromULongLong);
+    REGISTER_API(CreateStringFromDouble);
+    REGISTER_API(CreateStringFromLongDouble);
+    REGISTER_API(CreateStringFromString);
+    REGISTER_API(CreateStringFromStreamID);
+    REGISTER_API(CreateStringPrintf);
+    REGISTER_API(FreeString);
+    REGISTER_API(StringPtrLen);
+    REGISTER_API(AutoMemory);
+    REGISTER_API(Replicate);
+    REGISTER_API(ReplicateVerbatim);
+    REGISTER_API(DeleteKey);
+    REGISTER_API(UnlinkKey);
+    REGISTER_API(StringSet);
+    REGISTER_API(StringDMA);
+    REGISTER_API(StringTruncate);
+    REGISTER_API(SetExpire);
+    REGISTER_API(GetExpire);
+    REGISTER_API(SetAbsExpire);
+    REGISTER_API(GetAbsExpire);
+    REGISTER_API(ResetDataset);
+    REGISTER_API(DbSize);
+    REGISTER_API(RandomKey);
+    REGISTER_API(ZsetAdd);
+    REGISTER_API(ZsetIncrby);
+    REGISTER_API(ZsetScore);
+    REGISTER_API(ZsetRem);
+    REGISTER_API(ZsetRangeStop);
+    REGISTER_API(ZsetFirstInScoreRange);
+    REGISTER_API(ZsetLastInScoreRange);
+    REGISTER_API(ZsetFirstInLexRange);
+    REGISTER_API(ZsetLastInLexRange);
+    REGISTER_API(ZsetRangeCurrentElement);
+    REGISTER_API(ZsetRangeNext);
+    REGISTER_API(ZsetRangePrev);
+    REGISTER_API(ZsetRangeEndReached);
+    REGISTER_API(HashSet);
+    REGISTER_API(HashGet);
+    REGISTER_API(StreamAdd);
+    REGISTER_API(StreamDelete);
+    REGISTER_API(StreamIteratorStart);
+    REGISTER_API(StreamIteratorStop);
+    REGISTER_API(StreamIteratorNextID);
+    REGISTER_API(StreamIteratorNextField);
+    REGISTER_API(StreamIteratorDelete);
+    REGISTER_API(StreamTrimByLength);
+    REGISTER_API(StreamTrimByID);
+    REGISTER_API(IsKeysPositionRequest);
+    REGISTER_API(KeyAtPos);
+    REGISTER_API(KeyAtPosWithFlags);
+    REGISTER_API(IsChannelsPositionRequest);
+    REGISTER_API(ChannelAtPosWithFlags);
+    REGISTER_API(GetClientId);
+    REGISTER_API(GetClientUserNameById);
+    REGISTER_API(GetContextFlags);
+    REGISTER_API(AvoidReplicaTraffic);
+    REGISTER_API(PoolAlloc);
+    REGISTER_API(CreateDataType);
+    REGISTER_API(ModuleTypeSetValue);
+    REGISTER_API(ModuleTypeReplaceValue);
+    REGISTER_API(ModuleTypeGetType);
+    REGISTER_API(ModuleTypeGetValue);
+    REGISTER_API(IsIOError);
+    REGISTER_API(SetModuleOptions);
+    REGISTER_API(SignalModifiedKey);
+    REGISTER_API(SaveUnsigned);
+    REGISTER_API(LoadUnsigned);
+    REGISTER_API(SaveSigned);
+    REGISTER_API(LoadSigned);
+    REGISTER_API(SaveString);
+    REGISTER_API(SaveStringBuffer);
+    REGISTER_API(LoadString);
+    REGISTER_API(LoadStringBuffer);
+    REGISTER_API(SaveDouble);
+    REGISTER_API(LoadDouble);
+    REGISTER_API(SaveFloat);
+    REGISTER_API(LoadFloat);
+    REGISTER_API(SaveLongDouble);
+    REGISTER_API(LoadLongDouble);
+    REGISTER_API(SaveDataTypeToString);
+    REGISTER_API(LoadDataTypeFromString);
+    REGISTER_API(LoadDataTypeFromStringEncver);
+    REGISTER_API(EmitAOF);
+    REGISTER_API(Log);
+    REGISTER_API(LogIOError);
+    REGISTER_API(_Assert);
+    REGISTER_API(LatencyAddSample);
+    REGISTER_API(StringAppendBuffer);
+    REGISTER_API(TrimStringAllocation);
+    REGISTER_API(RetainString);
+    REGISTER_API(HoldString);
+    REGISTER_API(StringCompare);
+    REGISTER_API(GetContextFromIO);
+    REGISTER_API(GetKeyNameFromIO);
+    REGISTER_API(GetKeyNameFromModuleKey);
+    REGISTER_API(GetDbIdFromModuleKey);
+    REGISTER_API(GetDbIdFromIO);
+    REGISTER_API(GetKeyNameFromOptCtx);
+    REGISTER_API(GetToKeyNameFromOptCtx);
+    REGISTER_API(GetDbIdFromOptCtx);
+    REGISTER_API(GetToDbIdFromOptCtx);
+    REGISTER_API(GetKeyNameFromDefragCtx);
+    REGISTER_API(GetDbIdFromDefragCtx);
+    REGISTER_API(GetKeyNameFromDigest);
+    REGISTER_API(GetDbIdFromDigest);
+    REGISTER_API(BlockClient);
+    REGISTER_API(BlockClientGetPrivateData);
+    REGISTER_API(BlockClientSetPrivateData);
+    REGISTER_API(BlockClientOnAuth);
+    REGISTER_API(UnblockClient);
+    REGISTER_API(IsBlockedReplyRequest);
+    REGISTER_API(IsBlockedTimeoutRequest);
+    REGISTER_API(GetBlockedClientPrivateData);
+    REGISTER_API(AbortBlock);
+    REGISTER_API(Milliseconds);
+    REGISTER_API(MonotonicMicroseconds);
+    REGISTER_API(Microseconds);
+    REGISTER_API(CachedMicroseconds);
+    REGISTER_API(BlockedClientMeasureTimeStart);
+    REGISTER_API(BlockedClientMeasureTimeEnd);
+    REGISTER_API(GetThreadSafeContext);
+    REGISTER_API(GetDetachedThreadSafeContext);
+    REGISTER_API(FreeThreadSafeContext);
+    REGISTER_API(ThreadSafeContextLock);
+    REGISTER_API(ThreadSafeContextTryLock);
+    REGISTER_API(ThreadSafeContextUnlock);
+    REGISTER_API(DigestAddStringBuffer);
+    REGISTER_API(DigestAddLongLong);
+    REGISTER_API(DigestEndSequence);
+    REGISTER_API(NotifyKeyspaceEvent);
+    REGISTER_API(GetNotifyKeyspaceEvents);
+    REGISTER_API(SubscribeToKeyspaceEvents);
+    REGISTER_API(AddPostNotificationJob);
+    REGISTER_API(RegisterClusterMessageReceiver);
+    REGISTER_API(SendClusterMessage);
+    REGISTER_API(GetClusterNodeInfo);
+    REGISTER_API(GetClusterNodesList);
+    REGISTER_API(FreeClusterNodesList);
+    REGISTER_API(CreateTimer);
+    REGISTER_API(StopTimer);
+    REGISTER_API(GetTimerInfo);
+    REGISTER_API(GetMyClusterID);
+    REGISTER_API(GetClusterSize);
+    REGISTER_API(GetRandomBytes);
+    REGISTER_API(GetRandomHexChars);
+    REGISTER_API(BlockedClientDisconnected);
+    REGISTER_API(SetDisconnectCallback);
+    REGISTER_API(GetBlockedClientHandle);
+    REGISTER_API(SetClusterFlags);
+    REGISTER_API(CreateDict);
+    REGISTER_API(FreeDict);
+    REGISTER_API(DictSize);
+    REGISTER_API(DictSetC);
+    REGISTER_API(DictReplaceC);
+    REGISTER_API(DictSet);
+    REGISTER_API(DictReplace);
+    REGISTER_API(DictGetC);
+    REGISTER_API(DictGet);
+    REGISTER_API(DictDelC);
+    REGISTER_API(DictDel);
+    REGISTER_API(DictIteratorStartC);
+    REGISTER_API(DictIteratorStart);
+    REGISTER_API(DictIteratorStop);
+    REGISTER_API(DictIteratorReseekC);
+    REGISTER_API(DictIteratorReseek);
+    REGISTER_API(DictNextC);
+    REGISTER_API(DictPrevC);
+    REGISTER_API(DictNext);
+    REGISTER_API(DictPrev);
+    REGISTER_API(DictCompareC);
+    REGISTER_API(DictCompare);
+    REGISTER_API(ExportSharedAPI);
+    REGISTER_API(GetSharedAPI);
+    REGISTER_API(RegisterCommandFilter);
+    REGISTER_API(UnregisterCommandFilter);
+    REGISTER_API(CommandFilterArgsCount);
+    REGISTER_API(CommandFilterArgGet);
+    REGISTER_API(CommandFilterArgInsert);
+    REGISTER_API(CommandFilterArgReplace);
+    REGISTER_API(CommandFilterArgDelete);
+    REGISTER_API(CommandFilterGetClientId);
+    REGISTER_API(Fork);
+    REGISTER_API(SendChildHeartbeat);
+    REGISTER_API(ExitFromChild);
+    REGISTER_API(KillForkChild);
+    REGISTER_API(RegisterInfoFunc);
+    REGISTER_API(InfoAddSection);
+    REGISTER_API(InfoBeginDictField);
+    REGISTER_API(InfoEndDictField);
+    REGISTER_API(InfoAddFieldString);
+    REGISTER_API(InfoAddFieldCString);
+    REGISTER_API(InfoAddFieldDouble);
+    REGISTER_API(InfoAddFieldLongLong);
+    REGISTER_API(InfoAddFieldULongLong);
+    REGISTER_API(GetServerInfo);
+    REGISTER_API(FreeServerInfo);
+    REGISTER_API(ServerInfoGetField);
+    REGISTER_API(ServerInfoGetFieldC);
+    REGISTER_API(ServerInfoGetFieldSigned);
+    REGISTER_API(ServerInfoGetFieldUnsigned);
+    REGISTER_API(ServerInfoGetFieldDouble);
+    REGISTER_API(GetClientInfoById);
+    REGISTER_API(GetClientNameById);
+    REGISTER_API(SetClientNameById);
+    REGISTER_API(PublishMessage);
+    REGISTER_API(PublishMessageShard);
+    REGISTER_API(SubscribeToServerEvent);
+    REGISTER_API(SetLRU);
+    REGISTER_API(GetLRU);
+    REGISTER_API(SetLFU);
+    REGISTER_API(GetLFU);
+    REGISTER_API(BlockClientOnKeys);
+    REGISTER_API(BlockClientOnKeysWithFlags);
+    REGISTER_API(SignalKeyAsReady);
+    REGISTER_API(GetBlockedClientReadyKey);
+    REGISTER_API(GetUsedMemoryRatio);
+    REGISTER_API(MallocSize);
+    REGISTER_API(MallocUsableSize);
+    REGISTER_API(MallocSizeString);
+    REGISTER_API(MallocSizeDict);
+    REGISTER_API(ScanCursorCreate);
+    REGISTER_API(ScanCursorDestroy);
+    REGISTER_API(ScanCursorRestart);
+    REGISTER_API(Scan);
+    REGISTER_API(ScanKey);
+    REGISTER_API(CreateModuleUser);
+    REGISTER_API(SetContextUser);
+    REGISTER_API(SetModuleUserACL);
+    REGISTER_API(SetModuleUserACLString);
+    REGISTER_API(GetModuleUserACLString);
+    REGISTER_API(GetCurrentUserName);
+    REGISTER_API(GetModuleUserFromUserName);
+    REGISTER_API(ACLCheckCommandPermissions);
+    REGISTER_API(ACLCheckKeyPermissions);
+    REGISTER_API(ACLCheckChannelPermissions);
+    REGISTER_API(ACLAddLogEntry);
+    REGISTER_API(ACLAddLogEntryByUserName);
+    REGISTER_API(FreeModuleUser);
+    REGISTER_API(DeauthenticateAndCloseClient);
+    REGISTER_API(AuthenticateClientWithACLUser);
+    REGISTER_API(AuthenticateClientWithUser);
+    REGISTER_API(GetContextFlagsAll);
+    REGISTER_API(GetModuleOptionsAll);
+    REGISTER_API(GetKeyspaceNotificationFlagsAll);
+    REGISTER_API(IsSubEventSupported);
+    REGISTER_API(GetServerVersion);
+    REGISTER_API(GetClientCertificate);
+    REGISTER_API(RedactClientCommandArgument);
+    REGISTER_API(GetCommandKeys);
+    REGISTER_API(GetCommandKeysWithFlags);
+    REGISTER_API(GetCurrentCommandName);
+    REGISTER_API(GetTypeMethodVersion);
+    REGISTER_API(RegisterDefragFunc);
+    REGISTER_API(DefragAlloc);
+    REGISTER_API(DefragRedisModuleString);
+    REGISTER_API(DefragShouldStop);
+    REGISTER_API(DefragCursorSet);
+    REGISTER_API(DefragCursorGet);
+    REGISTER_API(EventLoopAdd);
+    REGISTER_API(EventLoopDel);
+    REGISTER_API(EventLoopAddOneShot);
+    REGISTER_API(Yield);
+    REGISTER_API(RegisterBoolConfig);
+    REGISTER_API(RegisterNumericConfig);
+    REGISTER_API(RegisterStringConfig);
+    REGISTER_API(RegisterEnumConfig);
+    REGISTER_API(LoadConfigs);
+    REGISTER_API(RegisterAuthCallback);
+    REGISTER_API(RdbStreamCreateFromFile);
+    REGISTER_API(RdbStreamFree);
+    REGISTER_API(RdbLoad);
+    REGISTER_API(RdbSave);
+}
diff --git a/src/modules/.gitignore b/src/modules/.gitignore
new file mode 100644
index 0000000..4de1735
--- /dev/null
+++ b/src/modules/.gitignore
@@ -0,0 +1,2 @@
+*.so
+*.xo
diff --git a/src/modules/Makefile b/src/modules/Makefile
new file mode 100644
index 0000000..b9ef578
--- /dev/null
+++ b/src/modules/Makefile
@@ -0,0 +1,69 @@
+
+# Detect the host OS; the shared-object build flags below differ per platform.
+uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+
+# Compile / link flags for building the example modules on Linux vs. macOS.
+ifeq ($(uname_S),Linux)
+    SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c99 -O2
+    SHOBJ_LDFLAGS ?= -shared
+else
+    SHOBJ_CFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -std=c99 -O2
+    SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup
+endif
+
+# OS X 11.x doesn't have /usr/lib/libSystem.dylib and needs an explicit setting.
+ifeq ($(uname_S),Darwin)
+ifeq ("$(wildcard /usr/lib/libSystem.dylib)","")
+LIBS = -L /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib -lsystem
+endif
+endif
+
+.SUFFIXES: .c .so .xo .o # .xo = position-independent object for a .so module
+
+all: helloworld.so hellotype.so helloblock.so hellocluster.so hellotimer.so hellodict.so hellohook.so helloacl.so
+
+.c.xo:
+	$(CC) -I. $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@
+
+helloworld.xo: ../redismodule.h
+
+helloworld.so: helloworld.xo
+	$(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+hellotype.xo: ../redismodule.h
+
+hellotype.so: hellotype.xo
+	$(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+helloblock.xo: ../redismodule.h
+
+helloblock.so: helloblock.xo
+	$(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lpthread -lc
+
+hellocluster.xo: ../redismodule.h
+
+hellocluster.so: hellocluster.xo
+	$(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+hellotimer.xo: ../redismodule.h
+
+hellotimer.so: hellotimer.xo
+	$(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+hellodict.xo: ../redismodule.h
+
+hellodict.so: hellodict.xo
+	$(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+hellohook.xo: ../redismodule.h
+
+hellohook.so: hellohook.xo
+	$(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+helloacl.xo: ../redismodule.h
+
+helloacl.so: helloacl.xo
+	$(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+clean:
+	rm -rf *.xo *.so
diff --git a/src/modules/helloacl.c b/src/modules/helloacl.c
new file mode 100644
index 0000000..53f3a44
--- /dev/null
+++ b/src/modules/helloacl.c
@@ -0,0 +1,190 @@
+/* ACL API example - An example for performing custom synchronous and
+ * asynchronous password authentication.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <pthread.h>
+#include <unistd.h>
+
+/* A simple global user. */
+static RedisModuleUser *global;
+static uint64_t global_auth_client_id = 0;
+
+/* HELLOACL.REVOKE
+ * Synchronously revoke access from a user. */
+int RevokeCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (global_auth_client_id) {
+ RedisModule_DeauthenticateAndCloseClient(ctx, global_auth_client_id);
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+ } else {
+ return RedisModule_ReplyWithError(ctx, "Global user currently not used");
+ }
+}
+
+/* HELLOACL.RESET
+ * Synchronously delete and re-create a module user. */
+int ResetCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModule_FreeModuleUser(global);
+ global = RedisModule_CreateModuleUser("global");
+ RedisModule_SetModuleUserACL(global, "allcommands");
+ RedisModule_SetModuleUserACL(global, "allkeys");
+ RedisModule_SetModuleUserACL(global, "on");
+
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* Callback handler for user changes; use this to notify a module of
+ * changes to users authenticated by the module. */
+void HelloACL_UserChanged(uint64_t client_id, void *privdata) {
+ REDISMODULE_NOT_USED(privdata);
+ REDISMODULE_NOT_USED(client_id);
+ global_auth_client_id = 0;
+}
+
+/* HELLOACL.AUTHGLOBAL
+ * Synchronously assigns a module user to the current context. */
+int AuthGlobalCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (global_auth_client_id) {
+ return RedisModule_ReplyWithError(ctx, "Global user currently used");
+ }
+
+ RedisModule_AuthenticateClientWithUser(ctx, global, HelloACL_UserChanged, NULL, &global_auth_client_id);
+
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+#define TIMEOUT_TIME 1000
+
+/* Reply callback for auth command HELLOACL.AUTHASYNC */
+int HelloACL_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ size_t length;
+
+ RedisModuleString *user_string = RedisModule_GetBlockedClientPrivateData(ctx);
+ const char *name = RedisModule_StringPtrLen(user_string, &length);
+
+ if (RedisModule_AuthenticateClientWithACLUser(ctx, name, length, NULL, NULL, NULL) ==
+ REDISMODULE_ERR) {
+ return RedisModule_ReplyWithError(ctx, "Invalid Username or password");
+ }
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* Timeout callback for auth command HELLOACL.AUTHASYNC */
+int HelloACL_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ return RedisModule_ReplyWithSimpleString(ctx, "Request timedout");
+}
+
+/* Private data freeing callback for the HELLOACL.AUTHASYNC command. */
+void HelloACL_FreeData(RedisModuleCtx *ctx, void *privdata) {
+ REDISMODULE_NOT_USED(ctx);
+ RedisModule_FreeString(NULL, privdata);
+}
+
+/* Background authentication can happen here. */
+void *HelloACL_ThreadMain(void *args) {
+ void **targs = args;
+ RedisModuleBlockedClient *bc = targs[0];
+ RedisModuleString *user = targs[1];
+ RedisModule_Free(targs);
+
+ RedisModule_UnblockClient(bc,user);
+ return NULL;
+}
+
+/* HELLOACL.AUTHASYNC
+ * Asynchronously assigns an ACL user to the current context. */
+int AuthAsyncCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+
+ pthread_t tid;
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx, HelloACL_Reply, HelloACL_Timeout, HelloACL_FreeData, TIMEOUT_TIME);
+
+
+ void **targs = RedisModule_Alloc(sizeof(void*)*2);
+ targs[0] = bc;
+ targs[1] = RedisModule_CreateStringFromString(NULL, argv[1]);
+
+ if (pthread_create(&tid, NULL, HelloACL_ThreadMain, targs) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx, "-ERR Can't start thread");
+ }
+
+ return REDISMODULE_OK;
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"helloacl",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"helloacl.reset",
+ ResetCommand_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"helloacl.revoke",
+ RevokeCommand_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"helloacl.authglobal",
+ AuthGlobalCommand_RedisCommand,"no-auth",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"helloacl.authasync",
+ AuthAsyncCommand_RedisCommand,"no-auth",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ global = RedisModule_CreateModuleUser("global");
+ RedisModule_SetModuleUserACL(global, "allcommands");
+ RedisModule_SetModuleUserACL(global, "allkeys");
+ RedisModule_SetModuleUserACL(global, "on");
+
+ global_auth_client_id = 0;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c
new file mode 100644
index 0000000..dc3d749
--- /dev/null
+++ b/src/modules/helloblock.c
@@ -0,0 +1,218 @@
+/* Helloblock module -- An example of blocking command implementation
+ * with threads.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+
+/* Reply callback for blocking command HELLO.BLOCK */
+int HelloBlock_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ int *myint = RedisModule_GetBlockedClientPrivateData(ctx);
+ return RedisModule_ReplyWithLongLong(ctx,*myint);
+}
+
+/* Timeout callback for blocking command HELLO.BLOCK */
+int HelloBlock_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ return RedisModule_ReplyWithSimpleString(ctx,"Request timedout");
+}
+
+/* Private data freeing callback for HELLO.BLOCK command. */
+void HelloBlock_FreeData(RedisModuleCtx *ctx, void *privdata) {
+ REDISMODULE_NOT_USED(ctx);
+ RedisModule_Free(privdata);
+}
+
+/* The thread entry point that actually executes the blocking part
+ * of the command HELLO.BLOCK. */
+void *HelloBlock_ThreadMain(void *arg) {
+ void **targ = arg;
+ RedisModuleBlockedClient *bc = targ[0];
+ long long delay = (unsigned long)targ[1];
+ RedisModule_Free(targ);
+
+ sleep(delay);
+ int *r = RedisModule_Alloc(sizeof(int));
+ *r = rand();
+ RedisModule_UnblockClient(bc,r);
+ return NULL;
+}
+
+/* An example blocked client disconnection callback.
+ *
+ * Note that in the case of the HELLO.BLOCK command, the blocked client is now
+ * owned by the thread calling sleep(). In this specific case, there is not
+ * much we can do, however normally we could instead implement a way to
+ * signal the thread that the client disconnected, and sleep the specified
+ * amount of seconds with a while loop calling sleep(1), so that once we
+ * detect the client disconnection, we can terminate the thread ASAP. */
+void HelloBlock_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) {
+ RedisModule_Log(ctx,"warning","Blocked client %p disconnected!",
+ (void*)bc);
+
+ /* Here you should cleanup your state / threads, and if possible
+ * call RedisModule_UnblockClient(), or notify the thread that will
+ * call the function ASAP. */
+}
+
+/* HELLO.BLOCK <delay> <timeout> -- Block for <delay> seconds, then reply with
+ * a random number. Timeout is the command timeout, so that you can test
+ * what happens when the delay is greater than the timeout. */
+int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ long long delay;
+ long long timeout;
+
+ if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ pthread_t tid;
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout);
+
+ /* Here we set a disconnection handler, however since this module will
+ * block in sleep() in a thread, there is not much we can do in the
+ * callback, so this is just to show you the API. */
+ RedisModule_SetDisconnectCallback(bc,HelloBlock_Disconnected);
+
+ /* Now that we setup a blocking client, we need to pass the control
+ * to the thread. However we need to pass arguments to the thread:
+ * the delay and a reference to the blocked client handle. */
+ void **targ = RedisModule_Alloc(sizeof(void*)*2);
+ targ[0] = bc;
+ targ[1] = (void*)(unsigned long) delay;
+
+ if (pthread_create(&tid,NULL,HelloBlock_ThreadMain,targ) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
+ }
+ return REDISMODULE_OK;
+}
+
+/* The thread entry point that actually executes the blocking part
+ * of the command HELLO.KEYS.
+ *
+ * Note: this implementation is very simple on purpose, so no duplicated
+ * keys (returned by SCAN) are filtered. However adding such a functionality
+ * would be trivial just using any data structure implementing a dictionary
+ * in order to filter the duplicated items. */
+void *HelloKeys_ThreadMain(void *arg) {
+ RedisModuleBlockedClient *bc = arg;
+ RedisModuleCtx *ctx = RedisModule_GetThreadSafeContext(bc);
+ long long cursor = 0;
+ size_t replylen = 0;
+
+ RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_LEN);
+ do {
+ RedisModule_ThreadSafeContextLock(ctx);
+ RedisModuleCallReply *reply = RedisModule_Call(ctx,
+ "SCAN","l",(long long)cursor);
+ RedisModule_ThreadSafeContextUnlock(ctx);
+
+ RedisModuleCallReply *cr_cursor =
+ RedisModule_CallReplyArrayElement(reply,0);
+ RedisModuleCallReply *cr_keys =
+ RedisModule_CallReplyArrayElement(reply,1);
+
+ RedisModuleString *s = RedisModule_CreateStringFromCallReply(cr_cursor);
+ RedisModule_StringToLongLong(s,&cursor);
+ RedisModule_FreeString(ctx,s);
+
+ size_t items = RedisModule_CallReplyLength(cr_keys);
+ for (size_t j = 0; j < items; j++) {
+ RedisModuleCallReply *ele =
+ RedisModule_CallReplyArrayElement(cr_keys,j);
+ RedisModule_ReplyWithCallReply(ctx,ele);
+ replylen++;
+ }
+ RedisModule_FreeCallReply(reply);
+ } while (cursor != 0);
+ RedisModule_ReplySetArrayLength(ctx,replylen);
+
+ RedisModule_FreeThreadSafeContext(ctx);
+ RedisModule_UnblockClient(bc,NULL);
+ return NULL;
+}
+
+/* HELLO.KEYS -- Return all the keys in the current database without blocking
+ * the server. The keys do not represent a point-in-time state so only the keys
+ * that were in the database from the start to the end are guaranteed to be
+ * there. */
+int HelloKeys_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ if (argc != 1) return RedisModule_WrongArity(ctx);
+
+ pthread_t tid;
+
+ /* Note that when blocking the client we do not set any callback: no
+ * timeout is possible since we passed '0', nor we need a reply callback
+ * because we'll use the thread safe context to accumulate a reply. */
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,NULL,NULL,NULL,0);
+
+ /* Now that we setup a blocking client, we need to pass the control
+ * to the thread. However we need to pass arguments to the thread:
+ * the reference to the blocked client handle. */
+ if (pthread_create(&tid,NULL,HelloKeys_ThreadMain,bc) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
+ }
+ return REDISMODULE_OK;
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"helloblock",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.block",
+ HelloBlock_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+ if (RedisModule_CreateCommand(ctx,"hello.keys",
+ HelloKeys_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/hellocluster.c b/src/modules/hellocluster.c
new file mode 100644
index 0000000..bc145c2
--- /dev/null
+++ b/src/modules/hellocluster.c
@@ -0,0 +1,118 @@
+/* Helloworld cluster -- A ping/pong cluster API example.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+#define MSGTYPE_PING 1
+#define MSGTYPE_PONG 2
+
+/* HELLOCLUSTER.PINGALL */
+int PingallCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModule_SendClusterMessage(ctx,NULL,MSGTYPE_PING,"Hey",3);
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* HELLOCLUSTER.LIST */
+int ListCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ size_t numnodes;
+ char **ids = RedisModule_GetClusterNodesList(ctx,&numnodes);
+ if (ids == NULL) {
+ return RedisModule_ReplyWithError(ctx,"Cluster not enabled");
+ }
+
+ RedisModule_ReplyWithArray(ctx,numnodes);
+ for (size_t j = 0; j < numnodes; j++) {
+ int port;
+ RedisModule_GetClusterNodeInfo(ctx,ids[j],NULL,NULL,&port,NULL);
+ RedisModule_ReplyWithArray(ctx,2);
+ RedisModule_ReplyWithStringBuffer(ctx,ids[j],REDISMODULE_NODE_ID_LEN);
+ RedisModule_ReplyWithLongLong(ctx,port);
+ }
+ RedisModule_FreeClusterNodesList(ids);
+ return REDISMODULE_OK;
+}
+
+/* Callback for message MSGTYPE_PING */
+void PingReceiver(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len) {
+ RedisModule_Log(ctx,"notice","PING (type %d) RECEIVED from %.*s: '%.*s'",
+ type,REDISMODULE_NODE_ID_LEN,sender_id,(int)len, payload);
+ RedisModule_SendClusterMessage(ctx,NULL,MSGTYPE_PONG,"Ohi!",4);
+ RedisModuleCallReply *reply = RedisModule_Call(ctx, "INCR", "c", "pings_received");
+ RedisModule_FreeCallReply(reply);
+}
+
+/* Callback for message MSGTYPE_PONG. */
+void PongReceiver(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len) {
+ RedisModule_Log(ctx,"notice","PONG (type %d) RECEIVED from %.*s: '%.*s'",
+ type,REDISMODULE_NODE_ID_LEN,sender_id,(int)len, payload);
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"hellocluster",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellocluster.pingall",
+ PingallCommand_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellocluster.list",
+ ListCommand_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ /* Disable Redis Cluster sharding and redirections. This way every node
+ * will be able to access every possible key, regardless of the hash slot.
+ * This way the PING message handler will be able to increment a specific
+ * variable. Normally you do that in order for the distributed system
+ * you create as a module to have total freedom in the keyspace
+ * manipulation. */
+ RedisModule_SetClusterFlags(ctx,REDISMODULE_CLUSTER_FLAG_NO_REDIRECTION);
+
+ /* Register our handlers for different message types. */
+ RedisModule_RegisterClusterMessageReceiver(ctx,MSGTYPE_PING,PingReceiver);
+ RedisModule_RegisterClusterMessageReceiver(ctx,MSGTYPE_PONG,PongReceiver);
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/hellodict.c b/src/modules/hellodict.c
new file mode 100644
index 0000000..12b6e91
--- /dev/null
+++ b/src/modules/hellodict.c
@@ -0,0 +1,131 @@
+/* Hellodict -- An example of modules dictionary API
+ *
+ * This module implements a volatile key-value store on top of the
+ * dictionary exported by the Redis modules API.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+static RedisModuleDict *Keyspace;
+
+/* HELLODICT.SET <key> <value>
+ *
+ * Set the specified key to the specified value. */
+int cmd_SET(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ RedisModule_DictSet(Keyspace,argv[1],argv[2]);
+ /* We need to keep a reference to the value stored at the key, otherwise
+ * it would be freed when this callback returns. */
+ RedisModule_RetainString(NULL,argv[2]);
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* HELLODICT.GET <key>
+ *
+ * Return the value of the specified key, or a null reply if the key
+ * is not defined. */
+int cmd_GET(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+ RedisModuleString *val = RedisModule_DictGet(Keyspace,argv[1],NULL);
+ if (val == NULL) {
+ return RedisModule_ReplyWithNull(ctx);
+ } else {
+ return RedisModule_ReplyWithString(ctx, val);
+ }
+}
+
+/* HELLODICT.KEYRANGE <startkey> <endkey> <count>
+ *
+ * Return a list of matching keys, lexicographically between startkey
+ * and endkey. No more than 'count' items are emitted. */
+int cmd_KEYRANGE(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ /* Parse the count argument. */
+ long long count;
+ if (RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ /* Seek the iterator. */
+ RedisModuleDictIter *iter = RedisModule_DictIteratorStart(
+ Keyspace, ">=", argv[1]);
+
+ /* Reply with the matching items. */
+ char *key;
+ size_t keylen;
+ long long replylen = 0; /* Keep track of the emitted array len. */
+ RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_LEN);
+ while((key = RedisModule_DictNextC(iter,&keylen,NULL)) != NULL) {
+ if (replylen >= count) break;
+ if (RedisModule_DictCompare(iter,"<=",argv[2]) == REDISMODULE_ERR)
+ break;
+ RedisModule_ReplyWithStringBuffer(ctx,key,keylen);
+ replylen++;
+ }
+ RedisModule_ReplySetArrayLength(ctx,replylen);
+
+ /* Cleanup. */
+ RedisModule_DictIteratorStop(iter);
+ return REDISMODULE_OK;
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"hellodict",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellodict.set",
+ cmd_SET,"write deny-oom",1,1,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellodict.get",
+ cmd_GET,"readonly",1,1,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellodict.keyrange",
+ cmd_KEYRANGE,"readonly",1,1,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ /* Create our global dictionary. Here we'll set our keys and values. */
+ Keyspace = RedisModule_CreateDict(NULL);
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/hellohook.c b/src/modules/hellohook.c
new file mode 100644
index 0000000..2859a8b
--- /dev/null
+++ b/src/modules/hellohook.c
@@ -0,0 +1,92 @@
+/* Server hooks API example
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2019, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+/* Client state change callback. */
+void clientChangeCallback(RedisModuleCtx *ctx, RedisModuleEvent e, uint64_t sub, void *data)
+{
+ REDISMODULE_NOT_USED(ctx);
+ REDISMODULE_NOT_USED(e);
+
+ RedisModuleClientInfo *ci = data;
+ printf("Client %s event for client #%llu %s:%d\n",
+ (sub == REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED) ?
+ "connection" : "disconnection",
+ (unsigned long long)ci->id,ci->addr,ci->port);
+}
+
+void flushdbCallback(RedisModuleCtx *ctx, RedisModuleEvent e, uint64_t sub, void *data)
+{
+ REDISMODULE_NOT_USED(ctx);
+ REDISMODULE_NOT_USED(e);
+
+ RedisModuleFlushInfo *fi = data;
+ if (sub == REDISMODULE_SUBEVENT_FLUSHDB_START) {
+ if (fi->dbnum != -1) {
+ RedisModuleCallReply *reply;
+ reply = RedisModule_Call(ctx,"DBSIZE","");
+ long long numkeys = RedisModule_CallReplyInteger(reply);
+ printf("FLUSHDB event of database %d started (%lld keys in DB)\n",
+ fi->dbnum, numkeys);
+ RedisModule_FreeCallReply(reply);
+ } else {
+ printf("FLUSHALL event started\n");
+ }
+ } else {
+ if (fi->dbnum != -1) {
+ printf("FLUSHDB event of database %d ended\n",fi->dbnum);
+ } else {
+ printf("FLUSHALL event ended\n");
+ }
+ }
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"hellohook",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ RedisModule_SubscribeToServerEvent(ctx,
+ RedisModuleEvent_ClientChange, clientChangeCallback);
+ RedisModule_SubscribeToServerEvent(ctx,
+ RedisModuleEvent_FlushDB, flushdbCallback);
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/hellotimer.c b/src/modules/hellotimer.c
new file mode 100644
index 0000000..67e1e67
--- /dev/null
+++ b/src/modules/hellotimer.c
@@ -0,0 +1,75 @@
+/* Timer API example -- Register and handle timer events
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+/* Timer callback: prints the message carried in the timer's private data,
+ * then frees it. Ownership of 'data' (allocated with RedisModule_Alloc in
+ * TimerCommand_RedisCommand) is transferred to this callback. */
+void timerHandler(RedisModuleCtx *ctx, void *data) {
+ REDISMODULE_NOT_USED(ctx);
+ printf("Fired %s!\n", (char *)data);
+ RedisModule_Free(data);
+}
+
+/* HELLOTIMER.TIMER
+ * Registers 10 one-shot timers with random delays in [0,5000) ms, each
+ * carrying a heap-allocated message that timerHandler prints and frees.
+ * NOTE(review): rand() is never seeded in this module, so the delay
+ * sequence repeats across process runs — presumably fine for a demo. */
+int TimerCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ for (int j = 0; j < 10; j++) {
+ int delay = rand() % 5000;
+ char *buf = RedisModule_Alloc(256);
+ snprintf(buf,256,"After %d", delay);
+ /* 'buf' ownership passes to the timer; freed in timerHandler. */
+ RedisModuleTimerID tid = RedisModule_CreateTimer(ctx,delay,timerHandler,buf);
+ REDISMODULE_NOT_USED(tid);
+ }
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. Registers the module as
+ * "hellotimer" and exposes the single HELLOTIMER.TIMER command. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"hellotimer",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotimer.timer",
+ TimerCommand_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c
new file mode 100644
index 0000000..1dc53d2
--- /dev/null
+++ b/src/modules/hellotype.c
@@ -0,0 +1,362 @@
+/* This file implements a new module native data type called "HELLOTYPE".
+ * The data structure implemented is a very simple ordered linked list of
+ * 64 bit integers, in order to have something that is real world enough, but
+ * at the same time, extremely simple to understand, to show how the API
+ * works, how a new data type is created, and how to write basic methods
+ * for RDB loading, saving and AOF rewriting.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdint.h>
+
+static RedisModuleType *HelloType;
+
+/* ========================== Internal data structure =======================
+ * This is just a linked list of 64 bit integers where elements are inserted
+ * in-place, so it's ordered. There is no pop/push operation but just insert
+ * because it is enough to show the implementation of new data types without
+ * making things complex. */
+
+/* One node of the singly-linked, ascending-ordered list of 64 bit ints. */
+struct HelloTypeNode {
+ int64_t value;
+ struct HelloTypeNode *next;
+};
+
+/* The HELLOTYPE value: head of the ordered list plus a cached length. */
+struct HelloTypeObject {
+ struct HelloTypeNode *head;
+ size_t len; /* Number of elements added. */
+};
+
+/* Allocate and return a new, empty HELLOTYPE list object. */
+struct HelloTypeObject *createHelloTypeObject(void) {
+    struct HelloTypeObject *obj = RedisModule_Alloc(sizeof(*obj));
+    obj->head = NULL;
+    obj->len = 0;
+    return obj;
+}
+
+/* Insert 'ele' into the ordered list 'o', keeping ascending order.
+ * Uses a pointer-to-pointer walk so head insertion and mid-list insertion
+ * share one code path. Duplicates are allowed; 'len' is updated. */
+void HelloTypeInsert(struct HelloTypeObject *o, int64_t ele) {
+    struct HelloTypeNode **link = &o->head;
+
+    /* Advance to the first node whose value is >= ele. */
+    while (*link != NULL && (*link)->value < ele)
+        link = &(*link)->next;
+
+    struct HelloTypeNode *newnode = RedisModule_Alloc(sizeof(*newnode));
+    newnode->value = ele;
+    newnode->next = *link;
+    *link = newnode;
+    o->len++;
+}
+
+/* Release a HELLOTYPE value: free every node, then the header itself. */
+void HelloTypeReleaseObject(struct HelloTypeObject *o) {
+    struct HelloTypeNode *node = o->head;
+    while (node != NULL) {
+        struct HelloTypeNode *tofree = node;
+        node = node->next;
+        RedisModule_Free(tofree);
+    }
+    RedisModule_Free(o);
+}
+
+/* ========================= "hellotype" type commands ======================= */
+
+/* HELLOTYPE.INSERT key value
+ * Inserts a signed 64 bit integer into the ordered list stored at 'key',
+ * creating the key if needed. Replies with the new list length and
+ * replicates the command verbatim. */
+int HelloTypeInsert_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long value;
+ if ((RedisModule_StringToLongLong(argv[2],&value) != REDISMODULE_OK)) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid value: must be a signed 64 bit integer");
+ }
+
+ /* Create an empty value object if the key is currently empty. */
+ struct HelloTypeObject *hto;
+ if (type == REDISMODULE_KEYTYPE_EMPTY) {
+ hto = createHelloTypeObject();
+ RedisModule_ModuleTypeSetValue(key,HelloType,hto);
+ } else {
+ hto = RedisModule_ModuleTypeGetValue(key);
+ }
+
+ /* Insert the new element. */
+ HelloTypeInsert(hto,value);
+ /* Wake up any client blocked on this key via HELLOTYPE.BRANGE. */
+ RedisModule_SignalKeyAsReady(ctx,argv[1]);
+
+ RedisModule_ReplyWithLongLong(ctx,hto->len);
+ RedisModule_ReplicateVerbatim(ctx);
+ return REDISMODULE_OK;
+}
+
+/* HELLOTYPE.RANGE key first count
+ * Replies with up to 'count' elements of the list at 'key', starting at
+ * zero-based position 'first'.
+ *
+ * Fix: the original parsed and validated 'first' but never used it, so the
+ * range always started at the head of the list. */
+int HelloTypeRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+    if (argc != 4) return RedisModule_WrongArity(ctx);
+    RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+        REDISMODULE_READ|REDISMODULE_WRITE);
+    int type = RedisModule_KeyType(key);
+    if (type != REDISMODULE_KEYTYPE_EMPTY &&
+        RedisModule_ModuleTypeGetType(key) != HelloType)
+    {
+        return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+    }
+
+    long long first, count;
+    if (RedisModule_StringToLongLong(argv[2],&first) != REDISMODULE_OK ||
+        RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK ||
+        first < 0 || count < 0)
+    {
+        return RedisModule_ReplyWithError(ctx,
+            "ERR invalid first or count parameters");
+    }
+
+    struct HelloTypeObject *hto = RedisModule_ModuleTypeGetValue(key);
+    struct HelloTypeNode *node = hto ? hto->head : NULL;
+    /* Skip the first 'first' elements before emitting the range. */
+    while(node && first--) node = node->next;
+    RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_LEN);
+    long long arraylen = 0;
+    while(node && count--) {
+        RedisModule_ReplyWithLongLong(ctx,node->value);
+        arraylen++;
+        node = node->next;
+    }
+    RedisModule_ReplySetArrayLength(ctx,arraylen);
+    return REDISMODULE_OK;
+}
+
+/* HELLOTYPE.LEN key
+ * Replies with the number of elements in the list at 'key', or 0 if the
+ * key does not exist. */
+int HelloTypeLen_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ /* GetValue returns NULL for an empty key; report length 0 in that case. */
+ struct HelloTypeObject *hto = RedisModule_ModuleTypeGetValue(key);
+ RedisModule_ReplyWithLongLong(ctx,hto ? hto->len : 0);
+ return REDISMODULE_OK;
+}
+
+/* ====================== Example of a blocking command ==================== */
+
+/* Reply callback for blocking command HELLOTYPE.BRANGE, this will get
+ * called when the key we blocked for is ready: we need to check if we
+ * can really serve the client, and reply OK or ERR accordingly.
+ * Returning REDISMODULE_ERR tells the core the key is not actually
+ * servable yet and the client stays blocked. */
+int HelloBlock_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModuleString *keyname = RedisModule_GetBlockedClientReadyKey(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,keyname,REDISMODULE_READ);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_MODULE ||
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ RedisModule_CloseKey(key);
+ return REDISMODULE_ERR;
+ }
+
+ /* In case the key is able to serve our blocked client, let's directly
+ * use our original command implementation to make this example simpler.
+ * argc-1 drops the trailing 'timeout' argument so the argument vector
+ * matches the key/first/count shape RANGE expects. */
+ RedisModule_CloseKey(key);
+ return HelloTypeRange_RedisCommand(ctx,argv,argc-1);
+}
+
+/* Timeout callback for blocking command HELLOTYPE.BRANGE: replies with a
+ * status string when the block expires before the key becomes ready.
+ * Fix: corrected the typo in the user-visible message ("timedout"). */
+int HelloBlock_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    REDISMODULE_NOT_USED(argv);
+    REDISMODULE_NOT_USED(argc);
+    return RedisModule_ReplyWithSimpleString(ctx,"Request timed out");
+}
+
+/* Private data freeing callback for HELLOTYPE.BRANGE command. Releases the
+ * privdata passed to RedisModule_BlockClientOnKeys once the blocked client
+ * is resolved (reply, timeout, or disconnect). */
+void HelloBlock_FreeData(RedisModuleCtx *ctx, void *privdata) {
+ REDISMODULE_NOT_USED(ctx);
+ RedisModule_Free(privdata);
+}
+
+/* HELLOTYPE.BRANGE key first count timeout -- This is a blocking version of
+ * the RANGE operation, in order to show how to use the API
+ * RedisModule_BlockClientOnKeys(). If the key already exists the reply is
+ * served synchronously via HelloTypeRange_RedisCommand; otherwise the
+ * client blocks until HELLOTYPE.INSERT signals the key ready or 'timeout'
+ * milliseconds elapse. */
+int HelloTypeBRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 5) return RedisModule_WrongArity(ctx);
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ /* Parse the timeout before even trying to serve the client synchronously,
+ * so that we always fail ASAP on syntax errors. */
+ long long timeout;
+ if (RedisModule_StringToLongLong(argv[4],&timeout) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,
+ "ERR invalid timeout parameter");
+ }
+
+ /* Can we serve the reply synchronously? (argc-1 drops the timeout arg.) */
+ if (type != REDISMODULE_KEYTYPE_EMPTY) {
+ return HelloTypeRange_RedisCommand(ctx,argv,argc-1);
+ }
+
+ /* Otherwise let's block on the key. The 100-byte privdata is a dummy
+ * payload used only to demonstrate the free callback; it is released by
+ * HelloBlock_FreeData. */
+ void *privdata = RedisModule_Alloc(100);
+ RedisModule_BlockClientOnKeys(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout,argv+1,1,privdata);
+ return REDISMODULE_OK;
+}
+
+/* ========================== "hellotype" type methods ======================= */
+
+/* RDB load callback: rebuilds a HELLOTYPE value from the element count
+ * followed by that many signed integers (the format written by
+ * HelloTypeRdbSave). Returning NULL signals a load error to the core. */
+void *HelloTypeRdbLoad(RedisModuleIO *rdb, int encver) {
+ if (encver != 0) {
+ /* Only encoding version 0 is understood by this module. */
+ /* RedisModule_Log("warning","Can't load data with version %d", encver);*/
+ return NULL;
+ }
+ uint64_t elements = RedisModule_LoadUnsigned(rdb);
+ struct HelloTypeObject *hto = createHelloTypeObject();
+ while(elements--) {
+ int64_t ele = RedisModule_LoadSigned(rdb);
+ HelloTypeInsert(hto,ele);
+ }
+ return hto;
+}
+
+/* RDB save callback: emit the element count followed by each value, the
+ * exact format HelloTypeRdbLoad expects back. */
+void HelloTypeRdbSave(RedisModuleIO *rdb, void *value) {
+    struct HelloTypeObject *hto = value;
+    RedisModule_SaveUnsigned(rdb,hto->len);
+    for (struct HelloTypeNode *n = hto->head; n != NULL; n = n->next)
+        RedisModule_SaveSigned(rdb,n->value);
+}
+
+/* AOF rewrite callback: re-emit the value as a sequence of
+ * HELLOTYPE.INSERT commands, one per element ("sl" = string key,
+ * long long value). */
+void HelloTypeAofRewrite(RedisModuleIO *aof, RedisModuleString *key, void *value) {
+ struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ while(node) {
+ RedisModule_EmitAOF(aof,"HELLOTYPE.INSERT","sl",key,node->value);
+ node = node->next;
+ }
+}
+
+/* The goal of this function is to return the amount of memory used by
+ * the HelloType value: the header structure plus one node per element.
+ * (Allocator overhead is not accounted for.) */
+size_t HelloTypeMemUsage(const void *value) {
+    const struct HelloTypeObject *hto = value;
+    return sizeof(*hto) + sizeof(struct HelloTypeNode) * hto->len;
+}
+
+/* Value-free callback: delegates to the internal release helper. */
+void HelloTypeFree(void *value) {
+ HelloTypeReleaseObject(value);
+}
+
+/* DEBUG DIGEST callback: fold every element into the digest in list order,
+ * ending the ordered sequence afterwards. */
+void HelloTypeDigest(RedisModuleDigest *md, void *value) {
+ struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ while(node) {
+ RedisModule_DigestAddLongLong(md,node->value);
+ node = node->next;
+ }
+ RedisModule_DigestEndSequence(md);
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. Registers the "hellotype"
+ * native data type (encoding version 0) and its four commands. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"hellotype",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ /* Type callbacks: persistence, AOF rewrite, memory reporting, freeing
+ * and digest support. */
+ RedisModuleTypeMethods tm = {
+ .version = REDISMODULE_TYPE_METHOD_VERSION,
+ .rdb_load = HelloTypeRdbLoad,
+ .rdb_save = HelloTypeRdbSave,
+ .aof_rewrite = HelloTypeAofRewrite,
+ .mem_usage = HelloTypeMemUsage,
+ .free = HelloTypeFree,
+ .digest = HelloTypeDigest
+ };
+
+ HelloType = RedisModule_CreateDataType(ctx,"hellotype",0,&tm);
+ if (HelloType == NULL) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.insert",
+ HelloTypeInsert_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.range",
+ HelloTypeRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.len",
+ HelloTypeLen_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.brange",
+ HelloTypeBRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/helloworld.c b/src/modules/helloworld.c
new file mode 100644
index 0000000..e517963
--- /dev/null
+++ b/src/modules/helloworld.c
@@ -0,0 +1,621 @@
+/* Helloworld module -- A few examples of the Redis Modules API in the form
+ * of commands showing how to accomplish common tasks.
+ *
+ * This module does not do anything useful, if not for a few commands. The
+ * examples are designed in order to show the API.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+/* HELLO.SIMPLE is among the simplest commands you can implement.
+ * It just returns the currently selected DB id, a functionality which is
+ * missing in Redis. The command uses two important API calls: one to
+ * fetch the currently selected DB, the other in order to send the client
+ * an integer reply as response. */
+int HelloSimple_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ RedisModule_ReplyWithLongLong(ctx,RedisModule_GetSelectedDb(ctx));
+ return REDISMODULE_OK;
+}
+
+/* HELLO.PUSH.NATIVE re-implements RPUSH, and shows the low level modules API
+ * where you can "open" keys, make low level operations, create new keys by
+ * pushing elements into non-existing keys, and so forth.
+ *
+ * You'll find this command to be roughly as fast as the actual RPUSH
+ * command. Replies with the list length after the push. */
+int HelloPushNative_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ RedisModule_ListPush(key,REDISMODULE_LIST_TAIL,argv[2]);
+ size_t newlen = RedisModule_ValueLength(key);
+ RedisModule_CloseKey(key);
+ RedisModule_ReplyWithLongLong(ctx,newlen);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.PUSH.CALL implements RPUSH using an higher level approach, calling
+ * a Redis command instead of working with the key in a low level way. This
+ * approach is useful when you need to call Redis commands that are not
+ * available as low level APIs, or when you don't need the maximum speed
+ * possible but instead prefer implementation simplicity. */
+int HelloPushCall_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ RedisModuleCallReply *reply;
+
+ /* NOTE(review): the reply is used without a NULL/type check — presumably
+ * safe because RPUSH with "ss" args cannot fail here; verify. */
+ reply = RedisModule_Call(ctx,"RPUSH","ss",argv[1],argv[2]);
+ long long len = RedisModule_CallReplyInteger(reply);
+ RedisModule_FreeCallReply(reply);
+ RedisModule_ReplyWithLongLong(ctx,len);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.PUSH.CALL2
+ * This is exactly as HELLO.PUSH.CALL, but shows how we can reply to the
+ * client using directly a reply object that Call() returned. */
+int HelloPushCall2_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ RedisModuleCallReply *reply;
+
+ reply = RedisModule_Call(ctx,"RPUSH","ss",argv[1],argv[2]);
+ /* Forward the Call() reply to the client as-is, then release it. */
+ RedisModule_ReplyWithCallReply(ctx,reply);
+ RedisModule_FreeCallReply(reply);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.LIST.SUM.LEN returns the total length of all the items inside
+ * a Redis list, by using the high level Call() API.
+ * This command is an example of the array reply access.
+ *
+ * Fix: renamed the local accumulator from 'strlen', which shadowed the
+ * C standard library function of the same name. */
+int HelloListSumLen_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+    if (argc != 2) return RedisModule_WrongArity(ctx);
+
+    RedisModuleCallReply *reply;
+
+    reply = RedisModule_Call(ctx,"LRANGE","sll",argv[1],(long long)0,(long long)-1);
+    size_t sumlen = 0;
+    size_t items = RedisModule_CallReplyLength(reply);
+    size_t j;
+    for (j = 0; j < items; j++) {
+        RedisModuleCallReply *ele = RedisModule_CallReplyArrayElement(reply,j);
+        sumlen += RedisModule_CallReplyLength(ele);
+    }
+    RedisModule_FreeCallReply(reply);
+    RedisModule_ReplyWithLongLong(ctx,sumlen);
+    return REDISMODULE_OK;
+}
+
+/* HELLO.LIST.SPLICE srclist dstlist count
+ * Moves 'count' elements from the tail of 'srclist' to the head of
+ * 'dstlist'. If less than count elements are available, it moves as much
+ * elements as possible. Replies with the remaining length of srclist. */
+int HelloListSplice_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *srckey = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ RedisModuleKey *dstkey = RedisModule_OpenKey(ctx,argv[2],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ /* Src and dst key must be empty or lists. */
+ if ((RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_EMPTY) ||
+ (RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_EMPTY))
+ {
+ /* No automatic memory management here: close both handles on every
+ * early return. */
+ RedisModule_CloseKey(srckey);
+ RedisModule_CloseKey(dstkey);
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long count;
+ if ((RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK) ||
+ (count < 0)) {
+ RedisModule_CloseKey(srckey);
+ RedisModule_CloseKey(dstkey);
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ /* Pop from the source tail, push to the destination head; stop early
+ * when the source runs out. */
+ while(count-- > 0) {
+ RedisModuleString *ele;
+
+ ele = RedisModule_ListPop(srckey,REDISMODULE_LIST_TAIL);
+ if (ele == NULL) break;
+ RedisModule_ListPush(dstkey,REDISMODULE_LIST_HEAD,ele);
+ RedisModule_FreeString(ctx,ele);
+ }
+
+ size_t len = RedisModule_ValueLength(srckey);
+ RedisModule_CloseKey(srckey);
+ RedisModule_CloseKey(dstkey);
+ RedisModule_ReplyWithLongLong(ctx,len);
+ return REDISMODULE_OK;
+}
+
+/* Like the HELLO.LIST.SPLICE above, but uses automatic memory management
+ * in order to avoid freeing stuff: keys and popped strings are released
+ * automatically when the command returns. */
+int HelloListSpliceAuto_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ RedisModule_AutoMemory(ctx);
+
+ RedisModuleKey *srckey = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ RedisModuleKey *dstkey = RedisModule_OpenKey(ctx,argv[2],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ /* Src and dst key must be empty or lists. */
+ if ((RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_EMPTY) ||
+ (RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_EMPTY))
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long count;
+ if ((RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK) ||
+ (count < 0))
+ {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ while(count-- > 0) {
+ RedisModuleString *ele;
+
+ ele = RedisModule_ListPop(srckey,REDISMODULE_LIST_TAIL);
+ if (ele == NULL) break;
+ RedisModule_ListPush(dstkey,REDISMODULE_LIST_HEAD,ele);
+ }
+
+ size_t len = RedisModule_ValueLength(srckey);
+ RedisModule_ReplyWithLongLong(ctx,len);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.RAND.ARRAY <count>
+ * Shows how to generate arrays as commands replies.
+ * It just outputs <count> random numbers. */
+int HelloRandArray_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+ long long count;
+ if (RedisModule_StringToLongLong(argv[1],&count) != REDISMODULE_OK ||
+ count < 0)
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+
+ /* To reply with an array, we call RedisModule_ReplyWithArray() followed
+ * by other "count" calls to other reply functions in order to generate
+ * the elements of the array. */
+ RedisModule_ReplyWithArray(ctx,count);
+ while(count--) RedisModule_ReplyWithLongLong(ctx,rand());
+ return REDISMODULE_OK;
+}
+
+/* This is a simple command to test replication. Because of the "!" modifier
+ * in the RedisModule_Call() call, the two INCRs get replicated.
+ * Also note how the ECHO is replicated in an unexpected position (check
+ * comments the function implementation). Always replies with 0. */
+int HelloRepl1_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ RedisModule_AutoMemory(ctx);
+
+ /* This will be replicated *after* the two INCR statements, since
+ * the Call() replication has precedence, so the actual replication
+ * stream will be:
+ *
+ * MULTI
+ * INCR foo
+ * INCR bar
+ * ECHO c foo
+ * EXEC
+ */
+ RedisModule_Replicate(ctx,"ECHO","c","foo");
+
+ /* Using the "!" modifier we replicate the command if it
+ * modified the dataset in some way. */
+ RedisModule_Call(ctx,"INCR","c!","foo");
+ RedisModule_Call(ctx,"INCR","c!","bar");
+
+ RedisModule_ReplyWithLongLong(ctx,0);
+
+ return REDISMODULE_OK;
+}
+
+/* Another command to show replication. In this case, we call
+ * RedisModule_ReplicateVerbatim() to mean we want just the command to be
+ * propagated to slaves / AOF exactly as it was called by the user.
+ *
+ * This command also shows how to work with string objects.
+ * It takes a list, and increments all the elements (that must have
+ * a numerical value) by 1, returning the sum of all the elements
+ * as reply.
+ *
+ * Usage: HELLO.REPL2 <list-key> */
+int HelloRepl2_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_LIST)
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+
+ size_t listlen = RedisModule_ValueLength(key);
+ long long sum = 0;
+
+ /* Rotate and increment: pop each element from the tail, bump it by one
+ * and push it back on the head, so after listlen steps the list order is
+ * preserved. Non-numeric elements are treated as 0 before incrementing. */
+ while(listlen--) {
+ RedisModuleString *ele = RedisModule_ListPop(key,REDISMODULE_LIST_TAIL);
+ long long val;
+ if (RedisModule_StringToLongLong(ele,&val) != REDISMODULE_OK) val = 0;
+ val++;
+ sum += val;
+ RedisModuleString *newele = RedisModule_CreateStringFromLongLong(ctx,val);
+ RedisModule_ListPush(key,REDISMODULE_LIST_HEAD,newele);
+ }
+ RedisModule_ReplyWithLongLong(ctx,sum);
+ RedisModule_ReplicateVerbatim(ctx);
+ return REDISMODULE_OK;
+}
+
+/* This is an example of strings DMA access. Given a key containing a string
+ * it toggles the case of each character from lower to upper case or the
+ * other way around.
+ *
+ * No automatic memory management is used in this example (for the sake
+ * of variety).
+ *
+ * Fix: the bytes are cast to unsigned char before being passed to the
+ * <ctype.h> functions — passing a negative char value is undefined
+ * behavior (CERT STR37-C).
+ *
+ * HELLO.TOGGLE.CASE key */
+int HelloToggleCase_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    if (argc != 2) return RedisModule_WrongArity(ctx);
+
+    RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+        REDISMODULE_READ|REDISMODULE_WRITE);
+
+    int keytype = RedisModule_KeyType(key);
+    if (keytype != REDISMODULE_KEYTYPE_STRING &&
+        keytype != REDISMODULE_KEYTYPE_EMPTY)
+    {
+        RedisModule_CloseKey(key);
+        return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+    }
+
+    if (keytype == REDISMODULE_KEYTYPE_STRING) {
+        size_t len, j;
+        char *s = RedisModule_StringDMA(key,&len,REDISMODULE_WRITE);
+        for (j = 0; j < len; j++) {
+            if (isupper((unsigned char)s[j])) {
+                s[j] = tolower((unsigned char)s[j]);
+            } else {
+                s[j] = toupper((unsigned char)s[j]);
+            }
+        }
+    }
+
+    RedisModule_CloseKey(key);
+    RedisModule_ReplyWithSimpleString(ctx,"OK");
+    RedisModule_ReplicateVerbatim(ctx);
+    return REDISMODULE_OK;
+}
+
+/* HELLO.MORE.EXPIRE key milliseconds.
+ *
+ * If the key has already an associated TTL, extends it by "milliseconds"
+ * milliseconds. Otherwise no operation is performed. Always replies OK. */
+int HelloMoreExpire_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ /* NOTE(review): StringToLongLong writes a long long — assumes mstime_t
+ * is typedef'd to long long in redismodule.h; confirm. */
+ mstime_t addms, expire;
+
+ if (RedisModule_StringToLongLong(argv[2],&addms) != REDISMODULE_OK)
+ return RedisModule_ReplyWithError(ctx,"ERR invalid expire time");
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ expire = RedisModule_GetExpire(key);
+ /* REDISMODULE_NO_EXPIRE means the key is missing or persistent: no-op. */
+ if (expire != REDISMODULE_NO_EXPIRE) {
+ expire += addms;
+ RedisModule_SetExpire(key,expire);
+ }
+ return RedisModule_ReplyWithSimpleString(ctx,"OK");
+}
+
+/* HELLO.ZSUMRANGE key startscore endscore
+ * Return the sum of all the scores elements between startscore and endscore.
+ *
+ * The computation is performed two times, one time from start to end and
+ * another time backward. The two scores, returned as a two element array,
+ * should match.
+ *
+ * Fix: this command does not use automatic memory management, so the key
+ * handle must be closed explicitly on the WRONGTYPE error path — the
+ * original returned without closing it, leaking the handle. */
+int HelloZsumRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    double score_start, score_end;
+    if (argc != 4) return RedisModule_WrongArity(ctx);
+
+    if (RedisModule_StringToDouble(argv[2],&score_start) != REDISMODULE_OK ||
+        RedisModule_StringToDouble(argv[3],&score_end) != REDISMODULE_OK)
+    {
+        return RedisModule_ReplyWithError(ctx,"ERR invalid range");
+    }
+
+    RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+        REDISMODULE_READ|REDISMODULE_WRITE);
+    if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_ZSET) {
+        RedisModule_CloseKey(key);
+        return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+    }
+
+    double scoresum_a = 0;
+    double scoresum_b = 0;
+
+    /* Forward pass over the score range. */
+    RedisModule_ZsetFirstInScoreRange(key,score_start,score_end,0,0);
+    while(!RedisModule_ZsetRangeEndReached(key)) {
+        double score;
+        RedisModuleString *ele = RedisModule_ZsetRangeCurrentElement(key,&score);
+        RedisModule_FreeString(ctx,ele);
+        scoresum_a += score;
+        RedisModule_ZsetRangeNext(key);
+    }
+    RedisModule_ZsetRangeStop(key);
+
+    /* Backward pass: should produce the same sum. */
+    RedisModule_ZsetLastInScoreRange(key,score_start,score_end,0,0);
+    while(!RedisModule_ZsetRangeEndReached(key)) {
+        double score;
+        RedisModuleString *ele = RedisModule_ZsetRangeCurrentElement(key,&score);
+        RedisModule_FreeString(ctx,ele);
+        scoresum_b += score;
+        RedisModule_ZsetRangePrev(key);
+    }
+
+    RedisModule_ZsetRangeStop(key);
+
+    RedisModule_CloseKey(key);
+
+    RedisModule_ReplyWithArray(ctx,2);
+    RedisModule_ReplyWithDouble(ctx,scoresum_a);
+    RedisModule_ReplyWithDouble(ctx,scoresum_b);
+    return REDISMODULE_OK;
+}
+
+/* HELLO.LEXRANGE key min_lex max_lex min_age max_age
+ * This command expects a sorted set stored at key in the following form:
+ * - All the elements have score 0.
+ * - Elements are pairs of "<name>:<age>", for example "Anna:52".
+ * The command will return all the sorted set items that are lexicographically
+ * between the specified range (using the same format as ZRANGEBYLEX)
+ * and having an age between min_age and max_age.
+ *
+ * NOTE(review): argv[4]/argv[5] (min_age/max_age) are only validated by the
+ * arity check below and are never read — the age filter described above is
+ * not actually applied by this implementation. Confirm whether this is the
+ * intended (simplified) example behavior. */
+int HelloLexRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+    if (argc != 6) return RedisModule_WrongArity(ctx);
+
+    /* With AutoMemory enabled the key handle is released automatically,
+     * so the early error returns below do not leak it. */
+    RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+        REDISMODULE_READ|REDISMODULE_WRITE);
+    if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_ZSET) {
+        return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+    }
+
+    if (RedisModule_ZsetFirstInLexRange(key,argv[2],argv[3]) != REDISMODULE_OK) {
+        return RedisModule_ReplyWithError(ctx,"invalid range");
+    }
+
+    /* The reply length is not known up front, so postpone it and set it
+     * once the iteration is complete. */
+    int arraylen = 0;
+    RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_LEN);
+    while(!RedisModule_ZsetRangeEndReached(key)) {
+        double score;
+        RedisModuleString *ele = RedisModule_ZsetRangeCurrentElement(key,&score);
+        RedisModule_ReplyWithString(ctx,ele);
+        RedisModule_FreeString(ctx,ele);
+        RedisModule_ZsetRangeNext(key);
+        arraylen++;
+    }
+    RedisModule_ZsetRangeStop(key);
+    RedisModule_ReplySetArrayLength(ctx,arraylen);
+    RedisModule_CloseKey(key);
+    return REDISMODULE_OK;
+}
+
+/* HELLO.HCOPY key srcfield dstfield
+ * This is just an example command that sets the hash field dstfield to the
+ * same value of srcfield. If srcfield does not exist no operation is
+ * performed.
+ *
+ * The command returns 1 if the copy is performed (srcfield exists) otherwise
+ * 0 is returned. */
+int HelloHCopy_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+    if (argc != 4) return RedisModule_WrongArity(ctx);
+    RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+        REDISMODULE_READ|REDISMODULE_WRITE);
+    /* An empty (missing) key is acceptable: the source field simply won't
+     * exist and the command will reply 0. */
+    int type = RedisModule_KeyType(key);
+    if (type != REDISMODULE_KEYTYPE_HASH &&
+        type != REDISMODULE_KEYTYPE_EMPTY)
+    {
+        return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+    }
+
+    /* Get the old field value. oldval is owned by AutoMemory, so it is not
+     * freed explicitly here. */
+    RedisModuleString *oldval;
+    RedisModule_HashGet(key,REDISMODULE_HASH_NONE,argv[2],&oldval,NULL);
+    if (oldval) {
+        RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[3],oldval,NULL);
+    }
+    /* Reply 1 when the source field existed and was copied, 0 otherwise. */
+    RedisModule_ReplyWithLongLong(ctx,oldval != NULL);
+    return REDISMODULE_OK;
+}
+
+/* HELLO.LEFTPAD str len ch
+ * This is an implementation of the infamous LEFTPAD function, that
+ * was at the center of an issue with the npm modules system in March 2016.
+ *
+ * LEFTPAD is a good example of using a Redis Modules API called
+ * "pool allocator", that was a famous way to allocate memory in yet another
+ * open source project, the Apache web server.
+ *
+ * The concept is very simple: there is memory that is useful to allocate
+ * only in the context of serving a request, and must be freed anyway when
+ * the callback implementing the command returns. So in that case the module
+ * does not need to retain a reference to these allocations, it is just
+ * required to free the memory before returning. When this is the case the
+ * module can call RedisModule_PoolAlloc() instead, that works like malloc()
+ * but will automatically free the memory when the module callback returns.
+ *
+ * Note that PoolAlloc() does not necessarily require AutoMemory to be
+ * active. */
+int HelloLeftPad_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+    long long padlen;
+
+    if (argc != 4) return RedisModule_WrongArity(ctx);
+
+    /* Reject non-numeric or negative pad lengths. */
+    if ((RedisModule_StringToLongLong(argv[2],&padlen) != REDISMODULE_OK) ||
+        (padlen< 0)) {
+        return RedisModule_ReplyWithError(ctx,"ERR invalid padding length");
+    }
+    /* NOTE(review): the local 'strlen' shadows the C library function of the
+     * same name within this scope; harmless here but easy to misread. */
+    size_t strlen, chlen;
+    const char *str = RedisModule_StringPtrLen(argv[1], &strlen);
+    const char *ch = RedisModule_StringPtrLen(argv[3], &chlen);
+
+    /* If the string is already larger than the target len, just return
+     * the string itself. */
+    if (strlen >= (size_t)padlen)
+        return RedisModule_ReplyWithString(ctx,argv[1]);
+
+    /* Padding must be a single character in this simple implementation. */
+    if (chlen != 1)
+        return RedisModule_ReplyWithError(ctx,
+            "ERR padding must be a single char");
+
+    /* Here we use our pool allocator, for our throw-away allocation.
+     * After this subtraction 'padlen' is the number of pad characters
+     * needed, so padlen+strlen equals the requested total length. */
+    padlen -= strlen;
+    char *buf = RedisModule_PoolAlloc(ctx,padlen+strlen);
+    for (long long j = 0; j < padlen; j++) buf[j] = *ch;
+    memcpy(buf+padlen,str,strlen);
+
+    RedisModule_ReplyWithStringBuffer(ctx,buf,padlen+strlen);
+    return REDISMODULE_OK;
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server.
+ *
+ * Each RedisModule_CreateCommand() call registers one command with its flag
+ * string ("readonly", "write deny-oom", ...) and the first-key/last-key/step
+ * triple used for key extraction; (0,0,0) means the command takes no keys.
+ *
+ * NOTE(review): hello.zsumrange and hello.lexrange are registered "readonly"
+ * but their implementations open the key with REDISMODULE_WRITE — confirm
+ * whether the flags or the open mode should be adjusted. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    if (RedisModule_Init(ctx,"helloworld",1,REDISMODULE_APIVER_1)
+        == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+    /* Log the list of parameters passing loading the module. */
+    for (int j = 0; j < argc; j++) {
+        const char *s = RedisModule_StringPtrLen(argv[j],NULL);
+        printf("Module loaded with ARGV[%d] = %s\n", j, s);
+    }
+
+    if (RedisModule_CreateCommand(ctx,"hello.simple",
+        HelloSimple_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.push.native",
+        HelloPushNative_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.push.call",
+        HelloPushCall_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.push.call2",
+        HelloPushCall2_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.list.sum.len",
+        HelloListSumLen_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.list.splice",
+        HelloListSplice_RedisCommand,"write deny-oom",1,2,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.list.splice.auto",
+        HelloListSpliceAuto_RedisCommand,
+        "write deny-oom",1,2,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.rand.array",
+        HelloRandArray_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.repl1",
+        HelloRepl1_RedisCommand,"write",0,0,0) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.repl2",
+        HelloRepl2_RedisCommand,"write",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.toggle.case",
+        HelloToggleCase_RedisCommand,"write",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.more.expire",
+        HelloMoreExpire_RedisCommand,"write",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.zsumrange",
+        HelloZsumRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.lexrange",
+        HelloLexRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.hcopy",
+        HelloHCopy_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    if (RedisModule_CreateCommand(ctx,"hello.leftpad",
+        HelloLeftPad_RedisCommand,"",1,1,1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    return REDISMODULE_OK;
+}
diff --git a/src/monotonic.c b/src/monotonic.c
new file mode 100644
index 0000000..1d71962
--- /dev/null
+++ b/src/monotonic.c
@@ -0,0 +1,180 @@
+#include "monotonic.h"
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+#undef NDEBUG
+#include <assert.h>
+
+
+/* The function pointer for clock retrieval. */
+monotime (*getMonotonicUs)(void) = NULL;
+
+static char monotonic_info_string[32];
+
+
+/* Using the processor clock (aka TSC on x86) can provide improved performance
+ * throughout Redis wherever the monotonic clock is used. The processor clock
+ * is significantly faster than calling 'clock_gettime' (POSIX). While this is
+ * generally safe on modern systems, this link provides additional information
+ * about use of the x86 TSC: http://oliveryang.net/2015/09/pitfalls-of-TSC-usage
+ *
+ * To use the processor clock, either uncomment this line, or build with
+ * CFLAGS="-DUSE_PROCESSOR_CLOCK"
+#define USE_PROCESSOR_CLOCK
+ */
+
+
+#if defined(USE_PROCESSOR_CLOCK) && defined(__x86_64__) && defined(__linux__)
+#include <regex.h>
+#include <x86intrin.h>
+
+/* TSC ticks per microsecond; 0 until successfully initialized. */
+static long mono_ticksPerMicrosecond = 0;
+
+/* Convert the raw TSC counter into microseconds. */
+static monotime getMonotonicUs_x86(void) {
+    return __rdtsc() / mono_ticksPerMicrosecond;
+}
+
+/* Parse /proc/cpuinfo to determine the TSC rate and verify constant_tsc.
+ * On any failure this function simply returns without setting
+ * getMonotonicUs, leaving the caller to fall back to the POSIX clock. */
+static void monotonicInit_x86linux(void) {
+    const int bufflen = 256;
+    char buf[bufflen];
+    regex_t cpuGhzRegex, constTscRegex;
+    const size_t nmatch = 2;
+    regmatch_t pmatch[nmatch];
+    int constantTsc = 0;
+    int rc;
+
+    /* Determine the number of TSC ticks in a micro-second. This is
+     * a constant value matching the standard speed of the processor.
+     * On modern processors, this speed remains constant even though
+     * the actual clock speed varies dynamically for each core. */
+    rc = regcomp(&cpuGhzRegex, "^model name\\s+:.*@ ([0-9.]+)GHz", REG_EXTENDED);
+    assert(rc == 0);
+
+    /* Also check that the constant_tsc flag is present. (It should be
+     * unless this is a really old CPU.) */
+    rc = regcomp(&constTscRegex, "^flags\\s+:.* constant_tsc", REG_EXTENDED);
+    assert(rc == 0);
+
+    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
+    if (cpuinfo != NULL) {
+        while (fgets(buf, bufflen, cpuinfo) != NULL) {
+            if (regexec(&cpuGhzRegex, buf, nmatch, pmatch, 0) == 0) {
+                buf[pmatch[1].rm_eo] = '\0';
+                /* GHz * 1000 == ticks per microsecond. */
+                double ghz = atof(&buf[pmatch[1].rm_so]);
+                mono_ticksPerMicrosecond = (long)(ghz * 1000);
+                break;
+            }
+        }
+        /* NOTE(review): this second scan resumes from the current file
+         * position, so it assumes the 'flags' line appears after the
+         * 'model name' line in /proc/cpuinfo — confirm for all kernels. */
+        while (fgets(buf, bufflen, cpuinfo) != NULL) {
+            if (regexec(&constTscRegex, buf, nmatch, pmatch, 0) == 0) {
+                constantTsc = 1;
+                break;
+            }
+        }
+
+        fclose(cpuinfo);
+    }
+    regfree(&cpuGhzRegex);
+    regfree(&constTscRegex);
+
+    if (mono_ticksPerMicrosecond == 0) {
+        fprintf(stderr, "monotonic: x86 linux, unable to determine clock rate");
+        return;
+    }
+    if (!constantTsc) {
+        fprintf(stderr, "monotonic: x86 linux, 'constant_tsc' flag not present");
+        return;
+    }
+
+    snprintf(monotonic_info_string, sizeof(monotonic_info_string),
+            "X86 TSC @ %ld ticks/us", mono_ticksPerMicrosecond);
+    getMonotonicUs = getMonotonicUs_x86;
+}
+#endif
+
+
+#if defined(USE_PROCESSOR_CLOCK) && defined(__aarch64__)
+/* Counter-timer ticks per microsecond; 0 until successfully initialized. */
+static long mono_ticksPerMicrosecond = 0;
+
+/* Read the clock value. */
+static inline uint64_t __cntvct(void) {
+    uint64_t virtual_timer_value;
+    __asm__ volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
+    return virtual_timer_value;
+}
+
+/* Read the Count-timer Frequency. */
+static inline uint32_t cntfrq_hz(void) {
+    uint64_t virtual_freq_value;
+    __asm__ volatile("mrs %0, cntfrq_el0" : "=r"(virtual_freq_value));
+    return (uint32_t)virtual_freq_value; /* top 32 bits are reserved */
+}
+
+/* Convert the raw virtual counter into microseconds. */
+static monotime getMonotonicUs_aarch64(void) {
+    return __cntvct() / mono_ticksPerMicrosecond;
+}
+
+/* Derive ticks/us from the hardware-reported counter frequency (Hz).
+ * If the frequency is below 1MHz the integer division yields 0 and we
+ * bail out, leaving getMonotonicUs unset so the POSIX fallback is used. */
+static void monotonicInit_aarch64(void) {
+    mono_ticksPerMicrosecond = (long)cntfrq_hz() / 1000L / 1000L;
+    if (mono_ticksPerMicrosecond == 0) {
+        fprintf(stderr, "monotonic: aarch64, unable to determine clock rate");
+        return;
+    }
+
+    snprintf(monotonic_info_string, sizeof(monotonic_info_string),
+            "ARM CNTVCT @ %ld ticks/us", mono_ticksPerMicrosecond);
+    getMonotonicUs = getMonotonicUs_aarch64;
+}
+#endif
+
+
+/* Portable fallback: read CLOCK_MONOTONIC and convert to microseconds. */
+static monotime getMonotonicUs_posix(void) {
+    /* clock_gettime() is specified in POSIX.1b (1993). Even so, some systems
+     * did not support this until much later. CLOCK_MONOTONIC is technically
+     * optional and may not be supported - but it appears to be universal.
+     * If this is not supported, provide a system-specific alternate version. */
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return ((uint64_t)ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
+}
+
+/* Verify CLOCK_MONOTONIC works, then install the POSIX clock function. */
+static void monotonicInit_posix(void) {
+    /* Ensure that CLOCK_MONOTONIC is supported. This should be supported
+     * on any reasonably current OS. If the assertion below fails, provide
+     * an appropriate alternate implementation. */
+    struct timespec ts;
+    int rc = clock_gettime(CLOCK_MONOTONIC, &ts);
+    assert(rc == 0);
+
+    snprintf(monotonic_info_string, sizeof(monotonic_info_string),
+            "POSIX clock_gettime");
+    getMonotonicUs = getMonotonicUs_posix;
+}
+
+
+
+/* Pick a clock implementation, preferring the hardware counters when
+ * compiled in, and fall back to POSIX clock_gettime(). Each init helper
+ * only installs getMonotonicUs on success, so the checks below naturally
+ * cascade. Idempotent: subsequent calls are no-ops once a clock is set. */
+const char * monotonicInit(void) {
+    #if defined(USE_PROCESSOR_CLOCK) && defined(__x86_64__) && defined(__linux__)
+    if (getMonotonicUs == NULL) monotonicInit_x86linux();
+    #endif
+
+    #if defined(USE_PROCESSOR_CLOCK) && defined(__aarch64__)
+    if (getMonotonicUs == NULL) monotonicInit_aarch64();
+    #endif
+
+    if (getMonotonicUs == NULL) monotonicInit_posix();
+
+    return monotonic_info_string;
+}
+
+/* Return the human-readable description set during monotonicInit(). */
+const char *monotonicInfoString(void) {
+    return monotonic_info_string;
+}
+
+/* Classify the installed clock by comparing the function pointer against
+ * the POSIX implementation; anything else is a hardware counter. */
+monotonic_clock_type monotonicGetType(void) {
+    if (getMonotonicUs == getMonotonicUs_posix)
+        return MONOTONIC_CLOCK_POSIX;
+    return MONOTONIC_CLOCK_HW;
+}
diff --git a/src/monotonic.h b/src/monotonic.h
new file mode 100644
index 0000000..b465f90
--- /dev/null
+++ b/src/monotonic.h
@@ -0,0 +1,61 @@
+#ifndef __MONOTONIC_H
+#define __MONOTONIC_H
+/* The monotonic clock is an always increasing clock source. It is unrelated to
+ * the actual time of day and should only be used for relative timings. The
+ * monotonic clock is also not guaranteed to be chronologically precise; there
+ * may be slight skew/shift from a precise clock.
+ *
+ * Depending on system architecture, the monotonic time may be able to be
+ * retrieved much faster than a normal clock source by using an instruction
+ * counter on the CPU. On x86 architectures (for example), the RDTSC
+ * instruction is a very fast clock source for this purpose.
+ */
+
+#include "fmacros.h"
+#include <stdint.h>
+#include <unistd.h>
+
+/* A counter in micro-seconds. The 'monotime' type is provided for variables
+ * holding a monotonic time. This will help distinguish & document that the
+ * variable is associated with the monotonic clock and should not be confused
+ * with other types of time.*/
+typedef uint64_t monotime;
+
+/* Retrieve counter of micro-seconds relative to an arbitrary point in time. */
+extern monotime (*getMonotonicUs)(void);
+
+typedef enum monotonic_clock_type {
+ MONOTONIC_CLOCK_POSIX,
+ MONOTONIC_CLOCK_HW,
+} monotonic_clock_type;
+
+/* Call once at startup to initialize the monotonic clock. Though this only
+ * needs to be called once, it may be called additional times without impact.
+ * Returns a printable string indicating the type of clock initialized.
+ * (The returned string is static and doesn't need to be freed.) */
+const char *monotonicInit(void);
+
+/* Return a string indicating the type of monotonic clock being used. */
+const char *monotonicInfoString(void);
+
+/* Return the type of monotonic clock being used. */
+monotonic_clock_type monotonicGetType(void);
+
+/* Functions to measure elapsed time. Example:
+ * monotime myTimer;
+ * elapsedStart(&myTimer);
+ * while (elapsedMs(myTimer) < 10) {} // loops for 10ms
+ *
+ * NOTE: these helpers call through getMonotonicUs, so monotonicInit()
+ * must have been called first. */
+
+/* Record the current monotonic time as the start of a measurement. */
+static inline void elapsedStart(monotime *start_time) {
+    *start_time = getMonotonicUs();
+}
+
+/* Microseconds elapsed since 'start_time'. */
+static inline uint64_t elapsedUs(monotime start_time) {
+    return getMonotonicUs() - start_time;
+}
+
+/* Milliseconds elapsed since 'start_time' (truncated). */
+static inline uint64_t elapsedMs(monotime start_time) {
+    return elapsedUs(start_time) / 1000;
+}
+
+#endif
diff --git a/src/mt19937-64.c b/src/mt19937-64.c
new file mode 100644
index 0000000..a0c897f
--- /dev/null
+++ b/src/mt19937-64.c
@@ -0,0 +1,187 @@
+/*
+ A C-program for MT19937-64 (2004/9/29 version).
+ Coded by Takuji Nishimura and Makoto Matsumoto.
+
+ This is a 64-bit version of Mersenne Twister pseudorandom number
+ generator.
+
+ Before using, initialize the state by using init_genrand64(seed)
+ or init_by_array64(init_key, key_length).
+
+ Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura,
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. The names of its contributors may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ References:
+ T. Nishimura, ``Tables of 64-bit Mersenne Twisters''
+ ACM Transactions on Modeling and
+ Computer Simulation 10. (2000) 348--357.
+ M. Matsumoto and T. Nishimura,
+ ``Mersenne Twister: a 623-dimensionally equidistributed
+ uniform pseudorandom number generator''
+ ACM Transactions on Modeling and
+ Computer Simulation 8. (Jan. 1998) 3--30.
+
+ Any feedback is very welcome.
+ http://www.math.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+ email: m-mat @ math.sci.hiroshima-u.ac.jp (remove spaces)
+*/
+
+
+#include "mt19937-64.h"
+#include <stdio.h>
+
+#define NN 312
+#define MM 156
+#define MATRIX_A 0xB5026F5AA96619E9ULL
+#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */
+#define LM 0x7FFFFFFFULL /* Least significant 31 bits */
+
+
+/* The array for the state vector */
+static unsigned long long mt[NN];
+/* mti==NN+1 means mt[NN] is not initialized */
+static int mti=NN+1;
+
+/* initializes mt[NN] with a seed
+ * Each state word is derived from the previous one via the recurrence from
+ * the reference MT19937-64 implementation; setting mti=NN as a side effect
+ * marks the state as initialized (see genrand64_int64). */
+void init_genrand64(unsigned long long seed)
+{
+    mt[0] = seed;
+    for (mti=1; mti<NN; mti++)
+        mt[mti] =  (6364136223846793005ULL * (mt[mti-1] ^ (mt[mti-1] >> 62)) + mti);
+}
+
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+/* Seeds with a fixed constant first, then mixes every key word into the
+ * state (two non-linear passes), wrapping around the state as needed. */
+void init_by_array64(unsigned long long init_key[],
+		     unsigned long long key_length)
+{
+    unsigned long long i, j, k;
+    init_genrand64(19650218ULL);
+    i=1; j=0;
+    k = (NN>key_length ? NN : key_length);
+    for (; k; k--) {
+        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 62)) * 3935559000370003845ULL))
+          + init_key[j] + j; /* non linear */
+        i++; j++;
+        if (i>=NN) { mt[0] = mt[NN-1]; i=1; }
+        if (j>=key_length) j=0;
+    }
+    for (k=NN-1; k; k--) {
+        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 62)) * 2862933555777941757ULL))
+          - i; /* non linear */
+        i++;
+        if (i>=NN) { mt[0] = mt[NN-1]; i=1; }
+    }
+
+    mt[0] = 1ULL << 63; /* MSB is 1; assuring non-zero initial array */
+}
+
+/* generates a random number on [0, 2^64-1]-interval
+ * Regenerates the whole NN-word state block lazily when exhausted, then
+ * returns the next word after tempering. Not thread-safe: mt[]/mti are
+ * shared static state with no locking. */
+unsigned long long genrand64_int64(void)
+{
+    int i;
+    unsigned long long x;
+    static unsigned long long mag01[2]={0ULL, MATRIX_A};
+
+    if (mti >= NN) { /* generate NN words at one time */
+
+        /* if init_genrand64() has not been called, */
+        /* a default initial seed is used     */
+        if (mti == NN+1)
+            init_genrand64(5489ULL);
+
+        for (i=0;i<NN-MM;i++) {
+            x = (mt[i]&UM)|(mt[i+1]&LM);
+            mt[i] = mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+        }
+        for (;i<NN-1;i++) {
+            x = (mt[i]&UM)|(mt[i+1]&LM);
+            mt[i] = mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+        }
+        x = (mt[NN-1]&UM)|(mt[0]&LM);
+        mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+
+        mti = 0;
+    }
+
+    x = mt[mti++];
+
+    /* Tempering: improves equidistribution of the output bits. */
+    x ^= (x >> 29) & 0x5555555555555555ULL;
+    x ^= (x << 17) & 0x71D67FFFEDA60000ULL;
+    x ^= (x << 37) & 0xFFF7EEE000000000ULL;
+    x ^= (x >> 43);
+
+    return x;
+}
+
+/* generates a random number on [0, 2^63-1]-interval */
+long long genrand64_int63(void)
+{
+    return (long long)(genrand64_int64() >> 1);
+}
+
+/* generates a random number on [0,1]-real-interval
+ * (53-bit resolution; divisor is 2^53-1 so 1.0 is reachable) */
+double genrand64_real1(void)
+{
+    return (genrand64_int64() >> 11) * (1.0/9007199254740991.0);
+}
+
+/* generates a random number on [0,1)-real-interval
+ * (divisor is 2^53, so 1.0 is excluded) */
+double genrand64_real2(void)
+{
+    return (genrand64_int64() >> 11) * (1.0/9007199254740992.0);
+}
+
+/* generates a random number on (0,1)-real-interval
+ * NOTE(review): the header also declares genrand64_real4(), but no
+ * definition appears in this file — confirm it is defined elsewhere or
+ * drop the declaration. */
+double genrand64_real3(void)
+{
+    return ((genrand64_int64() >> 12) + 0.5) * (1.0/4503599627370496.0);
+}
+
+#ifdef MT19937_64_MAIN
+/* Standalone self-test driver from the reference implementation: prints
+ * 1000 integer and 1000 real outputs from a fixed array seed, for manual
+ * comparison against the published expected output. Compiled only when
+ * MT19937_64_MAIN is defined. */
+int main(void)
+{
+    int i;
+    unsigned long long init[4]={0x12345ULL, 0x23456ULL, 0x34567ULL, 0x45678ULL}, length=4;
+    init_by_array64(init, length);
+    printf("1000 outputs of genrand64_int64()\n");
+    for (i=0; i<1000; i++) {
+      printf("%20llu ", genrand64_int64());
+      if (i%5==4) printf("\n");
+    }
+    printf("\n1000 outputs of genrand64_real2()\n");
+    for (i=0; i<1000; i++) {
+      printf("%10.8f ", genrand64_real2());
+      if (i%5==4) printf("\n");
+    }
+    return 0;
+}
+#endif
diff --git a/src/mt19937-64.h b/src/mt19937-64.h
new file mode 100644
index 0000000..b98348f
--- /dev/null
+++ b/src/mt19937-64.h
@@ -0,0 +1,87 @@
+/*
+ A C-program for MT19937-64 (2004/9/29 version).
+ Coded by Takuji Nishimura and Makoto Matsumoto.
+
+ This is a 64-bit version of Mersenne Twister pseudorandom number
+ generator.
+
+ Before using, initialize the state by using init_genrand64(seed)
+ or init_by_array64(init_key, key_length).
+
+ Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura,
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. The names of its contributors may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ References:
+ T. Nishimura, ``Tables of 64-bit Mersenne Twisters''
+ ACM Transactions on Modeling and
+ Computer Simulation 10. (2000) 348--357.
+ M. Matsumoto and T. Nishimura,
+ ``Mersenne Twister: a 623-dimensionally equidistributed
+ uniform pseudorandom number generator''
+ ACM Transactions on Modeling and
+ Computer Simulation 8. (Jan. 1998) 3--30.
+
+ Any feedback is very welcome.
+ http://www.math.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+ email: m-mat @ math.sci.hiroshima-u.ac.jp (remove spaces)
+*/
+
+#ifndef __MT19937_64_H
+#define __MT19937_64_H
+
+/* initializes mt[NN] with a seed */
+void init_genrand64(unsigned long long seed);
+
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+void init_by_array64(unsigned long long init_key[],
+		     unsigned long long key_length);
+
+/* generates a random number on [0, 2^64-1]-interval */
+unsigned long long genrand64_int64(void);
+
+
+/* generates a random number on [0, 2^63-1]-interval */
+long long genrand64_int63(void);
+
+/* generates a random number on [0,1]-real-interval */
+double genrand64_real1(void);
+
+/* generates a random number on [0,1)-real-interval */
+double genrand64_real2(void);
+
+/* generates a random number on (0,1)-real-interval */
+double genrand64_real3(void);
+
+/* generates a random number on (0,1]-real-interval
+ * NOTE(review): declared here but not defined in mt19937-64.c in this
+ * patch — callers would fail at link time. Confirm a definition exists
+ * or remove this declaration. */
+double genrand64_real4(void);
+
+#endif
diff --git a/src/multi.c b/src/multi.c
new file mode 100644
index 0000000..65d502c
--- /dev/null
+++ b/src/multi.c
@@ -0,0 +1,500 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/* ================================ MULTI/EXEC ============================== */
+
+/* Client state initialization for MULTI/EXEC.
+ * Resets the queued-command array and all aggregate bookkeeping (count,
+ * combined command flags, queued-argv memory accounting, capacity). Does
+ * not free anything — pair with freeClientMultiState() for that. */
+void initClientMultiState(client *c) {
+    c->mstate.commands = NULL;
+    c->mstate.count = 0;
+    c->mstate.cmd_flags = 0;
+    c->mstate.cmd_inv_flags = 0;
+    c->mstate.argv_len_sums = 0;
+    c->mstate.alloc_count = 0;
+}
+
+/* Release all the resources associated with MULTI/EXEC state:
+ * every queued command's argv objects (ref-counted) and vector, then the
+ * command array itself. Note this does not reset the mstate fields;
+ * callers (see discardTransaction) follow up with initClientMultiState(). */
+void freeClientMultiState(client *c) {
+    int j;
+
+    for (j = 0; j < c->mstate.count; j++) {
+        int i;
+        multiCmd *mc = c->mstate.commands+j;
+
+        for (i = 0; i < mc->argc; i++)
+            decrRefCount(mc->argv[i]);
+        zfree(mc->argv);
+    }
+    zfree(c->mstate.commands);
+}
+
+/* Add a new command into the MULTI commands queue.
+ * Takes ownership of the client's current argv (moved, not copied, into
+ * mstate) and accumulates the command's flags and memory usage for later
+ * inspection at EXEC time. 'cmd_flags' are the flags of the command being
+ * queued. */
+void queueMultiCommand(client *c, uint64_t cmd_flags) {
+    multiCmd *mc;
+
+    /* No sense to waste memory if the transaction is already aborted.
+     * this is useful in case client sends these in a pipeline, or doesn't
+     * bother to read previous responses and didn't notice the multi was already
+     * aborted. */
+    if (c->flags & (CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC))
+        return;
+    if (c->mstate.count == 0) {
+        /* If a client is using multi/exec, assuming it is used to execute at least
+         * two commands. Hence, creating by default size of 2. */
+        c->mstate.commands = zmalloc(sizeof(multiCmd)*2);
+        c->mstate.alloc_count = 2;
+    }
+    if (c->mstate.count == c->mstate.alloc_count) {
+        /* Grow geometrically, capped at INT_MAX to avoid overflow. */
+        c->mstate.alloc_count = c->mstate.alloc_count < INT_MAX/2 ? c->mstate.alloc_count*2 : INT_MAX;
+        c->mstate.commands = zrealloc(c->mstate.commands, sizeof(multiCmd)*(c->mstate.alloc_count));
+    }
+    mc = c->mstate.commands+c->mstate.count;
+    mc->cmd = c->cmd;
+    mc->argc = c->argc;
+    mc->argv = c->argv;
+    mc->argv_len = c->argv_len;
+
+    c->mstate.count++;
+    /* cmd_flags ORs together flags present on any queued command;
+     * cmd_inv_flags ORs the complements, i.e. flags missing on any. */
+    c->mstate.cmd_flags |= cmd_flags;
+    c->mstate.cmd_inv_flags |= ~cmd_flags;
+    c->mstate.argv_len_sums += c->argv_len_sum + sizeof(robj*)*c->argc;
+
+    /* Reset the client's args since we copied them into the mstate and shouldn't
+     * reference them from c anymore. */
+    c->argv = NULL;
+    c->argc = 0;
+    c->argv_len_sum = 0;
+    c->argv_len = 0;
+}
+
+/* Abandon the in-progress transaction: free and reset the queued commands,
+ * clear the MULTI/dirty flags, and stop WATCHing all keys. */
+void discardTransaction(client *c) {
+    freeClientMultiState(c);
+    initClientMultiState(c);
+    c->flags &= ~(CLIENT_MULTI|CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC);
+    unwatchAllKeys(c);
+}
+
+/* Flag the transaction as DIRTY_EXEC so that EXEC will fail.
+ * Should be called every time there is an error while queueing a command.
+ * No-op when the client is not inside a MULTI block. */
+void flagTransaction(client *c) {
+    if (c->flags & CLIENT_MULTI)
+        c->flags |= CLIENT_DIRTY_EXEC;
+}
+
+/* MULTI command implementation: enter transaction mode. Nesting is an
+ * error; subsequent commands are queued until EXEC or DISCARD. */
+void multiCommand(client *c) {
+    if (c->flags & CLIENT_MULTI) {
+        addReplyError(c,"MULTI calls can not be nested");
+        return;
+    }
+    c->flags |= CLIENT_MULTI;
+
+    addReply(c,shared.ok);
+}
+
+/* DISCARD command implementation: abort the current transaction.
+ * It is an error to call DISCARD outside of a MULTI block. */
+void discardCommand(client *c) {
+    if (!(c->flags & CLIENT_MULTI)) {
+        addReplyError(c,"DISCARD without MULTI");
+        return;
+    }
+    discardTransaction(c);
+    addReply(c,shared.ok);
+}
+
+/* Aborts a transaction, with a specific error message.
+ * The transaction is always aborted with -EXECABORT so that the client knows
+ * the server exited the multi state, but the actual reason for the abort is
+ * included too.
+ * Note: 'error' may or may not end with \r\n. see addReplyErrorFormat. */
+void execCommandAbort(client *c, sds error) {
+    discardTransaction(c);
+
+    /* Skip a leading '-' so it isn't duplicated in the -EXECABORT reply. */
+    if (error[0] == '-') error++;
+    addReplyErrorFormat(c, "-EXECABORT Transaction discarded because of: %s", error);
+
+    /* Send EXEC to clients waiting data from MONITOR. We did send a MULTI
+     * already, and didn't send any of the queued commands, now we'll just send
+     * EXEC so it is clear that the transaction is over. */
+    replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
+}
+
+/* EXEC command implementation: run every queued command of the transaction
+ * in order, replying with one array element per command. The transaction is
+ * aborted up front if a WATCHed key was touched/expired (null array reply)
+ * or a queueing error occurred (-EXECABORT). The client's argv/argc/cmd are
+ * swapped per queued command and restored afterwards. */
+void execCommand(client *c) {
+    int j;
+    robj **orig_argv;
+    int orig_argc, orig_argv_len;
+    struct redisCommand *orig_cmd;
+
+    if (!(c->flags & CLIENT_MULTI)) {
+        addReplyError(c,"EXEC without MULTI");
+        return;
+    }
+
+    /* EXEC with expired watched key is disallowed. */
+    if (isWatchedKeyExpired(c)) {
+        c->flags |= (CLIENT_DIRTY_CAS);
+    }
+
+    /* Check if we need to abort the EXEC because:
+     * 1) Some WATCHed key was touched.
+     * 2) There was a previous error while queueing commands.
+     * A failed EXEC in the first case returns a multi bulk nil object
+     * (technically it is not an error but a special behavior), while
+     * in the second an EXECABORT error is returned. */
+    if (c->flags & (CLIENT_DIRTY_CAS | CLIENT_DIRTY_EXEC)) {
+        if (c->flags & CLIENT_DIRTY_EXEC) {
+            addReplyErrorObject(c, shared.execaborterr);
+        } else {
+            addReply(c, shared.nullarray[c->resp]);
+        }
+
+        discardTransaction(c);
+        return;
+    }
+
+    uint64_t old_flags = c->flags;
+
+    /* we do not want to allow blocking commands inside multi */
+    c->flags |= CLIENT_DENY_BLOCKING;
+
+    /* Exec all the queued commands */
+    unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */
+
+    server.in_exec = 1;
+
+    /* Save the client's current command context so it can be restored
+     * after the queued commands have been executed. */
+    orig_argv = c->argv;
+    orig_argv_len = c->argv_len;
+    orig_argc = c->argc;
+    orig_cmd = c->cmd;
+    addReplyArrayLen(c,c->mstate.count);
+    for (j = 0; j < c->mstate.count; j++) {
+        c->argc = c->mstate.commands[j].argc;
+        c->argv = c->mstate.commands[j].argv;
+        c->argv_len = c->mstate.commands[j].argv_len;
+        c->cmd = c->realcmd = c->mstate.commands[j].cmd;
+
+        /* ACL permissions are also checked at the time of execution in case
+         * they were changed after the commands were queued. */
+        int acl_errpos;
+        int acl_retval = ACLCheckAllPerm(c,&acl_errpos);
+        if (acl_retval != ACL_OK) {
+            char *reason;
+            switch (acl_retval) {
+            case ACL_DENIED_CMD:
+                reason = "no permission to execute the command or subcommand";
+                break;
+            case ACL_DENIED_KEY:
+                reason = "no permission to touch the specified keys";
+                break;
+            case ACL_DENIED_CHANNEL:
+                reason = "no permission to access one of the channels used "
+                         "as arguments";
+                break;
+            default:
+                reason = "no permission";
+                break;
+            }
+            addACLLogEntry(c,acl_retval,ACL_LOG_CTX_MULTI,acl_errpos,NULL,NULL);
+            addReplyErrorFormat(c,
+                "-NOPERM ACLs rules changed between the moment the "
+                "transaction was accumulated and the EXEC call. "
+                "This command is no longer allowed for the "
+                "following reason: %s", reason);
+        } else {
+            /* AOF loading replays commands without propagation/monitors. */
+            if (c->id == CLIENT_ID_AOF)
+                call(c,CMD_CALL_NONE);
+            else
+                call(c,CMD_CALL_FULL);
+
+            serverAssert((c->flags & CLIENT_BLOCKED) == 0);
+        }
+
+        /* Commands may alter argc/argv, restore mstate. */
+        c->mstate.commands[j].argc = c->argc;
+        c->mstate.commands[j].argv = c->argv;
+        c->mstate.commands[j].argv_len = c->argv_len;
+        c->mstate.commands[j].cmd = c->cmd;
+    }
+
+    // restore old DENY_BLOCKING value
+    if (!(old_flags & CLIENT_DENY_BLOCKING))
+        c->flags &= ~CLIENT_DENY_BLOCKING;
+
+    c->argv = orig_argv;
+    c->argv_len = orig_argv_len;
+    c->argc = orig_argc;
+    c->cmd = c->realcmd = orig_cmd;
+    discardTransaction(c);
+
+    server.in_exec = 0;
+}
+
+/* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
+ *
+ * The implementation uses a per-DB hash table mapping keys to lists of clients
+ * WATCHing those keys, so that given a key that is going to be modified
+ * we can mark all the associated clients as dirty.
+ *
+ * Also every client contains a list of WATCHed keys, so that it is possible to
+ * un-watch such keys when the client is freed or when UNWATCH is called. */
+
+/* The watchedKey struct is included in two lists: the client->watched_keys list,
+ * and db->watched_keys dict (each value in that dict is a list of watchedKey structs).
+ * The list in the client struct is a plain list, where each node's value is a pointer to a watchedKey.
+ * The list in db->watched_keys is different: the listNode member that's embedded in this struct
+ * is the node in the dict's list. The value inside that listNode is a pointer to that list, and we can use
+ * struct member offset math to get from the listNode to the watchedKey struct.
+ * This is done to avoid the need for listSearchKey and dictFind when we remove from the list. */
+typedef struct watchedKey {
+ listNode node;
+ robj *key;
+ redisDb *db;
+ client *client;
+ unsigned expired:1; /* Flag that we're watching an already expired key. */
+} watchedKey;
+
+/* Attach a watchedKey to the list of clients watching that key, by linking
+ * the listNode embedded in the watchedKey itself (no extra allocation). */
+static inline void watchedKeyLinkToClients(list *clients, watchedKey *wk) {
+ wk->node.value = clients; /* Point the value back to the list */
+ listLinkNodeTail(clients, &wk->node); /* Link the embedded node */
+}
+
+/* Get the list of clients watching that key (stored in the embedded node's
+ * value field by watchedKeyLinkToClients). */
+static inline list *watchedKeyGetClients(watchedKey *wk) {
+ return listNodeValue(&wk->node); /* embedded node->value points back to the list */
+}
+
+/* Get the node with wk->client in the list of clients watching that key. Actually it
+ * is just the embedded node. */
+static inline listNode *watchedKeyGetClientNode(watchedKey *wk) {
+ return &wk->node;
+}
+
+/* Watch for the specified key: register 'key' in both the client's
+ * watched_keys list and the per-DB db->watched_keys dict. No-op if the
+ * client already watches the key in the current DB. */
+void watchForKey(client *c, robj *key) {
+ list *clients = NULL;
+ listIter li;
+ listNode *ln;
+ watchedKey *wk;
+
+ /* Check if we are already watching for this key. Note this is a linear
+ * scan of the keys this client watches. */
+ listRewind(c->watched_keys,&li);
+ while((ln = listNext(&li))) {
+ wk = listNodeValue(ln);
+ if (wk->db == c->db && equalStringObjects(key,wk->key))
+ return; /* Key already watched */
+ }
+ /* This key is not already watched in this DB. Let's add it */
+ clients = dictFetchValue(c->db->watched_keys,key);
+ if (!clients) {
+ clients = listCreate();
+ dictAdd(c->db->watched_keys,key,clients);
+ incrRefCount(key); /* Reference held by the dict key. */
+ }
+ /* Add the new key to the list of keys watched by this client */
+ wk = zmalloc(sizeof(*wk));
+ wk->key = key;
+ wk->client = c;
+ wk->db = c->db;
+ wk->expired = keyIsExpired(c->db, key);
+ incrRefCount(key); /* Reference held by the watchedKey struct. */
+ listAddNodeTail(c->watched_keys, wk);
+ watchedKeyLinkToClients(clients, wk);
+}
+
+/* Unwatch all the keys watched by this client. To clean the EXEC dirty
+ * flag is up to the caller. Frees each watchedKey and drops its key
+ * reference; empty per-key client lists are removed from the DB dict. */
+void unwatchAllKeys(client *c) {
+ listIter li;
+ listNode *ln;
+
+ if (listLength(c->watched_keys) == 0) return;
+ listRewind(c->watched_keys,&li);
+ while((ln = listNext(&li))) {
+ list *clients;
+ watchedKey *wk;
+
+ /* Remove the client's wk from the list of clients watching the key. */
+ wk = listNodeValue(ln);
+ clients = watchedKeyGetClients(wk);
+ serverAssertWithInfo(c,NULL,clients != NULL);
+ listUnlinkNode(clients, watchedKeyGetClientNode(wk));
+ /* Kill the entry at all if this was the only client */
+ if (listLength(clients) == 0)
+ dictDelete(wk->db->watched_keys, wk->key);
+ /* Remove this watched key from the client->watched list */
+ listDelNode(c->watched_keys,ln);
+ decrRefCount(wk->key);
+ zfree(wk);
+ }
+}
+
+/* Iterates over the watched_keys list and looks for an expired key. Keys which
+ * were expired already when WATCH was called are ignored (wk->expired set).
+ * Returns 1 if at least one watched key expired since WATCH, 0 otherwise. */
+int isWatchedKeyExpired(client *c) {
+ listIter li;
+ listNode *ln;
+ watchedKey *wk;
+ if (listLength(c->watched_keys) == 0) return 0;
+ listRewind(c->watched_keys,&li);
+ while ((ln = listNext(&li))) {
+ wk = listNodeValue(ln);
+ if (wk->expired) continue; /* was expired when WATCH was called */
+ if (keyIsExpired(wk->db, wk->key)) return 1;
+ }
+
+ return 0;
+}
+
+/* "Touch" a key, so that if this key is being WATCHed by some client the
+ * next EXEC will fail.
+ *
+ * Special case: if the watcher registered the key while it was already
+ * expired (wk->expired), then the lazy-expire deletion of that key is not a
+ * logical change and must not dirty the client; but overwriting it (the key
+ * still exists in the dict while being touched) logically creates a key that
+ * the watcher saw as non-existing, so the client must be dirtied. */
+void touchWatchedKey(redisDb *db, robj *key) {
+ list *clients;
+ listIter li;
+ listNode *ln;
+
+ if (dictSize(db->watched_keys) == 0) return;
+ clients = dictFetchValue(db->watched_keys, key);
+ if (!clients) return;
+
+ /* Mark all the clients watching this key as CLIENT_DIRTY_CAS */
+ listRewind(clients,&li);
+ while((ln = listNext(&li))) {
+ watchedKey *wk = redis_member2struct(watchedKey, node, ln);
+ client *c = wk->client;
+
+ if (wk->expired) {
+ /* The key was already expired when WATCH was called. */
+ if (db == wk->db &&
+ equalStringObjects(key, wk->key) &&
+ dictFind(db->dict, key->ptr) == NULL)
+ {
+ /* Already expired key is deleted, so logically no change. Clear
+ * the flag. Deleted keys are not flagged as expired. */
+ wk->expired = 0;
+ goto skip_client;
+ }
+ /* Otherwise the expired key still exists and is being
+ * overwritten: from the watcher's perspective a logically
+ * non-existing key is created, so fall through and mark the
+ * client dirty. (A 'break' here would wrongly skip this and
+ * every remaining watcher of this key.) */
+ }
+
+ c->flags |= CLIENT_DIRTY_CAS;
+ /* As the client is marked as dirty, there is no point in getting here
+ * again in case that key (or others) are modified again (or keep the
+ * memory overhead till EXEC). */
+ unwatchAllKeys(c);
+
+ skip_client:
+ continue;
+ }
+}
+
+/* Set CLIENT_DIRTY_CAS to all clients of DB when DB is dirty.
+ * It may happen in the following situations:
+ * FLUSHDB, FLUSHALL, SWAPDB, end of successful diskless replication.
+ *
+ * replaced_with: for SWAPDB, the WATCH should be invalidated if
+ * the key exists in either of them, and skipped only if it
+ * doesn't exist in both. For the other cases replaced_with is NULL. */
+void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) {
+ listIter li;
+ listNode *ln;
+ dictEntry *de;
+
+ if (dictSize(emptied->watched_keys) == 0) return;
+
+ /* Safe iterator: dict entries are not deleted while iterating (see note
+ * below about not calling unwatchAllKeys here). */
+ dictIterator *di = dictGetSafeIterator(emptied->watched_keys);
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ int exists_in_emptied = dictFind(emptied->dict, key->ptr) != NULL;
+ if (exists_in_emptied ||
+ (replaced_with && dictFind(replaced_with->dict, key->ptr)))
+ {
+ list *clients = dictGetVal(de);
+ if (!clients) continue;
+ listRewind(clients,&li);
+ while((ln = listNext(&li))) {
+ watchedKey *wk = redis_member2struct(watchedKey, node, ln);
+ if (wk->expired) {
+ if (!replaced_with || !dictFind(replaced_with->dict, key->ptr)) {
+ /* Expired key now deleted. No logical change. Clear the
+ * flag. Deleted keys are not flagged as expired. */
+ wk->expired = 0;
+ continue;
+ } else if (keyIsExpired(replaced_with, key)) {
+ /* Expired key remains expired. */
+ continue;
+ }
+ } else if (!exists_in_emptied && keyIsExpired(replaced_with, key)) {
+ /* Non-existing key is replaced with an expired key. */
+ wk->expired = 1;
+ continue;
+ }
+ client *c = wk->client;
+ c->flags |= CLIENT_DIRTY_CAS;
+ /* Note - we could potentially call unwatchAllKeys for this specific client in order to reduce
+ * the total number of iterations. BUT this could also free the current next entry pointer
+ * held by the iterator and can lead to use-after-free. */
+ }
+ }
+ }
+ dictReleaseIterator(di);
+}
+
+/* WATCH key [key ...] -- mark keys to be checked for modification before
+ * EXEC. Not allowed inside MULTI. Always replies +OK. */
+void watchCommand(client *c) {
+ int j;
+
+ if (c->flags & CLIENT_MULTI) {
+ addReplyError(c,"WATCH inside MULTI is not allowed");
+ return;
+ }
+ /* No point in watching if the client is already dirty. */
+ if (c->flags & CLIENT_DIRTY_CAS) {
+ addReply(c,shared.ok);
+ return;
+ }
+ for (j = 1; j < c->argc; j++)
+ watchForKey(c,c->argv[j]);
+ addReply(c,shared.ok);
+}
+
+/* UNWATCH -- forget all watched keys and clear the CAS-dirty flag. */
+void unwatchCommand(client *c) {
+ unwatchAllKeys(c);
+ c->flags &= (~CLIENT_DIRTY_CAS);
+ addReply(c,shared.ok);
+}
+
+/* Return the memory overhead (in bytes) of this client's MULTI state:
+ * queued command argv payloads, watched-key bookkeeping, and the reserved
+ * multiCmd array. Used for client memory usage accounting. */
+size_t multiStateMemOverhead(client *c) {
+ size_t mem = c->mstate.argv_len_sums;
+ /* Add watched keys overhead, Note: this doesn't take into account the watched keys themselves, because they aren't managed per-client. */
+ mem += listLength(c->watched_keys) * (sizeof(listNode) + sizeof(watchedKey));
+ /* Reserved memory for queued multi commands. */
+ mem += c->mstate.alloc_count * sizeof(multiCmd);
+ return mem;
+}
diff --git a/src/networking.c b/src/networking.c
new file mode 100644
index 0000000..7696e8c
--- /dev/null
+++ b/src/networking.c
@@ -0,0 +1,4578 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "atomicvar.h"
+#include "cluster.h"
+#include "script.h"
+#include "fpconv_dtoa.h"
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <math.h>
+#include <ctype.h>
+
+static void setProtocolError(const char *errstr, client *c);
+static void pauseClientsByClient(mstime_t end, int isPauseClientAll);
+int postponeClientRead(client *c);
+char *getClientSockname(client *c);
+int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */
+
+/* Return the size consumed from the allocator, for the specified SDS string,
+ * including internal fragmentation (i.e. zmalloc_size of the whole sds
+ * header + buffer allocation). This function is used in order to compute
+ * the client output buffer size. */
+size_t sdsZmallocSize(sds s) {
+ void *sh = sdsAllocPtr(s);
+ return zmalloc_size(sh);
+}
+
+/* Return the amount of memory used by the sds string at object->ptr
+ * for a string object. This includes internal fragmentation.
+ * Integer-encoded strings use no sds storage, hence return 0. */
+size_t getStringObjectSdsUsedMemory(robj *o) {
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
+ switch(o->encoding) {
+ case OBJ_ENCODING_RAW: return sdsZmallocSize(o->ptr);
+ case OBJ_ENCODING_EMBSTR: return zmalloc_size(o)-sizeof(robj);
+ default: return 0; /* Just integer encoding for now. */
+ }
+}
+
+/* Return the length of a string object (the logical string length).
+ * This does NOT include internal fragmentation or sds unused space.
+ * Integer-encoded strings report 0. */
+size_t getStringObjectLen(robj *o) {
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
+ switch(o->encoding) {
+ case OBJ_ENCODING_RAW: return sdslen(o->ptr);
+ case OBJ_ENCODING_EMBSTR: return sdslen(o->ptr);
+ default: return 0; /* Just integer encoding for now. */
+ }
+}
+
+/* Client.reply list dup method: deep-copy a clientReplyBlock, including its
+ * trailing data buffer ('size' bytes follow the header). */
+void *dupClientReplyValue(void *o) {
+ clientReplyBlock *old = o;
+ clientReplyBlock *buf = zmalloc(sizeof(clientReplyBlock) + old->size);
+ memcpy(buf, o, sizeof(clientReplyBlock) + old->size);
+ return buf;
+}
+
+/* Client.reply list free method: reply blocks are single zmalloc chunks. */
+void freeClientReplyValue(void *o) {
+ zfree(o);
+}
+
+/* This function links the client to the global linked list of clients.
+ * unlinkClient() does the opposite, among other things. The client is also
+ * added to the clients_index rax, keyed by its id in big-endian form so the
+ * radix tree keeps clients ordered by id. */
+void linkClient(client *c) {
+ listAddNodeTail(server.clients,c);
+ /* Note that we remember the linked list node where the client is stored,
+ * this way removing the client in unlinkClient() will not require
+ * a linear scan, but just a constant time operation. */
+ c->client_list_node = listLast(server.clients);
+ uint64_t id = htonu64(c->id);
+ raxInsert(server.clients_index,(unsigned char*)&id,sizeof(id),c,NULL);
+}
+
+/* Initialize client authentication state: bind the client to the default
+ * user, and consider it authenticated if that user is "nopass" and enabled. */
+static void clientSetDefaultAuth(client *c) {
+ /* If the default user does not require authentication, the user is
+ * directly authenticated. */
+ c->user = DefaultUser;
+ c->authenticated = (c->user->flags & USER_FLAG_NOPASS) &&
+ !(c->user->flags & USER_FLAG_DISABLED);
+}
+
+/* Return non-zero if the client still needs to authenticate before issuing
+ * commands. */
+int authRequired(client *c) {
+ /* Check if the user is authenticated. This check is skipped in case
+ * the default user is flagged as "nopass" and is active. */
+ int auth_required = (!(DefaultUser->flags & USER_FLAG_NOPASS) ||
+ (DefaultUser->flags & USER_FLAG_DISABLED)) &&
+ !c->authenticated;
+ return auth_required;
+}
+
+/* Allocate and initialize a new client structure bound to 'conn', or a
+ * fake (non-connected) client when 'conn' is NULL. Connected clients are
+ * linked into server.clients and have their read handler installed. */
+client *createClient(connection *conn) {
+ client *c = zmalloc(sizeof(client));
+
+ /* passing NULL as conn it is possible to create a non connected client.
+ * This is useful since all the commands needs to be executed
+ * in the context of a client. When commands are executed in other
+ * contexts (for instance a Lua script) we need a non connected client. */
+ if (conn) {
+ connEnableTcpNoDelay(conn);
+ if (server.tcpkeepalive)
+ connKeepAlive(conn,server.tcpkeepalive);
+ connSetReadHandler(conn, readQueryFromClient);
+ connSetPrivateData(conn, c);
+ }
+ /* Static output buffer; track the real usable size so we can exploit
+ * the allocator's internal fragmentation. */
+ c->buf = zmalloc_usable(PROTO_REPLY_CHUNK_BYTES, &c->buf_usable_size);
+ selectDb(c,0);
+ uint64_t client_id;
+ atomicGetIncr(server.next_client_id, client_id, 1);
+ c->id = client_id;
+#ifdef LOG_REQ_RES
+ reqresReset(c, 0);
+ c->resp = server.client_default_resp;
+#else
+ c->resp = 2;
+#endif
+ c->conn = conn;
+ c->name = NULL;
+ c->lib_name = NULL;
+ c->lib_ver = NULL;
+ c->bufpos = 0;
+ c->buf_peak = c->buf_usable_size;
+ c->buf_peak_last_reset_time = server.unixtime;
+ c->ref_repl_buf_node = NULL;
+ c->ref_block_pos = 0;
+ c->qb_pos = 0;
+ c->querybuf = sdsempty();
+ c->querybuf_peak = 0;
+ c->reqtype = 0;
+ c->argc = 0;
+ c->argv = NULL;
+ c->argv_len = 0;
+ c->argv_len_sum = 0;
+ c->original_argc = 0;
+ c->original_argv = NULL;
+ c->cmd = c->lastcmd = c->realcmd = NULL;
+ c->cur_script = NULL;
+ c->multibulklen = 0;
+ c->bulklen = -1;
+ c->sentlen = 0;
+ c->flags = 0;
+ c->slot = -1;
+ c->ctime = c->lastinteraction = server.unixtime;
+ c->duration = 0;
+ clientSetDefaultAuth(c);
+ c->replstate = REPL_STATE_NONE;
+ c->repl_start_cmd_stream_on_ack = 0;
+ c->reploff = 0;
+ c->read_reploff = 0;
+ c->repl_applied = 0;
+ c->repl_ack_off = 0;
+ c->repl_ack_time = 0;
+ c->repl_aof_off = 0;
+ c->repl_last_partial_write = 0;
+ c->slave_listening_port = 0;
+ c->slave_addr = NULL;
+ c->slave_capa = SLAVE_CAPA_NONE;
+ c->slave_req = SLAVE_REQ_NONE;
+ c->reply = listCreate();
+ c->deferred_reply_errors = NULL;
+ c->reply_bytes = 0;
+ c->obuf_soft_limit_reached_time = 0;
+ listSetFreeMethod(c->reply,freeClientReplyValue);
+ listSetDupMethod(c->reply,dupClientReplyValue);
+ initClientBlockingState(c);
+ c->woff = 0;
+ c->watched_keys = listCreate();
+ c->pubsub_channels = dictCreate(&objectKeyPointerValueDictType);
+ c->pubsub_patterns = dictCreate(&objectKeyPointerValueDictType);
+ c->pubsubshard_channels = dictCreate(&objectKeyPointerValueDictType);
+ c->peerid = NULL;
+ c->sockname = NULL;
+ c->client_list_node = NULL;
+ c->postponed_list_node = NULL;
+ c->pending_read_list_node = NULL;
+ c->client_tracking_redirection = 0;
+ c->client_tracking_prefixes = NULL;
+ c->last_memory_usage = 0;
+ c->last_memory_type = CLIENT_TYPE_NORMAL;
+ c->module_blocked_client = NULL;
+ c->module_auth_ctx = NULL;
+ c->auth_callback = NULL;
+ c->auth_callback_privdata = NULL;
+ c->auth_module = NULL;
+ listInitNode(&c->clients_pending_write_node, c);
+ c->mem_usage_bucket = NULL;
+ c->mem_usage_bucket_node = NULL;
+ if (conn) linkClient(c);
+ initClientMultiState(c);
+ return c;
+}
+
+/* Install the connection write handler for this client. On failure the
+ * client is scheduled for asynchronous close. */
+void installClientWriteHandler(client *c) {
+ int ae_barrier = 0;
+ /* For the fsync=always policy, we want that a given FD is never
+ * served for reading and writing in the same event loop iteration,
+ * so that in the middle of receiving the query, and serving it
+ * to the client, we'll call beforeSleep() that will do the
+ * actual fsync of AOF to disk. the write barrier ensures that. */
+ if (server.aof_state == AOF_ON &&
+ server.aof_fsync == AOF_FSYNC_ALWAYS)
+ {
+ ae_barrier = 1;
+ }
+ if (connSetWriteHandlerWithBarrier(c->conn, sendReplyToClient, ae_barrier) == C_ERR) {
+ freeClientAsync(c);
+ }
+}
+
+/* This function puts the client in the queue of clients that should write
+ * their output buffers to the socket. Note that it does not *yet* install
+ * the write handler, to start clients are put in a queue of clients that need
+ * to write, so we try to do that before returning in the event loop (see the
+ * handleClientsWithPendingWrites() function).
+ * If we fail and there is more data to write, compared to what the socket
+ * buffers can hold, then we'll really install the handler. */
+void putClientInPendingWriteQueue(client *c) {
+ /* Schedule the client to write the output buffers to the socket only
+ * if not already done and, for slaves, if the slave can actually receive
+ * writes at this stage. */
+ if (!(c->flags & CLIENT_PENDING_WRITE) &&
+ (c->replstate == REPL_STATE_NONE ||
+ (c->replstate == SLAVE_STATE_ONLINE && !c->repl_start_cmd_stream_on_ack)))
+ {
+ /* Here instead of installing the write handler, we just flag the
+ * client and put it into a list of clients that have something
+ * to write to the socket. This way before re-entering the event
+ * loop, we can try to directly write to the client sockets avoiding
+ * a system call. We'll only really install the write handler if
+ * we'll not be able to write the whole reply at once. */
+ c->flags |= CLIENT_PENDING_WRITE;
+ listLinkNodeHead(server.clients_pending_write, &c->clients_pending_write_node);
+ }
+}
+
+/* This function is called every time we are going to transmit new data
+ * to the client. The behavior is the following:
+ *
+ * If the client should receive new data (normal clients will) the function
+ * returns C_OK, and make sure to install the write handler in our event
+ * loop so that when the socket is writable new data gets written.
+ *
+ * If the client should not receive new data, because it is a fake client
+ * (used to load AOF in memory), a master or because the setup of the write
+ * handler failed, the function returns C_ERR.
+ *
+ * The function may return C_OK without actually installing the write
+ * event handler in the following cases:
+ *
+ * 1) The event handler should already be installed since the output buffer
+ * already contains something.
+ * 2) The client is a slave but not yet online, so we want to just accumulate
+ * writes in the buffer but not actually sending them yet.
+ *
+ * Typically gets called every time a reply is built, before adding more
+ * data to the clients output buffers. If the function returns C_ERR no
+ * data should be appended to the output buffers. */
+int prepareClientToWrite(client *c) {
+ /* If it's a script or module client we always return ok without installing
+ * any handler since there is no socket at all. */
+ if (c->flags & (CLIENT_SCRIPT|CLIENT_MODULE)) return C_OK;
+
+ /* If CLIENT_CLOSE_ASAP flag is set, we need not write anything. */
+ if (c->flags & CLIENT_CLOSE_ASAP) return C_ERR;
+
+ /* CLIENT REPLY OFF / SKIP handling: don't send replies.
+ * CLIENT_PUSHING handling: disables the reply silencing flags. */
+ if ((c->flags & (CLIENT_REPLY_OFF|CLIENT_REPLY_SKIP)) &&
+ !(c->flags & CLIENT_PUSHING)) return C_ERR;
+
+ /* Masters don't receive replies, unless CLIENT_MASTER_FORCE_REPLY flag
+ * is set. */
+ if ((c->flags & CLIENT_MASTER) &&
+ !(c->flags & CLIENT_MASTER_FORCE_REPLY)) return C_ERR;
+
+ if (!c->conn) return C_ERR; /* Fake client for AOF loading. */
+
+ /* Schedule the client to write the output buffers to the socket, unless
+ * it should already be setup to do so (it has already pending data).
+ *
+ * If CLIENT_PENDING_READ is set, we're in an IO thread and should
+ * not put the client in pending write queue. Instead, it will be
+ * done by handleClientsWithPendingReadsUsingThreads() upon return.
+ */
+ if (!clientHasPendingReplies(c) && io_threads_op == IO_THREADS_OP_IDLE)
+ putClientInPendingWriteQueue(c);
+
+ /* Authorize the caller to queue in the output buffer of this client. */
+ return C_OK;
+}
+
+/* -----------------------------------------------------------------------------
+ * Low level functions to add more data to output buffers.
+ * -------------------------------------------------------------------------- */
+
+/* Attempts to add the reply to the static buffer in the client struct.
+ * Returns the length of data that is added to the reply buffer (which may
+ * be less than 'len' if the buffer is partially full; the caller must put
+ * the remainder into the reply list).
+ *
+ * Sanitizer suppression: client->buf_usable_size determined by
+ * zmalloc_usable_size() call. Writing beyond client->buf boundaries confuses
+ * sanitizer and generates a false positive out-of-bounds error */
+REDIS_NO_SANITIZE("bounds")
+size_t _addReplyToBuffer(client *c, const char *s, size_t len) {
+ size_t available = c->buf_usable_size - c->bufpos;
+
+ /* If there already are entries in the reply list, we cannot
+ * add anything more to the static buffer. */
+ if (listLength(c->reply) > 0) return 0;
+
+ size_t reply_len = len > available ? available : len;
+ memcpy(c->buf+c->bufpos,s,reply_len);
+ c->bufpos+=reply_len;
+ /* We update the buffer peak after appending the reply to the buffer */
+ if(c->buf_peak < (size_t)c->bufpos)
+ c->buf_peak = (size_t)c->bufpos;
+ return reply_len;
+}
+
+/* Adds the reply to the reply linked list, filling the tail block first and
+ * allocating new PROTO_REPLY_CHUNK_BYTES-sized blocks as needed.
+ * Note: some edits to this function need to be relayed to AddReplyFromClient. */
+void _addReplyProtoToList(client *c, list *reply_list, const char *s, size_t len) {
+ listNode *ln = listLast(reply_list);
+ clientReplyBlock *tail = ln? listNodeValue(ln): NULL;
+
+ /* Note that 'tail' may be NULL even if we have a tail node, because when
+ * addReplyDeferredLen() is used, it sets a dummy node to NULL just
+ * to fill it later, when the size of the bulk length is set. */
+
+ /* Append to tail string when possible. */
+ if (tail) {
+ /* Copy the part we can fit into the tail, and leave the rest for a
+ * new node */
+ size_t avail = tail->size - tail->used;
+ size_t copy = avail >= len? len: avail;
+ memcpy(tail->buf + tail->used, s, copy);
+ tail->used += copy;
+ s += copy;
+ len -= copy;
+ }
+ if (len) {
+ /* Create a new node, make sure it is allocated to at
+ * least PROTO_REPLY_CHUNK_BYTES */
+ size_t usable_size;
+ size_t size = len < PROTO_REPLY_CHUNK_BYTES? PROTO_REPLY_CHUNK_BYTES: len;
+ tail = zmalloc_usable(size + sizeof(clientReplyBlock), &usable_size);
+ /* take over the allocation's internal fragmentation */
+ tail->size = usable_size - sizeof(clientReplyBlock);
+ tail->used = len;
+ memcpy(tail->buf, s, len);
+ listAddNodeTail(reply_list, tail);
+ c->reply_bytes += tail->size;
+
+ /* Growing the reply list may push the client past its output buffer
+ * limit, in which case it gets scheduled for close. */
+ closeClientOnOutputBufferLimitReached(c, 1);
+ }
+}
+
+/* The subscribe / unsubscribe command family has a push as a reply,
+ * or in other words, it responds with a push (or several of them
+ * depending on how many arguments it got), and has no reply.
+ * Returns 1 for those commands, 0 otherwise (including NULL cmd). */
+int cmdHasPushAsReply(struct redisCommand *cmd) {
+ if (!cmd) return 0;
+ return cmd->proc == subscribeCommand || cmd->proc == unsubscribeCommand ||
+ cmd->proc == psubscribeCommand || cmd->proc == punsubscribeCommand ||
+ cmd->proc == ssubscribeCommand || cmd->proc == sunsubscribeCommand;
+}
+
+/* Route 'len' bytes of protocol data to the right destination: the static
+ * buffer when it fits, the reply list otherwise, or the pending push
+ * messages list when we're pushing to the current client mid-command. */
+void _addReplyToBufferOrList(client *c, const char *s, size_t len) {
+ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return;
+
+ /* Replicas should normally not cause any writes to the reply buffer. In case a rogue replica sent a command on the
+ * replication link that caused a reply to be generated we'll simply disconnect it.
+ * Note this is the simplest way to check a command added a response. Replication links are used to write data but
+ * not for responses, so we should normally never get here on a replica client. */
+ if (getClientType(c) == CLIENT_TYPE_SLAVE) {
+ sds cmdname = c->lastcmd ? c->lastcmd->fullname : NULL;
+ logInvalidUseAndFreeClientAsync(c, "Replica generated a reply to command '%s'",
+ cmdname ? cmdname : "<unknown>");
+ return;
+ }
+
+ /* We call it here because this function may affect the reply
+ * buffer offset (see function comment) */
+ reqresSaveClientReplyOffset(c);
+
+ /* If we're processing a push message into the current client (i.e. executing PUBLISH
+ * to a channel which we are subscribed to, then we wanna postpone that message to be added
+ * after the command's reply (specifically important during multi-exec). the exception is
+ * the SUBSCRIBE command family, which (currently) have a push message instead of a proper reply.
+ * The check for executing_client also avoids affecting push messages that are part of eviction. */
+ if (c == server.current_client && (c->flags & CLIENT_PUSHING) &&
+ server.executing_client && !cmdHasPushAsReply(server.executing_client->cmd))
+ {
+ _addReplyProtoToList(c,server.pending_push_messages,s,len);
+ return;
+ }
+
+ /* Fill the static buffer first; any overflow goes to the reply list. */
+ size_t reply_len = _addReplyToBuffer(c,s,len);
+ if (len > reply_len) _addReplyProtoToList(c,c->reply,s+reply_len,len-reply_len);
+}
+
+/* -----------------------------------------------------------------------------
+ * Higher level functions to queue data on the client output buffer.
+ * The following functions are the ones that commands implementations will call.
+ * -------------------------------------------------------------------------- */
+
+/* Add the object 'obj' string representation to the client output buffer.
+ * Only string objects (raw/embstr/int encodings) are supported; anything
+ * else is a programming error and panics. */
+void addReply(client *c, robj *obj) {
+ if (prepareClientToWrite(c) != C_OK) return;
+
+ if (sdsEncodedObject(obj)) {
+ _addReplyToBufferOrList(c,obj->ptr,sdslen(obj->ptr));
+ } else if (obj->encoding == OBJ_ENCODING_INT) {
+ /* For integer encoded strings we just convert it into a string
+ * using our optimized function, and attach the resulting string
+ * to the output buffer. */
+ char buf[32];
+ size_t len = ll2string(buf,sizeof(buf),(long)obj->ptr);
+ _addReplyToBufferOrList(c,buf,len);
+ } else {
+ serverPanic("Wrong obj->encoding in addReply()");
+ }
+}
+
+/* Add the SDS 's' string to the client output buffer, as a side effect
+ * the SDS string is freed (ownership of 's' is taken in all cases). */
+void addReplySds(client *c, sds s) {
+ if (prepareClientToWrite(c) != C_OK) {
+ /* The caller expects the sds to be free'd. */
+ sdsfree(s);
+ return;
+ }
+ _addReplyToBufferOrList(c,s,sdslen(s));
+ sdsfree(s);
+}
+
+/* This low level function just adds whatever protocol you send it to the
+ * client buffer, trying the static buffer initially, and using the string
+ * of objects if not possible.
+ *
+ * It is efficient because does not create an SDS object nor a Redis object
+ * if not needed. The object will only be created by calling
+ * _addReplyProtoToList() if we fail to extend the existing tail object
+ * in the list of objects. */
+void addReplyProto(client *c, const char *s, size_t len) {
+ if (prepareClientToWrite(c) != C_OK) return;
+ _addReplyToBufferOrList(c,s,len);
+}
+
+/* Low level function called by the addReplyError...() functions.
+ * It emits the protocol for a Redis error, in the form:
+ *
+ * -ERRORCODE Error Message<CR><LF>
+ *
+ * If the error code is already passed in the string 's', the error
+ * code provided is used, otherwise the string "-ERR " for the generic
+ * error code is automatically added.
+ * Note that 's' must NOT end with \r\n (the terminator is appended here). */
+void addReplyErrorLength(client *c, const char *s, size_t len) {
+ /* If the string already starts with "-..." then the error code
+ * is provided by the caller. Otherwise we use "-ERR". */
+ if (!len || s[0] != '-') addReplyProto(c,"-ERR ",5);
+ addReplyProto(c,s,len);
+ addReplyProto(c,"\r\n",2);
+}
+
/* Do some actions after an error reply was sent (Log if needed, updates stats, etc.)
 * Possible flags:
 * * ERR_REPLY_FLAG_NO_STATS_UPDATE - indicate not to update any error stats. */
void afterErrorReply(client *c, const char *s, size_t len, int flags) {
    /* Module clients fall into two categories:
     * Calls to RM_Call, in which case the error isn't being returned to a client, so should not be counted.
     * Module thread safe context calls to RM_ReplyWithError, which will be added to a real client by the main thread later. */
    if (c->flags & CLIENT_MODULE) {
        /* Lazily create the deferred-errors list; each node owns a private
         * sds copy of the error, released via the list free method. */
        if (!c->deferred_reply_errors) {
            c->deferred_reply_errors = listCreate();
            listSetFreeMethod(c->deferred_reply_errors, (void (*)(void*))sdsfree);
        }
        listAddNodeTail(c->deferred_reply_errors, sdsnewlen(s, len));
        return;
    }

    if (!(flags & ERR_REPLY_FLAG_NO_STATS_UPDATE)) {
        /* Increment the global error counter */
        server.stat_total_error_replies++;
        /* Increment the error stats
         * If the string already starts with "-..." then the error prefix
         * is provided by the caller ( we limit the search to 32 chars). Otherwise we use "-ERR". */
        if (s[0] != '-') {
            incrementErrorCount("ERR", 3);
        } else {
            /* The error code is the token between '-' and the first space,
             * e.g. "-WRONGTYPE ..." counts under "WRONGTYPE". */
            char *spaceloc = memchr(s, ' ', len < 32 ? len : 32);
            if (spaceloc) {
                const size_t errEndPos = (size_t)(spaceloc - s);
                incrementErrorCount(s+1, errEndPos-1);
            } else {
                /* Fallback to ERR if we can't retrieve the error prefix */
                incrementErrorCount("ERR", 3);
            }
        }
    } else {
        /* stat_total_error_replies will not be updated, which means that
         * the cmd stats will not be updated as well, we still want this command
         * to be counted as failed so we update it here. We update c->realcmd in
         * case c->cmd was changed (like in GEOADD). */
        c->realcmd->failed_calls++;
    }

    /* Sometimes it could be normal that a slave replies to a master with
     * an error and this function gets called. Actually the error will never
     * be sent because addReply*() against master clients has no effect...
     * A notable example is:
     *
     * EVAL 'redis.call("incr",KEYS[1]); redis.call("nonexisting")' 1 x
     *
     * Where the master must propagate the first change even if the second
     * will produce an error. However it is useful to log such events since
     * they are rare and may hint at errors in a script or a bug in Redis. */
    int ctype = getClientType(c);
    if (ctype == CLIENT_TYPE_MASTER || ctype == CLIENT_TYPE_SLAVE || c->id == CLIENT_ID_AOF) {
        char *to, *from;

        /* Pick human-readable endpoint names for the log line below. */
        if (c->id == CLIENT_ID_AOF) {
            to = "AOF-loading-client";
            from = "server";
        } else if (ctype == CLIENT_TYPE_MASTER) {
            to = "master";
            from = "replica";
        } else {
            to = "replica";
            from = "master";
        }

        /* Cap the logged error text to keep the log line bounded. */
        if (len > 4096) len = 4096;
        sds cmdname = c->lastcmd ? c->lastcmd->fullname : NULL;
        serverLog(LL_WARNING,"== CRITICAL == This %s is sending an error "
                             "to its %s: '%.*s' after processing the command "
                             "'%s'", from, to, (int)len, s, cmdname ? cmdname : "<unknown>");
        if (ctype == CLIENT_TYPE_MASTER && server.repl_backlog &&
            server.repl_backlog->histlen > 0)
        {
            showLatestBacklog();
        }
        server.stat_unexpected_error_replies++;

        /* Based off the propagation error behavior, check if we need to panic here. There
         * are currently two checked cases:
         * * If this command was from our master and we are not a writable replica.
         * * We are reading from an AOF file. */
        int panic_in_replicas = (ctype == CLIENT_TYPE_MASTER && server.repl_slave_ro)
            && (server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC ||
            server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS);
        int panic_in_aof = c->id == CLIENT_ID_AOF
            && server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC;
        if (panic_in_replicas || panic_in_aof) {
            serverPanic("This %s panicked sending an error to its %s"
                        " after processing the command '%s'",
                        from, to, cmdname ? cmdname : "<unknown>");
        }
    }
}
+
+/* The 'err' object is expected to start with -ERRORCODE and end with \r\n.
+ * Unlike addReplyErrorSds and others alike which rely on addReplyErrorLength. */
+void addReplyErrorObject(client *c, robj *err) {
+ addReply(c, err);
+ afterErrorReply(c, err->ptr, sdslen(err->ptr)-2, 0); /* Ignore trailing \r\n */
+}
+
+/* Sends either a reply or an error reply by checking the first char.
+ * If the first char is '-' the reply is considered an error.
+ * In any case the given reply is sent, if the reply is also recognize
+ * as an error we also perform some post reply operations such as
+ * logging and stats update. */
+void addReplyOrErrorObject(client *c, robj *reply) {
+ serverAssert(sdsEncodedObject(reply));
+ sds rep = reply->ptr;
+ if (sdslen(rep) > 1 && rep[0] == '-') {
+ addReplyErrorObject(c, reply);
+ } else {
+ addReply(c, reply);
+ }
+}
+
+/* See addReplyErrorLength for expectations from the input string. */
+void addReplyError(client *c, const char *err) {
+ addReplyErrorLength(c,err,strlen(err));
+ afterErrorReply(c,err,strlen(err),0);
+}
+
+/* Add error reply to the given client.
+ * Supported flags:
+ * * ERR_REPLY_FLAG_NO_STATS_UPDATE - indicate not to perform any error stats updates */
+void addReplyErrorSdsEx(client *c, sds err, int flags) {
+ addReplyErrorLength(c,err,sdslen(err));
+ afterErrorReply(c,err,sdslen(err),flags);
+ sdsfree(err);
+}
+
+/* See addReplyErrorLength for expectations from the input string. */
+/* As a side effect the SDS string is freed. */
+void addReplyErrorSds(client *c, sds err) {
+ addReplyErrorSdsEx(c, err, 0);
+}
+
/* Like addReplyErrorSds, but first neutralizes CR/LF characters inside the
 * error text so the emitted protocol line cannot be broken by them.
 * See addReplyErrorLength for expectations from the input string.
 * As a side effect the SDS string is freed. */
void addReplyErrorSdsSafe(client *c, sds err) {
    /* NOTE(review): sdsmapchars maps each char of the 'from' set to the char
     * at the same index of the 'to' set; with setlen 2 the 'to' literal
     * should contain two spaces — confirm the literal wasn't collapsed. */
    err = sdsmapchars(err, "\r\n", " ", 2);
    addReplyErrorSdsEx(c, err, 0);
}
+
+/* Internal function used by addReplyErrorFormat, addReplyErrorFormatEx and RM_ReplyWithErrorFormat.
+ * Refer to afterErrorReply for more information about the flags. */
+void addReplyErrorFormatInternal(client *c, int flags, const char *fmt, va_list ap) {
+ va_list cpy;
+ va_copy(cpy,ap);
+ sds s = sdscatvprintf(sdsempty(),fmt,cpy);
+ va_end(cpy);
+ /* Trim any newlines at the end (ones will be added by addReplyErrorLength) */
+ s = sdstrim(s, "\r\n");
+ /* Make sure there are no newlines in the middle of the string, otherwise
+ * invalid protocol is emitted. */
+ s = sdsmapchars(s, "\r\n", " ", 2);
+ addReplyErrorLength(c,s,sdslen(s));
+ afterErrorReply(c,s,sdslen(s),flags);
+ sdsfree(s);
+}
+
+void addReplyErrorFormatEx(client *c, int flags, const char *fmt, ...) {
+ va_list ap;
+ va_start(ap,fmt);
+ addReplyErrorFormatInternal(c, flags, fmt, ap);
+ va_end(ap);
+}
+
+/* See addReplyErrorLength for expectations from the formatted string.
+ * The formatted string is safe to contain \r and \n anywhere. */
+void addReplyErrorFormat(client *c, const char *fmt, ...) {
+ va_list ap;
+ va_start(ap,fmt);
+ addReplyErrorFormatInternal(c, 0, fmt, ap);
+ va_end(ap);
+}
+
+void addReplyErrorArity(client *c) {
+ addReplyErrorFormat(c, "wrong number of arguments for '%s' command",
+ c->cmd->fullname);
+}
+
+void addReplyErrorExpireTime(client *c) {
+ addReplyErrorFormat(c, "invalid expire time in '%s' command",
+ c->cmd->fullname);
+}
+
+void addReplyStatusLength(client *c, const char *s, size_t len) {
+ addReplyProto(c,"+",1);
+ addReplyProto(c,s,len);
+ addReplyProto(c,"\r\n",2);
+}
+
+void addReplyStatus(client *c, const char *status) {
+ addReplyStatusLength(c,status,strlen(status));
+}
+
+void addReplyStatusFormat(client *c, const char *fmt, ...) {
+ va_list ap;
+ va_start(ap,fmt);
+ sds s = sdscatvprintf(sdsempty(),fmt,ap);
+ va_end(ap);
+ addReplyStatusLength(c,s,sdslen(s));
+ sdsfree(s);
+}
+
/* Sometimes we are forced to create a new reply node, and we can't append to
 * the previous one, when that happens, we wanna try to trim the unused space
 * at the end of the last reply node which we won't use anymore. */
void trimReplyUnusedTailSpace(client *c) {
    listNode *ln = listLast(c->reply);
    clientReplyBlock *tail = ln? listNodeValue(ln): NULL;

    /* Note that 'tail' may be NULL even if we have a tail node, because when
     * addReplyDeferredLen() is used it appends a NULL placeholder node. */
    if (!tail) return;

    /* We only try to trim when the unused space is relatively high (more than
     * a 1/4 of the allocation), otherwise there's a high chance realloc will
     * NOP. Also, to avoid a large memmove which happens as part of realloc,
     * we only do that if the used part is small. */
    if (tail->size - tail->used > tail->size / 4 &&
        tail->used < PROTO_REPLY_CHUNK_BYTES)
    {
        size_t usable_size;
        size_t old_size = tail->size;
        /* Shrink the block to exactly the used payload plus its header. */
        tail = zrealloc_usable(tail, tail->used + sizeof(clientReplyBlock), &usable_size);
        /* take over the allocation's internal fragmentation (at least for
         * memory usage tracking) */
        tail->size = usable_size - sizeof(clientReplyBlock);
        /* Adjust the client's reply byte accounting by the (negative) delta. */
        c->reply_bytes = c->reply_bytes + tail->size - old_size;
        /* The realloc may have moved the block; re-link it into the list. */
        listNodeValue(ln) = tail;
    }
}
+
/* Adds an empty object to the reply list that will contain the multi bulk
 * length, which is not known when this function is called.
 * Returns an opaque handle (the list node) to be passed later to
 * setDeferredAggregateLen()/setDeferredReply(), or NULL if the client
 * cannot accept writes. */
void *addReplyDeferredLen(client *c) {
    /* Note that we install the write event here even if the object is not
     * ready to be sent, since we are sure that before returning to the
     * event loop setDeferredAggregateLen() will be called. */
    if (prepareClientToWrite(c) != C_OK) return NULL;

    /* Replicas should normally not cause any writes to the reply buffer. In case a rogue replica sent a command on the
     * replication link that caused a reply to be generated we'll simply disconnect it.
     * Note this is the simplest way to check a command added a response. Replication links are used to write data but
     * not for responses, so we should normally never get here on a replica client. */
    if (getClientType(c) == CLIENT_TYPE_SLAVE) {
        sds cmdname = c->lastcmd ? c->lastcmd->fullname : NULL;
        logInvalidUseAndFreeClientAsync(c, "Replica generated a reply to command '%s'",
                                        cmdname ? cmdname : "<unknown>");
        return NULL;
    }

    /* We call it here because this function conceptually affects the reply
     * buffer offset (see function comment) */
    reqresSaveClientReplyOffset(c);

    /* Reclaim slack in the current tail before appending a placeholder. */
    trimReplyUnusedTailSpace(c);
    listAddNodeTail(c->reply,NULL); /* NULL is our placeholder. */
    return listLast(c->reply);
}
+
/* Fill the placeholder node created by addReplyDeferredLen() with the given
 * 's'/'length' payload, gluing it onto the neighbouring reply blocks when
 * possible to save a write(2) syscall later. */
void setDeferredReply(client *c, void *node, const char *s, size_t length) {
    listNode *ln = (listNode*)node;
    clientReplyBlock *next, *prev;

    /* Abort when *node is NULL: when the client should not accept writes
     * we return NULL in addReplyDeferredLen() */
    if (node == NULL) return;
    serverAssert(!listNodeValue(ln));

    /* Normally we fill this dummy NULL node, added by addReplyDeferredLen(),
     * with a new buffer structure containing the protocol needed to specify
     * the length of the array following. However sometimes there might be room
     * in the previous/next node so we can instead remove this NULL node, and
     * suffix/prefix our data in the node immediately before/after it, in order
     * to save a write(2) syscall later. Conditions needed to do it:
     *
     * - The prev node is non-NULL and has space in it or
     * - The next node is non-NULL,
     * - It has enough room already allocated
     * - And not too large (avoid large memmove) */
    if (ln->prev != NULL && (prev = listNodeValue(ln->prev)) &&
        prev->size - prev->used > 0)
    {
        /* Append as much as fits to the tail of the previous block. */
        size_t len_to_copy = prev->size - prev->used;
        if (len_to_copy > length)
            len_to_copy = length;
        memcpy(prev->buf + prev->used, s, len_to_copy);
        prev->used += len_to_copy;
        length -= len_to_copy;
        if (length == 0) {
            /* Everything fit: the placeholder node is no longer needed. */
            listDelNode(c->reply, ln);
            return;
        }
        s += len_to_copy;
    }

    if (ln->next != NULL && (next = listNodeValue(ln->next)) &&
        next->size - next->used >= length &&
        next->used < PROTO_REPLY_CHUNK_BYTES * 4)
    {
        /* Prefix the remainder into the next block: shift its payload
         * right, then copy ours in front. */
        memmove(next->buf + length, next->buf, next->used);
        memcpy(next->buf, s, length);
        next->used += length;
        listDelNode(c->reply,ln);
    } else {
        /* Create a new node */
        size_t usable_size;
        clientReplyBlock *buf = zmalloc_usable(length + sizeof(clientReplyBlock), &usable_size);
        /* Take over the allocation's internal fragmentation */
        buf->size = usable_size - sizeof(clientReplyBlock);
        buf->used = length;
        memcpy(buf->buf, s, length);
        listNodeValue(ln) = buf;
        c->reply_bytes += buf->size;

        /* The reply list grew: enforce output buffer limits. */
        closeClientOnOutputBufferLimitReached(c, 1);
    }
}
+
+/* Populate the length object and try gluing it to the next chunk. */
+void setDeferredAggregateLen(client *c, void *node, long length, char prefix) {
+ serverAssert(length >= 0);
+
+ /* Abort when *node is NULL: when the client should not accept writes
+ * we return NULL in addReplyDeferredLen() */
+ if (node == NULL) return;
+
+ /* Things like *2\r\n, %3\r\n or ~4\r\n are emitted very often by the protocol
+ * so we have a few shared objects to use if the integer is small
+ * like it is most of the times. */
+ const size_t hdr_len = OBJ_SHARED_HDR_STRLEN(length);
+ const int opt_hdr = length < OBJ_SHARED_BULKHDR_LEN;
+ if (prefix == '*' && opt_hdr) {
+ setDeferredReply(c, node, shared.mbulkhdr[length]->ptr, hdr_len);
+ return;
+ }
+ if (prefix == '%' && opt_hdr) {
+ setDeferredReply(c, node, shared.maphdr[length]->ptr, hdr_len);
+ return;
+ }
+ if (prefix == '~' && opt_hdr) {
+ setDeferredReply(c, node, shared.sethdr[length]->ptr, hdr_len);
+ return;
+ }
+
+ char lenstr[128];
+ size_t lenstr_len = snprintf(lenstr, sizeof(lenstr), "%c%ld\r\n", prefix, length);
+ setDeferredReply(c, node, lenstr, lenstr_len);
+}
+
+void setDeferredArrayLen(client *c, void *node, long length) {
+ setDeferredAggregateLen(c,node,length,'*');
+}
+
+void setDeferredMapLen(client *c, void *node, long length) {
+ int prefix = c->resp == 2 ? '*' : '%';
+ if (c->resp == 2) length *= 2;
+ setDeferredAggregateLen(c,node,length,prefix);
+}
+
+void setDeferredSetLen(client *c, void *node, long length) {
+ int prefix = c->resp == 2 ? '*' : '~';
+ setDeferredAggregateLen(c,node,length,prefix);
+}
+
+void setDeferredAttributeLen(client *c, void *node, long length) {
+ serverAssert(c->resp >= 3);
+ setDeferredAggregateLen(c,node,length,'|');
+}
+
+void setDeferredPushLen(client *c, void *node, long length) {
+ serverAssert(c->resp >= 3);
+ setDeferredAggregateLen(c,node,length,'>');
+}
+
/* Add a double as a bulk reply. RESP3 uses the native double type
 * (",<num>\r\n"); RESP2 falls back to a bulk string ("$<len>\r\n<num>\r\n"). */
void addReplyDouble(client *c, double d) {
    if (c->resp == 3) {
        /* Layout: ',' + formatted double + CRLF (+ NUL). */
        char dbuf[MAX_D2STRING_CHARS+3];
        dbuf[0] = ',';
        const int dlen = d2string(dbuf+1,sizeof(dbuf)-1,d);
        dbuf[dlen+1] = '\r';
        dbuf[dlen+2] = '\n';
        /* NOTE(review): the trailing NUL write assumes d2string output is at
         * most MAX_D2STRING_CHARS-1 chars; otherwise dbuf[dlen+3] would be
         * one past the end — confirm against d2string's contract. */
        dbuf[dlen+3] = '\0';
        addReplyProto(c,dbuf,dlen+3);
    } else {
        char dbuf[MAX_LONG_DOUBLE_CHARS+32];
        /* In order to prepend the string length before the formatted number,
         * but still avoid an extra memcpy of the whole number, we reserve space
         * for maximum header `$0000\r\n`, print double, add the resp header in
         * front of it, and then send the buffer with the right `start` offset. */
        const int dlen = d2string(dbuf+7,sizeof(dbuf)-7,d);
        /* The length itself has at most 4 digits (header reserves "$0000"). */
        int digits = digits10(dlen);
        int start = 4 - digits;
        serverAssert(start >= 0);
        dbuf[start] = '$';

        /* Convert `dlen` to string, putting it's digits after '$' and before the
         * formatted double string. */
        for(int i = digits, val = dlen; val && i > 0 ; --i, val /= 10) {
            dbuf[start + i] = "0123456789"[val % 10];
        }
        /* Header CRLF sits at fixed offsets 5/6, payload CRLF follows it. */
        dbuf[5] = '\r';
        dbuf[6] = '\n';
        dbuf[dlen+7] = '\r';
        dbuf[dlen+8] = '\n';
        dbuf[dlen+9] = '\0';
        addReplyProto(c,dbuf+start,dlen+9-start);
    }
}
+
+void addReplyBigNum(client *c, const char* num, size_t len) {
+ if (c->resp == 2) {
+ addReplyBulkCBuffer(c, num, len);
+ } else {
+ addReplyProto(c,"(",1);
+ addReplyProto(c,num,len);
+ addReplyProto(c,"\r\n",2);
+ }
+}
+
+/* Add a long double as a bulk reply, but uses a human readable formatting
+ * of the double instead of exposing the crude behavior of doubles to the
+ * dear user. */
+void addReplyHumanLongDouble(client *c, long double d) {
+ if (c->resp == 2) {
+ robj *o = createStringObjectFromLongDouble(d,1);
+ addReplyBulk(c,o);
+ decrRefCount(o);
+ } else {
+ char buf[MAX_LONG_DOUBLE_CHARS];
+ int len = ld2string(buf,sizeof(buf),d,LD_STR_HUMAN);
+ addReplyProto(c,",",1);
+ addReplyProto(c,buf,len);
+ addReplyProto(c,"\r\n",2);
+ }
+}
+
+/* Add a long long as integer reply or bulk len / multi bulk count.
+ * Basically this is used to output <prefix><long long><crlf>. */
+void addReplyLongLongWithPrefix(client *c, long long ll, char prefix) {
+ char buf[128];
+ int len;
+
+ /* Things like $3\r\n or *2\r\n are emitted very often by the protocol
+ * so we have a few shared objects to use if the integer is small
+ * like it is most of the times. */
+ const int opt_hdr = ll < OBJ_SHARED_BULKHDR_LEN && ll >= 0;
+ const size_t hdr_len = OBJ_SHARED_HDR_STRLEN(ll);
+ if (prefix == '*' && opt_hdr) {
+ addReplyProto(c,shared.mbulkhdr[ll]->ptr,hdr_len);
+ return;
+ } else if (prefix == '$' && opt_hdr) {
+ addReplyProto(c,shared.bulkhdr[ll]->ptr,hdr_len);
+ return;
+ } else if (prefix == '%' && opt_hdr) {
+ addReplyProto(c,shared.maphdr[ll]->ptr,hdr_len);
+ return;
+ } else if (prefix == '~' && opt_hdr) {
+ addReplyProto(c,shared.sethdr[ll]->ptr,hdr_len);
+ return;
+ }
+
+ buf[0] = prefix;
+ len = ll2string(buf+1,sizeof(buf)-1,ll);
+ buf[len+1] = '\r';
+ buf[len+2] = '\n';
+ addReplyProto(c,buf,len+3);
+}
+
+void addReplyLongLong(client *c, long long ll) {
+ if (ll == 0)
+ addReply(c,shared.czero);
+ else if (ll == 1)
+ addReply(c,shared.cone);
+ else
+ addReplyLongLongWithPrefix(c,ll,':');
+}
+
+void addReplyAggregateLen(client *c, long length, int prefix) {
+ serverAssert(length >= 0);
+ addReplyLongLongWithPrefix(c,length,prefix);
+}
+
+void addReplyArrayLen(client *c, long length) {
+ addReplyAggregateLen(c,length,'*');
+}
+
+void addReplyMapLen(client *c, long length) {
+ int prefix = c->resp == 2 ? '*' : '%';
+ if (c->resp == 2) length *= 2;
+ addReplyAggregateLen(c,length,prefix);
+}
+
+void addReplySetLen(client *c, long length) {
+ int prefix = c->resp == 2 ? '*' : '~';
+ addReplyAggregateLen(c,length,prefix);
+}
+
+void addReplyAttributeLen(client *c, long length) {
+ serverAssert(c->resp >= 3);
+ addReplyAggregateLen(c,length,'|');
+}
+
+void addReplyPushLen(client *c, long length) {
+ serverAssert(c->resp >= 3);
+ serverAssertWithInfo(c, NULL, c->flags & CLIENT_PUSHING);
+ addReplyAggregateLen(c,length,'>');
+}
+
+void addReplyNull(client *c) {
+ if (c->resp == 2) {
+ addReplyProto(c,"$-1\r\n",5);
+ } else {
+ addReplyProto(c,"_\r\n",3);
+ }
+}
+
+void addReplyBool(client *c, int b) {
+ if (c->resp == 2) {
+ addReply(c, b ? shared.cone : shared.czero);
+ } else {
+ addReplyProto(c, b ? "#t\r\n" : "#f\r\n",4);
+ }
+}
+
+/* A null array is a concept that no longer exists in RESP3. However
+ * RESP2 had it, so API-wise we have this call, that will emit the correct
+ * RESP2 protocol, however for RESP3 the reply will always be just the
+ * Null type "_\r\n". */
+void addReplyNullArray(client *c) {
+ if (c->resp == 2) {
+ addReplyProto(c,"*-1\r\n",5);
+ } else {
+ addReplyProto(c,"_\r\n",3);
+ }
+}
+
+/* Create the length prefix of a bulk reply, example: $2234 */
+void addReplyBulkLen(client *c, robj *obj) {
+ size_t len = stringObjectLen(obj);
+
+ addReplyLongLongWithPrefix(c,len,'$');
+}
+
+/* Add a Redis Object as a bulk reply */
+void addReplyBulk(client *c, robj *obj) {
+ addReplyBulkLen(c,obj);
+ addReply(c,obj);
+ addReplyProto(c,"\r\n",2);
+}
+
+/* Add a C buffer as bulk reply */
+void addReplyBulkCBuffer(client *c, const void *p, size_t len) {
+ addReplyLongLongWithPrefix(c,len,'$');
+ addReplyProto(c,p,len);
+ addReplyProto(c,"\r\n",2);
+}
+
+/* Add sds to reply (takes ownership of sds and frees it) */
+void addReplyBulkSds(client *c, sds s) {
+ addReplyLongLongWithPrefix(c,sdslen(s),'$');
+ addReplySds(c,s);
+ addReplyProto(c,"\r\n",2);
+}
+
+/* Set sds to a deferred reply (for symmetry with addReplyBulkSds it also frees the sds) */
+void setDeferredReplyBulkSds(client *c, void *node, sds s) {
+ sds reply = sdscatprintf(sdsempty(), "$%d\r\n%s\r\n", (unsigned)sdslen(s), s);
+ setDeferredReply(c, node, reply, sdslen(reply));
+ sdsfree(reply);
+ sdsfree(s);
+}
+
+/* Add a C null term string as bulk reply */
+void addReplyBulkCString(client *c, const char *s) {
+ if (s == NULL) {
+ addReplyNull(c);
+ } else {
+ addReplyBulkCBuffer(c,s,strlen(s));
+ }
+}
+
+/* Add a long long as a bulk reply */
+void addReplyBulkLongLong(client *c, long long ll) {
+ char buf[64];
+ int len;
+
+ len = ll2string(buf,64,ll);
+ addReplyBulkCBuffer(c,buf,len);
+}
+
/* Reply with a verbatim type having the specified extension.
 *
 * The 'ext' is the "extension" of the file, actually just a three
 * character type that describes the format of the verbatim string.
 * For instance "txt" means it should be interpreted as a text only
 * file by the receiver, "md " as markdown, and so forth. Only the
 * three first characters of the extension are used, and if the
 * provided one is shorter than that, the remaining is filled with
 * spaces. */
void addReplyVerbatim(client *c, const char *s, size_t len, const char *ext) {
    if (c->resp == 2) {
        /* RESP2 has no verbatim type; degrade to a plain bulk string. */
        addReplyBulkCBuffer(c,s,len);
    } else {
        /* Header layout: "=<len+4>\r\nxxx:" where the "xxx" placeholder is
         * overwritten with the 3-char extension below, and the ':' separates
         * it from the payload. len+4 accounts for "ext" + ':'. */
        char buf[32];
        size_t preflen = snprintf(buf,sizeof(buf),"=%zu\r\nxxx:",len+4);
        char *p = buf+preflen-4;
        for (int i = 0; i < 3; i++) {
            if (*ext == '\0') {
                p[i] = ' ';         /* pad short extensions with spaces */
            } else {
                p[i] = *ext++;
            }
        }
        addReplyProto(c,buf,preflen);
        addReplyProto(c,s,len);
        addReplyProto(c,"\r\n",2);
    }
}
+
+/* Add an array of C strings as status replies with a heading.
+ * This function is typically invoked by from commands that support
+ * subcommands in response to the 'help' subcommand. The help array
+ * is terminated by NULL sentinel. */
+void addReplyHelp(client *c, const char **help) {
+ sds cmd = sdsnew((char*) c->argv[0]->ptr);
+ void *blenp = addReplyDeferredLen(c);
+ int blen = 0;
+
+ sdstoupper(cmd);
+ addReplyStatusFormat(c,
+ "%s <subcommand> [<arg> [value] [opt] ...]. Subcommands are:",cmd);
+ sdsfree(cmd);
+
+ while (help[blen]) addReplyStatus(c,help[blen++]);
+
+ addReplyStatus(c,"HELP");
+ addReplyStatus(c," Print this help.");
+
+ blen += 1; /* Account for the header. */
+ blen += 2; /* Account for the footer. */
+ setDeferredArrayLen(c,blenp,blen);
+}
+
+/* Add a suggestive error reply.
+ * This function is typically invoked by from commands that support
+ * subcommands in response to an unknown subcommand or argument error. */
+void addReplySubcommandSyntaxError(client *c) {
+ sds cmd = sdsnew((char*) c->argv[0]->ptr);
+ sdstoupper(cmd);
+ addReplyErrorFormat(c,
+ "unknown subcommand or wrong number of arguments for '%.128s'. Try %s HELP.",
+ (char*)c->argv[1]->ptr,cmd);
+ sdsfree(cmd);
+}
+
/* Append 'src' client output buffers into 'dst' client output buffers.
 * This function clears the output buffers of 'src' */
void AddReplyFromClient(client *dst, client *src) {
    /* If the source client contains a partial response due to client output
     * buffer limits, propagate that to the dest rather than copy a partial
     * reply. We don't wanna run the risk of copying partial response in case
     * for some reason the output limits don't reach the same decision (maybe
     * they changed) */
    if (src->flags & CLIENT_CLOSE_ASAP) {
        sds client = catClientInfoString(sdsempty(),dst);
        freeClientAsync(dst);
        serverLog(LL_WARNING,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", client);
        sdsfree(client);
        return;
    }

    /* First add the static buffer (either into the static buffer or reply list) */
    addReplyProto(dst,src->buf, src->bufpos);

    /* We need to check with prepareClientToWrite again (after addReplyProto)
     * since addReplyProto may have changed something (like CLIENT_CLOSE_ASAP) */
    if (prepareClientToWrite(dst) != C_OK)
        return;

    /* We're bypassing _addReplyProtoToList, so we need to add the pre/post
     * checks in it. */
    if (dst->flags & CLIENT_CLOSE_AFTER_REPLY) return;

    /* Concatenate the reply list into the dest */
    if (listLength(src->reply))
        listJoin(dst->reply,src->reply);
    dst->reply_bytes += src->reply_bytes;
    /* 'src' buffers are now logically owned by 'dst': reset its accounting. */
    src->reply_bytes = 0;
    src->bufpos = 0;

    /* Move any deferred error bookkeeping over to 'dst' as well. */
    if (src->deferred_reply_errors) {
        deferredAfterErrorReply(dst, src->deferred_reply_errors);
        listRelease(src->deferred_reply_errors);
        src->deferred_reply_errors = NULL;
    }

    /* Check output buffer limits */
    closeClientOnOutputBufferLimitReached(dst, 1);
}
+
+/* Append the listed errors to the server error statistics. the input
+ * list is not modified and remains the responsibility of the caller. */
+void deferredAfterErrorReply(client *c, list *errors) {
+ listIter li;
+ listNode *ln;
+ listRewind(errors,&li);
+ while((ln = listNext(&li))) {
+ sds err = ln->value;
+ afterErrorReply(c, err, sdslen(err), 0);
+ }
+}
+
+/* Logically copy 'src' replica client buffers info to 'dst' replica.
+ * Basically increase referenced buffer block node reference count. */
+void copyReplicaOutputBuffer(client *dst, client *src) {
+ serverAssert(src->bufpos == 0 && listLength(src->reply) == 0);
+
+ if (src->ref_repl_buf_node == NULL) return;
+ dst->ref_repl_buf_node = src->ref_repl_buf_node;
+ dst->ref_block_pos = src->ref_block_pos;
+ ((replBufBlock *)listNodeValue(dst->ref_repl_buf_node))->refcount++;
+}
+
+/* Return true if the specified client has pending reply buffers to write to
+ * the socket. */
+int clientHasPendingReplies(client *c) {
+ if (getClientType(c) == CLIENT_TYPE_SLAVE) {
+ /* Replicas use global shared replication buffer instead of
+ * private output buffer. */
+ serverAssert(c->bufpos == 0 && listLength(c->reply) == 0);
+ if (c->ref_repl_buf_node == NULL) return 0;
+
+ /* If the last replication buffer block content is totally sent,
+ * we have nothing to send. */
+ listNode *ln = listLast(server.repl_buffer_blocks);
+ replBufBlock *tail = listNodeValue(ln);
+ if (ln == c->ref_repl_buf_node &&
+ c->ref_block_pos == tail->used) return 0;
+
+ return 1;
+ } else {
+ return c->bufpos || listLength(c->reply);
+ }
+}
+
/* Connection-level accept callback: runs once the transport handshake for a
 * new client connection completed (or failed). Enforces protected mode and
 * fires the module CLIENT_CHANGE/CONNECTED event. */
void clientAcceptHandler(connection *conn) {
    client *c = connGetPrivateData(conn);

    /* Handshake failed: log and schedule the client for async free. */
    if (connGetState(conn) != CONN_STATE_CONNECTED) {
        serverLog(LL_WARNING,
                "Error accepting a client connection: %s (addr=%s laddr=%s)",
                connGetLastError(conn), getClientPeerId(c), getClientSockname(c));
        freeClientAsync(c);
        return;
    }

    /* If the server is running in protected mode (the default) and there
     * is no password set, nor a specific interface is bound, we don't accept
     * requests from non loopback interfaces. Instead we try to explain the
     * user what to do to fix it if needed. */
    if (server.protected_mode &&
        DefaultUser->flags & USER_FLAG_NOPASS)
    {
        if (connIsLocal(conn) != 1) {
            char *err =
                "-DENIED Redis is running in protected mode because protected "
                "mode is enabled and no password is set for the default user. "
                "In this mode connections are only accepted from the loopback interface. "
                "If you want to connect from external computers to Redis you "
                "may adopt one of the following solutions: "
                "1) Just disable protected mode sending the command "
                "'CONFIG SET protected-mode no' from the loopback interface "
                "by connecting to Redis from the same host the server is "
                "running, however MAKE SURE Redis is not publicly accessible "
                "from internet if you do so. Use CONFIG REWRITE to make this "
                "change permanent. "
                "2) Alternatively you can just disable the protected mode by "
                "editing the Redis configuration file, and setting the protected "
                "mode option to 'no', and then restarting the server. "
                "3) If you started the server manually just for testing, restart "
                "it with the '--protected-mode no' option. "
                "4) Set up an authentication password for the default user. "
                "NOTE: You only need to do one of the above things in order for "
                "the server to start accepting connections from the outside.\r\n";
            /* Best-effort write of the explanation before dropping the client. */
            if (connWrite(c->conn,err,strlen(err)) == -1) {
                /* Nothing to do, Just to avoid the warning... */
            }
            server.stat_rejected_conn++;
            freeClientAsync(c);
            return;
        }
    }

    server.stat_numconnections++;
    moduleFireServerEvent(REDISMODULE_EVENT_CLIENT_CHANGE,
                          REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED,
                          c);
}
+
+void acceptCommonHandler(connection *conn, int flags, char *ip) {
+ client *c;
+ UNUSED(ip);
+
+ if (connGetState(conn) != CONN_STATE_ACCEPTING) {
+ char addr[NET_ADDR_STR_LEN] = {0};
+ char laddr[NET_ADDR_STR_LEN] = {0};
+ connFormatAddr(conn, addr, sizeof(addr), 1);
+ connFormatAddr(conn, laddr, sizeof(addr), 0);
+ serverLog(LL_VERBOSE,
+ "Accepted client connection in error state: %s (addr=%s laddr=%s)",
+ connGetLastError(conn), addr, laddr);
+ connClose(conn);
+ return;
+ }
+
+ /* Limit the number of connections we take at the same time.
+ *
+ * Admission control will happen before a client is created and connAccept()
+ * called, because we don't want to even start transport-level negotiation
+ * if rejected. */
+ if (listLength(server.clients) + getClusterConnectionsCount()
+ >= server.maxclients)
+ {
+ char *err;
+ if (server.cluster_enabled)
+ err = "-ERR max number of clients + cluster "
+ "connections reached\r\n";
+ else
+ err = "-ERR max number of clients reached\r\n";
+
+ /* That's a best effort error message, don't check write errors.
+ * Note that for TLS connections, no handshake was done yet so nothing
+ * is written and the connection will just drop. */
+ if (connWrite(conn,err,strlen(err)) == -1) {
+ /* Nothing to do, Just to avoid the warning... */
+ }
+ server.stat_rejected_conn++;
+ connClose(conn);
+ return;
+ }
+
+ /* Create connection and client */
+ if ((c = createClient(conn)) == NULL) {
+ char addr[NET_ADDR_STR_LEN] = {0};
+ char laddr[NET_ADDR_STR_LEN] = {0};
+ connFormatAddr(conn, addr, sizeof(addr), 1);
+ connFormatAddr(conn, laddr, sizeof(addr), 0);
+ serverLog(LL_WARNING,
+ "Error registering fd event for the new client connection: %s (addr=%s laddr=%s)",
+ connGetLastError(conn), addr, laddr);
+ connClose(conn); /* May be already closed, just ignore errors */
+ return;
+ }
+
+ /* Last chance to keep flags */
+ c->flags |= flags;
+
+ /* Initiate accept.
+ *
+ * Note that connAccept() is free to do two things here:
+ * 1. Call clientAcceptHandler() immediately;
+ * 2. Schedule a future call to clientAcceptHandler().
+ *
+ * Because of that, we must do nothing else afterwards.
+ */
+ if (connAccept(conn, clientAcceptHandler) == C_ERR) {
+ if (connGetState(conn) == CONN_STATE_ERROR)
+ serverLog(LL_WARNING,
+ "Error accepting a client connection: %s (addr=%s laddr=%s)",
+ connGetLastError(conn), getClientPeerId(c), getClientSockname(c));
+ freeClient(connGetPrivateData(conn));
+ return;
+ }
+}
+
+void freeClientOriginalArgv(client *c) {
+ /* We didn't rewrite this client */
+ if (!c->original_argv) return;
+
+ for (int j = 0; j < c->original_argc; j++)
+ decrRefCount(c->original_argv[j]);
+ zfree(c->original_argv);
+ c->original_argv = NULL;
+ c->original_argc = 0;
+}
+
+void freeClientArgv(client *c) {
+ int j;
+ for (j = 0; j < c->argc; j++)
+ decrRefCount(c->argv[j]);
+ c->argc = 0;
+ c->cmd = NULL;
+ c->argv_len_sum = 0;
+ c->argv_len = 0;
+ zfree(c->argv);
+ c->argv = NULL;
+}
+
+/* Close all the slaves connections. This is useful in chained replication
+ * when we resync with our own master and want to force all our slaves to
+ * resync with us as well. */
+void disconnectSlaves(void) {
+ listIter li;
+ listNode *ln;
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ freeClient((client*)ln->value);
+ }
+}
+
+/* Check if there is any other slave waiting dumping RDB finished expect me.
+ * This function is useful to judge current dumping RDB can be used for full
+ * synchronization or not. */
+int anyOtherSlaveWaitRdb(client *except_me) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.slaves, &li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+ if (slave != except_me &&
+ slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END)
+ {
+ return 1;
+ }
+ }
+ return 0;
+}
+
/* Remove the specified client from global lists where the client could
 * be referenced, not including the Pub/Sub channels.
 * This is used by freeClient() and replicationCacheMaster(). */
void unlinkClient(client *c) {
    listNode *ln;

    /* If this is marked as current client unset it. */
    if (server.current_client == c) server.current_client = NULL;

    /* Certain operations must be done only if the client has an active connection.
     * If the client was already unlinked or if it's a "fake client" the
     * conn is already set to NULL. */
    if (c->conn) {
        /* Remove from the list of active clients. */
        if (c->client_list_node) {
            /* The rax index keys client ids in big-endian order. */
            uint64_t id = htonu64(c->id);
            raxRemove(server.clients_index,(unsigned char*)&id,sizeof(id),NULL);
            listDelNode(server.clients,c->client_list_node);
            c->client_list_node = NULL;
        }

        /* Check if this is a replica waiting for diskless replication (rdb pipe),
         * in which case it needs to be cleaned from that list */
        if (c->flags & CLIENT_SLAVE &&
            c->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
            server.rdb_pipe_conns)
        {
            int i;
            for (i=0; i < server.rdb_pipe_numconns; i++) {
                if (server.rdb_pipe_conns[i] == c->conn) {
                    rdbPipeWriteHandlerConnRemoved(c->conn);
                    server.rdb_pipe_conns[i] = NULL;
                    break;
                }
            }
        }
        /* Only use shutdown when the fork is active and we are the parent. */
        if (server.child_type) connShutdown(c->conn);
        connClose(c->conn);
        c->conn = NULL;
    }

    /* Remove from the list of pending writes if needed. */
    if (c->flags & CLIENT_PENDING_WRITE) {
        /* NOTE(review): taking the address of a struct member is never NULL,
         * so this assertion is vacuously true as written; it probably meant
         * to inspect the node's next/prev pointers themselves — confirm
         * intent before changing (a single-element list has both NULL). */
        serverAssert(&c->clients_pending_write_node.next != NULL ||
                     &c->clients_pending_write_node.prev != NULL);
        listUnlinkNode(server.clients_pending_write, &c->clients_pending_write_node);
        c->flags &= ~CLIENT_PENDING_WRITE;
    }

    /* Remove from the list of pending reads if needed. */
    serverAssert(io_threads_op == IO_THREADS_OP_IDLE);
    if (c->pending_read_list_node != NULL) {
        listDelNode(server.clients_pending_read,c->pending_read_list_node);
        c->pending_read_list_node = NULL;
    }


    /* When client was just unblocked because of a blocking operation,
     * remove it from the list of unblocked clients. */
    if (c->flags & CLIENT_UNBLOCKED) {
        ln = listSearchKey(server.unblocked_clients,c);
        serverAssert(ln != NULL);
        listDelNode(server.unblocked_clients,ln);
        c->flags &= ~CLIENT_UNBLOCKED;
    }

    /* Clear the tracking status. */
    if (c->flags & CLIENT_TRACKING) disableTracking(c);
}
+
/* Clear the client state to resemble a newly connected client.
 * Used by commands like CLIENT KILL / RESET style flows: the connection
 * itself stays open, but replication/monitor roles, tracking, transaction
 * state, pubsub subscriptions, the selected DB, the RESP version, the
 * authenticated user and the client name are all reset to defaults. */
void clearClientConnectionState(client *c) {
    listNode *ln;

    /* MONITOR clients are also marked with CLIENT_SLAVE, we need to
     * distinguish between the two.
     */
    if (c->flags & CLIENT_MONITOR) {
        ln = listSearchKey(server.monitors,c);
        serverAssert(ln != NULL);
        listDelNode(server.monitors,ln);

        /* Drop both flags: the SLAVE flag was only set as a side effect
         * of being a monitor. */
        c->flags &= ~(CLIENT_MONITOR|CLIENT_SLAVE);
    }

    /* A real replica or master link must never reach this reset path. */
    serverAssert(!(c->flags &(CLIENT_SLAVE|CLIENT_MASTER)));

    if (c->flags & CLIENT_TRACKING) disableTracking(c);
    selectDb(c,0);
#ifdef LOG_REQ_RES
    /* With request/response logging compiled in, the default RESP version
     * is configurable; otherwise new clients always start at RESP2. */
    c->resp = server.client_default_resp;
#else
    c->resp = 2;
#endif

    clientSetDefaultAuth(c);
    moduleNotifyUserChanged(c);
    discardTransaction(c);

    /* Drop every kind of subscription: channels, shard channels, patterns. */
    pubsubUnsubscribeAllChannels(c,0);
    pubsubUnsubscribeShardAllChannels(c, 0);
    pubsubUnsubscribeAllPatterns(c,0);

    if (c->name) {
        decrRefCount(c->name);
        c->name = NULL;
    }

    /* Note: lib_name and lib_ver are not reset since they still
     * represent the client library behind the connection. */

    /* Selectively clear state flags not covered above */
    c->flags &= ~(CLIENT_ASKING|CLIENT_READONLY|CLIENT_PUBSUB|CLIENT_REPLY_OFF|
                  CLIENT_REPLY_SKIP_NEXT|CLIENT_NO_TOUCH|CLIENT_NO_EVICT);
}
+
/* Synchronously free a client and every resource it owns.
 *
 * The teardown order matters: async-free bookkeeping and the master-cache
 * short-circuit happen before unlinkClient(), replication/monitor list
 * cleanup happens after. Protected clients are deferred to async freeing
 * instead. If the client is our master (and in a sane state) the function
 * returns early after caching it for partial resync — in that case the
 * client object is NOT destroyed here. */
void freeClient(client *c) {
    listNode *ln;

    /* If a client is protected, yet we need to free it right now, make sure
     * to at least use asynchronous freeing. */
    if (c->flags & CLIENT_PROTECTED) {
        freeClientAsync(c);
        return;
    }

    /* For connected clients, call the disconnection event of modules hooks. */
    if (c->conn) {
        moduleFireServerEvent(REDISMODULE_EVENT_CLIENT_CHANGE,
                              REDISMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED,
                              c);
    }

    /* Notify module system that this client auth status changed. */
    moduleNotifyUserChanged(c);

    /* Free the RedisModuleBlockedClient held onto for reprocessing if not already freed. */
    zfree(c->module_blocked_client);

    /* If this client was scheduled for async freeing we need to remove it
     * from the queue. Note that we need to do this here, because later
     * we may call replicationCacheMaster() and the client should already
     * be removed from the list of clients to free. */
    if (c->flags & CLIENT_CLOSE_ASAP) {
        ln = listSearchKey(server.clients_to_close,c);
        serverAssert(ln != NULL);
        listDelNode(server.clients_to_close,ln);
    }

    /* If it is our master that's being disconnected we should make sure
     * to cache the state to try a partial resynchronization later.
     *
     * Note that before doing this we make sure that the client is not in
     * some unexpected state, by checking its flags. */
    if (server.master && c->flags & CLIENT_MASTER) {
        serverLog(LL_NOTICE,"Connection with master lost.");
        if (!(c->flags & (CLIENT_PROTOCOL_ERROR|CLIENT_BLOCKED))) {
            c->flags &= ~(CLIENT_CLOSE_ASAP|CLIENT_CLOSE_AFTER_REPLY);
            /* The client is cached, not destroyed: skip the rest of the
             * teardown entirely. */
            replicationCacheMaster(c);
            return;
        }
    }

    /* Log link disconnection with slave */
    if (getClientType(c) == CLIENT_TYPE_SLAVE) {
        serverLog(LL_NOTICE,"Connection with replica %s lost.",
                  replicationGetSlaveName(c));
    }

    /* Free the query buffer */
    sdsfree(c->querybuf);
    c->querybuf = NULL;

    /* Deallocate structures used to block on blocking ops. */
    /* If there is any in-flight command, we don't record their duration. */
    c->duration = 0;
    if (c->flags & CLIENT_BLOCKED) unblockClient(c, 1);
    dictRelease(c->bstate.keys);

    /* UNWATCH all the keys */
    unwatchAllKeys(c);
    listRelease(c->watched_keys);

    /* Unsubscribe from all the pubsub channels */
    pubsubUnsubscribeAllChannels(c,0);
    pubsubUnsubscribeShardAllChannels(c, 0);
    pubsubUnsubscribeAllPatterns(c,0);
    dictRelease(c->pubsub_channels);
    dictRelease(c->pubsub_patterns);
    dictRelease(c->pubsubshard_channels);

    /* Free data structures. */
    listRelease(c->reply);
    zfree(c->buf);
    freeReplicaReferencedReplBuffer(c);
    freeClientArgv(c);
    freeClientOriginalArgv(c);
    if (c->deferred_reply_errors)
        listRelease(c->deferred_reply_errors);
#ifdef LOG_REQ_RES
    reqresReset(c, 1);
#endif

    /* Unlink the client: this will close the socket, remove the I/O
     * handlers, and remove references of the client from different
     * places where active clients may be referenced. */
    unlinkClient(c);

    /* Master/slave cleanup Case 1:
     * we lost the connection with a slave. */
    if (c->flags & CLIENT_SLAVE) {
        /* If there is no any other slave waiting dumping RDB finished, the
         * current child process need not continue to dump RDB, then we kill it.
         * So child process won't use more memory, and we also can fork a new
         * child process asap to dump rdb for next full synchronization or bgsave.
         * But we also need to check if users enable 'save' RDB, if enable, we
         * should not remove directly since that means RDB is important for users
         * to keep data safe and we may delay configured 'save' for full sync. */
        if (server.saveparamslen == 0 &&
            c->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
            server.child_type == CHILD_TYPE_RDB &&
            server.rdb_child_type == RDB_CHILD_TYPE_DISK &&
            anyOtherSlaveWaitRdb(c) == 0)
        {
            killRDBChild();
        }
        if (c->replstate == SLAVE_STATE_SEND_BULK) {
            if (c->repldbfd != -1) close(c->repldbfd);
            if (c->replpreamble) sdsfree(c->replpreamble);
        }
        /* Monitors are kept in server.monitors, real replicas in
         * server.slaves; remove from whichever list this client is in. */
        list *l = (c->flags & CLIENT_MONITOR) ? server.monitors : server.slaves;
        ln = listSearchKey(l,c);
        serverAssert(ln != NULL);
        listDelNode(l,ln);
        /* We need to remember the time when we started to have zero
         * attached slaves, as after some time we'll free the replication
         * backlog. */
        if (getClientType(c) == CLIENT_TYPE_SLAVE && listLength(server.slaves) == 0)
            server.repl_no_slaves_since = server.unixtime;
        refreshGoodSlavesCount();
        /* Fire the replica change modules event. */
        if (c->replstate == SLAVE_STATE_ONLINE)
            moduleFireServerEvent(REDISMODULE_EVENT_REPLICA_CHANGE,
                                  REDISMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE,
                                  NULL);
    }

    /* Master/slave cleanup Case 2:
     * we lost the connection with the master. */
    if (c->flags & CLIENT_MASTER) replicationHandleMasterDisconnection();

    /* Remove the contribution that this client gave to our
     * incrementally computed memory usage. */
    server.stat_clients_type_memory[c->last_memory_type] -=
        c->last_memory_usage;
    /* Remove client from memory usage buckets */
    if (c->mem_usage_bucket) {
        c->mem_usage_bucket->mem_usage_sum -= c->last_memory_usage;
        listDelNode(c->mem_usage_bucket->clients, c->mem_usage_bucket_node);
    }

    /* Release other dynamically allocated client structure fields,
     * and finally release the client structure itself. */
    if (c->name) decrRefCount(c->name);
    if (c->lib_name) decrRefCount(c->lib_name);
    if (c->lib_ver) decrRefCount(c->lib_ver);
    freeClientMultiState(c);
    sdsfree(c->peerid);
    sdsfree(c->sockname);
    sdsfree(c->slave_addr);
    zfree(c);
}
+
+/* Schedule a client to free it at a safe time in the serverCron() function.
+ * This function is useful when we need to terminate a client but we are in
+ * a context where calling freeClient() is not possible, because the client
+ * should be valid for the continuation of the flow of the program. */
+void freeClientAsync(client *c) {
+ /* We need to handle concurrent access to the server.clients_to_close list
+ * only in the freeClientAsync() function, since it's the only function that
+ * may access the list while Redis uses I/O threads. All the other accesses
+ * are in the context of the main thread while the other threads are
+ * idle. */
+ if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_SCRIPT) return;
+ c->flags |= CLIENT_CLOSE_ASAP;
+ if (server.io_threads_num == 1) {
+ /* no need to bother with locking if there's just one thread (the main thread) */
+ listAddNodeTail(server.clients_to_close,c);
+ return;
+ }
+ static pthread_mutex_t async_free_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
+ pthread_mutex_lock(&async_free_queue_mutex);
+ listAddNodeTail(server.clients_to_close,c);
+ pthread_mutex_unlock(&async_free_queue_mutex);
+}
+
+/* Log errors for invalid use and free the client in async way.
+ * We will add additional information about the client to the message. */
+void logInvalidUseAndFreeClientAsync(client *c, const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ sds info = sdscatvprintf(sdsempty(), fmt, ap);
+ va_end(ap);
+
+ sds client = catClientInfoString(sdsempty(), c);
+ serverLog(LL_WARNING, "%s, disconnecting it: %s", info, client);
+
+ sdsfree(info);
+ sdsfree(client);
+ freeClientAsync(c);
+}
+
+/* Perform processing of the client before moving on to processing the next client
+ * this is useful for performing operations that affect the global state but can't
+ * wait until we're done with all clients. In other words can't wait until beforeSleep()
+ * return C_ERR in case client is no longer valid after call.
+ * The input client argument: c, may be NULL in case the previous client was
+ * freed before the call. */
+int beforeNextClient(client *c) {
+ /* Notice, this code is also called from 'processUnblockedClients'.
+ * But in case of a module blocked client (see RM_Call 'K' flag) we do not reach this code path.
+ * So whenever we change the code here we need to consider if we need this change on module
+ * blocked client as well */
+
+ /* Skip the client processing if we're in an IO thread, in that case we'll perform
+ this operation later (this function is called again) in the fan-in stage of the threading mechanism */
+ if (io_threads_op != IO_THREADS_OP_IDLE)
+ return C_OK;
+ /* Handle async frees */
+ /* Note: this doesn't make the server.clients_to_close list redundant because of
+ * cases where we want an async free of a client other than myself. For example
+ * in ACL modifications we disconnect clients authenticated to non-existent
+ * users (see ACL LOAD). */
+ if (c && (c->flags & CLIENT_CLOSE_ASAP)) {
+ freeClient(c);
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+/* Free the clients marked as CLOSE_ASAP, return the number of clients
+ * freed. */
+int freeClientsInAsyncFreeQueue(void) {
+ int freed = 0;
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.clients_to_close,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ client *c = listNodeValue(ln);
+
+ if (c->flags & CLIENT_PROTECTED) continue;
+
+ c->flags &= ~CLIENT_CLOSE_ASAP;
+ freeClient(c);
+ listDelNode(server.clients_to_close,ln);
+ freed++;
+ }
+ return freed;
+}
+
+/* Return a client by ID, or NULL if the client ID is not in the set
+ * of registered clients. Note that "fake clients", created with -1 as FD,
+ * are not registered clients. */
+client *lookupClientByID(uint64_t id) {
+ id = htonu64(id);
+ client *c = raxFind(server.clients_index,(unsigned char*)&id,sizeof(id));
+ return (c == raxNotFound) ? NULL : c;
+}
+
/* This function should be called from _writeToClient when the reply list is not empty,
 * it gathers the scattered buffers from reply list and sends them away with connWritev.
 * If we write successfully, it returns C_OK, otherwise, C_ERR is returned,
 * and 'nwritten' is an output parameter, it means how many bytes server write
 * to client. */
static int _writevToClient(client *c, ssize_t *nwritten) {
    int iovcnt = 0;
    /* Cap the vector length by both the system IOV_MAX and the
     * connection's own per-call limit. */
    int iovmax = min(IOV_MAX, c->conn->iovcnt);
    struct iovec iov[iovmax];
    size_t iov_bytes_len = 0;
    /* If the static reply buffer is not empty,
     * add it to the iov array for writev() as well. */
    if (c->bufpos > 0) {
        iov[iovcnt].iov_base = c->buf + c->sentlen;
        iov[iovcnt].iov_len = c->bufpos - c->sentlen;
        iov_bytes_len += iov[iovcnt++].iov_len;
    }
    /* The first node of reply list might be incomplete from the last call,
     * thus it needs to be calibrated to get the actual data address and length.
     * Note: c->sentlen refers to the first list node only when the static
     * buffer was fully drained (bufpos == 0). */
    size_t offset = c->bufpos > 0 ? 0 : c->sentlen;
    listIter iter;
    listNode *next;
    clientReplyBlock *o;
    listRewind(c->reply, &iter);
    while ((next = listNext(&iter)) && iovcnt < iovmax && iov_bytes_len < NET_MAX_WRITES_PER_EVENT) {
        o = listNodeValue(next);
        if (o->used == 0) { /* empty node, just release it and skip. */
            c->reply_bytes -= o->size;
            listDelNode(c->reply, next);
            offset = 0;
            continue;
        }

        iov[iovcnt].iov_base = o->buf + offset;
        iov[iovcnt].iov_len = o->used - offset;
        iov_bytes_len += iov[iovcnt++].iov_len;
        /* Only the first node may carry a partial-send offset. */
        offset = 0;
    }
    if (iovcnt == 0) return C_OK;
    *nwritten = connWritev(c->conn, iov, iovcnt);
    if (*nwritten <= 0) return C_ERR;

    /* Locate the new node which has leftover data and
     * release all nodes in front of it. */
    ssize_t remaining = *nwritten;
    if (c->bufpos > 0) { /* deal with static reply buffer first. */
        int buf_len = c->bufpos - c->sentlen;
        c->sentlen += remaining;
        /* If the buffer was sent, set bufpos to zero to continue with
         * the remainder of the reply. */
        if (remaining >= buf_len) {
            c->bufpos = 0;
            c->sentlen = 0;
        }
        /* If remaining < buf_len this goes negative and the loop below
         * is skipped: only part of the static buffer went out. */
        remaining -= buf_len;
    }
    listRewind(c->reply, &iter);
    while (remaining > 0) {
        next = listNext(&iter);
        o = listNodeValue(next);
        if (remaining < (ssize_t)(o->used - c->sentlen)) {
            /* Partial node: remember how much of it was sent. */
            c->sentlen += remaining;
            break;
        }
        /* Node fully sent: account for it and release it. */
        remaining -= (ssize_t)(o->used - c->sentlen);
        c->reply_bytes -= o->size;
        listDelNode(c->reply, next);
        c->sentlen = 0;
    }

    return C_OK;
}
+
/* This function does actual writing output buffers to different types of
 * clients, it is called by writeToClient.
 * If we write successfully, it returns C_OK, otherwise, C_ERR is returned,
 * and 'nwritten' is an output parameter, it means how many bytes server write
 * to client. */
int _writeToClient(client *c, ssize_t *nwritten) {
    *nwritten = 0;
    if (getClientType(c) == CLIENT_TYPE_SLAVE) {
        /* Replicas are served from the shared replication buffer blocks,
         * never from the per-client reply buffers. */
        serverAssert(c->bufpos == 0 && listLength(c->reply) == 0);

        replBufBlock *o = listNodeValue(c->ref_repl_buf_node);
        serverAssert(o->used >= c->ref_block_pos);
        /* Send current block if it is not fully sent. */
        if (o->used > c->ref_block_pos) {
            *nwritten = connWrite(c->conn, o->buf+c->ref_block_pos,
                                  o->used-c->ref_block_pos);
            if (*nwritten <= 0) return C_ERR;
            c->ref_block_pos += *nwritten;
        }

        /* If we fully sent the object on head, go to the next one.
         * Move the refcount from the finished block to the next one so
         * the backlog trimming below knows which blocks are still held. */
        listNode *next = listNextNode(c->ref_repl_buf_node);
        if (next && c->ref_block_pos == o->used) {
            o->refcount--;
            ((replBufBlock *)(listNodeValue(next)))->refcount++;
            c->ref_repl_buf_node = next;
            c->ref_block_pos = 0;
            incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
        }
        return C_OK;
    }

    /* When the reply list is not empty, it's better to use writev to save us some
     * system calls and TCP packets. */
    if (listLength(c->reply) > 0) {
        int ret = _writevToClient(c, nwritten);
        if (ret != C_OK) return ret;

        /* If there are no longer objects in the list, we expect
         * the count of reply bytes to be exactly zero. */
        if (listLength(c->reply) == 0)
            serverAssert(c->reply_bytes == 0);
    } else if (c->bufpos > 0) {
        /* Only the static reply buffer has data: plain connWrite. */
        *nwritten = connWrite(c->conn, c->buf + c->sentlen, c->bufpos - c->sentlen);
        if (*nwritten <= 0) return C_ERR;
        c->sentlen += *nwritten;

        /* If the buffer was sent, set bufpos to zero to continue with
         * the remainder of the reply. */
        if ((int)c->sentlen == c->bufpos) {
            c->bufpos = 0;
            c->sentlen = 0;
        }
    }

    return C_OK;
}
+
/* Write data in output buffers to client. Return C_OK if the client
 * is still valid after the call, C_ERR if it was freed because of some
 * error. If handler_installed is set, it will attempt to clear the
 * write event.
 *
 * This function is called by threads, but always with handler_installed
 * set to 0. So when handler_installed is set to 0 the function must be
 * thread safe. */
int writeToClient(client *c, int handler_installed) {
    /* Update total number of writes on server */
    atomicIncr(server.stat_total_writes_processed, 1);

    ssize_t nwritten = 0, totwritten = 0;

    while(clientHasPendingReplies(c)) {
        int ret = _writeToClient(c, &nwritten);
        /* On error we stop the loop; nwritten keeps the failing value so
         * the -1 check below can diagnose the connection state. */
        if (ret == C_ERR) break;
        totwritten += nwritten;
        /* Note that we avoid to send more than NET_MAX_WRITES_PER_EVENT
         * bytes, in a single threaded server it's a good idea to serve
         * other clients as well, even if a very large request comes from
         * super fast link that is always able to accept data (in real world
         * scenario think about 'KEYS *' against the loopback interface).
         *
         * However if we are over the maxmemory limit we ignore that and
         * just deliver as much data as it is possible to deliver.
         *
         * Moreover, we also send as much as possible if the client is
         * a slave or a monitor (otherwise, on high-speed traffic, the
         * replication/output buffer will grow indefinitely) */
        if (totwritten > NET_MAX_WRITES_PER_EVENT &&
            (server.maxmemory == 0 ||
             zmalloc_used_memory() < server.maxmemory) &&
            !(c->flags & CLIENT_SLAVE)) break;
    }

    /* Account the traffic to the right counter. */
    if (getClientType(c) == CLIENT_TYPE_SLAVE) {
        atomicIncr(server.stat_net_repl_output_bytes, totwritten);
    } else {
        atomicIncr(server.stat_net_output_bytes, totwritten);
    }

    if (nwritten == -1) {
        /* Only a dead connection is fatal; a would-block condition leaves
         * the connection in CONNECTED state and we simply retry later. */
        if (connGetState(c->conn) != CONN_STATE_CONNECTED) {
            serverLog(LL_VERBOSE,
                      "Error writing to client: %s", connGetLastError(c->conn));
            freeClientAsync(c);
            return C_ERR;
        }
    }
    if (totwritten > 0) {
        /* For clients representing masters we don't count sending data
         * as an interaction, since we always send REPLCONF ACK commands
         * that take some time to just fill the socket output buffer.
         * We just rely on data / pings received for timeout detection. */
        if (!(c->flags & CLIENT_MASTER)) c->lastinteraction = server.unixtime;
    }
    if (!clientHasPendingReplies(c)) {
        c->sentlen = 0;
        /* Note that writeToClient() is called in a threaded way, but
         * aeDeleteFileEvent() is not thread safe: however writeToClient()
         * is always called with handler_installed set to 0 from threads
         * so we are fine. */
        if (handler_installed) {
            serverAssert(io_threads_op == IO_THREADS_OP_IDLE);
            connSetWriteHandler(c->conn, NULL);
        }

        /* Close connection after entire reply has been sent. */
        if (c->flags & CLIENT_CLOSE_AFTER_REPLY) {
            freeClientAsync(c);
            return C_ERR;
        }
    }
    /* Update client's memory usage after writing.
     * Since this isn't thread safe we do this conditionally. In case of threaded writes this is done in
     * handleClientsWithPendingWritesUsingThreads(). */
    if (io_threads_op == IO_THREADS_OP_IDLE)
        updateClientMemUsageAndBucket(c);
    return C_OK;
}
+
+/* Write event handler. Just send data to the client. */
+void sendReplyToClient(connection *conn) {
+ client *c = connGetPrivateData(conn);
+ writeToClient(c,1);
+}
+
+/* This function is called just before entering the event loop, in the hope
+ * we can just write the replies to the client output buffer without any
+ * need to use a syscall in order to install the writable event handler,
+ * get it called, and so forth. */
+int handleClientsWithPendingWrites(void) {
+ listIter li;
+ listNode *ln;
+ int processed = listLength(server.clients_pending_write);
+
+ listRewind(server.clients_pending_write,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+ c->flags &= ~CLIENT_PENDING_WRITE;
+ listUnlinkNode(server.clients_pending_write,ln);
+
+ /* If a client is protected, don't do anything,
+ * that may trigger write error or recreate handler. */
+ if (c->flags & CLIENT_PROTECTED) continue;
+
+ /* Don't write to clients that are going to be closed anyway. */
+ if (c->flags & CLIENT_CLOSE_ASAP) continue;
+
+ /* Try to write buffers to the client socket. */
+ if (writeToClient(c,0) == C_ERR) continue;
+
+ /* If after the synchronous writes above we still have data to
+ * output to the client, we need to install the writable handler. */
+ if (clientHasPendingReplies(c)) {
+ installClientWriteHandler(c);
+ }
+ }
+ return processed;
+}
+
/* resetClient prepare the client to process the next command:
 * free the argument vector, reset the protocol parsing state, and clear
 * per-command flags (ASKING, CACHING, REPLY SKIP) unless the command just
 * executed is the one that sets them. */
void resetClient(client *c) {
    /* Remember which command just ran: ASKING / CLIENT must not clear
     * the very flag they set for the next command. */
    redisCommandProc *prevcmd = c->cmd ? c->cmd->proc : NULL;

    freeClientArgv(c);
    c->cur_script = NULL;
    c->reqtype = 0;
    c->multibulklen = 0;
    c->bulklen = -1;
    c->slot = -1;
    c->flags &= ~CLIENT_EXECUTING_COMMAND;

    /* Make sure the duration has been recorded to some command. */
    serverAssert(c->duration == 0);
#ifdef LOG_REQ_RES
    reqresReset(c, 1);
#endif

    if (c->deferred_reply_errors)
        listRelease(c->deferred_reply_errors);
    c->deferred_reply_errors = NULL;

    /* We clear the ASKING flag as well if we are not inside a MULTI, and
     * if what we just executed is not the ASKING command itself. */
    if (!(c->flags & CLIENT_MULTI) && prevcmd != askingCommand)
        c->flags &= ~CLIENT_ASKING;

    /* We do the same for the CACHING command as well. It also affects
     * the next command or transaction executed, in a way very similar
     * to ASKING. */
    if (!(c->flags & CLIENT_MULTI) && prevcmd != clientCommand)
        c->flags &= ~CLIENT_TRACKING_CACHING;

    /* Remove the CLIENT_REPLY_SKIP flag if any so that the reply
     * to the next command will be sent, but set the flag if the command
     * we just processed was "CLIENT REPLY SKIP". */
    c->flags &= ~CLIENT_REPLY_SKIP;
    if (c->flags & CLIENT_REPLY_SKIP_NEXT) {
        c->flags |= CLIENT_REPLY_SKIP;
        c->flags &= ~CLIENT_REPLY_SKIP_NEXT;
    }
}
+
+/* This function is used when we want to re-enter the event loop but there
+ * is the risk that the client we are dealing with will be freed in some
+ * way. This happens for instance in:
+ *
+ * * DEBUG RELOAD and similar.
+ * * When a Lua script is in -BUSY state.
+ *
+ * So the function will protect the client by doing two things:
+ *
+ * 1) It removes the file events. This way it is not possible that an
+ * error is signaled on the socket, freeing the client.
+ * 2) Moreover it makes sure that if the client is freed in a different code
+ * path, it is not really released, but only marked for later release. */
+void protectClient(client *c) {
+ c->flags |= CLIENT_PROTECTED;
+ if (c->conn) {
+ connSetReadHandler(c->conn,NULL);
+ connSetWriteHandler(c->conn,NULL);
+ }
+}
+
+/* This will undo the client protection done by protectClient() */
+void unprotectClient(client *c) {
+ if (c->flags & CLIENT_PROTECTED) {
+ c->flags &= ~CLIENT_PROTECTED;
+ if (c->conn) {
+ connSetReadHandler(c->conn,readQueryFromClient);
+ if (clientHasPendingReplies(c)) putClientInPendingWriteQueue(c);
+ }
+ }
+}
+
/* Like processMultibulkBuffer(), but for the inline protocol instead of RESP,
 * this function consumes the client query buffer and creates a command ready
 * to be executed inside the client structure. Returns C_OK if the command
 * is ready to be executed, or C_ERR if there is still protocol to read to
 * have a well formed command. The function also returns C_ERR when there is
 * a protocol error: in such a case the client structure is setup to reply
 * with the error and close the connection. */
int processInlineBuffer(client *c) {
    char *newline;
    int argc, j, linefeed_chars = 1;
    sds *argv, aux;
    size_t querylen;

    /* Search for end of line */
    newline = strchr(c->querybuf+c->qb_pos,'\n');

    /* Nothing to do without a \r\n */
    if (newline == NULL) {
        /* Still no terminator: only an error if the pending data already
         * exceeds the inline size limit. */
        if (sdslen(c->querybuf)-c->qb_pos > PROTO_INLINE_MAX_SIZE) {
            addReplyError(c,"Protocol error: too big inline request");
            setProtocolError("too big inline request",c);
        }
        return C_ERR;
    }

    /* Handle the \r\n case. */
    if (newline != c->querybuf+c->qb_pos && *(newline-1) == '\r')
        newline--, linefeed_chars++;

    /* Split the input buffer up to the \r\n */
    querylen = newline-(c->querybuf+c->qb_pos);
    aux = sdsnewlen(c->querybuf+c->qb_pos,querylen);
    argv = sdssplitargs(aux,&argc);
    sdsfree(aux);
    if (argv == NULL) {
        addReplyError(c,"Protocol error: unbalanced quotes in request");
        setProtocolError("unbalanced quotes in inline request",c);
        return C_ERR;
    }

    /* Newline from slaves can be used to refresh the last ACK time.
     * This is useful for a slave to ping back while loading a big
     * RDB file. */
    if (querylen == 0 && getClientType(c) == CLIENT_TYPE_SLAVE)
        c->repl_ack_time = server.unixtime;

    /* Masters should never send us inline protocol to run actual
     * commands. If this happens, it is likely due to a bug in Redis where
     * we got some desynchronization in the protocol, for example
     * because of a PSYNC gone bad.
     *
     * However there is an exception: masters may send us just a newline
     * to keep the connection active. */
    if (querylen != 0 && c->flags & CLIENT_MASTER) {
        sdsfreesplitres(argv,argc);
        serverLog(LL_WARNING,"WARNING: Receiving inline protocol from master, master stream corruption? Closing the master connection and discarding the cached master.");
        setProtocolError("Master using the inline protocol. Desync?",c);
        return C_ERR;
    }

    /* Move querybuffer position to the next query in the buffer. */
    c->qb_pos += querylen+linefeed_chars;

    /* Setup argv array on client structure */
    if (argc) {
        if (c->argv) zfree(c->argv);
        c->argv_len = argc;
        c->argv = zmalloc(sizeof(robj*)*c->argv_len);
        c->argv_len_sum = 0;
    }

    /* Create redis objects for all arguments. Each sds string from the
     * split is handed over to the created object, so below only the
     * argv array itself is freed, not its elements. */
    for (c->argc = 0, j = 0; j < argc; j++) {
        c->argv[c->argc] = createObject(OBJ_STRING,argv[j]);
        c->argc++;
        c->argv_len_sum += sdslen(argv[j]);
    }
    zfree(argv);
    return C_OK;
}
+
+/* Helper function. Record protocol error details in server log,
+ * and set the client as CLIENT_CLOSE_AFTER_REPLY and
+ * CLIENT_PROTOCOL_ERROR. */
+#define PROTO_DUMP_LEN 128
+static void setProtocolError(const char *errstr, client *c) {
+ if (server.verbosity <= LL_VERBOSE || c->flags & CLIENT_MASTER) {
+ sds client = catClientInfoString(sdsempty(),c);
+
+ /* Sample some protocol to given an idea about what was inside. */
+ char buf[256];
+ if (sdslen(c->querybuf)-c->qb_pos < PROTO_DUMP_LEN) {
+ snprintf(buf,sizeof(buf),"Query buffer during protocol error: '%s'", c->querybuf+c->qb_pos);
+ } else {
+ snprintf(buf,sizeof(buf),"Query buffer during protocol error: '%.*s' (... more %zu bytes ...) '%.*s'", PROTO_DUMP_LEN/2, c->querybuf+c->qb_pos, sdslen(c->querybuf)-c->qb_pos-PROTO_DUMP_LEN, PROTO_DUMP_LEN/2, c->querybuf+sdslen(c->querybuf)-PROTO_DUMP_LEN/2);
+ }
+
+ /* Remove non printable chars. */
+ char *p = buf;
+ while (*p != '\0') {
+ if (!isprint(*p)) *p = '.';
+ p++;
+ }
+
+ /* Log all the client and protocol info. */
+ int loglevel = (c->flags & CLIENT_MASTER) ? LL_WARNING :
+ LL_VERBOSE;
+ serverLog(loglevel,
+ "Protocol error (%s) from client: %s. %s", errstr, client, buf);
+ sdsfree(client);
+ }
+ c->flags |= (CLIENT_CLOSE_AFTER_REPLY|CLIENT_PROTOCOL_ERROR);
+}
+
/* Process the query buffer for client 'c', setting up the client argument
 * vector for command execution. Returns C_OK if after running the function
 * the client has a well-formed ready to be processed command, otherwise
 * C_ERR if there is still to read more buffer to get the full command.
 * The function also returns C_ERR when there is a protocol error: in such a
 * case the client structure is setup to reply with the error and close
 * the connection.
 *
 * This function is called if processInputBuffer() detects that the next
 * command is in RESP format, so the first byte in the command is found
 * to be '*'. Otherwise for inline commands processInlineBuffer() is called.
 *
 * The parser is incremental: c->multibulklen and c->bulklen persist
 * across calls so parsing resumes where the previous read left off. */
int processMultibulkBuffer(client *c) {
    char *newline = NULL;
    int ok;
    long long ll;

    if (c->multibulklen == 0) {
        /* The client should have been reset */
        serverAssertWithInfo(c,NULL,c->argc == 0);

        /* Multi bulk length cannot be read without a \r\n */
        newline = strchr(c->querybuf+c->qb_pos,'\r');
        if (newline == NULL) {
            if (sdslen(c->querybuf)-c->qb_pos > PROTO_INLINE_MAX_SIZE) {
                addReplyError(c,"Protocol error: too big mbulk count string");
                setProtocolError("too big mbulk count string",c);
            }
            return C_ERR;
        }

        /* Buffer should also contain \n */
        if (newline-(c->querybuf+c->qb_pos) > (ssize_t)(sdslen(c->querybuf)-c->qb_pos-2))
            return C_ERR;

        /* We know for sure there is a whole line since newline != NULL,
         * so go ahead and find out the multi bulk length. */
        serverAssertWithInfo(c,NULL,c->querybuf[c->qb_pos] == '*');
        ok = string2ll(c->querybuf+1+c->qb_pos,newline-(c->querybuf+1+c->qb_pos),&ll);
        if (!ok || ll > INT_MAX) {
            addReplyError(c,"Protocol error: invalid multibulk length");
            setProtocolError("invalid mbulk count",c);
            return C_ERR;
        } else if (ll > 10 && authRequired(c)) {
            /* Reject suspiciously large argument counts from clients that
             * are not authenticated yet. */
            addReplyError(c, "Protocol error: unauthenticated multibulk length");
            setProtocolError("unauth mbulk count", c);
            return C_ERR;
        }

        /* Skip past the '*<count>\r\n' header. */
        c->qb_pos = (newline-c->querybuf)+2;

        /* A zero or negative count is a complete (empty) command. */
        if (ll <= 0) return C_OK;

        c->multibulklen = ll;

        /* Setup argv array on client structure. Start with at most 1024
         * slots; it grows on demand below. */
        if (c->argv) zfree(c->argv);
        c->argv_len = min(c->multibulklen, 1024);
        c->argv = zmalloc(sizeof(robj*)*c->argv_len);
        c->argv_len_sum = 0;
    }

    serverAssertWithInfo(c,NULL,c->multibulklen > 0);
    while(c->multibulklen) {
        /* Read bulk length if unknown */
        if (c->bulklen == -1) {
            newline = strchr(c->querybuf+c->qb_pos,'\r');
            if (newline == NULL) {
                if (sdslen(c->querybuf)-c->qb_pos > PROTO_INLINE_MAX_SIZE) {
                    addReplyError(c,
                                  "Protocol error: too big bulk count string");
                    setProtocolError("too big bulk count string",c);
                    return C_ERR;
                }
                /* Wait for more data. */
                break;
            }

            /* Buffer should also contain \n */
            if (newline-(c->querybuf+c->qb_pos) > (ssize_t)(sdslen(c->querybuf)-c->qb_pos-2))
                break;

            if (c->querybuf[c->qb_pos] != '$') {
                addReplyErrorFormat(c,
                                    "Protocol error: expected '$', got '%c'",
                                    c->querybuf[c->qb_pos]);
                setProtocolError("expected $ but got something else",c);
                return C_ERR;
            }

            ok = string2ll(c->querybuf+c->qb_pos+1,newline-(c->querybuf+c->qb_pos+1),&ll);
            if (!ok || ll < 0 ||
                (!(c->flags & CLIENT_MASTER) && ll > server.proto_max_bulk_len)) {
                addReplyError(c,"Protocol error: invalid bulk length");
                setProtocolError("invalid bulk length",c);
                return C_ERR;
            } else if (ll > 16384 && authRequired(c)) {
                /* Same anti-abuse limit as above, for single bulk sizes. */
                addReplyError(c, "Protocol error: unauthenticated bulk length");
                setProtocolError("unauth bulk length", c);
                return C_ERR;
            }

            c->qb_pos = newline-c->querybuf+2;
            if (!(c->flags & CLIENT_MASTER) && ll >= PROTO_MBULK_BIG_ARG) {
                /* When the client is not a master client (because master
                 * client's querybuf can only be trimmed after data applied
                 * and sent to replicas).
                 *
                 * If we are going to read a large object from network
                 * try to make it likely that it will start at c->querybuf
                 * boundary so that we can optimize object creation
                 * avoiding a large copy of data.
                 *
                 * But only when the data we have not parsed is less than
                 * or equal to ll+2. If the data length is greater than
                 * ll+2, trimming querybuf is just a waste of time, because
                 * at this time the querybuf contains not only our bulk. */
                if (sdslen(c->querybuf)-c->qb_pos <= (size_t)ll+2) {
                    sdsrange(c->querybuf,c->qb_pos,-1);
                    c->qb_pos = 0;
                    /* Hint the sds library about the amount of bytes this string is
                     * going to contain. */
                    c->querybuf = sdsMakeRoomForNonGreedy(c->querybuf,ll+2-sdslen(c->querybuf));
                    /* We later set the peak to the used portion of the buffer, but here we over
                     * allocated because we know what we need, make sure it'll not be shrunk before used. */
                    if (c->querybuf_peak < (size_t)ll + 2) c->querybuf_peak = ll + 2;
                }
            }
            c->bulklen = ll;
        }

        /* Read bulk argument */
        if (sdslen(c->querybuf)-c->qb_pos < (size_t)(c->bulklen+2)) {
            /* Not enough data (+2 == trailing \r\n) */
            break;
        } else {
            /* Check if we have space in argv, grow if needed */
            if (c->argc >= c->argv_len) {
                c->argv_len = min(c->argv_len < INT_MAX/2 ? c->argv_len*2 : INT_MAX, c->argc+c->multibulklen);
                c->argv = zrealloc(c->argv, sizeof(robj*)*c->argv_len);
            }

            /* Optimization: if a non-master client's buffer contains JUST our bulk element
             * instead of creating a new object by *copying* the sds we
             * just use the current sds string. */
            if (!(c->flags & CLIENT_MASTER) &&
                c->qb_pos == 0 &&
                c->bulklen >= PROTO_MBULK_BIG_ARG &&
                sdslen(c->querybuf) == (size_t)(c->bulklen+2))
            {
                /* The querybuf itself becomes the argument object... */
                c->argv[c->argc++] = createObject(OBJ_STRING,c->querybuf);
                c->argv_len_sum += c->bulklen;
                sdsIncrLen(c->querybuf,-2); /* remove CRLF */
                /* Assume that if we saw a fat argument we'll see another one
                 * likely... */
                c->querybuf = sdsnewlen(SDS_NOINIT,c->bulklen+2);
                sdsclear(c->querybuf);
            } else {
                c->argv[c->argc++] =
                    createStringObject(c->querybuf+c->qb_pos,c->bulklen);
                c->argv_len_sum += c->bulklen;
                c->qb_pos += c->bulklen+2;
            }
            /* Mark the bulk length as consumed and move to the next arg. */
            c->bulklen = -1;
            c->multibulklen--;
        }
    }

    /* We're done when c->multibulk == 0 */
    if (c->multibulklen == 0) return C_OK;

    /* Still not ready to process the command */
    return C_ERR;
}
+
+/* Perform necessary tasks after a command was executed:
+ *
+ * 1. The client is reset unless there are reasons to avoid doing it.
+ * 2. In the case of master clients, the replication offset is updated.
+ * 3. Propagate commands we got from our master to replicas down the line. */
+void commandProcessed(client *c) {
+ /* If client is blocked(including paused), just return avoid reset and replicate.
+ *
+ * 1. Don't reset the client structure for blocked clients, so that the reply
+ * callback will still be able to access the client argv and argc fields.
+ * The client will be reset in unblockClient().
+ * 2. Don't update replication offset or propagate commands to replicas,
+ * since we have not applied the command. */
+ if (c->flags & CLIENT_BLOCKED) return;
+
+ /* Record the response for request/response logging, then clear the
+ * per-command state (argv, multibulk counters, etc.). */
+ reqresAppendResponse(c);
+ resetClient(c);
+
+ /* Snapshot the applied offset before updating it, so we can compute
+ * below how much of the replication stream this command consumed. */
+ long long prev_offset = c->reploff;
+ if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) {
+ /* Update the applied replication offset of our master. */
+ c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos;
+ }
+
+ /* If the client is a master we need to compute the difference
+ * between the applied offset before and after processing the buffer,
+ * to understand how much of the replication stream was actually
+ * applied to the master state: this quantity, and its corresponding
+ * part of the replication stream, will be propagated to the
+ * sub-replicas and to the replication backlog. */
+ if (c->flags & CLIENT_MASTER) {
+ long long applied = c->reploff - prev_offset;
+ if (applied) {
+ replicationFeedStreamFromMasterStream(c->querybuf+c->repl_applied,applied);
+ c->repl_applied += applied;
+ }
+ }
+}
+
+/* This function calls processCommand(), but also performs a few sub tasks
+ * for the client that are useful in that context:
+ *
+ * 1. It sets the current client to the client 'c'.
+ * 2. calls commandProcessed() if the command was handled.
+ *
+ * The function returns C_ERR in case the client was freed as a side effect
+ * of processing the command, otherwise C_OK is returned. */
+int processCommandAndResetClient(client *c) {
+ int deadclient = 0;
+ /* Save the previous current client: this function can be re-entered
+ * via processEventsWhileBlocked (see the restore comment below). */
+ client *old_client = server.current_client;
+ server.current_client = c;
+ if (processCommand(c) == C_OK) {
+ commandProcessed(c);
+ /* Update the client's memory to include output buffer growth following the
+ * processed command. */
+ updateClientMemUsageAndBucket(c);
+ }
+
+ /* A NULL current_client means the client was freed while the command
+ * executed (e.g. by freeClient called from within the command). */
+ if (server.current_client == NULL) deadclient = 1;
+ /*
+ * Restore the old client, this is needed because when a script
+ * times out, we will get into this code from processEventsWhileBlocked.
+ * Which will cause to set the server.current_client. If not restored
+ * we will return 1 to our caller which will falsely indicate the client
+ * is dead and will stop reading from its buffer.
+ */
+ server.current_client = old_client;
+ /* performEvictions may flush slave output buffers. This may
+ * result in a slave, that may be the active client, to be
+ * freed. */
+ return deadclient ? C_ERR : C_OK;
+}
+
+
+/* This function will execute any fully parsed commands pending on
+ * the client. Returns C_ERR if the client is no longer valid after executing
+ * the command, and C_OK for all other cases. */
+int processPendingCommandAndInputBuffer(client *c) {
+ /* Notice, this code is also called from 'processUnblockedClients'.
+ * But in case of a module blocked client (see RM_Call 'K' flag) we do not reach this code path.
+ * So whenever we change the code here we need to consider if we need this change on module
+ * blocked client as well */
+ if (c->flags & CLIENT_PENDING_COMMAND) {
+ /* Clear the flag first: the already-parsed command in argv is
+ * executed exactly once. */
+ c->flags &= ~CLIENT_PENDING_COMMAND;
+ if (processCommandAndResetClient(c) == C_ERR) {
+ return C_ERR;
+ }
+ }
+
+ /* Now process client if it has more data in it's buffer.
+ *
+ * Note: when a master client steps into this function,
+ * it can always satisfy this condition, because its querybuf
+ * contains data not applied. */
+ if (c->querybuf && sdslen(c->querybuf) > 0) {
+ return processInputBuffer(c);
+ }
+ return C_OK;
+}
+
+/* This function is called every time, in the client structure 'c', there is
+ * more query buffer to process, because we read more data from the socket
+ * or because a client was blocked and later reactivated, so there could be
+ * pending query buffer, already representing a full command, to process.
+ * return C_ERR in case the client was freed during the processing */
+int processInputBuffer(client *c) {
+ /* Keep processing while there is something in the input buffer */
+ while(c->qb_pos < sdslen(c->querybuf)) {
+ /* Immediately abort if the client is in the middle of something. */
+ if (c->flags & CLIENT_BLOCKED) break;
+
+ /* Don't process more buffers from clients that have already pending
+ * commands to execute in c->argv. */
+ if (c->flags & CLIENT_PENDING_COMMAND) break;
+
+ /* Don't process input from the master while there is a busy script
+ * condition on the slave. We want just to accumulate the replication
+ * stream (instead of replying -BUSY like we do with other clients) and
+ * later resume the processing. */
+ if (isInsideYieldingLongCommand() && c->flags & CLIENT_MASTER) break;
+
+ /* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is
+ * written to the client. Make sure to not let the reply grow after
+ * this flag has been set (i.e. don't process more commands).
+ *
+ * The same applies for clients we want to terminate ASAP. */
+ if (c->flags & (CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP)) break;
+
+ /* Determine request type when unknown. A leading '*' means a RESP
+ * multibulk request; anything else is parsed as an inline command. */
+ if (!c->reqtype) {
+ if (c->querybuf[c->qb_pos] == '*') {
+ c->reqtype = PROTO_REQ_MULTIBULK;
+ } else {
+ c->reqtype = PROTO_REQ_INLINE;
+ }
+ }
+
+ /* A non-C_OK return means "not enough data yet" or a protocol error;
+ * either way stop parsing for now. */
+ if (c->reqtype == PROTO_REQ_INLINE) {
+ if (processInlineBuffer(c) != C_OK) break;
+ } else if (c->reqtype == PROTO_REQ_MULTIBULK) {
+ if (processMultibulkBuffer(c) != C_OK) break;
+ } else {
+ serverPanic("Unknown request type");
+ }
+
+ /* Multibulk processing could see a <= 0 length. */
+ if (c->argc == 0) {
+ resetClient(c);
+ } else {
+ /* If we are in the context of an I/O thread, we can't really
+ * execute the command here. All we can do is to flag the client
+ * as one that needs to process the command. */
+ if (io_threads_op != IO_THREADS_OP_IDLE) {
+ serverAssert(io_threads_op == IO_THREADS_OP_READ);
+ c->flags |= CLIENT_PENDING_COMMAND;
+ break;
+ }
+
+ /* We are finally ready to execute the command. */
+ if (processCommandAndResetClient(c) == C_ERR) {
+ /* If the client is no longer valid, we avoid exiting this
+ * loop and trimming the client buffer later. So we return
+ * ASAP in that case. */
+ return C_ERR;
+ }
+ }
+ }
+
+ if (c->flags & CLIENT_MASTER) {
+ /* If the client is a master, trim the querybuf to repl_applied,
+ * since master client is very special, its querybuf not only
+ * used to parse command, but also proxy to sub-replicas.
+ *
+ * Here are some scenarios we cannot trim to qb_pos:
+ * 1. we don't receive complete command from master
+ * 2. master client blocked cause of client pause
+ * 3. io threads operate read, master client flagged with CLIENT_PENDING_COMMAND
+ *
+ * In these scenarios, qb_pos points to the part of the current command
+ * or the beginning of next command, and the current command is not applied yet,
+ * so the repl_applied is not equal to qb_pos. */
+ if (c->repl_applied) {
+ sdsrange(c->querybuf,c->repl_applied,-1);
+ c->qb_pos -= c->repl_applied;
+ c->repl_applied = 0;
+ }
+ } else if (c->qb_pos) {
+ /* Trim to pos */
+ sdsrange(c->querybuf,c->qb_pos,-1);
+ c->qb_pos = 0;
+ }
+
+ /* Update client memory usage after processing the query buffer, this is
+ * important in case the query buffer is big and wasn't drained during
+ * the above loop (because of partially sent big commands). */
+ if (io_threads_op == IO_THREADS_OP_IDLE)
+ updateClientMemUsageAndBucket(c);
+
+ return C_OK;
+}
+
+/* Connection read handler: pull available bytes from the socket into the
+ * client's query buffer, update read statistics, enforce the maximum query
+ * buffer length for non-master clients, and then parse/execute whatever
+ * complete commands are now available via processInputBuffer(). */
+void readQueryFromClient(connection *conn) {
+ client *c = connGetPrivateData(conn);
+ int nread, big_arg = 0;
+ size_t qblen, readlen;
+
+ /* Check if we want to read from the client later when exiting from
+ * the event loop. This is the case if threaded I/O is enabled. */
+ if (postponeClientRead(c)) return;
+
+ /* Update total number of reads on server */
+ atomicIncr(server.stat_total_reads_processed, 1);
+
+ readlen = PROTO_IOBUF_LEN;
+ /* If this is a multi bulk request, and we are processing a bulk reply
+ * that is large enough, try to maximize the probability that the query
+ * buffer contains exactly the SDS string representing the object, even
+ * at the risk of requiring more read(2) calls. This way the function
+ * processMultiBulkBuffer() can avoid copying buffers to create the
+ * Redis Object representing the argument. */
+ if (c->reqtype == PROTO_REQ_MULTIBULK && c->multibulklen && c->bulklen != -1
+ && c->bulklen >= PROTO_MBULK_BIG_ARG)
+ {
+ ssize_t remaining = (size_t)(c->bulklen+2)-(sdslen(c->querybuf)-c->qb_pos);
+ big_arg = 1;
+
+ /* Note that the 'remaining' variable may be zero in some edge case,
+ * for example once we resume a blocked client after CLIENT PAUSE. */
+ if (remaining > 0) readlen = remaining;
+
+ /* Master client needs expand the readlen when meet BIG_ARG(see #9100),
+ * but doesn't need align to the next arg, we can read more data. */
+ if (c->flags & CLIENT_MASTER && readlen < PROTO_IOBUF_LEN)
+ readlen = PROTO_IOBUF_LEN;
+ }
+
+ qblen = sdslen(c->querybuf);
+ if (!(c->flags & CLIENT_MASTER) && // master client's querybuf can grow greedy.
+ (big_arg || sdsalloc(c->querybuf) < PROTO_IOBUF_LEN)) {
+ /* When reading a BIG_ARG we won't be reading more than that one arg
+ * into the query buffer, so we don't need to pre-allocate more than we
+ * need, so using the non-greedy growing. For an initial allocation of
+ * the query buffer, we also don't wanna use the greedy growth, in order
+ * to avoid collision with the RESIZE_THRESHOLD mechanism. */
+ c->querybuf = sdsMakeRoomForNonGreedy(c->querybuf, readlen);
+ /* We later set the peak to the used portion of the buffer, but here we over
+ * allocated because we know what we need, make sure it'll not be shrunk before used. */
+ if (c->querybuf_peak < qblen + readlen) c->querybuf_peak = qblen + readlen;
+ } else {
+ c->querybuf = sdsMakeRoomFor(c->querybuf, readlen);
+
+ /* Read as much as possible from the socket to save read(2) system calls. */
+ readlen = sdsavail(c->querybuf);
+ }
+ nread = connRead(c->conn, c->querybuf+qblen, readlen);
+ if (nread == -1) {
+ if (connGetState(conn) == CONN_STATE_CONNECTED) {
+ /* Transient error (e.g. nothing to read right now): keep the
+ * connection and try again on the next readable event. */
+ return;
+ } else {
+ serverLog(LL_VERBOSE, "Reading from client: %s",connGetLastError(c->conn));
+ freeClientAsync(c);
+ goto done;
+ }
+ } else if (nread == 0) {
+ /* Zero bytes read means the peer closed the connection. */
+ if (server.verbosity <= LL_VERBOSE) {
+ sds info = catClientInfoString(sdsempty(), c);
+ serverLog(LL_VERBOSE, "Client closed connection %s", info);
+ sdsfree(info);
+ }
+ freeClientAsync(c);
+ goto done;
+ }
+
+ sdsIncrLen(c->querybuf,nread);
+ qblen = sdslen(c->querybuf);
+ if (c->querybuf_peak < qblen) c->querybuf_peak = qblen;
+
+ c->lastinteraction = server.unixtime;
+ if (c->flags & CLIENT_MASTER) {
+ c->read_reploff += nread;
+ atomicIncr(server.stat_net_repl_input_bytes, nread);
+ } else {
+ atomicIncr(server.stat_net_input_bytes, nread);
+ }
+
+ if (!(c->flags & CLIENT_MASTER) && sdslen(c->querybuf) > server.client_max_querybuf_len) {
+ sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty();
+
+ bytes = sdscatrepr(bytes,c->querybuf,64);
+ serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes);
+ sdsfree(ci);
+ sdsfree(bytes);
+ freeClientAsync(c);
+ goto done;
+ }
+
+ /* There is more data in the client input buffer, continue parsing it
+ * and check if there is a full command to execute. */
+ if (processInputBuffer(c) == C_ERR)
+ c = NULL; /* Client was freed during processing: don't touch it below. */
+
+done:
+ beforeNextClient(c);
+}
+
+/* A Redis "Address String" is a colon separated ip:port pair.
+ * For IPv4 it's in the form x.y.z.k:port, example: "127.0.0.1:1234".
+ * For IPv6 addresses we use [] around the IP part, like in "[::1]:1234".
+ * For Unix sockets we use path:0, like in "/tmp/redis:0".
+ *
+ * An Address String always fits inside a buffer of NET_ADDR_STR_LEN bytes,
+ * including the null term.
+ *
+ * On failure the function still populates 'addr' with the "?:0" string in case
+ * you want to relax error checking or need to display something anyway (see
+ * anetFdToString implementation for more info). */
+void genClientAddrString(client *client, char *addr,
+ size_t addr_len, int remote) {
+ if (client->flags & CLIENT_UNIX_SOCKET) {
+ /* Unix socket client: use the server's socket path with port 0. */
+ snprintf(addr,addr_len,"%s:0",server.unixsocket);
+ } else {
+ /* TCP client. 'remote' selects peer vs local address formatting. */
+ connFormatAddr(client->conn,addr,addr_len,remote);
+ }
+}
+
+/* This function returns the client peer id, by creating and caching it
+ * if client->peerid is NULL, otherwise returning the cached value.
+ * The Peer ID never changes during the life of the client, however it
+ * is expensive to compute. */
+char *getClientPeerId(client *c) {
+ char peerid[NET_ADDR_STR_LEN] = {0};
+
+ if (c->peerid == NULL) {
+ /* remote=1: format the peer (remote) side of the connection. */
+ genClientAddrString(c,peerid,sizeof(peerid),1);
+ c->peerid = sdsnew(peerid);
+ }
+ return c->peerid;
+}
+
+/* This function returns the client bound socket name, by creating and caching
+ * it if client->sockname is NULL, otherwise returning the cached value.
+ * The Socket Name never changes during the life of the client, however it
+ * is expensive to compute. */
+char *getClientSockname(client *c) {
+ char sockname[NET_ADDR_STR_LEN] = {0};
+
+ if (c->sockname == NULL) {
+ /* remote=0: format the local (bound) side of the connection. */
+ genClientAddrString(c,sockname,sizeof(sockname),0);
+ c->sockname = sdsnew(sockname);
+ }
+ return c->sockname;
+}
+
+/* Concatenate a string representing the state of a client in a human
+ * readable format, into the sds string 's'. Used by CLIENT INFO/LIST
+ * and by log messages. Returns the (possibly reallocated) sds string. */
+sds catClientInfoString(sds s, client *client) {
+ /* flags[] holds up to 16 single-letter flag codes plus the null term. */
+ char flags[17], events[3], conninfo[CONN_INFO_LEN], *p;
+
+ p = flags;
+ if (client->flags & CLIENT_SLAVE) {
+ /* MONITOR clients are flagged as slaves too; 'O' distinguishes them. */
+ if (client->flags & CLIENT_MONITOR)
+ *p++ = 'O';
+ else
+ *p++ = 'S';
+ }
+ if (client->flags & CLIENT_MASTER) *p++ = 'M';
+ if (client->flags & CLIENT_PUBSUB) *p++ = 'P';
+ if (client->flags & CLIENT_MULTI) *p++ = 'x';
+ if (client->flags & CLIENT_BLOCKED) *p++ = 'b';
+ if (client->flags & CLIENT_TRACKING) *p++ = 't';
+ if (client->flags & CLIENT_TRACKING_BROKEN_REDIR) *p++ = 'R';
+ if (client->flags & CLIENT_TRACKING_BCAST) *p++ = 'B';
+ if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd';
+ if (client->flags & CLIENT_CLOSE_AFTER_REPLY) *p++ = 'c';
+ if (client->flags & CLIENT_UNBLOCKED) *p++ = 'u';
+ if (client->flags & CLIENT_CLOSE_ASAP) *p++ = 'A';
+ if (client->flags & CLIENT_UNIX_SOCKET) *p++ = 'U';
+ if (client->flags & CLIENT_READONLY) *p++ = 'r';
+ if (client->flags & CLIENT_NO_EVICT) *p++ = 'e';
+ if (client->flags & CLIENT_NO_TOUCH) *p++ = 'T';
+ if (p == flags) *p++ = 'N'; /* No flags set at all. */
+ *p++ = '\0';
+
+ /* 'events' reports which handlers are installed on the connection. */
+ p = events;
+ if (client->conn) {
+ if (connHasReadHandler(client->conn)) *p++ = 'r';
+ if (connHasWriteHandler(client->conn)) *p++ = 'w';
+ }
+ *p = '\0';
+
+ /* Compute the total memory consumed by this client. */
+ size_t obufmem, total_mem = getClientMemoryUsage(client, &obufmem);
+
+ size_t used_blocks_of_repl_buf = 0;
+ if (client->ref_repl_buf_node) {
+ replBufBlock *last = listNodeValue(listLast(server.repl_buffer_blocks));
+ replBufBlock *cur = listNodeValue(client->ref_repl_buf_node);
+ used_blocks_of_repl_buf = last->id - cur->id + 1;
+ }
+
+ /* NOTE: the field order and names below are part of the CLIENT LIST
+ * output format; keep them stable. */
+ sds ret = sdscatfmt(s,
+ "id=%U addr=%s laddr=%s %s name=%s age=%I idle=%I flags=%s db=%i sub=%i psub=%i ssub=%i multi=%i qbuf=%U qbuf-free=%U argv-mem=%U multi-mem=%U rbs=%U rbp=%U obl=%U oll=%U omem=%U tot-mem=%U events=%s cmd=%s user=%s redir=%I resp=%i lib-name=%s lib-ver=%s",
+ (unsigned long long) client->id,
+ getClientPeerId(client),
+ getClientSockname(client),
+ connGetInfo(client->conn, conninfo, sizeof(conninfo)),
+ client->name ? (char*)client->name->ptr : "",
+ (long long)(server.unixtime - client->ctime),
+ (long long)(server.unixtime - client->lastinteraction),
+ flags,
+ client->db->id,
+ (int) dictSize(client->pubsub_channels),
+ (int) dictSize(client->pubsub_patterns),
+ (int) dictSize(client->pubsubshard_channels),
+ (client->flags & CLIENT_MULTI) ? client->mstate.count : -1,
+ (unsigned long long) sdslen(client->querybuf),
+ (unsigned long long) sdsavail(client->querybuf),
+ (unsigned long long) client->argv_len_sum,
+ (unsigned long long) client->mstate.argv_len_sums,
+ (unsigned long long) client->buf_usable_size,
+ (unsigned long long) client->buf_peak,
+ (unsigned long long) client->bufpos,
+ (unsigned long long) listLength(client->reply) + used_blocks_of_repl_buf,
+ (unsigned long long) obufmem, /* should not include client->buf since we want to see 0 for static clients. */
+ (unsigned long long) total_mem,
+ events,
+ client->lastcmd ? client->lastcmd->fullname : "NULL",
+ client->user ? client->user->name : "(superuser)",
+ (client->flags & CLIENT_TRACKING) ? (long long) client->client_tracking_redirection : -1,
+ client->resp,
+ client->lib_name ? (char*)client->lib_name->ptr : "",
+ client->lib_ver ? (char*)client->lib_ver->ptr : ""
+ );
+ return ret;
+}
+
+/* Build a newline-separated CLIENT LIST style string describing every
+ * connected client, or only clients of the given 'type' when type != -1.
+ * The caller owns the returned sds string and must sdsfree() it. */
+sds getAllClientsInfoString(int type) {
+ listNode *ln;
+ listIter li;
+ client *client;
+ /* Pre-size the buffer (~200 bytes per client) to limit reallocations. */
+ sds o = sdsnewlen(SDS_NOINIT,200*listLength(server.clients));
+ sdsclear(o);
+ listRewind(server.clients,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ client = listNodeValue(ln);
+ if (type != -1 && getClientType(client) != type) continue;
+ o = catClientInfoString(o,client);
+ o = sdscatlen(o,"\n",1);
+ }
+ return o;
+}
+
+/* Check validity of an attribute that's gonna be shown in CLIENT LIST.
+ * Returns C_OK if every character is a printable, non-space ASCII
+ * character ('!' .. '~'), C_ERR otherwise. */
+int validateClientAttr(const char *val) {
+ /* Check if the charset is ok. We need to do this otherwise
+ * CLIENT LIST format will break. You should always be able to
+ * split by space to get the different fields. */
+ while (*val) {
+ if (*val < '!' || *val > '~') { /* ASCII is assumed. */
+ return C_ERR;
+ }
+ val++;
+ }
+ return C_OK;
+}
+
+/* Returns C_OK if the name is valid. Returns C_ERR & sets `err` (when provided) otherwise.
+ * A NULL or empty name is always valid (it means "unset the name"). */
+int validateClientName(robj *name, const char **err) {
+ const char *err_msg = "Client names cannot contain spaces, newlines or special characters.";
+ int len = (name != NULL) ? sdslen(name->ptr) : 0;
+ /* We allow setting the client name to an empty string. */
+ if (len == 0)
+ return C_OK;
+ /* Reject anything outside printable non-space ASCII. */
+ if (validateClientAttr(name->ptr) == C_ERR) {
+ if (err) *err = err_msg;
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+/* Returns C_OK if the name has been set or C_ERR if the name is invalid.
+ * On success the client takes a reference on 'name' (refcount is
+ * incremented); any previously set name is released. */
+int clientSetName(client *c, robj *name, const char **err) {
+ if (validateClientName(name, err) == C_ERR) {
+ return C_ERR;
+ }
+ int len = (name != NULL) ? sdslen(name->ptr) : 0;
+ /* Setting the client name to an empty string actually removes
+ * the current name. */
+ if (len == 0) {
+ if (c->name) decrRefCount(c->name);
+ c->name = NULL;
+ return C_OK;
+ }
+ /* Replace the old name (if any) with the new one. */
+ if (c->name) decrRefCount(c->name);
+ c->name = name;
+ incrRefCount(name);
+ return C_OK;
+}
+
+/* This function implements CLIENT SETNAME, including replying to the
+ * user with an error if the charset is wrong (in that case C_ERR is
+ * returned). If the function succeeded C_OK is returned, and it's up
+ * to the caller to send a reply if needed.
+ *
+ * Setting an empty string as name has the effect of unsetting the
+ * currently set name: the client will remain unnamed.
+ *
+ * This function is also used to implement the HELLO SETNAME option. */
+int clientSetNameOrReply(client *c, robj *name) {
+ const char *err = NULL;
+ int result = clientSetName(c, name, &err);
+ if (result == C_ERR) {
+ /* 'err' is set by clientSetName() on validation failure. */
+ addReplyError(c, err);
+ }
+ return result;
+}
+
+/* CLIENT SETINFO <attr> <value> -- Set client or connection related info.
+ * Supported attributes are "lib-name" and "lib-ver"; an empty value
+ * clears the attribute. Replies +OK on success, an error otherwise. */
+void clientSetinfoCommand(client *c) {
+ sds attr = c->argv[2]->ptr;
+ robj *valob = c->argv[3];
+ sds val = valob->ptr;
+ /* Points at the client field to update, selected by 'attr'. */
+ robj **destvar = NULL;
+ if (!strcasecmp(attr,"lib-name")) {
+ destvar = &c->lib_name;
+ } else if (!strcasecmp(attr,"lib-ver")) {
+ destvar = &c->lib_ver;
+ } else {
+ addReplyErrorFormat(c,"Unrecognized option '%s'", attr);
+ return;
+ }
+
+ /* Same charset restriction as CLIENT SETNAME: values must be
+ * printable non-space ASCII so CLIENT LIST output stays parseable. */
+ if (validateClientAttr(val)==C_ERR) {
+ addReplyErrorFormat(c,
+ "%s cannot contain spaces, newlines or special characters.", attr);
+ return;
+ }
+ if (*destvar) decrRefCount(*destvar);
+ if (sdslen(val)) {
+ *destvar = valob;
+ incrRefCount(valob);
+ } else
+ *destvar = NULL; /* Empty value clears the attribute. */
+ addReply(c,shared.ok);
+}
+
+/* RESET command: reset the client state to resemble a newly connected
+ * client. Only allowed for normal client connections. Replies +RESET.
+ */
+void resetCommand(client *c) {
+ /* MONITOR clients are also marked with CLIENT_SLAVE, we need to
+ * distinguish between the two.
+ */
+ uint64_t flags = c->flags;
+ if (flags & CLIENT_MONITOR) flags &= ~(CLIENT_MONITOR|CLIENT_SLAVE);
+
+ /* Replica, master and module connections cannot be reset. */
+ if (flags & (CLIENT_SLAVE|CLIENT_MASTER|CLIENT_MODULE)) {
+ addReplyError(c,"can only reset normal client connections");
+ return;
+ }
+
+ clearClientConnectionState(c);
+ addReplyStatus(c,"RESET");
+}
+
+/* QUIT command: reply +OK and schedule the connection to be closed
+ * once the reply has been written to the client. */
+void quitCommand(client *c) {
+ addReply(c,shared.ok);
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+}
+
+void clientCommand(client *c) {
+ listNode *ln;
+ listIter li;
+
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"CACHING (YES|NO)",
+" Enable/disable tracking of the keys for next command in OPTIN/OPTOUT modes.",
+"GETREDIR",
+" Return the client ID we are redirecting to when tracking is enabled.",
+"GETNAME",
+" Return the name of the current connection.",
+"ID",
+" Return the ID of the current connection.",
+"INFO",
+" Return information about the current client connection.",
+"KILL <ip:port>",
+" Kill connection made from <ip:port>.",
+"KILL <option> <value> [<option> <value> [...]]",
+" Kill connections. Options are:",
+" * ADDR (<ip:port>|<unixsocket>:0)",
+" Kill connections made from the specified address",
+" * LADDR (<ip:port>|<unixsocket>:0)",
+" Kill connections made to specified local address",
+" * TYPE (NORMAL|MASTER|REPLICA|PUBSUB)",
+" Kill connections by type.",
+" * USER <username>",
+" Kill connections authenticated by <username>.",
+" * SKIPME (YES|NO)",
+" Skip killing current connection (default: yes).",
+"LIST [options ...]",
+" Return information about client connections. Options:",
+" * TYPE (NORMAL|MASTER|REPLICA|PUBSUB)",
+" Return clients of specified type.",
+"UNPAUSE",
+" Stop the current client pause, resuming traffic.",
+"PAUSE <timeout> [WRITE|ALL]",
+" Suspend all, or just write, clients for <timeout> milliseconds.",
+"REPLY (ON|OFF|SKIP)",
+" Control the replies sent to the current connection.",
+"SETNAME <name>",
+" Assign the name <name> to the current connection.",
+"SETINFO <option> <value>",
+" Set client meta attr. Options are:",
+" * LIB-NAME: the client lib name.",
+" * LIB-VER: the client lib version.",
+"UNBLOCK <clientid> [TIMEOUT|ERROR]",
+" Unblock the specified blocked client.",
+"TRACKING (ON|OFF) [REDIRECT <id>] [BCAST] [PREFIX <prefix> [...]]",
+" [OPTIN] [OPTOUT] [NOLOOP]",
+" Control server assisted client side caching.",
+"TRACKINGINFO",
+" Report tracking status for the current connection.",
+"NO-EVICT (ON|OFF)",
+" Protect current client connection from eviction.",
+"NO-TOUCH (ON|OFF)",
+" Will not touch LRU/LFU stats when this mode is on.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"id") && c->argc == 2) {
+ /* CLIENT ID */
+ addReplyLongLong(c,c->id);
+ } else if (!strcasecmp(c->argv[1]->ptr,"info") && c->argc == 2) {
+ /* CLIENT INFO */
+ sds o = catClientInfoString(sdsempty(), c);
+ o = sdscatlen(o,"\n",1);
+ addReplyVerbatim(c,o,sdslen(o),"txt");
+ sdsfree(o);
+ } else if (!strcasecmp(c->argv[1]->ptr,"list")) {
+ /* CLIENT LIST */
+ int type = -1;
+ sds o = NULL;
+ if (c->argc == 4 && !strcasecmp(c->argv[2]->ptr,"type")) {
+ type = getClientTypeByName(c->argv[3]->ptr);
+ if (type == -1) {
+ addReplyErrorFormat(c,"Unknown client type '%s'",
+ (char*) c->argv[3]->ptr);
+ return;
+ }
+ } else if (c->argc > 3 && !strcasecmp(c->argv[2]->ptr,"id")) {
+ int j;
+ o = sdsempty();
+ for (j = 3; j < c->argc; j++) {
+ long long cid;
+ if (getLongLongFromObjectOrReply(c, c->argv[j], &cid,
+ "Invalid client ID")) {
+ sdsfree(o);
+ return;
+ }
+ client *cl = lookupClientByID(cid);
+ if (cl) {
+ o = catClientInfoString(o, cl);
+ o = sdscatlen(o, "\n", 1);
+ }
+ }
+ } else if (c->argc != 2) {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+
+ if (!o)
+ o = getAllClientsInfoString(type);
+ addReplyVerbatim(c,o,sdslen(o),"txt");
+ sdsfree(o);
+ } else if (!strcasecmp(c->argv[1]->ptr,"reply") && c->argc == 3) {
+ /* CLIENT REPLY ON|OFF|SKIP */
+ if (!strcasecmp(c->argv[2]->ptr,"on")) {
+ c->flags &= ~(CLIENT_REPLY_SKIP|CLIENT_REPLY_OFF);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[2]->ptr,"off")) {
+ c->flags |= CLIENT_REPLY_OFF;
+ } else if (!strcasecmp(c->argv[2]->ptr,"skip")) {
+ if (!(c->flags & CLIENT_REPLY_OFF))
+ c->flags |= CLIENT_REPLY_SKIP_NEXT;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"no-evict") && c->argc == 3) {
+ /* CLIENT NO-EVICT ON|OFF */
+ if (!strcasecmp(c->argv[2]->ptr,"on")) {
+ c->flags |= CLIENT_NO_EVICT;
+ removeClientFromMemUsageBucket(c, 0);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[2]->ptr,"off")) {
+ c->flags &= ~CLIENT_NO_EVICT;
+ updateClientMemUsageAndBucket(c);
+ addReply(c,shared.ok);
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"kill")) {
+ /* CLIENT KILL <ip:port>
+ * CLIENT KILL <option> [value] ... <option> [value] */
+ char *addr = NULL;
+ char *laddr = NULL;
+ user *user = NULL;
+ int type = -1;
+ uint64_t id = 0;
+ int skipme = 1;
+ int killed = 0, close_this_client = 0;
+
+ if (c->argc == 3) {
+ /* Old style syntax: CLIENT KILL <addr> */
+ addr = c->argv[2]->ptr;
+ skipme = 0; /* With the old form, you can kill yourself. */
+ } else if (c->argc > 3) {
+ int i = 2; /* Next option index. */
+
+ /* New style syntax: parse options. */
+ while(i < c->argc) {
+ int moreargs = c->argc > i+1;
+
+ if (!strcasecmp(c->argv[i]->ptr,"id") && moreargs) {
+ long tmp;
+
+ if (getRangeLongFromObjectOrReply(c, c->argv[i+1], 1, LONG_MAX, &tmp,
+ "client-id should be greater than 0") != C_OK)
+ return;
+ id = tmp;
+ } else if (!strcasecmp(c->argv[i]->ptr,"type") && moreargs) {
+ type = getClientTypeByName(c->argv[i+1]->ptr);
+ if (type == -1) {
+ addReplyErrorFormat(c,"Unknown client type '%s'",
+ (char*) c->argv[i+1]->ptr);
+ return;
+ }
+ } else if (!strcasecmp(c->argv[i]->ptr,"addr") && moreargs) {
+ addr = c->argv[i+1]->ptr;
+ } else if (!strcasecmp(c->argv[i]->ptr,"laddr") && moreargs) {
+ laddr = c->argv[i+1]->ptr;
+ } else if (!strcasecmp(c->argv[i]->ptr,"user") && moreargs) {
+ user = ACLGetUserByName(c->argv[i+1]->ptr,
+ sdslen(c->argv[i+1]->ptr));
+ if (user == NULL) {
+ addReplyErrorFormat(c,"No such user '%s'",
+ (char*) c->argv[i+1]->ptr);
+ return;
+ }
+ } else if (!strcasecmp(c->argv[i]->ptr,"skipme") && moreargs) {
+ if (!strcasecmp(c->argv[i+1]->ptr,"yes")) {
+ skipme = 1;
+ } else if (!strcasecmp(c->argv[i+1]->ptr,"no")) {
+ skipme = 0;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ i += 2;
+ }
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Iterate clients killing all the matching clients. */
+ listRewind(server.clients,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ client *client = listNodeValue(ln);
+ if (addr && strcmp(getClientPeerId(client),addr) != 0) continue;
+ if (laddr && strcmp(getClientSockname(client),laddr) != 0) continue;
+ if (type != -1 && getClientType(client) != type) continue;
+ if (id != 0 && client->id != id) continue;
+ if (user && client->user != user) continue;
+ if (c == client && skipme) continue;
+
+ /* Kill it. */
+ if (c == client) {
+ close_this_client = 1;
+ } else {
+ freeClient(client);
+ }
+ killed++;
+ }
+
+ /* Reply according to old/new format. */
+ if (c->argc == 3) {
+ if (killed == 0)
+ addReplyError(c,"No such client");
+ else
+ addReply(c,shared.ok);
+ } else {
+ addReplyLongLong(c,killed);
+ }
+
+ /* If this client has to be closed, flag it as CLOSE_AFTER_REPLY
+ * only after we queued the reply to its output buffers. */
+ if (close_this_client) c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ } else if (!strcasecmp(c->argv[1]->ptr,"unblock") && (c->argc == 3 ||
+ c->argc == 4))
+ {
+ /* CLIENT UNBLOCK <id> [timeout|error] */
+ long long id;
+ int unblock_error = 0;
+
+ if (c->argc == 4) {
+ if (!strcasecmp(c->argv[3]->ptr,"timeout")) {
+ unblock_error = 0;
+ } else if (!strcasecmp(c->argv[3]->ptr,"error")) {
+ unblock_error = 1;
+ } else {
+ addReplyError(c,
+ "CLIENT UNBLOCK reason should be TIMEOUT or ERROR");
+ return;
+ }
+ }
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&id,NULL)
+ != C_OK) return;
+ struct client *target = lookupClientByID(id);
+ /* Note that we never try to unblock a client blocked on a module command, which
+ * doesn't have a timeout callback (even in the case of UNBLOCK ERROR).
+ * The reason is that we assume that if a command doesn't expect to be timedout,
+ * it also doesn't expect to be unblocked by CLIENT UNBLOCK */
+ if (target && target->flags & CLIENT_BLOCKED && moduleBlockedClientMayTimeout(target)) {
+ if (unblock_error)
+ unblockClientOnError(target,
+ "-UNBLOCKED client unblocked via CLIENT UNBLOCK");
+ else
+ unblockClientOnTimeout(target);
+
+ addReply(c,shared.cone);
+ } else {
+ addReply(c,shared.czero);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"setname") && c->argc == 3) {
+ /* CLIENT SETNAME */
+ if (clientSetNameOrReply(c,c->argv[2]) == C_OK)
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"getname") && c->argc == 2) {
+ /* CLIENT GETNAME */
+ if (c->name)
+ addReplyBulk(c,c->name);
+ else
+ addReplyNull(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"unpause") && c->argc == 2) {
+ /* CLIENT UNPAUSE */
+ unpauseActions(PAUSE_BY_CLIENT_COMMAND);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"pause") && (c->argc == 3 ||
+ c->argc == 4))
+ {
+ /* CLIENT PAUSE TIMEOUT [WRITE|ALL] */
+ mstime_t end;
+ int isPauseClientAll = 1;
+ if (c->argc == 4) {
+ if (!strcasecmp(c->argv[3]->ptr,"write")) {
+ isPauseClientAll = 0;
+ } else if (strcasecmp(c->argv[3]->ptr,"all")) {
+ addReplyError(c,
+ "CLIENT PAUSE mode must be WRITE or ALL");
+ return;
+ }
+ }
+
+ if (getTimeoutFromObjectOrReply(c,c->argv[2],&end,
+ UNIT_MILLISECONDS) != C_OK) return;
+ pauseClientsByClient(end, isPauseClientAll);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"tracking") && c->argc >= 3) {
+ /* CLIENT TRACKING (on|off) [REDIRECT <id>] [BCAST] [PREFIX first]
+ * [PREFIX second] [OPTIN] [OPTOUT] [NOLOOP]... */
+ long long redir = 0;
+ uint64_t options = 0;
+ robj **prefix = NULL;
+ size_t numprefix = 0;
+
+ /* Parse the options. */
+ for (int j = 3; j < c->argc; j++) {
+ int moreargs = (c->argc-1) - j;
+
+ if (!strcasecmp(c->argv[j]->ptr,"redirect") && moreargs) {
+ j++;
+ if (redir != 0) {
+ addReplyError(c,"A client can only redirect to a single "
+ "other client");
+ zfree(prefix);
+ return;
+ }
+
+ if (getLongLongFromObjectOrReply(c,c->argv[j],&redir,NULL) !=
+ C_OK)
+ {
+ zfree(prefix);
+ return;
+ }
+ /* We will require the client with the specified ID to exist
+ * right now, even if it is possible that it gets disconnected
+ * later. Still a valid sanity check. */
+ if (lookupClientByID(redir) == NULL) {
+ addReplyError(c,"The client ID you want redirect to "
+ "does not exist");
+ zfree(prefix);
+ return;
+ }
+ } else if (!strcasecmp(c->argv[j]->ptr,"bcast")) {
+ options |= CLIENT_TRACKING_BCAST;
+ } else if (!strcasecmp(c->argv[j]->ptr,"optin")) {
+ options |= CLIENT_TRACKING_OPTIN;
+ } else if (!strcasecmp(c->argv[j]->ptr,"optout")) {
+ options |= CLIENT_TRACKING_OPTOUT;
+ } else if (!strcasecmp(c->argv[j]->ptr,"noloop")) {
+ options |= CLIENT_TRACKING_NOLOOP;
+ } else if (!strcasecmp(c->argv[j]->ptr,"prefix") && moreargs) {
+ j++;
+ prefix = zrealloc(prefix,sizeof(robj*)*(numprefix+1));
+ prefix[numprefix++] = c->argv[j];
+ } else {
+ zfree(prefix);
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ /* Options are ok: enable or disable the tracking for this client. */
+ if (!strcasecmp(c->argv[2]->ptr,"on")) {
+ /* Before enabling tracking, make sure options are compatible
+ * among each other and with the current state of the client. */
+ if (!(options & CLIENT_TRACKING_BCAST) && numprefix) {
+ addReplyError(c,
+ "PREFIX option requires BCAST mode to be enabled");
+ zfree(prefix);
+ return;
+ }
+
+ if (c->flags & CLIENT_TRACKING) {
+ int oldbcast = !!(c->flags & CLIENT_TRACKING_BCAST);
+ int newbcast = !!(options & CLIENT_TRACKING_BCAST);
+ if (oldbcast != newbcast) {
+ addReplyError(c,
+ "You can't switch BCAST mode on/off before disabling "
+ "tracking for this client, and then re-enabling it with "
+ "a different mode.");
+ zfree(prefix);
+ return;
+ }
+ }
+
+ if (options & CLIENT_TRACKING_BCAST &&
+ options & (CLIENT_TRACKING_OPTIN|CLIENT_TRACKING_OPTOUT))
+ {
+ addReplyError(c,
+ "OPTIN and OPTOUT are not compatible with BCAST");
+ zfree(prefix);
+ return;
+ }
+
+ if (options & CLIENT_TRACKING_OPTIN && options & CLIENT_TRACKING_OPTOUT)
+ {
+ addReplyError(c,
+ "You can't specify both OPTIN mode and OPTOUT mode");
+ zfree(prefix);
+ return;
+ }
+
+ if ((options & CLIENT_TRACKING_OPTIN && c->flags & CLIENT_TRACKING_OPTOUT) ||
+ (options & CLIENT_TRACKING_OPTOUT && c->flags & CLIENT_TRACKING_OPTIN))
+ {
+ addReplyError(c,
+ "You can't switch OPTIN/OPTOUT mode before disabling "
+ "tracking for this client, and then re-enabling it with "
+ "a different mode.");
+ zfree(prefix);
+ return;
+ }
+
+ if (options & CLIENT_TRACKING_BCAST) {
+ if (!checkPrefixCollisionsOrReply(c,prefix,numprefix)) {
+ zfree(prefix);
+ return;
+ }
+ }
+
+ enableTracking(c,redir,options,prefix,numprefix);
+ } else if (!strcasecmp(c->argv[2]->ptr,"off")) {
+ disableTracking(c);
+ } else {
+ zfree(prefix);
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ zfree(prefix);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"caching") && c->argc >= 3) {
+ if (!(c->flags & CLIENT_TRACKING)) {
+ addReplyError(c,"CLIENT CACHING can be called only when the "
+ "client is in tracking mode with OPTIN or "
+ "OPTOUT mode enabled");
+ return;
+ }
+
+ char *opt = c->argv[2]->ptr;
+ if (!strcasecmp(opt,"yes")) {
+ if (c->flags & CLIENT_TRACKING_OPTIN) {
+ c->flags |= CLIENT_TRACKING_CACHING;
+ } else {
+ addReplyError(c,"CLIENT CACHING YES is only valid when tracking is enabled in OPTIN mode.");
+ return;
+ }
+ } else if (!strcasecmp(opt,"no")) {
+ if (c->flags & CLIENT_TRACKING_OPTOUT) {
+ c->flags |= CLIENT_TRACKING_CACHING;
+ } else {
+ addReplyError(c,"CLIENT CACHING NO is only valid when tracking is enabled in OPTOUT mode.");
+ return;
+ }
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Common reply for when we succeeded. */
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"getredir") && c->argc == 2) {
+ /* CLIENT GETREDIR */
+ if (c->flags & CLIENT_TRACKING) {
+ addReplyLongLong(c,c->client_tracking_redirection);
+ } else {
+ addReplyLongLong(c,-1);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"trackinginfo") && c->argc == 2) {
+ addReplyMapLen(c,3);
+
+ /* Flags */
+ addReplyBulkCString(c,"flags");
+ void *arraylen_ptr = addReplyDeferredLen(c);
+ int numflags = 0;
+ addReplyBulkCString(c,c->flags & CLIENT_TRACKING ? "on" : "off");
+ numflags++;
+ if (c->flags & CLIENT_TRACKING_BCAST) {
+ addReplyBulkCString(c,"bcast");
+ numflags++;
+ }
+ if (c->flags & CLIENT_TRACKING_OPTIN) {
+ addReplyBulkCString(c,"optin");
+ numflags++;
+ if (c->flags & CLIENT_TRACKING_CACHING) {
+ addReplyBulkCString(c,"caching-yes");
+ numflags++;
+ }
+ }
+ if (c->flags & CLIENT_TRACKING_OPTOUT) {
+ addReplyBulkCString(c,"optout");
+ numflags++;
+ if (c->flags & CLIENT_TRACKING_CACHING) {
+ addReplyBulkCString(c,"caching-no");
+ numflags++;
+ }
+ }
+ if (c->flags & CLIENT_TRACKING_NOLOOP) {
+ addReplyBulkCString(c,"noloop");
+ numflags++;
+ }
+ if (c->flags & CLIENT_TRACKING_BROKEN_REDIR) {
+ addReplyBulkCString(c,"broken_redirect");
+ numflags++;
+ }
+ setDeferredSetLen(c,arraylen_ptr,numflags);
+
+ /* Redirect */
+ addReplyBulkCString(c,"redirect");
+ if (c->flags & CLIENT_TRACKING) {
+ addReplyLongLong(c,c->client_tracking_redirection);
+ } else {
+ addReplyLongLong(c,-1);
+ }
+
+ /* Prefixes */
+ addReplyBulkCString(c,"prefixes");
+ if (c->client_tracking_prefixes) {
+ addReplyArrayLen(c,raxSize(c->client_tracking_prefixes));
+ raxIterator ri;
+ raxStart(&ri,c->client_tracking_prefixes);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ addReplyBulkCBuffer(c,ri.key,ri.key_len);
+ }
+ raxStop(&ri);
+ } else {
+ addReplyArrayLen(c,0);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr, "no-touch")) {
+ /* CLIENT NO-TOUCH ON|OFF */
+ if (!strcasecmp(c->argv[2]->ptr,"on")) {
+ c->flags |= CLIENT_NO_TOUCH;
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[2]->ptr,"off")) {
+ c->flags &= ~CLIENT_NO_TOUCH;
+ addReply(c,shared.ok);
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ }
+ } else {
+ addReplySubcommandSyntaxError(c);
+ }
+}
+
/* HELLO [<protocol-version> [AUTH <user> <password>] [SETNAME <name>] ]
 *
 * Greets the client: optionally switches the connection to the requested
 * RESP protocol version (2 or 3), authenticates via AUTH, sets the client
 * name via SETNAME, then replies with a map describing the server. */
void helloCommand(client *c) {
    long long ver = 0;
    int next_arg = 1;

    /* The protocol version argument is optional; when present it must be
     * an integer in [2,3]. ver stays 0 (meaning "keep current") otherwise. */
    if (c->argc >= 2) {
        if (getLongLongFromObjectOrReply(c, c->argv[next_arg++], &ver,
            "Protocol version is not an integer or out of range") != C_OK) {
            return;
        }

        if (ver < 2 || ver > 3) {
            addReplyError(c,"-NOPROTO unsupported protocol version");
            return;
        }
    }

    /* Parse the optional AUTH and SETNAME arguments. */
    robj *username = NULL;
    robj *password = NULL;
    robj *clientname = NULL;
    for (int j = next_arg; j < c->argc; j++) {
        int moreargs = (c->argc-1) - j;
        const char *opt = c->argv[j]->ptr;
        if (!strcasecmp(opt,"AUTH") && moreargs >= 2) {
            /* Redact the credentials so they never appear in the slowlog. */
            redactClientCommandArgument(c, j+1);
            redactClientCommandArgument(c, j+2);
            username = c->argv[j+1];
            password = c->argv[j+2];
            j += 2;
        } else if (!strcasecmp(opt,"SETNAME") && moreargs) {
            clientname = c->argv[j+1];
            const char *err = NULL;
            /* Validate the name now, but apply it only after authentication
             * succeeded (see clientSetName call below). */
            if (validateClientName(clientname, &err) == C_ERR) {
                addReplyError(c, err);
                return;
            }
            j++;
        } else {
            addReplyErrorFormat(c,"Syntax error in HELLO option '%s'",opt);
            return;
        }
    }

    if (username && password) {
        robj *err = NULL;
        int auth_result = ACLAuthenticateUser(c, username, password, &err);
        if (auth_result == AUTH_ERR) {
            addAuthErrReply(c, err);
        }
        if (err) decrRefCount(err);
        /* In case of auth errors, return early since we already replied with an ERR.
         * In case of blocking module auth, we reply to the client/setname later upon unblocking. */
        if (auth_result == AUTH_ERR || auth_result == AUTH_BLOCKED) {
            return;
        }
    }

    /* At this point we need to be authenticated to continue. */
    if (!c->authenticated) {
        addReplyError(c,"-NOAUTH HELLO must be called with the client already "
                       "authenticated, otherwise the HELLO <proto> AUTH <user> <pass> "
                       "option can be used to authenticate the client and "
                       "select the RESP protocol version at the same time");
        return;
    }

    /* Now that we're authenticated, set the client name. */
    if (clientname) clientSetName(c, clientname, NULL);

    /* Let's switch to the specified RESP mode. */
    if (ver) c->resp = ver;
    /* Six fixed fields, plus "role" which is omitted in sentinel mode. */
    addReplyMapLen(c,6 + !server.sentinel_mode);

    addReplyBulkCString(c,"server");
    addReplyBulkCString(c,"redis");

    addReplyBulkCString(c,"version");
    addReplyBulkCString(c,REDIS_VERSION);

    addReplyBulkCString(c,"proto");
    addReplyLongLong(c,c->resp);

    addReplyBulkCString(c,"id");
    addReplyLongLong(c,c->id);

    addReplyBulkCString(c,"mode");
    if (server.sentinel_mode) addReplyBulkCString(c,"sentinel");
    else if (server.cluster_enabled) addReplyBulkCString(c,"cluster");
    else addReplyBulkCString(c,"standalone");

    if (!server.sentinel_mode) {
        addReplyBulkCString(c,"role");
        addReplyBulkCString(c,server.masterhost ? "replica" : "master");
    }

    addReplyBulkCString(c,"modules");
    addReplyLoadedModules(c);
}
+
/* This callback is bound to POST and "Host:" command names. Those are not
 * really commands, but are used in security attacks in order to talk to
 * Redis instances via HTTP, with a technique called "cross protocol scripting"
 * which exploits the fact that services like Redis will discard invalid
 * HTTP headers and will process what follows.
 *
 * As a protection against this attack, Redis will terminate the connection
 * when a POST or "Host:" header is seen, and will log the event from
 * time to time (to avoid creating a DOS as a result of too many logs). */
void securityWarningCommand(client *c) {
    /* Static rate-limit state: at most one log line per 60 seconds,
     * shared across all connections. */
    static time_t logged_time = 0;
    time_t now = time(NULL);

    /* llabs() protects against a clock that stepped backwards. */
    if (llabs(now-logged_time) > 60) {
        char ip[NET_IP_STR_LEN];
        int port;
        if (connAddrPeerName(c->conn, ip, sizeof(ip), &port) == -1) {
            serverLog(LL_WARNING,"Possible SECURITY ATTACK detected. It looks like somebody is sending POST or Host: commands to Redis. This is likely due to an attacker attempting to use Cross Protocol Scripting to compromise your Redis instance. Connection aborted.");
        } else {
            serverLog(LL_WARNING,"Possible SECURITY ATTACK detected. It looks like somebody is sending POST or Host: commands to Redis. This is likely due to an attacker attempting to use Cross Protocol Scripting to compromise your Redis instance. Connection from %s:%d aborted.", ip, port);
        }
        logged_time = now;
    }
    /* Async close: we may be called from a context where freeing the
     * client immediately is unsafe. */
    freeClientAsync(c);
}
+
+/* Keep track of the original command arguments so that we can generate
+ * an accurate slowlog entry after the command has been executed. */
+static void retainOriginalCommandVector(client *c) {
+ /* We already rewrote this command, so don't rewrite it again */
+ if (c->original_argv) return;
+ c->original_argc = c->argc;
+ c->original_argv = zmalloc(sizeof(robj*)*(c->argc));
+ for (int j = 0; j < c->argc; j++) {
+ c->original_argv[j] = c->argv[j];
+ incrRefCount(c->argv[j]);
+ }
+}
+
/* Redact a given argument to prevent it from being shown
 * in the slowlog. This information is stored in the
 * original_argv array.
 *
 * 'argc' is the zero-based index of the argument to redact (despite the
 * name, it is an index, not a count). The caller must ensure the index
 * is within the current argument vector. */
void redactClientCommandArgument(client *c, int argc) {
    /* Make sure original_argv exists before modifying it. */
    retainOriginalCommandVector(c);
    if (c->original_argv[argc] == shared.redacted) {
        /* This argument has already been redacted */
        return;
    }
    /* Swap the retained reference for the shared "redacted" placeholder. */
    decrRefCount(c->original_argv[argc]);
    c->original_argv[argc] = shared.redacted;
}
+
+/* Rewrite the command vector of the client. All the new objects ref count
+ * is incremented. The old command vector is freed, and the old objects
+ * ref count is decremented. */
+void rewriteClientCommandVector(client *c, int argc, ...) {
+ va_list ap;
+ int j;
+ robj **argv; /* The new argument vector */
+
+ argv = zmalloc(sizeof(robj*)*argc);
+ va_start(ap,argc);
+ for (j = 0; j < argc; j++) {
+ robj *a;
+
+ a = va_arg(ap, robj*);
+ argv[j] = a;
+ incrRefCount(a);
+ }
+ replaceClientCommandVector(c, argc, argv);
+ va_end(ap);
+}
+
+/* Completely replace the client command vector with the provided one. */
+void replaceClientCommandVector(client *c, int argc, robj **argv) {
+ int j;
+ retainOriginalCommandVector(c);
+ freeClientArgv(c);
+ c->argv = argv;
+ c->argc = argc;
+ c->argv_len_sum = 0;
+ for (j = 0; j < c->argc; j++)
+ if (c->argv[j])
+ c->argv_len_sum += getStringObjectLen(c->argv[j]);
+ c->cmd = lookupCommandOrOriginal(c->argv,c->argc);
+ serverAssertWithInfo(c,NULL,c->cmd != NULL);
+}
+
/* Rewrite a single item in the command vector.
 * The new val ref count is incremented, and the old decremented.
 *
 * It is possible to specify an argument over the current size of the
 * argument vector: in this case the array of objects gets reallocated
 * and c->argc set to the max value. However it's up to the caller to
 *
 * 1. Make sure there are no "holes" and all the arguments are set.
 * 2. If the original argument vector was longer than the one we
 *    want to end with, it's up to the caller to set c->argc and
 *    free the no longer used objects on c->argv. */
void rewriteClientCommandArgument(client *c, int i, robj *newval) {
    robj *oldval;
    /* Snapshot the pre-rewrite arguments for an accurate slowlog entry. */
    retainOriginalCommandVector(c);

    /* We need to handle both extending beyond argc (just update it and
     * initialize the new element) or beyond argv_len (realloc is needed).
     */
    if (i >= c->argc) {
        if (i >= c->argv_len) {
            c->argv = zrealloc(c->argv,sizeof(robj*)*(i+1));
            c->argv_len = i+1;
        }
        c->argc = i+1;
        /* NULL so the oldval handling below treats the slot as empty. */
        c->argv[i] = NULL;
    }
    oldval = c->argv[i];
    /* Keep the cached total argument length in sync with the swap. */
    if (oldval) c->argv_len_sum -= getStringObjectLen(oldval);
    if (newval) c->argv_len_sum += getStringObjectLen(newval);
    c->argv[i] = newval;
    incrRefCount(newval);
    if (oldval) decrRefCount(oldval);

    /* If this is the command name make sure to fix c->cmd. */
    if (i == 0) {
        c->cmd = lookupCommandOrOriginal(c->argv,c->argc);
        serverAssertWithInfo(c,NULL,c->cmd != NULL);
    }
}
+
/* This function returns the number of bytes that Redis is
 * using to store the reply still not read by the client.
 *
 * Note: this function is very fast so can be called as many time as
 * the caller wishes. The main usage of this function currently is
 * enforcing the client output length limits. */
size_t getClientOutputBufferMemoryUsage(client *c) {
    if (getClientType(c) == CLIENT_TYPE_SLAVE) {
        /* Replicas reference the shared global replication buffer instead
         * of a private reply list: account for the span of blocks from the
         * replica's current position to the newest block. */
        size_t repl_buf_size = 0;
        size_t repl_node_num = 0;
        size_t repl_node_size = sizeof(listNode) + sizeof(replBufBlock);
        if (c->ref_repl_buf_node) {
            replBufBlock *last = listNodeValue(listLast(server.repl_buffer_blocks));
            replBufBlock *cur = listNodeValue(c->ref_repl_buf_node);
            /* Bytes from the replica's read offset to the end of the stream. */
            repl_buf_size = last->repl_offset + last->size - cur->repl_offset;
            repl_node_num = last->id - cur->id + 1;
        }
        return repl_buf_size + (repl_node_size*repl_node_num);
    } else {
        /* Normal clients: pending reply bytes plus per-node list overhead. */
        size_t list_item_size = sizeof(listNode) + sizeof(clientReplyBlock);
        return c->reply_bytes + (list_item_size*listLength(c->reply));
    }
}
+
+/* Returns the total client's memory usage.
+ * Optionally, if output_buffer_mem_usage is not NULL, it fills it with
+ * the client output buffer memory usage portion of the total. */
+size_t getClientMemoryUsage(client *c, size_t *output_buffer_mem_usage) {
+ size_t mem = getClientOutputBufferMemoryUsage(c);
+ if (output_buffer_mem_usage != NULL)
+ *output_buffer_mem_usage = mem;
+ mem += sdsZmallocSize(c->querybuf);
+ mem += zmalloc_size(c);
+ mem += c->buf_usable_size;
+ /* For efficiency (less work keeping track of the argv memory), it doesn't include the used memory
+ * i.e. unused sds space and internal fragmentation, just the string length. but this is enough to
+ * spot problematic clients. */
+ mem += c->argv_len_sum + sizeof(robj*)*c->argc;
+ mem += multiStateMemOverhead(c);
+
+ /* Add memory overhead of pubsub channels and patterns. Note: this is just the overhead of the robj pointers
+ * to the strings themselves because they aren't stored per client. */
+ mem += pubsubMemOverhead(c);
+
+ /* Add memory overhead of the tracking prefixes, this is an underestimation so we don't need to traverse the entire rax */
+ if (c->client_tracking_prefixes)
+ mem += c->client_tracking_prefixes->numnodes * (sizeof(raxNode) * sizeof(raxNode*));
+
+ return mem;
+}
+
+/* Get the class of a client, used in order to enforce limits to different
+ * classes of clients.
+ *
+ * The function will return one of the following:
+ * CLIENT_TYPE_NORMAL -> Normal client, including MONITOR
+ * CLIENT_TYPE_SLAVE -> Slave
+ * CLIENT_TYPE_PUBSUB -> Client subscribed to Pub/Sub channels
+ * CLIENT_TYPE_MASTER -> The client representing our replication master.
+ */
+int getClientType(client *c) {
+ if (c->flags & CLIENT_MASTER) return CLIENT_TYPE_MASTER;
+ /* Even though MONITOR clients are marked as replicas, we
+ * want the expose them as normal clients. */
+ if ((c->flags & CLIENT_SLAVE) && !(c->flags & CLIENT_MONITOR))
+ return CLIENT_TYPE_SLAVE;
+ if (c->flags & CLIENT_PUBSUB) return CLIENT_TYPE_PUBSUB;
+ return CLIENT_TYPE_NORMAL;
+}
+
+int getClientTypeByName(char *name) {
+ if (!strcasecmp(name,"normal")) return CLIENT_TYPE_NORMAL;
+ else if (!strcasecmp(name,"slave")) return CLIENT_TYPE_SLAVE;
+ else if (!strcasecmp(name,"replica")) return CLIENT_TYPE_SLAVE;
+ else if (!strcasecmp(name,"pubsub")) return CLIENT_TYPE_PUBSUB;
+ else if (!strcasecmp(name,"master")) return CLIENT_TYPE_MASTER;
+ else return -1;
+}
+
+char *getClientTypeName(int class) {
+ switch(class) {
+ case CLIENT_TYPE_NORMAL: return "normal";
+ case CLIENT_TYPE_SLAVE: return "slave";
+ case CLIENT_TYPE_PUBSUB: return "pubsub";
+ case CLIENT_TYPE_MASTER: return "master";
+ default: return NULL;
+ }
+}
+
/* The function checks if the client reached output buffer soft or hard
 * limit, and also update the state needed to check the soft limit as
 * a side effect.
 *
 * Return value: non-zero if the client reached the soft or the hard limit.
 * Otherwise zero is returned. */
int checkClientOutputBufferLimits(client *c) {
    int soft = 0, hard = 0, class;
    unsigned long used_mem = getClientOutputBufferMemoryUsage(c);

    class = getClientType(c);
    /* For the purpose of output buffer limiting, masters are handled
     * like normal clients. */
    if (class == CLIENT_TYPE_MASTER) class = CLIENT_TYPE_NORMAL;

    /* Note that it doesn't make sense to set the replica clients output buffer
     * limit lower than the repl-backlog-size config (partial sync will succeed
     * and then replica will get disconnected).
     * Such a configuration is ignored (the size of repl-backlog-size will be used).
     * This doesn't have memory consumption implications since the replica client
     * will share the backlog buffers memory. */
    size_t hard_limit_bytes = server.client_obuf_limits[class].hard_limit_bytes;
    if (class == CLIENT_TYPE_SLAVE && hard_limit_bytes &&
        (long long)hard_limit_bytes < server.repl_backlog_size)
        hard_limit_bytes = server.repl_backlog_size;
    /* A zero configured hard limit means "disabled", so gate on the
     * configured value, not on the possibly-raised effective one. */
    if (server.client_obuf_limits[class].hard_limit_bytes &&
        used_mem >= hard_limit_bytes)
        hard = 1;
    if (server.client_obuf_limits[class].soft_limit_bytes &&
        used_mem >= server.client_obuf_limits[class].soft_limit_bytes)
        soft = 1;

    /* We need to check if the soft limit is reached continuously for the
     * specified amount of seconds. */
    if (soft) {
        if (c->obuf_soft_limit_reached_time == 0) {
            /* Start the clock: the limit must stay exceeded for
             * soft_limit_seconds before we report it. */
            c->obuf_soft_limit_reached_time = server.unixtime;
            soft = 0; /* First time we see the soft limit reached */
        } else {
            time_t elapsed = server.unixtime - c->obuf_soft_limit_reached_time;

            if (elapsed <=
                server.client_obuf_limits[class].soft_limit_seconds) {
                soft = 0; /* The client still did not reached the max number of
                             seconds for the soft limit to be considered
                             reached. */
            }
        }
    } else {
        /* Below the soft limit: reset the timer so a later breach starts
         * counting from scratch. */
        c->obuf_soft_limit_reached_time = 0;
    }
    return soft || hard;
}
+
/* Asynchronously close a client if soft or hard limit is reached on the
 * output buffer size. The caller can check if the client will be closed
 * checking if the client CLIENT_CLOSE_ASAP flag is set.
 *
 * Note: we need to close the client asynchronously because this function is
 * called from contexts where the client can't be freed safely, i.e. from the
 * lower level functions pushing data inside the client output buffers.
 * When `async` is set to 0, we close the client immediately, this is
 * useful when called from cron.
 *
 * Returns 1 if client was (flagged) closed. */
int closeClientOnOutputBufferLimitReached(client *c, int async) {
    if (!c->conn) return 0; /* It is unsafe to free fake clients. */
    serverAssert(c->reply_bytes < SIZE_MAX-(1024*64));
    /* Note that c->reply_bytes is irrelevant for replica clients
     * (they use the global repl buffers). */
    if ((c->reply_bytes == 0 && getClientType(c) != CLIENT_TYPE_SLAVE) ||
        c->flags & CLIENT_CLOSE_ASAP) return 0;
    if (checkClientOutputBufferLimits(c)) {
        /* Build the client description string before freeing: with
         * async == 0 the client is gone right after freeClient(). */
        sds client = catClientInfoString(sdsempty(),c);

        if (async) {
            freeClientAsync(c);
            serverLog(LL_WARNING,
                      "Client %s scheduled to be closed ASAP for overcoming of output buffer limits.",
                      client);
        } else {
            freeClient(c);
            serverLog(LL_WARNING,
                      "Client %s closed for overcoming of output buffer limits.",
                      client);
        }
        sdsfree(client);
        return  1;
    }
    return 0;
}
+
/* Helper function used by performEvictions() in order to flush slaves
 * output buffers without returning control to the event loop.
 * This is also called by SHUTDOWN for a best-effort attempt to send
 * slaves the latest writes. */
void flushSlavesOutputBuffers(void) {
    listIter li;
    listNode *ln;

    listRewind(server.slaves,&li);
    while((ln = listNext(&li))) {
        client *slave = listNodeValue(ln);
        int can_receive_writes = connHasWriteHandler(slave->conn) ||
                                 (slave->flags & CLIENT_PENDING_WRITE);

        /* We don't want to send the pending data to the replica in a few
         * cases:
         *
         * 1. For some reason there is neither the write handler installed
         *    nor the client is flagged as to have pending writes: for some
         *    reason this replica may not be set to receive data. This is
         *    just for the sake of defensive programming.
         *
         * 2. The repl_start_cmd_stream_on_ack flag is true. To know why we
         *    don't want to send data to the replica in this case, please
         *    grep for repl_start_cmd_stream_on_ack.
         *
         * 3. Obviously if the slave is not ONLINE.
         */
        if (slave->replstate == SLAVE_STATE_ONLINE &&
            !(slave->flags & CLIENT_CLOSE_ASAP) &&
            can_receive_writes &&
            !slave->repl_start_cmd_stream_on_ack &&
            clientHasPendingReplies(slave))
        {
            /* Synchronous write: we are deliberately not waiting for the
             * event loop here. */
            writeToClient(slave,0);
        }
    }
}
+
/* Compute current paused actions and its end time, aggregated for
 * all pause purposes. */
void updatePausedActions(void) {
    uint32_t prev_paused_actions = server.paused_actions;
    server.paused_actions = 0;

    /* OR together the actions of every purpose whose pause has not yet
     * expired; expired entries are reset in place. */
    for (int i = 0; i < NUM_PAUSE_PURPOSES; i++) {
        pause_event *p = &(server.client_pause_per_purpose[i]);
        if (p->end > server.mstime)
            server.paused_actions |= p->paused_actions;
        else {
            p->paused_actions = 0;
            p->end = 0;
        }
    }

    /* If the pause type is less restrictive than before, we unblock all clients
     * so they are reprocessed (may get re-paused). */
    uint32_t mask_cli = (PAUSE_ACTION_CLIENT_WRITE|PAUSE_ACTION_CLIENT_ALL);
    if ((server.paused_actions & mask_cli) < (prev_paused_actions & mask_cli)) {
        unblockPostponedClients();
    }
}
+
+/* Unblock all paused clients (ones that where blocked by BLOCKED_POSTPONE (possibly in processCommand).
+ * This means they'll get re-processed in beforeSleep, and may get paused again if needed. */
+void unblockPostponedClients(void) {
+ listNode *ln;
+ listIter li;
+ listRewind(server.postponed_clients, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ client *c = listNodeValue(ln);
+ unblockClient(c, 1);
+ }
+}
+
+/* Set pause-client end-time and restricted action. If already paused, then:
+ * 1. Keep higher end-time value between configured and the new one
+ * 2. Keep most restrictive action between configured and the new one */
+static void pauseClientsByClient(mstime_t endTime, int isPauseClientAll) {
+ uint32_t actions;
+ pause_event *p = &server.client_pause_per_purpose[PAUSE_BY_CLIENT_COMMAND];
+
+ if (isPauseClientAll)
+ actions = PAUSE_ACTIONS_CLIENT_ALL_SET;
+ else {
+ actions = PAUSE_ACTIONS_CLIENT_WRITE_SET;
+ /* If currently configured most restrictive client pause, then keep it */
+ if (p->paused_actions & PAUSE_ACTION_CLIENT_ALL)
+ actions = PAUSE_ACTIONS_CLIENT_ALL_SET;
+ }
+
+ pauseActions(PAUSE_BY_CLIENT_COMMAND, endTime, actions);
+}
+
/* Pause actions up to the specified unixtime (in ms) for a given type of
 * commands.
 *
 * A main use case of this function is to allow pausing replication traffic
 * so that a failover without data loss can occur. Replicas will continue to
 * receive traffic to facilitate this functionality.
 *
 * This function is also internally used by Redis Cluster for the manual
 * failover procedure implemented by CLUSTER FAILOVER.
 *
 * The function always succeeds, even if there is already a pause in progress.
 * The new paused_actions of a given 'purpose' will override the old ones and
 * end time will be updated if new end time is bigger than currently configured */
void pauseActions(pause_purpose purpose, mstime_t end, uint32_t actions) {
    /* Manage pause type and end time per pause purpose. */
    server.client_pause_per_purpose[purpose].paused_actions = actions;

    /* If currently configured end time bigger than new one, then keep it */
    if (server.client_pause_per_purpose[purpose].end < end)
        server.client_pause_per_purpose[purpose].end = end;

    /* Recompute the aggregated pause mask across all purposes. */
    updatePausedActions();

    /* We allow write commands that were queued
     * up before and after to execute. We need
     * to track this state so that we don't assert
     * in propagateNow(). */
    if (server.in_exec) {
        server.client_pause_in_transaction = 1;
    }
}
+
/* Unpause actions and queue them for reprocessing. */
void unpauseActions(pause_purpose purpose) {
    /* Zeroing end-time and actions expires this purpose's pause entry;
     * updatePausedActions() then recomputes the aggregate mask and
     * unblocks postponed clients if the pause became less restrictive. */
    server.client_pause_per_purpose[purpose].end = 0;
    server.client_pause_per_purpose[purpose].paused_actions = 0;
    updatePausedActions();
}
+
+/* Returns bitmask of paused actions */
+uint32_t isPausedActions(uint32_t actions_bitmask) {
+ return (server.paused_actions & actions_bitmask);
+}
+
+/* Returns bitmask of paused actions */
+uint32_t isPausedActionsWithUpdate(uint32_t actions_bitmask) {
+ if (!(server.paused_actions & actions_bitmask)) return 0;
+ updatePausedActions();
+ return (server.paused_actions & actions_bitmask);
+}
+
/* This function is called by Redis in order to process a few events from
 * time to time while blocked into some not interruptible operation.
 * This allows to reply to clients with the -LOADING error while loading the
 * data set at startup or after a full resynchronization with the master
 * and so forth.
 *
 * It calls the event loop in order to process a few events. Specifically we
 * try to call the event loop 4 times as long as we receive acknowledge that
 * some event was processed, in order to go forward with the accept, read,
 * write, close sequence needed to serve a client.
 *
 * The function returns the total number of events processed. */
void processEventsWhileBlocked(void) {
    int iterations = 4; /* See the function top-comment. */

    /* Update our cached time since it is used to create and update the last
     * interaction time with clients and for other important things. */
    updateCachedTime(0);

    /* For the few commands that are allowed during busy scripts, we rather
     * provide a fresher time than the one from when the script started (they
     * still won't get it from the call due to execution_nesting. For commands
     * during loading this doesn't matter. */
    mstime_t prev_cmd_time_snapshot = server.cmd_time_snapshot;
    server.cmd_time_snapshot = server.mstime;

    /* Note: when we are processing events while blocked (for instance during
     * busy Lua scripts), we set a global flag. When such flag is set, we
     * avoid handling the read part of clients using threaded I/O.
     * See https://github.com/redis/redis/issues/6988 for more info.
     * Note that there could be cases of nested calls to this function,
     * specifically on a busy script during async_loading rdb, and scripts
     * that came from AOF. */
    ProcessingEventsWhileBlocked++;
    while (iterations--) {
        long long startval = server.events_processed_while_blocked;
        /* Non-blocking poll: AE_DONT_WAIT guarantees we never sleep here. */
        long long ae_events = aeProcessEvents(server.el,
                                              AE_FILE_EVENTS|AE_DONT_WAIT|
                                              AE_CALL_BEFORE_SLEEP|AE_CALL_AFTER_SLEEP);
        /* Note that server.events_processed_while_blocked will also get
         * incremented by callbacks called by the event loop handlers. */
        server.events_processed_while_blocked += ae_events;
        long long events = server.events_processed_while_blocked - startval;
        /* Stop early once an iteration makes no progress at all. */
        if (!events) break;
    }

    whileBlockedCron();

    /* Balance the nesting counter; going negative would mean unmatched
     * increments/decrements somewhere. */
    ProcessingEventsWhileBlocked--;
    serverAssert(ProcessingEventsWhileBlocked >= 0);

    /* Restore the snapshot saved above so the blocked caller's view of
     * command time is unchanged. */
    server.cmd_time_snapshot = prev_cmd_time_snapshot;
}
+
+/* ==========================================================================
+ * Threaded I/O
+ * ========================================================================== */
+
#define IO_THREADS_MAX_NUM 128

/* Cache line size, used below to pad each per-thread pending counter onto
 * its own cache line so the counters don't false-share between threads. */
#ifndef CACHE_LINE_SIZE
#if defined(__aarch64__) && defined(__APPLE__)
#define CACHE_LINE_SIZE 128
#else
#define CACHE_LINE_SIZE 64
#endif
#endif

/* One atomic "number of clients pending" counter per I/O thread, padded
 * to a full cache line (see CACHE_LINE_SIZE note above). */
typedef struct __attribute__((aligned(CACHE_LINE_SIZE))) threads_pending {
    redisAtomic unsigned long value;
} threads_pending;

pthread_t io_threads[IO_THREADS_MAX_NUM];
/* Per-thread mutex used as a stop-gate: the main thread holds a thread's
 * mutex to park it while threaded I/O is inactive. */
pthread_mutex_t io_threads_mutex[IO_THREADS_MAX_NUM];
threads_pending io_threads_pending[IO_THREADS_MAX_NUM];
int io_threads_op;      /* IO_THREADS_OP_IDLE, IO_THREADS_OP_READ or IO_THREADS_OP_WRITE. */ // TODO: should access to this be atomic??!

/* This is the list of clients each thread will serve when threaded I/O is
 * used. We spawn io_threads_num-1 threads, since one is the main thread
 * itself. */
list *io_threads_list[IO_THREADS_MAX_NUM];

/* Read thread i's pending-client counter with a full memory barrier. */
static inline unsigned long getIOPendingCount(int i) {
    unsigned long count = 0;
    atomicGetWithSync(io_threads_pending[i].value, count);
    return count;
}

/* Set thread i's pending-client counter with a full memory barrier. */
static inline void setIOPendingCount(int i, unsigned long count) {
    atomicSetWithSync(io_threads_pending[i].value, count);
}
+
/* Entry point of each I/O worker thread: spin until work is posted, then
 * perform the read or write pass over this thread's client list.
 * 'myid' is the thread number (from 0 to server.io_threads_num-1), used to
 * select the thread's own sub-array of clients. Never returns. */
void *IOThreadMain(void *myid) {
    /* The ID is the thread number (from 0 to server.io_threads_num-1), and is
     * used by the thread to just manipulate a single sub-array of clients. */
    long id = (unsigned long)myid;
    char thdname[16];

    snprintf(thdname, sizeof(thdname), "io_thd_%ld", id);
    redis_set_thread_title(thdname);
    redisSetCpuAffinity(server.server_cpulist);
    makeThreadKillable();

    while(1) {
        /* Wait for start: bounded busy-wait before falling back to the
         * mutex gate below, to keep wakeup latency low under load. */
        for (int j = 0; j < 1000000; j++) {
            if (getIOPendingCount(id) != 0) break;
        }

        /* Give the main thread a chance to stop this thread. */
        if (getIOPendingCount(id) == 0) {
            /* If the main thread holds our mutex (threads inactive), this
             * blocks until startThreadedIO() releases it. */
            pthread_mutex_lock(&io_threads_mutex[id]);
            pthread_mutex_unlock(&io_threads_mutex[id]);
            continue;
        }

        serverAssert(getIOPendingCount(id) != 0);

        /* Process: note that the main thread will never touch our list
         * before we drop the pending count to 0. */
        listIter li;
        listNode *ln;
        listRewind(io_threads_list[id],&li);
        while((ln = listNext(&li))) {
            client *c = listNodeValue(ln);
            if (io_threads_op == IO_THREADS_OP_WRITE) {
                writeToClient(c,0);
            } else if (io_threads_op == IO_THREADS_OP_READ) {
                readQueryFromClient(c->conn);
            } else {
                serverPanic("io_threads_op value is unknown");
            }
        }
        listEmpty(io_threads_list[id]);
        /* Publishing 0 signals completion to the main thread; it may touch
         * our list again only after this point. */
        setIOPendingCount(id, 0);
    }
}
+
/* Initialize the data structures needed for threaded I/O.
 * Called once at startup; exits the process on fatal misconfiguration
 * (too many threads) or thread-creation failure. */
void initThreadedIO(void) {
    server.io_threads_active = 0; /* We start with threads not active. */

    /* Indicate that io-threads are currently idle */
    io_threads_op = IO_THREADS_OP_IDLE;

    /* Don't spawn any thread if the user selected a single thread:
     * we'll handle I/O directly from the main thread. */
    if (server.io_threads_num == 1) return;

    if (server.io_threads_num > IO_THREADS_MAX_NUM) {
        serverLog(LL_WARNING,"Fatal: too many I/O threads configured. "
                             "The maximum number is %d.", IO_THREADS_MAX_NUM);
        exit(1);
    }

    /* Spawn and initialize the I/O threads. */
    for (int i = 0; i < server.io_threads_num; i++) {
        /* Things we do for all the threads including the main thread. */
        io_threads_list[i] = listCreate();
        if (i == 0) continue; /* Thread 0 is the main thread. */

        /* Things we do only for the additional threads. */
        pthread_t tid;
        pthread_mutex_init(&io_threads_mutex[i],NULL);
        setIOPendingCount(i, 0);
        /* Hold the mutex before the thread exists so IOThreadMain parks on
         * it immediately; startThreadedIO() releases it later. */
        pthread_mutex_lock(&io_threads_mutex[i]); /* Thread will be stopped. */
        if (pthread_create(&tid,NULL,IOThreadMain,(void*)(long)i) != 0) {
            serverLog(LL_WARNING,"Fatal: Can't initialize IO thread.");
            exit(1);
        }
        io_threads[i] = tid;
    }
}
+
+void killIOThreads(void) {
+ int err, j;
+ for (j = 0; j < server.io_threads_num; j++) {
+ if (io_threads[j] == pthread_self()) continue;
+ if (io_threads[j] && pthread_cancel(io_threads[j]) == 0) {
+ if ((err = pthread_join(io_threads[j],NULL)) != 0) {
+ serverLog(LL_WARNING,
+ "IO thread(tid:%lu) can not be joined: %s",
+ (unsigned long)io_threads[j], strerror(err));
+ } else {
+ serverLog(LL_WARNING,
+ "IO thread(tid:%lu) terminated",(unsigned long)io_threads[j]);
+ }
+ }
+ }
+}
+
+void startThreadedIO(void) {
+ serverAssert(server.io_threads_active == 0);
+ for (int j = 1; j < server.io_threads_num; j++)
+ pthread_mutex_unlock(&io_threads_mutex[j]);
+ server.io_threads_active = 1;
+}
+
+void stopThreadedIO(void) {
+ /* We may have still clients with pending reads when this function
+ * is called: handle them before stopping the threads. */
+ handleClientsWithPendingReadsUsingThreads();
+ serverAssert(server.io_threads_active == 1);
+ for (int j = 1; j < server.io_threads_num; j++)
+ pthread_mutex_lock(&io_threads_mutex[j]);
+ server.io_threads_active = 0;
+}
+
+/* This function checks if there are not enough pending clients to justify
+ * taking the I/O threads active: in that case I/O threads are stopped if
+ * currently active. We track the pending writes as a measure of clients
+ * we need to handle in parallel, however the I/O threading is disabled
+ * globally for reads as well if we have too little pending clients.
+ *
+ * The function returns 0 if the I/O threading should be used because there
+ * are enough active threads, otherwise 1 is returned and the I/O threads
+ * could be possibly stopped (if already active) as a side effect. */
+int stopThreadedIOIfNeeded(void) {
+ int pending = listLength(server.clients_pending_write);
+
+ /* Return ASAP if IO threads are disabled (single threaded mode). */
+ if (server.io_threads_num == 1) return 1;
+
+ if (pending < (server.io_threads_num*2)) {
+ if (server.io_threads_active) stopThreadedIO();
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
/* This function achieves thread safety using a fan-out -> fan-in paradigm:
 * Fan out: The main thread fans out work to the io-threads which block until
 * setIOPendingCount() is called with a value larger than 0 by the main thread.
 * Fan in: The main thread waits until getIOPendingCount() returns 0. Then
 * it can safely perform post-processing and return to normal synchronous
 * work.
 *
 * Returns the number of clients processed (0 if none were pending). */
int handleClientsWithPendingWritesUsingThreads(void) {
    int processed = listLength(server.clients_pending_write);
    if (processed == 0) return 0; /* Return ASAP if there are no clients. */

    /* If I/O threads are disabled or we have few clients to serve, don't
     * use I/O threads, but the boring synchronous code. */
    if (server.io_threads_num == 1 || stopThreadedIOIfNeeded()) {
        return handleClientsWithPendingWrites();
    }

    /* Start threads if needed. */
    if (!server.io_threads_active) startThreadedIO();

    /* Distribute the clients across N different lists. */
    listIter li;
    listNode *ln;
    listRewind(server.clients_pending_write,&li);
    int item_id = 0;
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);
        c->flags &= ~CLIENT_PENDING_WRITE;

        /* Remove clients from the list of pending writes since
         * they are going to be closed ASAP. */
        if (c->flags & CLIENT_CLOSE_ASAP) {
            listUnlinkNode(server.clients_pending_write, ln);
            continue;
        }

        /* Since all replicas and replication backlog use global replication
         * buffer, to guarantee data accessing thread safe, we must put all
         * replicas client into io_threads_list[0] i.e. main thread handles
         * sending the output buffer of all replicas. */
        if (getClientType(c) == CLIENT_TYPE_SLAVE) {
            listAddNodeTail(io_threads_list[0],c);
            continue;
        }

        /* Round-robin the remaining clients across all threads
         * (including slot 0, handled by the main thread itself). */
        int target_id = item_id % server.io_threads_num;
        listAddNodeTail(io_threads_list[target_id],c);
        item_id++;
    }

    /* Give the start condition to the waiting threads, by setting the
     * start condition atomic var. io_threads_op must be set BEFORE the
     * pending counters, since the threads read it once they wake up. */
    io_threads_op = IO_THREADS_OP_WRITE;
    for (int j = 1; j < server.io_threads_num; j++) {
        int count = listLength(io_threads_list[j]);
        setIOPendingCount(j, count);
    }

    /* Also use the main thread to process a slice of clients. */
    listRewind(io_threads_list[0],&li);
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);
        writeToClient(c,0);
    }
    listEmpty(io_threads_list[0]);

    /* Wait for all the other threads to end their work (busy-wait:
     * latency matters more than CPU here, writes complete quickly). */
    while(1) {
        unsigned long pending = 0;
        for (int j = 1; j < server.io_threads_num; j++)
            pending += getIOPendingCount(j);
        if (pending == 0) break;
    }

    io_threads_op = IO_THREADS_OP_IDLE;

    /* Run the list of clients again to install the write handler where
     * needed. */
    listRewind(server.clients_pending_write,&li);
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);

        /* Update the client in the mem usage after we're done processing it in the io-threads */
        updateClientMemUsageAndBucket(c);

        /* Install the write handler if there are pending writes in some
         * of the clients. */
        if (clientHasPendingReplies(c)) {
            installClientWriteHandler(c);
        }
    }
    /* Drain the pending-write list node by node (the client objects
     * themselves are not freed here). */
    while(listLength(server.clients_pending_write) > 0) {
        listUnlinkNode(server.clients_pending_write, server.clients_pending_write->head);
    }

    /* Update processed count on server */
    server.stat_io_writes_processed += processed;

    return processed;
}
+
+/* Return 1 if we want to handle the client read later using threaded I/O.
+ * This is called by the readable handler of the event loop.
+ * As a side effect of calling this function the client is put in the
+ * pending read clients and flagged as such. */
+int postponeClientRead(client *c) {
+ if (server.io_threads_active &&
+ server.io_threads_do_reads &&
+ !ProcessingEventsWhileBlocked &&
+ !(c->flags & (CLIENT_MASTER|CLIENT_SLAVE|CLIENT_BLOCKED)) &&
+ io_threads_op == IO_THREADS_OP_IDLE)
+ {
+ listAddNodeHead(server.clients_pending_read,c);
+ c->pending_read_list_node = listFirst(server.clients_pending_read);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
/* When threaded I/O is also enabled for the reading + parsing side, the
 * readable handler will just put normal clients into a queue of clients to
 * process (instead of serving them synchronously). This function runs
 * the queue using the I/O threads, and process them in order to accumulate
 * the reads in the buffers, and also parse the first command available
 * rendering it in the client structures.
 * This function achieves thread safety using a fan-out -> fan-in paradigm:
 * Fan out: The main thread fans out work to the io-threads which block until
 * setIOPendingCount() is called with a value larger than 0 by the main thread.
 * Fan in: The main thread waits until getIOPendingCount() returns 0. Then
 * it can safely perform post-processing and return to normal synchronous
 * work.
 *
 * Returns the number of clients processed (0 if threaded reads are off or
 * nothing was pending). */
int handleClientsWithPendingReadsUsingThreads(void) {
    if (!server.io_threads_active || !server.io_threads_do_reads) return 0;
    int processed = listLength(server.clients_pending_read);
    if (processed == 0) return 0;

    /* Distribute the clients across N different lists. */
    listIter li;
    listNode *ln;
    listRewind(server.clients_pending_read,&li);
    int item_id = 0;
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);
        int target_id = item_id % server.io_threads_num;
        listAddNodeTail(io_threads_list[target_id],c);
        item_id++;
    }

    /* Give the start condition to the waiting threads, by setting the
     * start condition atomic var. io_threads_op must be set before the
     * pending counters, since the threads read it once they wake up. */
    io_threads_op = IO_THREADS_OP_READ;
    for (int j = 1; j < server.io_threads_num; j++) {
        int count = listLength(io_threads_list[j]);
        setIOPendingCount(j, count);
    }

    /* Also use the main thread to process a slice of clients. */
    listRewind(io_threads_list[0],&li);
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);
        readQueryFromClient(c->conn);
    }
    listEmpty(io_threads_list[0]);

    /* Wait for all the other threads to end their work (busy-wait). */
    while(1) {
        unsigned long pending = 0;
        for (int j = 1; j < server.io_threads_num; j++)
            pending += getIOPendingCount(j);
        if (pending == 0) break;
    }

    io_threads_op = IO_THREADS_OP_IDLE;

    /* Run the list of clients again to process the new buffers.
     * Note: we pop from the head each iteration instead of using an
     * iterator, because processing a command may add/remove clients from
     * this same list. */
    while(listLength(server.clients_pending_read)) {
        ln = listFirst(server.clients_pending_read);
        client *c = listNodeValue(ln);
        listDelNode(server.clients_pending_read,ln);
        c->pending_read_list_node = NULL;

        serverAssert(!(c->flags & CLIENT_BLOCKED));

        if (beforeNextClient(c) == C_ERR) {
            /* If the client is no longer valid, we avoid
             * processing the client later. So we just go
             * to the next. */
            continue;
        }

        /* Once io-threads are idle we can update the client in the mem usage */
        updateClientMemUsageAndBucket(c);

        if (processPendingCommandAndInputBuffer(c) == C_ERR) {
            /* If the client is no longer valid, we avoid
             * processing the client later. So we just go
             * to the next. */
            continue;
        }

        /* We may have pending replies if a thread readQueryFromClient() produced
         * replies and did not put the client in pending write queue (it can't).
         */
        if (!(c->flags & CLIENT_PENDING_WRITE) && clientHasPendingReplies(c))
            putClientInPendingWriteQueue(c);
    }

    /* Update processed count on server */
    server.stat_io_reads_processed += processed;

    return processed;
}
+
+/* Returns the actual client eviction limit based on current configuration or
+ * 0 if no limit. */
+size_t getClientEvictionLimit(void) {
+ size_t maxmemory_clients_actual = SIZE_MAX;
+
+ /* Handle percentage of maxmemory*/
+ if (server.maxmemory_clients < 0 && server.maxmemory > 0) {
+ unsigned long long maxmemory_clients_bytes = (unsigned long long)((double)server.maxmemory * -(double) server.maxmemory_clients / 100);
+ if (maxmemory_clients_bytes <= SIZE_MAX)
+ maxmemory_clients_actual = maxmemory_clients_bytes;
+ }
+ else if (server.maxmemory_clients > 0)
+ maxmemory_clients_actual = server.maxmemory_clients;
+ else
+ return 0;
+
+ /* Don't allow a too small maxmemory-clients to avoid cases where we can't communicate
+ * at all with the server because of bad configuration */
+ if (maxmemory_clients_actual < 1024*128)
+ maxmemory_clients_actual = 1024*128;
+
+ return maxmemory_clients_actual;
+}
+
/* Free clients until the total memory used by normal and pubsub clients
 * drops below the configured client eviction limit. Eviction starts from
 * the bucket of largest clients and walks down to smaller buckets. */
void evictClients(void) {
    if (!server.client_mem_usage_buckets)
        return;
    /* Start eviction from topmost bucket (largest clients) */
    int curr_bucket = CLIENT_MEM_USAGE_BUCKETS-1;
    listIter bucket_iter;
    listRewind(server.client_mem_usage_buckets[curr_bucket].clients, &bucket_iter);
    size_t client_eviction_limit = getClientEvictionLimit();
    if (client_eviction_limit == 0)
        return;
    while (server.stat_clients_type_memory[CLIENT_TYPE_NORMAL] +
           server.stat_clients_type_memory[CLIENT_TYPE_PUBSUB] >= client_eviction_limit) {
        listNode *ln = listNext(&bucket_iter);
        if (ln) {
            client *c = ln->value;
            /* Log before freeing: catClientInfoString needs a live client.
             * Note the iterator was already advanced past 'ln', so freeing
             * the client (which unlinks it from the bucket) is safe. */
            sds ci = catClientInfoString(sdsempty(),c);
            serverLog(LL_NOTICE, "Evicting client: %s", ci);
            freeClient(c);
            sdsfree(ci);
            server.stat_evictedclients++;
        } else {
            /* Current bucket exhausted: move to the next smaller one. */
            curr_bucket--;
            if (curr_bucket < 0) {
                serverLog(LL_WARNING, "Over client maxmemory after evicting all evictable clients");
                break;
            }
            listRewind(server.client_mem_usage_buckets[curr_bucket].clients, &bucket_iter);
        }
    }
}
diff --git a/src/notify.c b/src/notify.c
new file mode 100644
index 0000000..2881a48
--- /dev/null
+++ b/src/notify.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2013, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/* This file implements keyspace events notification via Pub/Sub and
+ * described at https://redis.io/topics/notifications. */
+
+/* Turn a string representing notification classes into an integer
+ * representing notification classes flags xored.
+ *
+ * The function returns -1 if the input contains characters not mapping to
+ * any class. */
+int keyspaceEventsStringToFlags(char *classes) {
+ char *p = classes;
+ int c, flags = 0;
+
+ while((c = *p++) != '\0') {
+ switch(c) {
+ case 'A': flags |= NOTIFY_ALL; break;
+ case 'g': flags |= NOTIFY_GENERIC; break;
+ case '$': flags |= NOTIFY_STRING; break;
+ case 'l': flags |= NOTIFY_LIST; break;
+ case 's': flags |= NOTIFY_SET; break;
+ case 'h': flags |= NOTIFY_HASH; break;
+ case 'z': flags |= NOTIFY_ZSET; break;
+ case 'x': flags |= NOTIFY_EXPIRED; break;
+ case 'e': flags |= NOTIFY_EVICTED; break;
+ case 'K': flags |= NOTIFY_KEYSPACE; break;
+ case 'E': flags |= NOTIFY_KEYEVENT; break;
+ case 't': flags |= NOTIFY_STREAM; break;
+ case 'm': flags |= NOTIFY_KEY_MISS; break;
+ case 'd': flags |= NOTIFY_MODULE; break;
+ case 'n': flags |= NOTIFY_NEW; break;
+ default: return -1;
+ }
+ }
+ return flags;
+}
+
+/* This function does exactly the reverse of the function above: it gets
+ * as input an integer with the xored flags and returns a string representing
+ * the selected classes. The string returned is an sds string that needs to
+ * be released with sdsfree(). */
+sds keyspaceEventsFlagsToString(int flags) {
+ sds res;
+
+ res = sdsempty();
+ if ((flags & NOTIFY_ALL) == NOTIFY_ALL) {
+ res = sdscatlen(res,"A",1);
+ } else {
+ if (flags & NOTIFY_GENERIC) res = sdscatlen(res,"g",1);
+ if (flags & NOTIFY_STRING) res = sdscatlen(res,"$",1);
+ if (flags & NOTIFY_LIST) res = sdscatlen(res,"l",1);
+ if (flags & NOTIFY_SET) res = sdscatlen(res,"s",1);
+ if (flags & NOTIFY_HASH) res = sdscatlen(res,"h",1);
+ if (flags & NOTIFY_ZSET) res = sdscatlen(res,"z",1);
+ if (flags & NOTIFY_EXPIRED) res = sdscatlen(res,"x",1);
+ if (flags & NOTIFY_EVICTED) res = sdscatlen(res,"e",1);
+ if (flags & NOTIFY_STREAM) res = sdscatlen(res,"t",1);
+ if (flags & NOTIFY_MODULE) res = sdscatlen(res,"d",1);
+ if (flags & NOTIFY_NEW) res = sdscatlen(res,"n",1);
+ }
+ if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1);
+ if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1);
+ if (flags & NOTIFY_KEY_MISS) res = sdscatlen(res,"m",1);
+ return res;
+}
+
/* The API provided to the rest of the Redis core is a simple function:
 *
 * notifyKeyspaceEvent(int type, char *event, robj *key, int dbid);
 *
 * 'type' is the notification class we define in `server.h`.
 * 'event' is a C string representing the event name.
 * 'key' is a Redis object representing the key name.
 * 'dbid' is the database ID where the key lives.
 *
 * Depending on server.notify_keyspace_events this publishes a
 * __keyspace@<db>__:<key> message (payload: event) and/or a
 * __keyevent@<db>__:<event> message (payload: key). Modules are always
 * notified first, regardless of the configuration. */
void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid) {
    sds chan;
    robj *chanobj, *eventobj;
    int len = -1; /* -1 means "dbid not yet stringified into buf". */
    char buf[24];

    /* If any modules are interested in events, notify the module system now.
     * This bypasses the notifications configuration, but the module engine
     * will only call event subscribers if the event type matches the types
     * they are interested in. */
    moduleNotifyKeyspaceEvent(type, event, key, dbid);

    /* If notifications for this class of events are off, return ASAP. */
    if (!(server.notify_keyspace_events & type)) return;

    eventobj = createStringObject(event,strlen(event));

    /* __keyspace@<db>__:<key> <event> notifications. */
    if (server.notify_keyspace_events & NOTIFY_KEYSPACE) {
        chan = sdsnewlen("__keyspace@",11);
        len = ll2string(buf,sizeof(buf),dbid);
        chan = sdscatlen(chan, buf, len);
        chan = sdscatlen(chan, "__:", 3);
        chan = sdscatsds(chan, key->ptr);
        /* createObject takes ownership of 'chan'; freed via decrRefCount. */
        chanobj = createObject(OBJ_STRING, chan);
        pubsubPublishMessage(chanobj, eventobj, 0);
        decrRefCount(chanobj);
    }

    /* __keyevent@<db>__:<event> <key> notifications. */
    if (server.notify_keyspace_events & NOTIFY_KEYEVENT) {
        chan = sdsnewlen("__keyevent@",11);
        /* Reuse the dbid string produced above, if any. */
        if (len == -1) len = ll2string(buf,sizeof(buf),dbid);
        chan = sdscatlen(chan, buf, len);
        chan = sdscatlen(chan, "__:", 3);
        chan = sdscatsds(chan, eventobj->ptr);
        chanobj = createObject(OBJ_STRING, chan);
        pubsubPublishMessage(chanobj, key, 0);
        decrRefCount(chanobj);
    }
    decrRefCount(eventobj);
}
diff --git a/src/object.c b/src/object.c
new file mode 100644
index 0000000..4b3526a
--- /dev/null
+++ b/src/object.c
@@ -0,0 +1,1688 @@
+/* Redis Object implementation.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "functions.h"
+#include "intset.h" /* Compact integer set structure */
+#include <math.h>
+#include <ctype.h>
+
+#ifdef __CYGWIN__
+#define strtold(a,b) ((long double)strtod((a),(b)))
+#endif
+
+/* ===================== Creation and parsing of objects ==================== */
+
+robj *createObject(int type, void *ptr) {
+ robj *o = zmalloc(sizeof(*o));
+ o->type = type;
+ o->encoding = OBJ_ENCODING_RAW;
+ o->ptr = ptr;
+ o->refcount = 1;
+ o->lru = 0;
+ return o;
+}
+
+void initObjectLRUOrLFU(robj *o) {
+ if (o->refcount == OBJ_SHARED_REFCOUNT)
+ return;
+ /* Set the LRU to the current lruclock (minutes resolution), or
+ * alternatively the LFU counter. */
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ o->lru = (LFUGetTimeInMinutes() << 8) | LFU_INIT_VAL;
+ } else {
+ o->lru = LRU_CLOCK();
+ }
+ return;
+}
+
/* Set a special refcount in the object to make it "shared":
 * incrRefCount and decrRefCount() will test for this special refcount
 * and will not touch the object. This way it is free to access shared
 * objects such as small integers from different threads without any
 * mutex.
 *
 * A common pattern to create shared objects:
 *
 *  robj *myobject = makeObjectShared(createObject(...));
 *
 * The object must be freshly created (refcount == 1): once shared, it is
 * effectively immortal. */
robj *makeObjectShared(robj *o) {
    serverAssert(o->refcount == 1);
    o->refcount = OBJ_SHARED_REFCOUNT;
    return o;
}
+
/* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
 * string object where o->ptr points to a proper sds string, heap-allocated
 * separately from the robj header. */
robj *createRawStringObject(const char *ptr, size_t len) {
    return createObject(OBJ_STRING, sdsnewlen(ptr,len));
}
+
/* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
 * an object where the sds string is actually an unmodifiable string
 * allocated in the same chunk as the object itself: a single zmalloc
 * covers [robj header][sdshdr8 header][len bytes][NUL]. */
robj *createEmbeddedStringObject(const char *ptr, size_t len) {
    robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
    /* The sds header lives immediately after the robj header. */
    struct sdshdr8 *sh = (void*)(o+1);

    o->type = OBJ_STRING;
    o->encoding = OBJ_ENCODING_EMBSTR;
    o->ptr = sh+1; /* o->ptr points at the sds buffer, as for a normal sds. */
    o->refcount = 1;
    o->lru = 0;

    sh->len = len;
    sh->alloc = len;
    sh->flags = SDS_TYPE_8;
    if (ptr == SDS_NOINIT)
        /* Caller will fill the buffer: only the terminator is written. */
        sh->buf[len] = '\0';
    else if (ptr) {
        memcpy(sh->buf,ptr,len);
        sh->buf[len] = '\0';
    } else {
        /* NULL source: zero-fill the whole buffer. */
        memset(sh->buf,0,len+1);
    }
    return o;
}
+
+/* Create a string object with EMBSTR encoding if it is smaller than
+ * OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
+ * used.
+ *
+ * The current limit of 44 is chosen so that the biggest string object
+ * we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
+#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
+robj *createStringObject(const char *ptr, size_t len) {
+ if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
+ return createEmbeddedStringObject(ptr,len);
+ else
+ return createRawStringObject(ptr,len);
+}
+
/* Same as createRawStringObject, but returns NULL if the sds allocation
 * fails instead of aborting (uses sdstrynewlen). */
robj *tryCreateRawStringObject(const char *ptr, size_t len) {
    sds str = sdstrynewlen(ptr,len);
    if (!str) return NULL;
    return createObject(OBJ_STRING, str);
}
+
+/* Same as createStringObject, can return NULL if allocation fails */
+robj *tryCreateStringObject(const char *ptr, size_t len) {
+ if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
+ return createEmbeddedStringObject(ptr,len);
+ else
+ return tryCreateRawStringObject(ptr,len);
+}
+
/* Create a string object from a long long value according to the specified flag. */
#define LL2STROBJ_AUTO 0       /* automatically create the optimal string object */
#define LL2STROBJ_NO_SHARED 1  /* disallow shared objects */
#define LL2STROBJ_NO_INT_ENC 2 /* disallow integer encoded objects. */
robj *createStringObjectFromLongLongWithOptions(long long value, int flag) {
    robj *o;

    /* Small non-negative values can reuse the pre-allocated shared
     * integer objects, unless the caller opted out. */
    if (value >= 0 && value < OBJ_SHARED_INTEGERS && flag == LL2STROBJ_AUTO) {
        o = shared.integers[value];
    } else {
        /* Values fitting in a 'long' can be stored directly in o->ptr
         * with OBJ_ENCODING_INT, avoiding any string allocation. */
        if ((value >= LONG_MIN && value <= LONG_MAX) && flag != LL2STROBJ_NO_INT_ENC) {
            o = createObject(OBJ_STRING, NULL);
            o->encoding = OBJ_ENCODING_INT;
            o->ptr = (void*)((long)value);
        } else {
            /* Fall back to a real string representation. */
            char buf[LONG_STR_SIZE];
            int len = ll2string(buf, sizeof(buf), value);
            o = createStringObject(buf, len);
        }
    }
    return o;
}
+
/* Wrapper for createStringObjectFromLongLongWithOptions() always demanding
 * to create a shared object if possible (LL2STROBJ_AUTO). */
robj *createStringObjectFromLongLong(long long value) {
    return createStringObjectFromLongLongWithOptions(value, LL2STROBJ_AUTO);
}
+
+/* The function avoids returning a shared integer when LFU/LRU info
+ * are needed, that is, when the object is used as a value in the key
+ * space(for instance when the INCR command is used), and Redis is
+ * configured to evict based on LFU/LRU, so we want LFU/LRU values
+ * specific for each key. */
+robj *createStringObjectFromLongLongForValue(long long value) {
+ if (server.maxmemory == 0 || !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) {
+ /* If the maxmemory policy permits, we can still return shared integers */
+ return createStringObjectFromLongLongWithOptions(value, LL2STROBJ_AUTO);
+ } else {
+ return createStringObjectFromLongLongWithOptions(value, LL2STROBJ_NO_SHARED);
+ }
+}
+
/* Create a string object that contains an sds inside it. That means it can't be
 * integer encoded (OBJ_ENCODING_INT), and it'll always be an EMBSTR type
 * for values short enough to fit the embedded-string limit. */
robj *createStringObjectFromLongLongWithSds(long long value) {
    return createStringObjectFromLongLongWithOptions(value, LL2STROBJ_NO_INT_ENC);
}
+
/* Create a string object from a long double. If humanfriendly is non-zero
 * it does not use exponential format and trims trailing zeroes at the end,
 * however this results in loss of precision. Otherwise exp format is used
 * and the output of snprintf() is not modified.
 *
 * The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
    char buf[MAX_LONG_DOUBLE_CHARS];
    int len = ld2string(buf,sizeof(buf),value,humanfriendly? LD_STR_HUMAN: LD_STR_AUTO);
    return createStringObject(buf,len);
}
+
+/* Duplicate a string object, with the guarantee that the returned object
+ * has the same encoding as the original one.
+ *
+ * This function also guarantees that duplicating a small integer object
+ * (or a string object that contains a representation of a small integer)
+ * will always result in a fresh object that is unshared (refcount == 1).
+ *
+ * The resulting object always has refcount set to 1. */
+robj *dupStringObject(const robj *o) {
+ robj *d;
+
+ serverAssert(o->type == OBJ_STRING);
+
+ switch(o->encoding) {
+ case OBJ_ENCODING_RAW:
+ return createRawStringObject(o->ptr,sdslen(o->ptr));
+ case OBJ_ENCODING_EMBSTR:
+ return createEmbeddedStringObject(o->ptr,sdslen(o->ptr));
+ case OBJ_ENCODING_INT:
+ d = createObject(OBJ_STRING, NULL);
+ d->encoding = OBJ_ENCODING_INT;
+ d->ptr = o->ptr;
+ return d;
+ default:
+ serverPanic("Wrong encoding.");
+ break;
+ }
+}
+
+robj *createQuicklistObject(void) {
+ quicklist *l = quicklistCreate();
+ robj *o = createObject(OBJ_LIST,l);
+ o->encoding = OBJ_ENCODING_QUICKLIST;
+ return o;
+}
+
+robj *createListListpackObject(void) {
+ unsigned char *lp = lpNew(0);
+ robj *o = createObject(OBJ_LIST,lp);
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ return o;
+}
+
+robj *createSetObject(void) {
+ dict *d = dictCreate(&setDictType);
+ robj *o = createObject(OBJ_SET,d);
+ o->encoding = OBJ_ENCODING_HT;
+ return o;
+}
+
+robj *createIntsetObject(void) {
+ intset *is = intsetNew();
+ robj *o = createObject(OBJ_SET,is);
+ o->encoding = OBJ_ENCODING_INTSET;
+ return o;
+}
+
+robj *createSetListpackObject(void) {
+ unsigned char *lp = lpNew(0);
+ robj *o = createObject(OBJ_SET, lp);
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ return o;
+}
+
+robj *createHashObject(void) {
+ unsigned char *zl = lpNew(0);
+ robj *o = createObject(OBJ_HASH, zl);
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ return o;
+}
+
+robj *createZsetObject(void) {
+ zset *zs = zmalloc(sizeof(*zs));
+ robj *o;
+
+ zs->dict = dictCreate(&zsetDictType);
+ zs->zsl = zslCreate();
+ o = createObject(OBJ_ZSET,zs);
+ o->encoding = OBJ_ENCODING_SKIPLIST;
+ return o;
+}
+
+robj *createZsetListpackObject(void) {
+ unsigned char *lp = lpNew(0);
+ robj *o = createObject(OBJ_ZSET,lp);
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ return o;
+}
+
+robj *createStreamObject(void) {
+ stream *s = streamNew();
+ robj *o = createObject(OBJ_STREAM,s);
+ o->encoding = OBJ_ENCODING_STREAM;
+ return o;
+}
+
/* Create a module object wrapping 'value' with the given module type 'mt'.
 * The moduleValue container is freed (together with the value, via the
 * type's free method) by freeModuleObject(). */
robj *createModuleObject(moduleType *mt, void *value) {
    moduleValue *mv = zmalloc(sizeof(*mv));
    mv->type = mt;
    mv->value = value;
    return createObject(OBJ_MODULE,mv);
}
+
/* Release the payload of a string object. Only RAW encoding owns a
 * separate sds allocation; EMBSTR is part of the robj allocation and
 * INT stores the value in the pointer itself, so both need no work here. */
void freeStringObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_RAW) {
        sdsfree(o->ptr);
    }
}
+
+void freeListObject(robj *o) {
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklistRelease(o->ptr);
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ lpFree(o->ptr);
+ } else {
+ serverPanic("Unknown list encoding type");
+ }
+}
+
+void freeSetObject(robj *o) {
+ switch (o->encoding) {
+ case OBJ_ENCODING_HT:
+ dictRelease((dict*) o->ptr);
+ break;
+ case OBJ_ENCODING_INTSET:
+ case OBJ_ENCODING_LISTPACK:
+ zfree(o->ptr);
+ break;
+ default:
+ serverPanic("Unknown set encoding type");
+ }
+}
+
+void freeZsetObject(robj *o) {
+ zset *zs;
+ switch (o->encoding) {
+ case OBJ_ENCODING_SKIPLIST:
+ zs = o->ptr;
+ dictRelease(zs->dict);
+ zslFree(zs->zsl);
+ zfree(zs);
+ break;
+ case OBJ_ENCODING_LISTPACK:
+ zfree(o->ptr);
+ break;
+ default:
+ serverPanic("Unknown sorted set encoding");
+ }
+}
+
+void freeHashObject(robj *o) {
+ switch (o->encoding) {
+ case OBJ_ENCODING_HT:
+ dictRelease((dict*) o->ptr);
+ break;
+ case OBJ_ENCODING_LISTPACK:
+ lpFree(o->ptr);
+ break;
+ default:
+ serverPanic("Unknown hash encoding type");
+ break;
+ }
+}
+
/* Release a module object: the wrapped value is freed through the module
 * type's free method, then the moduleValue container itself. */
void freeModuleObject(robj *o) {
    moduleValue *mv = o->ptr;
    mv->type->free(mv->value);
    zfree(mv);
}
+
/* Release the payload of a stream object. */
void freeStreamObject(robj *o) {
    freeStream(o->ptr);
}
+
+void incrRefCount(robj *o) {
+ if (o->refcount < OBJ_FIRST_SPECIAL_REFCOUNT) {
+ o->refcount++;
+ } else {
+ if (o->refcount == OBJ_SHARED_REFCOUNT) {
+ /* Nothing to do: this refcount is immutable. */
+ } else if (o->refcount == OBJ_STATIC_REFCOUNT) {
+ serverPanic("You tried to retain an object allocated in the stack");
+ }
+ }
+}
+
/* Decrement the reference count of 'o', freeing it (payload first, then
 * the robj itself) when the count drops from 1. Shared objects are never
 * decremented; a refcount <= 0 indicates a double-free style bug. */
void decrRefCount(robj *o) {
    if (o->refcount == 1) {
        /* Last reference: dispatch to the per-type payload destructor. */
        switch(o->type) {
        case OBJ_STRING: freeStringObject(o); break;
        case OBJ_LIST: freeListObject(o); break;
        case OBJ_SET: freeSetObject(o); break;
        case OBJ_ZSET: freeZsetObject(o); break;
        case OBJ_HASH: freeHashObject(o); break;
        case OBJ_MODULE: freeModuleObject(o); break;
        case OBJ_STREAM: freeStreamObject(o); break;
        default: serverPanic("Unknown object type"); break;
        }
        zfree(o);
    } else {
        if (o->refcount <= 0) serverPanic("decrRefCount against refcount <= 0");
        if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount--;
    }
}
+
/* See dismissObject(): dismiss the whole allocation backing the sds
 * string (header included). */
void dismissSds(sds s) {
    dismissMemory(sdsAllocPtr(s), sdsAllocSize(s));
}
+
/* See dismissObject(). Only RAW strings have a separate sds allocation
 * worth dismissing; EMBSTR/INT payloads live inside the robj itself. */
void dismissStringObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_RAW) {
        dismissSds(o->ptr);
    }
}
+
/* See dismissObject(). 'size_hint' is the serialized size of the object,
 * used to estimate whether per-node dismissal is worth the walk. */
void dismissListObject(robj *o, size_t size_hint) {
    if (o->encoding == OBJ_ENCODING_QUICKLIST) {
        quicklist *ql = o->ptr;
        serverAssert(ql->len != 0);
        /* We iterate all nodes only when average node size is bigger than a
         * page size, and there's a high chance we'll actually dismiss something. */
        if (size_hint / ql->len >= server.page_size) {
            quicklistNode *node = ql->head;
            while (node) {
                if (quicklistNodeIsCompressed(node)) {
                    /* Compressed node: dismiss the LZF blob size. */
                    dismissMemory(node->entry, ((quicklistLZF*)node->entry)->sz);
                } else {
                    dismissMemory(node->entry, node->sz);
                }
                node = node->next;
            }
        }
    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
        dismissMemory(o->ptr, lpBytes((unsigned char*)o->ptr));
    } else {
        serverPanic("Unknown list encoding type");
    }
}
+
+/* See dismissObject() */
+void dismissSetObject(robj *o, size_t size_hint) {
+ if (o->encoding == OBJ_ENCODING_HT) {
+ dict *set = o->ptr;
+ serverAssert(dictSize(set) != 0);
+ /* We iterate all nodes only when average member size is bigger than a
+ * page size, and there's a high chance we'll actually dismiss something. */
+ if (size_hint / dictSize(set) >= server.page_size) {
+ dictEntry *de;
+ dictIterator *di = dictGetIterator(set);
+ while ((de = dictNext(di)) != NULL) {
+ dismissSds(dictGetKey(de));
+ }
+ dictReleaseIterator(di);
+ }
+
+ /* Dismiss hash table memory. */
+ dismissMemory(set->ht_table[0], DICTHT_SIZE(set->ht_size_exp[0])*sizeof(dictEntry*));
+ dismissMemory(set->ht_table[1], DICTHT_SIZE(set->ht_size_exp[1])*sizeof(dictEntry*));
+ } else if (o->encoding == OBJ_ENCODING_INTSET) {
+ dismissMemory(o->ptr, intsetBlobLen((intset*)o->ptr));
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr));
+ } else {
+ serverPanic("Unknown set encoding type");
+ }
+}
+
+/* See dismissObject() */
+void dismissZsetObject(robj *o, size_t size_hint) {
+ if (o->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = o->ptr;
+ zskiplist *zsl = zs->zsl;
+ serverAssert(zsl->length != 0);
+ /* We iterate all nodes only when average member size is bigger than a
+ * page size, and there's a high chance we'll actually dismiss something. */
+ if (size_hint / zsl->length >= server.page_size) {
+ zskiplistNode *zn = zsl->tail;
+ while (zn != NULL) {
+ dismissSds(zn->ele);
+ zn = zn->backward;
+ }
+ }
+
+ /* Dismiss hash table memory. */
+ dict *d = zs->dict;
+ dismissMemory(d->ht_table[0], DICTHT_SIZE(d->ht_size_exp[0])*sizeof(dictEntry*));
+ dismissMemory(d->ht_table[1], DICTHT_SIZE(d->ht_size_exp[1])*sizeof(dictEntry*));
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ dismissMemory(o->ptr, lpBytes((unsigned char*)o->ptr));
+ } else {
+ serverPanic("Unknown zset encoding type");
+ }
+}
+
+/* See dismissObject() */
+void dismissHashObject(robj *o, size_t size_hint) {
+ if (o->encoding == OBJ_ENCODING_HT) {
+ dict *d = o->ptr;
+ serverAssert(dictSize(d) != 0);
+ /* We iterate all fields only when average field/value size is bigger than
+ * a page size, and there's a high chance we'll actually dismiss something. */
+ if (size_hint / dictSize(d) >= server.page_size) {
+ dictEntry *de;
+ dictIterator *di = dictGetIterator(d);
+ while ((de = dictNext(di)) != NULL) {
+ /* Only dismiss values memory since the field size
+ * usually is small. */
+ dismissSds(dictGetVal(de));
+ }
+ dictReleaseIterator(di);
+ }
+
+ /* Dismiss hash table memory. */
+ dismissMemory(d->ht_table[0], DICTHT_SIZE(d->ht_size_exp[0])*sizeof(dictEntry*));
+ dismissMemory(d->ht_table[1], DICTHT_SIZE(d->ht_size_exp[1])*sizeof(dictEntry*));
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ dismissMemory(o->ptr, lpBytes((unsigned char*)o->ptr));
+ } else {
+ serverPanic("Unknown hash encoding type");
+ }
+}
+
+/* See dismissObject() */
+void dismissStreamObject(robj *o, size_t size_hint) {
+ stream *s = o->ptr;
+ rax *rax = s->rax;
+ if (raxSize(rax) == 0) return;
+
+ /* Iterate only on stream entries, although size_hint may include serialized
+ * consumer groups info, but usually, stream entries take up most of
+ * the space. */
+ if (size_hint / raxSize(rax) >= server.page_size) {
+ raxIterator ri;
+ raxStart(&ri,rax);
+ raxSeek(&ri,"^",NULL,0);
+ while (raxNext(&ri)) {
+ dismissMemory(ri.data, lpBytes(ri.data));
+ }
+ raxStop(&ri);
+ }
+}
+
/* When creating a snapshot in a fork child process, the main process and child
 * process share the same physical memory pages, and if / when the parent
 * modifies any keys due to write traffic, it'll cause CoW which consume
 * physical memory. In the child process, after serializing the key and value,
 * the data is definitely not accessed again, so to avoid unnecessary CoW, we
 * try to release their memory back to OS. see dismissMemory().
 *
 * Because of the cost of iterating all node/field/member/entry of complex data
 * types, we iterate and dismiss them only when approximate average we estimate
 * the size of an individual allocation is more than a page size of OS.
 * 'size_hint' is the size of serialized value. This method is not accurate, but
 * it can reduce unnecessary iteration for complex data types that are probably
 * not going to release any memory. */
void dismissObject(robj *o, size_t size_hint) {
    /* madvise(MADV_DONTNEED) may not work if Transparent Huge Pages is enabled. */
    if (server.thp_enabled) return;

    /* Currently we use zmadvise_dontneed only when we use jemalloc with Linux.
     * so we avoid these pointless loops when they're not going to do anything. */
#if defined(USE_JEMALLOC) && defined(__linux__)
    /* Only dismiss objects we exclusively own: a shared object may still be
     * referenced (and accessed) elsewhere. */
    if (o->refcount != 1) return;
    switch(o->type) {
    case OBJ_STRING: dismissStringObject(o); break;
    case OBJ_LIST: dismissListObject(o, size_hint); break;
    case OBJ_SET: dismissSetObject(o, size_hint); break;
    case OBJ_ZSET: dismissZsetObject(o, size_hint); break;
    case OBJ_HASH: dismissHashObject(o, size_hint); break;
    case OBJ_STREAM: dismissStreamObject(o, size_hint); break;
    default: break; /* Modules and unknown types: nothing we can dismiss. */
    }
#else
    UNUSED(o); UNUSED(size_hint);
#endif
}
+
+/* This variant of decrRefCount() gets its argument as void, and is useful
+ * as free method in data structures that expect a 'void free_object(void*)'
+ * prototype for the free method. */
+void decrRefCountVoid(void *o) {
+ decrRefCount(o);
+}
+
+int checkType(client *c, robj *o, int type) {
+ /* A NULL is considered an empty key */
+ if (o && o->type != type) {
+ addReplyErrorObject(c,shared.wrongtypeerr);
+ return 1;
+ }
+ return 0;
+}
+
+int isSdsRepresentableAsLongLong(sds s, long long *llval) {
+ return string2ll(s,sdslen(s),llval) ? C_OK : C_ERR;
+}
+
+int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
+ if (o->encoding == OBJ_ENCODING_INT) {
+ if (llval) *llval = (long) o->ptr;
+ return C_OK;
+ } else {
+ return isSdsRepresentableAsLongLong(o->ptr,llval);
+ }
+}
+
+/* Optimize the SDS string inside the string object to require little space,
+ * in case there is more than 10% of free space at the end of the SDS. */
+void trimStringObjectIfNeeded(robj *o, int trim_small_values) {
+ if (o->encoding != OBJ_ENCODING_RAW) return;
+ /* A string may have free space in the following cases:
+ * 1. When an arg len is greater than PROTO_MBULK_BIG_ARG the query buffer may be used directly as the SDS string.
+ * 2. When utilizing the argument caching mechanism in Lua.
+ * 3. When calling from RM_TrimStringAllocation (trim_small_values is true). */
+ size_t len = sdslen(o->ptr);
+ if (len >= PROTO_MBULK_BIG_ARG ||
+ trim_small_values||
+ (server.executing_client && server.executing_client->flags & CLIENT_SCRIPT && len < LUA_CMD_OBJCACHE_MAX_LEN)) {
+ if (sdsavail(o->ptr) > len/10) {
+ o->ptr = sdsRemoveFreeSpace(o->ptr, 0);
+ }
+ }
+}
+
/* Try to encode a string object in order to save space.
 *
 * Returns either 'o' itself (possibly re-encoded in place), a shared integer
 * object, or a brand new object replacing 'o'. In the latter two cases the
 * reference to 'o' is dropped, so callers must always use the returned
 * pointer. When 'try_trim' is non-zero, the trailing SDS free space may be
 * trimmed as a last resort. */
robj *tryObjectEncodingEx(robj *o, int try_trim) {
    long value;
    sds s = o->ptr;
    size_t len;

    /* Make sure this is a string object, the only type we encode
     * in this function. Other types use encoded memory efficient
     * representations but are handled by the commands implementing
     * the type. */
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    /* We try some specialized encoding only for objects that are
     * RAW or EMBSTR encoded, in other words objects that are still
     * represented by an actual array of chars. */
    if (!sdsEncodedObject(o)) return o;

    /* It's not safe to encode shared objects: shared objects can be shared
     * everywhere in the "object space" of Redis and may end in places where
     * they are not handled. We handle them only as values in the keyspace. */
    if (o->refcount > 1) return o;

    /* Check if we can represent this string as a long integer.
     * Note that we are sure that a string larger than 20 chars is not
     * representable as a 32 nor 64 bit integer. */
    len = sdslen(s);
    if (len <= 20 && string2l(s,len,&value)) {
        /* This object is encodable as a long. Try to use a shared object.
         * Note that we avoid using shared integers when maxmemory is used
         * because every object needs to have a private LRU field for the LRU
         * algorithm to work well. */
        if ((server.maxmemory == 0 ||
            !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) &&
            value >= 0 &&
            value < OBJ_SHARED_INTEGERS)
        {
            /* Swap the private object for the shared one. */
            decrRefCount(o);
            return shared.integers[value];
        } else {
            if (o->encoding == OBJ_ENCODING_RAW) {
                /* RAW: free the sds and store the long directly in o->ptr. */
                sdsfree(o->ptr);
                o->encoding = OBJ_ENCODING_INT;
                o->ptr = (void*) value;
                return o;
            } else if (o->encoding == OBJ_ENCODING_EMBSTR) {
                /* EMBSTR: robj and sds share one allocation, so we must
                 * create a fresh INT object instead of mutating in place. */
                decrRefCount(o);
                return createStringObjectFromLongLongForValue(value);
            }
        }
    }

    /* If the string is small and is still RAW encoded,
     * try the EMBSTR encoding which is more efficient.
     * In this representation the object and the SDS string are allocated
     * in the same chunk of memory to save space and cache misses. */
    if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
        robj *emb;

        if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
        emb = createEmbeddedStringObject(s,sdslen(s));
        decrRefCount(o);
        return emb;
    }

    /* We can't encode the object...
     * Do the last try, and at least optimize the SDS string inside */
    if (try_trim)
        trimStringObjectIfNeeded(o, 0);

    /* Return the original object. */
    return o;
}
+
+robj *tryObjectEncoding(robj *o) {
+ return tryObjectEncodingEx(o, 1);
+}
+
+/* Get a decoded version of an encoded object (returned as a new object).
+ * If the object is already raw-encoded just increment the ref count. */
+robj *getDecodedObject(robj *o) {
+ robj *dec;
+
+ if (sdsEncodedObject(o)) {
+ incrRefCount(o);
+ return o;
+ }
+ if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) {
+ char buf[32];
+
+ ll2string(buf,32,(long)o->ptr);
+ dec = createStringObject(buf,strlen(buf));
+ return dec;
+ } else {
+ serverPanic("Unknown encoding type");
+ }
+}
+
+/* Compare two string objects via strcmp() or strcoll() depending on flags.
+ * Note that the objects may be integer-encoded. In such a case we
+ * use ll2string() to get a string representation of the numbers on the stack
+ * and compare the strings, it's much faster than calling getDecodedObject().
+ *
+ * Important note: when REDIS_COMPARE_BINARY is used a binary-safe comparison
+ * is used. */
+
+#define REDIS_COMPARE_BINARY (1<<0)
+#define REDIS_COMPARE_COLL (1<<1)
+
+int compareStringObjectsWithFlags(const robj *a, const robj *b, int flags) {
+ serverAssertWithInfo(NULL,a,a->type == OBJ_STRING && b->type == OBJ_STRING);
+ char bufa[128], bufb[128], *astr, *bstr;
+ size_t alen, blen, minlen;
+
+ if (a == b) return 0;
+ if (sdsEncodedObject(a)) {
+ astr = a->ptr;
+ alen = sdslen(astr);
+ } else {
+ alen = ll2string(bufa,sizeof(bufa),(long) a->ptr);
+ astr = bufa;
+ }
+ if (sdsEncodedObject(b)) {
+ bstr = b->ptr;
+ blen = sdslen(bstr);
+ } else {
+ blen = ll2string(bufb,sizeof(bufb),(long) b->ptr);
+ bstr = bufb;
+ }
+ if (flags & REDIS_COMPARE_COLL) {
+ return strcoll(astr,bstr);
+ } else {
+ int cmp;
+
+ minlen = (alen < blen) ? alen : blen;
+ cmp = memcmp(astr,bstr,minlen);
+ if (cmp == 0) return alen-blen;
+ return cmp;
+ }
+}
+
+/* Wrapper for compareStringObjectsWithFlags() using binary comparison. */
+int compareStringObjects(const robj *a, const robj *b) {
+ return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_BINARY);
+}
+
+/* Wrapper for compareStringObjectsWithFlags() using collation. */
+int collateStringObjects(const robj *a, const robj *b) {
+ return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_COLL);
+}
+
+/* Equal string objects return 1 if the two objects are the same from the
+ * point of view of a string comparison, otherwise 0 is returned. Note that
+ * this function is faster then checking for (compareStringObject(a,b) == 0)
+ * because it can perform some more optimization. */
+int equalStringObjects(robj *a, robj *b) {
+ if (a->encoding == OBJ_ENCODING_INT &&
+ b->encoding == OBJ_ENCODING_INT){
+ /* If both strings are integer encoded just check if the stored
+ * long is the same. */
+ return a->ptr == b->ptr;
+ } else {
+ return compareStringObjects(a,b) == 0;
+ }
+}
+
+size_t stringObjectLen(robj *o) {
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
+ if (sdsEncodedObject(o)) {
+ return sdslen(o->ptr);
+ } else {
+ return sdigits10((long)o->ptr);
+ }
+}
+
+int getDoubleFromObject(const robj *o, double *target) {
+ double value;
+
+ if (o == NULL) {
+ value = 0;
+ } else {
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
+ if (sdsEncodedObject(o)) {
+ if (!string2d(o->ptr, sdslen(o->ptr), &value))
+ return C_ERR;
+ } else if (o->encoding == OBJ_ENCODING_INT) {
+ value = (long)o->ptr;
+ } else {
+ serverPanic("Unknown string encoding");
+ }
+ }
+ *target = value;
+ return C_OK;
+}
+
+int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg) {
+ double value;
+ if (getDoubleFromObject(o, &value) != C_OK) {
+ if (msg != NULL) {
+ addReplyError(c,(char*)msg);
+ } else {
+ addReplyError(c,"value is not a valid float");
+ }
+ return C_ERR;
+ }
+ *target = value;
+ return C_OK;
+}
+
+int getLongDoubleFromObject(robj *o, long double *target) {
+ long double value;
+
+ if (o == NULL) {
+ value = 0;
+ } else {
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
+ if (sdsEncodedObject(o)) {
+ if (!string2ld(o->ptr, sdslen(o->ptr), &value))
+ return C_ERR;
+ } else if (o->encoding == OBJ_ENCODING_INT) {
+ value = (long)o->ptr;
+ } else {
+ serverPanic("Unknown string encoding");
+ }
+ }
+ *target = value;
+ return C_OK;
+}
+
+int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg) {
+ long double value;
+ if (getLongDoubleFromObject(o, &value) != C_OK) {
+ if (msg != NULL) {
+ addReplyError(c,(char*)msg);
+ } else {
+ addReplyError(c,"value is not a valid float");
+ }
+ return C_ERR;
+ }
+ *target = value;
+ return C_OK;
+}
+
+int getLongLongFromObject(robj *o, long long *target) {
+ long long value;
+
+ if (o == NULL) {
+ value = 0;
+ } else {
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
+ if (sdsEncodedObject(o)) {
+ if (string2ll(o->ptr,sdslen(o->ptr),&value) == 0) return C_ERR;
+ } else if (o->encoding == OBJ_ENCODING_INT) {
+ value = (long)o->ptr;
+ } else {
+ serverPanic("Unknown string encoding");
+ }
+ }
+ if (target) *target = value;
+ return C_OK;
+}
+
+int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) {
+ long long value;
+ if (getLongLongFromObject(o, &value) != C_OK) {
+ if (msg != NULL) {
+ addReplyError(c,(char*)msg);
+ } else {
+ addReplyError(c,"value is not an integer or out of range");
+ }
+ return C_ERR;
+ }
+ *target = value;
+ return C_OK;
+}
+
/* Extract a 'long' out of the string object 'o', replying to the client with
 * 'msg' (or a generic range error when msg is NULL) on failure.
 * Returns C_OK (setting '*target') on success, C_ERR otherwise. */
int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
    long long value;

    if (getLongLongFromObjectOrReply(c, o, &value, msg) != C_OK) return C_ERR;
    /* NOTE(review): on LP64 platforms long and long long have the same range,
     * so this check can never fire there; it matters on platforms where long
     * is 32 bits (e.g. Windows LLP64). */
    if (value < LONG_MIN || value > LONG_MAX) {
        if (msg != NULL) {
            addReplyError(c,(char*)msg);
        } else {
            addReplyError(c,"value is out of range");
        }
        return C_ERR;
    }
    *target = value;
    return C_OK;
}
+
+int getRangeLongFromObjectOrReply(client *c, robj *o, long min, long max, long *target, const char *msg) {
+ if (getLongFromObjectOrReply(c, o, target, msg) != C_OK) return C_ERR;
+ if (*target < min || *target > max) {
+ if (msg != NULL) {
+ addReplyError(c,(char*)msg);
+ } else {
+ addReplyErrorFormat(c,"value is out of range, value must between %ld and %ld", min, max);
+ }
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+int getPositiveLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
+ if (msg) {
+ return getRangeLongFromObjectOrReply(c, o, 0, LONG_MAX, target, msg);
+ } else {
+ return getRangeLongFromObjectOrReply(c, o, 0, LONG_MAX, target, "value is out of range, must be positive");
+ }
+}
+
+int getIntFromObjectOrReply(client *c, robj *o, int *target, const char *msg) {
+ long value;
+
+ if (getRangeLongFromObjectOrReply(c, o, INT_MIN, INT_MAX, &value, msg) != C_OK)
+ return C_ERR;
+
+ *target = value;
+ return C_OK;
+}
+
+char *strEncoding(int encoding) {
+ switch(encoding) {
+ case OBJ_ENCODING_RAW: return "raw";
+ case OBJ_ENCODING_INT: return "int";
+ case OBJ_ENCODING_HT: return "hashtable";
+ case OBJ_ENCODING_QUICKLIST: return "quicklist";
+ case OBJ_ENCODING_LISTPACK: return "listpack";
+ case OBJ_ENCODING_INTSET: return "intset";
+ case OBJ_ENCODING_SKIPLIST: return "skiplist";
+ case OBJ_ENCODING_EMBSTR: return "embstr";
+ case OBJ_ENCODING_STREAM: return "stream";
+ default: return "unknown";
+ }
+}
+
+/* =========================== Memory introspection ========================= */
+
+
+/* This is a helper function with the goal of estimating the memory
+ * size of a radix tree that is used to store Stream IDs.
+ *
+ * Note: to guess the size of the radix tree is not trivial, so we
+ * approximate it considering 16 bytes of data overhead for each
+ * key (the ID), and then adding the number of bare nodes, plus some
+ * overhead due by the data and child pointers. This secret recipe
+ * was obtained by checking the average radix tree created by real
+ * workloads, and then adjusting the constants to get numbers that
+ * more or less match the real memory usage.
+ *
+ * Actually the number of nodes and keys may be different depending
+ * on the insertion speed and thus the ability of the radix tree
+ * to compress prefixes. */
+size_t streamRadixTreeMemoryUsage(rax *rax) {
+ size_t size = sizeof(*rax);
+ size = rax->numele * sizeof(streamID);
+ size += rax->numnodes * sizeof(raxNode);
+ /* Add a fixed overhead due to the aux data pointer, children, ... */
+ size += rax->numnodes * sizeof(long)*30;
+ return size;
+}
+
/* Returns the size in bytes consumed by the key's value in RAM.
 * Note that the returned value is just an approximation, especially in the
 * case of aggregated data types where only "sample_size" elements
 * are checked and averaged to estimate the total size. */
#define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
    sds ele, ele2;
    dict *d;
    dictIterator *di;
    struct dictEntry *de;
    size_t asize = 0, elesize = 0, samples = 0;

    if (o->type == OBJ_STRING) {
        if(o->encoding == OBJ_ENCODING_INT) {
            /* The long lives inside o->ptr: only the robj itself counts. */
            asize = sizeof(*o);
        } else if(o->encoding == OBJ_ENCODING_RAW) {
            asize = sdsZmallocSize(o->ptr)+sizeof(*o);
        } else if(o->encoding == OBJ_ENCODING_EMBSTR) {
            /* robj and sds share one allocation; its size covers both. */
            asize = zmalloc_size((void *)o);
        } else {
            serverPanic("Unknown string encoding");
        }
    } else if (o->type == OBJ_LIST) {
        if (o->encoding == OBJ_ENCODING_QUICKLIST) {
            quicklist *ql = o->ptr;
            quicklistNode *node = ql->head;
            asize = sizeof(*o)+sizeof(quicklist);
            /* NOTE(review): the do-while dereferences ql->head before testing
             * it, so this assumes a quicklist-encoded list is never empty. */
            do {
                elesize += sizeof(quicklistNode)+zmalloc_size(node->entry);
                samples++;
            } while ((node = node->next) && samples < sample_size);
            /* Extrapolate the sampled average over all nodes. */
            asize += (double)elesize/samples*ql->len;
        } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
            asize = sizeof(*o)+zmalloc_size(o->ptr);
        } else {
            serverPanic("Unknown list encoding");
        }
    } else if (o->type == OBJ_SET) {
        if (o->encoding == OBJ_ENCODING_HT) {
            d = o->ptr;
            di = dictGetIterator(d);
            /* Fixed overhead: robj + dict header + bucket array. */
            asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
            while((de = dictNext(di)) != NULL && samples < sample_size) {
                ele = dictGetKey(de);
                elesize += dictEntryMemUsage() + sdsZmallocSize(ele);
                samples++;
            }
            dictReleaseIterator(di);
            /* Extrapolate the sampled average over all members. */
            if (samples) asize += (double)elesize/samples*dictSize(d);
        } else if (o->encoding == OBJ_ENCODING_INTSET) {
            asize = sizeof(*o)+zmalloc_size(o->ptr);
        } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
            asize = sizeof(*o)+zmalloc_size(o->ptr);
        } else {
            serverPanic("Unknown set encoding");
        }
    } else if (o->type == OBJ_ZSET) {
        if (o->encoding == OBJ_ENCODING_LISTPACK) {
            asize = sizeof(*o)+zmalloc_size(o->ptr);
        } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
            d = ((zset*)o->ptr)->dict;
            zskiplist *zsl = ((zset*)o->ptr)->zsl;
            zskiplistNode *znode = zsl->header->level[0].forward;
            /* Fixed overhead: robj + zset + skiplist + dict headers, bucket
             * array, and the skiplist header node. */
            asize = sizeof(*o)+sizeof(zset)+sizeof(zskiplist)+sizeof(dict)+
                    (sizeof(struct dictEntry*)*dictSlots(d))+
                    zmalloc_size(zsl->header);
            while(znode != NULL && samples < sample_size) {
                /* Each member costs its sds, a dict entry, and a skiplist
                 * node. */
                elesize += sdsZmallocSize(znode->ele);
                elesize += dictEntryMemUsage()+zmalloc_size(znode);
                samples++;
                znode = znode->level[0].forward;
            }
            if (samples) asize += (double)elesize/samples*dictSize(d);
        } else {
            serverPanic("Unknown sorted set encoding");
        }
    } else if (o->type == OBJ_HASH) {
        if (o->encoding == OBJ_ENCODING_LISTPACK) {
            asize = sizeof(*o)+zmalloc_size(o->ptr);
        } else if (o->encoding == OBJ_ENCODING_HT) {
            d = o->ptr;
            di = dictGetIterator(d);
            asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
            while((de = dictNext(di)) != NULL && samples < sample_size) {
                /* Each sampled pair costs field + value sds plus the entry. */
                ele = dictGetKey(de);
                ele2 = dictGetVal(de);
                elesize += sdsZmallocSize(ele) + sdsZmallocSize(ele2);
                elesize += dictEntryMemUsage();
                samples++;
            }
            dictReleaseIterator(di);
            if (samples) asize += (double)elesize/samples*dictSize(d);
        } else {
            serverPanic("Unknown hash encoding");
        }
    } else if (o->type == OBJ_STREAM) {
        stream *s = o->ptr;
        asize = sizeof(*o)+sizeof(*s);
        asize += streamRadixTreeMemoryUsage(s->rax);

        /* Now we have to add the listpacks. The last listpack is often non
         * complete, so we estimate the size of the first N listpacks, and
         * use the average to compute the size of the first N-1 listpacks, and
         * finally add the real size of the last node. */
        raxIterator ri;
        raxStart(&ri,s->rax);
        raxSeek(&ri,"^",NULL,0);
        size_t lpsize = 0, samples = 0;
        while(samples < sample_size && raxNext(&ri)) {
            unsigned char *lp = ri.data;
            /* Use the allocated size, since we overprovision the node initially. */
            lpsize += zmalloc_size(lp);
            samples++;
        }
        if (s->rax->numele <= samples) {
            /* We sampled every listpack: use the exact total. */
            asize += lpsize;
        } else {
            if (samples) lpsize /= samples; /* Compute the average. */
            asize += lpsize * (s->rax->numele-1);
            /* No need to check if seek succeeded, we enter this branch only
             * if there are a few elements in the radix tree. */
            raxSeek(&ri,"$",NULL,0);
            raxNext(&ri);
            /* Use the allocated size, since we overprovision the node initially. */
            asize += zmalloc_size(ri.data);
        }
        raxStop(&ri);

        /* Consumer groups also have a non trivial memory overhead if there
         * are many consumers and many groups, let's count at least the
         * overhead of the pending entries in the groups and consumers
         * PELs. */
        if (s->cgroups) {
            raxStart(&ri,s->cgroups);
            raxSeek(&ri,"^",NULL,0);
            while(raxNext(&ri)) {
                streamCG *cg = ri.data;
                asize += sizeof(*cg);
                asize += streamRadixTreeMemoryUsage(cg->pel);
                asize += sizeof(streamNACK)*raxSize(cg->pel);

                /* For each consumer we also need to add the basic data
                 * structures and the PEL memory usage. */
                raxIterator cri;
                raxStart(&cri,cg->consumers);
                raxSeek(&cri,"^",NULL,0);
                while(raxNext(&cri)) {
                    streamConsumer *consumer = cri.data;
                    asize += sizeof(*consumer);
                    asize += sdslen(consumer->name);
                    asize += streamRadixTreeMemoryUsage(consumer->pel);
                    /* Don't count NACKs again, they are shared with the
                     * consumer group PEL. */
                }
                raxStop(&cri);
            }
            raxStop(&ri);
        }
    } else if (o->type == OBJ_MODULE) {
        /* Delegate to the module's mem_usage callback. */
        asize = moduleGetMemUsage(key, o, sample_size, dbid);
    } else {
        serverPanic("Unknown object type");
    }
    return asize;
}
+
+/* Release data obtained with getMemoryOverheadData(). */
+void freeMemoryOverheadData(struct redisMemOverhead *mh) {
+ zfree(mh->db);
+ zfree(mh);
+}
+
/* Return a struct redisMemOverhead filled with memory overhead
 * information used for the MEMORY OVERHEAD and INFO command. The returned
 * structure pointer should be freed calling freeMemoryOverheadData(). */
struct redisMemOverhead *getMemoryOverheadData(void) {
    int j;
    size_t mem_total = 0;
    size_t mem = 0;
    size_t zmalloc_used = zmalloc_used_memory();
    struct redisMemOverhead *mh = zcalloc(sizeof(*mh));

    mh->total_allocated = zmalloc_used;
    mh->startup_allocated = server.initial_memory_usage;
    mh->peak_allocated = server.stat_peak_memory;
    /* Fragmentation ratios/deltas are derived from the stats sampled by
     * serverCron, not recomputed here. */
    mh->total_frag =
        (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used;
    mh->total_frag_bytes =
        server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
    mh->allocator_frag =
        (float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated;
    mh->allocator_frag_bytes =
        server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated;
    mh->allocator_rss =
        (float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
    mh->allocator_rss_bytes =
        server.cron_malloc_stats.allocator_resident - server.cron_malloc_stats.allocator_active;
    mh->rss_extra =
        (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.allocator_resident;
    mh->rss_extra_bytes =
        server.cron_malloc_stats.process_rss - server.cron_malloc_stats.allocator_resident;

    mem_total += server.initial_memory_usage;

    /* Replication backlog and replicas share one global replication buffer,
     * only if replication buffer memory is more than the repl backlog setting,
     * we consider the excess as replicas' memory. Otherwise, replication buffer
     * memory is the consumption of repl backlog. */
    if (listLength(server.slaves) &&
        (long long)server.repl_buffer_mem > server.repl_backlog_size)
    {
        mh->clients_slaves = server.repl_buffer_mem - server.repl_backlog_size;
        mh->repl_backlog = server.repl_backlog_size;
    } else {
        mh->clients_slaves = 0;
        mh->repl_backlog = server.repl_buffer_mem;
    }
    if (server.repl_backlog) {
        /* The approximate memory of rax tree for indexed blocks. */
        mh->repl_backlog +=
            server.repl_backlog->blocks_index->numnodes * sizeof(raxNode) +
            raxSize(server.repl_backlog->blocks_index) * sizeof(void*);
    }
    mem_total += mh->repl_backlog;
    mem_total += mh->clients_slaves;

    /* Computing the memory used by the clients would be O(N) if done
     * here online. We use our values computed incrementally by
     * updateClientMemoryUsage(). */
    mh->clients_normal = server.stat_clients_type_memory[CLIENT_TYPE_MASTER]+
                         server.stat_clients_type_memory[CLIENT_TYPE_PUBSUB]+
                         server.stat_clients_type_memory[CLIENT_TYPE_NORMAL];
    mem_total += mh->clients_normal;

    mh->cluster_links = server.stat_cluster_links_memory;
    mem_total += mh->cluster_links;

    /* AOF rewrite buffer is accounted only when AOF is enabled. */
    mem = 0;
    if (server.aof_state != AOF_OFF) {
        mem += sdsZmallocSize(server.aof_buf);
    }
    mh->aof_buffer = mem;
    mem_total+=mem;

    /* Scripting caches (EVAL scripts and FUNCTION libraries). */
    mem = evalScriptsMemory();
    mh->lua_caches = mem;
    mem_total+=mem;
    mh->functions_caches = functionsMemoryOverhead();
    mem_total+=mh->functions_caches;

    /* Per-database overhead: main dict, expires dict, and (in cluster mode)
     * the slot-to-keys metadata. Databases without keys are skipped. */
    for (j = 0; j < server.dbnum; j++) {
        redisDb *db = server.db+j;
        long long keyscount = dictSize(db->dict);
        if (keyscount==0) continue;

        mh->total_keys += keyscount;
        mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1));
        mh->db[mh->num_dbs].dbid = j;

        mem = dictMemUsage(db->dict) +
              dictSize(db->dict) * sizeof(robj);
        mh->db[mh->num_dbs].overhead_ht_main = mem;
        mem_total+=mem;

        mem = dictMemUsage(db->expires);
        mh->db[mh->num_dbs].overhead_ht_expires = mem;
        mem_total+=mem;

        /* Account for the slot to keys map in cluster mode */
        mem = dictSize(db->dict) * dictEntryMetadataSize(db->dict) +
              dictMetadataSize(db->dict);
        mh->db[mh->num_dbs].overhead_ht_slot_to_keys = mem;
        mem_total+=mem;

        mh->num_dbs++;
    }

    mh->overhead_total = mem_total;
    mh->dataset = zmalloc_used - mem_total;
    mh->peak_perc = (float)zmalloc_used*100/mh->peak_allocated;

    /* Metrics computed after subtracting the startup memory from
     * the total memory. */
    /* net_usage defaults to 1 to avoid a division by zero below. */
    size_t net_usage = 1;
    if (zmalloc_used > mh->startup_allocated)
        net_usage = zmalloc_used - mh->startup_allocated;
    mh->dataset_perc = (float)mh->dataset*100/net_usage;
    mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : 0;

    return mh;
}
+
+/* Helper for "MEMORY allocator-stats", used as a callback for the jemalloc
+ * stats output. */
+void inputCatSds(void *result, const char *str) {
+ /* result is actually a (sds *), so re-cast it here */
+ sds *info = (sds *)result;
+ *info = sdscat(*info, str);
+}
+
+/* This implements MEMORY DOCTOR. An human readable analysis of the Redis
+ * memory condition. */
+sds getMemoryDoctorReport(void) {
+ int empty = 0; /* Instance is empty or almost empty. */
+ int big_peak = 0; /* Memory peak is much larger than used mem. */
+ int high_frag = 0; /* High fragmentation. */
+ int high_alloc_frag = 0;/* High allocator fragmentation. */
+ int high_proc_rss = 0; /* High process rss overhead. */
+ int high_alloc_rss = 0; /* High rss overhead. */
+ int big_slave_buf = 0; /* Slave buffers are too big. */
+ int big_client_buf = 0; /* Client buffers are too big. */
+ int many_scripts = 0; /* Script cache has too many scripts. */
+ int num_reports = 0;
+ struct redisMemOverhead *mh = getMemoryOverheadData();
+
+ if (mh->total_allocated < (1024*1024*5)) {
+ empty = 1;
+ num_reports++;
+ } else {
+ /* Peak is > 150% of current used memory? */
+ if (((float)mh->peak_allocated / mh->total_allocated) > 1.5) {
+ big_peak = 1;
+ num_reports++;
+ }
+
+ /* Fragmentation is higher than 1.4 and 10MB ?*/
+ if (mh->total_frag > 1.4 && mh->total_frag_bytes > 10<<20) {
+ high_frag = 1;
+ num_reports++;
+ }
+
+ /* External fragmentation is higher than 1.1 and 10MB? */
+ if (mh->allocator_frag > 1.1 && mh->allocator_frag_bytes > 10<<20) {
+ high_alloc_frag = 1;
+ num_reports++;
+ }
+
+ /* Allocator rss is higher than 1.1 and 10MB ? */
+ if (mh->allocator_rss > 1.1 && mh->allocator_rss_bytes > 10<<20) {
+ high_alloc_rss = 1;
+ num_reports++;
+ }
+
+ /* Non-Allocator rss is higher than 1.1 and 10MB ? */
+ if (mh->rss_extra > 1.1 && mh->rss_extra_bytes > 10<<20) {
+ high_proc_rss = 1;
+ num_reports++;
+ }
+
+ /* Clients using more than 200k each average? */
+ long numslaves = listLength(server.slaves);
+ long numclients = listLength(server.clients)-numslaves;
+ if (mh->clients_normal / numclients > (1024*200)) {
+ big_client_buf = 1;
+ num_reports++;
+ }
+
+ /* Slaves using more than 10 MB each? */
+ if (numslaves > 0 && mh->clients_slaves > (1024*1024*10)) {
+ big_slave_buf = 1;
+ num_reports++;
+ }
+
+ /* Too many scripts are cached? */
+ if (dictSize(evalScriptsDict()) > 1000) {
+ many_scripts = 1;
+ num_reports++;
+ }
+ }
+
+ sds s;
+ if (num_reports == 0) {
+ s = sdsnew(
+ "Hi Sam, I can't find any memory issue in your instance. "
+ "I can only account for what occurs on this base.\n");
+ } else if (empty == 1) {
+ s = sdsnew(
+ "Hi Sam, this instance is empty or is using very little memory, "
+ "my issues detector can't be used in these conditions. "
+ "Please, leave for your mission on Earth and fill it with some data. "
+ "The new Sam and I will be back to our programming as soon as I "
+ "finished rebooting.\n");
+ } else {
+ s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n");
+ if (big_peak) {
+ s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n");
+ }
+ if (high_frag) {
+ s = sdscatprintf(s," * High total RSS: This instance has a memory fragmentation and RSS overhead greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
+ }
+ if (high_alloc_frag) {
+ s = sdscatprintf(s," * High allocator fragmentation: This instance has an allocator external fragmentation greater than 1.1. This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. You can try enabling 'activedefrag' config option.\n\n");
+ }
+ if (high_alloc_rss) {
+ s = sdscatprintf(s," * High allocator RSS overhead: This instance has an RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the allocator is much larger than the sum what the allocator actually holds). This problem is usually due to a large peak memory (check if there is a peak memory entry above in the report), you can try the MEMORY PURGE command to reclaim it.\n\n");
+ }
+ if (high_proc_rss) {
+ s = sdscatprintf(s," * High process RSS overhead: This instance has non-allocator RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the Redis process is much larger than the RSS the allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
+ }
+ if (big_slave_buf) {
+ s = sdscat(s," * Big replica buffers: The replica output buffers in this instance are greater than 10MB for each replica (on average). This likely means that there is some replica instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what replica is not receiving data correctly and why. You can use the INFO output in order to check the replicas delays and the CLIENT LIST command to check the output buffers of each replica.\n\n");
+ }
+ if (big_client_buf) {
+ s = sdscat(s," * Big client buffers: The clients output buffers in this instance are greater than 200K per client (on average). This may result from different causes, like Pub/Sub clients subscribed to channels bot not receiving data fast enough, so that data piles on the Redis instance output buffer, or clients sending commands with large replies or very large sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to investigate the issue if it causes problems in your instance, or to understand better why certain clients are using a big amount of memory.\n\n");
+ }
+ if (many_scripts) {
+ s = sdscat(s," * Many scripts: There seem to be many cached scripts in this instance (more than 1000). This may be because scripts are generated and `EVAL`ed, instead of being parameterized (with KEYS and ARGV), `SCRIPT LOAD`ed and `EVALSHA`ed. Unless `SCRIPT FLUSH` is called periodically, the scripts' caches may end up consuming most of your memory.\n\n");
+ }
+ s = sdscat(s,"I'm here to keep you safe, Sam. I want to help you.\n");
+ }
+ freeMemoryOverheadData(mh);
+ return s;
+}
+
/* Set the object LRU/LFU depending on server.maxmemory_policy.
 * The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
 * The lru_idle and lru_clock args are only relevant if policy
 * is MAXMEMORY_FLAG_LRU.
 * Either or both of them may be <0, in that case, nothing is set.
 * Returns 1 if the object 'lru' field was updated, 0 otherwise. */
int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
                      long long lru_clock, int lru_multiplier) {
    if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
        if (lfu_freq >= 0) {
            /* LFU encoding: high bits hold the decay time in minutes,
             * the low 8 bits hold the logarithmic frequency counter. */
            serverAssert(lfu_freq <= 255);
            val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
            return 1;
        }
    } else if (lru_idle >= 0) {
        /* Provided LRU idle time is in seconds. Scale
         * according to the LRU clock resolution this Redis
         * instance was compiled with (normally 1000 ms, so the
         * below statement will expand to lru_idle*1000/1000. */
        lru_idle = lru_idle*lru_multiplier/LRU_CLOCK_RESOLUTION;
        long lru_abs = lru_clock - lru_idle; /* Absolute access time. */
        /* If the LRU field underflows (since lru_clock is a wrapping clock),
         * we need to make it positive again. This be handled by the unwrapping
         * code in estimateObjectIdleTime. I.e. imagine a day when lru_clock
         * wrap arounds (happens once in some 6 months), and becomes a low
         * value, like 10, an lru_idle of 1000 should be near LRU_CLOCK_MAX. */
        if (lru_abs < 0)
            lru_abs += LRU_CLOCK_MAX;
        val->lru = lru_abs;
        return 1;
    }
    return 0;
}
+
+/* ======================= The OBJECT and MEMORY commands =================== */
+
+/* This is a helper function for the OBJECT command. We need to lookup keys
+ * without any modification of LRU or other parameters. */
+robj *objectCommandLookup(client *c, robj *key) {
+ return lookupKeyReadWithFlags(c->db,key,LOOKUP_NOTOUCH|LOOKUP_NONOTIFY);
+}
+
+robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) {
+ robj *o = objectCommandLookup(c,key);
+ if (!o) addReplyOrErrorObject(c, reply);
+ return o;
+}
+
/* Object command allows to inspect the internals of a Redis Object.
 * Usage: OBJECT <refcount|encoding|idletime|freq> <key> */
void objectCommand(client *c) {
    robj *o;

    if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
        /* OBJECT HELP */
        const char *help[] = {
"ENCODING <key>",
" Return the kind of internal representation used in order to store the value",
" associated with a <key>.",
"FREQ <key>",
" Return the access frequency index of the <key>. The returned integer is",
" proportional to the logarithm of the recent access frequency of the key.",
"IDLETIME <key>",
" Return the idle time of the <key>, that is the approximated number of",
" seconds elapsed since the last access to the key.",
"REFCOUNT <key>",
" Return the number of references of the value associated with the specified",
" <key>.",
NULL
        };
        addReplyHelp(c, help);
    } else if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) {
        /* OBJECT REFCOUNT <key>: reference count of the value object. */
        if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
                == NULL) return;
        addReplyLongLong(c,o->refcount);
    } else if (!strcasecmp(c->argv[1]->ptr,"encoding") && c->argc == 3) {
        /* OBJECT ENCODING <key>: name of the internal representation. */
        if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
                == NULL) return;
        addReplyBulkCString(c,strEncoding(o->encoding));
    } else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) {
        /* OBJECT IDLETIME <key>: only meaningful under an LRU policy. */
        if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
                == NULL) return;
        if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
            addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
            return;
        }
        /* estimateObjectIdleTime() returns milliseconds; reply in seconds. */
        addReplyLongLong(c,estimateObjectIdleTime(o)/1000);
    } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) {
        /* OBJECT FREQ <key>: only meaningful under an LFU policy. */
        if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
                == NULL) return;
        if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) {
            addReplyError(c,"An LFU maxmemory policy is not selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
            return;
        }
        /* LFUDecrAndReturn should be called
         * in case of the key has not been accessed for a long time,
         * because we update the access time only
         * when the key is read or overwritten. */
        addReplyLongLong(c,LFUDecrAndReturn(o));
    } else {
        addReplySubcommandSyntaxError(c);
    }
}
+
/* The memory command will eventually be a complete interface for the
 * memory introspection capabilities of Redis.
 *
 * Usage: MEMORY usage <key> */
void memoryCommand(client *c) {
    if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
        /* MEMORY HELP */
        const char *help[] = {
"DOCTOR",
" Return memory problems reports.",
"MALLOC-STATS",
" Return internal statistics report from the memory allocator.",
"PURGE",
" Attempt to purge dirty pages for reclamation by the allocator.",
"STATS",
" Return information about the memory usage of the server.",
"USAGE <key> [SAMPLES <count>]",
" Return memory in bytes used by <key> and its value. Nested values are",
" sampled up to <count> times (default: 5, 0 means sample all).",
NULL
        };
        addReplyHelp(c, help);
    } else if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc >= 3) {
        /* MEMORY USAGE <key> [SAMPLES <count>] */
        dictEntry *de;
        long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES;
        /* Parse the optional SAMPLES argument; anything else is a
         * syntax error. */
        for (int j = 3; j < c->argc; j++) {
            if (!strcasecmp(c->argv[j]->ptr,"samples") &&
                j+1 < c->argc)
            {
                if (getLongLongFromObjectOrReply(c,c->argv[j+1],&samples,NULL)
                     == C_ERR) return;
                if (samples < 0) {
                    addReplyErrorObject(c,shared.syntaxerr);
                    return;
                }
                /* SAMPLES 0 means "sample every nested element". */
                if (samples == 0) samples = LLONG_MAX;
                j++; /* skip option argument. */
            } else {
                addReplyErrorObject(c,shared.syntaxerr);
                return;
            }
        }
        if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) {
            addReplyNull(c);
            return;
        }
        /* Account for the value, the key string, and the dict overheads. */
        size_t usage = objectComputeSize(c->argv[2],dictGetVal(de),samples,c->db->id);
        usage += sdsZmallocSize(dictGetKey(de));
        usage += dictEntryMemUsage();
        usage += dictMetadataSize(c->db->dict);
        addReplyLongLong(c,usage);
    } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
        /* MEMORY STATS */
        struct redisMemOverhead *mh = getMemoryOverheadData();

        /* NOTE: 27 is the number of top-level fields emitted below, and
         * must be kept in sync with them. */
        addReplyMapLen(c,27+mh->num_dbs);

        addReplyBulkCString(c,"peak.allocated");
        addReplyLongLong(c,mh->peak_allocated);

        addReplyBulkCString(c,"total.allocated");
        addReplyLongLong(c,mh->total_allocated);

        addReplyBulkCString(c,"startup.allocated");
        addReplyLongLong(c,mh->startup_allocated);

        addReplyBulkCString(c,"replication.backlog");
        addReplyLongLong(c,mh->repl_backlog);

        addReplyBulkCString(c,"clients.slaves");
        addReplyLongLong(c,mh->clients_slaves);

        addReplyBulkCString(c,"clients.normal");
        addReplyLongLong(c,mh->clients_normal);

        addReplyBulkCString(c,"cluster.links");
        addReplyLongLong(c,mh->cluster_links);

        addReplyBulkCString(c,"aof.buffer");
        addReplyLongLong(c,mh->aof_buffer);

        addReplyBulkCString(c,"lua.caches");
        addReplyLongLong(c,mh->lua_caches);

        addReplyBulkCString(c,"functions.caches");
        addReplyLongLong(c,mh->functions_caches);

        /* One nested map per database, with its hash table overheads. */
        for (size_t j = 0; j < mh->num_dbs; j++) {
            char dbname[32];
            snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);
            addReplyBulkCString(c,dbname);
            addReplyMapLen(c,3);

            addReplyBulkCString(c,"overhead.hashtable.main");
            addReplyLongLong(c,mh->db[j].overhead_ht_main);

            addReplyBulkCString(c,"overhead.hashtable.expires");
            addReplyLongLong(c,mh->db[j].overhead_ht_expires);

            addReplyBulkCString(c,"overhead.hashtable.slot-to-keys");
            addReplyLongLong(c,mh->db[j].overhead_ht_slot_to_keys);
        }

        addReplyBulkCString(c,"overhead.total");
        addReplyLongLong(c,mh->overhead_total);

        addReplyBulkCString(c,"keys.count");
        addReplyLongLong(c,mh->total_keys);

        addReplyBulkCString(c,"keys.bytes-per-key");
        addReplyLongLong(c,mh->bytes_per_key);

        addReplyBulkCString(c,"dataset.bytes");
        addReplyLongLong(c,mh->dataset);

        addReplyBulkCString(c,"dataset.percentage");
        addReplyDouble(c,mh->dataset_perc);

        addReplyBulkCString(c,"peak.percentage");
        addReplyDouble(c,mh->peak_perc);

        addReplyBulkCString(c,"allocator.allocated");
        addReplyLongLong(c,server.cron_malloc_stats.allocator_allocated);

        addReplyBulkCString(c,"allocator.active");
        addReplyLongLong(c,server.cron_malloc_stats.allocator_active);

        addReplyBulkCString(c,"allocator.resident");
        addReplyLongLong(c,server.cron_malloc_stats.allocator_resident);

        addReplyBulkCString(c,"allocator-fragmentation.ratio");
        addReplyDouble(c,mh->allocator_frag);

        addReplyBulkCString(c,"allocator-fragmentation.bytes");
        addReplyLongLong(c,mh->allocator_frag_bytes);

        addReplyBulkCString(c,"allocator-rss.ratio");
        addReplyDouble(c,mh->allocator_rss);

        addReplyBulkCString(c,"allocator-rss.bytes");
        addReplyLongLong(c,mh->allocator_rss_bytes);

        addReplyBulkCString(c,"rss-overhead.ratio");
        addReplyDouble(c,mh->rss_extra);

        addReplyBulkCString(c,"rss-overhead.bytes");
        addReplyLongLong(c,mh->rss_extra_bytes);

        addReplyBulkCString(c,"fragmentation"); /* this is the total RSS overhead, including fragmentation */
        addReplyDouble(c,mh->total_frag); /* it is kept here for backwards compatibility */

        addReplyBulkCString(c,"fragmentation.bytes");
        addReplyLongLong(c,mh->total_frag_bytes);

        freeMemoryOverheadData(mh);
    } else if (!strcasecmp(c->argv[1]->ptr,"malloc-stats") && c->argc == 2) {
        /* MEMORY MALLOC-STATS: detailed report, jemalloc builds only. */
#if defined(USE_JEMALLOC)
        sds info = sdsempty();
        je_malloc_stats_print(inputCatSds, &info, NULL);
        addReplyVerbatim(c,info,sdslen(info),"txt");
        sdsfree(info);
#else
        addReplyBulkCString(c,"Stats not supported for the current allocator");
#endif
    } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
        /* MEMORY DOCTOR */
        sds report = getMemoryDoctorReport();
        addReplyVerbatim(c,report,sdslen(report),"txt");
        sdsfree(report);
    } else if (!strcasecmp(c->argv[1]->ptr,"purge") && c->argc == 2) {
        /* MEMORY PURGE: ask the allocator to release dirty pages. */
        if (jemalloc_purge() == 0)
            addReply(c, shared.ok);
        else
            addReplyError(c, "Error purging dirty pages");
    } else {
        addReplySubcommandSyntaxError(c);
    }
}
diff --git a/src/pqsort.c b/src/pqsort.c
new file mode 100644
index 0000000..fab54e0
--- /dev/null
+++ b/src/pqsort.c
@@ -0,0 +1,185 @@
+/* The following is the NetBSD libc qsort implementation modified in order to
+ * support partial sorting of ranges for Redis.
+ *
+ * Copyright(C) 2009-2012 Salvatore Sanfilippo. All rights reserved.
+ *
+ * The original copyright notice follows. */
+
+
+/* $NetBSD: qsort.c,v 1.19 2009/01/30 23:38:44 lukem Exp $ */
+
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdlib.h>
+
+static inline char *med3 (char *, char *, char *,
+ int (*)(const void *, const void *));
+static inline void swapfunc (char *, char *, size_t, int);
+
/* min() with a fully parenthesized expansion. The previous form,
 * '(a) < (b) ? a : b', was not wrapped as a whole, so an expression such
 * as 'min(x, y) + 1' would parse as '(x) < (y) ? x : (y + 1)'. All call
 * sites in this file were safe, but the parenthesized version removes the
 * trap. Arguments are still evaluated twice, as in any simple macro. */
#define min(a, b) ((a) < (b) ? (a) : (b))
+
/*
 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
 */
/* Swap 'n' bytes between 'parmi' and 'parmj', moving TYPE-sized words.
 * NOTE: the do/while executes at least once, so callers must guarantee
 * n > 0 (vecswap below checks this). */
#define swapcode(TYPE, parmi, parmj, n) { \
    size_t i = (n) / sizeof (TYPE); \
    TYPE *pi = (TYPE *)(void *)(parmi); \
    TYPE *pj = (TYPE *)(void *)(parmj); \
    do { \
        TYPE t = *pi; \
        *pi++ = *pj; \
        *pj++ = t; \
    } while (--i > 0); \
}

/* Select the swap strategy for the current base pointer and element size:
 * 2 = byte-by-byte (unaligned base or odd element size),
 * 0 = exactly one long per element,
 * 1 = multiple longs per element. */
#define SWAPINIT(a, es) swaptype = (uintptr_t)a % sizeof(long) || \
    es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
+
/* Exchange 'n' bytes between 'a' and 'b'. For swaptype 0/1 the regions
 * are long-aligned and n is a multiple of sizeof(long), so whole words
 * are moved; otherwise single bytes are moved. 'n' must be > 0. */
static inline void
swapfunc(char *a, char *b, size_t n, int swaptype)
{
    if (swaptype <= 1) {
        size_t words = n / sizeof(long);
        long *wa = (long *)(void *)a;
        long *wb = (long *)(void *)b;
        do {
            long tmp = *wa;
            *wa++ = *wb;
            *wb++ = tmp;
        } while (--words > 0);
    } else {
        size_t bytes = n;
        char *ca = a, *cb = b;
        do {
            char tmp = *ca;
            *ca++ = *cb;
            *cb++ = tmp;
        } while (--bytes > 0);
    }
}
+
/* Swap one element of 'es' bytes; 'swaptype' 0 takes the single-long
 * fast path, anything else defers to swapfunc(). Relies on 'es' and
 * 'swaptype' being in scope at the expansion site. */
#define swap(a, b) \
    if (swaptype == 0) { \
        long t = *(long *)(void *)(a); \
        *(long *)(void *)(a) = *(long *)(void *)(b); \
        *(long *)(void *)(b) = t; \
    } else \
        swapfunc(a, b, es, swaptype)

/* Swap 'n' bytes between two regions; no-op when n == 0 (swapfunc
 * requires a positive count). */
#define vecswap(a, b, n) if ((n) > 0) swapfunc((a), (b), (size_t)(n), swaptype)
+
/* Median-of-three: return whichever of a, b, c is the middle element
 * according to the comparator. */
static inline char *
med3(char *a, char *b, char *c,
    int (*cmp) (const void *, const void *))
{
    if (cmp(a, b) < 0) {
        if (cmp(b, c) < 0) return b;
        return cmp(a, c) < 0 ? c : a;
    }
    if (cmp(b, c) > 0) return b;
    return cmp(a, c) < 0 ? a : c;
}
+
/* Core of the partial quicksort: standard Bentley & McIlroy qsort, except
 * that a partition is recursed into only when its byte range intersects
 * [lrange, rrange], the region whose final sorted order the caller needs. */
static void
_pqsort(void *a, size_t n, size_t es,
    int (*cmp) (const void *, const void *), void *lrange, void *rrange)
{
    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
    size_t d, r;
    int swaptype, cmp_result;

loop:   SWAPINIT(a, es);
    /* Tiny partitions: plain insertion sort. */
    if (n < 7) {
        for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
            for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
                 pl -= es)
                swap(pl, pl - es);
        return;
    }
    /* Pivot selection: middle element, median of 3, or pseudo-median of
     * 9 depending on the partition size. */
    pm = (char *) a + (n / 2) * es;
    if (n > 7) {
        pl = (char *) a;
        pn = (char *) a + (n - 1) * es;
        if (n > 40) {
            d = (n / 8) * es;
            pl = med3(pl, pl + d, pl + 2 * d, cmp);
            pm = med3(pm - d, pm, pm + d, cmp);
            pn = med3(pn - 2 * d, pn - d, pn, cmp);
        }
        pm = med3(pl, pm, pn, cmp);
    }
    swap(a, pm);
    pa = pb = (char *) a + es;

    pc = pd = (char *) a + (n - 1) * es;
    /* Split-end partitioning: pivot-equal elements accumulate at both
     * extremes and are swapped back to the middle afterwards. */
    for (;;) {
        while (pb <= pc && (cmp_result = cmp(pb, a)) <= 0) {
            if (cmp_result == 0) {
                swap(pa, pb);
                pa += es;
            }
            pb += es;
        }
        while (pb <= pc && (cmp_result = cmp(pc, a)) >= 0) {
            if (cmp_result == 0) {
                swap(pc, pd);
                pd -= es;
            }
            pc -= es;
        }
        if (pb > pc)
            break;
        swap(pb, pc);
        pb += es;
        pc -= es;
    }

    pn = (char *) a + n * es;
    /* Move the pivot-equal runs from the extremes into the middle. */
    r = min(pa - (char *) a, pb - pa);
    vecswap(a, pb - r, r);
    r = min((size_t)(pd - pc), pn - pd - es);
    vecswap(pb, pn - r, r);
    /* Left partition: recurse only if it intersects [lrange, rrange]. */
    if ((r = pb - pa) > es) {
        void *_l = a, *_r = ((unsigned char*)a)+r-1;
        if (!((lrange < _l && rrange < _l) ||
            (lrange > _r && rrange > _r)))
            _pqsort(a, r / es, es, cmp, lrange, rrange);
    }
    /* Right partition: handled by looping instead of recursing, again
     * only if it intersects the requested range. */
    if ((r = pd - pc) > es) {
        void *_l, *_r;

        /* Iterate rather than recurse to save stack space */
        a = pn - r;
        n = r / es;

        _l = a;
        _r = ((unsigned char*)a)+r-1;
        if (!((lrange < _l && rrange < _l) ||
            (lrange > _r && rrange > _r)))
            goto loop;
    }
/*  qsort(pn - r, r / es, es, cmp);*/
}
+
/* Partial qsort: sort 'a' (n elements of 'es' bytes each) only as far as
 * needed so that the elements with indexes in [lrange, rrange] end up in
 * their final sorted positions. */
void
pqsort(void *a, size_t n, size_t es,
    int (*cmp) (const void *, const void *), size_t lrange, size_t rrange)
{
    unsigned char *base = a;
    unsigned char *first_byte = base + lrange * es;
    unsigned char *last_byte = base + (rrange + 1) * es - 1;
    _pqsort(a, n, es, cmp, first_byte, last_byte);
}
diff --git a/src/pqsort.h b/src/pqsort.h
new file mode 100644
index 0000000..824ab5c
--- /dev/null
+++ b/src/pqsort.h
@@ -0,0 +1,40 @@
+/* The following is the NetBSD libc qsort implementation modified in order to
+ * support partial sorting of ranges for Redis.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * See the pqsort.c file for the original copyright notice. */
+
+#ifndef __PQSORT_H
+#define __PQSORT_H
+
+void
+pqsort(void *a, size_t n, size_t es,
+ int (*cmp) (const void *, const void *), size_t lrange, size_t rrange);
+
+#endif
diff --git a/src/pubsub.c b/src/pubsub.c
new file mode 100644
index 0000000..a13c5a6
--- /dev/null
+++ b/src/pubsub.c
@@ -0,0 +1,754 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "cluster.h"
+
/* Structure to hold the pubsub related metadata. Currently used
 * for pubsub and pubsubshard feature. Acts as a dispatch table so the
 * generic helpers below work for both channel kinds. */
typedef struct pubsubtype {
    int shard;                               /* 1 for shard channels, 0 for global ones. */
    dict *(*clientPubSubChannels)(client*);  /* Accessor for the per-client channels dict. */
    int (*subscriptionCount)(client*);       /* Per-client subscription counter. */
    dict **serverPubSubChannels;             /* Server-wide channel -> clients dict. */
    robj **subscribeMsg;                     /* Shared reply header for subscribe. */
    robj **unsubscribeMsg;                   /* Shared reply header for unsubscribe. */
    robj **messageBulk;                      /* Shared reply header for message delivery. */
} pubsubtype;
+
/* Get client's global Pub/Sub channels subscription count. */
int clientSubscriptionsCount(client *c);

/* Get client's shard level Pub/Sub channels subscription count. */
int clientShardSubscriptionsCount(client *c);

/* Get client's global Pub/Sub channels dict. */
dict* getClientPubSubChannels(client *c);

/* Get client's shard level Pub/Sub channels dict. */
dict* getClientPubSubShardChannels(client *c);

/* Get list of channels client is subscribed to.
 * If a pattern is provided, the subset of channels is returned
 * matching the pattern. */
void channelList(client *c, sds pat, dict* pubsub_channels);
+
/*
 * Pub/Sub type for global channels. Plugged into the generic pubsub
 * helpers that take a 'pubsubtype' argument.
 */
pubsubtype pubSubType = {
    .shard = 0,
    .clientPubSubChannels = getClientPubSubChannels,
    .subscriptionCount = clientSubscriptionsCount,
    .serverPubSubChannels = &server.pubsub_channels,
    .subscribeMsg = &shared.subscribebulk,
    .unsubscribeMsg = &shared.unsubscribebulk,
    .messageBulk = &shared.messagebulk,
};
+
/*
 * Pub/Sub type for shard level channels bounded to a slot. Same shape as
 * pubSubType, but backed by the shard channel tables and S* replies.
 */
pubsubtype pubSubShardType = {
    .shard = 1,
    .clientPubSubChannels = getClientPubSubShardChannels,
    .subscriptionCount = clientShardSubscriptionsCount,
    .serverPubSubChannels = &server.pubsubshard_channels,
    .subscribeMsg = &shared.ssubscribebulk,
    .unsubscribeMsg = &shared.sunsubscribebulk,
    .messageBulk = &shared.smessagebulk,
};
+
+/*-----------------------------------------------------------------------------
+ * Pubsub client replies API
+ *----------------------------------------------------------------------------*/
+
+/* Send a pubsub message of type "message" to the client.
+ * Normally 'msg' is a Redis object containing the string to send as
+ * message. However if the caller sets 'msg' as NULL, it will be able
+ * to send a special message (for instance an Array type) by using the
+ * addReply*() API family. */
+void addReplyPubsubMessage(client *c, robj *channel, robj *msg, robj *message_bulk) {
+ uint64_t old_flags = c->flags;
+ c->flags |= CLIENT_PUSHING;
+ if (c->resp == 2)
+ addReply(c,shared.mbulkhdr[3]);
+ else
+ addReplyPushLen(c,3);
+ addReply(c,message_bulk);
+ addReplyBulk(c,channel);
+ if (msg) addReplyBulk(c,msg);
+ if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+}
+
+/* Send a pubsub message of type "pmessage" to the client. The difference
+ * with the "message" type delivered by addReplyPubsubMessage() is that
+ * this message format also includes the pattern that matched the message. */
+void addReplyPubsubPatMessage(client *c, robj *pat, robj *channel, robj *msg) {
+ uint64_t old_flags = c->flags;
+ c->flags |= CLIENT_PUSHING;
+ if (c->resp == 2)
+ addReply(c,shared.mbulkhdr[4]);
+ else
+ addReplyPushLen(c,4);
+ addReply(c,shared.pmessagebulk);
+ addReplyBulk(c,pat);
+ addReplyBulk(c,channel);
+ addReplyBulk(c,msg);
+ if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+}
+
+/* Send the pubsub subscription notification to the client. */
+void addReplyPubsubSubscribed(client *c, robj *channel, pubsubtype type) {
+ uint64_t old_flags = c->flags;
+ c->flags |= CLIENT_PUSHING;
+ if (c->resp == 2)
+ addReply(c,shared.mbulkhdr[3]);
+ else
+ addReplyPushLen(c,3);
+ addReply(c,*type.subscribeMsg);
+ addReplyBulk(c,channel);
+ addReplyLongLong(c,type.subscriptionCount(c));
+ if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+}
+
+/* Send the pubsub unsubscription notification to the client.
+ * Channel can be NULL: this is useful when the client sends a mass
+ * unsubscribe command but there are no channels to unsubscribe from: we
+ * still send a notification. */
+void addReplyPubsubUnsubscribed(client *c, robj *channel, pubsubtype type) {
+ uint64_t old_flags = c->flags;
+ c->flags |= CLIENT_PUSHING;
+ if (c->resp == 2)
+ addReply(c,shared.mbulkhdr[3]);
+ else
+ addReplyPushLen(c,3);
+ addReply(c, *type.unsubscribeMsg);
+ if (channel)
+ addReplyBulk(c,channel);
+ else
+ addReplyNull(c);
+ addReplyLongLong(c,type.subscriptionCount(c));
+ if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+}
+
+/* Send the pubsub pattern subscription notification to the client. */
+void addReplyPubsubPatSubscribed(client *c, robj *pattern) {
+ uint64_t old_flags = c->flags;
+ c->flags |= CLIENT_PUSHING;
+ if (c->resp == 2)
+ addReply(c,shared.mbulkhdr[3]);
+ else
+ addReplyPushLen(c,3);
+ addReply(c,shared.psubscribebulk);
+ addReplyBulk(c,pattern);
+ addReplyLongLong(c,clientSubscriptionsCount(c));
+ if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+}
+
+/* Send the pubsub pattern unsubscription notification to the client.
+ * Pattern can be NULL: this is useful when the client sends a mass
+ * punsubscribe command but there are no pattern to unsubscribe from: we
+ * still send a notification. */
+void addReplyPubsubPatUnsubscribed(client *c, robj *pattern) {
+ uint64_t old_flags = c->flags;
+ c->flags |= CLIENT_PUSHING;
+ if (c->resp == 2)
+ addReply(c,shared.mbulkhdr[3]);
+ else
+ addReplyPushLen(c,3);
+ addReply(c,shared.punsubscribebulk);
+ if (pattern)
+ addReplyBulk(c,pattern);
+ else
+ addReplyNull(c);
+ addReplyLongLong(c,clientSubscriptionsCount(c));
+ if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+}
+
+/*-----------------------------------------------------------------------------
+ * Pubsub low level API
+ *----------------------------------------------------------------------------*/
+
+/* Return the number of pubsub channels + patterns is handled. */
+int serverPubsubSubscriptionCount(void) {
+ return dictSize(server.pubsub_channels) + dictSize(server.pubsub_patterns);
+}
+
+/* Return the number of pubsub shard level channels is handled. */
+int serverPubsubShardSubscriptionCount(void) {
+ return dictSize(server.pubsubshard_channels);
+}
+
+
+/* Return the number of channels + patterns a client is subscribed to. */
+int clientSubscriptionsCount(client *c) {
+ return dictSize(c->pubsub_channels) + dictSize(c->pubsub_patterns);
+}
+
+/* Return the number of shard level channels a client is subscribed to. */
+int clientShardSubscriptionsCount(client *c) {
+ return dictSize(c->pubsubshard_channels);
+}
+
+dict* getClientPubSubChannels(client *c) {
+ return c->pubsub_channels;
+}
+
+dict* getClientPubSubShardChannels(client *c) {
+ return c->pubsubshard_channels;
+}
+
+/* Return the number of pubsub + pubsub shard level channels
+ * a client is subscribed to. */
+int clientTotalPubSubSubscriptionCount(client *c) {
+ return clientSubscriptionsCount(c) + clientShardSubscriptionsCount(c);
+}
+
+/* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
+ * 0 if the client was already subscribed to that channel. */
+int pubsubSubscribeChannel(client *c, robj *channel, pubsubtype type) {
+ dictEntry *de;
+ list *clients = NULL;
+ int retval = 0;
+
+ /* Add the channel to the client -> channels hash table */
+ if (dictAdd(type.clientPubSubChannels(c),channel,NULL) == DICT_OK) {
+ retval = 1;
+ incrRefCount(channel);
+ /* Add the client to the channel -> list of clients hash table */
+ de = dictFind(*type.serverPubSubChannels, channel);
+ if (de == NULL) {
+ clients = listCreate();
+ dictAdd(*type.serverPubSubChannels, channel, clients);
+ incrRefCount(channel);
+ } else {
+ clients = dictGetVal(de);
+ }
+ listAddNodeTail(clients,c);
+ }
+ /* Notify the client */
+ addReplyPubsubSubscribed(c,channel,type);
+ return retval;
+}
+
+/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
+ * 0 if the client was not subscribed to the specified channel. */
+int pubsubUnsubscribeChannel(client *c, robj *channel, int notify, pubsubtype type) {
+ dictEntry *de;
+ list *clients;
+ listNode *ln;
+ int retval = 0;
+
+ /* Remove the channel from the client -> channels hash table */
+ incrRefCount(channel); /* channel may be just a pointer to the same object
+ we have in the hash tables. Protect it... */
+ if (dictDelete(type.clientPubSubChannels(c),channel) == DICT_OK) {
+ retval = 1;
+ /* Remove the client from the channel -> clients list hash table */
+ de = dictFind(*type.serverPubSubChannels, channel);
+ serverAssertWithInfo(c,NULL,de != NULL);
+ clients = dictGetVal(de);
+ ln = listSearchKey(clients,c);
+ serverAssertWithInfo(c,NULL,ln != NULL);
+ listDelNode(clients,ln);
+ if (listLength(clients) == 0) {
+ /* Free the list and associated hash entry at all if this was
+ * the latest client, so that it will be possible to abuse
+ * Redis PUBSUB creating millions of channels. */
+ dictDelete(*type.serverPubSubChannels, channel);
+ /* As this channel isn't subscribed by anyone, it's safe
+ * to remove the channel from the slot. */
+ if (server.cluster_enabled & type.shard) {
+ slotToChannelDel(channel->ptr);
+ }
+ }
+ }
+ /* Notify the client */
+ if (notify) {
+ addReplyPubsubUnsubscribed(c,channel,type);
+ }
+ decrRefCount(channel); /* it is finally safe to release it */
+ return retval;
+}
+
/* Remove every subscriber of the given shard channel, sending each one an
 * unsubscribe notification, then drop the channel itself from the server's
 * shard-channel table and from the cluster slot -> channel mapping.
 *
 * NOTE(review): the trailing decrRefCount() implies this function consumes a
 * reference on 'channel' held by the caller — confirm against call sites. */
void pubsubShardUnsubscribeAllClients(robj *channel) {
    int retval;
    dictEntry *de = dictFind(server.pubsubshard_channels, channel);
    serverAssertWithInfo(NULL,channel,de != NULL);
    list *clients = dictGetVal(de);
    if (listLength(clients) > 0) {
        /* For each client subscribed to the channel, unsubscribe it. */
        listIter li;
        listNode *ln;
        listRewind(clients, &li);
        while ((ln = listNext(&li)) != NULL) {
            client *c = listNodeValue(ln);
            /* Drop the channel from this client's own shard-channel dict. */
            retval = dictDelete(c->pubsubshard_channels, channel);
            serverAssertWithInfo(c,channel,retval == DICT_OK);
            addReplyPubsubUnsubscribed(c, channel, pubSubShardType);
            /* If the client has no other pubsub subscription,
             * move out of pubsub mode. */
            if (clientTotalPubSubSubscriptionCount(c) == 0) {
                c->flags &= ~CLIENT_PUBSUB;
            }
        }
    }
    /* Delete the channel from server pubsubshard channels hash table. */
    retval = dictDelete(server.pubsubshard_channels, channel);
    /* Delete the channel from slots_to_channel mapping. */
    slotToChannelDel(channel->ptr);
    serverAssertWithInfo(NULL,channel,retval == DICT_OK);
    decrRefCount(channel); /* it is finally safe to release it */
}
+
+
+/* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
+int pubsubSubscribePattern(client *c, robj *pattern) {
+ dictEntry *de;
+ list *clients;
+ int retval = 0;
+
+ if (dictAdd(c->pubsub_patterns, pattern, NULL) == DICT_OK) {
+ retval = 1;
+ incrRefCount(pattern);
+ /* Add the client to the pattern -> list of clients hash table */
+ de = dictFind(server.pubsub_patterns,pattern);
+ if (de == NULL) {
+ clients = listCreate();
+ dictAdd(server.pubsub_patterns,pattern,clients);
+ incrRefCount(pattern);
+ } else {
+ clients = dictGetVal(de);
+ }
+ listAddNodeTail(clients,c);
+ }
+ /* Notify the client */
+ addReplyPubsubPatSubscribed(c,pattern);
+ return retval;
+}
+
+/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
+ * 0 if the client was not subscribed to the specified channel. */
+int pubsubUnsubscribePattern(client *c, robj *pattern, int notify) {
+ dictEntry *de;
+ list *clients;
+ listNode *ln;
+ int retval = 0;
+
+ incrRefCount(pattern); /* Protect the object. May be the same we remove */
+ if (dictDelete(c->pubsub_patterns, pattern) == DICT_OK) {
+ retval = 1;
+ /* Remove the client from the pattern -> clients list hash table */
+ de = dictFind(server.pubsub_patterns,pattern);
+ serverAssertWithInfo(c,NULL,de != NULL);
+ clients = dictGetVal(de);
+ ln = listSearchKey(clients,c);
+ serverAssertWithInfo(c,NULL,ln != NULL);
+ listDelNode(clients,ln);
+ if (listLength(clients) == 0) {
+ /* Free the list and associated hash entry at all if this was
+ * the latest client. */
+ dictDelete(server.pubsub_patterns,pattern);
+ }
+ }
+ /* Notify the client */
+ if (notify) addReplyPubsubPatUnsubscribed(c,pattern);
+ decrRefCount(pattern);
+ return retval;
+}
+
+/* Unsubscribe from all the channels. Return the number of channels the
+ * client was subscribed to. */
+int pubsubUnsubscribeAllChannelsInternal(client *c, int notify, pubsubtype type) {
+ int count = 0;
+ if (dictSize(type.clientPubSubChannels(c)) > 0) {
+ dictIterator *di = dictGetSafeIterator(type.clientPubSubChannels(c));
+ dictEntry *de;
+
+ while((de = dictNext(di)) != NULL) {
+ robj *channel = dictGetKey(de);
+
+ count += pubsubUnsubscribeChannel(c,channel,notify,type);
+ }
+ dictReleaseIterator(di);
+ }
+ /* We were subscribed to nothing? Still reply to the client. */
+ if (notify && count == 0) {
+ addReplyPubsubUnsubscribed(c,NULL,type);
+ }
+ return count;
+}
+
+/*
+ * Unsubscribe a client from all global channels.
+ */
+int pubsubUnsubscribeAllChannels(client *c, int notify) {
+ int count = pubsubUnsubscribeAllChannelsInternal(c,notify,pubSubType);
+ return count;
+}
+
+/*
+ * Unsubscribe a client from all shard subscribed channels.
+ */
+int pubsubUnsubscribeShardAllChannels(client *c, int notify) {
+ int count = pubsubUnsubscribeAllChannelsInternal(c, notify, pubSubShardType);
+ return count;
+}
+
+/*
+ * Unsubscribe a client from provided shard subscribed channel(s).
+ */
+void pubsubUnsubscribeShardChannels(robj **channels, unsigned int count) {
+ for (unsigned int j = 0; j < count; j++) {
+ /* Remove the channel from server and from the clients
+ * subscribed to it as well as notify them. */
+ pubsubShardUnsubscribeAllClients(channels[j]);
+ }
+}
+
+/* Unsubscribe from all the patterns. Return the number of patterns the
+ * client was subscribed from. */
+int pubsubUnsubscribeAllPatterns(client *c, int notify) {
+ int count = 0;
+
+ if (dictSize(c->pubsub_patterns) > 0) {
+ dictIterator *di = dictGetSafeIterator(c->pubsub_patterns);
+ dictEntry *de;
+
+ while ((de = dictNext(di)) != NULL) {
+ robj *pattern = dictGetKey(de);
+ count += pubsubUnsubscribePattern(c, pattern, notify);
+ }
+ dictReleaseIterator(di);
+ }
+
+ /* We were subscribed to nothing? Still reply to the client. */
+ if (notify && count == 0) addReplyPubsubPatUnsubscribed(c,NULL);
+ return count;
+}
+
+/*
+ * Publish a message to all the subscribers.
+ */
+int pubsubPublishMessageInternal(robj *channel, robj *message, pubsubtype type) {
+ int receivers = 0;
+ dictEntry *de;
+ dictIterator *di;
+ listNode *ln;
+ listIter li;
+
+ /* Send to clients listening for that channel */
+ de = dictFind(*type.serverPubSubChannels, channel);
+ if (de) {
+ list *list = dictGetVal(de);
+ listNode *ln;
+ listIter li;
+
+ listRewind(list,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ client *c = ln->value;
+ addReplyPubsubMessage(c,channel,message,*type.messageBulk);
+ updateClientMemUsageAndBucket(c);
+ receivers++;
+ }
+ }
+
+ if (type.shard) {
+ /* Shard pubsub ignores patterns. */
+ return receivers;
+ }
+
+ /* Send to clients listening to matching channels */
+ di = dictGetIterator(server.pubsub_patterns);
+ if (di) {
+ channel = getDecodedObject(channel);
+ while((de = dictNext(di)) != NULL) {
+ robj *pattern = dictGetKey(de);
+ list *clients = dictGetVal(de);
+ if (!stringmatchlen((char*)pattern->ptr,
+ sdslen(pattern->ptr),
+ (char*)channel->ptr,
+ sdslen(channel->ptr),0)) continue;
+
+ listRewind(clients,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ client *c = listNodeValue(ln);
+ addReplyPubsubPatMessage(c,pattern,channel,message);
+ updateClientMemUsageAndBucket(c);
+ receivers++;
+ }
+ }
+ decrRefCount(channel);
+ dictReleaseIterator(di);
+ }
+ return receivers;
+}
+
+/* Publish a message to all the subscribers. */
+int pubsubPublishMessage(robj *channel, robj *message, int sharded) {
+ return pubsubPublishMessageInternal(channel, message, sharded? pubSubShardType : pubSubType);
+}
+
+/*-----------------------------------------------------------------------------
+ * Pubsub commands implementation
+ *----------------------------------------------------------------------------*/
+
+/* SUBSCRIBE channel [channel ...] */
+void subscribeCommand(client *c) {
+ int j;
+ if ((c->flags & CLIENT_DENY_BLOCKING) && !(c->flags & CLIENT_MULTI)) {
+ /**
+ * A client that has CLIENT_DENY_BLOCKING flag on
+ * expect a reply per command and so can not execute subscribe.
+ *
+ * Notice that we have a special treatment for multi because of
+ * backward compatibility
+ */
+ addReplyError(c, "SUBSCRIBE isn't allowed for a DENY BLOCKING client");
+ return;
+ }
+ for (j = 1; j < c->argc; j++)
+ pubsubSubscribeChannel(c,c->argv[j],pubSubType);
+ c->flags |= CLIENT_PUBSUB;
+}
+
+/* UNSUBSCRIBE [channel ...] */
+void unsubscribeCommand(client *c) {
+ if (c->argc == 1) {
+ pubsubUnsubscribeAllChannels(c,1);
+ } else {
+ int j;
+
+ for (j = 1; j < c->argc; j++)
+ pubsubUnsubscribeChannel(c,c->argv[j],1,pubSubType);
+ }
+ if (clientTotalPubSubSubscriptionCount(c) == 0) c->flags &= ~CLIENT_PUBSUB;
+}
+
+/* PSUBSCRIBE pattern [pattern ...] */
+void psubscribeCommand(client *c) {
+ int j;
+ if ((c->flags & CLIENT_DENY_BLOCKING) && !(c->flags & CLIENT_MULTI)) {
+ /**
+ * A client that has CLIENT_DENY_BLOCKING flag on
+ * expect a reply per command and so can not execute subscribe.
+ *
+ * Notice that we have a special treatment for multi because of
+ * backward compatibility
+ */
+ addReplyError(c, "PSUBSCRIBE isn't allowed for a DENY BLOCKING client");
+ return;
+ }
+
+ for (j = 1; j < c->argc; j++)
+ pubsubSubscribePattern(c,c->argv[j]);
+ c->flags |= CLIENT_PUBSUB;
+}
+
+/* PUNSUBSCRIBE [pattern [pattern ...]] */
+void punsubscribeCommand(client *c) {
+ if (c->argc == 1) {
+ pubsubUnsubscribeAllPatterns(c,1);
+ } else {
+ int j;
+
+ for (j = 1; j < c->argc; j++)
+ pubsubUnsubscribePattern(c,c->argv[j],1);
+ }
+ if (clientTotalPubSubSubscriptionCount(c) == 0) c->flags &= ~CLIENT_PUBSUB;
+}
+
+/* This function wraps pubsubPublishMessage and also propagates the message to cluster.
+ * Used by the commands PUBLISH/SPUBLISH and their respective module APIs.*/
+int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded) {
+ int receivers = pubsubPublishMessage(channel, message, sharded);
+ if (server.cluster_enabled)
+ clusterPropagatePublish(channel, message, sharded);
+ return receivers;
+}
+
+/* PUBLISH <channel> <message> */
+void publishCommand(client *c) {
+ if (server.sentinel_mode) {
+ sentinelPublishCommand(c);
+ return;
+ }
+
+ int receivers = pubsubPublishMessageAndPropagateToCluster(c->argv[1],c->argv[2],0);
+ if (!server.cluster_enabled)
+ forceCommandPropagation(c,PROPAGATE_REPL);
+ addReplyLongLong(c,receivers);
+}
+
/* PUBSUB command for Pub/Sub introspection.
 * Subcommands: HELP, CHANNELS, NUMSUB, NUMPAT, SHARDCHANNELS, SHARDNUMSUB.
 * Replies with a syntax error for anything else. */
void pubsubCommand(client *c) {
    if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
        const char *help[] = {
"CHANNELS [<pattern>]",
" Return the currently active channels matching a <pattern> (default: '*').",
"NUMPAT",
" Return number of subscriptions to patterns.",
"NUMSUB [<channel> ...]",
" Return the number of subscribers for the specified channels, excluding",
" pattern subscriptions(default: no channels).",
"SHARDCHANNELS [<pattern>]",
" Return the currently active shard level channels matching a <pattern> (default: '*').",
"SHARDNUMSUB [<shardchannel> ...]",
" Return the number of subscribers for the specified shard level channel(s)",
NULL
        };
        addReplyHelp(c, help);
    } else if (!strcasecmp(c->argv[1]->ptr,"channels") &&
               (c->argc == 2 || c->argc == 3))
    {
        /* PUBSUB CHANNELS [<pattern>] */
        sds pat = (c->argc == 2) ? NULL : c->argv[2]->ptr;
        channelList(c, pat, server.pubsub_channels);
    } else if (!strcasecmp(c->argv[1]->ptr,"numsub") && c->argc >= 2) {
        /* PUBSUB NUMSUB [Channel_1 ... Channel_N]
         * Flat array of channel, count pairs; 0 for unknown channels. */
        int j;

        addReplyArrayLen(c,(c->argc-2)*2);
        for (j = 2; j < c->argc; j++) {
            list *l = dictFetchValue(server.pubsub_channels,c->argv[j]);

            addReplyBulk(c,c->argv[j]);
            addReplyLongLong(c,l ? listLength(l) : 0);
        }
    } else if (!strcasecmp(c->argv[1]->ptr,"numpat") && c->argc == 2) {
        /* PUBSUB NUMPAT */
        addReplyLongLong(c,dictSize(server.pubsub_patterns));
    } else if (!strcasecmp(c->argv[1]->ptr,"shardchannels") &&
               (c->argc == 2 || c->argc == 3))
    {
        /* PUBSUB SHARDCHANNELS */
        sds pat = (c->argc == 2) ? NULL : c->argv[2]->ptr;
        channelList(c,pat,server.pubsubshard_channels);
    } else if (!strcasecmp(c->argv[1]->ptr,"shardnumsub") && c->argc >= 2) {
        /* PUBSUB SHARDNUMSUB [ShardChannel_1 ... ShardChannel_N] */
        int j;

        addReplyArrayLen(c, (c->argc-2)*2);
        for (j = 2; j < c->argc; j++) {
            list *l = dictFetchValue(server.pubsubshard_channels, c->argv[j]);

            addReplyBulk(c,c->argv[j]);
            addReplyLongLong(c,l ? listLength(l) : 0);
        }
    } else {
        addReplySubcommandSyntaxError(c);
    }
}
+
+void channelList(client *c, sds pat, dict *pubsub_channels) {
+ dictIterator *di = dictGetIterator(pubsub_channels);
+ dictEntry *de;
+ long mblen = 0;
+ void *replylen;
+
+ replylen = addReplyDeferredLen(c);
+ while((de = dictNext(di)) != NULL) {
+ robj *cobj = dictGetKey(de);
+ sds channel = cobj->ptr;
+
+ if (!pat || stringmatchlen(pat, sdslen(pat),
+ channel, sdslen(channel),0))
+ {
+ addReplyBulk(c,cobj);
+ mblen++;
+ }
+ }
+ dictReleaseIterator(di);
+ setDeferredArrayLen(c,replylen,mblen);
+}
+
+/* SPUBLISH <shardchannel> <message> */
+void spublishCommand(client *c) {
+ int receivers = pubsubPublishMessageAndPropagateToCluster(c->argv[1],c->argv[2],1);
+ if (!server.cluster_enabled)
+ forceCommandPropagation(c,PROPAGATE_REPL);
+ addReplyLongLong(c,receivers);
+}
+
+/* SSUBSCRIBE shardchannel [shardchannel ...] */
+void ssubscribeCommand(client *c) {
+ if (c->flags & CLIENT_DENY_BLOCKING) {
+ /* A client that has CLIENT_DENY_BLOCKING flag on
+ * expect a reply per command and so can not execute subscribe. */
+ addReplyError(c, "SSUBSCRIBE isn't allowed for a DENY BLOCKING client");
+ return;
+ }
+
+ for (int j = 1; j < c->argc; j++) {
+ /* A channel is only considered to be added, if a
+ * subscriber exists for it. And if a subscriber
+ * already exists the slotToChannel doesn't needs
+ * to be incremented. */
+ if (server.cluster_enabled &
+ (dictFind(*pubSubShardType.serverPubSubChannels, c->argv[j]) == NULL)) {
+ slotToChannelAdd(c->argv[j]->ptr);
+ }
+ pubsubSubscribeChannel(c, c->argv[j], pubSubShardType);
+ }
+ c->flags |= CLIENT_PUBSUB;
+}
+
+
+/* SUNSUBSCRIBE [shardchannel [shardchannel ...]] */
+void sunsubscribeCommand(client *c) {
+ if (c->argc == 1) {
+ pubsubUnsubscribeShardAllChannels(c, 1);
+ } else {
+ for (int j = 1; j < c->argc; j++) {
+ pubsubUnsubscribeChannel(c, c->argv[j], 1, pubSubShardType);
+ }
+ }
+ if (clientTotalPubSubSubscriptionCount(c) == 0) c->flags &= ~CLIENT_PUBSUB;
+}
+
+size_t pubsubMemOverhead(client *c) {
+ /* PubSub patterns */
+ size_t mem = dictMemUsage(c->pubsub_patterns);
+ /* Global PubSub channels */
+ mem += dictMemUsage(c->pubsub_channels);
+ /* Sharded PubSub channels */
+ mem += dictMemUsage(c->pubsubshard_channels);
+ return mem;
+}
diff --git a/src/quicklist.c b/src/quicklist.c
new file mode 100644
index 0000000..301a216
--- /dev/null
+++ b/src/quicklist.c
@@ -0,0 +1,3257 @@
+/* quicklist.c - A doubly linked list of listpacks
+ *
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
 * * Redistributions of source code must retain the above copyright notice,
+ * this quicklist of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this quicklist of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h> /* for memcpy */
+#include <limits.h>
+#include "quicklist.h"
+#include "zmalloc.h"
+#include "config.h"
+#include "listpack.h"
+#include "util.h" /* for ll2string */
+#include "lzf.h"
+#include "redisassert.h"
+
+#ifndef REDIS_STATIC
+#define REDIS_STATIC static
+#endif
+
/* Optimization levels for size-based filling (negative fill values -1..-5
 * map onto these byte limits).
 * Note that the largest possible limit is 64k, so even if each record takes
 * just one byte, it still won't overflow the 16 bit count field. */
static const size_t optimization_level[] = {4096, 8192, 16384, 32768, 65536};

/* packed_threshold is initialized to 1gb */
static size_t packed_threshold = (1 << 30);

/* set threshold for PLAIN nodes, the real limit is 4gb */
#define isLargeElement(size) ((size) >= packed_threshold)
+
+int quicklistisSetPackedThreshold(size_t sz) {
+ /* Don't allow threshold to be set above or even slightly below 4GB */
+ if (sz > (1ull<<32) - (1<<20)) {
+ return 0;
+ } else if (sz == 0) { /* 0 means restore threshold */
+ sz = (1 << 30);
+ }
+ packed_threshold = sz;
+ return 1;
+}
+
/* Maximum size in bytes of any multi-element listpack.
 * Larger values will live in their own isolated listpacks.
 * This is used only if we're limited by record count. When we're limited by
 * size, the maximum limit is bigger, but still safe.
 * 8k is a recommended / default size limit */
#define SIZE_SAFETY_LIMIT 8192

/* Maximum estimate of the listpack entry overhead.
 * Although in the worst case(sz < 64), we will waste 6 bytes in one
 * quicklistNode, but can avoid memory waste due to internal fragmentation
 * when the listpack exceeds the size limit by a few bytes (e.g. being 16388). */
#define SIZE_ESTIMATE_OVERHEAD 8

/* Minimum listpack size in bytes for attempting compression. */
#define MIN_COMPRESS_BYTES 48

/* Minimum size reduction in bytes to store compressed quicklistNode data.
 * This also prevents us from storing compression if the compression
 * resulted in a larger size than the original data. */
#define MIN_COMPRESS_IMPROVE 8

/* If not verbose testing, remove all debug printing. */
#ifndef REDIS_TEST_VERBOSE
#define D(...)
#else
#define D(...)                                                                 \
    do {                                                                       \
        printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__);                   \
        printf(__VA_ARGS__);                                                   \
        printf("\n");                                                          \
    } while (0)
#endif

/* Bookmarks forward declarations */
#define QL_MAX_BM ((1 << QL_BM_BITS)-1)
quicklistBookmark *_quicklistBookmarkFindByName(quicklist *ql, const char *name);
quicklistBookmark *_quicklistBookmarkFindByNode(quicklist *ql, quicklistNode *node);
void _quicklistBookmarkDelete(quicklist *ql, quicklistBookmark *bm);

/* Simple way to give quicklistEntry structs default values with one call.
 * The distinctive longval/offset sentinels make uninitialized reads visible. */
#define initEntry(e)                                                           \
    do {                                                                       \
        (e)->zi = (e)->value = NULL;                                           \
        (e)->longval = -123456789;                                             \
        (e)->quicklist = NULL;                                                 \
        (e)->node = NULL;                                                      \
        (e)->offset = 123456789;                                               \
        (e)->sz = 0;                                                           \
    } while (0)

/* Reset the quicklistIter to prevent it from being used again after
 * insert, replace, or other operations that mutate the quicklist. */
#define resetIterator(iter)                                                    \
    do {                                                                       \
        (iter)->current = NULL;                                                \
        (iter)->zi = NULL;                                                     \
    } while (0)
+
+/* Create a new quicklist.
+ * Free with quicklistRelease(). */
+quicklist *quicklistCreate(void) {
+ struct quicklist *quicklist;
+
+ quicklist = zmalloc(sizeof(*quicklist));
+ quicklist->head = quicklist->tail = NULL;
+ quicklist->len = 0;
+ quicklist->count = 0;
+ quicklist->compress = 0;
+ quicklist->fill = -2;
+ quicklist->bookmark_count = 0;
+ return quicklist;
+}
+
+#define COMPRESS_MAX ((1 << QL_COMP_BITS)-1)
+void quicklistSetCompressDepth(quicklist *quicklist, int compress) {
+ if (compress > COMPRESS_MAX) {
+ compress = COMPRESS_MAX;
+ } else if (compress < 0) {
+ compress = 0;
+ }
+ quicklist->compress = compress;
+}
+
+#define FILL_MAX ((1 << (QL_FILL_BITS-1))-1)
+void quicklistSetFill(quicklist *quicklist, int fill) {
+ if (fill > FILL_MAX) {
+ fill = FILL_MAX;
+ } else if (fill < -5) {
+ fill = -5;
+ }
+ quicklist->fill = fill;
+}
+
+void quicklistSetOptions(quicklist *quicklist, int fill, int depth) {
+ quicklistSetFill(quicklist, fill);
+ quicklistSetCompressDepth(quicklist, depth);
+}
+
+/* Create a new quicklist with some default parameters. */
+quicklist *quicklistNew(int fill, int compress) {
+ quicklist *quicklist = quicklistCreate();
+ quicklistSetOptions(quicklist, fill, compress);
+ return quicklist;
+}
+
+REDIS_STATIC quicklistNode *quicklistCreateNode(void) {
+ quicklistNode *node;
+ node = zmalloc(sizeof(*node));
+ node->entry = NULL;
+ node->count = 0;
+ node->sz = 0;
+ node->next = node->prev = NULL;
+ node->encoding = QUICKLIST_NODE_ENCODING_RAW;
+ node->container = QUICKLIST_NODE_CONTAINER_PACKED;
+ node->recompress = 0;
+ node->dont_compress = 0;
+ return node;
+}
+
/* Return cached quicklist count (total entries across all nodes, O(1)). */
unsigned long quicklistCount(const quicklist *ql) { return ql->count; }
+
/* Free entire quicklist: every node's payload (listpack, plain buffer or LZF
 * blob all hang off 'entry'), the nodes themselves, any bookmarks, and
 * finally the quicklist header. */
void quicklistRelease(quicklist *quicklist) {
    unsigned long len;
    quicklistNode *current, *next;

    current = quicklist->head;
    len = quicklist->len;
    while (len--) {
        next = current->next;

        zfree(current->entry);
        quicklist->count -= current->count;

        zfree(current);

        /* Keep the header counters coherent while tearing down. */
        quicklist->len--;
        current = next;
    }
    quicklistBookmarksClear(quicklist);
    zfree(quicklist);
}
+
/* Compress the listpack in 'node' and update encoding details.
 * Returns 1 if listpack compressed successfully.
 * Returns 0 if compression failed or if listpack too small to compress. */
REDIS_STATIC int __quicklistCompressNode(quicklistNode *node) {
#ifdef REDIS_TEST
    node->attempted_compress = 1;
#endif
    /* Nodes explicitly flagged as not compressible are skipped. */
    if (node->dont_compress) return 0;

    /* validate that the node is neither
     * tail nor head (it has prev and next)*/
    assert(node->prev && node->next);

    node->recompress = 0;
    /* Don't bother compressing small values */
    if (node->sz < MIN_COMPRESS_BYTES)
        return 0;

    /* Worst-case buffer: room for an incompressible payload of node->sz. */
    quicklistLZF *lzf = zmalloc(sizeof(*lzf) + node->sz);

    /* Cancel if compression fails or doesn't compress small enough */
    if (((lzf->sz = lzf_compress(node->entry, node->sz, lzf->compressed,
                                 node->sz)) == 0) ||
        lzf->sz + MIN_COMPRESS_IMPROVE >= node->sz) {
        /* lzf_compress aborts/rejects compression if value not compressible. */
        zfree(lzf);
        return 0;
    }
    /* Shrink the allocation down to the achieved compressed size. */
    lzf = zrealloc(lzf, sizeof(*lzf) + lzf->sz);
    zfree(node->entry);
    node->entry = (unsigned char *)lzf;
    node->encoding = QUICKLIST_NODE_ENCODING_LZF;
    return 1;
}
+
/* Compress only uncompressed nodes (no-op for NULL or already-LZF nodes). */
#define quicklistCompressNode(_node)                                           \
    do {                                                                       \
        if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_RAW) {     \
            __quicklistCompressNode((_node));                                  \
        }                                                                      \
    } while (0)
+
/* Uncompress the listpack in 'node' and update encoding details.
 * Returns 1 on successful decode, 0 on failure to decode. */
REDIS_STATIC int __quicklistDecompressNode(quicklistNode *node) {
#ifdef REDIS_TEST
    node->attempted_compress = 0;
#endif
    node->recompress = 0;

    /* node->sz always tracks the *uncompressed* listpack size. */
    void *decompressed = zmalloc(node->sz);
    quicklistLZF *lzf = (quicklistLZF *)node->entry;
    if (lzf_decompress(lzf->compressed, lzf->sz, decompressed, node->sz) == 0) {
        /* Someone requested decompress, but we can't decompress. Not good. */
        zfree(decompressed);
        return 0;
    }
    zfree(lzf);
    node->entry = decompressed;
    node->encoding = QUICKLIST_NODE_ENCODING_RAW;
    return 1;
}
+
/* Decompress only compressed nodes (no-op for NULL or RAW nodes). */
#define quicklistDecompressNode(_node)                                         \
    do {                                                                       \
        if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_LZF) {     \
            __quicklistDecompressNode((_node));                                \
        }                                                                      \
    } while (0)

/* Force node to not be immediately re-compressible: decompress and mark the
 * node 'recompress' so a later quicklistCompress()/quicklistRecompressOnly()
 * restores the compressed form. */
#define quicklistDecompressNodeForUse(_node)                                   \
    do {                                                                       \
        if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_LZF) {     \
            __quicklistDecompressNode((_node));                                \
            (_node)->recompress = 1;                                           \
        }                                                                      \
    } while (0)
+
+/* Extract the raw LZF data from this quicklistNode.
+ * Pointer to LZF data is assigned to '*data'.
+ * Return value is the length of compressed LZF data. */
+size_t quicklistGetLzf(const quicklistNode *node, void **data) {
+ quicklistLZF *lzf = (quicklistLZF *)node->entry;
+ *data = lzf->compressed;
+ return lzf->sz;
+}
+
/* Compression is enabled whenever the configured depth is non-zero. */
#define quicklistAllowsCompression(_ql) ((_ql)->compress != 0)

/* Force 'quicklist' to meet compression guidelines set by compress depth.
 * The only way to guarantee interior nodes get compressed is to iterate
 * to our "interior" compress depth then compress the next node we find.
 * If compress depth is larger than the entire list, we return immediately. */
REDIS_STATIC void __quicklistCompress(const quicklist *quicklist,
                                      quicklistNode *node) {
    if (quicklist->len == 0) return;

    /* The head and tail should never be compressed (we should not attempt to recompress them) */
    assert(quicklist->head->recompress == 0 && quicklist->tail->recompress == 0);

    /* If length is less than our compress depth (from both sides),
     * we can't compress anything. */
    if (!quicklistAllowsCompression(quicklist) ||
        quicklist->len < (unsigned int)(quicklist->compress * 2))
        return;

#if 0
    /* Optimized cases for small depth counts */
    if (quicklist->compress == 1) {
        quicklistNode *h = quicklist->head, *t = quicklist->tail;
        quicklistDecompressNode(h);
        quicklistDecompressNode(t);
        if (h != node && t != node)
            quicklistCompressNode(node);
        return;
    } else if (quicklist->compress == 2) {
        quicklistNode *h = quicklist->head, *hn = h->next, *hnn = hn->next;
        quicklistNode *t = quicklist->tail, *tp = t->prev, *tpp = tp->prev;
        quicklistDecompressNode(h);
        quicklistDecompressNode(hn);
        quicklistDecompressNode(t);
        quicklistDecompressNode(tp);
        if (h != node && hn != node && t != node && tp != node) {
            quicklistCompressNode(node);
        }
        if (hnn != t) {
            quicklistCompressNode(hnn);
        }
        if (tpp != h) {
            quicklistCompressNode(tpp);
        }
        return;
    }
#endif

    /* Iterate until we reach compress depth for both sides of the list.
     * Note: because we do length checks at the *top* of this function,
     * we can skip explicit null checks below. Everything exists. */
    quicklistNode *forward = quicklist->head;
    quicklistNode *reverse = quicklist->tail;
    int depth = 0;
    int in_depth = 0;
    while (depth++ < quicklist->compress) {
        /* Nodes within the depth window must stay uncompressed. */
        quicklistDecompressNode(forward);
        quicklistDecompressNode(reverse);

        if (forward == node || reverse == node)
            in_depth = 1;

        /* We passed into compress depth of opposite side of the quicklist
         * so there's no need to compress anything and we can exit. */
        if (forward == reverse || forward->next == reverse)
            return;

        forward = forward->next;
        reverse = reverse->prev;
    }

    /* 'node' lies in the interior: it is eligible for compression. */
    if (!in_depth)
        quicklistCompressNode(node);

    /* At this point, forward and reverse are one node beyond depth */
    quicklistCompressNode(forward);
    quicklistCompressNode(reverse);
}
+
/* Recompress a node previously opened with quicklistDecompressNodeForUse(),
 * otherwise apply the list-wide depth policy via __quicklistCompress(). */
#define quicklistCompress(_ql, _node)                                          \
    do {                                                                       \
        if ((_node)->recompress)                                               \
            quicklistCompressNode((_node));                                    \
        else                                                                   \
            __quicklistCompress((_ql), (_node));                               \
    } while (0)

/* If we previously used quicklistDecompressNodeForUse(), just recompress. */
#define quicklistRecompressOnly(_node)                                         \
    do {                                                                       \
        if ((_node)->recompress)                                               \
            quicklistCompressNode((_node));                                    \
    } while (0)
+
/* Insert 'new_node' after 'old_node' if 'after' is 1.
 * Insert 'new_node' before 'old_node' if 'after' is 0.
 * 'old_node' may be NULL, in which case the list must be empty and 'new_node'
 * becomes both head and tail.
 * Note: 'new_node' is *always* uncompressed, so if we assign it to
 * head or tail, we do not need to uncompress it. */
REDIS_STATIC void __quicklistInsertNode(quicklist *quicklist,
                                        quicklistNode *old_node,
                                        quicklistNode *new_node, int after) {
    if (after) {
        new_node->prev = old_node;
        if (old_node) {
            new_node->next = old_node->next;
            if (old_node->next)
                old_node->next->prev = new_node;
            old_node->next = new_node;
        }
        if (quicklist->tail == old_node)
            quicklist->tail = new_node;
    } else {
        new_node->next = old_node;
        if (old_node) {
            new_node->prev = old_node->prev;
            if (old_node->prev)
                old_node->prev->next = new_node;
            old_node->prev = new_node;
        }
        if (quicklist->head == old_node)
            quicklist->head = new_node;
    }
    /* If this insert creates the only element so far, initialize head/tail. */
    if (quicklist->len == 0) {
        quicklist->head = quicklist->tail = new_node;
    }

    /* Update len first, so in __quicklistCompress we know exactly len */
    quicklist->len++;

    /* Re-apply the compression depth policy around the insertion point. */
    if (old_node)
        quicklistCompress(quicklist, old_node);

    quicklistCompress(quicklist, new_node);
}
+
/* Wrappers for node inserting around existing node: fix the 'after' flag so
 * call sites read naturally. */
REDIS_STATIC void _quicklistInsertNodeBefore(quicklist *quicklist,
                                             quicklistNode *old_node,
                                             quicklistNode *new_node) {
    __quicklistInsertNode(quicklist, old_node, new_node, 0);
}

REDIS_STATIC void _quicklistInsertNodeAfter(quicklist *quicklist,
                                            quicklistNode *old_node,
                                            quicklistNode *new_node) {
    __quicklistInsertNode(quicklist, old_node, new_node, 1);
}
+
+#define sizeMeetsSafetyLimit(sz) ((sz) <= SIZE_SAFETY_LIMIT)
+
+/* Calculate the size limit or length limit of the quicklist node
+ * based on 'fill', and is also used to limit list listpack. */
+void quicklistNodeLimit(int fill, size_t *size, unsigned int *count) {
+ *size = SIZE_MAX;
+ *count = UINT_MAX;
+
+ if (fill >= 0) {
+ /* Ensure that one node have at least one entry */
+ *count = (fill == 0) ? 1 : fill;
+ } else {
+ size_t offset = (-fill) - 1;
+ size_t max_level = sizeof(optimization_level) / sizeof(*optimization_level);
+ if (offset >= max_level) offset = max_level - 1;
+ *size = optimization_level[offset];
+ }
+}
+
/* Check if the limit of the quicklist node has been reached, to decide
 * whether insertions, merges or other growing operations may proceed.
 * Returns 1 if 'new_sz'/'new_count' would exceed the limit implied by
 * 'fill', otherwise 0. */
int quicklistNodeExceedsLimit(int fill, size_t new_sz, unsigned int new_count) {
    size_t sz_limit;
    unsigned int count_limit;
    quicklistNodeLimit(fill, &sz_limit, &count_limit);

    /* Size-limited node: only the byte size matters. */
    if (likely(sz_limit != SIZE_MAX)) return new_sz > sz_limit;

    if (count_limit != UINT_MAX) {
        /* Count-limited node: still enforce the byte safety ceiling so a
         * pure count limit cannot grow an unbounded listpack (see comments
         * next to optimization_level and SIZE_SAFETY_LIMIT). */
        if (!sizeMeetsSafetyLimit(new_sz)) return 1;
        return new_count > count_limit;
    }

    /* quicklistNodeLimit() always sets exactly one of the two limits. */
    redis_unreachable();
}
+
/* Return 1 when one more entry of 'sz' bytes may be added to 'node' without
 * exceeding the fill limits; 0 when a new node is required instead. */
REDIS_STATIC int _quicklistNodeAllowInsert(const quicklistNode *node,
                                           const int fill, const size_t sz) {
    if (unlikely(!node))
        return 0;

    /* Plain nodes hold exactly one element; large elements get a plain node. */
    if (unlikely(QL_NODE_IS_PLAIN(node) || isLargeElement(sz)))
        return 0;

    /* Estimate how many bytes will be added to the listpack by this one entry.
     * We prefer an overestimation, which would at worse lead to a few bytes
     * below the lowest limit of 4k (see optimization_level).
     * Note: No need to check for overflow below since both `node->sz` and
     * `sz` are to be less than 1GB after the plain/large element check above. */
    size_t new_sz = node->sz + sz + SIZE_ESTIMATE_OVERHEAD;
    if (unlikely(quicklistNodeExceedsLimit(fill, new_sz, node->count + 1)))
        return 0;
    return 1;
}
+
/* Return 1 when the listpacks of nodes 'a' and 'b' could be merged into a
 * single node without exceeding the fill limits, 0 otherwise. */
REDIS_STATIC int _quicklistNodeAllowMerge(const quicklistNode *a,
                                          const quicklistNode *b,
                                          const int fill) {
    if (!a || !b)
        return 0;

    /* Plain nodes are never merged. */
    if (unlikely(QL_NODE_IS_PLAIN(a) || QL_NODE_IS_PLAIN(b)))
        return 0;

    /* approximate merged listpack size (- 7 to remove one listpack
     * header/trailer, see LP_HDR_SIZE and LP_EOF) */
    unsigned int merge_sz = a->sz + b->sz - 7;
    if (unlikely(quicklistNodeExceedsLimit(fill, merge_sz, a->count + b->count)))
        return 0;
    return 1;
}
+
/* Refresh the cached byte size of a packed node from its listpack. */
#define quicklistNodeUpdateSz(node)                                            \
    do {                                                                       \
        (node)->sz = lpBytes((node)->entry);                                   \
    } while (0)

/* Build a PLAIN node owning a private copy of 'value' (sz bytes). Used for
 * elements at or above packed_threshold. */
static quicklistNode* __quicklistCreatePlainNode(void *value, size_t sz) {
    quicklistNode *new_node = quicklistCreateNode();
    new_node->entry = zmalloc(sz);
    new_node->container = QUICKLIST_NODE_CONTAINER_PLAIN;
    memcpy(new_node->entry, value, sz);
    new_node->sz = sz;
    new_node->count++;
    return new_node;
}

/* Create a PLAIN node for 'value' and link it before/after 'old_node',
 * bumping the list-wide element count. */
static void __quicklistInsertPlainNode(quicklist *quicklist, quicklistNode *old_node,
                                       void *value, size_t sz, int after) {
    __quicklistInsertNode(quicklist, old_node, __quicklistCreatePlainNode(value, sz), after);
    quicklist->count++;
}
+
+/* Add new entry to head node of quicklist.
+ *
+ * Returns 0 if used existing head.
+ * Returns 1 if new head created. */
+int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) {
+ quicklistNode *orig_head = quicklist->head;
+
+ if (unlikely(isLargeElement(sz))) {
+ __quicklistInsertPlainNode(quicklist, quicklist->head, value, sz, 0);
+ return 1;
+ }
+
+ if (likely(
+ _quicklistNodeAllowInsert(quicklist->head, quicklist->fill, sz))) {
+ quicklist->head->entry = lpPrepend(quicklist->head->entry, value, sz);
+ quicklistNodeUpdateSz(quicklist->head);
+ } else {
+ quicklistNode *node = quicklistCreateNode();
+ node->entry = lpPrepend(lpNew(0), value, sz);
+
+ quicklistNodeUpdateSz(node);
+ _quicklistInsertNodeBefore(quicklist, quicklist->head, node);
+ }
+ quicklist->count++;
+ quicklist->head->count++;
+ return (orig_head != quicklist->head);
+}
+
+/* Add new entry to tail node of quicklist.
+ *
+ * Returns 0 if used existing tail.
+ * Returns 1 if new tail created. */
+int quicklistPushTail(quicklist *quicklist, void *value, size_t sz) {
+ quicklistNode *orig_tail = quicklist->tail;
+ if (unlikely(isLargeElement(sz))) {
+ __quicklistInsertPlainNode(quicklist, quicklist->tail, value, sz, 1);
+ return 1;
+ }
+
+ if (likely(
+ _quicklistNodeAllowInsert(quicklist->tail, quicklist->fill, sz))) {
+ quicklist->tail->entry = lpAppend(quicklist->tail->entry, value, sz);
+ quicklistNodeUpdateSz(quicklist->tail);
+ } else {
+ quicklistNode *node = quicklistCreateNode();
+ node->entry = lpAppend(lpNew(0), value, sz);
+
+ quicklistNodeUpdateSz(node);
+ _quicklistInsertNodeAfter(quicklist, quicklist->tail, node);
+ }
+ quicklist->count++;
+ quicklist->tail->count++;
+ return (orig_tail != quicklist->tail);
+}
+
+/* Create new node consisting of a pre-formed listpack.
+ * Used for loading RDBs where entire listpacks have been stored
+ * to be retrieved later. */
+void quicklistAppendListpack(quicklist *quicklist, unsigned char *zl) {
+ quicklistNode *node = quicklistCreateNode();
+
+ node->entry = zl;
+ node->count = lpLength(node->entry);
+ node->sz = lpBytes(zl);
+
+ _quicklistInsertNodeAfter(quicklist, quicklist->tail, node);
+ quicklist->count += node->count;
+}
+
+/* Create new node consisting of a pre-formed plain node.
+ * Used for loading RDBs where entire plain node has been stored
+ * to be retrieved later.
+ * data - the data to add (pointer becomes the responsibility of quicklist) */
+void quicklistAppendPlainNode(quicklist *quicklist, unsigned char *data, size_t sz) {
+ quicklistNode *node = quicklistCreateNode();
+
+ node->entry = data;
+ node->count = 1;
+ node->sz = sz;
+ node->container = QUICKLIST_NODE_CONTAINER_PLAIN;
+
+ _quicklistInsertNodeAfter(quicklist, quicklist->tail, node);
+ quicklist->count += node->count;
+}
+
/* Delete node 'n' from quicklist 'ql' and NULL the variable when the node
 * holds no more entries; otherwise leave both untouched. */
#define quicklistDeleteIfEmpty(ql, n) \
    do { \
        if ((n)->count == 0) { \
            __quicklistDelNode((ql), (n)); \
            (n) = NULL; \
        } \
    } while (0)
+
/* Unlink 'node' from the quicklist, fix bookmarks, update list accounting
 * and compression state, then free the node and its storage. */
REDIS_STATIC void __quicklistDelNode(quicklist *quicklist,
                                     quicklistNode *node) {
    /* Update the bookmark if any */
    quicklistBookmark *bm = _quicklistBookmarkFindByNode(quicklist, node);
    if (bm) {
        bm->node = node->next;
        /* if the bookmark was to the last node, delete it. */
        if (!bm->node)
            _quicklistBookmarkDelete(quicklist, bm);
    }

    /* Splice the node out of the doubly linked list. */
    if (node->next)
        node->next->prev = node->prev;
    if (node->prev)
        node->prev->next = node->next;

    if (node == quicklist->tail) {
        quicklist->tail = node->prev;
    }

    if (node == quicklist->head) {
        quicklist->head = node->next;
    }

    /* Update len first, so in __quicklistCompress we know exactly len */
    quicklist->len--;
    quicklist->count -= node->count;

    /* If we deleted a node within our compress depth, we
     * now have compressed nodes needing to be decompressed. */
    __quicklistCompress(quicklist, NULL);

    zfree(node->entry);
    zfree(node);
}
+
/* Delete one entry from list given the node for the entry and a pointer
 * to the entry in the node.
 *
 * Note: quicklistDelIndex() *requires* uncompressed nodes because you
 * already had to get *p from an uncompressed node somewhere.
 *
 * Returns 1 if the entire node was deleted, 0 if node still exists.
 * Also updates in/out param 'p' with the next offset in the listpack. */
REDIS_STATIC int quicklistDelIndex(quicklist *quicklist, quicklistNode *node,
                                   unsigned char **p) {
    int gone = 0;

    /* A plain node holds exactly one element, so drop the whole node. */
    if (unlikely(QL_NODE_IS_PLAIN(node))) {
        __quicklistDelNode(quicklist, node);
        return 1;
    }
    node->entry = lpDelete(node->entry, *p, p);
    node->count--;
    if (node->count == 0) {
        gone = 1;
        __quicklistDelNode(quicklist, node);
    } else {
        /* Node survives: refresh its cached byte size. */
        quicklistNodeUpdateSz(node);
    }
    quicklist->count--;
    /* If we deleted the node, the original node is no longer valid */
    return gone ? 1 : 0;
}
+
/* Delete one element represented by 'entry'
 *
 * 'entry' stores enough metadata to delete the proper position in
 * the correct listpack in the correct quicklist node.
 * Also keeps 'iter' consistent so iteration can continue afterwards. */
void quicklistDelEntry(quicklistIter *iter, quicklistEntry *entry) {
    /* Capture neighbors first: they stay valid even if entry->node dies. */
    quicklistNode *prev = entry->node->prev;
    quicklistNode *next = entry->node->next;
    int deleted_node = quicklistDelIndex((quicklist *)entry->quicklist,
                                         entry->node, &entry->zi);

    /* after delete, the zi is now invalid for any future usage. */
    iter->zi = NULL;

    /* If current node is deleted, we must update iterator node and offset. */
    if (deleted_node) {
        if (iter->direction == AL_START_HEAD) {
            iter->current = next;
            iter->offset = 0;
        } else if (iter->direction == AL_START_TAIL) {
            iter->current = prev;
            iter->offset = -1;
        }
    }
    /* else if (!deleted_node), no changes needed.
     * we already reset iter->zi above, and the existing iter->offset
     * doesn't move again because:
     * - [1, 2, 3] => delete offset 1 => [1, 3]: next element still offset 1
     * - [1, 2, 3] => delete offset 0 => [2, 3]: next element still offset 0
     * if we deleted the last element at offset N and now
     * length of this listpack is N-1, the next call into
     * quicklistNext() will jump to the next node. */
}
+
/* Replace quicklist entry by 'data' with length 'sz'.
 * Handles all four combinations of (current node plain?, new value large?). */
void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry,
                           void *data, size_t sz)
{
    quicklist* quicklist = iter->quicklist;

    if (likely(!QL_NODE_IS_PLAIN(entry->node) && !isLargeElement(sz))) {
        /* listpack node -> listpack-sized value: replace in place. */
        entry->node->entry = lpReplace(entry->node->entry, &entry->zi, data, sz);
        quicklistNodeUpdateSz(entry->node);
        /* quicklistNext() and quicklistGetIteratorEntryAtIdx() provide an uncompressed node */
        quicklistCompress(quicklist, entry->node);
    } else if (QL_NODE_IS_PLAIN(entry->node)) {
        if (isLargeElement(sz)) {
            /* plain -> plain: just swap the node's private buffer. */
            zfree(entry->node->entry);
            entry->node->entry = zmalloc(sz);
            entry->node->sz = sz;
            memcpy(entry->node->entry, data, sz);
            quicklistCompress(quicklist, entry->node);
        } else {
            /* plain -> small value: insert as listpack entry, drop plain node. */
            quicklistInsertAfter(iter, entry, data, sz);
            __quicklistDelNode(quicklist, entry->node);
        }
    } else {
        /* listpack node -> large value: insert plain node, then remove the
         * replaced element from the original listpack. */
        entry->node->dont_compress = 1; /* Prevent compression in quicklistInsertAfter() */
        quicklistInsertAfter(iter, entry, data, sz);
        if (entry->node->count == 1) {
            __quicklistDelNode(quicklist, entry->node);
        } else {
            unsigned char *p = lpSeek(entry->node->entry, -1);
            quicklistDelIndex(quicklist, entry->node, &p);
            entry->node->dont_compress = 0; /* Re-enable compression */
            quicklistCompress(quicklist, entry->node);
            quicklistCompress(quicklist, entry->node->next);
        }
    }

    /* In any case, we reset iterator to forbid use of iterator after insert.
     * Notice: iter->current has been compressed above. */
    resetIterator(iter);
}
+
+/* Replace quicklist entry at offset 'index' by 'data' with length 'sz'.
+ *
+ * Returns 1 if replace happened.
+ * Returns 0 if replace failed and no changes happened. */
+int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data,
+ size_t sz) {
+ quicklistEntry entry;
+ quicklistIter *iter = quicklistGetIteratorEntryAtIdx(quicklist, index, &entry);
+ if (likely(iter)) {
+ quicklistReplaceEntry(iter, &entry, data, sz);
+ quicklistReleaseIterator(iter);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
/* Given two nodes, try to merge their listpacks.
 *
 * This helps us not have a quicklist with 3 element listpacks if
 * our fill factor can handle much higher levels.
 *
 * Note: 'a' must be to the LEFT of 'b'.
 *
 * After calling this function, both 'a' and 'b' should be considered
 * unusable. The return value from this function must be used
 * instead of re-using any of the quicklistNode input arguments.
 *
 * Returns the input node picked to merge against or NULL if
 * merging was not possible. */
REDIS_STATIC quicklistNode *_quicklistListpackMerge(quicklist *quicklist,
                                                    quicklistNode *a,
                                                    quicklistNode *b) {
    D("Requested merge (a,b) (%u, %u)", a->count, b->count);

    /* Nodes must be readable before lpMerge can combine them. */
    quicklistDecompressNode(a);
    quicklistDecompressNode(b);
    if ((lpMerge(&a->entry, &b->entry))) {
        /* We merged listpacks! Now remove the unused quicklistNode. */
        /* lpMerge leaves exactly one of the two entry pointers NULL. */
        quicklistNode *keep = NULL, *nokeep = NULL;
        if (!a->entry) {
            nokeep = a;
            keep = b;
        } else if (!b->entry) {
            nokeep = b;
            keep = a;
        }
        keep->count = lpLength(keep->entry);
        quicklistNodeUpdateSz(keep);

        /* Zero the count so __quicklistDelNode doesn't decrement
         * quicklist->count for elements that moved to 'keep'. */
        nokeep->count = 0;
        __quicklistDelNode(quicklist, nokeep);
        quicklistCompress(quicklist, keep);
        return keep;
    } else {
        /* else, the merge returned NULL and nothing changed. */
        return NULL;
    }
}
+
/* Attempt to merge listpacks within two nodes on either side of 'center'.
 *
 * We attempt to merge:
 * - (center->prev->prev, center->prev)
 * - (center->next, center->next->next)
 * - (center->prev, center)
 * - (center, center->next)
 */
REDIS_STATIC void _quicklistMergeNodes(quicklist *quicklist,
                                       quicklistNode *center) {
    int fill = quicklist->fill;
    quicklistNode *prev, *prev_prev, *next, *next_next, *target;
    prev = prev_prev = next = next_next = target = NULL;

    /* Snapshot the neighborhood before any merge mutates the links. */
    if (center->prev) {
        prev = center->prev;
        if (center->prev->prev)
            prev_prev = center->prev->prev;
    }

    if (center->next) {
        next = center->next;
        if (center->next->next)
            next_next = center->next->next;
    }

    /* Try to merge prev_prev and prev */
    if (_quicklistNodeAllowMerge(prev, prev_prev, fill)) {
        _quicklistListpackMerge(quicklist, prev_prev, prev);
        prev_prev = prev = NULL; /* they could have moved, invalidate them. */
    }

    /* Try to merge next and next_next */
    if (_quicklistNodeAllowMerge(next, next_next, fill)) {
        _quicklistListpackMerge(quicklist, next, next_next);
        next = next_next = NULL; /* they could have moved, invalidate them. */
    }

    /* Try to merge center node and previous node */
    if (_quicklistNodeAllowMerge(center, center->prev, fill)) {
        target = _quicklistListpackMerge(quicklist, center->prev, center);
        center = NULL; /* center could have been deleted, invalidate it. */
    } else {
        /* else, we didn't merge here, but target needs to be valid below. */
        target = center;
    }

    /* Use result of center merge (or original) to merge with next node. */
    if (_quicklistNodeAllowMerge(target, target->next, fill)) {
        _quicklistListpackMerge(quicklist, target, target->next);
    }
}
+
/* Split 'node' into two parts, parameterized by 'offset' and 'after'.
 *
 * The 'after' argument controls which quicklistNode gets returned.
 * If 'after'==1, returned node has elements after 'offset'.
 *                input node keeps elements up to 'offset', including 'offset'.
 * If 'after'==0, returned node has elements up to 'offset'.
 *                input node keeps elements after 'offset', including 'offset'.
 *
 * Or in other words:
 * If 'after'==1, returned node will have elements after 'offset'.
 *                The returned node will have elements [OFFSET+1, END].
 *                The input node keeps elements [0, OFFSET].
 * If 'after'==0, returned node will keep elements up to but not including 'offset'.
 *                The returned node will have elements [0, OFFSET-1].
 *                The input node keeps elements [OFFSET, END].
 *
 * The input node keeps all elements not taken by the returned node.
 *
 * Returns newly created node or NULL if split not possible. */
REDIS_STATIC quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset,
                                                int after) {
    size_t zl_sz = node->sz;

    quicklistNode *new_node = quicklistCreateNode();
    new_node->entry = zmalloc(zl_sz);

    /* Copy original listpack so we can split it */
    memcpy(new_node->entry, node->entry, zl_sz);

    /* Need positive offset for calculating extent below. */
    if (offset < 0) offset = node->count + offset;

    /* Ranges to be trimmed: -1 here means "continue deleting until the list ends" */
    int orig_start = after ? offset + 1 : 0;
    int orig_extent = after ? -1 : offset;
    int new_start = after ? 0 : offset;
    int new_extent = after ? offset + 1 : -1;

    D("After %d (%d); ranges: [%d, %d], [%d, %d]", after, offset, orig_start,
      orig_extent, new_start, new_extent);

    /* Trim the kept half out of the copy and the moved half out of the
     * original, leaving each node with its own complementary range. */
    node->entry = lpDeleteRange(node->entry, orig_start, orig_extent);
    node->count = lpLength(node->entry);
    quicklistNodeUpdateSz(node);

    new_node->entry = lpDeleteRange(new_node->entry, new_start, new_extent);
    new_node->count = lpLength(new_node->entry);
    quicklistNodeUpdateSz(new_node);

    D("After split lengths: orig (%d), new (%d)", node->count, new_node->count);
    return new_node;
}
+
/* Insert a new entry before or after existing entry 'entry'.
 *
 * If after==1, the new value is inserted after 'entry', otherwise
 * the new value is inserted before 'entry'.
 *
 * The iterator is reset on return; callers must not reuse it. */
REDIS_STATIC void _quicklistInsert(quicklistIter *iter, quicklistEntry *entry,
                                   void *value, const size_t sz, int after)
{
    quicklist *quicklist = iter->quicklist;
    int full = 0, at_tail = 0, at_head = 0, avail_next = 0, avail_prev = 0;
    int fill = quicklist->fill;
    quicklistNode *node = entry->node;
    quicklistNode *new_node = NULL;

    if (!node) {
        /* we have no reference node, so let's create only node in the list */
        D("No node given!");
        if (unlikely(isLargeElement(sz))) {
            __quicklistInsertPlainNode(quicklist, quicklist->tail, value, sz, after);
            return;
        }
        new_node = quicklistCreateNode();
        new_node->entry = lpPrepend(lpNew(0), value, sz);
        __quicklistInsertNode(quicklist, NULL, new_node, after);
        new_node->count++;
        quicklist->count++;
        return;
    }

    /* Populate accounting flags for easier boolean checks later */
    if (!_quicklistNodeAllowInsert(node, fill, sz)) {
        D("Current node is full with count %d with requested fill %d",
          node->count, fill);
        full = 1;
    }

    if (after && (entry->offset == node->count - 1 || entry->offset == -1)) {
        D("At Tail of current listpack");
        at_tail = 1;
        if (_quicklistNodeAllowInsert(node->next, fill, sz)) {
            D("Next node is available.");
            avail_next = 1;
        }
    }

    if (!after && (entry->offset == 0 || entry->offset == -(node->count))) {
        D("At Head");
        at_head = 1;
        if (_quicklistNodeAllowInsert(node->prev, fill, sz)) {
            D("Prev node is available.");
            avail_prev = 1;
        }
    }

    /* Large values always go into a plain node; split the current listpack
     * around the insertion point when inserting mid-node. */
    if (unlikely(isLargeElement(sz))) {
        if (QL_NODE_IS_PLAIN(node) || (at_tail && after) || (at_head && !after)) {
            __quicklistInsertPlainNode(quicklist, node, value, sz, after);
        } else {
            quicklistDecompressNodeForUse(node);
            new_node = _quicklistSplitNode(node, entry->offset, after);
            quicklistNode *entry_node = __quicklistCreatePlainNode(value, sz);
            __quicklistInsertNode(quicklist, node, entry_node, after);
            __quicklistInsertNode(quicklist, entry_node, new_node, after);
            quicklist->count++;
        }
        return;
    }

    /* Now determine where and how to insert the new element */
    if (!full && after) {
        D("Not full, inserting after current position.");
        quicklistDecompressNodeForUse(node);
        node->entry = lpInsertString(node->entry, value, sz, entry->zi, LP_AFTER, NULL);
        node->count++;
        quicklistNodeUpdateSz(node);
        quicklistRecompressOnly(node);
    } else if (!full && !after) {
        D("Not full, inserting before current position.");
        quicklistDecompressNodeForUse(node);
        node->entry = lpInsertString(node->entry, value, sz, entry->zi, LP_BEFORE, NULL);
        node->count++;
        quicklistNodeUpdateSz(node);
        quicklistRecompressOnly(node);
    } else if (full && at_tail && avail_next && after) {
        /* If we are: at tail, next has free space, and inserting after:
         *   - insert entry at head of next node. */
        D("Full and tail, but next isn't full; inserting next node head");
        new_node = node->next;
        quicklistDecompressNodeForUse(new_node);
        new_node->entry = lpPrepend(new_node->entry, value, sz);
        new_node->count++;
        quicklistNodeUpdateSz(new_node);
        quicklistRecompressOnly(new_node);
        quicklistRecompressOnly(node);
    } else if (full && at_head && avail_prev && !after) {
        /* If we are: at head, previous has free space, and inserting before:
         *   - insert entry at tail of previous node. */
        D("Full and head, but prev isn't full, inserting prev node tail");
        new_node = node->prev;
        quicklistDecompressNodeForUse(new_node);
        new_node->entry = lpAppend(new_node->entry, value, sz);
        new_node->count++;
        quicklistNodeUpdateSz(new_node);
        quicklistRecompressOnly(new_node);
        quicklistRecompressOnly(node);
    } else if (full && ((at_tail && !avail_next && after) ||
                        (at_head && !avail_prev && !after))) {
        /* If we are: full, and our prev/next has no available space, then:
         *   - create new node and attach to quicklist */
        D("\tprovisioning new node...");
        new_node = quicklistCreateNode();
        new_node->entry = lpPrepend(lpNew(0), value, sz);
        new_node->count++;
        quicklistNodeUpdateSz(new_node);
        __quicklistInsertNode(quicklist, node, new_node, after);
    } else if (full) {
        /* else, node is full we need to split it. */
        /* covers both after and !after cases */
        D("\tsplitting node...");
        quicklistDecompressNodeForUse(node);
        new_node = _quicklistSplitNode(node, entry->offset, after);
        if (after)
            new_node->entry = lpPrepend(new_node->entry, value, sz);
        else
            new_node->entry = lpAppend(new_node->entry, value, sz);
        new_node->count++;
        quicklistNodeUpdateSz(new_node);
        __quicklistInsertNode(quicklist, node, new_node, after);
        /* Splitting may have left small neighbors; try to re-coalesce. */
        _quicklistMergeNodes(quicklist, node);
    }

    quicklist->count++;

    /* In any case, we reset iterator to forbid use of iterator after insert.
     * Notice: iter->current has been compressed in _quicklistInsert(). */
    resetIterator(iter);
}
+
/* Insert 'value' (sz bytes) before the position described by 'entry'.
 * The iterator is reset by the insert and must not be reused. */
void quicklistInsertBefore(quicklistIter *iter, quicklistEntry *entry,
                           void *value, const size_t sz)
{
    _quicklistInsert(iter, entry, value, sz, 0);
}
+
/* Insert 'value' (sz bytes) after the position described by 'entry'.
 * The iterator is reset by the insert and must not be reused. */
void quicklistInsertAfter(quicklistIter *iter, quicklistEntry *entry,
                          void *value, const size_t sz)
{
    _quicklistInsert(iter, entry, value, sz, 1);
}
+
/* Delete a range of elements from the quicklist.
 *
 * elements may span across multiple quicklistNodes, so we
 * have to be careful about tracking where we start and end.
 *
 * Returns 1 if entries were deleted, 0 if nothing was deleted. */
int quicklistDelRange(quicklist *quicklist, const long start,
                      const long count) {
    if (count <= 0)
        return 0;

    unsigned long extent = count; /* range is inclusive of start position */

    if (start >= 0 && extent > (quicklist->count - start)) {
        /* if requesting delete more elements than exist, limit to list size. */
        extent = quicklist->count - start;
    } else if (start < 0 && extent > (unsigned long)(-start)) {
        /* else, if at negative offset, limit max size to rest of list. */
        extent = -start; /* c.f. LREM -29 29; just delete until end. */
    }

    /* Locate the starting node/offset, then drop the iterator: we walk
     * nodes manually below since whole nodes may be deleted. */
    quicklistIter *iter = quicklistGetIteratorAtIdx(quicklist, AL_START_TAIL, start);
    if (!iter)
        return 0;

    D("Quicklist delete request for start %ld, count %ld, extent: %ld", start,
      count, extent);
    quicklistNode *node = iter->current;
    long offset = iter->offset;
    quicklistReleaseIterator(iter);

    /* iterate over next nodes until everything is deleted. */
    while (extent) {
        /* Save next before this node is potentially freed. */
        quicklistNode *next = node->next;

        unsigned long del;
        int delete_entire_node = 0;
        if (offset == 0 && extent >= node->count) {
            /* If we are deleting more than the count of this node, we
             * can just delete the entire node without listpack math. */
            delete_entire_node = 1;
            del = node->count;
        } else if (offset >= 0 && extent + offset >= node->count) {
            /* If deleting more nodes after this one, calculate delete based
             * on size of current node. */
            del = node->count - offset;
        } else if (offset < 0) {
            /* If offset is negative, we are in the first run of this loop
             * and we are deleting the entire range
             * from this start offset to end of list.  Since the Negative
             * offset is the number of elements until the tail of the list,
             * just use it directly as the deletion count. */
            del = -offset;

            /* If the positive offset is greater than the remaining extent,
             * we only delete the remaining extent, not the entire offset.
             */
            if (del > extent)
                del = extent;
        } else {
            /* else, we are deleting less than the extent of this node, so
             * use extent directly. */
            del = extent;
        }

        D("[%ld]: asking to del: %ld because offset: %d; (ENTIRE NODE: %d), "
          "node count: %u",
          extent, del, offset, delete_entire_node, node->count);

        if (delete_entire_node || QL_NODE_IS_PLAIN(node)) {
            __quicklistDelNode(quicklist, node);
        } else {
            quicklistDecompressNodeForUse(node);
            node->entry = lpDeleteRange(node->entry, offset, del);
            quicklistNodeUpdateSz(node);
            node->count -= del;
            quicklist->count -= del;
            quicklistDeleteIfEmpty(quicklist, node);
            if (node)
                quicklistRecompressOnly(node);
        }

        extent -= del;

        node = next;

        /* Every node after the first is consumed from its head. */
        offset = 0;
    }
    return 1;
}
+
+/* compare between a two entries */
+int quicklistCompare(quicklistEntry* entry, unsigned char *p2, const size_t p2_len) {
+ if (unlikely(QL_NODE_IS_PLAIN(entry->node))) {
+ return ((entry->sz == p2_len) && (memcmp(entry->value, p2, p2_len) == 0));
+ }
+ return lpCompare(entry->zi, p2, p2_len);
+}
+
+/* Returns a quicklist iterator 'iter'. After the initialization every
+ * call to quicklistNext() will return the next element of the quicklist. */
+quicklistIter *quicklistGetIterator(quicklist *quicklist, int direction) {
+ quicklistIter *iter;
+
+ iter = zmalloc(sizeof(*iter));
+
+ if (direction == AL_START_HEAD) {
+ iter->current = quicklist->head;
+ iter->offset = 0;
+ } else if (direction == AL_START_TAIL) {
+ iter->current = quicklist->tail;
+ iter->offset = -1;
+ }
+
+ iter->direction = direction;
+ iter->quicklist = quicklist;
+
+ iter->zi = NULL;
+
+ return iter;
+}
+
/* Initialize an iterator at a specific offset 'idx' and make the iterator
 * return nodes in 'direction' direction.
 * Negative 'idx' counts from the tail (-1 is the last element).
 * Returns NULL when 'idx' is out of range. */
quicklistIter *quicklistGetIteratorAtIdx(quicklist *quicklist,
                                         const int direction,
                                         const long long idx)
{
    quicklistNode *n;
    unsigned long long accum = 0;
    unsigned long long index;
    int forward = idx < 0 ? 0 : 1; /* < 0 -> reverse, 0+ -> forward */

    /* Normalize to a head-relative zero-based index. */
    index = forward ? idx : (-idx) - 1;
    if (index >= quicklist->count)
        return NULL;

    /* Seek in the other direction if that way is shorter. */
    int seek_forward = forward;
    unsigned long long seek_index = index;
    if (index > (quicklist->count - 1) / 2) {
        seek_forward = !forward;
        seek_index = quicklist->count - 1 - index;
    }

    /* Walk node by node, accumulating element counts until the node
     * containing seek_index is found. */
    n = seek_forward ? quicklist->head : quicklist->tail;
    while (likely(n)) {
        if ((accum + n->count) > seek_index) {
            break;
        } else {
            D("Skipping over (%p) %u at accum %lld", (void *)n, n->count,
              accum);
            accum += n->count;
            n = seek_forward ? n->next : n->prev;
        }
    }

    if (!n)
        return NULL;

    /* Fix accum so it looks like we seeked in the other direction. */
    if (seek_forward != forward) accum = quicklist->count - n->count - accum;

    D("Found node: %p at accum %llu, idx %llu, sub+ %llu, sub- %llu", (void *)n,
      accum, index, index - accum, (-index) - 1 + accum);

    quicklistIter *iter = quicklistGetIterator(quicklist, direction);
    iter->current = n;
    if (forward) {
        /* forward = normal head-to-tail offset. */
        iter->offset = index - accum;
    } else {
        /* reverse = need negative offset for tail-to-head, so undo
         * the result of the original index = (-idx) - 1 above. */
        iter->offset = (-index) - 1 + accum;
    }

    return iter;
}
+
+/* Release iterator.
+ * If we still have a valid current node, then re-encode current node. */
+void quicklistReleaseIterator(quicklistIter *iter) {
+ if (!iter) return;
+ if (iter->current)
+ quicklistCompress(iter->quicklist, iter->current);
+
+ zfree(iter);
+}
+
/* Get next element in iterator.
 *
 * Note: You must NOT insert into the list while iterating over it.
 * You *may* delete from the list while iterating using the
 * quicklistDelEntry() function.
 * If you insert into the quicklist while iterating, you should
 * re-create the iterator after your addition.
 *
 * iter = quicklistGetIterator(quicklist,<direction>);
 * quicklistEntry entry;
 * while (quicklistNext(iter, &entry)) {
 *     if (entry.value)
 *          [[ use entry.value with entry.sz ]]
 *     else
 *          [[ use entry.longval ]]
 * }
 *
 * Populates 'entry' with values for this iteration.
 * Returns 0 when iteration is complete or if iteration not possible.
 * If return value is 0, the contents of 'entry' are not valid.
 */
int quicklistNext(quicklistIter *iter, quicklistEntry *entry) {
    initEntry(entry);

    if (!iter) {
        D("Returning because no iter!");
        return 0;
    }

    entry->quicklist = iter->quicklist;
    entry->node = iter->current;

    if (!iter->current) {
        D("Returning because current node is NULL");
        return 0;
    }

    unsigned char *(*nextFn)(unsigned char *, unsigned char *) = NULL;
    int offset_update = 0;

    int plain = QL_NODE_IS_PLAIN(iter->current);
    if (!iter->zi) {
        /* If !zi, use current index. */
        quicklistDecompressNodeForUse(iter->current);
        if (unlikely(plain))
            iter->zi = iter->current->entry;
        else
            iter->zi = lpSeek(iter->current->entry, iter->offset);
    } else if (unlikely(plain)) {
        /* A plain node has a single element; stepping past it means we are
         * done with this node. */
        iter->zi = NULL;
    } else {
        /* else, use existing iterator offset and get prev/next as necessary. */
        if (iter->direction == AL_START_HEAD) {
            nextFn = lpNext;
            offset_update = 1;
        } else if (iter->direction == AL_START_TAIL) {
            nextFn = lpPrev;
            offset_update = -1;
        }
        iter->zi = nextFn(iter->current->entry, iter->zi);
        iter->offset += offset_update;
    }

    entry->zi = iter->zi;
    entry->offset = iter->offset;

    if (iter->zi) {
        if (unlikely(plain)) {
            /* Plain node: the value is the node's whole private buffer. */
            entry->value = entry->node->entry;
            entry->sz = entry->node->sz;
            return 1;
        }
        /* Populate value from existing listpack position */
        unsigned int sz = 0;
        entry->value = lpGetValue(entry->zi, &sz, &entry->longval);
        entry->sz = sz;
        return 1;
    } else {
        /* We ran out of listpack entries.
         * Pick next node, update offset, then re-run retrieval. */
        quicklistCompress(iter->quicklist, iter->current);
        if (iter->direction == AL_START_HEAD) {
            /* Forward traversal */
            D("Jumping to start of next node");
            iter->current = iter->current->next;
            iter->offset = 0;
        } else if (iter->direction == AL_START_TAIL) {
            /* Reverse traversal */
            D("Jumping to end of previous node");
            iter->current = iter->current->prev;
            iter->offset = -1;
        }
        iter->zi = NULL;
        /* Recurses at most once: either the new node yields an entry or
         * iter->current is NULL and the recursion returns 0. */
        return quicklistNext(iter, entry);
    }
}
+
/* Sets the direction of a quicklist iterator.
 * 'direction' is AL_START_HEAD or AL_START_TAIL. */
void quicklistSetDirection(quicklistIter *iter, int direction) {
    iter->direction = direction;
}
+
+/* Duplicate the quicklist.
+ * On success a copy of the original quicklist is returned.
+ *
+ * The original quicklist both on success or error is never modified.
+ *
+ * Returns newly allocated quicklist. */
+quicklist *quicklistDup(quicklist *orig) {
+ quicklist *copy;
+
+ copy = quicklistNew(orig->fill, orig->compress);
+
+ for (quicklistNode *current = orig->head; current;
+ current = current->next) {
+ quicklistNode *node = quicklistCreateNode();
+
+ if (current->encoding == QUICKLIST_NODE_ENCODING_LZF) {
+ quicklistLZF *lzf = (quicklistLZF *)current->entry;
+ size_t lzf_sz = sizeof(*lzf) + lzf->sz;
+ node->entry = zmalloc(lzf_sz);
+ memcpy(node->entry, current->entry, lzf_sz);
+ } else if (current->encoding == QUICKLIST_NODE_ENCODING_RAW) {
+ node->entry = zmalloc(current->sz);
+ memcpy(node->entry, current->entry, current->sz);
+ }
+
+ node->count = current->count;
+ copy->count += node->count;
+ node->sz = current->sz;
+ node->encoding = current->encoding;
+ node->container = current->container;
+
+ _quicklistInsertNodeAfter(copy, copy->tail, node);
+ }
+
+ /* copy->count must equal orig->count here */
+ return copy;
+}
+
+/* Populate 'entry' with the element at the specified zero-based index
+ * where 0 is the head, 1 is the element next to head
+ * and so on. Negative integers are used in order to count
+ * from the tail, -1 is the last element, -2 the penultimate
+ * and so on. If the index is out of range 0 is returned.
+ *
+ * Returns an iterator at a specific offset 'idx' if element found
+ * Returns NULL if element not found */
+quicklistIter *quicklistGetIteratorEntryAtIdx(quicklist *quicklist, const long long idx,
+ quicklistEntry *entry)
+{
+ quicklistIter *iter = quicklistGetIteratorAtIdx(quicklist, AL_START_TAIL, idx);
+ if (!iter) return NULL;
+ assert(quicklistNext(iter, entry));
+ return iter;
+}
+
/* Rotate a quicklist whose tail is a plain node by relinking the whole
 * tail node to the head (no element copying needed). */
static void quicklistRotatePlain(quicklist *quicklist) {
    quicklistNode *new_head = quicklist->tail;
    quicklistNode *new_tail = quicklist->tail->prev;
    quicklist->head->prev = new_head;
    new_tail->next = NULL;
    new_head->next = quicklist->head;
    new_head->prev = NULL;
    quicklist->head = new_head;
    quicklist->tail = new_tail;
}
+
/* Rotate quicklist by moving the tail element to the head. */
void quicklistRotate(quicklist *quicklist) {
    if (quicklist->count <= 1)
        return;

    /* Plain tail: relink the node instead of copying the value. */
    if (unlikely(QL_NODE_IS_PLAIN(quicklist->tail))) {
        quicklistRotatePlain(quicklist);
        return;
    }

    /* First, get the tail entry */
    unsigned char *p = lpSeek(quicklist->tail->entry, -1);
    unsigned char *value, *tmp;
    long long longval;
    unsigned int sz;
    char longstr[32] = {0};
    tmp = lpGetValue(p, &sz, &longval);

    /* If value found is NULL, then lpGet populated longval instead */
    if (!tmp) {
        /* Write the longval as a string so we can re-add it */
        sz = ll2string(longstr, sizeof(longstr), longval);
        value = (unsigned char *)longstr;
    } else if (quicklist->len == 1) {
        /* Copy buffer since there could be a memory overlap when move
         * entity from tail to head in the same listpack. */
        value = zmalloc(sz);
        memcpy(value, tmp, sz);
    } else {
        value = tmp;
    }

    /* Add tail entry to head (must happen before tail is deleted). */
    quicklistPushHead(quicklist, value, sz);

    /* If quicklist has only one node, the head listpack is also the
     * tail listpack and PushHead() could have reallocated our single listpack,
     * which would make our pre-existing 'p' unusable. */
    if (quicklist->len == 1) {
        p = lpSeek(quicklist->tail->entry, -1);
    }

    /* Remove tail entry. */
    quicklistDelIndex(quicklist, quicklist->tail, &p);
    /* Only the zmalloc'd private copy (len==1 string case) needs freeing. */
    if (value != (unsigned char*)longstr && value != tmp)
        zfree(value);
}
+
/* pop from quicklist and return result in 'data' ptr. Value of 'data'
 * is the return value of 'saver' function pointer if the data is NOT a number.
 *
 * If the quicklist element is a long long, then the return value is returned in
 * 'sval'.
 *
 * 'where' is QUICKLIST_HEAD or QUICKLIST_TAIL.
 * All of 'data', 'sz' and 'sval' may be NULL if not wanted.
 *
 * Return value of 0 means no elements available.
 * Return value of 1 means check 'data' and 'sval' for values.
 * If 'data' is set, use 'data' and 'sz'.  Otherwise, use 'sval'. */
int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data,
                       size_t *sz, long long *sval,
                       void *(*saver)(unsigned char *data, size_t sz)) {
    unsigned char *p;
    unsigned char *vstr;
    unsigned int vlen;
    long long vlong;
    int pos = (where == QUICKLIST_HEAD) ? 0 : -1;

    if (quicklist->count == 0)
        return 0;

    /* Pre-clear the out parameters so callers always see defined values. */
    if (data)
        *data = NULL;
    if (sz)
        *sz = 0;
    if (sval)
        *sval = -123456789;

    quicklistNode *node;
    if (where == QUICKLIST_HEAD && quicklist->head) {
        node = quicklist->head;
    } else if (where == QUICKLIST_TAIL && quicklist->tail) {
        node = quicklist->tail;
    } else {
        return 0;
    }

    /* The head and tail should never be compressed */
    assert(node->encoding != QUICKLIST_NODE_ENCODING_LZF);

    if (unlikely(QL_NODE_IS_PLAIN(node))) {
        /* Plain node: the whole buffer is the single element. */
        if (data)
            *data = saver(node->entry, node->sz);
        if (sz)
            *sz = node->sz;
        quicklistDelIndex(quicklist, node, NULL);
        return 1;
    }

    p = lpSeek(node->entry, pos);
    vstr = lpGetValue(p, &vlen, &vlong);
    if (vstr) {
        if (data)
            *data = saver(vstr, vlen);
        if (sz)
            *sz = vlen;
    } else {
        /* Integer-encoded element: report through 'sval'. */
        if (data)
            *data = NULL;
        if (sval)
            *sval = vlong;
    }
    quicklistDelIndex(quicklist, node, &p);
    return 1;
}
+
+/* Return a malloc'd copy of data passed in */
+REDIS_STATIC void *_quicklistSaver(unsigned char *data, size_t sz) {
+ unsigned char *vstr;
+ if (data) {
+ vstr = zmalloc(sz);
+ memcpy(vstr, data, sz);
+ return vstr;
+ }
+ return NULL;
+}
+
+/* Default pop function
+ *
+ * Returns malloc'd value from quicklist */
+int quicklistPop(quicklist *quicklist, int where, unsigned char **data,
+ size_t *sz, long long *slong) {
+ unsigned char *vstr = NULL;
+ size_t vlen = 0;
+ long long vlong = 0;
+ if (quicklist->count == 0)
+ return 0;
+ int ret = quicklistPopCustom(quicklist, where, &vstr, &vlen, &vlong,
+ _quicklistSaver);
+ if (data)
+ *data = vstr;
+ if (slong)
+ *slong = vlong;
+ if (sz)
+ *sz = vlen;
+ return ret;
+}
+
+/* Wrapper to allow argument-based switching between HEAD/TAIL pop */
+void quicklistPush(quicklist *quicklist, void *value, const size_t sz,
+ int where) {
+ /* The head and tail should never be compressed (we don't attempt to decompress them) */
+ if (quicklist->head)
+ assert(quicklist->head->encoding != QUICKLIST_NODE_ENCODING_LZF);
+ if (quicklist->tail)
+ assert(quicklist->tail->encoding != QUICKLIST_NODE_ENCODING_LZF);
+
+ if (where == QUICKLIST_HEAD) {
+ quicklistPushHead(quicklist, value, sz);
+ } else if (where == QUICKLIST_TAIL) {
+ quicklistPushTail(quicklist, value, sz);
+ }
+}
+
/* Print info of quicklist which is used in debugCommand.
 * 'ql' is a quicklist handle passed as an opaque byte pointer; when 'full'
 * is non-zero the per-node payloads are printed as well (decompressing and
 * recompressing each node around the dump). */
void quicklistRepr(unsigned char *ql, int full) {
    int i = 0;
    quicklist *quicklist = (struct quicklist*) ql;
    /* NOTE(review): '%ld' assumes 'count'/'len' are signed long; if the
     * struct declares them unsigned long, '%lu' would be the exact
     * specifier — confirm against quicklist.h. */
    printf("{count : %ld}\n", quicklist->count);
    printf("{len : %ld}\n", quicklist->len);
    printf("{fill : %d}\n", quicklist->fill);
    printf("{compress : %d}\n", quicklist->compress);
    printf("{bookmark_count : %d}\n", quicklist->bookmark_count);
    quicklistNode* node = quicklist->head;

    /* Walk head→tail, printing a summary line per node. */
    while(node != NULL) {
        printf("{quicklist node(%d)\n", i++);
        printf("{container : %s, encoding: %s, size: %zu, count: %d, recompress: %d, attempted_compress: %d}\n",
               QL_NODE_IS_PLAIN(node) ? "PLAIN": "PACKED",
               (node->encoding == QUICKLIST_NODE_ENCODING_RAW) ? "RAW": "LZF",
               node->sz,
               node->count,
               node->recompress,
               node->attempted_compress);

        if (full) {
            /* Ensure the payload is readable, dump it, then restore the
             * node's previous compression state. */
            quicklistDecompressNode(node);
            if (node->container == QUICKLIST_NODE_CONTAINER_PACKED) {
                printf("{ listpack:\n");
                lpRepr(node->entry);
                printf("}\n");

            } else if (QL_NODE_IS_PLAIN(node)) {
                /* NOTE(review): prints the plain entry with %s, which
                 * assumes it is NUL-terminated — confirm for binary data. */
                printf("{ entry : %s }\n", node->entry);
            }
            printf("}\n");
            quicklistRecompressOnly(node);
        }
        node = node->next;
    }
}
+
+/* Create or update a bookmark in the list which will be updated to the next node
+ * automatically when the one referenced gets deleted.
+ * Returns 1 on success (creation of new bookmark or override of an existing one).
+ * Returns 0 on failure (reached the maximum supported number of bookmarks).
+ * NOTE: use short simple names, so that string compare on find is quick.
+ * NOTE: bookmark creation may re-allocate the quicklist, so the input pointer
+ may change and it's the caller responsibility to update the reference.
+ */
+int quicklistBookmarkCreate(quicklist **ql_ref, const char *name, quicklistNode *node) {
+ quicklist *ql = *ql_ref;
+ if (ql->bookmark_count >= QL_MAX_BM)
+ return 0;
+ quicklistBookmark *bm = _quicklistBookmarkFindByName(ql, name);
+ if (bm) {
+ bm->node = node;
+ return 1;
+ }
+ ql = zrealloc(ql, sizeof(quicklist) + (ql->bookmark_count+1) * sizeof(quicklistBookmark));
+ *ql_ref = ql;
+ ql->bookmarks[ql->bookmark_count].node = node;
+ ql->bookmarks[ql->bookmark_count].name = zstrdup(name);
+ ql->bookmark_count++;
+ return 1;
+}
+
+/* Find the quicklist node referenced by a named bookmark.
+ * When the bookmarked node is deleted the bookmark is updated to the next node,
+ * and if that's the last node, the bookmark is deleted (so find returns NULL). */
+quicklistNode *quicklistBookmarkFind(quicklist *ql, const char *name) {
+ quicklistBookmark *bm = _quicklistBookmarkFindByName(ql, name);
+ if (!bm) return NULL;
+ return bm->node;
+}
+
+/* Delete a named bookmark.
+ * returns 0 if bookmark was not found, and 1 if deleted.
+ * Note that the bookmark memory is not freed yet, and is kept for future use. */
+int quicklistBookmarkDelete(quicklist *ql, const char *name) {
+ quicklistBookmark *bm = _quicklistBookmarkFindByName(ql, name);
+ if (!bm)
+ return 0;
+ _quicklistBookmarkDelete(ql, bm);
+ return 1;
+}
+
+quicklistBookmark *_quicklistBookmarkFindByName(quicklist *ql, const char *name) {
+ unsigned i;
+ for (i=0; i<ql->bookmark_count; i++) {
+ if (!strcmp(ql->bookmarks[i].name, name)) {
+ return &ql->bookmarks[i];
+ }
+ }
+ return NULL;
+}
+
+quicklistBookmark *_quicklistBookmarkFindByNode(quicklist *ql, quicklistNode *node) {
+ unsigned i;
+ for (i=0; i<ql->bookmark_count; i++) {
+ if (ql->bookmarks[i].node == node) {
+ return &ql->bookmarks[i];
+ }
+ }
+ return NULL;
+}
+
+void _quicklistBookmarkDelete(quicklist *ql, quicklistBookmark *bm) {
+ int index = bm - ql->bookmarks;
+ zfree(bm->name);
+ ql->bookmark_count--;
+ memmove(bm, bm+1, (ql->bookmark_count - index)* sizeof(*bm));
+ /* NOTE: We do not shrink (realloc) the quicklist yet (to avoid resonance,
+ * it may be re-used later (a call to realloc may NOP). */
+}
+
+void quicklistBookmarksClear(quicklist *ql) {
+ while (ql->bookmark_count)
+ zfree(ql->bookmarks[--ql->bookmark_count].name);
+ /* NOTE: We do not shrink (realloc) the quick list. main use case for this
+ * function is just before releasing the allocation. */
+}
+
+/* The rest of this file is test cases and test helpers. */
+#ifdef REDIS_TEST
+#include <stdint.h>
+#include <sys/time.h>
+#include "testhelp.h"
+#include <stdlib.h>
+
/* Print a formatted error banner (no location info, no failure counting). */
#define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__)

/* Record an anonymous test failure: print and bump the local 'err' counter
 * (both ERROR and ERR expect an 'err' variable in the enclosing scope). */
#define ERROR \
    do { \
        printf("\tERROR!\n"); \
        err++; \
    } while (0)

/* Record a test failure with file/function/line context plus a message. */
#define ERR(x, ...) \
    do { \
        printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
        printf("ERROR! " x "\n", __VA_ARGS__); \
        err++; \
    } while (0)

/* Announce a test case (TEST_DESC takes printf-style parameters). */
#define TEST(name) printf("test — %s\n", name);
#define TEST_DESC(name, ...) printf("test — " name "\n", __VA_ARGS__);

/* Set to 1 to enable the verbose diagnostics printed by ql_info(). */
#define QL_TEST_VERBOSE 0

/* Silence unused-parameter warnings. */
#define UNUSED(x) (void)(x)
/* Dump basic quicklist stats (node count, element count, edge listpack
 * lengths) when QL_TEST_VERBOSE is enabled; otherwise a no-op. */
static void ql_info(quicklist *ql) {
#if QL_TEST_VERBOSE
    printf("Container length: %lu\n", ql->len);
    printf("Container size: %lu\n", ql->count);
    if (ql->head)
        printf("\t(zsize head: %lu)\n", lpLength(ql->head->entry));
    if (ql->tail)
        printf("\t(zsize tail: %lu)\n", lpLength(ql->tail->entry));
    printf("\n");
#else
    UNUSED(ql);
#endif
}
+
+/* Return the UNIX time in microseconds */
+static long long ustime(void) {
+ struct timeval tv;
+ long long ust;
+
+ gettimeofday(&tv, NULL);
+ ust = ((long long)tv.tv_sec) * 1000000;
+ ust += tv.tv_usec;
+ return ust;
+}
+
/* Return the UNIX time in milliseconds (ustime() truncated to ms). */
static long long mstime(void) { return ustime() / 1000; }
+
/* Iterate over an entire quicklist.
 * Print the list if 'print' == 1.
 *
 * 'forward' selects head→tail (1) or tail→head (0) traversal.
 *
 * Returns physical count of elements found by iterating over the list. */
static int _itrprintr(quicklist *ql, int print, int forward) {
    quicklistIter *iter =
        quicklistGetIterator(ql, forward ? AL_START_HEAD : AL_START_TAIL);
    quicklistEntry entry;
    int i = 0;
    int p = 0; /* number of distinct quicklist nodes visited so far */
    quicklistNode *prev = NULL;
    while (quicklistNext(iter, &entry)) {
        if (entry.node != prev) {
            /* Count the number of list nodes too */
            p++;
            prev = entry.node;
        }
        if (print) {
            /* Clamp the printed width to 1MB to keep output bounded. */
            int size = (entry.sz > (1<<20)) ? 1<<20 : entry.sz;
            printf("[%3d (%2d)]: [%.*s] (%lld)\n", i, p, size,
                   (char *)entry.value, entry.longval);
        }
        i++;
    }
    quicklistReleaseIterator(iter);
    return i;
}
/* Forward (head→tail) traversal; returns the element count observed. */
static int itrprintr(quicklist *ql, int print) {
    return _itrprintr(ql, print, 1);
}
+
/* Reverse (tail→head) traversal; returns the element count observed. */
static int itrprintr_rev(quicklist *ql, int print) {
    return _itrprintr(ql, print, 0);
}
+
/* Run _ql_verify() and fold its error count into the local 'err' counter. */
#define ql_verify(a, b, c, d, e) \
    do { \
        err += _ql_verify((a), (b), (c), (d), (e)); \
    } while (0)
+
/* Verify that only the nodes outside the compress-depth window are
 * LZF-compressed (the first/last 'compress' nodes must stay RAW).
 * Returns the number of violations found. */
static int _ql_verify_compress(quicklist *ql) {
    int errors = 0;
    if (quicklistAllowsCompression(ql)) {
        quicklistNode *node = ql->head;
        /* Nodes at index < low_raw or >= high_raw must remain RAW. */
        unsigned int low_raw = ql->compress;
        unsigned int high_raw = ql->len - ql->compress;

        for (unsigned int at = 0; at < ql->len; at++, node = node->next) {
            if (node && (at < low_raw || at >= high_raw)) {
                if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
                    yell("Incorrect compression: node %d is "
                         "compressed at depth %d ((%u, %u); total "
                         "nodes: %lu; size: %zu; recompress: %d)",
                         at, ql->compress, low_raw, high_raw, ql->len, node->sz,
                         node->recompress);
                    errors++;
                }
            } else {
                /* NOTE(review): this branch dereferences 'node' without the
                 * NULL guard used above — it relies on ql->len matching the
                 * physical chain length; confirm that invariant holds. */
                if (node->encoding != QUICKLIST_NODE_ENCODING_LZF &&
                    !node->attempted_compress) {
                    yell("Incorrect non-compression: node %d is NOT "
                         "compressed at depth %d ((%u, %u); total "
                         "nodes: %lu; size: %zu; recompress: %d; attempted: %d)",
                         at, ql->compress, low_raw, high_raw, ql->len, node->sz,
                         node->recompress, node->attempted_compress);
                    errors++;
                }
            }
        }
    }
    return errors;
}
+
/* Verify list metadata matches physical list contents.
 * Checks the cached 'len'/'count' against the expected values, walks the
 * list in both directions to cross-check the cached element count, then
 * validates the head/tail node counts and compression invariants.
 * Returns the total number of mismatches found. */
static int _ql_verify(quicklist *ql, uint32_t len, uint32_t count,
                      uint32_t head_count, uint32_t tail_count) {
    int errors = 0;

    ql_info(ql);
    if (len != ql->len) {
        yell("quicklist length wrong: expected %d, got %lu", len, ql->len);
        errors++;
    }

    if (count != ql->count) {
        yell("quicklist count wrong: expected %d, got %lu", count, ql->count);
        errors++;
    }

    /* Physical forward walk must agree with the cached element count. */
    int loopr = itrprintr(ql, 0);
    if (loopr != (int)ql->count) {
        yell("quicklist cached count not match actual count: expected %lu, got "
             "%d",
             ql->count, loopr);
        errors++;
    }

    /* Forward and reverse walks must visit the same number of elements. */
    int rloopr = itrprintr_rev(ql, 0);
    if (loopr != rloopr) {
        yell("quicklist has different forward count than reverse count! "
             "Forward count is %d, reverse count is %d.",
             loopr, rloopr);
        errors++;
    }

    if (ql->len == 0 && !errors) {
        return errors;
    }

    /* Flag a head-count mismatch only when the expectation matches neither
     * the cached node count nor the actual listpack length. */
    if (ql->head && head_count != ql->head->count &&
        head_count != lpLength(ql->head->entry)) {
        yell("quicklist head count wrong: expected %d, "
             "got cached %d vs. actual %lu",
             head_count, ql->head->count, lpLength(ql->head->entry));
        errors++;
    }

    if (ql->tail && tail_count != ql->tail->count &&
        tail_count != lpLength(ql->tail->entry)) {
        yell("quicklist tail count wrong: expected %d, "
             "got cached %u vs. actual %lu",
             tail_count, ql->tail->count, lpLength(ql->tail->entry));
        errors++;
    }

    errors += _ql_verify_compress(ql);
    return errors;
}
+
+/* Release iterator and verify compress correctly. */
+static void ql_release_iterator(quicklistIter *iter) {
+ quicklist *ql = NULL;
+ if (iter) ql = iter->quicklist;
+ quicklistReleaseIterator(iter);
+ if (ql) assert(!_ql_verify_compress(ql));
+}
+
/* Generate new string concatenating integer i against string 'prefix'.
 * NOTE: returns a pointer to a static buffer, so each call overwrites the
 * previous result (not reentrant). */
static char *genstr(char *prefix, int i) {
    static char buf[64];
    snprintf(buf, sizeof(buf), "%s%d", prefix, i);
    return buf;
}
+
/* Fill 'target' with 'sz' random characters, all drawn from one randomly
 * chosen class: lowercase letters, digits, or uppercase letters.
 * (Call sequence of rand() is: one draw for the class, then one per byte.) */
static void randstring(unsigned char *target, size_t sz) {
    int lo = 0, hi = 0;
    switch (rand() % 3) {
    case 0: lo = 'a'; hi = 'z'; break;
    case 1: lo = '0'; hi = '9'; break;
    case 2: lo = 'A'; hi = 'Z'; break;
    default: assert(NULL);
    }

    for (size_t i = 0; i < sz; i++)
        target[i] = lo + rand() % (hi - lo + 1);
}
+
+/* main test, but callable from other files */
+int quicklistTest(int argc, char *argv[], int flags) {
+ UNUSED(argc);
+ UNUSED(argv);
+
+ int accurate = (flags & REDIS_TEST_ACCURATE);
+ unsigned int err = 0;
+ int optimize_start =
+ -(int)(sizeof(optimization_level) / sizeof(*optimization_level));
+
+ printf("Starting optimization offset at: %d\n", optimize_start);
+
+ int options[] = {0, 1, 2, 3, 4, 5, 6, 10};
+ int fills[] = {-5, -4, -3, -2, -1, 0,
+ 1, 2, 32, 66, 128, 999};
+ size_t option_count = sizeof(options) / sizeof(*options);
+ int fill_count = (int)(sizeof(fills) / sizeof(*fills));
+ long long runtime[option_count];
+
+ for (int _i = 0; _i < (int)option_count; _i++) {
+ printf("Testing Compression option %d\n", options[_i]);
+ long long start = mstime();
+ quicklistIter *iter;
+
+ TEST("create list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("add to tail of empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "hello", 6);
+ /* 1 for head and 1 for tail because 1 node = head = tail */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("add to head of empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ /* 1 for head and 1 for tail because 1 node = head = tail */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("add to tail 5x at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 5; i++)
+ quicklistPushTail(ql, genstr("hello", i), 32);
+ if (ql->count != 5)
+ ERROR;
+ if (fills[f] == 32)
+ ql_verify(ql, 1, 5, 5, 5);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("add to head 5x at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 5; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ if (ql->count != 5)
+ ERROR;
+ if (fills[f] == 32)
+ ql_verify(ql, 1, 5, 5, 5);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("add to tail 500x at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i), 64);
+ if (ql->count != 500)
+ ERROR;
+ if (fills[f] == 32)
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("add to head 500x at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ if (ql->count != 500)
+ ERROR;
+ if (fills[f] == 32)
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("rotate empty") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistRotate(ql);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("Comprassion Plain node") {
+ char buf[256];
+ quicklistisSetPackedThreshold(1);
+ quicklist *ql = quicklistNew(-2, 1);
+ for (int i = 0; i < 500; i++) {
+ /* Set to 256 to allow the node to be triggered to compress,
+ * if it is less than 48(nocompress), the test will be successful. */
+ snprintf(buf, sizeof(buf), "hello%d", i);
+ quicklistPushHead(ql, buf, 256);
+ }
+
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ quicklistEntry entry;
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ snprintf(buf, sizeof(buf), "hello%d", i);
+ if (strcmp((char *)entry.value, buf))
+ ERR("value [%s] didn't match [%s] at position %d",
+ entry.value, buf, i);
+ i++;
+ }
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("NEXT plain node")
+ {
+ packed_threshold = 3;
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ char *strings[] = {"hello1", "hello2", "h3", "h4", "hello5"};
+
+ for (int i = 0; i < 5; ++i)
+ quicklistPushHead(ql, strings[i], strlen(strings[i]));
+
+ quicklistEntry entry;
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ int j = 0;
+
+ while(quicklistNext(iter, &entry) != 0) {
+ assert(strncmp(strings[j], (char *)entry.value, strlen(strings[j])) == 0);
+ j++;
+ }
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("rotate plain node ") {
+ unsigned char *data = NULL;
+ size_t sz;
+ long long lv;
+ int i =0;
+ packed_threshold = 5;
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello1", 6);
+ quicklistPushHead(ql, "hello4", 6);
+ quicklistPushHead(ql, "hello3", 6);
+ quicklistPushHead(ql, "hello2", 6);
+ quicklistRotate(ql);
+
+ for(i = 1 ; i < 5; i++) {
+ quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ int temp_char = data[5];
+ zfree(data);
+ assert(temp_char == ('0' + i));
+ }
+
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ packed_threshold = (1 << 30);
+ }
+
+ TEST("rotate one val once") {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistRotate(ql);
+ /* Ignore compression verify because listpack is
+ * too small to compress. */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("rotate 500 val 5000 times at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ quicklistPushHead(ql, "900", 3);
+ quicklistPushHead(ql, "7000", 4);
+ quicklistPushHead(ql, "-1200", 5);
+ quicklistPushHead(ql, "42", 2);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 64);
+ ql_info(ql);
+ for (int i = 0; i < 5000; i++) {
+ ql_info(ql);
+ quicklistRotate(ql);
+ }
+ if (fills[f] == 1)
+ ql_verify(ql, 504, 504, 1, 1);
+ else if (fills[f] == 2)
+ ql_verify(ql, 252, 504, 2, 2);
+ else if (fills[f] == 32)
+ ql_verify(ql, 16, 504, 32, 24);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("pop empty") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPop(ql, QUICKLIST_HEAD, NULL, NULL, NULL);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop 1 string from 1") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ char *populate = genstr("hello", 331);
+ quicklistPushHead(ql, populate, 32);
+ unsigned char *data;
+ size_t sz;
+ long long lv;
+ ql_info(ql);
+ assert(quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv));
+ assert(data != NULL);
+ assert(sz == 32);
+ if (strcmp(populate, (char *)data)) {
+ int size = sz;
+ ERR("Pop'd value (%.*s) didn't equal original value (%s)", size,
+ data, populate);
+ }
+ zfree(data);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 1 number from 1") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "55513", 5);
+ unsigned char *data;
+ size_t sz;
+ long long lv;
+ ql_info(ql);
+ assert(quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv));
+ assert(data == NULL);
+ assert(lv == 55513);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 500 from 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_info(ql);
+ for (int i = 0; i < 500; i++) {
+ unsigned char *data;
+ size_t sz;
+ long long lv;
+ int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(ret == 1);
+ assert(data != NULL);
+ assert(sz == 32);
+ if (strcmp(genstr("hello", 499 - i), (char *)data)) {
+ int size = sz;
+ ERR("Pop'd value (%.*s) didn't equal original value (%s)",
+ size, data, genstr("hello", 499 - i));
+ }
+ zfree(data);
+ }
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 5000 from 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 5000; i++) {
+ unsigned char *data;
+ size_t sz;
+ long long lv;
+ int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ if (i < 500) {
+ assert(ret == 1);
+ assert(data != NULL);
+ assert(sz == 32);
+ if (strcmp(genstr("hello", 499 - i), (char *)data)) {
+ int size = sz;
+ ERR("Pop'd value (%.*s) didn't equal original value "
+ "(%s)",
+ size, data, genstr("hello", 499 - i));
+ }
+ zfree(data);
+ } else {
+ assert(ret == 0);
+ }
+ }
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("iterate forward over 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ int i = 499, count = 0;
+ while (quicklistNext(iter, &entry)) {
+ char *h = genstr("hello", i);
+ if (strcmp((char *)entry.value, h))
+ ERR("value [%s] didn't match [%s] at position %d",
+ entry.value, h, i);
+ i--;
+ count++;
+ }
+ if (count != 500)
+ ERR("Didn't iterate over exactly 500 elements (%d)", i);
+ ql_verify(ql, 16, 500, 20, 32);
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("iterate reverse over 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ quicklistEntry entry;
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ char *h = genstr("hello", i);
+ if (strcmp((char *)entry.value, h))
+ ERR("value [%s] didn't match [%s] at position %d",
+ entry.value, h, i);
+ i++;
+ }
+ if (i != 500)
+ ERR("Didn't iterate over exactly 500 elements (%d)", i);
+ ql_verify(ql, 16, 500, 20, 32);
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert after 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry);
+ quicklistInsertAfter(iter, &entry, "abc", 4);
+ ql_release_iterator(iter);
+ ql_verify(ql, 1, 2, 2, 2);
+
+ /* verify results */
+ iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry);
+ int sz = entry.sz;
+ if (strncmp((char *)entry.value, "hello", 5)) {
+ ERR("Value 0 didn't match, instead got: %.*s", sz,
+ entry.value);
+ }
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry);
+ sz = entry.sz;
+ if (strncmp((char *)entry.value, "abc", 3)) {
+ ERR("Value 1 didn't match, instead got: %.*s", sz,
+ entry.value);
+ }
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert before 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry);
+ quicklistInsertBefore(iter, &entry, "abc", 4);
+ ql_release_iterator(iter);
+ ql_verify(ql, 1, 2, 2, 2);
+
+ /* verify results */
+ iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry);
+ int sz = entry.sz;
+ if (strncmp((char *)entry.value, "abc", 3)) {
+ ERR("Value 0 didn't match, instead got: %.*s", sz,
+ entry.value);
+ }
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry);
+ sz = entry.sz;
+ if (strncmp((char *)entry.value, "hello", 5)) {
+ ERR("Value 1 didn't match, instead got: %.*s", sz,
+ entry.value);
+ }
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert head while head node is full") {
+ quicklist *ql = quicklistNew(4, options[_i]);
+ for (int i = 0; i < 10; i++)
+ quicklistPushTail(ql, genstr("hello", i), 6);
+ quicklistSetFill(ql, -1);
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, -10, &entry);
+ char buf[4096] = {0};
+ quicklistInsertBefore(iter, &entry, buf, 4096);
+ ql_release_iterator(iter);
+ ql_verify(ql, 4, 11, 1, 2);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert tail while tail node is full") {
+ quicklist *ql = quicklistNew(4, options[_i]);
+ for (int i = 0; i < 10; i++)
+ quicklistPushHead(ql, genstr("hello", i), 6);
+ quicklistSetFill(ql, -1);
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry);
+ char buf[4096] = {0};
+ quicklistInsertAfter(iter, &entry, buf, 4096);
+ ql_release_iterator(iter);
+ ql_verify(ql, 4, 11, 2, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("insert once in elements while iterating at compress %d",
+ options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ quicklistPushTail(ql, "abc", 3);
+ quicklistSetFill(ql, 1);
+ quicklistPushTail(ql, "def", 3); /* force to unique node */
+ quicklistSetFill(ql, f);
+ quicklistPushTail(ql, "bob", 3); /* force to reset for +3 */
+ quicklistPushTail(ql, "foo", 3);
+ quicklistPushTail(ql, "zoo", 3);
+
+ itrprintr(ql, 0);
+ /* insert "bar" before "bob" while iterating over list. */
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ while (quicklistNext(iter, &entry)) {
+ if (!strncmp((char *)entry.value, "bob", 3)) {
+ /* Insert as fill = 1 so it spills into new node. */
+ quicklistInsertBefore(iter, &entry, "bar", 3);
+ break; /* didn't we fix insert-while-iterating? */
+ }
+ }
+ ql_release_iterator(iter);
+ itrprintr(ql, 0);
+
+ /* verify results */
+ iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry);
+ int sz = entry.sz;
+
+ if (strncmp((char *)entry.value, "abc", 3))
+ ERR("Value 0 didn't match, instead got: %.*s", sz,
+ entry.value);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry);
+ if (strncmp((char *)entry.value, "def", 3))
+ ERR("Value 1 didn't match, instead got: %.*s", sz,
+ entry.value);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 2, &entry);
+ if (strncmp((char *)entry.value, "bar", 3))
+ ERR("Value 2 didn't match, instead got: %.*s", sz,
+ entry.value);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 3, &entry);
+ if (strncmp((char *)entry.value, "bob", 3))
+ ERR("Value 3 didn't match, instead got: %.*s", sz,
+ entry.value);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 4, &entry);
+ if (strncmp((char *)entry.value, "foo", 3))
+ ERR("Value 4 didn't match, instead got: %.*s", sz,
+ entry.value);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 5, &entry);
+ if (strncmp((char *)entry.value, "zoo", 3))
+ ERR("Value 5 didn't match, instead got: %.*s", sz,
+ entry.value);
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("insert [before] 250 new in middle of 500 elements at compress %d",
+ options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 250; i++) {
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, 250, &entry);
+ quicklistInsertBefore(iter, &entry, genstr("abc", i), 32);
+ ql_release_iterator(iter);
+ }
+ if (fills[f] == 32)
+ ql_verify(ql, 25, 750, 32, 20);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("insert [after] 250 new in middle of 500 elements at compress %d",
+ options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 250; i++) {
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, 250, &entry);
+ quicklistInsertAfter(iter, &entry, genstr("abc", i), 32);
+ ql_release_iterator(iter);
+ }
+
+ if (ql->count != 750)
+ ERR("List size not 750, but rather %ld", ql->count);
+
+ if (fills[f] == 32)
+ ql_verify(ql, 26, 750, 20, 32);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("duplicate empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ TEST("duplicate list of 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, genstr("hello", 3), 32);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ TEST("duplicate list of 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 16, 500, 20, 32);
+
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 16, 500, 20, 32);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ for (int f = 0; f < fill_count; f++) {
+ TEST_DESC("index 1,200 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry);
+ if (strcmp((char *)entry.value, "hello2") != 0)
+ ERR("Value: %s", entry.value);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 200, &entry);
+ if (strcmp((char *)entry.value, "hello201") != 0)
+ ERR("Value: %s", entry.value);
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index -1,-2 from 500 list at fill %d at compress %d",
+ fills[f], options[_i]) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry);
+ if (strcmp((char *)entry.value, "hello500") != 0)
+ ERR("Value: %s", entry.value);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, -2, &entry);
+ if (strcmp((char *)entry.value, "hello499") != 0)
+ ERR("Value: %s", entry.value);
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index -100 from 500 list at fill %d at compress %d",
+ fills[f], options[_i]) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, -100, &entry);
+ if (strcmp((char *)entry.value, "hello401") != 0)
+ ERR("Value: %s", entry.value);
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index too big +1 from 50 list at fill %d at compress %d",
+ fills[f], options[_i]) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ for (int i = 0; i < 50; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ int sz = entry.sz;
+ iter = quicklistGetIteratorEntryAtIdx(ql, 50, &entry);
+ if (iter)
+ ERR("Index found at 50 with 50 list: %.*s", sz,
+ entry.value);
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("delete range empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistDelRange(ql, 5, 20);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete range of entire node in list of one node") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 32; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 1, 32, 32, 32);
+ quicklistDelRange(ql, 0, 32);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete range of entire node with overflow counts") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 32; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 1, 32, 32, 32);
+ quicklistDelRange(ql, 0, 128);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete middle 100 of 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, 200, 100);
+ ql_verify(ql, 14, 400, 32, 20);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete less than fill but across nodes") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, 60, 10);
+ ql_verify(ql, 16, 490, 32, 20);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 1 from 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, -1, 1);
+ ql_verify(ql, 16, 499, 32, 19);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 1 from 500 list with overflow counts") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, -1, 128);
+ ql_verify(ql, 16, 499, 32, 19);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 100 from 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistDelRange(ql, -100, 100);
+ ql_verify(ql, 13, 400, 32, 16);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete -10 count 5 from 50 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 50; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 2, 50, 32, 18);
+ quicklistDelRange(ql, -10, 5);
+ ql_verify(ql, 2, 45, 32, 13);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers only list read") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "1111", 4);
+ quicklistPushTail(ql, "2222", 4);
+ quicklistPushTail(ql, "3333", 4);
+ quicklistPushTail(ql, "4444", 4);
+ ql_verify(ql, 1, 4, 4, 4);
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry);
+ if (entry.longval != 1111)
+ ERR("Not 1111, %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry);
+ if (entry.longval != 2222)
+ ERR("Not 2222, %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 2, &entry);
+ if (entry.longval != 3333)
+ ERR("Not 3333, %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 3, &entry);
+ if (entry.longval != 4444)
+ ERR("Not 4444, %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 4, &entry);
+ if (iter)
+ ERR("Index past elements: %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry);
+ if (entry.longval != 4444)
+ ERR("Not 4444 (reverse), %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, -2, &entry);
+ if (entry.longval != 3333)
+ ERR("Not 3333 (reverse), %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, -3, &entry);
+ if (entry.longval != 2222)
+ ERR("Not 2222 (reverse), %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, -4, &entry);
+ if (entry.longval != 1111)
+ ERR("Not 1111 (reverse), %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, -5, &entry);
+ if (iter)
+ ERR("Index past elements (reverse), %lld", entry.longval);
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers larger list read") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 5000; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20);
+ quicklistEntry entry;
+ for (int i = 0; i < 5000; i++) {
+ iter = quicklistGetIteratorEntryAtIdx(ql, i, &entry);
+ if (entry.longval != nums[i])
+ ERR("[%d] Not longval %lld but rather %lld", i, nums[i],
+ entry.longval);
+ entry.longval = 0xdeadbeef;
+ ql_release_iterator(iter);
+ }
+ iter = quicklistGetIteratorEntryAtIdx(ql, 5000, &entry);
+ if (strncmp((char *)entry.value, "xxxxxxxxxxxxxxxxxxxx", 20))
+ ERR("String val not match: %s", entry.value);
+ ql_verify(ql, 157, 5001, 32, 9);
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers larger list read B") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "99", 2);
+ quicklistPushTail(ql, "98", 2);
+ quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20);
+ quicklistPushTail(ql, "96", 2);
+ quicklistPushTail(ql, "95", 2);
+ quicklistReplaceAtIndex(ql, 1, "foo", 3);
+ quicklistReplaceAtIndex(ql, -1, "bar", 3);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("lrem test at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ char *words[] = {"abc", "foo", "bar", "foobar", "foobared",
+ "zap", "bar", "test", "foo"};
+ char *result[] = {"abc", "foo", "foobar", "foobared",
+ "zap", "test", "foo"};
+ char *resultB[] = {"abc", "foo", "foobar",
+ "foobared", "zap", "test"};
+ for (int i = 0; i < 9; i++)
+ quicklistPushTail(ql, words[i], strlen(words[i]));
+
+ /* lrem 0 bar */
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(&entry, (unsigned char *)"bar", 3)) {
+ quicklistDelEntry(iter, &entry);
+ }
+ i++;
+ }
+ ql_release_iterator(iter);
+
+ /* check result of lrem 0 bar */
+ iter = quicklistGetIterator(ql, AL_START_HEAD);
+ i = 0;
+ while (quicklistNext(iter, &entry)) {
+ /* Result must be: abc, foo, foobar, foobared, zap, test,
+ * foo */
+ int sz = entry.sz;
+ if (strncmp((char *)entry.value, result[i], entry.sz)) {
+ ERR("No match at position %d, got %.*s instead of %s",
+ i, sz, entry.value, result[i]);
+ }
+ i++;
+ }
+ ql_release_iterator(iter);
+
+ quicklistPushTail(ql, "foo", 3);
+
+ /* lrem -2 foo */
+ iter = quicklistGetIterator(ql, AL_START_TAIL);
+ i = 0;
+ int del = 2;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(&entry, (unsigned char *)"foo", 3)) {
+ quicklistDelEntry(iter, &entry);
+ del--;
+ }
+ if (!del)
+ break;
+ i++;
+ }
+ ql_release_iterator(iter);
+
+ /* check result of lrem -2 foo */
+ /* (we're ignoring the '2' part and still deleting all foo
+ * because
+ * we only have two foo) */
+ iter = quicklistGetIterator(ql, AL_START_TAIL);
+ i = 0;
+ size_t resB = sizeof(resultB) / sizeof(*resultB);
+ while (quicklistNext(iter, &entry)) {
+ /* Result must be: abc, foo, foobar, foobared, zap, test,
+ * foo */
+ int sz = entry.sz;
+ if (strncmp((char *)entry.value, resultB[resB - 1 - i],
+ sz)) {
+ ERR("No match at position %d, got %.*s instead of %s",
+ i, sz, entry.value, resultB[resB - 1 - i]);
+ }
+ i++;
+ }
+
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("iterate reverse + delete at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ quicklistPushTail(ql, "abc", 3);
+ quicklistPushTail(ql, "def", 3);
+ quicklistPushTail(ql, "hij", 3);
+ quicklistPushTail(ql, "jkl", 3);
+ quicklistPushTail(ql, "oop", 3);
+
+ quicklistEntry entry;
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(&entry, (unsigned char *)"hij", 3)) {
+ quicklistDelEntry(iter, &entry);
+ }
+ i++;
+ }
+ ql_release_iterator(iter);
+
+ if (i != 5)
+ ERR("Didn't iterate 5 times, iterated %d times.", i);
+
+ /* Check results after deletion of "hij" */
+ iter = quicklistGetIterator(ql, AL_START_HEAD);
+ i = 0;
+ char *vals[] = {"abc", "def", "jkl", "oop"};
+ while (quicklistNext(iter, &entry)) {
+ if (!quicklistCompare(&entry, (unsigned char *)vals[i],
+ 3)) {
+ ERR("Value at %d didn't match %s\n", i, vals[i]);
+ }
+ i++;
+ }
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("iterator at index test at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 760; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+
+ quicklistEntry entry;
+ quicklistIter *iter =
+ quicklistGetIteratorAtIdx(ql, AL_START_HEAD, 437);
+ int i = 437;
+ while (quicklistNext(iter, &entry)) {
+ if (entry.longval != nums[i])
+ ERR("Expected %lld, but got %lld", entry.longval,
+ nums[i]);
+ i++;
+ }
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("ltrim test A at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 32; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (fills[f] == 32)
+ ql_verify(ql, 1, 32, 32, 32);
+ /* ltrim 25 53 (keep [25,32] inclusive = 7 remaining) */
+ quicklistDelRange(ql, 0, 25);
+ quicklistDelRange(ql, 0, 0);
+ quicklistEntry entry;
+ for (int i = 0; i < 7; i++) {
+ iter = quicklistGetIteratorEntryAtIdx(ql, i, &entry);
+ if (entry.longval != nums[25 + i])
+ ERR("Deleted invalid range! Expected %lld but got "
+ "%lld",
+ entry.longval, nums[25 + i]);
+ ql_release_iterator(iter);
+ }
+ if (fills[f] == 32)
+ ql_verify(ql, 1, 7, 7, 7);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("ltrim test B at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ /* Force-disable compression because our 33 sequential
+ * integers don't compress and the check always fails. */
+ quicklist *ql = quicklistNew(fills[f], QUICKLIST_NOCOMPRESS);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (fills[f] == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ /* ltrim 5 16 (keep [5,16] inclusive = 12 remaining) */
+ quicklistDelRange(ql, 0, 5);
+ quicklistDelRange(ql, -16, 16);
+ if (fills[f] == 32)
+ ql_verify(ql, 1, 12, 12, 12);
+ quicklistEntry entry;
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry);
+ if (entry.longval != 5)
+ ERR("A: longval not 5, but %lld", entry.longval);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry);
+ if (entry.longval != 16)
+ ERR("B! got instead: %lld", entry.longval);
+ quicklistPushTail(ql, "bobobob", 7);
+ ql_release_iterator(iter);
+
+ iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry);
+ int sz = entry.sz;
+ if (strncmp((char *)entry.value, "bobobob", 7))
+ ERR("Tail doesn't match bobobob, it's %.*s instead",
+ sz, entry.value);
+ ql_release_iterator(iter);
+
+ for (int i = 0; i < 12; i++) {
+ iter = quicklistGetIteratorEntryAtIdx(ql, i, &entry);
+ if (entry.longval != nums[5 + i])
+ ERR("Deleted invalid range! Expected %lld but got "
+ "%lld",
+ entry.longval, nums[5 + i]);
+ ql_release_iterator(iter);
+ }
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("ltrim test C at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (fills[f] == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ /* ltrim 3 3 (keep [3,3] inclusive = 1 remaining) */
+ quicklistDelRange(ql, 0, 3);
+ quicklistDelRange(ql, -29,
+ 4000); /* make sure not loop forever */
+ if (fills[f] == 32)
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistEntry entry;
+ iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry);
+ if (entry.longval != -5157318210846258173)
+ ERROR;
+ ql_release_iterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST_DESC("ltrim test D at compress %d", options[_i]) {
+ for (int f = 0; f < fill_count; f++) {
+ quicklist *ql = quicklistNew(fills[f], options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (fills[f] == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ quicklistDelRange(ql, -12, 3);
+ if (ql->count != 30)
+ ERR("Didn't delete exactly three elements! Count is: %lu",
+ ql->count);
+ quicklistRelease(ql);
+ }
+ }
+
+ long long stop = mstime();
+ runtime[_i] = stop - start;
+ }
+
+ /* Run a longer test of compression depth outside of primary test loop. */
+ int list_sizes[] = {250, 251, 500, 999, 1000};
+ long long start = mstime();
+ int list_count = accurate ? (int)(sizeof(list_sizes) / sizeof(*list_sizes)) : 1;
+ for (int list = 0; list < list_count; list++) {
+ TEST_DESC("verify specific compression of interior nodes with %d list ",
+ list_sizes[list]) {
+ for (int f = 0; f < fill_count; f++) {
+ for (int depth = 1; depth < 40; depth++) {
+ /* skip over many redundant test cases */
+ quicklist *ql = quicklistNew(fills[f], depth);
+ for (int i = 0; i < list_sizes[list]; i++) {
+ quicklistPushTail(ql, genstr("hello TAIL", i + 1), 64);
+ quicklistPushHead(ql, genstr("hello HEAD", i + 1), 64);
+ }
+
+ for (int step = 0; step < 2; step++) {
+ /* test remove node */
+ if (step == 1) {
+ for (int i = 0; i < list_sizes[list] / 2; i++) {
+ unsigned char *data;
+ assert(quicklistPop(ql, QUICKLIST_HEAD, &data,
+ NULL, NULL));
+ zfree(data);
+ assert(quicklistPop(ql, QUICKLIST_TAIL, &data,
+ NULL, NULL));
+ zfree(data);
+ }
+ }
+ quicklistNode *node = ql->head;
+ unsigned int low_raw = ql->compress;
+ unsigned int high_raw = ql->len - ql->compress;
+
+ for (unsigned int at = 0; at < ql->len;
+ at++, node = node->next) {
+ if (at < low_raw || at >= high_raw) {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
+ ERR("Incorrect compression: node %d is "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %lu; size: %zu)",
+ at, depth, low_raw, high_raw, ql->len,
+ node->sz);
+ }
+ } else {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_LZF) {
+ ERR("Incorrect non-compression: node %d is NOT "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %lu; size: %zu; attempted: %d)",
+ at, depth, low_raw, high_raw, ql->len,
+ node->sz, node->attempted_compress);
+ }
+ }
+ }
+ }
+
+ quicklistRelease(ql);
+ }
+ }
+ }
+ }
+ long long stop = mstime();
+
+ printf("\n");
+ for (size_t i = 0; i < option_count; i++)
+ printf("Test Loop %02d: %0.2f seconds.\n", options[i],
+ (float)runtime[i] / 1000);
+ printf("Compressions: %0.2f seconds.\n", (float)(stop - start) / 1000);
+ printf("\n");
+
+ TEST("bookmark get updated to next item") {
+ quicklist *ql = quicklistNew(1, 0);
+ quicklistPushTail(ql, "1", 1);
+ quicklistPushTail(ql, "2", 1);
+ quicklistPushTail(ql, "3", 1);
+ quicklistPushTail(ql, "4", 1);
+ quicklistPushTail(ql, "5", 1);
+ assert(ql->len==5);
+ /* add two bookmarks, one pointing to the node before the last. */
+ assert(quicklistBookmarkCreate(&ql, "_dummy", ql->head->next));
+ assert(quicklistBookmarkCreate(&ql, "_test", ql->tail->prev));
+ /* test that the bookmark returns the right node, delete it and see that the bookmark points to the last node */
+ assert(quicklistBookmarkFind(ql, "_test") == ql->tail->prev);
+ assert(quicklistDelRange(ql, -2, 1));
+ assert(quicklistBookmarkFind(ql, "_test") == ql->tail);
+ /* delete the last node, and see that the bookmark was deleted. */
+ assert(quicklistDelRange(ql, -1, 1));
+ assert(quicklistBookmarkFind(ql, "_test") == NULL);
+ /* test that other bookmarks aren't affected */
+ assert(quicklistBookmarkFind(ql, "_dummy") == ql->head->next);
+ assert(quicklistBookmarkFind(ql, "_missing") == NULL);
+ assert(ql->len==3);
+ quicklistBookmarksClear(ql); /* for coverage */
+ assert(quicklistBookmarkFind(ql, "_dummy") == NULL);
+ quicklistRelease(ql);
+ }
+
+ TEST("bookmark limit") {
+ int i;
+ quicklist *ql = quicklistNew(1, 0);
+ quicklistPushHead(ql, "1", 1);
+ for (i=0; i<QL_MAX_BM; i++)
+ assert(quicklistBookmarkCreate(&ql, genstr("",i), ql->head));
+ /* when all bookmarks are used, creation fails */
+ assert(!quicklistBookmarkCreate(&ql, "_test", ql->head));
+ /* delete one and see that we can now create another */
+ assert(quicklistBookmarkDelete(ql, "0"));
+ assert(quicklistBookmarkCreate(&ql, "_test", ql->head));
+ /* delete one and see that the rest survive */
+ assert(quicklistBookmarkDelete(ql, "_test"));
+ for (i=1; i<QL_MAX_BM; i++)
+ assert(quicklistBookmarkFind(ql, genstr("",i)) == ql->head);
+ /* make sure the deleted ones are indeed gone */
+ assert(!quicklistBookmarkFind(ql, "0"));
+ assert(!quicklistBookmarkFind(ql, "_test"));
+ quicklistRelease(ql);
+ }
+
+ if (flags & REDIS_TEST_LARGE_MEMORY) {
+ TEST("compress and decompress quicklist listpack node") {
+ quicklistNode *node = quicklistCreateNode();
+ node->entry = lpNew(0);
+
+ /* Just to avoid triggering the assertion in __quicklistCompressNode(),
+ * it disables the passing of quicklist head or tail node. */
+ node->prev = quicklistCreateNode();
+ node->next = quicklistCreateNode();
+
+ /* Create a rand string */
+ size_t sz = (1 << 25); /* 32MB per one entry */
+ unsigned char *s = zmalloc(sz);
+ randstring(s, sz);
+
+ /* Keep filling the node, until it reaches 1GB */
+ for (int i = 0; i < 32; i++) {
+ node->entry = lpAppend(node->entry, s, sz);
+ quicklistNodeUpdateSz(node);
+
+ long long start = mstime();
+ assert(__quicklistCompressNode(node));
+ assert(__quicklistDecompressNode(node));
+ printf("Compress and decompress: %zu MB in %.2f seconds.\n",
+ node->sz/1024/1024, (float)(mstime() - start) / 1000);
+ }
+
+ zfree(s);
+ zfree(node->prev);
+ zfree(node->next);
+ zfree(node->entry);
+ zfree(node);
+ }
+
+#if ULONG_MAX >= 0xffffffffffffffff
+ TEST("compress and decomress quicklist plain node large than UINT32_MAX") {
+ size_t sz = (1ull << 32);
+ unsigned char *s = zmalloc(sz);
+ randstring(s, sz);
+ memcpy(s, "helloworld", 10);
+ memcpy(s + sz - 10, "1234567890", 10);
+
+ quicklistNode *node = __quicklistCreatePlainNode(s, sz);
+
+ /* Just to avoid triggering the assertion in __quicklistCompressNode(),
+ * it disables the passing of quicklist head or tail node. */
+ node->prev = quicklistCreateNode();
+ node->next = quicklistCreateNode();
+
+ long long start = mstime();
+ assert(__quicklistCompressNode(node));
+ assert(__quicklistDecompressNode(node));
+ printf("Compress and decompress: %zu MB in %.2f seconds.\n",
+ node->sz/1024/1024, (float)(mstime() - start) / 1000);
+
+ assert(memcmp(node->entry, "helloworld", 10) == 0);
+ assert(memcmp(node->entry + sz - 10, "1234567890", 10) == 0);
+ zfree(node->prev);
+ zfree(node->next);
+ zfree(node->entry);
+ zfree(node);
+ }
+#endif
+ }
+
+ if (!err)
+ printf("ALL TESTS PASSED!\n");
+ else
+ ERR("Sorry, not all tests passed! In fact, %d tests failed.", err);
+
+ return err;
+}
+#endif
diff --git a/src/quicklist.h b/src/quicklist.h
new file mode 100644
index 0000000..f17834b
--- /dev/null
+++ b/src/quicklist.h
@@ -0,0 +1,214 @@
+/* quicklist.h - A generic doubly linked quicklist implementation
+ *
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this quicklist of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this quicklist of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h> // for UINTPTR_MAX
+
+#ifndef __QUICKLIST_H__
+#define __QUICKLIST_H__
+
+/* Node, quicklist, and Iterator are the only data structures used currently. */
+
+/* quicklistNode is a 32 byte struct describing a listpack for a quicklist.
+ * We use bit fields to keep the quicklistNode at 32 bytes.
+ * count: 16 bits, max 65536 (max lp bytes is 65k, so max count actually < 32k).
+ * encoding: 2 bits, RAW=1, LZF=2.
+ * container: 2 bits, PLAIN=1 (a single item as char array), PACKED=2 (listpack with multiple items).
+ * recompress: 1 bit, bool, true if node is temporary decompressed for usage.
+ * attempted_compress: 1 bit, boolean, used for verifying during testing.
+ * extra: 10 bits, free for future use; pads out the remainder of 32 bits */
+typedef struct quicklistNode {
+ struct quicklistNode *prev;
+ struct quicklistNode *next;
+ unsigned char *entry;
+ size_t sz; /* entry size in bytes */
+ unsigned int count : 16; /* count of items in listpack */
+ unsigned int encoding : 2; /* RAW==1 or LZF==2 */
+ unsigned int container : 2; /* PLAIN==1 or PACKED==2 */
+ unsigned int recompress : 1; /* was this node previous compressed? */
+ unsigned int attempted_compress : 1; /* node can't compress; too small */
+ unsigned int dont_compress : 1; /* prevent compression of entry that will be used later */
+ unsigned int extra : 9; /* more bits to steal for future usage */
+} quicklistNode;
+
+/* quicklistLZF is a 8+N byte struct holding 'sz' followed by 'compressed'.
+ * 'sz' is byte length of 'compressed' field.
+ * 'compressed' is LZF data with total (compressed) length 'sz'
+ * NOTE: uncompressed length is stored in quicklistNode->sz.
+ * When quicklistNode->entry is compressed, node->entry points to a quicklistLZF */
+typedef struct quicklistLZF {
+ size_t sz; /* LZF size in bytes*/
+ char compressed[];
+} quicklistLZF;
+
+/* Bookmarks are padded with realloc at the end of the quicklist struct.
+ * They should only be used for very big lists of thousands of nodes, where the
+ * excess memory usage is negligible, and there's a real need to iterate on them
+ * in portions.
+ * When not used, they don't add any memory overhead, but when used and then
+ * deleted, some overhead remains (to avoid resonance).
+ * The number of bookmarks used should be kept to minimum since it also adds
+ * overhead on node deletion (searching for a bookmark to update). */
+typedef struct quicklistBookmark {
+ quicklistNode *node;
+ char *name;
+} quicklistBookmark;
+
+#if UINTPTR_MAX == 0xffffffff
+/* 32-bit */
+# define QL_FILL_BITS 14
+# define QL_COMP_BITS 14
+# define QL_BM_BITS 4
+#elif UINTPTR_MAX == 0xffffffffffffffff
+/* 64-bit */
+# define QL_FILL_BITS 16
+# define QL_COMP_BITS 16
+# define QL_BM_BITS 4 /* we can encode more, but we rather limit the user
+ since they cause performance degradation. */
+#else
+# error unknown arch bits count
+#endif
+
+/* quicklist is a 40 byte struct (on 64-bit systems) describing a quicklist.
+ * 'count' is the number of total entries.
+ * 'len' is the number of quicklist nodes.
+ * 'compress' is: 0 if compression disabled, otherwise it's the number
+ * of quicklistNodes to leave uncompressed at ends of quicklist.
+ * 'fill' is the user-requested (or default) fill factor.
+ * 'bookmarks' are an optional feature that is used to realloc this struct,
+ * so that they don't consume memory when not used. */
+typedef struct quicklist {
+ quicklistNode *head;
+ quicklistNode *tail;
+ unsigned long count; /* total count of all entries in all listpacks */
+ unsigned long len; /* number of quicklistNodes */
+ signed int fill : QL_FILL_BITS; /* fill factor for individual nodes */
+ unsigned int compress : QL_COMP_BITS; /* depth of end nodes not to compress;0=off */
+ unsigned int bookmark_count: QL_BM_BITS;
+ quicklistBookmark bookmarks[];
+} quicklist;
+
+typedef struct quicklistIter {
+ quicklist *quicklist;
+ quicklistNode *current;
+ unsigned char *zi; /* points to the current element */
+ long offset; /* offset in current listpack */
+ int direction;
+} quicklistIter;
+
+typedef struct quicklistEntry {
+ const quicklist *quicklist;
+ quicklistNode *node;
+ unsigned char *zi;
+ unsigned char *value;
+ long long longval;
+ size_t sz;
+ int offset;
+} quicklistEntry;
+
+#define QUICKLIST_HEAD 0
+#define QUICKLIST_TAIL -1
+
+/* quicklist node encodings */
+#define QUICKLIST_NODE_ENCODING_RAW 1
+#define QUICKLIST_NODE_ENCODING_LZF 2
+
+/* quicklist compression disable */
+#define QUICKLIST_NOCOMPRESS 0
+
+/* quicklist node container formats */
+#define QUICKLIST_NODE_CONTAINER_PLAIN 1
+#define QUICKLIST_NODE_CONTAINER_PACKED 2
+
+#define QL_NODE_IS_PLAIN(node) ((node)->container == QUICKLIST_NODE_CONTAINER_PLAIN)
+
+#define quicklistNodeIsCompressed(node) \
+ ((node)->encoding == QUICKLIST_NODE_ENCODING_LZF)
+
+/* Prototypes */
+quicklist *quicklistCreate(void);
+quicklist *quicklistNew(int fill, int compress);
+void quicklistSetCompressDepth(quicklist *quicklist, int depth);
+void quicklistSetFill(quicklist *quicklist, int fill);
+void quicklistSetOptions(quicklist *quicklist, int fill, int depth);
+void quicklistRelease(quicklist *quicklist);
+int quicklistPushHead(quicklist *quicklist, void *value, const size_t sz);
+int quicklistPushTail(quicklist *quicklist, void *value, const size_t sz);
+void quicklistPush(quicklist *quicklist, void *value, const size_t sz,
+ int where);
+void quicklistAppendListpack(quicklist *quicklist, unsigned char *zl);
+void quicklistAppendPlainNode(quicklist *quicklist, unsigned char *data, size_t sz);
+void quicklistInsertAfter(quicklistIter *iter, quicklistEntry *entry,
+ void *value, const size_t sz);
+void quicklistInsertBefore(quicklistIter *iter, quicklistEntry *entry,
+ void *value, const size_t sz);
+void quicklistDelEntry(quicklistIter *iter, quicklistEntry *entry);
+void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry,
+ void *data, size_t sz);
+int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data,
+ const size_t sz);
+int quicklistDelRange(quicklist *quicklist, const long start, const long stop);
+quicklistIter *quicklistGetIterator(quicklist *quicklist, int direction);
+quicklistIter *quicklistGetIteratorAtIdx(quicklist *quicklist,
+ int direction, const long long idx);
+quicklistIter *quicklistGetIteratorEntryAtIdx(quicklist *quicklist, const long long index,
+ quicklistEntry *entry);
+int quicklistNext(quicklistIter *iter, quicklistEntry *entry);
+void quicklistSetDirection(quicklistIter *iter, int direction);
+void quicklistReleaseIterator(quicklistIter *iter);
+quicklist *quicklistDup(quicklist *orig);
+void quicklistRotate(quicklist *quicklist);
+int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data,
+ size_t *sz, long long *sval,
+ void *(*saver)(unsigned char *data, size_t sz));
+int quicklistPop(quicklist *quicklist, int where, unsigned char **data,
+ size_t *sz, long long *slong);
+unsigned long quicklistCount(const quicklist *ql);
+int quicklistCompare(quicklistEntry *entry, unsigned char *p2, const size_t p2_len);
+size_t quicklistGetLzf(const quicklistNode *node, void **data);
+void quicklistNodeLimit(int fill, size_t *size, unsigned int *count);
+int quicklistNodeExceedsLimit(int fill, size_t new_sz, unsigned int new_count);
+void quicklistRepr(unsigned char *ql, int full);
+
+/* bookmarks */
+int quicklistBookmarkCreate(quicklist **ql_ref, const char *name, quicklistNode *node);
+int quicklistBookmarkDelete(quicklist *ql, const char *name);
+quicklistNode *quicklistBookmarkFind(quicklist *ql, const char *name);
+void quicklistBookmarksClear(quicklist *ql);
+int quicklistisSetPackedThreshold(size_t sz);
+
+#ifdef REDIS_TEST
+int quicklistTest(int argc, char *argv[], int flags);
+#endif
+
+/* Directions for iterators */
+#define AL_START_HEAD 0
+#define AL_START_TAIL 1
+
+#endif /* __QUICKLIST_H__ */
diff --git a/src/rand.c b/src/rand.c
new file mode 100644
index 0000000..e1e98e6
--- /dev/null
+++ b/src/rand.c
@@ -0,0 +1,93 @@
+/* Pseudo random number generation functions derived from the drand48()
+ * function obtained from pysam source code.
+ *
+ * These functions are used in order to replace the default math.random()
+ * Lua implementation with something having exactly the same behavior
+ * across different systems (by default Lua uses libc's rand() that is not
+ * required to implement a specific PRNG generating the same sequence
+ * in different systems if seeded with the same integer).
+ *
+ * The original code appears to be under the public domain.
+ * I modified it, removing the unneeded functions and all the
+ * 1960-style C coding stuff...
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2010-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#define N 16
+#define MASK ((1 << (N - 1)) + (1 << (N - 1)) - 1)
+#define LOW(x) ((unsigned)(x) & MASK)
+#define HIGH(x) LOW((x) >> N)
+#define MUL(x, y, z) { int32_t l = (long)(x) * (long)(y); \
+ (z)[0] = LOW(l); (z)[1] = HIGH(l); }
+#define CARRY(x, y) ((int32_t)(x) + (long)(y) > MASK)
+#define ADDEQU(x, y, z) (z = CARRY(x, (y)), x = LOW(x + (y)))
+#define X0 0x330E
+#define X1 0xABCD
+#define X2 0x1234
+#define A0 0xE66D
+#define A1 0xDEEC
+#define A2 0x5
+#define C 0xB
+#define SET3(x, x0, x1, x2) ((x)[0] = (x0), (x)[1] = (x1), (x)[2] = (x2))
+#define SETLOW(x, y, n) SET3(x, LOW((y)[n]), LOW((y)[(n)+1]), LOW((y)[(n)+2]))
+#define SEED(x0, x1, x2) (SET3(x, x0, x1, x2), SET3(a, A0, A1, A2), c = C)
+#define REST(v) for (i = 0; i < 3; i++) { xsubi[i] = x[i]; x[i] = temp[i]; } \
+ return (v);
+#define HI_BIT (1L << (2 * N - 1))
+
+static uint32_t x[3] = { X0, X1, X2 }, a[3] = { A0, A1, A2 }, c = C;
+static void next(void);
+
+int32_t redisLrand48(void) {
+ next();
+ return (((int32_t)x[2] << (N - 1)) + (x[1] >> 1));
+}
+
+void redisSrand48(int32_t seedval) {
+ SEED(X0, LOW(seedval), HIGH(seedval));
+}
+
+static void next(void) {
+ uint32_t p[2], q[2], r[2], carry0, carry1;
+
+ MUL(a[0], x[0], p);
+ ADDEQU(p[0], c, carry0);
+ ADDEQU(p[1], carry0, carry1);
+ MUL(a[0], x[1], q);
+ ADDEQU(p[1], q[0], carry0);
+ MUL(a[1], x[0], r);
+ x[2] = LOW(carry0 + carry1 + CARRY(p[1], r[0]) + q[1] + r[1] +
+ a[0] * x[2] + a[1] * x[1] + a[2] * x[0]);
+ x[1] = LOW(p[1] + r[0]);
+ x[0] = LOW(p[0]);
+}
diff --git a/src/rand.h b/src/rand.h
new file mode 100644
index 0000000..9884915
--- /dev/null
+++ b/src/rand.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef REDIS_RANDOM_H
+#define REDIS_RANDOM_H
+
+/* drand48()-style PRNG with explicit redis-prefixed entry points. The
+ * implementation keeps hidden static state, so these are not reentrant.
+ * NOTE(review): this header uses int32_t/INT32_MAX but does not include
+ * <stdint.h> itself -- includers must provide it; confirm against users. */
+int32_t redisLrand48(void);
+void redisSrand48(int32_t seedval);
+
+/* Largest value redisLrand48() can return (31-bit range). */
+#define REDIS_LRAND48_MAX INT32_MAX
+
+#endif
diff --git a/src/rax.c b/src/rax.c
new file mode 100644
index 0000000..287f985
--- /dev/null
+++ b/src/rax.c
@@ -0,0 +1,1927 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Version 1.2 -- 7 February 2019
+ *
+ * Copyright (c) 2017-2019, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include <errno.h>
+#include <math.h>
+#include "rax.h"
+
+#ifndef RAX_MALLOC_INCLUDE
+#define RAX_MALLOC_INCLUDE "rax_malloc.h"
+#endif
+
+#include RAX_MALLOC_INCLUDE
+
+/* This is a special pointer that is guaranteed to never have the same value
+ * of a radix tree node. It's used in order to report "not found" error without
+ * requiring the function to have multiple return values. */
+void *raxNotFound = (void*)"rax-not-found-pointer";
+
+/* -------------------------------- Debugging ------------------------------ */
+
+void raxDebugShowNode(const char *msg, raxNode *n);
+
+/* Turn debugging messages on/off by compiling with RAX_DEBUG_MSG macro on.
+ * When RAX_DEBUG_MSG is defined by default Rax operations will emit a lot
+ * of debugging info to the standard output, however you can still turn
+ * debugging on/off in order to enable it only when you suspect there is an
+ * operation causing a bug using the function raxSetDebugMsg(). */
+#ifdef RAX_DEBUG_MSG
+#define debugf(...) \
+ if (raxDebugMsg) { \
+ printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
+ printf(__VA_ARGS__); \
+ fflush(stdout); \
+ }
+
+#define debugnode(msg,n) raxDebugShowNode(msg,n)
+#else
+#define debugf(...)
+#define debugnode(msg,n)
+#endif
+
+/* By default log debug info if RAX_DEBUG_MSG is defined. */
+static int raxDebugMsg = 1;
+
+/* When debug messages are enabled, turn them on/off dynamically. By
+ * default they are enabled. Set the state to 0 to disable, and 1 to
+ * re-enable. */
+/* Effective only when compiled with RAX_DEBUG_MSG: raxDebugMsg is read
+ * solely inside the debugf() macro defined above. */
+void raxSetDebugMsg(int onoff) {
+ raxDebugMsg = onoff;
+}
+
+/* ------------------------- raxStack functions --------------------------
+ * The raxStack is a simple stack of pointers that is capable of switching
+ * from using a stack-allocated array to dynamic heap once a given number of
+ * items are reached. It is used in order to retain the list of parent nodes
+ * while walking the radix tree in order to implement certain operations that
+ * need to navigate the tree upward.
+ * ------------------------------------------------------------------------- */
+
+/* Initialize the stack. Starts on the embedded static array so no heap
+ * allocation happens until more than RAX_STACK_STATIC_ITEMS are pushed. */
+static inline void raxStackInit(raxStack *ts) {
+ ts->stack = ts->static_items; /* Allocation-free fast path. */
+ ts->items = 0;
+ ts->maxitems = RAX_STACK_STATIC_ITEMS;
+ ts->oom = 0; /* Set to 1 by raxStackPush() on allocation failure. */
+}
+
+/* Push an item into the stack, returns 1 on success, 0 on out of memory
+ * (in which case ts->oom is set and errno is ENOMEM; the stack contents
+ * are left untouched). Capacity doubles on each growth. */
+static inline int raxStackPush(raxStack *ts, void *ptr) {
+ if (ts->items == ts->maxitems) {
+ if (ts->stack == ts->static_items) {
+ /* First growth: migrate from the static array to the heap. */
+ ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2);
+ if (ts->stack == NULL) {
+ /* Restore the static array so the stack stays usable. */
+ ts->stack = ts->static_items;
+ ts->oom = 1;
+ errno = ENOMEM;
+ return 0;
+ }
+ memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
+ } else {
+ /* realloc into a temporary so the old buffer is not lost on
+ * failure. */
+ void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
+ if (newalloc == NULL) {
+ ts->oom = 1;
+ errno = ENOMEM;
+ return 0;
+ }
+ ts->stack = newalloc;
+ }
+ ts->maxitems *= 2;
+ }
+ ts->stack[ts->items] = ptr;
+ ts->items++;
+ return 1;
+}
+
+/* Pop an item from the stack, the function returns NULL if there are no
+ * items to pop. (NULL is ambiguous if NULL pointers were pushed; callers
+ * here only push node pointers, which are never NULL.) */
+static inline void *raxStackPop(raxStack *ts) {
+ if (ts->items == 0) return NULL;
+ ts->items--;
+ return ts->stack[ts->items];
+}
+
+/* Return the stack item at the top of the stack without actually consuming
+ * it, or NULL if the stack is empty. */
+static inline void *raxStackPeek(raxStack *ts) {
+ if (ts->items == 0) return NULL;
+ return ts->stack[ts->items-1];
+}
+
+/* Free the stack in case we used heap allocation; a no-op while still on
+ * the embedded static array. */
+static inline void raxStackFree(raxStack *ts) {
+ if (ts->stack != ts->static_items) rax_free(ts->stack);
+}
+
+/* ----------------------------------------------------------------------------
+ * Radix tree implementation
+ * --------------------------------------------------------------------------*/
+
+/* Return the padding needed in the characters section of a node having size
+ * 'nodesize'. The padding is needed to store the child pointers to aligned
+ * addresses. Note that we add 4 to the node size because the node has a four
+ * bytes header. The trailing '& (sizeof(void*)-1)' maps a would-be full
+ * word of padding back to zero, so the result is in [0, sizeof(void*)-1]. */
+#define raxPadding(nodesize) ((sizeof(void*)-(((nodesize)+4) % sizeof(void*))) & (sizeof(void*)-1))
+
+/* Return the pointer to the last child pointer in a node. For the compressed
+ * nodes this is the only child pointer. (The optional trailing data pointer,
+ * present when iskey && !isnull, is skipped over.) */
+#define raxNodeLastChildPtr(n) ((raxNode**) ( \
+ ((char*)(n)) + \
+ raxNodeCurrentLength(n) - \
+ sizeof(raxNode*) - \
+ (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \
+))
+
+/* Return the pointer to the first child pointer: right after the edge
+ * characters plus their alignment padding. */
+#define raxNodeFirstChildPtr(n) ((raxNode**) ( \
+ (n)->data + \
+ (n)->size + \
+ raxPadding((n)->size)))
+
+/* Return the current total size of the node. Note that the second line
+ * computes the padding after the string of characters, needed in order to
+ * save pointers to aligned addresses. Compressed nodes store exactly one
+ * child pointer; non compressed nodes store one per edge character. */
+#define raxNodeCurrentLength(n) ( \
+ sizeof(raxNode)+(n)->size+ \
+ raxPadding((n)->size)+ \
+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \
+ (((n)->iskey && !(n)->isnull)*sizeof(void*)) \
+)
+
+/* Allocate a new non compressed node with the specified number of children.
+ * If datafield is true, the allocation is made large enough to hold the
+ * associated data pointer.
+ * Returns the new node pointer. On out of memory NULL is returned. */
+/* Allocate a new non compressed node with room for 'children' edges.
+ * If datafield is true the allocation also reserves space for the
+ * associated data pointer. Returns NULL on out of memory. Note that the
+ * edge characters and child pointers are left uninitialized: the caller
+ * is expected to fill them. */
+raxNode *raxNewNode(size_t children, int datafield) {
+ size_t nodesize = sizeof(raxNode)+children+raxPadding(children)+
+ sizeof(raxNode*)*children;
+ if (datafield) nodesize += sizeof(void*);
+ raxNode *node = rax_malloc(nodesize);
+ if (node == NULL) return NULL;
+ node->iskey = 0;
+ node->isnull = 0;
+ node->iscompr = 0;
+ node->size = children;
+ return node;
+}
+
+/* Allocate a new rax and return its pointer. On out of memory the function
+ * returns NULL. */
+/* Allocate a new rax and return its pointer. On out of memory the function
+ * returns NULL (and nothing is leaked: the partially built rax is freed). */
+rax *raxNew(void) {
+ rax *rax = rax_malloc(sizeof(*rax));
+ if (rax == NULL) return NULL;
+ rax->numele = 0;
+ rax->numnodes = 1; /* Counts the empty head node created below. */
+ rax->head = raxNewNode(0,0);
+ if (rax->head == NULL) {
+ rax_free(rax);
+ return NULL;
+ } else {
+ return rax;
+ }
+}
+
+/* realloc the node to make room for auxiliary data in order
+ * to store an item in that node. On out of memory NULL is returned. */
+/* realloc the node to make room for the auxiliary data pointer needed to
+ * store an item in that node. On out of memory NULL is returned (and the
+ * original node is still valid, per realloc semantics). A NULL data needs
+ * no extra space: raxSetData() will just set isnull=1. */
+raxNode *raxReallocForData(raxNode *n, void *data) {
+ if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */
+ size_t curlen = raxNodeCurrentLength(n);
+ return rax_realloc(n,curlen+sizeof(void*));
+}
+
+/* Set the node auxiliary data to the specified pointer. */
+/* Set the node auxiliary data to the specified pointer and mark the node
+ * as a key. The caller must have already sized the node for the data slot
+ * (see raxReallocForData). NULL data is encoded via isnull=1 with no slot. */
+void raxSetData(raxNode *n, void *data) {
+ n->iskey = 1;
+ if (data != NULL) {
+ n->isnull = 0;
+ /* Data pointer lives in the last sizeof(void*) bytes of the node;
+ * memcpy avoids alignment assumptions on that address. */
+ void **ndata = (void**)
+ ((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
+ memcpy(ndata,&data,sizeof(data));
+ } else {
+ n->isnull = 1;
+ }
+}
+
+/* Get the node auxiliary data. */
+/* Get the node auxiliary data, or NULL if the key was stored with a NULL
+ * value (isnull set, no data slot allocated). */
+void *raxGetData(raxNode *n) {
+ if (n->isnull) return NULL;
+ /* Mirror of raxSetData(): read the pointer from the node tail. */
+ void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
+ void *data;
+ memcpy(&data,ndata,sizeof(data));
+ return data;
+}
+
+/* Add a new child to the node 'n' representing the character 'c' and return
+ * its new pointer, as well as the child pointer by reference. Additionally
+ * '***parentlink' is populated with the raxNode pointer-to-pointer of where
+ * the new child was stored, which is useful for the caller to replace the
+ * child pointer if it gets reallocated.
+ *
+ * On success the new parent node pointer is returned (it may change because
+ * of the realloc, so the caller should discard 'n' and use the new value).
+ * On out of memory NULL is returned, and the old node is still valid. */
+raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) {
+ assert(n->iscompr == 0);
+
+ /* Transiently bump 'size' so raxNodeCurrentLength() reports the length
+ * after insertion; the real increment happens only once all the moves
+ * below have succeeded. */
+ size_t curlen = raxNodeCurrentLength(n);
+ n->size++;
+ size_t newlen = raxNodeCurrentLength(n);
+ n->size--; /* For now restore the original size. We'll update it only on
+ success at the end. */
+
+ /* Alloc the new child we will link to 'n'. */
+ raxNode *child = raxNewNode(0,0);
+ if (child == NULL) return NULL;
+
+ /* Make space in the original node. */
+ raxNode *newn = rax_realloc(n,newlen);
+ if (newn == NULL) {
+ rax_free(child);
+ return NULL;
+ }
+ n = newn;
+
+ /* After the reallocation, we have up to 8/16 (depending on the system
+ * pointer size, and the required node padding) bytes at the end, that is,
+ * the additional char in the 'data' section, plus one pointer to the new
+ * child, plus the padding needed in order to store addresses into aligned
+ * locations.
+ *
+ * So if we start with the following node, having "abde" edges.
+ *
+ * Note:
+ * - We assume 4 bytes pointer for simplicity.
+ * - Each space below corresponds to one byte
+ *
+ * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|
+ *
+ * After the reallocation we need: 1 byte for the new edge character
+ * plus 4 bytes for a new child pointer (assuming 32 bit machine).
+ * However after adding 1 byte to the edge char, the header + the edge
+ * characters are no longer aligned, so we also need 3 bytes of padding.
+ * In total the reallocation will add 1+4+3 bytes = 8 bytes:
+ *
+ * (Blank bytes are represented by ".")
+ *
+ * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....]
+ *
+ * Let's find where to insert the new child in order to make sure
+ * it is inserted in-place lexicographically. Assuming we are adding
+ * a child "c" in our case pos will be = 2 after the end of the following
+ * loop. */
+ int pos;
+ for (pos = 0; pos < n->size; pos++) {
+ if (n->data[pos] > c) break;
+ }
+
+ /* Now, if present, move auxiliary data pointer at the end
+ * so that we can mess with the other data without overwriting it.
+ * We will obtain something like that:
+ *
+ * [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP|
+ */
+ unsigned char *src, *dst;
+ if (n->iskey && !n->isnull) {
+ src = ((unsigned char*)n+curlen-sizeof(void*));
+ dst = ((unsigned char*)n+newlen-sizeof(void*));
+ memmove(dst,src,sizeof(void*));
+ }
+
+ /* Compute the "shift", that is, how many bytes we need to move the
+ * pointers section forward because of the addition of the new child
+ * byte in the string section. Note that if we had no padding, that
+ * would be always "1", since we are adding a single byte in the string
+ * section of the node (where now there is "abde" basically).
+ *
+ * However we have padding, so it could be zero, or up to 8.
+ *
+ * Another way to think at the shift is, how many bytes we need to
+ * move child pointers forward *other than* the obvious sizeof(void*)
+ * needed for the additional pointer itself. */
+ size_t shift = newlen - curlen - sizeof(void*);
+
+ /* We said we are adding a node with edge 'c'. The insertion
+ * point is between 'b' and 'd', so the 'pos' variable value is
+ * the index of the first child pointer that we need to move forward
+ * to make space for our new pointer.
+ *
+ * To start, move all the child pointers after the insertion point
+ * of shift+sizeof(pointer) bytes on the right, to obtain:
+ *
+ * [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP|
+ */
+ src = n->data+n->size+
+ raxPadding(n->size)+
+ sizeof(raxNode*)*pos;
+ memmove(src+shift+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos));
+
+ /* Move the pointers to the left of the insertion position as well. Often
+ * we don't need to do anything if there was already some padding to use. In
+ * that case the final destination of the pointers will be the same, however
+ * in our example there was no pre-existing padding, so we added one byte
+ * plus three bytes of padding. After the next memmove() things will look
+ * like that:
+ *
+ * [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
+ */
+ if (shift) {
+ src = (unsigned char*) raxNodeFirstChildPtr(n);
+ memmove(src+shift,src,sizeof(raxNode*)*pos);
+ }
+
+ /* Now make the space for the additional char in the data section,
+ * but also move the pointers before the insertion point to the right
+ * by shift bytes, in order to obtain the following:
+ *
+ * [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
+ */
+ src = n->data+pos;
+ memmove(src+1,src,n->size-pos);
+
+ /* We can now set the character and its child node pointer to get:
+ *
+ * [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
+ * [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP|
+ */
+ n->data[pos] = c;
+ n->size++; /* Commit the new size only now that all moves are done. */
+ src = (unsigned char*) raxNodeFirstChildPtr(n);
+ raxNode **childfield = (raxNode**)(src+sizeof(raxNode*)*pos);
+ memcpy(childfield,&child,sizeof(child));
+ *childptr = child;
+ *parentlink = childfield;
+ return n;
+}
+
+/* Turn the node 'n', that must be a node without any children, into a
+ * compressed node representing a set of nodes linked one after the other
+ * and having exactly one child each. The node can be a key or not: this
+ * property and the associated value if any will be preserved.
+ *
+ * The function also returns a child node, since the last node of the
+ * compressed chain cannot be part of the chain: it has zero children while
+ * we can only compress inner nodes with exactly one child each. */
+raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {
+ /* Only an empty, non compressed node may be turned into a compressed
+ * chain of 'len' characters. */
+ assert(n->size == 0 && n->iscompr == 0);
+ void *data = NULL; /* Initialized only to avoid warnings. */
+ size_t newsize;
+
+ debugf("Compress node: %.*s\n", (int)len,s);
+
+ /* Allocate the child to link to this node. */
+ *child = raxNewNode(0,0);
+ if (*child == NULL) return NULL;
+
+ /* Make space in the parent node. A compressed node stores exactly one
+ * child pointer regardless of 'len'. */
+ newsize = sizeof(raxNode)+len+raxPadding(len)+sizeof(raxNode*);
+ if (n->iskey) {
+ data = raxGetData(n); /* To restore it later. */
+ if (!n->isnull) newsize += sizeof(void*);
+ }
+ raxNode *newn = rax_realloc(n,newsize);
+ if (newn == NULL) {
+ /* On OOM free the child and leave the original node untouched. */
+ rax_free(*child);
+ return NULL;
+ }
+ n = newn;
+
+ n->iscompr = 1;
+ n->size = len;
+ memcpy(n->data,s,len);
+ if (n->iskey) raxSetData(n,data); /* Restore key flag and value. */
+ raxNode **childfield = raxNodeLastChildPtr(n);
+ memcpy(childfield,child,sizeof(*child));
+ return n;
+}
+
+/* Low level function that walks the tree looking for the string
+ * 's' of 'len' bytes. The function returns the number of characters
+ * of the key that was possible to process: if the returned integer
+ * is the same as 'len', then it means that the node corresponding to the
+ * string was found (however it may not be a key in case the node->iskey is
+ * zero or if simply we stopped in the middle of a compressed node, so that
+ * 'splitpos' is non zero).
+ *
+ * Otherwise if the returned integer is not the same as 'len', there was an
+ * early stop during the tree walk because of a character mismatch.
+ *
+ * The node where the search ended (because the full string was processed
+ * or because there was an early stop) is returned by reference as
+ * '*stopnode' if the passed pointer is not NULL. This node link in the
+ * parent's node is returned as '*plink' if not NULL. Finally, if the
+ * search stopped in a compressed node, '*splitpos' returns the index
+ * inside the compressed node where the search ended. This is useful to
+ * know where to split the node for insertion.
+ *
+ * Note that when we stop in the middle of a compressed node with
+ * a perfect match, this function will return a length equal to the
+ * 'len' argument (all the key matched), and will return a *splitpos which is
+ * always positive (that will represent the index of the character immediately
+ * *after* the last match in the current compressed node).
+ *
+ * When instead we stop at a compressed node and *splitpos is zero, it
+ * means that the current node represents the key (that is, none of the
+ * compressed node characters are needed to represent the key, just all
+ * its parents nodes). */
+static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {
+ raxNode *h = rax->head;
+ raxNode **parentlink = &rax->head;
+
+ size_t i = 0; /* Position in the string. */
+ size_t j = 0; /* Position in the node children (or bytes if compressed).*/
+ while(h->size && i < len) {
+ debugnode("Lookup current node",h);
+ unsigned char *v = h->data;
+
+ if (h->iscompr) {
+ /* Compressed node: every stored byte must match in order. */
+ for (j = 0; j < h->size && i < len; j++, i++) {
+ if (v[j] != s[i]) break;
+ }
+ if (j != h->size) break; /* Mismatch or key exhausted mid-node. */
+ } else {
+ /* Even when h->size is large, linear scan provides good
+ * performances compared to other approaches that are in theory
+ * more sounding, like performing a binary search. */
+ for (j = 0; j < h->size; j++) {
+ if (v[j] == s[i]) break;
+ }
+ if (j == h->size) break; /* No edge for this character. */
+ i++;
+ }
+
+ if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */
+ raxNode **children = raxNodeFirstChildPtr(h);
+ if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */
+ /* memcpy loads the child pointer without aliasing assumptions. */
+ memcpy(&h,children+j,sizeof(h));
+ parentlink = children+j;
+ j = 0; /* If the new node is non compressed and we do not
+ iterate again (since i == len) set the split
+ position to 0 to signal this node represents
+ the searched key. */
+ }
+ debugnode("Lookup stop node is",h);
+ if (stopnode) *stopnode = h;
+ if (plink) *plink = parentlink;
+ if (splitpos && h->iscompr) *splitpos = j;
+ return i;
+}
+
+/* Insert the element 's' of size 'len', setting as auxiliary data
+ * the pointer 'data'. If the element is already present, the associated
+ * data is updated (only if 'overwrite' is set to 1), and 0 is returned,
+ * otherwise the element is inserted and 1 is returned. On out of memory the
+ * function returns 0 as well but sets errno to ENOMEM, otherwise errno will
+ * be set to 0.
+ */
+int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {
+ size_t i;
+ int j = 0; /* Split position. If raxLowWalk() stops in a compressed
+ node, the index 'j' represents the char we stopped within the
+ compressed node, that is, the position where to split the
+ node for insertion. */
+ raxNode *h, **parentlink;
+
+ debugf("### Insert %.*s with value %p\n", (int)len, s, data);
+ i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);
+
+ /* If i == len we walked following the whole string. If we are not
+ * in the middle of a compressed node, the string is either already
+ * inserted or this middle node is currently not a key, but can represent
+ * our key. We have just to reallocate the node and make space for the
+ * data pointer. */
+ if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
+ debugf("### Insert: node representing key exists\n");
+ /* Make space for the value pointer if needed. */
+ if (!h->iskey || (h->isnull && overwrite)) {
+ h = raxReallocForData(h,data);
+ if (h) memcpy(parentlink,&h,sizeof(h));
+ }
+ if (h == NULL) {
+ errno = ENOMEM;
+ return 0;
+ }
+
+ /* Update the existing key if there is already one. */
+ if (h->iskey) {
+ if (old) *old = raxGetData(h);
+ if (overwrite) raxSetData(h,data);
+ errno = 0;
+ return 0; /* Element already exists. */
+ }
+
+ /* Otherwise set the node as a key. Note that raxSetData()
+ * will set h->iskey. */
+ raxSetData(h,data);
+ rax->numele++;
+ return 1; /* Element inserted. */
+ }
+
+ /* If the node we stopped at is a compressed node, we need to
+ * split it before to continue.
+ *
+ * Splitting a compressed node have a few possible cases.
+ * Imagine that the node 'h' we are currently at is a compressed
+ * node containing the string "ANNIBALE" (it means that it represents
+ * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
+ * pointer of this node pointing at the 'E' node, because remember that
+ * we have characters at the edges of the graph, not inside the nodes
+ * themselves.
+ *
+ * In order to show a real case imagine our node to also point to
+ * another compressed node, that finally points at the node without
+ * children, representing 'O':
+ *
+ * "ANNIBALE" -> "SCO" -> []
+ *
+ * When inserting we may face the following cases. Note that all the cases
+ * require the insertion of a non compressed node with exactly two
+ * children, except for the last case which just requires splitting a
+ * compressed node.
+ *
+ * 1) Inserting "ANNIENTARE"
+ *
+ * |B| -> "ALE" -> "SCO" -> []
+ * "ANNI" -> |-|
+ * |E| -> (... continue algo ...) "NTARE" -> []
+ *
+ * 2) Inserting "ANNIBALI"
+ *
+ * |E| -> "SCO" -> []
+ * "ANNIBAL" -> |-|
+ * |I| -> (... continue algo ...) []
+ *
+ * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node)
+ *
+ * |N| -> "NIBALE" -> "SCO" -> []
+ * |A| -> |-|
+ * |G| -> (... continue algo ...) |O| -> []
+ *
+ * 4) Inserting "CIAO"
+ *
+ * |A| -> "NNIBALE" -> "SCO" -> []
+ * |-|
+ * |C| -> (... continue algo ...) "IAO" -> []
+ *
+ * 5) Inserting "ANNI"
+ *
+ * "ANNI" -> "BALE" -> "SCO" -> []
+ *
+ * The final algorithm for insertion covering all the above cases is as
+ * follows.
+ *
+ * ============================= ALGO 1 =============================
+ *
+ * For the above cases 1 to 4, that is, all cases where we stopped in
+ * the middle of a compressed node for a character mismatch, do:
+ *
+ * Let $SPLITPOS be the zero-based index at which, in the
+ * compressed node array of characters, we found the mismatching
+ * character. For example if the node contains "ANNIBALE" and we add
+ * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the
+ * mismatching character is found.
+ *
+ * 1. Save the current compressed node $NEXT pointer (the pointer to the
+ * child element, that is always present in compressed nodes).
+ *
+ * 2. Create "split node" having as child the non common letter
+ * at the compressed node. The other non common letter (at the key)
+ * will be added later as we continue the normal insertion algorithm
+ * at step "6".
+ *
+ * 3a. IF $SPLITPOS == 0:
+ * Replace the old node with the split node, by copying the auxiliary
+ * data if any. Fix parent's reference. Free old node eventually
+ * (we still need its data for the next steps of the algorithm).
+ *
+ * 3b. IF $SPLITPOS != 0:
+ * Trim the compressed node (reallocating it as well) in order to
+ * contain $splitpos characters. Change child pointer in order to link
+ * to the split node. If new compressed node len is just 1, set
+ * iscompr to 0 (layout is the same). Fix parent's reference.
+ *
+ * 4a. IF the postfix len (the length of the remaining string of the
+ * original compressed node after the split character) is non zero,
+ * create a "postfix node". If the postfix node has just one character
+ * set iscompr to 0, otherwise iscompr to 1. Set the postfix node
+ * child pointer to $NEXT.
+ *
+ * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.
+ *
+ * 5. Set child[0] of split node to postfix node.
+ *
+ * 6. Set the split node as the current node, set current index at child[1]
+ * and continue insertion algorithm as usually.
+ *
+ * ============================= ALGO 2 =============================
+ *
+ * For case 5, that is, if we stopped in the middle of a compressed
+ * node but no mismatch was found, do:
+ *
+ * Let $SPLITPOS be the zero-based index at which, in the
+ * compressed node array of characters, we stopped iterating because
+ * there were no more keys character to match. So in the example of
+ * the node "ANNIBALE", adding the string "ANNI", the $SPLITPOS is 4.
+ *
+ * 1. Save the current compressed node $NEXT pointer (the pointer to the
+ * child element, that is always present in compressed nodes).
+ *
+ * 2. Create a "postfix node" containing all the characters from $SPLITPOS
+ * to the end. Use $NEXT as the postfix node child pointer.
+ * If the postfix node length is 1, set iscompr to 0.
+ * Set the node as a key with the associated value of the new
+ * inserted key.
+ *
+ * 3. Trim the current node to contain the first $SPLITPOS characters.
+ * As usually if the new node length is just 1, set iscompr to 0.
+ * Take the iskey / associated value as it was in the original node.
+ * Fix the parent's reference.
+ *
+ * 4. Set the postfix node as the only child pointer of the trimmed
+ * node created at step 1.
+ */
+
+ /* ------------------------- ALGORITHM 1 --------------------------- */
+ if (h->iscompr && i != len) {
+ debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
+ h->size, h->data, (void*)h);
+ debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
+ debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
+ debugf("Other (key) letter is '%c'\n", s[i]);
+
+ /* 1: Save next pointer. */
+ raxNode **childfield = raxNodeLastChildPtr(h);
+ raxNode *next;
+ memcpy(&next,childfield,sizeof(next));
+ debugf("Next is %p\n", (void*)next);
+ debugf("iskey %d\n", h->iskey);
+ if (h->iskey) {
+ debugf("key value is %p\n", raxGetData(h));
+ }
+
+ /* Set the length of the additional nodes we will need. */
+ size_t trimmedlen = j;
+ size_t postfixlen = h->size - j - 1;
+ int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
+ size_t nodesize;
+
+ /* 2: Create the split node. Also allocate the other nodes we'll need
+ * ASAP, so that it will be simpler to handle OOM. */
+ raxNode *splitnode = raxNewNode(1, split_node_is_key);
+ raxNode *trimmed = NULL;
+ raxNode *postfix = NULL;
+
+ if (trimmedlen) {
+ nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)+
+ sizeof(raxNode*);
+ if (h->iskey && !h->isnull) nodesize += sizeof(void*);
+ trimmed = rax_malloc(nodesize);
+ }
+
+ if (postfixlen) {
+ nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
+ sizeof(raxNode*);
+ postfix = rax_malloc(nodesize);
+ }
+
+ /* OOM? Abort now that the tree is untouched. */
+ if (splitnode == NULL ||
+ (trimmedlen && trimmed == NULL) ||
+ (postfixlen && postfix == NULL))
+ {
+ rax_free(splitnode);
+ rax_free(trimmed);
+ rax_free(postfix);
+ errno = ENOMEM;
+ return 0;
+ }
+ splitnode->data[0] = h->data[j];
+
+ if (j == 0) {
+ /* 3a: Replace the old node with the split node. */
+ if (h->iskey) {
+ void *ndata = raxGetData(h);
+ raxSetData(splitnode,ndata);
+ }
+ memcpy(parentlink,&splitnode,sizeof(splitnode));
+ } else {
+ /* 3b: Trim the compressed node. */
+ trimmed->size = j;
+ memcpy(trimmed->data,h->data,j);
+ trimmed->iscompr = j > 1 ? 1 : 0;
+ trimmed->iskey = h->iskey;
+ trimmed->isnull = h->isnull;
+ if (h->iskey && !h->isnull) {
+ void *ndata = raxGetData(h);
+ raxSetData(trimmed,ndata);
+ }
+ raxNode **cp = raxNodeLastChildPtr(trimmed);
+ memcpy(cp,&splitnode,sizeof(splitnode));
+ memcpy(parentlink,&trimmed,sizeof(trimmed));
+ parentlink = cp; /* Set parentlink to splitnode parent. */
+ rax->numnodes++;
+ }
+
+ /* 4: Create the postfix node: what remains of the original
+ * compressed node after the split. */
+ if (postfixlen) {
+ /* 4a: create a postfix node. */
+ postfix->iskey = 0;
+ postfix->isnull = 0;
+ postfix->size = postfixlen;
+ postfix->iscompr = postfixlen > 1;
+ memcpy(postfix->data,h->data+j+1,postfixlen);
+ raxNode **cp = raxNodeLastChildPtr(postfix);
+ memcpy(cp,&next,sizeof(next));
+ rax->numnodes++;
+ } else {
+ /* 4b: just use next as postfix node. */
+ postfix = next;
+ }
+
+ /* 5: Set splitnode first child as the postfix node. */
+ raxNode **splitchild = raxNodeLastChildPtr(splitnode);
+ memcpy(splitchild,&postfix,sizeof(postfix));
+
+ /* 6. Continue insertion: this will cause the splitnode to
+ * get a new child (the non common character at the currently
+ * inserted key). */
+ rax_free(h);
+ h = splitnode;
+ } else if (h->iscompr && i == len) {
+ /* ------------------------- ALGORITHM 2 --------------------------- */
+ debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n",
+ h->size, h->data, (void*)h, j);
+
+ /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */
+ size_t postfixlen = h->size - j;
+ size_t nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
+ sizeof(raxNode*);
+ if (data != NULL) nodesize += sizeof(void*);
+ raxNode *postfix = rax_malloc(nodesize);
+
+ nodesize = sizeof(raxNode)+j+raxPadding(j)+sizeof(raxNode*);
+ if (h->iskey && !h->isnull) nodesize += sizeof(void*);
+ raxNode *trimmed = rax_malloc(nodesize);
+
+ if (postfix == NULL || trimmed == NULL) {
+ rax_free(postfix);
+ rax_free(trimmed);
+ errno = ENOMEM;
+ return 0;
+ }
+
+ /* 1: Save next pointer. */
+ raxNode **childfield = raxNodeLastChildPtr(h);
+ raxNode *next;
+ memcpy(&next,childfield,sizeof(next));
+
+ /* 2: Create the postfix node. */
+ postfix->size = postfixlen;
+ postfix->iscompr = postfixlen > 1;
+ postfix->iskey = 1;
+ postfix->isnull = 0;
+ memcpy(postfix->data,h->data+j,postfixlen);
+ raxSetData(postfix,data);
+ raxNode **cp = raxNodeLastChildPtr(postfix);
+ memcpy(cp,&next,sizeof(next));
+ rax->numnodes++;
+
+ /* 3: Trim the compressed node. */
+ trimmed->size = j;
+ trimmed->iscompr = j > 1;
+ trimmed->iskey = 0;
+ trimmed->isnull = 0;
+ memcpy(trimmed->data,h->data,j);
+ memcpy(parentlink,&trimmed,sizeof(trimmed));
+ if (h->iskey) {
+ void *aux = raxGetData(h);
+ raxSetData(trimmed,aux);
+ }
+
+ /* Fix the trimmed node child pointer to point to
+ * the postfix node. */
+ cp = raxNodeLastChildPtr(trimmed);
+ memcpy(cp,&postfix,sizeof(postfix));
+
+ /* Finish! We don't need to continue with the insertion
+ * algorithm for ALGO 2. The key is already inserted. */
+ rax->numele++;
+ rax_free(h);
+ return 1; /* Key inserted. */
+ }
+
+ /* We walked the radix tree as far as we could, but still there are left
+ * chars in our string. We need to insert the missing nodes. */
+ while(i < len) {
+ raxNode *child;
+
+ /* If this node is going to have a single child, and there
+ * are other characters, so that that would result in a chain
+ * of single-childed nodes, turn it into a compressed node. */
+ if (h->size == 0 && len-i > 1) {
+ debugf("Inserting compressed node\n");
+ size_t comprsize = len-i;
+ if (comprsize > RAX_NODE_MAX_SIZE)
+ comprsize = RAX_NODE_MAX_SIZE;
+ raxNode *newh = raxCompressNode(h,s+i,comprsize,&child);
+ if (newh == NULL) goto oom;
+ h = newh;
+ memcpy(parentlink,&h,sizeof(h));
+ parentlink = raxNodeLastChildPtr(h);
+ i += comprsize;
+ } else {
+ debugf("Inserting normal node\n");
+ raxNode **new_parentlink;
+ raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink);
+ if (newh == NULL) goto oom;
+ h = newh;
+ memcpy(parentlink,&h,sizeof(h));
+ parentlink = new_parentlink;
+ i++;
+ }
+ rax->numnodes++;
+ h = child;
+ }
+ raxNode *newh = raxReallocForData(h,data);
+ if (newh == NULL) goto oom;
+ h = newh;
+ if (!h->iskey) rax->numele++;
+ raxSetData(h,data);
+ memcpy(parentlink,&h,sizeof(h));
+ return 1; /* Element inserted. */
+
+oom:
+ /* This code path handles out of memory after part of the sub-tree was
+ * already modified. Set the node as a key, and then remove it. However we
+ * do that only if the node is a terminal node, otherwise if the OOM
+ * happened reallocating a node in the middle, we don't need to free
+ * anything. */
+ if (h->size == 0) {
+ h->isnull = 1;
+ h->iskey = 1;
+ rax->numele++; /* Compensate the next remove. */
+ assert(raxRemove(rax,s,i,NULL) != 0);
+ }
+ errno = ENOMEM;
+ return 0;
+}
+
+/* Overwriting insert. Just a wrapper for raxGenericInsert() that will
+ * update the element if there is already one for the same key. */
+int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
+ return raxGenericInsert(rax,s,len,data,old,1);
+}
+
+/* Non overwriting insert function: if an element with the same key
+ * exists, the value is not updated and the function returns 0.
+ * This is just a wrapper for raxGenericInsert(). */
+int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
+ return raxGenericInsert(rax,s,len,data,old,0);
+}
+
+/* Find a key in the rax, returns raxNotFound special void pointer value
+ * if the item was not found, otherwise the value associated with the
+ * item is returned. */
+void *raxFind(rax *rax, unsigned char *s, size_t len) {
+ raxNode *h;
+
+ debugf("### Lookup: %.*s\n", (int)len, s);
+ int splitpos = 0;
+ size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL);
+ if (i != len || (h->iscompr && splitpos != 0) || !h->iskey)
+ return raxNotFound;
+ return raxGetData(h);
+}
+
+/* Return the memory address where the 'parent' node stores the specified
+ * 'child' pointer, so that the caller can update the pointer with another
+ * one if needed. The function assumes it will find a match, otherwise the
+ * operation is an undefined behavior (it will continue scanning the
+ * memory without any bound checking). */
+raxNode **raxFindParentLink(raxNode *parent, raxNode *child) {
+ raxNode **cp = raxNodeFirstChildPtr(parent);
+ raxNode *c;
+ while(1) {
+ memcpy(&c,cp,sizeof(c));
+ if (c == child) break;
+ cp++;
+ }
+ return cp;
+}
+
/* Low level child removal from node. The new node pointer (after the child
 * removal) is returned. Note that this function does not fix the pointer
 * of the parent node in its parent, so this task is up to the caller.
 * The function never fails for out of memory: on realloc failure the old
 * (larger but valid) node is returned unchanged in address. */
raxNode *raxRemoveChild(raxNode *parent, raxNode *child) {
    debugnode("raxRemoveChild before", parent);
    /* If parent is a compressed node (having a single child, as for definition
     * of the data structure), the removal of the child consists into turning
     * it into a normal node without children. */
    if (parent->iscompr) {
        /* Save the value before mutating the header bits: raxGetData()
         * location depends on size/iscompr, which we are about to change. */
        void *data = NULL;
        if (parent->iskey) data = raxGetData(parent);
        parent->isnull = 0;
        parent->iscompr = 0;
        parent->size = 0;
        if (parent->iskey) raxSetData(parent,data);
        debugnode("raxRemoveChild after", parent);
        return parent;
    }

    /* Otherwise we need to scan for the child pointer and memmove()
     * accordingly.
     *
     * 1. To start we seek the first element in both the children
     * pointers and edge bytes in the node. */
    raxNode **cp = raxNodeFirstChildPtr(parent);
    raxNode **c = cp;
    unsigned char *e = parent->data;

    /* 2. Search the child pointer to remove inside the array of children
     * pointers. 'c' and 'e' advance in lockstep so that when the pointer
     * is found, 'e' addresses the corresponding edge byte. */
    while(1) {
        raxNode *aux;
        memcpy(&aux,c,sizeof(aux));
        if (aux == child) break;
        c++;
        e++;
    }

    /* 3. Remove the edge and the pointer by memmoving the remaining children
     * pointer and edge bytes one position before. */
    int taillen = parent->size - (e - parent->data) - 1;
    debugf("raxRemoveChild tail len: %d\n", taillen);
    memmove(e,e+1,taillen);

    /* Compute the shift, that is the amount of bytes we should move our
     * child pointers to the left, since the removal of one edge character
     * and the corresponding padding change, may change the layout.
     * We just check if in the old version of the node there was at the
     * end just a single byte and all padding: in that case removing one char
     * will remove a whole sizeof(void*) word.
     * NOTE(review): the constant 4 is presumably the size of the raxNode
     * header preceding the data bytes — confirm against the raxNode
     * definition if this layout ever changes. */
    size_t shift = ((parent->size+4) % sizeof(void*)) == 1 ? sizeof(void*) : 0;

    /* Move the children pointers before the deletion point. */
    if (shift)
        memmove(((char*)cp)-shift,cp,(parent->size-taillen-1)*sizeof(raxNode**));

    /* Move the remaining "tail" pointers at the right position as well.
     * The trailing value pointer (if the node is a non-null key) travels
     * together with the tail, hence 'valuelen' is added to the move size. */
    size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void*) : 0;
    memmove(((char*)c)-shift,c+1,taillen*sizeof(raxNode**)+valuelen);

    /* 4. Update size. */
    parent->size--;

    /* realloc the node according to the theoretical memory usage, to free
     * data if we are over-allocating right now. */
    raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent));
    if (newnode) {
        debugnode("raxRemoveChild after", newnode);
    }
    /* Note: if rax_realloc() fails we just return the old address, which
     * is valid. */
    return newnode ? newnode : parent;
}
+
/* Remove the specified item. Returns 1 if the item was found and
 * deleted, 0 otherwise. If 'old' is not NULL, the value associated with
 * the removed key is stored there before the key is unlinked. */
int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
    raxNode *h;
    raxStack ts;

    debugf("### Delete: %.*s\n", (int)len, s);
    raxStackInit(&ts);
    int splitpos = 0;
    size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts);
    /* Same exact-match test used by raxFind(): key fully consumed, not in
     * the middle of a compressed node, and the stop node holds a key. */
    if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) {
        raxStackFree(&ts);
        return 0;
    }
    /* Report the old value to the caller before clearing the key flag. */
    if (old) *old = raxGetData(h);
    h->iskey = 0;
    rax->numele--;

    /* If this node has no children, the deletion needs to reclaim the
     * no longer used nodes. This is an iterative process that needs to
     * walk the tree upward, deleting all the nodes with just one child
     * that are not keys, until the head of the rax is reached or the first
     * node with more than one child is found. */

    int trycompress = 0; /* Will be set to 1 if we should try to optimize the
                            tree resulting from the deletion. */

    if (h->size == 0) {
        debugf("Key deleted in node without children. Cleanup needed.\n");
        raxNode *child = NULL;
        /* Free the chain of now-useless nodes bottom-up, using the stack
         * of parents collected by raxLowWalk(). */
        while(h != rax->head) {
            child = h;
            debugf("Freeing child %p [%.*s] key:%d\n", (void*)child,
                (int)child->size, (char*)child->data, child->iskey);
            rax_free(child);
            rax->numnodes--;
            h = raxStackPop(&ts);
            /* If this node has more than one child, or actually holds
             * a key, stop here. */
            if (h->iskey || (!h->iscompr && h->size != 1)) break;
        }
        if (child) {
            debugf("Unlinking child %p from parent %p\n",
                (void*)child, (void*)h);
            /* 'child' was freed above: it is used here only as an address
             * to locate the dangling pointer inside 'h' and remove it. */
            raxNode *new = raxRemoveChild(h,child);
            if (new != h) {
                /* raxRemoveChild() may realloc: fix the link in the
                 * grandparent (or the rax head if 'h' was the root). */
                raxNode *parent = raxStackPeek(&ts);
                raxNode **parentlink;
                if (parent == NULL) {
                    parentlink = &rax->head;
                } else {
                    parentlink = raxFindParentLink(parent,h);
                }
                memcpy(parentlink,&new,sizeof(new));
            }

            /* If after the removal the node has just a single child
             * and is not a key, we need to try to compress it. */
            if (new->size == 1 && new->iskey == 0) {
                trycompress = 1;
                h = new;
            }
        }
    } else if (h->size == 1) {
        /* If the node had just one child, after the removal of the key
         * further compression with adjacent nodes is potentially possible. */
        trycompress = 1;
    }

    /* Don't try node compression if our nodes pointers stack is not
     * complete because of OOM while executing raxLowWalk() */
    if (trycompress && ts.oom) trycompress = 0;

    /* Recompression: if trycompress is true, 'h' points to a radix tree node
     * that changed in a way that could allow to compress nodes in this
     * sub-branch. Compressed nodes represent chains of nodes that are not
     * keys and have a single child, so there are two deletion events that
     * may alter the tree so that further compression is needed:
     *
     * 1) A node with a single child was a key and now no longer is a key.
     * 2) A node with two children now has just one child.
     *
     * We try to navigate upward till there are other nodes that can be
     * compressed, when we reach the upper node which is not a key and has
     * a single child, we scan the chain of children to collect the
     * compressible part of the tree, and replace the current node with the
     * new one, fixing the child pointer to reference the first non
     * compressible node.
     *
     * Example of case "1". A tree stores the keys "FOO" = 1 and
     * "FOOBAR" = 2:
     *
     *
     * "FOO" -> "BAR" -> [] (2)
     *           (1)
     *
     * After the removal of "FOO" the tree can be compressed as:
     *
     * "FOOBAR" -> [] (2)
     *
     *
     * Example of case "2". A tree stores the keys "FOOBAR" = 1 and
     * "FOOTER" = 2:
     *
     *          |B| -> "AR" -> [] (1)
     * "FOO" -> |-|
     *          |T| -> "ER" -> [] (2)
     *
     * After the removal of "FOOTER" the resulting tree is:
     *
     * "FOO" -> |B| -> "AR" -> [] (1)
     *
     * That can be compressed into:
     *
     * "FOOBAR" -> [] (1)
     */
    if (trycompress) {
        debugf("After removing %.*s:\n", (int)len, s);
        debugnode("Compression may be needed",h);
        debugf("Seek start node\n");

        /* Try to reach the upper node that is compressible.
         * At the end of the loop 'h' will point to the first node we
         * can try to compress and 'parent' to its parent. */
        raxNode *parent;
        while(1) {
            parent = raxStackPop(&ts);
            if (!parent || parent->iskey ||
                (!parent->iscompr && parent->size != 1)) break;
            h = parent;
            debugnode("Going up to",h);
        }
        raxNode *start = h; /* Compression starting node. */

        /* Scan chain of nodes we can compress. */
        size_t comprsize = h->size;
        int nodes = 1;
        while(h->size != 0) {
            raxNode **cp = raxNodeLastChildPtr(h);
            memcpy(&h,cp,sizeof(h));
            if (h->iskey || (!h->iscompr && h->size != 1)) break;
            /* Stop here if going to the next node would result into
             * a compressed node larger than h->size can hold. */
            if (comprsize + h->size > RAX_NODE_MAX_SIZE) break;
            nodes++;
            comprsize += h->size;
        }
        if (nodes > 1) {
            /* If we can compress, create the new node and populate it. */
            size_t nodesize =
                sizeof(raxNode)+comprsize+raxPadding(comprsize)+sizeof(raxNode*);
            raxNode *new = rax_malloc(nodesize);
            /* An out of memory here just means we cannot optimize this
             * node, but the tree is left in a consistent state. */
            if (new == NULL) {
                raxStackFree(&ts);
                return 1;
            }
            new->iskey = 0;
            new->isnull = 0;
            new->iscompr = 1;
            new->size = comprsize;
            rax->numnodes++;

            /* Scan again, this time to populate the new node content and
             * to fix the new node child pointer. At the same time we free
             * all the nodes that we'll no longer use. */
            comprsize = 0;
            h = start;
            while(h->size != 0) {
                memcpy(new->data+comprsize,h->data,h->size);
                comprsize += h->size;
                raxNode **cp = raxNodeLastChildPtr(h);
                raxNode *tofree = h;
                memcpy(&h,cp,sizeof(h));
                rax_free(tofree); rax->numnodes--;
                if (h->iskey || (!h->iscompr && h->size != 1)) break;
            }
            debugnode("New node",new);

            /* Now 'h' points to the first node that we still need to use,
             * so our new node child pointer will point to it. */
            raxNode **cp = raxNodeLastChildPtr(new);
            memcpy(cp,&h,sizeof(h));

            /* Fix parent link. */
            if (parent) {
                raxNode **parentlink = raxFindParentLink(parent,start);
                memcpy(parentlink,&new,sizeof(new));
            } else {
                rax->head = new;
            }

            debugf("Compressed %d nodes, %d total bytes\n",
                nodes, (int)comprsize);
        }
    }
    raxStackFree(&ts);
    return 1;
}
+
+/* This is the core of raxFree(): performs a depth-first scan of the
+ * tree and releases all the nodes found. */
+void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) {
+ debugnode("free traversing",n);
+ int numchildren = n->iscompr ? 1 : n->size;
+ raxNode **cp = raxNodeLastChildPtr(n);
+ while(numchildren--) {
+ raxNode *child;
+ memcpy(&child,cp,sizeof(child));
+ raxRecursiveFree(rax,child,free_callback);
+ cp--;
+ }
+ debugnode("free depth-first",n);
+ if (free_callback && n->iskey && !n->isnull)
+ free_callback(raxGetData(n));
+ rax_free(n);
+ rax->numnodes--;
+}
+
+/* Free a whole radix tree, calling the specified callback in order to
+ * free the auxiliary data. */
+void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) {
+ raxRecursiveFree(rax,rax->head,free_callback);
+ assert(rax->numnodes == 0);
+ rax_free(rax);
+}
+
+/* Free a whole radix tree. */
+void raxFree(rax *rax) {
+ raxFreeWithCallback(rax,NULL);
+}
+
+/* ------------------------------- Iterator --------------------------------- */
+
+/* Initialize a Rax iterator. This call should be performed a single time
+ * to initialize the iterator, and must be followed by a raxSeek() call,
+ * otherwise the raxPrev()/raxNext() functions will just return EOF. */
+void raxStart(raxIterator *it, rax *rt) {
+ it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */
+ it->rt = rt;
+ it->key_len = 0;
+ it->key = it->key_static_string;
+ it->key_max = RAX_ITER_STATIC_LEN;
+ it->data = NULL;
+ it->node_cb = NULL;
+ raxStackInit(&it->stack);
+}
+
+/* Append characters at the current key string of the iterator 'it'. This
+ * is a low level function used to implement the iterator, not callable by
+ * the user. Returns 0 on out of memory, otherwise 1 is returned. */
+int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) {
+ if (len == 0) return 1;
+ if (it->key_max < it->key_len+len) {
+ unsigned char *old = (it->key == it->key_static_string) ? NULL :
+ it->key;
+ size_t new_max = (it->key_len+len)*2;
+ it->key = rax_realloc(old,new_max);
+ if (it->key == NULL) {
+ it->key = (!old) ? it->key_static_string : old;
+ errno = ENOMEM;
+ return 0;
+ }
+ if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len);
+ it->key_max = new_max;
+ }
+ /* Use memmove since there could be an overlap between 's' and
+ * it->key when we use the current key in order to re-seek. */
+ memmove(it->key+it->key_len,s,len);
+ it->key_len += len;
+ return 1;
+}
+
/* Remove the specified number of chars from the right of the current
 * iterator key. Only the logical length is reduced: the underlying buffer
 * is never shrunk, so the bytes can be re-appended cheaply later.
 * The caller must guarantee count <= it->key_len: there is no underflow
 * check here. */
void raxIteratorDelChars(raxIterator *it, size_t count) {
    it->key_len -= count;
}
+
/* Do an iteration step towards the next element. At the end of the step the
 * iterator key will represent the (new) current key. If it is not possible
 * to step in the specified direction since there are no longer elements, the
 * iterator is flagged with RAX_ITER_EOF.
 *
 * If 'noup' is true the function starts directly scanning for the next
 * lexicographically smaller children, and the current node is already assumed
 * to be the parent of the last key node, so the first operation to go back to
 * the parent will be skipped. This option is used by raxSeek() when
 * implementing seeking a non existing element with the ">" or "<" options:
 * the starting node is not a key in that particular case, so we start the scan
 * from a node that does not represent the key set.
 *
 * The function returns 1 on success or 0 on out of memory. */
int raxIteratorNextStep(raxIterator *it, int noup) {
    if (it->flags & RAX_ITER_EOF) {
        return 1;
    } else if (it->flags & RAX_ITER_JUST_SEEKED) {
        /* raxSeek() already positioned us on an element: consume the flag
         * and report the current element without moving. */
        it->flags &= ~RAX_ITER_JUST_SEEKED;
        return 1;
    }

    /* Save key len, stack items and the node where we are currently
     * so that on iterator EOF we can restore the current key and state. */
    size_t orig_key_len = it->key_len;
    size_t orig_stack_items = it->stack.items;
    raxNode *orig_node = it->node;

    while(1) {
        /* A compressed node has exactly one child by definition. */
        int children = it->node->iscompr ? 1 : it->node->size;
        if (!noup && children) {
            debugf("GO DEEPER\n");
            /* Seek the lexicographically smaller key in this subtree, which
             * is the first one found always going towards the first child
             * of every successive node. */
            if (!raxStackPush(&it->stack,it->node)) return 0;
            raxNode **cp = raxNodeFirstChildPtr(it->node);
            if (!raxIteratorAddChars(it,it->node->data,
                it->node->iscompr ? it->node->size : 1)) return 0;
            memcpy(&it->node,cp,sizeof(it->node));
            /* Call the node callback if any, and replace the node pointer
             * if the callback returns true. */
            if (it->node_cb && it->node_cb(&it->node))
                memcpy(cp,&it->node,sizeof(it->node));
            /* For "next" step, stop every time we find a key along the
             * way, since the key is lexicographically smaller compared to
             * what follows in the sub-children. */
            if (it->node->iskey) {
                it->data = raxGetData(it->node);
                return 1;
            }
        } else {
            /* If we finished exploring the previous sub-tree, switch to the
             * new one: go upper until a node is found where there are
             * children representing keys lexicographically greater than the
             * current key. */
            while(1) {
                int old_noup = noup;

                /* Already on head? Can't go up, iteration finished:
                 * restore the saved state so the iterator still points
                 * at the last valid element. */
                if (!noup && it->node == it->rt->head) {
                    it->flags |= RAX_ITER_EOF;
                    it->stack.items = orig_stack_items;
                    it->key_len = orig_key_len;
                    it->node = orig_node;
                    return 1;
                }
                /* If there are no children at the current node, try parent's
                 * next child. 'prevchild' is the edge byte that led to the
                 * node we are leaving: only greater edges are unvisited. */
                unsigned char prevchild = it->key[it->key_len-1];
                if (!noup) {
                    it->node = raxStackPop(&it->stack);
                } else {
                    /* 'noup' skips the first pop only: from here on we
                     * move up normally. */
                    noup = 0;
                }
                /* Adjust the current key to represent the node we are
                 * at. */
                int todel = it->node->iscompr ? it->node->size : 1;
                raxIteratorDelChars(it,todel);

                /* Try visiting the next child if there was at least one
                 * additional child. */
                if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
                    raxNode **cp = raxNodeFirstChildPtr(it->node);
                    int i = 0;
                    while (i < it->node->size) {
                        debugf("SCAN NEXT %c\n", it->node->data[i]);
                        if (it->node->data[i] > prevchild) break;
                        i++;
                        cp++;
                    }
                    if (i != it->node->size) {
                        debugf("SCAN found a new node\n");
                        raxIteratorAddChars(it,it->node->data+i,1);
                        if (!raxStackPush(&it->stack,it->node)) return 0;
                        memcpy(&it->node,cp,sizeof(it->node));
                        /* Call the node callback if any, and replace the node
                         * pointer if the callback returns true. */
                        if (it->node_cb && it->node_cb(&it->node))
                            memcpy(cp,&it->node,sizeof(it->node));
                        if (it->node->iskey) {
                            it->data = raxGetData(it->node);
                            return 1;
                        }
                        /* Not a key: break to the outer loop and descend
                         * into this new subtree. */
                        break;
                    }
                }
            }
        }
    }
}
+
+/* Seek the greatest key in the subtree at the current node. Return 0 on
+ * out of memory, otherwise 1. This is a helper function for different
+ * iteration functions below. */
+int raxSeekGreatest(raxIterator *it) {
+ while(it->node->size) {
+ if (it->node->iscompr) {
+ if (!raxIteratorAddChars(it,it->node->data,
+ it->node->size)) return 0;
+ } else {
+ if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1))
+ return 0;
+ }
+ raxNode **cp = raxNodeLastChildPtr(it->node);
+ if (!raxStackPush(&it->stack,it->node)) return 0;
+ memcpy(&it->node,cp,sizeof(it->node));
+ }
+ return 1;
+}
+
/* Like raxIteratorNextStep() but implements an iteration step moving
 * to the lexicographically previous element. The 'noup' option has a similar
 * effect to the one of raxIteratorNextStep().
 * Returns 1 on success (including the EOF condition), 0 on out of memory. */
int raxIteratorPrevStep(raxIterator *it, int noup) {
    if (it->flags & RAX_ITER_EOF) {
        return 1;
    } else if (it->flags & RAX_ITER_JUST_SEEKED) {
        /* raxSeek() already positioned us: consume the flag and report
         * the current element without moving. */
        it->flags &= ~RAX_ITER_JUST_SEEKED;
        return 1;
    }

    /* Save key len, stack items and the node where we are currently
     * so that on iterator EOF we can restore the current key and state. */
    size_t orig_key_len = it->key_len;
    size_t orig_stack_items = it->stack.items;
    raxNode *orig_node = it->node;

    while(1) {
        int old_noup = noup;

        /* Already on head? Can't go up, iteration finished: restore the
         * saved state so the iterator still points at the last valid
         * element. */
        if (!noup && it->node == it->rt->head) {
            it->flags |= RAX_ITER_EOF;
            it->stack.items = orig_stack_items;
            it->key_len = orig_key_len;
            it->node = orig_node;
            return 1;
        }

        /* 'prevchild' is the edge byte that led us to the node we are
         * leaving: only smaller edges are still unvisited going "prev". */
        unsigned char prevchild = it->key[it->key_len-1];
        if (!noup) {
            it->node = raxStackPop(&it->stack);
        } else {
            /* 'noup' skips the first pop only. */
            noup = 0;
        }

        /* Adjust the current key to represent the node we are
         * at. */
        int todel = it->node->iscompr ? it->node->size : 1;
        raxIteratorDelChars(it,todel);

        /* Try visiting the prev child if there is at least one
         * child. */
        if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
            /* Scan edges right-to-left for the first one smaller than
             * the edge we came from. */
            raxNode **cp = raxNodeLastChildPtr(it->node);
            int i = it->node->size-1;
            while (i >= 0) {
                debugf("SCAN PREV %c\n", it->node->data[i]);
                if (it->node->data[i] < prevchild) break;
                i--;
                cp--;
            }
            /* If we found a new subtree to explore in this node,
             * go deeper following all the last children in order to
             * find the key lexicographically greater. */
            if (i != -1) {
                debugf("SCAN found a new node\n");
                /* Enter the node we just found. */
                if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0;
                if (!raxStackPush(&it->stack,it->node)) return 0;
                memcpy(&it->node,cp,sizeof(it->node));
                /* Seek sub-tree max. */
                if (!raxSeekGreatest(it)) return 0;
            }
        }

        /* Return the key: this could be the key we found scanning a new
         * subtree, or if we did not find a new subtree to explore here,
         * before giving up with this node, check if it's a key itself. */
        if (it->node->iskey) {
            it->data = raxGetData(it->node);
            return 1;
        }
    }
}
+
/* Seek an iterator at the specified element.
 * 'op' is one of "==", ">", ">=", "<", "<=", "^" (first element) or
 * "$" (last element); for "^" and "$" the 'ele'/'len' arguments are unused.
 * Return 0 if the seek failed for syntax error or out of memory. Otherwise
 * 1 is returned. When 0 is returned for out of memory, errno is set to
 * the ENOMEM value. */
int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) {
    int eq = 0, lt = 0, gt = 0, first = 0, last = 0;

    it->stack.items = 0; /* Just resetting. Initialized by raxStart(). */
    it->flags |= RAX_ITER_JUST_SEEKED;
    it->flags &= ~RAX_ITER_EOF;
    it->key_len = 0;
    it->node = NULL;

    /* Set flags according to the operator used to perform the seek. */
    if (op[0] == '>') {
        gt = 1;
        if (op[1] == '=') eq = 1;
    } else if (op[0] == '<') {
        lt = 1;
        if (op[1] == '=') eq = 1;
    } else if (op[0] == '=') {
        eq = 1;
    } else if (op[0] == '^') {
        first = 1;
    } else if (op[0] == '$') {
        last = 1;
    } else {
        /* errno is cleared so the caller can tell a syntax error apart
         * from an out of memory failure (which sets ENOMEM). */
        errno = 0;
        return 0; /* Error. */
    }

    /* If there are no elements, set the EOF condition immediately and
     * return. */
    if (it->rt->numele == 0) {
        it->flags |= RAX_ITER_EOF;
        return 1;
    }

    if (first) {
        /* Seeking the first key greater or equal to the empty string
         * is equivalent to seeking the smaller key available. */
        return raxSeek(it,">=",NULL,0);
    }

    if (last) {
        /* Find the greatest key taking always the last child till a
         * final node is found. */
        it->node = it->rt->head;
        if (!raxSeekGreatest(it)) return 0;
        assert(it->node->iskey);
        it->data = raxGetData(it->node);
        return 1;
    }

    /* We need to seek the specified key. What we do here is to actually
     * perform a lookup, and later invoke the prev/next key code that
     * we already use for iteration. */
    int splitpos = 0;
    size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack);

    /* Return OOM on incomplete stack info. */
    if (it->stack.oom) return 0;

    if (eq && i == len && (!it->node->iscompr || splitpos == 0) &&
        it->node->iskey)
    {
        /* We found our node, since the key matches and we have an
         * "equal" condition. */
        if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */
        it->data = raxGetData(it->node);
    } else if (lt || gt) {
        /* Exact key not found or eq flag not set. We have to set as current
         * key the one represented by the node we stopped at, and perform
         * a next/prev operation to seek.
         * 'i-splitpos' excludes the partially-matched chars of a
         * compressed node, which are added below case by case.
         * NOTE(review): the return value of this raxIteratorAddChars()
         * call is not checked, so an OOM here is silently ignored —
         * confirm whether this matches the intended error contract. */
        raxIteratorAddChars(it, ele, i-splitpos);

        /* We need to set the iterator in the correct state to call next/prev
         * step in order to seek the desired element. */
        debugf("After initial seek: i=%d len=%d key=%.*s\n",
            (int)i, (int)len, (int)it->key_len, it->key);
        if (i != len && !it->node->iscompr) {
            /* If we stopped in the middle of a normal node because of a
             * mismatch, add the mismatching character to the current key
             * and call the iterator with the 'noup' flag so that it will try
             * to seek the next/prev child in the current node directly based
             * on the mismatching character. */
            if (!raxIteratorAddChars(it,ele+i,1)) return 0;
            debugf("Seek normal node on mismatch: %.*s\n",
                (int)it->key_len, (char*)it->key);

            /* Temporarily clear JUST_SEEKED so the step function actually
             * moves instead of treating the call as a no-op. */
            it->flags &= ~RAX_ITER_JUST_SEEKED;
            if (lt && !raxIteratorPrevStep(it,1)) return 0;
            if (gt && !raxIteratorNextStep(it,1)) return 0;
            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
        } else if (i != len && it->node->iscompr) {
            debugf("Compressed mismatch: %.*s\n",
                (int)it->key_len, (char*)it->key);
            /* In case of a mismatch within a compressed node. */
            int nodechar = it->node->data[splitpos];
            int keychar = ele[i];
            it->flags &= ~RAX_ITER_JUST_SEEKED;
            if (gt) {
                /* If the key the compressed node represents is greater
                 * than our seek element, continue forward, otherwise set the
                 * state in order to go back to the next sub-tree. */
                if (nodechar > keychar) {
                    if (!raxIteratorNextStep(it,0)) return 0;
                } else {
                    if (!raxIteratorAddChars(it,it->node->data,it->node->size))
                        return 0;
                    if (!raxIteratorNextStep(it,1)) return 0;
                }
            }
            if (lt) {
                /* If the key the compressed node represents is smaller
                 * than our seek element, seek the greater key in this
                 * subtree, otherwise set the state in order to go back to
                 * the previous sub-tree. */
                if (nodechar < keychar) {
                    if (!raxSeekGreatest(it)) return 0;
                    it->data = raxGetData(it->node);
                } else {
                    if (!raxIteratorAddChars(it,it->node->data,it->node->size))
                        return 0;
                    if (!raxIteratorPrevStep(it,1)) return 0;
                }
            }
            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
        } else {
            debugf("No mismatch: %.*s\n",
                (int)it->key_len, (char*)it->key);
            /* If there was no mismatch we are into a node representing the
             * key, (but which is not a key or the seek operator does not
             * include 'eq'), or we stopped in the middle of a compressed node
             * after processing all the key. Continue iterating as this was
             * a legitimate key we stopped at. */
            it->flags &= ~RAX_ITER_JUST_SEEKED;
            if (it->node->iscompr && it->node->iskey && splitpos && lt) {
                /* If we stopped in the middle of a compressed node with
                 * perfect match, and the condition is to seek a key "<" than
                 * the specified one, then if this node is a key it already
                 * represents our match. For instance we may have nodes:
                 *
                 * "f" -> "oobar" = 1 -> "" = 2
                 *
                 * Representing keys "f" = 1, "foobar" = 2. A seek for
                 * the key < "foo" will stop in the middle of the "oobar"
                 * node, but will be our match, representing the key "f".
                 *
                 * So in that case, we don't seek backward. */
                it->data = raxGetData(it->node);
            } else {
                if (gt && !raxIteratorNextStep(it,0)) return 0;
                if (lt && !raxIteratorPrevStep(it,0)) return 0;
            }
            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
        }
    } else {
        /* If we are here just eq was set but no match was found. */
        it->flags |= RAX_ITER_EOF;
        return 1;
    }
    return 1;
}
+
+/* Go to the next element in the scope of the iterator 'it'.
+ * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
+ * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
+int raxNext(raxIterator *it) {
+ if (!raxIteratorNextStep(it,0)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ if (it->flags & RAX_ITER_EOF) {
+ errno = 0;
+ return 0;
+ }
+ return 1;
+}
+
+/* Go to the previous element in the scope of the iterator 'it'.
+ * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
+ * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
+int raxPrev(raxIterator *it) {
+ if (!raxIteratorPrevStep(it,0)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ if (it->flags & RAX_ITER_EOF) {
+ errno = 0;
+ return 0;
+ }
+ return 1;
+}
+
/* Perform a random walk starting in the current position of the iterator.
 * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned
 * and the iterator is set to the node reached after doing a random walk
 * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed
 * using a random number of steps between 1 and two times the logarithm of
 * the number of elements.
 *
 * NOTE: if you use this function to generate random elements from the radix
 * tree, expect a disappointing distribution. A random walk produces good
 * random elements if the tree is not sparse, however in the case of a radix
 * tree certain keys will be reported much more often than others. At least
 * this function should be able to explore every possible element eventually. */
int raxRandomWalk(raxIterator *it, size_t steps) {
    if (it->rt->numele == 0) {
        it->flags |= RAX_ITER_EOF;
        return 0;
    }

    if (steps == 0) {
        /* Default walk length: random in [1, 2*(1+floor(log(numele)))]. */
        size_t fle = 1+floor(log(it->rt->numele));
        fle *= 2;
        steps = 1 + rand() % fle;
    }

    raxNode *n = it->node;
    /* Keep walking until the step budget is spent AND we landed on a key,
     * so the iterator always ends up on an actual element. */
    while(steps > 0 || !n->iskey) {
        int numchildren = n->iscompr ? 1 : n->size;
        /* Pick among the children plus, when not at the head, one extra
         * slot (r == numchildren) meaning "go up to the parent". */
        int r = rand() % (numchildren+(n != it->rt->head));

        if (r == numchildren) {
            /* Go up to parent. */
            n = raxStackPop(&it->stack);
            int todel = n->iscompr ? n->size : 1;
            raxIteratorDelChars(it,todel);
        } else {
            /* Select a random child. For a compressed node the whole edge
             * string is appended; otherwise just the chosen edge byte. */
            if (n->iscompr) {
                if (!raxIteratorAddChars(it,n->data,n->size)) return 0;
            } else {
                if (!raxIteratorAddChars(it,n->data+r,1)) return 0;
            }
            raxNode **cp = raxNodeFirstChildPtr(n)+r;
            if (!raxStackPush(&it->stack,n)) return 0;
            memcpy(&n,cp,sizeof(n));
        }
        /* Only landing on keys consumes steps. */
        if (n->iskey) steps--;
    }
    it->node = n;
    it->data = raxGetData(it->node);
    return 1;
}
+
+/* Compare the key currently pointed by the iterator to the specified
+ * key according to the specified operator. Returns 1 if the comparison is
+ * true, otherwise 0 is returned. */
+/* Compare the key currently pointed by the iterator 'iter' against the
+ * given 'key' of 'key_len' bytes, using the comparison operator passed
+ * as a string: "==", ">", ">=", "<" or "<=". Returns 1 if the relation
+ * holds, otherwise 0 (0 is also returned on operator syntax errors). */
+int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) {
+    int want_eq = (op[0] == '=' || op[1] == '=');
+    int want_gt = (op[0] == '>');
+    int want_lt = (op[0] == '<');
+
+    /* Reject operators that are neither ordering nor equality. */
+    if (!want_gt && !want_lt && op[1] != '=') return 0; /* Syntax error. */
+
+    /* Compare the shared prefix of the two keys. */
+    size_t shared = key_len < iter->key_len ? key_len : iter->key_len;
+    int diff = memcmp(iter->key,key,shared);
+
+    /* Pure equality: prefixes must match and lengths be identical. */
+    if (!want_lt && !want_gt) return diff == 0 && key_len == iter->key_len;
+
+    /* Handle >, >=, <, <= when the common prefix differs. */
+    if (diff > 0) return want_gt;
+    if (diff < 0) return want_lt;
+
+    /* Same prefix: the longer key is the greater one. */
+    if (want_eq && key_len == iter->key_len) return 1;
+    if (want_lt) return iter->key_len < key_len;
+    if (want_gt) return iter->key_len > key_len;
+    return 0; /* Not reached: equality-only was handled above. */
+}
+
+/* Free the iterator. */
+/* Free the iterator resources: the key buffer (only if it was heap
+ * allocated, i.e. grew past the static buffer) and the node stack. */
+void raxStop(raxIterator *it) {
+    unsigned char *keybuf = it->key;
+    if (keybuf != it->key_static_string) rax_free(keybuf);
+    raxStackFree(&it->stack);
+}
+
+/* Return if the iterator is in an EOF state. This happens when raxSeek()
+ * failed to seek an appropriate element, so that raxNext() or raxPrev()
+ * will return zero, or when an EOF condition was reached while iterating
+ * with raxNext() and raxPrev(). */
+int raxEOF(raxIterator *it) {
+    /* Non-zero (the raw flag bit) when EOF was reached, 0 otherwise. */
+    return it->flags & RAX_ITER_EOF;
+}
+
+/* Return the number of elements inside the radix tree. */
+uint64_t raxSize(rax *rax) {
+    /* O(1): the element count is maintained by insert/remove. */
+    return rax->numele;
+}
+
+/* ----------------------------- Introspection ------------------------------ */
+
+/* This function is mostly used for debugging and learning purposes.
+ * It shows an ASCII representation of a tree on standard output, outline
+ * all the nodes and the contained keys.
+ *
+ * The representation is as follow:
+ *
+ * "foobar" (compressed node)
+ * [abc] (normal node with three children)
+ * [abc]=0x12345678 (node is a key, pointing to value 0x12345678)
+ * [] (a normal empty node)
+ *
+ * Children are represented in new indented lines, each child prefixed by
+ * the "`-(x)" string, where "x" is the edge byte.
+ *
+ * [abc]
+ * `-(a) "ladin"
+ * `-(b) [kj]
+ * `-(c) []
+ *
+ * However when a node has a single child the following representation
+ * is used instead:
+ *
+ * [abc] -> "ladin" -> []
+ */
+
+/* The actual implementation of raxShow(): recursively prints the node 'n'
+ * and all of its children to standard output. 'level' is the recursion
+ * depth (0 only for the root call) and 'lpad' the left padding, in
+ * characters, accumulated so far for indenting multi-child branches. */
+void raxRecursiveShow(int level, int lpad, raxNode *n) {
+    /* Compressed nodes print between double quotes, normal nodes
+     * between square brackets. */
+    char s = n->iscompr ? '"' : '[';
+    char e = n->iscompr ? '"' : ']';
+
+    int numchars = printf("%c%.*s%c", s, n->size, n->data, e);
+    if (n->iskey) {
+        numchars += printf("=%p",raxGetData(n));
+    }
+
+    int numchildren = n->iscompr ? 1 : n->size;
+    /* Note that 7 and 4 magic constants are the string length
+     * of " `-(x) " and " -> " respectively. */
+    if (level) {
+        lpad += (numchildren > 1) ? 7 : 4;
+        if (numchildren == 1) lpad += numchars;
+    }
+    raxNode **cp = raxNodeFirstChildPtr(n);
+    for (int i = 0; i < numchildren; i++) {
+        char *branch = " `-(%c) ";
+        if (numchildren > 1) {
+            /* Multiple children: one indented line per child. */
+            printf("\n");
+            for (int j = 0; j < lpad; j++) putchar(' ');
+            printf(branch,n->data[i]);
+        } else {
+            /* Single child: chain it on the same line with "->". */
+            printf(" -> ");
+        }
+        raxNode *child;
+        /* memcpy(): child pointers inside the node may be unaligned. */
+        memcpy(&child,cp,sizeof(child));
+        raxRecursiveShow(level+1,lpad,child);
+        cp++;
+    }
+}
+
+/* Show a tree, as outlined in the comment above. Debugging/learning
+ * helper: writes to standard output. */
+void raxShow(rax *rax) {
+    raxRecursiveShow(0,0,rax->head);
+    putchar('\n');
+}
+
+/* Used by debugnode() macro to show info about a given node. Output is
+ * suppressed unless debug messages were enabled (see raxSetDebugMsg()). */
+void raxDebugShowNode(const char *msg, raxNode *n) {
+    if (raxDebugMsg == 0) return;
+    printf("%s: %p [%.*s] key:%u size:%u children:",
+        msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size);
+    /* Compressed nodes always have exactly one child. */
+    int numcld = n->iscompr ? 1 : n->size;
+    /* Rewind from the last child pointer to the first one. */
+    raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1);
+    while(numcld--) {
+        raxNode *child;
+        /* memcpy(): child pointers inside the node may be unaligned. */
+        memcpy(&child,cldptr,sizeof(child));
+        cldptr++;
+        printf("%p ", (void*)child);
+    }
+    printf("\n");
+    fflush(stdout);
+}
+
+/* Touch all the nodes of a tree returning a check sum. This is useful
+ * in order to make Valgrind detect if there is something wrong while
+ * reading the data structure.
+ *
+ * This function was used in order to identify Rax bugs after a big refactoring
+ * using this technique:
+ *
+ * 1. The rax-test is executed using Valgrind, adding a printf() so that for
+ * the fuzz tester we see what iteration in the loop we are in.
+ * 2. After every modification of the radix tree made by the fuzz tester
+ * in rax-test.c, we add a call to raxTouch().
+ * 3. Now as soon as an operation will corrupt the tree, raxTouch() will
+ * detect it (via Valgrind) immediately. We can add more calls to narrow
+ * the state.
+ * 4. At this point a good idea is to enable Rax debugging messages immediately
+ * before the moment the tree is corrupted, to see what happens.
+ */
+unsigned long raxTouch(raxNode *n) {
+    debugf("Touching %p\n", (void*)n);
+    unsigned long sum = 0;
+    /* Fold the value pointer into the checksum so that reading it is
+     * observable by Valgrind. */
+    if (n->iskey) {
+        sum += (unsigned long)raxGetData(n);
+    }
+
+    int numchildren = n->iscompr ? 1 : n->size;
+    raxNode **cp = raxNodeFirstChildPtr(n);
+    for (int i = 0; i < numchildren; i++) {
+        if (numchildren > 1) {
+            /* Only non-compressed nodes store one edge byte per child. */
+            sum += (long)n->data[i];
+        }
+        raxNode *child;
+        /* memcpy(): child pointers inside the node may be unaligned. */
+        memcpy(&child,cp,sizeof(child));
+        sum += raxTouch(child);
+        cp++;
+    }
+    return sum;
+}
diff --git a/src/rax.h b/src/rax.h
new file mode 100644
index 0000000..6b1fd41
--- /dev/null
+++ b/src/rax.h
@@ -0,0 +1,216 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Copyright (c) 2017-2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RAX_H
+#define RAX_H
+
+#include <stdint.h>
+
+/* Representation of a radix tree as implemented in this file, that contains
+ * the strings "foo", "foobar" and "footer" after the insertion of each
+ * word. When the node represents a key inside the radix tree, we write it
+ * between [], otherwise it is written between ().
+ *
+ * This is the vanilla representation:
+ *
+ * (f) ""
+ * \
+ * (o) "f"
+ * \
+ * (o) "fo"
+ * \
+ * [t b] "foo"
+ * / \
+ * "foot" (e) (a) "foob"
+ * / \
+ * "foote" (r) (r) "fooba"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * However, this implementation implements a very common optimization where
+ * successive nodes having a single child are "compressed" into the node
+ * itself as a string of characters, each representing a next-level child,
+ * and only the link to the node representing the last character node is
+ * provided inside the representation. So the above representation is turned
+ * into:
+ *
+ * ["foo"] ""
+ * |
+ * [t b] "foo"
+ * / \
+ * "foot" ("er") ("ar") "foob"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * However this optimization makes the implementation a bit more complex.
+ * For instance if a key "first" is added in the above radix tree, a
+ * "node splitting" operation is needed, since the "foo" prefix is no longer
+ * composed of nodes having a single child one after the other. This is the
+ * above tree and the resulting node splitting after this event happens:
+ *
+ *
+ * (f) ""
+ * /
+ * (i o) "f"
+ * / \
+ * "firs" ("rst") (o) "fo"
+ * / \
+ * "first" [] [t b] "foo"
+ * / \
+ * "foot" ("er") ("ar") "foob"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * Similarly after deletion, if a new chain of nodes having a single child
+ * is created (the chain must also not include nodes that represent keys),
+ * it must be compressed back into a single node.
+ *
+ */
+
+#define RAX_NODE_MAX_SIZE ((1<<29)-1)
+typedef struct raxNode {
+ uint32_t iskey:1; /* Does this node contain a key? */
+ uint32_t isnull:1; /* Associated value is NULL (don't store it). */
+ uint32_t iscompr:1; /* Node is compressed. */
+ uint32_t size:29; /* Number of children, or compressed string len. */
+ /* Data layout is as follows:
+ *
+ * If node is not compressed we have 'size' bytes, one for each children
+ * character, and 'size' raxNode pointers, point to each child node.
+ * Note how the character is not stored in the children but in the
+ * edge of the parents:
+ *
+ * [header iscompr=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
+ *
+ * if node is compressed (iscompr bit is 1) the node has 1 child.
+ * In that case the 'size' bytes of the string stored immediately at
+ * the start of the data section, represent a sequence of successive
+ * nodes linked one after the other, for which only the last one in
+ * the sequence is actually represented as a node, and pointed to by
+ * the current compressed node.
+ *
+ * [header iscompr=1][xyz][z-ptr](value-ptr?)
+ *
+ * Both compressed and not compressed nodes can represent a key
+ * with associated data in the radix tree at any level (not just terminal
+ * nodes).
+ *
+ * If the node has an associated key (iskey=1) and is not NULL
+ * (isnull=0), then after the raxNode pointers pointing to the
+ * children, an additional value pointer is present (as you can see
+ * in the representation above as "value-ptr" field).
+ */
+ unsigned char data[];
+} raxNode;
+
+typedef struct rax {
+ raxNode *head;
+ uint64_t numele;
+ uint64_t numnodes;
+} rax;
+
+/* Stack data structure used by raxLowWalk() in order to, optionally, return
+ * a list of parent nodes to the caller. The nodes do not have a "parent"
+ * field for space concerns, so we use the auxiliary stack when needed. */
+#define RAX_STACK_STATIC_ITEMS 32
+typedef struct raxStack {
+ void **stack; /* Points to static_items or an heap allocated array. */
+ size_t items, maxitems; /* Number of items contained and total space. */
+    /* Up to RAX_STACK_STATIC_ITEMS items we avoid to allocate on the heap
+     * and use this static array of pointers instead. */
+ void *static_items[RAX_STACK_STATIC_ITEMS];
+ int oom; /* True if pushing into this stack failed for OOM at some point. */
+} raxStack;
+
+/* Optional callback used for iterators, to be notified on each rax node,
+ * including nodes not representing keys. If the callback returns true
+ * the callback changed the node pointer in the iterator structure, and the
+ * iterator implementation will have to replace the pointer in the radix tree
+ * internals. This allows the callback to reallocate the node to perform
+ * very special operations, normally not needed by normal applications.
+ *
+ * This callback is used to perform very low level analysis of the radix tree
+ * structure, scanning each possible node (but the root node), or in order to
+ * reallocate the nodes to reduce the allocation fragmentation (this is the
+ * Redis application for this callback).
+ *
+ * This is currently only supported in forward iterations (raxNext) */
+typedef int (*raxNodeCallback)(raxNode **noderef);
+
+/* Radix tree iterator state is encapsulated into this data structure. */
+#define RAX_ITER_STATIC_LEN 128
+#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
+ element for the first iteration and
+ clear the flag. */
+#define RAX_ITER_EOF (1<<1) /* End of iteration reached. */
+#define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while
+ iterating. But it is slower. */
+typedef struct raxIterator {
+ int flags;
+ rax *rt; /* Radix tree we are iterating. */
+ unsigned char *key; /* The current string. */
+ void *data; /* Data associated to this key. */
+ size_t key_len; /* Current key length. */
+ size_t key_max; /* Max key len the current key buffer can hold. */
+ unsigned char key_static_string[RAX_ITER_STATIC_LEN];
+ raxNode *node; /* Current node. Only for unsafe iteration. */
+ raxStack stack; /* Stack used for unsafe iteration. */
+ raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */
+} raxIterator;
+
+/* A special pointer returned for not found items. */
+extern void *raxNotFound;
+
+/* Exported API. */
+rax *raxNew(void);
+int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
+int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
+int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
+void *raxFind(rax *rax, unsigned char *s, size_t len);
+void raxFree(rax *rax);
+void raxFreeWithCallback(rax *rax, void (*free_callback)(void*));
+void raxStart(raxIterator *it, rax *rt);
+int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
+int raxNext(raxIterator *it);
+int raxPrev(raxIterator *it);
+int raxRandomWalk(raxIterator *it, size_t steps);
+int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);
+void raxStop(raxIterator *it);
+int raxEOF(raxIterator *it);
+void raxShow(rax *rax);
+uint64_t raxSize(rax *rax);
+unsigned long raxTouch(raxNode *n);
+void raxSetDebugMsg(int onoff);
+
+/* Internal API. May be used by the node callback in order to access rax nodes
+ * in a low level way, so this function is exported as well. */
+void raxSetData(raxNode *n, void *data);
+
+#endif
diff --git a/src/rax_malloc.h b/src/rax_malloc.h
new file mode 100644
index 0000000..9295985
--- /dev/null
+++ b/src/rax_malloc.h
@@ -0,0 +1,44 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Allocator selection.
+ *
+ * This file is used in order to change the Rax allocator at compile time.
+ * Just define the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#ifndef RAX_ALLOC_H
+#define RAX_ALLOC_H
+#include "zmalloc.h"
+#define rax_malloc zmalloc
+#define rax_realloc zrealloc
+#define rax_free zfree
+#endif
diff --git a/src/rdb.c b/src/rdb.c
new file mode 100644
index 0000000..ed30b65
--- /dev/null
+++ b/src/rdb.c
@@ -0,0 +1,3722 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "lzf.h" /* LZF compression library */
+#include "zipmap.h"
+#include "endianconv.h"
+#include "fpconv_dtoa.h"
+#include "stream.h"
+#include "functions.h"
+#include "intset.h" /* Compact integer set structure */
+#include "bio.h"
+
+#include <math.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <arpa/inet.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+
+/* This macro is called when the internal RDB structure is corrupt */
+#define rdbReportCorruptRDB(...) rdbReportError(1, __LINE__,__VA_ARGS__)
+/* This macro is called when RDB read failed (possibly a short read) */
+#define rdbReportReadError(...) rdbReportError(0, __LINE__,__VA_ARGS__)
+
+/* This macro tells if we are in the context of a RESTORE command, and not loading an RDB or AOF. */
+#define isRestoreContext() \
+ ((server.current_client == NULL || server.current_client->id == CLIENT_ID_AOF) ? 0 : 1)
+
+char* rdbFileBeingLoaded = NULL; /* used for rdb checking on read error */
+extern int rdbCheckMode;
+void rdbCheckError(const char *fmt, ...);
+void rdbCheckSetError(const char *fmt, ...);
+
+#ifdef __GNUC__
+void rdbReportError(int corruption_error, int linenum, char *reason, ...) __attribute__ ((format (printf, 3, 4)));
+#endif
+void rdbReportError(int corruption_error, int linenum, char *reason, ...) {
+    va_list ap;
+    char msg[1024];
+    int len;
+
+    /* Prefix the message with the loading offset and the rdb.c line that
+     * reported the problem, then append the caller-formatted reason. */
+    len = snprintf(msg,sizeof(msg),
+        "Internal error in RDB reading offset %llu, function at rdb.c:%d -> ",
+        (unsigned long long)server.loading_loaded_bytes, linenum);
+    va_start(ap,reason);
+    vsnprintf(msg+len,sizeof(msg)-len,reason,ap);
+    va_end(ap);
+
+    if (isRestoreContext()) {
+        /* If we're in the context of a RESTORE command, just propagate the error. */
+        /* log in VERBOSE, and return (don't exit). */
+        serverLog(LL_VERBOSE, "%s", msg);
+        return;
+    } else if (rdbCheckMode) {
+        /* If we're inside the rdb checker, let it handle the error. */
+        rdbCheckError("%s",msg);
+    } else if (rdbFileBeingLoaded) {
+        /* If we're loading an rdb file from disk, run rdb check (and exit) */
+        serverLog(LL_WARNING, "%s", msg);
+        char *argv[2] = {"",rdbFileBeingLoaded};
+        if (anetIsFifo(argv[1])) {
+            /* Cannot check RDB FIFO because we cannot reopen the FIFO and check already streamed data. */
+            rdbCheckError("Cannot check RDB that is a FIFO: %s", argv[1]);
+            return;
+        }
+        redis_check_rdb_main(2,argv,NULL);
+    } else if (corruption_error) {
+        /* In diskless loading, in case of corrupt file, log and exit. */
+        serverLog(LL_WARNING, "%s. Failure loading rdb format", msg);
+    } else {
+        /* In diskless loading, in case of a short read (not a corrupt
+         * file), log and proceed (don't exit). */
+        serverLog(LL_WARNING, "%s. Failure loading rdb format from socket, assuming connection error, resuming operation.", msg);
+        return;
+    }
+    /* Only the fatal branches above fall through to here. */
+    serverLog(LL_WARNING, "Terminating server after rdb file reading failure.");
+    exit(1);
+}
+
+/* Low level write helper: writes 'len' bytes from 'p' to the rio stream.
+ * A NULL 'rdb' skips the write (only the would-be length is computed).
+ * Returns 'len' on success, -1 on write error. */
+ssize_t rdbWriteRaw(rio *rdb, void *p, size_t len) {
+    if (rdb != NULL) {
+        if (rioWrite(rdb,p,len) == 0) return -1;
+    }
+    return len;
+}
+
+/* Save an RDB "type" byte (object type or special opcode).
+ * Returns 1 (bytes written) on success, -1 on error. */
+int rdbSaveType(rio *rdb, unsigned char type) {
+    return rdbWriteRaw(rdb,&type,1);
+}
+
+/* Load a "type" in RDB format, that is a one byte unsigned integer.
+ * This function is not only used to load object types, but also special
+ * "types" like the end-of-file type, the EXPIRE type, and so forth. */
+int rdbLoadType(rio *rdb) {
+    unsigned char type;
+    if (rioRead(rdb,&type,1) == 0) return -1;
+    /* Widened to int so -1 is distinguishable from any valid byte. */
+    return type;
+}
+
+/* This is only used to load old databases stored with the RDB_OPCODE_EXPIRETIME
+ * opcode. New versions of Redis store using the RDB_OPCODE_EXPIRETIME_MS
+ * opcode. On error -1 is returned, however this could be a valid time, so
+ * to check for loading errors the caller should call rioGetReadError() after
+ * calling this function. */
+time_t rdbLoadTime(rio *rdb) {
+    int32_t t32;
+    /* Legacy format: 32 bit seconds, read raw with no endianness
+     * conversion. */
+    if (rioRead(rdb,&t32,4) == 0) return -1;
+    return (time_t)t32;
+}
+
+/* Save a millisecond UNIX time as a 64 bit little endian integer.
+ * Returns bytes written (8) on success, -1 on error. */
+int rdbSaveMillisecondTime(rio *rdb, long long t) {
+    int64_t t64 = (int64_t) t;
+    memrev64ifbe(&t64); /* Store in little endian. */
+    return rdbWriteRaw(rdb,&t64,8);
+}
+
+/* This function loads a time from the RDB file. It gets the version of the
+ * RDB because, unfortunately, before Redis 5 (RDB version 9), the function
+ * failed to convert data to/from little endian, so RDB files with keys having
+ * expires could not be shared between big endian and little endian systems
+ * (because the expire time will be totally wrong). The fix for this is just
+ * to call memrev64ifbe(), however if we fix this for all the RDB versions,
+ * this call will introduce an incompatibility for big endian systems:
+ * after upgrading to Redis version 5 they will no longer be able to load their
+ * own old RDB files. Because of that, we instead fix the function only for new
+ * RDB versions, and load older RDB versions as we used to do in the past,
+ * allowing big endian systems to load their own old RDB files.
+ *
+ * On I/O error the function returns LLONG_MAX, however if this is also a
+ * valid stored value, the caller should use rioGetReadError() to check for
+ * errors after calling this function. */
+long long rdbLoadMillisecondTime(rio *rdb, int rdbver) {
+    int64_t t64;
+    if (rioRead(rdb,&t64,8) == 0) return LLONG_MAX; /* I/O error. */
+    if (rdbver >= 9) /* Check the top comment of this function. */
+        memrev64ifbe(&t64); /* Convert in big endian if the system is BE. */
+    return (long long)t64;
+}
+
+/* Saves an encoded length. The first two bits in the first byte are used to
+ * hold the encoding type. See the RDB_* definitions for more information
+ * on the types of encoding. */
+int rdbSaveLen(rio *rdb, uint64_t len) {
+    unsigned char buf[2];
+    size_t nwritten;
+
+    if (len < (1<<6)) {
+        /* Save a 6 bit len: value fits the first byte together with the
+         * RDB_6BITLEN tag in the two most significant bits. */
+        buf[0] = (len&0xFF)|(RDB_6BITLEN<<6);
+        if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+        nwritten = 1;
+    } else if (len < (1<<14)) {
+        /* Save a 14 bit len: 6 bits in the first byte, 8 in the second. */
+        buf[0] = ((len>>8)&0xFF)|(RDB_14BITLEN<<6);
+        buf[1] = len&0xFF;
+        if (rdbWriteRaw(rdb,buf,2) == -1) return -1;
+        nwritten = 2;
+    } else if (len <= UINT32_MAX) {
+        /* Save a 32 bit len: tag byte followed by a big endian length. */
+        buf[0] = RDB_32BITLEN;
+        if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+        uint32_t len32 = htonl(len);
+        if (rdbWriteRaw(rdb,&len32,4) == -1) return -1;
+        nwritten = 1+4;
+    } else {
+        /* Save a 64 bit len: tag byte followed by a big endian length. */
+        buf[0] = RDB_64BITLEN;
+        if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+        len = htonu64(len);
+        if (rdbWriteRaw(rdb,&len,8) == -1) return -1;
+        nwritten = 1+8;
+    }
+    return nwritten;
+}
+
+
+/* Load an encoded length. If the loaded length is a normal length as stored
+ * with rdbSaveLen(), the read length is set to '*lenptr'. If instead the
+ * loaded length describes a special encoding that follows, then '*isencoded'
+ * is set to 1 and the encoding format is stored at '*lenptr'.
+ *
+ * See the RDB_ENC_* definitions in rdb.h for more information on special
+ * encodings.
+ *
+ * The function returns -1 on error, 0 on success. */
+int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr) {
+    unsigned char buf[2];
+    int type;
+
+    if (isencoded) *isencoded = 0;
+    if (rioRead(rdb,buf,1) == 0) return -1;
+    /* The two most significant bits of the first byte select the
+     * encoding type. */
+    type = (buf[0]&0xC0)>>6;
+    if (type == RDB_ENCVAL) {
+        /* Read a 6 bit encoding type. */
+        if (isencoded) *isencoded = 1;
+        *lenptr = buf[0]&0x3F;
+    } else if (type == RDB_6BITLEN) {
+        /* Read a 6 bit len. */
+        *lenptr = buf[0]&0x3F;
+    } else if (type == RDB_14BITLEN) {
+        /* Read a 14 bit len. */
+        if (rioRead(rdb,buf+1,1) == 0) return -1;
+        *lenptr = ((buf[0]&0x3F)<<8)|buf[1];
+    } else if (buf[0] == RDB_32BITLEN) {
+        /* Read a 32 bit len: the whole first byte is the tag, the big
+         * endian length follows. */
+        uint32_t len;
+        if (rioRead(rdb,&len,4) == 0) return -1;
+        *lenptr = ntohl(len);
+    } else if (buf[0] == RDB_64BITLEN) {
+        /* Read a 64 bit len, same scheme as the 32 bit case. */
+        uint64_t len;
+        if (rioRead(rdb,&len,8) == 0) return -1;
+        *lenptr = ntohu64(len);
+    } else {
+        rdbReportCorruptRDB(
+            "Unknown length encoding %d in rdbLoadLen()",type);
+        return -1; /* Never reached. */
+    }
+    return 0;
+}
+
+/* This is like rdbLoadLenByRef() but directly returns the value read
+ * from the RDB stream, signaling an error by returning RDB_LENERR
+ * (since it is a too large count to be applicable in any Redis data
+ * structure). */
+/* Like rdbLoadLenByRef() but returns the decoded length directly,
+ * signaling failure with the special RDB_LENERR value (too large a
+ * count to be applicable to any Redis data structure). */
+uint64_t rdbLoadLen(rio *rdb, int *isencoded) {
+    uint64_t len;
+    return rdbLoadLenByRef(rdb,isencoded,&len) == -1 ? RDB_LENERR : len;
+}
+
+/* Encodes the "value" argument as integer when it fits in the supported ranges
+ * for encoded types. If the function successfully encodes the integer, the
+ * representation is stored in the buffer pointer to by "enc" and the string
+ * length is returned. Otherwise 0 is returned. */
+/* Try to encode 'value' using one of the RDB integer encodings. On
+ * success the representation (tag byte plus little endian payload) is
+ * written into 'enc' and its total length (2, 3 or 5) is returned.
+ * Values that do not fit a signed 32 bit integer return 0. */
+int rdbEncodeInteger(long long value, unsigned char *enc) {
+    int enctype, payload;
+
+    if (value >= -(1<<7) && value <= (1<<7)-1) {
+        enctype = RDB_ENC_INT8;
+        payload = 1;
+    } else if (value >= -(1<<15) && value <= (1<<15)-1) {
+        enctype = RDB_ENC_INT16;
+        payload = 2;
+    } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
+        enctype = RDB_ENC_INT32;
+        payload = 4;
+    } else {
+        return 0;
+    }
+
+    enc[0] = (RDB_ENCVAL<<6)|enctype;
+    /* Store the value little endian, least significant byte first. */
+    for (int i = 0; i < payload; i++) enc[1+i] = (value>>(8*i))&0xFF;
+    return payload+1;
+}
+
+/* Loads an integer-encoded object with the specified encoding type "enctype".
+ * The returned value changes according to the flags, see
+ * rdbGenericLoadStringObject() for more info. */
+void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) {
+    int plain = flags & RDB_LOAD_PLAIN;
+    int sds = flags & RDB_LOAD_SDS;
+    int encode = flags & RDB_LOAD_ENC;
+    unsigned char enc[4];
+    long long val;
+
+    /* Integers are stored little endian, 1, 2 or 4 bytes wide. */
+    if (enctype == RDB_ENC_INT8) {
+        if (rioRead(rdb,enc,1) == 0) return NULL;
+        val = (signed char)enc[0];
+    } else if (enctype == RDB_ENC_INT16) {
+        uint16_t v;
+        if (rioRead(rdb,enc,2) == 0) return NULL;
+        v = ((uint32_t)enc[0])|
+            ((uint32_t)enc[1]<<8);
+        val = (int16_t)v;
+    } else if (enctype == RDB_ENC_INT32) {
+        uint32_t v;
+        if (rioRead(rdb,enc,4) == 0) return NULL;
+        v = ((uint32_t)enc[0])|
+            ((uint32_t)enc[1]<<8)|
+            ((uint32_t)enc[2]<<16)|
+            ((uint32_t)enc[3]<<24);
+        val = (int32_t)v;
+    } else {
+        rdbReportCorruptRDB("Unknown RDB integer encoding type %d",enctype);
+        return NULL; /* Never reached. */
+    }
+    if (plain || sds) {
+        /* Caller wants a raw buffer: return the decimal string form in
+         * a zmalloc'd buffer or an sds string. */
+        char buf[LONG_STR_SIZE], *p;
+        int len = ll2string(buf,sizeof(buf),val);
+        if (lenptr) *lenptr = len;
+        p = plain ? zmalloc(len) : sdsnewlen(SDS_NOINIT,len);
+        memcpy(p,buf,len);
+        return p;
+    } else if (encode) {
+        return createStringObjectFromLongLongForValue(val);
+    } else {
+        return createStringObjectFromLongLongWithSds(val);
+    }
+}
+
+/* String objects in the form "2391" "-100" without any space and with a
+ * range of values that can fit in an 8, 16 or 32 bit signed value can be
+ * encoded as integers to save space */
+/* If 's' (of 'len' bytes) is the exact textual representation of an
+ * integer that fits an RDB integer encoding, encode it into 'enc' and
+ * return the encoded length, otherwise return 0. */
+int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
+    long long value;
+    if (!string2ll(s, len, &value)) return 0;
+    return rdbEncodeInteger(value, enc);
+}
+
+/* Save an already LZF compressed blob on disk, encoded as:
+ * [LZF opcode][compressed length][original length][compressed bytes].
+ * Returns the total number of bytes written, or -1 on write error. */
+ssize_t rdbSaveLzfBlob(rio *rdb, void *data, size_t compress_len,
+                       size_t original_len) {
+    ssize_t n, nwritten = 0;
+    unsigned char opcode = (RDB_ENCVAL<<6)|RDB_ENC_LZF;
+
+    /* Opcode byte marking an LZF compressed string. */
+    if ((n = rdbWriteRaw(rdb,&opcode,1)) == -1) return -1;
+    nwritten += n;
+
+    /* Both lengths are needed at load time to size the buffers. */
+    if ((n = rdbSaveLen(rdb,compress_len)) == -1) return -1;
+    nwritten += n;
+    if ((n = rdbSaveLen(rdb,original_len)) == -1) return -1;
+    nwritten += n;
+
+    /* Finally the compressed payload itself. */
+    if ((n = rdbWriteRaw(rdb,data,compress_len)) == -1) return -1;
+    nwritten += n;
+
+    return nwritten;
+}
+
+/* Try to LZF-compress 's' (of 'len' bytes) and save it. Returns the
+ * number of bytes written, 0 if the string is too short or does not
+ * compress enough (caller saves it verbatim), -1 on write error. */
+ssize_t rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
+    size_t comprlen, outlen;
+    void *out;
+
+    /* We require at least four bytes compression for this to be worth it */
+    if (len <= 4) return 0;
+    outlen = len-4;
+    if ((out = zmalloc(outlen+1)) == NULL) return 0;
+    comprlen = lzf_compress(s, len, out, outlen);
+    if (comprlen == 0) {
+        /* lzf_compress() could not fit the output within 'outlen'
+         * bytes: not worth compressing. */
+        zfree(out);
+        return 0;
+    }
+    ssize_t nwritten = rdbSaveLzfBlob(rdb, out, comprlen, len);
+    zfree(out);
+    return nwritten;
+}
+
+/* Load an LZF compressed string in RDB format. The returned value
+ * changes according to 'flags'. For more info check the
+ * rdbGenericLoadStringObject() function. */
+void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) {
+    int plain = flags & RDB_LOAD_PLAIN;
+    int sds = flags & RDB_LOAD_SDS;
+    uint64_t len, clen;
+    unsigned char *c = NULL;
+    char *val = NULL;
+
+    /* Read the compressed length first, then the uncompressed one. */
+    if ((clen = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+    if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+    /* Use the "try" allocators: both lengths come from the stream and
+     * may be huge in a corrupt file, so OOM must not abort. */
+    if ((c = ztrymalloc(clen)) == NULL) {
+        serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbLoadLzfStringObject failed allocating %llu bytes", (unsigned long long)clen);
+        goto err;
+    }
+
+    /* Allocate our target according to the uncompressed size. */
+    if (plain) {
+        val = ztrymalloc(len);
+    } else {
+        val = sdstrynewlen(SDS_NOINIT,len);
+    }
+    if (!val) {
+        serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbLoadLzfStringObject failed allocating %llu bytes", (unsigned long long)len);
+        goto err;
+    }
+
+    if (lenptr) *lenptr = len;
+
+    /* Load the compressed representation and uncompress it to target. */
+    if (rioRead(rdb,c,clen) == 0) goto err;
+    if (lzf_decompress(c,clen,val,len) != len) {
+        rdbReportCorruptRDB("Invalid LZF compressed string");
+        goto err;
+    }
+    zfree(c);
+
+    if (plain || sds) {
+        return val;
+    } else {
+        return createObject(OBJ_STRING,val);
+    }
+err:
+    /* Release whatever was allocated; 'val' must be freed with the
+     * allocator matching its representation (plain buffer vs sds). */
+    zfree(c);
+    if (plain)
+        zfree(val);
+    else
+        sdsfree(val);
+    return NULL;
+}
+
+/* Save a string object as [len][data] on disk. If the object is a string
+ * representation of an integer value we try to save it in a special form */
+ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
+    int enclen;
+    ssize_t n, nwritten = 0;
+
+    /* Try integer encoding: 11 chars is the longest textual form of a
+     * value fitting the 32 bit encodable range (sign plus 10 digits). */
+    if (len <= 11) {
+        unsigned char buf[5];
+        if ((enclen = rdbTryIntegerEncoding((char*)s,len,buf)) > 0) {
+            if (rdbWriteRaw(rdb,buf,enclen) == -1) return -1;
+            return enclen;
+        }
+    }
+
+    /* Try LZF compression - under 20 bytes it's unable to compress even
+     * aaaaaaaaaaaaaaaaaa so skip it */
+    if (server.rdb_compression && len > 20) {
+        n = rdbSaveLzfStringObject(rdb,s,len);
+        if (n == -1) return -1;
+        if (n > 0) return n;
+        /* Return value of 0 means data can't be compressed, save the old way */
+    }
+
+    /* Store verbatim */
+    if ((n = rdbSaveLen(rdb,len)) == -1) return -1;
+    nwritten += n;
+    if (len > 0) {
+        if (rdbWriteRaw(rdb,s,len) == -1) return -1;
+        nwritten += len;
+    }
+    return nwritten;
+}
+
/* Save a long long value as either an encoded string (compact integer
 * encoding, when the value fits one of the special forms) or as a plain
 * length-prefixed string of its decimal representation.
 * Returns bytes written, or -1 on error. */
ssize_t rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
    unsigned char buf[32];
    ssize_t n, nwritten = 0;
    int enclen = rdbEncodeInteger(value,buf);
    if (enclen > 0) {
        /* Value fit one of the integer special encodings: write it as-is. */
        return rdbWriteRaw(rdb,buf,enclen);
    } else {
        /* Encode as string: decimal digits, preceded by the length. */
        enclen = ll2string((char*)buf,32,value);
        serverAssert(enclen < 32);
        if ((n = rdbSaveLen(rdb,enclen)) == -1) return -1;
        nwritten += n;
        if ((n = rdbWriteRaw(rdb,buf,enclen)) == -1) return -1;
        nwritten += n;
    }
    return nwritten;
}
+
+/* Like rdbSaveRawString() gets a Redis object instead. */
+ssize_t rdbSaveStringObject(rio *rdb, robj *obj) {
+ /* Avoid to decode the object, then encode it again, if the
+ * object is already integer encoded. */
+ if (obj->encoding == OBJ_ENCODING_INT) {
+ return rdbSaveLongLongAsStringObject(rdb,(long)obj->ptr);
+ } else {
+ serverAssertWithInfo(NULL,obj,sdsEncodedObject(obj));
+ return rdbSaveRawString(rdb,obj->ptr,sdslen(obj->ptr));
+ }
+}
+
/* Load a string object from an RDB file according to flags:
 *
 * RDB_LOAD_NONE (no flags): load an RDB object, unencoded.
 * RDB_LOAD_ENC: If the returned type is a Redis object, try to
 *               encode it in a special way to be more memory
 *               efficient. When this flag is passed the function
 *               no longer guarantees that obj->ptr is an SDS string.
 * RDB_LOAD_PLAIN: Return a plain string allocated with zmalloc()
 *                 instead of a Redis object with an sds in it.
 * RDB_LOAD_SDS: Return an SDS string instead of a Redis object.
 *
 * If 'lenptr' is not NULL it is set to the string length.
 * On I/O error, allocation failure or corrupt encoding NULL is returned.
 */
void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) {
    int plain = flags & RDB_LOAD_PLAIN;
    int sds = flags & RDB_LOAD_SDS;
    int isencoded;
    unsigned long long len;

    len = rdbLoadLen(rdb,&isencoded);
    if (len == RDB_LENERR) return NULL;

    if (isencoded) {
        /* 'len' is not a length here but the special encoding type. */
        switch(len) {
        case RDB_ENC_INT8:
        case RDB_ENC_INT16:
        case RDB_ENC_INT32:
            return rdbLoadIntegerObject(rdb,len,flags,lenptr);
        case RDB_ENC_LZF:
            return rdbLoadLzfStringObject(rdb,flags,lenptr);
        default:
            rdbReportCorruptRDB("Unknown RDB string encoding type %llu",len);
            return NULL;
        }
    }

    if (plain || sds) {
        /* "try" allocators: a corrupt length must not abort the server. */
        void *buf = plain ? ztrymalloc(len) : sdstrynewlen(SDS_NOINIT,len);
        if (!buf) {
            serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len);
            return NULL;
        }
        if (lenptr) *lenptr = len;
        if (len && rioRead(rdb,buf,len) == 0) {
            /* Free with the allocator matching the requested flavor. */
            if (plain)
                zfree(buf);
            else
                sdsfree(buf);
            return NULL;
        }
        return buf;
    } else {
        /* Return a string robj; read the payload straight into its sds. */
        robj *o = tryCreateStringObject(SDS_NOINIT,len);
        if (!o) {
            serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len);
            return NULL;
        }
        if (len && rioRead(rdb,o->ptr,len) == 0) {
            decrRefCount(o);
            return NULL;
        }
        return o;
    }
}
+
+robj *rdbLoadStringObject(rio *rdb) {
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL);
+}
+
+robj *rdbLoadEncodedStringObject(rio *rdb) {
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_ENC,NULL);
+}
+
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 * Returns the result of rdbWriteRaw(): bytes written or -1 on error.
 */
int rdbSaveDoubleValue(rio *rdb, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        long long lvalue;
        /* Integer printing function is much faster, check if we can safely use it. */
        if (double2ll(val, &lvalue))
            ll2string((char*)buf+1,sizeof(buf)-1,lvalue);
        else {
            /* fpconv_dtoa() does not NUL-terminate: add the terminator so
             * the strlen() below measures the right length. */
            const int dlen = fpconv_dtoa(val, (char*)buf+1);
            buf[dlen+1] = '\0';
        }
        /* buf[0] is the length prefix, the text follows at buf+1. */
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    return rdbWriteRaw(rdb,buf,len);
}
+
/* For information about double serialization check rdbSaveDoubleValue().
 * On success 0 is returned and *val is populated; -1 on I/O or parse error. */
int rdbLoadDoubleValue(rio *rdb, double *val) {
    char buf[256]; /* len is an 8 bit value, so at most 255 chars + NUL. */
    unsigned char len;

    if (rioRead(rdb,&len,1) == 0) return -1;
    switch(len) {
    /* Special length values encode non-finite doubles. */
    case 255: *val = R_NegInf; return 0;
    case 254: *val = R_PosInf; return 0;
    case 253: *val = R_Nan; return 0;
    default:
        if (rioRead(rdb,buf,len) == 0) return -1;
        buf[len] = '\0';
        if (sscanf(buf, "%lg", val)!=1) return -1;
        return 0;
    }
}
+
+/* Saves a double for RDB 8 or greater, where IE754 binary64 format is assumed.
+ * We just make sure the integer is always stored in little endian, otherwise
+ * the value is copied verbatim from memory to disk.
+ *
+ * Return -1 on error, the size of the serialized value on success. */
+int rdbSaveBinaryDoubleValue(rio *rdb, double val) {
+ memrev64ifbe(&val);
+ return rdbWriteRaw(rdb,&val,sizeof(val));
+}
+
+/* Loads a double from RDB 8 or greater. See rdbSaveBinaryDoubleValue() for
+ * more info. On error -1 is returned, otherwise 0. */
+int rdbLoadBinaryDoubleValue(rio *rdb, double *val) {
+ if (rioRead(rdb,val,sizeof(*val)) == 0) return -1;
+ memrev64ifbe(val);
+ return 0;
+}
+
+/* Like rdbSaveBinaryDoubleValue() but single precision. */
+int rdbSaveBinaryFloatValue(rio *rdb, float val) {
+ memrev32ifbe(&val);
+ return rdbWriteRaw(rdb,&val,sizeof(val));
+}
+
+/* Like rdbLoadBinaryDoubleValue() but single precision. */
+int rdbLoadBinaryFloatValue(rio *rdb, float *val) {
+ if (rioRead(rdb,val,sizeof(*val)) == 0) return -1;
+ memrev32ifbe(val);
+ return 0;
+}
+
/* Save the object type of object "o". The RDB type byte depends both on the
 * logical type and on the in-memory encoding, so that the loader knows how
 * to reconstruct the value. Returns what rdbSaveType() returns (-1 on
 * error); panics on an unknown type/encoding combination. */
int rdbSaveObjectType(rio *rdb, robj *o) {
    switch (o->type) {
    case OBJ_STRING:
        return rdbSaveType(rdb,RDB_TYPE_STRING);
    case OBJ_LIST:
        /* Listpack-encoded lists are saved as a quicklist with one node
         * (see rdbSaveObject()), hence the same RDB type for both. */
        if (o->encoding == OBJ_ENCODING_QUICKLIST || o->encoding == OBJ_ENCODING_LISTPACK)
            return rdbSaveType(rdb, RDB_TYPE_LIST_QUICKLIST_2);
        else
            serverPanic("Unknown list encoding");
    case OBJ_SET:
        if (o->encoding == OBJ_ENCODING_INTSET)
            return rdbSaveType(rdb,RDB_TYPE_SET_INTSET);
        else if (o->encoding == OBJ_ENCODING_HT)
            return rdbSaveType(rdb,RDB_TYPE_SET);
        else if (o->encoding == OBJ_ENCODING_LISTPACK)
            return rdbSaveType(rdb,RDB_TYPE_SET_LISTPACK);
        else
            serverPanic("Unknown set encoding");
    case OBJ_ZSET:
        if (o->encoding == OBJ_ENCODING_LISTPACK)
            return rdbSaveType(rdb,RDB_TYPE_ZSET_LISTPACK);
        else if (o->encoding == OBJ_ENCODING_SKIPLIST)
            return rdbSaveType(rdb,RDB_TYPE_ZSET_2);
        else
            serverPanic("Unknown sorted set encoding");
    case OBJ_HASH:
        if (o->encoding == OBJ_ENCODING_LISTPACK)
            return rdbSaveType(rdb,RDB_TYPE_HASH_LISTPACK);
        else if (o->encoding == OBJ_ENCODING_HT)
            return rdbSaveType(rdb,RDB_TYPE_HASH);
        else
            serverPanic("Unknown hash encoding");
    case OBJ_STREAM:
        return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS_3);
    case OBJ_MODULE:
        return rdbSaveType(rdb,RDB_TYPE_MODULE_2);
    default:
        serverPanic("Unknown object type");
    }
    return -1; /* avoid warning */
}
+
+/* Use rdbLoadType() to load a TYPE in RDB format, but returns -1 if the
+ * type is not specifically a valid Object Type. */
+int rdbLoadObjectType(rio *rdb) {
+ int type;
+ if ((type = rdbLoadType(rdb)) == -1) return -1;
+ if (!rdbIsObjectType(type)) return -1;
+ return type;
+}
+
/* This helper function serializes a consumer group Pending Entries List (PEL)
 * into the RDB file. The 'nacks' argument tells the function if also persist
 * the information about the not acknowledged message, or if to persist
 * just the IDs: this is useful because for the global consumer group PEL
 * we serialized the NACKs as well, but when serializing the local consumer
 * PELs we just add the ID, that will be resolved inside the global PEL to
 * put a reference to the same structure.
 * Returns bytes written, or -1 on write error. */
ssize_t rdbSaveStreamPEL(rio *rdb, rax *pel, int nacks) {
    ssize_t n, nwritten = 0;

    /* Number of entries in the PEL. */
    if ((n = rdbSaveLen(rdb,raxSize(pel))) == -1) return -1;
    nwritten += n;

    /* Save each entry. */
    raxIterator ri;
    raxStart(&ri,pel);
    raxSeek(&ri,"^",NULL,0);
    while(raxNext(&ri)) {
        /* We store IDs in raw form as 128 bit big endian numbers, like
         * they are inside the radix tree key. */
        if ((n = rdbWriteRaw(rdb,ri.key,sizeof(streamID))) == -1) {
            raxStop(&ri); /* release the iterator before bailing out */
            return -1;
        }
        nwritten += n;

        if (nacks) {
            streamNACK *nack = ri.data;
            if ((n = rdbSaveMillisecondTime(rdb,nack->delivery_time)) == -1) {
                raxStop(&ri);
                return -1;
            }
            nwritten += n;
            if ((n = rdbSaveLen(rdb,nack->delivery_count)) == -1) {
                raxStop(&ri);
                return -1;
            }
            nwritten += n;
            /* We don't save the consumer name: we'll save the pending IDs
             * for each consumer in the consumer PEL, and resolve the consumer
             * at loading time. */
        }
    }
    raxStop(&ri);
    return nwritten;
}
+
/* Serialize the consumers of a stream consumer group into the RDB. Helper
 * function for the stream data type serialization. What we do here is to
 * persist the consumer metadata, and its PEL, for each consumer.
 *
 * NOTE(review): this returns -1 on error like its siblings, but the return
 * type is size_t rather than ssize_t, so the -1 is converted to SIZE_MAX
 * and back at the (ssize_t) call sites. Probably should be ssize_t for
 * consistency with rdbSaveStreamPEL() — confirm against the header before
 * changing the prototype. */
size_t rdbSaveStreamConsumers(rio *rdb, streamCG *cg) {
    ssize_t n, nwritten = 0;

    /* Number of consumers in this consumer group. */
    if ((n = rdbSaveLen(rdb,raxSize(cg->consumers))) == -1) return -1;
    nwritten += n;

    /* Save each consumer. */
    raxIterator ri;
    raxStart(&ri,cg->consumers);
    raxSeek(&ri,"^",NULL,0);
    while(raxNext(&ri)) {
        streamConsumer *consumer = ri.data;

        /* Consumer name. */
        if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1) {
            raxStop(&ri); /* release the iterator before bailing out */
            return -1;
        }
        nwritten += n;

        /* Seen time. */
        if ((n = rdbSaveMillisecondTime(rdb,consumer->seen_time)) == -1) {
            raxStop(&ri);
            return -1;
        }
        nwritten += n;

        /* Active time. */
        if ((n = rdbSaveMillisecondTime(rdb,consumer->active_time)) == -1) {
            raxStop(&ri);
            return -1;
        }
        nwritten += n;

        /* Consumer PEL, without the ACKs (see last parameter of the function
         * passed with value of 0), at loading time we'll lookup the ID
         * in the consumer group global PEL and will put a reference in the
         * consumer local PEL. */
        if ((n = rdbSaveStreamPEL(rdb,consumer->pel,0)) == -1) {
            raxStop(&ri);
            return -1;
        }
        nwritten += n;
    }
    raxStop(&ri);
    return nwritten;
}
+
/* Save a Redis object.
 * Returns -1 on error, number of bytes written on success.
 *
 * The wire layout depends on the object's type AND encoding, and must match
 * the type byte already emitted by rdbSaveObjectType(). 'key' and 'dbid'
 * are forwarded to the module IO context for OBJ_MODULE values. */
ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) {
    ssize_t n = 0, nwritten = 0;

    if (o->type == OBJ_STRING) {
        /* Save a string value */
        if ((n = rdbSaveStringObject(rdb,o)) == -1) return -1;
        nwritten += n;
    } else if (o->type == OBJ_LIST) {
        /* Save a list value */
        if (o->encoding == OBJ_ENCODING_QUICKLIST) {
            quicklist *ql = o->ptr;
            quicklistNode *node = ql->head;

            /* Node count, then one [container][payload] pair per node. */
            if ((n = rdbSaveLen(rdb,ql->len)) == -1) return -1;
            nwritten += n;

            while(node) {
                if ((n = rdbSaveLen(rdb,node->container)) == -1) return -1;
                nwritten += n;

                if (quicklistNodeIsCompressed(node)) {
                    /* Already LZF-compressed in memory: dump the blob as-is
                     * together with the uncompressed size. */
                    void *data;
                    size_t compress_len = quicklistGetLzf(node, &data);
                    if ((n = rdbSaveLzfBlob(rdb,data,compress_len,node->sz)) == -1) return -1;
                    nwritten += n;
                } else {
                    if ((n = rdbSaveRawString(rdb,node->entry,node->sz)) == -1) return -1;
                    nwritten += n;
                }
                node = node->next;
            }
        } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
            unsigned char *lp = o->ptr;

            /* Save list listpack as a fake quicklist that only has a single node. */
            if ((n = rdbSaveLen(rdb,1)) == -1) return -1;
            nwritten += n;
            if ((n = rdbSaveLen(rdb,QUICKLIST_NODE_CONTAINER_PACKED)) == -1) return -1;
            nwritten += n;
            if ((n = rdbSaveRawString(rdb,lp,lpBytes(lp))) == -1) return -1;
            nwritten += n;
        } else {
            serverPanic("Unknown list encoding");
        }
    } else if (o->type == OBJ_SET) {
        /* Save a set value */
        if (o->encoding == OBJ_ENCODING_HT) {
            dict *set = o->ptr;
            dictIterator *di = dictGetIterator(set);
            dictEntry *de;

            /* Cardinality, then each member as a raw string. */
            if ((n = rdbSaveLen(rdb,dictSize(set))) == -1) {
                dictReleaseIterator(di);
                return -1;
            }
            nwritten += n;

            while((de = dictNext(di)) != NULL) {
                sds ele = dictGetKey(de);
                if ((n = rdbSaveRawString(rdb,(unsigned char*)ele,sdslen(ele)))
                    == -1)
                {
                    dictReleaseIterator(di);
                    return -1;
                }
                nwritten += n;
            }
            dictReleaseIterator(di);
        } else if (o->encoding == OBJ_ENCODING_INTSET) {
            /* Intsets are serialized verbatim as a single string blob. */
            size_t l = intsetBlobLen((intset*)o->ptr);

            if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
            nwritten += n;
        } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
            /* Listpack sets are serialized verbatim as a single string blob. */
            size_t l = lpBytes((unsigned char *)o->ptr);
            if ((n = rdbSaveRawString(rdb, o->ptr, l)) == -1) return -1;
            nwritten += n;
        } else {
            serverPanic("Unknown set encoding");
        }
    } else if (o->type == OBJ_ZSET) {
        /* Save a sorted set value */
        if (o->encoding == OBJ_ENCODING_LISTPACK) {
            /* Listpack zsets are serialized verbatim as one string blob. */
            size_t l = lpBytes((unsigned char*)o->ptr);

            if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
            nwritten += n;
        } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
            zset *zs = o->ptr;
            zskiplist *zsl = zs->zsl;

            if ((n = rdbSaveLen(rdb,zsl->length)) == -1) return -1;
            nwritten += n;

            /* We save the skiplist elements from the greatest to the smallest
             * (that's trivial since the elements are already ordered in the
             * skiplist): this improves the load process, since the next loaded
             * element will always be the smaller, so adding to the skiplist
             * will always immediately stop at the head, making the insertion
             * O(1) instead of O(log(N)). */
            zskiplistNode *zn = zsl->tail;
            while (zn != NULL) {
                if ((n = rdbSaveRawString(rdb,
                    (unsigned char*)zn->ele,sdslen(zn->ele))) == -1)
                {
                    return -1;
                }
                nwritten += n;
                if ((n = rdbSaveBinaryDoubleValue(rdb,zn->score)) == -1)
                    return -1;
                nwritten += n;
                zn = zn->backward;
            }
        } else {
            serverPanic("Unknown sorted set encoding");
        }
    } else if (o->type == OBJ_HASH) {
        /* Save a hash value */
        if (o->encoding == OBJ_ENCODING_LISTPACK) {
            /* Listpack hashes are serialized verbatim as one string blob. */
            size_t l = lpBytes((unsigned char*)o->ptr);

            if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
            nwritten += n;
        } else if (o->encoding == OBJ_ENCODING_HT) {
            dictIterator *di = dictGetIterator(o->ptr);
            dictEntry *de;

            /* Field count, then [field][value] string pairs. */
            if ((n = rdbSaveLen(rdb,dictSize((dict*)o->ptr))) == -1) {
                dictReleaseIterator(di);
                return -1;
            }
            nwritten += n;

            while((de = dictNext(di)) != NULL) {
                sds field = dictGetKey(de);
                sds value = dictGetVal(de);

                if ((n = rdbSaveRawString(rdb,(unsigned char*)field,
                        sdslen(field))) == -1)
                {
                    dictReleaseIterator(di);
                    return -1;
                }
                nwritten += n;
                if ((n = rdbSaveRawString(rdb,(unsigned char*)value,
                        sdslen(value))) == -1)
                {
                    dictReleaseIterator(di);
                    return -1;
                }
                nwritten += n;
            }
            dictReleaseIterator(di);
        } else {
            serverPanic("Unknown hash encoding");
        }
    } else if (o->type == OBJ_STREAM) {
        /* Store how many listpacks we have inside the radix tree. */
        stream *s = o->ptr;
        rax *rax = s->rax;
        if ((n = rdbSaveLen(rdb,raxSize(rax))) == -1) return -1;
        nwritten += n;

        /* Serialize all the listpacks inside the radix tree as they are,
         * when loading back, we'll use the first entry of each listpack
         * to insert it back into the radix tree. */
        raxIterator ri;
        raxStart(&ri,rax);
        raxSeek(&ri,"^",NULL,0);
        while (raxNext(&ri)) {
            unsigned char *lp = ri.data;
            size_t lp_bytes = lpBytes(lp);
            /* Radix tree key (master stream ID) then the listpack blob. */
            if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1) {
                raxStop(&ri);
                return -1;
            }
            nwritten += n;
            if ((n = rdbSaveRawString(rdb,lp,lp_bytes)) == -1) {
                raxStop(&ri);
                return -1;
            }
            nwritten += n;
        }
        raxStop(&ri);

        /* Save the number of elements inside the stream. We cannot obtain
         * this easily later, since our macro nodes should be checked for
         * number of items: not a great CPU / space tradeoff. */
        if ((n = rdbSaveLen(rdb,s->length)) == -1) return -1;
        nwritten += n;
        /* Save the last entry ID. */
        if ((n = rdbSaveLen(rdb,s->last_id.ms)) == -1) return -1;
        nwritten += n;
        if ((n = rdbSaveLen(rdb,s->last_id.seq)) == -1) return -1;
        nwritten += n;
        /* Save the first entry ID. */
        if ((n = rdbSaveLen(rdb,s->first_id.ms)) == -1) return -1;
        nwritten += n;
        if ((n = rdbSaveLen(rdb,s->first_id.seq)) == -1) return -1;
        nwritten += n;
        /* Save the maximal tombstone ID. */
        if ((n = rdbSaveLen(rdb,s->max_deleted_entry_id.ms)) == -1) return -1;
        nwritten += n;
        if ((n = rdbSaveLen(rdb,s->max_deleted_entry_id.seq)) == -1) return -1;
        nwritten += n;
        /* Save the offset. */
        if ((n = rdbSaveLen(rdb,s->entries_added)) == -1) return -1;
        nwritten += n;

        /* The consumer groups and their clients are part of the stream
         * type, so serialize every consumer group. */

        /* Save the number of groups. */
        size_t num_cgroups = s->cgroups ? raxSize(s->cgroups) : 0;
        if ((n = rdbSaveLen(rdb,num_cgroups)) == -1) return -1;
        nwritten += n;

        if (num_cgroups) {
            /* Serialize each consumer group. */
            raxStart(&ri,s->cgroups);
            raxSeek(&ri,"^",NULL,0);
            while(raxNext(&ri)) {
                streamCG *cg = ri.data;

                /* Save the group name. */
                if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1) {
                    raxStop(&ri);
                    return -1;
                }
                nwritten += n;

                /* Last ID. */
                if ((n = rdbSaveLen(rdb,cg->last_id.ms)) == -1) {
                    raxStop(&ri);
                    return -1;
                }
                nwritten += n;
                if ((n = rdbSaveLen(rdb,cg->last_id.seq)) == -1) {
                    raxStop(&ri);
                    return -1;
                }
                nwritten += n;

                /* Save the group's logical reads counter. */
                if ((n = rdbSaveLen(rdb,cg->entries_read)) == -1) {
                    raxStop(&ri);
                    return -1;
                }
                nwritten += n;

                /* Save the global PEL. */
                if ((n = rdbSaveStreamPEL(rdb,cg->pel,1)) == -1) {
                    raxStop(&ri);
                    return -1;
                }
                nwritten += n;

                /* Save the consumers of this group. */
                if ((n = rdbSaveStreamConsumers(rdb,cg)) == -1) {
                    raxStop(&ri);
                    return -1;
                }
                nwritten += n;
            }
            raxStop(&ri);
        }
    } else if (o->type == OBJ_MODULE) {
        /* Save a module-specific value. */
        RedisModuleIO io;
        moduleValue *mv = o->ptr;
        moduleType *mt = mv->type;

        /* Write the "module" identifier as prefix, so that we'll be able
         * to call the right module during loading. */
        int retval = rdbSaveLen(rdb,mt->id);
        if (retval == -1) return -1;
        moduleInitIOContext(io,mt,rdb,key,dbid);
        io.bytes += retval;

        /* Then write the module-specific representation + EOF marker. */
        mt->rdb_save(&io,mv->value);
        retval = rdbSaveLen(rdb,RDB_MODULE_OPCODE_EOF);
        if (retval == -1)
            io.error = 1;
        else
            io.bytes += retval;

        if (io.ctx) {
            moduleFreeContext(io.ctx);
            zfree(io.ctx);
        }
        /* Module values track their own byte count inside 'io'. */
        return io.error ? -1 : (ssize_t)io.bytes;
    } else {
        serverPanic("Unknown object type");
    }
    return nwritten;
}
+
+/* Return the length the object will have on disk if saved with
+ * the rdbSaveObject() function. Currently we use a trick to get
+ * this length with very little changes to the code. In the future
+ * we could switch to a faster solution. */
+size_t rdbSavedObjectLen(robj *o, robj *key, int dbid) {
+ ssize_t len = rdbSaveObject(NULL,o,key,dbid);
+ serverAssertWithInfo(NULL,o,len != -1);
+ return len;
+}
+
/* Save a key-value pair, with expire time, type, key, value.
 * On error -1 is returned.
 * On success if the key was actually saved 1 is returned.
 *
 * The emitted sequence is: [EXPIRETIME_MS] [IDLE|FREQ] TYPE KEY VALUE,
 * with the bracketed opcodes present only when applicable. */
int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime, int dbid) {
    /* LRU idle time and LFU frequency are mutually exclusive: which one is
     * persisted depends on the configured maxmemory policy. */
    int savelru = server.maxmemory_policy & MAXMEMORY_FLAG_LRU;
    int savelfu = server.maxmemory_policy & MAXMEMORY_FLAG_LFU;

    /* Save the expire time */
    if (expiretime != -1) {
        if (rdbSaveType(rdb,RDB_OPCODE_EXPIRETIME_MS) == -1) return -1;
        if (rdbSaveMillisecondTime(rdb,expiretime) == -1) return -1;
    }

    /* Save the LRU info. */
    if (savelru) {
        uint64_t idletime = estimateObjectIdleTime(val);
        idletime /= 1000; /* Using seconds is enough and requires less space.*/
        if (rdbSaveType(rdb,RDB_OPCODE_IDLE) == -1) return -1;
        if (rdbSaveLen(rdb,idletime) == -1) return -1;
    }

    /* Save the LFU info. */
    if (savelfu) {
        uint8_t buf[1];
        buf[0] = LFUDecrAndReturn(val);
        /* We can encode this in exactly two bytes: the opcode and an 8
         * bit counter, since the frequency is logarithmic with a 0-255 range.
         * Note that we do not store the halving time because to reset it
         * a single time when loading does not affect the frequency much. */
        if (rdbSaveType(rdb,RDB_OPCODE_FREQ) == -1) return -1;
        if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
    }

    /* Save type, key, value */
    if (rdbSaveObjectType(rdb,val) == -1) return -1;
    if (rdbSaveStringObject(rdb,key) == -1) return -1;
    if (rdbSaveObject(rdb,val,key,dbid) == -1) return -1;

    /* Delay return if required (for testing) */
    if (server.rdb_key_save_delay)
        debugDelay(server.rdb_key_save_delay);

    return 1;
}
+
+/* Save an AUX field. */
+ssize_t rdbSaveAuxField(rio *rdb, void *key, size_t keylen, void *val, size_t vallen) {
+ ssize_t ret, len = 0;
+ if ((ret = rdbSaveType(rdb,RDB_OPCODE_AUX)) == -1) return -1;
+ len += ret;
+ if ((ret = rdbSaveRawString(rdb,key,keylen)) == -1) return -1;
+ len += ret;
+ if ((ret = rdbSaveRawString(rdb,val,vallen)) == -1) return -1;
+ len += ret;
+ return len;
+}
+
+/* Wrapper for rdbSaveAuxField() used when key/val length can be obtained
+ * with strlen(). */
+ssize_t rdbSaveAuxFieldStrStr(rio *rdb, char *key, char *val) {
+ return rdbSaveAuxField(rdb,key,strlen(key),val,strlen(val));
+}
+
+/* Wrapper for strlen(key) + integer type (up to long long range). */
+ssize_t rdbSaveAuxFieldStrInt(rio *rdb, char *key, long long val) {
+ char buf[LONG_STR_SIZE];
+ int vlen = ll2string(buf,sizeof(buf),val);
+ return rdbSaveAuxField(rdb,key,strlen(key),buf,vlen);
+}
+
/* Save a few default AUX fields with information about the RDB generated.
 * Returns 1 on success, -1 on write error. */
int rdbSaveInfoAuxFields(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
    /* Pointer width of the process that produced the RDB. */
    int redis_bits = (sizeof(void*) == 8) ? 64 : 32;
    int aof_base = (rdbflags & RDBFLAGS_AOF_PREAMBLE) != 0;

    /* Add a few fields about the state when the RDB was created. */
    if (rdbSaveAuxFieldStrStr(rdb,"redis-ver",REDIS_VERSION) == -1) return -1;
    if (rdbSaveAuxFieldStrInt(rdb,"redis-bits",redis_bits) == -1) return -1;
    if (rdbSaveAuxFieldStrInt(rdb,"ctime",time(NULL)) == -1) return -1;
    if (rdbSaveAuxFieldStrInt(rdb,"used-mem",zmalloc_used_memory()) == -1) return -1;

    /* Handle saving options that generate aux fields: replication info is
     * only written when an rdbSaveInfo context is provided. */
    if (rsi) {
        if (rdbSaveAuxFieldStrInt(rdb,"repl-stream-db",rsi->repl_stream_db)
            == -1) return -1;
        if (rdbSaveAuxFieldStrStr(rdb,"repl-id",server.replid)
            == -1) return -1;
        if (rdbSaveAuxFieldStrInt(rdb,"repl-offset",server.master_repl_offset)
            == -1) return -1;
    }
    /* Mark whether this RDB is the base file of an AOF set. */
    if (rdbSaveAuxFieldStrInt(rdb, "aof-base", aof_base) == -1) return -1;
    return 1;
}
+
/* Save one module AUX section ([MODULE_AUX opcode][module id][when][payload]
 * [EOF opcode]) for the given module type. Returns bytes written, 0 when the
 * module (via aux_save2) chose to write nothing, or -1 on error. */
ssize_t rdbSaveSingleModuleAux(rio *rdb, int when, moduleType *mt) {
    /* Save a module-specific aux value. */
    RedisModuleIO io;
    int retval = 0;
    moduleInitIOContext(io,mt,rdb,NULL,-1);

    /* We save the AUX field header in a temporary buffer so we can support aux_save2 API.
     * If aux_save2 is used the buffer will be flushed at the first time the module will perform
     * a write operation to the RDB and will be ignored in case there were no writes. */
    rio aux_save_headers_rio;
    rioInitWithBuffer(&aux_save_headers_rio, sdsempty());

    if (rdbSaveType(&aux_save_headers_rio, RDB_OPCODE_MODULE_AUX) == -1) goto error;

    /* Write the "module" identifier as prefix, so that we'll be able
     * to call the right module during loading. */
    if (rdbSaveLen(&aux_save_headers_rio,mt->id) == -1) goto error;

    /* write the 'when' so that we can provide it on loading. add a UINT opcode
     * for backwards compatibility, everything after the MT needs to be prefixed
     * by an opcode. */
    if (rdbSaveLen(&aux_save_headers_rio,RDB_MODULE_OPCODE_UINT) == -1) goto error;
    if (rdbSaveLen(&aux_save_headers_rio,when) == -1) goto error;

    /* Then write the module-specific representation + EOF marker. */
    if (mt->aux_save2) {
        /* Hand the buffered headers to the IO context: they are written
         * lazily, only if the module actually writes something. */
        io.pre_flush_buffer = aux_save_headers_rio.io.buffer.ptr;
        mt->aux_save2(&io,when);
        if (io.pre_flush_buffer) {
            /* aux_save did not save any data to the RDB.
             * We will avoid saving any data related to this aux type
             * to allow loading this RDB if the module is not present. */
            sdsfree(io.pre_flush_buffer);
            io.pre_flush_buffer = NULL;
            return 0;
        }
    } else {
        /* Write headers now, aux_save does not do lazy saving of the headers. */
        retval = rdbWriteRaw(rdb, aux_save_headers_rio.io.buffer.ptr, sdslen(aux_save_headers_rio.io.buffer.ptr));
        if (retval == -1) goto error;
        io.bytes += retval;
        sdsfree(aux_save_headers_rio.io.buffer.ptr);
        mt->aux_save(&io,when);
    }
    retval = rdbSaveLen(rdb,RDB_MODULE_OPCODE_EOF);
    /* By now the lazy header buffer must have been flushed (or freed). */
    serverAssert(!io.pre_flush_buffer);
    if (retval == -1)
        io.error = 1;
    else
        io.bytes += retval;

    if (io.ctx) {
        moduleFreeContext(io.ctx);
        zfree(io.ctx);
    }
    if (io.error)
        return -1;
    return io.bytes;
error:
    sdsfree(aux_save_headers_rio.io.buffer.ptr);
    return -1;
}
+
+ssize_t rdbSaveFunctions(rio *rdb) {
+ dict *functions = functionsLibGet();
+ dictIterator *iter = dictGetIterator(functions);
+ dictEntry *entry = NULL;
+ ssize_t written = 0;
+ ssize_t ret;
+ while ((entry = dictNext(iter))) {
+ if ((ret = rdbSaveType(rdb, RDB_OPCODE_FUNCTION2)) < 0) goto werr;
+ written += ret;
+ functionLibInfo *li = dictGetVal(entry);
+ if ((ret = rdbSaveRawString(rdb, (unsigned char *) li->code, sdslen(li->code))) < 0) goto werr;
+ written += ret;
+ }
+ dictReleaseIterator(iter);
+ return written;
+
+werr:
+ dictReleaseIterator(iter);
+ return -1;
+}
+
/* Serialize one database: the SELECTDB and RESIZEDB opcodes followed by
 * every key-value pair. 'key_counter' is a running counter shared across
 * databases, used for child-info progress reporting. Returns bytes
 * written (0 if the db is empty), or -1 on error. */
ssize_t rdbSaveDb(rio *rdb, int dbid, int rdbflags, long *key_counter) {
    dictIterator *di;
    dictEntry *de;
    ssize_t written = 0;
    ssize_t res;
    /* Static: throttles progress updates across successive calls. */
    static long long info_updated_time = 0;
    char *pname = (rdbflags & RDBFLAGS_AOF_PREAMBLE) ? "AOF rewrite" : "RDB";

    redisDb *db = server.db + dbid;
    dict *d = db->dict;
    if (dictSize(d) == 0) return 0; /* skip empty databases entirely */
    di = dictGetSafeIterator(d);

    /* Write the SELECT DB opcode */
    if ((res = rdbSaveType(rdb,RDB_OPCODE_SELECTDB)) < 0) goto werr;
    written += res;
    if ((res = rdbSaveLen(rdb, dbid)) < 0) goto werr;
    written += res;

    /* Write the RESIZE DB opcode (hints the loader about hash table sizes). */
    uint64_t db_size, expires_size;
    db_size = dictSize(db->dict);
    expires_size = dictSize(db->expires);
    if ((res = rdbSaveType(rdb,RDB_OPCODE_RESIZEDB)) < 0) goto werr;
    written += res;
    if ((res = rdbSaveLen(rdb,db_size)) < 0) goto werr;
    written += res;
    if ((res = rdbSaveLen(rdb,expires_size)) < 0) goto werr;
    written += res;

    /* Iterate this DB writing every entry */
    while((de = dictNext(di)) != NULL) {
        sds keystr = dictGetKey(de);
        robj key, *o = dictGetVal(de);
        long long expire;
        size_t rdb_bytes_before_key = rdb->processed_bytes;

        /* Stack-allocated robj wrapping the sds key: no refcounting needed. */
        initStaticStringObject(key,keystr);
        expire = getExpire(db,&key);
        if ((res = rdbSaveKeyValuePair(rdb, &key, o, expire, dbid)) < 0) goto werr;
        written += res;

        /* In fork child process, we can try to release memory back to the
         * OS and possibly avoid or decrease COW. We give the dismiss
         * mechanism a hint about an estimated size of the object we stored. */
        size_t dump_size = rdb->processed_bytes - rdb_bytes_before_key;
        if (server.in_fork_child) dismissObject(o, dump_size);

        /* Update child info every 1 second (approximately).
         * in order to avoid calling mstime() on each iteration, we will
         * check the diff every 1024 keys */
        if (((*key_counter)++ & 1023) == 0) {
            long long now = mstime();
            if (now - info_updated_time >= 1000) {
                sendChildInfo(CHILD_INFO_TYPE_CURRENT_INFO, *key_counter, pname);
                info_updated_time = now;
            }
        }
    }

    dictReleaseIterator(di);
    return written;

werr:
    dictReleaseIterator(di);
    return -1;
}
+
/* Produces a dump of the database in RDB format sending it to the specified
 * Redis I/O channel. On success C_OK is returned, otherwise C_ERR
 * is returned and part of the output, or all the output, can be
 * missing because of I/O errors.
 *
 * When the function returns C_ERR and if 'error' is not NULL, the
 * integer pointed by 'error' is set to the value of errno just after the I/O
 * error.
 *
 * 'req' carries SLAVE_REQ_* flags that may exclude data and/or functions
 * from the dump (used for replica-driven partial syncs). */
int rdbSaveRio(int req, rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) {
    char magic[10];
    uint64_t cksum;
    long key_counter = 0;
    int j;

    /* Checksumming is optional; when disabled the trailing CRC is zero. */
    if (server.rdb_checksum)
        rdb->update_cksum = rioGenericUpdateChecksum;
    /* File header: "REDIS" + 4-digit RDB version (9 bytes, no NUL). */
    snprintf(magic,sizeof(magic),"REDIS%04d",RDB_VERSION);
    if (rdbWriteRaw(rdb,magic,9) == -1) goto werr;
    if (rdbSaveInfoAuxFields(rdb,rdbflags,rsi) == -1) goto werr;
    if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, REDISMODULE_AUX_BEFORE_RDB) == -1) goto werr;

    /* save functions */
    if (!(req & SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS) && rdbSaveFunctions(rdb) == -1) goto werr;

    /* save all databases, skip this if we're in functions-only mode */
    if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA)) {
        for (j = 0; j < server.dbnum; j++) {
            if (rdbSaveDb(rdb, j, rdbflags, &key_counter) == -1) goto werr;
        }
    }

    if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, REDISMODULE_AUX_AFTER_RDB) == -1) goto werr;

    /* EOF opcode */
    if (rdbSaveType(rdb,RDB_OPCODE_EOF) == -1) goto werr;

    /* CRC64 checksum. It will be zero if checksum computation is disabled, the
     * loading code skips the check in this case. */
    cksum = rdb->cksum;
    memrev64ifbe(&cksum); /* stored little endian */
    if (rioWrite(rdb,&cksum,8) == 0) goto werr;
    return C_OK;

werr:
    if (error) *error = errno;
    return C_ERR;
}
+
/* This is just a wrapper to rdbSaveRio() that additionally adds a prefix
 * and a suffix to the generated RDB dump. The prefix is:
 *
 * $EOF:<40 bytes unguessable hex string>\r\n
 *
 * While the suffix is the 40 bytes hex string we announced in the prefix.
 * This way processes receiving the payload can understand when it ends
 * without doing any processing of the content.
 * Returns C_OK or C_ERR; on C_ERR '*error' (if given) holds errno. */
int rdbSaveRioWithEOFMark(int req, rio *rdb, int *error, rdbSaveInfo *rsi) {
    char eofmark[RDB_EOF_MARK_SIZE];

    startSaving(RDBFLAGS_REPLICATION);
    /* Random marker so the payload itself can't accidentally contain it. */
    getRandomHexChars(eofmark,RDB_EOF_MARK_SIZE);
    if (error) *error = 0;
    if (rioWrite(rdb,"$EOF:",5) == 0) goto werr;
    if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr;
    if (rioWrite(rdb,"\r\n",2) == 0) goto werr;
    if (rdbSaveRio(req,rdb,error,RDBFLAGS_NONE,rsi) == C_ERR) goto werr;
    if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr;
    stopSaving(1);
    return C_OK;

werr: /* Write error. */
    /* Set 'error' only if not already set by rdbSaveRio() call. */
    if (error && *error == 0) *error = errno;
    stopSaving(0);
    return C_ERR;
}
+
/* Core of the disk save: open 'filename', stream the RDB into it, flush and
 * fsync. On failure the partially written file is unlinked and errno is
 * preserved for the caller. Returns C_OK or C_ERR. */
static int rdbSaveInternal(int req, const char *filename, rdbSaveInfo *rsi, int rdbflags) {
    char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */
    rio rdb;
    int error = 0;
    int saved_errno;
    char *err_op;    /* For a detailed log */

    FILE *fp = fopen(filename,"w");
    if (!fp) {
        /* Preserve errno across the logging calls below. */
        saved_errno = errno;
        char *str_err = strerror(errno);
        char *cwdp = getcwd(cwd,MAXPATHLEN);
        serverLog(LL_WARNING,
            "Failed opening the temp RDB file %s (in server root dir %s) "
            "for saving: %s",
            filename,
            cwdp ? cwdp : "unknown",
            str_err);
        errno = saved_errno;
        return C_ERR;
    }

    rioInitWithFile(&rdb,fp);

    if (server.rdb_save_incremental_fsync) {
        /* fsync in chunks while writing to bound the final fsync latency. */
        rioSetAutoSync(&rdb,REDIS_AUTOSYNC_BYTES);
        if (!(rdbflags & RDBFLAGS_KEEP_CACHE)) rioSetReclaimCache(&rdb,1);
    }

    if (rdbSaveRio(req,&rdb,&error,rdbflags,rsi) == C_ERR) {
        errno = error;
        err_op = "rdbSaveRio";
        goto werr;
    }

    /* Make sure data will not remain on the OS's output buffers */
    if (fflush(fp)) { err_op = "fflush"; goto werr; }
    if (fsync(fileno(fp))) { err_op = "fsync"; goto werr; }
    if (!(rdbflags & RDBFLAGS_KEEP_CACHE) && reclaimFilePageCache(fileno(fp), 0, 0) == -1) {
        /* A failure here is not fatal: we only lose the cache reclaim. */
        serverLog(LL_NOTICE,"Unable to reclaim cache after saving RDB: %s", strerror(errno));
    }
    /* NULL fp before werr so the error path doesn't double-close it. */
    if (fclose(fp)) { fp = NULL; err_op = "fclose"; goto werr; }

    return C_OK;

werr:
    saved_errno = errno;
    serverLog(LL_WARNING,"Write error while saving DB to the disk(%s): %s", err_op, strerror(errno));
    if (fp) fclose(fp);
    unlink(filename); /* remove the partial/corrupt file */
    errno = saved_errno;
    return C_ERR;
}
+
+/* Save DB to the file. Similar to rdbSave() but this function won't use a
+ * temporary file and won't update the metrics.
+ *
+ * Wraps rdbSaveInternal() with the startSaving()/stopSaving() notifications
+ * and preserves errno across stopSaving() on the failure path.
+ * Returns C_OK on success, C_ERR on error. */
+int rdbSaveToFile(const char *filename) {
+ startSaving(RDBFLAGS_NONE);
+
+ if (rdbSaveInternal(SLAVE_REQ_NONE,filename,NULL,RDBFLAGS_NONE) != C_OK) {
+ int saved_errno = errno;
+ stopSaving(0);
+ errno = saved_errno;
+ return C_ERR;
+ }
+
+ stopSaving(1);
+ return C_OK;
+}
+
+/* Save the DB on disk. Return C_ERR on error, C_OK on success.
+ *
+ * The dataset is first written to a pid-derived temp file ("temp-<pid>.rdb")
+ * and then rename(2)d onto 'filename' so readers never observe a partially
+ * written RDB. The containing directory is fsynced afterwards so the rename
+ * itself is durable. On success the dirty counter and the last-save
+ * metrics are updated. */
+int rdbSave(int req, char *filename, rdbSaveInfo *rsi, int rdbflags) {
+ char tmpfile[256];
+ char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */
+
+ startSaving(RDBFLAGS_NONE);
+ snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
+
+ if (rdbSaveInternal(req,tmpfile,rsi,rdbflags) != C_OK) {
+ stopSaving(0);
+ return C_ERR;
+ }
+
+ /* Use RENAME to make sure the DB file is changed atomically only
+ * if the generate DB file is ok. */
+ if (rename(tmpfile,filename) == -1) {
+ char *str_err = strerror(errno);
+ char *cwdp = getcwd(cwd,MAXPATHLEN);
+ serverLog(LL_WARNING,
+ "Error moving temp DB file %s on the final "
+ "destination %s (in server root dir %s): %s",
+ tmpfile,
+ filename,
+ cwdp ? cwdp : "unknown",
+ str_err);
+ unlink(tmpfile);
+ stopSaving(0);
+ return C_ERR;
+ }
+ /* Make the rename durable: fsync the directory holding 'filename'. */
+ if (fsyncFileDir(filename) != 0) {
+ serverLog(LL_WARNING,
+ "Failed to fsync directory while saving DB: %s", strerror(errno));
+ stopSaving(0);
+ return C_ERR;
+ }
+
+ serverLog(LL_NOTICE,"DB saved on disk");
+ server.dirty = 0;
+ server.lastsave = time(NULL);
+ server.lastbgsave_status = C_OK;
+ stopSaving(1);
+ return C_OK;
+}
+
+/* Fork a child process that saves the DB to 'filename' using rdbSave().
+ *
+ * Returns C_ERR immediately if another child (RDB/AOF/module) is already
+ * active, or if the fork fails (in which case lastbgsave_status is set to
+ * C_ERR). Returns C_OK in the parent once the background save started;
+ * the child never returns, it exits with status 0 on success, 1 on error. */
+int rdbSaveBackground(int req, char *filename, rdbSaveInfo *rsi, int rdbflags) {
+ pid_t childpid;
+
+ if (hasActiveChildProcess()) return C_ERR;
+ server.stat_rdb_saves++;
+
+ /* Remember how many changes this save covers, so the parent can adjust
+ * the dirty counter when the child finishes. */
+ server.dirty_before_bgsave = server.dirty;
+ server.lastbgsave_try = time(NULL);
+
+ if ((childpid = redisFork(CHILD_TYPE_RDB)) == 0) {
+ int retval;
+
+ /* Child */
+ redisSetProcTitle("redis-rdb-bgsave");
+ redisSetCpuAffinity(server.bgsave_cpulist);
+ retval = rdbSave(req, filename,rsi,rdbflags);
+ if (retval == C_OK) {
+ /* Report copy-on-write memory usage back to the parent. */
+ sendChildCowInfo(CHILD_INFO_TYPE_RDB_COW_SIZE, "RDB");
+ }
+ exitFromChild((retval == C_OK) ? 0 : 1);
+ } else {
+ /* Parent */
+ if (childpid == -1) {
+ server.lastbgsave_status = C_ERR;
+ serverLog(LL_WARNING,"Can't save in background: fork: %s",
+ strerror(errno));
+ return C_ERR;
+ }
+ serverLog(LL_NOTICE,"Background saving started by pid %ld",(long) childpid);
+ server.rdb_save_time_start = time(NULL);
+ server.rdb_child_type = RDB_CHILD_TYPE_DISK;
+ return C_OK;
+ }
+ return C_OK; /* unreached */
+}
+
+/* Remove the temp RDB file ("temp-<childpid>.rdb") left behind by a saving
+ * child.
+ *
+ * Note that we may call this function in signal handle 'sigShutdownHandler',
+ * so we need guarantee all functions we call are async-signal-safe.
+ * If we call this function from signal handle, we won't call bg_unlink that
+ * is not async-signal-safe. */
+void rdbRemoveTempFile(pid_t childpid, int from_signal) {
+ char tmpfile[256];
+ char pid[32];
+
+ /* Generate temp rdb file name using async-signal safe functions. */
+ ll2string(pid, sizeof(pid), childpid);
+ redis_strlcpy(tmpfile, "temp-", sizeof(tmpfile));
+ redis_strlcat(tmpfile, pid, sizeof(tmpfile));
+ redis_strlcat(tmpfile, ".rdb", sizeof(tmpfile));
+
+ if (from_signal) {
+ /* bg_unlink is not async-signal-safe, but in this case we don't really
+ * need to close the fd, it'll be released when the process exists. */
+ /* NOTE(review): opening the file before unlink() keeps the inode
+ * referenced, presumably deferring the actual reclaim to process
+ * exit (same intent as bg_unlink) — confirm against bg_unlink. */
+ int fd = open(tmpfile, O_RDONLY|O_NONBLOCK);
+ UNUSED(fd);
+ unlink(tmpfile);
+ } else {
+ bg_unlink(tmpfile);
+ }
+}
+
+/* This function is called by rdbLoadObject() when the code is in RDB-check
+ * mode and we find a module value of type 2 that can be parsed without
+ * the need of the actual module. The value is parsed for errors, finally
+ * a dummy redis object is returned just to conform to the API.
+ *
+ * The value is a sequence of opcode-tagged fields terminated by
+ * RDB_MODULE_OPCODE_EOF; each supported opcode's payload is loaded and
+ * discarded. NOTE(review): the code falls through after each
+ * rdbReportCorruptRDB() call, so that function is expected not to return
+ * in check mode — confirm. */
+robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) {
+ uint64_t opcode;
+ while((opcode = rdbLoadLen(rdb,NULL)) != RDB_MODULE_OPCODE_EOF) {
+ if (opcode == RDB_MODULE_OPCODE_SINT ||
+ opcode == RDB_MODULE_OPCODE_UINT)
+ {
+ uint64_t len;
+ if (rdbLoadLenByRef(rdb,NULL,&len) == -1) {
+ rdbReportCorruptRDB(
+ "Error reading integer from module %s value", modulename);
+ }
+ } else if (opcode == RDB_MODULE_OPCODE_STRING) {
+ robj *o = rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL);
+ if (o == NULL) {
+ rdbReportCorruptRDB(
+ "Error reading string from module %s value", modulename);
+ }
+ /* String only validated, not kept. */
+ decrRefCount(o);
+ } else if (opcode == RDB_MODULE_OPCODE_FLOAT) {
+ float val;
+ if (rdbLoadBinaryFloatValue(rdb,&val) == -1) {
+ rdbReportCorruptRDB(
+ "Error reading float from module %s value", modulename);
+ }
+ } else if (opcode == RDB_MODULE_OPCODE_DOUBLE) {
+ double val;
+ if (rdbLoadBinaryDoubleValue(rdb,&val) == -1) {
+ rdbReportCorruptRDB(
+ "Error reading double from module %s value", modulename);
+ }
+ }
+ }
+ return createStringObject("module-dummy-value",18);
+}
+
+/* callback for hashZiplistConvertAndValidateIntegrity.
+ * Check that the ziplist doesn't have duplicate hash field names.
+ * The ziplist element pointed by 'p' will be converted and stored into listpack.
+ *
+ * 'userdata' is the anonymous struct built by
+ * ziplistPairsConvertAndValidateIntegrity(): a running element count, a
+ * lazily created dup-detection dict, and the destination listpack pointer.
+ * Returns 1 to continue validation, 0 on a read error or duplicate field. */
+static int _ziplistPairsEntryConvertAndValidate(unsigned char *p, unsigned int head_count, void *userdata) {
+ unsigned char *str;
+ unsigned int slen;
+ long long vll;
+
+ struct {
+ long count;
+ dict *fields;
+ unsigned char **lp;
+ } *data = userdata;
+
+ /* Lazily create the dup-detection dict on the first callback, sized
+ * for half the declared entries (one field per field/value pair). */
+ if (data->fields == NULL) {
+ data->fields = dictCreate(&hashDictType);
+ dictExpand(data->fields, head_count/2);
+ }
+
+ if (!ziplistGet(p, &str, &slen, &vll))
+ return 0;
+
+ /* Even records are field names, add to dict and check that's not a dup */
+ if (((data->count) & 1) == 0) {
+ sds field = str? sdsnewlen(str, slen): sdsfromlonglong(vll);
+ if (dictAdd(data->fields, field, NULL) != DICT_OK) {
+ /* Duplicate, return an error */
+ sdsfree(field);
+ return 0;
+ }
+ }
+
+ /* Append the element to the destination listpack, keeping the
+ * string/integer representation of the source entry. */
+ if (str) {
+ *(data->lp) = lpAppend(*(data->lp), (unsigned char*)str, slen);
+ } else {
+ *(data->lp) = lpAppendInteger(*(data->lp), vll);
+ }
+
+ (data->count)++;
+ return 1;
+}
+
+/* Validate the integrity of the data structure while converting it to
+ * listpack and storing it at 'lp'.
+ * The function is safe to call on non-validated ziplists, it returns 0
+ * when encounter an integrity validation issue.
+ *
+ * Used for field/value encodings (hash/zset ziplists): besides structural
+ * validation it rejects duplicate field names and odd element counts.
+ * Returns 1 when the ziplist is valid, 0 otherwise. */
+int ziplistPairsConvertAndValidateIntegrity(unsigned char *zl, size_t size, unsigned char **lp) {
+ /* Keep track of the field names to locate duplicate ones */
+ struct {
+ long count;
+ dict *fields; /* Initialisation at the first callback. */
+ unsigned char **lp;
+ } data = {0, NULL, lp};
+
+ int ret = ziplistValidateIntegrity(zl, size, 1, _ziplistPairsEntryConvertAndValidate, &data);
+
+ /* make sure we have an even number of records. */
+ if (data.count & 1)
+ ret = 0;
+
+ if (data.fields) dictRelease(data.fields);
+ return ret;
+}
+
+/* callback for ziplistValidateIntegrity.
+ * The ziplist element pointed by 'p' will be converted and stored into listpack.
+ *
+ * 'userdata' is an unsigned char** pointing at the destination listpack.
+ * Returns 1 to continue, 0 if the entry can't be read. */
+static int _ziplistEntryConvertAndValidate(unsigned char *p, unsigned int head_count, void *userdata) {
+ UNUSED(head_count);
+ unsigned char *str;
+ unsigned int slen;
+ long long vll;
+ unsigned char **lp = (unsigned char**)userdata;
+
+ if (!ziplistGet(p, &str, &slen, &vll)) return 0;
+
+ /* Preserve the entry's representation: strings stay strings,
+ * integer-encoded entries are appended as integers. */
+ if (str)
+ *lp = lpAppend(*lp, (unsigned char*)str, slen);
+ else
+ *lp = lpAppendInteger(*lp, vll);
+
+ return 1;
+}
+
+/* callback for ziplistValidateIntegrity.
+ * The ziplist element pointed by 'p' will be converted and stored into quicklist.
+ *
+ * 'userdata' is the destination quicklist. Integer-encoded entries are
+ * rendered as decimal strings, since list entries are stored as strings.
+ * Returns 1 to continue, 0 if the entry can't be read. */
+static int _listZiplistEntryConvertAndValidate(unsigned char *p, unsigned int head_count, void *userdata) {
+ UNUSED(head_count);
+ unsigned char *str;
+ unsigned int slen;
+ long long vll;
+ char longstr[32] = {0};
+ quicklist *ql = (quicklist*)userdata;
+
+ if (!ziplistGet(p, &str, &slen, &vll)) return 0;
+ if (!str) {
+ /* Write the longval as a string so we can re-add it */
+ slen = ll2string(longstr, sizeof(longstr), vll);
+ str = (unsigned char *)longstr;
+ }
+ quicklistPushTail(ql, str, slen);
+ return 1;
+}
+
+/* callback for to check the listpack doesn't have duplicate records.
+ *
+ * 'userdata' is the anonymous struct built by lpValidateIntegrityAndDups():
+ * 'pairs' selects map semantics (only even-indexed elements, i.e. field
+ * names, must be unique) vs set semantics (every element unique).
+ * Returns 1 to continue validation, 0 on a duplicate. */
+static int _lpEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) {
+ struct {
+ int pairs;
+ long count;
+ dict *fields;
+ } *data = userdata;
+
+ /* Lazily create the dup-detection dict on the first callback, sized
+ * by the declared header count (halved in pairs mode). */
+ if (data->fields == NULL) {
+ data->fields = dictCreate(&hashDictType);
+ dictExpand(data->fields, data->pairs ? head_count/2 : head_count);
+ }
+
+ /* If we're checking pairs, then even records are field names. Otherwise
+ * we're checking all elements. Add to dict and check that's not a dup */
+ if (!data->pairs || ((data->count) & 1) == 0) {
+ unsigned char *str;
+ int64_t slen;
+ unsigned char buf[LP_INTBUF_SIZE];
+
+ /* lpGet renders integer entries into 'buf' so both representations
+ * compare as strings. */
+ str = lpGet(p, &slen, buf);
+ sds field = sdsnewlen(str, slen);
+ if (dictAdd(data->fields, field, NULL) != DICT_OK) {
+ /* Duplicate, return an error */
+ sdsfree(field);
+ return 0;
+ }
+ }
+
+ (data->count)++;
+ return 1;
+}
+
+/* Validate the integrity of the listpack structure.
+ * when `deep` is 0, only the integrity of the header is validated.
+ * when `deep` is 1, we scan all the entries one by one.
+ * when `pairs` is 0, all elements need to be unique (it's a set)
+ * when `pairs` is 1, odd elements need to be unique (it's a key-value map)
+ *
+ * Returns 1 when the listpack is valid (and, in pairs mode, has an even
+ * number of elements), 0 otherwise. */
+int lpValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep, int pairs) {
+ if (!deep)
+ return lpValidateIntegrity(lp, size, 0, NULL, NULL);
+
+ /* Keep track of the field names to locate duplicate ones */
+ struct {
+ int pairs;
+ long count;
+ dict *fields; /* Initialisation at the first callback. */
+ } data = {pairs, 0, NULL};
+
+ int ret = lpValidateIntegrity(lp, size, 1, _lpEntryValidation, &data);
+
+ /* make sure we have an even number of records. */
+ if (pairs && data.count & 1)
+ ret = 0;
+
+ if (data.fields) dictRelease(data.fields);
+ return ret;
+}
+
+/* Load a Redis object of the specified type from the specified file.
+ * On success a newly allocated object is returned, otherwise NULL.
+ * When the function returns NULL and if 'error' is not NULL, the
+ * integer pointed by 'error' is set to the type of error that occurred */
+robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
+ robj *o = NULL, *ele, *dec;
+ uint64_t len;
+ unsigned int i;
+
+ /* Set default error of load object, it will be set to 0 on success. */
+ if (error) *error = RDB_LOAD_ERR_OTHER;
+
+ int deep_integrity_validation = server.sanitize_dump_payload == SANITIZE_DUMP_YES;
+ if (server.sanitize_dump_payload == SANITIZE_DUMP_CLIENTS) {
+ /* Skip sanitization when loading (an RDB), or getting a RESTORE command
+ * from either the master or a client using an ACL user with the skip-sanitize-payload flag. */
+ int skip = server.loading ||
+ (server.current_client && (server.current_client->flags & CLIENT_MASTER));
+ if (!skip && server.current_client && server.current_client->user)
+ skip = !!(server.current_client->user->flags & USER_FLAG_SANITIZE_PAYLOAD_SKIP);
+ deep_integrity_validation = !skip;
+ }
+
+ if (rdbtype == RDB_TYPE_STRING) {
+ /* Read string value */
+ if ((o = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
+ o = tryObjectEncodingEx(o, 0);
+ } else if (rdbtype == RDB_TYPE_LIST) {
+ /* Read list value */
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if (len == 0) goto emptykey;
+
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_listpack_size,
+ server.list_compress_depth);
+
+ /* Load every single element of the list */
+ while(len--) {
+ if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) {
+ decrRefCount(o);
+ return NULL;
+ }
+ dec = getDecodedObject(ele);
+ size_t len = sdslen(dec->ptr);
+ quicklistPushTail(o->ptr, dec->ptr, len);
+ decrRefCount(dec);
+ decrRefCount(ele);
+ }
+
+ listTypeTryConversion(o,LIST_CONV_AUTO,NULL,NULL);
+ } else if (rdbtype == RDB_TYPE_SET) {
+ /* Read Set value */
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if (len == 0) goto emptykey;
+
+ /* Use a regular set when there are too many entries. */
+ size_t max_entries = server.set_max_intset_entries;
+ if (max_entries >= 1<<30) max_entries = 1<<30;
+ if (len > max_entries) {
+ o = createSetObject();
+ /* It's faster to expand the dict to the right size asap in order
+ * to avoid rehashing */
+ if (len > DICT_HT_INITIAL_SIZE && dictTryExpand(o->ptr,len) != DICT_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+ decrRefCount(o);
+ return NULL;
+ }
+ } else {
+ o = createIntsetObject();
+ }
+
+ /* Load every single element of the set */
+ size_t maxelelen = 0, sumelelen = 0;
+ for (i = 0; i < len; i++) {
+ long long llval;
+ sds sdsele;
+
+ if ((sdsele = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ decrRefCount(o);
+ return NULL;
+ }
+ size_t elelen = sdslen(sdsele);
+ sumelelen += elelen;
+ if (elelen > maxelelen) maxelelen = elelen;
+
+ if (o->encoding == OBJ_ENCODING_INTSET) {
+ /* Fetch integer value from element. */
+ if (isSdsRepresentableAsLongLong(sdsele,&llval) == C_OK) {
+ uint8_t success;
+ o->ptr = intsetAdd(o->ptr,llval,&success);
+ if (!success) {
+ rdbReportCorruptRDB("Duplicate set members detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ } else if (setTypeSize(o) < server.set_max_listpack_entries &&
+ maxelelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(NULL, sumelelen))
+ {
+ /* We checked if it's safe to add one large element instead
+ * of many small ones. It's OK since lpSafeToAdd doesn't
+ * care about individual elements, only the total size. */
+ setTypeConvert(o, OBJ_ENCODING_LISTPACK);
+ } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+ sdsfree(sdsele);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ /* This will also be called when the set was just converted
+ * to a listpack encoded set. */
+ if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ if (setTypeSize(o) < server.set_max_listpack_entries &&
+ elelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(o->ptr, elelen))
+ {
+ unsigned char *p = lpFirst(o->ptr);
+ if (p && lpFind(o->ptr, p, (unsigned char*)sdsele, elelen, 0)) {
+ rdbReportCorruptRDB("Duplicate set members detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ o->ptr = lpAppend(o->ptr, (unsigned char *)sdsele, elelen);
+ } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu",
+ (unsigned long long)len);
+ sdsfree(sdsele);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ /* This will also be called when the set was just converted
+ * to a regular hash table encoded set. */
+ if (o->encoding == OBJ_ENCODING_HT) {
+ if (dictAdd((dict*)o->ptr,sdsele,NULL) != DICT_OK) {
+ rdbReportCorruptRDB("Duplicate set members detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ } else {
+ sdsfree(sdsele);
+ }
+ }
+ } else if (rdbtype == RDB_TYPE_ZSET_2 || rdbtype == RDB_TYPE_ZSET) {
+ /* Read sorted set value. */
+ uint64_t zsetlen;
+ size_t maxelelen = 0, totelelen = 0;
+ zset *zs;
+
+ if ((zsetlen = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if (zsetlen == 0) goto emptykey;
+
+ o = createZsetObject();
+ zs = o->ptr;
+
+ if (zsetlen > DICT_HT_INITIAL_SIZE && dictTryExpand(zs->dict,zsetlen) != DICT_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)zsetlen);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Load every single element of the sorted set. */
+ while(zsetlen--) {
+ sds sdsele;
+ double score;
+ zskiplistNode *znode;
+
+ if ((sdsele = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (rdbtype == RDB_TYPE_ZSET_2) {
+ if (rdbLoadBinaryDoubleValue(rdb,&score) == -1) {
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ } else {
+ if (rdbLoadDoubleValue(rdb,&score) == -1) {
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ }
+
+ if (isnan(score)) {
+ rdbReportCorruptRDB("Zset with NAN score detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+
+ /* Don't care about integer-encoded strings. */
+ if (sdslen(sdsele) > maxelelen) maxelelen = sdslen(sdsele);
+ totelelen += sdslen(sdsele);
+
+ znode = zslInsert(zs->zsl,score,sdsele);
+ if (dictAdd(zs->dict,sdsele,&znode->score) != DICT_OK) {
+ rdbReportCorruptRDB("Duplicate zset fields detected");
+ decrRefCount(o);
+ /* no need to free 'sdsele', will be released by zslFree together with 'o' */
+ return NULL;
+ }
+ }
+
+ /* Convert *after* loading, since sorted sets are not stored ordered. */
+ if (zsetLength(o) <= server.zset_max_listpack_entries &&
+ maxelelen <= server.zset_max_listpack_value &&
+ lpSafeToAdd(NULL, totelelen))
+ {
+ zsetConvert(o,OBJ_ENCODING_LISTPACK);
+ }
+ } else if (rdbtype == RDB_TYPE_HASH) {
+ uint64_t len;
+ int ret;
+ sds field, value;
+ dict *dupSearchDict = NULL;
+
+ len = rdbLoadLen(rdb, NULL);
+ if (len == RDB_LENERR) return NULL;
+ if (len == 0) goto emptykey;
+
+ o = createHashObject();
+
+ /* Too many entries? Use a hash table right from the start. */
+ if (len > server.hash_max_listpack_entries)
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ else if (deep_integrity_validation) {
+ /* In this mode, we need to guarantee that the server won't crash
+ * later when the ziplist is converted to a dict.
+ * Create a set (dict with no values) to for a dup search.
+ * We can dismiss it as soon as we convert the ziplist to a hash. */
+ dupSearchDict = dictCreate(&hashDictType);
+ }
+
+
+ /* Load every field and value into the ziplist */
+ while (o->encoding == OBJ_ENCODING_LISTPACK && len > 0) {
+ len--;
+ /* Load raw strings */
+ if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ decrRefCount(o);
+ if (dupSearchDict) dictRelease(dupSearchDict);
+ return NULL;
+ }
+ if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ sdsfree(field);
+ decrRefCount(o);
+ if (dupSearchDict) dictRelease(dupSearchDict);
+ return NULL;
+ }
+
+ if (dupSearchDict) {
+ sds field_dup = sdsdup(field);
+ if (dictAdd(dupSearchDict, field_dup, NULL) != DICT_OK) {
+ rdbReportCorruptRDB("Hash with dup elements");
+ dictRelease(dupSearchDict);
+ decrRefCount(o);
+ sdsfree(field_dup);
+ sdsfree(field);
+ sdsfree(value);
+ return NULL;
+ }
+ }
+
+ /* Convert to hash table if size threshold is exceeded */
+ if (sdslen(field) > server.hash_max_listpack_value ||
+ sdslen(value) > server.hash_max_listpack_value ||
+ !lpSafeToAdd(o->ptr, sdslen(field)+sdslen(value)))
+ {
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ ret = dictAdd((dict*)o->ptr, field, value);
+ if (ret == DICT_ERR) {
+ rdbReportCorruptRDB("Duplicate hash fields detected");
+ if (dupSearchDict) dictRelease(dupSearchDict);
+ sdsfree(value);
+ sdsfree(field);
+ decrRefCount(o);
+ return NULL;
+ }
+ break;
+ }
+
+ /* Add pair to listpack */
+ o->ptr = lpAppend(o->ptr, (unsigned char*)field, sdslen(field));
+ o->ptr = lpAppend(o->ptr, (unsigned char*)value, sdslen(value));
+
+ sdsfree(field);
+ sdsfree(value);
+ }
+
+ if (dupSearchDict) {
+ /* We no longer need this, from now on the entries are added
+ * to a dict so the check is performed implicitly. */
+ dictRelease(dupSearchDict);
+ dupSearchDict = NULL;
+ }
+
+ if (o->encoding == OBJ_ENCODING_HT && len > DICT_HT_INITIAL_SIZE) {
+ if (dictTryExpand(o->ptr,len) != DICT_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ /* Load remaining fields and values into the hash table */
+ while (o->encoding == OBJ_ENCODING_HT && len > 0) {
+ len--;
+ /* Load encoded strings */
+ if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ decrRefCount(o);
+ return NULL;
+ }
+ if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ sdsfree(field);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Add pair to hash table */
+ ret = dictAdd((dict*)o->ptr, field, value);
+ if (ret == DICT_ERR) {
+ rdbReportCorruptRDB("Duplicate hash fields detected");
+ sdsfree(value);
+ sdsfree(field);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ /* All pairs should be read by now */
+ serverAssert(len == 0);
+ } else if (rdbtype == RDB_TYPE_LIST_QUICKLIST || rdbtype == RDB_TYPE_LIST_QUICKLIST_2) {
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if (len == 0) goto emptykey;
+
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_listpack_size,
+ server.list_compress_depth);
+ uint64_t container = QUICKLIST_NODE_CONTAINER_PACKED;
+ while (len--) {
+ unsigned char *lp;
+ size_t encoded_len;
+
+ if (rdbtype == RDB_TYPE_LIST_QUICKLIST_2) {
+ if ((container = rdbLoadLen(rdb,NULL)) == RDB_LENERR) {
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (container != QUICKLIST_NODE_CONTAINER_PACKED && container != QUICKLIST_NODE_CONTAINER_PLAIN) {
+ rdbReportCorruptRDB("Quicklist integrity check failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ unsigned char *data =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,&encoded_len);
+ if (data == NULL || (encoded_len == 0)) {
+ zfree(data);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (container == QUICKLIST_NODE_CONTAINER_PLAIN) {
+ quicklistAppendPlainNode(o->ptr, data, encoded_len);
+ continue;
+ }
+
+ if (rdbtype == RDB_TYPE_LIST_QUICKLIST_2) {
+ lp = data;
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrity(lp, encoded_len, deep_integrity_validation, NULL, NULL)) {
+ rdbReportCorruptRDB("Listpack integrity check failed.");
+ decrRefCount(o);
+ zfree(lp);
+ return NULL;
+ }
+ } else {
+ lp = lpNew(encoded_len);
+ if (!ziplistValidateIntegrity(data, encoded_len, 1,
+ _ziplistEntryConvertAndValidate, &lp))
+ {
+ rdbReportCorruptRDB("Ziplist integrity check failed.");
+ decrRefCount(o);
+ zfree(data);
+ zfree(lp);
+ return NULL;
+ }
+ zfree(data);
+ lp = lpShrinkToFit(lp);
+ }
+
+ /* Silently skip empty ziplists, if we'll end up with empty quicklist we'll fail later. */
+ if (lpLength(lp) == 0) {
+ zfree(lp);
+ continue;
+ } else {
+ quicklistAppendListpack(o->ptr, lp);
+ }
+ }
+
+ if (quicklistCount(o->ptr) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ listTypeTryConversion(o,LIST_CONV_AUTO,NULL,NULL);
+ } else if (rdbtype == RDB_TYPE_HASH_ZIPMAP ||
+ rdbtype == RDB_TYPE_LIST_ZIPLIST ||
+ rdbtype == RDB_TYPE_SET_INTSET ||
+ rdbtype == RDB_TYPE_SET_LISTPACK ||
+ rdbtype == RDB_TYPE_ZSET_ZIPLIST ||
+ rdbtype == RDB_TYPE_ZSET_LISTPACK ||
+ rdbtype == RDB_TYPE_HASH_ZIPLIST ||
+ rdbtype == RDB_TYPE_HASH_LISTPACK)
+ {
+ size_t encoded_len;
+ unsigned char *encoded =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,&encoded_len);
+ if (encoded == NULL) return NULL;
+
+ o = createObject(OBJ_STRING,encoded); /* Obj type fixed below. */
+
+ /* Fix the object encoding, and make sure to convert the encoded
+ * data type into the base type if accordingly to the current
+ * configuration there are too many elements in the encoded data
+ * type. Note that we only check the length and not max element
+ * size as this is an O(N) scan. Eventually everything will get
+ * converted. */
+ switch(rdbtype) {
+ case RDB_TYPE_HASH_ZIPMAP:
+ /* Since we don't keep zipmaps anymore, the rdb loading for these
+ * is O(n) anyway, use `deep` validation. */
+ if (!zipmapValidateIntegrity(encoded, encoded_len, 1)) {
+ rdbReportCorruptRDB("Zipmap integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ /* Convert to ziplist encoded hash. This must be deprecated
+ * when loading dumps created by Redis 2.4 gets deprecated. */
+ {
+ unsigned char *lp = lpNew(0);
+ unsigned char *zi = zipmapRewind(o->ptr);
+ unsigned char *fstr, *vstr;
+ unsigned int flen, vlen;
+ unsigned int maxlen = 0;
+ dict *dupSearchDict = dictCreate(&hashDictType);
+
+ while ((zi = zipmapNext(zi, &fstr, &flen, &vstr, &vlen)) != NULL) {
+ if (flen > maxlen) maxlen = flen;
+ if (vlen > maxlen) maxlen = vlen;
+
+ /* search for duplicate records */
+ sds field = sdstrynewlen(fstr, flen);
+ if (!field || dictAdd(dupSearchDict, field, NULL) != DICT_OK ||
+ !lpSafeToAdd(lp, (size_t)flen + vlen)) {
+ rdbReportCorruptRDB("Hash zipmap with dup elements, or big length (%u)", flen);
+ dictRelease(dupSearchDict);
+ sdsfree(field);
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+
+ lp = lpAppend(lp, fstr, flen);
+ lp = lpAppend(lp, vstr, vlen);
+ }
+
+ dictRelease(dupSearchDict);
+ zfree(o->ptr);
+ o->ptr = lp;
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+
+ if (hashTypeLength(o) > server.hash_max_listpack_entries ||
+ maxlen > server.hash_max_listpack_value)
+ {
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ }
+ }
+ break;
+ case RDB_TYPE_LIST_ZIPLIST:
+ {
+ quicklist *ql = quicklistNew(server.list_max_listpack_size,
+ server.list_compress_depth);
+
+ if (!ziplistValidateIntegrity(encoded, encoded_len, 1,
+ _listZiplistEntryConvertAndValidate, ql))
+ {
+ rdbReportCorruptRDB("List ziplist integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ quicklistRelease(ql);
+ return NULL;
+ }
+
+ if (ql->len == 0) {
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ quicklistRelease(ql);
+ goto emptykey;
+ }
+
+ zfree(encoded);
+ o->type = OBJ_LIST;
+ o->ptr = ql;
+ o->encoding = OBJ_ENCODING_QUICKLIST;
+ break;
+ }
+ case RDB_TYPE_SET_INTSET:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!intsetValidateIntegrity(encoded, encoded_len, deep_integrity_validation)) {
+ rdbReportCorruptRDB("Intset integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_SET;
+ o->encoding = OBJ_ENCODING_INTSET;
+ if (intsetLen(o->ptr) > server.set_max_intset_entries)
+ setTypeConvert(o,OBJ_ENCODING_HT);
+ break;
+ case RDB_TYPE_SET_LISTPACK:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 0)) {
+ rdbReportCorruptRDB("Set listpack integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_SET;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+
+ if (setTypeSize(o) == 0) {
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ goto emptykey;
+ }
+ if (setTypeSize(o) > server.set_max_listpack_entries)
+ setTypeConvert(o, OBJ_ENCODING_HT);
+ break;
+ case RDB_TYPE_ZSET_ZIPLIST:
+ {
+ unsigned char *lp = lpNew(encoded_len);
+ if (!ziplistPairsConvertAndValidateIntegrity(encoded, encoded_len, &lp)) {
+ rdbReportCorruptRDB("Zset ziplist integrity check failed.");
+ zfree(lp);
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+
+ zfree(o->ptr);
+ o->type = OBJ_ZSET;
+ o->ptr = lp;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (zsetLength(o) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ if (zsetLength(o) > server.zset_max_listpack_entries)
+ zsetConvert(o,OBJ_ENCODING_SKIPLIST);
+ else
+ o->ptr = lpShrinkToFit(o->ptr);
+ break;
+ }
+ case RDB_TYPE_ZSET_LISTPACK:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
+ rdbReportCorruptRDB("Zset listpack integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_ZSET;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (zsetLength(o) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ if (zsetLength(o) > server.zset_max_listpack_entries)
+ zsetConvert(o,OBJ_ENCODING_SKIPLIST);
+ break;
+ case RDB_TYPE_HASH_ZIPLIST:
+ {
+ unsigned char *lp = lpNew(encoded_len);
+ if (!ziplistPairsConvertAndValidateIntegrity(encoded, encoded_len, &lp)) {
+ rdbReportCorruptRDB("Hash ziplist integrity check failed.");
+ zfree(lp);
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+
+ zfree(o->ptr);
+ o->ptr = lp;
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (hashTypeLength(o) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ if (hashTypeLength(o) > server.hash_max_listpack_entries)
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ else
+ o->ptr = lpShrinkToFit(o->ptr);
+ break;
+ }
+ case RDB_TYPE_HASH_LISTPACK:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
+ rdbReportCorruptRDB("Hash listpack integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (hashTypeLength(o) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ if (hashTypeLength(o) > server.hash_max_listpack_entries)
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ break;
+ default:
+ /* totally unreachable */
+ rdbReportCorruptRDB("Unknown RDB encoding type %d",rdbtype);
+ break;
+ }
+ } else if (rdbtype == RDB_TYPE_STREAM_LISTPACKS ||
+ rdbtype == RDB_TYPE_STREAM_LISTPACKS_2 ||
+ rdbtype == RDB_TYPE_STREAM_LISTPACKS_3)
+ {
+ o = createStreamObject();
+ stream *s = o->ptr;
+ uint64_t listpacks = rdbLoadLen(rdb,NULL);
+ if (listpacks == RDB_LENERR) {
+ rdbReportReadError("Stream listpacks len loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ while(listpacks--) {
+ /* Get the master ID, the one we'll use as key of the radix tree
+ * node: the entries inside the listpack itself are delta-encoded
+ * relatively to this ID. */
+ sds nodekey = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL);
+ if (nodekey == NULL) {
+ rdbReportReadError("Stream master ID loading failed: invalid encoding or I/O error.");
+ decrRefCount(o);
+ return NULL;
+ }
+ if (sdslen(nodekey) != sizeof(streamID)) {
+ rdbReportCorruptRDB("Stream node key entry is not the "
+ "size of a stream ID");
+ sdsfree(nodekey);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Load the listpack. */
+ size_t lp_size;
+ unsigned char *lp =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,&lp_size);
+ if (lp == NULL) {
+ rdbReportReadError("Stream listpacks loading failed.");
+ sdsfree(nodekey);
+ decrRefCount(o);
+ return NULL;
+ }
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!streamValidateListpackIntegrity(lp, lp_size, deep_integrity_validation)) {
+ rdbReportCorruptRDB("Stream listpack integrity check failed.");
+ sdsfree(nodekey);
+ decrRefCount(o);
+ zfree(lp);
+ return NULL;
+ }
+
+ unsigned char *first = lpFirst(lp);
+ if (first == NULL) {
+ /* Serialized listpacks should never be empty, since on
+ * deletion we should remove the radix tree key if the
+ * resulting listpack is empty. */
+ rdbReportCorruptRDB("Empty listpack inside stream");
+ sdsfree(nodekey);
+ decrRefCount(o);
+ zfree(lp);
+ return NULL;
+ }
+
+ /* Insert the key in the radix tree. */
+ int retval = raxTryInsert(s->rax,
+ (unsigned char*)nodekey,sizeof(streamID),lp,NULL);
+ sdsfree(nodekey);
+ if (!retval) {
+ rdbReportCorruptRDB("Listpack re-added with existing key");
+ decrRefCount(o);
+ zfree(lp);
+ return NULL;
+ }
+ }
+ /* Load total number of items inside the stream. */
+ s->length = rdbLoadLen(rdb,NULL);
+
+ /* Load the last entry ID. */
+ s->last_id.ms = rdbLoadLen(rdb,NULL);
+ s->last_id.seq = rdbLoadLen(rdb,NULL);
+
+ if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_2) {
+ /* Load the first entry ID. */
+ s->first_id.ms = rdbLoadLen(rdb,NULL);
+ s->first_id.seq = rdbLoadLen(rdb,NULL);
+
+ /* Load the maximal deleted entry ID. */
+ s->max_deleted_entry_id.ms = rdbLoadLen(rdb,NULL);
+ s->max_deleted_entry_id.seq = rdbLoadLen(rdb,NULL);
+
+ /* Load the offset. */
+ s->entries_added = rdbLoadLen(rdb,NULL);
+ } else {
+ /* During migration the offset can be initialized to the stream's
+ * length. At this point, we also don't care about tombstones
+ * because CG offsets will be later initialized as well. */
+ s->max_deleted_entry_id.ms = 0;
+ s->max_deleted_entry_id.seq = 0;
+ s->entries_added = s->length;
+
+ /* Since the rax is already loaded, we can find the first entry's
+ * ID. */
+ streamGetEdgeID(s,1,1,&s->first_id);
+ }
+
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream object metadata loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (s->length && !raxSize(s->rax)) {
+ rdbReportCorruptRDB("Stream length inconsistent with rax entries");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Consumer groups loading */
+ uint64_t cgroups_count = rdbLoadLen(rdb,NULL);
+ if (cgroups_count == RDB_LENERR) {
+ rdbReportReadError("Stream cgroup count loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ while(cgroups_count--) {
+ /* Get the consumer group name and ID. We can then create the
+ * consumer group ASAP and populate its structure as
+ * we read more data. */
+ streamID cg_id;
+ sds cgname = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL);
+ if (cgname == NULL) {
+ rdbReportReadError(
+ "Error reading the consumer group name from Stream");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ cg_id.ms = rdbLoadLen(rdb,NULL);
+ cg_id.seq = rdbLoadLen(rdb,NULL);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream cgroup ID loading failed.");
+ sdsfree(cgname);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Load group offset. */
+ uint64_t cg_offset;
+ if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_2) {
+ cg_offset = rdbLoadLen(rdb,NULL);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream cgroup offset loading failed.");
+ sdsfree(cgname);
+ decrRefCount(o);
+ return NULL;
+ }
+ } else {
+ cg_offset = streamEstimateDistanceFromFirstEverEntry(s,&cg_id);
+ }
+
+ streamCG *cgroup = streamCreateCG(s,cgname,sdslen(cgname),&cg_id,cg_offset);
+ if (cgroup == NULL) {
+ rdbReportCorruptRDB("Duplicated consumer group name %s",
+ cgname);
+ decrRefCount(o);
+ sdsfree(cgname);
+ return NULL;
+ }
+ sdsfree(cgname);
+
+ /* Load the global PEL for this consumer group, however we'll
+ * not yet populate the NACK structures with the message
+ * owner, since consumers for this group and their messages will
+ * be read as a next step. So for now leave them not resolved
+ * and later populate it. */
+ uint64_t pel_size = rdbLoadLen(rdb,NULL);
+ if (pel_size == RDB_LENERR) {
+ rdbReportReadError("Stream PEL size loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ while(pel_size--) {
+ unsigned char rawid[sizeof(streamID)];
+ if (rioRead(rdb,rawid,sizeof(rawid)) == 0) {
+ rdbReportReadError("Stream PEL ID loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ streamNACK *nack = streamCreateNACK(NULL);
+ nack->delivery_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
+ nack->delivery_count = rdbLoadLen(rdb,NULL);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream PEL NACK loading failed.");
+ decrRefCount(o);
+ streamFreeNACK(nack);
+ return NULL;
+ }
+ if (!raxTryInsert(cgroup->pel,rawid,sizeof(rawid),nack,NULL)) {
+ rdbReportCorruptRDB("Duplicated global PEL entry "
+ "loading stream consumer group");
+ decrRefCount(o);
+ streamFreeNACK(nack);
+ return NULL;
+ }
+ }
+
+ /* Now that we loaded our global PEL, we need to load the
+ * consumers and their local PELs. */
+ uint64_t consumers_num = rdbLoadLen(rdb,NULL);
+ if (consumers_num == RDB_LENERR) {
+ rdbReportReadError("Stream consumers num loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ while(consumers_num--) {
+ sds cname = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL);
+ if (cname == NULL) {
+ rdbReportReadError(
+ "Error reading the consumer name from Stream group.");
+ decrRefCount(o);
+ return NULL;
+ }
+ streamConsumer *consumer = streamCreateConsumer(cgroup,cname,NULL,0,
+ SCC_NO_NOTIFY|SCC_NO_DIRTIFY);
+ sdsfree(cname);
+ if (!consumer) {
+ rdbReportCorruptRDB("Duplicate stream consumer detected.");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ consumer->seen_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream short read reading seen time.");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_3) {
+ consumer->active_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream short read reading active time.");
+ decrRefCount(o);
+ return NULL;
+ }
+ } else {
+ /* That's the best estimate we got */
+ consumer->active_time = consumer->seen_time;
+ }
+
+ /* Load the PEL about entries owned by this specific
+ * consumer. */
+ pel_size = rdbLoadLen(rdb,NULL);
+ if (pel_size == RDB_LENERR) {
+ rdbReportReadError(
+ "Stream consumer PEL num loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ while(pel_size--) {
+ unsigned char rawid[sizeof(streamID)];
+ if (rioRead(rdb,rawid,sizeof(rawid)) == 0) {
+ rdbReportReadError(
+ "Stream short read reading PEL streamID.");
+ decrRefCount(o);
+ return NULL;
+ }
+ streamNACK *nack = raxFind(cgroup->pel,rawid,sizeof(rawid));
+ if (nack == raxNotFound) {
+ rdbReportCorruptRDB("Consumer entry not found in "
+ "group global PEL");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Set the NACK consumer, that was left to NULL when
+ * loading the global PEL. Then set the same shared
+ * NACK structure also in the consumer-specific PEL. */
+ nack->consumer = consumer;
+ if (!raxTryInsert(consumer->pel,rawid,sizeof(rawid),nack,NULL)) {
+ rdbReportCorruptRDB("Duplicated consumer PEL entry "
+ " loading a stream consumer "
+ "group");
+ decrRefCount(o);
+ streamFreeNACK(nack);
+ return NULL;
+ }
+ }
+ }
+
+ /* Verify that each PEL eventually got a consumer assigned to it. */
+ if (deep_integrity_validation) {
+ raxIterator ri_cg_pel;
+ raxStart(&ri_cg_pel,cgroup->pel);
+ raxSeek(&ri_cg_pel,"^",NULL,0);
+ while(raxNext(&ri_cg_pel)) {
+ streamNACK *nack = ri_cg_pel.data;
+ if (!nack->consumer) {
+ raxStop(&ri_cg_pel);
+ rdbReportCorruptRDB("Stream CG PEL entry without consumer");
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+ raxStop(&ri_cg_pel);
+ }
+ }
+ } else if (rdbtype == RDB_TYPE_MODULE_PRE_GA) {
+ rdbReportCorruptRDB("Pre-release module format not supported");
+ return NULL;
+ } else if (rdbtype == RDB_TYPE_MODULE_2) {
+ uint64_t moduleid = rdbLoadLen(rdb,NULL);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Short read module id");
+ return NULL;
+ }
+ moduleType *mt = moduleTypeLookupModuleByID(moduleid);
+
+ if (rdbCheckMode) {
+ char name[10];
+ moduleTypeNameByID(name,moduleid);
+ return rdbLoadCheckModuleValue(rdb,name);
+ }
+
+ if (mt == NULL) {
+ char name[10];
+ moduleTypeNameByID(name,moduleid);
+ rdbReportCorruptRDB("The RDB file contains module data I can't load: no matching module type '%s'", name);
+ return NULL;
+ }
+ RedisModuleIO io;
+ robj keyobj;
+ initStaticStringObject(keyobj,key);
+ moduleInitIOContext(io,mt,rdb,&keyobj,dbid);
+ /* Call the rdb_load method of the module providing the 10 bit
+ * encoding version in the lower 10 bits of the module ID. */
+ void *ptr = mt->rdb_load(&io,moduleid&1023);
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+
+ /* Module v2 serialization has an EOF mark at the end. */
+ uint64_t eof = rdbLoadLen(rdb,NULL);
+ if (eof == RDB_LENERR) {
+ if (ptr) {
+ o = createModuleObject(mt,ptr); /* creating just in order to easily destroy */
+ decrRefCount(o);
+ }
+ return NULL;
+ }
+ if (eof != RDB_MODULE_OPCODE_EOF) {
+ rdbReportCorruptRDB("The RDB file contains module data for the module '%s' that is not terminated by "
+ "the proper module value EOF marker", moduleTypeModuleName(mt));
+ if (ptr) {
+ o = createModuleObject(mt,ptr); /* creating just in order to easily destroy */
+ decrRefCount(o);
+ }
+ return NULL;
+ }
+
+ if (ptr == NULL) {
+ rdbReportCorruptRDB("The RDB file contains module data for the module type '%s', that the responsible "
+ "module is not able to load. Check for modules log above for additional clues.",
+ moduleTypeModuleName(mt));
+ return NULL;
+ }
+ o = createModuleObject(mt,ptr);
+ } else {
+ rdbReportReadError("Unknown RDB encoding type %d",rdbtype);
+ return NULL;
+ }
+ if (error) *error = 0;
+ return o;
+
+emptykey:
+ if (error) *error = RDB_LOAD_ERR_EMPTY_KEY;
+ return NULL;
+}
+
+/* Mark that we are loading in the global state and setup the fields
+ * needed to provide loading stats. */
+void startLoading(size_t size, int rdbflags, int async) {
+ /* Load the DB */
+ server.loading = 1;
+ if (async == 1) server.async_loading = 1;
+ server.loading_start_time = time(NULL);
+ server.loading_loaded_bytes = 0;
+ server.loading_total_bytes = size;
+ server.loading_rdb_used_mem = 0;
+ server.rdb_last_load_keys_expired = 0;
+ server.rdb_last_load_keys_loaded = 0;
+ blockingOperationStarts();
+
+ /* Fire the loading modules start event. */
+ int subevent;
+ if (rdbflags & RDBFLAGS_AOF_PREAMBLE)
+ subevent = REDISMODULE_SUBEVENT_LOADING_AOF_START;
+ else if(rdbflags & RDBFLAGS_REPLICATION)
+ subevent = REDISMODULE_SUBEVENT_LOADING_REPL_START;
+ else
+ subevent = REDISMODULE_SUBEVENT_LOADING_RDB_START;
+ moduleFireServerEvent(REDISMODULE_EVENT_LOADING,subevent,NULL);
+}
+
/* Like startLoading(), but also records the name of the file being
 * loaded so it can be reported on error (e.g. by rdb-check).
 * 'filename' is optional and may be NULL. */
void startLoadingFile(size_t size, char* filename, int rdbflags) {
    rdbFileBeingLoaded = filename;
    startLoading(size, rdbflags, 0);
}
+
+/* Refresh the absolute loading progress info */
+void loadingAbsProgress(off_t pos) {
+ server.loading_loaded_bytes = pos;
+ if (server.stat_peak_memory < zmalloc_used_memory())
+ server.stat_peak_memory = zmalloc_used_memory();
+}
+
+/* Refresh the incremental loading progress info */
+void loadingIncrProgress(off_t size) {
+ server.loading_loaded_bytes += size;
+ if (server.stat_peak_memory < zmalloc_used_memory())
+ server.stat_peak_memory = zmalloc_used_memory();
+}
+
/* Update the name of the file currently being loaded (used for error
 * reporting while several files are loaded in sequence, e.g. AOF sets). */
void updateLoadingFileName(char* filename) {
    rdbFileBeingLoaded = filename;
}
+
+/* Loading finished */
+void stopLoading(int success) {
+ server.loading = 0;
+ server.async_loading = 0;
+ blockingOperationEnds();
+ rdbFileBeingLoaded = NULL;
+
+ /* Fire the loading modules end event. */
+ moduleFireServerEvent(REDISMODULE_EVENT_LOADING,
+ success?
+ REDISMODULE_SUBEVENT_LOADING_ENDED:
+ REDISMODULE_SUBEVENT_LOADING_FAILED,
+ NULL);
+}
+
+void startSaving(int rdbflags) {
+ /* Fire the persistence modules start event. */
+ int subevent;
+ if (rdbflags & RDBFLAGS_AOF_PREAMBLE && getpid() != server.pid)
+ subevent = REDISMODULE_SUBEVENT_PERSISTENCE_AOF_START;
+ else if (rdbflags & RDBFLAGS_AOF_PREAMBLE)
+ subevent = REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_AOF_START;
+ else if (getpid()!=server.pid)
+ subevent = REDISMODULE_SUBEVENT_PERSISTENCE_RDB_START;
+ else
+ subevent = REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_RDB_START;
+ moduleFireServerEvent(REDISMODULE_EVENT_PERSISTENCE,subevent,NULL);
+}
+
+void stopSaving(int success) {
+ /* Fire the persistence modules end event. */
+ moduleFireServerEvent(REDISMODULE_EVENT_PERSISTENCE,
+ success?
+ REDISMODULE_SUBEVENT_PERSISTENCE_ENDED:
+ REDISMODULE_SUBEVENT_PERSISTENCE_FAILED,
+ NULL);
+}
+
/* rio update-checksum callback installed while loading an RDB: updates
 * the checksum (when enabled) with the bytes just read, and tracks
 * loading progress so clients can be served from time to time. */
void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) {
    if (server.rdb_checksum)
        rioGenericUpdateChecksum(r, buf, len);
    /* Every loading_process_events_interval_bytes processed, serve
     * pending events and fire the module progress event. The division
     * trick detects the interval boundary being crossed by this chunk. */
    if (server.loading_process_events_interval_bytes &&
        (r->processed_bytes + len)/server.loading_process_events_interval_bytes > r->processed_bytes/server.loading_process_events_interval_bytes)
    {
        /* While receiving the RDB from a master, send newlines so the
         * master does not detect a timeout on the link. */
        if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER)
            replicationSendNewlineToMaster();
        loadingAbsProgress(r->processed_bytes);
        processEventsWhileBlocked();
        processModuleLoadingProgressEvent(0);
    }
    /* Account RDB bytes received over the replication link. */
    if (server.repl_state == REPL_STATE_TRANSFER && rioCheckType(r) == RIO_TYPE_CONN) {
        atomicIncr(server.stat_net_repl_input_bytes, len);
    }
}
+
/* Load a function library payload from the rdb stream.
 * The 'err' output parameter is optional and will be set with a relevant
 * error message on failure; it is the caller's responsibility to free
 * that message.
 *
 * The 'lib_ctx' argument is also optional. If NULL is given, only verify
 * the rdb structure without performing the actual function loading.
 *
 * Returns C_OK on success, C_ERR on failure. */
int rdbFunctionLoad(rio *rdb, int ver, functionsLibCtx* lib_ctx, int rdbflags, sds *err) {
    UNUSED(ver);
    sds error = NULL;
    sds final_payload = NULL;
    int res = C_ERR;
    /* The library is serialized as a single string payload. */
    if (!(final_payload = rdbGenericLoadStringObject(rdb, RDB_LOAD_SDS, NULL))) {
        error = sdsnew("Failed loading library payload");
        goto done;
    }

    if (lib_ctx) {
        sds library_name = NULL;
        /* RDBFLAGS_ALLOW_DUP permits replacing a library that is already
         * loaded (used by DEBUG RELOAD style code paths). */
        if (!(library_name = functionsCreateWithLibraryCtx(final_payload, rdbflags & RDBFLAGS_ALLOW_DUP, &error, lib_ctx, 0))) {
            if (!error) {
                error = sdsnew("Failed creating the library");
            }
            goto done;
        }
        sdsfree(library_name);
    }

    res = C_OK;

done:
    if (final_payload) sdsfree(final_payload);
    if (error) {
        if (err) {
            /* Ownership of 'error' is transferred to the caller. */
            *err = error;
        } else {
            serverLog(LL_WARNING, "Failed creating function, %s", error);
            sdsfree(error);
        }
    }
    return res;
}
+
+/* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned,
+ * otherwise C_ERR is returned and 'errno' is set accordingly. */
+int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
+ functionsLibCtx* functions_lib_ctx = functionsLibCtxGetCurrent();
+ rdbLoadingCtx loading_ctx = { .dbarray = server.db, .functions_lib_ctx = functions_lib_ctx };
+ int retval = rdbLoadRioWithLoadingCtx(rdb,rdbflags,rsi,&loading_ctx);
+ return retval;
+}
+
+
/* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned,
 * otherwise C_ERR is returned.
 * The rdb_loading_ctx argument holds objects to which the rdb will be loaded to,
 * currently it only allow to set db object and functionLibCtx to which the data
 * will be loaded (in the future it might contains more such objects). */
int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadingCtx *rdb_loading_ctx) {
    uint64_t dbid = 0;
    int type, rdbver;
    redisDb *db = rdb_loading_ctx->dbarray+0; /* Start at DB 0 until a SELECTDB opcode. */
    char buf[1024];
    int error;
    long long empty_keys_skipped = 0;

    /* Install the progress callback so checksumming and periodic event
     * processing happen as bytes are consumed from the stream. */
    rdb->update_cksum = rdbLoadProgressCallback;
    rdb->max_processing_chunk = server.loading_process_events_interval_bytes;
    /* Verify the 9 bytes magic: "REDIS" followed by a 4 digit version. */
    if (rioRead(rdb,buf,9) == 0) goto eoferr;
    buf[9] = '\0';
    if (memcmp(buf,"REDIS",5) != 0) {
        serverLog(LL_WARNING,"Wrong signature trying to load DB from file");
        return C_ERR;
    }
    rdbver = atoi(buf+5);
    if (rdbver < 1 || rdbver > RDB_VERSION) {
        serverLog(LL_WARNING,"Can't handle RDB format version %d",rdbver);
        return C_ERR;
    }

    /* Key-specific attributes, set by opcodes before the key type. */
    long long lru_idle = -1, lfu_freq = -1, expiretime = -1, now = mstime();
    long long lru_clock = LRU_CLOCK();

    while(1) {
        sds key;
        robj *val;

        /* Read type. */
        if ((type = rdbLoadType(rdb)) == -1) goto eoferr;

        /* Handle special types. */
        if (type == RDB_OPCODE_EXPIRETIME) {
            /* EXPIRETIME: load an expire associated with the next key
             * to load. Note that after loading an expire we need to
             * load the actual type, and continue. */
            expiretime = rdbLoadTime(rdb);
            expiretime *= 1000; /* Convert seconds to milliseconds. */
            if (rioGetReadError(rdb)) goto eoferr;
            continue; /* Read next opcode. */
        } else if (type == RDB_OPCODE_EXPIRETIME_MS) {
            /* EXPIRETIME_MS: milliseconds precision expire times introduced
             * with RDB v3. Like EXPIRETIME but with milliseconds precision. */
            expiretime = rdbLoadMillisecondTime(rdb,rdbver);
            if (rioGetReadError(rdb)) goto eoferr;
            continue; /* Read next opcode. */
        } else if (type == RDB_OPCODE_FREQ) {
            /* FREQ: LFU frequency. */
            uint8_t byte;
            if (rioRead(rdb,&byte,1) == 0) goto eoferr;
            lfu_freq = byte;
            continue; /* Read next opcode. */
        } else if (type == RDB_OPCODE_IDLE) {
            /* IDLE: LRU idle time. */
            uint64_t qword;
            if ((qword = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
            lru_idle = qword;
            continue; /* Read next opcode. */
        } else if (type == RDB_OPCODE_EOF) {
            /* EOF: End of file, exit the main loop. */
            break;
        } else if (type == RDB_OPCODE_SELECTDB) {
            /* SELECTDB: Select the specified database. */
            if ((dbid = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
            if (dbid >= (unsigned)server.dbnum) {
                serverLog(LL_WARNING,
                    "FATAL: Data file was created with a Redis "
                    "server configured to handle more than %d "
                    "databases. Exiting\n", server.dbnum);
                exit(1);
            }
            db = rdb_loading_ctx->dbarray+dbid;
            continue; /* Read next opcode. */
        } else if (type == RDB_OPCODE_RESIZEDB) {
            /* RESIZEDB: Hint about the size of the keys in the currently
             * selected data base, in order to avoid useless rehashing. */
            uint64_t db_size, expires_size;
            if ((db_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
                goto eoferr;
            if ((expires_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
                goto eoferr;
            dictExpand(db->dict,db_size);
            dictExpand(db->expires,expires_size);
            continue; /* Read next opcode. */
        } else if (type == RDB_OPCODE_AUX) {
            /* AUX: generic string-string fields. Use to add state to RDB
             * which is backward compatible. Implementations of RDB loading
             * are required to skip AUX fields they don't understand.
             *
             * An AUX field is composed of two strings: key and value. */
            robj *auxkey, *auxval;
            if ((auxkey = rdbLoadStringObject(rdb)) == NULL) goto eoferr;
            if ((auxval = rdbLoadStringObject(rdb)) == NULL) {
                decrRefCount(auxkey);
                goto eoferr;
            }

            if (((char*)auxkey->ptr)[0] == '%') {
                /* All the fields with a name starting with '%' are considered
                 * information fields and are logged at startup with a log
                 * level of NOTICE. */
                serverLog(LL_NOTICE,"RDB '%s': %s",
                    (char*)auxkey->ptr,
                    (char*)auxval->ptr);
            } else if (!strcasecmp(auxkey->ptr,"repl-stream-db")) {
                if (rsi) rsi->repl_stream_db = atoi(auxval->ptr);
            } else if (!strcasecmp(auxkey->ptr,"repl-id")) {
                if (rsi && sdslen(auxval->ptr) == CONFIG_RUN_ID_SIZE) {
                    memcpy(rsi->repl_id,auxval->ptr,CONFIG_RUN_ID_SIZE+1);
                    rsi->repl_id_is_set = 1;
                }
            } else if (!strcasecmp(auxkey->ptr,"repl-offset")) {
                if (rsi) rsi->repl_offset = strtoll(auxval->ptr,NULL,10);
            } else if (!strcasecmp(auxkey->ptr,"lua")) {
                /* Won't load the script back in memory anymore. */
            } else if (!strcasecmp(auxkey->ptr,"redis-ver")) {
                serverLog(LL_NOTICE,"Loading RDB produced by version %s",
                    (char*)auxval->ptr);
            } else if (!strcasecmp(auxkey->ptr,"ctime")) {
                time_t age = time(NULL)-strtol(auxval->ptr,NULL,10);
                if (age < 0) age = 0;
                /* NOTE(review): format is %ld but the argument is cast to
                 * unsigned long; same width so benign, but %lu would match
                 * the cast — confirm before changing the format string. */
                serverLog(LL_NOTICE,"RDB age %ld seconds",
                    (unsigned long) age);
            } else if (!strcasecmp(auxkey->ptr,"used-mem")) {
                long long usedmem = strtoll(auxval->ptr,NULL,10);
                serverLog(LL_NOTICE,"RDB memory usage when created %.2f Mb",
                    (double) usedmem / (1024*1024));
                server.loading_rdb_used_mem = usedmem;
            } else if (!strcasecmp(auxkey->ptr,"aof-preamble")) {
                long long haspreamble = strtoll(auxval->ptr,NULL,10);
                if (haspreamble) serverLog(LL_NOTICE,"RDB has an AOF tail");
            } else if (!strcasecmp(auxkey->ptr, "aof-base")) {
                long long isbase = strtoll(auxval->ptr, NULL, 10);
                if (isbase) serverLog(LL_NOTICE, "RDB is base AOF");
            } else if (!strcasecmp(auxkey->ptr,"redis-bits")) {
                /* Just ignored. */
            } else {
                /* We ignore fields we don't understand, as by AUX field
                 * contract. */
                serverLog(LL_DEBUG,"Unrecognized RDB AUX field: '%s'",
                    (char*)auxkey->ptr);
            }

            decrRefCount(auxkey);
            decrRefCount(auxval);
            continue; /* Read type again. */
        } else if (type == RDB_OPCODE_MODULE_AUX) {
            /* Load module data that is not related to the Redis key space.
             * Such data can be potentially be stored both before and after the
             * RDB keys-values section. */
            uint64_t moduleid = rdbLoadLen(rdb,NULL);
            int when_opcode = rdbLoadLen(rdb,NULL);
            int when = rdbLoadLen(rdb,NULL);
            if (rioGetReadError(rdb)) goto eoferr;
            if (when_opcode != RDB_MODULE_OPCODE_UINT) {
                rdbReportReadError("bad when_opcode");
                goto eoferr;
            }
            moduleType *mt = moduleTypeLookupModuleByID(moduleid);
            char name[10];
            moduleTypeNameByID(name,moduleid);

            if (!rdbCheckMode && mt == NULL) {
                /* Unknown module. */
                serverLog(LL_WARNING,"The RDB file contains AUX module data I can't load: no matching module '%s'", name);
                exit(1);
            } else if (!rdbCheckMode && mt != NULL) {
                if (!mt->aux_load) {
                    /* Module doesn't support AUX. */
                    serverLog(LL_WARNING,"The RDB file contains module AUX data, but the module '%s' doesn't seem to support it.", name);
                    exit(1);
                }

                RedisModuleIO io;
                moduleInitIOContext(io,mt,rdb,NULL,-1);
                /* Call the rdb_load method of the module providing the 10 bit
                 * encoding version in the lower 10 bits of the module ID. */
                int rc = mt->aux_load(&io,moduleid&1023, when);
                if (io.ctx) {
                    moduleFreeContext(io.ctx);
                    zfree(io.ctx);
                }
                if (rc != REDISMODULE_OK || io.error) {
                    moduleTypeNameByID(name,moduleid);
                    serverLog(LL_WARNING,"The RDB file contains module AUX data for the module type '%s', that the responsible module is not able to load. Check for modules log above for additional clues.", name);
                    goto eoferr;
                }
                /* Module AUX data is terminated by an explicit EOF marker. */
                uint64_t eof = rdbLoadLen(rdb,NULL);
                if (eof != RDB_MODULE_OPCODE_EOF) {
                    serverLog(LL_WARNING,"The RDB file contains module AUX data for the module '%s' that is not terminated by the proper module value EOF marker", name);
                    goto eoferr;
                }
                continue;
            } else {
                /* RDB check mode: just skip/verify the module payload. */
                robj *aux = rdbLoadCheckModuleValue(rdb,name);
                decrRefCount(aux);
                continue; /* Read next opcode. */
            }
        } else if (type == RDB_OPCODE_FUNCTION_PRE_GA) {
            rdbReportCorruptRDB("Pre-release function format not supported.");
            exit(1);
        } else if (type == RDB_OPCODE_FUNCTION2) {
            sds err = NULL;
            if (rdbFunctionLoad(rdb, rdbver, rdb_loading_ctx->functions_lib_ctx, rdbflags, &err) != C_OK) {
                serverLog(LL_WARNING,"Failed loading library, %s", err);
                sdsfree(err);
                goto eoferr;
            }
            continue;
        }

        /* Read key */
        if ((key = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL)
            goto eoferr;
        /* Read value */
        val = rdbLoadObject(type,rdb,key,db->id,&error);

        /* Check if the key already expired. This function is used when loading
         * an RDB file from disk, either at startup, or when an RDB was
         * received from the master. In the latter case, the master is
         * responsible for key expiry. If we would expire keys here, the
         * snapshot taken by the master may not be reflected on the slave.
         * Similarly, if the base AOF is RDB format, we want to load all
         * the keys they are, since the log of operations in the incr AOF
         * is assumed to work in the exact keyspace state. */
        if (val == NULL) {
            /* Since we used to have bug that could lead to empty keys
             * (See #8453), we rather not fail when empty key is encountered
             * in an RDB file, instead we will silently discard it and
             * continue loading. */
            if (error == RDB_LOAD_ERR_EMPTY_KEY) {
                if(empty_keys_skipped++ < 10)
                    serverLog(LL_NOTICE, "rdbLoadObject skipping empty key: %s", key);
                sdsfree(key);
            } else {
                sdsfree(key);
                goto eoferr;
            }
        } else if (iAmMaster() &&
            !(rdbflags&RDBFLAGS_AOF_PREAMBLE) &&
            expiretime != -1 && expiretime < now)
        {
            /* Key already expired and we are the master: drop it, and
             * optionally propagate the deletion to the repl backlog. */
            if (rdbflags & RDBFLAGS_FEED_REPL) {
                /* Caller should have created replication backlog,
                 * and now this path only works when rebooting,
                 * so we don't have replicas yet. */
                serverAssert(server.repl_backlog != NULL && listLength(server.slaves) == 0);
                robj keyobj;
                initStaticStringObject(keyobj,key);
                robj *argv[2];
                argv[0] = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
                argv[1] = &keyobj;
                replicationFeedSlaves(server.slaves,dbid,argv,2);
            }
            sdsfree(key);
            decrRefCount(val);
            server.rdb_last_load_keys_expired++;
        } else {
            robj keyobj;
            initStaticStringObject(keyobj,key);

            /* Add the new object in the hash table */
            int added = dbAddRDBLoad(db,key,val);
            server.rdb_last_load_keys_loaded++;
            if (!added) {
                if (rdbflags & RDBFLAGS_ALLOW_DUP) {
                    /* This flag is useful for DEBUG RELOAD special modes.
                     * When it's set we allow new keys to replace the current
                     * keys with the same name. */
                    dbSyncDelete(db,&keyobj);
                    dbAddRDBLoad(db,key,val);
                } else {
                    serverLog(LL_WARNING,
                        "RDB has duplicated key '%s' in DB %d",key,db->id);
                    serverPanic("Duplicated key found in RDB file");
                }
            }

            /* Set the expire time if needed */
            if (expiretime != -1) {
                setExpire(NULL,db,&keyobj,expiretime);
            }

            /* Set usage information (for eviction). */
            objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock,1000);

            /* call key space notification on key loaded for modules only */
            moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id);
        }

        /* Loading the database more slowly is useful in order to test
         * certain edge cases. */
        if (server.key_load_delay)
            debugDelay(server.key_load_delay);

        /* Reset the state that is key-specified and is populated by
         * opcodes before the key, so that we start from scratch again. */
        expiretime = -1;
        lfu_freq = -1;
        lru_idle = -1;
    }
    /* Verify the checksum if RDB version is >= 5 */
    if (rdbver >= 5) {
        uint64_t cksum, expected = rdb->cksum;

        if (rioRead(rdb,&cksum,8) == 0) goto eoferr;
        if (server.rdb_checksum && !server.skip_checksum_validation) {
            memrev64ifbe(&cksum);
            if (cksum == 0) {
                serverLog(LL_NOTICE,"RDB file was saved with checksum disabled: no check performed.");
            } else if (cksum != expected) {
                serverLog(LL_WARNING,"Wrong RDB checksum expected: (%llx) but "
                    "got (%llx). Aborting now.",
                        (unsigned long long)expected,
                        (unsigned long long)cksum);
                rdbReportCorruptRDB("RDB CRC error");
                return C_ERR;
            }
        }
    }

    if (empty_keys_skipped) {
        serverLog(LL_NOTICE,
            "Done loading RDB, keys loaded: %lld, keys expired: %lld, empty keys skipped: %lld.",
            server.rdb_last_load_keys_loaded, server.rdb_last_load_keys_expired, empty_keys_skipped);
    } else {
        serverLog(LL_NOTICE,
            "Done loading RDB, keys loaded: %lld, keys expired: %lld.",
            server.rdb_last_load_keys_loaded, server.rdb_last_load_keys_expired);
    }
    return C_OK;

    /* Unexpected end of file is handled here calling rdbReportReadError():
     * this will in turn either abort Redis in most cases, or if we are loading
     * the RDB file from a socket during initial SYNC (diskless replica mode),
     * we'll report the error to the caller, so that we can retry. */
eoferr:
    serverLog(LL_WARNING,
        "Short read or OOM loading DB. Unrecoverable error, aborting now.");
    rdbReportReadError("Unexpected EOF reading RDB file");
    return C_ERR;
}
+
+/* Like rdbLoadRio() but takes a filename instead of a rio stream. The
+ * filename is open for reading and a rio stream object created in order
+ * to do the actual loading. Moreover the ETA displayed in the INFO
+ * output is initialized and finalized.
+ *
+ * If you pass an 'rsi' structure initialized with RDB_SAVE_INFO_INIT, the
+ * loading code will fill the information fields in the structure. */
+int rdbLoad(char *filename, rdbSaveInfo *rsi, int rdbflags) {
+ FILE *fp;
+ rio rdb;
+ int retval;
+ struct stat sb;
+ int rdb_fd;
+
+ fp = fopen(filename, "r");
+ if (fp == NULL) {
+ if (errno == ENOENT) return RDB_NOT_EXIST;
+
+ serverLog(LL_WARNING,"Fatal error: can't open the RDB file %s for reading: %s", filename, strerror(errno));
+ return RDB_FAILED;
+ }
+
+ if (fstat(fileno(fp), &sb) == -1)
+ sb.st_size = 0;
+
+ startLoadingFile(sb.st_size, filename, rdbflags);
+ rioInitWithFile(&rdb,fp);
+
+ retval = rdbLoadRio(&rdb,rdbflags,rsi);
+
+ fclose(fp);
+ stopLoading(retval==C_OK);
+ /* Reclaim the cache backed by rdb */
+ if (retval == C_OK && !(rdbflags & RDBFLAGS_KEEP_CACHE)) {
+ /* TODO: maybe we could combine the fopen and open into one in the future */
+ rdb_fd = open(filename, O_RDONLY);
+ if (rdb_fd > 0) bioCreateCloseJob(rdb_fd, 0, 1);
+ }
+ return (retval==C_OK) ? RDB_OK : RDB_FAILED;
+}
+
/* A background saving child (BGSAVE) terminated its work. Handle this.
 * This function covers the case of actual BGSAVEs writing to disk.
 * 'exitcode' is the child exit status; 'bysignal' is non-zero (the signal
 * number) when the child was killed by a signal. */
static void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
    if (!bysignal && exitcode == 0) {
        serverLog(LL_NOTICE,
            "Background saving terminated with success");
        /* Only the writes accumulated before the fork are now persisted;
         * keep counting the changes that arrived while the child ran. */
        server.dirty = server.dirty - server.dirty_before_bgsave;
        server.lastsave = time(NULL);
        server.lastbgsave_status = C_OK;
    } else if (!bysignal && exitcode != 0) {
        serverLog(LL_WARNING, "Background saving error");
        server.lastbgsave_status = C_ERR;
    } else {
        mstime_t latency;

        serverLog(LL_WARNING,
            "Background saving terminated by signal %d", bysignal);
        /* Remove the temp RDB file left behind, tracking unlink latency. */
        latencyStartMonitor(latency);
        rdbRemoveTempFile(server.child_pid, 0);
        latencyEndMonitor(latency);
        latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
        /* SIGUSR1 is whitelisted, so we have a way to kill a child without
         * triggering an error condition. */
        if (bysignal != SIGUSR1)
            server.lastbgsave_status = C_ERR;
    }
}
+
/* A background saving child (BGSAVE) terminated its work. Handle this.
 * This function covers the case of RDB -> Slaves socket transfers for
 * diskless replication. Besides logging the result, it tears down all
 * the parent-side pipe state used to relay the child's RDB stream to
 * the replica connections. */
static void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
    if (!bysignal && exitcode == 0) {
        serverLog(LL_NOTICE,
            "Background RDB transfer terminated with success");
    } else if (!bysignal && exitcode != 0) {
        serverLog(LL_WARNING, "Background transfer error");
    } else {
        serverLog(LL_WARNING,
            "Background transfer terminated by signal %d", bysignal);
    }
    /* Close the "safe to exit" pipe end and the RDB data pipe, and free
     * the buffers/connection array used for the transfer. */
    if (server.rdb_child_exit_pipe!=-1)
        close(server.rdb_child_exit_pipe);
    aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE);
    close(server.rdb_pipe_read);
    server.rdb_child_exit_pipe = -1;
    server.rdb_pipe_read = -1;
    zfree(server.rdb_pipe_conns);
    server.rdb_pipe_conns = NULL;
    server.rdb_pipe_numconns = 0;
    server.rdb_pipe_numconns_writing = 0;
    zfree(server.rdb_pipe_buff);
    server.rdb_pipe_buff = NULL;
    server.rdb_pipe_bufflen = 0;
}
+
+/* When a background RDB saving/transfer terminates, call the right handler. */
+void backgroundSaveDoneHandler(int exitcode, int bysignal) {
+ int type = server.rdb_child_type;
+ switch(server.rdb_child_type) {
+ case RDB_CHILD_TYPE_DISK:
+ backgroundSaveDoneHandlerDisk(exitcode,bysignal);
+ break;
+ case RDB_CHILD_TYPE_SOCKET:
+ backgroundSaveDoneHandlerSocket(exitcode,bysignal);
+ break;
+ default:
+ serverPanic("Unknown RDB child type.");
+ break;
+ }
+
+ server.rdb_child_type = RDB_CHILD_TYPE_NONE;
+ server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start;
+ server.rdb_save_time_start = -1;
+ /* Possibly there are slaves waiting for a BGSAVE in order to be served
+ * (the first stage of SYNC is a bulk transfer of dump.rdb) */
+ updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? C_OK : C_ERR, type);
+}
+
/* Kill the RDB saving child using SIGUSR1 (so that the parent will know
 * the child did not exit for an error, but because we wanted), and perform
 * the cleanup needed. */
void killRDBChild(void) {
    kill(server.child_pid, SIGUSR1);
    /* Because we are not using here waitpid (like we have in killAppendOnlyChild
     * and TerminateModuleForkChild), all the cleanup operations are done by
     * checkChildrenDone, which will later find that the process was killed.
     * This includes:
     * - resetChildState
     * - rdbRemoveTempFile */
}
+
+/* Spawn an RDB child that writes the RDB to the sockets of the slaves
+ * that are currently in SLAVE_STATE_WAIT_BGSAVE_START state.
+ * Returns C_OK if the child was started, C_ERR otherwise. */
+int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
+ listNode *ln;
+ listIter li;
+ pid_t childpid;
+ int pipefds[2], rdb_pipe_write, safe_to_exit_pipe;
+
+ if (hasActiveChildProcess()) return C_ERR;
+
+ /* Even if the previous fork child exited, don't start a new one until we
+ * drained the pipe. */
+ if (server.rdb_pipe_conns) return C_ERR;
+
+ /* Before forking, create a pipe that is used to transfer the rdb bytes to
+ * the parent, we can't let it write directly to the sockets, since in case
+ * of TLS we must let the parent handle a continuous TLS state when the
+ * child terminates and parent takes over. */
+ if (anetPipe(pipefds, O_NONBLOCK, 0) == -1) return C_ERR;
+ server.rdb_pipe_read = pipefds[0]; /* read end */
+ rdb_pipe_write = pipefds[1]; /* write end */
+
+ /* create another pipe that is used by the parent to signal to the child
+ * that it can exit. */
+ if (anetPipe(pipefds, 0, 0) == -1) {
+ close(rdb_pipe_write);
+ close(server.rdb_pipe_read);
+ return C_ERR;
+ }
+ safe_to_exit_pipe = pipefds[0]; /* read end */
+ server.rdb_child_exit_pipe = pipefds[1]; /* write end */
+
+ /* Collect the connections of the replicas we want to transfer
+ * the RDB to, which are in WAIT_BGSAVE_START state. */
+ server.rdb_pipe_conns = zmalloc(sizeof(connection *)*listLength(server.slaves));
+ server.rdb_pipe_numconns = 0;
+ server.rdb_pipe_numconns_writing = 0;
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
+ /* Check slave has the exact requirements */
+ if (slave->slave_req != req)
+ continue;
+ server.rdb_pipe_conns[server.rdb_pipe_numconns++] = slave->conn;
+ replicationSetupSlaveForFullResync(slave,getPsyncInitialOffset());
+ }
+ }
+
+ /* Create the child process. */
+ if ((childpid = redisFork(CHILD_TYPE_RDB)) == 0) {
+ /* Child */
+ int retval, dummy;
+ rio rdb;
+
+ rioInitWithFd(&rdb,rdb_pipe_write);
+
+ /* Close the reading part, so that if the parent crashes, the child will
+ * get a write error and exit. */
+ close(server.rdb_pipe_read);
+
+ redisSetProcTitle("redis-rdb-to-slaves");
+ redisSetCpuAffinity(server.bgsave_cpulist);
+
+ retval = rdbSaveRioWithEOFMark(req,&rdb,NULL,rsi);
+ /* A failed final flush is an error too: the last bytes may be lost. */
+ if (retval == C_OK && rioFlush(&rdb) == 0)
+ retval = C_ERR;
+
+ if (retval == C_OK) {
+ sendChildCowInfo(CHILD_INFO_TYPE_RDB_COW_SIZE, "RDB");
+ }
+
+ rioFreeFd(&rdb);
+ /* wake up the reader, tell it we're done. */
+ close(rdb_pipe_write);
+ close(server.rdb_child_exit_pipe); /* close write end so that we can detect the close on the parent. */
+ /* hold exit until the parent tells us it's safe. we're not expecting
+ * to read anything, just get the error when the pipe is closed. */
+ dummy = read(safe_to_exit_pipe, pipefds, 1);
+ UNUSED(dummy);
+ exitFromChild((retval == C_OK) ? 0 : 1);
+ } else {
+ /* Parent */
+ if (childpid == -1) {
+ serverLog(LL_WARNING,"Can't save in background: fork: %s",
+ strerror(errno));
+
+ /* Undo the state change. The caller will perform cleanup on
+ * all the slaves in BGSAVE_START state, but an early call to
+ * replicationSetupSlaveForFullResync() turned it into BGSAVE_END */
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
+ slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
+ }
+ }
+ close(rdb_pipe_write);
+ close(server.rdb_pipe_read);
+ zfree(server.rdb_pipe_conns);
+ server.rdb_pipe_conns = NULL;
+ server.rdb_pipe_numconns = 0;
+ server.rdb_pipe_numconns_writing = 0;
+ } else {
+ serverLog(LL_NOTICE,"Background RDB transfer started by pid %ld",
+ (long) childpid);
+ server.rdb_save_time_start = time(NULL);
+ server.rdb_child_type = RDB_CHILD_TYPE_SOCKET;
+ close(rdb_pipe_write); /* close write in parent so that it can detect the close on the child. */
+ if (aeCreateFileEvent(server.el, server.rdb_pipe_read, AE_READABLE, rdbPipeReadHandler,NULL) == AE_ERR) {
+ serverPanic("Unrecoverable error creating server.rdb_pipe_read file event.");
+ }
+ }
+ close(safe_to_exit_pipe);
+ return (childpid == -1) ? C_ERR : C_OK;
+ }
+ return C_OK; /* Unreached. */
+}
+
+/* SAVE command: perform a synchronous (blocking) RDB save to disk.
+ * Refused while a background RDB save is already in progress. */
+void saveCommand(client *c) {
+ if (server.child_type == CHILD_TYPE_RDB) {
+ addReplyError(c,"Background save already in progress");
+ return;
+ }
+
+ server.stat_rdb_saves++;
+
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ if (rdbSave(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE) == C_OK) {
+ addReply(c,shared.ok);
+ } else {
+ addReplyErrorObject(c,shared.err);
+ }
+}
+
+/* BGSAVE [SCHEDULE] */
+void bgsaveCommand(client *c) {
+ int schedule = 0;
+
+ /* The SCHEDULE option changes the behavior of BGSAVE when an AOF rewrite
+ * is in progress. Instead of returning an error a BGSAVE gets scheduled. */
+ if (c->argc > 1) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"schedule")) {
+ schedule = 1;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+
+ if (server.child_type == CHILD_TYPE_RDB) {
+ addReplyError(c,"Background save already in progress");
+ } else if (hasActiveChildProcess() || server.in_exec) {
+ /* Another child (e.g. AOF rewrite) is running, or we are inside a
+ * MULTI/EXEC: either schedule the BGSAVE for later or refuse. */
+ if (schedule || server.in_exec) {
+ server.rdb_bgsave_scheduled = 1;
+ addReplyStatus(c,"Background saving scheduled");
+ } else {
+ addReplyError(c,
+ "Another child process is active (AOF?): can't BGSAVE right now. "
+ "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever "
+ "possible.");
+ }
+ } else if (rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE) == C_OK) {
+ addReplyStatus(c,"Background saving started");
+ } else {
+ addReplyErrorObject(c,shared.err);
+ }
+}
+
+/* Populate the rdbSaveInfo structure used to persist the replication
+ * information inside the RDB file. Currently the structure explicitly
+ * contains just the currently selected DB from the master stream, however
+ * if the rdbSave*() family functions receive a NULL rsi structure also
+ * the Replication ID/offset is not saved. The function populates 'rsi'
+ * that is normally stack-allocated in the caller, returns the populated
+ * pointer if the instance has a valid master client, otherwise NULL
+ * is returned, and the RDB saving will not persist any replication related
+ * information. */
+rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
+ rdbSaveInfo rsi_init = RDB_SAVE_INFO_INIT;
+ *rsi = rsi_init;
+
+ /* If the instance is a master, we can populate the replication info
+ * only when repl_backlog is not NULL. If the repl_backlog is NULL,
+ * it means that the instance isn't in any replication chains. In this
+ * scenario the replication info is useless, because when a slave
+ * connects to us, the NULL repl_backlog will trigger a full
+ * synchronization, at the same time we will use a new replid and clear
+ * replid2. */
+ if (!server.masterhost && server.repl_backlog) {
+ /* Note that when server.slaveseldb is -1, it means that this master
+ * didn't apply any write commands after a full synchronization.
+ * So we can let repl_stream_db be 0, this allows a restarted slave
+ * to reload replication ID/offset, it's safe because the next write
+ * command must generate a SELECT statement. */
+ rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb;
+ return rsi;
+ }
+
+ /* If the instance is a slave we need a connected master
+ * in order to fetch the currently selected DB. */
+ if (server.master) {
+ rsi->repl_stream_db = server.master->db->id;
+ return rsi;
+ }
+
+ /* If we have a cached master we can use it in order to populate the
+ * replication selected DB info inside the RDB file: the slave can
+ * increment the master_repl_offset only from data arriving from the
+ * master, so if we are disconnected the offset in the cached master
+ * is valid. */
+ if (server.cached_master) {
+ rsi->repl_stream_db = server.cached_master->db->id;
+ return rsi;
+ }
+ /* No replication state available: caller will save without repl info. */
+ return NULL;
+}
diff --git a/src/rdb.h b/src/rdb.h
new file mode 100644
index 0000000..234bde2
--- /dev/null
+++ b/src/rdb.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __RDB_H
+#define __RDB_H
+
+#include <stdio.h>
+#include "rio.h"
+
+/* TBD: include only necessary headers. */
+#include "server.h"
+
+/* The current RDB version. When the format changes in a way that is no longer
+ * backward compatible this number gets incremented. */
+#define RDB_VERSION 11
+
+/* Defines related to the dump file format. To store 32 bits lengths for short
+ * keys requires a lot of space, so we check the most significant 2 bits of
+ * the first byte to interpreter the length:
+ *
+ * 00|XXXXXX => if the two MSB are 00 the len is the 6 bits of this byte
+ * 01|XXXXXX XXXXXXXX => 01, the len is 14 bits, 6 bits + 8 bits of next byte
+ * 10|000000 [32 bit integer] => A full 32 bit len in net byte order will follow
+ * 10|000001 [64 bit integer] => A full 64 bit len in net byte order will follow
+ * 11|OBKIND this means: specially encoded object will follow. The six bits
+ * number specify the kind of object that follows.
+ * See the RDB_ENC_* defines.
+ *
+ * Lengths up to 63 are stored using a single byte, most DB keys, and many
+ * values, will fit inside. */
+#define RDB_6BITLEN 0
+#define RDB_14BITLEN 1
+#define RDB_32BITLEN 0x80
+#define RDB_64BITLEN 0x81
+#define RDB_ENCVAL 3
+#define RDB_LENERR UINT64_MAX
+
+/* When a length of a string object stored on disk has the first two bits
+ * set, the remaining six bits specify a special encoding for the object
+ * accordingly to the following defines: */
+#define RDB_ENC_INT8 0 /* 8 bit signed integer */
+#define RDB_ENC_INT16 1 /* 16 bit signed integer */
+#define RDB_ENC_INT32 2 /* 32 bit signed integer */
+#define RDB_ENC_LZF 3 /* string compressed with FASTLZ */
+
+/* Map object types to RDB object types. Macros starting with OBJ_ are for
+ * memory storage and may change. Instead RDB types must be fixed because
+ * we store them on disk. */
+#define RDB_TYPE_STRING 0
+#define RDB_TYPE_LIST 1
+#define RDB_TYPE_SET 2
+#define RDB_TYPE_ZSET 3
+#define RDB_TYPE_HASH 4
+#define RDB_TYPE_ZSET_2 5 /* ZSET version 2 with doubles stored in binary. */
+#define RDB_TYPE_MODULE_PRE_GA 6 /* Used in 4.0 release candidates */
+#define RDB_TYPE_MODULE_2 7 /* Module value with annotations for parsing without
+ the generating module being loaded. */
+/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
+
+/* Object types for encoded objects. */
+#define RDB_TYPE_HASH_ZIPMAP 9
+#define RDB_TYPE_LIST_ZIPLIST 10
+#define RDB_TYPE_SET_INTSET 11
+#define RDB_TYPE_ZSET_ZIPLIST 12
+#define RDB_TYPE_HASH_ZIPLIST 13
+#define RDB_TYPE_LIST_QUICKLIST 14
+#define RDB_TYPE_STREAM_LISTPACKS 15
+#define RDB_TYPE_HASH_LISTPACK 16
+#define RDB_TYPE_ZSET_LISTPACK 17
+#define RDB_TYPE_LIST_QUICKLIST_2 18
+#define RDB_TYPE_STREAM_LISTPACKS_2 19
+#define RDB_TYPE_SET_LISTPACK 20
+#define RDB_TYPE_STREAM_LISTPACKS_3 21
+/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
+
+/* Test if a type is an object type. */
+#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 21))
+
+/* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
+#define RDB_OPCODE_FUNCTION2 245 /* function library data */
+#define RDB_OPCODE_FUNCTION_PRE_GA 246 /* old function library data for 7.0 rc1 and rc2 */
+#define RDB_OPCODE_MODULE_AUX 247 /* Module auxiliary data. */
+#define RDB_OPCODE_IDLE 248 /* LRU idle time. */
+#define RDB_OPCODE_FREQ 249 /* LFU frequency. */
+#define RDB_OPCODE_AUX 250 /* RDB aux field. */
+#define RDB_OPCODE_RESIZEDB 251 /* Hash table resize hint. */
+#define RDB_OPCODE_EXPIRETIME_MS 252 /* Expire time in milliseconds. */
+#define RDB_OPCODE_EXPIRETIME 253 /* Old expire time in seconds. */
+#define RDB_OPCODE_SELECTDB 254 /* DB number of the following keys. */
+#define RDB_OPCODE_EOF 255 /* End of the RDB file. */
+
+/* Module serialized values sub opcodes */
+#define RDB_MODULE_OPCODE_EOF 0 /* End of module value. */
+#define RDB_MODULE_OPCODE_SINT 1 /* Signed integer. */
+#define RDB_MODULE_OPCODE_UINT 2 /* Unsigned integer. */
+#define RDB_MODULE_OPCODE_FLOAT 3 /* Float. */
+#define RDB_MODULE_OPCODE_DOUBLE 4 /* Double. */
+#define RDB_MODULE_OPCODE_STRING 5 /* String. */
+
+/* rdbLoad...() functions flags. */
+#define RDB_LOAD_NONE 0
+#define RDB_LOAD_ENC (1<<0)
+#define RDB_LOAD_PLAIN (1<<1)
+#define RDB_LOAD_SDS (1<<2)
+
+/* flags on the purpose of rdb save or load */
+#define RDBFLAGS_NONE 0 /* No special RDB loading. */
+#define RDBFLAGS_AOF_PREAMBLE (1<<0) /* Load/save the RDB as AOF preamble. */
+#define RDBFLAGS_REPLICATION (1<<1) /* Load/save for SYNC. */
+#define RDBFLAGS_ALLOW_DUP (1<<2) /* Allow duplicated keys when loading.*/
+#define RDBFLAGS_FEED_REPL (1<<3) /* Feed replication stream when loading.*/
+#define RDBFLAGS_KEEP_CACHE (1<<4) /* Don't reclaim cache after rdb file is generated */
+
+/* When rdbLoadObject() returns NULL, the err flag is
+ * set to hold the type of error that occurred */
+#define RDB_LOAD_ERR_EMPTY_KEY 1 /* Error of empty key */
+#define RDB_LOAD_ERR_OTHER 2 /* Any other errors */
+
+ssize_t rdbWriteRaw(rio *rdb, void *p, size_t len);
+int rdbSaveType(rio *rdb, unsigned char type);
+int rdbLoadType(rio *rdb);
+time_t rdbLoadTime(rio *rdb);
+int rdbSaveLen(rio *rdb, uint64_t len);
+int rdbSaveMillisecondTime(rio *rdb, long long t);
+long long rdbLoadMillisecondTime(rio *rdb, int rdbver);
+uint64_t rdbLoadLen(rio *rdb, int *isencoded);
+int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr);
+int rdbSaveObjectType(rio *rdb, robj *o);
+int rdbLoadObjectType(rio *rdb);
+int rdbLoad(char *filename, rdbSaveInfo *rsi, int rdbflags);
+int rdbSaveBackground(int req, char *filename, rdbSaveInfo *rsi, int rdbflags);
+int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi);
+void rdbRemoveTempFile(pid_t childpid, int from_signal);
+int rdbSaveToFile(const char *filename);
+int rdbSave(int req, char *filename, rdbSaveInfo *rsi, int rdbflags);
+ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid);
+size_t rdbSavedObjectLen(robj *o, robj *key, int dbid);
+robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error);
+void backgroundSaveDoneHandler(int exitcode, int bysignal);
+int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime,int dbid);
+ssize_t rdbSaveSingleModuleAux(rio *rdb, int when, moduleType *mt);
+robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename);
+robj *rdbLoadStringObject(rio *rdb);
+ssize_t rdbSaveStringObject(rio *rdb, robj *obj);
+ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len);
+void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr);
+int rdbSaveBinaryDoubleValue(rio *rdb, double val);
+int rdbLoadBinaryDoubleValue(rio *rdb, double *val);
+int rdbSaveBinaryFloatValue(rio *rdb, float val);
+int rdbLoadBinaryFloatValue(rio *rdb, float *val);
+int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi);
+int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadingCtx *rdb_loading_ctx);
+int rdbFunctionLoad(rio *rdb, int ver, functionsLibCtx* lib_ctx, int rdbflags, sds *err);
+int rdbSaveRio(int req, rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi);
+ssize_t rdbSaveFunctions(rio *rdb);
+rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi);
+
+#endif
diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c
new file mode 100644
index 0000000..1ee562f
--- /dev/null
+++ b/src/redis-benchmark.c
@@ -0,0 +1,2060 @@
+/* Redis benchmark utility.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fmacros.h"
+#include "version.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <signal.h>
+#include <assert.h>
+#include <math.h>
+#include <pthread.h>
+
+#include <sdscompat.h> /* Use hiredis' sds compat header that maps sds calls to their hi_ variants */
+#include <sds.h> /* Use hiredis sds. */
+#include "ae.h"
+#include <hiredis.h>
+#ifdef USE_OPENSSL
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <hiredis_ssl.h>
+#endif
+#include "adlist.h"
+#include "dict.h"
+#include "zmalloc.h"
+#include "atomicvar.h"
+#include "crc16_slottable.h"
+#include "hdr_histogram.h"
+#include "cli_common.h"
+#include "mt19937-64.h"
+
+#define UNUSED(V) ((void) V)
+#define RANDPTR_INITIAL_SIZE 8
+#define DEFAULT_LATENCY_PRECISION 3
+#define MAX_LATENCY_PRECISION 4
+#define MAX_THREADS 500
+#define CLUSTER_SLOTS 16384
+#define CONFIG_LATENCY_HISTOGRAM_MIN_VALUE 10L /* >= 10 usecs */
+#define CONFIG_LATENCY_HISTOGRAM_MAX_VALUE 3000000L /* <= 3 secs(us precision) */
+#define CONFIG_LATENCY_HISTOGRAM_INSTANT_MAX_VALUE 3000000L /* <= 3 secs(us precision) */
+#define SHOW_THROUGHPUT_INTERVAL 250 /* 250ms */
+
+#define CLIENT_GET_EVENTLOOP(c) \
+ (c->thread_id >= 0 ? config.threads[c->thread_id]->el : config.el)
+
+struct benchmarkThread;
+struct clusterNode;
+struct redisConfig;
+
+/* Global benchmark configuration, shared by all threads/clients. */
+static struct config {
+ aeEventLoop *el; /* Event loop used when not running in threaded mode. */
+ cliConnInfo conn_info;
+ const char *hostsocket; /* Unix socket path, or NULL to use TCP. */
+ int tls;
+ struct cliSSLconfig sslconfig;
+ int numclients;
+ redisAtomic int liveclients;
+ int requests; /* Total number of requests to issue. */
+ redisAtomic int requests_issued;
+ redisAtomic int requests_finished;
+ redisAtomic int previous_requests_finished;
+ int last_printed_bytes;
+ long long previous_tick;
+ int keysize;
+ int datasize;
+ int randomkeys;
+ int randomkeys_keyspacelen;
+ int keepalive;
+ int pipeline; /* Number of pipelined commands per request. */
+ long long start;
+ long long totlatency;
+ const char *title;
+ list *clients; /* List of all live clients. */
+ int quiet;
+ int csv;
+ int loop;
+ int idlemode;
+ sds input_dbnumstr;
+ char *tests;
+ int stdinarg; /* get last arg from stdin. (-x option) */
+ int precision;
+ int num_threads;
+ struct benchmarkThread **threads;
+ int cluster_mode;
+ int cluster_node_count;
+ struct clusterNode **cluster_nodes;
+ struct redisConfig *redis_config;
+ struct hdr_histogram* latency_histogram;
+ struct hdr_histogram* current_sec_latency_histogram;
+ redisAtomic int is_fetching_slots;
+ redisAtomic int is_updating_slots;
+ redisAtomic int slots_last_update;
+ int enable_tracking;
+ pthread_mutex_t liveclients_mutex; /* Guards liveclients/clients list in threaded mode. */
+ pthread_mutex_t is_updating_slots_mutex;
+ int resp3; /* use RESP3 */
+} config;
+
+/* Per-connection benchmark client. Note 'client' is a pointer typedef. */
+typedef struct _client {
+ redisContext *context; /* hiredis connection context. */
+ sds obuf; /* Output buffer with the pipelined commands. */
+ char **randptr; /* Pointers to :rand: strings inside the command buf */
+ size_t randlen; /* Number of pointers in client->randptr */
+ size_t randfree; /* Number of unused pointers in client->randptr */
+ char **stagptr; /* Pointers to slot hashtags (cluster mode only) */
+ size_t staglen; /* Number of pointers in client->stagptr */
+ size_t stagfree; /* Number of unused pointers in client->stagptr */
+ size_t written; /* Bytes of 'obuf' already written */
+ long long start; /* Start time of a request */
+ long long latency; /* Request latency */
+ int pending; /* Number of pending requests (replies to consume) */
+ int prefix_pending; /* If non-zero, number of pending prefix commands. Commands
+ such as auth and select are prefixed to the pipeline of
+ benchmark commands and discarded after the first send. */
+ int prefixlen; /* Size in bytes of the pending prefix commands */
+ int thread_id; /* Owning thread index, or negative in non-threaded mode. */
+ struct clusterNode *cluster_node; /* Target node (cluster mode only). */
+ int slots_last_update;
+} *client;
+
+/* Threads. Each benchmark thread runs its own event loop. */
+
+typedef struct benchmarkThread {
+ int index; /* Position in config.threads. */
+ pthread_t thread;
+ aeEventLoop *el; /* Per-thread event loop. */
+} benchmarkThread;
+
+/* Cluster. One entry per node discovered in the target cluster. */
+typedef struct clusterNode {
+ char *ip;
+ int port;
+ sds name; /* Node ID. */
+ int flags;
+ sds replicate; /* Master ID if node is a slave */
+ int *slots; /* Slots served by this node. */
+ int slots_count;
+ int current_slot_index;
+ int *updated_slots; /* Used by updateClusterSlotsConfiguration */
+ int updated_slots_count; /* Used by updateClusterSlotsConfiguration */
+ int replicas_count;
+ sds *migrating; /* An array of sds where even strings are slots and odd
+ * strings are the destination node IDs. */
+ sds *importing; /* An array of sds where even strings are slots and odd
+ * strings are the source node IDs. */
+ int migrating_count; /* Length of the migrating array (migrating slots*2) */
+ int importing_count; /* Length of the importing array (importing slots*2) */
+ struct redisConfig *redis_config; /* Node's save/appendonly config. */
+} clusterNode;
+
+/* Subset of the server configuration fetched via CONFIG GET. */
+typedef struct redisConfig {
+ sds save; /* Value of the 'save' directive. */
+ sds appendonly; /* Value of the 'appendonly' directive. */
+} redisConfig;
+
+/* Prototypes */
+char *redisGitSHA1(void);
+char *redisGitDirty(void);
+static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+static void createMissingClients(client c);
+static benchmarkThread *createBenchmarkThread(int index);
+static void freeBenchmarkThread(benchmarkThread *thread);
+static void freeBenchmarkThreads(void);
+static void *execBenchmarkThread(void *ptr);
+static clusterNode *createClusterNode(char *ip, int port);
+static redisConfig *getRedisConfig(const char *ip, int port,
+ const char *hostsocket);
+static redisContext *getRedisContext(const char *ip, int port,
+ const char *hostsocket);
+static void freeRedisConfig(redisConfig *cfg);
+static int fetchClusterSlotsConfiguration(client c);
+static void updateClusterSlotsConfiguration(void);
+int showThroughput(struct aeEventLoop *eventLoop, long long id,
+ void *clientData);
+
+/* Return a newly allocated sds with the benchmark version string,
+ * including git SHA1/dirty info when compiled from a git checkout. */
+static sds benchmarkVersion(void) {
+ sds version;
+ version = sdscatprintf(sdsempty(), "%s", REDIS_VERSION);
+
+ /* Add git commit and working tree status when available */
+ if (strtoll(redisGitSHA1(),NULL,16)) {
+ version = sdscatprintf(version, " (git:%s", redisGitSHA1());
+ if (strtoll(redisGitDirty(),NULL,10))
+ version = sdscatprintf(version, "-dirty");
+ version = sdscat(version, ")");
+ }
+ return version;
+}
+
+/* Dict callbacks */
+static uint64_t dictSdsHash(const void *key);
+static int dictSdsKeyCompare(dict *d, const void *key1, const void *key2);
+
+/* Implementation */
+/* Return the UNIX time in microseconds. */
+static long long ustime(void) {
+ struct timeval tv;
+ long long ust;
+
+ gettimeofday(&tv, NULL);
+ ust = ((long long)tv.tv_sec)*1000000;
+ ust += tv.tv_usec;
+ return ust;
+}
+
+/* Return the UNIX time in milliseconds. */
+static long long mstime(void) {
+ return ustime()/1000;
+}
+
+/* Dict hash callback: hash the bytes of an sds key. */
+static uint64_t dictSdsHash(const void *key) {
+ return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
+}
+
+/* Dict compare callback: return non-zero if the two sds keys are equal
+ * (same length and same bytes). */
+static int dictSdsKeyCompare(dict *d, const void *key1, const void *key2)
+{
+ int l1,l2;
+ UNUSED(d);
+
+ l1 = sdslen((sds)key1);
+ l2 = sdslen((sds)key2);
+ if (l1 != l2) return 0;
+ return memcmp(key1, key2, l1) == 0;
+}
+
+/* Connect to the server at ip:port (or 'hostsocket' if not NULL),
+ * optionally negotiating TLS and authenticating. Returns the connected
+ * context, or NULL on connection/TLS failure. Exits the process if the
+ * server replies with an error to AUTH. */
+static redisContext *getRedisContext(const char *ip, int port,
+ const char *hostsocket)
+{
+ redisContext *ctx = NULL;
+ redisReply *reply = NULL;
+ if (hostsocket == NULL)
+ ctx = redisConnect(ip, port);
+ else
+ ctx = redisConnectUnix(hostsocket);
+ if (ctx == NULL || ctx->err) {
+ fprintf(stderr,"Could not connect to Redis at ");
+ char *err = (ctx != NULL ? ctx->errstr : "");
+ if (hostsocket == NULL)
+ fprintf(stderr,"%s:%d: %s\n",ip,port,err);
+ else
+ fprintf(stderr,"%s: %s\n",hostsocket,err);
+ goto cleanup;
+ }
+ if (config.tls==1) {
+ const char *err = NULL;
+ if (cliSecureConnection(ctx, config.sslconfig, &err) == REDIS_ERR && err) {
+ fprintf(stderr, "Could not negotiate a TLS connection: %s\n", err);
+ goto cleanup;
+ }
+ }
+ /* No credentials configured: connection is ready as-is. */
+ if (config.conn_info.auth == NULL)
+ return ctx;
+ if (config.conn_info.user == NULL)
+ reply = redisCommand(ctx,"AUTH %s", config.conn_info.auth);
+ else
+ reply = redisCommand(ctx,"AUTH %s %s", config.conn_info.user, config.conn_info.auth);
+ if (reply != NULL) {
+ if (reply->type == REDIS_REPLY_ERROR) {
+ if (hostsocket == NULL)
+ fprintf(stderr, "Node %s:%d replied with error:\n%s\n", ip, port, reply->str);
+ else
+ fprintf(stderr, "Node %s replied with error:\n%s\n", hostsocket, reply->str);
+ freeReplyObject(reply);
+ redisFree(ctx);
+ exit(1);
+ }
+ freeReplyObject(reply);
+ return ctx;
+ }
+ fprintf(stderr, "ERROR: failed to fetch reply from ");
+ if (hostsocket == NULL)
+ fprintf(stderr, "%s:%d\n", ip, port);
+ else
+ fprintf(stderr, "%s\n", hostsocket);
+cleanup:
+ freeReplyObject(reply);
+ redisFree(ctx);
+ return NULL;
+}
+
+
+
+/* Fetch the 'save' and 'appendonly' configuration of the server at
+ * ip:port (or 'hostsocket'). Returns a heap-allocated redisConfig the
+ * caller must release with freeRedisConfig(), or NULL on failure.
+ * Exits the process on connection failure or NOAUTH errors. */
+static redisConfig *getRedisConfig(const char *ip, int port,
+ const char *hostsocket)
+{
+ redisConfig *cfg = zcalloc(sizeof(*cfg));
+ if (!cfg) return NULL;
+ redisContext *c = NULL;
+ redisReply *reply = NULL, *sub_reply = NULL;
+ c = getRedisContext(ip, port, hostsocket);
+ if (c == NULL) {
+ freeRedisConfig(cfg);
+ exit(1);
+ }
+ /* Pipeline both CONFIG GET queries, then read the two replies below. */
+ redisAppendCommand(c, "CONFIG GET %s", "save");
+ redisAppendCommand(c, "CONFIG GET %s", "appendonly");
+ int abort_test = 0;
+ int i = 0;
+ void *r = NULL;
+ for (; i < 2; i++) {
+ int res = redisGetReply(c, &r);
+ if (reply) freeReplyObject(reply);
+ reply = res == REDIS_OK ? ((redisReply *) r) : NULL;
+ if (res != REDIS_OK || !r) goto fail;
+ if (reply->type == REDIS_REPLY_ERROR) {
+ goto fail;
+ }
+ /* CONFIG GET replies with [name, value]: take element[1]. */
+ if (reply->type != REDIS_REPLY_ARRAY || reply->elements < 2) goto fail;
+ sub_reply = reply->element[1];
+ char *value = sub_reply->str;
+ if (!value) value = "";
+ switch (i) {
+ case 0: cfg->save = sdsnew(value); break;
+ case 1: cfg->appendonly = sdsnew(value); break;
+ }
+ }
+ freeReplyObject(reply);
+ redisFree(c);
+ return cfg;
+fail:
+ /* Only a NOAUTH error aborts the whole benchmark; other failures just
+ * return NULL so the caller can proceed without the config. */
+ if (reply && reply->type == REDIS_REPLY_ERROR &&
+ !strncmp(reply->str,"NOAUTH",6)) {
+ if (hostsocket == NULL)
+ fprintf(stderr, "Node %s:%d replied with error:\n%s\n", ip, port, reply->str);
+ else
+ fprintf(stderr, "Node %s replied with error:\n%s\n", hostsocket, reply->str);
+ abort_test = 1;
+ }
+ freeReplyObject(reply);
+ redisFree(c);
+ freeRedisConfig(cfg);
+ if (abort_test) exit(1);
+ return NULL;
+}
+/* Release a redisConfig allocated by getRedisConfig(). */
+static void freeRedisConfig(redisConfig *cfg) {
+ if (cfg->save) sdsfree(cfg->save);
+ if (cfg->appendonly) sdsfree(cfg->appendonly);
+ zfree(cfg);
+}
+
+/* Free a benchmark client: remove its event handlers, release its
+ * resources and unlink it from config.clients. In threaded mode the
+ * shared list/counter updates are protected by liveclients_mutex, and
+ * the thread's event loop is stopped once all requests are finished. */
+static void freeClient(client c) {
+ aeEventLoop *el = CLIENT_GET_EVENTLOOP(c);
+ listNode *ln;
+ aeDeleteFileEvent(el,c->context->fd,AE_WRITABLE);
+ aeDeleteFileEvent(el,c->context->fd,AE_READABLE);
+ if (c->thread_id >= 0) {
+ int requests_finished = 0;
+ atomicGet(config.requests_finished, requests_finished);
+ if (requests_finished >= config.requests) {
+ aeStop(el);
+ }
+ }
+ redisFree(c->context);
+ sdsfree(c->obuf);
+ zfree(c->randptr);
+ zfree(c->stagptr);
+ zfree(c);
+ if (config.num_threads) pthread_mutex_lock(&(config.liveclients_mutex));
+ config.liveclients--;
+ ln = listSearchKey(config.clients,c);
+ assert(ln != NULL);
+ listDelNode(config.clients,ln);
+ if (config.num_threads) pthread_mutex_unlock(&(config.liveclients_mutex));
+}
+
+/* Free every client in config.clients. The next node is saved before
+ * each call since freeClient() removes the node from the list. */
+static void freeAllClients(void) {
+ listNode *ln = config.clients->head, *next;
+
+ while(ln) {
+ next = ln->next;
+ freeClient(ln->value);
+ ln = next;
+ }
+}
+
+/* Re-arm a client for the next request: re-register the write handler
+ * and reset the write offset and pending reply count. */
+static void resetClient(client c) {
+ aeEventLoop *el = CLIENT_GET_EVENTLOOP(c);
+ aeDeleteFileEvent(el,c->context->fd,AE_WRITABLE);
+ aeDeleteFileEvent(el,c->context->fd,AE_READABLE);
+ aeCreateFileEvent(el,c->context->fd,AE_WRITABLE,writeHandler,c);
+ c->written = 0;
+ c->pending = config.pipeline;
+}
+
+/* Overwrite every :rand: placeholder in the client's output buffer with
+ * a random 12-digit decimal number in [0, randomkeys_keyspacelen). */
+static void randomizeClientKey(client c) {
+ size_t i;
+
+ for (i = 0; i < c->randlen; i++) {
+ /* c->randptr[i] points at the placeholder; write the 12 digits
+ * backwards starting from its last character. */
+ char *p = c->randptr[i]+11;
+ size_t r = 0;
+ if (config.randomkeys_keyspacelen != 0)
+ r = random() % config.randomkeys_keyspacelen;
+ size_t j;
+
+ for (j = 0; j < 12; j++) {
+ *p = '0'+r%10;
+ r/=10;
+ p--;
+ }
+ }
+}
+
+/* Rewrite the slot hashtags in the client's output buffer so that the
+ * keys map to the slot currently selected on the client's cluster node.
+ * Only used in cluster mode (requires a valid thread and cluster node). */
+static void setClusterKeyHashTag(client c) {
+ assert(c->thread_id >= 0);
+ clusterNode *node = c->cluster_node;
+ assert(node);
+ assert(node->current_slot_index < node->slots_count);
+ int is_updating_slots = 0;
+ atomicGet(config.is_updating_slots, is_updating_slots);
+ /* If updateClusterSlotsConfiguration is updating the slots array,
+ * call updateClusterSlotsConfiguration in order to block the thread
+ * since the mutex is locked. When the slots will be updated by the
+ * thread that's actually performing the update, the execution of
+ * updateClusterSlotsConfiguration won't actually do anything, since
+ * the updated_slots_count array will be already NULL. */
+ if (is_updating_slots) updateClusterSlotsConfiguration();
+ int slot = node->slots[node->current_slot_index];
+ /* crc16_slot_table[slot] is a 1-3 char tag hashing to 'slot'; pad the
+ * {tag} placeholder with '}' when the tag is shorter than 3 chars. */
+ const char *tag = crc16_slot_table[slot];
+ int taglen = strlen(tag);
+ size_t i;
+ for (i = 0; i < c->staglen; i++) {
+ char *p = c->stagptr[i] + 1;
+ p[0] = tag[0];
+ p[1] = (taglen >= 2 ? tag[1] : '}');
+ p[2] = (taglen == 3 ? tag[2] : '}');
+ }
+}
+
+/* Called when a client finished consuming all its pending replies.
+ * Either stop (all requests done), reuse the connection (keepalive),
+ * or tear the client down and spawn a replacement connection. */
+static void clientDone(client c) {
+ int requests_finished = 0;
+ atomicGet(config.requests_finished, requests_finished);
+ if (requests_finished >= config.requests) {
+ freeClient(c);
+ if (!config.num_threads && config.el) aeStop(config.el);
+ return;
+ }
+ if (config.keepalive) {
+ resetClient(c);
+ } else {
+ /* No keepalive: replace this client with a fresh connection. The
+ * liveclients decrement/increment around createMissingClients()
+ * makes it create exactly one new client. */
+ if (config.num_threads) pthread_mutex_lock(&(config.liveclients_mutex));
+ config.liveclients--;
+ createMissingClients(c);
+ config.liveclients++;
+ if (config.num_threads)
+ pthread_mutex_unlock(&(config.liveclients_mutex));
+ freeClient(c);
+ }
+}
+
/* AE read callback: drain the socket into the hiredis buffer, then parse as
 * many pending replies as are available. Handles error replies (with special
 * treatment of MOVED/ASK/CLUSTERDOWN in cluster mode), discards prefix-command
 * replies (AUTH/SELECT/HELLO/CLIENT TRACKING), records per-request latency in
 * the HDR histograms, and calls clientDone() once the whole pipeline is
 * acknowledged. */
static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
    client c = privdata;
    void *reply = NULL;
    UNUSED(el);
    UNUSED(fd);
    UNUSED(mask);

    /* Calculate latency only for the first read event. This means that the
     * server already sent the reply and we need to parse it. Parsing overhead
     * is not part of the latency, so calculate it only once, here. */
    if (c->latency < 0) c->latency = ustime()-(c->start);

    if (redisBufferRead(c->context) != REDIS_OK) {
        fprintf(stderr,"Error: %s\n",c->context->errstr);
        exit(1);
    } else {
        /* Parse replies until the buffer runs dry or the pipeline is done. */
        while(c->pending) {
            if (redisGetReply(c->context,&reply) != REDIS_OK) {
                fprintf(stderr,"Error: %s\n",c->context->errstr);
                exit(1);
            }
            if (reply != NULL) {
                if (reply == (void*)REDIS_REPLY_ERROR) {
                    fprintf(stderr,"Unexpected error reply, exiting...\n");
                    exit(1);
                }
                redisReply *r = reply;
                if (r->type == REDIS_REPLY_ERROR) {
                    /* Try to update slots configuration if reply error is
                     * MOVED/ASK/CLUSTERDOWN and the key(s) used by the command
                     * contain(s) the slot hash tag.
                     * If the error is not topology-update related then we
                     * immediately exit to avoid false results. */
                    if (c->cluster_node && c->staglen) {
                        int fetch_slots = 0, do_wait = 0;
                        if (!strncmp(r->str,"MOVED",5) || !strncmp(r->str,"ASK",3))
                            fetch_slots = 1;
                        else if (!strncmp(r->str,"CLUSTERDOWN",11)) {
                            /* Usually the cluster is able to recover itself after
                             * a CLUSTERDOWN error, so try to sleep one second
                             * before requesting the new configuration. */
                            fetch_slots = 1;
                            do_wait = 1;
                            fprintf(stderr, "Error from server %s:%d: %s.\n",
                                    c->cluster_node->ip,
                                    c->cluster_node->port,
                                    r->str);
                        }
                        if (do_wait) sleep(1);
                        if (fetch_slots && !fetchClusterSlotsConfiguration(c))
                            exit(1);
                    } else {
                        /* Any other error reply is fatal for the benchmark. */
                        if (c->cluster_node) {
                            fprintf(stderr, "Error from server %s:%d: %s\n",
                                    c->cluster_node->ip,
                                    c->cluster_node->port,
                                    r->str);
                        } else fprintf(stderr, "Error from server: %s\n", r->str);
                        exit(1);
                    }
                }

                freeReplyObject(reply);
                /* This is an OK for prefix commands such as auth and select.*/
                if (c->prefix_pending > 0) {
                    c->prefix_pending--;
                    c->pending--;
                    /* Discard prefix commands on first response.*/
                    if (c->prefixlen > 0) {
                        size_t j;
                        sdsrange(c->obuf, c->prefixlen, -1);
                        /* We also need to fix the pointers to the strings
                         * we need to randomize. */
                        for (j = 0; j < c->randlen; j++)
                            c->randptr[j] -= c->prefixlen;
                        /* Fix the pointers to the slot hash tags */
                        for (j = 0; j < c->staglen; j++)
                            c->stagptr[j] -= c->prefixlen;
                        c->prefixlen = 0;
                    }
                    continue;
                }
                int requests_finished = 0;
                atomicGetIncr(config.requests_finished, requests_finished, 1);
                /* Record the latency (clamped to the histogram maximum);
                 * atomic recording is required when worker threads share
                 * the histograms. */
                if (requests_finished < config.requests){
                    if (config.num_threads == 0) {
                        hdr_record_value(
                        config.latency_histogram, // Histogram to record to
                        (long)c->latency<=CONFIG_LATENCY_HISTOGRAM_MAX_VALUE ? (long)c->latency : CONFIG_LATENCY_HISTOGRAM_MAX_VALUE); // Value to record
                        hdr_record_value(
                        config.current_sec_latency_histogram, // Histogram to record to
                        (long)c->latency<=CONFIG_LATENCY_HISTOGRAM_INSTANT_MAX_VALUE ? (long)c->latency : CONFIG_LATENCY_HISTOGRAM_INSTANT_MAX_VALUE); // Value to record
                    } else {
                        hdr_record_value_atomic(
                        config.latency_histogram, // Histogram to record to
                        (long)c->latency<=CONFIG_LATENCY_HISTOGRAM_MAX_VALUE ? (long)c->latency : CONFIG_LATENCY_HISTOGRAM_MAX_VALUE); // Value to record
                        hdr_record_value_atomic(
                        config.current_sec_latency_histogram, // Histogram to record to
                        (long)c->latency<=CONFIG_LATENCY_HISTOGRAM_INSTANT_MAX_VALUE ? (long)c->latency : CONFIG_LATENCY_HISTOGRAM_INSTANT_MAX_VALUE); // Value to record
                    }
                }
                c->pending--;
                if (c->pending == 0) {
                    clientDone(c);
                    break;
                }
            } else {
                /* No complete reply buffered yet: wait for more data. */
                break;
            }
        }
    }
}
+
/* AE write callback: on the first write of a batch, check the request budget,
 * randomize keys / set slot hash tags, and start the latency clock; then push
 * the output buffer to the socket. Once the whole buffer is flushed, swap the
 * WRITABLE event for a READABLE one so readHandler collects the replies. */
static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
    client c = privdata;
    UNUSED(el);
    UNUSED(fd);
    UNUSED(mask);

    /* Initialize request when nothing was written. */
    if (c->written == 0) {
        /* Enforce upper bound to number of requests. */
        int requests_issued = 0;
        atomicGetIncr(config.requests_issued, requests_issued, config.pipeline);
        if (requests_issued >= config.requests) {
            return;
        }

        /* Really initialize: randomize keys and set start time. */
        if (config.randomkeys) randomizeClientKey(c);
        if (config.cluster_mode && c->staglen > 0) setClusterKeyHashTag(c);
        atomicGet(config.slots_last_update, c->slots_last_update);
        c->start = ustime();
        c->latency = -1; /* readHandler computes it on the first reply. */
    }
    const ssize_t buflen = sdslen(c->obuf);
    const ssize_t writeLen = buflen-c->written;
    if (writeLen > 0) {
        void *ptr = c->obuf+c->written;
        while(1) {
            /* Optimistically try to write before checking if the file descriptor
             * is actually writable. At worst we get EAGAIN. */
            const ssize_t nwritten = cliWriteConn(c->context,ptr,writeLen);
            if (nwritten != writeLen) {
                if (nwritten == -1 && errno != EAGAIN) {
                    /* Hard socket error: drop this client. EPIPE is silent
                     * because it just means the server closed on us. */
                    if (errno != EPIPE)
                        fprintf(stderr, "Error writing to the server: %s\n", strerror(errno));
                    freeClient(c);
                    return;
                } else if (nwritten > 0) {
                    /* Partial write: remember progress, wait for the next
                     * writable event to send the remainder. */
                    c->written += nwritten;
                    return;
                }
                /* nwritten == -1 && errno == EAGAIN: retry immediately. */
            } else {
                /* Whole request flushed: switch to reading replies. */
                aeDeleteFileEvent(el,c->context->fd,AE_WRITABLE);
                aeCreateFileEvent(el,c->context->fd,AE_READABLE,readHandler,c);
                return;
            }
        }
    }
}
+
/* Create a benchmark client, configured to send the command passed as 'cmd' of
 * 'len' bytes.
 *
 * The command is copied N times in the client output buffer (that is reused
 * again and again to send the request to the server) according to the
 * configured pipeline size.
 *
 * Also an initial SELECT command is prepended in order to make sure the right
 * database is selected, if needed. The initial SELECT will be discarded as soon
 * as the first reply is received.
 *
 * To create a client from scratch, the 'from' pointer is set to NULL. If instead
 * we want to create a client using another client as reference, the 'from' pointer
 * points to the client to use as reference. In such a case the following
 * information is taken from the 'from' client:
 *
 * 1) The command line to use.
 * 2) The offsets of the __rand_int__ elements inside the command line, used
 *    for arguments randomization.
 *
 * Even when cloning another client, prefix commands are applied if needed.*/
static client createClient(char *cmd, size_t len, client from, int thread_id) {
    int j;
    int is_cluster_client = (config.cluster_mode && thread_id >= 0);
    client c = zmalloc(sizeof(struct _client));

    const char *ip = NULL;
    int port = 0;
    c->cluster_node = NULL;
    if (config.hostsocket == NULL || is_cluster_client) {
        if (!is_cluster_client) {
            ip = config.conn_info.hostip;
            port = config.conn_info.hostport;
        } else {
            /* Spread cluster clients across master nodes: by live-client
             * count when there are more nodes than threads, otherwise by
             * thread id. */
            int node_idx = 0;
            if (config.num_threads < config.cluster_node_count)
                node_idx = config.liveclients % config.cluster_node_count;
            else
                node_idx = thread_id % config.cluster_node_count;
            clusterNode *node = config.cluster_nodes[node_idx];
            assert(node != NULL);
            ip = (const char *) node->ip;
            port = node->port;
            c->cluster_node = node;
        }
        c->context = redisConnectNonBlock(ip,port);
    } else {
        c->context = redisConnectUnixNonBlock(config.hostsocket);
    }
    if (c->context->err) {
        fprintf(stderr,"Could not connect to Redis at ");
        if (config.hostsocket == NULL || is_cluster_client)
            fprintf(stderr,"%s:%d: %s\n",ip,port,c->context->errstr);
        else
            fprintf(stderr,"%s: %s\n",config.hostsocket,c->context->errstr);
        exit(1);
    }
    if (config.tls==1) {
        const char *err = NULL;
        if (cliSecureConnection(c->context, config.sslconfig, &err) == REDIS_ERR && err) {
            fprintf(stderr, "Could not negotiate a TLS connection: %s\n", err);
            exit(1);
        }
    }
    c->thread_id = thread_id;
    /* Suppress hiredis cleanup of unused buffers for max speed. */
    c->context->reader->maxbuf = 0;

    /* Build the request buffer:
     * Queue N requests accordingly to the pipeline size, or simply clone
     * the example client buffer. */
    c->obuf = sdsempty();
    /* Prefix the request buffer with AUTH and/or SELECT commands, if applicable.
     * These commands are discarded after the first response, so if the client is
     * reused the commands will not be used again. */
    c->prefix_pending = 0;
    if (config.conn_info.auth) {
        char *buf = NULL;
        int len;
        if (config.conn_info.user == NULL)
            len = redisFormatCommand(&buf, "AUTH %s", config.conn_info.auth);
        else
            len = redisFormatCommand(&buf, "AUTH %s %s",
                config.conn_info.user, config.conn_info.auth);
        c->obuf = sdscatlen(c->obuf, buf, len);
        free(buf);
        c->prefix_pending++;
    }

    if (config.enable_tracking) {
        char *buf = NULL;
        int len = redisFormatCommand(&buf, "CLIENT TRACKING on");
        c->obuf = sdscatlen(c->obuf, buf, len);
        free(buf);
        c->prefix_pending++;
    }

    /* If a DB number different than zero is selected, prefix our request
     * buffer with the SELECT command, that will be discarded the first
     * time the replies are received, so if the client is reused the
     * SELECT command will not be used again. */
    if (config.conn_info.input_dbnum != 0 && !is_cluster_client) {
        c->obuf = sdscatprintf(c->obuf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
            (int)sdslen(config.input_dbnumstr),config.input_dbnumstr);
        c->prefix_pending++;
    }

    if (config.resp3) {
        char *buf = NULL;
        int len = redisFormatCommand(&buf, "HELLO 3");
        c->obuf = sdscatlen(c->obuf, buf, len);
        free(buf);
        c->prefix_pending++;
    }

    c->prefixlen = sdslen(c->obuf);
    /* Append the request itself. */
    if (from) {
        c->obuf = sdscatlen(c->obuf,
            from->obuf+from->prefixlen,
            sdslen(from->obuf)-from->prefixlen);
    } else {
        for (j = 0; j < config.pipeline; j++)
            c->obuf = sdscatlen(c->obuf,cmd,len);
    }

    c->written = 0;
    c->pending = config.pipeline+c->prefix_pending;
    c->randptr = NULL;
    c->randlen = 0;
    c->stagptr = NULL;
    c->staglen = 0;

    /* Find substrings in the output buffer that need to be randomized. */
    if (config.randomkeys) {
        if (from) {
            c->randlen = from->randlen;
            c->randfree = 0;
            c->randptr = zmalloc(sizeof(char*)*c->randlen);
            /* copy the offsets. */
            for (j = 0; j < (int)c->randlen; j++) {
                c->randptr[j] = c->obuf + (from->randptr[j]-from->obuf);
                /* Adjust for the different select prefix length. */
                c->randptr[j] += c->prefixlen - from->prefixlen;
            }
        } else {
            char *p = c->obuf;

            c->randlen = 0;
            c->randfree = RANDPTR_INITIAL_SIZE;
            c->randptr = zmalloc(sizeof(char*)*c->randfree);
            while ((p = strstr(p,"__rand_int__")) != NULL) {
                /* Grow the pointer array geometrically when exhausted. */
                if (c->randfree == 0) {
                    c->randptr = zrealloc(c->randptr,sizeof(char*)*c->randlen*2);
                    c->randfree += c->randlen;
                }
                c->randptr[c->randlen++] = p;
                c->randfree--;
                p += 12; /* 12 is strlen("__rand_int__"). */
            }
        }
    }
    /* If cluster mode is enabled, set slot hashtags pointers. */
    if (config.cluster_mode) {
        if (from) {
            c->staglen = from->staglen;
            c->stagfree = 0;
            c->stagptr = zmalloc(sizeof(char*)*c->staglen);
            /* copy the offsets. */
            for (j = 0; j < (int)c->staglen; j++) {
                c->stagptr[j] = c->obuf + (from->stagptr[j]-from->obuf);
                /* Adjust for the different select prefix length. */
                c->stagptr[j] += c->prefixlen - from->prefixlen;
            }
        } else {
            char *p = c->obuf;

            c->staglen = 0;
            c->stagfree = RANDPTR_INITIAL_SIZE;
            c->stagptr = zmalloc(sizeof(char*)*c->stagfree);
            while ((p = strstr(p,"{tag}")) != NULL) {
                if (c->stagfree == 0) {
                    c->stagptr = zrealloc(c->stagptr,
                        sizeof(char*) * c->staglen*2);
                    c->stagfree += c->staglen;
                }
                c->stagptr[c->staglen++] = p;
                c->stagfree--;
                p += 5; /* 5 is strlen("{tag}"). */
            }
        }
    }
    /* Register the client on the proper event loop: the thread's own loop
     * when running multi-threaded, the global one otherwise. */
    aeEventLoop *el = NULL;
    if (thread_id < 0) el = config.el;
    else {
        benchmarkThread *thread = config.threads[thread_id];
        el = thread->el;
    }
    if (config.idlemode == 0)
        aeCreateFileEvent(el,c->context->fd,AE_WRITABLE,writeHandler,c);
    else
        /* In idle mode, clients still need to register readHandler for catching errors */
        aeCreateFileEvent(el,c->context->fd,AE_READABLE,readHandler,c);

    listAddNodeTail(config.clients,c);
    atomicIncr(config.liveclients, 1);
    atomicGet(config.slots_last_update, c->slots_last_update);
    return c;
}
+
+static void createMissingClients(client c) {
+ int n = 0;
+ while(config.liveclients < config.numclients) {
+ int thread_id = -1;
+ if (config.num_threads)
+ thread_id = config.liveclients % config.num_threads;
+ createClient(NULL,0,c,thread_id);
+
+ /* Listen backlog is quite limited on most systems */
+ if (++n > 64) {
+ usleep(50000);
+ n = 0;
+ }
+ }
+}
+
/* Print the final report for the current benchmark run: throughput,
 * configuration summary, the percentile and cumulative latency
 * distributions from the HDR histogram, and a one-line latency summary.
 * Output shape depends on --quiet / --csv. Histogram values are stored in
 * microseconds and divided by 1000 for display in milliseconds. */
static void showLatencyReport(void) {

    const float reqpersec = (float)config.requests_finished/((float)config.totlatency/1000.0f);
    const float p0 = ((float) hdr_min(config.latency_histogram))/1000.0f;
    const float p50 = hdr_value_at_percentile(config.latency_histogram, 50.0 )/1000.0f;
    const float p95 = hdr_value_at_percentile(config.latency_histogram, 95.0 )/1000.0f;
    const float p99 = hdr_value_at_percentile(config.latency_histogram, 99.0 )/1000.0f;
    const float p100 = ((float) hdr_max(config.latency_histogram))/1000.0f;
    const float avg = hdr_mean(config.latency_histogram)/1000.0f;

    if (!config.quiet && !config.csv) {
        printf("%*s\r", config.last_printed_bytes, " "); // ensure there is a clean line
        printf("====== %s ======\n", config.title);
        printf("  %d requests completed in %.2f seconds\n", config.requests_finished,
            (float)config.totlatency/1000);
        printf("  %d parallel clients\n", config.numclients);
        printf("  %d bytes payload\n", config.datasize);
        printf("  keep alive: %d\n", config.keepalive);
        if (config.cluster_mode) {
            /* Per-node persistence configuration, when it could be fetched. */
            printf("  cluster mode: yes (%d masters)\n",
                   config.cluster_node_count);
            int m ;
            for (m = 0; m < config.cluster_node_count; m++) {
                clusterNode *node = config.cluster_nodes[m];
                redisConfig *cfg = node->redis_config;
                if (cfg == NULL) continue;
                printf("  node [%d] configuration:\n",m );
                printf("    save: %s\n",
                    sdslen(cfg->save) ? cfg->save : "NONE");
                printf("    appendonly: %s\n", cfg->appendonly);
            }
        } else {
            if (config.redis_config) {
                printf("  host configuration \"save\": %s\n",
                       config.redis_config->save);
                printf("  host configuration \"appendonly\": %s\n",
                       config.redis_config->appendonly);
            }
        }
        printf("  multi-thread: %s\n", (config.num_threads ? "yes" : "no"));
        if (config.num_threads)
            printf("  threads: %d\n", config.num_threads);

        printf("\n");
        printf("Latency by percentile distribution:\n");
        struct hdr_iter iter;
        long long previous_cumulative_count = -1;
        const long long total_count = config.latency_histogram->total_count;
        hdr_iter_percentile_init(&iter, config.latency_histogram, 1);
        struct hdr_iter_percentiles *percentiles = &iter.specifics.percentiles;
        /* Skip buckets whose cumulative count did not change, so the table
         * only shows lines that carry new information. */
        while (hdr_iter_next(&iter))
        {
            const double value = iter.highest_equivalent_value / 1000.0f;
            const double percentile = percentiles->percentile;
            const long long cumulative_count = iter.cumulative_count;
            if( previous_cumulative_count != cumulative_count || cumulative_count == total_count ){
                printf("%3.3f%% <= %.3f milliseconds (cumulative count %lld)\n", percentile, value, cumulative_count);
            }
            previous_cumulative_count = cumulative_count;
        }
        printf("\n");
        printf("Cumulative distribution of latencies:\n");
        previous_cumulative_count = -1;
        hdr_iter_linear_init(&iter, config.latency_histogram, 100);
        while (hdr_iter_next(&iter))
        {
            const double value = iter.highest_equivalent_value / 1000.0f;
            const long long cumulative_count = iter.cumulative_count;
            const double percentile = ((double)cumulative_count/(double)total_count)*100.0;
            if( previous_cumulative_count != cumulative_count || cumulative_count == total_count ){
                printf("%3.3f%% <= %.3f milliseconds (cumulative count %lld)\n", percentile, value, cumulative_count);
            }
            /* After the 2 milliseconds latency to have percentages split
             * by decimals will just add a lot of noise to the output. */
            if(iter.highest_equivalent_value > 2000){
                hdr_iter_linear_set_value_units_per_bucket(&iter,1000);
            }
            previous_cumulative_count = cumulative_count;
        }
        printf("\n");
        printf("Summary:\n");
        printf("  throughput summary: %.2f requests per second\n", reqpersec);
        printf("  latency summary (msec):\n");
        printf("    %9s %9s %9s %9s %9s %9s\n", "avg", "min", "p50", "p95", "p99", "max");
        printf("    %9.3f %9.3f %9.3f %9.3f %9.3f %9.3f\n", avg, p0, p50, p95, p99, p100);
    } else if (config.csv) {
        printf("\"%s\",\"%.2f\",\"%.3f\",\"%.3f\",\"%.3f\",\"%.3f\",\"%.3f\",\"%.3f\"\n", config.title, reqpersec, avg, p0, p50, p95, p99, p100);
    } else {
        printf("%*s\r", config.last_printed_bytes, " "); // ensure there is a clean line
        printf("%s: %.2f requests per second, p50=%.3f msec\n", config.title, reqpersec, p50);
    }
}
+
+static void initBenchmarkThreads(void) {
+ int i;
+ if (config.threads) freeBenchmarkThreads();
+ config.threads = zmalloc(config.num_threads * sizeof(benchmarkThread*));
+ for (i = 0; i < config.num_threads; i++) {
+ benchmarkThread *thread = createBenchmarkThread(i);
+ config.threads[i] = thread;
+ }
+}
+
+static void startBenchmarkThreads(void) {
+ int i;
+ for (i = 0; i < config.num_threads; i++) {
+ benchmarkThread *t = config.threads[i];
+ if (pthread_create(&(t->thread), NULL, execBenchmarkThread, t)){
+ fprintf(stderr, "FATAL: Failed to start thread %d.\n", i);
+ exit(1);
+ }
+ }
+ for (i = 0; i < config.num_threads; i++)
+ pthread_join(config.threads[i]->thread, NULL);
+}
+
/* Run one named benchmark: reset the per-run counters, allocate fresh HDR
 * histograms, create the clients (and worker threads when configured),
 * drive the event loop(s) until the request budget is consumed, then print
 * the latency report and release every per-run resource. 'cmd' is the
 * pre-formatted RESP command of 'len' bytes to send. */
static void benchmark(const char *title, char *cmd, int len) {
    client c;

    config.title = title;
    config.requests_issued = 0;
    config.requests_finished = 0;
    config.previous_requests_finished = 0;
    config.last_printed_bytes = 0;
    hdr_init(
        CONFIG_LATENCY_HISTOGRAM_MIN_VALUE,          // Minimum value
        CONFIG_LATENCY_HISTOGRAM_MAX_VALUE,          // Maximum value
        config.precision,                            // Number of significant figures
        &config.latency_histogram);                  // Pointer to initialise
    hdr_init(
        CONFIG_LATENCY_HISTOGRAM_MIN_VALUE,          // Minimum value
        CONFIG_LATENCY_HISTOGRAM_INSTANT_MAX_VALUE,  // Maximum value
        config.precision,                            // Number of significant figures
        &config.current_sec_latency_histogram);      // Pointer to initialise

    if (config.num_threads) initBenchmarkThreads();

    /* The first client is the template the others are cloned from. */
    int thread_id = config.num_threads > 0 ? 0 : -1;
    c = createClient(cmd,len,NULL,thread_id);
    createMissingClients(c);

    config.start = mstime();
    if (!config.num_threads) aeMain(config.el);
    else startBenchmarkThreads();
    config.totlatency = mstime()-config.start;

    showLatencyReport();
    freeAllClients();
    if (config.threads) freeBenchmarkThreads();
    if (config.current_sec_latency_histogram) hdr_close(config.current_sec_latency_histogram);
    if (config.latency_histogram) hdr_close(config.latency_histogram);

}
+
+/* Thread functions. */
+
+static benchmarkThread *createBenchmarkThread(int index) {
+ benchmarkThread *thread = zmalloc(sizeof(*thread));
+ if (thread == NULL) return NULL;
+ thread->index = index;
+ thread->el = aeCreateEventLoop(1024*10);
+ aeCreateTimeEvent(thread->el,1,showThroughput,(void *)thread,NULL);
+ return thread;
+}
+
+static void freeBenchmarkThread(benchmarkThread *thread) {
+ if (thread->el) aeDeleteEventLoop(thread->el);
+ zfree(thread);
+}
+
+static void freeBenchmarkThreads(void) {
+ int i = 0;
+ for (; i < config.num_threads; i++) {
+ benchmarkThread *thread = config.threads[i];
+ if (thread) freeBenchmarkThread(thread);
+ }
+ zfree(config.threads);
+ config.threads = NULL;
+}
+
+static void *execBenchmarkThread(void *ptr) {
+ benchmarkThread *thread = (benchmarkThread *) ptr;
+ aeMain(thread->el);
+ return NULL;
+}
+
+/* Cluster helper functions. */
+
+static clusterNode *createClusterNode(char *ip, int port) {
+ clusterNode *node = zmalloc(sizeof(*node));
+ if (!node) return NULL;
+ node->ip = ip;
+ node->port = port;
+ node->name = NULL;
+ node->flags = 0;
+ node->replicate = NULL;
+ node->replicas_count = 0;
+ node->slots = zmalloc(CLUSTER_SLOTS * sizeof(int));
+ node->slots_count = 0;
+ node->current_slot_index = 0;
+ node->updated_slots = NULL;
+ node->updated_slots_count = 0;
+ node->migrating = NULL;
+ node->importing = NULL;
+ node->migrating_count = 0;
+ node->importing_count = 0;
+ node->redis_config = NULL;
+ return node;
+}
+
/* Release a cluster node and everything it owns: name, replication target,
 * the migrating/importing slot lists (flat arrays of sds pairs), the node's
 * fetched configuration and its slots array. */
static void freeClusterNode(clusterNode *node) {
    int i;
    if (node->name) sdsfree(node->name);
    if (node->replicate) sdsfree(node->replicate);
    if (node->migrating != NULL) {
        for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]);
        zfree(node->migrating);
    }
    if (node->importing != NULL) {
        for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]);
        zfree(node->importing);
    }
    /* If the node is not the reference node, that uses the address from
     * config.conn_info.hostip and config.conn_info.hostport, then the node ip has been
     * allocated by fetchClusterConfiguration, so it must be freed. */
    if (node->ip && strcmp(node->ip, config.conn_info.hostip) != 0) sdsfree(node->ip);
    if (node->redis_config != NULL) freeRedisConfig(node->redis_config);
    zfree(node->slots);
    zfree(node);
}
+
+static void freeClusterNodes(void) {
+ int i = 0;
+ for (; i < config.cluster_node_count; i++) {
+ clusterNode *n = config.cluster_nodes[i];
+ if (n) freeClusterNode(n);
+ }
+ zfree(config.cluster_nodes);
+ config.cluster_nodes = NULL;
+}
+
+static clusterNode **addClusterNode(clusterNode *node) {
+ int count = config.cluster_node_count + 1;
+ config.cluster_nodes = zrealloc(config.cluster_nodes,
+ count * sizeof(*node));
+ if (!config.cluster_nodes) return NULL;
+ config.cluster_nodes[config.cluster_node_count++] = node;
+ return config.cluster_nodes;
+}
+
/* TODO: This should be refactored to use CLUSTER SLOTS, the migrating/importing
 * information is anyway not used.
 */
/* Connect to the configured entry point, run CLUSTER NODES, and parse its
 * text reply to build config.cluster_nodes: one entry per master that owns
 * at least one slot. Replicas and slot-less masters are skipped. Returns 1
 * on success, 0 on failure (in which case any partially built node list is
 * released). */
static int fetchClusterConfiguration(void) {
    int success = 1;
    redisContext *ctx = NULL;
    redisReply *reply = NULL;
    ctx = getRedisContext(config.conn_info.hostip, config.conn_info.hostport, config.hostsocket);
    if (ctx == NULL) {
        exit(1);
    }
    clusterNode *firstNode = createClusterNode((char *) config.conn_info.hostip,
                                               config.conn_info.hostport);
    if (!firstNode) {success = 0; goto cleanup;}
    reply = redisCommand(ctx, "CLUSTER NODES");
    success = (reply != NULL);
    if (!success) goto cleanup;
    success = (reply->type != REDIS_REPLY_ERROR);
    if (!success) {
        if (config.hostsocket == NULL) {
            fprintf(stderr, "Cluster node %s:%d replied with error:\n%s\n",
                    config.conn_info.hostip, config.conn_info.hostport, reply->str);
        } else {
            fprintf(stderr, "Cluster node %s replied with error:\n%s\n",
                    config.hostsocket, reply->str);
        }
        goto cleanup;
    }
    /* CLUSTER NODES replies with one line per node:
     * "<id> <ip:port@cport> <flags> <master-id> ... <slot> <slot-range> ..." */
    char *lines = reply->str, *p, *line;
    while ((p = strstr(lines, "\n")) != NULL) {
        *p = '\0';
        line = lines;
        lines = p + 1;
        char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL;
        int i = 0;
        /* Tokenize the first 8 space-separated fields; what follows (if
         * anything) is the slots definition. */
        while ((p = strchr(line, ' ')) != NULL) {
            *p = '\0';
            char *token = line;
            line = p + 1;
            switch(i++){
            case 0: name = token; break;
            case 1: addr = token; break;
            case 2: flags = token; break;
            case 3: master_id = token; break;
            }
            if (i == 8) break; // Slots
        }
        if (!flags) {
            fprintf(stderr, "Invalid CLUSTER NODES reply: missing flags.\n");
            success = 0;
            goto cleanup;
        }
        int myself = (strstr(flags, "myself") != NULL);
        /* A '-' master id means "no master", i.e. this node is a master. */
        int is_replica = (strstr(flags, "slave") != NULL ||
                         (master_id != NULL && master_id[0] != '-'));
        if (is_replica) continue;
        if (addr == NULL) {
            fprintf(stderr, "Invalid CLUSTER NODES reply: missing addr.\n");
            success = 0;
            goto cleanup;
        }
        clusterNode *node = NULL;
        char *ip = NULL;
        int port = 0;
        /* Split "ip:port@cport": strrchr copes with IPv6 colons, the
         * optional '@cport' cluster-bus suffix is dropped. */
        char *paddr = strrchr(addr, ':');
        if (paddr != NULL) {
            *paddr = '\0';
            ip = addr;
            addr = paddr + 1;
            /* If internal bus is specified, then just drop it. */
            if ((paddr = strchr(addr, '@')) != NULL) *paddr = '\0';
            port = atoi(addr);
        }
        if (myself) {
            /* Reuse the entry-point node, fixing its address if the
             * advertised one differs from what we connected to. */
            node = firstNode;
            if (ip != NULL && strcmp(node->ip, ip) != 0) {
                node->ip = sdsnew(ip);
                node->port = port;
            }
        } else {
            node = createClusterNode(sdsnew(ip), port);
        }
        if (node == NULL) {
            success = 0;
            goto cleanup;
        }
        if (name != NULL) node->name = sdsnew(name);
        if (i == 8) {
            /* Parse the slot definitions: plain slots ("42"), ranges
             * ("0-5460"), and bracketed migrating/importing markers
             * ("[42->-<id>]" / "[42-<-<id>]"). */
            int remaining = strlen(line);
            while (remaining > 0) {
                p = strchr(line, ' ');
                if (p == NULL) p = line + remaining;
                remaining -= (p - line);

                char *slotsdef = line;
                *p = '\0';
                if (remaining) {
                    line = p + 1;
                    remaining--;
                } else line = p;
                char *dash = NULL;
                if (slotsdef[0] == '[') {
                    slotsdef++;
                    if ((p = strstr(slotsdef, "->-"))) { // Migrating
                        *p = '\0';
                        p += 3;
                        char *closing_bracket = strchr(p, ']');
                        if (closing_bracket) *closing_bracket = '\0';
                        sds slot = sdsnew(slotsdef);
                        sds dst = sdsnew(p);
                        /* Stored as flat [slot, destination-id] pairs. */
                        node->migrating_count += 2;
                        node->migrating =
                            zrealloc(node->migrating,
                                (node->migrating_count * sizeof(sds)));
                        node->migrating[node->migrating_count - 2] =
                            slot;
                        node->migrating[node->migrating_count - 1] =
                            dst;
                    } else if ((p = strstr(slotsdef, "-<-"))) {//Importing
                        *p = '\0';
                        p += 3;
                        char *closing_bracket = strchr(p, ']');
                        if (closing_bracket) *closing_bracket = '\0';
                        sds slot = sdsnew(slotsdef);
                        sds src = sdsnew(p);
                        node->importing_count += 2;
                        node->importing = zrealloc(node->importing,
                            (node->importing_count * sizeof(sds)));
                        node->importing[node->importing_count - 2] =
                            slot;
                        node->importing[node->importing_count - 1] =
                            src;
                    }
                } else if ((dash = strchr(slotsdef, '-')) != NULL) {
                    p = dash;
                    int start, stop;
                    *p = '\0';
                    start = atoi(slotsdef);
                    stop = atoi(p + 1);
                    while (start <= stop) {
                        int slot = start++;
                        node->slots[node->slots_count++] = slot;
                    }
                } else if (p > slotsdef) {
                    int slot = atoi(slotsdef);
                    node->slots[node->slots_count++] = slot;
                }
            }
        }
        if (node->slots_count == 0) {
            fprintf(stderr,
                    "WARNING: Master node %s:%d has no slots, skipping...\n",
                    node->ip, node->port);
            continue;
        }
        if (!addClusterNode(node)) {
            success = 0;
            goto cleanup;
        }
    }
cleanup:
    if (ctx) redisFree(ctx);
    if (!success) {
        if (config.cluster_nodes) freeClusterNodes();
    }
    if (reply) freeReplyObject(reply);
    return success;
}
+
/* Request the current cluster slots configuration by calling CLUSTER SLOTS
 * and atomically update the slots after a successful reply.
 *
 * Returns 1 on success, 0 on failure, and -1 when the refresh was skipped:
 * either another thread already updated the slots (the caller's cached
 * generation was stale) or another thread is fetching right now. */
static int fetchClusterSlotsConfiguration(client c) {
    UNUSED(c); /* NOTE(review): looks stale — 'c' is dereferenced below. */
    int success = 1, is_fetching_slots = 0, last_update = 0;
    size_t i;
    atomicGet(config.slots_last_update, last_update);
    /* Another thread already refreshed the slots since this client last
     * looked: just adopt the new generation, nothing to fetch. */
    if (c->slots_last_update < last_update) {
        c->slots_last_update = last_update;
        return -1;
    }
    redisReply *reply = NULL;
    atomicGetIncr(config.is_fetching_slots, is_fetching_slots, 1);
    if (is_fetching_slots) return -1; //TODO: use other codes || errno ?
    atomicSet(config.is_fetching_slots, 1);
    fprintf(stderr,
            "WARNING: Cluster slots configuration changed, fetching new one...\n");
    const char *errmsg = "Failed to update cluster slots configuration";
    static dictType dtype = {
        dictSdsHash,               /* hash function */
        NULL,                      /* key dup */
        NULL,                      /* val dup */
        dictSdsKeyCompare,         /* key compare */
        NULL,                      /* key destructor */
        NULL,                      /* val destructor */
        NULL                       /* allow to expand */
    };
    /* printf("[%d] fetchClusterSlotsConfiguration\n", c->thread_id); */
    /* Index the known masters by node id so CLUSTER SLOTS entries can be
     * matched back to them. */
    dict *masters = dictCreate(&dtype);
    redisContext *ctx = NULL;
    for (i = 0; i < (size_t) config.cluster_node_count; i++) {
        clusterNode *node = config.cluster_nodes[i];
        assert(node->ip != NULL);
        assert(node->name != NULL);
        assert(node->port);
        /* Use first node as entry point to connect to. */
        if (ctx == NULL) {
            ctx = getRedisContext(node->ip, node->port, NULL);
            if (!ctx) {
                success = 0;
                goto cleanup;
            }
        }
        /* Reset any pending update from a previous (failed) fetch. */
        if (node->updated_slots != NULL)
            zfree(node->updated_slots);
        node->updated_slots = NULL;
        node->updated_slots_count = 0;
        dictReplace(masters, node->name, node) ;
    }
    reply = redisCommand(ctx, "CLUSTER SLOTS");
    if (reply == NULL || reply->type == REDIS_REPLY_ERROR) {
        success = 0;
        if (reply)
            fprintf(stderr,"%s\nCLUSTER SLOTS ERROR: %s\n",errmsg,reply->str);
        goto cleanup;
    }
    assert(reply->type == REDIS_REPLY_ARRAY);
    /* Each element is [start-slot, end-slot, master-info, replicas...];
     * master-info is [ip, port, node-id, ...]. */
    for (i = 0; i < reply->elements; i++) {
        redisReply *r = reply->element[i];
        assert(r->type == REDIS_REPLY_ARRAY);
        assert(r->elements >= 3);
        int from, to, slot;
        from = r->element[0]->integer;
        to = r->element[1]->integer;
        redisReply *nr =  r->element[2];
        assert(nr->type == REDIS_REPLY_ARRAY && nr->elements >= 3);
        assert(nr->element[2]->str != NULL);
        sds name =  sdsnew(nr->element[2]->str);
        dictEntry *entry = dictFind(masters, name);
        if (entry == NULL) {
            success = 0;
            fprintf(stderr, "%s: could not find node with ID %s in current "
                            "configuration.\n", errmsg, name);
            if (name) sdsfree(name);
            goto cleanup;
        }
        sdsfree(name);
        clusterNode *node = dictGetVal(entry);
        if (node->updated_slots == NULL)
            node->updated_slots = zcalloc(CLUSTER_SLOTS * sizeof(int));
        for (slot = from; slot <= to; slot++)
            node->updated_slots[node->updated_slots_count++] = slot;
    }
    updateClusterSlotsConfiguration();
cleanup:
    freeReplyObject(reply);
    redisFree(ctx);
    dictRelease(masters);
    atomicSet(config.is_fetching_slots, 0);
    return success;
}
+
/* Atomically update the new slots configuration. The mutex doubles as a
 * barrier: threads that observe is_updating_slots call into this function
 * and block on the lock until the swap completes; by then every
 * updated_slots pointer is NULL again so their pass is a no-op. */
static void updateClusterSlotsConfiguration(void) {
    pthread_mutex_lock(&config.is_updating_slots_mutex);
    atomicSet(config.is_updating_slots, 1);
    int i;
    for (i = 0; i < config.cluster_node_count; i++) {
        clusterNode *node = config.cluster_nodes[i];
        if (node->updated_slots != NULL) {
            /* Swap in the freshly fetched slots array, free the old one. */
            int *oldslots = node->slots;
            node->slots = node->updated_slots;
            node->slots_count = node->updated_slots_count;
            node->current_slot_index = 0;
            node->updated_slots = NULL;
            node->updated_slots_count = 0;
            zfree(oldslots);
        }
    }
    atomicSet(config.is_updating_slots, 0);
    /* Bump the generation counter so clients can detect their cached view
     * of the slots is stale. */
    atomicIncr(config.slots_last_update, 1);
    pthread_mutex_unlock(&config.is_updating_slots_mutex);
}
+
/* Generate random data for redis benchmark. See #7196.
 * Fills 'data' with 'count' pseudo-random printable bytes ('0'..'0'+63)
 * using a private 32-bit LCG whose state persists across calls, so the
 * stream is deterministic for a whole process run. */
static void genBenchmarkRandomData(char *data, int count) {
    static uint32_t state = 1234;
    char *out = data;

    for (int remaining = count; remaining > 0; remaining--) {
        state = state * 1103515245 + 12345;
        *out++ = (char)('0' + ((state >> 16) & 63));
    }
}
+
/* Parse the redis-benchmark command line options into the global 'config'.
 *
 * Scanning starts at argv[1] and stops at the first argument that is not an
 * option: the remainder is treated as a user supplied command to benchmark.
 *
 * Returns the number of consumed arguments, i.e. the index of the first
 * non-option argument (or argc if everything was consumed). On an unknown
 * option, or an option whose required argument is missing, the usage text is
 * printed and the process exits (status 1, or 0 for --help). */
int parseOptions(int argc, char **argv) {
    int i;
    int lastarg;            /* true when argv[i] is the final argument, so it
                             * cannot be followed by an option value. */
    int exit_status = 1;
    char *tls_usage;

    for (i = 1; i < argc; i++) {
        lastarg = (i == (argc-1));

        if (!strcmp(argv[i],"-c")) {
            if (lastarg) goto invalid;
            config.numclients = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) {
            sds version = benchmarkVersion();
            printf("redis-benchmark %s\n", version);
            sdsfree(version);
            exit(0);
        } else if (!strcmp(argv[i],"-n")) {
            if (lastarg) goto invalid;
            config.requests = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"-k")) {
            if (lastarg) goto invalid;
            config.keepalive = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"-h")) {
            if (lastarg) goto invalid;
            /* Free the default "127.0.0.1" set by main() before replacing. */
            sdsfree(config.conn_info.hostip);
            config.conn_info.hostip = sdsnew(argv[++i]);
        } else if (!strcmp(argv[i],"-p")) {
            if (lastarg) goto invalid;
            config.conn_info.hostport = atoi(argv[++i]);
            if (config.conn_info.hostport < 0 || config.conn_info.hostport > 65535) {
                fprintf(stderr, "Invalid server port.\n");
                exit(1);
            }
        } else if (!strcmp(argv[i],"-s")) {
            if (lastarg) goto invalid;
            config.hostsocket = strdup(argv[++i]);
        } else if (!strcmp(argv[i],"-x")) {
            config.stdinarg = 1;
        } else if (!strcmp(argv[i],"-a") ) {
            if (lastarg) goto invalid;
            config.conn_info.auth = sdsnew(argv[++i]);
        } else if (!strcmp(argv[i],"--user")) {
            if (lastarg) goto invalid;
            config.conn_info.user = sdsnew(argv[++i]);
        } else if (!strcmp(argv[i],"-u") && !lastarg) {
            /* Note: unlike the other options, a trailing "-u" with no value
             * falls through to the final else branch and is reported there
             * as an invalid option. */
            parseRedisUri(argv[++i],"redis-benchmark",&config.conn_info,&config.tls);
            if (config.conn_info.hostport < 0 || config.conn_info.hostport > 65535) {
                fprintf(stderr, "Invalid server port.\n");
                exit(1);
            }
            config.input_dbnumstr = sdsfromlonglong(config.conn_info.input_dbnum);
        } else if (!strcmp(argv[i],"-3")) {
            config.resp3 = 1;
        } else if (!strcmp(argv[i],"-d")) {
            if (lastarg) goto invalid;
            /* Clamp the payload size to [1, 1GB]. */
            config.datasize = atoi(argv[++i]);
            if (config.datasize < 1) config.datasize=1;
            if (config.datasize > 1024*1024*1024) config.datasize = 1024*1024*1024;
        } else if (!strcmp(argv[i],"-P")) {
            if (lastarg) goto invalid;
            config.pipeline = atoi(argv[++i]);
            if (config.pipeline <= 0) config.pipeline=1;
        } else if (!strcmp(argv[i],"-r")) {
            if (lastarg) goto invalid;
            /* Accept a leading '-' only when followed by a digit, so a
             * negative keyspace length is tolerated (and clamped to 0 below)
             * while "-r -q"-style mistakes are rejected. */
            const char *next = argv[++i], *p = next;
            if (*p == '-') {
                p++;
                if (*p < '0' || *p > '9') goto invalid;
            }
            config.randomkeys = 1;
            config.randomkeys_keyspacelen = atoi(next);
            if (config.randomkeys_keyspacelen < 0)
                config.randomkeys_keyspacelen = 0;
        } else if (!strcmp(argv[i],"-q")) {
            config.quiet = 1;
        } else if (!strcmp(argv[i],"--csv")) {
            config.csv = 1;
        } else if (!strcmp(argv[i],"-l")) {
            config.loop = 1;
        } else if (!strcmp(argv[i],"-I")) {
            config.idlemode = 1;
        } else if (!strcmp(argv[i],"-e")) {
            fprintf(stderr,
                    "WARNING: -e option has no effect. "
                    "We now immediately exit on error to avoid false results.\n");
        } else if (!strcmp(argv[i],"--seed")) {
            if (lastarg) goto invalid;
            /* Seed both PRNGs so runs are reproducible. */
            int rand_seed = atoi(argv[++i]);
            srandom(rand_seed);
            init_genrand64(rand_seed);
        } else if (!strcmp(argv[i],"-t")) {
            if (lastarg) goto invalid;
            /* We get the list of tests to run as a string in the form
             * get,set,lrange,...,test_N. Then we add a comma before and
             * after the string in order to make sure that searching
             * for ",testname," will always get a match if the test is
             * enabled. */
            config.tests = sdsnew(",");
            config.tests = sdscat(config.tests,(char*)argv[++i]);
            config.tests = sdscat(config.tests,",");
            sdstolower(config.tests);
        } else if (!strcmp(argv[i],"--dbnum")) {
            if (lastarg) goto invalid;
            config.conn_info.input_dbnum = atoi(argv[++i]);
            config.input_dbnumstr = sdsfromlonglong(config.conn_info.input_dbnum);
        } else if (!strcmp(argv[i],"--precision")) {
            if (lastarg) goto invalid;
            config.precision = atoi(argv[++i]);
            if (config.precision < 0) config.precision = DEFAULT_LATENCY_PRECISION;
            if (config.precision > MAX_LATENCY_PRECISION) config.precision = MAX_LATENCY_PRECISION;
        } else if (!strcmp(argv[i],"--threads")) {
            if (lastarg) goto invalid;
            config.num_threads = atoi(argv[++i]);
            if (config.num_threads > MAX_THREADS) {
                fprintf(stderr,
                        "WARNING: Too many threads, limiting threads to %d.\n",
                        MAX_THREADS);
                config.num_threads = MAX_THREADS;
            } else if (config.num_threads < 0) config.num_threads = 0;
        } else if (!strcmp(argv[i],"--cluster")) {
            config.cluster_mode = 1;
        } else if (!strcmp(argv[i],"--enable-tracking")) {
            config.enable_tracking = 1;
        } else if (!strcmp(argv[i],"--help")) {
            exit_status = 0;
            goto usage;
#ifdef USE_OPENSSL
        } else if (!strcmp(argv[i],"--tls")) {
            config.tls = 1;
        } else if (!strcmp(argv[i],"--sni")) {
            if (lastarg) goto invalid;
            config.sslconfig.sni = strdup(argv[++i]);
        } else if (!strcmp(argv[i],"--cacertdir")) {
            if (lastarg) goto invalid;
            config.sslconfig.cacertdir = strdup(argv[++i]);
        } else if (!strcmp(argv[i],"--cacert")) {
            if (lastarg) goto invalid;
            config.sslconfig.cacert = strdup(argv[++i]);
        } else if (!strcmp(argv[i],"--insecure")) {
            config.sslconfig.skip_cert_verify = 1;
        } else if (!strcmp(argv[i],"--cert")) {
            if (lastarg) goto invalid;
            config.sslconfig.cert = strdup(argv[++i]);
        } else if (!strcmp(argv[i],"--key")) {
            if (lastarg) goto invalid;
            config.sslconfig.key = strdup(argv[++i]);
        } else if (!strcmp(argv[i],"--tls-ciphers")) {
            if (lastarg) goto invalid;
            config.sslconfig.ciphers = strdup(argv[++i]);
#ifdef TLS1_3_VERSION
        } else if (!strcmp(argv[i],"--tls-ciphersuites")) {
            if (lastarg) goto invalid;
            config.sslconfig.ciphersuites = strdup(argv[++i]);
#endif
#endif
        } else {
            /* Assume the user meant to provide an option when the arg starts
             * with a dash. We're done otherwise and should use the remainder
             * as the command and arguments for running the benchmark. */
            if (argv[i][0] == '-') goto invalid;
            return i;
        }
    }

    return i;

invalid:
    printf("Invalid option \"%s\" or option argument missing\n\n",argv[i]);

usage:
    tls_usage =
#ifdef USE_OPENSSL
" --tls                Establish a secure TLS connection.\n"
" --sni <host>         Server name indication for TLS.\n"
" --cacert <file>      CA Certificate file to verify with.\n"
" --cacertdir <dir>    Directory where trusted CA certificates are stored.\n"
"                      If neither cacert nor cacertdir are specified, the default\n"
"                      system-wide trusted root certs configuration will apply.\n"
" --insecure           Allow insecure TLS connection by skipping cert validation.\n"
" --cert <file>        Client certificate to authenticate with.\n"
" --key <file>         Private key file to authenticate with.\n"
" --tls-ciphers <list> Sets the list of preferred ciphers (TLSv1.2 and below)\n"
"                      in order of preference from highest to lowest separated by colon (\":\").\n"
"                      See the ciphers(1ssl) manpage for more information about the syntax of this string.\n"
#ifdef TLS1_3_VERSION
" --tls-ciphersuites <list> Sets the list of preferred ciphersuites (TLSv1.3)\n"
"                      in order of preference from highest to lowest separated by colon (\":\").\n"
"                      See the ciphers(1ssl) manpage for more information about the syntax of this string,\n"
"                      and specifically for TLSv1.3 ciphersuites.\n"
#endif
#endif
"";

    printf(
"%s%s%s", /* Split to avoid strings longer than 4095 (-Woverlength-strings). */
"Usage: redis-benchmark [OPTIONS] [COMMAND ARGS...]\n\n"
"Options:\n"
" -h <hostname>      Server hostname (default 127.0.0.1)\n"
" -p <port>          Server port (default 6379)\n"
" -s <socket>        Server socket (overrides host and port)\n"
" -a <password>      Password for Redis Auth\n"
" --user <username>  Used to send ACL style 'AUTH username pass'. Needs -a.\n"
" -u <uri>           Server URI.\n"
" -c <clients>       Number of parallel connections (default 50).\n"
"                    Note: If --cluster is used then number of clients has to be\n"
"                    the same or higher than the number of nodes.\n"
" -n <requests>      Total number of requests (default 100000)\n"
" -d <size>          Data size of SET/GET value in bytes (default 3)\n"
" --dbnum <db>       SELECT the specified db number (default 0)\n"
" -3                 Start session in RESP3 protocol mode.\n"
" --threads <num>    Enable multi-thread mode.\n"
" --cluster          Enable cluster mode.\n"
"                    If the command is supplied on the command line in cluster\n"
"                    mode, the key must contain \"{tag}\". Otherwise, the\n"
"                    command will not be sent to the right cluster node.\n"
" --enable-tracking  Send CLIENT TRACKING on before starting benchmark.\n"
" -k <boolean>       1=keep alive 0=reconnect (default 1)\n"
" -r <keyspacelen>   Use random keys for SET/GET/INCR, random values for SADD,\n"
"                    random members and scores for ZADD.\n"
"                    Using this option the benchmark will expand the string\n"
"                    __rand_int__ inside an argument with a 12 digits number in\n"
"                    the specified range from 0 to keyspacelen-1. The\n"
"                    substitution changes every time a command is executed.\n"
"                    Default tests use this to hit random keys in the specified\n"
"                    range.\n"
"                    Note: If -r is omitted, all commands in a benchmark will\n"
"                    use the same key.\n"
" -P <numreq>        Pipeline <numreq> requests. Default 1 (no pipeline).\n"
" -q                 Quiet. Just show query/sec values\n"
" --precision        Number of decimal places to display in latency output (default 0)\n"
" --csv              Output in CSV format\n"
" -l                 Loop. Run the tests forever\n"
" -t <tests>         Only run the comma separated list of tests. The test\n"
"                    names are the same as the ones produced as output.\n"
"                    The -t option is ignored if a specific command is supplied\n"
"                    on the command line.\n"
" -I                 Idle mode. Just open N idle connections and wait.\n"
" -x                 Read last argument from STDIN.\n"
" --seed <num>       Set the seed for random number generator. Default seed is based on time.\n",
tls_usage,
" --help             Output this help and exit.\n"
" --version          Output version and exit.\n\n"
"Examples:\n\n"
" Run the benchmark with the default configuration against 127.0.0.1:6379:\n"
"   $ redis-benchmark\n\n"
" Use 20 parallel clients, for a total of 100k requests, against 192.168.1.1:\n"
"   $ redis-benchmark -h 192.168.1.1 -p 6379 -n 100000 -c 20\n\n"
" Fill 127.0.0.1:6379 with about 1 million keys only using the SET test:\n"
"   $ redis-benchmark -t set -n 1000000 -r 100000000\n\n"
" Benchmark 127.0.0.1:6379 for a few commands producing CSV output:\n"
"   $ redis-benchmark -t ping,set,get -n 100000 --csv\n\n"
" Benchmark a specific command line:\n"
"   $ redis-benchmark -r 10000 -n 10000 eval 'return redis.call(\"ping\")' 0\n\n"
" Fill a list with 10000 random elements:\n"
"   $ redis-benchmark -r 10000 -n 10000 lpush mylist __rand_int__\n\n"
" On user specified command lines __rand_int__ is replaced with a random integer\n"
" with a range of values selected by the -r option.\n"
    );
    exit(exit_status);
}
+
+int showThroughput(struct aeEventLoop *eventLoop, long long id, void *clientData) {
+ UNUSED(eventLoop);
+ UNUSED(id);
+ benchmarkThread *thread = (benchmarkThread *)clientData;
+ int liveclients = 0;
+ int requests_finished = 0;
+ int previous_requests_finished = 0;
+ long long current_tick = mstime();
+ atomicGet(config.liveclients, liveclients);
+ atomicGet(config.requests_finished, requests_finished);
+ atomicGet(config.previous_requests_finished, previous_requests_finished);
+
+ if (liveclients == 0 && requests_finished != config.requests) {
+ fprintf(stderr,"All clients disconnected... aborting.\n");
+ exit(1);
+ }
+ if (config.num_threads && requests_finished >= config.requests) {
+ aeStop(eventLoop);
+ return AE_NOMORE;
+ }
+ if (config.csv) return SHOW_THROUGHPUT_INTERVAL;
+ /* only first thread output throughput */
+ if (thread != NULL && thread->index != 0) {
+ return SHOW_THROUGHPUT_INTERVAL;
+ }
+ if (config.idlemode == 1) {
+ printf("clients: %d\r", config.liveclients);
+ fflush(stdout);
+ return SHOW_THROUGHPUT_INTERVAL;
+ }
+ const float dt = (float)(current_tick-config.start)/1000.0;
+ const float rps = (float)requests_finished/dt;
+ const float instantaneous_dt = (float)(current_tick-config.previous_tick)/1000.0;
+ const float instantaneous_rps = (float)(requests_finished-previous_requests_finished)/instantaneous_dt;
+ config.previous_tick = current_tick;
+ atomicSet(config.previous_requests_finished,requests_finished);
+ printf("%*s\r", config.last_printed_bytes, " "); /* ensure there is a clean line */
+ int printed_bytes = printf("%s: rps=%.1f (overall: %.1f) avg_msec=%.3f (overall: %.3f)\r", config.title, instantaneous_rps, rps, hdr_mean(config.current_sec_latency_histogram)/1000.0f, hdr_mean(config.latency_histogram)/1000.0f);
+ config.last_printed_bytes = printed_bytes;
+ hdr_reset(config.current_sec_latency_histogram);
+ fflush(stdout);
+ return SHOW_THROUGHPUT_INTERVAL;
+}
+
+/* Return true if the named test was selected using the -t command line
+ * switch, or if all the tests are selected (no -t passed by user). */
+int test_is_selected(const char *name) {
+ char buf[256];
+ int l = strlen(name);
+
+ if (config.tests == NULL) return 1;
+ buf[0] = ',';
+ memcpy(buf+1,name,l);
+ buf[l+1] = ',';
+ buf[l+2] = '\0';
+ return strstr(config.tests,buf) != NULL;
+}
+
/* redis-benchmark entry point.
 *
 * Initializes the global 'config' with defaults, parses the command line,
 * optionally discovers the cluster topology, then either runs the user
 * supplied command or the default benchmark suite. Returns 0 on success;
 * fatal setup errors exit(1) from the helpers. */
int main(int argc, char **argv) {
    int i;
    char *data, *cmd, *tag;
    int len;

    client c;

    /* Default seeds; --seed overrides both PRNGs in parseOptions(). */
    srandom(time(NULL) ^ getpid());
    init_genrand64(ustime() ^ getpid());
    signal(SIGHUP, SIG_IGN);
    signal(SIGPIPE, SIG_IGN);

    /* Defaults; parseOptions() below overrides them from the command line. */
    memset(&config.sslconfig, 0, sizeof(config.sslconfig));
    config.numclients = 50;
    config.requests = 100000;
    config.liveclients = 0;
    config.el = aeCreateEventLoop(1024*10);
    aeCreateTimeEvent(config.el,1,showThroughput,NULL,NULL);
    config.keepalive = 1;
    config.datasize = 3;
    config.pipeline = 1;
    config.randomkeys = 0;
    config.randomkeys_keyspacelen = 0;
    config.quiet = 0;
    config.csv = 0;
    config.loop = 0;
    config.idlemode = 0;
    config.clients = listCreate();
    config.conn_info.hostip = sdsnew("127.0.0.1");
    config.conn_info.hostport = 6379;
    config.hostsocket = NULL;
    config.tests = NULL;
    config.conn_info.input_dbnum = 0;
    config.stdinarg = 0;
    config.conn_info.auth = NULL;
    config.precision = DEFAULT_LATENCY_PRECISION;
    config.num_threads = 0;
    config.threads = NULL;
    config.cluster_mode = 0;
    config.cluster_node_count = 0;
    config.cluster_nodes = NULL;
    config.redis_config = NULL;
    config.is_fetching_slots = 0;
    config.is_updating_slots = 0;
    config.slots_last_update = 0;
    config.enable_tracking = 0;
    config.resp3 = 0;

    /* Skip the consumed options; what remains is the optional command. */
    i = parseOptions(argc,argv);
    argc -= i;
    argv += i;

    tag = "";

#ifdef USE_OPENSSL
    if (config.tls) {
        cliSecureInit();
    }
#endif

    if (config.cluster_mode) {
        // We only include the slot placeholder {tag} if cluster mode is enabled
        tag = ":{tag}";

        /* Fetch cluster configuration. */
        if (!fetchClusterConfiguration() || !config.cluster_nodes) {
            if (!config.hostsocket) {
                fprintf(stderr, "Failed to fetch cluster configuration from "
                                "%s:%d\n", config.conn_info.hostip, config.conn_info.hostport);
            } else {
                fprintf(stderr, "Failed to fetch cluster configuration from "
                                "%s\n", config.hostsocket);
            }
            exit(1);
        }
        if (config.cluster_node_count <= 1) {
            fprintf(stderr, "Invalid cluster: %d node(s).\n",
                    config.cluster_node_count);
            exit(1);
        }
        printf("Cluster has %d master nodes:\n\n", config.cluster_node_count);
        int i = 0;  /* NOTE: intentionally shadows the outer 'i'. */
        for (; i < config.cluster_node_count; i++) {
            clusterNode *node = config.cluster_nodes[i];
            if (!node) {
                fprintf(stderr, "Invalid cluster node #%d\n", i);
                exit(1);
            }
            printf("Master %d: ", i);
            if (node->name) printf("%s ", node->name);
            printf("%s:%d\n", node->ip, node->port);
            /* Per-node CONFIG is informational only; a failed fetch is not
             * fatal. */
            node->redis_config = getRedisConfig(node->ip, node->port, NULL);
            if (node->redis_config == NULL) {
                fprintf(stderr, "WARNING: Could not fetch node CONFIG %s:%d\n",
                        node->ip, node->port);
            }
        }
        printf("\n");
        /* Automatically set thread number to node count if not specified
         * by the user. */
        if (config.num_threads == 0)
            config.num_threads = config.cluster_node_count;
    } else {
        config.redis_config =
            getRedisConfig(config.conn_info.hostip, config.conn_info.hostport, config.hostsocket);
        if (config.redis_config == NULL) {
            fprintf(stderr, "WARNING: Could not fetch server CONFIG\n");
        }
    }
    if (config.num_threads > 0) {
        pthread_mutex_init(&(config.liveclients_mutex), NULL);
        pthread_mutex_init(&(config.is_updating_slots_mutex), NULL);
    }

    if (config.keepalive == 0) {
        fprintf(stderr,
                "WARNING: Keepalive disabled. You probably need "
                "'echo 1 > /proc/sys/net/ipv4/tcp_tw_reuse' for Linux and "
                "'sudo sysctl -w net.inet.tcp.msl=1000' for Mac OS X in order "
                "to use a lot of clients/requests\n");
    }
    if (argc > 0 && config.tests != NULL) {
        fprintf(stderr, "WARNING: Option -t is ignored.\n");
    }

    if (config.idlemode) {
        printf("Creating %d idle connections and waiting forever (Ctrl+C when done)\n", config.numclients);
        int thread_id = -1, use_threads = (config.num_threads > 0);
        if (use_threads) {
            thread_id = 0;
            initBenchmarkThreads();
        }
        c = createClient("",0,NULL,thread_id); /* will never receive a reply */
        createMissingClients(c);
        if (use_threads) startBenchmarkThreads();
        else aeMain(config.el);
        /* and will wait forever */
    }
    if(config.csv){
        printf("\"test\",\"rps\",\"avg_latency_ms\",\"min_latency_ms\",\"p50_latency_ms\",\"p95_latency_ms\",\"p99_latency_ms\",\"max_latency_ms\"\n");
    }
    /* Run benchmark with command in the remainder of the arguments. */
    if (argc) {
        /* The title is just the command joined with spaces, used in the
         * printed/CSV report. */
        sds title = sdsnew(argv[0]);
        for (i = 1; i < argc; i++) {
            title = sdscatlen(title, " ", 1);
            title = sdscatlen(title, (char*)argv[i], strlen(argv[i]));
        }
        sds *sds_args = getSdsArrayFromArgv(argc, argv, 0);
        if (!sds_args) {
            fprintf(stderr, "Invalid quoted string\n");
            return 1;
        }
        if (config.stdinarg) {
            /* -x: append one extra argument read from standard input. */
            sds_args = sds_realloc(sds_args,(argc + 1) * sizeof(sds));
            sds_args[argc] = readArgFromStdin();
            argc++;
        }
        do {
            len = redisFormatCommandArgv(&cmd,argc,(const char**)sds_args,NULL);
            // adjust the datasize to the parsed command
            config.datasize = len;
            benchmark(title,cmd,len);
            free(cmd);
        } while(config.loop);
        sdsfreesplitres(sds_args, argc);

        sdsfree(title);
        if (config.redis_config != NULL) freeRedisConfig(config.redis_config);
        return 0;
    }

    /* Run default benchmark suite. */
    data = zmalloc(config.datasize+1);
    do {
        /* Fresh random payload for every loop iteration. */
        genBenchmarkRandomData(data, config.datasize);
        data[config.datasize] = '\0';

        if (test_is_selected("ping_inline") || test_is_selected("ping"))
            benchmark("PING_INLINE","PING\r\n",6);

        if (test_is_selected("ping_mbulk") || test_is_selected("ping")) {
            len = redisFormatCommand(&cmd,"PING");
            benchmark("PING_MBULK",cmd,len);
            free(cmd);
        }

        if (test_is_selected("set")) {
            len = redisFormatCommand(&cmd,"SET key%s:__rand_int__ %s",tag,data);
            benchmark("SET",cmd,len);
            free(cmd);
        }

        if (test_is_selected("get")) {
            len = redisFormatCommand(&cmd,"GET key%s:__rand_int__",tag);
            benchmark("GET",cmd,len);
            free(cmd);
        }

        if (test_is_selected("incr")) {
            len = redisFormatCommand(&cmd,"INCR counter%s:__rand_int__",tag);
            benchmark("INCR",cmd,len);
            free(cmd);
        }

        if (test_is_selected("lpush")) {
            len = redisFormatCommand(&cmd,"LPUSH mylist%s %s",tag,data);
            benchmark("LPUSH",cmd,len);
            free(cmd);
        }

        if (test_is_selected("rpush")) {
            len = redisFormatCommand(&cmd,"RPUSH mylist%s %s",tag,data);
            benchmark("RPUSH",cmd,len);
            free(cmd);
        }

        if (test_is_selected("lpop")) {
            len = redisFormatCommand(&cmd,"LPOP mylist%s",tag);
            benchmark("LPOP",cmd,len);
            free(cmd);
        }

        if (test_is_selected("rpop")) {
            len = redisFormatCommand(&cmd,"RPOP mylist%s",tag);
            benchmark("RPOP",cmd,len);
            free(cmd);
        }

        if (test_is_selected("sadd")) {
            len = redisFormatCommand(&cmd,
                "SADD myset%s element:__rand_int__",tag);
            benchmark("SADD",cmd,len);
            free(cmd);
        }

        if (test_is_selected("hset")) {
            len = redisFormatCommand(&cmd,
                "HSET myhash%s element:__rand_int__ %s",tag,data);
            benchmark("HSET",cmd,len);
            free(cmd);
        }

        if (test_is_selected("spop")) {
            len = redisFormatCommand(&cmd,"SPOP myset%s",tag);
            benchmark("SPOP",cmd,len);
            free(cmd);
        }

        if (test_is_selected("zadd")) {
            /* With -r, scores are randomized too, not just members. */
            char *score = "0";
            if (config.randomkeys) score = "__rand_int__";
            len = redisFormatCommand(&cmd,
                "ZADD myzset%s %s element:__rand_int__",tag,score);
            benchmark("ZADD",cmd,len);
            free(cmd);
        }

        if (test_is_selected("zpopmin")) {
            len = redisFormatCommand(&cmd,"ZPOPMIN myzset%s",tag);
            benchmark("ZPOPMIN",cmd,len);
            free(cmd);
        }

        if (test_is_selected("lrange") ||
            test_is_selected("lrange_100") ||
            test_is_selected("lrange_300") ||
            test_is_selected("lrange_500") ||
            test_is_selected("lrange_600"))
        {
            len = redisFormatCommand(&cmd,"LPUSH mylist%s %s",tag,data);
            benchmark("LPUSH (needed to benchmark LRANGE)",cmd,len);
            free(cmd);
        }

        if (test_is_selected("lrange") || test_is_selected("lrange_100")) {
            len = redisFormatCommand(&cmd,"LRANGE mylist%s 0 99",tag);
            benchmark("LRANGE_100 (first 100 elements)",cmd,len);
            free(cmd);
        }

        if (test_is_selected("lrange") || test_is_selected("lrange_300")) {
            len = redisFormatCommand(&cmd,"LRANGE mylist%s 0 299",tag);
            benchmark("LRANGE_300 (first 300 elements)",cmd,len);
            free(cmd);
        }

        if (test_is_selected("lrange") || test_is_selected("lrange_500")) {
            len = redisFormatCommand(&cmd,"LRANGE mylist%s 0 499",tag);
            benchmark("LRANGE_500 (first 500 elements)",cmd,len);
            free(cmd);
        }

        if (test_is_selected("lrange") || test_is_selected("lrange_600")) {
            len = redisFormatCommand(&cmd,"LRANGE mylist%s 0 599",tag);
            benchmark("LRANGE_600 (first 600 elements)",cmd,len);
            free(cmd);
        }

        if (test_is_selected("mset")) {
            /* MSET with 10 key/value pairs: argv[0] is the command name,
             * then alternating key/value entries. */
            const char *cmd_argv[21];
            cmd_argv[0] = "MSET";
            sds key_placeholder = sdscatprintf(sdsnew(""),"key%s:__rand_int__",tag);
            for (i = 1; i < 21; i += 2) {
                cmd_argv[i] = key_placeholder;
                cmd_argv[i+1] = data;
            }
            len = redisFormatCommandArgv(&cmd,21,cmd_argv,NULL);
            benchmark("MSET (10 keys)",cmd,len);
            free(cmd);
            sdsfree(key_placeholder);
        }

        if (test_is_selected("xadd")) {
            len = redisFormatCommand(&cmd,"XADD mystream%s * myfield %s", tag, data);
            benchmark("XADD",cmd,len);
            free(cmd);
        }

        if (!config.csv) printf("\n");
    } while(config.loop);

    zfree(data);
    freeCliConnInfo(config.conn_info);
    if (config.redis_config != NULL) freeRedisConfig(config.redis_config);

    return 0;
}
diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c
new file mode 100644
index 0000000..616177a
--- /dev/null
+++ b/src/redis-check-aof.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <regex.h>
+#include <libgen.h>
+
+#define AOF_CHECK_OK 0
+#define AOF_CHECK_EMPTY 1
+#define AOF_CHECK_TRUNCATED 2
+#define AOF_CHECK_TIMESTAMP_TRUNCATED 3
+
+typedef enum {
+ AOF_RESP,
+ AOF_RDB_PREAMBLE,
+ AOF_MULTI_PART,
+} input_file_type;
+
+aofManifest *aofManifestCreate(void);
+void aofManifestFree(aofManifest *am);
+aofManifest *aofLoadManifestFromFile(sds am_filepath);
+
+#define ERROR(...) { \
+ char __buf[1024]; \
+ snprintf(__buf, sizeof(__buf), __VA_ARGS__); \
+ snprintf(error, sizeof(error), "0x%16llx: %s", (long long)epos, __buf); \
+}
+
+static char error[1044];
+static off_t epos;
+static long long line = 1;
+static time_t to_timestamp = 0;
+
+int consumeNewline(char *buf) {
+ if (strncmp(buf,"\r\n",2) != 0) {
+ ERROR("Expected \\r\\n, got: %02x%02x",buf[0],buf[1]);
+ return 0;
+ }
+ line += 1;
+ return 1;
+}
+
+int readLong(FILE *fp, char prefix, long *target) {
+ char buf[128], *eptr;
+ epos = ftello(fp);
+ if (fgets(buf,sizeof(buf),fp) == NULL) {
+ return 0;
+ }
+ if (buf[0] != prefix) {
+ ERROR("Expected prefix '%c', got: '%c'",prefix,buf[0]);
+ return 0;
+ }
+ *target = strtol(buf+1,&eptr,10);
+ return consumeNewline(eptr);
+}
+
+int readBytes(FILE *fp, char *target, long length) {
+ long real;
+ epos = ftello(fp);
+ real = fread(target,1,length,fp);
+ if (real != length) {
+ ERROR("Expected to read %ld bytes, got %ld bytes",length,real);
+ return 0;
+ }
+ return 1;
+}
+
/* Read one RESP bulk string "$<len>\r\n<payload>\r\n" from 'fp'.
 *
 * On success *target points to a zmalloc'ed, NUL-terminated copy of the
 * payload owned by the caller (free with zfree) and 1 is returned. On any
 * failure *target is left NULL, the allocation (if any) is released, and 0
 * is returned with the global 'error' buffer describing the problem. */
int readString(FILE *fp, char** target) {
    long len;
    *target = NULL;
    if (!readLong(fp,'$',&len)) {
        return 0;
    }

    /* Reject negative lengths and lengths that would overflow once the
     * two terminator bytes are added below. */
    if (len < 0 || len > LONG_MAX - 2) {
        ERROR("Expected to read string of %ld bytes, which is not in the suitable range",len);
        return 0;
    }

    /* Increase length to also consume \r\n */
    len += 2;
    *target = (char*)zmalloc(len);
    if (!readBytes(fp,*target,len)) {
        zfree(*target);
        *target = NULL;
        return 0;
    }
    if (!consumeNewline(*target+len-2)) {
        zfree(*target);
        *target = NULL;
        return 0;
    }
    /* Overwrite the '\r' so the payload is a plain C string. */
    (*target)[len-2] = '\0';
    return 1;
}
+
/* Read a RESP multi-bulk header "*<count>\r\n", storing the argument count
 * in *target. Returns 1 on success, 0 on failure (see readLong). */
int readArgc(FILE *fp, long *target) {
    return readLong(fp,'*',target);
}
+
/* Used to decode a RESP record in the AOF file to obtain the original
 * redis command, and also check whether the command is MULTI/EXEC. If the
 * command is MULTI, the parameter out_multi will be incremented by one, and
 * if the command is EXEC, the parameter out_multi will be decremented
 * by one. The parameter out_multi will be used by the upper caller to determine
 * whether the AOF file contains unclosed transactions.
 *
 * Returns 1 when a full command was consumed, 0 on a malformed record or on
 * a nested MULTI / unmatched EXEC (the 'error' buffer is filled in). */
int processRESP(FILE *fp, char *filename, int *out_multi) {
    long argc;
    char *str;

    if (!readArgc(fp, &argc)) return 0;

    for (int i = 0; i < argc; i++) {
        if (!readString(fp, &str)) return 0;
        /* Only the first argument (the command name) matters for the
         * MULTI/EXEC bookkeeping. */
        if (i == 0) {
            if (strcasecmp(str, "multi") == 0) {
                /* Post-increment: nonzero means we were already inside a
                 * MULTI, i.e. a nested transaction. */
                if ((*out_multi)++) {
                    ERROR("Unexpected MULTI in AOF %s", filename);
                    zfree(str);
                    return 0;
                }
            } else if (strcasecmp(str, "exec") == 0) {
                /* Pre-decrement: nonzero means the EXEC did not close an
                 * open MULTI. */
                if (--(*out_multi)) {
                    ERROR("Unexpected EXEC in AOF %s", filename);
                    zfree(str);
                    return 0;
                }
            }
        }
        zfree(str);
    }

    return 1;
}
+
/* Used to parse an annotation in the AOF file, the annotation starts with '#'
 * in AOF. Currently AOF only contains timestamp annotations, but this function
 * can easily be extended to handle other annotations.
 *
 * The processing rule of time annotation is that once the timestamp is found to
 * be greater than 'to_timestamp', the AOF after the annotation is truncated.
 * Note that in Multi Part AOF, this truncation is only allowed when the last_file
 * parameter is 1.
 *
 * Returns 1 to continue scanning, 0 when the file was truncated at this
 * annotation. Unrecoverable conditions terminate the process directly.
 **/
int processAnnotations(FILE *fp, char *filename, int last_file) {
    char buf[AOF_ANNOTATION_LINE_MAX_LEN];

    /* Remember where the annotation starts: this is the truncation point. */
    epos = ftello(fp);
    if (fgets(buf, sizeof(buf), fp) == NULL) {
        printf("Failed to read annotations from AOF %s, aborting...\n", filename);
        exit(1);
    }

    /* Timestamp annotations are only acted upon in --truncate-to-timestamp
     * mode (to_timestamp != 0). */
    if (to_timestamp && strncmp(buf, "#TS:", 4) == 0) {
        char *endptr;
        errno = 0;
        time_t ts = strtol(buf+4, &endptr, 10);
        /* A valid annotation ends with "\r\n" right after the number. */
        if (errno != 0 || *endptr != '\r') {
            printf("Invalid timestamp annotation\n");
            exit(1);
        }
        if (ts <= to_timestamp) return 1;
        /* Refuse to truncate the file down to zero bytes. */
        if (epos == 0) {
            printf("AOF %s has nothing before timestamp %ld, "
                   "aborting...\n", filename, to_timestamp);
            exit(1);
        }
        /* Multi Part AOF: only the last file in the manifest may be cut. */
        if (!last_file) {
            printf("Failed to truncate AOF %s to timestamp %ld to offset %ld because it is not the last file.\n",
                   filename, to_timestamp, (long int)epos);
            printf("If you insist, please delete all files after this file according to the manifest "
                   "file and delete the corresponding records in manifest file manually. Then re-run redis-check-aof.\n");
            exit(1);
        }
        /* Truncate remaining AOF if exceeding 'to_timestamp' */
        if (ftruncate(fileno(fp), epos) == -1) {
            printf("Failed to truncate AOF %s to timestamp %ld\n",
                   filename, to_timestamp);
            exit(1);
        } else {
            return 0;
        }
    }
    return 1;
}
+
/* Used to check the validity of a single AOF file. The AOF file can be:
 * 1. Old-style AOF
 * 2. Old-style RDB-preamble AOF
 * 3. BASE or INCR in Multi Part AOF
 *
 * Scans the file record by record. With 'fix' set, interactively truncates
 * trailing garbage (only allowed when 'last_file' is true); with 'preamble'
 * set, validates the leading RDB payload first via redis_check_rdb_main().
 *
 * Returns one of the AOF_CHECK_* codes; irrecoverable problems exit(1).
 * */
int checkSingleAof(char *aof_filename, char *aof_filepath, int last_file, int fix, int preamble) {
    off_t pos = 0, diff;
    int multi = 0;           /* Depth of an open MULTI transaction. */
    char buf[2];

    FILE *fp = fopen(aof_filepath, "r+");
    if (fp == NULL) {
        printf("Cannot open file %s: %s, aborting...\n", aof_filepath, strerror(errno));
        exit(1);
    }

    struct redis_stat sb;
    if (redis_fstat(fileno(fp),&sb) == -1) {
        printf("Cannot stat file: %s, aborting...\n", aof_filename);
        exit(1);
    }

    off_t size = sb.st_size;
    if (size == 0) {
        /* NOTE(review): 'fp' is not closed on this path. */
        return AOF_CHECK_EMPTY;
    }

    if (preamble) {
        char *argv[2] = {NULL, aof_filepath};
        if (redis_check_rdb_main(2, argv, fp) == C_ERR) {
            printf("RDB preamble of AOF file is not sane, aborting.\n");
            exit(1);
        } else {
            printf("RDB preamble is OK, proceeding with AOF tail...\n");
        }
    }

    while(1) {
        /* Remember the start of the last known-good record; inside a MULTI
         * we keep pointing at the MULTI itself so truncation never leaves a
         * half transaction behind. */
        if (!multi) pos = ftello(fp);
        /* Peek one character (fgets with a 2-byte buffer reads exactly one)
         * to dispatch on the record type, then seek back. */
        if (fgets(buf, sizeof(buf), fp) == NULL) {
            if (feof(fp)) {
                break;
            }
            printf("Failed to read from AOF %s, aborting...\n", aof_filename);
            exit(1);
        }

        if (fseek(fp, -1, SEEK_CUR) == -1) {
            printf("Failed to fseek in AOF %s: %s", aof_filename, strerror(errno));
            exit(1);
        }

        if (buf[0] == '#') {
            /* Annotation line (e.g. timestamp); may truncate the file. */
            if (!processAnnotations(fp, aof_filepath, last_file)) {
                fclose(fp);
                return AOF_CHECK_TIMESTAMP_TRUNCATED;
            }
        } else if (buf[0] == '*'){
            if (!processRESP(fp, aof_filepath, &multi)) break;
        } else {
            printf("AOF %s format error\n", aof_filename);
            break;
        }
    }

    /* EOF inside an open MULTI is an error unless a more specific one was
     * already recorded. */
    if (feof(fp) && multi && strlen(error) == 0) {
        ERROR("Reached EOF before reading EXEC for MULTI");
    }

    if (strlen(error) > 0) {
        printf("%s\n", error);
    }

    diff = size-pos;    /* Bytes past the last valid record. */

    /* In truncate-to-timestamp mode, just exit if there is nothing to truncate. */
    if (diff == 0 && to_timestamp) {
        printf("Truncate nothing in AOF %s to timestamp %ld\n", aof_filename, to_timestamp);
        fclose(fp);
        return AOF_CHECK_OK;
    }

    printf("AOF analyzed: filename=%s, size=%lld, ok_up_to=%lld, ok_up_to_line=%lld, diff=%lld\n",
           aof_filename, (long long) size, (long long) pos, line, (long long) diff);
    if (diff > 0) {
        if (fix) {
            if (!last_file) {
                printf("Failed to truncate AOF %s because it is not the last file\n", aof_filename);
                exit(1);
            }

            /* Ask for confirmation before destructive truncation. NOTE: this
             * inner 'buf' shadows the outer one. */
            char buf[2];
            printf("This will shrink the AOF %s from %lld bytes, with %lld bytes, to %lld bytes\n",
                   aof_filename, (long long)size, (long long)diff, (long long)pos);
            printf("Continue? [y/N]: ");
            if (fgets(buf, sizeof(buf), stdin) == NULL || strncasecmp(buf, "y", 1) != 0) {
                printf("Aborting...\n");
                exit(1);
            }
            if (ftruncate(fileno(fp), pos) == -1) {
                printf("Failed to truncate AOF %s\n", aof_filename);
                exit(1);
            } else {
                fclose(fp);
                return AOF_CHECK_TRUNCATED;
            }
        } else {
            printf("AOF %s is not valid. Use the --fix option to try fixing it.\n", aof_filename);
            exit(1);
        }
    }
    fclose(fp);
    return AOF_CHECK_OK;
}
+
+/* Used to determine whether the file is a RDB file. These two possibilities:
+ * 1. The file is an old style RDB-preamble AOF
+ * 2. The file is a BASE AOF in Multi Part AOF
+ * */
+int fileIsRDB(char *filepath) {
+ FILE *fp = fopen(filepath, "r");
+ if (fp == NULL) {
+ printf("Cannot open file %s: %s\n", filepath, strerror(errno));
+ exit(1);
+ }
+
+ struct redis_stat sb;
+ if (redis_fstat(fileno(fp), &sb) == -1) {
+ printf("Cannot stat file: %s\n", filepath);
+ exit(1);
+ }
+
+ off_t size = sb.st_size;
+ if (size == 0) {
+ fclose(fp);
+ return 0;
+ }
+
+ if (size >= 8) { /* There must be at least room for the RDB header. */
+ char sig[5];
+ int rdb_file = fread(sig, sizeof(sig), 1, fp) == 1 &&
+ memcmp(sig, "REDIS", sizeof(sig)) == 0;
+ if (rdb_file) {
+ fclose(fp);
+ return 1;
+ }
+ }
+
+ fclose(fp);
+ return 0;
+}
+
+/* Used to determine whether the file is a manifest file. */
+#define MANIFEST_MAX_LINE 1024
+int fileIsManifest(char *filepath) {
+    int is_manifest = 0;
+    FILE *fp = fopen(filepath, "r");
+    if (fp == NULL) {
+        printf("Cannot open file %s: %s\n", filepath, strerror(errno));
+        exit(1);
+    }
+
+    struct redis_stat sb;
+    if (redis_fstat(fileno(fp), &sb) == -1) {
+        printf("Cannot stat file: %s\n", filepath);
+        exit(1);
+    }
+
+    /* An empty file is never considered a manifest. */
+    off_t size = sb.st_size;
+    if (size == 0) {
+        fclose(fp);
+        return 0;
+    }
+
+    /* Scan the file line by line looking for a record line. */
+    char buf[MANIFEST_MAX_LINE+1];
+    while (1) {
+        if (fgets(buf, MANIFEST_MAX_LINE+1, fp) == NULL) {
+            if (feof(fp)) {
+                break;
+            } else {
+                printf("Cannot read file: %s\n", filepath);
+                exit(1);
+            }
+        }
+
+        /* Skip comments lines */
+        if (buf[0] == '#') {
+            continue;
+        } else if (!memcmp(buf, "file", strlen("file"))) {
+            /* A line starting with "file" marks the manifest format.
+             * NOTE(review): this is a prefix match, and lines longer than
+             * MANIFEST_MAX_LINE are read in chunks, so a continuation chunk
+             * that happens to begin with "file" would also match — presumably
+             * acceptable for a heuristic; confirm against the manifest
+             * format spec. */
+            is_manifest = 1;
+        }
+    }
+
+    fclose(fp);
+    return is_manifest;
+}
+
+/* Get the format of the file to be checked. It can be:
+ * AOF_RESP: Old-style AOF
+ * AOF_RDB_PREAMBLE: Old-style RDB-preamble AOF
+ * AOF_MULTI_PART: manifest in Multi Part AOF
+ *
+ * redis-check-aof tool will automatically perform different
+ * verification logic according to different file formats.
+ *
+ * The manifest probe runs first since it is the most specific format;
+ * anything that is neither a manifest nor an RDB is treated as RESP. */
+input_file_type getInputFileType(char *filepath) {
+    if (fileIsManifest(filepath)) return AOF_MULTI_PART;
+    if (fileIsRDB(filepath)) return AOF_RDB_PREAMBLE;
+    return AOF_RESP;
+}
+
+/* Print the human readable outcome of checking one AOF file. 'ret' is one
+ * of the AOF_CHECK_* result codes; any other value prints nothing. */
+void printAofStyle(int ret, char *aofFileName, char *aofType) {
+    switch (ret) {
+    case AOF_CHECK_OK:
+        printf("%s %s is valid\n", aofType, aofFileName);
+        break;
+    case AOF_CHECK_EMPTY:
+        printf("%s %s is empty\n", aofType, aofFileName);
+        break;
+    case AOF_CHECK_TIMESTAMP_TRUNCATED:
+        printf("Successfully truncated AOF %s to timestamp %ld\n",
+            aofFileName, to_timestamp);
+        break;
+    case AOF_CHECK_TRUNCATED:
+        printf("Successfully truncated AOF %s\n", aofFileName);
+        break;
+    default:
+        break;
+    }
+}
+
+/* Check if Multi Part AOF is valid. It will check the BASE file and INCR files
+ * at once according to the manifest instructions (this is somewhat similar to
+ * redis' AOF loading).
+ *
+ * When the verification is successful, we can guarantee:
+ * 1. The manifest file format is valid
+ * 2. Both BASE AOF and INCR AOFs format are valid
+ * 3. No BASE or INCR AOFs files are missing
+ *
+ * Note that in Multi Part AOF, we only allow truncation for the last AOF file.
+ * */
+void checkMultiPartAof(char *dirpath, char *manifest_filepath, int fix) {
+    int total_num = 0, aof_num = 0, last_file;
+    int ret;
+
+    printf("Start checking Multi Part AOF\n");
+    aofManifest *am = aofLoadManifestFromFile(manifest_filepath);
+
+    /* Count the referenced AOF files up front so that, while iterating,
+     * we can tell which one is the last (the only one that may be
+     * truncated/fixed). */
+    if (am->base_aof_info) total_num++;
+    if (am->incr_aof_list) total_num += listLength(am->incr_aof_list);
+
+    if (am->base_aof_info) {
+        sds aof_filename = am->base_aof_info->file_name;
+        sds aof_filepath = makePath(dirpath, aof_filename);
+        last_file = ++aof_num == total_num;
+        /* The BASE file may be either RDB-preamble or plain RESP. */
+        int aof_preable = fileIsRDB(aof_filepath);
+
+        printf("Start to check BASE AOF (%s format).\n", aof_preable ? "RDB":"RESP");
+        ret = checkSingleAof(aof_filename, aof_filepath, last_file, fix, aof_preable);
+        printAofStyle(ret, aof_filename, (char *)"BASE AOF");
+        sdsfree(aof_filepath);
+    }
+
+    if (listLength(am->incr_aof_list)) {
+        listNode *ln;
+        listIter li;
+
+        printf("Start to check INCR files.\n");
+        listRewind(am->incr_aof_list, &li);
+        while ((ln = listNext(&li)) != NULL) {
+            aofInfo *ai = (aofInfo*)ln->value;
+            sds aof_filename = (char*)ai->file_name;
+            sds aof_filepath = makePath(dirpath, aof_filename);
+            last_file = ++aof_num == total_num;
+            /* INCR files are checked with preamble == 0, i.e. as RESP. */
+            ret = checkSingleAof(aof_filename, aof_filepath, last_file, fix, 0);
+            printAofStyle(ret, aof_filename, (char *)"INCR AOF");
+            sdsfree(aof_filepath);
+        }
+    }
+
+    /* checkSingleAof exits on unrecoverable errors, so reaching this
+     * point means everything referenced by the manifest checked out. */
+    aofManifestFree(am);
+    printf("All AOF files and manifest are valid\n");
+}
+
+/* Check if old style AOF is valid. Internally, it will identify whether
+ * the AOF is in RDB-preamble format, and will eventually call `checkSingleAof`
+ * to do the check. */
+void checkOldStyleAof(char *filepath, int fix, int preamble) {
+    printf("Start checking Old-Style AOF\n");
+    /* An old-style AOF is a single file, so it is always the "last file"
+     * (last_file == 1) and therefore eligible for truncation. */
+    int ret = checkSingleAof(filepath, filepath, 1, fix, preamble);
+    printAofStyle(ret, filepath, (char *)"AOF");
+}
+
+/* Entry point of the redis-check-aof tool.
+ *
+ * Usage:
+ *   redis-check-aof <file.manifest|file.aof>
+ *   redis-check-aof --fix <file.manifest|file.aof>
+ *   redis-check-aof --truncate-to-timestamp <timestamp> <file.aof>
+ *
+ * Detects the input file format (manifest, RDB-preamble AOF or plain
+ * RESP AOF) and dispatches to the matching verification routine.
+ * This function never returns normally: it exits with 0 on success and
+ * 1 on any error. */
+int redis_check_aof_main(int argc, char **argv) {
+    char *filepath;
+    char temp_filepath[PATH_MAX + 1];
+    char *dirpath;
+    int fix = 0;
+
+    if (argc < 2) {
+        goto invalid_args;
+    } else if (argc == 2) {
+        filepath = argv[1];
+    } else if (argc == 3) {
+        if (!strcmp(argv[1], "--fix")) {
+            filepath = argv[2];
+            fix = 1;
+        } else {
+            goto invalid_args;
+        }
+    } else if (argc == 4) {
+        if (!strcmp(argv[1], "--truncate-to-timestamp")) {
+            char *endptr;
+            errno = 0;
+            to_timestamp = strtol(argv[2], &endptr, 10);
+            if (errno != 0 || *endptr != '\0') {
+                printf("Invalid timestamp, aborting...\n");
+                exit(1);
+            }
+            filepath = argv[3];
+        } else {
+            goto invalid_args;
+        }
+    } else {
+        goto invalid_args;
+    }
+
+    /* Reject paths that would overflow temp_filepath in the memcpy
+     * below (the buffer holds at most PATH_MAX bytes plus the NUL). */
+    if (strlen(filepath) > PATH_MAX) {
+        printf("File path is too long (%zu bytes, max %d)\n",
+            strlen(filepath), PATH_MAX);
+        exit(1);
+    }
+
+    /* In the glibc implementation dirname may modify their argument. */
+    memcpy(temp_filepath, filepath, strlen(filepath) + 1);
+    dirpath = dirname(temp_filepath);
+
+    /* Select the corresponding verification method according to the input file type. */
+    input_file_type type = getInputFileType(filepath);
+    switch (type) {
+    case AOF_MULTI_PART:
+        checkMultiPartAof(dirpath, filepath, fix);
+        break;
+    case AOF_RESP:
+        checkOldStyleAof(filepath, fix, 0);
+        break;
+    case AOF_RDB_PREAMBLE:
+        checkOldStyleAof(filepath, fix, 1);
+        break;
+    }
+
+    exit(0);
+
+invalid_args:
+    printf("Usage: %s [--fix|--truncate-to-timestamp $timestamp] <file.manifest|file.aof>\n",
+        argv[0]);
+    exit(1);
+}
diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c
new file mode 100644
index 0000000..682135e
--- /dev/null
+++ b/src/redis-check-rdb.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mt19937-64.h"
+#include "server.h"
+#include "rdb.h"
+
+#include <stdarg.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+void createSharedObjects(void);
+void rdbLoadProgressCallback(rio *r, const void *buf, size_t len);
+int rdbCheckMode = 0;
+
+/* Global state of the RDB check, used by the error reporting functions
+ * to print context (offset, current key, current phase) on failure. */
+struct {
+    rio *rio;                      /* Rio stream of the RDB being checked. */
+    robj *key; /* Current key we are reading. */
+    int key_type; /* Current key type if != -1. */
+    unsigned long keys; /* Number of keys processed. */
+    unsigned long expires; /* Number of keys with an expire. */
+    unsigned long already_expired; /* Number of keys already expired. */
+    int doing; /* The state while reading the RDB. */
+    int error_set; /* True if error is populated. */
+    char error[1024];              /* Message set via rdbCheckSetError(). */
+} rdbstate;
+
+/* At every loading step try to remember what we were about to do, so that
+ * we can log this information when an error is encountered. */
+#define RDB_CHECK_DOING_START 0
+#define RDB_CHECK_DOING_READ_TYPE 1
+#define RDB_CHECK_DOING_READ_EXPIRE 2
+#define RDB_CHECK_DOING_READ_KEY 3
+#define RDB_CHECK_DOING_READ_OBJECT_VALUE 4
+#define RDB_CHECK_DOING_CHECK_SUM 5
+#define RDB_CHECK_DOING_READ_LEN 6
+#define RDB_CHECK_DOING_READ_AUX 7
+#define RDB_CHECK_DOING_READ_MODULE_AUX 8
+#define RDB_CHECK_DOING_READ_FUNCTIONS 9
+
+/* Human readable names for the loading phases, indexed by the
+ * RDB_CHECK_DOING_* constants above. */
+char *rdb_check_doing_string[] = {
+    "start",
+    "read-type",
+    "read-expire",
+    "read-key",
+    "read-object-value",
+    "check-sum",
+    "read-len",
+    "read-aux",
+    "read-module-aux",
+    "read-functions"
+};
+
+/* Human readable names for RDB value types, indexed by the on-disk RDB
+ * type byte. The empty string at index 8 is a placeholder — that type id
+ * is apparently unassigned (verify against rdb.h's RDB_TYPE_* list). */
+char *rdb_type_string[] = {
+    "string",
+    "list-linked",
+    "set-hashtable",
+    "zset-v1",
+    "hash-hashtable",
+    "zset-v2",
+    "module-pre-release",
+    "module-value",
+    "",
+    "hash-zipmap",
+    "list-ziplist",
+    "set-intset",
+    "zset-ziplist",
+    "hash-ziplist",
+    "quicklist",
+    "stream",
+    "hash-listpack",
+    "zset-listpack",
+    "quicklist-v2",
+    "set-listpack",
+};
+
+/* Show a few stats collected into 'rdbstate': number of keys read, how
+ * many carried an expire, and how many of those were already expired. */
+void rdbShowGenericInfo(void) {
+    printf("[info] %lu keys read\n", rdbstate.keys);
+    printf("[info] %lu expires\n", rdbstate.expires);
+    printf("[info] %lu already expired\n", rdbstate.already_expired);
+}
+
+/* Called on RDB errors. Provides details about the RDB and the offset
+ * we were when the error was detected: the formatted message, the stream
+ * offset, the current phase, and (when known) the key/type being read. */
+void rdbCheckError(const char *fmt, ...) {
+    char msg[1024];
+    va_list ap;
+
+    va_start(ap, fmt);
+    vsnprintf(msg, sizeof(msg), fmt, ap);
+    va_end(ap);
+
+    printf("--- RDB ERROR DETECTED ---\n");
+    /* rdbstate.rio may still be NULL if the error fires before the
+     * stream is set up; report offset 0 in that case. */
+    printf("[offset %llu] %s\n",
+        (unsigned long long) (rdbstate.rio ?
+            rdbstate.rio->processed_bytes : 0), msg);
+    printf("[additional info] While doing: %s\n",
+        rdb_check_doing_string[rdbstate.doing]);
+    if (rdbstate.key)
+        printf("[additional info] Reading key '%s'\n",
+            (char*)rdbstate.key->ptr);
+    /* Guard against type ids beyond the rdb_type_string table. */
+    if (rdbstate.key_type != -1)
+        printf("[additional info] Reading type %d (%s)\n",
+            rdbstate.key_type,
+            ((unsigned)rdbstate.key_type <
+             sizeof(rdb_type_string)/sizeof(char*)) ?
+                rdb_type_string[rdbstate.key_type] : "unknown");
+    rdbShowGenericInfo();
+}
+
+/* Print information during RDB checking: the formatted message prefixed
+ * with the current offset inside the RDB stream (0 before setup). */
+void rdbCheckInfo(const char *fmt, ...) {
+    char msg[1024];
+    va_list ap;
+
+    va_start(ap, fmt);
+    vsnprintf(msg, sizeof(msg), fmt, ap);
+    va_end(ap);
+
+    printf("[offset %llu] %s\n",
+        (unsigned long long) (rdbstate.rio ?
+            rdbstate.rio->processed_bytes : 0), msg);
+}
+
+/* Used inside rdb.c in order to log specific errors happening inside
+ * the RDB loading internals. The message is stored in rdbstate.error
+ * (truncated by vsnprintf if longer than the buffer) and reported later
+ * by redis_check_rdb() when the load fails. */
+void rdbCheckSetError(const char *fmt, ...) {
+    va_list ap;
+
+    va_start(ap, fmt);
+    vsnprintf(rdbstate.error, sizeof(rdbstate.error), fmt, ap);
+    va_end(ap);
+    rdbstate.error_set = 1;
+}
+
+/* During RDB check we setup a special signal handler for memory violations
+ * and similar conditions, so that we can log the offending part of the RDB
+ * if the crash is due to broken content. The process exits with status 1
+ * after reporting. */
+void rdbCheckHandleCrash(int sig, siginfo_t *info, void *secret) {
+    UNUSED(sig);
+    UNUSED(info);
+    UNUSED(secret);
+
+    rdbCheckError("Server crash checking the specified RDB file!");
+    exit(1);
+}
+
+/* Install rdbCheckHandleCrash() for the fatal signals so a crash caused
+ * by corrupted RDB content is reported together with the RDB context. */
+void rdbCheckSetupSignals(void) {
+    struct sigaction act;
+
+    sigemptyset(&act.sa_mask);
+    /* SA_RESETHAND restores the default disposition after the first
+     * delivery, so a second fault inside the handler terminates us
+     * instead of looping. */
+    act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
+    act.sa_sigaction = rdbCheckHandleCrash;
+    sigaction(SIGSEGV, &act, NULL);
+    sigaction(SIGBUS, &act, NULL);
+    sigaction(SIGFPE, &act, NULL);
+    sigaction(SIGILL, &act, NULL);
+    sigaction(SIGABRT, &act, NULL);
+}
+
+/* Check the specified RDB file. Return 0 if the RDB looks sane, otherwise
+ * 1 is returned.
+ * The file is specified as a filename in 'rdbfilename' if 'fp' is NULL,
+ * otherwise the already open file 'fp' is checked. */
+int redis_check_rdb(char *rdbfilename, FILE *fp) {
+    uint64_t dbid;
+    int selected_dbid = -1;
+    int type, rdbver;
+    char buf[1024];
+    long long expiretime, now = mstime();
+    static rio rdb; /* Pointed by global struct riostate. */
+    struct stat sb;
+
+    int closefile = (fp == NULL);
+    if (fp == NULL && (fp = fopen(rdbfilename,"r")) == NULL) return 1;
+
+    /* If we cannot stat the file, just report size 0 to the loading
+     * progress machinery. */
+    if (fstat(fileno(fp), &sb) == -1)
+        sb.st_size = 0;
+
+    startLoadingFile(sb.st_size, rdbfilename, RDBFLAGS_NONE);
+    rioInitWithFile(&rdb,fp);
+    rdbstate.rio = &rdb;
+    rdb.update_cksum = rdbLoadProgressCallback;
+    /* Validate the 9-byte "REDIS<version>" header. */
+    if (rioRead(&rdb,buf,9) == 0) goto eoferr;
+    buf[9] = '\0';
+    if (memcmp(buf,"REDIS",5) != 0) {
+        rdbCheckError("Wrong signature trying to load DB from file");
+        goto err;
+    }
+    rdbver = atoi(buf+5);
+    if (rdbver < 1 || rdbver > RDB_VERSION) {
+        rdbCheckError("Can't handle RDB format version %d",rdbver);
+        goto err;
+    }
+
+    expiretime = -1;
+    while(1) {
+        robj *key, *val;
+
+        /* Read type. */
+        rdbstate.doing = RDB_CHECK_DOING_READ_TYPE;
+        if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
+
+        /* Handle special types. */
+        if (type == RDB_OPCODE_EXPIRETIME) {
+            rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE;
+            /* EXPIRETIME: load an expire associated with the next key
+             * to load. Note that after loading an expire we need to
+             * load the actual type, and continue. */
+            expiretime = rdbLoadTime(&rdb);
+            expiretime *= 1000;
+            if (rioGetReadError(&rdb)) goto eoferr;
+            continue; /* Read next opcode. */
+        } else if (type == RDB_OPCODE_EXPIRETIME_MS) {
+            /* EXPIRETIME_MS: milliseconds precision expire times introduced
+             * with RDB v3. Like EXPIRETIME but with more precision. */
+            rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE;
+            expiretime = rdbLoadMillisecondTime(&rdb, rdbver);
+            if (rioGetReadError(&rdb)) goto eoferr;
+            continue; /* Read next opcode. */
+        } else if (type == RDB_OPCODE_FREQ) {
+            /* FREQ: LFU frequency. */
+            uint8_t byte;
+            if (rioRead(&rdb,&byte,1) == 0) goto eoferr;
+            continue; /* Read next opcode. */
+        } else if (type == RDB_OPCODE_IDLE) {
+            /* IDLE: LRU idle time. */
+            if (rdbLoadLen(&rdb,NULL) == RDB_LENERR) goto eoferr;
+            continue; /* Read next opcode. */
+        } else if (type == RDB_OPCODE_EOF) {
+            /* EOF: End of file, exit the main loop. */
+            break;
+        } else if (type == RDB_OPCODE_SELECTDB) {
+            /* SELECTDB: Select the specified database. */
+            rdbstate.doing = RDB_CHECK_DOING_READ_LEN;
+            if ((dbid = rdbLoadLen(&rdb,NULL)) == RDB_LENERR)
+                goto eoferr;
+            rdbCheckInfo("Selecting DB ID %llu", (unsigned long long)dbid);
+            selected_dbid = dbid;
+            continue; /* Read type again. */
+        } else if (type == RDB_OPCODE_RESIZEDB) {
+            /* RESIZEDB: Hint about the size of the keys in the currently
+             * selected data base, in order to avoid useless rehashing. */
+            uint64_t db_size, expires_size;
+            rdbstate.doing = RDB_CHECK_DOING_READ_LEN;
+            if ((db_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR)
+                goto eoferr;
+            if ((expires_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR)
+                goto eoferr;
+            continue; /* Read type again. */
+        } else if (type == RDB_OPCODE_AUX) {
+            /* AUX: generic string-string fields. Use to add state to RDB
+             * which is backward compatible. Implementations of RDB loading
+             * are required to skip AUX fields they don't understand.
+             *
+             * An AUX field is composed of two strings: key and value. */
+            robj *auxkey, *auxval;
+            rdbstate.doing = RDB_CHECK_DOING_READ_AUX;
+            if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+            if ((auxval = rdbLoadStringObject(&rdb)) == NULL) {
+                decrRefCount(auxkey);
+                goto eoferr;
+            }
+
+            rdbCheckInfo("AUX FIELD %s = '%s'",
+                (char*)auxkey->ptr, (char*)auxval->ptr);
+            decrRefCount(auxkey);
+            decrRefCount(auxval);
+            continue; /* Read type again. */
+        } else if (type == RDB_OPCODE_MODULE_AUX) {
+            /* AUX: Auxiliary data for modules. */
+            uint64_t moduleid, when_opcode, when;
+            rdbstate.doing = RDB_CHECK_DOING_READ_MODULE_AUX;
+            if ((moduleid = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) goto eoferr;
+            if ((when_opcode = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) goto eoferr;
+            if ((when = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) goto eoferr;
+            if (when_opcode != RDB_MODULE_OPCODE_UINT) {
+                rdbCheckError("bad when_opcode");
+                goto err;
+            }
+
+            char name[10];
+            moduleTypeNameByID(name,moduleid);
+            rdbCheckInfo("MODULE AUX for: %s", name);
+
+            robj *o = rdbLoadCheckModuleValue(&rdb,name);
+            decrRefCount(o);
+            continue; /* Read type again. */
+        } else if (type == RDB_OPCODE_FUNCTION_PRE_GA) {
+            rdbCheckError("Pre-release function format not supported %d",rdbver);
+            goto err;
+        } else if (type == RDB_OPCODE_FUNCTION2) {
+            sds err = NULL;
+            rdbstate.doing = RDB_CHECK_DOING_READ_FUNCTIONS;
+            if (rdbFunctionLoad(&rdb, rdbver, NULL, 0, &err) != C_OK) {
+                rdbCheckError("Failed loading library, %s", err);
+                sdsfree(err);
+                goto err;
+            }
+            continue;
+        } else {
+            if (!rdbIsObjectType(type)) {
+                rdbCheckError("Invalid object type: %d", type);
+                goto err;
+            }
+            rdbstate.key_type = type;
+        }
+
+        /* Read key */
+        rdbstate.doing = RDB_CHECK_DOING_READ_KEY;
+        if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+        rdbstate.key = key;
+        rdbstate.keys++;
+        /* Read value */
+        rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE;
+        if ((val = rdbLoadObject(type,&rdb,key->ptr,selected_dbid,NULL)) == NULL) goto eoferr;
+        /* Check if the key already expired. */
+        if (expiretime != -1 && expiretime < now)
+            rdbstate.already_expired++;
+        if (expiretime != -1) rdbstate.expires++;
+        rdbstate.key = NULL;
+        decrRefCount(key);
+        decrRefCount(val);
+        rdbstate.key_type = -1;
+        expiretime = -1;
+    }
+    /* Verify the checksum if RDB version is >= 5 */
+    if (rdbver >= 5 && server.rdb_checksum) {
+        uint64_t cksum, expected = rdb.cksum;
+
+        rdbstate.doing = RDB_CHECK_DOING_CHECK_SUM;
+        if (rioRead(&rdb,&cksum,8) == 0) goto eoferr;
+        memrev64ifbe(&cksum);
+        if (cksum == 0) {
+            rdbCheckInfo("RDB file was saved with checksum disabled: no check performed.");
+        } else if (cksum != expected) {
+            rdbCheckError("RDB CRC error");
+            goto err;
+        } else {
+            rdbCheckInfo("Checksum OK");
+        }
+    }
+
+    if (closefile) fclose(fp);
+    stopLoading(1);
+    return 0;
+
+eoferr: /* unexpected end of file is handled here with a fatal exit */
+    if (rdbstate.error_set) {
+        /* Pass the stored message through an explicit "%s": the error text
+         * may embed bytes read from the (corrupted) RDB, so it must never
+         * be interpreted as a format string. */
+        rdbCheckError("%s", rdbstate.error);
+    } else {
+        rdbCheckError("Unexpected EOF reading RDB file");
+    }
+err:
+    if (closefile) fclose(fp);
+    stopLoading(0);
+    return 1;
+}
+
+/* Build the version string shown by "redis-check-rdb --version",
+ * appending git SHA1/dirty information when the build embeds it. */
+static sds checkRdbVersion(void) {
+    sds version = sdscatprintf(sdsempty(), "%s", REDIS_VERSION);
+
+    if (strtoll(redisGitSHA1(), NULL, 16) != 0) {
+        version = sdscatprintf(version, " (git:%s", redisGitSHA1());
+        if (strtoll(redisGitDirty(), NULL, 10) != 0)
+            version = sdscat(version, "-dirty");
+        version = sdscat(version, ")");
+    }
+    return version;
+}
+
+/* RDB check main: called from server.c when Redis is executed with the
+ * redis-check-rdb alias, or during RDB loading errors.
+ *
+ * The function works in two ways: can be called with argc/argv as a
+ * standalone executable, or called with a non NULL 'fp' argument if we
+ * already have an open file to check. This happens when the function
+ * is used to check an RDB preamble inside an AOF file. In that mode the
+ * caller must still provide argv[1] as the name to report.
+ *
+ * When called with fp = NULL, the function never returns, but exits with the
+ * status code according to success (RDB is sane) or error (RDB is corrupted).
+ * Otherwise if called with a non NULL fp, the function returns C_OK or
+ * C_ERR depending on the success or failure. */
+int redis_check_rdb_main(int argc, char **argv, FILE *fp) {
+    struct timeval tv;
+
+    if (argc != 2 && fp == NULL) {
+        fprintf(stderr, "Usage: %s <rdb-file-name>\n", argv[0]);
+        exit(1);
+    } else if (argc >= 2 &&
+               (!strcmp(argv[1],"-v") || !strcmp(argv[1], "--version")))
+    {
+        /* The argc guard matters when 'fp' is non-NULL: that path skips
+         * the usage check above, and reading argv[1] blindly would be an
+         * out-of-bounds access for a short argv. */
+        sds version = checkRdbVersion();
+        printf("redis-check-rdb %s\n", version);
+        sdsfree(version);
+        exit(0);
+    }
+
+    gettimeofday(&tv, NULL);
+    init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
+
+    /* In order to call the loading functions we need to create the shared
+     * integer objects, however since this function may be called from
+     * an already initialized Redis instance, check if we really need to. */
+    if (shared.integers[0] == NULL)
+        createSharedObjects();
+    server.loading_process_events_interval_bytes = 0;
+    server.sanitize_dump_payload = SANITIZE_DUMP_YES;
+    rdbCheckMode = 1;
+    rdbCheckInfo("Checking RDB file %s", argv[1]);
+    rdbCheckSetupSignals();
+    int retval = redis_check_rdb(argv[1],fp);
+    if (retval == 0) {
+        rdbCheckInfo("\\o/ RDB looks OK! \\o/");
+        rdbShowGenericInfo();
+    }
+    if (fp) return (retval == 0) ? C_OK : C_ERR;
+    exit(retval);
+}
diff --git a/src/redis-cli.c b/src/redis-cli.c
new file mode 100644
index 0000000..3854701
--- /dev/null
+++ b/src/redis-cli.c
@@ -0,0 +1,9983 @@
+/* Redis CLI (command line interface)
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fmacros.h"
+#include "version.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <time.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <math.h>
+#include <termios.h>
+
+#include <hiredis.h>
+#ifdef USE_OPENSSL
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <hiredis_ssl.h>
+#endif
+#include <sdscompat.h> /* Use hiredis' sds compat header that maps sds calls to their hi_ variants */
+#include <sds.h> /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */
+#include "dict.h"
+#include "adlist.h"
+#include "zmalloc.h"
+#include "linenoise.h"
+#include "anet.h"
+#include "ae.h"
+#include "connection.h"
+#include "cli_common.h"
+#include "mt19937-64.h"
+
+#include "cli_commands.h"
+
+#define UNUSED(V) ((void) V)
+
+#define OUTPUT_STANDARD 0
+#define OUTPUT_RAW 1
+#define OUTPUT_CSV 2
+#define OUTPUT_JSON 3
+#define OUTPUT_QUOTED_JSON 4
+#define REDIS_CLI_KEEPALIVE_INTERVAL 15 /* seconds */
+#define REDIS_CLI_DEFAULT_PIPE_TIMEOUT 30 /* seconds */
+#define REDIS_CLI_HISTFILE_ENV "REDISCLI_HISTFILE"
+#define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history"
+#define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE"
+#define REDIS_CLI_RCFILE_DEFAULT ".redisclirc"
+#define REDIS_CLI_AUTH_ENV "REDISCLI_AUTH"
+#define REDIS_CLI_CLUSTER_YES_ENV "REDISCLI_CLUSTER_YES"
+
+#define CLUSTER_MANAGER_SLOTS 16384
+#define CLUSTER_MANAGER_PORT_INCR 10000 /* same as CLUSTER_PORT_INCR */
+#define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000
+#define CLUSTER_MANAGER_MIGRATE_PIPELINE 10
+#define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2
+
+#define CLUSTER_MANAGER_INVALID_HOST_ARG \
+ "[ERR] Invalid arguments: you need to pass either a valid " \
+ "address (ie. 120.0.0.1:7000) or space separated IP " \
+ "and port (ie. 120.0.0.1 7000)\n"
+#define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL)
+#define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) ((nodes)/((replicas) + 1))
+#define CLUSTER_MANAGER_COMMAND(n,...) \
+ (redisCommand((n)->context, __VA_ARGS__))
+
+#define CLUSTER_MANAGER_NODE_ARRAY_FREE(array) zfree((array)->alloc)
+
+#define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \
+ clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \
+ (n)->ip, (n)->port, (err));
+
+#define clusterManagerLogInfo(...) \
+ clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_INFO,__VA_ARGS__)
+
+#define clusterManagerLogErr(...) \
+ clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_ERR,__VA_ARGS__)
+
+#define clusterManagerLogWarn(...) \
+ clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_WARN,__VA_ARGS__)
+
+#define clusterManagerLogOk(...) \
+ clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_SUCCESS,__VA_ARGS__)
+
+#define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0
+#define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1
+#define CLUSTER_MANAGER_FLAG_FRIEND 1 << 2
+#define CLUSTER_MANAGER_FLAG_NOADDR 1 << 3
+#define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4
+#define CLUSTER_MANAGER_FLAG_FAIL 1 << 5
+
+#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0
+#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1
+#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2
+#define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3
+#define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4
+#define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5
+#define CLUSTER_MANAGER_CMD_FLAG_REPLACE 1 << 6
+#define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7
+#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8
+#define CLUSTER_MANAGER_CMD_FLAG_CHECK_OWNERS 1 << 9
+#define CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS 1 << 10
+#define CLUSTER_MANAGER_CMD_FLAG_MASTERS_ONLY 1 << 11
+#define CLUSTER_MANAGER_CMD_FLAG_SLAVES_ONLY 1 << 12
+
+#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0
+#define CLUSTER_MANAGER_OPT_COLD 1 << 1
+#define CLUSTER_MANAGER_OPT_UPDATE 1 << 2
+#define CLUSTER_MANAGER_OPT_QUIET 1 << 6
+#define CLUSTER_MANAGER_OPT_VERBOSE 1 << 7
+
+#define CLUSTER_MANAGER_LOG_LVL_INFO 1
+#define CLUSTER_MANAGER_LOG_LVL_WARN 2
+#define CLUSTER_MANAGER_LOG_LVL_ERR 3
+#define CLUSTER_MANAGER_LOG_LVL_SUCCESS 4
+
+#define CLUSTER_JOIN_CHECK_AFTER 20
+
+#define LOG_COLOR_BOLD "29;1m"
+#define LOG_COLOR_RED "31;1m"
+#define LOG_COLOR_GREEN "32;1m"
+#define LOG_COLOR_YELLOW "33;1m"
+#define LOG_COLOR_RESET "0m"
+
+/* cliConnect() flags. */
+#define CC_FORCE (1<<0) /* Re-connect if already connected. */
+#define CC_QUIET (1<<1) /* Don't log connecting errors. */
+
+/* DNS lookup */
+#define NET_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46 */
+
+/* --latency-dist palettes. */
+int spectrum_palette_color_size = 19;
+int spectrum_palette_color[] = {0,233,234,235,237,239,241,243,245,247,144,143,142,184,226,214,208,202,196};
+
+int spectrum_palette_mono_size = 13;
+int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253};
+
+/* The actual palette in use. */
+int *spectrum_palette;
+int spectrum_palette_size;
+
+static int orig_termios_saved = 0;
+static struct termios orig_termios; /* To restore terminal at exit.*/
+
+/* Dict Helpers */
+static uint64_t dictSdsHash(const void *key);
+static int dictSdsKeyCompare(dict *d, const void *key1,
+ const void *key2);
+static void dictSdsDestructor(dict *d, void *val);
+static void dictListDestructor(dict *d, void *val);
+
+/* Cluster Manager Command Info */
+typedef struct clusterManagerCommand {
+ char *name;
+ int argc;
+ char **argv;
+ sds stdin_arg; /* arg from stdin. (-X option) */
+ int flags;
+ int replicas;
+ char *from;
+ char *to;
+ char **weight;
+ int weight_argc;
+ char *master_id;
+ int slots;
+ int timeout;
+ int pipeline;
+ float threshold;
+ char *backup_dir;
+ char *from_user;
+ char *from_pass;
+ int from_askpass;
+} clusterManagerCommand;
+
+static int createClusterManagerCommand(char *cmdname, int argc, char **argv);
+
+
+static redisContext *context;
+static struct config {
+ cliConnInfo conn_info;
+ char *hostsocket;
+ int tls;
+ cliSSLconfig sslconfig;
+ long repeat;
+ long interval;
+ int dbnum; /* db num currently selected */
+ int interactive;
+ int shutdown;
+ int monitor_mode;
+ int pubsub_mode;
+ int blocking_state_aborted; /* used to abort monitor_mode and pubsub_mode. */
+ int latency_mode;
+ int latency_dist_mode;
+ int latency_history;
+ int lru_test_mode;
+ long long lru_test_sample_size;
+ int cluster_mode;
+ int cluster_reissue_command;
+ int cluster_send_asking;
+ int slave_mode;
+ int pipe_mode;
+ int pipe_timeout;
+ int getrdb_mode;
+ int get_functions_rdb_mode;
+ int stat_mode;
+ int scan_mode;
+ int count;
+ int intrinsic_latency_mode;
+ int intrinsic_latency_duration;
+ sds pattern;
+ char *rdb_filename;
+ int bigkeys;
+ int memkeys;
+ unsigned memkeys_samples;
+ int hotkeys;
+ int stdin_lastarg; /* get last arg from stdin. (-x option) */
+ int stdin_tag_arg; /* get <tag> arg from stdin. (-X option) */
+ char *stdin_tag_name; /* Placeholder(tag name) for user input. */
+ int askpass;
+ int quoted_input; /* Force input args to be treated as quoted strings */
+ int output; /* output mode, see OUTPUT_* defines */
+ int push_output; /* Should we display spontaneous PUSH replies */
+ sds mb_delim;
+ sds cmd_delim;
+ char prompt[128];
+ char *eval;
+ int eval_ldb;
+ int eval_ldb_sync; /* Ask for synchronous mode of the Lua debugger. */
+ int eval_ldb_end; /* Lua debugging session ended. */
+ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */
+ int last_cmd_type;
+ redisReply *last_reply;
+ int verbose;
+ int set_errcode;
+ clusterManagerCommand cluster_manager_command;
+ int no_auth_warning;
+ int resp2; /* value of 1: specified explicitly with option -2 */
+ int resp3; /* value of 1: specified explicitly, value of 2: implicit like --json option */
+ int current_resp3; /* 1 if we have RESP3 right now in the current connection. */
+ int in_multi;
+ int pre_multi_dbnum;
+ char *server_version;
+ char *test_hint;
+ char *test_hint_file;
+} config;
+
+/* User preferences. */
+static struct pref {
+ int hints;
+} pref;
+
+static volatile sig_atomic_t force_cancel_loop = 0;
+static void usage(int err);
+static void slaveMode(int send_sync);
+char *redisGitSHA1(void);
+char *redisGitDirty(void);
+static int cliConnect(int flags);
+
+static char *getInfoField(char *info, char *field);
+static long getLongInfoField(char *info, char *field);
+
+/*------------------------------------------------------------------------------
+ * Utility functions
+ *--------------------------------------------------------------------------- */
+size_t redis_strlcpy(char *dst, const char *src, size_t dsize);
+
+static void cliPushHandler(void *, void *);
+
+uint16_t crc16(const char *buf, int len);
+
+/* Return the current wall-clock time in microseconds. */
+static long long ustime(void) {
+    struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+    return ((long long)tv.tv_sec) * 1000000 + tv.tv_usec;
+}
+
+/* Return the current wall-clock time in milliseconds (ustime()/1000). */
+static long long mstime(void) {
+    return ustime()/1000;
+}
+
+/* Rebuild the interactive prompt in config.prompt from the current
+ * connection state: host/socket, selected db number, MULTI (transaction)
+ * state and subscribe mode. */
+static void cliRefreshPrompt(void) {
+    /* In Lua debugger mode the prompt is not used. */
+    if (config.eval_ldb) return;
+
+    sds prompt = sdsempty();
+    if (config.hostsocket != NULL) {
+        prompt = sdscatfmt(prompt,"redis %s",config.hostsocket);
+    } else {
+        char addr[256];
+        formatAddr(addr, sizeof(addr), config.conn_info.hostip, config.conn_info.hostport);
+        prompt = sdscatlen(prompt,addr,strlen(addr));
+    }
+
+    /* Add [dbnum] if needed */
+    if (config.dbnum != 0)
+        prompt = sdscatfmt(prompt,"[%i]",config.dbnum);
+
+    /* Add TX if in transaction state*/
+    if (config.in_multi)
+        prompt = sdscatlen(prompt,"(TX)",4);
+
+    if (config.pubsub_mode)
+        prompt = sdscatfmt(prompt,"(subscribed mode)");
+
+    /* Copy the prompt in the static buffer (snprintf truncates if the
+     * built prompt exceeds sizeof(config.prompt)). */
+    prompt = sdscatlen(prompt,"> ",2);
+    snprintf(config.prompt,sizeof(config.prompt),"%s",prompt);
+    sdsfree(prompt);
+}
+
+/* Return the name of the dotfile for the specified 'dotfilename'.
+ * Normally it just concatenates user $HOME to the file specified
+ * in 'dotfilename'. However if the environment variable 'envoverride'
+ * is set, its value is taken as the path.
+ *
+ * The function returns NULL (if the file is /dev/null or cannot be
+ * obtained for some error), or an SDS string that must be freed by
+ * the user. */
+static sds getDotfilePath(char *envoverride, char *dotfilename) {
+    char *path = getenv(envoverride);
+
+    /* An explicit override wins; /dev/null disables the dotfile. */
+    if (path != NULL && *path != '\0') {
+        if (strcmp("/dev/null", path) == 0) return NULL;
+        return sdsnew(path);
+    }
+
+    /* Otherwise fall back to $HOME/<dotfilename>, if $HOME is usable. */
+    char *home = getenv("HOME");
+    if (home == NULL || *home == '\0') return NULL;
+    return sdscatprintf(sdsempty(), "%s/%s", home, dotfilename);
+}
+
/* dict hash callback: hash an sds key by its full binary content. */
static uint64_t dictSdsHash(const void *key) {
    char *s = (char *)key;
    return dictGenHashFunction((unsigned char *)s, sdslen(s));
}
+
+static int dictSdsKeyCompare(dict *d, const void *key1, const void *key2)
+{
+ int l1,l2;
+ UNUSED(d);
+
+ l1 = sdslen((sds)key1);
+ l2 = sdslen((sds)key2);
+ if (l1 != l2) return 0;
+ return memcmp(key1, key2, l1) == 0;
+}
+
+static void dictSdsDestructor(dict *d, void *val)
+{
+ UNUSED(d);
+ sdsfree(val);
+}
+
+void dictListDestructor(dict *d, void *val)
+{
+ UNUSED(d);
+ listRelease((list*)val);
+}
+
+/*------------------------------------------------------------------------------
+ * Help functions
+ *--------------------------------------------------------------------------- */
+
/* Kind of help entry: a concrete command, or a command group ("@generic"). */
#define CLI_HELP_COMMAND 1
#define CLI_HELP_GROUP 2

/* One entry of the interactive-help table. */
typedef struct {
    int type;       /* CLI_HELP_COMMAND or CLI_HELP_GROUP. */
    int argc;       /* Number of words in argv (1, or 2 for subcommands). */
    sds *argv;      /* Uppercased command words, e.g. {"ACL","SETUSER"}. */
    sds full;       /* Full name, words joined by spaces. */

    /* Only used for help on commands */
    struct commandDocs docs;
} helpEntry;

/* Global help table, populated lazily by cliInitHelp()/cliLegacyInitHelp(). */
static helpEntry *helpEntries = NULL;
static int helpEntriesLen = 0;
+
+static sds cliVersion(void) {
+ sds version;
+ version = sdscatprintf(sdsempty(), "%s", REDIS_VERSION);
+
+ /* Add git commit and working tree status when available */
+ if (strtoll(redisGitSHA1(),NULL,16)) {
+ version = sdscatprintf(version, " (git:%s", redisGitSHA1());
+ if (strtoll(redisGitDirty(),NULL,10))
+ version = sdscatprintf(version, "-dirty");
+ version = sdscat(version, ")");
+ }
+ return version;
+}
+
+/* For backwards compatibility with pre-7.0 servers.
+ * cliLegacyInitHelp() sets up the helpEntries array with the command and group
+ * names from the commands.c file. However the Redis instance we are connecting
+ * to may support more commands, so this function integrates the previous
+ * entries with additional entries obtained using the COMMAND command
+ * available in recent versions of Redis. */
+static void cliLegacyIntegrateHelp(void) {
+ if (cliConnect(CC_QUIET) == REDIS_ERR) return;
+
+ redisReply *reply = redisCommand(context, "COMMAND");
+ if(reply == NULL || reply->type != REDIS_REPLY_ARRAY) return;
+
+ /* Scan the array reported by COMMAND and fill only the entries that
+ * don't already match what we have. */
+ for (size_t j = 0; j < reply->elements; j++) {
+ redisReply *entry = reply->element[j];
+ if (entry->type != REDIS_REPLY_ARRAY || entry->elements < 4 ||
+ entry->element[0]->type != REDIS_REPLY_STRING ||
+ entry->element[1]->type != REDIS_REPLY_INTEGER ||
+ entry->element[3]->type != REDIS_REPLY_INTEGER) return;
+ char *cmdname = entry->element[0]->str;
+ int i;
+
+ for (i = 0; i < helpEntriesLen; i++) {
+ helpEntry *he = helpEntries+i;
+ if (!strcasecmp(he->argv[0],cmdname))
+ break;
+ }
+ if (i != helpEntriesLen) continue;
+
+ helpEntriesLen++;
+ helpEntries = zrealloc(helpEntries,sizeof(helpEntry)*helpEntriesLen);
+ helpEntry *new = helpEntries+(helpEntriesLen-1);
+
+ new->argc = 1;
+ new->argv = zmalloc(sizeof(sds));
+ new->argv[0] = sdsnew(cmdname);
+ new->full = new->argv[0];
+ new->type = CLI_HELP_COMMAND;
+ sdstoupper(new->argv[0]);
+
+ new->docs.name = new->argv[0];
+ new->docs.args = NULL;
+ new->docs.numargs = 0;
+ new->docs.params = sdsempty();
+ int args = llabs(entry->element[1]->integer);
+ args--; /* Remove the command name itself. */
+ if (entry->element[3]->integer == 1) {
+ new->docs.params = sdscat(new->docs.params,"key ");
+ args--;
+ }
+ while(args-- > 0) new->docs.params = sdscat(new->docs.params,"arg ");
+ if (entry->element[1]->integer < 0)
+ new->docs.params = sdscat(new->docs.params,"...options...");
+ new->docs.summary = "Help not available";
+ new->docs.since = "Not known";
+ new->docs.group = "generic";
+ }
+ freeReplyObject(reply);
+}
+
+/* Concatenate a string to an sds string, but if it's empty substitute double quote marks. */
+static sds sdscat_orempty(sds params, const char *value) {
+ if (value[0] == '\0') {
+ return sdscat(params, "\"\"");
+ }
+ return sdscat(params, value);
+}
+
+static sds makeHint(char **inputargv, int inputargc, int cmdlen, struct commandDocs docs);
+
+static void cliAddCommandDocArg(cliCommandArg *cmdArg, redisReply *argMap);
+
+static void cliMakeCommandDocArgs(redisReply *arguments, cliCommandArg *result) {
+ for (size_t j = 0; j < arguments->elements; j++) {
+ cliAddCommandDocArg(&result[j], arguments->element[j]);
+ }
+}
+
/* Fill one cliCommandArg from a single argument spec ('argMap': alternating
 * field-name / value pairs) of a COMMAND DOCS reply. Recurses through
 * cliMakeCommandDocArgs() for "oneof"/"block" container arguments.
 * Unrecognized keys are silently ignored. */
static void cliAddCommandDocArg(cliCommandArg *cmdArg, redisReply *argMap) {
    if (argMap->type != REDIS_REPLY_MAP && argMap->type != REDIS_REPLY_ARRAY) {
        return;
    }

    for (size_t i = 0; i < argMap->elements; i += 2) {
        assert(argMap->element[i]->type == REDIS_REPLY_STRING);
        char *key = argMap->element[i]->str;
        if (!strcmp(key, "name")) {
            assert(argMap->element[i + 1]->type == REDIS_REPLY_STRING);
            cmdArg->name = sdsnew(argMap->element[i + 1]->str);
        } else if (!strcmp(key, "display_text")) {
            assert(argMap->element[i + 1]->type == REDIS_REPLY_STRING);
            cmdArg->display_text = sdsnew(argMap->element[i + 1]->str);
        } else if (!strcmp(key, "token")) {
            assert(argMap->element[i + 1]->type == REDIS_REPLY_STRING);
            cmdArg->token = sdsnew(argMap->element[i + 1]->str);
        } else if (!strcmp(key, "type")) {
            assert(argMap->element[i + 1]->type == REDIS_REPLY_STRING);
            /* Map the textual type name to the cliCommandArg enum. An unknown
             * type name leaves cmdArg->type untouched. */
            char *type = argMap->element[i + 1]->str;
            if (!strcmp(type, "string")) {
                cmdArg->type = ARG_TYPE_STRING;
            } else if (!strcmp(type, "integer")) {
                cmdArg->type = ARG_TYPE_INTEGER;
            } else if (!strcmp(type, "double")) {
                cmdArg->type = ARG_TYPE_DOUBLE;
            } else if (!strcmp(type, "key")) {
                cmdArg->type = ARG_TYPE_KEY;
            } else if (!strcmp(type, "pattern")) {
                cmdArg->type = ARG_TYPE_PATTERN;
            } else if (!strcmp(type, "unix-time")) {
                cmdArg->type = ARG_TYPE_UNIX_TIME;
            } else if (!strcmp(type, "pure-token")) {
                cmdArg->type = ARG_TYPE_PURE_TOKEN;
            } else if (!strcmp(type, "oneof")) {
                cmdArg->type = ARG_TYPE_ONEOF;
            } else if (!strcmp(type, "block")) {
                cmdArg->type = ARG_TYPE_BLOCK;
            }
        } else if (!strcmp(key, "arguments")) {
            /* Container argument: allocate and fill the subargs recursively. */
            redisReply *arguments = argMap->element[i + 1];
            cmdArg->subargs = zcalloc(arguments->elements * sizeof(cliCommandArg));
            cmdArg->numsubargs = arguments->elements;
            cliMakeCommandDocArgs(arguments, cmdArg->subargs);
        } else if (!strcmp(key, "flags")) {
            redisReply *flags = argMap->element[i + 1];
            assert(flags->type == REDIS_REPLY_SET || flags->type == REDIS_REPLY_ARRAY);
            for (size_t j = 0; j < flags->elements; j++) {
                assert(flags->element[j]->type == REDIS_REPLY_STATUS);
                char *flag = flags->element[j]->str;
                if (!strcmp(flag, "optional")) {
                    cmdArg->flags |= CMD_ARG_OPTIONAL;
                } else if (!strcmp(flag, "multiple")) {
                    cmdArg->flags |= CMD_ARG_MULTIPLE;
                } else if (!strcmp(flag, "multiple_token")) {
                    cmdArg->flags |= CMD_ARG_MULTIPLE_TOKEN;
                }
            }
        }
    }
}
+
+/* Fill in the fields of a help entry for the command/subcommand name. */
+static void cliFillInCommandHelpEntry(helpEntry *help, char *cmdname, char *subcommandname) {
+ help->argc = subcommandname ? 2 : 1;
+ help->argv = zmalloc(sizeof(sds) * help->argc);
+ help->argv[0] = sdsnew(cmdname);
+ sdstoupper(help->argv[0]);
+ if (subcommandname) {
+ /* Subcommand name may be two words separated by a pipe character. */
+ char *pipe = strchr(subcommandname, '|');
+ if (pipe != NULL) {
+ help->argv[1] = sdsnew(pipe + 1);
+ } else {
+ help->argv[1] = sdsnew(subcommandname);
+ }
+ sdstoupper(help->argv[1]);
+ }
+ sds fullname = sdsnew(help->argv[0]);
+ if (subcommandname) {
+ fullname = sdscat(fullname, " ");
+ fullname = sdscat(fullname, help->argv[1]);
+ }
+ help->full = fullname;
+ help->type = CLI_HELP_COMMAND;
+
+ help->docs.name = help->full;
+ help->docs.params = NULL;
+ help->docs.args = NULL;
+ help->docs.numargs = 0;
+ help->docs.since = NULL;
+}
+
+/* Initialize a command help entry for the command/subcommand described in 'specs'.
+ * 'next' points to the next help entry to be filled in.
+ * 'groups' is a set of command group names to be filled in.
+ * Returns a pointer to the next available position in the help entries table.
+ * If the command has subcommands, this is called recursively for the subcommands.
+ */
static helpEntry *cliInitCommandHelpEntry(char *cmdname, char *subcommandname,
                                          helpEntry *next, redisReply *specs,
                                          dict *groups) {
    /* Consume one slot of the (pre-sized) help table for this command. */
    helpEntry *help = next++;
    cliFillInCommandHelpEntry(help, cmdname, subcommandname);

    /* 'specs' alternates field-name / value pairs. */
    assert(specs->type == REDIS_REPLY_MAP || specs->type == REDIS_REPLY_ARRAY);
    for (size_t j = 0; j < specs->elements; j += 2) {
        assert(specs->element[j]->type == REDIS_REPLY_STRING);
        char *key = specs->element[j]->str;
        if (!strcmp(key, "summary")) {
            redisReply *reply = specs->element[j + 1];
            assert(reply->type == REDIS_REPLY_STRING);
            help->docs.summary = sdsnew(reply->str);
        } else if (!strcmp(key, "since")) {
            redisReply *reply = specs->element[j + 1];
            assert(reply->type == REDIS_REPLY_STRING);
            help->docs.since = sdsnew(reply->str);
        } else if (!strcmp(key, "group")) {
            redisReply *reply = specs->element[j + 1];
            assert(reply->type == REDIS_REPLY_STRING);
            help->docs.group = sdsnew(reply->str);
            /* Collect the group name into the set; the dict owns the copy on
             * success, we free it when the group was already present. */
            sds group = sdsdup(help->docs.group);
            if (dictAdd(groups, group, NULL) != DICT_OK) {
                sdsfree(group);
            }
        } else if (!strcmp(key, "arguments")) {
            redisReply *arguments = specs->element[j + 1];
            assert(arguments->type == REDIS_REPLY_ARRAY);
            help->docs.args = zcalloc(arguments->elements * sizeof(cliCommandArg));
            help->docs.numargs = arguments->elements;
            cliMakeCommandDocArgs(arguments, help->docs.args);
            /* Pre-render the full hint string for this command. */
            help->docs.params = makeHint(NULL, 0, 0, help->docs);
        } else if (!strcmp(key, "subcommands")) {
            /* Recurse: each subcommand consumes its own slot in the table. */
            redisReply *subcommands = specs->element[j + 1];
            assert(subcommands->type == REDIS_REPLY_MAP || subcommands->type == REDIS_REPLY_ARRAY);
            for (size_t i = 0; i < subcommands->elements; i += 2) {
                assert(subcommands->element[i]->type == REDIS_REPLY_STRING);
                char *subcommandname = subcommands->element[i]->str;
                redisReply *subcommand = subcommands->element[i + 1];
                assert(subcommand->type == REDIS_REPLY_MAP || subcommand->type == REDIS_REPLY_ARRAY);
                next = cliInitCommandHelpEntry(cmdname, subcommandname, next, subcommand, groups);
            }
        }
    }
    return next;
}
+
+/* Returns the total number of commands and subcommands in the command docs table. */
+static size_t cliCountCommands(redisReply* commandTable) {
+ size_t numCommands = commandTable->elements / 2;
+
+ /* The command docs table maps command names to a map of their specs. */
+ for (size_t i = 0; i < commandTable->elements; i += 2) {
+ assert(commandTable->element[i]->type == REDIS_REPLY_STRING); /* Command name. */
+ assert(commandTable->element[i + 1]->type == REDIS_REPLY_MAP ||
+ commandTable->element[i + 1]->type == REDIS_REPLY_ARRAY);
+ redisReply *map = commandTable->element[i + 1];
+ for (size_t j = 0; j < map->elements; j += 2) {
+ assert(map->element[j]->type == REDIS_REPLY_STRING);
+ char *key = map->element[j]->str;
+ if (!strcmp(key, "subcommands")) {
+ redisReply *subcommands = map->element[j + 1];
+ assert(subcommands->type == REDIS_REPLY_MAP || subcommands->type == REDIS_REPLY_ARRAY);
+ numCommands += subcommands->elements / 2;
+ }
+ }
+ }
+ return numCommands;
+}
+
+/* Comparator for sorting help table entries. */
+int helpEntryCompare(const void *entry1, const void *entry2) {
+ helpEntry *i1 = (helpEntry *)entry1;
+ helpEntry *i2 = (helpEntry *)entry2;
+ return strcmp(i1->full, i2->full);
+}
+
+/* Initializes command help entries for command groups.
+ * Called after the command help entries have already been filled in.
+ * Extends the help table with new entries for the command groups.
+ */
+void cliInitGroupHelpEntries(dict *groups) {
+ dictIterator *iter = dictGetIterator(groups);
+ dictEntry *entry;
+ helpEntry tmp;
+
+ int numGroups = dictSize(groups);
+ int pos = helpEntriesLen;
+ helpEntriesLen += numGroups;
+ helpEntries = zrealloc(helpEntries, sizeof(helpEntry)*helpEntriesLen);
+
+ for (entry = dictNext(iter); entry != NULL; entry = dictNext(iter)) {
+ tmp.argc = 1;
+ tmp.argv = zmalloc(sizeof(sds));
+ tmp.argv[0] = sdscatprintf(sdsempty(),"@%s",(char *)dictGetKey(entry));
+ tmp.full = tmp.argv[0];
+ tmp.type = CLI_HELP_GROUP;
+ tmp.docs.name = NULL;
+ tmp.docs.params = NULL;
+ tmp.docs.args = NULL;
+ tmp.docs.numargs = 0;
+ tmp.docs.summary = NULL;
+ tmp.docs.since = NULL;
+ tmp.docs.group = NULL;
+ helpEntries[pos++] = tmp;
+ }
+ dictReleaseIterator(iter);
+}
+
+/* Initializes help entries for all commands in the COMMAND DOCS reply. */
+void cliInitCommandHelpEntries(redisReply *commandTable, dict *groups) {
+ helpEntry *next = helpEntries;
+ for (size_t i = 0; i < commandTable->elements; i += 2) {
+ assert(commandTable->element[i]->type == REDIS_REPLY_STRING);
+ char *cmdname = commandTable->element[i]->str;
+
+ assert(commandTable->element[i + 1]->type == REDIS_REPLY_MAP ||
+ commandTable->element[i + 1]->type == REDIS_REPLY_ARRAY);
+ redisReply *cmdspecs = commandTable->element[i + 1];
+ next = cliInitCommandHelpEntry(cmdname, NULL, next, cmdspecs, groups);
+ }
+}
+
+/* Does the server version support a command/argument only available "since" some version?
+ * Returns 1 when supported, or 0 when the "since" version is newer than "version". */
+static int versionIsSupported(sds version, sds since) {
+ int i;
+ char *versionPos = version;
+ char *sincePos = since;
+ if (!since) {
+ return 1;
+ }
+
+ for (i = 0; i != 3; i++) {
+ int versionPart = atoi(versionPos);
+ int sincePart = atoi(sincePos);
+ if (versionPart > sincePart) {
+ return 1;
+ } else if (sincePart > versionPart) {
+ return 0;
+ }
+ versionPos = strchr(versionPos, '.');
+ sincePos = strchr(sincePos, '.');
+ if (!versionPos || !sincePos)
+ return 0;
+ versionPos++;
+ sincePos++;
+ }
+ return 0;
+}
+
+static void removeUnsupportedArgs(struct cliCommandArg *args, int *numargs, sds version) {
+ int i = 0, j;
+ while (i != *numargs) {
+ if (versionIsSupported(version, args[i].since)) {
+ if (args[i].subargs) {
+ removeUnsupportedArgs(args[i].subargs, &args[i].numsubargs, version);
+ }
+ i++;
+ continue;
+ }
+ for (j = i; j != *numargs; j++) {
+ args[j] = args[j + 1];
+ }
+ (*numargs)--;
+ }
+}
+
/* Initialize a command help entry from the static commandDocs table entry
 * 'command', filtered by server 'version' when non-NULL. 'next' points to the
 * next free slot of the help table; returns the slot after everything added.
 * Recurses for subcommands. Group names are collected into 'groups'. */
static helpEntry *cliLegacyInitCommandHelpEntry(char *cmdname, char *subcommandname,
                                                helpEntry *next, struct commandDocs *command,
                                                dict *groups, sds version) {
    helpEntry *help = next++;
    cliFillInCommandHelpEntry(help, cmdname, subcommandname);

    help->docs.summary = sdsnew(command->summary);
    help->docs.since = sdsnew(command->since);
    help->docs.group = sdsnew(command->group);
    /* The dict owns the copy on success; free it when the group already existed. */
    sds group = sdsdup(help->docs.group);
    if (dictAdd(groups, group, NULL) != DICT_OK) {
        sdsfree(group);
    }

    if (command->args != NULL) {
        /* Note: the static args array is referenced (not copied) and is
         * filtered in place when a server version is known. */
        help->docs.args = command->args;
        help->docs.numargs = command->numargs;
        if (version)
            removeUnsupportedArgs(help->docs.args, &help->docs.numargs, version);
        help->docs.params = makeHint(NULL, 0, 0, help->docs);
    }

    if (command->subcommands != NULL) {
        for (size_t i = 0; command->subcommands[i].name != NULL; i++) {
            if (!version || versionIsSupported(version, command->subcommands[i].since)) {
                char *subcommandname = command->subcommands[i].name;
                next = cliLegacyInitCommandHelpEntry(
                    cmdname, subcommandname, next, &command->subcommands[i], groups, version);
            }
        }
    }
    return next;
}
+
+int cliLegacyInitCommandHelpEntries(struct commandDocs *commands, dict *groups, sds version) {
+ helpEntry *next = helpEntries;
+ for (size_t i = 0; commands[i].name != NULL; i++) {
+ if (!version || versionIsSupported(version, commands[i].since)) {
+ next = cliLegacyInitCommandHelpEntry(commands[i].name, NULL, next, &commands[i], groups, version);
+ }
+ }
+ return next - helpEntries;
+}
+
+/* Returns the total number of commands and subcommands in the command docs table,
+ * filtered by server version (if provided).
+ */
+static size_t cliLegacyCountCommands(struct commandDocs *commands, sds version) {
+ int numCommands = 0;
+ for (size_t i = 0; commands[i].name != NULL; i++) {
+ if (version && !versionIsSupported(version, commands[i].since)) {
+ continue;
+ }
+ numCommands++;
+ if (commands[i].subcommands != NULL) {
+ numCommands += cliLegacyCountCommands(commands[i].subcommands, version);
+ }
+ }
+ return numCommands;
+}
+
+/* Gets the server version string by calling INFO SERVER.
+ * Stores the result in config.server_version.
+ * When not connected, or not possible, returns NULL. */
static sds cliGetServerVersion(void) {
    static const char *key = "\nredis_version:";
    redisReply *serverInfo = NULL;
    char *pos;

    /* Return the cached value from a previous successful call. */
    if (config.server_version != NULL) {
        return config.server_version;
    }

    if (!context) return NULL;
    serverInfo = redisCommand(context, "INFO SERVER");
    if (serverInfo == NULL || serverInfo->type == REDIS_REPLY_ERROR) {
        /* NOTE(review): the header comment above says NULL is returned when
         * not possible, but an error reply yields an empty (non-cached) sds
         * here — confirm which contract callers rely on. */
        freeReplyObject(serverInfo);
        return sdsempty();
    }

    assert(serverInfo->type == REDIS_REPLY_STRING || serverInfo->type == REDIS_REPLY_VERB);
    sds info = serverInfo->str;

    /* Finds the first appearance of "redis_version" in the INFO SERVER reply. */
    pos = strstr(info, key);
    if (pos) {
        pos += strlen(key);
        char *end = strchr(pos, '\r');
        if (end) {
            /* Cache the parsed version; the cached sds is owned by config. */
            sds version = sdsnewlen(pos, end - pos);
            freeReplyObject(serverInfo);
            config.server_version = version;
            return version;
        }
    }
    freeReplyObject(serverInfo);
    return NULL;
}
+
+static void cliLegacyInitHelp(dict *groups) {
+ sds serverVersion = cliGetServerVersion();
+
+ /* Scan the commandDocs array and fill in the entries */
+ helpEntriesLen = cliLegacyCountCommands(redisCommandTable, serverVersion);
+ helpEntries = zmalloc(sizeof(helpEntry)*helpEntriesLen);
+
+ helpEntriesLen = cliLegacyInitCommandHelpEntries(redisCommandTable, groups, serverVersion);
+ cliInitGroupHelpEntries(groups);
+
+ qsort(helpEntries, helpEntriesLen, sizeof(helpEntry), helpEntryCompare);
+ dictRelease(groups);
+}
+
+/* cliInitHelp() sets up the helpEntries array with the command and group
+ * names and command descriptions obtained using the COMMAND DOCS command.
+ */
+static void cliInitHelp(void) {
+ /* Dict type for a set of strings, used to collect names of command groups. */
+ dictType groupsdt = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+ };
+ redisReply *commandTable;
+ dict *groups;
+
+ if (cliConnect(CC_QUIET) == REDIS_ERR) {
+ /* Can not connect to the server, but we still want to provide
+ * help, generate it only from the static cli_commands.c data instead. */
+ groups = dictCreate(&groupsdt);
+ cliLegacyInitHelp(groups);
+ return;
+ }
+ commandTable = redisCommand(context, "COMMAND DOCS");
+ if (commandTable == NULL || commandTable->type == REDIS_REPLY_ERROR) {
+ /* New COMMAND DOCS subcommand not supported - generate help from
+ * static cli_commands.c data instead. */
+ freeReplyObject(commandTable);
+
+ groups = dictCreate(&groupsdt);
+ cliLegacyInitHelp(groups);
+ cliLegacyIntegrateHelp();
+ return;
+ };
+ if (commandTable->type != REDIS_REPLY_MAP && commandTable->type != REDIS_REPLY_ARRAY) return;
+
+ /* Scan the array reported by COMMAND DOCS and fill in the entries */
+ helpEntriesLen = cliCountCommands(commandTable);
+ helpEntries = zmalloc(sizeof(helpEntry)*helpEntriesLen);
+
+ groups = dictCreate(&groupsdt);
+ cliInitCommandHelpEntries(commandTable, groups);
+ cliInitGroupHelpEntries(groups);
+
+ qsort(helpEntries, helpEntriesLen, sizeof(helpEntry), helpEntryCompare);
+ freeReplyObject(commandTable);
+ dictRelease(groups);
+}
+
+/* Output command help to stdout. */
/* Output command help to stdout: name and params in bold/gray, then summary,
 * "since" when known, and the group name when 'group' is non-zero. */
static void cliOutputCommandHelp(struct commandDocs *help, int group) {
    printf("\r\n \x1b[1m%s\x1b[0m \x1b[90m%s\x1b[0m\r\n", help->name, help->params);
    printf(" \x1b[33msummary:\x1b[0m %s\r\n", help->summary);
    if (help->since != NULL) {
        printf(" \x1b[33msince:\x1b[0m %s\r\n", help->since);
    }
    if (group) {
        printf(" \x1b[33mgroup:\x1b[0m %s\r\n", help->group);
    }
}
+
+/* Print generic help. */
/* Print generic help (usage of the interactive "help" command) to stdout. */
static void cliOutputGenericHelp(void) {
    sds version = cliVersion();
    printf(
        "redis-cli %s\n"
        "To get help about Redis commands type:\n"
        "      \"help @<group>\" to get a list of commands in <group>\n"
        "      \"help <command>\" for help on <command>\n"
        "      \"help <tab>\" to get a list of possible help topics\n"
        "      \"quit\" to exit\n"
        "\n"
        "To set redis-cli preferences:\n"
        "      \":set hints\" enable online hints\n"
        "      \":set nohints\" disable online hints\n"
        "Set your preferences in ~/.redisclirc\n",
        version
    );
    sdsfree(version);
}
+
+/* Output all command help, filtering by group or command name. */
/* Output all command help, filtering by group (argv[0] of the form "@group")
 * or by command-name prefix words. argc == 0 prints the generic help. */
static void cliOutputHelp(int argc, char **argv) {
    int i, j;
    char *group = NULL;
    helpEntry *entry;
    struct commandDocs *help;

    if (argc == 0) {
        cliOutputGenericHelp();
        return;
    } else if (argc > 0 && argv[0][0] == '@') {
        group = argv[0]+1;
    }

    if (helpEntries == NULL) {
        /* Initialize the help using the results of the COMMAND command.
         * In case we are using redis-cli help XXX, we need to init it. */
        cliInitHelp();
    }

    assert(argc > 0);
    for (i = 0; i < helpEntriesLen; i++) {
        entry = &helpEntries[i];
        /* Group pseudo-entries are only completion targets, never printed here. */
        if (entry->type != CLI_HELP_COMMAND) continue;

        help = &entry->docs;
        if (group == NULL) {
            /* Compare all arguments: the query words must be a prefix of the
             * entry's words (case-insensitively). */
            if (argc <= entry->argc) {
                for (j = 0; j < argc; j++) {
                    if (strcasecmp(argv[j],entry->argv[j]) != 0) break;
                }
                if (j == argc) {
                    cliOutputCommandHelp(help,1);
                }
            }
        } else if (strcasecmp(group, help->group) == 0) {
            cliOutputCommandHelp(help,0);
        }
    }
    printf("\r\n");
}
+
+/* Linenoise completion callback. */
+static void completionCallback(const char *buf, linenoiseCompletions *lc) {
+ size_t startpos = 0;
+ int mask;
+ int i;
+ size_t matchlen;
+ sds tmp;
+
+ if (strncasecmp(buf,"help ",5) == 0) {
+ startpos = 5;
+ while (isspace(buf[startpos])) startpos++;
+ mask = CLI_HELP_COMMAND | CLI_HELP_GROUP;
+ } else {
+ mask = CLI_HELP_COMMAND;
+ }
+
+ for (i = 0; i < helpEntriesLen; i++) {
+ if (!(helpEntries[i].type & mask)) continue;
+
+ matchlen = strlen(buf+startpos);
+ if (strncasecmp(buf+startpos,helpEntries[i].full,matchlen) == 0) {
+ tmp = sdsnewlen(buf,startpos);
+ tmp = sdscat(tmp,helpEntries[i].full);
+ linenoiseAddCompletion(lc,tmp);
+ sdsfree(tmp);
+ }
+ }
+}
+
+static sds addHintForArgument(sds hint, cliCommandArg *arg);
+
+/* Adds a separator character between words of a string under construction.
+ * A separator is added if the string length is greater than its previously-recorded
+ * length (*len), which is then updated, and it's not the last word to be added.
+ */
+static sds addSeparator(sds str, size_t *len, char *separator, int is_last) {
+ if (sdslen(str) > *len && !is_last) {
+ str = sdscat(str, separator);
+ *len = sdslen(str);
+ }
+ return str;
+}
+
+/* Recursively zeros the matched* fields of all arguments. */
+static void clearMatchedArgs(cliCommandArg *args, int numargs) {
+ for (int i = 0; i != numargs; ++i) {
+ args[i].matched = 0;
+ args[i].matched_token = 0;
+ args[i].matched_name = 0;
+ args[i].matched_all = 0;
+ if (args[i].subargs) {
+ clearMatchedArgs(args[i].subargs, args[i].numsubargs);
+ }
+ }
+}
+
+/* Builds a completion hint string describing the arguments, skipping parts already matched.
+ * Hints for all arguments are added to the input 'hint' parameter, separated by 'separator'.
+ */
/* Builds a completion hint string describing the arguments, skipping parts already matched.
 * Hints for all arguments are added to the input 'hint' parameter, separated by 'separator'.
 */
static sds addHintForArguments(sds hint, cliCommandArg *args, int numargs, char *separator) {
    int i, j, incomplete;
    size_t len=sdslen(hint);
    for (i = 0; i < numargs; i++) {
        /* Required args are hinted unconditionally, in order. */
        if (!(args[i].flags & CMD_ARG_OPTIONAL)) {
            hint = addHintForArgument(hint, &args[i]);
            hint = addSeparator(hint, &len, separator, i == numargs-1);
            continue;
        }

        /* The rule is that successive "optional" arguments can appear in any order.
         * But if they are followed by a required argument, no more of those optional arguments
         * can appear after that.
         *
         * This code handles all successive optional args together. This lets us show the
         * completion of the currently-incomplete optional arg first, if there is one.
         */
        for (j = i, incomplete = -1; j < numargs; j++) {
            if (!(args[j].flags & CMD_ARG_OPTIONAL)) break;
            if (args[j].matched != 0 && args[j].matched_all == 0) {
                /* User has started typing this arg; show its completion first. */
                hint = addHintForArgument(hint, &args[j]);
                hint = addSeparator(hint, &len, separator, i == numargs-1);
                incomplete = j;
            }
        }

        /* If the following non-optional arg has not been matched, add hints for
         * any remaining optional args in this group.
         */
        if (j == numargs || args[j].matched == 0) {
            for (; i < j; i++) {
                /* Skip the partially-typed arg: it was already hinted above. */
                if (incomplete != i) {
                    hint = addHintForArgument(hint, &args[i]);
                    hint = addSeparator(hint, &len, separator, i == numargs-1);
                }
            }
        }

        /* Resume the outer scan after the optional group. */
        i = j - 1;
    }
    return hint;
}
+
+/* Adds the "repeating" section of the hint string for a multiple-typed argument: [ABC def ...]
+ * The repeating part is a fixed unit; we don't filter matched elements from it.
+ */
/* Adds the "repeating" section of the hint string for a multiple-typed argument: [ABC def ...]
 * The repeating part is a fixed unit; we don't filter matched elements from it.
 */
static sds addHintForRepeatedArgument(sds hint, cliCommandArg *arg) {
    if (!(arg->flags & CMD_ARG_MULTIPLE)) {
        return hint;
    }

    /* The repeating part is always shown at the end of the argument's hint,
     * so we can safely clear its matched flags before printing it.
     */
    clearMatchedArgs(arg, 1);

    /* Separate from any preceding text and open the bracketed repeat unit. */
    if (hint[0] != '\0') {
        hint = sdscat(hint, " ");
    }
    hint = sdscat(hint, "[");

    /* The token is repeated with each unit only for "multiple_token" args. */
    if (arg->flags & CMD_ARG_MULTIPLE_TOKEN) {
        hint = sdscat_orempty(hint, arg->token);
        if (arg->type != ARG_TYPE_PURE_TOKEN) {
            hint = sdscat(hint, " ");
        }
    }

    switch (arg->type) {
    case ARG_TYPE_ONEOF:
        hint = addHintForArguments(hint, arg->subargs, arg->numsubargs, "|");
        break;

    case ARG_TYPE_BLOCK:
        hint = addHintForArguments(hint, arg->subargs, arg->numsubargs, " ");
        break;

    case ARG_TYPE_PURE_TOKEN:
        break;

    default:
        /* Scalar arg: show its display text (or name as a fallback). */
        hint = sdscat_orempty(hint, arg->display_text ? arg->display_text : arg->name);
        break;
    }

    hint = sdscat(hint, " ...]");
    return hint;
}
+
+/* Adds hint string for one argument, if not already matched. */
/* Adds hint string for one argument, if not already matched. */
static sds addHintForArgument(sds hint, cliCommandArg *arg) {
    if (arg->matched_all) {
        return hint;
    }

    /* Surround an optional arg with brackets, unless it's partially matched. */
    if ((arg->flags & CMD_ARG_OPTIONAL) && !arg->matched) {
        hint = sdscat(hint, "[");
    }

    /* Start with the token, if present and not matched. */
    if (arg->token != NULL && !arg->matched_token) {
        hint = sdscat_orempty(hint, arg->token);
        if (arg->type != ARG_TYPE_PURE_TOKEN) {
            hint = sdscat(hint, " ");
        }
    }

    /* Add the body of the syntax string. */
    switch (arg->type) {
    case ARG_TYPE_ONEOF:
        if (arg->matched == 0) {
            /* Nothing chosen yet: show all alternatives separated by '|'. */
            hint = addHintForArguments(hint, arg->subargs, arg->numsubargs, "|");
        } else {
            /* A branch was chosen: hint only the partially-matched branch(es). */
            int i;
            for (i = 0; i < arg->numsubargs; i++) {
                if (arg->subargs[i].matched != 0) {
                    hint = addHintForArgument(hint, &arg->subargs[i]);
                }
            }
        }
        break;

    case ARG_TYPE_BLOCK:
        hint = addHintForArguments(hint, arg->subargs, arg->numsubargs, " ");
        break;

    case ARG_TYPE_PURE_TOKEN:
        break;

    default:
        /* Scalar arg: show its display text (or name) unless already typed. */
        if (!arg->matched_name) {
            hint = sdscat_orempty(hint, arg->display_text ? arg->display_text : arg->name);
        }
        break;
    }

    /* A "multiple" arg also shows its bracketed repeat unit at the end. */
    hint = addHintForRepeatedArgument(hint, arg);

    if ((arg->flags & CMD_ARG_OPTIONAL) && !arg->matched) {
        hint = sdscat(hint, "]");
    }

    return hint;
}
+
+static int matchArg(char **nextword, int numwords, cliCommandArg *arg);
+static int matchArgs(char **words, int numwords, cliCommandArg *args, int numargs);
+
+/* Tries to match the next words of the input against an argument. */
/* Tries to match the next words of the input against an argument (its token,
 * if any, has already been consumed by the caller). Updates the arg's
 * matched/matched_name/matched_all bookkeeping and returns arg->matched. */
static int matchNoTokenArg(char **nextword, int numwords, cliCommandArg *arg) {
    int i;
    switch (arg->type) {
    case ARG_TYPE_BLOCK: {
        arg->matched += matchArgs(nextword, numwords, arg->subargs, arg->numsubargs);

        /* All the subargs must be matched for the block to match. */
        arg->matched_all = 1;
        for (i = 0; i < arg->numsubargs; i++) {
            if (arg->subargs[i].matched_all == 0) {
                arg->matched_all = 0;
            }
        }
        break;
    }
    case ARG_TYPE_ONEOF: {
        /* First alternative that matches wins. */
        for (i = 0; i < arg->numsubargs; i++) {
            if (matchArg(nextword, numwords, &arg->subargs[i])) {
                arg->matched += arg->subargs[i].matched;
                arg->matched_all = arg->subargs[i].matched_all;
                break;
            }
        }
        break;
    }

    case ARG_TYPE_INTEGER:
    case ARG_TYPE_UNIX_TIME: {
        /* NOTE(review): sscanf also accepts a word with a numeric prefix
         * ("12abc"); only a fully non-numeric word fails — confirm intended. */
        long long value;
        if (sscanf(*nextword, "%lld", &value)) {
            arg->matched += 1;
            arg->matched_name = 1;
            arg->matched_all = 1;
        } else {
            /* Matching failed due to incorrect arg type. */
            arg->matched = 0;
            arg->matched_name = 0;
        }
        break;
    }

    case ARG_TYPE_DOUBLE: {
        double value;
        if (sscanf(*nextword, "%lf", &value)) {
            arg->matched += 1;
            arg->matched_name = 1;
            arg->matched_all = 1;
        } else {
            /* Matching failed due to incorrect arg type. */
            arg->matched = 0;
            arg->matched_name = 0;
        }
        break;
    }

    default:
        /* Strings, keys, patterns, etc. match any single word. */
        arg->matched += 1;
        arg->matched_name = 1;
        arg->matched_all = 1;
        break;
    }
    return arg->matched;
}
+
+/* Tries to match the next word of the input against a token literal. */
+static int matchToken(char **nextword, cliCommandArg *arg) {
+ if (strcasecmp(arg->token, nextword[0]) != 0) {
+ return 0;
+ }
+ arg->matched_token = 1;
+ arg->matched = 1;
+ return 1;
+}
+
+/* Tries to match the next words of the input against the next argument.
+ * If the arg is repeated ("multiple"), it will be matched only once.
+ * If the next input word(s) can't be matched, returns 0 for failure.
+ */
/* Tries to match the next words of the input against the next argument.
 * If the arg is repeated ("multiple"), it will be matched only once.
 * If the next input word(s) can't be matched, returns 0 for failure.
 */
static int matchArgOnce(char **nextword, int numwords, cliCommandArg *arg) {
    /* First match the token, if present. */
    if (arg->token != NULL) {
        if (!matchToken(nextword, arg)) {
            return 0;
        }
        /* A pure token has no value part: the token alone is the whole arg. */
        if (arg->type == ARG_TYPE_PURE_TOKEN) {
            arg->matched_all = 1;
            return 1;
        }
        /* The token was the last input word: a partial match of one word. */
        if (numwords == 1) {
            return 1;
        }
        nextword++;
        numwords--;
    }

    /* Then match the rest of the argument. */
    if (!matchNoTokenArg(nextword, numwords, arg)) {
        return 0;
    }
    return arg->matched;
}
+
+/* Tries to match the next words of the input against the next argument.
+ * If the arg is repeated ("multiple"), it will be matched as many times as possible.
+ */
/* Tries to match the next words of the input against the next argument.
 * If the arg is repeated ("multiple"), it will be matched as many times as possible.
 * Returns the number of input words consumed.
 */
static int matchArg(char **nextword, int numwords, cliCommandArg *arg) {
    int matchedWords = 0;
    int matchedOnce = matchArgOnce(nextword, numwords, arg);
    if (!(arg->flags & CMD_ARG_MULTIPLE)) {
        return matchedOnce;
    }

    /* Found one match; now match a "multiple" argument as many times as possible. */
    matchedWords += matchedOnce;
    while (arg->matched_all && matchedWords < numwords) {
        clearMatchedArgs(arg, 1);
        if (arg->token != NULL && !(arg->flags & CMD_ARG_MULTIPLE_TOKEN)) {
            /* The token only appears the first time; the rest of the times,
             * pretend we saw it so we don't hint it.
             */
            matchedOnce = matchNoTokenArg(nextword + matchedWords, numwords - matchedWords, arg);
            if (arg->matched) {
                arg->matched_token = 1;
            }
        } else {
            matchedOnce = matchArgOnce(nextword + matchedWords, numwords - matchedWords, arg);
        }
        matchedWords += matchedOnce;
    }
    arg->matched_all = 0; /* Because more repetitions are still possible. */
    return matchedWords;
}
+
+/* Tries to match the next words of the input against
+ * any one of a consecutive set of optional arguments.
+ */
+static int matchOneOptionalArg(char **words, int numwords, cliCommandArg *args, int numargs, int *matchedarg) {
+ for (int nextword = 0, nextarg = 0; nextword != numwords && nextarg != numargs; ++nextarg) {
+ if (args[nextarg].matched) {
+ /* Already matched this arg. */
+ continue;
+ }
+
+ int matchedWords = matchArg(&words[nextword], numwords - nextword, &args[nextarg]);
+ if (matchedWords != 0) {
+ *matchedarg = nextarg;
+ return matchedWords;
+ }
+ }
+ return 0;
+}
+
/* Matches as many input words as possible against a set of consecutive optional
 * arguments (which may appear in any order).
 *
 * Returns the total number of input words consumed.
 */
static int matchOptionalArgs(char **words, int numwords, cliCommandArg *args, int numargs) {
    int nextword = 0;
    /* Track the previously matched arg so it can be finalized once a later
     * arg matches (a partial hint is only shown for the last match). */
    int matchedarg = -1, lastmatchedarg = -1;
    while (nextword != numwords) {
        int matchedWords = matchOneOptionalArg(&words[nextword], numwords - nextword, args, numargs, &matchedarg);
        if (matchedWords == 0) {
            break;
        }
        /* Successfully matched an optional arg; mark any previous match as completed
         * so it won't be partially hinted.
         */
        if (lastmatchedarg != -1) {
            args[lastmatchedarg].matched_all = 1;
        }
        lastmatchedarg = matchedarg;
        nextword += matchedWords;
    }
    return nextword;
}
+
/* Matches as many input words as possible against command arguments.
 *
 * Required args are matched strictly in order; a run of consecutive optional
 * args is matched as an order-independent group. Returns the number of input
 * words consumed, or 0 if a required argument failed to match.
 */
static int matchArgs(char **words, int numwords, cliCommandArg *args, int numargs) {
    int nextword, nextarg, matchedWords;
    for (nextword = 0, nextarg = 0; nextword != numwords && nextarg != numargs; ++nextarg) {
        /* Optional args can occur in any order. Collect a range of consecutive optional args
         * and try to match them as a group against the next input words.
         */
        if (args[nextarg].flags & CMD_ARG_OPTIONAL) {
            int lastoptional;
            for (lastoptional = nextarg; lastoptional < numargs; lastoptional++) {
                if (!(args[lastoptional].flags & CMD_ARG_OPTIONAL)) break;
            }
            matchedWords = matchOptionalArgs(&words[nextword], numwords - nextword, &args[nextarg], lastoptional - nextarg);
            /* Skip past the whole optional group (loop's ++nextarg lands on
             * the first non-optional arg). */
            nextarg = lastoptional - 1;
        } else {
            matchedWords = matchArg(&words[nextword], numwords - nextword, &args[nextarg]);
            if (matchedWords == 0) {
                /* Couldn't match a required word - matching fails! */
                return 0;
            }
        }

        nextword += matchedWords;
    }
    return nextword;
}
+
/* Compute the linenoise hint for the input prefix in inputargv/inputargc.
 * cmdlen is the number of words from the start of the input that make up the command.
 * If docs.args exists, dynamically creates a hint string by matching the arg specs
 * against the input words.
 *
 * Always returns a freshly allocated sds (possibly empty); caller frees it.
 */
static sds makeHint(char **inputargv, int inputargc, int cmdlen, struct commandDocs docs) {
    sds hint;

    if (docs.args) {
        /* Remove arguments from the returned hint to show only the
         * ones the user did not yet type. */
        clearMatchedArgs(docs.args, docs.numargs);
        hint = sdsempty();
        int matchedWords = 0;
        if (inputargv && inputargc)
            matchedWords = matchArgs(inputargv + cmdlen, inputargc - cmdlen, docs.args, docs.numargs);
        /* Only hint if every typed word was consumed; otherwise the input is
         * not a valid prefix of the command and hinting would mislead. */
        if (matchedWords == inputargc - cmdlen) {
            hint = addHintForArguments(hint, docs.args, docs.numargs, " ");
        }
        return hint;
    }

    /* If arg specs are not available, show the hint string until the user types something. */
    if (inputargc <= cmdlen) {
        hint = sdsnew(docs.params);
    } else {
        hint = sdsempty();
    }
    return hint;
}
+
+/* Search for a command matching the longest possible prefix of input words. */
+static helpEntry* findHelpEntry(int argc, char **argv) {
+ helpEntry *entry = NULL;
+ int i, rawargc, matchlen = 0;
+ sds *rawargv;
+
+ for (i = 0; i < helpEntriesLen; i++) {
+ if (!(helpEntries[i].type & CLI_HELP_COMMAND)) continue;
+
+ rawargv = helpEntries[i].argv;
+ rawargc = helpEntries[i].argc;
+ if (rawargc <= argc) {
+ int j;
+ for (j = 0; j < rawargc; j++) {
+ if (strcasecmp(rawargv[j],argv[j])) {
+ break;
+ }
+ }
+ if (j == rawargc && rawargc > matchlen) {
+ matchlen = rawargc;
+ entry = &helpEntries[i];
+ }
+ }
+ }
+ return entry;
+}
+
+/* Returns the command-line hint string for a given partial input. */
+static sds getHintForInput(const char *charinput) {
+ sds hint = NULL;
+ int inputargc, inputlen = strlen(charinput);
+ sds *inputargv = sdssplitargs(charinput, &inputargc);
+ int endspace = inputlen && isspace(charinput[inputlen-1]);
+
+ /* Don't match the last word until the user has typed a space after it. */
+ int matchargc = endspace ? inputargc : inputargc - 1;
+
+ helpEntry *entry = findHelpEntry(matchargc, inputargv);
+ if (entry) {
+ hint = makeHint(inputargv, matchargc, entry->argc, entry->docs);
+ }
+ sdsfreesplitres(inputargv, inputargc);
+ return hint;
+}
+
+/* Linenoise hints callback. */
+static char *hintsCallback(const char *buf, int *color, int *bold) {
+ if (!pref.hints) return NULL;
+
+ sds hint = getHintForInput(buf);
+ if (hint == NULL) {
+ return NULL;
+ }
+
+ *color = 90;
+ *bold = 0;
+
+ /* Add an initial space if needed. */
+ int len = strlen(buf);
+ int endspace = len && isspace(buf[len-1]);
+ if (!endspace) {
+ sds newhint = sdsnewlen(" ",1);
+ newhint = sdscatsds(newhint,hint);
+ sdsfree(hint);
+ hint = newhint;
+ }
+
+ return hint;
+}
+
+static void freeHintsCallback(void *ptr) {
+ sdsfree(ptr);
+}
+
+/*------------------------------------------------------------------------------
+ * TTY manipulation
+ *--------------------------------------------------------------------------- */
+
/* Restore terminal if we've changed it.
 * orig_termios was saved by cliPressAnyKeyTTY() before raw mode was set;
 * also registered as an atexit() handler there. */
void cliRestoreTTY(void) {
    if (orig_termios_saved)
        tcsetattr(STDIN_FILENO, TCSANOW, &orig_termios);
}
+
/* Put the terminal in "press any key" mode: echo and canonical (line
 * buffered) input are disabled so a single keypress is readable.
 * No-op when stdin is not a terminal. The original settings are saved once
 * and restored by cliRestoreTTY() (also registered via atexit). */
static void cliPressAnyKeyTTY(void) {
    if (!isatty(STDIN_FILENO)) return;
    if (!orig_termios_saved) {
        if (tcgetattr(STDIN_FILENO, &orig_termios) == -1) return;
        atexit(cliRestoreTTY);
        orig_termios_saved = 1;
    }
    struct termios mode = orig_termios;
    mode.c_lflag &= ~(ECHO | ICANON); /* echoing off, canonical off */
    tcsetattr(STDIN_FILENO, TCSANOW, &mode);
}
+
+/*------------------------------------------------------------------------------
+ * Networking / parsing
+ *--------------------------------------------------------------------------- */
+
+/* Send AUTH command to the server */
+static int cliAuth(redisContext *ctx, char *user, char *auth) {
+ redisReply *reply;
+ if (auth == NULL) return REDIS_OK;
+
+ if (user == NULL)
+ reply = redisCommand(ctx,"AUTH %s",auth);
+ else
+ reply = redisCommand(ctx,"AUTH %s %s",user,auth);
+
+ if (reply == NULL) {
+ fprintf(stderr, "\nI/O error\n");
+ return REDIS_ERR;
+ }
+
+ int result = REDIS_OK;
+ if (reply->type == REDIS_REPLY_ERROR) {
+ result = REDIS_ERR;
+ fprintf(stderr, "AUTH failed: %s\n", reply->str);
+ }
+ freeReplyObject(reply);
+ return result;
+}
+
/* Send SELECT input_dbnum to the server.
 * Returns REDIS_OK on success (or when already on the requested db),
 * REDIS_ERR on I/O or command error. On success the tracked db number and
 * the prompt are updated. */
static int cliSelect(void) {
    redisReply *reply;
    if (config.conn_info.input_dbnum == config.dbnum) return REDIS_OK;

    reply = redisCommand(context,"SELECT %d",config.conn_info.input_dbnum);
    if (reply == NULL) {
        fprintf(stderr, "\nI/O error\n");
        return REDIS_ERR;
    }

    int result = REDIS_OK;
    if (reply->type == REDIS_REPLY_ERROR) {
        result = REDIS_ERR;
        fprintf(stderr,"SELECT %d failed: %s\n",config.conn_info.input_dbnum,reply->str);
    } else {
        /* Keep the locally tracked database in sync so the prompt shows
         * "host:port[db]>" for non-zero databases. */
        config.dbnum = config.conn_info.input_dbnum;
        cliRefreshPrompt();
    }
    freeReplyObject(reply);
    return result;
}
+
+/* Select RESP3 mode if redis-cli was started with the -3 option. */
+static int cliSwitchProto(void) {
+ redisReply *reply;
+ if (!config.resp3 || config.resp2) return REDIS_OK;
+
+ reply = redisCommand(context,"HELLO 3");
+ if (reply == NULL) {
+ fprintf(stderr, "\nI/O error\n");
+ return REDIS_ERR;
+ }
+
+ int result = REDIS_OK;
+ if (reply->type == REDIS_REPLY_ERROR) {
+ fprintf(stderr,"HELLO 3 failed: %s\n",reply->str);
+ if (config.resp3 == 1) {
+ result = REDIS_ERR;
+ } else if (config.resp3 == 2) {
+ result = REDIS_OK;
+ }
+ }
+
+ /* Retrieve server version string for later use. */
+ for (size_t i = 0; i < reply->elements; i += 2) {
+ assert(reply->element[i]->type == REDIS_REPLY_STRING);
+ char *key = reply->element[i]->str;
+ if (!strcmp(key, "version")) {
+ assert(reply->element[i + 1]->type == REDIS_REPLY_STRING);
+ config.server_version = sdsnew(reply->element[i + 1]->str);
+ }
+ }
+ freeReplyObject(reply);
+ config.current_resp3 = 1;
+ return result;
+}
+
/* Connect to the server. It is possible to pass certain flags to the function:
 * CC_FORCE: The connection is performed even if there is already
 *           a connected socket.
 * CC_QUIET: Don't print errors if connection fails.
 * Returns REDIS_OK on success, REDIS_ERR otherwise. On failure the global
 * 'context' is freed and reset to NULL. */
static int cliConnect(int flags) {
    if (context == NULL || flags & CC_FORCE) {
        if (context != NULL) {
            /* Reconnecting: drop per-connection state (db, MULTI, pubsub). */
            redisFree(context);
            config.dbnum = 0;
            config.in_multi = 0;
            config.pubsub_mode = 0;
            cliRefreshPrompt();
        }

        /* Do not use hostsocket when we got redirected in cluster mode */
        if (config.hostsocket == NULL ||
            (config.cluster_mode && config.cluster_reissue_command)) {
            context = redisConnect(config.conn_info.hostip,config.conn_info.hostport);
        } else {
            context = redisConnectUnix(config.hostsocket);
        }

        /* NOTE(review): hiredis redisConnect() may return NULL on OOM; the
         * context->err dereferences below assume non-NULL — confirm upstream. */
        if (!context->err && config.tls) {
            const char *err = NULL;
            if (cliSecureConnection(context, config.sslconfig, &err) == REDIS_ERR && err) {
                fprintf(stderr, "Could not negotiate a TLS connection: %s\n", err);
                redisFree(context);
                context = NULL;
                return REDIS_ERR;
            }
        }

        if (context->err) {
            if (!(flags & CC_QUIET)) {
                fprintf(stderr,"Could not connect to Redis at ");
                if (config.hostsocket == NULL ||
                    (config.cluster_mode && config.cluster_reissue_command))
                {
                    fprintf(stderr, "%s:%d: %s\n",
                        config.conn_info.hostip,config.conn_info.hostport,context->errstr);
                } else {
                    fprintf(stderr,"%s: %s\n",
                        config.hostsocket,context->errstr);
                }
            }
            redisFree(context);
            context = NULL;
            return REDIS_ERR;
        }


        /* Set aggressive KEEP_ALIVE socket option in the Redis context socket
         * in order to prevent timeouts caused by the execution of long
         * commands. At the same time this improves the detection of real
         * errors. */
        anetKeepAlive(NULL, context->fd, REDIS_CLI_KEEPALIVE_INTERVAL);

        /* State of the current connection. */
        config.current_resp3 = 0;

        /* Do AUTH, select the right DB, switch to RESP3 if needed. */
        if (cliAuth(context, config.conn_info.user, config.conn_info.auth) != REDIS_OK)
            return REDIS_ERR;
        if (cliSelect() != REDIS_OK)
            return REDIS_ERR;
        if (cliSwitchProto() != REDIS_OK)
            return REDIS_ERR;
    }

    /* Set a PUSH handler if configured to do so. */
    if (config.push_output) {
        redisSetPushCallback(context, cliPushHandler);
    }

    return REDIS_OK;
}
+
+/* In cluster, if server replies ASK, we will redirect to a different node.
+ * Before sending the real command, we need to send ASKING command first. */
+static int cliSendAsking(void) {
+ redisReply *reply;
+
+ config.cluster_send_asking = 0;
+ if (context == NULL) {
+ return REDIS_ERR;
+ }
+ reply = redisCommand(context,"ASKING");
+ if (reply == NULL) {
+ fprintf(stderr, "\nI/O error\n");
+ return REDIS_ERR;
+ }
+ int result = REDIS_OK;
+ if (reply->type == REDIS_REPLY_ERROR) {
+ result = REDIS_ERR;
+ fprintf(stderr,"ASKING failed: %s\n",reply->str);
+ }
+ freeReplyObject(reply);
+ return result;
+}
+
+static void cliPrintContextError(void) {
+ if (context == NULL) return;
+ fprintf(stderr,"Error: %s\n",context->errstr);
+}
+
+static int isInvalidateReply(redisReply *reply) {
+ return reply->type == REDIS_REPLY_PUSH && reply->elements == 2 &&
+ reply->element[0]->type == REDIS_REPLY_STRING &&
+ !strncmp(reply->element[0]->str, "invalidate", 10) &&
+ reply->element[1]->type == REDIS_REPLY_ARRAY;
+}
+
+/* Special display handler for RESP3 'invalidate' messages.
+ * This function does not validate the reply, so it should
+ * already be confirmed correct */
+static sds cliFormatInvalidateTTY(redisReply *r) {
+ sds out = sdsnew("-> invalidate: ");
+
+ for (size_t i = 0; i < r->element[1]->elements; i++) {
+ redisReply *key = r->element[1]->element[i];
+ assert(key->type == REDIS_REPLY_STRING);
+
+ out = sdscatfmt(out, "'%s'", key->str, key->len);
+ if (i < r->element[1]->elements - 1)
+ out = sdscatlen(out, ", ", 2);
+ }
+
+ return sdscatlen(out, "\n", 1);
+}
+
+/* Returns non-zero if cliFormatReplyTTY renders the reply in multiple lines. */
+static int cliIsMultilineValueTTY(redisReply *r) {
+ switch (r->type) {
+ case REDIS_REPLY_ARRAY:
+ case REDIS_REPLY_SET:
+ case REDIS_REPLY_PUSH:
+ if (r->elements == 0) return 0;
+ if (r->elements > 1) return 1;
+ return cliIsMultilineValueTTY(r->element[0]);
+ case REDIS_REPLY_MAP:
+ if (r->elements == 0) return 0;
+ if (r->elements > 2) return 1;
+ return cliIsMultilineValueTTY(r->element[1]);
+ default:
+ return 0;
+ }
+}
+
/* Render a reply for interactive (TTY) display, recursively for aggregate
 * types. 'prefix' is the indentation string used for nested elements.
 * Returns a freshly allocated sds owned by the caller. */
static sds cliFormatReplyTTY(redisReply *r, char *prefix) {
    sds out = sdsempty();
    switch (r->type) {
    case REDIS_REPLY_ERROR:
        out = sdscatprintf(out,"(error) %s\n", r->str);
        break;
    case REDIS_REPLY_STATUS:
        out = sdscat(out,r->str);
        out = sdscat(out,"\n");
        break;
    case REDIS_REPLY_INTEGER:
        out = sdscatprintf(out,"(integer) %lld\n",r->integer);
        break;
    case REDIS_REPLY_DOUBLE:
        out = sdscatprintf(out,"(double) %s\n",r->str);
        break;
    case REDIS_REPLY_STRING:
    case REDIS_REPLY_VERB:
        /* If you are producing output for the standard output we want
         * a more interesting output with quoted characters and so forth,
         * unless it's a verbatim string type. */
        if (r->type == REDIS_REPLY_STRING) {
            out = sdscatrepr(out,r->str,r->len);
            out = sdscat(out,"\n");
        } else {
            out = sdscatlen(out,r->str,r->len);
            out = sdscat(out,"\n");
        }
        break;
    case REDIS_REPLY_NIL:
        out = sdscat(out,"(nil)\n");
        break;
    case REDIS_REPLY_BOOL:
        out = sdscat(out,r->integer ? "(true)\n" : "(false)\n");
        break;
    case REDIS_REPLY_ARRAY:
    case REDIS_REPLY_MAP:
    case REDIS_REPLY_SET:
    case REDIS_REPLY_PUSH:
        if (r->elements == 0) {
            if (r->type == REDIS_REPLY_ARRAY)
                out = sdscat(out,"(empty array)\n");
            else if (r->type == REDIS_REPLY_MAP)
                out = sdscat(out,"(empty hash)\n");
            else if (r->type == REDIS_REPLY_SET)
                out = sdscat(out,"(empty set)\n");
            else if (r->type == REDIS_REPLY_PUSH)
                out = sdscat(out,"(empty push)\n");
            else
                out = sdscat(out,"(empty aggregate type)\n");
        } else {
            unsigned int i, idxlen = 0;
            char _prefixlen[16];
            char _prefixfmt[16];
            sds _prefix;
            sds tmp;

            /* Calculate chars needed to represent the largest index */
            i = r->elements;
            if (r->type == REDIS_REPLY_MAP) i /= 2;
            do {
                idxlen++;
                i /= 10;
            } while(i);

            /* Prefix for nested multi bulks should grow with idxlen+2 spaces */
            memset(_prefixlen,' ',idxlen+2);
            _prefixlen[idxlen+2] = '\0';
            _prefix = sdscat(sdsnew(prefix),_prefixlen);

            /* Setup prefix format for every entry: index, separator char, space.
             * Sets use '~', maps '#', everything else ')'. */
            char numsep;
            if (r->type == REDIS_REPLY_SET) numsep = '~';
            else if (r->type == REDIS_REPLY_MAP) numsep = '#';
            /* TODO: this would be a breaking change for scripts, do that in a major version. */
            /* else if (r->type == REDIS_REPLY_PUSH) numsep = '>'; */
            else numsep = ')';
            snprintf(_prefixfmt,sizeof(_prefixfmt),"%%s%%%ud%c ",idxlen,numsep);

            for (i = 0; i < r->elements; i++) {
                unsigned int human_idx = (r->type == REDIS_REPLY_MAP) ?
                                         i/2 : i;
                human_idx++; /* Make it 1-based. */

                /* Don't use the prefix for the first element, as the parent
                 * caller already prepended the index number. */
                out = sdscatprintf(out,_prefixfmt,i == 0 ? "" : prefix,human_idx);

                /* Format the multi bulk entry */
                tmp = cliFormatReplyTTY(r->element[i],_prefix);
                out = sdscatlen(out,tmp,sdslen(tmp));
                sdsfree(tmp);

                /* For maps, format the value as well. */
                if (r->type == REDIS_REPLY_MAP) {
                    i++;
                    /* Drop the trailing newline of the key line and join
                     * key and value with " => ". */
                    sdsrange(out,0,-2);
                    out = sdscat(out," => ");
                    if (cliIsMultilineValueTTY(r->element[i])) {
                        /* linebreak before multiline value to fix alignment */
                        out = sdscat(out, "\n");
                        out = sdscat(out, _prefix);
                    }
                    tmp = cliFormatReplyTTY(r->element[i],_prefix);
                    out = sdscatlen(out,tmp,sdslen(tmp));
                    sdsfree(tmp);
                }
            }
            sdsfree(_prefix);
        }
        break;
    default:
        fprintf(stderr,"Unknown reply type: %d\n", r->type);
        exit(1);
    }
    return out;
}
+
+/* Returns 1 if the reply is a pubsub pushed reply. */
+int isPubsubPush(redisReply *r) {
+ if (r == NULL ||
+ r->type != (config.current_resp3 ? REDIS_REPLY_PUSH : REDIS_REPLY_ARRAY) ||
+ r->elements < 3 ||
+ r->element[0]->type != REDIS_REPLY_STRING)
+ {
+ return 0;
+ }
+ char *str = r->element[0]->str;
+ size_t len = r->element[0]->len;
+ /* Check if it is [p|s][un]subscribe or [p|s]message, but even simpler, we
+ * just check that it ends with "message" or "subscribe". */
+ return ((len >= strlen("message") &&
+ !strcmp(str + len - strlen("message"), "message")) ||
+ (len >= strlen("subscribe") &&
+ !strcmp(str + len - strlen("subscribe"), "subscribe")));
+}
+
/* True when $TERM names an xterm-compatible (color-capable) terminal. */
int isColorTerm(void) {
    const char *term = getenv("TERM");
    if (term == NULL) return 0;
    return strstr(term, "xterm") != NULL;
}
+
+/* Helper function for sdsCatColorizedLdbReply() appending colorize strings
+ * to an SDS string. */
+sds sdscatcolor(sds o, char *s, size_t len, char *color) {
+ if (!isColorTerm()) return sdscatlen(o,s,len);
+
+ int bold = strstr(color,"bold") != NULL;
+ int ccode = 37; /* Defaults to white. */
+ if (strstr(color,"red")) ccode = 31;
+ else if (strstr(color,"green")) ccode = 32;
+ else if (strstr(color,"yellow")) ccode = 33;
+ else if (strstr(color,"blue")) ccode = 34;
+ else if (strstr(color,"magenta")) ccode = 35;
+ else if (strstr(color,"cyan")) ccode = 36;
+ else if (strstr(color,"white")) ccode = 37;
+
+ o = sdscatfmt(o,"\033[%i;%i;49m",bold,ccode);
+ o = sdscatlen(o,s,len);
+ o = sdscat(o,"\033[0m");
+ return o;
+}
+
+/* Colorize Lua debugger status replies according to the prefix they
+ * have. */
+sds sdsCatColorizedLdbReply(sds o, char *s, size_t len) {
+ char *color = "white";
+
+ if (strstr(s,"<debug>")) color = "bold";
+ if (strstr(s,"<redis>")) color = "green";
+ if (strstr(s,"<reply>")) color = "cyan";
+ if (strstr(s,"<error>")) color = "red";
+ if (strstr(s,"<hint>")) color = "bold";
+ if (strstr(s,"<value>") || strstr(s,"<retval>")) color = "magenta";
+ if (len > 4 && isdigit(s[3])) {
+ if (s[1] == '>') color = "yellow"; /* Current line. */
+ else if (s[2] == '#') color = "bold"; /* Break point. */
+ }
+ return sdscatcolor(o,s,len,color);
+}
+
/* Render a reply in raw mode: strings verbatim (no quoting), aggregates
 * joined with config.mb_delim, map entries as "key value". Also hosts the
 * Lua-debugger colorized output path. Returns a caller-owned sds. */
static sds cliFormatReplyRaw(redisReply *r) {
    sds out = sdsempty(), tmp;
    size_t i;

    switch (r->type) {
    case REDIS_REPLY_NIL:
        /* Nothing... */
        break;
    case REDIS_REPLY_ERROR:
        out = sdscatlen(out,r->str,r->len);
        out = sdscatlen(out,"\n",1);
        break;
    case REDIS_REPLY_STATUS:
    case REDIS_REPLY_STRING:
    case REDIS_REPLY_VERB:
        if (r->type == REDIS_REPLY_STATUS && config.eval_ldb) {
            /* The Lua debugger replies with arrays of simple (status)
             * strings. We colorize the output for more fun if this
             * is a debugging session. */

            /* Detect the end of a debugging session. */
            if (strstr(r->str,"<endsession>") == r->str) {
                config.enable_ldb_on_eval = 0;
                config.eval_ldb = 0;
                config.eval_ldb_end = 1; /* Signal the caller session ended. */
                config.output = OUTPUT_STANDARD;
                cliRefreshPrompt();
            } else {
                out = sdsCatColorizedLdbReply(out,r->str,r->len);
            }
        } else {
            out = sdscatlen(out,r->str,r->len);
        }
        break;
    case REDIS_REPLY_BOOL:
        out = sdscat(out,r->integer ? "(true)" : "(false)");
        break;
    case REDIS_REPLY_INTEGER:
        out = sdscatprintf(out,"%lld",r->integer);
        break;
    case REDIS_REPLY_DOUBLE:
        out = sdscatprintf(out,"%s",r->str);
        break;
    case REDIS_REPLY_SET:
    case REDIS_REPLY_ARRAY:
    case REDIS_REPLY_PUSH:
        for (i = 0; i < r->elements; i++) {
            /* Join elements with the configured multi-bulk delimiter. */
            if (i > 0) out = sdscat(out,config.mb_delim);
            tmp = cliFormatReplyRaw(r->element[i]);
            out = sdscatlen(out,tmp,sdslen(tmp));
            sdsfree(tmp);
        }
        break;
    case REDIS_REPLY_MAP:
        for (i = 0; i < r->elements; i += 2) {
            if (i > 0) out = sdscat(out,config.mb_delim);
            tmp = cliFormatReplyRaw(r->element[i]);
            out = sdscatlen(out,tmp,sdslen(tmp));
            sdsfree(tmp);

            /* Key and value are separated by a single space. */
            out = sdscatlen(out," ",1);
            tmp = cliFormatReplyRaw(r->element[i+1]);
            out = sdscatlen(out,tmp,sdslen(tmp));
            sdsfree(tmp);
        }
        break;
    default:
        fprintf(stderr,"Unknown reply type: %d\n", r->type);
        exit(1);
    }
    return out;
}
+
/* Render a reply as CSV: scalars quoted via sdscatrepr where needed,
 * aggregates flattened into a comma separated list (maps have no CSV
 * equivalent and are emitted as a flat key,value,... list).
 * Returns a caller-owned sds. */
static sds cliFormatReplyCSV(redisReply *r) {
    unsigned int i;

    sds out = sdsempty();
    switch (r->type) {
    case REDIS_REPLY_ERROR:
        out = sdscat(out,"ERROR,");
        out = sdscatrepr(out,r->str,strlen(r->str));
        break;
    case REDIS_REPLY_STATUS:
        out = sdscatrepr(out,r->str,r->len);
        break;
    case REDIS_REPLY_INTEGER:
        out = sdscatprintf(out,"%lld",r->integer);
        break;
    case REDIS_REPLY_DOUBLE:
        out = sdscatprintf(out,"%s",r->str);
        break;
    case REDIS_REPLY_STRING:
    case REDIS_REPLY_VERB:
        out = sdscatrepr(out,r->str,r->len);
        break;
    case REDIS_REPLY_NIL:
        out = sdscat(out,"NULL");
        break;
    case REDIS_REPLY_BOOL:
        out = sdscat(out,r->integer ? "true" : "false");
        break;
    case REDIS_REPLY_ARRAY:
    case REDIS_REPLY_SET:
    case REDIS_REPLY_PUSH:
    case REDIS_REPLY_MAP: /* CSV has no map type, just output flat list. */
        for (i = 0; i < r->elements; i++) {
            sds tmp = cliFormatReplyCSV(r->element[i]);
            out = sdscatlen(out,tmp,sdslen(tmp));
            if (i != r->elements-1) out = sdscat(out,",");
            sdsfree(tmp);
        }
        break;
    default:
        fprintf(stderr,"Unknown reply type: %d\n", r->type);
        exit(1);
    }
    return out;
}
+
+/* Append specified buffer to out and return it, using required JSON output
+ * mode. */
+static sds jsonStringOutput(sds out, const char *p, int len, int mode) {
+ if (mode == OUTPUT_JSON) {
+ return escapeJsonString(out, p, len);
+ } else if (mode == OUTPUT_QUOTED_JSON) {
+ /* Need to double-quote backslashes */
+ sds tmp = sdscatrepr(sdsempty(), p, len);
+ int tmplen = sdslen(tmp);
+ char *n = tmp;
+ while (tmplen--) {
+ if (*n == '\\') out = sdscatlen(out, "\\\\", 2);
+ else out = sdscatlen(out, n, 1);
+ n++;
+ }
+
+ sdsfree(tmp);
+ return out;
+ } else {
+ assert(0);
+ }
+}
+
/* Render a reply as JSON appended to 'out' and return it. 'mode' is
 * OUTPUT_JSON or OUTPUT_QUOTED_JSON (see jsonStringOutput). Aggregates map
 * to JSON arrays; RESP3 maps to JSON objects with keys coerced to strings. */
static sds cliFormatReplyJson(sds out, redisReply *r, int mode) {
    unsigned int i;

    switch (r->type) {
    case REDIS_REPLY_ERROR:
        out = sdscat(out,"error:");
        out = jsonStringOutput(out,r->str,strlen(r->str),mode);
        break;
    case REDIS_REPLY_STATUS:
        out = jsonStringOutput(out,r->str,r->len,mode);
        break;
    case REDIS_REPLY_INTEGER:
        out = sdscatprintf(out,"%lld",r->integer);
        break;
    case REDIS_REPLY_DOUBLE:
        out = sdscatprintf(out,"%s",r->str);
        break;
    case REDIS_REPLY_STRING:
    case REDIS_REPLY_VERB:
        out = jsonStringOutput(out,r->str,r->len,mode);
        break;
    case REDIS_REPLY_NIL:
        out = sdscat(out,"null");
        break;
    case REDIS_REPLY_BOOL:
        out = sdscat(out,r->integer ? "true" : "false");
        break;
    case REDIS_REPLY_ARRAY:
    case REDIS_REPLY_SET:
    case REDIS_REPLY_PUSH:
        out = sdscat(out,"[");
        for (i = 0; i < r->elements; i++ ) {
            out = cliFormatReplyJson(out,r->element[i],mode);
            if (i != r->elements-1) out = sdscat(out,",");
        }
        out = sdscat(out,"]");
        break;
    case REDIS_REPLY_MAP:
        out = sdscat(out,"{");
        for (i = 0; i < r->elements; i += 2) {
            redisReply *key = r->element[i];
            if (key->type == REDIS_REPLY_ERROR ||
                key->type == REDIS_REPLY_STATUS ||
                key->type == REDIS_REPLY_STRING ||
                key->type == REDIS_REPLY_VERB)
            {
                out = cliFormatReplyJson(out,key,mode);
            } else {
                /* According to JSON spec, JSON map keys must be strings,
                 * and in RESP3, they can be other types.
                 * The first one(cliFormatReplyJson) is to convert non string type to string
                 * The Second one(escapeJsonString) is to escape the converted string */
                sds keystr = cliFormatReplyJson(sdsempty(),key,mode);
                if (keystr[0] == '"') out = sdscatsds(out,keystr);
                else out = sdscatfmt(out,"\"%S\"",keystr);
                sdsfree(keystr);
            }
            out = sdscat(out,":");

            out = cliFormatReplyJson(out,r->element[i+1],mode);
            if (i != r->elements-2) out = sdscat(out,",");
        }
        out = sdscat(out,"}");
        break;
    default:
        fprintf(stderr,"Unknown reply type: %d\n", r->type);
        exit(1);
    }
    return out;
}
+
+/* Generate reply strings in various output modes */
+static sds cliFormatReply(redisReply *reply, int mode, int verbatim) {
+ sds out;
+
+ if (verbatim) {
+ out = cliFormatReplyRaw(reply);
+ } else if (mode == OUTPUT_STANDARD) {
+ out = cliFormatReplyTTY(reply, "");
+ } else if (mode == OUTPUT_RAW) {
+ out = cliFormatReplyRaw(reply);
+ out = sdscatsds(out, config.cmd_delim);
+ } else if (mode == OUTPUT_CSV) {
+ out = cliFormatReplyCSV(reply);
+ out = sdscatlen(out, "\n", 1);
+ } else if (mode == OUTPUT_JSON || mode == OUTPUT_QUOTED_JSON) {
+ out = cliFormatReplyJson(sdsempty(), reply, mode);
+ out = sdscatlen(out, "\n", 1);
+ } else {
+ fprintf(stderr, "Error: Unknown output encoding %d\n", mode);
+ exit(1);
+ }
+
+ return out;
+}
+
+/* Output any spontaneous PUSH reply we receive */
+static void cliPushHandler(void *privdata, void *reply) {
+ UNUSED(privdata);
+ sds out;
+
+ if (config.output == OUTPUT_STANDARD && isInvalidateReply(reply)) {
+ out = cliFormatInvalidateTTY(reply);
+ } else {
+ out = cliFormatReply(reply, config.output, 0);
+ }
+
+ fwrite(out, sdslen(out), 1, stdout);
+
+ freeReplyObject(reply);
+ sdsfree(out);
+}
+
/* Read one reply from the server and (unless it is a cluster redirection)
 * print it in the configured output mode. Stores the reply in
 * config.last_reply and its type in config.last_cmd_type. Handles cluster
 * MOVED/ASK redirections by updating the target host/port and setting the
 * reissue flags. Returns REDIS_OK/REDIS_ERR; may exit(1) on fatal errors. */
static int cliReadReply(int output_raw_strings) {
    void *_reply;
    redisReply *reply;
    sds out = NULL;
    int output = 1;

    /* Release the previous reply before reading a new one. */
    if (config.last_reply) {
        freeReplyObject(config.last_reply);
        config.last_reply = NULL;
    }

    if (redisGetReply(context,&_reply) != REDIS_OK) {
        /* Ctrl-C during a blocking command: reconnect instead of dying. */
        if (config.blocking_state_aborted) {
            config.blocking_state_aborted = 0;
            config.monitor_mode = 0;
            config.pubsub_mode = 0;
            return cliConnect(CC_FORCE);
        }

        /* We sent SHUTDOWN: a dropped connection is the expected outcome. */
        if (config.shutdown) {
            redisFree(context);
            context = NULL;
            return REDIS_OK;
        }
        if (config.interactive) {
            /* Filter cases where we should reconnect */
            if (context->err == REDIS_ERR_IO &&
                (errno == ECONNRESET || errno == EPIPE))
                return REDIS_ERR;
            if (context->err == REDIS_ERR_EOF)
                return REDIS_ERR;
        }
        cliPrintContextError();
        exit(1);
        return REDIS_ERR; /* avoid compiler warning */
    }

    config.last_reply = reply = (redisReply*)_reply;

    config.last_cmd_type = reply->type;

    /* Check if we need to connect to a different node and reissue the
     * request. */
    if (config.cluster_mode && reply->type == REDIS_REPLY_ERROR &&
        (!strncmp(reply->str,"MOVED ",6) || !strncmp(reply->str,"ASK ",4)))
    {
        char *p = reply->str, *s;
        int slot;

        output = 0;
        /* Comments show the position of the pointer as:
         *
         * [S] for pointer 's'
         * [P] for pointer 'p'
         *
         * NOTE(review): the strchr/strrchr results below are not
         * NULL-checked; this trusts the server to send a well-formed
         * "MOVED <slot> <host>:<port>" error — confirm acceptable. */
        s = strchr(p,' ');    /* MOVED[S]3999 127.0.0.1:6381 */
        p = strchr(s+1,' ');  /* MOVED[S]3999[P]127.0.0.1:6381 */
        *p = '\0';
        slot = atoi(s+1);
        s = strrchr(p+1,':'); /* MOVED 3999[P]127.0.0.1[S]6381 */
        *s = '\0';
        if (p+1 != s) {
            /* Host might be empty, like 'MOVED 3999 :6381', if endpoint type is unknown. Only update the
             * host if it's non-empty. */
            sdsfree(config.conn_info.hostip);
            config.conn_info.hostip = sdsnew(p+1);
        }
        config.conn_info.hostport = atoi(s+1);
        if (config.interactive)
            printf("-> Redirected to slot [%d] located at %s:%d\n",
                slot, config.conn_info.hostip, config.conn_info.hostport);
        config.cluster_reissue_command = 1;
        if (!strncmp(reply->str,"ASK ",4)) {
            /* ASK redirection additionally requires an ASKING command
             * before the reissued command. */
            config.cluster_send_asking = 1;
        }
        cliRefreshPrompt();
    } else if (!config.interactive && config.set_errcode &&
        reply->type == REDIS_REPLY_ERROR)
    {
        fprintf(stderr,"%s\n",reply->str);
        exit(1);
        return REDIS_ERR; /* avoid compiler warning */
    }

    if (output) {
        out = cliFormatReply(reply, config.output, output_raw_strings);
        fwrite(out,sdslen(out),1,stdout);
        fflush(stdout);
        sdsfree(out);
    }
    return REDIS_OK;
}
+
/* Simultaneously wait for pubsub messages from redis and input on stdin.
 * Loops while pubsub mode is active: drains buffered replies, then uses
 * select() on the Redis socket and stdin. Returns when the user presses a
 * key (to type a command) or aborts with Ctrl-C; exits on fatal errors.
 * The terminal is put in raw mode for the duration and restored on return. */
static void cliWaitForMessagesOrStdin(void) {
    /* Only show the status line when writing to a (real or faked) TTY
     * in a non-raw output mode. */
    int show_info = config.output != OUTPUT_RAW && (isatty(STDOUT_FILENO) ||
                                                    getenv("FAKETTY"));
    int use_color = show_info && isColorTerm();
    cliPressAnyKeyTTY();
    while (config.pubsub_mode) {
        /* First check if there are any buffered replies. */
        redisReply *reply;
        do {
            if (redisGetReplyFromReader(context, (void **)&reply) != REDIS_OK) {
                cliPrintContextError();
                exit(1);
            }
            if (reply) {
                sds out = cliFormatReply(reply, config.output, 0);
                fwrite(out,sdslen(out),1,stdout);
                fflush(stdout);
                sdsfree(out);
            }
        } while(reply);

        /* Wait for input, either on the Redis socket or on stdin. */
        struct timeval tv;
        fd_set readfds;
        FD_ZERO(&readfds);
        FD_SET(context->fd, &readfds);
        FD_SET(STDIN_FILENO, &readfds);
        /* 5s timeout so the status line below is refreshed periodically. */
        tv.tv_sec = 5;
        tv.tv_usec = 0;
        if (show_info) {
            if (use_color) printf("\033[1;90m"); /* Bold, bright color. */
            printf("Reading messages... (press Ctrl-C to quit or any key to type command)\r");
            if (use_color) printf("\033[0m"); /* Reset color. */
            fflush(stdout);
        }
        select(context->fd + 1, &readfds, NULL, NULL, &tv);
        if (show_info) {
            printf("\033[K"); /* Erase current line */
            fflush(stdout);
        }
        if (config.blocking_state_aborted) {
            /* Ctrl-C pressed */
            config.blocking_state_aborted = 0;
            config.pubsub_mode = 0;
            if (cliConnect(CC_FORCE) != REDIS_OK) {
                cliPrintContextError();
                exit(1);
            }
            break;
        } else if (FD_ISSET(context->fd, &readfds)) {
            /* Message from Redis */
            if (cliReadReply(0) != REDIS_OK) {
                cliPrintContextError();
                exit(1);
            }
            fflush(stdout);
        } else if (FD_ISSET(STDIN_FILENO, &readfds)) {
            /* Any key pressed */
            break;
        }
    }
    cliRestoreTTY();
}
+
+static int cliSendCommand(int argc, char **argv, long repeat) {
+ char *command = argv[0];
+ size_t *argvlen;
+ int j, output_raw;
+
+ if (context == NULL) return REDIS_ERR;
+
+ output_raw = 0;
+ if (!strcasecmp(command,"info") ||
+ !strcasecmp(command,"lolwut") ||
+ (argc >= 2 && !strcasecmp(command,"debug") &&
+ !strcasecmp(argv[1],"htstats")) ||
+ (argc >= 2 && !strcasecmp(command,"debug") &&
+ !strcasecmp(argv[1],"htstats-key")) ||
+ (argc >= 2 && !strcasecmp(command,"debug") &&
+ !strcasecmp(argv[1],"client-eviction")) ||
+ (argc >= 2 && !strcasecmp(command,"memory") &&
+ (!strcasecmp(argv[1],"malloc-stats") ||
+ !strcasecmp(argv[1],"doctor"))) ||
+ (argc == 2 && !strcasecmp(command,"cluster") &&
+ (!strcasecmp(argv[1],"nodes") ||
+ !strcasecmp(argv[1],"info"))) ||
+ (argc >= 2 && !strcasecmp(command,"client") &&
+ (!strcasecmp(argv[1],"list") ||
+ !strcasecmp(argv[1],"info"))) ||
+ (argc == 3 && !strcasecmp(command,"latency") &&
+ !strcasecmp(argv[1],"graph")) ||
+ (argc == 2 && !strcasecmp(command,"latency") &&
+ !strcasecmp(argv[1],"doctor")) ||
+ /* Format PROXY INFO command for Redis Cluster Proxy:
+ * https://github.com/artix75/redis-cluster-proxy */
+ (argc >= 2 && !strcasecmp(command,"proxy") &&
+ !strcasecmp(argv[1],"info")))
+ {
+ output_raw = 1;
+ }
+
+ if (!strcasecmp(command,"shutdown")) config.shutdown = 1;
+ if (!strcasecmp(command,"monitor")) config.monitor_mode = 1;
+ int is_subscribe = (!strcasecmp(command, "subscribe") ||
+ !strcasecmp(command, "psubscribe") ||
+ !strcasecmp(command, "ssubscribe"));
+ int is_unsubscribe = (!strcasecmp(command, "unsubscribe") ||
+ !strcasecmp(command, "punsubscribe") ||
+ !strcasecmp(command, "sunsubscribe"));
+ if (!strcasecmp(command,"sync") ||
+ !strcasecmp(command,"psync")) config.slave_mode = 1;
+
+ /* When the user manually calls SCRIPT DEBUG, setup the activation of
+ * debugging mode on the next eval if needed. */
+ if (argc == 3 && !strcasecmp(argv[0],"script") &&
+ !strcasecmp(argv[1],"debug"))
+ {
+ if (!strcasecmp(argv[2],"yes") || !strcasecmp(argv[2],"sync")) {
+ config.enable_ldb_on_eval = 1;
+ } else {
+ config.enable_ldb_on_eval = 0;
+ }
+ }
+
+ /* Actually activate LDB on EVAL if needed. */
+ if (!strcasecmp(command,"eval") && config.enable_ldb_on_eval) {
+ config.eval_ldb = 1;
+ config.output = OUTPUT_RAW;
+ }
+
+ /* Setup argument length */
+ argvlen = zmalloc(argc*sizeof(size_t));
+ for (j = 0; j < argc; j++)
+ argvlen[j] = sdslen(argv[j]);
+
+ /* Negative repeat is allowed and causes infinite loop,
+ works well with the interval option. */
+ while(repeat < 0 || repeat-- > 0) {
+ redisAppendCommandArgv(context,argc,(const char**)argv,argvlen);
+
+ if (config.monitor_mode) {
+ do {
+ if (cliReadReply(output_raw) != REDIS_OK) {
+ cliPrintContextError();
+ exit(1);
+ }
+ fflush(stdout);
+
+ /* This happens when the MONITOR command returns an error. */
+ if (config.last_cmd_type == REDIS_REPLY_ERROR)
+ config.monitor_mode = 0;
+ } while(config.monitor_mode);
+ zfree(argvlen);
+ return REDIS_OK;
+ }
+
+ int num_expected_pubsub_push = 0;
+ if (is_subscribe || is_unsubscribe) {
+ /* When a push callback is set, redisGetReply (hiredis) loops until
+ * an in-band message is received, but these commands are confirmed
+ * using push replies only. There is one push reply per channel if
+ * channels are specified, otherwise at least one. */
+ num_expected_pubsub_push = argc > 1 ? argc - 1 : 1;
+ /* Unset our default PUSH handler so this works in RESP2/RESP3 */
+ redisSetPushCallback(context, NULL);
+ }
+
+ if (config.slave_mode) {
+ printf("Entering replica output mode... (press Ctrl-C to quit)\n");
+ slaveMode(0);
+ config.slave_mode = 0;
+ zfree(argvlen);
+ return REDIS_ERR; /* Error = slaveMode lost connection to master */
+ }
+
+ /* Read response, possibly skipping pubsub/push messages. */
+ while (1) {
+ if (cliReadReply(output_raw) != REDIS_OK) {
+ zfree(argvlen);
+ return REDIS_ERR;
+ }
+ fflush(stdout);
+ if (config.pubsub_mode || num_expected_pubsub_push > 0) {
+ if (isPubsubPush(config.last_reply)) {
+ if (num_expected_pubsub_push > 0 &&
+ !strcasecmp(config.last_reply->element[0]->str, command))
+ {
+ /* This pushed message confirms the
+ * [p|s][un]subscribe command. */
+ if (is_subscribe && !config.pubsub_mode) {
+ config.pubsub_mode = 1;
+ cliRefreshPrompt();
+ }
+ if (--num_expected_pubsub_push > 0) {
+ continue; /* We need more of these. */
+ }
+ } else {
+ continue; /* Skip this pubsub message. */
+ }
+ } else if (config.last_reply->type == REDIS_REPLY_PUSH) {
+ continue; /* Skip other push message. */
+ }
+ }
+
+ /* Store database number when SELECT was successfully executed. */
+ if (!strcasecmp(command,"select") && argc == 2 &&
+ config.last_cmd_type != REDIS_REPLY_ERROR)
+ {
+ config.conn_info.input_dbnum = config.dbnum = atoi(argv[1]);
+ cliRefreshPrompt();
+ } else if (!strcasecmp(command,"auth") && (argc == 2 || argc == 3)) {
+ cliSelect();
+ } else if (!strcasecmp(command,"multi") && argc == 1 &&
+ config.last_cmd_type != REDIS_REPLY_ERROR)
+ {
+ config.in_multi = 1;
+ config.pre_multi_dbnum = config.dbnum;
+ cliRefreshPrompt();
+ } else if (!strcasecmp(command,"exec") && argc == 1 && config.in_multi) {
+ config.in_multi = 0;
+ if (config.last_cmd_type == REDIS_REPLY_ERROR ||
+ config.last_cmd_type == REDIS_REPLY_NIL)
+ {
+ config.conn_info.input_dbnum = config.dbnum = config.pre_multi_dbnum;
+ }
+ cliRefreshPrompt();
+ } else if (!strcasecmp(command,"discard") && argc == 1 &&
+ config.last_cmd_type != REDIS_REPLY_ERROR)
+ {
+ config.in_multi = 0;
+ config.conn_info.input_dbnum = config.dbnum = config.pre_multi_dbnum;
+ cliRefreshPrompt();
+ } else if (!strcasecmp(command,"reset") && argc == 1 &&
+ config.last_cmd_type != REDIS_REPLY_ERROR) {
+ config.in_multi = 0;
+ config.dbnum = 0;
+ config.conn_info.input_dbnum = 0;
+ config.current_resp3 = 0;
+ if (config.pubsub_mode && config.push_output) {
+ redisSetPushCallback(context, cliPushHandler);
+ }
+ config.pubsub_mode = 0;
+ cliRefreshPrompt();
+ } else if (!strcasecmp(command,"hello")) {
+ if (config.last_cmd_type == REDIS_REPLY_MAP) {
+ config.current_resp3 = 1;
+ } else if (config.last_cmd_type == REDIS_REPLY_ARRAY) {
+ config.current_resp3 = 0;
+ }
+ } else if ((is_subscribe || is_unsubscribe) && !config.pubsub_mode) {
+ /* We didn't enter pubsub mode. Restore push callback. */
+ if (config.push_output)
+ redisSetPushCallback(context, cliPushHandler);
+ }
+
+ break;
+ }
+ if (config.cluster_reissue_command){
+ /* If we need to reissue the command, break to prevent a
+ further 'repeat' number of dud interactions */
+ break;
+ }
+ if (config.interval) usleep(config.interval);
+ fflush(stdout); /* Make it grep friendly */
+ }
+
+ zfree(argvlen);
+ return REDIS_OK;
+}
+
+/* Send a command reconnecting the link if needed. */
+static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, ...) {
+ redisReply *reply = NULL;
+ int tries = 0;
+ va_list ap;
+
+ assert(!c->err);
+ while(reply == NULL) {
+ while (c->err & (REDIS_ERR_IO | REDIS_ERR_EOF)) {
+ printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */
+ printf("Reconnecting... %d\r", ++tries);
+ fflush(stdout);
+
+ redisFree(c);
+ c = redisConnect(config.conn_info.hostip,config.conn_info.hostport);
+ if (!c->err && config.tls) {
+ const char *err = NULL;
+ if (cliSecureConnection(c, config.sslconfig, &err) == REDIS_ERR && err) {
+ fprintf(stderr, "TLS Error: %s\n", err);
+ exit(1);
+ }
+ }
+ usleep(1000000);
+ }
+
+ va_start(ap,fmt);
+ reply = redisvCommand(c,fmt,ap);
+ va_end(ap);
+
+ if (c->err && !(c->err & (REDIS_ERR_IO | REDIS_ERR_EOF))) {
+ fprintf(stderr, "Error: %s\n", c->errstr);
+ exit(1);
+ } else if (tries > 0) {
+ printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */
+ }
+ }
+
+ context = c;
+ return reply;
+}
+
+/*------------------------------------------------------------------------------
+ * User interface
+ *--------------------------------------------------------------------------- */
+
/* Parse the command line options, filling the global 'config' structure
 * as a side effect.
 *
 * Returns the index of the first non-option argument, i.e. where the
 * actual Redis command (if any) starts, so that the caller can forward it.
 * Usage errors and invalid values terminate the process directly. */
static int parseOptions(int argc, char **argv) {
    int i;

    for (i = 1; i < argc; i++) {
        /* Options that consume a value need at least one argument after
         * them; 'lastarg' guards every such "++i" access. */
        int lastarg = i==argc-1;

        if (!strcmp(argv[i],"-h") && !lastarg) {
            sdsfree(config.conn_info.hostip);
            config.conn_info.hostip = sdsnew(argv[++i]);
        } else if (!strcmp(argv[i],"-h") && lastarg) {
            /* A trailing -h with no hostname is treated as a help request. */
            usage(0);
        } else if (!strcmp(argv[i],"--help")) {
            usage(0);
        } else if (!strcmp(argv[i],"-x")) {
            config.stdin_lastarg = 1;
        } else if (!strcmp(argv[i], "-X") && !lastarg) {
            config.stdin_tag_arg = 1;
            config.stdin_tag_name = argv[++i];
        } else if (!strcmp(argv[i],"-p") && !lastarg) {
            config.conn_info.hostport = atoi(argv[++i]);
            if (config.conn_info.hostport < 0 || config.conn_info.hostport > 65535) {
                fprintf(stderr, "Invalid server port.\n");
                exit(1);
            }
        } else if (!strcmp(argv[i],"-s") && !lastarg) {
            config.hostsocket = argv[++i];
        } else if (!strcmp(argv[i],"-r") && !lastarg) {
            /* Negative repeat is allowed: it means "repeat forever". */
            config.repeat = strtoll(argv[++i],NULL,10);
        } else if (!strcmp(argv[i],"-i") && !lastarg) {
            /* Interval is given in (possibly fractional) seconds but is
             * stored internally as microseconds for usleep(). */
            double seconds = atof(argv[++i]);
            config.interval = seconds*1000000;
        } else if (!strcmp(argv[i],"-n") && !lastarg) {
            config.conn_info.input_dbnum = atoi(argv[++i]);
        } else if (!strcmp(argv[i], "--no-auth-warning")) {
            config.no_auth_warning = 1;
        } else if (!strcmp(argv[i], "--askpass")) {
            config.askpass = 1;
        } else if ((!strcmp(argv[i],"-a") || !strcmp(argv[i],"--pass"))
                   && !lastarg)
        {
            config.conn_info.auth = sdsnew(argv[++i]);
        } else if (!strcmp(argv[i],"--user") && !lastarg) {
            config.conn_info.user = sdsnew(argv[++i]);
        } else if (!strcmp(argv[i],"-u") && !lastarg) {
            parseRedisUri(argv[++i],"redis-cli",&config.conn_info,&config.tls);
            if (config.conn_info.hostport < 0 || config.conn_info.hostport > 65535) {
                fprintf(stderr, "Invalid server port.\n");
                exit(1);
            }
        } else if (!strcmp(argv[i],"--raw")) {
            config.output = OUTPUT_RAW;
        } else if (!strcmp(argv[i],"--no-raw")) {
            config.output = OUTPUT_STANDARD;
        } else if (!strcmp(argv[i],"--quoted-input")) {
            config.quoted_input = 1;
        } else if (!strcmp(argv[i],"--csv")) {
            config.output = OUTPUT_CSV;
        } else if (!strcmp(argv[i],"--json")) {
            /* Not overwrite explicit value by -3 */
            if (config.resp3 == 0) {
                config.resp3 = 2;
            }
            config.output = OUTPUT_JSON;
        } else if (!strcmp(argv[i],"--quoted-json")) {
            /* Not overwrite explicit value by -3*/
            if (config.resp3 == 0) {
                config.resp3 = 2;
            }
            config.output = OUTPUT_QUOTED_JSON;
        } else if (!strcmp(argv[i],"--latency")) {
            config.latency_mode = 1;
        } else if (!strcmp(argv[i],"--latency-dist")) {
            config.latency_dist_mode = 1;
        } else if (!strcmp(argv[i],"--mono")) {
            /* Monochrome palette for --latency-dist spectrum output. */
            spectrum_palette = spectrum_palette_mono;
            spectrum_palette_size = spectrum_palette_mono_size;
        } else if (!strcmp(argv[i],"--latency-history")) {
            config.latency_mode = 1;
            config.latency_history = 1;
        } else if (!strcmp(argv[i],"--lru-test") && !lastarg) {
            config.lru_test_mode = 1;
            config.lru_test_sample_size = strtoll(argv[++i],NULL,10);
        } else if (!strcmp(argv[i],"--slave")) {
            config.slave_mode = 1;
        } else if (!strcmp(argv[i],"--replica")) {
            /* --replica is the preferred spelling of --slave. */
            config.slave_mode = 1;
        } else if (!strcmp(argv[i],"--stat")) {
            config.stat_mode = 1;
        } else if (!strcmp(argv[i],"--scan")) {
            config.scan_mode = 1;
        } else if (!strcmp(argv[i],"--pattern") && !lastarg) {
            sdsfree(config.pattern);
            config.pattern = sdsnew(argv[++i]);
        } else if (!strcmp(argv[i],"--count") && !lastarg) {
            config.count = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"--quoted-pattern") && !lastarg) {
            sdsfree(config.pattern);
            config.pattern = unquoteCString(argv[++i]);
            if (!config.pattern) {
                fprintf(stderr,"Invalid quoted string specified for --quoted-pattern.\n");
                exit(1);
            }
        } else if (!strcmp(argv[i],"--intrinsic-latency") && !lastarg) {
            config.intrinsic_latency_mode = 1;
            config.intrinsic_latency_duration = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"--rdb") && !lastarg) {
            config.getrdb_mode = 1;
            config.rdb_filename = argv[++i];
        } else if (!strcmp(argv[i],"--functions-rdb") && !lastarg) {
            config.get_functions_rdb_mode = 1;
            config.rdb_filename = argv[++i];
        } else if (!strcmp(argv[i],"--pipe")) {
            config.pipe_mode = 1;
        } else if (!strcmp(argv[i],"--pipe-timeout") && !lastarg) {
            config.pipe_timeout = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"--bigkeys")) {
            config.bigkeys = 1;
        } else if (!strcmp(argv[i],"--memkeys")) {
            config.memkeys = 1;
            config.memkeys_samples = 0; /* use redis default */
        } else if (!strcmp(argv[i],"--memkeys-samples") && !lastarg) {
            config.memkeys = 1;
            config.memkeys_samples = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"--hotkeys")) {
            config.hotkeys = 1;
        } else if (!strcmp(argv[i],"--eval") && !lastarg) {
            config.eval = argv[++i];
        } else if (!strcmp(argv[i],"--ldb")) {
            config.eval_ldb = 1;
            config.output = OUTPUT_RAW;
        } else if (!strcmp(argv[i],"--ldb-sync-mode")) {
            config.eval_ldb = 1;
            config.eval_ldb_sync = 1;
            config.output = OUTPUT_RAW;
        } else if (!strcmp(argv[i],"-c")) {
            config.cluster_mode = 1;
        } else if (!strcmp(argv[i],"-d") && !lastarg) {
            sdsfree(config.mb_delim);
            config.mb_delim = sdsnew(argv[++i]);
        } else if (!strcmp(argv[i],"-D") && !lastarg) {
            sdsfree(config.cmd_delim);
            config.cmd_delim = sdsnew(argv[++i]);
        } else if (!strcmp(argv[i],"-e")) {
            config.set_errcode = 1;
        } else if (!strcmp(argv[i],"--verbose")) {
            config.verbose = 1;
        } else if (!strcmp(argv[i],"--cluster") && !lastarg) {
            if (CLUSTER_MANAGER_MODE()) usage(1);
            /* Consume the cluster-manager subcommand plus every following
             * argument up to (but not including) the next '-' option. */
            char *cmd = argv[++i];
            int j = i;
            while (j < argc && argv[j][0] != '-') j++;
            if (j > i) j--;
            int err = createClusterManagerCommand(cmd, j - i, argv + i + 1);
            if (err) exit(err);
            i = j;
        } else if (!strcmp(argv[i],"--cluster") && lastarg) {
            usage(1);
        } else if ((!strcmp(argv[i],"--cluster-only-masters"))) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_MASTERS_ONLY;
        } else if ((!strcmp(argv[i],"--cluster-only-replicas"))) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_SLAVES_ONLY;
        } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) {
            config.cluster_manager_command.replicas = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"--cluster-master-id") && !lastarg) {
            config.cluster_manager_command.master_id = argv[++i];
        } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) {
            config.cluster_manager_command.from = argv[++i];
        } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) {
            config.cluster_manager_command.to = argv[++i];
        } else if (!strcmp(argv[i],"--cluster-from-user") && !lastarg) {
            config.cluster_manager_command.from_user = argv[++i];
        } else if (!strcmp(argv[i],"--cluster-from-pass") && !lastarg) {
            config.cluster_manager_command.from_pass = argv[++i];
        } else if (!strcmp(argv[i], "--cluster-from-askpass")) {
            config.cluster_manager_command.from_askpass = 1;
        } else if (!strcmp(argv[i],"--cluster-weight") && !lastarg) {
            if (config.cluster_manager_command.weight != NULL) {
                fprintf(stderr, "WARNING: you cannot use --cluster-weight "
                                "more than once.\n"
                                "You can set more weights by adding them "
                                "as a space-separated list, ie:\n"
                                "--cluster-weight n1=w n2=w\n");
                exit(1);
            }
            /* Collect every following "name=weight" token until the next
             * option or a token with no '=' is found. */
            int widx = i + 1;
            char **weight = argv + widx;
            int wargc = 0;
            for (; widx < argc; widx++) {
                if (strstr(argv[widx], "--") == argv[widx]) break;
                if (strchr(argv[widx], '=') == NULL) break;
                wargc++;
            }
            if (wargc > 0) {
                config.cluster_manager_command.weight = weight;
                config.cluster_manager_command.weight_argc = wargc;
                i += wargc;
            }
        } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) {
            config.cluster_manager_command.slots = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) {
            config.cluster_manager_command.timeout = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) {
            config.cluster_manager_command.pipeline = atoi(argv[++i]);
        } else if (!strcmp(argv[i],"--cluster-threshold") && !lastarg) {
            config.cluster_manager_command.threshold = atof(argv[++i]);
        } else if (!strcmp(argv[i],"--cluster-yes")) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_YES;
        } else if (!strcmp(argv[i],"--cluster-simulate")) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_SIMULATE;
        } else if (!strcmp(argv[i],"--cluster-replace")) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_REPLACE;
        } else if (!strcmp(argv[i],"--cluster-copy")) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_COPY;
        } else if (!strcmp(argv[i],"--cluster-slave")) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_SLAVE;
        } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER;
        } else if (!strcmp(argv[i],"--cluster-search-multiple-owners")) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_CHECK_OWNERS;
        } else if (!strcmp(argv[i],"--cluster-fix-with-unreachable-masters")) {
            config.cluster_manager_command.flags |=
                CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS;
        } else if (!strcmp(argv[i],"--test_hint") && !lastarg) {
            config.test_hint = argv[++i];
        } else if (!strcmp(argv[i],"--test_hint_file") && !lastarg) {
            config.test_hint_file = argv[++i];
#ifdef USE_OPENSSL
        } else if (!strcmp(argv[i],"--tls")) {
            config.tls = 1;
        } else if (!strcmp(argv[i],"--sni") && !lastarg) {
            config.sslconfig.sni = argv[++i];
        } else if (!strcmp(argv[i],"--cacertdir") && !lastarg) {
            config.sslconfig.cacertdir = argv[++i];
        } else if (!strcmp(argv[i],"--cacert") && !lastarg) {
            config.sslconfig.cacert = argv[++i];
        } else if (!strcmp(argv[i],"--cert") && !lastarg) {
            config.sslconfig.cert = argv[++i];
        } else if (!strcmp(argv[i],"--key") && !lastarg) {
            config.sslconfig.key = argv[++i];
        } else if (!strcmp(argv[i],"--tls-ciphers") && !lastarg) {
            config.sslconfig.ciphers = argv[++i];
        } else if (!strcmp(argv[i],"--insecure")) {
            config.sslconfig.skip_cert_verify = 1;
        #ifdef TLS1_3_VERSION
        } else if (!strcmp(argv[i],"--tls-ciphersuites") && !lastarg) {
            config.sslconfig.ciphersuites = argv[++i];
        #endif
#endif
        } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) {
            sds version = cliVersion();
            printf("redis-cli %s\n", version);
            sdsfree(version);
            exit(0);
        } else if (!strcmp(argv[i],"-2")) {
            config.resp2 = 1;
        } else if (!strcmp(argv[i],"-3")) {
            config.resp3 = 1;
        } else if (!strcmp(argv[i],"--show-pushes") && !lastarg) {
            /* Accept any prefix starting with 'y'/'n' ("y", "yes", ...). */
            char *argval = argv[++i];
            if (!strncasecmp(argval, "n", 1)) {
                config.push_output = 0;
            } else if (!strncasecmp(argval, "y", 1)) {
                config.push_output = 1;
            } else {
                fprintf(stderr, "Unknown --show-pushes value '%s' "
                        "(valid: '[y]es', '[n]o')\n", argval);
            }
        } else if (CLUSTER_MANAGER_MODE() && argv[i][0] != '-') {
            /* In cluster-manager mode, bare words are the subcommand's own
             * arguments: swallow them all up to the next option. */
            if (config.cluster_manager_command.argc == 0) {
                int j = i + 1;
                while (j < argc && argv[j][0] != '-') j++;
                int cmd_argc = j - i;
                config.cluster_manager_command.argc = cmd_argc;
                config.cluster_manager_command.argv = argv + i;
                if (cmd_argc > 1) i = j - 1;
            }
        } else {
            if (argv[i][0] == '-') {
                fprintf(stderr,
                    "Unrecognized option or bad number of args for: '%s'\n",
                    argv[i]);
                exit(1);
            } else {
                /* Likely the command name, stop here. */
                break;
            }
        }
    }

    /* Post-parsing validation of mutually exclusive / dependent options. */
    if (config.hostsocket && config.cluster_mode) {
        fprintf(stderr,"Options -c and -s are mutually exclusive.\n");
        exit(1);
    }

    /* resp3 == 2 means "implied by --json", which -2 may still override;
     * only an explicit -3 (resp3 == 1) conflicts with -2. */
    if (config.resp2 && config.resp3 == 1) {
        fprintf(stderr,"Options -2 and -3 are mutually exclusive.\n");
        exit(1);
    }

    /* --ldb requires --eval. */
    if (config.eval_ldb && config.eval == NULL) {
        fprintf(stderr,"Options --ldb and --ldb-sync-mode require --eval.\n");
        fprintf(stderr,"Try %s --help for more information.\n", argv[0]);
        exit(1);
    }

    if (!config.no_auth_warning && config.conn_info.auth != NULL) {
        fputs("Warning: Using a password with '-a' or '-u' option on the command"
              " line interface may not be safe.\n", stderr);
    }

    if (config.get_functions_rdb_mode && config.getrdb_mode) {
        fprintf(stderr,"Option --functions-rdb and --rdb are mutually exclusive.\n");
        exit(1);
    }

    if (config.stdin_lastarg && config.stdin_tag_arg) {
        fprintf(stderr, "Options -x and -X are mutually exclusive.\n");
        exit(1);
    }

    return i;
}
+
+static void parseEnv(void) {
+ /* Set auth from env, but do not overwrite CLI arguments if passed */
+ char *auth = getenv(REDIS_CLI_AUTH_ENV);
+ if (auth != NULL && config.conn_info.auth == NULL) {
+ config.conn_info.auth = auth;
+ }
+
+ char *cluster_yes = getenv(REDIS_CLI_CLUSTER_YES_ENV);
+ if (cluster_yes != NULL && !strcmp(cluster_yes, "1")) {
+ config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_YES;
+ }
+}
+
/* Print the redis-cli usage/help text, then terminate the process.
 * err == 0: print to stdout and exit successfully (--help);
 * err != 0: print to stderr and exit with that status (usage error). */
static void usage(int err) {
    sds version = cliVersion();
    FILE *target = err ? stderr: stdout;
    /* TLS options are only compiled in (and therefore documented) when
     * the build has OpenSSL support. */
    const char *tls_usage =
#ifdef USE_OPENSSL
" --tls                 Establish a secure TLS connection.\n"
" --sni <host>          Server name indication for TLS.\n"
" --cacert <file>       CA Certificate file to verify with.\n"
" --cacertdir <dir>     Directory where trusted CA certificates are stored.\n"
"                       If neither cacert nor cacertdir are specified, the default\n"
"                       system-wide trusted root certs configuration will apply.\n"
" --insecure            Allow insecure TLS connection by skipping cert validation.\n"
" --cert <file>         Client certificate to authenticate with.\n"
" --key <file>          Private key file to authenticate with.\n"
" --tls-ciphers <list>  Sets the list of preferred ciphers (TLSv1.2 and below)\n"
"                       in order of preference from highest to lowest separated by colon (\":\").\n"
"                       See the ciphers(1ssl) manpage for more information about the syntax of this string.\n"
#ifdef TLS1_3_VERSION
" --tls-ciphersuites <list> Sets the list of preferred ciphersuites (TLSv1.3)\n"
"                       in order of preference from highest to lowest separated by colon (\":\").\n"
"                       See the ciphers(1ssl) manpage for more information about the syntax of this string,\n"
"                       and specifically for TLSv1.3 ciphersuites.\n"
#endif
#endif
"";

    fprintf(target,
"redis-cli %s\n"
"\n"
"Usage: redis-cli [OPTIONS] [cmd [arg [arg ...]]]\n"
"  -h <hostname>      Server hostname (default: 127.0.0.1).\n"
"  -p <port>          Server port (default: 6379).\n"
"  -s <socket>        Server socket (overrides hostname and port).\n"
"  -a <password>      Password to use when connecting to the server.\n"
"                     You can also use the " REDIS_CLI_AUTH_ENV " environment\n"
"                     variable to pass this password more safely\n"
"                     (if both are used, this argument takes precedence).\n"
"  --user <username>  Used to send ACL style 'AUTH username pass'. Needs -a.\n"
"  --pass <password>  Alias of -a for consistency with the new --user option.\n"
"  --askpass          Force user to input password with mask from STDIN.\n"
"                     If this argument is used, '-a' and " REDIS_CLI_AUTH_ENV "\n"
"                     environment variable will be ignored.\n"
"  -u <uri>           Server URI.\n"
"  -r <repeat>        Execute specified command N times.\n"
"  -i <interval>      When -r is used, waits <interval> seconds per command.\n"
"                     It is possible to specify sub-second times like -i 0.1.\n"
"                     This interval is also used in --scan and --stat per cycle.\n"
"                     and in --bigkeys, --memkeys, and --hotkeys per 100 cycles.\n"
"  -n <db>            Database number.\n"
"  -2                 Start session in RESP2 protocol mode.\n"
"  -3                 Start session in RESP3 protocol mode.\n"
"  -x                 Read last argument from STDIN (see example below).\n"
"  -X                 Read <tag> argument from STDIN (see example below).\n"
"  -d <delimiter>     Delimiter between response bulks for raw formatting (default: \\n).\n"
"  -D <delimiter>     Delimiter between responses for raw formatting (default: \\n).\n"
"  -c                 Enable cluster mode (follow -ASK and -MOVED redirections).\n"
"  -e                 Return exit error code when command execution fails.\n"
"%s"
"  --raw              Use raw formatting for replies (default when STDOUT is\n"
"                     not a tty).\n"
"  --no-raw           Force formatted output even when STDOUT is not a tty.\n"
"  --quoted-input     Force input to be handled as quoted strings.\n"
"  --csv              Output in CSV format.\n"
"  --json             Output in JSON format (default RESP3, use -2 if you want to use with RESP2).\n"
"  --quoted-json      Same as --json, but produce ASCII-safe quoted strings, not Unicode.\n"
"  --show-pushes <yn> Whether to print RESP3 PUSH messages.  Enabled by default when\n"
"                     STDOUT is a tty but can be overridden with --show-pushes no.\n"
"  --stat             Print rolling stats about server: mem, clients, ...\n",
version,tls_usage);

    fprintf(target,
"  --latency          Enter a special mode continuously sampling latency.\n"
"                     If you use this mode in an interactive session it runs\n"
"                     forever displaying real-time stats. Otherwise if --raw or\n"
"                     --csv is specified, or if you redirect the output to a non\n"
"                     TTY, it samples the latency for 1 second (you can use\n"
"                     -i to change the interval), then produces a single output\n"
"                     and exits.\n"
"  --latency-history  Like --latency but tracking latency changes over time.\n"
"                     Default time interval is 15 sec. Change it using -i.\n"
"  --latency-dist     Shows latency as a spectrum, requires xterm 256 colors.\n"
"                     Default time interval is 1 sec. Change it using -i.\n"
"  --lru-test <keys>  Simulate a cache workload with an 80-20 distribution.\n"
"  --replica          Simulate a replica showing commands received from the master.\n"
"  --rdb <filename>   Transfer an RDB dump from remote server to local file.\n"
"                     Use filename of \"-\" to write to stdout.\n"
"  --functions-rdb <filename> Like --rdb but only get the functions (not the keys)\n"
"                     when getting the RDB dump file.\n"
"  --pipe             Transfer raw Redis protocol from stdin to server.\n"
"  --pipe-timeout <n> In --pipe mode, abort with error if after sending all data.\n"
"                     no reply is received within <n> seconds.\n"
"                     Default timeout: %d. Use 0 to wait forever.\n",
    REDIS_CLI_DEFAULT_PIPE_TIMEOUT);
    fprintf(target,
"  --bigkeys          Sample Redis keys looking for keys with many elements (complexity).\n"
"  --memkeys          Sample Redis keys looking for keys consuming a lot of memory.\n"
"  --memkeys-samples <n> Sample Redis keys looking for keys consuming a lot of memory.\n"
"                     And define number of key elements to sample\n"
"  --hotkeys          Sample Redis keys looking for hot keys.\n"
"                     only works when maxmemory-policy is *lfu.\n"
"  --scan             List all keys using the SCAN command.\n"
"  --pattern <pat>    Keys pattern when using the --scan, --bigkeys or --hotkeys\n"
"                     options (default: *).\n"
"  --count <count>    Count option when using the --scan, --bigkeys or --hotkeys (default: 10).\n"
"  --quoted-pattern <pat> Same as --pattern, but the specified string can be\n"
"                         quoted, in order to pass an otherwise non binary-safe string.\n"
"  --intrinsic-latency <sec> Run a test to measure intrinsic system latency.\n"
"                     The test will run for the specified amount of seconds.\n"
"  --eval <file>      Send an EVAL command using the Lua script at <file>.\n"
"  --ldb              Used with --eval enable the Redis Lua debugger.\n"
"  --ldb-sync-mode    Like --ldb but uses the synchronous Lua debugger, in\n"
"                     this mode the server is blocked and script changes are\n"
"                     not rolled back from the server memory.\n"
"  --cluster <command> [args...] [opts...]\n"
"                     Cluster Manager command and arguments (see below).\n"
"  --verbose          Verbose mode.\n"
"  --no-auth-warning  Don't show warning message when using password on command\n"
"                     line interface.\n"
"  --help             Output this help and exit.\n"
"  --version          Output version and exit.\n"
"\n");
    /* Using another fprintf call to avoid -Woverlength-strings compile warning */
    fprintf(target,
"Cluster Manager Commands:\n"
"  Use --cluster help to list all available cluster manager commands.\n"
"\n"
"Examples:\n"
"  cat /etc/passwd | redis-cli -x set mypasswd\n"
"  redis-cli -D \"\" --raw dump key > key.dump && redis-cli -X dump_tag restore key2 0 dump_tag replace < key.dump\n"
"  redis-cli -r 100 lpush mylist x\n"
"  redis-cli -r 100 -i 1 info | grep used_memory_human:\n"
"  redis-cli --quoted-input set '\"null-\\x00-separated\"' value\n"
"  redis-cli --eval myscript.lua key1 key2 , arg1 arg2 arg3\n"
"  redis-cli --scan --pattern '*:12345*'\n"
"  redis-cli --scan --pattern '*:12345*' --count 100\n"
"\n"
"  (Note: when using --eval the comma separates KEYS[] from ARGV[] items)\n"
"\n"
"When no command is given, redis-cli starts in interactive mode.\n"
"Type \"help\" in interactive mode for information on available commands\n"
"and settings.\n"
"\n");
    sdsfree(version);
    exit(err);
}
+
+static int confirmWithYes(char *msg, int ignore_force) {
+ /* if --cluster-yes option is set and ignore_force is false,
+ * do not prompt for an answer */
+ if (!ignore_force &&
+ (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_YES)) {
+ return 1;
+ }
+
+ printf("%s (type 'yes' to accept): ", msg);
+ fflush(stdout);
+ char buf[4];
+ int nread = read(fileno(stdin),buf,4);
+ buf[3] = '\0';
+ return (nread != 0 && !strcmp("yes", buf));
+}
+
+static int issueCommandRepeat(int argc, char **argv, long repeat) {
+ /* In Lua debugging mode, we want to pass the "help" to Redis to get
+ * it's own HELP message, rather than handle it by the CLI, see ldbRepl.
+ *
+ * For the normal Redis HELP, we can process it without a connection. */
+ if (!config.eval_ldb &&
+ (!strcasecmp(argv[0],"help") || !strcasecmp(argv[0],"?")))
+ {
+ cliOutputHelp(--argc, ++argv);
+ return REDIS_OK;
+ }
+
+ while (1) {
+ if (config.cluster_reissue_command || context == NULL ||
+ context->err == REDIS_ERR_IO || context->err == REDIS_ERR_EOF)
+ {
+ if (cliConnect(CC_FORCE) != REDIS_OK) {
+ cliPrintContextError();
+ config.cluster_reissue_command = 0;
+ return REDIS_ERR;
+ }
+ }
+ config.cluster_reissue_command = 0;
+ if (config.cluster_send_asking) {
+ if (cliSendAsking() != REDIS_OK) {
+ cliPrintContextError();
+ return REDIS_ERR;
+ }
+ }
+ if (cliSendCommand(argc,argv,repeat) != REDIS_OK) {
+ cliPrintContextError();
+ redisFree(context);
+ context = NULL;
+ return REDIS_ERR;
+ }
+
+ /* Issue the command again if we got redirected in cluster mode */
+ if (config.cluster_mode && config.cluster_reissue_command) {
+ continue;
+ }
+ break;
+ }
+ return REDIS_OK;
+}
+
/* Issue a command using the globally configured repeat count (the '-r'
 * command line option, default 1). Returns REDIS_OK or REDIS_ERR. */
static int issueCommand(int argc, char **argv) {
    return issueCommandRepeat(argc, argv, config.repeat);
}
+
+/* Split the user provided command into multiple SDS arguments.
+ * This function normally uses sdssplitargs() from sds.c which is able
+ * to understand "quoted strings", escapes and so forth. However when
+ * we are in Lua debugging mode and the "eval" command is used, we want
+ * the remaining Lua script (after "e " or "eval ") to be passed verbatim
+ * as a single big argument. */
+static sds *cliSplitArgs(char *line, int *argc) {
+ if (config.eval_ldb && (strstr(line,"eval ") == line ||
+ strstr(line,"e ") == line))
+ {
+ sds *argv = sds_malloc(sizeof(sds)*2);
+ *argc = 2;
+ int len = strlen(line);
+ int elen = line[1] == ' ' ? 2 : 5; /* "e " or "eval "? */
+ argv[0] = sdsnewlen(line,elen-1);
+ argv[1] = sdsnewlen(line+elen,len-elen);
+ return argv;
+ } else {
+ return sdssplitargs(line,argc);
+ }
+}
+
+/* Set the CLI preferences. This function is invoked when an interactive
+ * ":command" is called, or when reading ~/.redisclirc file, in order to
+ * set user preferences. */
+void cliSetPreferences(char **argv, int argc, int interactive) {
+ if (!strcasecmp(argv[0],":set") && argc >= 2) {
+ if (!strcasecmp(argv[1],"hints")) pref.hints = 1;
+ else if (!strcasecmp(argv[1],"nohints")) pref.hints = 0;
+ else {
+ printf("%sunknown redis-cli preference '%s'\n",
+ interactive ? "" : ".redisclirc: ",
+ argv[1]);
+ }
+ } else {
+ printf("%sunknown redis-cli internal command '%s'\n",
+ interactive ? "" : ".redisclirc: ",
+ argv[0]);
+ }
+}
+
+/* Load the ~/.redisclirc file if any. */
+void cliLoadPreferences(void) {
+ sds rcfile = getDotfilePath(REDIS_CLI_RCFILE_ENV,REDIS_CLI_RCFILE_DEFAULT);
+ if (rcfile == NULL) return;
+ FILE *fp = fopen(rcfile,"r");
+ char buf[1024];
+
+ if (fp) {
+ while(fgets(buf,sizeof(buf),fp) != NULL) {
+ sds *argv;
+ int argc;
+
+ argv = sdssplitargs(buf,&argc);
+ if (argc > 0) cliSetPreferences(argv,argc,0);
+ sdsfreesplitres(argv,argc);
+ }
+ fclose(fp);
+ }
+ sdsfree(rcfile);
+}
+
/* Return 1 when a command may carry sensitive information and therefore
 * must not be written to the history file. Covered commands:
 * - AUTH
 * - ACL SETUSER
 * - CONFIG SET masterauth/masteruser/requirepass
 * - HELLO with [AUTH username password]
 * - MIGRATE with [AUTH password] or [AUTH2 username password] */
static int isSensitiveCommand(int argc, char **argv) {
    const char *cmd = argv[0];

    if (strcasecmp(cmd,"auth") == 0) return 1;

    if (argc > 1 &&
        strcasecmp(cmd,"acl") == 0 &&
        strcasecmp(argv[1],"setuser") == 0) return 1;

    if (argc > 2 &&
        strcasecmp(cmd,"config") == 0 &&
        strcasecmp(argv[1],"set") == 0)
    {
        /* Scan the parameter names of each name/value pair. */
        for (int j = 2; j < argc; j += 2) {
            if (strcasecmp(argv[j],"masterauth") == 0 ||
                strcasecmp(argv[j],"masteruser") == 0 ||
                strcasecmp(argv[j],"requirepass") == 0) return 1;
        }
        return 0;
    }

    /* HELLO [protover [AUTH username password] [SETNAME clientname]] */
    if (argc > 4 && strcasecmp(cmd,"hello") == 0) {
        for (int j = 2; j < argc; j++) {
            int moreargs = argc - 1 - j;
            if (strcasecmp(argv[j],"AUTH") == 0 && moreargs >= 2) return 1;
            if (strcasecmp(argv[j],"SETNAME") == 0 && moreargs) {
                j++; /* Skip the client name value. */
                continue;
            }
            return 0; /* Unknown token: stop scanning. */
        }
        return 0;
    }

    /* MIGRATE host port key|"" destination-db timeout [COPY] [REPLACE]
     * [AUTH password] [AUTH2 username password] [KEYS key [key ...]] */
    if (argc > 7 && strcasecmp(cmd,"migrate") == 0) {
        for (int j = 6; j < argc; j++) {
            int moreargs = argc - 1 - j;
            if (strcasecmp(argv[j],"auth") == 0 && moreargs) return 1;
            if (strcasecmp(argv[j],"auth2") == 0 && moreargs >= 2) return 1;
            if (strcasecmp(argv[j],"keys") == 0 && moreargs) return 0;
        }
    }

    return 0;
}
+
/* Interactive mode: read lines with linenoise, split them into arguments
 * and issue the resulting commands until EOF or an explicit quit/exit.
 * Also handles the special inputs ':' (preferences), 'connect', 'clear'
 * and 'restart' (Lua debugger only), an optional leading numeric repeat
 * count, and keeps sensitive commands out of the history. */
static void repl(void) {
    sds historyfile = NULL;
    int history = 0;    /* Non-zero when in-memory history is enabled. */
    char *line;
    int argc;
    sds *argv;

    /* There is no need to initialize redis HELP when we are in lua debugger mode.
     * It has its own HELP and commands (COMMAND or COMMAND DOCS will fail and got nothing).
     * We will initialize the redis HELP after the Lua debugging session ended.*/
    if ((!config.eval_ldb) && isatty(fileno(stdin))) {
        /* Initialize the help using the results of the COMMAND command. */
        cliInitHelp();
    }

    config.interactive = 1;
    linenoiseSetMultiLine(1);
    linenoiseSetCompletionCallback(completionCallback);
    linenoiseSetHintsCallback(hintsCallback);
    linenoiseSetFreeHintsCallback(freeHintsCallback);

    /* Only use history and load the rc file when stdin is a tty. */
    if (isatty(fileno(stdin))) {
        historyfile = getDotfilePath(REDIS_CLI_HISTFILE_ENV,REDIS_CLI_HISTFILE_DEFAULT);
        //keep in-memory history always regardless if history file can be determined
        history = 1;
        if (historyfile != NULL) {
            linenoiseHistoryLoad(historyfile);
        }
        cliLoadPreferences();
    }

    cliRefreshPrompt();
    while(1) {
        line = linenoise(context ? config.prompt : "not connected> ");
        if (line == NULL) {
            /* ^C, ^D or similar. */
            if (config.pubsub_mode) {
                /* In subscriber mode, leave pubsub and reconnect instead
                 * of leaving the REPL. */
                config.pubsub_mode = 0;
                if (cliConnect(CC_FORCE) == REDIS_OK)
                    continue;
            }
            break;
        } else if (line[0] != '\0') {
            long repeat = 1;
            int skipargs = 0;
            char *endptr = NULL;

            argv = cliSplitArgs(line,&argc);
            if (argv == NULL) {
                printf("Invalid argument(s)\n");
                fflush(stdout);
                /* Still record the raw line in the history. */
                if (history) linenoiseHistoryAdd(line);
                if (historyfile) linenoiseHistorySave(historyfile);
                linenoiseFree(line);
                continue;
            } else if (argc == 0) {
                sdsfreesplitres(argv,argc);
                linenoiseFree(line);
                continue;
            }

            /* check if we have a repeat command option and
             * need to skip the first arg */
            errno = 0;
            repeat = strtol(argv[0], &endptr, 10);
            if (argc > 1 && *endptr == '\0') {
                if (errno == ERANGE || errno == EINVAL || repeat <= 0) {
                    fputs("Invalid redis-cli repeat command option value.\n", stdout);
                    sdsfreesplitres(argv, argc);
                    linenoiseFree(line);
                    continue;
                }
                skipargs = 1;
            } else {
                repeat = 1;
            }

            /* Don't record commands that carry credentials (AUTH, HELLO
             * with AUTH, MIGRATE with AUTH, config passwords...). */
            if (!isSensitiveCommand(argc - skipargs, argv + skipargs)) {
                if (history) linenoiseHistoryAdd(line);
                if (historyfile) linenoiseHistorySave(historyfile);
            }

            if (strcasecmp(argv[0],"quit") == 0 ||
                strcasecmp(argv[0],"exit") == 0)
            {
                exit(0);
            } else if (argv[0][0] == ':') {
                cliSetPreferences(argv,argc,1);
                sdsfreesplitres(argv,argc);
                linenoiseFree(line);
                continue;
            } else if (strcasecmp(argv[0],"restart") == 0) {
                if (config.eval) {
                    config.eval_ldb = 1;
                    config.output = OUTPUT_RAW;
                    sdsfreesplitres(argv,argc);
                    linenoiseFree(line);
                    return; /* Return to evalMode to restart the session. */
                } else {
                    printf("Use 'restart' only in Lua debugging mode.\n");
                    fflush(stdout);
                }
            } else if (argc == 3 && !strcasecmp(argv[0],"connect")) {
                sdsfree(config.conn_info.hostip);
                config.conn_info.hostip = sdsnew(argv[1]);
                config.conn_info.hostport = atoi(argv[2]);
                cliRefreshPrompt();
                cliConnect(CC_FORCE);
            } else if (argc == 1 && !strcasecmp(argv[0],"clear")) {
                linenoiseClearScreen();
            } else {
                long long start_time = mstime(), elapsed;

                issueCommandRepeat(argc-skipargs, argv+skipargs, repeat);

                /* If our debugging session ended, show the EVAL final
                 * reply. */
                if (config.eval_ldb_end) {
                    config.eval_ldb_end = 0;
                    cliReadReply(0);
                    printf("\n(Lua debugging session ended%s)\n\n",
                           config.eval_ldb_sync ? "" :
                           " -- dataset changes rolled back");
                    cliInitHelp();
                }

                elapsed = mstime()-start_time;
                /* Report commands that took 500ms or more, in standard
                 * output mode only. */
                if (elapsed >= 500 &&
                    config.output == OUTPUT_STANDARD)
                {
                    printf("(%.2fs)\n",(double)elapsed/1000);
                }
            }
            /* Free the argument vector */
            sdsfreesplitres(argv,argc);
        }

        if (config.pubsub_mode) {
            cliWaitForMessagesOrStdin();
        }

        /* linenoise() returns malloc-ed lines like readline() */
        linenoiseFree(line);
    }
    exit(0);
}
+
+static int noninteractive(int argc, char **argv) {
+ int retval = 0;
+ sds *sds_args = getSdsArrayFromArgv(argc, argv, config.quoted_input);
+
+ if (!sds_args) {
+ printf("Invalid quoted string\n");
+ return 1;
+ }
+
+ if (config.stdin_lastarg) {
+ sds_args = sds_realloc(sds_args, (argc + 1) * sizeof(sds));
+ sds_args[argc] = readArgFromStdin();
+ argc++;
+ } else if (config.stdin_tag_arg) {
+ int i = 0, tag_match = 0;
+
+ for (; i < argc; i++) {
+ if (strcmp(config.stdin_tag_name, sds_args[i]) != 0) continue;
+
+ tag_match = 1;
+ sdsfree(sds_args[i]);
+ sds_args[i] = readArgFromStdin();
+ break;
+ }
+
+ if (!tag_match) {
+ sdsfreesplitres(sds_args, argc);
+ fprintf(stderr, "Using -X option but stdin tag not match.\n");
+ return 1;
+ }
+ }
+
+ retval = issueCommand(argc, sds_args);
+ sdsfreesplitres(sds_args, argc);
+ while (config.pubsub_mode) {
+ if (cliReadReply(0) != REDIS_OK) {
+ cliPrintContextError();
+ exit(1);
+ }
+ fflush(stdout);
+ }
+ return retval == REDIS_OK ? 0 : 1;
+}
+
+/*------------------------------------------------------------------------------
+ * Eval mode
+ *--------------------------------------------------------------------------- */
+
+static int evalMode(int argc, char **argv) {
+ sds script = NULL;
+ FILE *fp;
+ char buf[1024];
+ size_t nread;
+ char **argv2;
+ int j, got_comma, keys;
+ int retval = REDIS_OK;
+
+ while(1) {
+ if (config.eval_ldb) {
+ printf(
+ "Lua debugging session started, please use:\n"
+ "quit -- End the session.\n"
+ "restart -- Restart the script in debug mode again.\n"
+ "help -- Show Lua script debugging commands.\n\n"
+ );
+ }
+
+ sdsfree(script);
+ script = sdsempty();
+ got_comma = 0;
+ keys = 0;
+
+ /* Load the script from the file, as an sds string. */
+ fp = fopen(config.eval,"r");
+ if (!fp) {
+ fprintf(stderr,
+ "Can't open file '%s': %s\n", config.eval, strerror(errno));
+ exit(1);
+ }
+ while((nread = fread(buf,1,sizeof(buf),fp)) != 0) {
+ script = sdscatlen(script,buf,nread);
+ }
+ fclose(fp);
+
+ /* If we are debugging a script, enable the Lua debugger. */
+ if (config.eval_ldb) {
+ redisReply *reply = redisCommand(context,
+ config.eval_ldb_sync ?
+ "SCRIPT DEBUG sync": "SCRIPT DEBUG yes");
+ if (reply) freeReplyObject(reply);
+ }
+
+ /* Create our argument vector */
+ argv2 = zmalloc(sizeof(sds)*(argc+3));
+ argv2[0] = sdsnew("EVAL");
+ argv2[1] = script;
+ for (j = 0; j < argc; j++) {
+ if (!got_comma && argv[j][0] == ',' && argv[j][1] == 0) {
+ got_comma = 1;
+ continue;
+ }
+ argv2[j+3-got_comma] = sdsnew(argv[j]);
+ if (!got_comma) keys++;
+ }
+ argv2[2] = sdscatprintf(sdsempty(),"%d",keys);
+
+ /* Call it */
+ int eval_ldb = config.eval_ldb; /* Save it, may be reverted. */
+ retval = issueCommand(argc+3-got_comma, argv2);
+ if (eval_ldb) {
+ if (!config.eval_ldb) {
+ /* If the debugging session ended immediately, there was an
+ * error compiling the script. Show it and they don't enter
+ * the REPL at all. */
+ printf("Eval debugging session can't start:\n");
+ cliReadReply(0);
+ break; /* Return to the caller. */
+ } else {
+ strncpy(config.prompt,"lua debugger> ",sizeof(config.prompt));
+ repl();
+ /* Restart the session if repl() returned. */
+ cliConnect(CC_FORCE);
+ printf("\n");
+ }
+ } else {
+ break; /* Return to the caller. */
+ }
+ }
+ return retval == REDIS_OK ? 0 : 1;
+}
+
+/*------------------------------------------------------------------------------
+ * Cluster Manager
+ *--------------------------------------------------------------------------- */
+
/* The Cluster Manager global structure */
static struct clusterManager {
    list *nodes; /* List of nodes in the configuration. */
    list *errors; /* List of sds error messages collected during checks
                   * (freed in freeClusterManager). */
    int unreachable_masters; /* Masters we are not able to reach. */
} cluster_manager;
+
/* Used by clusterManagerFixSlotsCoverage. Lazily created; released in
 * freeClusterManager when non-NULL. */
dict *clusterManagerUncoveredSlots = NULL;
+
/* In-memory representation of a single cluster node as tracked by the
 * Cluster Manager. */
typedef struct clusterManagerNode {
    redisContext *context;  /* hiredis connection to the node (may be NULL). */
    sds name;               /* Node ID (lowercase hex string). */
    char *ip;               /* Owned only by "friend" nodes, see
                             * freeClusterManagerNode. */
    int port;
    int bus_port; /* cluster-port */
    uint64_t current_epoch;
    time_t ping_sent;
    time_t ping_recv;
    int flags;              /* CLUSTER_MANAGER_FLAG_* bit field. */
    list *flags_str; /* Flags string representations */
    sds replicate; /* Master ID if node is a slave */
    int dirty; /* Node has changes that can be flushed */
    uint8_t slots[CLUSTER_MANAGER_SLOTS]; /* Per-slot ownership map. */
    int slots_count;        /* Number of slots owned by the node. */
    int replicas_count;
    list *friends;          /* Other known nodes; owned and freed recursively
                             * by freeClusterManagerNode. */
    sds *migrating; /* An array of sds where even strings are slots and odd
                     * strings are the destination node IDs. */
    sds *importing; /* An array of sds where even strings are slots and odd
                     * strings are the source node IDs. */
    int migrating_count; /* Length of the migrating array (migrating slots*2) */
    int importing_count; /* Length of the importing array (importing slots*2) */
    float weight; /* Weight used by rebalance */
    int balance; /* Used by rebalance */
} clusterManagerNode;
+
/* Data structure used to represent a sequence of cluster nodes. The
 * 'nodes' pointer may be advanced past 'alloc' as entries are consumed
 * (presumably via clusterManagerNodeArrayShift — confirm against its
 * definition). */
typedef struct clusterManagerNodeArray {
    clusterManagerNode **nodes; /* Actual nodes array */
    clusterManagerNode **alloc; /* Pointer to the allocated memory */
    int len; /* Actual length of the array */
    int count; /* Non-NULL nodes count */
} clusterManagerNodeArray;
+
/* Used for the reshard table. */
typedef struct clusterManagerReshardTableItem {
    clusterManagerNode *source; /* Source node for the slot move. */
    int slot;                   /* Hash slot to move. */
} clusterManagerReshardTableItem;
+
/* Info about a cluster internal link. */
typedef struct clusterManagerLink {
    sds node_name;   /* ID of the peer node. */
    sds node_addr;   /* Address of the peer node. */
    int connected;   /* Non-zero when the link is established. */
    int handshaking; /* Non-zero while the handshake is in progress. */
} clusterManagerLink;
+
/* Dict type with sds keys and sds values; only values are owned (freed
 * via dictSdsDestructor), keys are not duplicated nor destroyed. */
static dictType clusterManagerDictType = {
    dictSdsHash,               /* hash function */
    NULL,                      /* key dup */
    NULL,                      /* val dup */
    dictSdsKeyCompare,         /* key compare */
    NULL,                      /* key destructor */
    dictSdsDestructor,         /* val destructor */
    NULL                       /* allow to expand */
};
+
/* Dict type mapping owned sds keys (freed via dictSdsDestructor) to list
 * values released with dictListDestructor. */
static dictType clusterManagerLinkDictType = {
    dictSdsHash,               /* hash function */
    NULL,                      /* key dup */
    NULL,                      /* val dup */
    dictSdsKeyCompare,         /* key compare */
    dictSdsDestructor,         /* key destructor */
    dictListDestructor,        /* val destructor */
    NULL                       /* allow to expand */
};
+
/* Handler signature for a --cluster subcommand. */
typedef int clusterManagerCommandProc(int argc, char **argv);
/* Per-reply error callback used by clusterManagerExecTransaction: may
 * accept (return non-zero) a failed reply inside an EXEC bulk. */
typedef int (*clusterManagerOnReplyError)(redisReply *reply,
    clusterManagerNode *n, int bulk_idx);
+
+/* Cluster Manager helper functions */
+
+static clusterManagerNode *clusterManagerNewNode(char *ip, int port, int bus_port);
+static clusterManagerNode *clusterManagerNodeByName(const char *name);
+static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n);
+static void clusterManagerNodeResetSlots(clusterManagerNode *node);
+static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err);
+static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node,
+ char *err);
+static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts,
+ char **err);
+static int clusterManagerLoadInfoFromNode(clusterManagerNode *node);
+static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err);
+static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes,
+ int ip_count, clusterManagerNode ***offending, int *offending_len);
+static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes,
+ int ip_count);
+static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent);
+static void clusterManagerShowNodes(void);
+static void clusterManagerShowClusterInfo(void);
+static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err);
+static void clusterManagerWaitForClusterJoin(void);
+static int clusterManagerCheckCluster(int quiet);
+static void clusterManagerLog(int level, const char* fmt, ...);
+static int clusterManagerIsConfigConsistent(void);
+static dict *clusterManagerGetLinkStatus(void);
+static void clusterManagerOnError(sds err);
+static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array,
+ int len);
+static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array);
+static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array,
+ clusterManagerNode **nodeptr);
+static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array,
+ clusterManagerNode *node);
+
+/* Cluster Manager commands. */
+
+static int clusterManagerCommandCreate(int argc, char **argv);
+static int clusterManagerCommandAddNode(int argc, char **argv);
+static int clusterManagerCommandDeleteNode(int argc, char **argv);
+static int clusterManagerCommandInfo(int argc, char **argv);
+static int clusterManagerCommandCheck(int argc, char **argv);
+static int clusterManagerCommandFix(int argc, char **argv);
+static int clusterManagerCommandReshard(int argc, char **argv);
+static int clusterManagerCommandRebalance(int argc, char **argv);
+static int clusterManagerCommandSetTimeout(int argc, char **argv);
+static int clusterManagerCommandImport(int argc, char **argv);
+static int clusterManagerCommandCall(int argc, char **argv);
+static int clusterManagerCommandHelp(int argc, char **argv);
+static int clusterManagerCommandBackup(int argc, char **argv);
+
/* Definition of a single --cluster subcommand. */
typedef struct clusterManagerCommandDef {
    char *name;                      /* Subcommand name. */
    clusterManagerCommandProc *proc; /* Implementation. */
    int arity;                       /* Exact argc if > 0, minimum -arity
                                      * if < 0 (see
                                      * validateClusterManagerCommand). */
    char *args;                      /* Arguments synopsis for help. */
    char *options;                   /* Comma separated accepted options,
                                      * "<arg>" marks value options. */
} clusterManagerCommandDef;
+
/* Table of the supported --cluster subcommands. A positive arity requires
 * exactly that many arguments, a negative one at least -arity (enforced by
 * validateClusterManagerCommand). */
clusterManagerCommandDef clusterManagerCommands[] = {
    {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN",
     "replicas <arg>"},
    {"check", clusterManagerCommandCheck, -1, "<host:port> or <host> <port> - separated by either colon or space",
     "search-multiple-owners"},
    {"info", clusterManagerCommandInfo, -1, "<host:port> or <host> <port> - separated by either colon or space", NULL},
    {"fix", clusterManagerCommandFix, -1, "<host:port> or <host> <port> - separated by either colon or space",
     "search-multiple-owners,fix-with-unreachable-masters"},
    {"reshard", clusterManagerCommandReshard, -1, "<host:port> or <host> <port> - separated by either colon or space",
     "from <arg>,to <arg>,slots <arg>,yes,timeout <arg>,pipeline <arg>,"
     "replace"},
    {"rebalance", clusterManagerCommandRebalance, -1, "<host:port> or <host> <port> - separated by either colon or space",
     "weight <node1=w1...nodeN=wN>,use-empty-masters,"
     "timeout <arg>,simulate,pipeline <arg>,threshold <arg>,replace"},
    {"add-node", clusterManagerCommandAddNode, 2,
     "new_host:new_port existing_host:existing_port", "slave,master-id <arg>"},
    {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL},
    {"call", clusterManagerCommandCall, -2,
     "host:port command arg arg .. arg", "only-masters,only-replicas"},
    {"set-timeout", clusterManagerCommandSetTimeout, 2,
     "host:port milliseconds", NULL},
    {"import", clusterManagerCommandImport, 1, "host:port",
     "from <arg>,from-user <arg>,from-pass <arg>,from-askpass,copy,replace"},
    {"backup", clusterManagerCommandBackup, 2, "host:port backup_directory",
     NULL},
    {"help", clusterManagerCommandHelp, 0, NULL, NULL}
};
+
/* Description of a generic --cluster option, used for the help output. */
typedef struct clusterManagerOptionDef {
    char *name; /* Option name as typed on the command line. */
    char *desc; /* Human readable description. */
} clusterManagerOptionDef;

clusterManagerOptionDef clusterManagerOptions[] = {
    {"--cluster-yes", "Automatic yes to cluster commands prompts"}
};
+
+static void getRDB(clusterManagerNode *node);
+
+static int createClusterManagerCommand(char *cmdname, int argc, char **argv) {
+ clusterManagerCommand *cmd = &config.cluster_manager_command;
+ cmd->name = cmdname;
+ cmd->argc = argc;
+ cmd->argv = argc ? argv : NULL;
+ if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR;
+
+ if (config.stdin_lastarg) {
+ char **new_argv = zmalloc(sizeof(char*) * (cmd->argc+1));
+ memcpy(new_argv, cmd->argv, sizeof(char*) * cmd->argc);
+
+ cmd->stdin_arg = readArgFromStdin();
+ new_argv[cmd->argc++] = cmd->stdin_arg;
+ cmd->argv = new_argv;
+ } else if (config.stdin_tag_arg) {
+ int i = 0, tag_match = 0;
+ cmd->stdin_arg = readArgFromStdin();
+
+ for (; i < argc; i++) {
+ if (strcmp(argv[i], config.stdin_tag_name) != 0) continue;
+
+ tag_match = 1;
+ cmd->argv[i] = (char *)cmd->stdin_arg;
+ break;
+ }
+
+ if (!tag_match) {
+ sdsfree(cmd->stdin_arg);
+ fprintf(stderr, "Using -X option but stdin tag not match.\n");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static clusterManagerCommandProc *validateClusterManagerCommand(void) {
+ int i, commands_count = sizeof(clusterManagerCommands) /
+ sizeof(clusterManagerCommandDef);
+ clusterManagerCommandProc *proc = NULL;
+ char *cmdname = config.cluster_manager_command.name;
+ int argc = config.cluster_manager_command.argc;
+ for (i = 0; i < commands_count; i++) {
+ clusterManagerCommandDef cmddef = clusterManagerCommands[i];
+ if (!strcmp(cmddef.name, cmdname)) {
+ if ((cmddef.arity > 0 && argc != cmddef.arity) ||
+ (cmddef.arity < 0 && argc < (cmddef.arity * -1))) {
+ fprintf(stderr, "[ERR] Wrong number of arguments for "
+ "specified --cluster sub command\n");
+ return NULL;
+ }
+ proc = cmddef.proc;
+ }
+ }
+ if (!proc) fprintf(stderr, "Unknown --cluster subcommand\n");
+ return proc;
+}
+
/* Parse a node address in the "ip:port[@bus_port]" format, storing the
 * results through 'ip_ptr', 'port_ptr' and (when both present and
 * requested) 'bus_port_ptr'. The input string is modified in place: the
 * '@' and the last ':' separators are overwritten with terminators.
 * Returns 1 on success, 0 when no ':' separator is found. */
static int parseClusterNodeAddress(char *addr, char **ip_ptr, int *port_ptr,
                                   int *bus_port_ptr)
{
    char *at_sign = strrchr(addr, '@');
    if (at_sign != NULL) {
        *at_sign = '\0';
        if (bus_port_ptr != NULL) *bus_port_ptr = atoi(at_sign + 1);
    }
    char *colon = strrchr(addr, ':');
    if (colon == NULL) return 0;
    *colon = '\0';
    *ip_ptr = addr;
    *port_ptr = atoi(colon + 1);
    return 1;
}
+
/* Get host ip and port from command arguments. If only one argument has
 * been provided it must be in the form of 'ip:port', elsewhere
 * the first argument must be the ip and the second one the port.
 * If host and port can be detected, it returns 1 and it stores host and
 * port into variables referenced by 'ip_ptr' and 'port_ptr' pointers,
 * elsewhere it returns 0. */
static int getClusterHostFromCmdArgs(int argc, char **argv,
                                     char **ip_ptr, int *port_ptr) {
    char *host = NULL;
    int port = 0;
    if (argc == 1) {
        /* Single argument: "ip:port" (an optional @bus_port is ignored). */
        if (!parseClusterNodeAddress(argv[0], &host, &port, NULL)) return 0;
    } else {
        host = argv[0];
        port = atoi(argv[1]);
    }
    if (host == NULL || port == 0) return 0;
    *ip_ptr = host;
    *port_ptr = port;
    return 1;
}
+
+static void freeClusterManagerNodeFlags(list *flags) {
+ listIter li;
+ listNode *ln;
+ listRewind(flags, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds flag = ln->value;
+ sdsfree(flag);
+ }
+ listRelease(flags);
+}
+
/* Free a clusterManagerNode and everything it owns: the hiredis context,
 * the friends list (recursively), the name/replicate strings, the
 * migrating/importing arrays and the flags list. The ip string is owned
 * only by nodes flagged as friends, so it's freed just in that case. */
static void freeClusterManagerNode(clusterManagerNode *node) {
    if (node->context != NULL) redisFree(node->context);
    if (node->friends != NULL) {
        listIter li;
        listNode *ln;
        listRewind(node->friends,&li);
        while ((ln = listNext(&li)) != NULL) {
            clusterManagerNode *fn = ln->value;
            /* Friends are owned by this node: free them recursively. */
            freeClusterManagerNode(fn);
        }
        listRelease(node->friends);
        node->friends = NULL;
    }
    if (node->name != NULL) sdsfree(node->name);
    if (node->replicate != NULL) sdsfree(node->replicate);
    /* Only friend nodes own their ip string. */
    if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip)
        sdsfree(node->ip);
    int i;
    if (node->migrating != NULL) {
        for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]);
        zfree(node->migrating);
    }
    if (node->importing != NULL) {
        for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]);
        zfree(node->importing);
    }
    if (node->flags_str != NULL) {
        freeClusterManagerNodeFlags(node->flags_str);
        node->flags_str = NULL;
    }
    zfree(node);
}
+
/* Release all the global Cluster Manager state: the nodes list (including
 * every node), the collected error messages and the uncovered slots dict. */
static void freeClusterManager(void) {
    listIter li;
    listNode *ln;
    if (cluster_manager.nodes != NULL) {
        listRewind(cluster_manager.nodes,&li);
        while ((ln = listNext(&li)) != NULL) {
            clusterManagerNode *n = ln->value;
            freeClusterManagerNode(n);
        }
        listRelease(cluster_manager.nodes);
        cluster_manager.nodes = NULL;
    }
    if (cluster_manager.errors != NULL) {
        listRewind(cluster_manager.errors,&li);
        while ((ln = listNext(&li)) != NULL) {
            sds err = ln->value;
            sdsfree(err);
        }
        listRelease(cluster_manager.errors);
        cluster_manager.errors = NULL;
    }
    if (clusterManagerUncoveredSlots != NULL)
        dictRelease(clusterManagerUncoveredSlots);
}
+
+static clusterManagerNode *clusterManagerNewNode(char *ip, int port, int bus_port) {
+ clusterManagerNode *node = zmalloc(sizeof(*node));
+ node->context = NULL;
+ node->name = NULL;
+ node->ip = ip;
+ node->port = port;
+ /* We don't need to know the bus_port, at this point this value may be wrong.
+ * If it is used, it will be corrected in clusterManagerLoadInfoFromNode. */
+ node->bus_port = bus_port ? bus_port : port + CLUSTER_MANAGER_PORT_INCR;
+ node->current_epoch = 0;
+ node->ping_sent = 0;
+ node->ping_recv = 0;
+ node->flags = 0;
+ node->flags_str = NULL;
+ node->replicate = NULL;
+ node->dirty = 0;
+ node->friends = NULL;
+ node->migrating = NULL;
+ node->importing = NULL;
+ node->migrating_count = 0;
+ node->importing_count = 0;
+ node->replicas_count = 0;
+ node->weight = 1.0f;
+ node->balance = 0;
+ clusterManagerNodeResetSlots(node);
+ return node;
+}
+
+static sds clusterManagerGetNodeRDBFilename(clusterManagerNode *node) {
+ assert(config.cluster_manager_command.backup_dir);
+ sds filename = sdsnew(config.cluster_manager_command.backup_dir);
+ if (filename[sdslen(filename) - 1] != '/')
+ filename = sdscat(filename, "/");
+ filename = sdscatprintf(filename, "redis-node-%s-%d-%s.rdb", node->ip,
+ node->port, node->name);
+ return filename;
+}
+
+/* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the
+ * latest case, if the 'err' arg is not NULL, it gets allocated with a copy
+ * of reply error (it's up to the caller function to free it), elsewhere
+ * the error is directly printed. */
+static int clusterManagerCheckRedisReply(clusterManagerNode *n,
+ redisReply *r, char **err)
+{
+ int is_err = 0;
+ if (!r || (is_err = (r->type == REDIS_REPLY_ERROR))) {
+ if (is_err) {
+ if (err != NULL) {
+ *err = zmalloc((r->len + 1) * sizeof(char));
+ redis_strlcpy(*err, r->str,(r->len + 1));
+ } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, r->str);
+ }
+ return 0;
+ }
+ return 1;
+}
+
+/* Call MULTI command on a cluster node. */
+static int clusterManagerStartTransaction(clusterManagerNode *node) {
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "MULTI");
+ int success = clusterManagerCheckRedisReply(node, reply, NULL);
+ if (reply) freeReplyObject(reply);
+ return success;
+}
+
/* Call EXEC command on a cluster node. Returns 1 only if EXEC itself
 * succeeded, its reply is an array, and every queued command's reply is
 * non-error; a failing reply may still be accepted by the optional
 * 'onerror' callback. */
static int clusterManagerExecTransaction(clusterManagerNode *node,
                                         clusterManagerOnReplyError onerror)
{
    redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "EXEC");
    int success = clusterManagerCheckRedisReply(node, reply, NULL);
    if (success) {
        if (reply->type != REDIS_REPLY_ARRAY) {
            success = 0;
            goto cleanup;
        }
        size_t i;
        for (i = 0; i < reply->elements; i++) {
            redisReply *r = reply->element[i];
            char *err = NULL;
            success = clusterManagerCheckRedisReply(node, r, &err);
            /* Give the callback a chance to tolerate this error. */
            if (!success && onerror) success = onerror(r, node, i);
            if (err) {
                /* Only print the error when it wasn't accepted. */
                if (!success)
                    CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
                zfree(err);
            }
            if (!success) break;
        }
    }
cleanup:
    if (reply) freeReplyObject(reply);
    return success;
}
+
+static int clusterManagerNodeConnect(clusterManagerNode *node) {
+ if (node->context) redisFree(node->context);
+ node->context = redisConnect(node->ip, node->port);
+ if (!node->context->err && config.tls) {
+ const char *err = NULL;
+ if (cliSecureConnection(node->context, config.sslconfig, &err) == REDIS_ERR && err) {
+ fprintf(stderr,"TLS Error: %s\n", err);
+ redisFree(node->context);
+ node->context = NULL;
+ return 0;
+ }
+ }
+ if (node->context->err) {
+ fprintf(stderr,"Could not connect to Redis at ");
+ fprintf(stderr,"%s:%d: %s\n", node->ip, node->port,
+ node->context->errstr);
+ redisFree(node->context);
+ node->context = NULL;
+ return 0;
+ }
+ /* Set aggressive KEEP_ALIVE socket option in the Redis context socket
+ * in order to prevent timeouts caused by the execution of long
+ * commands. At the same time this improves the detection of real
+ * errors. */
+ anetKeepAlive(NULL, node->context->fd, REDIS_CLI_KEEPALIVE_INTERVAL);
+ if (config.conn_info.auth) {
+ redisReply *reply;
+ if (config.conn_info.user == NULL)
+ reply = redisCommand(node->context,"AUTH %s", config.conn_info.auth);
+ else
+ reply = redisCommand(node->context,"AUTH %s %s",
+ config.conn_info.user,config.conn_info.auth);
+ int ok = clusterManagerCheckRedisReply(node, reply, NULL);
+ if (reply != NULL) freeReplyObject(reply);
+ if (!ok) return 0;
+ }
+ return 1;
+}
+
+static void clusterManagerRemoveNodeFromList(list *nodelist,
+ clusterManagerNode *node) {
+ listIter li;
+ listNode *ln;
+ listRewind(nodelist, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ if (node == ln->value) {
+ listDelNode(nodelist, ln);
+ break;
+ }
+ }
+}
+
+/* Return the node with the specified name (ID) or NULL. */
+static clusterManagerNode *clusterManagerNodeByName(const char *name) {
+ if (cluster_manager.nodes == NULL) return NULL;
+ clusterManagerNode *found = NULL;
+ sds lcname = sdsempty();
+ lcname = sdscpy(lcname, name);
+ sdstolower(lcname);
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->name && !sdscmp(n->name, lcname)) {
+ found = n;
+ break;
+ }
+ }
+ sdsfree(lcname);
+ return found;
+}
+
+/* Like clusterManagerNodeByName but the specified name can be just the first
+ * part of the node ID as long as the prefix in unique across the
+ * cluster.
+ */
+static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char*name)
+{
+ if (cluster_manager.nodes == NULL) return NULL;
+ clusterManagerNode *found = NULL;
+ sds lcname = sdsempty();
+ lcname = sdscpy(lcname, name);
+ sdstolower(lcname);
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->name &&
+ strstr(n->name, lcname) == n->name) {
+ found = n;
+ break;
+ }
+ }
+ sdsfree(lcname);
+ return found;
+}
+
/* Clear the node's slot ownership map and counter. */
static void clusterManagerNodeResetSlots(clusterManagerNode *node) {
    memset(node->slots, 0, sizeof(node->slots));
    node->slots_count = 0;
}
+
+/* Call "INFO" redis command on the specified node and return the reply. */
+static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node,
+ char **err)
+{
+ redisReply *info = CLUSTER_MANAGER_COMMAND(node, "INFO");
+ if (err != NULL) *err = NULL;
+ if (info == NULL) return NULL;
+ if (info->type == REDIS_REPLY_ERROR) {
+ if (err != NULL) {
+ *err = zmalloc((info->len + 1) * sizeof(char));
+ redis_strlcpy(*err, info->str,(info->len + 1));
+ }
+ freeReplyObject(info);
+ return NULL;
+ }
+ return info;
+}
+
+static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) {
+ redisReply *info = clusterManagerGetNodeRedisInfo(node, err);
+ if (info == NULL) return 0;
+ int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled");
+ freeReplyObject(info);
+ return is_cluster;
+}
+
/* Checks whether the node is empty. Node is considered not-empty if it has
 * some key or if it already knows other nodes */
static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) {
    redisReply *info = clusterManagerGetNodeRedisInfo(node, err);
    int is_empty = 1;
    if (info == NULL) return 0;
    if (strstr(info->str, "db0:") != NULL) {
        /* INFO reports a keyspace section for db0: the node holds keys. */
        is_empty = 0;
        goto result;
    }
    freeReplyObject(info);
    info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO");
    if (err != NULL) *err = NULL;
    if (!clusterManagerCheckRedisReply(node, info, err)) {
        is_empty = 0;
        goto result;
    }
    /* A node knowing only itself reports cluster_known_nodes:1. */
    long known_nodes = getLongInfoField(info->str, "cluster_known_nodes");
    is_empty = (known_nodes == 1);
result:
    freeReplyObject(info);
    return is_empty;
}
+
/* Return the anti-affinity score, which is a measure of the amount of
 * violations of anti-affinity in the current cluster layout, that is, how
 * badly the masters and slaves are distributed in the different IP
 * addresses so that slaves of the same master are not in the master
 * host and are also in different hosts.
 *
 * The score is calculated as follows:
 *
 * SAME_AS_MASTER = 10000 * each slave in the same IP of its master.
 * SAME_AS_SLAVE = 1 * each slave having the same IP as another slave
 *                 of the same master.
 * FINAL_SCORE = SAME_AS_MASTER + SAME_AS_SLAVE
 *
 * So a greater score means a worse anti-affinity level, while zero
 * means perfect anti-affinity.
 *
 * The anti affinity optimization will try to get a score as low as
 * possible. Since we do not want to sacrifice the fact that slaves should
 * not be in the same host as the master, we assign 10000 times the score
 * to this violation, so that we'll optimize for the second factor only
 * if it does not impact the first one.
 *
 * The ipnodes argument is an array of clusterManagerNodeArray, one for
 * each IP, while ip_count is the total number of IPs in the configuration.
 *
 * The function returns the above score, and the list of
 * offending slaves can be stored into the 'offending' argument,
 * so that the optimizer can try changing the configuration of the
 * slaves violating the anti-affinity goals. */
static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes,
    int ip_count, clusterManagerNode ***offending, int *offending_len)
{
    int score = 0, i, j;
    int node_len = cluster_manager.nodes->len;
    clusterManagerNode **offending_p = NULL;
    if (offending != NULL) {
        /* Worst case: every node in the cluster is offending. */
        *offending = zcalloc(node_len * sizeof(clusterManagerNode*));
        offending_p = *offending;
    }
    /* For each set of nodes in the same host, split by
     * related nodes (masters and slaves which are involved in
     * replication of each other) */
    for (i = 0; i < ip_count; i++) {
        clusterManagerNodeArray *node_array = &(ipnodes[i]);
        /* Map: master ID -> string of 'm'/'s' for each related node
         * found on this host. */
        dict *related = dictCreate(&clusterManagerDictType);
        char *ip = NULL;
        for (j = 0; j < node_array->len; j++) {
            clusterManagerNode *node = node_array->nodes[j];
            if (node == NULL) continue;
            if (!ip) ip = node->ip;
            sds types;
            /* We always use the Master ID as key. */
            sds key = (!node->replicate ? node->name : node->replicate);
            assert(key != NULL);
            dictEntry *entry = dictFind(related, key);
            if (entry) types = sdsdup((sds) dictGetVal(entry));
            else types = sdsempty();
            /* Master type 'm' is always set as the first character of the
             * types string. */
            if (node->replicate) types = sdscat(types, "s");
            else {
                sds s = sdscatsds(sdsnew("m"), types);
                sdsfree(types);
                types = s;
            }
            dictReplace(related, key, types);
        }
        /* Now it's trivial to check, for each related group having the
         * same host, what is their local score. */
        dictIterator *iter = dictGetIterator(related);
        dictEntry *entry;
        while ((entry = dictNext(iter)) != NULL) {
            sds types = (sds) dictGetVal(entry);
            sds name = (sds) dictGetKey(entry);
            int typeslen = sdslen(types);
            /* A single node of the group on this host: no violation. */
            if (typeslen < 2) continue;
            /* Leading 'm' means the master shares the host with its
             * slaves: the heavily weighted violation. */
            if (types[0] == 'm') score += (10000 * (typeslen - 1));
            else score += (1 * typeslen);
            if (offending == NULL) continue;
            /* Populate the list of offending nodes. */
            listIter li;
            listNode *ln;
            listRewind(cluster_manager.nodes, &li);
            while ((ln = listNext(&li)) != NULL) {
                clusterManagerNode *n = ln->value;
                if (n->replicate == NULL) continue;
                if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) {
                    *(offending_p++) = n;
                    if (offending_len != NULL) (*offending_len)++;
                    break;
                }
            }
        }
        //if (offending_len != NULL) *offending_len = offending_p - *offending;
        dictReleaseIterator(iter);
        dictRelease(related);
    }
    return score;
}
+
/* Try to reduce the anti-affinity score (see
 * clusterManagerGetAntiAffinityScore) by randomly swapping the master
 * assignment of an offending replica with another random replica, keeping
 * a swap only when it does not make the score worse. The effort is
 * bounded by 500 * number-of-nodes iterations. */
static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes,
    int ip_count)
{
    clusterManagerNode **offenders = NULL;
    int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count,
                                                   NULL, NULL);
    if (score == 0) goto cleanup;
    clusterManagerLogInfo(">>> Trying to optimize slaves allocation "
                          "for anti-affinity\n");
    int node_len = cluster_manager.nodes->len;
    int maxiter = 500 * node_len; // Effort is proportional to cluster size...
    srand(time(NULL));
    while (maxiter > 0) {
        int offending_len = 0;
        if (offenders != NULL) {
            zfree(offenders);
            offenders = NULL;
        }
        score = clusterManagerGetAntiAffinityScore(ipnodes,
                                                   ip_count,
                                                   &offenders,
                                                   &offending_len);
        if (score == 0 || offending_len == 0) break; // Optimal anti affinity reached
        /* We'll try to randomly swap a slave's assigned master causing
         * an affinity problem with another random slave, to see if we
         * can improve the affinity. */
        int rand_idx = rand() % offending_len;
        clusterManagerNode *first = offenders[rand_idx],
                           *second = NULL;
        /* Candidate swap partners: every other replica in the cluster. */
        clusterManagerNode **other_replicas = zcalloc((node_len - 1) *
                                                      sizeof(*other_replicas));
        int other_replicas_count = 0;
        listIter li;
        listNode *ln;
        listRewind(cluster_manager.nodes, &li);
        while ((ln = listNext(&li)) != NULL) {
            clusterManagerNode *n = ln->value;
            if (n != first && n->replicate != NULL)
                other_replicas[other_replicas_count++] = n;
        }
        if (other_replicas_count == 0) {
            zfree(other_replicas);
            break;
        }
        rand_idx = rand() % other_replicas_count;
        second = other_replicas[rand_idx];
        /* Tentatively swap the masters of 'first' and 'second'. */
        char *first_master = first->replicate,
             *second_master = second->replicate;
        first->replicate = second_master, first->dirty = 1;
        second->replicate = first_master, second->dirty = 1;
        int new_score = clusterManagerGetAntiAffinityScore(ipnodes,
                                                           ip_count,
                                                           NULL, NULL);
        /* If the change actually makes thing worse, revert. Otherwise
         * leave as it is because the best solution may need a few
         * combined swaps. */
        if (new_score > score) {
            /* NOTE(review): the dirty flags set above are not cleared on
             * revert — presumably harmless; confirm against the flush
             * logic. */
            first->replicate = first_master;
            second->replicate = second_master;
        }
        zfree(other_replicas);
        maxiter--;
    }
    score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL);
    char *msg;
    int perfect = (score == 0);
    int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS :
                               CLUSTER_MANAGER_LOG_LVL_WARN);
    if (perfect) msg = "[OK] Perfect anti-affinity obtained!";
    else if (score >= 10000)
        msg = ("[WARNING] Some slaves are in the same host as their master");
    else
        msg=("[WARNING] Some slaves of the same master are in the same host");
    clusterManagerLog(log_level, "%s\n", msg);
cleanup:
    zfree(offenders);
}
+
+/* Return a representable string of the node's flags */
+static sds clusterManagerNodeFlagString(clusterManagerNode *node) {
+ sds flags = sdsempty();
+ if (!node->flags_str) return flags;
+ int empty = 1;
+ listIter li;
+ listNode *ln;
+ listRewind(node->flags_str, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds flag = ln->value;
+ if (strcmp(flag, "myself") == 0) continue;
+ if (!empty) flags = sdscat(flags, ",");
+ flags = sdscatfmt(flags, "%S", flag);
+ empty = 0;
+ }
+ return flags;
+}
+
+/* Return a representable string of the node's slots */
+static sds clusterManagerNodeSlotsString(clusterManagerNode *node) {
+ sds slots = sdsempty();
+ int first_range_idx = -1, last_slot_idx = -1, i;
+ for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
+ int has_slot = node->slots[i];
+ if (has_slot) {
+ if (first_range_idx == -1) {
+ if (sdslen(slots)) slots = sdscat(slots, ",");
+ first_range_idx = i;
+ slots = sdscatfmt(slots, "[%u", i);
+ }
+ last_slot_idx = i;
+ } else {
+ if (last_slot_idx >= 0) {
+ if (first_range_idx == last_slot_idx)
+ slots = sdscat(slots, "]");
+ else slots = sdscatfmt(slots, "-%u]", last_slot_idx);
+ }
+ last_slot_idx = -1;
+ first_range_idx = -1;
+ }
+ }
+ if (last_slot_idx >= 0) {
+ if (first_range_idx == last_slot_idx) slots = sdscat(slots, "]");
+ else slots = sdscatfmt(slots, "-%u]", last_slot_idx);
+ }
+ return slots;
+}
+
/* Return a JSON object describing the node: name, host, port, replication
 * target, served slots, flags, epoch and — when present — cluster errors and
 * slot migrations. 'error_count', if non-zero, is emitted as the
 * "cluster_errors" field. The returned sds string must be freed by the
 * caller. */
static sds clusterManagerNodeGetJSON(clusterManagerNode *node,
                                     unsigned long error_count)
{
    sds json = sdsempty();
    sds replicate = sdsempty();
    /* "replicate" is either the quoted master node name, or JSON null. */
    if (node->replicate)
        replicate = sdscatprintf(replicate, "\"%s\"", node->replicate);
    else
        replicate = sdscat(replicate, "null");
    sds slots = clusterManagerNodeSlotsString(node);
    sds flags = clusterManagerNodeFlagString(node);
    /* Turn the "[a-b]" range notation into "[a,b]" so the slots string
     * becomes a valid JSON array. */
    char *p = slots;
    while ((p = strchr(p, '-')) != NULL)
        *(p++) = ',';
    json = sdscatprintf(json,
        "  {\n"
        "    \"name\": \"%s\",\n"
        "    \"host\": \"%s\",\n"
        "    \"port\": %d,\n"
        "    \"replicate\": %s,\n"
        "    \"slots\": [%s],\n"
        "    \"slots_count\": %d,\n"
        "    \"flags\": \"%s\",\n"
        "    \"current_epoch\": %llu",
        node->name,
        node->ip,
        node->port,
        replicate,
        slots,
        node->slots_count,
        flags,
        (unsigned long long)node->current_epoch
    );
    if (error_count > 0) {
        json = sdscatprintf(json, ",\n    \"cluster_errors\": %lu",
                            error_count);
    }
    /* node->migrating stores (slot, destination) string pairs; emit them
     * as a JSON object mapping slot -> destination. */
    if (node->migrating_count > 0 && node->migrating != NULL) {
        int i = 0;
        sds migrating = sdsempty();
        for (; i < node->migrating_count; i += 2) {
            sds slot = node->migrating[i];
            sds dest = node->migrating[i + 1];
            if (slot && dest) {
                if (sdslen(migrating) > 0) migrating = sdscat(migrating, ",");
                migrating = sdscatfmt(migrating, "\"%S\": \"%S\"", slot, dest);
            }
        }
        if (sdslen(migrating) > 0)
            json = sdscatfmt(json, ",\n    \"migrating\": {%S}", migrating);
        sdsfree(migrating);
    }
    /* Same layout for node->importing: (slot, source) pairs. */
    if (node->importing_count > 0 && node->importing != NULL) {
        int i = 0;
        sds importing = sdsempty();
        for (; i < node->importing_count; i += 2) {
            sds slot = node->importing[i];
            sds from = node->importing[i + 1];
            if (slot && from) {
                if (sdslen(importing) > 0) importing = sdscat(importing, ",");
                importing = sdscatfmt(importing, "\"%S\": \"%S\"", slot, from);
            }
        }
        if (sdslen(importing) > 0)
            json = sdscatfmt(json, ",\n    \"importing\": {%S}", importing);
        sdsfree(importing);
    }
    json = sdscat(json, "\n  }");
    sdsfree(replicate);
    sdsfree(slots);
    sdsfree(flags);
    return json;
}
+
+
+/* -----------------------------------------------------------------------------
+ * Key space handling
+ * -------------------------------------------------------------------------- */
+
/* We have 16384 hash slots. The hash slot of a given key is obtained
 * as the least significant 14 bits of the crc16 of the key.
 *
 * However if the key contains the {...} pattern, only the part between
 * { and } is hashed. This may be useful in the future to force certain
 * keys to be in the same node (assuming no resharding is in progress). */
static unsigned int clusterManagerKeyHashSlot(char *key, int keylen) {
    char *open_brace = memchr(key, '{', keylen);
    if (open_brace != NULL) {
        /* Index of the first char after '{'. */
        int tag_start = (int)(open_brace - key) + 1;
        char *close_brace = memchr(key + tag_start, '}',
                                   keylen - tag_start);
        /* Hash only the tag, unless '}' is missing or the tag is empty. */
        if (close_brace != NULL && close_brace != key + tag_start) {
            int tag_len = (int)(close_brace - (key + tag_start));
            return crc16(key + tag_start, tag_len) & 0x3FFF;
        }
    }
    /* No usable {...} tag: hash the whole key. */
    return crc16(key, keylen) & 0x3FFF;
}
+
+/* Return a string representation of the cluster node. */
+static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) {
+ sds info = sdsempty();
+ sds spaces = sdsempty();
+ int i;
+ for (i = 0; i < indent; i++) spaces = sdscat(spaces, " ");
+ if (indent) info = sdscat(info, spaces);
+ int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE);
+ char *role = (is_master ? "M" : "S");
+ sds slots = NULL;
+ if (node->dirty && node->replicate != NULL)
+ info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port);
+ else {
+ slots = clusterManagerNodeSlotsString(node);
+ sds flags = clusterManagerNodeFlagString(node);
+ info = sdscatfmt(info, "%s: %S %s:%u\n"
+ "%s slots:%S (%u slots) "
+ "%S",
+ role, node->name, node->ip, node->port, spaces,
+ slots, node->slots_count, flags);
+ sdsfree(slots);
+ sdsfree(flags);
+ }
+ if (node->replicate != NULL)
+ info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate);
+ else if (node->replicas_count)
+ info = sdscatfmt(info, "\n%s %U additional replica(s)",
+ spaces, node->replicas_count);
+ sdsfree(spaces);
+ return info;
+}
+
+static void clusterManagerShowNodes(void) {
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ sds info = clusterManagerNodeInfo(node, 0);
+ printf("%s\n", (char *) info);
+ sdsfree(info);
+ }
+}
+
+static void clusterManagerShowClusterInfo(void) {
+ int masters = 0;
+ int keys = 0;
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) {
+ if (!node->name) continue;
+ int replicas = 0;
+ int dbsize = -1;
+ char name[9];
+ memcpy(name, node->name, 8);
+ name[8] = '\0';
+ listIter ri;
+ listNode *rn;
+ listRewind(cluster_manager.nodes, &ri);
+ while ((rn = listNext(&ri)) != NULL) {
+ clusterManagerNode *n = rn->value;
+ if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE))
+ continue;
+ if (n->replicate && !strcmp(n->replicate, node->name))
+ replicas++;
+ }
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE");
+ if (reply != NULL && reply->type == REDIS_REPLY_INTEGER)
+ dbsize = reply->integer;
+ if (dbsize < 0) {
+ char *err = "";
+ if (reply != NULL && reply->type == REDIS_REPLY_ERROR)
+ err = reply->str;
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+ if (reply != NULL) freeReplyObject(reply);
+ return;
+ };
+ if (reply != NULL) freeReplyObject(reply);
+ printf("%s:%d (%s...) -> %d keys | %d slots | %d slaves.\n",
+ node->ip, node->port, name, dbsize,
+ node->slots_count, replicas);
+ masters++;
+ keys += dbsize;
+ }
+ }
+ clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters);
+ float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS;
+ printf("%.2f keys per slot on average.\n", keys_per_slot);
+}
+
+/* Flush dirty slots configuration of the node by calling CLUSTER ADDSLOTS */
+static int clusterManagerAddSlots(clusterManagerNode *node, char**err)
+{
+ redisReply *reply = NULL;
+ void *_reply = NULL;
+ int success = 1;
+ /* First two args are used for the command itself. */
+ int argc = node->slots_count + 2;
+ sds *argv = zmalloc(argc * sizeof(*argv));
+ size_t *argvlen = zmalloc(argc * sizeof(*argvlen));
+ argv[0] = "CLUSTER";
+ argv[1] = "ADDSLOTS";
+ argvlen[0] = 7;
+ argvlen[1] = 8;
+ *err = NULL;
+ int i, argv_idx = 2;
+ for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
+ if (argv_idx >= argc) break;
+ if (node->slots[i]) {
+ argv[argv_idx] = sdsfromlonglong((long long) i);
+ argvlen[argv_idx] = sdslen(argv[argv_idx]);
+ argv_idx++;
+ }
+ }
+ if (argv_idx == 2) {
+ success = 0;
+ goto cleanup;
+ }
+ redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen);
+ if (redisGetReply(node->context, &_reply) != REDIS_OK) {
+ success = 0;
+ goto cleanup;
+ }
+ reply = (redisReply*) _reply;
+ success = clusterManagerCheckRedisReply(node, reply, err);
+cleanup:
+ zfree(argvlen);
+ if (argv != NULL) {
+ for (i = 2; i < argc; i++) sdsfree(argv[i]);
+ zfree(argv);
+ }
+ if (reply != NULL) freeReplyObject(reply);
+ return success;
+}
+
/* Get the node the slot is assigned to from the point of view of node *n.
 * If the slot is unassigned or if the reply is an error, return NULL.
 * Use the **err argument in order to check whether the slot is unassigned
 * or the reply resulted in an error. */
static clusterManagerNode *clusterManagerGetSlotOwner(clusterManagerNode *n,
                                                      int slot, char **err)
{
    assert(slot >= 0 && slot < CLUSTER_MANAGER_SLOTS);
    clusterManagerNode *owner = NULL;
    redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SLOTS");
    if (clusterManagerCheckRedisReply(n, reply, err)) {
        assert(reply->type == REDIS_REPLY_ARRAY);
        /* Each element is [start, end, master, replicas...]: scan for the
         * range containing 'slot'. */
        size_t i;
        for (i = 0; i < reply->elements; i++) {
            redisReply *r = reply->element[i];
            assert(r->type == REDIS_REPLY_ARRAY && r->elements >= 3);
            int from, to;
            from = r->element[0]->integer;
            to = r->element[1]->integer;
            if (slot < from || slot > to) continue;
            /* Third element describes the master serving the range as
             * [ip, port, (optional) node-id]. */
            redisReply *nr = r->element[2];
            assert(nr->type == REDIS_REPLY_ARRAY && nr->elements >= 2);
            char *name = NULL;
            if (nr->elements >= 3)
                name = nr->element[2]->str;
            if (name != NULL)
                /* Prefer lookup by node ID when the server reports it. */
                owner = clusterManagerNodeByName(name);
            else {
                /* Older replies lack the ID: match by ip:port instead. */
                char *ip = nr->element[0]->str;
                assert(ip != NULL);
                int port = (int) nr->element[1]->integer;
                listIter li;
                listNode *ln;
                listRewind(cluster_manager.nodes, &li);
                while ((ln = listNext(&li)) != NULL) {
                    clusterManagerNode *nd = ln->value;
                    if (strcmp(nd->ip, ip) == 0 && port == nd->port) {
                        owner = nd;
                        break;
                    }
                }
            }
            if (owner) break;
        }
    }
    if (reply) freeReplyObject(reply);
    return owner;
}
+
+/* Set slot status to "importing" or "migrating" */
+static int clusterManagerSetSlot(clusterManagerNode *node1,
+ clusterManagerNode *node2,
+ int slot, const char *status, char **err) {
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER "
+ "SETSLOT %d %s %s",
+ slot, status,
+ (char *) node2->name);
+ if (err != NULL) *err = NULL;
+ if (!reply) {
+ if (err) *err = zstrdup("CLUSTER SETSLOT failed to run");
+ return 0;
+ }
+ int success = 1;
+ if (reply->type == REDIS_REPLY_ERROR) {
+ success = 0;
+ if (err != NULL) {
+ *err = zmalloc((reply->len + 1) * sizeof(char));
+ redis_strlcpy(*err, reply->str,(reply->len + 1));
+ } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(node1, reply->str);
+ goto cleanup;
+ }
+cleanup:
+ freeReplyObject(reply);
+ return success;
+}
+
+static int clusterManagerClearSlotStatus(clusterManagerNode *node, int slot) {
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node,
+ "CLUSTER SETSLOT %d %s", slot, "STABLE");
+ int success = clusterManagerCheckRedisReply(node, reply, NULL);
+ if (reply) freeReplyObject(reply);
+ return success;
+}
+
/* Remove the slot from the given node via CLUSTER DELSLOTS.
 * If 'ignore_unassigned_err' is non-zero, a reply error is still treated as
 * success when the slot turns out to be unassigned anyway (so deleting an
 * already-unassigned slot is not an error). Returns 1 on success. */
static int clusterManagerDelSlot(clusterManagerNode *node, int slot,
                                 int ignore_unassigned_err)
{
    redisReply *reply = CLUSTER_MANAGER_COMMAND(node,
        "CLUSTER DELSLOTS %d", slot);
    char *err = NULL;
    int success = clusterManagerCheckRedisReply(node, reply, &err);
    if (!success && reply && reply->type == REDIS_REPLY_ERROR &&
        ignore_unassigned_err)
    {
        /* The DELSLOTS failed: check whether the slot simply has no owner,
         * in which case the failure is ignored. */
        char *get_owner_err = NULL;
        clusterManagerNode *assigned_to =
            clusterManagerGetSlotOwner(node, slot, &get_owner_err);
        if (!assigned_to) {
            if (get_owner_err == NULL) success = 1;
            else {
                CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, get_owner_err);
                zfree(get_owner_err);
            }
        }
    }
    if (!success && err != NULL) {
        CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
        zfree(err);
    }
    if (reply) freeReplyObject(reply);
    return success;
}
+
+static int clusterManagerAddSlot(clusterManagerNode *node, int slot) {
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node,
+ "CLUSTER ADDSLOTS %d", slot);
+ int success = clusterManagerCheckRedisReply(node, reply, NULL);
+ if (reply) freeReplyObject(reply);
+ return success;
+}
+
+static signed int clusterManagerCountKeysInSlot(clusterManagerNode *node,
+ int slot)
+{
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node,
+ "CLUSTER COUNTKEYSINSLOT %d", slot);
+ int count = -1;
+ int success = clusterManagerCheckRedisReply(node, reply, NULL);
+ if (success && reply->type == REDIS_REPLY_INTEGER) count = reply->integer;
+ if (reply) freeReplyObject(reply);
+ return count;
+}
+
+static int clusterManagerBumpEpoch(clusterManagerNode *node) {
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER BUMPEPOCH");
+ int success = clusterManagerCheckRedisReply(node, reply, NULL);
+ if (reply) freeReplyObject(reply);
+ return success;
+}
+
+/* Callback used by clusterManagerSetSlotOwner transaction. It should ignore
+ * errors except for ADDSLOTS errors.
+ * Return 1 if the error should be ignored. */
+static int clusterManagerOnSetOwnerErr(redisReply *reply,
+ clusterManagerNode *n, int bulk_idx)
+{
+ UNUSED(reply);
+ UNUSED(n);
+ /* Only raise error when ADDSLOTS fail (bulk_idx == 1). */
+ return (bulk_idx != 1);
+}
+
+static int clusterManagerSetSlotOwner(clusterManagerNode *owner,
+ int slot,
+ int do_clear)
+{
+ int success = clusterManagerStartTransaction(owner);
+ if (!success) return 0;
+ /* Ensure the slot is not already assigned. */
+ clusterManagerDelSlot(owner, slot, 1);
+ /* Add the slot and bump epoch. */
+ clusterManagerAddSlot(owner, slot);
+ if (do_clear) clusterManagerClearSlotStatus(owner, slot);
+ clusterManagerBumpEpoch(owner);
+ success = clusterManagerExecTransaction(owner, clusterManagerOnSetOwnerErr);
+ return success;
+}
+
/* Get the hash for the values of the specified keys in *keys_reply for the
 * specified nodes *n1 and *n2, by calling DEBUG DIGEST-VALUE redis command
 * on both nodes. Every key with same name on both nodes but having different
 * values will be added to the *diffs list. Return 0 in case of reply
 * error. */
static int clusterManagerCompareKeysValues(clusterManagerNode *n1,
                                           clusterManagerNode *n2,
                                           redisReply *keys_reply,
                                           list *diffs)
{
    size_t i, argc = keys_reply->elements + 2;
    /* Digest returned by DEBUG DIGEST-VALUE for a non-existing key. */
    static const char *hash_zero = "0000000000000000000000000000000000000000";
    char **argv = zcalloc(argc * sizeof(char *));
    size_t *argv_len = zcalloc(argc * sizeof(size_t));
    argv[0] = "DEBUG";
    argv_len[0] = 5;
    argv[1] = "DIGEST-VALUE";
    argv_len[1] = 12;
    /* Key arguments are borrowed from keys_reply: no copies, no frees. */
    for (i = 0; i < keys_reply->elements; i++) {
        redisReply *entry = keys_reply->element[i];
        int idx = i + 2;
        argv[idx] = entry->str;
        argv_len[idx] = entry->len;
    }
    int success = 0;
    void *_reply1 = NULL, *_reply2 = NULL;
    redisReply *r1 = NULL, *r2 = NULL;
    /* Run the same DEBUG DIGEST-VALUE command on both nodes. */
    redisAppendCommandArgv(n1->context,argc, (const char**)argv,argv_len);
    success = (redisGetReply(n1->context, &_reply1) == REDIS_OK);
    if (!success) goto cleanup;
    r1 = (redisReply *) _reply1;
    redisAppendCommandArgv(n2->context,argc, (const char**)argv,argv_len);
    success = (redisGetReply(n2->context, &_reply2) == REDIS_OK);
    if (!success) goto cleanup;
    r2 = (redisReply *) _reply2;
    success = (r1->type != REDIS_REPLY_ERROR && r2->type != REDIS_REPLY_ERROR);
    if (r1->type == REDIS_REPLY_ERROR) {
        CLUSTER_MANAGER_PRINT_REPLY_ERROR(n1, r1->str);
        success = 0;
    }
    if (r2->type == REDIS_REPLY_ERROR) {
        CLUSTER_MANAGER_PRINT_REPLY_ERROR(n2, r2->str);
        success = 0;
    }
    if (!success) goto cleanup;
    /* Both replies are positional: element i is the digest of key i. */
    assert(keys_reply->elements == r1->elements &&
           keys_reply->elements == r2->elements);
    for (i = 0; i < keys_reply->elements; i++) {
        char *key = keys_reply->element[i]->str;
        char *hash1 = r1->element[i]->str;
        char *hash2 = r2->element[i]->str;
        /* Ignore keys that don't exist in both nodes. */
        if (strcmp(hash1, hash_zero) == 0 || strcmp(hash2, hash_zero) == 0)
            continue;
        /* Note: the key pointer stored in diffs is owned by keys_reply. */
        if (strcmp(hash1, hash2) != 0) listAddNodeTail(diffs, key);
    }
cleanup:
    if (r1) freeReplyObject(r1);
    if (r2) freeReplyObject(r2);
    zfree(argv);
    zfree(argv_len);
    return success;
}
+
/* Migrate keys taken from reply->elements. It returns the reply from the
 * MIGRATE command, or NULL if something goes wrong. If the argument 'dots'
 * is not NULL, a dot will be printed for every migrated key.
 *
 * The command is built positionally as:
 *   MIGRATE <ip> <port> "" 0 <timeout> [REPLACE]
 *           [AUTH <pass> | AUTH2 <user> <pass>] KEYS <key> [<key> ...]
 * (the empty key plus the KEYS clause is the multi-key MIGRATE form).
 * 'dots' — when provided — is assumed to hold at least
 * reply->elements + 1 bytes, since it is NUL-terminated at that index. */
static redisReply *clusterManagerMigrateKeysInReply(clusterManagerNode *source,
                                                    clusterManagerNode *target,
                                                    redisReply *reply,
                                                    int replace, int timeout,
                                                    char *dots)
{
    redisReply *migrate_reply = NULL;
    char **argv = NULL;
    size_t *argv_len = NULL;
    /* Fixed arguments: 7, plus REPLACE, plus AUTH/AUTH2 credentials. */
    int c = (replace ? 8 : 7);
    if (config.conn_info.auth) c += 2;
    if (config.conn_info.user) c += 1;
    size_t argc = c + reply->elements;
    size_t i, offset = 6; // Keys Offset
    argv = zcalloc(argc * sizeof(char *));
    argv_len = zcalloc(argc * sizeof(size_t));
    char portstr[255];
    char timeoutstr[255];
    snprintf(portstr, 10, "%d", target->port);
    snprintf(timeoutstr, 10, "%d", timeout);
    argv[0] = "MIGRATE";
    argv_len[0] = 7;
    argv[1] = target->ip;
    argv_len[1] = strlen(target->ip);
    argv[2] = portstr;
    argv_len[2] = strlen(portstr);
    /* Empty key + destination db 0: the actual keys follow KEYS below. */
    argv[3] = "";
    argv_len[3] = 0;
    argv[4] = "0";
    argv_len[4] = 1;
    argv[5] = timeoutstr;
    argv_len[5] = strlen(timeoutstr);
    if (replace) {
        argv[offset] = "REPLACE";
        argv_len[offset] = 7;
        offset++;
    }
    if (config.conn_info.auth) {
        if (config.conn_info.user) {
            /* Username configured: use the two-argument AUTH2 form. */
            argv[offset] = "AUTH2";
            argv_len[offset] = 5;
            offset++;
            argv[offset] = config.conn_info.user;
            argv_len[offset] = strlen(config.conn_info.user);
            offset++;
            argv[offset] = config.conn_info.auth;
            argv_len[offset] = strlen(config.conn_info.auth);
            offset++;
        } else {
            argv[offset] = "AUTH";
            argv_len[offset] = 4;
            offset++;
            argv[offset] = config.conn_info.auth;
            argv_len[offset] = strlen(config.conn_info.auth);
            offset++;
        }
    }
    argv[offset] = "KEYS";
    argv_len[offset] = 4;
    offset++;
    /* Copy every key from the GETKEYSINSLOT reply; copies are freed right
     * after the command is sent. */
    for (i = 0; i < reply->elements; i++) {
        redisReply *entry = reply->element[i];
        size_t idx = i + offset;
        assert(entry->type == REDIS_REPLY_STRING);
        argv[idx] = (char *) sdsnewlen(entry->str, entry->len);
        argv_len[idx] = entry->len;
        if (dots) dots[i] = '.';
    }
    if (dots) dots[reply->elements] = '\0';
    void *_reply = NULL;
    redisAppendCommandArgv(source->context,argc,
                           (const char**)argv,argv_len);
    int success = (redisGetReply(source->context, &_reply) == REDIS_OK);
    for (i = 0; i < reply->elements; i++) sdsfree(argv[i + offset]);
    if (!success) goto cleanup;
    migrate_reply = (redisReply *) _reply;
cleanup:
    zfree(argv);
    zfree(argv_len);
    return migrate_reply;
}
+
/* Migrate all keys in the given slot from source to target.
 *
 * Keys are fetched in batches of 'pipeline' keys via CLUSTER GETKEYSINSLOT
 * and moved with MIGRATE until the slot is empty. When 'verbose' is set a
 * dot is printed per migrated key. On error *err (when not NULL) receives a
 * heap-allocated message the caller must zfree. Returns 1 on success. */
static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source,
                                           clusterManagerNode *target,
                                           int slot, int timeout,
                                           int pipeline, int verbose,
                                           char **err)
{
    int success = 1;
    /* --cluster fix / --cluster-replace change how errors are handled. */
    int do_fix = config.cluster_manager_command.flags &
                 CLUSTER_MANAGER_CMD_FLAG_FIX;
    int do_replace = config.cluster_manager_command.flags &
                     CLUSTER_MANAGER_CMD_FLAG_REPLACE;
    while (1) {
        char *dots = NULL;
        redisReply *reply = NULL, *migrate_reply = NULL;
        reply = CLUSTER_MANAGER_COMMAND(source, "CLUSTER "
                                        "GETKEYSINSLOT %d %d", slot,
                                        pipeline);
        success = (reply != NULL);
        if (!success) return 0;
        if (reply->type == REDIS_REPLY_ERROR) {
            success = 0;
            if (err != NULL) {
                *err = zmalloc((reply->len + 1) * sizeof(char));
                redis_strlcpy(*err, reply->str,(reply->len + 1));
                CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, *err);
            }
            goto next;
        }
        assert(reply->type == REDIS_REPLY_ARRAY);
        size_t count = reply->elements;
        /* No keys left in the slot: migration complete. */
        if (count == 0) {
            freeReplyObject(reply);
            break;
        }
        /* One dot per key plus the terminating NUL. */
        if (verbose) dots = zmalloc((count+1) * sizeof(char));
        /* Calling MIGRATE command. */
        migrate_reply = clusterManagerMigrateKeysInReply(source, target,
                                                         reply, 0, timeout,
                                                         dots);
        if (migrate_reply == NULL) goto next;
        if (migrate_reply->type == REDIS_REPLY_ERROR) {
            /* BUSYKEY: at least one key already exists on the target. */
            int is_busy = strstr(migrate_reply->str, "BUSYKEY") != NULL;
            int not_served = 0;
            if (!is_busy) {
                /* Check if the slot is unassigned (not served) in the
                 * source node's configuration. */
                char *get_owner_err = NULL;
                clusterManagerNode *served_by =
                    clusterManagerGetSlotOwner(source, slot, &get_owner_err);
                if (!served_by) {
                    if (get_owner_err == NULL) not_served = 1;
                    else {
                        CLUSTER_MANAGER_PRINT_REPLY_ERROR(source,
                                                          get_owner_err);
                        zfree(get_owner_err);
                    }
                }
            }
            /* Try to handle errors. */
            if (is_busy || not_served) {
                /* If the key's slot is not served, try to assign slot
                 * to the target node. */
                if (do_fix && not_served) {
                    clusterManagerLogWarn("*** Slot was not served, setting "
                                          "owner to node %s:%d.\n",
                                          target->ip, target->port);
                    clusterManagerSetSlot(source, target, slot, "node", NULL);
                }
                /* If the key already exists in the target node (BUSYKEY),
                 * check whether its value is the same in both nodes.
                 * In case of equal values, retry migration with the
                 * REPLACE option.
                 * In case of different values:
                 * - If the migration is requested by the fix command, stop
                 *   and warn the user.
                 * - In other cases (ie. reshard), proceed only if the user
                 *   launched the command with the --cluster-replace option.*/
                if (is_busy) {
                    clusterManagerLogWarn("\n*** Target key exists\n");
                    if (!do_replace) {
                        clusterManagerLogWarn("*** Checking key values on "
                                              "both nodes...\n");
                        list *diffs = listCreate();
                        success = clusterManagerCompareKeysValues(source,
                            target, reply, diffs);
                        if (!success) {
                            clusterManagerLogErr("*** Value check failed!\n");
                            listRelease(diffs);
                            goto next;
                        }
                        if (listLength(diffs) > 0) {
                            success = 0;
                            clusterManagerLogErr(
                                "*** Found %d key(s) in both source node and "
                                "target node having different values.\n"
                                "    Source node: %s:%d\n"
                                "    Target node: %s:%d\n"
                                "    Keys(s):\n",
                                listLength(diffs),
                                source->ip, source->port,
                                target->ip, target->port);
                            listIter dli;
                            listNode *dln;
                            listRewind(diffs, &dli);
                            while((dln = listNext(&dli)) != NULL) {
                                char *k = dln->value;
                                clusterManagerLogErr("    - %s\n", k);
                            }
                            clusterManagerLogErr("Please fix the above key(s) "
                                                 "manually and try again "
                                                 "or relaunch the command \n"
                                                 "with --cluster-replace "
                                                 "option to force key "
                                                 "overriding.\n");
                            listRelease(diffs);
                            goto next;
                        }
                        listRelease(diffs);
                    }
                    clusterManagerLogWarn("*** Replacing target keys...\n");
                }
                /* Retry the migration, adding REPLACE for BUSYKEY cases. */
                freeReplyObject(migrate_reply);
                migrate_reply = clusterManagerMigrateKeysInReply(source,
                                                                 target,
                                                                 reply,
                                                                 is_busy,
                                                                 timeout,
                                                                 NULL);
                success = (migrate_reply != NULL &&
                           migrate_reply->type != REDIS_REPLY_ERROR);
            } else success = 0;
            if (!success) {
                if (migrate_reply != NULL) {
                    if (err) {
                        *err = zmalloc((migrate_reply->len + 1) * sizeof(char));
                        redis_strlcpy(*err, migrate_reply->str, (migrate_reply->len + 1));
                    }
                    printf("\n");
                    CLUSTER_MANAGER_PRINT_REPLY_ERROR(source,
                                                      migrate_reply->str);
                }
                goto next;
            }
        }
        if (verbose) {
            printf("%s", dots);
            fflush(stdout);
        }
next:
        if (reply != NULL) freeReplyObject(reply);
        if (migrate_reply != NULL) freeReplyObject(migrate_reply);
        if (dots) zfree(dots);
        if (!success) break;
    }
    return success;
}
+
/* Move slots between source and target nodes using MIGRATE.
 *
 * Options:
 * CLUSTER_MANAGER_OPT_VERBOSE -- Print a dot for every moved key.
 * CLUSTER_MANAGER_OPT_COLD    -- Move keys without opening slots /
 *                                reconfiguring the nodes.
 * CLUSTER_MANAGER_OPT_UPDATE  -- Update node->slots for source/target nodes.
 * CLUSTER_MANAGER_OPT_QUIET   -- Don't print info messages.
 *
 * Returns 1 on success, 0 on error. On error, *err (when err is not NULL)
 * may hold a heap-allocated message that the caller must zfree.
*/
static int clusterManagerMoveSlot(clusterManagerNode *source,
                                  clusterManagerNode *target,
                                  int slot, int opts,  char**err)
{
    if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) {
        printf("Moving slot %d from %s:%d to %s:%d: ", slot, source->ip,
               source->port, target->ip, target->port);
        fflush(stdout);
    }
    if (err != NULL) *err = NULL;
    int pipeline = config.cluster_manager_command.pipeline,
        timeout = config.cluster_manager_command.timeout,
        print_dots = (opts & CLUSTER_MANAGER_OPT_VERBOSE),
        option_cold = (opts & CLUSTER_MANAGER_OPT_COLD),
        success = 1;
    if (!option_cold) {
        /* Open the slot: mark it importing on the target and migrating on
         * the source before moving any key. */
        success = clusterManagerSetSlot(target, source, slot,
                                        "importing", err);
        if (!success) return 0;
        success = clusterManagerSetSlot(source, target, slot,
                                        "migrating", err);
        if (!success) return 0;
    }
    success = clusterManagerMigrateKeysInSlot(source, target, slot, timeout,
                                              pipeline, print_dots, err);
    if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) printf("\n");
    if (!success) return 0;
    if (!option_cold) {
        /* Set the new node as the owner of the slot in all the known nodes.
         *
         * We inform the target node first. It will propagate the information to
         * the rest of the cluster.
         *
         * If we inform any other node first, it can happen that the target node
         * crashes before it is set as the new owner and then the slot is left
         * without an owner which results in redirect loops. See issue #7116. */
        success = clusterManagerSetSlot(target, target, slot, "node", err);
        if (!success) return 0;

        /* Inform the source node. If the source node has just lost its last
         * slot and the target node has already informed the source node, the
         * source node has turned itself into a replica. This is not an error in
         * this scenario so we ignore it. See issue #9223. */
        success = clusterManagerSetSlot(source, target, slot, "node", err);
        const char *acceptable = "ERR Please use SETSLOT only with masters.";
        if (!success && err && !strncmp(*err, acceptable, strlen(acceptable))) {
            zfree(*err);
            *err = NULL;
        } else if (!success && err) {
            return 0;
        }
        /* NOTE(review): when err == NULL a failure of the SETSLOT above is
         * not detected here and execution continues — confirm intended. */

        /* We also inform the other nodes to avoid redirects in case the target
         * node is slow to propagate the change to the entire cluster. */
        listIter li;
        listNode *ln;
        listRewind(cluster_manager.nodes, &li);
        while ((ln = listNext(&li)) != NULL) {
            clusterManagerNode *n = ln->value;
            if (n == target || n == source) continue; /* already done */
            if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
            success = clusterManagerSetSlot(n, target, slot, "node", err);
            if (!success) return 0;
        }
    }
    /* Update the node logical config */
    if (opts & CLUSTER_MANAGER_OPT_UPDATE) {
        source->slots[slot] = 0;
        target->slots[slot] = 1;
    }
    return 1;
}
+
/* Flush the dirty node configuration by calling replicate for slaves or
 * adding the slots defined in the masters.
 * Returns 1 on success, 0 on error (or when the node is not dirty). On a
 * reply error, *err (when err is not NULL) receives a heap-allocated
 * message the caller must zfree. The dirty flag is cleared only on the
 * paths that reached the server successfully. */
static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) {
    if (!node->dirty) return 0;
    redisReply *reply = NULL;
    int is_err = 0, success = 1;
    if (err != NULL) *err = NULL;
    if (node->replicate != NULL) {
        /* Dirty replica: point it at its master. */
        reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s",
                                        node->replicate);
        if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) {
            if (is_err && err != NULL) {
                *err = zmalloc((reply->len + 1) * sizeof(char));
                redis_strlcpy(*err, reply->str, (reply->len + 1));
            }
            success = 0;
            /* If the cluster did not already joined it is possible that
             * the slave does not know the master node yet. So on errors
             * we return ASAP leaving the dirty flag set, to flush the
             * config later. */
            goto cleanup;
        }
    } else {
        /* Dirty master: flush its slots assignment. */
        int added = clusterManagerAddSlots(node, err);
        if (!added || *err != NULL) success = 0;
    }
    node->dirty = 0;
cleanup:
    if (reply != NULL) freeReplyObject(reply);
    return success;
}
+
+/* Wait until the cluster configuration is consistent. */
+static void clusterManagerWaitForClusterJoin(void) {
+ printf("Waiting for the cluster to join\n");
+ int counter = 0,
+ check_after = CLUSTER_JOIN_CHECK_AFTER +
+ (int)(listLength(cluster_manager.nodes) * 0.15f);
+ while(!clusterManagerIsConfigConsistent()) {
+ printf(".");
+ fflush(stdout);
+ sleep(1);
+ if (++counter > check_after) {
+ dict *status = clusterManagerGetLinkStatus();
+ dictIterator *iter = NULL;
+ if (status != NULL && dictSize(status) > 0) {
+ printf("\n");
+ clusterManagerLogErr("Warning: %d node(s) may "
+ "be unreachable\n", dictSize(status));
+ iter = dictGetIterator(status);
+ dictEntry *entry;
+ while ((entry = dictNext(iter)) != NULL) {
+ sds nodeaddr = (sds) dictGetKey(entry);
+ char *node_ip = NULL;
+ int node_port = 0, node_bus_port = 0;
+ list *from = (list *) dictGetVal(entry);
+ if (parseClusterNodeAddress(nodeaddr, &node_ip,
+ &node_port, &node_bus_port) && node_bus_port) {
+ clusterManagerLogErr(" - The port %d of node %s may "
+ "be unreachable from:\n",
+ node_bus_port, node_ip);
+ } else {
+ clusterManagerLogErr(" - Node %s may be unreachable "
+ "from:\n", nodeaddr);
+ }
+ listIter li;
+ listNode *ln;
+ listRewind(from, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds from_addr = ln->value;
+ clusterManagerLogErr(" %s\n", from_addr);
+ sdsfree(from_addr);
+ }
+ clusterManagerLogErr("Cluster bus ports must be reachable "
+ "by every node.\nRemember that "
+ "cluster bus ports are different "
+ "from standard instance ports.\n");
+ listEmpty(from);
+ }
+ }
+ if (iter != NULL) dictReleaseIterator(iter);
+ if (status != NULL) dictRelease(status);
+ counter = 0;
+ }
+ }
+ printf("\n");
+}
+
+/* Load node's cluster configuration by calling "CLUSTER NODES" command.
+ * Node's configuration (name, replicate, slots, ...) is then updated.
+ * If CLUSTER_MANAGER_OPT_GETFRIENDS flag is set into 'opts' argument,
+ * and node already knows other nodes, the node's friends list is populated
+ * with the other nodes info. */
+static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts,
+                                      char **err)
+{
+    redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES");
+    int success = 1;
+    *err = NULL;
+    if (!clusterManagerCheckRedisReply(node, reply, err)) {
+        success = 0;
+        goto cleanup;
+    }
+    int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS);
+    /* The reply is parsed destructively: newlines and spaces are replaced
+     * with '\0' so each field becomes a standalone C string inside the
+     * reply buffer (freed at cleanup). One line per cluster node. */
+    char *lines = reply->str, *p, *line;
+    while ((p = strstr(lines, "\n")) != NULL) {
+        *p = '\0';
+        line = lines;
+        lines = p + 1;
+        char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL,
+             *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL,
+             *link_status = NULL;
+        UNUSED(link_status);
+        int i = 0;
+        /* Split the first 8 space-separated tokens; anything after token 8
+         * (the slot definitions) is left in 'line' for later parsing. */
+        while ((p = strchr(line, ' ')) != NULL) {
+            *p = '\0';
+            char *token = line;
+            line = p + 1;
+            switch(i++){
+            case 0: name = token; break;
+            case 1: addr = token; break;
+            case 2: flags = token; break;
+            case 3: master_id = token; break;
+            case 4: ping_sent = token; break;
+            case 5: ping_recv = token; break;
+            case 6: config_epoch = token; break;
+            case 7: link_status = token; break;
+            }
+            if (i == 8) break; // Slots
+        }
+        if (!flags) {
+            success = 0;
+            goto cleanup;
+        }
+
+        char *ip = NULL;
+        int port = 0, bus_port = 0;
+        if (addr == NULL || !parseClusterNodeAddress(addr, &ip, &port, &bus_port)) {
+            fprintf(stderr, "Error: invalid CLUSTER NODES reply\n");
+            success = 0;
+            goto cleanup;
+        }
+
+        int myself = (strstr(flags, "myself") != NULL);
+        clusterManagerNode *currentNode = NULL;
+        if (myself) {
+            /* bus-port could be wrong, correct it here, see clusterManagerNewNode. */
+            node->bus_port = bus_port;
+            node->flags |= CLUSTER_MANAGER_FLAG_MYSELF;
+            currentNode = node;
+            clusterManagerNodeResetSlots(node);
+            /* i == 8 means the line had slot definitions after the fixed
+             * fields; parse them only for the node itself. */
+            if (i == 8) {
+                int remaining = strlen(line);
+                while (remaining > 0) {
+                    p = strchr(line, ' ');
+                    if (p == NULL) p = line + remaining;
+                    remaining -= (p - line);
+
+                    char *slotsdef = line;
+                    *p = '\0';
+                    if (remaining) {
+                        line = p + 1;
+                        remaining--;
+                    } else line = p;
+                    char *dash = NULL;
+                    /* A leading '[' marks an open slot, i.e. one in
+                     * migrating ("[slot->-dstid]") or importing
+                     * ("[slot-<-srcid]") state. */
+                    if (slotsdef[0] == '[') {
+                        slotsdef++;
+                        if ((p = strstr(slotsdef, "->-"))) { // Migrating
+                            *p = '\0';
+                            p += 3;
+                            char *closing_bracket = strchr(p, ']');
+                            if (closing_bracket) *closing_bracket = '\0';
+                            sds slot = sdsnew(slotsdef);
+                            sds dst = sdsnew(p);
+                            /* 'migrating' is a flat array of (slot, dest-id)
+                             * pairs, hence the += 2. */
+                            node->migrating_count += 2;
+                            node->migrating = zrealloc(node->migrating,
+                                (node->migrating_count * sizeof(sds)));
+                            node->migrating[node->migrating_count - 2] =
+                                slot;
+                            node->migrating[node->migrating_count - 1] =
+                                dst;
+                        }  else if ((p = strstr(slotsdef, "-<-"))) {//Importing
+                            *p = '\0';
+                            p += 3;
+                            char *closing_bracket = strchr(p, ']');
+                            if (closing_bracket) *closing_bracket = '\0';
+                            sds slot = sdsnew(slotsdef);
+                            sds src = sdsnew(p);
+                            node->importing_count += 2;
+                            node->importing = zrealloc(node->importing,
+                                (node->importing_count * sizeof(sds)));
+                            node->importing[node->importing_count - 2] =
+                                slot;
+                            node->importing[node->importing_count - 1] =
+                                src;
+                        }
+                    } else if ((dash = strchr(slotsdef, '-')) != NULL) {
+                        /* Slot range "start-stop": mark every slot in it. */
+                        p = dash;
+                        int start, stop;
+                        *p = '\0';
+                        start = atoi(slotsdef);
+                        stop = atoi(p + 1);
+                        node->slots_count += (stop - (start - 1));
+                        while (start <= stop) node->slots[start++] = 1;
+                    } else if (p > slotsdef) {
+                        /* Single slot number. */
+                        node->slots[atoi(slotsdef)] = 1;
+                        node->slots_count++;
+                    }
+                }
+            }
+            node->dirty = 0;
+        } else if (!getfriends) {
+            /* Without GETFRIENDS we only care about the "myself" line:
+             * keep scanning until it is found, then stop. */
+            if (!(node->flags & CLUSTER_MANAGER_FLAG_MYSELF)) continue;
+            else break;
+        } else {
+            /* Any other node is recorded as a "friend" of 'node'. */
+            currentNode = clusterManagerNewNode(sdsnew(ip), port, bus_port);
+            currentNode->flags |= CLUSTER_MANAGER_FLAG_FRIEND;
+            if (node->friends == NULL) node->friends = listCreate();
+            listAddNodeTail(node->friends, currentNode);
+        }
+        if (name != NULL) {
+            if (currentNode->name) sdsfree(currentNode->name);
+            currentNode->name = sdsnew(name);
+        }
+        if (currentNode->flags_str != NULL)
+            freeClusterManagerNodeFlags(currentNode->flags_str);
+        currentNode->flags_str = listCreate();
+        int flag_len;
+        /* The flags token is a comma-separated list; translate the ones we
+         * act on into bit flags and keep every flag string for display. */
+        while ((flag_len = strlen(flags)) > 0) {
+            sds flag = NULL;
+            char *fp = strchr(flags, ',');
+            if (fp) {
+                *fp = '\0';
+                flag = sdsnew(flags);
+                flags = fp + 1;
+            } else {
+                flag = sdsnew(flags);
+                flags += flag_len;
+            }
+            if (strcmp(flag, "noaddr") == 0)
+                currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR;
+            else if (strcmp(flag, "disconnected") == 0)
+                currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT;
+            else if (strcmp(flag, "fail") == 0)
+                currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL;
+            else if (strcmp(flag, "slave") == 0) {
+                currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE;
+                if (master_id != NULL) {
+                    if (currentNode->replicate) sdsfree(currentNode->replicate);
+                    currentNode->replicate = sdsnew(master_id);
+                }
+            }
+            listAddNodeTail(currentNode->flags_str, flag);
+        }
+        if (config_epoch != NULL)
+            currentNode->current_epoch = atoll(config_epoch);
+        if (ping_sent != NULL) currentNode->ping_sent = atoll(ping_sent);
+        if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv);
+        if (!getfriends && myself) break;
+    }
+cleanup:
+    if (reply) freeReplyObject(reply);
+    return success;
+}
+
+/* Retrieves info about the cluster using argument 'node' as the starting
+ * point. All nodes will be loaded inside the cluster_manager.nodes list.
+ * Warning: if something goes wrong, it will free the starting node before
+ * returning 0. */
+static int clusterManagerLoadInfoFromNode(clusterManagerNode *node) {
+    /* On any failure path 'node' is freed before returning 0, as the
+     * function comment warns: the caller must not touch it afterwards. */
+    if (node->context == NULL && !clusterManagerNodeConnect(node)) {
+        freeClusterManagerNode(node);
+        return 0;
+    }
+    char *e = NULL;
+    if (!clusterManagerNodeIsCluster(node, &e)) {
+        clusterManagerPrintNotClusterNodeError(node, e);
+        if (e) zfree(e);
+        freeClusterManagerNode(node);
+        return 0;
+    }
+    e = NULL;
+    if (!clusterManagerNodeLoadInfo(node, CLUSTER_MANAGER_OPT_GETFRIENDS, &e)) {
+        if (e) {
+            CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, e);
+            zfree(e);
+        }
+        freeClusterManagerNode(node);
+        return 0;
+    }
+    listIter li;
+    listNode *ln;
+    /* Replace any previously loaded node list with a fresh one. */
+    if (cluster_manager.nodes != NULL) {
+        listRewind(cluster_manager.nodes, &li);
+        while ((ln = listNext(&li)) != NULL)
+            freeClusterManagerNode((clusterManagerNode *) ln->value);
+        listRelease(cluster_manager.nodes);
+    }
+    cluster_manager.nodes = listCreate();
+    listAddNodeTail(cluster_manager.nodes, node);
+    if (node->friends != NULL) {
+        /* Connect to and load every friend; unreachable or failing friends
+         * are freed and, if they were masters, counted as unreachable. */
+        listRewind(node->friends, &li);
+        while ((ln = listNext(&li)) != NULL) {
+            clusterManagerNode *friend = ln->value;
+            if (!friend->ip || !friend->port) goto invalid_friend;
+            if (!friend->context && !clusterManagerNodeConnect(friend))
+                goto invalid_friend;
+            e = NULL;
+            if (clusterManagerNodeLoadInfo(friend, 0, &e)) {
+                if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR |
+                                     CLUSTER_MANAGER_FLAG_DISCONNECT |
+                                     CLUSTER_MANAGER_FLAG_FAIL))
+                {
+                    goto invalid_friend;
+                }
+                listAddNodeTail(cluster_manager.nodes, friend);
+            } else {
+                clusterManagerLogErr("[ERR] Unable to load info for "
+                                     "node %s:%d\n",
+                                     friend->ip, friend->port);
+                goto invalid_friend;
+            }
+            continue;
+invalid_friend:
+            if (!(friend->flags & CLUSTER_MANAGER_FLAG_SLAVE))
+                cluster_manager.unreachable_masters++;
+            freeClusterManagerNode(friend);
+        }
+        listRelease(node->friends);
+        node->friends = NULL;
+    }
+    // Count replicas for each node
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *n = ln->value;
+        if (n->replicate != NULL) {
+            clusterManagerNode *master = clusterManagerNodeByName(n->replicate);
+            if (master == NULL) {
+                clusterManagerLogWarn("*** WARNING: %s:%d claims to be "
+                                      "slave of unknown node ID %s.\n",
+                                      n->ip, n->port, n->replicate);
+            } else master->replicas_count++;
+        }
+    }
+    return 1;
+}
+
+/* Compare functions used by various sorting operations. */
+/* qsort comparator: orders slot definition strings lexicographically. */
+int clusterManagerSlotCompare(const void *slot1, const void *slot2) {
+    const char **i1 = (const char **)slot1;
+    const char **i2 = (const char **)slot2;
+    return strcmp(*i1, *i2);
+}
+
+/* qsort comparator: orders nodes by slot count, highest first. */
+int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) {
+    clusterManagerNode *node1 = *((clusterManagerNode **) n1);
+    clusterManagerNode *node2 = *((clusterManagerNode **) n2);
+    return node2->slots_count - node1->slots_count;
+}
+
+/* qsort comparator: orders nodes by their 'balance' field, ascending. */
+int clusterManagerCompareNodeBalance(const void *n1, const void *n2) {
+    clusterManagerNode *node1 = *((clusterManagerNode **) n1);
+    clusterManagerNode *node2 = *((clusterManagerNode **) n2);
+    return node1->balance - node2->balance;
+}
+
+/* Build a canonical "signature" of the cluster configuration as seen by
+ * 'node': one "nodename:slot,slot,..." entry per node that owns slots,
+ * entries sorted and joined with '|'. Two nodes that agree on the slot map
+ * produce identical signatures, which is how config consistency is checked.
+ * Returns a new sds (caller frees) or NULL on error. */
+static sds clusterManagerGetConfigSignature(clusterManagerNode *node) {
+    sds signature = NULL;
+    int node_count = 0, i = 0, name_len = 0;
+    char **node_configs = NULL;
+    redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES");
+    if (reply == NULL || reply->type == REDIS_REPLY_ERROR)
+        goto cleanup;
+    /* Parsed destructively, same tokenizing scheme as
+     * clusterManagerNodeLoadInfo: '\0' is written over separators. */
+    char *lines = reply->str, *p, *line;
+    while ((p = strstr(lines, "\n")) != NULL) {
+        i = 0;
+        *p = '\0';
+        line = lines;
+        lines = p + 1;
+        char *nodename = NULL;
+        int tot_size = 0;
+        while ((p = strchr(line, ' ')) != NULL) {
+            *p = '\0';
+            char *token = line;
+            line = p + 1;
+            if (i == 0) {
+                nodename = token;
+                tot_size = (p - token);
+                name_len = tot_size++; // Make room for ':' in tot_size
+            }
+            if (++i == 8) break;
+        }
+        /* Skip malformed lines and nodes with no slot definitions. */
+        if (i != 8) continue;
+        if (nodename == NULL) continue;
+        int remaining = strlen(line);
+        if (remaining == 0) continue;
+        char **slots = NULL;
+        int c = 0;
+        /* Collect pointers to each slot token; open slots ("[...") are
+         * ignored since they are transient state, not ownership. */
+        while (remaining > 0) {
+            p = strchr(line, ' ');
+            if (p == NULL) p = line + remaining;
+            int size = (p - line);
+            remaining -= size;
+            tot_size += size;
+            char *slotsdef = line;
+            *p = '\0';
+            if (remaining) {
+                line = p + 1;
+                remaining--;
+            } else line = p;
+            if (slotsdef[0] != '[') {
+                c++;
+                slots = zrealloc(slots, (c * sizeof(char *)));
+                slots[c - 1] = slotsdef;
+            }
+        }
+        if (c > 0) {
+            if (c > 1)
+                qsort(slots, c, sizeof(char *), clusterManagerSlotCompare);
+            node_count++;
+            node_configs =
+                zrealloc(node_configs, (node_count * sizeof(char *)));
+            /* Make room for '|' separators. */
+            tot_size += (sizeof(char) * (c - 1));
+            /* tot_size was accumulated above: name + ':' + slot tokens
+             * + ',' separators; +1 for the terminating NUL. */
+            char *cfg = zmalloc((sizeof(char) * tot_size) + 1);
+            memcpy(cfg, nodename, name_len);
+            char *sp = cfg + name_len;
+            *(sp++) = ':';
+            for (i = 0; i < c; i++) {
+                if (i > 0) *(sp++) = ',';
+                int slen = strlen(slots[i]);
+                memcpy(sp, slots[i], slen);
+                sp += slen;
+            }
+            *(sp++) = '\0';
+            node_configs[node_count - 1] = cfg;
+        }
+        zfree(slots);
+    }
+    if (node_count > 0) {
+        /* Sort the per-node entries so the signature is order-independent. */
+        if (node_count > 1) {
+            qsort(node_configs, node_count, sizeof(char *),
+                  clusterManagerSlotCompare);
+        }
+        signature = sdsempty();
+        for (i = 0; i < node_count; i++) {
+            if (i > 0) signature = sdscatprintf(signature, "%c", '|');
+            signature = sdscatfmt(signature, "%s", node_configs[i]);
+        }
+    }
+cleanup:
+    if (reply != NULL) freeReplyObject(reply);
+    if (node_configs != NULL) {
+        for (i = 0; i < node_count; i++) zfree(node_configs[i]);
+        zfree(node_configs);
+    }
+    return signature;
+}
+
+/* Return 1 if every node in the cluster reports the same configuration
+ * signature (see clusterManagerGetConfigSignature), 0 otherwise. */
+static int clusterManagerIsConfigConsistent(void) {
+    if (cluster_manager.nodes == NULL) return 0;
+    int consistent = (listLength(cluster_manager.nodes) <= 1);
+    // If the Cluster has only one node, it's always consistent
+    if (consistent) return 1;
+    sds first_cfg = NULL;
+    listIter li;
+    listNode *ln;
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *node = ln->value;
+        sds cfg = clusterManagerGetConfigSignature(node);
+        if (cfg == NULL) {
+            /* Failing to obtain a signature counts as inconsistent. */
+            consistent = 0;
+            break;
+        }
+        /* Compare every node's signature against the first one seen. */
+        if (first_cfg == NULL) first_cfg = cfg;
+        else {
+            consistent = !sdscmp(first_cfg, cfg);
+            sdsfree(cfg);
+            if (!consistent) break;
+        }
+    }
+    if (first_cfg != NULL) sdsfree(first_cfg);
+    return consistent;
+}
+
+/* Query 'node' with CLUSTER NODES and return a list of clusterManagerLink
+ * entries for every peer it reports as disconnected or handshaking (the
+ * "myself" line is skipped). Returns NULL if the command fails; the caller
+ * owns the returned list and the link structs inside it. */
+static list *clusterManagerGetDisconnectedLinks(clusterManagerNode *node) {
+    list *links = NULL;
+    redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES");
+    if (!clusterManagerCheckRedisReply(node, reply, NULL)) goto cleanup;
+    links = listCreate();
+    char *lines = reply->str, *p, *line;
+    while ((p = strstr(lines, "\n")) != NULL) {
+        int i = 0;
+        *p = '\0';
+        line = lines;
+        lines = p + 1;
+        char *nodename = NULL, *addr = NULL, *flags = NULL, *link_status = NULL;
+        while ((p = strchr(line, ' ')) != NULL) {
+            *p = '\0';
+            char *token = line;
+            line = p + 1;
+            if (i == 0) nodename = token;
+            else if (i == 1) addr = token;
+            else if (i == 2) flags = token;
+            else if (i == 7) link_status = token;
+            else if (i == 8) break;
+            i++;
+        }
+        /* If the line ended right after the 7th field, the link status is
+         * whatever remains in 'line' (no trailing space to tokenize on). */
+        if (i == 7) link_status = line;
+        if (nodename == NULL || addr == NULL || flags == NULL ||
+            link_status == NULL) continue;
+        if (strstr(flags, "myself") != NULL) continue;
+        int disconnected = ((strstr(flags, "disconnected") != NULL) ||
+                            (strstr(link_status, "disconnected")));
+        int handshaking = (strstr(flags, "handshake") != NULL);
+        if (disconnected || handshaking) {
+            clusterManagerLink *link = zmalloc(sizeof(*link));
+            link->node_name = sdsnew(nodename);
+            link->node_addr = sdsnew(addr);
+            link->connected = 0;
+            link->handshaking = handshaking;
+            listAddNodeTail(links, link);
+        }
+    }
+cleanup:
+    if (reply != NULL) freeReplyObject(reply);
+    return links;
+}
+
+/* Check for disconnected cluster links. It returns a dict whose keys
+ * are the unreachable node addresses and the values are lists of
+ * node addresses that cannot reach the unreachable node. */
+static dict *clusterManagerGetLinkStatus(void) {
+    if (cluster_manager.nodes == NULL) return NULL;
+    dict *status = dictCreate(&clusterManagerLinkDictType);
+    listIter li;
+    listNode *ln;
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *node = ln->value;
+        list *links = clusterManagerGetDisconnectedLinks(node);
+        if (links) {
+            listIter lli;
+            listNode *lln;
+            listRewind(links, &lli);
+            while ((lln = listNext(&lli)) != NULL) {
+                clusterManagerLink *link = lln->value;
+                /* Group by unreachable address: each dict entry maps an
+                 * address to the list of nodes that cannot reach it. */
+                list *from = NULL;
+                dictEntry *entry = dictFind(status, link->node_addr);
+                if (entry) from = dictGetVal(entry);
+                else {
+                    from = listCreate();
+                    dictAdd(status, sdsdup(link->node_addr), from);
+                }
+                sds myaddr = sdsempty();
+                myaddr = sdscatfmt(myaddr, "%s:%u", node->ip, node->port);
+                listAddNodeTail(from, myaddr);
+                /* The link struct is consumed here; free it and its fields. */
+                sdsfree(link->node_name);
+                sdsfree(link->node_addr);
+                zfree(link);
+            }
+            listRelease(links);
+        }
+    }
+    return status;
+}
+
+/* Add the error string to cluster_manager.errors and print it. */
+/* Add the error string to cluster_manager.errors and print it.
+ * Ownership of 'err' is transferred to the errors list. */
+static void clusterManagerOnError(sds err) {
+    if (cluster_manager.errors == NULL)
+        cluster_manager.errors = listCreate();
+    listAddNodeTail(cluster_manager.errors, err);
+    clusterManagerLogErr("%s\n", (char *) err);
+}
+
+/* Check the slots coverage of the cluster. The 'all_slots' argument must be
+ * and array of 16384 bytes. Every covered slot will be set to 1 in the
+ * 'all_slots' array. The function returns the total number if covered slots.*/
+static int clusterManagerGetCoveredSlots(char *all_slots) {
+    if (cluster_manager.nodes == NULL) return 0;
+    listIter li;
+    listNode *ln;
+    listRewind(cluster_manager.nodes, &li);
+    int totslots = 0, i;
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *n = ln->value;
+        for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
+            /* Count each slot only once even if multiple nodes claim it. */
+            if (n->slots[i] && !all_slots[i]) {
+                all_slots[i] = 1;
+                totslots++;
+            }
+        }
+    }
+    return totslots;
+}
+
+/* Print a compact, human-readable representation of a list of slot number
+ * strings (ranges are collapsed by clusterManagerNodeSlotsString). */
+static void clusterManagerPrintSlotsList(list *slots) {
+    /* A throwaway node struct is used only as a 16384-bit slot bitmap so
+     * the existing slots-to-string formatter can be reused. */
+    clusterManagerNode n = {0};
+    listIter li;
+    listNode *ln;
+    listRewind(slots, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        int slot = atoi(ln->value);
+        if (slot >= 0 && slot < CLUSTER_MANAGER_SLOTS)
+            n.slots[slot] = 1;
+    }
+    sds nodeslist = clusterManagerNodeSlotsString(&n);
+    printf("%s\n", nodeslist);
+    sdsfree(nodeslist);
+}
+
+/* Return the node, among 'nodes' with the greatest number of keys
+ * in the specified slot. */
+static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes,
+                                                                    int slot,
+                                                                    char **err)
+{
+    clusterManagerNode *node = NULL;
+    int numkeys = 0;
+    listIter li;
+    listNode *ln;
+    listRewind(nodes, &li);
+    if (err) *err = NULL;
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *n = ln->value;
+        /* Only masters are candidates: skip replicas. */
+        if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate)
+            continue;
+        redisReply *r =
+            CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOT %d", slot);
+        int success = clusterManagerCheckRedisReply(n, r, err);
+        if (success) {
+            /* 'node == NULL' ensures the first master is picked even when
+             * it holds zero keys, so a non-NULL result is returned whenever
+             * at least one master replied. */
+            if (r->integer > numkeys || node == NULL) {
+                numkeys = r->integer;
+                node = n;
+            }
+        }
+        if (r != NULL) freeReplyObject(r);
+        /* If the reply contains errors */
+        if (!success) {
+            if (err != NULL && *err != NULL)
+                CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err);
+            node = NULL;
+            break;
+        }
+    }
+    return node;
+}
+
+/* This function returns the master that has the least number of replicas
+ * in the cluster. If there are multiple masters with the same smaller
+ * number of replicas, one at random is returned. */
+
+static clusterManagerNode *clusterManagerNodeWithLeastReplicas(void) {
+    clusterManagerNode *node = NULL;
+    int lowest_count = 0;
+    listIter li;
+    listNode *ln;
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *n = ln->value;
+        if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+        /* Strict '<' keeps the first master found on ties. */
+        if (node == NULL || n->replicas_count < lowest_count) {
+            node = n;
+            lowest_count = n->replicas_count;
+        }
+    }
+    return node;
+}
+
+/* This function returns a random master node, return NULL if none */
+
+static clusterManagerNode *clusterManagerNodeMasterRandom(void) {
+    int master_count = 0;
+    int idx;
+    listIter li;
+    listNode *ln;
+    /* First pass: count the masters so a uniform index can be drawn. */
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *n = ln->value;
+        if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+        master_count++;
+    }
+
+    assert(master_count > 0);
+    srand(time(NULL));
+    idx = rand() % master_count;
+    /* Second pass: return the idx-th master. */
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *n = ln->value;
+        if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+        if (!idx--) {
+            return n;
+        }
+    }
+    /* Can not be reached */
+    assert(0);
+}
+
+/* Fix uncovered slots (slots not assigned to any master). 'all_slots' is the
+ * 16384-byte coverage bitmap produced by clusterManagerGetCoveredSlots.
+ * Returns the number of slots fixed, or -1 on error. Slots are classified by
+ * how many nodes hold keys for them (none / one / many) and each class is
+ * fixed, after user confirmation, with a different strategy. */
+static int clusterManagerFixSlotsCoverage(char *all_slots) {
+    int force_fix = config.cluster_manager_command.flags &
+                    CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS;
+
+    /* Refuse to run with unreachable masters unless explicitly forced:
+     * their slots would look uncovered and could be wrongly reassigned. */
+    if (cluster_manager.unreachable_masters > 0 && !force_fix) {
+        clusterManagerLogWarn("*** Fixing slots coverage with %d unreachable masters is dangerous: redis-cli will assume that slots about masters that are not reachable are not covered, and will try to reassign them to the reachable nodes. This can cause data loss and is rarely what you want to do. If you really want to proceed use the --cluster-fix-with-unreachable-masters option.\n", cluster_manager.unreachable_masters);
+        exit(1);
+    }
+
+    int i, fixed = 0;
+    list *none = NULL, *single = NULL, *multi = NULL;
+    clusterManagerLogInfo(">>> Fixing slots coverage...\n");
+    /* For every uncovered slot, collect the list of masters that actually
+     * hold keys for it into clusterManagerUncoveredSlots. */
+    for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
+        int covered = all_slots[i];
+        if (!covered) {
+            sds slot = sdsfromlonglong((long long) i);
+            list *slot_nodes = listCreate();
+            sds slot_nodes_str = sdsempty();
+            listIter li;
+            listNode *ln;
+            listRewind(cluster_manager.nodes, &li);
+            while ((ln = listNext(&li)) != NULL) {
+                clusterManagerNode *n = ln->value;
+                if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate)
+                    continue;
+                redisReply *reply = CLUSTER_MANAGER_COMMAND(n,
+                    "CLUSTER GETKEYSINSLOT %d %d", i, 1);
+                if (!clusterManagerCheckRedisReply(n, reply, NULL)) {
+                    fixed = -1;
+                    if (reply) freeReplyObject(reply);
+                    goto cleanup;
+                }
+                assert(reply->type == REDIS_REPLY_ARRAY);
+                if (reply->elements > 0) {
+                    listAddNodeTail(slot_nodes, n);
+                    if (listLength(slot_nodes) > 1)
+                        slot_nodes_str = sdscat(slot_nodes_str, ", ");
+                    slot_nodes_str = sdscatfmt(slot_nodes_str,
+                                               "%s:%u", n->ip, n->port);
+                }
+                freeReplyObject(reply);
+            }
+            sdsfree(slot_nodes_str);
+            /* Ownership of 'slot' and 'slot_nodes' moves into the dict. */
+            dictAdd(clusterManagerUncoveredSlots, slot, slot_nodes);
+        }
+    }
+
+    /* For every slot, take action depending on the actual condition:
+     * 1) No node has keys for this slot.
+     * 2) A single node has keys for this slot.
+     * 3) Multiple nodes have keys for this slot. */
+    none = listCreate();
+    single = listCreate();
+    multi = listCreate();
+    dictIterator *iter = dictGetIterator(clusterManagerUncoveredSlots);
+    dictEntry *entry;
+    while ((entry = dictNext(iter)) != NULL) {
+        sds slot = (sds) dictGetKey(entry);
+        list *nodes = (list *) dictGetVal(entry);
+        switch (listLength(nodes)){
+        case 0: listAddNodeTail(none, slot); break;
+        case 1: listAddNodeTail(single, slot); break;
+        default: listAddNodeTail(multi, slot); break;
+        }
+    }
+    dictReleaseIterator(iter);
+
+    /* we want explicit manual confirmation from users for all the fix cases */
+    int ignore_force = 1;
+
+    /* Handle case "1": keys in no node.
+     * Cover each slot with a randomly chosen master. */
+    if (listLength(none) > 0) {
+        printf("The following uncovered slots have no keys "
+               "across the cluster:\n");
+        clusterManagerPrintSlotsList(none);
+        if (confirmWithYes("Fix these slots by covering with a random node?",
+                           ignore_force)) {
+            listIter li;
+            listNode *ln;
+            listRewind(none, &li);
+            while ((ln = listNext(&li)) != NULL) {
+                sds slot = ln->value;
+                int s = atoi(slot);
+                clusterManagerNode *n = clusterManagerNodeMasterRandom();
+                clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n",
+                                      slot, n->ip, n->port);
+                if (!clusterManagerSetSlotOwner(n, s, 0)) {
+                    fixed = -1;
+                    goto cleanup;
+                }
+                /* Since CLUSTER ADDSLOTS succeeded, we also update the slot
+                 * info into the node struct, in order to keep it synced */
+                n->slots[s] = 1;
+                fixed++;
+            }
+        }
+    }
+
+    /* Handle case "2": keys only in one node.
+     * Cover each slot with the single node holding its keys. */
+    if (listLength(single) > 0) {
+        printf("The following uncovered slots have keys in just one node:\n");
+        clusterManagerPrintSlotsList(single);
+        if (confirmWithYes("Fix these slots by covering with those nodes?",
+                           ignore_force)) {
+            listIter li;
+            listNode *ln;
+            listRewind(single, &li);
+            while ((ln = listNext(&li)) != NULL) {
+                sds slot = ln->value;
+                int s = atoi(slot);
+                dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot);
+                assert(entry != NULL);
+                list *nodes = (list *) dictGetVal(entry);
+                listNode *fn = listFirst(nodes);
+                assert(fn != NULL);
+                clusterManagerNode *n = fn->value;
+                clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n",
+                                      slot, n->ip, n->port);
+                if (!clusterManagerSetSlotOwner(n, s, 0)) {
+                    fixed = -1;
+                    goto cleanup;
+                }
+                /* Since CLUSTER ADDSLOTS succeeded, we also update the slot
+                 * info into the node struct, in order to keep it synced */
+                n->slots[atoi(slot)] = 1;
+                fixed++;
+            }
+        }
+    }
+
+    /* Handle case "3": keys in multiple nodes.
+     * Pick the node with most keys as owner and migrate the keys from all
+     * the other holders into it. */
+    if (listLength(multi) > 0) {
+        printf("The following uncovered slots have keys in multiple nodes:\n");
+        clusterManagerPrintSlotsList(multi);
+        if (confirmWithYes("Fix these slots by moving keys "
+                           "into a single node?", ignore_force)) {
+            listIter li;
+            listNode *ln;
+            listRewind(multi, &li);
+            while ((ln = listNext(&li)) != NULL) {
+                sds slot = ln->value;
+                dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot);
+                assert(entry != NULL);
+                list *nodes = (list *) dictGetVal(entry);
+                int s = atoi(slot);
+                clusterManagerNode *target =
+                    clusterManagerGetNodeWithMostKeysInSlot(nodes, s, NULL);
+                if (target == NULL) {
+                    fixed = -1;
+                    goto cleanup;
+                }
+                clusterManagerLogInfo(">>> Covering slot %s moving keys "
+                                      "to %s:%d\n", slot,
+                                      target->ip, target->port);
+                if (!clusterManagerSetSlotOwner(target, s, 1)) {
+                    fixed = -1;
+                    goto cleanup;
+                }
+                /* Since CLUSTER ADDSLOTS succeeded, we also update the slot
+                 * info into the node struct, in order to keep it synced */
+                target->slots[atoi(slot)] = 1;
+                listIter nli;
+                listNode *nln;
+                listRewind(nodes, &nli);
+                while ((nln = listNext(&nli)) != NULL) {
+                    clusterManagerNode *src = nln->value;
+                    if (src == target) continue;
+                    /* Assign the slot to target node in the source node. */
+                    if (!clusterManagerSetSlot(src, target, s, "NODE", NULL))
+                        fixed = -1;
+                    if (fixed < 0) goto cleanup;
+                    /* Set the source node in 'importing' state
+                     * (even if we will actually migrate keys away)
+                     * in order to avoid receiving redirections
+                     * for MIGRATE. */
+                    if (!clusterManagerSetSlot(src, target, s,
+                                               "IMPORTING", NULL)) fixed = -1;
+                    if (fixed < 0) goto cleanup;
+                    int opts = CLUSTER_MANAGER_OPT_VERBOSE |
+                               CLUSTER_MANAGER_OPT_COLD;
+                    if (!clusterManagerMoveSlot(src, target, s, opts, NULL)) {
+                        fixed = -1;
+                        goto cleanup;
+                    }
+                    if (!clusterManagerClearSlotStatus(src, s))
+                        fixed = -1;
+                    if (fixed < 0) goto cleanup;
+                }
+                fixed++;
+            }
+        }
+    }
+cleanup:
+    if (none) listRelease(none);
+    if (single) listRelease(single);
+    if (multi) listRelease(multi);
+    return fixed;
+}
+
+/* Slot 'slot' was found to be in importing or migrating state in one or
+ * more nodes. This function fixes this condition by migrating keys where
+ * it seems more sensible. */
+static int clusterManagerFixOpenSlot(int slot) {
+ int force_fix = config.cluster_manager_command.flags &
+ CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS;
+
+ if (cluster_manager.unreachable_masters > 0 && !force_fix) {
+ clusterManagerLogWarn("*** Fixing open slots with %d unreachable masters is dangerous: redis-cli will assume that slots about masters that are not reachable are not covered, and will try to reassign them to the reachable nodes. This can cause data loss and is rarely what you want to do. If you really want to proceed use the --cluster-fix-with-unreachable-masters option.\n", cluster_manager.unreachable_masters);
+ exit(1);
+ }
+
+ clusterManagerLogInfo(">>> Fixing open slot %d\n", slot);
+ /* Try to obtain the current slot owner, according to the current
+ * nodes configuration. */
+ int success = 1;
+ list *owners = listCreate(); /* List of nodes claiming some ownership.
+ it could be stating in the configuration
+ to have the node ownership, or just
+ holding keys for such slot. */
+ list *migrating = listCreate();
+ list *importing = listCreate();
+ sds migrating_str = sdsempty();
+ sds importing_str = sdsempty();
+ clusterManagerNode *owner = NULL; /* The obvious slot owner if any. */
+
+ /* Iterate all the nodes, looking for potential owners of this slot. */
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ if (n->slots[slot]) {
+ listAddNodeTail(owners, n);
+ } else {
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n,
+ "CLUSTER COUNTKEYSINSLOT %d", slot);
+ success = clusterManagerCheckRedisReply(n, r, NULL);
+ if (success && r->integer > 0) {
+ clusterManagerLogWarn("*** Found keys about slot %d "
+ "in non-owner node %s:%d!\n", slot,
+ n->ip, n->port);
+ listAddNodeTail(owners, n);
+ }
+ if (r) freeReplyObject(r);
+ if (!success) goto cleanup;
+ }
+ }
+
+ /* If we have only a single potential owner for this slot,
+ * set it as "owner". */
+ if (listLength(owners) == 1) owner = listFirst(owners)->value;
+
+ /* Scan the list of nodes again, in order to populate the
+ * list of nodes in importing or migrating state for
+ * this slot. */
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ int is_migrating = 0, is_importing = 0;
+ if (n->migrating) {
+ for (int i = 0; i < n->migrating_count; i += 2) {
+ sds migrating_slot = n->migrating[i];
+ if (atoi(migrating_slot) == slot) {
+ char *sep = (listLength(migrating) == 0 ? "" : ",");
+ migrating_str = sdscatfmt(migrating_str, "%s%s:%u",
+ sep, n->ip, n->port);
+ listAddNodeTail(migrating, n);
+ is_migrating = 1;
+ break;
+ }
+ }
+ }
+ if (!is_migrating && n->importing) {
+ for (int i = 0; i < n->importing_count; i += 2) {
+ sds importing_slot = n->importing[i];
+ if (atoi(importing_slot) == slot) {
+ char *sep = (listLength(importing) == 0 ? "" : ",");
+ importing_str = sdscatfmt(importing_str, "%s%s:%u",
+ sep, n->ip, n->port);
+ listAddNodeTail(importing, n);
+ is_importing = 1;
+ break;
+ }
+ }
+ }
+
+ /* If the node is neither migrating nor importing and it's not
+ * the owner, then is added to the importing list in case
+ * it has keys in the slot. */
+ if (!is_migrating && !is_importing && n != owner) {
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n,
+ "CLUSTER COUNTKEYSINSLOT %d", slot);
+ success = clusterManagerCheckRedisReply(n, r, NULL);
+ if (success && r->integer > 0) {
+ clusterManagerLogWarn("*** Found keys about slot %d "
+ "in node %s:%d!\n", slot, n->ip,
+ n->port);
+ char *sep = (listLength(importing) == 0 ? "" : ",");
+ importing_str = sdscatfmt(importing_str, "%s%s:%u",
+ sep, n->ip, n->port);
+ listAddNodeTail(importing, n);
+ }
+ if (r) freeReplyObject(r);
+ if (!success) goto cleanup;
+ }
+ }
+ if (sdslen(migrating_str) > 0)
+ printf("Set as migrating in: %s\n", migrating_str);
+ if (sdslen(importing_str) > 0)
+ printf("Set as importing in: %s\n", importing_str);
+
+ /* If there is no slot owner, set as owner the node with the biggest
+ * number of keys, among the set of migrating / importing nodes. */
+ if (owner == NULL) {
+ clusterManagerLogInfo(">>> No single clear owner for the slot, "
+ "selecting an owner by # of keys...\n");
+ owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes,
+ slot, NULL);
+ // If we still don't have an owner, we can't fix it.
+ if (owner == NULL) {
+ clusterManagerLogErr("[ERR] Can't select a slot owner. "
+ "Impossible to fix.\n");
+ success = 0;
+ goto cleanup;
+ }
+
+ // Use ADDSLOTS to assign the slot.
+ clusterManagerLogWarn("*** Configuring %s:%d as the slot owner\n",
+ owner->ip, owner->port);
+ success = clusterManagerClearSlotStatus(owner, slot);
+ if (!success) goto cleanup;
+ success = clusterManagerSetSlotOwner(owner, slot, 0);
+ if (!success) goto cleanup;
+ /* Since CLUSTER ADDSLOTS succeeded, we also update the slot
+ * info into the node struct, in order to keep it synced */
+ owner->slots[slot] = 1;
+ /* Remove the owner from the list of migrating/importing
+ * nodes. */
+ clusterManagerRemoveNodeFromList(migrating, owner);
+ clusterManagerRemoveNodeFromList(importing, owner);
+ }
+
+ /* If there are multiple owners of the slot, we need to fix it
+ * so that a single node is the owner and all the other nodes
+ * are in importing state. Later the fix can be handled by one
+ * of the base cases above.
+ *
+ * Note that this case also covers multiple nodes having the slot
+ * in migrating state, since migrating is a valid state only for
+ * slot owners. */
+ if (listLength(owners) > 1) {
+ /* Owner cannot be NULL at this point, since if there are more owners,
+ * the owner has been set in the previous condition (owner == NULL). */
+ assert(owner != NULL);
+ listRewind(owners, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n == owner) continue;
+ success = clusterManagerDelSlot(n, slot, 1);
+ if (!success) goto cleanup;
+ n->slots[slot] = 0;
+ /* Assign the slot to the owner in the node 'n' configuration.' */
+ success = clusterManagerSetSlot(n, owner, slot, "node", NULL);
+ if (!success) goto cleanup;
+ success = clusterManagerSetSlot(n, owner, slot, "importing", NULL);
+ if (!success) goto cleanup;
+ /* Avoid duplicates. */
+ clusterManagerRemoveNodeFromList(importing, n);
+ listAddNodeTail(importing, n);
+ /* Ensure that the node is not in the migrating list. */
+ clusterManagerRemoveNodeFromList(migrating, n);
+ }
+ }
+ int move_opts = CLUSTER_MANAGER_OPT_VERBOSE;
+
+ /* Case 1: The slot is in migrating state in one node, and in
+ * importing state in 1 node. That's trivial to address. */
+ if (listLength(migrating) == 1 && listLength(importing) == 1) {
+ clusterManagerNode *src = listFirst(migrating)->value;
+ clusterManagerNode *dst = listFirst(importing)->value;
+ clusterManagerLogInfo(">>> Case 1: Moving slot %d from "
+ "%s:%d to %s:%d\n", slot,
+ src->ip, src->port, dst->ip, dst->port);
+ move_opts |= CLUSTER_MANAGER_OPT_UPDATE;
+ success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL);
+ }
+
+ /* Case 2: There are multiple nodes that claim the slot as importing,
+ * they probably got keys about the slot after a restart so opened
+ * the slot. In this case we just move all the keys to the owner
+ * according to the configuration. */
+ else if (listLength(migrating) == 0 && listLength(importing) > 0) {
+ clusterManagerLogInfo(">>> Case 2: Moving all the %d slot keys to its "
+ "owner %s:%d\n", slot, owner->ip, owner->port);
+ move_opts |= CLUSTER_MANAGER_OPT_COLD;
+ listRewind(importing, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n == owner) continue;
+ success = clusterManagerMoveSlot(n, owner, slot, move_opts, NULL);
+ if (!success) goto cleanup;
+ clusterManagerLogInfo(">>> Setting %d as STABLE in "
+ "%s:%d\n", slot, n->ip, n->port);
+ success = clusterManagerClearSlotStatus(n, slot);
+ if (!success) goto cleanup;
+ }
+ /* Since the slot has been moved in "cold" mode, ensure that all the
+ * other nodes update their own configuration about the slot itself. */
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n == owner) continue;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ success = clusterManagerSetSlot(n, owner, slot, "NODE", NULL);
+ if (!success) goto cleanup;
+ }
+ }
+
+ /* Case 3: The slot is in migrating state in one node but multiple
+ * other nodes claim to be in importing state and don't have any key in
+ * the slot. We search for the importing node having the same ID as
+ * the destination node of the migrating node.
+ * In that case we move the slot from the migrating node to this node and
+ * we close the importing states on all the other importing nodes.
+ * If no importing node has the same ID as the destination node of the
+ * migrating node, the slot's state is closed on both the migrating node
+ * and the importing nodes. */
+ else if (listLength(migrating) == 1 && listLength(importing) > 1) {
+ int try_to_fix = 1;
+ clusterManagerNode *src = listFirst(migrating)->value;
+ clusterManagerNode *dst = NULL;
+ sds target_id = NULL;
+ for (int i = 0; i < src->migrating_count; i += 2) {
+ sds migrating_slot = src->migrating[i];
+ if (atoi(migrating_slot) == slot) {
+ target_id = src->migrating[i + 1];
+ break;
+ }
+ }
+ assert(target_id != NULL);
+ listIter li;
+ listNode *ln;
+ listRewind(importing, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ int count = clusterManagerCountKeysInSlot(n, slot);
+ if (count > 0) {
+ try_to_fix = 0;
+ break;
+ }
+ if (strcmp(n->name, target_id) == 0) dst = n;
+ }
+ if (!try_to_fix) goto unhandled_case;
+ if (dst != NULL) {
+ clusterManagerLogInfo(">>> Case 3: Moving slot %d from %s:%d to "
+ "%s:%d and closing it on all the other "
+ "importing nodes.\n",
+ slot, src->ip, src->port,
+ dst->ip, dst->port);
+ /* Move the slot to the destination node. */
+ success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL);
+ if (!success) goto cleanup;
+ /* Close slot on all the other importing nodes. */
+ listRewind(importing, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (dst == n) continue;
+ success = clusterManagerClearSlotStatus(n, slot);
+ if (!success) goto cleanup;
+ }
+ } else {
+ clusterManagerLogInfo(">>> Case 3: Closing slot %d on both "
+ "migrating and importing nodes.\n", slot);
+ /* Close the slot on both the migrating node and the importing
+ * nodes. */
+ success = clusterManagerClearSlotStatus(src, slot);
+ if (!success) goto cleanup;
+ listRewind(importing, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ success = clusterManagerClearSlotStatus(n, slot);
+ if (!success) goto cleanup;
+ }
+ }
+ } else {
+ int try_to_close_slot = (listLength(importing) == 0 &&
+ listLength(migrating) == 1);
+ if (try_to_close_slot) {
+ clusterManagerNode *n = listFirst(migrating)->value;
+ if (!owner || owner != n) {
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n,
+ "CLUSTER GETKEYSINSLOT %d %d", slot, 10);
+ success = clusterManagerCheckRedisReply(n, r, NULL);
+ if (r) {
+ if (success) try_to_close_slot = (r->elements == 0);
+ freeReplyObject(r);
+ }
+ if (!success) goto cleanup;
+ }
+ }
+ /* Case 4: There are no slots claiming to be in importing state, but
+ * there is a migrating node that actually don't have any key or is the
+ * slot owner. We can just close the slot, probably a reshard
+ * interrupted in the middle. */
+ if (try_to_close_slot) {
+ clusterManagerNode *n = listFirst(migrating)->value;
+ clusterManagerLogInfo(">>> Case 4: Closing slot %d on %s:%d\n",
+ slot, n->ip, n->port);
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s",
+ slot, "STABLE");
+ success = clusterManagerCheckRedisReply(n, r, NULL);
+ if (r) freeReplyObject(r);
+ if (!success) goto cleanup;
+ } else {
+unhandled_case:
+ success = 0;
+ clusterManagerLogErr("[ERR] Sorry, redis-cli can't fix this slot "
+ "yet (work in progress). Slot is set as "
+ "migrating in %s, as importing in %s, "
+ "owner is %s:%d\n", migrating_str,
+ importing_str, owner->ip, owner->port);
+ }
+ }
+cleanup:
+ listRelease(owners);
+ listRelease(migrating);
+ listRelease(importing);
+ sdsfree(migrating_str);
+ sdsfree(importing_str);
+ return success;
+}
+
+/* Fix a slot that is claimed by more than one master ('owners' must have
+ * length > 1). The node holding the most keys in the slot (falling back to
+ * the first owner) is elected as the definitive owner; every other master
+ * then drops the slot, points its configuration at the new owner via
+ * CLUSTER SETSLOT ... NODE, and migrates any keys it still holds to the
+ * owner in "cold" mode (no MIGRATING/IMPORTING state is opened).
+ *
+ * Returns 1 on success, 0 on any error. */
+static int clusterManagerFixMultipleSlotOwners(int slot, list *owners) {
+ clusterManagerLogInfo(">>> Fixing multiple owners for slot %d...\n", slot);
+ int success = 0;
+ assert(listLength(owners) > 1);
+ /* Elect the owner: the node with the most keys wins. */
+ clusterManagerNode *owner = clusterManagerGetNodeWithMostKeysInSlot(owners,
+ slot,
+ NULL);
+ if (!owner) owner = listFirst(owners)->value;
+ clusterManagerLogInfo(">>> Setting slot %d owner: %s:%d\n",
+ slot, owner->ip, owner->port);
+ /* Set the slot owner. */
+ if (!clusterManagerSetSlotOwner(owner, slot, 0)) return 0;
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ /* Update configuration in all the other master nodes by assigning the slot
+ * itself to the new owner, and by eventually migrating keys if the node
+ * has keys for the slot. */
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n == owner) continue;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ /* A negative count means the COUNTKEYSINSLOT query failed. */
+ int count = clusterManagerCountKeysInSlot(n, slot);
+ success = (count >= 0);
+ if (!success) break;
+ clusterManagerDelSlot(n, slot, 1);
+ if (!clusterManagerSetSlot(n, owner, slot, "node", NULL)) return 0;
+ if (count > 0) {
+ /* "Cold" move: keys are migrated without opening the slot. */
+ int opts = CLUSTER_MANAGER_OPT_VERBOSE |
+ CLUSTER_MANAGER_OPT_COLD;
+ success = clusterManagerMoveSlot(n, owner, slot, opts, NULL);
+ if (!success) break;
+ }
+ }
+ return success;
+}
+
+/* Perform a full sanity check of the cluster: configuration consistency,
+ * open (migrating/importing) slots, slot coverage, and — when the
+ * CHECK_OWNERS flag is set — slots claimed by multiple masters. When the
+ * command was invoked with the FIX flag, each detected problem is also
+ * repaired. The first node of cluster_manager.nodes is used as reference.
+ *
+ * Returns 1 if the cluster is (or has been fixed to be) healthy,
+ * 0 otherwise. */
+static int clusterManagerCheckCluster(int quiet) {
+ listNode *ln = listFirst(cluster_manager.nodes);
+ if (!ln) return 0;
+ clusterManagerNode *node = ln->value;
+ clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n",
+ node->ip, node->port);
+ int result = 1, consistent = 0;
+ int do_fix = config.cluster_manager_command.flags &
+ CLUSTER_MANAGER_CMD_FLAG_FIX;
+ if (!quiet) clusterManagerShowNodes();
+ /* All nodes must advertise the same slots configuration. */
+ consistent = clusterManagerIsConfigConsistent();
+ if (!consistent) {
+ sds err = sdsnew("[ERR] Nodes don't agree about configuration!");
+ clusterManagerOnError(err);
+ result = 0;
+ } else {
+ clusterManagerLogOk("[OK] All nodes agree about slots "
+ "configuration.\n");
+ }
+ /* Check open slots */
+ clusterManagerLogInfo(">>> Check for open slots...\n");
+ listIter li;
+ listRewind(cluster_manager.nodes, &li);
+ int i;
+ /* Maps slot (sds string) -> peer node id, collecting every slot found
+ * in migrating or importing state on any node. */
+ dict *open_slots = NULL;
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->migrating != NULL) {
+ if (open_slots == NULL)
+ open_slots = dictCreate(&clusterManagerDictType);
+ sds errstr = sdsempty();
+ errstr = sdscatprintf(errstr,
+ "[WARNING] Node %s:%d has slots in "
+ "migrating state ",
+ n->ip,
+ n->port);
+ /* n->migrating is a flat array of (slot, destination id) pairs. */
+ for (i = 0; i < n->migrating_count; i += 2) {
+ sds slot = n->migrating[i];
+ dictReplace(open_slots, slot, sdsdup(n->migrating[i + 1]));
+ char *fmt = (i > 0 ? ",%S" : "%S");
+ errstr = sdscatfmt(errstr, fmt, slot);
+ }
+ errstr = sdscat(errstr, ".");
+ clusterManagerOnError(errstr);
+ }
+ if (n->importing != NULL) {
+ if (open_slots == NULL)
+ open_slots = dictCreate(&clusterManagerDictType);
+ sds errstr = sdsempty();
+ errstr = sdscatprintf(errstr,
+ "[WARNING] Node %s:%d has slots in "
+ "importing state ",
+ n->ip,
+ n->port);
+ /* n->importing is a flat array of (slot, source id) pairs. */
+ for (i = 0; i < n->importing_count; i += 2) {
+ sds slot = n->importing[i];
+ dictReplace(open_slots, slot, sdsdup(n->importing[i + 1]));
+ char *fmt = (i > 0 ? ",%S" : "%S");
+ errstr = sdscatfmt(errstr, fmt, slot);
+ }
+ errstr = sdscat(errstr, ".");
+ clusterManagerOnError(errstr);
+ }
+ }
+ if (open_slots != NULL) {
+ result = 0;
+ dictIterator *iter = dictGetIterator(open_slots);
+ dictEntry *entry;
+ sds errstr = sdsnew("[WARNING] The following slots are open: ");
+ i = 0;
+ while ((entry = dictNext(iter)) != NULL) {
+ sds slot = (sds) dictGetKey(entry);
+ char *fmt = (i++ > 0 ? ",%S" : "%S");
+ errstr = sdscatfmt(errstr, fmt, slot);
+ }
+ clusterManagerLogErr("%s.\n", (char *) errstr);
+ sdsfree(errstr);
+ if (do_fix) {
+ /* Fix open slots. */
+ dictReleaseIterator(iter);
+ iter = dictGetIterator(open_slots);
+ while ((entry = dictNext(iter)) != NULL) {
+ sds slot = (sds) dictGetKey(entry);
+ result = clusterManagerFixOpenSlot(atoi(slot));
+ if (!result) break;
+ }
+ }
+ dictReleaseIterator(iter);
+ dictRelease(open_slots);
+ }
+ /* Verify that all CLUSTER_MANAGER_SLOTS slots are assigned to a node. */
+ clusterManagerLogInfo(">>> Check slots coverage...\n");
+ char slots[CLUSTER_MANAGER_SLOTS];
+ memset(slots, 0, CLUSTER_MANAGER_SLOTS);
+ int coverage = clusterManagerGetCoveredSlots(slots);
+ if (coverage == CLUSTER_MANAGER_SLOTS) {
+ clusterManagerLogOk("[OK] All %d slots covered.\n",
+ CLUSTER_MANAGER_SLOTS);
+ } else {
+ sds err = sdsempty();
+ err = sdscatprintf(err, "[ERR] Not all %d slots are "
+ "covered by nodes.\n",
+ CLUSTER_MANAGER_SLOTS);
+ clusterManagerOnError(err);
+ result = 0;
+ if (do_fix/* && result*/) {
+ /* clusterManagerFixSlotsCoverage() reads this global dict of
+ * uncovered slot -> node list. */
+ dictType dtype = clusterManagerDictType;
+ dtype.keyDestructor = dictSdsDestructor;
+ dtype.valDestructor = dictListDestructor;
+ clusterManagerUncoveredSlots = dictCreate(&dtype);
+ int fixed = clusterManagerFixSlotsCoverage(slots);
+ if (fixed > 0) result = 1;
+ }
+ }
+ int search_multiple_owners = config.cluster_manager_command.flags &
+ CLUSTER_MANAGER_CMD_FLAG_CHECK_OWNERS;
+ if (search_multiple_owners) {
+ /* Check whether there are multiple owners, even when slots are
+ * fully covered and there are no open slots. */
+ clusterManagerLogInfo(">>> Check for multiple slot owners...\n");
+ int slot = 0, slots_with_multiple_owners = 0;
+ for (; slot < CLUSTER_MANAGER_SLOTS; slot++) {
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ list *owners = listCreate();
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ if (n->slots[slot]) listAddNodeTail(owners, n);
+ else {
+ /* Nodes having keys for the slot will be considered
+ * owners too. */
+ int count = clusterManagerCountKeysInSlot(n, slot);
+ if (count > 0) listAddNodeTail(owners, n);
+ }
+ }
+ if (listLength(owners) > 1) {
+ result = 0;
+ clusterManagerLogErr("[WARNING] Slot %d has %d owners:\n",
+ slot, listLength(owners));
+ listRewind(owners, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ clusterManagerLogErr(" %s:%d\n", n->ip, n->port);
+ }
+ slots_with_multiple_owners++;
+ if (do_fix) {
+ result = clusterManagerFixMultipleSlotOwners(slot, owners);
+ if (!result) {
+ clusterManagerLogErr("Failed to fix multiple owners "
+ "for slot %d\n", slot);
+ listRelease(owners);
+ break;
+ } else slots_with_multiple_owners--;
+ }
+ }
+ listRelease(owners);
+ }
+ if (slots_with_multiple_owners == 0)
+ clusterManagerLogOk("[OK] No multiple owners found.\n");
+ }
+ return result;
+}
+
+/* Resolve a node id given on the command line into a node usable as a
+ * reshard source. Unknown ids, replicas, and the reshard target itself
+ * are rejected (returning NULL). '*raise_err' is set to 1 only for the
+ * unknown/replica case, so the caller can tell a hard error apart from
+ * the benign "source == target" case. */
+static clusterManagerNode *clusterNodeForResharding(char *id,
+ clusterManagerNode *target,
+ int *raise_err)
+{
+ const char *invalid_node_msg = "*** The specified node (%s) is not known "
+ "or not a master, please retry.\n";
+ clusterManagerNode *found = clusterManagerNodeByName(id);
+ *raise_err = 0;
+ /* The id must resolve to a known master. */
+ if (found == NULL || (found->flags & CLUSTER_MANAGER_FLAG_SLAVE)) {
+ clusterManagerLogErr(invalid_node_msg, id);
+ *raise_err = 1;
+ return NULL;
+ }
+ /* The target cannot also act as a source. */
+ if (target != NULL && strcmp(found->name, target->name) == 0) {
+ clusterManagerLogErr( "*** It is not possible to use "
+ "the target node as "
+ "source node.\n");
+ return NULL;
+ }
+ return found;
+}
+
+/* Build the list of (source node, slot) moves needed to collect 'numslots'
+ * slots from the nodes in 'sources'.
+ *
+ * Sources are processed in descending slot-count order and each one
+ * contributes a share of slots proportional to the slots it currently
+ * serves: the largest source rounds its share up (ceil), all the others
+ * round down (floor), so the total never exceeds numslots.
+ * NOTE(review): with some distributions the floor rounding can leave the
+ * table slightly short of numslots — confirm against callers.
+ *
+ * The returned list owns heap-allocated clusterManagerReshardTableItem
+ * entries; release it with clusterManagerReleaseReshardTable(). */
+static list *clusterManagerComputeReshardTable(list *sources, int numslots) {
+ list *moved = listCreate();
+ int src_count = listLength(sources), i = 0, tot_slots = 0, j;
+ clusterManagerNode **sorted = zmalloc(src_count * sizeof(*sorted));
+ listIter li;
+ listNode *ln;
+ listRewind(sources, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ tot_slots += node->slots_count;
+ sorted[i++] = node;
+ }
+ /* Biggest sources first. */
+ qsort(sorted, src_count, sizeof(clusterManagerNode *),
+ clusterManagerSlotCountCompareDesc);
+ for (i = 0; i < src_count; i++) {
+ clusterManagerNode *node = sorted[i];
+ /* Proportional share of the requested slots for this source. */
+ float n = ((float) numslots / tot_slots * node->slots_count);
+ if (i == 0) n = ceil(n);
+ else n = floor(n);
+ int max = (int) n, count = 0;
+ for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) {
+ int slot = node->slots[j];
+ if (!slot) continue;
+ /* Stop at this source's share, or when the global budget
+ * of numslots moves has been reached. */
+ if (count >= max || (int)listLength(moved) >= numslots) break;
+ clusterManagerReshardTableItem *item = zmalloc(sizeof(*item));
+ item->source = node;
+ item->slot = j;
+ listAddNodeTail(moved, item);
+ count++;
+ }
+ }
+ zfree(sorted);
+ return moved;
+}
+
+/* Print, one per line, every planned slot move in the reshard table. */
+static void clusterManagerShowReshardTable(list *table) {
+ listIter iter;
+ listNode *entry;
+ listRewind(table, &iter);
+ for (entry = listNext(&iter); entry != NULL; entry = listNext(&iter)) {
+ clusterManagerReshardTableItem *item = entry->value;
+ clusterManagerNode *source = item->source;
+ printf(" Moving slot %d from %s\n", item->slot,
+ (char *) source->name);
+ }
+}
+
+/* Free a reshard table created by clusterManagerComputeReshardTable():
+ * every item and the list itself. Accepts NULL. */
+static void clusterManagerReleaseReshardTable(list *table) {
+ if (table == NULL) return;
+ listIter iter;
+ listNode *entry;
+ listRewind(table, &iter);
+ while ((entry = listNext(&iter)) != NULL)
+ zfree(entry->value);
+ listRelease(table);
+}
+
+/* printf-like logger for the cluster manager. When colorized output is
+ * enabled via CLUSTER_MANAGER_CMD_FLAG_COLOR, the message is wrapped in
+ * the ANSI escape sequence matching 'level' plus a trailing reset. */
+static void clusterManagerLog(int level, const char* fmt, ...) {
+ int colorized =
+ (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR);
+ if (colorized) {
+ const char *color;
+ switch (level) {
+ case CLUSTER_MANAGER_LOG_LVL_INFO: color = LOG_COLOR_BOLD; break;
+ case CLUSTER_MANAGER_LOG_LVL_WARN: color = LOG_COLOR_YELLOW; break;
+ case CLUSTER_MANAGER_LOG_LVL_ERR: color = LOG_COLOR_RED; break;
+ case CLUSTER_MANAGER_LOG_LVL_SUCCESS: color = LOG_COLOR_GREEN; break;
+ default: color = LOG_COLOR_RESET; break;
+ }
+ printf("\033[%s", color);
+ }
+ va_list args;
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+ if (colorized) printf("\033[%s", LOG_COLOR_RESET);
+}
+
+/* Initialize 'array' with room for 'alloc_len' node pointers, all NULL.
+ * 'alloc' keeps the base pointer so the array can be reset after
+ * clusterManagerNodeArrayShift() advances 'nodes'. */
+static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array,
+ int alloc_len)
+{
+ clusterManagerNode **storage =
+ zcalloc(alloc_len * sizeof(clusterManagerNode*));
+ array->nodes = storage;
+ array->alloc = storage;
+ array->len = alloc_len;
+ array->count = 0;
+}
+
+/* Reset array->nodes to the original array allocation and re-count non-NULL
+ * nodes. */
+static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array) {
+ if (array->nodes > array->alloc) {
+ array->len = array->nodes - array->alloc;
+ array->nodes = array->alloc;
+ array->count = 0;
+ int i = 0;
+ for(; i < array->len; i++) {
+ if (array->nodes[i] != NULL) array->count++;
+ }
+ }
+}
+
+/* Pop the first entry of array->nodes into '*nodeptr' (it may be NULL)
+ * and advance the array by one, decrementing 'len' and, when the popped
+ * entry was non-NULL, 'count'. */
+static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array,
+ clusterManagerNode **nodeptr)
+{
+ assert(array->len > 0);
+ clusterManagerNode *head = array->nodes[0];
+ /* Only non-NULL entries participate in 'count'. */
+ if (head != NULL) array->count--;
+ *nodeptr = head;
+ array->nodes++;
+ array->len--;
+}
+
+/* Append 'node' (non-NULL) to the array; there must be free room. */
+static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array,
+ clusterManagerNode *node)
+{
+ assert(array->len > 0);
+ assert(node != NULL);
+ assert(array->count < array->len);
+ array->nodes[array->count] = node;
+ array->count++;
+}
+
+/* Report that 'node' cannot join because it is not empty. When 'err' is
+ * given it is used as the message, otherwise a default explanation is
+ * printed. */
+static void clusterManagerPrintNotEmptyNodeError(clusterManagerNode *node,
+ char *err)
+{
+ char *msg = err;
+ if (msg == NULL) {
+ msg = "is not empty. Either the node already knows other "
+ "nodes (check with CLUSTER NODES) or contains some "
+ "key in database 0.";
+ }
+ clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg);
+}
+
+/* Report that 'node' is not running in cluster mode; 'err' (optional)
+ * overrides the default message. */
+static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node,
+ char *err)
+{
+ char *msg;
+ if (err != NULL) msg = err;
+ else msg = "is not configured as a cluster node.";
+ clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg);
+}
+
+/* Execute redis-cli in Cluster Manager mode */
+static void clusterManagerMode(clusterManagerCommandProc *proc) {
+ int argc = config.cluster_manager_command.argc;
+ char **argv = config.cluster_manager_command.argv;
+ cluster_manager.nodes = NULL;
+ /* Run the selected cluster-manager subcommand; it returns nonzero
+ * on success. */
+ int success = proc(argc, argv);
+
+ /* Initialized in createClusterManagerCommand. */
+ if (config.stdin_lastarg) {
+ zfree(config.cluster_manager_command.argv);
+ sdsfree(config.cluster_manager_command.stdin_arg);
+ } else if (config.stdin_tag_arg) {
+ sdsfree(config.cluster_manager_command.stdin_arg);
+ }
+ freeClusterManager();
+
+ /* This function never returns: exit status 0 on success, 1 on error. */
+ exit(success ? 0 : 1);
+}
+
+/* Cluster Manager Commands */
+
+/* redis-cli --cluster create implementation: connects to every address in
+ * argv, verifies each node is an empty cluster-enabled instance, allocates
+ * the hash slots among the masters (interleaving nodes by IP so masters and
+ * replicas spread across hosts), assigns replicas, and finally joins all
+ * the nodes with CLUSTER MEET.
+ *
+ * Returns 1 on success, 0 on error. */
+static int clusterManagerCommandCreate(int argc, char **argv) {
+ int i, j, success = 1;
+ cluster_manager.nodes = listCreate();
+ /* Parse, connect and validate every node given on the command line. */
+ for (i = 0; i < argc; i++) {
+ char *addr = argv[i];
+ char *ip = NULL;
+ int port = 0;
+ if (!parseClusterNodeAddress(addr, &ip, &port, NULL)) {
+ fprintf(stderr, "Invalid address format: %s\n", addr);
+ return 0;
+ }
+
+ clusterManagerNode *node = clusterManagerNewNode(ip, port, 0);
+ if (!clusterManagerNodeConnect(node)) {
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ char *err = NULL;
+ if (!clusterManagerNodeIsCluster(node, &err)) {
+ clusterManagerPrintNotClusterNodeError(node, err);
+ if (err) zfree(err);
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ err = NULL;
+ if (!clusterManagerNodeLoadInfo(node, 0, &err)) {
+ if (err) {
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+ zfree(err);
+ }
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ err = NULL;
+ if (!clusterManagerNodeIsEmpty(node, &err)) {
+ clusterManagerPrintNotEmptyNodeError(node, err);
+ if (err) zfree(err);
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ listAddNodeTail(cluster_manager.nodes, node);
+ }
+ int node_len = cluster_manager.nodes->len;
+ int replicas = config.cluster_manager_command.replicas;
+ int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas);
+ if (masters_count < 3) {
+ clusterManagerLogErr(
+ "*** ERROR: Invalid configuration for cluster creation.\n"
+ "*** Redis Cluster requires at least 3 master nodes.\n"
+ "*** This is not possible with %d nodes and %d replicas per node.",
+ node_len, replicas);
+ clusterManagerLogErr("\n*** At least %d nodes are required.\n",
+ 3 * (replicas + 1));
+ return 0;
+ }
+ clusterManagerLogInfo(">>> Performing hash slots allocation "
+ "on %d nodes...\n", node_len);
+ /* Group the nodes by IP so masters/replicas can be spread across
+ * different hosts. */
+ int interleaved_len = 0, ip_count = 0;
+ clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved));
+ char **ips = zcalloc(node_len * sizeof(char*));
+ clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes));
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ int found = 0;
+ for (i = 0; i < ip_count; i++) {
+ char *ip = ips[i];
+ if (!strcmp(ip, n->ip)) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ ips[ip_count++] = n->ip;
+ }
+ /* Here 'i' indexes either the matching IP group, or the entry just
+ * appended above when the IP was not yet known. */
+ clusterManagerNodeArray *node_array = &(ip_nodes[i]);
+ if (node_array->nodes == NULL)
+ clusterManagerNodeArrayInit(node_array, node_len);
+ clusterManagerNodeArrayAdd(node_array, n);
+ }
+ /* Interleave the nodes, taking one from each IP group in turn. */
+ while (interleaved_len < node_len) {
+ for (i = 0; i < ip_count; i++) {
+ clusterManagerNodeArray *node_array = &(ip_nodes[i]);
+ if (node_array->count > 0) {
+ clusterManagerNode *n = NULL;
+ clusterManagerNodeArrayShift(node_array, &n);
+ interleaved[interleaved_len++] = n;
+ }
+ }
+ }
+ /* The first masters_count interleaved nodes become masters; the rest
+ * are replica candidates. */
+ clusterManagerNode **masters = interleaved;
+ interleaved += masters_count;
+ interleaved_len -= masters_count;
+ float slots_per_node = CLUSTER_MANAGER_SLOTS / (float) masters_count;
+ long first = 0;
+ float cursor = 0.0f;
+ for (i = 0; i < masters_count; i++) {
+ clusterManagerNode *master = masters[i];
+ long last = lround(cursor + slots_per_node - 1);
+ if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1))
+ last = CLUSTER_MANAGER_SLOTS - 1;
+ if (last < first) last = first;
+ printf("Master[%d] -> Slots %ld - %ld\n", i, first, last);
+ master->slots_count = 0;
+ for (j = first; j <= last; j++) {
+ master->slots[j] = 1;
+ master->slots_count++;
+ }
+ master->dirty = 1;
+ first = last + 1;
+ cursor += slots_per_node;
+ }
+
+ /* Rotating the list sometimes helps to get better initial
+ * anti-affinity before the optimizer runs.
+ * FIX: with zero replicas interleaved_len is 0 at this point, and the
+ * previously unconditional rotation read interleaved[0] (one element
+ * past the end of the allocation) and wrote interleaved[-1],
+ * corrupting the last master's pointer. Rotate only when there is
+ * actually something to rotate. */
+ if (interleaved_len != 0) {
+ clusterManagerNode *first_node = interleaved[0];
+ for (i = 0; i < (interleaved_len - 1); i++)
+ interleaved[i] = interleaved[i + 1];
+ interleaved[interleaved_len - 1] = first_node;
+ }
+ int assign_unused = 0, available_count = interleaved_len;
+assign_replicas:
+ for (i = 0; i < masters_count; i++) {
+ clusterManagerNode *master = masters[i];
+ int assigned_replicas = 0;
+ while (assigned_replicas < replicas) {
+ if (available_count == 0) break;
+ clusterManagerNode *found = NULL, *slave = NULL;
+ int firstNodeIdx = -1;
+ /* Prefer a replica living on a different IP than its master. */
+ for (j = 0; j < interleaved_len; j++) {
+ clusterManagerNode *n = interleaved[j];
+ if (n == NULL) continue;
+ if (strcmp(n->ip, master->ip)) {
+ found = n;
+ interleaved[j] = NULL;
+ break;
+ }
+ if (firstNodeIdx < 0) firstNodeIdx = j;
+ }
+ if (found) slave = found;
+ else if (firstNodeIdx >= 0) {
+ slave = interleaved[firstNodeIdx];
+ interleaved_len -= (firstNodeIdx + 1);
+ interleaved += (firstNodeIdx + 1);
+ }
+ if (slave != NULL) {
+ assigned_replicas++;
+ available_count--;
+ if (slave->replicate) sdsfree(slave->replicate);
+ slave->replicate = sdsnew(master->name);
+ slave->dirty = 1;
+ } else break;
+ printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port,
+ master->ip, master->port);
+ if (assign_unused) break;
+ }
+ }
+ /* Second pass: spread any leftover nodes as extra replicas. */
+ if (!assign_unused && available_count > 0) {
+ assign_unused = 1;
+ printf("Adding extra replicas...\n");
+ goto assign_replicas;
+ }
+ for (i = 0; i < ip_count; i++) {
+ clusterManagerNodeArray *node_array = ip_nodes + i;
+ clusterManagerNodeArrayReset(node_array);
+ }
+ clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count);
+ clusterManagerShowNodes();
+ int ignore_force = 0;
+ if (confirmWithYes("Can I set the above configuration?", ignore_force)) {
+ /* Flush the computed slots/replication config to every node. */
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ char *err = NULL;
+ int flushed = clusterManagerFlushNodeConfig(node, &err);
+ if (!flushed && node->dirty && !node->replicate) {
+ if (err != NULL) {
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+ zfree(err);
+ }
+ success = 0;
+ goto cleanup;
+ } else if (err != NULL) zfree(err);
+ }
+ clusterManagerLogInfo(">>> Nodes configuration updated\n");
+ clusterManagerLogInfo(">>> Assign a different config epoch to "
+ "each node\n");
+ int config_epoch = 1;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ redisReply *reply = NULL;
+ reply = CLUSTER_MANAGER_COMMAND(node,
+ "cluster set-config-epoch %d",
+ config_epoch++);
+ if (reply != NULL) freeReplyObject(reply);
+ }
+ clusterManagerLogInfo(">>> Sending CLUSTER MEET messages to join "
+ "the cluster\n");
+ clusterManagerNode *first = NULL;
+ char first_ip[NET_IP_STR_LEN]; /* first->ip may be a hostname */
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ if (first == NULL) {
+ first = node;
+ /* Although hiredis supports connecting to a hostname, CLUSTER
+ * MEET requires an IP address, so we do a DNS lookup here. */
+ if (anetResolve(NULL, first->ip, first_ip, sizeof(first_ip), ANET_NONE)
+ == ANET_ERR)
+ {
+ fprintf(stderr, "Invalid IP address or hostname specified: %s\n", first->ip);
+ success = 0;
+ goto cleanup;
+ }
+ continue;
+ }
+ redisReply *reply = NULL;
+ if (first->bus_port == 0 || (first->bus_port == first->port + CLUSTER_MANAGER_PORT_INCR)) {
+ /* CLUSTER MEET bus-port parameter was added in 4.0.
+ * So if (bus_port == 0) or (bus_port == port + CLUSTER_MANAGER_PORT_INCR),
+ * we just call CLUSTER MEET with 2 arguments, using the old form. */
+ reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d",
+ first_ip, first->port);
+ } else {
+ reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d %d",
+ first_ip, first->port, first->bus_port);
+ }
+ int is_err = 0;
+ if (reply != NULL) {
+ if ((is_err = reply->type == REDIS_REPLY_ERROR))
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, reply->str);
+ freeReplyObject(reply);
+ } else {
+ is_err = 1;
+ fprintf(stderr, "Failed to send CLUSTER MEET command.\n");
+ }
+ if (is_err) {
+ success = 0;
+ goto cleanup;
+ }
+ }
+ /* Give one second for the join to start, in order to avoid that
+ * waiting for cluster join will find all the nodes agree about
+ * the config as they are still empty with unassigned slots. */
+ sleep(1);
+ clusterManagerWaitForClusterJoin();
+ /* Useful for the replicas */
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ if (!node->dirty) continue;
+ char *err = NULL;
+ int flushed = clusterManagerFlushNodeConfig(node, &err);
+ if (!flushed && !node->replicate) {
+ if (err != NULL) {
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+ zfree(err);
+ }
+ success = 0;
+ goto cleanup;
+ } else if (err != NULL) {
+ zfree(err);
+ }
+ }
+ // Reset Nodes
+ listRewind(cluster_manager.nodes, &li);
+ clusterManagerNode *first_node = NULL;
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ if (!first_node) first_node = node;
+ else freeClusterManagerNode(node);
+ }
+ listEmpty(cluster_manager.nodes);
+ if (!clusterManagerLoadInfoFromNode(first_node)) {
+ success = 0;
+ goto cleanup;
+ }
+ clusterManagerCheckCluster(0);
+ }
+cleanup:
+ /* Free everything */
+ zfree(masters);
+ zfree(ips);
+ for (i = 0; i < node_len; i++) {
+ clusterManagerNodeArray *node_array = ip_nodes + i;
+ CLUSTER_MANAGER_NODE_ARRAY_FREE(node_array);
+ }
+ zfree(ip_nodes);
+ return success;
+}
+
+/* redis-cli --cluster add-node implementation: adds the node at argv[0] to
+ * the existing cluster reachable at argv[1]. Unless the node is added as a
+ * replica, the cluster's function library is copied to it first (FUNCTION
+ * DUMP / RESTORE); then the node is joined with CLUSTER MEET, and — with
+ * the --cluster-slave flag — configured via CLUSTER REPLICATE.
+ *
+ * Returns 1 on success, 0 on error. */
+static int clusterManagerCommandAddNode(int argc, char **argv) {
+ int success = 1;
+ redisReply *reply = NULL;
+ redisReply *function_restore_reply = NULL;
+ redisReply *function_list_reply = NULL;
+ char *ref_ip = NULL, *ip = NULL;
+ int ref_port = 0, port = 0;
+ if (!getClusterHostFromCmdArgs(argc - 1, argv + 1, &ref_ip, &ref_port))
+ goto invalid_args;
+ if (!getClusterHostFromCmdArgs(1, argv, &ip, &port))
+ goto invalid_args;
+ clusterManagerLogInfo(">>> Adding node %s:%d to cluster %s:%d\n", ip, port,
+ ref_ip, ref_port);
+ // Check the existing cluster
+ clusterManagerNode *refnode = clusterManagerNewNode(ref_ip, ref_port, 0);
+ if (!clusterManagerLoadInfoFromNode(refnode)) return 0;
+ if (!clusterManagerCheckCluster(0)) return 0;
+
+ /* If --cluster-master-id was specified, try to resolve it now so that we
+ * abort before starting with the node configuration. */
+ clusterManagerNode *master_node = NULL;
+ if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) {
+ char *master_id = config.cluster_manager_command.master_id;
+ if (master_id != NULL) {
+ master_node = clusterManagerNodeByName(master_id);
+ if (master_node == NULL) {
+ clusterManagerLogErr("[ERR] No such master ID %s\n", master_id);
+ return 0;
+ }
+ } else {
+ master_node = clusterManagerNodeWithLeastReplicas();
+ assert(master_node != NULL);
+ printf("Automatically selected master %s:%d\n", master_node->ip,
+ master_node->port);
+ }
+ }
+
+ // Add the new node
+ clusterManagerNode *new_node = clusterManagerNewNode(ip, port, 0);
+ int added = 0;
+ if (!clusterManagerNodeConnect(new_node)) {
+ clusterManagerLogErr("[ERR] Sorry, can't connect to node %s:%d\n",
+ ip, port);
+ success = 0;
+ goto cleanup;
+ }
+ char *err = NULL;
+ if (!(success = clusterManagerNodeIsCluster(new_node, &err))) {
+ clusterManagerPrintNotClusterNodeError(new_node, err);
+ if (err) zfree(err);
+ goto cleanup;
+ }
+ if (!clusterManagerNodeLoadInfo(new_node, 0, &err)) {
+ if (err) {
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(new_node, err);
+ zfree(err);
+ }
+ success = 0;
+ goto cleanup;
+ }
+ if (!(success = clusterManagerNodeIsEmpty(new_node, &err))) {
+ clusterManagerPrintNotEmptyNodeError(new_node, err);
+ if (err) zfree(err);
+ goto cleanup;
+ }
+ clusterManagerNode *first = listFirst(cluster_manager.nodes)->value;
+ listAddNodeTail(cluster_manager.nodes, new_node);
+ added = 1;
+
+ if (!master_node) {
+ /* Send functions to the new node, if new node is a replica it will get the functions from its primary. */
+ clusterManagerLogInfo(">>> Getting functions from cluster\n");
+ reply = CLUSTER_MANAGER_COMMAND(refnode, "FUNCTION DUMP");
+ if (!clusterManagerCheckRedisReply(refnode, reply, &err)) {
+ clusterManagerLogInfo(">>> Failed retrieving Functions from the cluster, "
+ "skip this step as Redis version do not support function command (error = '%s')\n", err? err : "NULL reply");
+ if (err) zfree(err);
+ } else {
+ assert(reply->type == REDIS_REPLY_STRING);
+ clusterManagerLogInfo(">>> Send FUNCTION LIST to %s:%d to verify there is no functions in it\n", ip, port);
+ function_list_reply = CLUSTER_MANAGER_COMMAND(new_node, "FUNCTION LIST");
+ if (!clusterManagerCheckRedisReply(new_node, function_list_reply, &err)) {
+ /* FIX: the message used to say "CLUSTER LIST" but the command
+ * that failed is FUNCTION LIST. */
+ clusterManagerLogErr(">>> Failed on FUNCTION LIST (error = '%s')\r\n", err? err : "NULL reply");
+ if (err) zfree(err);
+ success = 0;
+ goto cleanup;
+ }
+ assert(function_list_reply->type == REDIS_REPLY_ARRAY);
+ if (function_list_reply->elements > 0) {
+ clusterManagerLogErr(">>> New node already contains functions and can not be added to the cluster. Use FUNCTION FLUSH and try again.\r\n");
+ success = 0;
+ goto cleanup;
+ }
+ clusterManagerLogInfo(">>> Send FUNCTION RESTORE to %s:%d\n", ip, port);
+ function_restore_reply = CLUSTER_MANAGER_COMMAND(new_node, "FUNCTION RESTORE %b", reply->str, reply->len);
+ if (!clusterManagerCheckRedisReply(new_node, function_restore_reply, &err)) {
+ clusterManagerLogErr(">>> Failed loading functions to the new node (error = '%s')\r\n", err? err : "NULL reply");
+ if (err) zfree(err);
+ success = 0;
+ goto cleanup;
+ }
+ }
+ }
+
+ if (reply) {
+ freeReplyObject(reply);
+ /* FIX: clear the pointer after freeing. If the anetResolve() below
+ * fails we jump to cleanup before 'reply' is reassigned, and cleanup
+ * would otherwise call freeReplyObject() on this already-freed
+ * object (double free / use-after-free). */
+ reply = NULL;
+ }
+
+ // Send CLUSTER MEET command to the new node
+ clusterManagerLogInfo(">>> Send CLUSTER MEET to node %s:%d to make it "
+ "join the cluster.\n", ip, port);
+ /* CLUSTER MEET requires an IP address, so we do a DNS lookup here. */
+ char first_ip[NET_IP_STR_LEN];
+ if (anetResolve(NULL, first->ip, first_ip, sizeof(first_ip), ANET_NONE) == ANET_ERR) {
+ fprintf(stderr, "Invalid IP address or hostname specified: %s\n", first->ip);
+ success = 0;
+ goto cleanup;
+ }
+
+ if (first->bus_port == 0 || (first->bus_port == first->port + CLUSTER_MANAGER_PORT_INCR)) {
+ /* CLUSTER MEET bus-port parameter was added in 4.0.
+ * So if (bus_port == 0) or (bus_port == port + CLUSTER_MANAGER_PORT_INCR),
+ * we just call CLUSTER MEET with 2 arguments, using the old form. */
+ reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d",
+ first_ip, first->port);
+ } else {
+ reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d %d",
+ first_ip, first->port, first->bus_port);
+ }
+
+ if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL)))
+ goto cleanup;
+
+ /* Additional configuration is needed if the node is added as a slave. */
+ if (master_node) {
+ sleep(1);
+ clusterManagerWaitForClusterJoin();
+ clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n",
+ master_node->ip, master_node->port);
+ freeReplyObject(reply);
+ reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s",
+ master_node->name);
+ if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL)))
+ goto cleanup;
+ }
+ clusterManagerLogOk("[OK] New node added correctly.\n");
+cleanup:
+ /* new_node is owned by cluster_manager.nodes once added. */
+ if (!added && new_node) freeClusterManagerNode(new_node);
+ if (reply) freeReplyObject(reply);
+ if (function_restore_reply) freeReplyObject(function_restore_reply);
+ if (function_list_reply) freeReplyObject(function_list_reply);
+ return success;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
/* Implements the 'del-node' subcommand: remove the node whose ID is
 * argv[1] from the cluster reachable at argv[0] ("host:port").
 * The target node must own no slots. Replicas of the removed node are
 * reattached to the master with the least replicas, every other node is
 * asked to CLUSTER FORGET it, and the node itself is finally soft-reset
 * so it leaves the cluster. Returns 1 on success, 0 on error. */
static int clusterManagerCommandDeleteNode(int argc, char **argv) {
    UNUSED(argc);
    int success = 1;
    int port = 0;
    char *ip = NULL;
    if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args;
    char *node_id = argv[1];
    clusterManagerLogInfo(">>> Removing node %s from cluster %s:%d\n",
                          node_id, ip, port);
    clusterManagerNode *ref_node = clusterManagerNewNode(ip, port, 0);
    clusterManagerNode *node = NULL;

    // Load cluster information
    if (!clusterManagerLoadInfoFromNode(ref_node)) return 0;

    // Check if the node exists and is not empty
    node = clusterManagerNodeByName(node_id);
    if (node == NULL) {
        clusterManagerLogErr("[ERR] No such node ID %s\n", node_id);
        return 0;
    }
    if (node->slots_count != 0) {
        clusterManagerLogErr("[ERR] Node %s:%d is not empty! Reshard data "
                             "away and try again.\n", node->ip, node->port);
        return 0;
    }

    // Send CLUSTER FORGET to all the nodes but the node to remove
    clusterManagerLogInfo(">>> Sending CLUSTER FORGET messages to the "
                          "cluster...\n");
    listIter li;
    listNode *ln;
    listRewind(cluster_manager.nodes, &li);
    while ((ln = listNext(&li)) != NULL) {
        clusterManagerNode *n = ln->value;
        if (n == node) continue;
        if (n->replicate && !strcasecmp(n->replicate, node_id)) {
            // Reconfigure the slave to replicate with some other node
            clusterManagerNode *master = clusterManagerNodeWithLeastReplicas();
            assert(master != NULL);
            clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n",
                                  n->ip, n->port, master->ip, master->port);
            redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s",
                                                    master->name);
            success = clusterManagerCheckRedisReply(n, r, NULL);
            if (r) freeReplyObject(r);
            /* Abort on the first node that fails to reconfigure. */
            if (!success) return 0;
        }
        redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER FORGET %s",
                                                node_id);
        success = clusterManagerCheckRedisReply(n, r, NULL);
        if (r) freeReplyObject(r);
        if (!success) return 0;
    }

    /* Finally send CLUSTER RESET to the node. */
    clusterManagerLogInfo(">>> Sending CLUSTER RESET SOFT to the "
                          "deleted node.\n");
    redisReply *r = redisCommand(node->context, "CLUSTER RESET %s", "SOFT");
    success = clusterManagerCheckRedisReply(node, r, NULL);
    if (r) freeReplyObject(r);
    return success;
invalid_args:
    fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
    return 0;
}
+
+static int clusterManagerCommandInfo(int argc, char **argv) {
+ int port = 0;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args;
+ clusterManagerNode *node = clusterManagerNewNode(ip, port, 0);
+ if (!clusterManagerLoadInfoFromNode(node)) return 0;
+ clusterManagerShowClusterInfo();
+ return 1;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandCheck(int argc, char **argv) {
+ int port = 0;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args;
+ clusterManagerNode *node = clusterManagerNewNode(ip, port, 0);
+ if (!clusterManagerLoadInfoFromNode(node)) return 0;
+ clusterManagerShowClusterInfo();
+ return clusterManagerCheckCluster(0);
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
/* Implements the 'fix' subcommand: identical to 'check', except that
 * the FIX flag is set so detected problems are repaired when possible. */
static int clusterManagerCommandFix(int argc, char **argv) {
    config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX;
    return clusterManagerCommandCheck(argc, argv);
}
+
/* Implements the 'reshard' subcommand: move hash slots from one or more
 * source masters to a target master. The number of slots, target node
 * and source nodes are taken from the --cluster-slots/--cluster-to/
 * --cluster-from options when given, and asked interactively otherwise.
 * Returns 1 on success, 0 on error or user abort. */
static int clusterManagerCommandReshard(int argc, char **argv) {
    int port = 0;
    char *ip = NULL;
    if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args;
    clusterManagerNode *node = clusterManagerNewNode(ip, port, 0);
    if (!clusterManagerLoadInfoFromNode(node)) return 0;
    clusterManagerCheckCluster(0);
    if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) {
        fflush(stdout);
        fprintf(stderr,
                "*** Please fix your cluster problems before resharding\n");
        return 0;
    }
    /* Number of slots to move: from the command line option, or asked
     * interactively until a value in [1, CLUSTER_MANAGER_SLOTS] is given. */
    int slots = config.cluster_manager_command.slots;
    if (!slots) {
        while (slots <= 0 || slots > CLUSTER_MANAGER_SLOTS) {
            printf("How many slots do you want to move (from 1 to %d)? ",
                   CLUSTER_MANAGER_SLOTS);
            fflush(stdout);
            char buf[6];
            int nread = read(fileno(stdin),buf,6);
            if (nread <= 0) continue;
            int last_idx = nread - 1;
            if (buf[last_idx] != '\n') {
                /* Buffer filled without a newline: drain the rest of
                 * the input line before parsing. */
                int ch;
                while ((ch = getchar()) != '\n' && ch != EOF) {}
            }
            buf[last_idx] = '\0';
            slots = atoi(buf);
        }
    }
    char buf[255];
    /* Target node ID: from --cluster-to, or asked interactively. */
    char *to = config.cluster_manager_command.to,
         *from = config.cluster_manager_command.from;
    while (to == NULL) {
        printf("What is the receiving node ID? ");
        fflush(stdout);
        int nread = read(fileno(stdin),buf,255);
        if (nread <= 0) continue;
        int last_idx = nread - 1;
        if (buf[last_idx] != '\n') {
            int ch;
            while ((ch = getchar()) != '\n' && ch != EOF) {}
        }
        buf[last_idx] = '\0';
        if (strlen(buf) > 0) to = buf;
    }
    int raise_err = 0;
    clusterManagerNode *target = clusterNodeForResharding(to, NULL, &raise_err);
    if (target == NULL) return 0;
    list *sources = listCreate();
    list *table = NULL;
    int all = 0, result = 1;
    /* Source nodes: either collected interactively ('done' ends the
     * list, 'all' selects every master but the target), or parsed from
     * the comma separated --cluster-from value. */
    if (from == NULL) {
        printf("Please enter all the source node IDs.\n");
        printf(" Type 'all' to use all the nodes as source nodes for "
               "the hash slots.\n");
        printf(" Type 'done' once you entered all the source nodes IDs.\n");
        while (1) {
            printf("Source node #%lu: ", listLength(sources) + 1);
            fflush(stdout);
            int nread = read(fileno(stdin),buf,255);
            if (nread <= 0) continue;
            int last_idx = nread - 1;
            if (buf[last_idx] != '\n') {
                int ch;
                while ((ch = getchar()) != '\n' && ch != EOF) {}
            }
            buf[last_idx] = '\0';
            if (!strcmp(buf, "done")) break;
            else if (!strcmp(buf, "all")) {
                all = 1;
                break;
            } else {
                clusterManagerNode *src =
                    clusterNodeForResharding(buf, target, &raise_err);
                if (src != NULL) listAddNodeTail(sources, src);
                else if (raise_err) {
                    result = 0;
                    goto cleanup;
                }
            }
        }
    } else {
        char *p;
        while((p = strchr(from, ',')) != NULL) {
            *p = '\0';
            if (!strcmp(from, "all")) {
                all = 1;
                break;
            } else {
                clusterManagerNode *src =
                    clusterNodeForResharding(from, target, &raise_err);
                if (src != NULL) listAddNodeTail(sources, src);
                else if (raise_err) {
                    result = 0;
                    goto cleanup;
                }
            }
            from = p + 1;
        }
        /* Check if there's still another source to process. */
        if (!all && strlen(from) > 0) {
            if (!strcmp(from, "all")) all = 1;
            if (!all) {
                clusterManagerNode *src =
                    clusterNodeForResharding(from, target, &raise_err);
                if (src != NULL) listAddNodeTail(sources, src);
                else if (raise_err) {
                    result = 0;
                    goto cleanup;
                }
            }
        }
    }
    listIter li;
    listNode *ln;
    if (all) {
        /* 'all': every master except the target becomes a source. */
        listEmpty(sources);
        listRewind(cluster_manager.nodes, &li);
        while ((ln = listNext(&li)) != NULL) {
            clusterManagerNode *n = ln->value;
            if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate)
                continue;
            if (!sdscmp(n->name, target->name)) continue;
            listAddNodeTail(sources, n);
        }
    }
    if (listLength(sources) == 0) {
        fprintf(stderr, "*** No source nodes given, operation aborted.\n");
        result = 0;
        goto cleanup;
    }
    printf("\nReady to move %d slots.\n", slots);
    printf(" Source nodes:\n");
    listRewind(sources, &li);
    while ((ln = listNext(&li)) != NULL) {
        clusterManagerNode *src = ln->value;
        sds info = clusterManagerNodeInfo(src, 4);
        printf("%s\n", info);
        sdsfree(info);
    }
    printf(" Destination node:\n");
    sds info = clusterManagerNodeInfo(target, 4);
    printf("%s\n", info);
    sdsfree(info);
    table = clusterManagerComputeReshardTable(sources, slots);
    printf(" Resharding plan:\n");
    clusterManagerShowReshardTable(table);
    if (!(config.cluster_manager_command.flags &
          CLUSTER_MANAGER_CMD_FLAG_YES))
    {
        printf("Do you want to proceed with the proposed "
               "reshard plan (yes/no)? ");
        fflush(stdout);
        char buf[4];
        int nread = read(fileno(stdin),buf,4);
        /* NOTE(review): only the first three bytes are kept before the
         * comparison, so any answer starting with "yes" is accepted. */
        buf[3] = '\0';
        if (nread <= 0 || strcmp("yes", buf) != 0) {
            result = 0;
            goto cleanup;
        }
    }
    /* Execute the plan: move every slot in the reshard table to the
     * target, aborting on the first failure. */
    int opts = CLUSTER_MANAGER_OPT_VERBOSE;
    listRewind(table, &li);
    while ((ln = listNext(&li)) != NULL) {
        clusterManagerReshardTableItem *item = ln->value;
        char *err = NULL;
        result = clusterManagerMoveSlot(item->source, target, item->slot,
                                        opts, &err);
        if (!result) {
            if (err != NULL) {
                clusterManagerLogErr("clusterManagerMoveSlot failed: %s\n", err);
                zfree(err);
            }
            goto cleanup;
        }
    }
cleanup:
    listRelease(sources);
    clusterManagerReleaseReshardTable(table);
    return result;
invalid_args:
    fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
    return 0;
}
+
/* Implements the 'rebalance' subcommand: redistribute hash slots among
 * masters so that each node's share is proportional to its weight
 * (default 1, overridable with --cluster-weight name=value). Nothing is
 * moved if every node is already within the configured threshold.
 * With --cluster-simulate the plan is printed but not executed.
 * Returns 1 on success (or nothing to do), 0 on error. */
static int clusterManagerCommandRebalance(int argc, char **argv) {
    int port = 0;
    char *ip = NULL;
    clusterManagerNode **weightedNodes = NULL;
    list *involved = NULL;
    if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args;
    clusterManagerNode *node = clusterManagerNewNode(ip, port, 0);
    if (!clusterManagerLoadInfoFromNode(node)) return 0;
    int result = 1, i;
    /* Parse the --cluster-weight "name=weight" assignments, if any. */
    if (config.cluster_manager_command.weight != NULL) {
        for (i = 0; i < config.cluster_manager_command.weight_argc; i++) {
            char *name = config.cluster_manager_command.weight[i];
            char *p = strchr(name, '=');
            if (p == NULL) {
                clusterManagerLogErr("*** invalid input %s\n", name);
                result = 0;
                goto cleanup;
            }
            *p = '\0';
            float w = atof(++p);
            clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name);
            if (n == NULL) {
                clusterManagerLogErr("*** No such master node %s\n", name);
                result = 0;
                goto cleanup;
            }
            n->weight = w;
        }
    }
    float total_weight = 0;
    int nodes_involved = 0;
    int use_empty = config.cluster_manager_command.flags &
                    CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER;
    involved = listCreate();
    listIter li;
    listNode *ln;
    listRewind(cluster_manager.nodes, &li);
    /* Compute the total cluster weight. Masters with zero slots are
     * excluded (weight forced to 0) unless --cluster-use-empty-masters
     * was given. */
    while ((ln = listNext(&li)) != NULL) {
        clusterManagerNode *n = ln->value;
        if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate)
            continue;
        if (!use_empty && n->slots_count == 0) {
            n->weight = 0;
            continue;
        }
        total_weight += n->weight;
        nodes_involved++;
        listAddNodeTail(involved, n);
    }
    weightedNodes = zmalloc(nodes_involved * sizeof(clusterManagerNode *));
    if (weightedNodes == NULL) goto cleanup;
    /* Check cluster, only proceed if it looks sane. */
    clusterManagerCheckCluster(1);
    if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) {
        clusterManagerLogErr("*** Please fix your cluster problems "
                             "before rebalancing\n");
        result = 0;
        goto cleanup;
    }
    /* Calculate the slots balance for each node. It's the number of
     * slots the node should lose (if positive) or gain (if negative)
     * in order to be balanced. */
    int threshold_reached = 0, total_balance = 0;
    float threshold = config.cluster_manager_command.threshold;
    i = 0;
    listRewind(involved, &li);
    while ((ln = listNext(&li)) != NULL) {
        clusterManagerNode *n = ln->value;
        weightedNodes[i++] = n;
        int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) *
                        n->weight);
        n->balance = n->slots_count - expected;
        total_balance += n->balance;
        /* Compute the percentage of difference between the
         * expected number of slots and the real one, to see
         * if it's over the threshold specified by the user. */
        int over_threshold = 0;
        if (threshold > 0) {
            if (n->slots_count > 0) {
                float err_perc = fabs((100-(100.0*expected/n->slots_count)));
                if (err_perc > threshold) over_threshold = 1;
            } else if (expected > 1) {
                over_threshold = 1;
            }
        }
        if (over_threshold) threshold_reached = 1;
    }
    if (!threshold_reached) {
        clusterManagerLogWarn("*** No rebalancing needed! "
                             "All nodes are within the %.2f%% threshold.\n",
                             config.cluster_manager_command.threshold);
        goto cleanup;
    }
    /* Because of rounding, it is possible that the balance of all nodes
     * summed does not give 0. Make sure that nodes that have to provide
     * slots are always matched by nodes receiving slots. */
    while (total_balance > 0) {
        listRewind(involved, &li);
        while ((ln = listNext(&li)) != NULL) {
            clusterManagerNode *n = ln->value;
            if (n->balance <= 0 && total_balance > 0) {
                n->balance--;
                total_balance--;
            }
        }
    }
    /* Sort nodes by their slots balance. */
    qsort(weightedNodes, nodes_involved, sizeof(clusterManagerNode *),
          clusterManagerCompareNodeBalance);
    clusterManagerLogInfo(">>> Rebalancing across %d nodes. "
                          "Total weight = %.2f\n",
                          nodes_involved, total_weight);
    if (config.verbose) {
        for (i = 0; i < nodes_involved; i++) {
            clusterManagerNode *n = weightedNodes[i];
            printf("%s:%d balance is %d slots\n", n->ip, n->port, n->balance);
        }
    }
    /* Now we have at the start of the 'sn' array nodes that should get
     * slots, at the end nodes that must give slots.
     * We take two indexes, one at the start, and one at the end,
     * incrementing or decrementing the indexes accordingly til we
     * find nodes that need to get/provide slots. */
    int dst_idx = 0;
    int src_idx = nodes_involved - 1;
    int simulate = config.cluster_manager_command.flags &
                   CLUSTER_MANAGER_CMD_FLAG_SIMULATE;
    while (dst_idx < src_idx) {
        clusterManagerNode *dst = weightedNodes[dst_idx];
        clusterManagerNode *src = weightedNodes[src_idx];
        int db = abs(dst->balance);
        int sb = abs(src->balance);
        /* Move as many slots as the smaller of the two imbalances. */
        int numslots = (db < sb ? db : sb);
        if (numslots > 0) {
            printf("Moving %d slots from %s:%d to %s:%d\n", numslots,
                   src->ip,
                   src->port,
                   dst->ip,
                   dst->port);
            /* Actually move the slots. */
            list *lsrc = listCreate(), *table = NULL;
            listAddNodeTail(lsrc, src);
            table = clusterManagerComputeReshardTable(lsrc, numslots);
            listRelease(lsrc);
            int table_len = (int) listLength(table);
            if (!table || table_len != numslots) {
                clusterManagerLogErr("*** Assertion failed: Reshard table "
                                     "!= number of slots");
                result = 0;
                goto end_move;
            }
            if (simulate) {
                /* Simulation: just print one '#' per slot. */
                for (i = 0; i < table_len; i++) printf("#");
            } else {
                int opts = CLUSTER_MANAGER_OPT_QUIET |
                           CLUSTER_MANAGER_OPT_UPDATE;
                listRewind(table, &li);
                while ((ln = listNext(&li)) != NULL) {
                    clusterManagerReshardTableItem *item = ln->value;
                    /* NOTE(review): 'err' is not initialized here; this
                     * assumes clusterManagerMoveSlot() always sets it on
                     * failure — confirm before relying on it. */
                    char *err;
                    result = clusterManagerMoveSlot(item->source,
                                                    dst,
                                                    item->slot,
                                                    opts, &err);
                    if (!result) {
                        clusterManagerLogErr("*** clusterManagerMoveSlot: %s\n", err);
                        zfree(err);
                        goto end_move;
                    }
                    printf("#");
                    fflush(stdout);
                }

            }
            printf("\n");
end_move:
            clusterManagerReleaseReshardTable(table);
            if (!result) goto cleanup;
        }
        /* Update nodes balance. */
        dst->balance += numslots;
        src->balance -= numslots;
        if (dst->balance == 0) dst_idx++;
        if (src->balance == 0) src_idx --;
    }
cleanup:
    if (involved != NULL) listRelease(involved);
    if (weightedNodes != NULL) zfree(weightedNodes);
    return result;
invalid_args:
    fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
    return 0;
}
+
/* Implements the 'set-timeout' subcommand: set cluster-node-timeout to
 * argv[1] milliseconds on every node of the cluster reachable at
 * argv[0], persisting it with CONFIG REWRITE. Per-node failures are
 * reported and counted but do not stop the loop.
 * Returns 1 once all nodes were attempted, 0 only on bad arguments. */
static int clusterManagerCommandSetTimeout(int argc, char **argv) {
    UNUSED(argc);
    int port = 0;
    char *ip = NULL;
    if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args;
    int timeout = atoi(argv[1]);
    if (timeout < 100) {
        fprintf(stderr, "Setting a node timeout of less than 100 "
                "milliseconds is a bad idea.\n");
        return 0;
    }
    // Load cluster information
    clusterManagerNode *node = clusterManagerNewNode(ip, port, 0);
    if (!clusterManagerLoadInfoFromNode(node)) return 0;
    int ok_count = 0, err_count = 0;

    clusterManagerLogInfo(">>> Reconfiguring node timeout in every "
                          "cluster node...\n");
    listIter li;
    listNode *ln;
    listRewind(cluster_manager.nodes, &li);
    while ((ln = listNext(&li)) != NULL) {
        clusterManagerNode *n = ln->value;
        char *err = NULL;
        redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s %s %d",
                                                    "SET",
                                                    "cluster-node-timeout",
                                                    timeout);
        /* Any failure below jumps to the shared per-node error handler. */
        if (reply == NULL) goto reply_err;
        int ok = clusterManagerCheckRedisReply(n, reply, &err);
        freeReplyObject(reply);
        if (!ok) goto reply_err;
        reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s", "REWRITE");
        if (reply == NULL) goto reply_err;
        ok = clusterManagerCheckRedisReply(n, reply, &err);
        freeReplyObject(reply);
        if (!ok) goto reply_err;
        clusterManagerLogWarn("*** New timeout set for %s:%d\n", n->ip,
                              n->port);
        ok_count++;
        continue;
reply_err:;
        /* 'err' may be NULL (no message) or heap-allocated by
         * clusterManagerCheckRedisReply(); only free it in the latter
         * case — the "" fallback is a string literal. */
        int need_free = 0;
        if (err == NULL) err = "";
        else need_free = 1;
        clusterManagerLogErr("ERR setting node-timeout for %s:%d: %s\n", n->ip,
                             n->port, err);
        if (need_free) zfree(err);
        err_count++;
    }
    clusterManagerLogInfo(">>> New node timeout set. %d OK, %d ERR.\n",
                          ok_count, err_count);
    return 1;
invalid_args:
    fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
    return 0;
}
+
/* Implements the 'import' subcommand: copy all keys of DB 0 from a
 * standalone (non-cluster) source instance given via --cluster-from
 * into the cluster reachable at argv[0], using MIGRATE towards the
 * master owning each key's hash slot. Honors the --cluster-copy and
 * --cluster-replace flags. Returns 1 on success, 0 on error. */
static int clusterManagerCommandImport(int argc, char **argv) {
    int success = 1;
    int port = 0, src_port = 0;
    char *ip = NULL, *src_ip = NULL;
    char *invalid_args_msg = NULL;
    sds cmdfmt = NULL;
    if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) {
        invalid_args_msg = CLUSTER_MANAGER_INVALID_HOST_ARG;
        goto invalid_args;
    }
    if (config.cluster_manager_command.from == NULL) {
        invalid_args_msg = "[ERR] Option '--cluster-from' is required for "
                           "subcommand 'import'.\n";
        goto invalid_args;
    }
    char *src_host[] = {config.cluster_manager_command.from};
    if (!getClusterHostFromCmdArgs(1, src_host, &src_ip, &src_port)) {
        invalid_args_msg = "[ERR] Invalid --cluster-from host. You need to "
                           "pass a valid address (ie. 120.0.0.1:7000).\n";
        goto invalid_args;
    }
    clusterManagerLogInfo(">>> Importing data from %s:%d to cluster %s:%d\n",
                          src_ip, src_port, ip, port);

    clusterManagerNode *refnode = clusterManagerNewNode(ip, port, 0);
    if (!clusterManagerLoadInfoFromNode(refnode)) return 0;
    if (!clusterManagerCheckCluster(0)) return 0;
    char *reply_err = NULL;
    redisReply *src_reply = NULL;
    // Connect to the source node.
    redisContext *src_ctx = redisConnect(src_ip, src_port);
    if (src_ctx->err) {
        success = 0;
        fprintf(stderr,"Could not connect to Redis at %s:%d: %s.\n", src_ip,
                src_port, src_ctx->errstr);
        goto cleanup;
    }
    // Auth for the source node.
    char *from_user = config.cluster_manager_command.from_user;
    char *from_pass = config.cluster_manager_command.from_pass;
    if (cliAuth(src_ctx, from_user, from_pass) == REDIS_ERR) {
        success = 0;
        goto cleanup;
    }

    /* Refuse to import from a cluster node: the source must be a
     * standalone instance. */
    src_reply = reconnectingRedisCommand(src_ctx, "INFO");
    if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) {
        if (src_reply && src_reply->str) reply_err = src_reply->str;
        success = 0;
        goto cleanup;
    }
    if (getLongInfoField(src_reply->str, "cluster_enabled")) {
        clusterManagerLogErr("[ERR] The source node should not be a "
                             "cluster node.\n");
        success = 0;
        goto cleanup;
    }
    freeReplyObject(src_reply);
    src_reply = reconnectingRedisCommand(src_ctx, "DBSIZE");
    if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) {
        if (src_reply && src_reply->str) reply_err = src_reply->str;
        success = 0;
        goto cleanup;
    }
    /* NOTE(review): DBSIZE replies with a long long; the value is
     * truncated to int here but only used for this log line. */
    int size = src_reply->integer, i;
    clusterManagerLogWarn("*** Importing %d keys from DB 0\n", size);

    // Build a slot -> node map
    clusterManagerNode *slots_map[CLUSTER_MANAGER_SLOTS];
    memset(slots_map, 0, sizeof(slots_map));
    listIter li;
    listNode *ln;
    for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
        listRewind(cluster_manager.nodes, &li);
        while ((ln = listNext(&li)) != NULL) {
            clusterManagerNode *n = ln->value;
            if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
            if (n->slots_count == 0) continue;
            if (n->slots[i]) {
                slots_map[i] = n;
                break;
            }
        }
    }
    /* Build the MIGRATE command template, appending AUTH/AUTH2 and the
     * COPY/REPLACE modifiers as configured. */
    cmdfmt = sdsnew("MIGRATE %s %d %s %d %d");
    if (config.conn_info.auth) {
        if (config.conn_info.user) {
            cmdfmt = sdscatfmt(cmdfmt," AUTH2 %s %s", config.conn_info.user, config.conn_info.auth);
        } else {
            cmdfmt = sdscatfmt(cmdfmt," AUTH %s", config.conn_info.auth);
        }
    }

    if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COPY)
        cmdfmt = sdscat(cmdfmt," COPY");
    if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLACE)
        cmdfmt = sdscat(cmdfmt," REPLACE");

    /* Use SCAN to iterate over the keys, migrating to the
     * right node as needed. */
    int cursor = -999, timeout = config.cluster_manager_command.timeout;
    /* -999 is just a "not started yet" sentinel; SCAN itself begins at
     * cursor 0 and terminates when the server replies with cursor 0. */
    while (cursor != 0) {
        if (cursor < 0) cursor = 0;
        freeReplyObject(src_reply);
        src_reply = reconnectingRedisCommand(src_ctx, "SCAN %d COUNT %d",
                                             cursor, 1000);
        if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) {
            if (src_reply && src_reply->str) reply_err = src_reply->str;
            success = 0;
            goto cleanup;
        }
        assert(src_reply->type == REDIS_REPLY_ARRAY);
        assert(src_reply->elements >= 2);
        assert(src_reply->element[1]->type == REDIS_REPLY_ARRAY);
        if (src_reply->element[0]->type == REDIS_REPLY_STRING)
            cursor = atoi(src_reply->element[0]->str);
        else if (src_reply->element[0]->type == REDIS_REPLY_INTEGER)
            cursor = src_reply->element[0]->integer;
        int keycount = src_reply->element[1]->elements;
        for (i = 0; i < keycount; i++) {
            redisReply *kr = src_reply->element[1]->element[i];
            assert(kr->type == REDIS_REPLY_STRING);
            char *key = kr->str;
            uint16_t slot = clusterManagerKeyHashSlot(key, kr->len);
            /* NOTE(review): assumes every slot is owned by some master
             * (full coverage was checked by clusterManagerCheckCluster
             * above); 'target' would be NULL otherwise — confirm. */
            clusterManagerNode *target = slots_map[slot];
            printf("Migrating %s to %s:%d: ", key, target->ip, target->port);
            redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt,
                                                     target->ip, target->port,
                                                     key, 0, timeout);
            if (!r || r->type == REDIS_REPLY_ERROR) {
                if (r && r->str) {
                    clusterManagerLogErr("Source %s:%d replied with "
                                         "error:\n%s\n", src_ip, src_port,
                                         r->str);
                }
                success = 0;
            }
            freeReplyObject(r);
            if (!success) goto cleanup;
            clusterManagerLogOk("OK\n");
        }
    }
cleanup:
    if (reply_err)
        clusterManagerLogErr("Source %s:%d replied with error:\n%s\n",
                             src_ip, src_port, reply_err);
    if (src_ctx) redisFree(src_ctx);
    if (src_reply) freeReplyObject(src_reply);
    if (cmdfmt) sdsfree(cmdfmt);
    return success;
invalid_args:
    fprintf(stderr, "%s", invalid_args_msg);
    return 0;
}
+
+static int clusterManagerCommandCall(int argc, char **argv) {
+ int port = 0, i;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args;
+ clusterManagerNode *refnode = clusterManagerNewNode(ip, port, 0);
+ if (!clusterManagerLoadInfoFromNode(refnode)) return 0;
+ argc--;
+ argv++;
+ size_t *argvlen = zmalloc(argc*sizeof(size_t));
+ clusterManagerLogInfo(">>> Calling");
+ for (i = 0; i < argc; i++) {
+ argvlen[i] = strlen(argv[i]);
+ printf(" %s", argv[i]);
+ }
+ printf("\n");
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if ((config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_MASTERS_ONLY)
+ && (n->replicate != NULL)) continue; // continue if node is slave
+ if ((config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVES_ONLY)
+ && (n->replicate == NULL)) continue; // continue if node is master
+ if (!n->context && !clusterManagerNodeConnect(n)) continue;
+ redisReply *reply = NULL;
+ redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen);
+ int status = redisGetReply(n->context, (void **)(&reply));
+ if (status != REDIS_OK || reply == NULL )
+ printf("%s:%d: Failed!\n", n->ip, n->port);
+ else {
+ sds formatted_reply = cliFormatReplyRaw(reply);
+ printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply);
+ sdsfree(formatted_reply);
+ }
+ if (reply != NULL) freeReplyObject(reply);
+ }
+ zfree(argvlen);
+ return 1;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandBackup(int argc, char **argv) {
+ UNUSED(argc);
+ int success = 1, port = 0;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args;
+ clusterManagerNode *refnode = clusterManagerNewNode(ip, port, 0);
+ if (!clusterManagerLoadInfoFromNode(refnode)) return 0;
+ int no_issues = clusterManagerCheckCluster(0);
+ int cluster_errors_count = (no_issues ? 0 :
+ listLength(cluster_manager.errors));
+ config.cluster_manager_command.backup_dir = argv[1];
+ /* TODO: check if backup_dir is a valid directory. */
+ sds json = sdsnew("[\n");
+ int first_node = 0;
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ if (!first_node) first_node = 1;
+ else json = sdscat(json, ",\n");
+ clusterManagerNode *node = ln->value;
+ sds node_json = clusterManagerNodeGetJSON(node, cluster_errors_count);
+ json = sdscat(json, node_json);
+ sdsfree(node_json);
+ if (node->replicate)
+ continue;
+ clusterManagerLogInfo(">>> Node %s:%d -> Saving RDB...\n",
+ node->ip, node->port);
+ fflush(stdout);
+ getRDB(node);
+ }
+ json = sdscat(json, "\n]");
+ sds jsonpath = sdsnew(config.cluster_manager_command.backup_dir);
+ if (jsonpath[sdslen(jsonpath) - 1] != '/')
+ jsonpath = sdscat(jsonpath, "/");
+ jsonpath = sdscat(jsonpath, "nodes.json");
+ fflush(stdout);
+ clusterManagerLogInfo("Saving cluster configuration to: %s\n", jsonpath);
+ FILE *out = fopen(jsonpath, "w+");
+ if (!out) {
+ clusterManagerLogErr("Could not save nodes to: %s\n", jsonpath);
+ success = 0;
+ goto cleanup;
+ }
+ fputs(json, out);
+ fclose(out);
+cleanup:
+ sdsfree(json);
+ sdsfree(jsonpath);
+ if (success) {
+ if (!no_issues) {
+ clusterManagerLogWarn("*** Cluster seems to have some problems, "
+ "please be aware of it if you're going "
+ "to restore this backup.\n");
+ }
+ clusterManagerLogOk("[OK] Backup created into: %s\n",
+ config.cluster_manager_command.backup_dir);
+ } else clusterManagerLogOk("[ERR] Failed to back cluster!\n");
+ return success;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandHelp(int argc, char **argv) {
+ UNUSED(argc);
+ UNUSED(argv);
+ int commands_count = sizeof(clusterManagerCommands) /
+ sizeof(clusterManagerCommandDef);
+ int i = 0, j;
+ fprintf(stdout, "Cluster Manager Commands:\n");
+ int padding = 15;
+ for (; i < commands_count; i++) {
+ clusterManagerCommandDef *def = &(clusterManagerCommands[i]);
+ int namelen = strlen(def->name), padlen = padding - namelen;
+ fprintf(stdout, " %s", def->name);
+ for (j = 0; j < padlen; j++) fprintf(stdout, " ");
+ fprintf(stdout, "%s\n", (def->args ? def->args : ""));
+ if (def->options != NULL) {
+ int optslen = strlen(def->options);
+ char *p = def->options, *eos = p + optslen;
+ char *comma = NULL;
+ while ((comma = strchr(p, ',')) != NULL) {
+ int deflen = (int)(comma - p);
+ char buf[255];
+ memcpy(buf, p, deflen);
+ buf[deflen] = '\0';
+ for (j = 0; j < padding; j++) fprintf(stdout, " ");
+ fprintf(stdout, " --cluster-%s\n", buf);
+ p = comma + 1;
+ if (p >= eos) break;
+ }
+ if (p < eos) {
+ for (j = 0; j < padding; j++) fprintf(stdout, " ");
+ fprintf(stdout, " --cluster-%s\n", p);
+ }
+ }
+ }
+ fprintf(stdout, "\nFor check, fix, reshard, del-node, set-timeout, "
+ "info, rebalance, call, import, backup you "
+ "can specify the host and port of any working node in "
+ "the cluster.\n");
+
+ int options_count = sizeof(clusterManagerOptions) /
+ sizeof(clusterManagerOptionDef);
+ i = 0;
+ fprintf(stdout, "\nCluster Manager Options:\n");
+ for (; i < options_count; i++) {
+ clusterManagerOptionDef *def = &(clusterManagerOptions[i]);
+ int namelen = strlen(def->name), padlen = padding - namelen;
+ fprintf(stdout, " %s", def->name);
+ for (j = 0; j < padlen; j++) fprintf(stdout, " ");
+ fprintf(stdout, "%s\n", def->desc);
+ }
+
+ fprintf(stdout, "\n");
+ return 0;
+}
+
+/*------------------------------------------------------------------------------
+ * Latency and latency history modes
+ *--------------------------------------------------------------------------- */
+
+static void latencyModePrint(long long min, long long max, double avg, long long count) {
+ if (config.output == OUTPUT_STANDARD) {
+ printf("min: %lld, max: %lld, avg: %.2f (%lld samples)",
+ min, max, avg, count);
+ fflush(stdout);
+ } else if (config.output == OUTPUT_CSV) {
+ printf("%lld,%lld,%.2f,%lld\n", min, max, avg, count);
+ } else if (config.output == OUTPUT_RAW) {
+ printf("%lld %lld %.2f %lld\n", min, max, avg, count);
+ } else if (config.output == OUTPUT_JSON) {
+ printf("{\"min\": %lld, \"max\": %lld, \"avg\": %.2f, \"count\": %lld}\n", min, max, avg, count);
+ }
+}
+
#define LATENCY_SAMPLE_RATE 10 /* milliseconds. */
#define LATENCY_HISTORY_DEFAULT_INTERVAL 15000 /* milliseconds. */
/* --latency / --latency-history mode: PING the server in a tight loop
 * and print running min/max/avg latency statistics. Never returns
 * normally: exits on I/O error, or after printing a single report in
 * plain --latency mode with non-standard output (csv/raw/non-tty). */
static void latencyMode(void) {
    redisReply *reply;
    long long start, latency, min = 0, max = 0, tot = 0, count = 0;
    /* NOTE(review): computed before config.interval gets its default
     * below, so without -i the history window is the 15000 ms default;
     * with -i it is the user interval converted to milliseconds. */
    long long history_interval =
        config.interval ? config.interval/1000 :
                          LATENCY_HISTORY_DEFAULT_INTERVAL;
    double avg;
    long long history_start = mstime();

    /* Set a default for the interval in case of --latency option
     * with --raw, --csv or when it is redirected to non tty. */
    if (config.interval == 0) {
        config.interval = 1000;
    } else {
        config.interval /= 1000; /* We need to convert to milliseconds. */
    }

    if (!context) exit(1);
    while(1) {
        start = mstime();
        reply = reconnectingRedisCommand(context,"PING");
        if (reply == NULL) {
            fprintf(stderr,"\nI/O error\n");
            exit(1);
        }
        latency = mstime()-start;
        freeReplyObject(reply);
        count++;
        /* First sample initializes the stats; later samples update them. */
        if (count == 1) {
            min = max = tot = latency;
            avg = (double) latency;
        } else {
            if (latency < min) min = latency;
            if (latency > max) max = latency;
            tot += latency;
            avg = (double) tot/count;
        }

        if (config.output == OUTPUT_STANDARD) {
            printf("\x1b[0G\x1b[2K"); /* Clear the line. */
            latencyModePrint(min,max,avg,count);
        } else {
            if (config.latency_history) {
                latencyModePrint(min,max,avg,count);
            } else if (mstime()-history_start > config.interval) {
                /* Non-history, non-standard output: one report and exit. */
                latencyModePrint(min,max,avg,count);
                exit(0);
            }
        }

        /* In --latency-history mode, reset the stats every time the
         * history window elapses, printing the covered time range. */
        if (config.latency_history && mstime()-history_start > history_interval)
        {
            printf(" -- %.2f seconds range\n", (float)(mstime()-history_start)/1000);
            history_start = mstime();
            min = max = tot = count = 0;
        }
        usleep(LATENCY_SAMPLE_RATE * 1000);
    }
}
+
+/*------------------------------------------------------------------------------
+ * Latency distribution mode -- requires 256 colors xterm
+ *--------------------------------------------------------------------------- */
+
+#define LATENCY_DIST_DEFAULT_INTERVAL 1000 /* milliseconds. */
+
/* Structure to store samples distribution: one bucket of the latency
 * spectrum built by latencyDistMode() and rendered by
 * showLatencyDistSamples(). */
struct distsamples {
    long long max;   /* Max latency to fit into this interval (usec). */
    long long count; /* Number of samples in this interval. */
    int character;   /* Associated character in visualization. */
};
+
+/* Helper function for latencyDistMode(). Performs the spectrum visualization
+ * of the collected samples targeting an xterm 256 terminal.
+ *
+ * Takes an array of distsamples structures, ordered from smaller to bigger
+ * 'max' value. Last sample max must be 0, to mean that it holds all the
+ * samples greater than the previous one, and is also the stop sentinel.
+ *
+ * "tot' is the total number of samples in the different buckets, so it
+ * is the SUM(samples[i].count) for i to 0 up to the max sample.
+ *
+ * As a side effect the function sets all the buckets count to 0. */
+void showLatencyDistSamples(struct distsamples *samples, long long tot) {
+ int j;
+
+ /* We convert samples into an index inside the palette
+ * proportional to the percentage a given bucket represents.
+ * This way intensity of the different parts of the spectrum
+ * don't change relative to the number of requests, which avoids to
+ * pollute the visualization with non-latency related info. */
+ printf("\033[38;5;0m"); /* Set foreground color to black. */
+ for (j = 0; ; j++) {
+ int coloridx =
+ ceil((double) samples[j].count / tot * (spectrum_palette_size-1));
+ int color = spectrum_palette[coloridx];
+ printf("\033[48;5;%dm%c", (int)color, samples[j].character);
+ samples[j].count = 0;
+ if (samples[j].max == 0) break; /* Last sample. */
+ }
+ printf("\033[0m\n");
+ fflush(stdout);
+}
+
+/* Show the legend: different buckets values and colors meaning, so
+ * that the spectrum is more easily readable. */
+void showLatencyDistLegend(void) {
+ int j;
+
+ printf("---------------------------------------------\n");
+ printf(". - * # .01 .125 .25 .5 milliseconds\n");
+ printf("1,2,3,...,9 from 1 to 9 milliseconds\n");
+ printf("A,B,C,D,E 10,20,30,40,50 milliseconds\n");
+ printf("F,G,H,I,J .1,.2,.3,.4,.5 seconds\n");
+ printf("K,L,M,N,O,P,Q,? 1,2,4,8,16,30,60,>60 seconds\n");
+ printf("From 0 to 100%%: ");
+ for (j = 0; j < spectrum_palette_size; j++) {
+ printf("\033[48;5;%dm ", spectrum_palette[j]);
+ }
+ printf("\033[0m\n");
+ printf("---------------------------------------------\n");
+}
+
/* Implements --latency-dist: repeatedly PING the server, classify each
 * round trip time into a latency bucket, and every 'history_interval'
 * milliseconds render one spectrum line (see showLatencyDistSamples()).
 * Requires an xterm-256color capable terminal. Never returns: the loop
 * runs forever and the process exits on I/O errors. */
static void latencyDistMode(void) {
    redisReply *reply;
    long long start, latency, count = 0;
    /* config.interval is in microseconds; the refresh interval used
     * below is expressed in milliseconds. */
    long long history_interval =
        config.interval ? config.interval/1000 :
                          LATENCY_DIST_DEFAULT_INTERVAL;
    long long history_start = ustime();
    int j, outputs = 0;

    struct distsamples samples[] = {
        /* We use a mostly logarithmic scale, with certain linear intervals
         * which are more interesting than others, like 1-10 milliseconds
         * range. Buckets MUST stay ordered by growing 'max', and the
         * final max == 0 entry is the sentinel catching everything else. */
        {10,0,'.'}, /* 0.01 ms */
        {125,0,'-'}, /* 0.125 ms */
        {250,0,'*'}, /* 0.25 ms */
        {500,0,'#'}, /* 0.5 ms */
        {1000,0,'1'}, /* 1 ms */
        {2000,0,'2'}, /* 2 ms */
        {3000,0,'3'}, /* 3 ms */
        {4000,0,'4'}, /* 4 ms */
        {5000,0,'5'}, /* 5 ms */
        {6000,0,'6'}, /* 6 ms */
        {7000,0,'7'}, /* 7 ms */
        {8000,0,'8'}, /* 8 ms */
        {9000,0,'9'}, /* 9 ms */
        {10000,0,'A'}, /* 10 ms */
        {20000,0,'B'}, /* 20 ms */
        {30000,0,'C'}, /* 30 ms */
        {40000,0,'D'}, /* 40 ms */
        {50000,0,'E'}, /* 50 ms */
        {100000,0,'F'}, /* 0.1 s */
        {200000,0,'G'}, /* 0.2 s */
        {300000,0,'H'}, /* 0.3 s */
        {400000,0,'I'}, /* 0.4 s */
        {500000,0,'J'}, /* 0.5 s */
        {1000000,0,'K'}, /* 1 s */
        {2000000,0,'L'}, /* 2 s */
        {4000000,0,'M'}, /* 4 s */
        {8000000,0,'N'}, /* 8 s */
        {16000000,0,'O'}, /* 16 s */
        {30000000,0,'P'}, /* 30 s */
        {60000000,0,'Q'}, /* 1 minute */
        {0,0,'?'}, /* > 1 minute */
    };

    if (!context) exit(1);
    while(1) {
        /* Measure the PING round trip time in microseconds. */
        start = ustime();
        reply = reconnectingRedisCommand(context,"PING");
        if (reply == NULL) {
            fprintf(stderr,"\nI/O error\n");
            exit(1);
        }
        latency = ustime()-start;
        freeReplyObject(reply);
        count++;

        /* Populate the relevant bucket: first one with max >= latency,
         * or the max == 0 sentinel if latency exceeds every bucket. */
        for (j = 0; ; j++) {
            if (samples[j].max == 0 || latency <= samples[j].max) {
                samples[j].count++;
                break;
            }
        }

        /* From time to time show the spectrum. The legend is re-printed
         * every 20 output lines so it stays visible while scrolling. */
        if (count && (ustime()-history_start)/1000 > history_interval) {
            if ((outputs++ % 20) == 0)
                showLatencyDistLegend();
            showLatencyDistSamples(samples,count);
            history_start = ustime();
            count = 0;
        }
        usleep(LATENCY_SAMPLE_RATE * 1000);
    }
}
+
+/*------------------------------------------------------------------------------
+ * Slave mode
+ *--------------------------------------------------------------------------- */
+
+#define RDB_EOF_MARK_SIZE 40
+
+int sendReplconf(const char* arg1, const char* arg2) {
+ int res = 1;
+ fprintf(stderr, "sending REPLCONF %s %s\n", arg1, arg2);
+ redisReply *reply = redisCommand(context, "REPLCONF %s %s", arg1, arg2);
+
+ /* Handle any error conditions */
+ if(reply == NULL) {
+ fprintf(stderr, "\nI/O error\n");
+ exit(1);
+ } else if(reply->type == REDIS_REPLY_ERROR) {
+ /* non fatal, old versions may not support it */
+ fprintf(stderr, "REPLCONF %s error: %s\n", arg1, reply->str);
+ res = 0;
+ }
+ freeReplyObject(reply);
+ return res;
+}
+
/* Advertise the "eof" capability to the master via REPLCONF, so that it
 * may reply to SYNC with an EOF-marker delimited (diskless) RDB transfer. */
void sendCapa(void) {
    sendReplconf("capa", "eof");
}
+
/* Send "REPLCONF rdb-only 1": asks the master for the RDB payload only,
 * without the following command stream. Old servers may not support it;
 * sendReplconf() treats that error as non fatal. */
void sendRdbOnly(void) {
    sendReplconf("rdb-only", "1");
}
+
/* Read raw bytes through a redisContext. The read operation is not greedy
 * and may not fill the buffer entirely.
 *
 * Returns whatever the context's low level read handler returns: the
 * number of bytes read (possibly less than 'len') or a negative value on
 * error. Used for the replication payload, which hiredis cannot parse. */
static ssize_t readConn(redisContext *c, char *buf, size_t len)
{
    return c->funcs->read(c, buf, len);
}
+
/* Sends SYNC and reads the number of bytes in the payload. Used both by
 * slaveMode() and getRDB().
 *
 * send_sync if 1 means we will explicitly send SYNC command. If 0 means
 * we will not send SYNC command, will send the command that in c->obuf.
 *
 * Returns the size of the RDB payload to read, or 0 in case an EOF marker is used and the size
 * is unknown, also returns 0 in case a PSYNC +CONTINUE was found (no RDB payload).
 * When 0 is returned for an EOF-marked transfer, the 40-byte delimiter is
 * copied into 'out_eof' (caller-provided buffer of RDB_EOF_MARK_SIZE bytes).
 *
 * The out_full_mode parameter if 1 means this is a full sync, if 0 means this is partial mode. */
unsigned long long sendSync(redisContext *c, int send_sync, char *out_eof, int *out_full_mode) {
    /* To start we need to send the SYNC command and return the payload.
     * The hiredis client lib does not understand this part of the protocol
     * and we don't want to mess with its buffers, so everything is performed
     * using direct low-level I/O. */
    char buf[4096], *p;
    ssize_t nread;

    if (out_full_mode) *out_full_mode = 1;

    if (send_sync) {
        /* Send the SYNC command. */
        if (cliWriteConn(c, "SYNC\r\n", 6) != 6) {
            fprintf(stderr,"Error writing to master\n");
            exit(1);
        }
    } else {
        /* We have written the command into c->obuf before: a zero-length
         * write just flushes the pending output buffer. */
        if (cliWriteConn(c, "", 0) != 0) {
            fprintf(stderr,"Error writing to master\n");
            exit(1);
        }
    }

    /* Read $<payload>\r\n, making sure to read just up to "\n".
     * Bytes are read one at a time so we never consume payload data. */
    p = buf;
    while(1) {
        nread = readConn(c,p,1);
        if (nread <= 0) {
            fprintf(stderr,"Error reading bulk length while SYNCing\n");
            exit(1);
        }
        if (*p == '\n' && p != buf) break;
        if (*p != '\n') p++;
        if (p >= buf + sizeof(buf) - 1) break; /* Go back one more char for null-term. */
    }
    *p = '\0';
    if (buf[0] == '-') {
        fprintf(stderr, "SYNC with master failed: %s\n", buf);
        exit(1);
    }

    /* Handling PSYNC responses.
     * Read +FULLRESYNC <replid> <offset>\r\n, after that is the $<payload> or the $EOF:<40 bytes delimiter>
     * Read +CONTINUE <replid>\r\n or +CONTINUE\r\n, after that is the command stream */
    if (!strncmp(buf, "+FULLRESYNC", 11) ||
        !strncmp(buf, "+CONTINUE", 9))
    {
        int sync_partial = !strncmp(buf, "+CONTINUE", 9);
        fprintf(stderr, "PSYNC replied %s\n", buf);
        /* Consume the next line (the bulk length, or nothing useful for
         * a partial resync), again one byte at a time. */
        p = buf;
        while(1) {
            nread = readConn(c,p,1);
            if (nread <= 0) {
                fprintf(stderr,"Error reading bulk length while PSYNCing\n");
                exit(1);
            }
            if (*p == '\n' && p != buf) break;
            if (*p != '\n') p++;
            if (p >= buf + sizeof(buf) - 1) break; /* Go back one more char for null-term. */
        }
        *p = '\0';

        if (sync_partial) {
            /* +CONTINUE: no RDB payload follows, only the command stream. */
            if (out_full_mode) *out_full_mode = 0;
            return 0;
        }
    }

    /* "$EOF:<delimiter>": diskless transfer of unknown size, terminated
     * by the 40-byte delimiter. Otherwise "$<length>" gives the size. */
    if (strncmp(buf+1,"EOF:",4) == 0 && strlen(buf+5) >= RDB_EOF_MARK_SIZE) {
        memcpy(out_eof, buf+5, RDB_EOF_MARK_SIZE);
        return 0;
    }
    return strtoull(buf+1,NULL,10);
}
+
/* Implements --replica: issue SYNC/PSYNC, discard the RDB payload (either
 * a known number of bytes or until the EOF marker is seen), then log the
 * command stream the master keeps sending, in CSV format. Returns only
 * when the connection is closed; exits on I/O errors. */
static void slaveMode(int send_sync) {
    static char eofmark[RDB_EOF_MARK_SIZE];
    static char lastbytes[RDB_EOF_MARK_SIZE];
    static int usemark = 0;
    static int out_full_mode;
    unsigned long long payload = sendSync(context, send_sync, eofmark, &out_full_mode);
    char buf[1024];
    int original_output = config.output;
    char *info = out_full_mode ? "Full resync" : "Partial resync";

    if (out_full_mode == 1 && payload == 0) {
        /* SYNC with EOF marker or PSYNC +FULLRESYNC with EOF marker.
         * Size unknown: read until the 40-byte delimiter shows up. */
        payload = ULLONG_MAX;
        memset(lastbytes,0,RDB_EOF_MARK_SIZE);
        usemark = 1;
        fprintf(stderr, "%s with master, discarding "
                        "bytes of bulk transfer until EOF marker...\n", info);
    } else if (out_full_mode == 1 && payload != 0) {
        /* SYNC without EOF marker or PSYNC +FULLRESYNC. */
        fprintf(stderr, "%s with master, discarding %llu "
                        "bytes of bulk transfer...\n", info, payload);
    } else if (out_full_mode == 0 && payload == 0) {
        /* PSYNC +CONTINUE (no RDB payload). */
        fprintf(stderr, "%s with master...\n", info);
    }

    /* Discard the payload. */
    while(payload) {
        ssize_t nread;

        nread = readConn(context,buf,(payload > sizeof(buf)) ? sizeof(buf) : payload);
        if (nread <= 0) {
            fprintf(stderr,"Error reading RDB payload while %sing\n", info);
            exit(1);
        }
        payload -= nread;

        if (usemark) {
            /* Update the last bytes array, and check if it matches our delimiter.
             * 'lastbytes' is a sliding window over the final RDB_EOF_MARK_SIZE
             * bytes seen so far. */
            if (nread >= RDB_EOF_MARK_SIZE) {
                memcpy(lastbytes,buf+nread-RDB_EOF_MARK_SIZE,RDB_EOF_MARK_SIZE);
            } else {
                int rem = RDB_EOF_MARK_SIZE-nread;
                memmove(lastbytes,lastbytes+nread,rem);
                memcpy(lastbytes+rem,buf,nread);
            }
            if (memcmp(lastbytes,eofmark,RDB_EOF_MARK_SIZE) == 0)
                break;
        }
    }

    if (usemark) {
        /* payload counted down from ULLONG_MAX, so the difference is the
         * number of bytes actually transferred. */
        unsigned long long offset = ULLONG_MAX - payload;
        fprintf(stderr,"%s done after %llu bytes. Logging commands from master.\n", info, offset);
        /* put the slave online */
        sleep(1);
        sendReplconf("ACK", "0");
    } else
        fprintf(stderr,"%s done. Logging commands from master.\n", info);

    /* Now we can use hiredis to read the incoming protocol. */
    config.output = OUTPUT_CSV;
    while (cliReadReply(0) == REDIS_OK);
    config.output = original_output;
}
+
+/*------------------------------------------------------------------------------
+ * RDB transfer mode
+ *--------------------------------------------------------------------------- */
+
/* This function implements --rdb, so it uses the replication protocol in order
 * to fetch the RDB file from a remote server.
 *
 * If 'node' is not NULL the RDB is fetched from that cluster node using its
 * own connection and a per-node filename, and the function returns to let the
 * caller iterate over other nodes; otherwise the global connection and
 * config.rdb_filename are used and the function exits the process when done.
 * A filename of "-" writes the payload to standard output. */
static void getRDB(clusterManagerNode *node) {
    int fd;
    redisContext *s;
    char *filename;
    if (node != NULL) {
        assert(node->context);
        s = node->context;
        filename = clusterManagerGetNodeRDBFilename(node);
    } else {
        s = context;
        filename = config.rdb_filename;
    }
    static char eofmark[RDB_EOF_MARK_SIZE];
    static char lastbytes[RDB_EOF_MARK_SIZE];
    static int usemark = 0;
    unsigned long long payload = sendSync(s, 1, eofmark, NULL);
    char buf[4096];

    if (payload == 0) {
        /* Diskless transfer: size unknown, read until the EOF marker. */
        payload = ULLONG_MAX;
        memset(lastbytes,0,RDB_EOF_MARK_SIZE);
        usemark = 1;
        fprintf(stderr,"SYNC sent to master, writing bytes of bulk transfer "
                       "until EOF marker to '%s'\n", filename);
    } else {
        fprintf(stderr,"SYNC sent to master, writing %llu bytes to '%s'\n",
            payload, filename);
    }

    int write_to_stdout = !strcmp(filename,"-");
    /* Write to file. */
    if (write_to_stdout) {
        fd = STDOUT_FILENO;
    } else {
        fd = open(filename, O_CREAT|O_WRONLY, 0644);
        if (fd == -1) {
            fprintf(stderr, "Error opening '%s': %s\n", filename,
                strerror(errno));
            exit(1);
        }
    }

    while(payload) {
        ssize_t nread, nwritten;

        nread = readConn(s,buf,(payload > sizeof(buf)) ? sizeof(buf) : payload);
        if (nread <= 0) {
            fprintf(stderr,"I/O Error reading RDB payload from socket\n");
            exit(1);
        }
        nwritten = write(fd, buf, nread);
        if (nwritten != nread) {
            fprintf(stderr,"Error writing data to file: %s\n",
                (nwritten == -1) ? strerror(errno) : "short write");
            exit(1);
        }
        payload -= nread;

        if (usemark) {
            /* Update the last bytes array, and check if it matches our delimiter.
             * Sliding window over the final RDB_EOF_MARK_SIZE bytes written. */
            if (nread >= RDB_EOF_MARK_SIZE) {
                memcpy(lastbytes,buf+nread-RDB_EOF_MARK_SIZE,RDB_EOF_MARK_SIZE);
            } else {
                int rem = RDB_EOF_MARK_SIZE-nread;
                memmove(lastbytes,lastbytes+nread,rem);
                memcpy(lastbytes+rem,buf,nread);
            }
            if (memcmp(lastbytes,eofmark,RDB_EOF_MARK_SIZE) == 0)
                break;
        }
    }
    if (usemark) {
        /* The EOF marker itself was written to the file: truncate it away.
         * payload counted down from ULLONG_MAX, so this recovers the real
         * RDB size. */
        payload = ULLONG_MAX - payload - RDB_EOF_MARK_SIZE;
        if (!write_to_stdout && ftruncate(fd, payload) == -1)
            fprintf(stderr,"ftruncate failed: %s.\n", strerror(errno));
        fprintf(stderr,"Transfer finished with success after %llu bytes\n", payload);
    } else {
        fprintf(stderr,"Transfer finished with success.\n");
    }
    redisFree(s); /* Close the connection ASAP as fsync() may take time. */
    if (node)
        node->context = NULL;
    if (!write_to_stdout && fsync(fd) == -1) {
        fprintf(stderr,"Fail to fsync '%s': %s\n", filename, strerror(errno));
        exit(1);
    }
    close(fd);
    if (node) {
        sdsfree(filename);
        return;
    }
    exit(0);
}
+
+/*------------------------------------------------------------------------------
+ * Bulk import (pipe) mode
+ *--------------------------------------------------------------------------- */
+
+#define PIPEMODE_WRITE_LOOP_MAX_BYTES (128*1024)
/* Implements --pipe: stream raw RESP protocol from stdin to the server
 * while concurrently consuming replies, using non blocking I/O on a single
 * connection. When stdin is exhausted a unique 20-byte ECHO is queued;
 * seeing its reply proves every previous command was processed. Exits 0 on
 * success, 1 if any error reply was received or a timeout/I/O error hit. */
static void pipeMode(void) {
    long long errors = 0, replies = 0, obuf_len = 0, obuf_pos = 0;
    char obuf[1024*16]; /* Output buffer */
    char aneterr[ANET_ERR_LEN];
    redisReply *reply;
    int eof = 0; /* True once we consumed all the standard input. */
    int done = 0;
    char magic[20]; /* Special reply we recognize. */
    time_t last_read_time = time(NULL);

    srand(time(NULL)); /* Seed for the random magic ECHO payload. */

    /* Use non blocking I/O. */
    if (anetNonBlock(aneterr,context->fd) == ANET_ERR) {
        fprintf(stderr, "Can't set the socket in non blocking mode: %s\n",
            aneterr);
        exit(1);
    }

    context->flags &= ~REDIS_BLOCK;

    /* Transfer raw protocol and read replies from the server at the same
     * time. */
    while(!done) {
        int mask = AE_READABLE;

        if (!eof || obuf_len != 0) mask |= AE_WRITABLE;
        mask = aeWait(context->fd,mask,1000);

        /* Handle the readable state: we can read replies from the server. */
        if (mask & AE_READABLE) {
            int read_error = 0;

            /* Drain every complete reply already buffered, even after a
             * read error, so pending error replies still reach the user. */
            do {
                if (!read_error && redisBufferRead(context) == REDIS_ERR) {
                    read_error = 1;
                }

                reply = NULL;
                if (redisGetReply(context, (void **) &reply) == REDIS_ERR) {
                    fprintf(stderr, "Error reading replies from server\n");
                    exit(1);
                }
                if (reply) {
                    last_read_time = time(NULL);
                    if (reply->type == REDIS_REPLY_ERROR) {
                        fprintf(stderr,"%s\n", reply->str);
                        errors++;
                    } else if (eof && reply->type == REDIS_REPLY_STRING &&
                               reply->len == 20) {
                        /* Check if this is the reply to our final ECHO
                         * command. If so everything was received
                         * from the server. */
                        if (memcmp(reply->str,magic,20) == 0) {
                            printf("Last reply received from server.\n");
                            done = 1;
                            replies--; /* Don't count the sentinel ECHO. */
                        }
                    }
                    replies++;
                    freeReplyObject(reply);
                }
            } while(reply);

            /* Abort on read errors. We abort here because it is important
             * to consume replies even after a read error: this way we can
             * show a potential problem to the user. */
            if (read_error) exit(1);
        }

        /* Handle the writable state: we can send protocol to the server. */
        if (mask & AE_WRITABLE) {
            ssize_t loop_nwritten = 0;

            while(1) {
                /* Transfer current buffer to server. */
                if (obuf_len != 0) {
                    ssize_t nwritten = cliWriteConn(context,obuf+obuf_pos,obuf_len);

                    if (nwritten == -1) {
                        if (errno != EAGAIN && errno != EINTR) {
                            fprintf(stderr, "Error writing to the server: %s\n",
                                strerror(errno));
                            exit(1);
                        } else {
                            nwritten = 0; /* Retry later, no data moved. */
                        }
                    }
                    obuf_len -= nwritten;
                    obuf_pos += nwritten;
                    loop_nwritten += nwritten;
                    if (obuf_len != 0) break; /* Can't accept more data. */
                }
                if (context->err) {
                    fprintf(stderr, "Server I/O Error: %s\n", context->errstr);
                    exit(1);
                }
                /* If buffer is empty, load from stdin. */
                if (obuf_len == 0 && !eof) {
                    ssize_t nread = read(STDIN_FILENO,obuf,sizeof(obuf));

                    if (nread == 0) {
                        /* The ECHO sequence starts with a "\r\n" so that if there
                         * is garbage in the protocol we read from stdin, the ECHO
                         * will likely still be properly formatted.
                         * CRLF is ignored by Redis, so it has no effects. */
                        char echo[] =
                        "\r\n*2\r\n$4\r\nECHO\r\n$20\r\n01234567890123456789\r\n";
                        int j;

                        eof = 1;
                        /* Everything transferred, so we queue a special
                         * ECHO command that we can match in the replies
                         * to make sure everything was read from the server. */
                        for (j = 0; j < 20; j++)
                            magic[j] = rand() & 0xff;
                        memcpy(echo+21,magic,20);
                        memcpy(obuf,echo,sizeof(echo)-1);
                        obuf_len = sizeof(echo)-1;
                        obuf_pos = 0;
                        printf("All data transferred. Waiting for the last reply...\n");
                    } else if (nread == -1) {
                        fprintf(stderr, "Error reading from stdin: %s\n",
                            strerror(errno));
                        exit(1);
                    } else {
                        obuf_len = nread;
                        obuf_pos = 0;
                    }
                }
                /* Yield the write loop once stdin is done or enough bytes
                 * were pushed, so reads get a chance to run. */
                if ((obuf_len == 0 && eof) ||
                    loop_nwritten > PIPEMODE_WRITE_LOOP_MAX_BYTES) break;
            }
        }

        /* Handle timeout, that is, we reached EOF, and we are not getting
         * replies from the server for a few seconds, nor the final ECHO is
         * received. */
        if (eof && config.pipe_timeout > 0 &&
            time(NULL)-last_read_time > config.pipe_timeout)
        {
            fprintf(stderr,"No replies for %d seconds: exiting.\n",
                config.pipe_timeout);
            errors++;
            break;
        }
    }
    printf("errors: %lld, replies: %lld\n", errors, replies);
    if (errors)
        exit(1);
    else
        exit(0);
}
+
+/*------------------------------------------------------------------------------
+ * Find big keys
+ *--------------------------------------------------------------------------- */
+
+static redisReply *sendScan(unsigned long long *it) {
+ redisReply *reply;
+
+ if (config.pattern)
+ reply = redisCommand(context, "SCAN %llu MATCH %b COUNT %d",
+ *it, config.pattern, sdslen(config.pattern), config.count);
+ else
+ reply = redisCommand(context,"SCAN %llu",*it);
+
+ /* Handle any error conditions */
+ if(reply == NULL) {
+ fprintf(stderr, "\nI/O error\n");
+ exit(1);
+ } else if(reply->type == REDIS_REPLY_ERROR) {
+ fprintf(stderr, "SCAN error: %s\n", reply->str);
+ exit(1);
+ } else if(reply->type != REDIS_REPLY_ARRAY) {
+ fprintf(stderr, "Non ARRAY response from SCAN!\n");
+ exit(1);
+ } else if(reply->elements != 2) {
+ fprintf(stderr, "Invalid element count from SCAN!\n");
+ exit(1);
+ }
+
+ /* Validate our types are correct */
+ assert(reply->element[0]->type == REDIS_REPLY_STRING);
+ assert(reply->element[1]->type == REDIS_REPLY_ARRAY);
+
+ /* Update iterator */
+ *it = strtoull(reply->element[0]->str, NULL, 10);
+
+ return reply;
+}
+
+static int getDbSize(void) {
+ redisReply *reply;
+ int size;
+
+ reply = redisCommand(context, "DBSIZE");
+
+ if (reply == NULL) {
+ fprintf(stderr, "\nI/O error\n");
+ exit(1);
+ } else if (reply->type == REDIS_REPLY_ERROR) {
+ fprintf(stderr, "Couldn't determine DBSIZE: %s\n", reply->str);
+ exit(1);
+ } else if (reply->type != REDIS_REPLY_INTEGER) {
+ fprintf(stderr, "Non INTEGER response from DBSIZE!\n");
+ exit(1);
+ }
+
+ /* Grab the number of keys and free our reply */
+ size = reply->integer;
+ freeReplyObject(reply);
+
+ return size;
+}
+
/* Per key-type statistics and metadata used by --bigkeys/--memkeys. */
typedef struct {
    char *name;      /* Type name as returned by TYPE (sds once registered). */
    char *sizecmd;   /* Command measuring the "size" of this type, or NULL. */
    char *sizeunit;  /* Human readable unit of the size value. */
    unsigned long long biggest;   /* Biggest size observed so far. */
    unsigned long long count;     /* Number of sampled keys of this type. */
    unsigned long long totalsize; /* Sum of sizes, for the average. */
    sds biggest_key;              /* Name of the biggest key (owned sds). */
} typeinfo;

/* Templates for the builtin types; the statistic fields start zeroed.
 * typeinfo_add() copies these into heap-allocated dict entries. */
typeinfo type_string = { "string", "STRLEN", "bytes" };
typeinfo type_list = { "list", "LLEN", "items" };
typeinfo type_set = { "set", "SCARD", "members" };
typeinfo type_hash = { "hash", "HLEN", "fields" };
typeinfo type_zset = { "zset", "ZCARD", "members" };
typeinfo type_stream = { "stream", "XLEN", "entries" };
typeinfo type_other = { "other", NULL, "?" }; /* Modules / unknown types. */
+
+static typeinfo* typeinfo_add(dict *types, char* name, typeinfo* type_template) {
+ typeinfo *info = zmalloc(sizeof(typeinfo));
+ *info = *type_template;
+ info->name = sdsnew(name);
+ dictAdd(types, info->name, info);
+ return info;
+}
+
+void type_free(dict *d, void* val) {
+ typeinfo *info = val;
+ UNUSED(d);
+ if (info->biggest_key)
+ sdsfree(info->biggest_key);
+ sdsfree(info->name);
+ zfree(info);
+}
+
/* dict type for the type-name -> typeinfo map used by --bigkeys/--memkeys.
 * Keys are the sds 'name' field owned by the value itself, hence no key
 * destructor: type_free() releases both key and value together. */
static dictType typeinfoDictType = {
    dictSdsHash, /* hash function */
    NULL, /* key dup */
    NULL, /* val dup */
    dictSdsKeyCompare, /* key compare */
    NULL, /* key destructor (owned by the value)*/
    type_free, /* val destructor */
    NULL /* allow to expand */
};
+
/* Resolve the type of every key in 'keys' by pipelining one TYPE command
 * per key, and fill types[i] with the matching typeinfo from 'types_dict'
 * (NULL when the key disappeared between SCAN and TYPE, i.e. TYPE replied
 * "none"). Unknown type names -- e.g. module types -- get a fresh entry
 * cloned from type_other. Exits the process on protocol errors. */
static void getKeyTypes(dict *types_dict, redisReply *keys, typeinfo **types) {
    redisReply *reply;
    unsigned int i;

    /* Pipeline TYPE commands */
    for(i=0;i<keys->elements;i++) {
        const char* argv[] = {"TYPE", keys->element[i]->str};
        size_t lens[] = {4, keys->element[i]->len};
        redisAppendCommandArgv(context, 2, argv, lens);
    }

    /* Retrieve types: replies come back in the same order as the keys. */
    for(i=0;i<keys->elements;i++) {
        if(redisGetReply(context, (void**)&reply)!=REDIS_OK) {
            fprintf(stderr, "Error getting type for key '%s' (%d: %s)\n",
                keys->element[i]->str, context->err, context->errstr);
            exit(1);
        } else if(reply->type != REDIS_REPLY_STATUS) {
            if(reply->type == REDIS_REPLY_ERROR) {
                fprintf(stderr, "TYPE returned an error: %s\n", reply->str);
            } else {
                fprintf(stderr,
                    "Invalid reply type (%d) for TYPE on key '%s'!\n",
                    reply->type, keys->element[i]->str);
            }
            exit(1);
        }

        /* Look up the type by name; the temporary sds is only needed
         * because the dict hashes sds keys. */
        sds typereply = sdsnew(reply->str);
        dictEntry *de = dictFind(types_dict, typereply);
        sdsfree(typereply);
        typeinfo *type = NULL;
        if (de)
            type = dictGetVal(de);
        else if (strcmp(reply->str, "none")) /* create new types for modules, (but not for deleted keys) */
            type = typeinfo_add(types_dict, reply->str, &type_other);
        types[i] = type;
        freeReplyObject(reply);
    }
}
+
/* Measure every key in 'keys' by pipelining one size query per key: the
 * per-type length command (STRLEN, LLEN, ...) in bigkeys mode, or
 * MEMORY USAGE [SAMPLES memkeys_samples] when 'memkeys' is non-zero.
 * Fills sizes[i]; keys skipped in the request phase (vanished keys, or
 * types without a size command outside memkeys mode) get size 0, as do
 * keys whose type changed in the meantime (warning only). Exits on hard
 * protocol errors. */
static void getKeySizes(redisReply *keys, typeinfo **types,
                        unsigned long long *sizes, int memkeys,
                        unsigned memkeys_samples)
{
    redisReply *reply;
    unsigned int i;

    /* Pipeline size commands */
    for(i=0;i<keys->elements;i++) {
        /* Skip keys that disappeared between SCAN and TYPE (or unknown types when not in memkeys mode) */
        if(!types[i] || (!types[i]->sizecmd && !memkeys))
            continue;

        if (!memkeys) {
            const char* argv[] = {types[i]->sizecmd, keys->element[i]->str};
            size_t lens[] = {strlen(types[i]->sizecmd), keys->element[i]->len};
            redisAppendCommandArgv(context, 2, argv, lens);
        } else if (memkeys_samples==0) {
            const char* argv[] = {"MEMORY", "USAGE", keys->element[i]->str};
            size_t lens[] = {6, 5, keys->element[i]->len};
            redisAppendCommandArgv(context, 3, argv, lens);
        } else {
            sds samplesstr = sdsfromlonglong(memkeys_samples);
            const char* argv[] = {"MEMORY", "USAGE", keys->element[i]->str, "SAMPLES", samplesstr};
            size_t lens[] = {6, 5, keys->element[i]->len, 7, sdslen(samplesstr)};
            redisAppendCommandArgv(context, 5, argv, lens);
            sdsfree(samplesstr);
        }
    }

    /* Retrieve sizes: the skip condition must mirror the request loop so
     * replies stay paired with the right keys. */
    for(i=0;i<keys->elements;i++) {
        /* Skip keys that disappeared between SCAN and TYPE (or unknown types when not in memkeys mode) */
        if(!types[i] || (!types[i]->sizecmd && !memkeys)) {
            sizes[i] = 0;
            continue;
        }

        /* Retrieve size */
        if(redisGetReply(context, (void**)&reply)!=REDIS_OK) {
            fprintf(stderr, "Error getting size for key '%s' (%d: %s)\n",
                keys->element[i]->str, context->err, context->errstr);
            exit(1);
        } else if(reply->type != REDIS_REPLY_INTEGER) {
            /* Theoretically the key could have been removed and
             * added as a different type between TYPE and SIZE */
            fprintf(stderr,
                "Warning: %s on '%s' failed (may have changed type)\n",
                !memkeys? types[i]->sizecmd: "MEMORY USAGE",
                keys->element[i]->str);
            sizes[i] = 0;
        } else {
            sizes[i] = reply->integer;
        }

        freeReplyObject(reply);
    }
}
+
/* SIGINT handler for the long running stat loops (--bigkeys, --hotkeys):
 * raises a flag so the scan loop stops gracefully and the partial summary
 * can still be printed. */
static void longStatLoopModeStop(int s) {
    UNUSED(s);
    force_cancel_loop = 1;
}
+
/* Implements --bigkeys / --memkeys: SCAN the entire keyspace, resolve the
 * type and size of every sampled key, track per type the biggest key and
 * aggregate statistics, then print a summary. When 'memkeys' is non-zero
 * sizes come from MEMORY USAGE (optionally with 'memkeys_samples') instead
 * of the per-type length commands. Interruptible via SIGINT; never
 * returns: exit(0) when done, exit(1) on errors. */
static void findBigKeys(int memkeys, unsigned memkeys_samples) {
    unsigned long long sampled = 0, total_keys, totlen=0, *sizes=NULL, it=0, scan_loops = 0;
    redisReply *reply, *keys;
    unsigned int arrsize=0, i;
    dictIterator *di;
    dictEntry *de;
    typeinfo **types = NULL;
    double pct;

    /* Register the builtin types; module types are added lazily by
     * getKeyTypes() as they are discovered. */
    dict *types_dict = dictCreate(&typeinfoDictType);
    typeinfo_add(types_dict, "string", &type_string);
    typeinfo_add(types_dict, "list", &type_list);
    typeinfo_add(types_dict, "set", &type_set);
    typeinfo_add(types_dict, "hash", &type_hash);
    typeinfo_add(types_dict, "zset", &type_zset);
    typeinfo_add(types_dict, "stream", &type_stream);

    signal(SIGINT, longStatLoopModeStop);
    /* Total keys pre scanning */
    total_keys = getDbSize();

    /* Status message */
    printf("\n# Scanning the entire keyspace to find biggest keys as well as\n");
    printf("# average sizes per key type. You can use -i 0.1 to sleep 0.1 sec\n");
    printf("# per 100 SCAN commands (not usually needed).\n\n");

    /* SCAN loop */
    do {
        /* Calculate approximate percentage completion */
        pct = 100 * (double)sampled/total_keys;

        /* Grab some keys and point to the keys array */
        reply = sendScan(&it);
        scan_loops++;
        keys = reply->element[1];

        /* Reallocate our type and size array if we need to */
        if(keys->elements > arrsize) {
            types = zrealloc(types, sizeof(typeinfo*)*keys->elements);
            sizes = zrealloc(sizes, sizeof(unsigned long long)*keys->elements);

            if(!types || !sizes) {
                fprintf(stderr, "Failed to allocate storage for keys!\n");
                exit(1);
            }

            arrsize = keys->elements;
        }

        /* Retrieve types and then sizes */
        getKeyTypes(types_dict, keys, types);
        getKeySizes(keys, types, sizes, memkeys, memkeys_samples);

        /* Now update our stats */
        for(i=0;i<keys->elements;i++) {
            typeinfo *type = types[i];
            /* Skip keys that disappeared between SCAN and TYPE */
            if(!type)
                continue;

            type->totalsize += sizes[i];
            type->count++;
            totlen += keys->element[i]->len;
            sampled++;

            if(type->biggest<sizes[i]) {
                /* Keep track of biggest key name for this type */
                if (type->biggest_key)
                    sdsfree(type->biggest_key);
                /* sdscatrepr() quotes/escapes the key name for display. */
                type->biggest_key = sdscatrepr(sdsempty(), keys->element[i]->str, keys->element[i]->len);
                if(!type->biggest_key) {
                    fprintf(stderr, "Failed to allocate memory for key!\n");
                    exit(1);
                }

                printf(
                   "[%05.2f%%] Biggest %-6s found so far '%s' with %llu %s\n",
                   pct, type->name, type->biggest_key, sizes[i],
                   !memkeys? type->sizeunit: "bytes");

                /* Keep track of the biggest size for this type */
                type->biggest = sizes[i];
            }

            /* Update overall progress */
            if(sampled % 1000000 == 0) {
                printf("[%05.2f%%] Sampled %llu keys so far\n", pct, sampled);
            }
        }

        /* Sleep if we've been directed to do so */
        if (config.interval && (scan_loops % 100) == 0) {
            usleep(config.interval);
        }

        freeReplyObject(reply);
    } while(force_cancel_loop == 0 && it != 0); /* Cursor 0 = scan complete. */

    if(types) zfree(types);
    if(sizes) zfree(sizes);

    /* We're done */
    printf("\n-------- summary -------\n\n");
    if (force_cancel_loop) printf("[%05.2f%%] ", pct);
    printf("Sampled %llu keys in the keyspace!\n", sampled);
    printf("Total key length in bytes is %llu (avg len %.2f)\n\n",
       totlen, totlen ? (double)totlen/sampled : 0);

    /* Output the biggest keys we found, for types we did find */
    di = dictGetIterator(types_dict);
    while ((de = dictNext(di))) {
        typeinfo *type = dictGetVal(de);
        if(type->biggest_key) {
            printf("Biggest %6s found '%s' has %llu %s\n", type->name, type->biggest_key,
               type->biggest, !memkeys? type->sizeunit: "bytes");
        }
    }
    dictReleaseIterator(di);

    printf("\n");

    di = dictGetIterator(types_dict);
    while ((de = dictNext(di))) {
        typeinfo *type = dictGetVal(de);
        printf("%llu %ss with %llu %s (%05.2f%% of keys, avg size %.2f)\n",
           type->count, type->name, type->totalsize, !memkeys? type->sizeunit: "bytes",
           sampled ? 100 * (double)type->count/sampled : 0,
           type->count ? (double)type->totalsize/type->count : 0);
    }
    dictReleaseIterator(di);

    dictRelease(types_dict);

    /* Success! */
    exit(0);
}
+
/* Retrieve the LFU access frequency counter of every key in 'keys' by
 * pipelining OBJECT FREQ commands, filling freqs[i]. A key that cannot be
 * queried (e.g. deleted in the meantime) gets frequency 0 with a warning;
 * a hard error reply (e.g. the server not running an LFU maxmemory
 * policy -- TODO confirm against the server's OBJECT FREQ semantics)
 * terminates the process. */
static void getKeyFreqs(redisReply *keys, unsigned long long *freqs) {
    redisReply *reply;
    unsigned int i;

    /* Pipeline OBJECT freq commands */
    for(i=0;i<keys->elements;i++) {
        const char* argv[] = {"OBJECT", "FREQ", keys->element[i]->str};
        size_t lens[] = {6, 4, keys->element[i]->len};
        redisAppendCommandArgv(context, 3, argv, lens);
    }

    /* Retrieve freqs: replies come back in key order. */
    for(i=0;i<keys->elements;i++) {
        if(redisGetReply(context, (void**)&reply)!=REDIS_OK) {
            sds keyname = sdscatrepr(sdsempty(), keys->element[i]->str, keys->element[i]->len);
            fprintf(stderr, "Error getting freq for key '%s' (%d: %s)\n",
                keyname, context->err, context->errstr);
            sdsfree(keyname);
            exit(1);
        } else if(reply->type != REDIS_REPLY_INTEGER) {
            if(reply->type == REDIS_REPLY_ERROR) {
                fprintf(stderr, "Error: %s\n", reply->str);
                exit(1);
            } else {
                sds keyname = sdscatrepr(sdsempty(), keys->element[i]->str, keys->element[i]->len);
                fprintf(stderr, "Warning: OBJECT freq on '%s' failed (may have been deleted)\n", keyname);
                sdsfree(keyname);
                freqs[i] = 0;
            }
        } else {
            freqs[i] = reply->integer;
        }
        freeReplyObject(reply);
    }
}
+
+#define HOTKEYS_SAMPLE 16
/* Implements --hotkeys: SCAN the whole keyspace, query every key's LFU
 * frequency counter, and keep the HOTKEYS_SAMPLE hottest keys in a small
 * sorted pool (same technique as the server's eviction pool: counters[]
 * ascending, counters[HOTKEYS_SAMPLE-1] is the hottest). Interruptible
 * via SIGINT; never returns: exit(0) after printing the summary. */
static void findHotKeys(void) {
    redisReply *keys, *reply;
    unsigned long long counters[HOTKEYS_SAMPLE] = {0};
    sds hotkeys[HOTKEYS_SAMPLE] = {NULL};
    unsigned long long sampled = 0, total_keys, *freqs = NULL, it = 0, scan_loops = 0;
    unsigned int arrsize = 0, i, k;
    double pct;

    signal(SIGINT, longStatLoopModeStop);
    /* Total keys pre scanning */
    total_keys = getDbSize();

    /* Status message */
    printf("\n# Scanning the entire keyspace to find hot keys as well as\n");
    printf("# average sizes per key type. You can use -i 0.1 to sleep 0.1 sec\n");
    printf("# per 100 SCAN commands (not usually needed).\n\n");

    /* SCAN loop */
    do {
        /* Calculate approximate percentage completion */
        pct = 100 * (double)sampled/total_keys;

        /* Grab some keys and point to the keys array */
        reply = sendScan(&it);
        scan_loops++;
        keys = reply->element[1];

        /* Reallocate our freqs array if we need to */
        if(keys->elements > arrsize) {
            freqs = zrealloc(freqs, sizeof(unsigned long long)*keys->elements);

            if(!freqs) {
                fprintf(stderr, "Failed to allocate storage for keys!\n");
                exit(1);
            }

            arrsize = keys->elements;
        }

        getKeyFreqs(keys, freqs);

        /* Now update our stats */
        for(i=0;i<keys->elements;i++) {
            sampled++;
            /* Update overall progress */
            if(sampled % 1000000 == 0) {
                printf("[%05.2f%%] Sampled %llu keys so far\n", pct, sampled);
            }

            /* Use eviction pool here: find the insertion point 'k' so the
             * counters array stays sorted ascending. */
            k = 0;
            while (k < HOTKEYS_SAMPLE && freqs[i] > counters[k]) k++;
            if (k == 0) continue; /* Colder than everything in the pool. */
            k--;
            if (k == 0 || counters[k] == 0) {
                /* Replace the coldest (or an empty) slot in place. */
                sdsfree(hotkeys[k]);
            } else {
                /* Evict the coldest entry and shift the pool down to open
                 * slot 'k' for the new key. */
                sdsfree(hotkeys[0]);
                memmove(counters,counters+1,sizeof(counters[0])*k);
                memmove(hotkeys,hotkeys+1,sizeof(hotkeys[0])*k);
            }
            counters[k] = freqs[i];
            hotkeys[k] = sdscatrepr(sdsempty(), keys->element[i]->str, keys->element[i]->len);
            printf(
               "[%05.2f%%] Hot key '%s' found so far with counter %llu\n",
               pct, hotkeys[k], freqs[i]);
        }

        /* Sleep if we've been directed to do so */
        if (config.interval && (scan_loops % 100) == 0) {
            usleep(config.interval);
        }

        freeReplyObject(reply);
    } while(force_cancel_loop ==0 && it != 0); /* Cursor 0 = scan complete. */

    if (freqs) zfree(freqs);

    /* We're done */
    printf("\n-------- summary -------\n\n");
    if(force_cancel_loop)printf("[%05.2f%%] ",pct);
    printf("Sampled %llu keys in the keyspace!\n", sampled);

    /* Print hottest first (pool is sorted ascending). */
    for (i=1; i<= HOTKEYS_SAMPLE; i++) {
        k = HOTKEYS_SAMPLE - i;
        if(counters[k]>0) {
            printf("hot key found with counter: %llu\tkeyname: %s\n", counters[k], hotkeys[k]);
            sdsfree(hotkeys[k]);
        }
    }

    exit(0);
}
+
+/*------------------------------------------------------------------------------
+ * Stats mode
+ *--------------------------------------------------------------------------- */
+
/* Return the specified INFO field from the INFO command output "info".
 * A new buffer is allocated for the result, that needs to be free'd.
 * If the field is not found NULL is returned.
 *
 * The value is the text between "<field>:" and the first '\r' or ','
 * (INFO packs comma-separated sub-fields on a single line, e.g.
 * "db0:keys=1,expires=0"). NULL is also returned when the text after the
 * field has no terminator at all: the original code computed 'n1 - p'
 * with a NULL 'n1' in that case, which is undefined behavior. */
static char *getInfoField(char *info, char *field) {
    char *p = strstr(info,field);
    char *n1, *n2;
    char *result;

    if (!p) return NULL;
    p += strlen(field)+1;   /* Skip the field name and the ':' separator. */
    n1 = strchr(p,'\r');    /* End of the INFO line. */
    n2 = strchr(p,',');     /* Possible comma-separated sub-field. */
    if (n1 == NULL && n2 == NULL) return NULL; /* Truncated/malformed INFO. */
    if (n1 == NULL || (n2 && n2 < n1)) n1 = n2; /* Use the nearest terminator. */
    result = zmalloc(sizeof(char)*(n1-p)+1);
    memcpy(result,p,(n1-p));
    result[n1-p] = '\0';
    return result;
}
+
/* Like getInfoField() but converts the value to a long with strtol().
 * Returns LONG_MIN when the field is missing, so callers can tell a
 * missing field apart from a zero value. */
static long getLongInfoField(char *info, char *field) {
    char *raw = getInfoField(info,field);

    if (raw == NULL) return LONG_MIN;
    long parsed = strtol(raw,NULL,10);
    zfree(raw);
    return parsed;
}
+
/* Convert number of bytes into a human readable string of the form:
 * 100B, 2G, 100M, 4K, 2.5T and so forth.
 *
 * 's' is the destination buffer of 'size' bytes, 'n' the byte count;
 * negative values are rendered with a leading '-'. The buffer is always
 * NUL-terminated for size > 0. Two fixes over the original version:
 * values >= 1TiB previously matched no branch and left 's' untouched
 * (callers then printed an uninitialized buffer), and the '-' sign was
 * not subtracted from 'size', allowing snprintf() to write one byte past
 * the end of the buffer for negative inputs. */
void bytesToHuman(char *s, size_t size, long long n) {
    double d;

    if (n < 0) {
        *s = '-';
        s++;
        size--; /* Account for the sign so snprintf() stays in bounds. */
        n = -n;
    }
    if (n < 1024) {
        /* Bytes */
        snprintf(s,size,"%lldB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        snprintf(s,size,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        snprintf(s,size,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        snprintf(s,size,"%.2fG",d);
    } else {
        /* Terabytes and beyond: previously unhandled. */
        d = (double)n/(1024LL*1024*1024*1024);
        snprintf(s,size,"%.2fT",d);
    }
}
+
/* Implements --stat: poll the server with INFO every config.interval
 * microseconds and print one summary row per sample (keys, memory,
 * clients, request/connection deltas, child process status).
 * Loops forever; exits only on an I/O or server error. */
static void statMode(void) {
    redisReply *reply;
    long aux, requests = 0;
    int i = 0;

    while(1) {
        char buf[64];
        int j;

        reply = reconnectingRedisCommand(context,"INFO");
        if (reply == NULL) {
            fprintf(stderr, "\nI/O error\n");
            exit(1);
        } else if (reply->type == REDIS_REPLY_ERROR) {
            fprintf(stderr, "ERROR: %s\n", reply->str);
            exit(1);
        }

        /* Reprint the column header every 20 samples. */
        if ((i++ % 20) == 0) {
            printf(
"------- data ------ --------------------- load -------------------- - child -\n"
"keys mem clients blocked requests connections \n");
        }

        /* Keys: sum the per-database key counts (only db0..db19 are
         * inspected; missing databases yield LONG_MIN and are skipped). */
        aux = 0;
        for (j = 0; j < 20; j++) {
            long k;

            snprintf(buf,sizeof(buf),"db%d:keys",j);
            k = getLongInfoField(reply->str,buf);
            if (k == LONG_MIN) continue; /* DB not present in INFO output. */
            aux += k;
        }
        snprintf(buf,sizeof(buf),"%ld",aux);
        printf("%-11s",buf);

        /* Used memory */
        aux = getLongInfoField(reply->str,"used_memory");
        bytesToHuman(buf,sizeof(buf),aux);
        printf("%-8s",buf);

        /* Clients */
        aux = getLongInfoField(reply->str,"connected_clients");
        snprintf(buf,sizeof(buf),"%ld",aux);
        printf(" %-8s",buf);

        /* Blocked (BLPOPPING) Clients */
        aux = getLongInfoField(reply->str,"blocked_clients");
        snprintf(buf,sizeof(buf),"%ld",aux);
        printf("%-8s",buf);

        /* Requests: cumulative total plus delta since the previous sample
         * (delta suppressed on the very first row). */
        aux = getLongInfoField(reply->str,"total_commands_processed");
        snprintf(buf,sizeof(buf),"%ld (+%ld)",aux,requests == 0 ? 0 : aux-requests);
        printf("%-19s",buf);
        requests = aux;

        /* Connections */
        aux = getLongInfoField(reply->str,"total_connections_received");
        snprintf(buf,sizeof(buf),"%ld",aux);
        printf(" %-12s",buf);

        /* Children: bitmask with bit0 = BGSAVE, bit1 = AOF rewrite,
         * bit2 = loading. NOTE(review): if any of these INFO fields is
         * missing, getLongInfoField() returns LONG_MIN and the mask is
         * meaningless -- presumably they are always present; verify. */
        aux = getLongInfoField(reply->str,"bgsave_in_progress");
        aux |= getLongInfoField(reply->str,"aof_rewrite_in_progress") << 1;
        aux |= getLongInfoField(reply->str,"loading") << 2;
        switch(aux) {
        case 0: break;
        case 1:
            printf("SAVE");
            break;
        case 2:
            printf("AOF");
            break;
        case 3:
            printf("SAVE+AOF");
            break;
        case 4:
            printf("LOAD");
            break;
        }
        /* Combinations involving LOAD (5, 6, 7) print nothing. */

        printf("\n");
        freeReplyObject(reply);
        usleep(config.interval);
    }
}
+
+/*------------------------------------------------------------------------------
+ * Scan mode
+ *--------------------------------------------------------------------------- */
+
+static void scanMode(void) {
+ redisReply *reply;
+ unsigned long long cur = 0;
+ signal(SIGINT, longStatLoopModeStop);
+ do {
+ reply = sendScan(&cur);
+ for (unsigned int j = 0; j < reply->element[1]->elements; j++) {
+ if (config.output == OUTPUT_STANDARD) {
+ sds out = sdscatrepr(sdsempty(), reply->element[1]->element[j]->str,
+ reply->element[1]->element[j]->len);
+ printf("%s\n", out);
+ sdsfree(out);
+ } else {
+ printf("%s\n", reply->element[1]->element[j]->str);
+ }
+ }
+ freeReplyObject(reply);
+ if (config.interval) usleep(config.interval);
+ } while(force_cancel_loop == 0 && cur != 0);
+
+ exit(0);
+}
+
+/*------------------------------------------------------------------------------
+ * LRU test mode
+ *--------------------------------------------------------------------------- */
+
+/* Return an integer from min to max (both inclusive) using a power-law
+ * distribution, depending on the value of alpha: the greater the alpha
+ * the more bias towards lower values.
+ *
+ * With alpha = 6.2 the output follows the 80-20 rule where 20% of
+ * the returned numbers will account for 80% of the frequency. */
+long long powerLawRand(long long min, long long max, double alpha) {
+ double pl, r;
+
+ max += 1;
+ r = ((double)rand()) / RAND_MAX;
+ pl = pow(
+ ((pow(max,alpha+1) - pow(min,alpha+1))*r + pow(min,alpha+1)),
+ (1.0/(alpha+1)));
+ return (max-1-(long long)pl)+min;
+}
+
+/* Generates a key name among a set of lru_test_sample_size keys, using
+ * an 80-20 distribution. */
+void LRUTestGenKey(char *buf, size_t buflen) {
+ snprintf(buf, buflen, "lru:%lld",
+ powerLawRand(1, config.lru_test_sample_size, 6.2));
+}
+
#define LRU_CYCLE_PERIOD 1000 /* 1000 milliseconds. */
#define LRU_CYCLE_PIPELINE_SIZE 250
/* Implements --lru-test: forever alternate pipelined bursts of SET and
 * GET commands over a power-law key distribution, printing the observed
 * hit/miss ratio once per cycle. Exits only on I/O error. */
static void LRUTestMode(void) {
    redisReply *reply;
    char key[128];
    long long start_cycle;
    int j;

    srand(time(NULL)^getpid());
    while(1) {
        /* Perform cycles of 1 second with 50% writes and 50% reads.
         * We use pipelining batching writes / reads N times per cycle in order
         * to fill the target instance easily. */
        start_cycle = mstime();
        long long hits = 0, misses = 0;
        while(mstime() - start_cycle < LRU_CYCLE_PERIOD) {
            /* Write cycle: SET each key to a random 5-letter value. */
            for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
                char val[6];
                val[5] = '\0';
                for (int i = 0; i < 5; i++) val[i] = 'A'+rand()%('z'-'A');
                LRUTestGenKey(key,sizeof(key));
                redisAppendCommand(context, "SET %s %s",key,val);
            }
            /* Drain the SET replies. NOTE(review): the replies fetched
             * here and in the read cycle below are never passed to
             * freeReplyObject() -- looks like a leak under hiredis
             * ownership conventions; confirm. */
            for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++)
                redisGetReply(context, (void**)&reply);

            /* Read cycle: GET the same distribution of keys and count
             * hits (non-nil replies) versus misses (nil replies). */
            for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
                LRUTestGenKey(key,sizeof(key));
                redisAppendCommand(context, "GET %s",key);
            }
            for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
                if (redisGetReply(context, (void**)&reply) == REDIS_OK) {
                    switch(reply->type) {
                    case REDIS_REPLY_ERROR:
                        fprintf(stderr, "%s\n", reply->str);
                        break;
                    case REDIS_REPLY_NIL:
                        misses++;
                        break;
                    default:
                        hits++;
                        break;
                    }
                }
            }

            if (context->err) {
                fprintf(stderr,"I/O error during LRU test\n");
                exit(1);
            }
        }
        /* Print stats. NOTE(review): divides by hits+misses, which is
         * zero only if every redisGetReply() above failed without
         * setting context->err -- confirm that cannot happen. */
        printf(
            "%lld Gets/sec | Hits: %lld (%.2f%%) | Misses: %lld (%.2f%%)\n",
            hits+misses,
            hits, (double)hits/(hits+misses)*100,
            misses, (double)misses/(hits+misses)*100);
    }
    /* Unreachable: the while(1) loop above never breaks. */
    exit(0);
}
+
+/*------------------------------------------------------------------------------
+ * Intrinsic latency mode.
+ *
+ * Measure max latency of a running process that does not result from
+ * syscalls. Basically this software should provide a hint about how much
+ * time the kernel leaves the process without a chance to run.
+ *--------------------------------------------------------------------------- */
+
/* This is just some computation the compiler can't optimize out.
 * Should run in less than 100-200 microseconds even using very
 * slow hardware. Runs in less than 10 microseconds in modern HW. */
unsigned long compute_something_fast(void) {
    unsigned char state[256], a, b, swap;
    unsigned long acc = 0;
    int k;

    /* Start from the identity permutation, then scramble it with a
     * fixed number of RC4-style swap steps, accumulating as we go. */
    for (k = 0; k < 256; k++) state[k] = k;

    a = 0;
    b = 0;
    for (k = 0; k < 1000; k++) {
        a++;
        b = b + state[a];
        swap = state[a];
        state[a] = state[b];
        state[b] = swap;
        acc += state[(state[a]+state[b])&255];
    }
    return acc;
}
+
+static void sigIntHandler(int s) {
+ UNUSED(s);
+
+ if (config.monitor_mode || config.pubsub_mode) {
+ close(context->fd);
+ context->fd = REDIS_INVALID_FD;
+ config.blocking_state_aborted = 1;
+ } else {
+ exit(1);
+ }
+}
+
/* Implements --intrinsic-latency: repeatedly time a short burst of pure
 * computation and report the worst observed wall-clock latency for the
 * configured number of seconds, then print totals and exit. */
static void intrinsicLatencyMode(void) {
    long long test_end, run_time, max_latency = 0, runs = 0;

    run_time = (long long)config.intrinsic_latency_duration * 1000000;
    test_end = ustime() + run_time;
    signal(SIGINT, longStatLoopModeStop);

    while(1) {
        long long start, end, latency;

        start = ustime();
        compute_something_fast();
        end = ustime();
        latency = end-start;
        runs++;
        /* Ignore samples at or below clock resolution. */
        if (latency <= 0) continue;

        /* Reporting */
        if (latency > max_latency) {
            max_latency = latency;
            printf("Max latency so far: %lld microseconds.\n", max_latency);
        }

        /* NOTE(review): the average divides the *configured* run_time by
         * runs, not the actually elapsed time, so an early Ctrl-C abort
         * overstates the per-run average -- confirm whether intended. */
        double avg_us = (double)run_time/runs;
        double avg_ns = avg_us * 1e3;
        if (force_cancel_loop || end > test_end) {
            printf("\n%lld total runs "
                "(avg latency: "
                "%.4f microseconds / %.2f nanoseconds per run).\n",
                runs, avg_us, avg_ns);
            printf("Worst run took %.0fx longer than the average latency.\n",
                max_latency / avg_us);
            exit(0);
        }
    }
}
+
+static sds askPassword(const char *msg) {
+ linenoiseMaskModeEnable();
+ sds auth = linenoise(msg);
+ linenoiseMaskModeDisable();
+ return auth;
+}
+
+/* Prints out the hint completion string for a given input prefix string. */
+void testHint(const char *input) {
+ cliInitHelp();
+
+ sds hint = getHintForInput(input);
+ printf("%s\n", hint);
+ exit(0);
+}
+
+sds readHintSuiteLine(char buf[], size_t size, FILE *fp) {
+ while (fgets(buf, size, fp) != NULL) {
+ if (buf[0] != '#') {
+ sds input = sdsnew(buf);
+
+ /* Strip newline. */
+ input = sdstrim(input, "\n");
+ return input;
+ }
+ }
+ return NULL;
+}
+
+/* Runs a suite of hint completion tests contained in a file. */
+void testHintSuite(char *filename) {
+ FILE *fp;
+ char buf[256];
+ sds line, input, expected, hint;
+ int pass=0, fail=0;
+ int argc;
+ char **argv;
+
+ fp = fopen(filename, "r");
+ if (!fp) {
+ fprintf(stderr,
+ "Can't open file '%s': %s\n", filename, strerror(errno));
+ exit(-1);
+ }
+
+ cliInitHelp();
+
+ while (1) {
+ line = readHintSuiteLine(buf, sizeof(buf), fp);
+ if (line == NULL) break;
+ argv = sdssplitargs(line, &argc);
+ sdsfree(line);
+ if (argc == 0) {
+ sdsfreesplitres(argv, argc);
+ continue;
+ }
+
+ if (argc == 1) {
+ fprintf(stderr,
+ "Missing expected hint for input '%s'\n", argv[0]);
+ exit(-1);
+ }
+ input = argv[0];
+ expected = argv[1];
+ hint = getHintForInput(input);
+ if (config.verbose) {
+ printf("Input: '%s', Expected: '%s', Hint: '%s'\n", input, expected, hint);
+ }
+
+ /* Strip trailing spaces from hint - they don't matter. */
+ while (hint != NULL && sdslen(hint) > 0 && hint[sdslen(hint) - 1] == ' ') {
+ sdssetlen(hint, sdslen(hint) - 1);
+ hint[sdslen(hint)] = '\0';
+ }
+
+ if (hint == NULL || strcmp(hint, expected) != 0) {
+ fprintf(stderr, "Test case '%s' FAILED: expected '%s', got '%s'\n", input, expected, hint);
+ ++fail;
+ }
+ else {
+ ++pass;
+ }
+ sdsfreesplitres(argv, argc);
+ sdsfree(hint);
+ }
+ fclose(fp);
+
+ printf("%s: %d/%d passed\n", fail == 0 ? "SUCCESS" : "FAILURE", pass, pass + fail);
+ exit(fail);
+}
+
+/*------------------------------------------------------------------------------
+ * Program main()
+ *--------------------------------------------------------------------------- */
+
/* redis-cli entry point. Initializes every field of the global "config"
 * to its default, parses command line options and environment variables,
 * then dispatches to the first matching special mode (each mode function
 * never returns). With no mode and no arguments an interactive REPL is
 * started; otherwise the arguments are executed as a single command. */
int main(int argc, char **argv) {
    int firstarg;
    struct timeval tv;

    /* Connection defaults: localhost:6379, no unix socket, db 0. */
    memset(&config.sslconfig, 0, sizeof(config.sslconfig));
    config.conn_info.hostip = sdsnew("127.0.0.1");
    config.conn_info.hostport = 6379;
    config.hostsocket = NULL;
    config.repeat = 1;
    config.interval = 0;
    config.dbnum = 0;
    config.conn_info.input_dbnum = 0;
    config.interactive = 0;
    config.shutdown = 0;
    config.monitor_mode = 0;
    config.pubsub_mode = 0;
    config.blocking_state_aborted = 0;
    /* Special-mode flags, all off by default. */
    config.latency_mode = 0;
    config.latency_dist_mode = 0;
    config.latency_history = 0;
    config.lru_test_mode = 0;
    config.lru_test_sample_size = 0;
    config.cluster_mode = 0;
    config.cluster_send_asking = 0;
    config.slave_mode = 0;
    config.getrdb_mode = 0;
    config.get_functions_rdb_mode = 0;
    config.stat_mode = 0;
    config.scan_mode = 0;
    config.count = 10;
    config.intrinsic_latency_mode = 0;
    config.pattern = NULL;
    config.rdb_filename = NULL;
    config.pipe_mode = 0;
    config.pipe_timeout = REDIS_CLI_DEFAULT_PIPE_TIMEOUT;
    config.bigkeys = 0;
    config.hotkeys = 0;
    config.stdin_lastarg = 0;
    config.stdin_tag_arg = 0;
    config.stdin_tag_name = NULL;
    /* Authentication and EVAL/Lua-debugger defaults. */
    config.conn_info.auth = NULL;
    config.askpass = 0;
    config.conn_info.user = NULL;
    config.eval = NULL;
    config.eval_ldb = 0;
    config.eval_ldb_end = 0;
    config.eval_ldb_sync = 0;
    config.enable_ldb_on_eval = 0;
    config.last_cmd_type = -1;
    config.last_reply = NULL;
    config.verbose = 0;
    config.set_errcode = 0;
    config.no_auth_warning = 0;
    config.in_multi = 0;
    config.server_version = NULL;
    /* Cluster manager (--cluster) subcommand defaults. */
    config.cluster_manager_command.name = NULL;
    config.cluster_manager_command.argc = 0;
    config.cluster_manager_command.argv = NULL;
    config.cluster_manager_command.stdin_arg = NULL;
    config.cluster_manager_command.flags = 0;
    config.cluster_manager_command.replicas = 0;
    config.cluster_manager_command.from = NULL;
    config.cluster_manager_command.to = NULL;
    config.cluster_manager_command.from_user = NULL;
    config.cluster_manager_command.from_pass = NULL;
    config.cluster_manager_command.from_askpass = 0;
    config.cluster_manager_command.weight = NULL;
    config.cluster_manager_command.weight_argc = 0;
    config.cluster_manager_command.slots = 0;
    config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT;
    config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE;
    config.cluster_manager_command.threshold =
        CLUSTER_MANAGER_REBALANCE_THRESHOLD;
    config.cluster_manager_command.backup_dir = NULL;
    pref.hints = 1;

    spectrum_palette = spectrum_palette_color;
    spectrum_palette_size = spectrum_palette_color_size;

    /* Use raw output when stdout is not a tty; the FAKETTY environment
     * variable forces tty-style (standard) output regardless. */
    if (!isatty(fileno(stdout)) && (getenv("FAKETTY") == NULL)) {
        config.output = OUTPUT_RAW;
        config.push_output = 0;
    } else {
        config.output = OUTPUT_STANDARD;
        config.push_output = 1;
    }
    config.mb_delim = sdsnew("\n");
    config.cmd_delim = sdsnew("\n");

    /* Consume the options; what remains is the command to execute. */
    firstarg = parseOptions(argc,argv);
    argc -= firstarg;
    argv += firstarg;

    parseEnv();

    /* Interactive password prompts requested with --askpass. */
    if (config.askpass) {
        config.conn_info.auth = askPassword("Please input password: ");
    }

    if (config.cluster_manager_command.from_askpass) {
        config.cluster_manager_command.from_pass = askPassword(
            "Please input import source node password: ");
    }

#ifdef USE_OPENSSL
    if (config.tls) {
        cliSecureInit();
    }
#endif

    /* Seed the 64 bit PRNG from wall clock time and PID. */
    gettimeofday(&tv, NULL);
    init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());

    /* Cluster Manager mode */
    if (CLUSTER_MANAGER_MODE()) {
        clusterManagerCommandProc *proc = validateClusterManagerCommand();
        if (!proc) {
            exit(1);
        }
        clusterManagerMode(proc);
    }

    /* Latency mode */
    if (config.latency_mode) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        latencyMode();
    }

    /* Latency distribution mode */
    if (config.latency_dist_mode) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        latencyDistMode();
    }

    /* Slave mode */
    if (config.slave_mode) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        sendCapa();
        sendReplconf("rdb-filter-only", "");
        slaveMode(1);
    }

    /* Get RDB/functions mode. */
    if (config.getrdb_mode || config.get_functions_rdb_mode) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        sendCapa();
        sendRdbOnly();
        if (config.get_functions_rdb_mode && !sendReplconf("rdb-filter-only", "functions")) {
            fprintf(stderr, "Failed requesting functions only RDB from server, aborting\n");
            exit(1);
        }
        getRDB(NULL);
    }

    /* Pipe mode */
    if (config.pipe_mode) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        pipeMode();
    }

    /* Find big keys */
    if (config.bigkeys) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        findBigKeys(0, 0);
    }

    /* Find large keys (by serialized memory usage) */
    if (config.memkeys) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        findBigKeys(1, config.memkeys_samples);
    }

    /* Find hot keys */
    if (config.hotkeys) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        findHotKeys();
    }

    /* Stat mode */
    if (config.stat_mode) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        /* Default polling interval: one second. */
        if (config.interval == 0) config.interval = 1000000;
        statMode();
    }

    /* Scan mode */
    if (config.scan_mode) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        scanMode();
    }

    /* LRU test mode */
    if (config.lru_test_mode) {
        if (cliConnect(0) == REDIS_ERR) exit(1);
        LRUTestMode();
    }

    /* Intrinsic latency mode (needs no connection) */
    if (config.intrinsic_latency_mode) intrinsicLatencyMode();

    /* Print command-line hint for an input prefix string */
    if (config.test_hint) {
        testHint(config.test_hint);
    }
    /* Run test suite for command-line hints */
    if (config.test_hint_file) {
        testHintSuite(config.test_hint_file);
    }

    /* Start interactive mode when no command is provided */
    if (argc == 0 && !config.eval) {
        /* Ignore SIGPIPE in interactive mode to force a reconnect */
        signal(SIGPIPE, SIG_IGN);
        signal(SIGINT, sigIntHandler);

        /* Note that in repl mode we don't abort on connection error.
         * A new attempt will be performed for every command send. */
        cliConnect(0);
        repl();
    }

    /* Otherwise, we have some arguments to execute */
    if (config.eval) {
        if (cliConnect(0) != REDIS_OK) exit(1);
        return evalMode(argc,argv);
    } else {
        cliConnect(CC_QUIET);
        return noninteractive(argc,argv);
    }
}
diff --git a/src/redis-trib.rb b/src/redis-trib.rb
new file mode 100755
index 0000000..b1af830
--- /dev/null
+++ b/src/redis-trib.rb
@@ -0,0 +1,129 @@
+#!/usr/bin/env ruby
+
# Wrap +str+ in ANSI color escape sequences when $TERM looks like an
# xterm; otherwise (or for an unknown color name) return it unchanged.
def colorized(str, color)
    term = ENV['TERM'] || ''
    return str unless term["xterm"]

    codes = {
        white: 29,
        bold: '29;1',
        black: 30,
        red: 31,
        green: 32,
        yellow: 33,
        blue: 34,
        magenta: 35,
        cyan: 36,
        gray: 37
    }
    code = codes[color]
    return str unless code
    "\033[#{code}m#{str}\033[0m"
end
+
# Monkey-patch String so that e.g. "text".red or "text".bold returns the
# receiver wrapped by colorized() with the matching color symbol.
class String

    %w(white bold black red green yellow blue magenta cyan gray).each{|color|
        color = :"#{color}"
        define_method(color){
            colorized(self, color)
        }
    }

end
+
# Subcommands historically supported by redis-trib.
COMMANDS = %w(create check info fix reshard rebalance add-node
 del-node set-timeout call import help)

# Option names accepted per subcommand. A value of true means the option
# takes an argument, false means it is a bare flag, and [] means it may
# be repeated and its values are collected into an array.
ALLOWED_OPTIONS={
    "create" => {"replicas" => true},
    "add-node" => {"slave" => false, "master-id" => true},
    "import" => {"from" => :required, "copy" => false, "replace" => false},
    "reshard" => {"from" => true, "to" => true, "slots" => true, "yes" => false, "timeout" => true, "pipeline" => true},
    "rebalance" => {"weight" => [], "auto-weights" => false, "use-empty-masters" => false, "timeout" => true, "simulate" => false, "pipeline" => true, "threshold" => true},
    "fix" => {"timeout" => 0},
}
+
# Consume ARGV for subcommand +cmd+ and return [options_hash, args_array].
# Options not listed in ALLOWED_OPTIONS for the subcommand are silently
# ignored, as are stray single-dash arguments.
def parse_options(cmd)
    cmd = cmd.downcase
    options = {}
    args = []
    while (arg = ARGV.shift)
        if arg[0..1] == "--"
            option = arg[2..-1]

            # --verbose is a global option. Bug fix: the leading "--" has
            # already been stripped from +option+ above, so the old
            # comparison against "--verbose" could never match.
            if option == "verbose"
                options['verbose'] = true
                next
            end
            if ALLOWED_OPTIONS[cmd] == nil ||
               ALLOWED_OPTIONS[cmd][option] == nil
                next
            end
            if ALLOWED_OPTIONS[cmd][option] != false
                value = ARGV.shift
                next if !value
            else
                value = true
            end

            # If the option is set to [], it's a multiple arguments
            # option. We just queue every new value into an array.
            if ALLOWED_OPTIONS[cmd][option] == []
                options[option] = [] if !options[option]
                options[option] << value
            else
                options[option] = value
            end
        else
            next if arg[0,1] == '-'
            args << arg
        end
    end

    return options,args
end
+
# Build the equivalent "redis-cli --cluster ..." command line for the
# legacy redis-trib invocation described by cmd, args and opts.
def command_example(cmd, args, opts)
    parts = ["redis-cli --cluster #{cmd}"]
    args.each do |arg|
        arg = arg.to_s
        arg = arg.inspect if arg[' ']
        parts << arg
    end
    opts.each do |name, val|
        flag = "--cluster-#{name.downcase}"
        if val != true
            val = val.join(' ') if val.is_a? Array
            flag << " #{val}"
        end
        parts << flag
    end
    parts.join(' ')
end
+
# Deprecation stub entry point: parse whatever the user typed only so we
# can echo back the equivalent redis-cli --cluster invocation, then exit
# with a non-zero status.
$command = ARGV.shift
$opts, $args = parse_options($command) if $command

# Bug fix: corrected the typo "is not longer available".
puts "WARNING: redis-trib.rb is no longer available!".yellow
puts "You should use #{'redis-cli'.bold} instead."
puts ''
puts "All commands and features belonging to redis-trib.rb "+
     "have been moved\nto redis-cli."
puts "In order to use them you should call redis-cli with the #{'--cluster'.bold}"
puts "option followed by the subcommand name, arguments and options."
puts ''
puts "Use the following syntax:"
puts "redis-cli --cluster SUBCOMMAND [ARGUMENTS] [OPTIONS]".bold
puts ''
puts "Example:"
if $command
    example = command_example $command, $args, $opts
else
    example = "redis-cli --cluster info 127.0.0.1:7000"
end
puts example.bold
puts ''
puts "To get help about all subcommands, type:"
puts "redis-cli --cluster help".bold
puts ''
exit 1
diff --git a/src/redisassert.c b/src/redisassert.c
new file mode 100644
index 0000000..9f7402e
--- /dev/null
+++ b/src/redisassert.c
@@ -0,0 +1,53 @@
+/* redisassert.c -- Implement the default _serverAssert and _serverPanic which
+ * simply print stack trace to standard error stream.
+ *
+ * This file is shared by those modules that try to print some logs about stack trace
+ * but don't have their own implementations of functions in redisassert.h.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2021, Andy Pan <panjf2000@gmail.com> and Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
+
/* Default assertion-failure handler: print the failing expression and
 * its source location to stderr, then crash with SIGSEGV so a debugger
 * or core dump can capture the state. */
void _serverAssert(const char *estr, const char *file, int line) {
    /* Bug fix: the original messages had no trailing newlines, so both
     * lines ran together, and stderr was not flushed before crashing. */
    fprintf(stderr, "=== ASSERTION FAILED ===\n");
    fprintf(stderr, "==> %s:%d '%s' is not true\n",file,line,estr);
    fflush(stderr);
    raise(SIGSEGV);
}
+
/* Default panic handler: format the printf-style message with its
 * variadic arguments (the original version silently ignored them and
 * printed the raw format string), report it with the source location,
 * and abort(). */
void _serverPanic(const char *file, int line, const char *msg, ...) {
    va_list ap;
    char fmtmsg[512];

    va_start(ap, msg);
    vsnprintf(fmtmsg, sizeof(fmtmsg), msg, ap);
    va_end(ap);

    fprintf(stderr, "------------------------------------------------\n");
    fprintf(stderr, "!!! Software Failure. Press left mouse button to continue\n");
    fprintf(stderr, "Guru Meditation: %s #%s:%d\n",fmtmsg,file,line);
    fflush(stderr);
    abort();
}
diff --git a/src/redisassert.h b/src/redisassert.h
new file mode 100644
index 0000000..a3f95da
--- /dev/null
+++ b/src/redisassert.h
@@ -0,0 +1,49 @@
+/* redisassert.h -- Drop in replacements assert.h that prints the stack trace
+ * in the Redis logs.
+ *
+ * This file should be included instead of "assert.h" inside libraries used by
+ * Redis that are using assertions, so instead of Redis disappearing with
+ * SIGABORT, we get the details and stack trace inside the log file.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
#ifndef __REDIS_ASSERT_H__
#define __REDIS_ASSERT_H__

#include "config.h" /* presumably provides likely() and redis_unreachable() used below -- verify */

/* Replace libc assert(): on failure report through _serverAssert()
 * (which logs the expression and location) instead of a bare SIGABRT. */
#define assert(_e) (likely((_e))?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),redis_unreachable()))
/* Unconditional fatal error taking a printf-style message. */
#define panic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),redis_unreachable()

void _serverAssert(const char *estr, const char *file, int line);
void _serverPanic(const char *file, int line, const char *msg, ...);

#endif
diff --git a/src/redismodule.h b/src/redismodule.h
new file mode 100644
index 0000000..4378126
--- /dev/null
+++ b/src/redismodule.h
@@ -0,0 +1,1685 @@
+#ifndef REDISMODULE_H
+#define REDISMODULE_H
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+
+typedef struct RedisModuleString RedisModuleString;
+typedef struct RedisModuleKey RedisModuleKey;
+
+/* -------------- Defines NOT common between core and modules ------------- */
+
+#if defined REDISMODULE_CORE
+/* Things only defined for the modules core (server), not exported to modules
+ * that include this file. */
+
+#define RedisModuleString robj
+
+#endif /* defined REDISMODULE_CORE */
+
+#if !defined REDISMODULE_CORE && !defined REDISMODULE_CORE_MODULE
+/* Things defined for modules, but not for core-modules. */
+
+typedef long long mstime_t;
+typedef long long ustime_t;
+
+#endif /* !defined REDISMODULE_CORE && !defined REDISMODULE_CORE_MODULE */
+
+/* ---------------- Defines common between core and modules --------------- */
+
+/* Error status return values. */
+#define REDISMODULE_OK 0
+#define REDISMODULE_ERR 1
+
+/* Module Based Authentication status return values. */
+#define REDISMODULE_AUTH_HANDLED 0
+#define REDISMODULE_AUTH_NOT_HANDLED 1
+
+/* API versions. */
+#define REDISMODULE_APIVER_1 1
+
+/* Version of the RedisModuleTypeMethods structure. Once the RedisModuleTypeMethods
+ * structure is changed, this version number needs to be changed synchronously. */
+#define REDISMODULE_TYPE_METHOD_VERSION 5
+
+/* API flags and constants */
+#define REDISMODULE_READ (1<<0)
+#define REDISMODULE_WRITE (1<<1)
+
+/* RedisModule_OpenKey extra flags for the 'mode' argument.
+ * Avoid touching the LRU/LFU of the key when opened. */
+#define REDISMODULE_OPEN_KEY_NOTOUCH (1<<16)
+/* Don't trigger keyspace event on key misses. */
+#define REDISMODULE_OPEN_KEY_NONOTIFY (1<<17)
+/* Don't update keyspace hits/misses counters. */
+#define REDISMODULE_OPEN_KEY_NOSTATS (1<<18)
+/* Avoid deleting lazy expired keys. */
+#define REDISMODULE_OPEN_KEY_NOEXPIRE (1<<19)
+/* Avoid any effects from fetching the key */
+#define REDISMODULE_OPEN_KEY_NOEFFECTS (1<<20)
+/* Mask of all REDISMODULE_OPEN_KEY_* values. Any new mode should be added to this list.
+ * Should not be used directly by the module, use RM_GetOpenKeyModesAll instead.
+ * Located here so when we will add new modes we will not forget to update it. */
+#define _REDISMODULE_OPEN_KEY_ALL REDISMODULE_READ | REDISMODULE_WRITE | REDISMODULE_OPEN_KEY_NOTOUCH | REDISMODULE_OPEN_KEY_NONOTIFY | REDISMODULE_OPEN_KEY_NOSTATS | REDISMODULE_OPEN_KEY_NOEXPIRE | REDISMODULE_OPEN_KEY_NOEFFECTS
+
+/* List push and pop */
+#define REDISMODULE_LIST_HEAD 0
+#define REDISMODULE_LIST_TAIL 1
+
+/* Key types. */
+#define REDISMODULE_KEYTYPE_EMPTY 0
+#define REDISMODULE_KEYTYPE_STRING 1
+#define REDISMODULE_KEYTYPE_LIST 2
+#define REDISMODULE_KEYTYPE_HASH 3
+#define REDISMODULE_KEYTYPE_SET 4
+#define REDISMODULE_KEYTYPE_ZSET 5
+#define REDISMODULE_KEYTYPE_MODULE 6
+#define REDISMODULE_KEYTYPE_STREAM 7
+
+/* Reply types. */
+#define REDISMODULE_REPLY_UNKNOWN -1
+#define REDISMODULE_REPLY_STRING 0
+#define REDISMODULE_REPLY_ERROR 1
+#define REDISMODULE_REPLY_INTEGER 2
+#define REDISMODULE_REPLY_ARRAY 3
+#define REDISMODULE_REPLY_NULL 4
+#define REDISMODULE_REPLY_MAP 5
+#define REDISMODULE_REPLY_SET 6
+#define REDISMODULE_REPLY_BOOL 7
+#define REDISMODULE_REPLY_DOUBLE 8
+#define REDISMODULE_REPLY_BIG_NUMBER 9
+#define REDISMODULE_REPLY_VERBATIM_STRING 10
+#define REDISMODULE_REPLY_ATTRIBUTE 11
+#define REDISMODULE_REPLY_PROMISE 12
+
+/* Postponed array length. */
+#define REDISMODULE_POSTPONED_ARRAY_LEN -1 /* Deprecated, please use REDISMODULE_POSTPONED_LEN */
+#define REDISMODULE_POSTPONED_LEN -1
+
+/* Expire */
+#define REDISMODULE_NO_EXPIRE -1
+
+/* Sorted set API flags. */
+#define REDISMODULE_ZADD_XX (1<<0)
+#define REDISMODULE_ZADD_NX (1<<1)
+#define REDISMODULE_ZADD_ADDED (1<<2)
+#define REDISMODULE_ZADD_UPDATED (1<<3)
+#define REDISMODULE_ZADD_NOP (1<<4)
+#define REDISMODULE_ZADD_GT (1<<5)
+#define REDISMODULE_ZADD_LT (1<<6)
+
+/* Hash API flags. */
+#define REDISMODULE_HASH_NONE 0
+#define REDISMODULE_HASH_NX (1<<0)
+#define REDISMODULE_HASH_XX (1<<1)
+#define REDISMODULE_HASH_CFIELDS (1<<2)
+#define REDISMODULE_HASH_EXISTS (1<<3)
+#define REDISMODULE_HASH_COUNT_ALL (1<<4)
+
+#define REDISMODULE_CONFIG_DEFAULT 0 /* This is the default for a module config. */
+#define REDISMODULE_CONFIG_IMMUTABLE (1ULL<<0) /* Can this value only be set at startup? */
+#define REDISMODULE_CONFIG_SENSITIVE (1ULL<<1) /* Does this value contain sensitive information */
+#define REDISMODULE_CONFIG_HIDDEN (1ULL<<4) /* This config is hidden in `config get <pattern>` (used for tests/debugging) */
+#define REDISMODULE_CONFIG_PROTECTED (1ULL<<5) /* Becomes immutable if enable-protected-configs is enabled. */
+#define REDISMODULE_CONFIG_DENY_LOADING (1ULL<<6) /* This config is forbidden during loading. */
+
+#define REDISMODULE_CONFIG_MEMORY (1ULL<<7) /* Indicates if this value can be set as a memory value */
+#define REDISMODULE_CONFIG_BITFLAGS (1ULL<<8) /* Indicates if this value can be set as a multiple enum values */
+
+/* StreamID type. */
+typedef struct RedisModuleStreamID {
+ uint64_t ms;
+ uint64_t seq;
+} RedisModuleStreamID;
+
+/* StreamAdd() flags. */
+#define REDISMODULE_STREAM_ADD_AUTOID (1<<0)
+/* StreamIteratorStart() flags. */
+#define REDISMODULE_STREAM_ITERATOR_EXCLUSIVE (1<<0)
+#define REDISMODULE_STREAM_ITERATOR_REVERSE (1<<1)
+/* StreamIteratorTrim*() flags. */
+#define REDISMODULE_STREAM_TRIM_APPROX (1<<0)
+
+/* Context Flags: Info about the current context returned by
+ * RM_GetContextFlags(). */
+
+/* The command is running in the context of a Lua script */
+#define REDISMODULE_CTX_FLAGS_LUA (1<<0)
+/* The command is running inside a Redis transaction */
+#define REDISMODULE_CTX_FLAGS_MULTI (1<<1)
+/* The instance is a master */
+#define REDISMODULE_CTX_FLAGS_MASTER (1<<2)
+/* The instance is a slave */
+#define REDISMODULE_CTX_FLAGS_SLAVE (1<<3)
+/* The instance is read-only (usually meaning it's a slave as well) */
+#define REDISMODULE_CTX_FLAGS_READONLY (1<<4)
+/* The instance is running in cluster mode */
+#define REDISMODULE_CTX_FLAGS_CLUSTER (1<<5)
+/* The instance has AOF enabled */
+#define REDISMODULE_CTX_FLAGS_AOF (1<<6)
+/* The instance has RDB enabled */
+#define REDISMODULE_CTX_FLAGS_RDB (1<<7)
+/* The instance has Maxmemory set */
+#define REDISMODULE_CTX_FLAGS_MAXMEMORY (1<<8)
+/* Maxmemory is set and has an eviction policy that may delete keys */
+#define REDISMODULE_CTX_FLAGS_EVICT (1<<9)
+/* Redis is out of memory according to the maxmemory flag. */
+#define REDISMODULE_CTX_FLAGS_OOM (1<<10)
+/* Less than 25% of memory available according to maxmemory. */
+#define REDISMODULE_CTX_FLAGS_OOM_WARNING (1<<11)
+/* The command was sent over the replication link. */
+#define REDISMODULE_CTX_FLAGS_REPLICATED (1<<12)
+/* Redis is currently loading either from AOF or RDB. */
+#define REDISMODULE_CTX_FLAGS_LOADING (1<<13)
+/* The replica has no link with its master, note that
+ * there is the inverse flag as well:
+ *
+ * REDISMODULE_CTX_FLAGS_REPLICA_IS_ONLINE
+ *
+ * The two flags are exclusive, one or the other can be set. */
+#define REDISMODULE_CTX_FLAGS_REPLICA_IS_STALE (1<<14)
+/* The replica is trying to connect with the master.
+ * (REPL_STATE_CONNECT and REPL_STATE_CONNECTING states) */
+#define REDISMODULE_CTX_FLAGS_REPLICA_IS_CONNECTING (1<<15)
+/* The replica is receiving an RDB file from its master. */
+#define REDISMODULE_CTX_FLAGS_REPLICA_IS_TRANSFERRING (1<<16)
+/* The replica is online, receiving updates from its master. */
+#define REDISMODULE_CTX_FLAGS_REPLICA_IS_ONLINE (1<<17)
+/* There is currently some background process active. */
+#define REDISMODULE_CTX_FLAGS_ACTIVE_CHILD (1<<18)
+/* The next EXEC will fail due to dirty CAS (touched keys). */
+#define REDISMODULE_CTX_FLAGS_MULTI_DIRTY (1<<19)
+/* Redis is currently running inside background child process. */
+#define REDISMODULE_CTX_FLAGS_IS_CHILD (1<<20)
+/* The current client does not allow blocking, either called from
+ * within multi, lua, or from another module using RM_Call */
+#define REDISMODULE_CTX_FLAGS_DENY_BLOCKING (1<<21)
+/* The current client uses RESP3 protocol */
+#define REDISMODULE_CTX_FLAGS_RESP3 (1<<22)
+/* Redis is currently async loading database for diskless replication. */
+#define REDISMODULE_CTX_FLAGS_ASYNC_LOADING (1<<23)
+/* Redis is starting. */
+#define REDISMODULE_CTX_FLAGS_SERVER_STARTUP (1<<24)
+
+/* Next context flag, must be updated when adding new flags above!
+ * This flag should not be used directly by the module.
+ * Use RedisModule_GetContextFlagsAll instead. */
+#define _REDISMODULE_CTX_FLAGS_NEXT (1<<25)
+
+/* Keyspace changes notification classes. Every class is associated with a
+ * character for configuration purposes.
+ * NOTE: These have to be in sync with NOTIFY_* in server.h */
+#define REDISMODULE_NOTIFY_KEYSPACE (1<<0) /* K */
+#define REDISMODULE_NOTIFY_KEYEVENT (1<<1) /* E */
+#define REDISMODULE_NOTIFY_GENERIC (1<<2) /* g */
+#define REDISMODULE_NOTIFY_STRING (1<<3) /* $ */
+#define REDISMODULE_NOTIFY_LIST (1<<4) /* l */
+#define REDISMODULE_NOTIFY_SET (1<<5) /* s */
+#define REDISMODULE_NOTIFY_HASH (1<<6) /* h */
+#define REDISMODULE_NOTIFY_ZSET (1<<7) /* z */
+#define REDISMODULE_NOTIFY_EXPIRED (1<<8) /* x */
+#define REDISMODULE_NOTIFY_EVICTED (1<<9) /* e */
+#define REDISMODULE_NOTIFY_STREAM (1<<10) /* t */
+#define REDISMODULE_NOTIFY_KEY_MISS (1<<11) /* m (Note: This one is excluded from REDISMODULE_NOTIFY_ALL on purpose) */
+#define REDISMODULE_NOTIFY_LOADED (1<<12) /* module only key space notification, indicate a key loaded from rdb */
+#define REDISMODULE_NOTIFY_MODULE (1<<13) /* d, module key space notification */
+#define REDISMODULE_NOTIFY_NEW (1<<14) /* n, new key notification */
+
+/* Next notification flag, must be updated when adding new flags above!
+ * This flag should not be used directly by the module.
+ * Use RedisModule_GetKeyspaceNotificationFlagsAll instead. */
+#define _REDISMODULE_NOTIFY_NEXT (1<<15)
+
+#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_MODULE) /* A */
+
+/* A special pointer that we can use between the core and the module to signal
+ * field deletion, and that is impossible to be a valid pointer. */
+#define REDISMODULE_HASH_DELETE ((RedisModuleString*)(long)1)
+
+/* Error messages. */
+#define REDISMODULE_ERRORMSG_WRONGTYPE "WRONGTYPE Operation against a key holding the wrong kind of value"
+
+#define REDISMODULE_POSITIVE_INFINITE (1.0/0.0)
+#define REDISMODULE_NEGATIVE_INFINITE (-1.0/0.0)
+
+/* Cluster API defines. */
+#define REDISMODULE_NODE_ID_LEN 40
+#define REDISMODULE_NODE_MYSELF (1<<0)
+#define REDISMODULE_NODE_MASTER (1<<1)
+#define REDISMODULE_NODE_SLAVE (1<<2)
+#define REDISMODULE_NODE_PFAIL (1<<3)
+#define REDISMODULE_NODE_FAIL (1<<4)
+#define REDISMODULE_NODE_NOFAILOVER (1<<5)
+
+#define REDISMODULE_CLUSTER_FLAG_NONE 0
+#define REDISMODULE_CLUSTER_FLAG_NO_FAILOVER (1<<1)
+#define REDISMODULE_CLUSTER_FLAG_NO_REDIRECTION (1<<2)
+
+#define REDISMODULE_NOT_USED(V) ((void) V)
+
+/* Logging level strings */
+#define REDISMODULE_LOGLEVEL_DEBUG "debug"
+#define REDISMODULE_LOGLEVEL_VERBOSE "verbose"
+#define REDISMODULE_LOGLEVEL_NOTICE "notice"
+#define REDISMODULE_LOGLEVEL_WARNING "warning"
+
+/* Bit flags for aux_save_triggers and the aux_load and aux_save callbacks */
+#define REDISMODULE_AUX_BEFORE_RDB (1<<0)
+#define REDISMODULE_AUX_AFTER_RDB (1<<1)
+
+/* RM_Yield flags */
+#define REDISMODULE_YIELD_FLAG_NONE (1<<0)
+#define REDISMODULE_YIELD_FLAG_CLIENTS (1<<1)
+
+/* RM_BlockClientOnKeysWithFlags flags */
+#define REDISMODULE_BLOCK_UNBLOCK_DEFAULT (0)
+#define REDISMODULE_BLOCK_UNBLOCK_DELETED (1<<0)
+
+/* This type represents a timer handle, and is returned when a timer is
+ * registered and used in order to invalidate a timer. It's just a 64 bit
+ * number, because this is how each timer is represented inside the radix tree
+ * of timers that are going to expire, sorted by expire time. */
+typedef uint64_t RedisModuleTimerID;
+
+/* CommandFilter Flags */
+
+/* Do filter RedisModule_Call() commands initiated by module itself. */
+#define REDISMODULE_CMDFILTER_NOSELF (1<<0)
+
+/* Declare that the module can handle errors with RedisModule_SetModuleOptions. */
+#define REDISMODULE_OPTIONS_HANDLE_IO_ERRORS (1<<0)
+
+/* When set, Redis will not call RedisModule_SignalModifiedKey(), implicitly in
+ * RedisModule_CloseKey, and the module needs to do that manually when keys
+ * are modified from the user's perspective, to invalidate WATCH. */
+#define REDISMODULE_OPTION_NO_IMPLICIT_SIGNAL_MODIFIED (1<<1)
+
+/* Declare that the module can handle diskless async replication with RedisModule_SetModuleOptions. */
+#define REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD (1<<2)
+
+/* Declare that the module wants to receive nested key space notifications.
+ * If enabled, the module is responsible for breaking endless loops. */
+#define REDISMODULE_OPTIONS_ALLOW_NESTED_KEYSPACE_NOTIFICATIONS (1<<3)
+
+/* Next option flag, must be updated when adding new module flags above!
+ * This flag should not be used directly by the module.
+ * Use RedisModule_GetModuleOptionsAll instead. */
+#define _REDISMODULE_OPTIONS_FLAGS_NEXT (1<<4)
+
+/* Definitions for RedisModule_SetCommandInfo. */
+
+typedef enum {
+ REDISMODULE_ARG_TYPE_STRING,
+ REDISMODULE_ARG_TYPE_INTEGER,
+ REDISMODULE_ARG_TYPE_DOUBLE,
+ REDISMODULE_ARG_TYPE_KEY, /* A string, but represents a keyname */
+ REDISMODULE_ARG_TYPE_PATTERN,
+ REDISMODULE_ARG_TYPE_UNIX_TIME,
+ REDISMODULE_ARG_TYPE_PURE_TOKEN,
+ REDISMODULE_ARG_TYPE_ONEOF, /* Must have sub-arguments */
+ REDISMODULE_ARG_TYPE_BLOCK /* Must have sub-arguments */
+} RedisModuleCommandArgType;
+
+#define REDISMODULE_CMD_ARG_NONE (0)
+#define REDISMODULE_CMD_ARG_OPTIONAL (1<<0) /* The argument is optional (like GET in SET command) */
+#define REDISMODULE_CMD_ARG_MULTIPLE (1<<1) /* The argument may repeat itself (like key in DEL) */
+#define REDISMODULE_CMD_ARG_MULTIPLE_TOKEN (1<<2) /* The argument may repeat itself, and so does its token (like `GET pattern` in SORT) */
+#define _REDISMODULE_CMD_ARG_NEXT (1<<3)
+
+typedef enum {
+ REDISMODULE_KSPEC_BS_INVALID = 0, /* Must be zero. An implicit value of
+ * zero is provided when the field is
+ * absent in a struct literal. */
+ REDISMODULE_KSPEC_BS_UNKNOWN,
+ REDISMODULE_KSPEC_BS_INDEX,
+ REDISMODULE_KSPEC_BS_KEYWORD
+} RedisModuleKeySpecBeginSearchType;
+
+typedef enum {
+ REDISMODULE_KSPEC_FK_OMITTED = 0, /* Used when the field is absent in a
+ * struct literal. Don't use this value
+ * explicitly. */
+ REDISMODULE_KSPEC_FK_UNKNOWN,
+ REDISMODULE_KSPEC_FK_RANGE,
+ REDISMODULE_KSPEC_FK_KEYNUM
+} RedisModuleKeySpecFindKeysType;
+
+/* Key-spec flags. For details, see the documentation of
+ * RedisModule_SetCommandInfo and the key-spec flags in server.h. */
+#define REDISMODULE_CMD_KEY_RO (1ULL<<0)
+#define REDISMODULE_CMD_KEY_RW (1ULL<<1)
+#define REDISMODULE_CMD_KEY_OW (1ULL<<2)
+#define REDISMODULE_CMD_KEY_RM (1ULL<<3)
+#define REDISMODULE_CMD_KEY_ACCESS (1ULL<<4)
+#define REDISMODULE_CMD_KEY_UPDATE (1ULL<<5)
+#define REDISMODULE_CMD_KEY_INSERT (1ULL<<6)
+#define REDISMODULE_CMD_KEY_DELETE (1ULL<<7)
+#define REDISMODULE_CMD_KEY_NOT_KEY (1ULL<<8)
+#define REDISMODULE_CMD_KEY_INCOMPLETE (1ULL<<9)
+#define REDISMODULE_CMD_KEY_VARIABLE_FLAGS (1ULL<<10)
+
+/* Channel flags, for details see the documentation of
+ * RedisModule_ChannelAtPosWithFlags. */
+#define REDISMODULE_CMD_CHANNEL_PATTERN (1ULL<<0)
+#define REDISMODULE_CMD_CHANNEL_PUBLISH (1ULL<<1)
+#define REDISMODULE_CMD_CHANNEL_SUBSCRIBE (1ULL<<2)
+#define REDISMODULE_CMD_CHANNEL_UNSUBSCRIBE (1ULL<<3)
+
+typedef struct RedisModuleCommandArg {
+ const char *name;
+ RedisModuleCommandArgType type;
+ int key_spec_index; /* If type is KEY, this is a zero-based index of
+ * the key_spec in the command. For other types,
+ * you may specify -1. */
+ const char *token; /* If type is PURE_TOKEN, this is the token. */
+ const char *summary;
+ const char *since;
+ int flags; /* The REDISMODULE_CMD_ARG_* macros. */
+ const char *deprecated_since;
+ struct RedisModuleCommandArg *subargs;
+ const char *display_text;
+} RedisModuleCommandArg;
+
+typedef struct {
+ const char *since;
+ const char *changes;
+} RedisModuleCommandHistoryEntry;
+
+typedef struct {
+ const char *notes;
+ uint64_t flags; /* REDISMODULE_CMD_KEY_* macros. */
+ RedisModuleKeySpecBeginSearchType begin_search_type;
+ union {
+ struct {
+ /* The index from which we start the search for keys */
+ int pos;
+ } index;
+ struct {
+ /* The keyword that indicates the beginning of key args */
+ const char *keyword;
+ /* An index in argv from which to start searching.
+ * Can be negative, which means start search from the end, in reverse
+ * (Example: -2 means to start in reverse from the penultimate arg) */
+ int startfrom;
+ } keyword;
+ } bs;
+ RedisModuleKeySpecFindKeysType find_keys_type;
+ union {
+ struct {
+ /* Index of the last key relative to the result of the begin search
+ * step. Can be negative, in which case it's not relative. -1
+ * indicating till the last argument, -2 one before the last and so
+ * on. */
+ int lastkey;
+ /* How many args should we skip after finding a key, in order to
+ * find the next one. */
+ int keystep;
+ /* If lastkey is -1, we use limit to stop the search by a factor. 0
+ * and 1 mean no limit. 2 means 1/2 of the remaining args, 3 means
+ * 1/3, and so on. */
+ int limit;
+ } range;
+ struct {
+ /* Index of the argument containing the number of keys to come
+ * relative to the result of the begin search step */
+ int keynumidx;
+ /* Index of the first key. (Usually it's just after keynumidx, in
+ * which case it should be set to keynumidx + 1.) */
+ int firstkey;
+ /* How many args should we skip after finding a key, in order to
+ * find the next one, relative to the result of the begin search
+ * step. */
+ int keystep;
+ } keynum;
+ } fk;
+} RedisModuleCommandKeySpec;
+
+typedef struct {
+ int version;
+ size_t sizeof_historyentry;
+ size_t sizeof_keyspec;
+ size_t sizeof_arg;
+} RedisModuleCommandInfoVersion;
+
+static const RedisModuleCommandInfoVersion RedisModule_CurrentCommandInfoVersion = {
+ .version = 1,
+ .sizeof_historyentry = sizeof(RedisModuleCommandHistoryEntry),
+ .sizeof_keyspec = sizeof(RedisModuleCommandKeySpec),
+ .sizeof_arg = sizeof(RedisModuleCommandArg)
+};
+
+#define REDISMODULE_COMMAND_INFO_VERSION (&RedisModule_CurrentCommandInfoVersion)
+
+typedef struct {
+ /* Always set version to REDISMODULE_COMMAND_INFO_VERSION */
+ const RedisModuleCommandInfoVersion *version;
+ /* Version 1 fields (added in Redis 7.0.0) */
+ const char *summary; /* Summary of the command */
+ const char *complexity; /* Complexity description */
+ const char *since; /* Debut module version of the command */
+ RedisModuleCommandHistoryEntry *history; /* History */
+ /* A string of space-separated tips meant for clients/proxies regarding this
+ * command */
+ const char *tips;
+ /* Number of arguments, it is possible to use -N to say >= N */
+ int arity;
+ RedisModuleCommandKeySpec *key_specs;
+ RedisModuleCommandArg *args;
+} RedisModuleCommandInfo;
+
+/* Eventloop definitions. */
+#define REDISMODULE_EVENTLOOP_READABLE 1
+#define REDISMODULE_EVENTLOOP_WRITABLE 2
+typedef void (*RedisModuleEventLoopFunc)(int fd, void *user_data, int mask);
+typedef void (*RedisModuleEventLoopOneShotFunc)(void *user_data);
+
+/* Server events definitions.
+ * Those flags should not be used directly by the module, instead
+ * the module should use RedisModuleEvent_* variables.
+ * Note: This must be synced with moduleEventVersions */
+#define REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED 0
+#define REDISMODULE_EVENT_PERSISTENCE 1
+#define REDISMODULE_EVENT_FLUSHDB 2
+#define REDISMODULE_EVENT_LOADING 3
+#define REDISMODULE_EVENT_CLIENT_CHANGE 4
+#define REDISMODULE_EVENT_SHUTDOWN 5
+#define REDISMODULE_EVENT_REPLICA_CHANGE 6
+#define REDISMODULE_EVENT_MASTER_LINK_CHANGE 7
+#define REDISMODULE_EVENT_CRON_LOOP 8
+#define REDISMODULE_EVENT_MODULE_CHANGE 9
+#define REDISMODULE_EVENT_LOADING_PROGRESS 10
+#define REDISMODULE_EVENT_SWAPDB 11
+#define REDISMODULE_EVENT_REPL_BACKUP 12 /* Deprecated since Redis 7.0, not used anymore. */
+#define REDISMODULE_EVENT_FORK_CHILD 13
+#define REDISMODULE_EVENT_REPL_ASYNC_LOAD 14
+#define REDISMODULE_EVENT_EVENTLOOP 15
+#define REDISMODULE_EVENT_CONFIG 16
+#define REDISMODULE_EVENT_KEY 17
+#define _REDISMODULE_EVENT_NEXT 18 /* Next event flag, should be updated if a new event added. */
+
+typedef struct RedisModuleEvent {
+ uint64_t id; /* REDISMODULE_EVENT_... defines. */
+ uint64_t dataver; /* Version of the structure we pass as 'data'. */
+} RedisModuleEvent;
+
+struct RedisModuleCtx;
+struct RedisModuleDefragCtx;
+typedef void (*RedisModuleEventCallback)(struct RedisModuleCtx *ctx, RedisModuleEvent eid, uint64_t subevent, void *data);
+
+/* IMPORTANT: When adding a new version of one of below structures that contain
+ * event data (RedisModuleFlushInfoV1 for example) we have to avoid renaming the
+ * old RedisModuleEvent structure.
+ * For example, if we want to add RedisModuleFlushInfoV2, the RedisModuleEvent
+ * structures should be:
+ * RedisModuleEvent_FlushDB = {
+ * REDISMODULE_EVENT_FLUSHDB,
+ * 1
+ * },
+ * RedisModuleEvent_FlushDBV2 = {
+ * REDISMODULE_EVENT_FLUSHDB,
+ * 2
+ * }
+ * and NOT:
+ * RedisModuleEvent_FlushDBV1 = {
+ * REDISMODULE_EVENT_FLUSHDB,
+ * 1
+ * },
+ * RedisModuleEvent_FlushDB = {
+ * REDISMODULE_EVENT_FLUSHDB,
+ * 2
+ * }
+ * The reason for that is forward-compatibility: we want a module that was
+ * compiled with a new redismodule.h to be able to work with an old server,
+ * unless the author explicitly decided to use the newer event type.
+ */
+static const RedisModuleEvent
+ RedisModuleEvent_ReplicationRoleChanged = {
+ REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED,
+ 1
+ },
+ RedisModuleEvent_Persistence = {
+ REDISMODULE_EVENT_PERSISTENCE,
+ 1
+ },
+ RedisModuleEvent_FlushDB = {
+ REDISMODULE_EVENT_FLUSHDB,
+ 1
+ },
+ RedisModuleEvent_Loading = {
+ REDISMODULE_EVENT_LOADING,
+ 1
+ },
+ RedisModuleEvent_ClientChange = {
+ REDISMODULE_EVENT_CLIENT_CHANGE,
+ 1
+ },
+ RedisModuleEvent_Shutdown = {
+ REDISMODULE_EVENT_SHUTDOWN,
+ 1
+ },
+ RedisModuleEvent_ReplicaChange = {
+ REDISMODULE_EVENT_REPLICA_CHANGE,
+ 1
+ },
+ RedisModuleEvent_CronLoop = {
+ REDISMODULE_EVENT_CRON_LOOP,
+ 1
+ },
+ RedisModuleEvent_MasterLinkChange = {
+ REDISMODULE_EVENT_MASTER_LINK_CHANGE,
+ 1
+ },
+ RedisModuleEvent_ModuleChange = {
+ REDISMODULE_EVENT_MODULE_CHANGE,
+ 1
+ },
+ RedisModuleEvent_LoadingProgress = {
+ REDISMODULE_EVENT_LOADING_PROGRESS,
+ 1
+ },
+ RedisModuleEvent_SwapDB = {
+ REDISMODULE_EVENT_SWAPDB,
+ 1
+ },
+ /* Deprecated since Redis 7.0, not used anymore. */
+ __attribute__ ((deprecated))
+ RedisModuleEvent_ReplBackup = {
+ REDISMODULE_EVENT_REPL_BACKUP,
+ 1
+ },
+ RedisModuleEvent_ReplAsyncLoad = {
+ REDISMODULE_EVENT_REPL_ASYNC_LOAD,
+ 1
+ },
+ RedisModuleEvent_ForkChild = {
+ REDISMODULE_EVENT_FORK_CHILD,
+ 1
+ },
+ RedisModuleEvent_EventLoop = {
+ REDISMODULE_EVENT_EVENTLOOP,
+ 1
+ },
+ RedisModuleEvent_Config = {
+ REDISMODULE_EVENT_CONFIG,
+ 1
+ },
+ RedisModuleEvent_Key = {
+ REDISMODULE_EVENT_KEY,
+ 1
+ };
+
+/* Those are values that are used for the 'subevent' callback argument. */
+#define REDISMODULE_SUBEVENT_PERSISTENCE_RDB_START 0
+#define REDISMODULE_SUBEVENT_PERSISTENCE_AOF_START 1
+#define REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_RDB_START 2
+#define REDISMODULE_SUBEVENT_PERSISTENCE_ENDED 3
+#define REDISMODULE_SUBEVENT_PERSISTENCE_FAILED 4
+#define REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_AOF_START 5
+#define _REDISMODULE_SUBEVENT_PERSISTENCE_NEXT 6
+
+#define REDISMODULE_SUBEVENT_LOADING_RDB_START 0
+#define REDISMODULE_SUBEVENT_LOADING_AOF_START 1
+#define REDISMODULE_SUBEVENT_LOADING_REPL_START 2
+#define REDISMODULE_SUBEVENT_LOADING_ENDED 3
+#define REDISMODULE_SUBEVENT_LOADING_FAILED 4
+#define _REDISMODULE_SUBEVENT_LOADING_NEXT 5
+
+#define REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED 0
+#define REDISMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED 1
+#define _REDISMODULE_SUBEVENT_CLIENT_CHANGE_NEXT 2
+
+#define REDISMODULE_SUBEVENT_MASTER_LINK_UP 0
+#define REDISMODULE_SUBEVENT_MASTER_LINK_DOWN 1
+#define _REDISMODULE_SUBEVENT_MASTER_NEXT 2
+
+#define REDISMODULE_SUBEVENT_REPLICA_CHANGE_ONLINE 0
+#define REDISMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE 1
+#define _REDISMODULE_SUBEVENT_REPLICA_CHANGE_NEXT 2
+
+#define REDISMODULE_EVENT_REPLROLECHANGED_NOW_MASTER 0
+#define REDISMODULE_EVENT_REPLROLECHANGED_NOW_REPLICA 1
+#define _REDISMODULE_EVENT_REPLROLECHANGED_NEXT 2
+
+#define REDISMODULE_SUBEVENT_FLUSHDB_START 0
+#define REDISMODULE_SUBEVENT_FLUSHDB_END 1
+#define _REDISMODULE_SUBEVENT_FLUSHDB_NEXT 2
+
+#define REDISMODULE_SUBEVENT_MODULE_LOADED 0
+#define REDISMODULE_SUBEVENT_MODULE_UNLOADED 1
+#define _REDISMODULE_SUBEVENT_MODULE_NEXT 2
+
+#define REDISMODULE_SUBEVENT_CONFIG_CHANGE 0
+#define _REDISMODULE_SUBEVENT_CONFIG_NEXT 1
+
+#define REDISMODULE_SUBEVENT_LOADING_PROGRESS_RDB 0
+#define REDISMODULE_SUBEVENT_LOADING_PROGRESS_AOF 1
+#define _REDISMODULE_SUBEVENT_LOADING_PROGRESS_NEXT 2
+
+/* Replication Backup events are deprecated since Redis 7.0 and are never fired. */
+#define REDISMODULE_SUBEVENT_REPL_BACKUP_CREATE 0
+#define REDISMODULE_SUBEVENT_REPL_BACKUP_RESTORE 1
+#define REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD 2
+#define _REDISMODULE_SUBEVENT_REPL_BACKUP_NEXT 3
+
+#define REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_STARTED 0
+#define REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_ABORTED 1
+#define REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_COMPLETED 2
+#define _REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_NEXT 3
+
+#define REDISMODULE_SUBEVENT_FORK_CHILD_BORN 0
+#define REDISMODULE_SUBEVENT_FORK_CHILD_DIED 1
+#define _REDISMODULE_SUBEVENT_FORK_CHILD_NEXT 2
+
+#define REDISMODULE_SUBEVENT_EVENTLOOP_BEFORE_SLEEP 0
+#define REDISMODULE_SUBEVENT_EVENTLOOP_AFTER_SLEEP 1
+#define _REDISMODULE_SUBEVENT_EVENTLOOP_NEXT 2
+
+#define REDISMODULE_SUBEVENT_KEY_DELETED 0
+#define REDISMODULE_SUBEVENT_KEY_EXPIRED 1
+#define REDISMODULE_SUBEVENT_KEY_EVICTED 2
+#define REDISMODULE_SUBEVENT_KEY_OVERWRITTEN 3
+#define _REDISMODULE_SUBEVENT_KEY_NEXT 4
+
+#define _REDISMODULE_SUBEVENT_SHUTDOWN_NEXT 0
+#define _REDISMODULE_SUBEVENT_CRON_LOOP_NEXT 0
+#define _REDISMODULE_SUBEVENT_SWAPDB_NEXT 0
+
+/* RedisModuleClientInfo flags. */
+#define REDISMODULE_CLIENTINFO_FLAG_SSL (1<<0)
+#define REDISMODULE_CLIENTINFO_FLAG_PUBSUB (1<<1)
+#define REDISMODULE_CLIENTINFO_FLAG_BLOCKED (1<<2)
+#define REDISMODULE_CLIENTINFO_FLAG_TRACKING (1<<3)
+#define REDISMODULE_CLIENTINFO_FLAG_UNIXSOCKET (1<<4)
+#define REDISMODULE_CLIENTINFO_FLAG_MULTI (1<<5)
+
+/* Here we take all the structures that the module pass to the core
+ * and the other way around. Notably the list here contains the structures
+ * used by the hooks API RedisModule_RegisterToServerEvent().
+ *
+ * The structures always start with a 'version' field. This is useful
+ * when we want to pass a reference to the structure to the core APIs,
+ * for the APIs to fill the structure. In that case, the structure 'version'
+ * field is initialized before passing it to the core, so that the core is
+ * able to cast the pointer to the appropriate structure version. In this
+ * way we obtain ABI compatibility.
+ *
+ * Here we'll list all the structure versions in case they evolve over time,
+ * however using a define, we'll make sure to use the last version as the
+ * public name for the module to use. */
+
+#define REDISMODULE_CLIENTINFO_VERSION 1
+typedef struct RedisModuleClientInfo {
+ uint64_t version; /* Version of this structure for ABI compat. */
+ uint64_t flags; /* REDISMODULE_CLIENTINFO_FLAG_* */
+ uint64_t id; /* Client ID. */
+ char addr[46]; /* IPv4 or IPv6 address. */
+ uint16_t port; /* TCP port. */
+ uint16_t db; /* Selected DB. */
+} RedisModuleClientInfoV1;
+
+#define RedisModuleClientInfo RedisModuleClientInfoV1
+
+#define REDISMODULE_CLIENTINFO_INITIALIZER_V1 { .version = 1 }
+
+#define REDISMODULE_REPLICATIONINFO_VERSION 1
+typedef struct RedisModuleReplicationInfo {
+ uint64_t version; /* Not used since this structure is never passed
+ from the module to the core right now. Here
+ for future compatibility. */
+ int master; /* true if master, false if replica */
+ char *masterhost; /* master instance hostname for NOW_REPLICA */
+ int masterport; /* master instance port for NOW_REPLICA */
+ char *replid1; /* Main replication ID */
+ char *replid2; /* Secondary replication ID */
+ uint64_t repl1_offset; /* Main replication offset */
+ uint64_t repl2_offset; /* Offset of replid2 validity */
+} RedisModuleReplicationInfoV1;
+
+#define RedisModuleReplicationInfo RedisModuleReplicationInfoV1
+
+#define REDISMODULE_FLUSHINFO_VERSION 1
+typedef struct RedisModuleFlushInfo {
+ uint64_t version; /* Not used since this structure is never passed
+ from the module to the core right now. Here
+ for future compatibility. */
+ int32_t sync; /* Synchronous or threaded flush?. */
+ int32_t dbnum; /* Flushed database number, -1 for ALL. */
+} RedisModuleFlushInfoV1;
+
+#define RedisModuleFlushInfo RedisModuleFlushInfoV1
+
+#define REDISMODULE_MODULE_CHANGE_VERSION 1
+typedef struct RedisModuleModuleChange {
+ uint64_t version; /* Not used since this structure is never passed
+ from the module to the core right now. Here
+ for future compatibility. */
+ const char* module_name;/* Name of module loaded or unloaded. */
+ int32_t module_version; /* Module version. */
+} RedisModuleModuleChangeV1;
+
+#define RedisModuleModuleChange RedisModuleModuleChangeV1
+
+#define REDISMODULE_CONFIGCHANGE_VERSION 1
+typedef struct RedisModuleConfigChange {
+ uint64_t version; /* Not used since this structure is never passed
+ from the module to the core right now. Here
+ for future compatibility. */
+ uint32_t num_changes; /* how many redis config options were changed */
+ const char **config_names; /* the config names that were changed */
+} RedisModuleConfigChangeV1;
+
+#define RedisModuleConfigChange RedisModuleConfigChangeV1
+
+#define REDISMODULE_CRON_LOOP_VERSION 1
+typedef struct RedisModuleCronLoopInfo {
+ uint64_t version; /* Not used since this structure is never passed
+ from the module to the core right now. Here
+ for future compatibility. */
+ int32_t hz; /* Approximate number of events per second. */
+} RedisModuleCronLoopV1;
+
+#define RedisModuleCronLoop RedisModuleCronLoopV1
+
+#define REDISMODULE_LOADING_PROGRESS_VERSION 1
+typedef struct RedisModuleLoadingProgressInfo {
+ uint64_t version; /* Not used since this structure is never passed
+ from the module to the core right now. Here
+ for future compatibility. */
+ int32_t hz; /* Approximate number of events per second. */
+ int32_t progress; /* Approximate progress between 0 and 1024, or -1
+ * if unknown. */
+} RedisModuleLoadingProgressV1;
+
+#define RedisModuleLoadingProgress RedisModuleLoadingProgressV1
+
+#define REDISMODULE_SWAPDBINFO_VERSION 1
+typedef struct RedisModuleSwapDbInfo {
+ uint64_t version; /* Not used since this structure is never passed
+ from the module to the core right now. Here
+ for future compatibility. */
+ int32_t dbnum_first; /* Swap Db first dbnum */
+ int32_t dbnum_second; /* Swap Db second dbnum */
+} RedisModuleSwapDbInfoV1;
+
+#define RedisModuleSwapDbInfo RedisModuleSwapDbInfoV1
+
+#define REDISMODULE_KEYINFO_VERSION 1
+typedef struct RedisModuleKeyInfo {
+ uint64_t version; /* Not used since this structure is never passed
+ from the module to the core right now. Here
+ for future compatibility. */
+ RedisModuleKey *key; /* Opened key. */
+} RedisModuleKeyInfoV1;
+
+#define RedisModuleKeyInfo RedisModuleKeyInfoV1
+
+/* Reason codes a module supplies when adding an entry to the ACL log,
+ * mirroring the failure categories reported by ACL LOG. */
+typedef enum {
+ REDISMODULE_ACL_LOG_AUTH = 0, /* Authentication failure */
+ REDISMODULE_ACL_LOG_CMD, /* Command authorization failure */
+ REDISMODULE_ACL_LOG_KEY, /* Key authorization failure */
+ REDISMODULE_ACL_LOG_CHANNEL /* Channel authorization failure */
+} RedisModuleACLLogEntryReason;
+
+/* Incomplete structures needed by both the core and modules. */
+typedef struct RedisModuleIO RedisModuleIO; /* RDB/AOF serialization context. */
+typedef struct RedisModuleDigest RedisModuleDigest; /* DEBUG DIGEST accumulator. */
+typedef struct RedisModuleInfoCtx RedisModuleInfoCtx; /* INFO-section building context. */
+typedef struct RedisModuleDefragCtx RedisModuleDefragCtx; /* Active-defrag callback context. */
+
+/* Function pointers needed by both the core and modules, these needs to be
+ * exposed since you can't cast a function pointer to (void *). */
+/* INFO callback; for_crash_report is nonzero when gathering crash data. */
+typedef void (*RedisModuleInfoFunc)(RedisModuleInfoCtx *ctx, int for_crash_report);
+/* Invoked during active defragmentation. */
+typedef void (*RedisModuleDefragFunc)(RedisModuleDefragCtx *ctx);
+/* Invoked when the authenticated user of a tracked client changes. */
+typedef void (*RedisModuleUserChangedFunc) (uint64_t client_id, void *privdata);
+
+/* ------------------------- End of common defines ------------------------ */
+
+/* ----------- The rest of the defines are only for modules ----------------- */
+#if !defined REDISMODULE_CORE || defined REDISMODULE_CORE_MODULE
+/* Things defined for modules and core-modules. */
+
+/* Macro definitions specific to individual compilers */
+
+/* Suppresses -Wunused warnings on declarations; expands to nothing on
+ * compilers without __attribute__ support. Guarded so embedders may
+ * pre-define their own variant. */
+#ifndef REDISMODULE_ATTR_UNUSED
+# ifdef __GNUC__
+# define REDISMODULE_ATTR_UNUSED __attribute__((unused))
+# else
+# define REDISMODULE_ATTR_UNUSED
+# endif
+#endif
+
+/* Enables printf-style format checking (-Wformat): idx is the 1-based
+ * format-string argument, cnt the first variadic argument. */
+#ifndef REDISMODULE_ATTR_PRINTF
+# ifdef __GNUC__
+# define REDISMODULE_ATTR_PRINTF(idx,cnt) __attribute__((format(printf,idx,cnt)))
+# else
+# define REDISMODULE_ATTR_PRINTF(idx,cnt)
+# endif
+#endif
+
+/* Gives the API function-pointer globals "common" linkage so multiple
+ * translation units including this header don't produce duplicate-symbol
+ * errors. Excluded for clang in C++ mode, where __common__ is rejected. */
+#ifndef REDISMODULE_ATTR_COMMON
+# if defined(__GNUC__) && !(defined(__clang__) && defined(__cplusplus))
+# define REDISMODULE_ATTR_COMMON __attribute__((__common__))
+# else
+# define REDISMODULE_ATTR_COMMON
+# endif
+#endif
+
+/* Incomplete structures for compiler checks but opaque access. Modules
+ * only ever hold pointers to these; the layouts live in the core. */
+typedef struct RedisModuleCtx RedisModuleCtx; /* Per-call module context. */
+typedef struct RedisModuleCommand RedisModuleCommand; /* Registered command handle. */
+typedef struct RedisModuleCallReply RedisModuleCallReply; /* Reply from RedisModule_Call. */
+typedef struct RedisModuleType RedisModuleType; /* Registered module data type. */
+typedef struct RedisModuleBlockedClient RedisModuleBlockedClient;
+typedef struct RedisModuleClusterInfo RedisModuleClusterInfo;
+typedef struct RedisModuleDict RedisModuleDict;
+typedef struct RedisModuleDictIter RedisModuleDictIter;
+typedef struct RedisModuleCommandFilterCtx RedisModuleCommandFilterCtx;
+typedef struct RedisModuleCommandFilter RedisModuleCommandFilter;
+typedef struct RedisModuleServerInfoData RedisModuleServerInfoData;
+typedef struct RedisModuleScanCursor RedisModuleScanCursor;
+typedef struct RedisModuleUser RedisModuleUser;
+typedef struct RedisModuleKeyOptCtx RedisModuleKeyOptCtx;
+typedef struct RedisModuleRdbStream RedisModuleRdbStream;
+
+/* --- Callback signatures modules implement and hand to the core. --- */
+
+/* Command entry point: returns REDISMODULE_OK/ERR per the module API. */
+typedef int (*RedisModuleCmdFunc)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc);
+typedef void (*RedisModuleDisconnectFunc)(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc);
+/* Keyspace-notification callback. */
+typedef int (*RedisModuleNotificationFunc)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key);
+typedef void (*RedisModulePostNotificationJobFunc) (RedisModuleCtx *ctx, void *pd);
+
+/* Module data-type lifecycle callbacks (see RedisModuleTypeMethods below).
+ * The "2" variants receive a RedisModuleKeyOptCtx with extra context. */
+typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver);
+typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value);
+typedef int (*RedisModuleTypeAuxLoadFunc)(RedisModuleIO *rdb, int encver, int when);
+typedef void (*RedisModuleTypeAuxSaveFunc)(RedisModuleIO *rdb, int when);
+typedef void (*RedisModuleTypeRewriteFunc)(RedisModuleIO *aof, RedisModuleString *key, void *value);
+typedef size_t (*RedisModuleTypeMemUsageFunc)(const void *value);
+typedef size_t (*RedisModuleTypeMemUsageFunc2)(RedisModuleKeyOptCtx *ctx, const void *value, size_t sample_size);
+typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value);
+typedef void (*RedisModuleTypeFreeFunc)(void *value);
+typedef size_t (*RedisModuleTypeFreeEffortFunc)(RedisModuleString *key, const void *value);
+typedef size_t (*RedisModuleTypeFreeEffortFunc2)(RedisModuleKeyOptCtx *ctx, const void *value);
+typedef void (*RedisModuleTypeUnlinkFunc)(RedisModuleString *key, const void *value);
+typedef void (*RedisModuleTypeUnlinkFunc2)(RedisModuleKeyOptCtx *ctx, const void *value);
+typedef void *(*RedisModuleTypeCopyFunc)(RedisModuleString *fromkey, RedisModuleString *tokey, const void *value);
+typedef void *(*RedisModuleTypeCopyFunc2)(RedisModuleKeyOptCtx *ctx, const void *value);
+typedef int (*RedisModuleTypeDefragFunc)(RedisModuleDefragCtx *ctx, RedisModuleString *key, void **value);
+
+/* Cluster messaging, timers, command filters, fork completion, scans. */
+typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len);
+typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data);
+typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCommandFilterCtx *filter);
+typedef void (*RedisModuleForkDoneHandler) (int exitcode, int bysignal, void *user_data);
+typedef void (*RedisModuleScanCB)(RedisModuleCtx *ctx, RedisModuleString *keyname, RedisModuleKey *key, void *privdata);
+typedef void (*RedisModuleScanKeyCB)(RedisModuleKey *key, RedisModuleString *field, RedisModuleString *value, void *privdata);
+
+/* Module-config accessors: one get/set pair per config value type. The
+ * setters report failure through the *err out-parameter. */
+typedef RedisModuleString * (*RedisModuleConfigGetStringFunc)(const char *name, void *privdata);
+typedef long long (*RedisModuleConfigGetNumericFunc)(const char *name, void *privdata);
+typedef int (*RedisModuleConfigGetBoolFunc)(const char *name, void *privdata);
+typedef int (*RedisModuleConfigGetEnumFunc)(const char *name, void *privdata);
+typedef int (*RedisModuleConfigSetStringFunc)(const char *name, RedisModuleString *val, void *privdata, RedisModuleString **err);
+typedef int (*RedisModuleConfigSetNumericFunc)(const char *name, long long val, void *privdata, RedisModuleString **err);
+typedef int (*RedisModuleConfigSetBoolFunc)(const char *name, int val, void *privdata, RedisModuleString **err);
+typedef int (*RedisModuleConfigSetEnumFunc)(const char *name, int val, void *privdata, RedisModuleString **err);
+typedef int (*RedisModuleConfigApplyFunc)(RedisModuleCtx *ctx, void *privdata, RedisModuleString **err);
+
+/* Unblock handler for promise-style Call replies, and module auth hook. */
+typedef void (*RedisModuleOnUnblocked)(RedisModuleCtx *ctx, RedisModuleCallReply *reply, void *private_data);
+typedef int (*RedisModuleAuthCallback)(RedisModuleCtx *ctx, RedisModuleString *username, RedisModuleString *password, RedisModuleString **err);
+
+/* Method table a module fills in when registering a custom data type
+ * with RedisModule_CreateDataType. Unused callbacks may be left NULL.
+ * Field order is ABI: new members are only ever appended, and `version`
+ * tells the core which members are present. */
+typedef struct RedisModuleTypeMethods {
+ uint64_t version; /* Struct-version the module was compiled against. */
+ RedisModuleTypeLoadFunc rdb_load; /* Deserialize a value from RDB. */
+ RedisModuleTypeSaveFunc rdb_save; /* Serialize a value to RDB. */
+ RedisModuleTypeRewriteFunc aof_rewrite; /* Emit commands on AOF rewrite. */
+ RedisModuleTypeMemUsageFunc mem_usage; /* Report memory used by a value. */
+ RedisModuleTypeDigestFunc digest; /* Contribute to DEBUG DIGEST. */
+ RedisModuleTypeFreeFunc free; /* Destroy a value. */
+ RedisModuleTypeAuxLoadFunc aux_load; /* Load type-global aux RDB data. */
+ RedisModuleTypeAuxSaveFunc aux_save; /* Save type-global aux RDB data. */
+ int aux_save_triggers; /* When aux_save runs (before/after keyspace). */
+ RedisModuleTypeFreeEffortFunc free_effort; /* Cost estimate for lazy free. */
+ RedisModuleTypeUnlinkFunc unlink; /* Value removed from keyspace. */
+ RedisModuleTypeCopyFunc copy; /* Duplicate a value (COPY command). */
+ RedisModuleTypeDefragFunc defrag; /* Active-defrag hook. */
+ /* "2" variants supersede the above; they receive a RedisModuleKeyOptCtx. */
+ RedisModuleTypeMemUsageFunc2 mem_usage2;
+ RedisModuleTypeFreeEffortFunc2 free_effort2;
+ RedisModuleTypeUnlinkFunc2 unlink2;
+ RedisModuleTypeCopyFunc2 copy2;
+ RedisModuleTypeAuxSaveFunc aux_save2; /* Like aux_save, newer revision. */
+} RedisModuleTypeMethods;
+
+/* Resolves one core API symbol at load time: stringizes the short name
+ * (e.g. GET_API(Alloc) looks up "RedisModule_Alloc") and stores the
+ * resulting pointer into the matching RedisModule_<name> global below. */
+#define REDISMODULE_GET_API(name) \
+ RedisModule_GetApi("RedisModule_" #name, ((void **)&RedisModule_ ## name))
+
+/* Default API declaration prefix (not 'extern' for backwards compatibility) */
+#ifndef REDISMODULE_API
+#define REDISMODULE_API
+#endif
+
+/* Default API declaration suffix (compiler attributes) */
+#ifndef REDISMODULE_ATTR
+#define REDISMODULE_ATTR REDISMODULE_ATTR_COMMON
+#endif
+
+REDISMODULE_API void * (*RedisModule_Alloc)(size_t bytes) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_TryAlloc)(size_t bytes) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_Realloc)(void *ptr, size_t bytes) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_Free)(void *ptr) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_Calloc)(size_t nmemb, size_t size) REDISMODULE_ATTR;
+REDISMODULE_API char * (*RedisModule_Strdup)(const char *str) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetApi)(const char *, void *) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CreateCommand)(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleCommand *(*RedisModule_GetCommand)(RedisModuleCtx *ctx, const char *name) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CreateSubcommand)(RedisModuleCommand *parent, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SetCommandInfo)(RedisModuleCommand *command, const RedisModuleCommandInfo *info) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SetCommandACLCategories)(RedisModuleCommand *command, const char *ctgrsflags) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SetModuleAttribs)(RedisModuleCtx *ctx, const char *name, int ver, int apiver) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_IsModuleNameBusy)(const char *name) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_WrongArity)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithLongLong)(RedisModuleCtx *ctx, long long ll) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetSelectedDb)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SelectDb)(RedisModuleCtx *ctx, int newid) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_KeyExists)(RedisModuleCtx *ctx, RedisModuleString *keyname) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleKey * (*RedisModule_OpenKey)(RedisModuleCtx *ctx, RedisModuleString *keyname, int mode) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetOpenKeyModesAll)(void) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_CloseKey)(RedisModuleKey *kp) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_KeyType)(RedisModuleKey *kp) REDISMODULE_ATTR;
+REDISMODULE_API size_t (*RedisModule_ValueLength)(RedisModuleKey *kp) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ListPush)(RedisModuleKey *kp, int where, RedisModuleString *ele) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_ListPop)(RedisModuleKey *key, int where) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_ListGet)(RedisModuleKey *key, long index) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ListSet)(RedisModuleKey *key, long index, RedisModuleString *value) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ListInsert)(RedisModuleKey *key, long index, RedisModuleString *value) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ListDelete)(RedisModuleKey *key, long index) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleCallReply * (*RedisModule_Call)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) REDISMODULE_ATTR;
+REDISMODULE_API const char * (*RedisModule_CallReplyProto)(RedisModuleCallReply *reply, size_t *len) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_FreeCallReply)(RedisModuleCallReply *reply) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CallReplyType)(RedisModuleCallReply *reply) REDISMODULE_ATTR;
+REDISMODULE_API long long (*RedisModule_CallReplyInteger)(RedisModuleCallReply *reply) REDISMODULE_ATTR;
+REDISMODULE_API double (*RedisModule_CallReplyDouble)(RedisModuleCallReply *reply) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CallReplyBool)(RedisModuleCallReply *reply) REDISMODULE_ATTR;
+REDISMODULE_API const char* (*RedisModule_CallReplyBigNumber)(RedisModuleCallReply *reply, size_t *len) REDISMODULE_ATTR;
+REDISMODULE_API const char* (*RedisModule_CallReplyVerbatim)(RedisModuleCallReply *reply, size_t *len, const char **format) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleCallReply * (*RedisModule_CallReplySetElement)(RedisModuleCallReply *reply, size_t idx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CallReplyMapElement)(RedisModuleCallReply *reply, size_t idx, RedisModuleCallReply **key, RedisModuleCallReply **val) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CallReplyAttributeElement)(RedisModuleCallReply *reply, size_t idx, RedisModuleCallReply **key, RedisModuleCallReply **val) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_CallReplyPromiseSetUnblockHandler)(RedisModuleCallReply *reply, RedisModuleOnUnblocked on_unblock, void *private_data) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CallReplyPromiseAbort)(RedisModuleCallReply *reply, void **private_data) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleCallReply * (*RedisModule_CallReplyAttribute)(RedisModuleCallReply *reply) REDISMODULE_ATTR;
+REDISMODULE_API size_t (*RedisModule_CallReplyLength)(RedisModuleCallReply *reply) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleCallReply * (*RedisModule_CallReplyArrayElement)(RedisModuleCallReply *reply, size_t idx) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateString)(RedisModuleCtx *ctx, const char *ptr, size_t len) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongLong)(RedisModuleCtx *ctx, long long ll) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromULongLong)(RedisModuleCtx *ctx, unsigned long long ull) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromDouble)(RedisModuleCtx *ctx, double d) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongDouble)(RedisModuleCtx *ctx, long double ld, int humanfriendly) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromString)(RedisModuleCtx *ctx, const RedisModuleString *str) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromStreamID)(RedisModuleCtx *ctx, const RedisModuleStreamID *id) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringPrintf)(RedisModuleCtx *ctx, const char *fmt, ...) REDISMODULE_ATTR_PRINTF(2,3) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR;
+REDISMODULE_API const char * (*RedisModule_StringPtrLen)(const RedisModuleString *str, size_t *len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithError)(RedisModuleCtx *ctx, const char *err) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithErrorFormat)(RedisModuleCtx *ctx, const char *fmt, ...) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithSimpleString)(RedisModuleCtx *ctx, const char *msg) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithArray)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithMap)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithSet)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithAttribute)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithNullArray)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithEmptyArray)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ReplySetArrayLength)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ReplySetMapLength)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ReplySetSetLength)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ReplySetAttributeLength)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ReplySetPushLength)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithStringBuffer)(RedisModuleCtx *ctx, const char *buf, size_t len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithCString)(RedisModuleCtx *ctx, const char *buf) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithEmptyString)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithVerbatimString)(RedisModuleCtx *ctx, const char *buf, size_t len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithVerbatimStringType)(RedisModuleCtx *ctx, const char *buf, size_t len, const char *ext) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithNull)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithBool)(RedisModuleCtx *ctx, int b) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithLongDouble)(RedisModuleCtx *ctx, long double d) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithDouble)(RedisModuleCtx *ctx, double d) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithBigNumber)(RedisModuleCtx *ctx, const char *bignum, size_t len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplyWithCallReply)(RedisModuleCtx *ctx, RedisModuleCallReply *reply) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringToLongLong)(const RedisModuleString *str, long long *ll) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringToULongLong)(const RedisModuleString *str, unsigned long long *ull) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringToDouble)(const RedisModuleString *str, double *d) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringToLongDouble)(const RedisModuleString *str, long double *d) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringToStreamID)(const RedisModuleString *str, RedisModuleStreamID *id) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_AutoMemory)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_Replicate)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ReplicateVerbatim)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API const char * (*RedisModule_CallReplyStringPtr)(RedisModuleCallReply *reply, size_t *len) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromCallReply)(RedisModuleCallReply *reply) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DeleteKey)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_UnlinkKey)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringSet)(RedisModuleKey *key, RedisModuleString *str) REDISMODULE_ATTR;
+REDISMODULE_API char * (*RedisModule_StringDMA)(RedisModuleKey *key, size_t *len, int mode) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringTruncate)(RedisModuleKey *key, size_t newlen) REDISMODULE_ATTR;
+REDISMODULE_API mstime_t (*RedisModule_GetExpire)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SetExpire)(RedisModuleKey *key, mstime_t expire) REDISMODULE_ATTR;
+REDISMODULE_API mstime_t (*RedisModule_GetAbsExpire)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SetAbsExpire)(RedisModuleKey *key, mstime_t expire) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ResetDataset)(int restart_aof, int async) REDISMODULE_ATTR;
+REDISMODULE_API unsigned long long (*RedisModule_DbSize)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_RandomKey)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetAdd)(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetIncrby)(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr, double *newscore) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetScore)(RedisModuleKey *key, RedisModuleString *ele, double *score) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetRem)(RedisModuleKey *key, RedisModuleString *ele, int *deleted) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ZsetRangeStop)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetFirstInScoreRange)(RedisModuleKey *key, double min, double max, int minex, int maxex) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetLastInScoreRange)(RedisModuleKey *key, double min, double max, int minex, int maxex) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetFirstInLexRange)(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetLastInLexRange)(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_ZsetRangeCurrentElement)(RedisModuleKey *key, double *score) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetRangeNext)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetRangePrev)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ZsetRangeEndReached)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_HashSet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_HashGet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamAdd)(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, int64_t numfields) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamDelete)(RedisModuleKey *key, RedisModuleStreamID *id) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorStart)(RedisModuleKey *key, int flags, RedisModuleStreamID *startid, RedisModuleStreamID *endid) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorStop)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorNextID)(RedisModuleKey *key, RedisModuleStreamID *id, long *numfields) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorNextField)(RedisModuleKey *key, RedisModuleString **field_ptr, RedisModuleString **value_ptr) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StreamIteratorDelete)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API long long (*RedisModule_StreamTrimByLength)(RedisModuleKey *key, int flags, long long length) REDISMODULE_ATTR;
+REDISMODULE_API long long (*RedisModule_StreamTrimByID)(RedisModuleKey *key, int flags, RedisModuleStreamID *id) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_KeyAtPosWithFlags)(RedisModuleCtx *ctx, int pos, int flags) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_IsChannelsPositionRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ChannelAtPosWithFlags)(RedisModuleCtx *ctx, int pos, int flags) REDISMODULE_ATTR;
+REDISMODULE_API unsigned long long (*RedisModule_GetClientId)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_GetClientUserNameById)(RedisModuleCtx *ctx, uint64_t id) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetClientInfoById)(void *ci, uint64_t id) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_GetClientNameById)(RedisModuleCtx *ctx, uint64_t id) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SetClientNameById)(uint64_t id, RedisModuleString *name) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_PublishMessage)(RedisModuleCtx *ctx, RedisModuleString *channel, RedisModuleString *message) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_PublishMessageShard)(RedisModuleCtx *ctx, RedisModuleString *channel, RedisModuleString *message) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetContextFlags)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_AvoidReplicaTraffic)(void) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_PoolAlloc)(RedisModuleCtx *ctx, size_t bytes) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleType * (*RedisModule_CreateDataType)(RedisModuleCtx *ctx, const char *name, int encver, RedisModuleTypeMethods *typemethods) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ModuleTypeSetValue)(RedisModuleKey *key, RedisModuleType *mt, void *value) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ModuleTypeReplaceValue)(RedisModuleKey *key, RedisModuleType *mt, void *new_value, void **old_value) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleType * (*RedisModule_ModuleTypeGetType)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_ModuleTypeGetValue)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_IsIOError)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SetModuleOptions)(RedisModuleCtx *ctx, int options) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SignalModifiedKey)(RedisModuleCtx *ctx, RedisModuleString *keyname) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SaveUnsigned)(RedisModuleIO *io, uint64_t value) REDISMODULE_ATTR;
+REDISMODULE_API uint64_t (*RedisModule_LoadUnsigned)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SaveSigned)(RedisModuleIO *io, int64_t value) REDISMODULE_ATTR;
+REDISMODULE_API int64_t (*RedisModule_LoadSigned)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_EmitAOF)(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SaveString)(RedisModuleIO *io, RedisModuleString *s) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SaveStringBuffer)(RedisModuleIO *io, const char *str, size_t len) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_LoadString)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API char * (*RedisModule_LoadStringBuffer)(RedisModuleIO *io, size_t *lenptr) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SaveDouble)(RedisModuleIO *io, double value) REDISMODULE_ATTR;
+REDISMODULE_API double (*RedisModule_LoadDouble)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SaveFloat)(RedisModuleIO *io, float value) REDISMODULE_ATTR;
+REDISMODULE_API float (*RedisModule_LoadFloat)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SaveLongDouble)(RedisModuleIO *io, long double value) REDISMODULE_ATTR;
+REDISMODULE_API long double (*RedisModule_LoadLongDouble)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_LoadDataTypeFromString)(const RedisModuleString *str, const RedisModuleType *mt) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_LoadDataTypeFromStringEncver)(const RedisModuleString *str, const RedisModuleType *mt, int encver) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_SaveDataTypeToString)(RedisModuleCtx *ctx, void *data, const RedisModuleType *mt) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...) REDISMODULE_ATTR REDISMODULE_ATTR_PRINTF(3,4);
+REDISMODULE_API void (*RedisModule_LogIOError)(RedisModuleIO *io, const char *levelstr, const char *fmt, ...) REDISMODULE_ATTR REDISMODULE_ATTR_PRINTF(3,4);
+REDISMODULE_API void (*RedisModule__Assert)(const char *estr, const char *file, int line) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_LatencyAddSample)(const char *event, mstime_t latency) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_TrimStringAllocation)(RedisModuleString *str) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_HoldString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StringCompare)(const RedisModuleString *a, const RedisModuleString *b) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleCtx * (*RedisModule_GetContextFromIO)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API const RedisModuleString * (*RedisModule_GetKeyNameFromIO)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API const RedisModuleString * (*RedisModule_GetKeyNameFromModuleKey)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetDbIdFromModuleKey)(RedisModuleKey *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetDbIdFromIO)(RedisModuleIO *io) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetDbIdFromOptCtx)(RedisModuleKeyOptCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetToDbIdFromOptCtx)(RedisModuleKeyOptCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API const RedisModuleString * (*RedisModule_GetKeyNameFromOptCtx)(RedisModuleKeyOptCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API const RedisModuleString * (*RedisModule_GetToKeyNameFromOptCtx)(RedisModuleKeyOptCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API mstime_t (*RedisModule_Milliseconds)(void) REDISMODULE_ATTR;
+REDISMODULE_API uint64_t (*RedisModule_MonotonicMicroseconds)(void) REDISMODULE_ATTR;
+REDISMODULE_API ustime_t (*RedisModule_Microseconds)(void) REDISMODULE_ATTR;
+REDISMODULE_API ustime_t (*RedisModule_CachedMicroseconds)(void) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_DigestAddStringBuffer)(RedisModuleDigest *md, const char *ele, size_t len) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_DigestAddLongLong)(RedisModuleDigest *md, long long ele) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_DigestEndSequence)(RedisModuleDigest *md) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetDbIdFromDigest)(RedisModuleDigest *dig) REDISMODULE_ATTR;
+REDISMODULE_API const RedisModuleString * (*RedisModule_GetKeyNameFromDigest)(RedisModuleDigest *dig) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleDict * (*RedisModule_CreateDict)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_FreeDict)(RedisModuleCtx *ctx, RedisModuleDict *d) REDISMODULE_ATTR;
+REDISMODULE_API uint64_t (*RedisModule_DictSize)(RedisModuleDict *d) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictSetC)(RedisModuleDict *d, void *key, size_t keylen, void *ptr) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictReplaceC)(RedisModuleDict *d, void *key, size_t keylen, void *ptr) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictSet)(RedisModuleDict *d, RedisModuleString *key, void *ptr) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictReplace)(RedisModuleDict *d, RedisModuleString *key, void *ptr) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_DictGetC)(RedisModuleDict *d, void *key, size_t keylen, int *nokey) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_DictGet)(RedisModuleDict *d, RedisModuleString *key, int *nokey) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictDelC)(RedisModuleDict *d, void *key, size_t keylen, void *oldval) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictDel)(RedisModuleDict *d, RedisModuleString *key, void *oldval) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleDictIter * (*RedisModule_DictIteratorStartC)(RedisModuleDict *d, const char *op, void *key, size_t keylen) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleDictIter * (*RedisModule_DictIteratorStart)(RedisModuleDict *d, const char *op, RedisModuleString *key) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_DictIteratorStop)(RedisModuleDictIter *di) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictIteratorReseekC)(RedisModuleDictIter *di, const char *op, void *key, size_t keylen) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictIteratorReseek)(RedisModuleDictIter *di, const char *op, RedisModuleString *key) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_DictNextC)(RedisModuleDictIter *di, size_t *keylen, void **dataptr) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_DictPrevC)(RedisModuleDictIter *di, size_t *keylen, void **dataptr) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_DictNext)(RedisModuleCtx *ctx, RedisModuleDictIter *di, void **dataptr) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_DictPrev)(RedisModuleCtx *ctx, RedisModuleDictIter *di, void **dataptr) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictCompareC)(RedisModuleDictIter *di, const char *op, void *key, size_t keylen) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DictCompare)(RedisModuleDictIter *di, const char *op, RedisModuleString *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_RegisterInfoFunc)(RedisModuleCtx *ctx, RedisModuleInfoFunc cb) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_RegisterAuthCallback)(RedisModuleCtx *ctx, RedisModuleAuthCallback cb) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_InfoAddSection)(RedisModuleInfoCtx *ctx, const char *name) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_InfoBeginDictField)(RedisModuleInfoCtx *ctx, const char *name) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_InfoEndDictField)(RedisModuleInfoCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_InfoAddFieldString)(RedisModuleInfoCtx *ctx, const char *field, RedisModuleString *value) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_InfoAddFieldCString)(RedisModuleInfoCtx *ctx, const char *field,const char *value) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_InfoAddFieldDouble)(RedisModuleInfoCtx *ctx, const char *field, double value) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_InfoAddFieldLongLong)(RedisModuleInfoCtx *ctx, const char *field, long long value) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_InfoAddFieldULongLong)(RedisModuleInfoCtx *ctx, const char *field, unsigned long long value) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleServerInfoData * (*RedisModule_GetServerInfo)(RedisModuleCtx *ctx, const char *section) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_FreeServerInfo)(RedisModuleCtx *ctx, RedisModuleServerInfoData *data) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_ServerInfoGetField)(RedisModuleCtx *ctx, RedisModuleServerInfoData *data, const char* field) REDISMODULE_ATTR;
+REDISMODULE_API const char * (*RedisModule_ServerInfoGetFieldC)(RedisModuleServerInfoData *data, const char* field) REDISMODULE_ATTR;
+REDISMODULE_API long long (*RedisModule_ServerInfoGetFieldSigned)(RedisModuleServerInfoData *data, const char* field, int *out_err) REDISMODULE_ATTR;
+REDISMODULE_API unsigned long long (*RedisModule_ServerInfoGetFieldUnsigned)(RedisModuleServerInfoData *data, const char* field, int *out_err) REDISMODULE_ATTR;
+REDISMODULE_API double (*RedisModule_ServerInfoGetFieldDouble)(RedisModuleServerInfoData *data, const char* field, int *out_err) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SubscribeToServerEvent)(RedisModuleCtx *ctx, RedisModuleEvent event, RedisModuleEventCallback callback) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SetLRU)(RedisModuleKey *key, mstime_t lru_idle) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetLRU)(RedisModuleKey *key, mstime_t *lru_idle) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SetLFU)(RedisModuleKey *key, long long lfu_freq) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetLFU)(RedisModuleKey *key, long long *lfu_freq) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_BlockClientOnKeys)(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*), long long timeout_ms, RedisModuleString **keys, int numkeys, void *privdata) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_BlockClientOnKeysWithFlags)(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*), long long timeout_ms, RedisModuleString **keys, int numkeys, void *privdata, int flags) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SignalKeyAsReady)(RedisModuleCtx *ctx, RedisModuleString *key) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_GetBlockedClientReadyKey)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleScanCursor * (*RedisModule_ScanCursorCreate)(void) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ScanCursorRestart)(RedisModuleScanCursor *cursor) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ScanCursorDestroy)(RedisModuleScanCursor *cursor) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_Scan)(RedisModuleCtx *ctx, RedisModuleScanCursor *cursor, RedisModuleScanCB fn, void *privdata) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ScanKey)(RedisModuleKey *key, RedisModuleScanCursor *cursor, RedisModuleScanKeyCB fn, void *privdata) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetContextFlagsAll)(void) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetModuleOptionsAll)(void) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetKeyspaceNotificationFlagsAll)(void) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_IsSubEventSupported)(RedisModuleEvent event, uint64_t subevent) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetServerVersion)(void) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetTypeMethodVersion)(void) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_Yield)(RedisModuleCtx *ctx, int flags, const char *busy_reply) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_BlockClient)(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*), long long timeout_ms) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_BlockClientGetPrivateData)(RedisModuleBlockedClient *blocked_client) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_BlockClientSetPrivateData)(RedisModuleBlockedClient *blocked_client, void *private_data) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_BlockClientOnAuth)(RedisModuleCtx *ctx, RedisModuleAuthCallback reply_callback, void (*free_privdata)(RedisModuleCtx*,void*)) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_UnblockClient)(RedisModuleBlockedClient *bc, void *privdata) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_IsBlockedReplyRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_GetBlockedClientHandle)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_AbortBlock)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_BlockedClientMeasureTimeStart)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_BlockedClientMeasureTimeEnd)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleCtx * (*RedisModule_GetThreadSafeContext)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleCtx * (*RedisModule_GetDetachedThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ThreadSafeContextLock)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ThreadSafeContextTryLock)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ThreadSafeContextUnlock)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SubscribeToKeyspaceEvents)(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc cb) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_AddPostNotificationJob)(RedisModuleCtx *ctx, RedisModulePostNotificationJobFunc callback, void *pd, void (*free_pd)(void*)) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_NotifyKeyspaceEvent)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetNotifyKeyspaceEvents)(void) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_BlockedClientDisconnected)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_RegisterClusterMessageReceiver)(RedisModuleCtx *ctx, uint8_t type, RedisModuleClusterMessageReceiver callback) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SendClusterMessage)(RedisModuleCtx *ctx, const char *target_id, uint8_t type, const char *msg, uint32_t len) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetClusterNodeInfo)(RedisModuleCtx *ctx, const char *id, char *ip, char *master_id, int *port, int *flags) REDISMODULE_ATTR;
+REDISMODULE_API char ** (*RedisModule_GetClusterNodesList)(RedisModuleCtx *ctx, size_t *numnodes) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_FreeClusterNodesList)(char **ids) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleTimerID (*RedisModule_CreateTimer)(RedisModuleCtx *ctx, mstime_t period, RedisModuleTimerProc callback, void *data) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_StopTimer)(RedisModuleCtx *ctx, RedisModuleTimerID id, void **data) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetTimerInfo)(RedisModuleCtx *ctx, RedisModuleTimerID id, uint64_t *remaining, void **data) REDISMODULE_ATTR;
+REDISMODULE_API const char * (*RedisModule_GetMyClusterID)(void) REDISMODULE_ATTR;
+REDISMODULE_API size_t (*RedisModule_GetClusterSize)(void) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_GetRandomBytes)(unsigned char *dst, size_t len) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_GetRandomHexChars)(char *dst, size_t len) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SetDisconnectCallback)(RedisModuleBlockedClient *bc, RedisModuleDisconnectFunc callback) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint64_t flags) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func) REDISMODULE_ATTR;
+REDISMODULE_API void * (*RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleCommandFilter * (*RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb, int flags) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_UnregisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilter *filter) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *fctx) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *fctx, int pos) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CommandFilterArgInsert)(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CommandFilterArgReplace)(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_CommandFilterArgDelete)(RedisModuleCommandFilterCtx *fctx, int pos) REDISMODULE_ATTR;
+REDISMODULE_API unsigned long long (*RedisModule_CommandFilterGetClientId)(RedisModuleCommandFilterCtx *fctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_Fork)(RedisModuleForkDoneHandler cb, void *user_data) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SendChildHeartbeat)(double progress) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ExitFromChild)(int retcode) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_KillForkChild)(int child_pid) REDISMODULE_ATTR;
+REDISMODULE_API float (*RedisModule_GetUsedMemoryRatio)(void) REDISMODULE_ATTR;
+REDISMODULE_API size_t (*RedisModule_MallocSize)(void* ptr) REDISMODULE_ATTR;
+REDISMODULE_API size_t (*RedisModule_MallocUsableSize)(void *ptr) REDISMODULE_ATTR;
+REDISMODULE_API size_t (*RedisModule_MallocSizeString)(RedisModuleString* str) REDISMODULE_ATTR;
+REDISMODULE_API size_t (*RedisModule_MallocSizeDict)(RedisModuleDict* dict) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleUser * (*RedisModule_CreateModuleUser)(const char *name) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_FreeModuleUser)(RedisModuleUser *user) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_SetContextUser)(RedisModuleCtx *ctx, const RedisModuleUser *user) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SetModuleUserACL)(RedisModuleUser *user, const char* acl) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_SetModuleUserACLString)(RedisModuleCtx * ctx, RedisModuleUser *user, const char* acl, RedisModuleString **error) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_GetModuleUserACLString)(RedisModuleUser *user) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_GetCurrentUserName)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleUser * (*RedisModule_GetModuleUserFromUserName)(RedisModuleString *name) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ACLCheckCommandPermissions)(RedisModuleUser *user, RedisModuleString **argv, int argc) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ACLCheckKeyPermissions)(RedisModuleUser *user, RedisModuleString *key, int flags) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_ACLCheckChannelPermissions)(RedisModuleUser *user, RedisModuleString *ch, int literal) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ACLAddLogEntry)(RedisModuleCtx *ctx, RedisModuleUser *user, RedisModuleString *object, RedisModuleACLLogEntryReason reason) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_ACLAddLogEntryByUserName)(RedisModuleCtx *ctx, RedisModuleString *user, RedisModuleString *object, RedisModuleACLLogEntryReason reason) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_AuthenticateClientWithACLUser)(RedisModuleCtx *ctx, const char *name, size_t len, RedisModuleUserChangedFunc callback, void *privdata, uint64_t *client_id) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_AuthenticateClientWithUser)(RedisModuleCtx *ctx, RedisModuleUser *user, RedisModuleUserChangedFunc callback, void *privdata, uint64_t *client_id) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DeauthenticateAndCloseClient)(RedisModuleCtx *ctx, uint64_t client_id) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_RedactClientCommandArgument)(RedisModuleCtx *ctx, int pos) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString * (*RedisModule_GetClientCertificate)(RedisModuleCtx *ctx, uint64_t id) REDISMODULE_ATTR;
+REDISMODULE_API int *(*RedisModule_GetCommandKeys)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, int *num_keys) REDISMODULE_ATTR;
+REDISMODULE_API int *(*RedisModule_GetCommandKeysWithFlags)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, int *num_keys, int **out_flags) REDISMODULE_ATTR;
+REDISMODULE_API const char *(*RedisModule_GetCurrentCommandName)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_RegisterDefragFunc)(RedisModuleCtx *ctx, RedisModuleDefragFunc func) REDISMODULE_ATTR;
+REDISMODULE_API void *(*RedisModule_DefragAlloc)(RedisModuleDefragCtx *ctx, void *ptr) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleString *(*RedisModule_DefragRedisModuleString)(RedisModuleDefragCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DefragShouldStop)(RedisModuleDefragCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DefragCursorSet)(RedisModuleDefragCtx *ctx, unsigned long cursor) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_DefragCursorGet)(RedisModuleDefragCtx *ctx, unsigned long *cursor) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_GetDbIdFromDefragCtx)(RedisModuleDefragCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API const RedisModuleString * (*RedisModule_GetKeyNameFromDefragCtx)(RedisModuleDefragCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_EventLoopAdd)(int fd, int mask, RedisModuleEventLoopFunc func, void *user_data) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_EventLoopDel)(int fd, int mask) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_EventLoopAddOneShot)(RedisModuleEventLoopOneShotFunc func, void *user_data) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_RegisterBoolConfig)(RedisModuleCtx *ctx, const char *name, int default_val, unsigned int flags, RedisModuleConfigGetBoolFunc getfn, RedisModuleConfigSetBoolFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_RegisterNumericConfig)(RedisModuleCtx *ctx, const char *name, long long default_val, unsigned int flags, long long min, long long max, RedisModuleConfigGetNumericFunc getfn, RedisModuleConfigSetNumericFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_RegisterStringConfig)(RedisModuleCtx *ctx, const char *name, const char *default_val, unsigned int flags, RedisModuleConfigGetStringFunc getfn, RedisModuleConfigSetStringFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_RegisterEnumConfig)(RedisModuleCtx *ctx, const char *name, int default_val, unsigned int flags, const char **enum_values, const int *int_values, int num_enum_vals, RedisModuleConfigGetEnumFunc getfn, RedisModuleConfigSetEnumFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_LoadConfigs)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
+REDISMODULE_API RedisModuleRdbStream *(*RedisModule_RdbStreamCreateFromFile)(const char *filename) REDISMODULE_ATTR;
+REDISMODULE_API void (*RedisModule_RdbStreamFree)(RedisModuleRdbStream *stream) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_RdbLoad)(RedisModuleCtx *ctx, RedisModuleRdbStream *stream, int flags) REDISMODULE_ATTR;
+REDISMODULE_API int (*RedisModule_RdbSave)(RedisModuleCtx *ctx, RedisModuleRdbStream *stream, int flags) REDISMODULE_ATTR;
+
+#define RedisModule_IsAOFClient(id) ((id) == UINT64_MAX)
+
+/* This is included inline inside each Redis module. */
+/* Bootstrap helper included inline in every Redis module. It resolves all
+ * of the RedisModule_* function pointers declared above by querying the
+ * core's GetApi callback, then registers the module's name/version.
+ * Returns REDISMODULE_OK on success, or REDISMODULE_ERR when a module with
+ * the same name is already loaded. */
+static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) REDISMODULE_ATTR_UNUSED;
+static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) {
+ /* The first machine word of the (opaque) context is the core's GetApi
+  * function pointer; grab it so REDISMODULE_GET_API() can look up every
+  * other API symbol by name. */
+ void *getapifuncptr = ((void**)ctx)[0];
+ RedisModule_GetApi = (int (*)(const char *, void *)) (unsigned long)getapifuncptr;
+ REDISMODULE_GET_API(Alloc);
+ REDISMODULE_GET_API(TryAlloc);
+ REDISMODULE_GET_API(Calloc);
+ REDISMODULE_GET_API(Free);
+ REDISMODULE_GET_API(Realloc);
+ REDISMODULE_GET_API(Strdup);
+ REDISMODULE_GET_API(CreateCommand);
+ REDISMODULE_GET_API(GetCommand);
+ REDISMODULE_GET_API(CreateSubcommand);
+ REDISMODULE_GET_API(SetCommandInfo);
+ REDISMODULE_GET_API(SetCommandACLCategories);
+ REDISMODULE_GET_API(SetModuleAttribs);
+ REDISMODULE_GET_API(IsModuleNameBusy);
+ REDISMODULE_GET_API(WrongArity);
+ REDISMODULE_GET_API(ReplyWithLongLong);
+ REDISMODULE_GET_API(ReplyWithError);
+ REDISMODULE_GET_API(ReplyWithErrorFormat);
+ REDISMODULE_GET_API(ReplyWithSimpleString);
+ REDISMODULE_GET_API(ReplyWithArray);
+ REDISMODULE_GET_API(ReplyWithMap);
+ REDISMODULE_GET_API(ReplyWithSet);
+ REDISMODULE_GET_API(ReplyWithAttribute);
+ REDISMODULE_GET_API(ReplyWithNullArray);
+ REDISMODULE_GET_API(ReplyWithEmptyArray);
+ REDISMODULE_GET_API(ReplySetArrayLength);
+ REDISMODULE_GET_API(ReplySetMapLength);
+ REDISMODULE_GET_API(ReplySetSetLength);
+ REDISMODULE_GET_API(ReplySetAttributeLength);
+ REDISMODULE_GET_API(ReplySetPushLength);
+ REDISMODULE_GET_API(ReplyWithStringBuffer);
+ REDISMODULE_GET_API(ReplyWithCString);
+ REDISMODULE_GET_API(ReplyWithString);
+ REDISMODULE_GET_API(ReplyWithEmptyString);
+ REDISMODULE_GET_API(ReplyWithVerbatimString);
+ REDISMODULE_GET_API(ReplyWithVerbatimStringType);
+ REDISMODULE_GET_API(ReplyWithNull);
+ REDISMODULE_GET_API(ReplyWithBool);
+ REDISMODULE_GET_API(ReplyWithCallReply);
+ REDISMODULE_GET_API(ReplyWithDouble);
+ REDISMODULE_GET_API(ReplyWithBigNumber);
+ REDISMODULE_GET_API(ReplyWithLongDouble);
+ REDISMODULE_GET_API(GetSelectedDb);
+ REDISMODULE_GET_API(SelectDb);
+ REDISMODULE_GET_API(KeyExists);
+ REDISMODULE_GET_API(OpenKey);
+ REDISMODULE_GET_API(GetOpenKeyModesAll);
+ REDISMODULE_GET_API(CloseKey);
+ REDISMODULE_GET_API(KeyType);
+ REDISMODULE_GET_API(ValueLength);
+ REDISMODULE_GET_API(ListPush);
+ REDISMODULE_GET_API(ListPop);
+ REDISMODULE_GET_API(ListGet);
+ REDISMODULE_GET_API(ListSet);
+ REDISMODULE_GET_API(ListInsert);
+ REDISMODULE_GET_API(ListDelete);
+ REDISMODULE_GET_API(StringToLongLong);
+ REDISMODULE_GET_API(StringToULongLong);
+ REDISMODULE_GET_API(StringToDouble);
+ REDISMODULE_GET_API(StringToLongDouble);
+ REDISMODULE_GET_API(StringToStreamID);
+ REDISMODULE_GET_API(Call);
+ REDISMODULE_GET_API(CallReplyProto);
+ REDISMODULE_GET_API(FreeCallReply);
+ REDISMODULE_GET_API(CallReplyInteger);
+ REDISMODULE_GET_API(CallReplyDouble);
+ REDISMODULE_GET_API(CallReplyBool);
+ REDISMODULE_GET_API(CallReplyBigNumber);
+ REDISMODULE_GET_API(CallReplyVerbatim);
+ REDISMODULE_GET_API(CallReplySetElement);
+ REDISMODULE_GET_API(CallReplyMapElement);
+ REDISMODULE_GET_API(CallReplyAttributeElement);
+ REDISMODULE_GET_API(CallReplyPromiseSetUnblockHandler);
+ REDISMODULE_GET_API(CallReplyPromiseAbort);
+ REDISMODULE_GET_API(CallReplyAttribute);
+ REDISMODULE_GET_API(CallReplyType);
+ REDISMODULE_GET_API(CallReplyLength);
+ REDISMODULE_GET_API(CallReplyArrayElement);
+ REDISMODULE_GET_API(CallReplyStringPtr);
+ REDISMODULE_GET_API(CreateStringFromCallReply);
+ REDISMODULE_GET_API(CreateString);
+ REDISMODULE_GET_API(CreateStringFromLongLong);
+ REDISMODULE_GET_API(CreateStringFromULongLong);
+ REDISMODULE_GET_API(CreateStringFromDouble);
+ REDISMODULE_GET_API(CreateStringFromLongDouble);
+ REDISMODULE_GET_API(CreateStringFromString);
+ REDISMODULE_GET_API(CreateStringFromStreamID);
+ REDISMODULE_GET_API(CreateStringPrintf);
+ REDISMODULE_GET_API(FreeString);
+ REDISMODULE_GET_API(StringPtrLen);
+ REDISMODULE_GET_API(AutoMemory);
+ REDISMODULE_GET_API(Replicate);
+ REDISMODULE_GET_API(ReplicateVerbatim);
+ REDISMODULE_GET_API(DeleteKey);
+ REDISMODULE_GET_API(UnlinkKey);
+ REDISMODULE_GET_API(StringSet);
+ REDISMODULE_GET_API(StringDMA);
+ REDISMODULE_GET_API(StringTruncate);
+ REDISMODULE_GET_API(GetExpire);
+ REDISMODULE_GET_API(SetExpire);
+ REDISMODULE_GET_API(GetAbsExpire);
+ REDISMODULE_GET_API(SetAbsExpire);
+ REDISMODULE_GET_API(ResetDataset);
+ REDISMODULE_GET_API(DbSize);
+ REDISMODULE_GET_API(RandomKey);
+ REDISMODULE_GET_API(ZsetAdd);
+ REDISMODULE_GET_API(ZsetIncrby);
+ REDISMODULE_GET_API(ZsetScore);
+ REDISMODULE_GET_API(ZsetRem);
+ REDISMODULE_GET_API(ZsetRangeStop);
+ REDISMODULE_GET_API(ZsetFirstInScoreRange);
+ REDISMODULE_GET_API(ZsetLastInScoreRange);
+ REDISMODULE_GET_API(ZsetFirstInLexRange);
+ REDISMODULE_GET_API(ZsetLastInLexRange);
+ REDISMODULE_GET_API(ZsetRangeCurrentElement);
+ REDISMODULE_GET_API(ZsetRangeNext);
+ REDISMODULE_GET_API(ZsetRangePrev);
+ REDISMODULE_GET_API(ZsetRangeEndReached);
+ REDISMODULE_GET_API(HashSet);
+ REDISMODULE_GET_API(HashGet);
+ REDISMODULE_GET_API(StreamAdd);
+ REDISMODULE_GET_API(StreamDelete);
+ REDISMODULE_GET_API(StreamIteratorStart);
+ REDISMODULE_GET_API(StreamIteratorStop);
+ REDISMODULE_GET_API(StreamIteratorNextID);
+ REDISMODULE_GET_API(StreamIteratorNextField);
+ REDISMODULE_GET_API(StreamIteratorDelete);
+ REDISMODULE_GET_API(StreamTrimByLength);
+ REDISMODULE_GET_API(StreamTrimByID);
+ REDISMODULE_GET_API(IsKeysPositionRequest);
+ REDISMODULE_GET_API(KeyAtPos);
+ REDISMODULE_GET_API(KeyAtPosWithFlags);
+ REDISMODULE_GET_API(IsChannelsPositionRequest);
+ REDISMODULE_GET_API(ChannelAtPosWithFlags);
+ REDISMODULE_GET_API(GetClientId);
+ REDISMODULE_GET_API(GetClientUserNameById);
+ REDISMODULE_GET_API(GetContextFlags);
+ REDISMODULE_GET_API(AvoidReplicaTraffic);
+ REDISMODULE_GET_API(PoolAlloc);
+ REDISMODULE_GET_API(CreateDataType);
+ REDISMODULE_GET_API(ModuleTypeSetValue);
+ REDISMODULE_GET_API(ModuleTypeReplaceValue);
+ REDISMODULE_GET_API(ModuleTypeGetType);
+ REDISMODULE_GET_API(ModuleTypeGetValue);
+ REDISMODULE_GET_API(IsIOError);
+ REDISMODULE_GET_API(SetModuleOptions);
+ REDISMODULE_GET_API(SignalModifiedKey);
+ REDISMODULE_GET_API(SaveUnsigned);
+ REDISMODULE_GET_API(LoadUnsigned);
+ REDISMODULE_GET_API(SaveSigned);
+ REDISMODULE_GET_API(LoadSigned);
+ REDISMODULE_GET_API(SaveString);
+ REDISMODULE_GET_API(SaveStringBuffer);
+ REDISMODULE_GET_API(LoadString);
+ REDISMODULE_GET_API(LoadStringBuffer);
+ REDISMODULE_GET_API(SaveDouble);
+ REDISMODULE_GET_API(LoadDouble);
+ REDISMODULE_GET_API(SaveFloat);
+ REDISMODULE_GET_API(LoadFloat);
+ REDISMODULE_GET_API(SaveLongDouble);
+ REDISMODULE_GET_API(LoadLongDouble);
+ REDISMODULE_GET_API(SaveDataTypeToString);
+ REDISMODULE_GET_API(LoadDataTypeFromString);
+ REDISMODULE_GET_API(LoadDataTypeFromStringEncver);
+ REDISMODULE_GET_API(EmitAOF);
+ REDISMODULE_GET_API(Log);
+ REDISMODULE_GET_API(LogIOError);
+ REDISMODULE_GET_API(_Assert);
+ REDISMODULE_GET_API(LatencyAddSample);
+ REDISMODULE_GET_API(StringAppendBuffer);
+ REDISMODULE_GET_API(TrimStringAllocation);
+ REDISMODULE_GET_API(RetainString);
+ REDISMODULE_GET_API(HoldString);
+ REDISMODULE_GET_API(StringCompare);
+ REDISMODULE_GET_API(GetContextFromIO);
+ REDISMODULE_GET_API(GetKeyNameFromIO);
+ REDISMODULE_GET_API(GetKeyNameFromModuleKey);
+ REDISMODULE_GET_API(GetDbIdFromModuleKey);
+ REDISMODULE_GET_API(GetDbIdFromIO);
+ REDISMODULE_GET_API(GetKeyNameFromOptCtx);
+ REDISMODULE_GET_API(GetToKeyNameFromOptCtx);
+ REDISMODULE_GET_API(GetDbIdFromOptCtx);
+ REDISMODULE_GET_API(GetToDbIdFromOptCtx);
+ REDISMODULE_GET_API(Milliseconds);
+ REDISMODULE_GET_API(MonotonicMicroseconds);
+ REDISMODULE_GET_API(Microseconds);
+ REDISMODULE_GET_API(CachedMicroseconds);
+ REDISMODULE_GET_API(DigestAddStringBuffer);
+ REDISMODULE_GET_API(DigestAddLongLong);
+ REDISMODULE_GET_API(DigestEndSequence);
+ REDISMODULE_GET_API(GetKeyNameFromDigest);
+ REDISMODULE_GET_API(GetDbIdFromDigest);
+ REDISMODULE_GET_API(CreateDict);
+ REDISMODULE_GET_API(FreeDict);
+ REDISMODULE_GET_API(DictSize);
+ REDISMODULE_GET_API(DictSetC);
+ REDISMODULE_GET_API(DictReplaceC);
+ REDISMODULE_GET_API(DictSet);
+ REDISMODULE_GET_API(DictReplace);
+ REDISMODULE_GET_API(DictGetC);
+ REDISMODULE_GET_API(DictGet);
+ REDISMODULE_GET_API(DictDelC);
+ REDISMODULE_GET_API(DictDel);
+ REDISMODULE_GET_API(DictIteratorStartC);
+ REDISMODULE_GET_API(DictIteratorStart);
+ REDISMODULE_GET_API(DictIteratorStop);
+ REDISMODULE_GET_API(DictIteratorReseekC);
+ REDISMODULE_GET_API(DictIteratorReseek);
+ REDISMODULE_GET_API(DictNextC);
+ REDISMODULE_GET_API(DictPrevC);
+ REDISMODULE_GET_API(DictNext);
+ REDISMODULE_GET_API(DictPrev);
+ REDISMODULE_GET_API(DictCompare);
+ REDISMODULE_GET_API(DictCompareC);
+ REDISMODULE_GET_API(RegisterInfoFunc);
+ REDISMODULE_GET_API(RegisterAuthCallback);
+ REDISMODULE_GET_API(InfoAddSection);
+ REDISMODULE_GET_API(InfoBeginDictField);
+ REDISMODULE_GET_API(InfoEndDictField);
+ REDISMODULE_GET_API(InfoAddFieldString);
+ REDISMODULE_GET_API(InfoAddFieldCString);
+ REDISMODULE_GET_API(InfoAddFieldDouble);
+ REDISMODULE_GET_API(InfoAddFieldLongLong);
+ REDISMODULE_GET_API(InfoAddFieldULongLong);
+ REDISMODULE_GET_API(GetServerInfo);
+ REDISMODULE_GET_API(FreeServerInfo);
+ REDISMODULE_GET_API(ServerInfoGetField);
+ REDISMODULE_GET_API(ServerInfoGetFieldC);
+ REDISMODULE_GET_API(ServerInfoGetFieldSigned);
+ REDISMODULE_GET_API(ServerInfoGetFieldUnsigned);
+ REDISMODULE_GET_API(ServerInfoGetFieldDouble);
+ REDISMODULE_GET_API(GetClientInfoById);
+ REDISMODULE_GET_API(GetClientNameById);
+ REDISMODULE_GET_API(SetClientNameById);
+ REDISMODULE_GET_API(PublishMessage);
+ REDISMODULE_GET_API(PublishMessageShard);
+ REDISMODULE_GET_API(SubscribeToServerEvent);
+ REDISMODULE_GET_API(SetLRU);
+ REDISMODULE_GET_API(GetLRU);
+ REDISMODULE_GET_API(SetLFU);
+ REDISMODULE_GET_API(GetLFU);
+ REDISMODULE_GET_API(BlockClientOnKeys);
+ REDISMODULE_GET_API(BlockClientOnKeysWithFlags);
+ REDISMODULE_GET_API(SignalKeyAsReady);
+ REDISMODULE_GET_API(GetBlockedClientReadyKey);
+ REDISMODULE_GET_API(ScanCursorCreate);
+ REDISMODULE_GET_API(ScanCursorRestart);
+ REDISMODULE_GET_API(ScanCursorDestroy);
+ REDISMODULE_GET_API(Scan);
+ REDISMODULE_GET_API(ScanKey);
+ REDISMODULE_GET_API(GetContextFlagsAll);
+ REDISMODULE_GET_API(GetModuleOptionsAll);
+ REDISMODULE_GET_API(GetKeyspaceNotificationFlagsAll);
+ REDISMODULE_GET_API(IsSubEventSupported);
+ REDISMODULE_GET_API(GetServerVersion);
+ REDISMODULE_GET_API(GetTypeMethodVersion);
+ REDISMODULE_GET_API(Yield);
+ REDISMODULE_GET_API(GetThreadSafeContext);
+ REDISMODULE_GET_API(GetDetachedThreadSafeContext);
+ REDISMODULE_GET_API(FreeThreadSafeContext);
+ REDISMODULE_GET_API(ThreadSafeContextLock);
+ REDISMODULE_GET_API(ThreadSafeContextTryLock);
+ REDISMODULE_GET_API(ThreadSafeContextUnlock);
+ REDISMODULE_GET_API(BlockClient);
+ REDISMODULE_GET_API(BlockClientGetPrivateData);
+ REDISMODULE_GET_API(BlockClientSetPrivateData);
+ REDISMODULE_GET_API(BlockClientOnAuth);
+ REDISMODULE_GET_API(UnblockClient);
+ REDISMODULE_GET_API(IsBlockedReplyRequest);
+ REDISMODULE_GET_API(IsBlockedTimeoutRequest);
+ REDISMODULE_GET_API(GetBlockedClientPrivateData);
+ REDISMODULE_GET_API(GetBlockedClientHandle);
+ REDISMODULE_GET_API(AbortBlock);
+ REDISMODULE_GET_API(BlockedClientMeasureTimeStart);
+ REDISMODULE_GET_API(BlockedClientMeasureTimeEnd);
+ REDISMODULE_GET_API(SetDisconnectCallback);
+ REDISMODULE_GET_API(SubscribeToKeyspaceEvents);
+ REDISMODULE_GET_API(AddPostNotificationJob);
+ REDISMODULE_GET_API(NotifyKeyspaceEvent);
+ REDISMODULE_GET_API(GetNotifyKeyspaceEvents);
+ REDISMODULE_GET_API(BlockedClientDisconnected);
+ REDISMODULE_GET_API(RegisterClusterMessageReceiver);
+ REDISMODULE_GET_API(SendClusterMessage);
+ REDISMODULE_GET_API(GetClusterNodeInfo);
+ REDISMODULE_GET_API(GetClusterNodesList);
+ REDISMODULE_GET_API(FreeClusterNodesList);
+ REDISMODULE_GET_API(CreateTimer);
+ REDISMODULE_GET_API(StopTimer);
+ REDISMODULE_GET_API(GetTimerInfo);
+ REDISMODULE_GET_API(GetMyClusterID);
+ REDISMODULE_GET_API(GetClusterSize);
+ REDISMODULE_GET_API(GetRandomBytes);
+ REDISMODULE_GET_API(GetRandomHexChars);
+ REDISMODULE_GET_API(SetClusterFlags);
+ REDISMODULE_GET_API(ExportSharedAPI);
+ REDISMODULE_GET_API(GetSharedAPI);
+ REDISMODULE_GET_API(RegisterCommandFilter);
+ REDISMODULE_GET_API(UnregisterCommandFilter);
+ REDISMODULE_GET_API(CommandFilterArgsCount);
+ REDISMODULE_GET_API(CommandFilterArgGet);
+ REDISMODULE_GET_API(CommandFilterArgInsert);
+ REDISMODULE_GET_API(CommandFilterArgReplace);
+ REDISMODULE_GET_API(CommandFilterArgDelete);
+ REDISMODULE_GET_API(CommandFilterGetClientId);
+ REDISMODULE_GET_API(Fork);
+ REDISMODULE_GET_API(SendChildHeartbeat);
+ REDISMODULE_GET_API(ExitFromChild);
+ REDISMODULE_GET_API(KillForkChild);
+ REDISMODULE_GET_API(GetUsedMemoryRatio);
+ REDISMODULE_GET_API(MallocSize);
+ REDISMODULE_GET_API(MallocUsableSize);
+ REDISMODULE_GET_API(MallocSizeString);
+ REDISMODULE_GET_API(MallocSizeDict);
+ REDISMODULE_GET_API(CreateModuleUser);
+ REDISMODULE_GET_API(FreeModuleUser);
+ REDISMODULE_GET_API(SetContextUser);
+ REDISMODULE_GET_API(SetModuleUserACL);
+ REDISMODULE_GET_API(SetModuleUserACLString);
+ REDISMODULE_GET_API(GetModuleUserACLString);
+ REDISMODULE_GET_API(GetCurrentUserName);
+ REDISMODULE_GET_API(GetModuleUserFromUserName);
+ REDISMODULE_GET_API(ACLCheckCommandPermissions);
+ REDISMODULE_GET_API(ACLCheckKeyPermissions);
+ REDISMODULE_GET_API(ACLCheckChannelPermissions);
+ REDISMODULE_GET_API(ACLAddLogEntry);
+ REDISMODULE_GET_API(ACLAddLogEntryByUserName);
+ REDISMODULE_GET_API(DeauthenticateAndCloseClient);
+ REDISMODULE_GET_API(AuthenticateClientWithACLUser);
+ REDISMODULE_GET_API(AuthenticateClientWithUser);
+ REDISMODULE_GET_API(RedactClientCommandArgument);
+ REDISMODULE_GET_API(GetClientCertificate);
+ REDISMODULE_GET_API(GetCommandKeys);
+ REDISMODULE_GET_API(GetCommandKeysWithFlags);
+ REDISMODULE_GET_API(GetCurrentCommandName);
+ REDISMODULE_GET_API(RegisterDefragFunc);
+ REDISMODULE_GET_API(DefragAlloc);
+ REDISMODULE_GET_API(DefragRedisModuleString);
+ REDISMODULE_GET_API(DefragShouldStop);
+ REDISMODULE_GET_API(DefragCursorSet);
+ REDISMODULE_GET_API(DefragCursorGet);
+ REDISMODULE_GET_API(GetKeyNameFromDefragCtx);
+ REDISMODULE_GET_API(GetDbIdFromDefragCtx);
+ REDISMODULE_GET_API(EventLoopAdd);
+ REDISMODULE_GET_API(EventLoopDel);
+ REDISMODULE_GET_API(EventLoopAddOneShot);
+ REDISMODULE_GET_API(RegisterBoolConfig);
+ REDISMODULE_GET_API(RegisterNumericConfig);
+ REDISMODULE_GET_API(RegisterStringConfig);
+ REDISMODULE_GET_API(RegisterEnumConfig);
+ REDISMODULE_GET_API(LoadConfigs);
+ REDISMODULE_GET_API(RdbStreamCreateFromFile);
+ REDISMODULE_GET_API(RdbStreamFree);
+ REDISMODULE_GET_API(RdbLoad);
+ REDISMODULE_GET_API(RdbSave);
+
+ /* The NULL check guards against older servers that do not export
+  * IsModuleNameBusy; when available, refuse to re-register a taken name. */
+ if (RedisModule_IsModuleNameBusy && RedisModule_IsModuleNameBusy(name)) return REDISMODULE_ERR;
+ RedisModule_SetModuleAttribs(ctx,name,ver,apiver);
+ return REDISMODULE_OK;
+}
+
+#define RedisModule_Assert(_e) ((_e)?(void)0 : (RedisModule__Assert(#_e,__FILE__,__LINE__),exit(1)))
+
+#define RMAPI_FUNC_SUPPORTED(func) (func != NULL)
+
+#endif /* REDISMODULE_CORE */
+#endif /* REDISMODULE_H */
diff --git a/src/release.c b/src/release.c
new file mode 100644
index 0000000..adc7e55
--- /dev/null
+++ b/src/release.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Every time the Redis Git SHA1 or Dirty status changes only this small
+ * file is recompiled, as we access this information in all the other
+ * files using this functions. */
+
+#include <string.h>
+#include <stdio.h>
+
+#include "release.h"
+#include "crc64.h"
+
+/* Return the git SHA1 the binary was built from, as baked in at build
+ * time via the REDIS_GIT_SHA1 macro. */
+char *redisGitSHA1(void) {
+    return REDIS_GIT_SHA1;
+}
+
+/* Return the build-time git dirty status string (REDIS_GIT_DIRTY macro). */
+char *redisGitDirty(void) {
+    return REDIS_GIT_DIRTY;
+}
+
+/* Return the raw (uncompressed, unhashed) build id string as baked in
+ * at build time via the REDIS_BUILD_ID_RAW macro. */
+const char *redisBuildIdRaw(void) {
+    return REDIS_BUILD_ID_RAW;
+}
+
+/* Return the build id as a 64 bit value: the CRC64 (with initial
+ * checksum 0) of the raw build id string. */
+uint64_t redisBuildId(void) {
+    const char *raw_id = REDIS_BUILD_ID_RAW;
+    size_t raw_len = strlen(raw_id);
+    return crc64(0,(unsigned char*)raw_id,raw_len);
+}
+
+/* Return the build id formatted as a hex string. The result is computed
+ * once and cached in a static buffer, since this string is shown in the
+ * INFO output which should be fast. Not reentrant: returns a pointer to
+ * static storage. */
+char *redisBuildIdString(void) {
+    static char buf[32];
+    static int cached = 0;
+    if (cached) return buf;
+    snprintf(buf,sizeof(buf),"%llx",(unsigned long long) redisBuildId());
+    cached = 1;
+    return buf;
+}
diff --git a/src/replication.c b/src/replication.c
new file mode 100644
index 0000000..fbd9b0a
--- /dev/null
+++ b/src/replication.c
@@ -0,0 +1,4241 @@
+/* Asynchronous replication implementation.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "server.h"
+#include "cluster.h"
+#include "bio.h"
+#include "functions.h"
+#include "connection.h"
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <memory.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+/* Forward declarations for functions defined later in this file. */
+void replicationDiscardCachedMaster(void);
+void replicationResurrectCachedMaster(connection *conn);
+void replicationSendAck(void);
+int replicaPutOnline(client *slave);
+void replicaStartCommandStream(client *slave);
+int cancelReplicationHandshake(int reconnect);
+
+/* We take a global flag to remember if this instance generated an RDB
+ * because of replication, so that we can remove the RDB file in case
+ * the instance is configured to have no persistence. */
+int RDBGeneratedByReplication = 0;
+
+/* --------------------------- Utility functions ---------------------------- */
+/* Return the connection type used for replication links: TLS when
+ * tls-replication is enabled, plain TCP otherwise. */
+static ConnectionType *connTypeOfReplication(void) {
+    return server.tls_replication ? connectionTypeTls() : connectionTypeTcp();
+}
+
+/* Return the pointer to a string representing the slave ip:listening_port
+ * pair. Mostly useful for logging, since we want to log a slave using its
+ * IP address and its listening port which is more clear for the user, for
+ * example: "Closing connection with replica 10.1.2.3:6380".
+ *
+ * Returns a pointer to a static buffer: not reentrant, and the result is
+ * only valid until the next call. */
+char *replicationGetSlaveName(client *c) {
+    static char buf[NET_HOST_PORT_STR_LEN];
+    char ip[NET_IP_STR_LEN];
+
+    ip[0] = '\0';
+    buf[0] = '\0';
+
+    /* Prefer the address advertised by the replica; otherwise fall back to
+     * the connection's peer address. Note the short circuit: we only query
+     * the peer name when no advertised address is available. */
+    if (!(c->slave_addr ||
+          connAddrPeerName(c->conn,ip,sizeof(ip),NULL) != -1))
+    {
+        /* No address at all: identify the replica by client id. */
+        snprintf(buf,sizeof(buf),"client id #%llu",
+            (unsigned long long) c->id);
+        return buf;
+    }
+
+    char *addr = c->slave_addr ? c->slave_addr : ip;
+    if (c->slave_listening_port)
+        formatAddr(buf,sizeof(buf),addr,c->slave_listening_port);
+    else
+        snprintf(buf,sizeof(buf),"%s:<unknown-replica-port>",addr);
+    return buf;
+}
+
+/* Plain unlink() can block for quite some time in order to actually apply
+ * the file deletion to the filesystem. This call removes the file in a
+ * background thread instead. We actually just do close() in the thread,
+ * by using the fact that if there is another instance of the same file open,
+ * the foreground unlink() will only remove the fs name, and deleting the
+ * file's storage space will only happen once the last reference is lost.
+ *
+ * Returns 0 on success, -1 (with errno set) on failure, exactly like
+ * unlink(2). */
+int bg_unlink(const char *filename) {
+    int fd = open(filename,O_RDONLY|O_NONBLOCK);
+
+    /* Can't open the file? Fall back to unlinking in the main thread. */
+    if (fd == -1) return unlink(filename);
+
+    /* The following unlink() removes the name but doesn't free the
+     * file contents because a process still has it open. */
+    if (unlink(filename) == -1) {
+        /* If we got an unlink error, we just return it, closing the
+         * new reference we have to the file. Save errno first, since
+         * close() would overwrite it. */
+        int saved_errno = errno;
+        close(fd);
+        errno = saved_errno;
+        return -1;
+    }
+
+    /* The actual space reclamation happens when the background thread
+     * closes the last descriptor. */
+    bioCreateCloseJob(fd, 0, 0);
+    return 0; /* Success. */
+}
+
+/* ---------------------------------- MASTER -------------------------------- */
+
+/* Allocate and initialize the replication backlog. Must only be called
+ * when no backlog exists yet. */
+void createReplicationBacklog(void) {
+    serverAssert(server.repl_backlog == NULL);
+
+    replBacklog *bl = zmalloc(sizeof(*bl));
+    bl->ref_repl_buf_node = NULL;
+    bl->blocks_index = raxNew();
+    bl->unindexed_count = 0;
+    bl->histlen = 0;
+    /* We don't have any data inside our buffer, but virtually the first
+     * byte we have is the next byte that will be generated for the
+     * replication stream. */
+    bl->offset = server.master_repl_offset+1;
+
+    server.repl_backlog = bl;
+}
+
+/* This function is called when the user modifies the replication backlog
+ * size at runtime. It clamps the configured size to the allowed minimum,
+ * then (if a backlog exists) trims it incrementally so it converges to the
+ * new size while keeping the most recent bytes. */
+void resizeReplicationBacklog(void) {
+    if (server.repl_backlog_size < CONFIG_REPL_BACKLOG_MIN_SIZE)
+        server.repl_backlog_size = CONFIG_REPL_BACKLOG_MIN_SIZE;
+    if (server.repl_backlog == NULL) return;
+    incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
+}
+
+/* Release the replication backlog and its reference to the shared
+ * replication buffer. May only be called when no replicas are attached,
+ * since at that point the backlog holds the last reference to every
+ * buffer block. */
+void freeReplicationBacklog(void) {
+    serverAssert(listLength(server.slaves) == 0);
+    if (server.repl_backlog == NULL) return;
+
+    /* Decrease the start buffer node reference count. */
+    if (server.repl_backlog->ref_repl_buf_node) {
+        replBufBlock *o = listNodeValue(
+            server.repl_backlog->ref_repl_buf_node);
+        serverAssert(o->refcount == 1); /* Last reference. */
+        o->refcount--;
+    }
+
+    /* Replication buffer blocks are completely released when we free the
+     * backlog, since the backlog is released only when there are no replicas
+     * and the backlog keeps the last reference of all blocks.
+     * Freeing is done asynchronously in a background thread to avoid
+     * blocking the main thread on a potentially large list. */
+    freeReplicationBacklogRefMemAsync(server.repl_buffer_blocks,
+                      server.repl_backlog->blocks_index);
+    resetReplicationBuffer();
+    zfree(server.repl_backlog);
+    server.repl_backlog = NULL;
+}
+
+/* To make search offset from replication buffer blocks quickly
+ * when replicas ask partial resynchronization, we create one index
+ * entry (keyed by big-endian replication offset) every
+ * REPL_BACKLOG_INDEX_PER_BLOCKS blocks. */
+void createReplicationBacklogIndex(listNode *ln) {
+    replBacklog *bl = server.repl_backlog;
+
+    bl->unindexed_count++;
+    if (bl->unindexed_count < REPL_BACKLOG_INDEX_PER_BLOCKS) return;
+
+    replBufBlock *block = listNodeValue(ln);
+    uint64_t encoded_offset = htonu64(block->repl_offset);
+    raxInsert(bl->blocks_index,
+              (unsigned char*)&encoded_offset, sizeof(uint64_t),
+              ln, NULL);
+    bl->unindexed_count = 0;
+}
+
+/* Rebase replication buffer blocks' offset since the initial
+ * setting offset starts from 0 when master restart. Rebuilds the
+ * offset->block index from scratch after shifting every block by
+ * 'base_repl_offset'. */
+void rebaseReplicationBuffer(long long base_repl_offset) {
+    /* Throw away the stale index: the keys are derived from the offsets
+     * we are about to change. */
+    raxFree(server.repl_backlog->blocks_index);
+    server.repl_backlog->blocks_index = raxNew();
+    server.repl_backlog->unindexed_count = 0;
+
+    listIter iter;
+    listRewind(server.repl_buffer_blocks, &iter);
+    for (listNode *node = listNext(&iter); node != NULL;
+         node = listNext(&iter))
+    {
+        replBufBlock *block = listNodeValue(node);
+        block->repl_offset += base_repl_offset;
+        createReplicationBacklogIndex(node);
+    }
+}
+
+/* Reinitialize the global replication buffer to an empty list. Note this
+ * does not free the previous list: callers are expected to have released
+ * it already (see freeReplicationBacklog()). */
+void resetReplicationBuffer(void) {
+    server.repl_buffer_blocks = listCreate();
+    listSetFreeMethod(server.repl_buffer_blocks, (void (*)(void*))zfree);
+    server.repl_buffer_mem = 0;
+}
+
+/* Return non-zero if the given replica should receive data from the
+ * shared replication buffer. */
+int canFeedReplicaReplBuffer(client *replica) {
+    /* Don't feed replicas that only want the RDB. */
+    if (replica->flags & CLIENT_REPL_RDBONLY) return 0;
+
+    /* Don't feed replicas that are still waiting for BGSAVE to start. */
+    return replica->replstate != SLAVE_STATE_WAIT_BGSAVE_START;
+}
+
+/* Similar with 'prepareClientToWrite', note that we must call this function
+ * before feeding replication stream into global replication buffer, since
+ * clientHasPendingReplies in prepareClientToWrite will access the global
+ * replication buffer to make judgements.
+ *
+ * Returns the number of replicas that were successfully prepared. */
+int prepareReplicasToWrite(void) {
+    int prepared = 0;
+    listIter iter;
+    listNode *node;
+
+    listRewind(server.slaves,&iter);
+    while((node = listNext(&iter)) != NULL) {
+        client *replica = node->value;
+        if (!canFeedReplicaReplBuffer(replica)) continue;
+        if (prepareClientToWrite(replica) != C_ERR) prepared++;
+    }
+
+    return prepared;
+}
+
+/* Wrapper for feedReplicationBuffer() that takes Redis string objects
+ * as input, handling both raw/embstr sds encodings and the integer
+ * encoding (converted to its decimal string form). */
+void feedReplicationBufferWithObject(robj *o) {
+    if (o->encoding == OBJ_ENCODING_INT) {
+        /* Integer-encoded: render the value into a stack buffer first. */
+        char llstr[LONG_STR_SIZE];
+        size_t len = ll2string(llstr,sizeof(llstr),(long)o->ptr);
+        feedReplicationBuffer(llstr,len);
+    } else {
+        feedReplicationBuffer(o->ptr,sdslen(o->ptr));
+    }
+}
+
+/* Generally, we only have one replication buffer block to trim when replication
+ * backlog size exceeds our setting and no replica reference it. But if replica
+ * clients disconnect, we need to free many replication buffer blocks that are
+ * referenced. It would cost much time if there are a lots blocks to free, that
+ * will freeze server, so we trim replication backlog incrementally.
+ *
+ * 'max_blocks' caps how many buffer blocks may be released in a single call. */
+void incrementalTrimReplicationBacklog(size_t max_blocks) {
+    serverAssert(server.repl_backlog != NULL);
+
+    size_t trimmed_blocks = 0;
+    while (server.repl_backlog->histlen > server.repl_backlog_size &&
+           trimmed_blocks < max_blocks)
+    {
+        /* We never trim backlog to less than one block. */
+        if (listLength(server.repl_buffer_blocks) <= 1) break;
+
+        /* Replicas increment the refcount of the first replication buffer block
+         * they refer to, in that case, we don't trim the backlog even if
+         * backlog_histlen exceeds backlog_size. This implicitly makes backlog
+         * bigger than our setting, but makes the master accept partial resync as
+         * much as possible. So that backlog must be the last reference of
+         * replication buffer blocks. */
+        listNode *first = listFirst(server.repl_buffer_blocks);
+        serverAssert(first == server.repl_backlog->ref_repl_buf_node);
+        replBufBlock *fo = listNodeValue(first);
+        if (fo->refcount != 1) break;
+
+        /* We don't try trim backlog if backlog valid size will be lessen than
+         * setting backlog size once we release the first repl buffer block. */
+        if (server.repl_backlog->histlen - (long long)fo->size <=
+            server.repl_backlog_size) break;
+
+        /* Decr refcount and release the first block later. */
+        fo->refcount--;
+        trimmed_blocks++;
+        server.repl_backlog->histlen -= fo->size;
+
+        /* Go to use next replication buffer block node. */
+        listNode *next = listNextNode(first);
+        server.repl_backlog->ref_repl_buf_node = next;
+        serverAssert(server.repl_backlog->ref_repl_buf_node != NULL);
+        /* Incr reference count to keep the new head node. */
+        ((replBufBlock *)listNodeValue(next))->refcount++;
+
+        /* Remove the node in recorded blocks. The index key is the
+         * big-endian encoding of the block's replication offset, matching
+         * createReplicationBacklogIndex(). */
+        uint64_t encoded_offset = htonu64(fo->repl_offset);
+        raxRemove(server.repl_backlog->blocks_index,
+            (unsigned char*)&encoded_offset, sizeof(uint64_t), NULL);
+
+        /* Delete the first node from global replication buffer. At this
+         * point nobody references it and it must be completely filled. */
+        serverAssert(fo->refcount == 0 && fo->used == fo->size);
+        server.repl_buffer_mem -= (fo->size +
+            sizeof(listNode) + sizeof(replBufBlock));
+        listDelNode(server.repl_buffer_blocks, first);
+    }
+
+    /* Set the offset of the first byte we have in the backlog. */
+    server.repl_backlog->offset = server.master_repl_offset -
+        server.repl_backlog->histlen + 1;
+}
+
+/* Drop the given replica's reference into the shared replication buffer,
+ * then opportunistically trim blocks that may now be unreferenced. */
+void freeReplicaReferencedReplBuffer(client *replica) {
+    listNode *ref = replica->ref_repl_buf_node;
+
+    if (ref != NULL) {
+        /* Decrease the start buffer node reference count. */
+        replBufBlock *block = listNodeValue(ref);
+        serverAssert(block->refcount > 0);
+        block->refcount--;
+        incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
+    }
+    replica->ref_repl_buf_node = NULL;
+    replica->ref_block_pos = 0;
+}
+
+/* Append bytes into the global replication buffer list, replication backlog and
+ * all replica clients use replication buffers collectively, this function replace
+ * 'addReply*', 'feedReplicationBacklog' for replicas and replication backlog,
+ * First we add buffer into global replication buffer block list, and then
+ * update replica / replication-backlog referenced node and block position.
+ *
+ * No-op when there is no backlog (nothing to accumulate into). */
+void feedReplicationBuffer(char *s, size_t len) {
+    /* Monotonic id assigned to each newly created buffer block. */
+    static long long repl_block_id = 0;
+
+    if (server.repl_backlog == NULL) return;
+
+    /* Each iteration fills the current tail block and/or appends one new
+     * block, until all of 's' has been consumed. */
+    while(len > 0) {
+        size_t start_pos = 0; /* The position of referenced block to start sending. */
+        listNode *start_node = NULL; /* Replica/backlog starts referenced node. */
+        int add_new_block = 0; /* Create new block if current block is total used. */
+        listNode *ln = listLast(server.repl_buffer_blocks);
+        replBufBlock *tail = ln ? listNodeValue(ln) : NULL;
+
+        /* Append to tail string when possible. */
+        if (tail && tail->size > tail->used) {
+            start_node = listLast(server.repl_buffer_blocks);
+            start_pos = tail->used;
+            /* Copy the part we can fit into the tail, and leave the rest for a
+             * new node */
+            size_t avail = tail->size - tail->used;
+            size_t copy = (avail >= len) ? len : avail;
+            memcpy(tail->buf + tail->used, s, copy);
+            tail->used += copy;
+            s += copy;
+            len -= copy;
+            server.master_repl_offset += copy;
+            server.repl_backlog->histlen += copy;
+        }
+        if (len) {
+            /* Create a new node, make sure it is allocated to at
+             * least PROTO_REPLY_CHUNK_BYTES */
+            size_t usable_size;
+            /* Avoid creating nodes smaller than PROTO_REPLY_CHUNK_BYTES, so that we can append more data into them,
+             * and also avoid creating nodes bigger than repl_backlog_size / 16, so that we won't have huge nodes that can't
+             * trim when we only still need to hold a small portion from them. */
+            size_t limit = max((size_t)server.repl_backlog_size / 16, (size_t)PROTO_REPLY_CHUNK_BYTES);
+            size_t size = min(max(len, (size_t)PROTO_REPLY_CHUNK_BYTES), limit);
+            tail = zmalloc_usable(size + sizeof(replBufBlock), &usable_size);
+            /* Take over the allocation's internal fragmentation */
+            tail->size = usable_size - sizeof(replBufBlock);
+            size_t copy = (tail->size >= len) ? len : tail->size;
+            tail->used = copy;
+            tail->refcount = 0;
+            /* The block's first byte is the next byte of the stream. */
+            tail->repl_offset = server.master_repl_offset + 1;
+            tail->id = repl_block_id++;
+            memcpy(tail->buf, s, copy);
+            listAddNodeTail(server.repl_buffer_blocks, tail);
+            /* We also count the list node memory into replication buffer memory. */
+            server.repl_buffer_mem += (usable_size + sizeof(listNode));
+            add_new_block = 1;
+            if (start_node == NULL) {
+                start_node = listLast(server.repl_buffer_blocks);
+                start_pos = 0;
+            }
+            s += copy;
+            len -= copy;
+            server.master_repl_offset += copy;
+            server.repl_backlog->histlen += copy;
+        }
+
+        /* For output buffer of replicas: attach any replica that doesn't yet
+         * reference a block to the start of the data we just appended. */
+        listIter li;
+        listRewind(server.slaves,&li);
+        while((ln = listNext(&li))) {
+            client *slave = ln->value;
+            if (!canFeedReplicaReplBuffer(slave)) continue;
+
+            /* Update shared replication buffer start position. */
+            if (slave->ref_repl_buf_node == NULL) {
+                slave->ref_repl_buf_node = start_node;
+                slave->ref_block_pos = start_pos;
+                /* Only increase the start block reference count. */
+                ((replBufBlock *)listNodeValue(start_node))->refcount++;
+            }
+
+            /* Check output buffer limit only when add new block. */
+            if (add_new_block) closeClientOnOutputBufferLimitReached(slave, 1);
+        }
+
+        /* For replication backlog */
+        if (server.repl_backlog->ref_repl_buf_node == NULL) {
+            server.repl_backlog->ref_repl_buf_node = start_node;
+            /* Only increase the start block reference count. */
+            ((replBufBlock *)listNodeValue(start_node))->refcount++;
+
+            /* Replication buffer must be empty before adding replication stream
+             * into replication backlog. */
+            serverAssert(add_new_block == 1 && start_pos == 0);
+        }
+        if (add_new_block) {
+            createReplicationBacklogIndex(listLast(server.repl_buffer_blocks));
+
+            /* It is important to trim after adding replication data to keep the backlog size close to
+             * repl_backlog_size in the common case. We wait until we add a new block to avoid repeated
+             * unnecessary trimming attempts when small amounts of data are added. See comments in
+             * freeMemoryGetNotCountedMemory() for details on replication backlog memory tracking. */
+            incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
+        }
+    }
+}
+
+/* Propagate write commands to replication stream.
+ *
+ * This function is used if the instance is a master: we use the commands
+ * received by our clients in order to create the replication stream.
+ * Instead if the instance is a replica and has sub-replicas attached, we use
+ * replicationFeedStreamFromMasterStream()
+ *
+ * 'dictid' is the database the command applies to, or -1 for commands that
+ * touch no keys (so no SELECT needs to be emitted). The command is encoded
+ * as a RESP multi-bulk array into the shared replication buffer. */
+void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
+    int j, len;
+    char llstr[LONG_STR_SIZE];
+
+    /* In case we propagate a command that doesn't touch keys (PING, REPLCONF) we
+     * pass dbid=-1 that indicate there is no need to replicate `select` command. */
+    serverAssert(dictid == -1 || (dictid >= 0 && dictid < server.dbnum));
+
+    /* If the instance is not a top level master, return ASAP: we'll just proxy
+     * the stream of data we receive from our master instead, in order to
+     * propagate *identical* replication stream. In this way this slave can
+     * advertise the same replication ID as the master (since it shares the
+     * master replication history and has the same backlog and offsets). */
+    if (server.masterhost != NULL) return;
+
+    /* If there aren't slaves, and there is no backlog buffer to populate,
+     * we can return ASAP. */
+    if (server.repl_backlog == NULL && listLength(slaves) == 0) {
+        /* We increment the repl_offset anyway, since we use that for tracking AOF fsyncs
+         * even when there's no replication active. This code will not be reached if AOF
+         * is also disabled. */
+        server.master_repl_offset += 1;
+        return;
+    }
+
+    /* We can't have slaves attached and no backlog. */
+    serverAssert(!(listLength(slaves) != 0 && server.repl_backlog == NULL));
+
+    /* Must install write handler for all replicas first before feeding
+     * replication stream. */
+    prepareReplicasToWrite();
+
+    /* Send SELECT command to every slave if needed. */
+    if (dictid != -1 && server.slaveseldb != dictid) {
+        robj *selectcmd;
+
+        /* For a few DBs we have pre-computed SELECT command. */
+        if (dictid >= 0 && dictid < PROTO_SHARED_SELECT_CMDS) {
+            selectcmd = shared.select[dictid];
+        } else {
+            int dictid_len;
+
+            dictid_len = ll2string(llstr,sizeof(llstr),dictid);
+            selectcmd = createObject(OBJ_STRING,
+                sdscatprintf(sdsempty(),
+                "*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
+                dictid_len, llstr));
+        }
+
+        feedReplicationBufferWithObject(selectcmd);
+
+        /* Only the dynamically created SELECT object must be released:
+         * the shared ones are owned by the shared objects table. */
+        if (dictid < 0 || dictid >= PROTO_SHARED_SELECT_CMDS)
+            decrRefCount(selectcmd);
+
+        server.slaveseldb = dictid;
+    }
+
+    /* Write the command to the replication buffer if any. The 'aux' buffer
+     * holds one RESP length header at a time: "*<argc>\r\n" first, then
+     * "$<len>\r\n" for each argument. */
+    char aux[LONG_STR_SIZE+3];
+
+    /* Add the multi bulk reply length. */
+    aux[0] = '*';
+    len = ll2string(aux+1,sizeof(aux)-1,argc);
+    aux[len+1] = '\r';
+    aux[len+2] = '\n';
+    feedReplicationBuffer(aux,len+3);
+
+    for (j = 0; j < argc; j++) {
+        long objlen = stringObjectLen(argv[j]);
+
+        /* We need to feed the buffer with the object as a bulk reply
+         * not just as a plain string, so create the $..CRLF payload len
+         * and add the final CRLF */
+        aux[0] = '$';
+        len = ll2string(aux+1,sizeof(aux)-1,objlen);
+        aux[len+1] = '\r';
+        aux[len+2] = '\n';
+        feedReplicationBuffer(aux,len+3);
+        feedReplicationBufferWithObject(argv[j]);
+        /* Reuse the "\r\n" already sitting at the end of 'aux' as the
+         * trailing CRLF of the bulk payload. */
+        feedReplicationBuffer(aux+len+1,2);
+    }
+}
+
+/* This is a debugging function that gets called when we detect something
+ * wrong with the replication protocol: the goal is to peek into the
+ * replication backlog and show a few final bytes to make simpler to
+ * guess what kind of bug it could be. */
+void showLatestBacklog(void) {
+    if (server.repl_backlog == NULL) return;
+    if (listLength(server.repl_buffer_blocks) == 0) return;
+
+    /* Dump at most the last 256 bytes of history. */
+    size_t dumplen = 256;
+    if (server.repl_backlog->histlen < (long long)dumplen)
+        dumplen = server.repl_backlog->histlen;
+
+    /* Walk the buffer blocks backwards from the tail, prepending each
+     * block's escaped tail bytes so 'dump' ends up in stream order. */
+    sds dump = sdsempty();
+    listNode *node = listLast(server.repl_buffer_blocks);
+    while(dumplen) {
+        if (node == NULL) break;
+        replBufBlock *o = listNodeValue(node);
+        size_t thislen = o->used >= dumplen ? dumplen : o->used;
+        sds head = sdscatrepr(sdsempty(), o->buf+o->used-thislen, thislen);
+        sds tmp = sdscatsds(head, dump);
+        sdsfree(dump);
+        dump = tmp;
+        dumplen -= thislen;
+        node = listPrevNode(node);
+    }
+
+    /* Finally log such bytes: this is vital debugging info to
+     * understand what happened. */
+    serverLog(LL_NOTICE,"Latest backlog is: '%s'", dump);
+    sdsfree(dump);
+}
+
+/* This function is used in order to proxy what we receive from our master
+ * to our sub-slaves: the buffer is appended verbatim to the shared
+ * replication buffer so sub-replicas see an identical stream. */
+void replicationFeedStreamFromMasterStream(char *buf, size_t buflen) {
+    /* Debugging: this is handy to see the stream sent from master
+     * to slaves. Disabled with if(0). */
+    if (0) {
+        printf("%zu:",buflen);
+        for (size_t j = 0; j < buflen; j++) {
+            /* Cast to unsigned char: passing a negative char to
+             * isprint() is undefined behavior. */
+            printf("%c", isprint((unsigned char)buf[j]) ? buf[j] : '.');
+        }
+        printf("\n");
+    }
+
+    /* There must be replication backlog if having attached slaves. */
+    if (listLength(server.slaves)) serverAssert(server.repl_backlog != NULL);
+    if (server.repl_backlog) {
+        /* Must install write handler for all replicas first before feeding
+         * replication stream. */
+        prepareReplicasToWrite();
+        feedReplicationBuffer(buf,buflen);
+    }
+}
+
+/* Feed the command 'argv'/'argc' executed by client 'c' on database
+ * 'dictid' to every client in the 'monitors' list, formatted as a RESP
+ * status line: "<timestamp> [<db> <origin>] \"arg\" \"arg\"...". */
+void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc) {
+    /* Fast path to return if the monitors list is empty or the server is in loading. */
+    if (monitors == NULL || listLength(monitors) == 0 || server.loading) return;
+    listNode *ln;
+    listIter li;
+    int j;
+    sds cmdrepr = sdsnew("+");
+    robj *cmdobj;
+    struct timeval tv;
+
+    /* Prefix: current time with microsecond resolution. */
+    gettimeofday(&tv,NULL);
+    cmdrepr = sdscatprintf(cmdrepr,"%ld.%06ld ",(long)tv.tv_sec,(long)tv.tv_usec);
+    /* Origin tag: lua script, unix socket peer, or TCP peer address. */
+    if (c->flags & CLIENT_SCRIPT) {
+        cmdrepr = sdscatprintf(cmdrepr,"[%d lua] ",dictid);
+    } else if (c->flags & CLIENT_UNIX_SOCKET) {
+        cmdrepr = sdscatprintf(cmdrepr,"[%d unix:%s] ",dictid,server.unixsocket);
+    } else {
+        cmdrepr = sdscatprintf(cmdrepr,"[%d %s] ",dictid,getClientPeerId(c));
+    }
+
+    /* Append every argument, quoted/escaped, space separated. */
+    for (j = 0; j < argc; j++) {
+        if (argv[j]->encoding == OBJ_ENCODING_INT) {
+            cmdrepr = sdscatprintf(cmdrepr, "\"%ld\"", (long)argv[j]->ptr);
+        } else {
+            cmdrepr = sdscatrepr(cmdrepr,(char*)argv[j]->ptr,
+                        sdslen(argv[j]->ptr));
+        }
+        if (j != argc-1)
+            cmdrepr = sdscatlen(cmdrepr," ",1);
+    }
+    cmdrepr = sdscatlen(cmdrepr,"\r\n",2);
+    cmdobj = createObject(OBJ_STRING,cmdrepr);
+
+    /* Deliver the same shared object to every monitor client. */
+    listRewind(monitors,&li);
+    while((ln = listNext(&li))) {
+        client *monitor = ln->value;
+        addReply(monitor,cmdobj);
+        updateClientMemUsageAndBucket(monitor);
+    }
+    decrRefCount(cmdobj);
+}
+
+/* Feed the slave 'c' with the replication backlog starting from the
+ * specified 'offset' up to the end of the backlog.
+ *
+ * Returns the number of bytes the replica still has to receive. Instead of
+ * copying data, this attaches the replica to the shared buffer block
+ * containing 'offset' (incrementing its refcount). */
+long long addReplyReplicationBacklog(client *c, long long offset) {
+    long long skip;
+
+    serverLog(LL_DEBUG, "[PSYNC] Replica request offset: %lld", offset);
+
+    if (server.repl_backlog->histlen == 0) {
+        serverLog(LL_DEBUG, "[PSYNC] Backlog history len is zero");
+        return 0;
+    }
+
+    serverLog(LL_DEBUG, "[PSYNC] Backlog size: %lld",
+             server.repl_backlog_size);
+    serverLog(LL_DEBUG, "[PSYNC] First byte: %lld",
+             server.repl_backlog->offset);
+    serverLog(LL_DEBUG, "[PSYNC] History len: %lld",
+             server.repl_backlog->histlen);
+
+    /* Compute the amount of bytes we need to discard. */
+    skip = offset - server.repl_backlog->offset;
+    serverLog(LL_DEBUG, "[PSYNC] Skipping: %lld", skip);
+
+    /* Iterate recorded blocks, quickly search the approximate node.
+     * The index maps big-endian encoded block offsets to list nodes, so a
+     * rax seek gives us a block near the requested offset in O(log n). */
+    listNode *node = NULL;
+    if (raxSize(server.repl_backlog->blocks_index) > 0) {
+        uint64_t encoded_offset = htonu64(offset);
+        raxIterator ri;
+        raxStart(&ri, server.repl_backlog->blocks_index);
+        raxSeek(&ri, ">", (unsigned char*)&encoded_offset, sizeof(uint64_t));
+        if (raxEOF(&ri)) {
+            /* No found, so search from the last recorded node. */
+            raxSeek(&ri, "$", NULL, 0);
+            raxPrev(&ri);
+            node = (listNode *)ri.data;
+        } else {
+            raxPrev(&ri); /* Skip the sought node. */
+            /* We should search from the prev node since the offset of current
+             * sought node exceeds searching offset. */
+            if (raxPrev(&ri))
+                node = (listNode *)ri.data;
+            else
+                node = server.repl_backlog->ref_repl_buf_node;
+        }
+        raxStop(&ri);
+    } else {
+        /* No recorded blocks, just from the start node to search. */
+        node = server.repl_backlog->ref_repl_buf_node;
+    }
+
+    /* Search the exact node: walk forward until the block spanning
+     * 'offset' is found. */
+    while (node != NULL) {
+        replBufBlock *o = listNodeValue(node);
+        if (o->repl_offset + (long long)o->used >= offset) break;
+        node = listNextNode(node);
+    }
+    serverAssert(node != NULL);
+
+    /* Install a writer handler first.*/
+    prepareClientToWrite(c);
+    /* Setting output buffer of the replica: reference the block and
+     * remember the in-block position matching the requested offset. */
+    replBufBlock *o = listNodeValue(node);
+    o->refcount++;
+    c->ref_repl_buf_node = node;
+    c->ref_block_pos = offset - o->repl_offset;
+
+    return server.repl_backlog->histlen - skip;
+}
+
+/* Return the offset to provide as reply to the PSYNC command received
+ * from the slave: simply the current master replication offset. The
+ * returned value is only valid immediately after the BGSAVE process
+ * started and before executing any other command from clients. */
+long long getPsyncInitialOffset(void) {
+    return server.master_repl_offset;
+}
+
+/* Send a FULLRESYNC reply in the specific case of a full resynchronization,
+ * as a side effect setup the slave for a full sync in different ways:
+ *
+ * 1) Remember, into the slave client structure, the replication offset
+ *    we sent here, so that if new slaves will later attach to the same
+ *    background RDB saving process (by duplicating this client output
+ *    buffer), we can get the right offset from this slave.
+ * 2) Set the replication state of the slave to WAIT_BGSAVE_END so that
+ *    we start accumulating differences from this point.
+ * 3) Force the replication stream to re-emit a SELECT statement so
+ *    the new slave incremental differences will start selecting the
+ *    right database number.
+ *
+ * Normally this function should be called immediately after a successful
+ * BGSAVE for replication was started, or when there is one already in
+ * progress that we attached our slave to.
+ *
+ * Returns C_OK on success, C_ERR if writing the reply failed (in which
+ * case the slave client is scheduled to be freed). */
+int replicationSetupSlaveForFullResync(client *slave, long long offset) {
+    slave->psync_initial_offset = offset;
+    slave->replstate = SLAVE_STATE_WAIT_BGSAVE_END;
+    /* We are going to accumulate the incremental changes for this
+     * slave as well. Set slaveseldb to -1 in order to force to re-emit
+     * a SELECT statement in the replication stream. */
+    server.slaveseldb = -1;
+
+    /* Don't send this reply to slaves that approached us with
+     * the old SYNC command. */
+    if (slave->flags & CLIENT_PRE_PSYNC) return C_OK;
+
+    char buf[128];
+    int buflen = snprintf(buf,sizeof(buf),"+FULLRESYNC %s %lld\r\n",
+                          server.replid,offset);
+    if (connWrite(slave->conn,buf,buflen) != buflen) {
+        freeClientAsync(slave);
+        return C_ERR;
+    }
+    return C_OK;
+}
+
/* This function handles the PSYNC command from the point of view of a
 * master receiving a request for partial resynchronization.
 *
 * 'c' is the replica client issuing PSYNC; c->argv[1] is the replication
 * ID the replica last knew, 'psync_offset' the offset it wants to resume
 * from.
 *
 * On success return C_OK, otherwise C_ERR is returned and we proceed
 * with the usual full resync. */
int masterTryPartialResynchronization(client *c, long long psync_offset) {
    long long psync_len;
    char *master_replid = c->argv[1]->ptr;
    char buf[128];
    int buflen;

    /* Is the replication ID of this master the same advertised by the wannabe
     * slave via PSYNC? If the replication ID changed this master has a
     * different replication history, and there is no way to continue.
     *
     * Note that there are two potentially valid replication IDs: the ID1
     * and the ID2. The ID2 however is only valid up to a specific offset. */
    if (strcasecmp(master_replid, server.replid) &&
        (strcasecmp(master_replid, server.replid2) ||
         psync_offset > server.second_replid_offset))
    {
        /* Replid "?" is used by slaves that want to force a full resync. */
        if (master_replid[0] != '?') {
            if (strcasecmp(master_replid, server.replid) &&
                strcasecmp(master_replid, server.replid2))
            {
                serverLog(LL_NOTICE,"Partial resynchronization not accepted: "
                    "Replication ID mismatch (Replica asked for '%s', my "
                    "replication IDs are '%s' and '%s')",
                    master_replid, server.replid, server.replid2);
            } else {
                /* ID2 matched but the requested offset is past the point
                 * where ID2's history diverged from ours. */
                serverLog(LL_NOTICE,"Partial resynchronization not accepted: "
                    "Requested offset for second ID was %lld, but I can reply "
                    "up to %lld", psync_offset, server.second_replid_offset);
            }
        } else {
            serverLog(LL_NOTICE,"Full resync requested by replica %s",
                replicationGetSlaveName(c));
        }
        goto need_full_resync;
    }

    /* We still have the data our slave is asking for? The backlog is a
     * window [offset, offset+histlen]; anything outside it is gone. */
    if (!server.repl_backlog ||
        psync_offset < server.repl_backlog->offset ||
        psync_offset > (server.repl_backlog->offset + server.repl_backlog->histlen))
    {
        serverLog(LL_NOTICE,
            "Unable to partial resync with replica %s for lack of backlog (Replica request was: %lld).", replicationGetSlaveName(c), psync_offset);
        if (psync_offset > server.master_repl_offset) {
            serverLog(LL_WARNING,
                "Warning: replica %s tried to PSYNC with an offset that is greater than the master replication offset.", replicationGetSlaveName(c));
        }
        goto need_full_resync;
    }

    /* If we reached this point, we are able to perform a partial resync:
     * 1) Set client state to make it a slave.
     * 2) Inform the client we can continue with +CONTINUE
     * 3) Send the backlog data (from the offset to the end) to the slave. */
    c->flags |= CLIENT_SLAVE;
    c->replstate = SLAVE_STATE_ONLINE;
    c->repl_ack_time = server.unixtime;
    c->repl_start_cmd_stream_on_ack = 0;
    listAddNodeTail(server.slaves,c);
    /* We can't use the connection buffers since they are used to accumulate
     * new commands at this stage. But we are sure the socket send buffer is
     * empty so this write will never fail actually. */
    if (c->slave_capa & SLAVE_CAPA_PSYNC2) {
        /* PSYNC2-aware replicas also receive our current replication ID. */
        buflen = snprintf(buf,sizeof(buf),"+CONTINUE %s\r\n", server.replid);
    } else {
        buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n");
    }
    if (connWrite(c->conn,buf,buflen) != buflen) {
        /* The client is scheduled for closing: return C_OK anyway so the
         * caller does not attempt a full resync with a dying client. */
        freeClientAsync(c);
        return C_OK;
    }
    psync_len = addReplyReplicationBacklog(c,psync_offset);
    serverLog(LL_NOTICE,
        "Partial resynchronization request from %s accepted. Sending %lld bytes of backlog starting from offset %lld.",
            replicationGetSlaveName(c),
            psync_len, psync_offset);
    /* Note that we don't need to set the selected DB at server.slaveseldb
     * to -1 to force the master to emit SELECT, since the slave already
     * has this state from the previous connection with the master. */

    refreshGoodSlavesCount();

    /* Fire the replica change modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_REPLICA_CHANGE,
                          REDISMODULE_SUBEVENT_REPLICA_CHANGE_ONLINE,
                          NULL);

    return C_OK; /* The caller can return, no full resync needed. */

need_full_resync:
    /* We need a full resync for some reason... Note that we can't
     * reply to PSYNC right now if a full SYNC is needed. The reply
     * must include the master offset at the time the RDB file we transfer
     * is generated, so we need to delay the reply to that moment. */
    return C_ERR;
}
+
/* Start a BGSAVE for replication goals, which is, selecting the disk or
 * socket target depending on the configuration, and making sure that
 * the script cache is flushed before to start.
 *
 * The mincapa argument is the bitwise AND among all the slaves capabilities
 * of the slaves waiting for this BGSAVE, so represents the slave capabilities
 * all the slaves support. Can be tested via SLAVE_CAPA_* macros.
 *
 * The req argument is the bitwise OR of SLAVE_REQ_* flags describing a
 * possibly filtered RDB (e.g. functions-only).
 *
 * Side effects, other than starting a BGSAVE:
 *
 * 1) Handle the slaves in WAIT_START state, by preparing them for a full
 *    sync if the BGSAVE was successfully started, or sending them an error
 *    and dropping them from the list of slaves.
 *
 * 2) Flush the Lua scripting script cache if the BGSAVE was actually
 *    started.
 *
 * Returns C_OK on success or C_ERR otherwise. */
int startBgsaveForReplication(int mincapa, int req) {
    int retval;
    int socket_target = 0;
    listIter li;
    listNode *ln;

    /* We use a socket target if slave can handle the EOF marker and we're configured to do diskless syncs.
     * Note that in case we're creating a "filtered" RDB (functions-only, for example) we also force socket replication
     * to avoid overwriting the snapshot RDB file with filtered data. */
    socket_target = (server.repl_diskless_sync || req & SLAVE_REQ_RDB_MASK) && (mincapa & SLAVE_CAPA_EOF);
    /* `SYNC` should have failed with error if we don't support socket and require a filter, assert this here */
    serverAssert(socket_target || !(req & SLAVE_REQ_RDB_MASK));

    serverLog(LL_NOTICE,"Starting BGSAVE for SYNC with target: %s",
        socket_target ? "replicas sockets" : "disk");

    rdbSaveInfo rsi, *rsiptr;
    rsiptr = rdbPopulateSaveInfo(&rsi);
    /* Only do rdbSave* when rsiptr is not NULL,
     * otherwise slave will miss repl-stream-db. */
    if (rsiptr) {
        if (socket_target)
            retval = rdbSaveToSlavesSockets(req,rsiptr);
        else {
            /* Keep the page cache since it'll get used soon */
            retval = rdbSaveBackground(req,server.rdb_filename,rsiptr,RDBFLAGS_KEEP_CACHE);
        }
    } else {
        serverLog(LL_WARNING,"BGSAVE for replication: replication information not available, can't generate the RDB file right now. Try later.");
        retval = C_ERR;
    }

    /* If we succeeded to start a BGSAVE with disk target, let's remember
     * this fact, so that we can later delete the file if needed. Note
     * that we don't set the flag to 1 if the feature is disabled, otherwise
     * it would never be cleared: the file is not deleted. This way if
     * the user enables it later with CONFIG SET, we are fine. */
    if (retval == C_OK && !socket_target && server.rdb_del_sync_files)
        RDBGeneratedByReplication = 1;

    /* If we failed to BGSAVE, remove the slaves waiting for a full
     * resynchronization from the list of slaves, inform them with
     * an error about what happened, close the connection ASAP. */
    if (retval == C_ERR) {
        serverLog(LL_WARNING,"BGSAVE for replication failed");
        listRewind(server.slaves,&li);
        while((ln = listNext(&li))) {
            client *slave = ln->value;

            if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
                slave->replstate = REPL_STATE_NONE;
                slave->flags &= ~CLIENT_SLAVE;
                listDelNode(server.slaves,ln);
                addReplyError(slave,
                    "BGSAVE failed, replication can't continue");
                slave->flags |= CLIENT_CLOSE_AFTER_REPLY;
            }
        }
        return retval;
    }

    /* If the target is socket, rdbSaveToSlavesSockets() already setup
     * the slaves for a full resync. Otherwise for disk target do it now.*/
    if (!socket_target) {
        listRewind(server.slaves,&li);
        while((ln = listNext(&li))) {
            client *slave = ln->value;

            if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
                /* Check slave has the exact requirements: a replica asking
                 * for a differently-filtered RDB must wait for its own
                 * BGSAVE. */
                if (slave->slave_req != req)
                    continue;
                replicationSetupSlaveForFullResync(slave, getPsyncInitialOffset());
            }
        }
    }

    return retval;
}
+
/* SYNC and PSYNC command implementation.
 *
 * Entry point on the master side for a replica requesting an initial
 * synchronization. Tries a partial resync first (PSYNC only), then falls
 * back to scheduling / attaching to a full RDB transfer. */
void syncCommand(client *c) {
    /* ignore SYNC if already slave or in monitor mode */
    if (c->flags & CLIENT_SLAVE) return;

    /* Check if this is a failover request to a replica with the same replid and
     * become a master if so. */
    if (c->argc > 3 && !strcasecmp(c->argv[0]->ptr,"psync") &&
        !strcasecmp(c->argv[3]->ptr,"failover"))
    {
        serverLog(LL_NOTICE, "Failover request received for replid %s.",
            (unsigned char *)c->argv[1]->ptr);
        if (!server.masterhost) {
            addReplyError(c, "PSYNC FAILOVER can't be sent to a master.");
            return;
        }

        if (!strcasecmp(c->argv[1]->ptr,server.replid)) {
            replicationUnsetMaster();
            sds client = catClientInfoString(sdsempty(),c);
            serverLog(LL_NOTICE,
                "MASTER MODE enabled (failover request from '%s')",client);
            sdsfree(client);
            /* Note: no return here — we keep serving the replica's sync
             * request now that we promoted ourselves to master. */
        } else {
            addReplyError(c, "PSYNC FAILOVER replid must match my replid.");
            return;
        }
    }

    /* Don't let replicas sync with us while we're failing over */
    if (server.failover_state != NO_FAILOVER) {
        addReplyError(c,"-NOMASTERLINK Can't SYNC while failing over");
        return;
    }

    /* Refuse SYNC requests if we are a slave but the link with our master
     * is not ok... */
    if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED) {
        addReplyError(c,"-NOMASTERLINK Can't SYNC while not connected with my master");
        return;
    }

    /* SYNC can't be issued when the server has pending data to send to
     * the client about already issued commands. We need a fresh reply
     * buffer registering the differences between the BGSAVE and the current
     * dataset, so that we can copy to other slaves if needed. */
    if (clientHasPendingReplies(c)) {
        addReplyError(c,"SYNC and PSYNC are invalid with pending output");
        return;
    }

    /* Fail sync if slave doesn't support EOF capability but wants a filtered RDB. This is because we force filtered
     * RDB's to be generated over a socket and not through a file to avoid conflicts with the snapshot files. Forcing
     * use of a socket is handled, if needed, in `startBgsaveForReplication`. */
    if (c->slave_req & SLAVE_REQ_RDB_MASK && !(c->slave_capa & SLAVE_CAPA_EOF)) {
        addReplyError(c,"Filtered replica requires EOF capability");
        return;
    }

    serverLog(LL_NOTICE,"Replica %s asks for synchronization",
        replicationGetSlaveName(c));

    /* Try a partial resynchronization if this is a PSYNC command.
     * If it fails, we continue with usual full resynchronization, however
     * when this happens replicationSetupSlaveForFullResync will replied
     * with:
     *
     * +FULLRESYNC <replid> <offset>
     *
     * So the slave knows the new replid and offset to try a PSYNC later
     * if the connection with the master is lost. */
    if (!strcasecmp(c->argv[0]->ptr,"psync")) {
        long long psync_offset;
        if (getLongLongFromObjectOrReply(c, c->argv[2], &psync_offset, NULL) != C_OK) {
            serverLog(LL_WARNING, "Replica %s asks for synchronization but with a wrong offset",
                      replicationGetSlaveName(c));
            return;
        }

        if (masterTryPartialResynchronization(c, psync_offset) == C_OK) {
            server.stat_sync_partial_ok++;
            return; /* No full resync needed, return. */
        } else {
            char *master_replid = c->argv[1]->ptr;

            /* Increment stats for failed PSYNCs, but only if the
             * replid is not "?", as this is used by slaves to force a full
             * resync on purpose when they are not able to partially
             * resync. */
            if (master_replid[0] != '?') server.stat_sync_partial_err++;
        }
    } else {
        /* If a slave uses SYNC, we are dealing with an old implementation
         * of the replication protocol (like redis-cli --slave). Flag the client
         * so that we don't expect to receive REPLCONF ACK feedbacks. */
        c->flags |= CLIENT_PRE_PSYNC;
    }

    /* Full resynchronization. */
    server.stat_sync_full++;

    /* Setup the slave as one waiting for BGSAVE to start. The following code
     * paths will change the state if we handle the slave differently. */
    c->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
    if (server.repl_disable_tcp_nodelay)
        connDisableTcpNoDelay(c->conn); /* Non critical if it fails. */
    c->repldbfd = -1;
    c->flags |= CLIENT_SLAVE;
    listAddNodeTail(server.slaves,c);

    /* Create the replication backlog if needed. */
    if (listLength(server.slaves) == 1 && server.repl_backlog == NULL) {
        /* When we create the backlog from scratch, we always use a new
         * replication ID and clear the ID2, since there is no valid
         * past history. */
        changeReplicationId();
        clearReplicationId2();
        createReplicationBacklog();
        serverLog(LL_NOTICE,"Replication backlog created, my new "
                            "replication IDs are '%s' and '%s'",
                            server.replid, server.replid2);
    }

    /* CASE 1: BGSAVE is in progress, with disk target. */
    if (server.child_type == CHILD_TYPE_RDB &&
        server.rdb_child_type == RDB_CHILD_TYPE_DISK)
    {
        /* Ok a background save is in progress. Let's check if it is a good
         * one for replication, i.e. if there is another slave that is
         * registering differences since the server forked to save. */
        client *slave;
        listNode *ln;
        listIter li;

        listRewind(server.slaves,&li);
        while((ln = listNext(&li))) {
            slave = ln->value;
            /* If the client needs a buffer of commands, we can't use
             * a replica without replication buffer. */
            if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
                (!(slave->flags & CLIENT_REPL_RDBONLY) ||
                 (c->flags & CLIENT_REPL_RDBONLY)))
                break;
        }
        /* To attach this slave, we check that it has at least all the
         * capabilities of the slave that triggered the current BGSAVE
         * and its exact requirements. Note: 'slave' is only valid here
         * when 'ln' is non-NULL (the && short-circuits otherwise). */
        if (ln && ((c->slave_capa & slave->slave_capa) == slave->slave_capa) &&
            c->slave_req == slave->slave_req) {
            /* Perfect, the server is already registering differences for
             * another slave. Set the right state, and copy the buffer.
             * We don't copy buffer if clients don't want. */
            if (!(c->flags & CLIENT_REPL_RDBONLY))
                copyReplicaOutputBuffer(c,slave);
            replicationSetupSlaveForFullResync(c,slave->psync_initial_offset);
            serverLog(LL_NOTICE,"Waiting for end of BGSAVE for SYNC");
        } else {
            /* No way, we need to wait for the next BGSAVE in order to
             * register differences. */
            serverLog(LL_NOTICE,"Can't attach the replica to the current BGSAVE. Waiting for next BGSAVE for SYNC");
        }

    /* CASE 2: BGSAVE is in progress, with socket target. */
    } else if (server.child_type == CHILD_TYPE_RDB &&
               server.rdb_child_type == RDB_CHILD_TYPE_SOCKET)
    {
        /* There is an RDB child process but it is writing directly to
         * children sockets. We need to wait for the next BGSAVE
         * in order to synchronize. */
        serverLog(LL_NOTICE,"Current BGSAVE has socket target. Waiting for next BGSAVE for SYNC");

    /* CASE 3: There is no BGSAVE is in progress. */
    } else {
        if (server.repl_diskless_sync && (c->slave_capa & SLAVE_CAPA_EOF) &&
            server.repl_diskless_sync_delay)
        {
            /* Diskless replication RDB child is created inside
             * replicationCron() since we want to delay its start a
             * few seconds to wait for more slaves to arrive. */
            serverLog(LL_NOTICE,"Delay next BGSAVE for diskless SYNC");
        } else {
            /* We don't have a BGSAVE in progress, let's start one. Diskless
             * or disk-based mode is determined by replica's capacity. */
            if (!hasActiveChildProcess()) {
                startBgsaveForReplication(c->slave_capa, c->slave_req);
            } else {
                serverLog(LL_NOTICE,
                    "No BGSAVE in progress, but another BG operation is active. "
                    "BGSAVE for replication delayed");
            }
        }
    }
    return;
}
+
+/* REPLCONF <option> <value> <option> <value> ...
+ * This command is used by a replica in order to configure the replication
+ * process before starting it with the SYNC command.
+ * This command is also used by a master in order to get the replication
+ * offset from a replica.
+ *
+ * Currently we support these options:
+ *
+ * - listening-port <port>
+ * - ip-address <ip>
+ * What is the listening ip and port of the Replica redis instance, so that
+ * the master can accurately lists replicas and their listening ports in the
+ * INFO output.
+ *
+ * - capa <eof|psync2>
+ * What is the capabilities of this instance.
+ * eof: supports EOF-style RDB transfer for diskless replication.
+ * psync2: supports PSYNC v2, so understands +CONTINUE <new repl ID>.
+ *
+ * - ack <offset> [fack <aofofs>]
+ * Replica informs the master the amount of replication stream that it
+ * processed so far, and optionally the replication offset fsynced to the AOF file.
+ * This special pattern doesn't reply to the caller.
+ *
+ * - getack <dummy>
+ * Unlike other subcommands, this is used by master to get the replication
+ * offset from a replica.
+ *
+ * - rdb-only <0|1>
+ * Only wants RDB snapshot without replication buffer.
+ *
+ * - rdb-filter-only <include-filters>
+ * Define "include" filters for the RDB snapshot. Currently we only support
+ * a single include filter: "functions". Passing an empty string "" will
+ * result in an empty RDB. */
+void replconfCommand(client *c) {
+ int j;
+
+ if ((c->argc % 2) == 0) {
+ /* Number of arguments must be odd to make sure that every
+ * option has a corresponding value. */
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Process every option-value pair. */
+ for (j = 1; j < c->argc; j+=2) {
+ if (!strcasecmp(c->argv[j]->ptr,"listening-port")) {
+ long port;
+
+ if ((getLongFromObjectOrReply(c,c->argv[j+1],
+ &port,NULL) != C_OK))
+ return;
+ c->slave_listening_port = port;
+ } else if (!strcasecmp(c->argv[j]->ptr,"ip-address")) {
+ sds addr = c->argv[j+1]->ptr;
+ if (sdslen(addr) < NET_HOST_STR_LEN) {
+ if (c->slave_addr) sdsfree(c->slave_addr);
+ c->slave_addr = sdsdup(addr);
+ } else {
+ addReplyErrorFormat(c,"REPLCONF ip-address provided by "
+ "replica instance is too long: %zd bytes", sdslen(addr));
+ return;
+ }
+ } else if (!strcasecmp(c->argv[j]->ptr,"capa")) {
+ /* Ignore capabilities not understood by this master. */
+ if (!strcasecmp(c->argv[j+1]->ptr,"eof"))
+ c->slave_capa |= SLAVE_CAPA_EOF;
+ else if (!strcasecmp(c->argv[j+1]->ptr,"psync2"))
+ c->slave_capa |= SLAVE_CAPA_PSYNC2;
+ } else if (!strcasecmp(c->argv[j]->ptr,"ack")) {
+ /* REPLCONF ACK is used by slave to inform the master the amount
+ * of replication stream that it processed so far. It is an
+ * internal only command that normal clients should never use. */
+ long long offset;
+
+ if (!(c->flags & CLIENT_SLAVE)) return;
+ if ((getLongLongFromObject(c->argv[j+1], &offset) != C_OK))
+ return;
+ if (offset > c->repl_ack_off)
+ c->repl_ack_off = offset;
+ if (c->argc > j+3 && !strcasecmp(c->argv[j+2]->ptr,"fack")) {
+ if ((getLongLongFromObject(c->argv[j+3], &offset) != C_OK))
+ return;
+ if (offset > c->repl_aof_off)
+ c->repl_aof_off = offset;
+ }
+ c->repl_ack_time = server.unixtime;
+ /* If this was a diskless replication, we need to really put
+ * the slave online when the first ACK is received (which
+ * confirms slave is online and ready to get more data). This
+ * allows for simpler and less CPU intensive EOF detection
+ * when streaming RDB files.
+ * There's a chance the ACK got to us before we detected that the
+ * bgsave is done (since that depends on cron ticks), so run a
+ * quick check first (instead of waiting for the next ACK. */
+ if (server.child_type == CHILD_TYPE_RDB && c->replstate == SLAVE_STATE_WAIT_BGSAVE_END)
+ checkChildrenDone();
+ if (c->repl_start_cmd_stream_on_ack && c->replstate == SLAVE_STATE_ONLINE)
+ replicaStartCommandStream(c);
+ /* Note: this command does not reply anything! */
+ return;
+ } else if (!strcasecmp(c->argv[j]->ptr,"getack")) {
+ /* REPLCONF GETACK is used in order to request an ACK ASAP
+ * to the slave. */
+ if (server.masterhost && server.master) replicationSendAck();
+ return;
+ } else if (!strcasecmp(c->argv[j]->ptr,"rdb-only")) {
+ /* REPLCONF RDB-ONLY is used to identify the client only wants
+ * RDB snapshot without replication buffer. */
+ long rdb_only = 0;
+ if (getRangeLongFromObjectOrReply(c,c->argv[j+1],
+ 0,1,&rdb_only,NULL) != C_OK)
+ return;
+ if (rdb_only == 1) c->flags |= CLIENT_REPL_RDBONLY;
+ else c->flags &= ~CLIENT_REPL_RDBONLY;
+ } else if (!strcasecmp(c->argv[j]->ptr,"rdb-filter-only")) {
+ /* REPLCONFG RDB-FILTER-ONLY is used to define "include" filters
+ * for the RDB snapshot. Currently we only support a single
+ * include filter: "functions". In the future we may want to add
+ * other filters like key patterns, key types, non-volatile, module
+ * aux fields, ...
+ * We might want to add the complementing "RDB-FILTER-EXCLUDE" to
+ * filter out certain data. */
+ int filter_count, i;
+ sds *filters;
+ if (!(filters = sdssplitargs(c->argv[j+1]->ptr, &filter_count))) {
+ addReplyErrorFormat(c, "Missing rdb-filter-only values");
+ return;
+ }
+ /* By default filter out all parts of the rdb */
+ c->slave_req |= SLAVE_REQ_RDB_EXCLUDE_DATA;
+ c->slave_req |= SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS;
+ for (i = 0; i < filter_count; i++) {
+ if (!strcasecmp(filters[i], "functions"))
+ c->slave_req &= ~SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS;
+ else {
+ addReplyErrorFormat(c, "Unsupported rdb-filter-only option: %s", (char*)filters[i]);
+ sdsfreesplitres(filters, filter_count);
+ return;
+ }
+ }
+ sdsfreesplitres(filters, filter_count);
+ } else {
+ addReplyErrorFormat(c,"Unrecognized REPLCONF option: %s",
+ (char*)c->argv[j]->ptr);
+ return;
+ }
+ }
+ addReply(c,shared.ok);
+}
+
/* This function puts a replica in the online state, and should be called just
 * after a replica received the RDB file for the initial synchronization.
 *
 * It does a few things:
 * 1) Put the slave in ONLINE state.
 * 2) Update the count of "good replicas".
 * 3) Trigger the module event.
 *
 * Returns 0 when the replica should be disconnected (it asked for the RDB
 * only, so there is no command stream to serve), 1 when it is a regular
 * replica that was put online. */
int replicaPutOnline(client *slave) {
    if (slave->flags & CLIENT_REPL_RDBONLY) {
        slave->replstate = SLAVE_STATE_RDB_TRANSMITTED;
        /* The client asked for RDB only so we should close it ASAP */
        serverLog(LL_NOTICE,
                  "RDB transfer completed, rdb only replica (%s) should be disconnected asap",
                  replicationGetSlaveName(slave));
        return 0;
    }
    slave->replstate = SLAVE_STATE_ONLINE;
    slave->repl_ack_time = server.unixtime; /* Prevent false timeout. */

    /* Must run after replstate is set to ONLINE, since the good-replicas
     * count is derived from the replicas' states. */
    refreshGoodSlavesCount();
    /* Fire the replica change modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_REPLICA_CHANGE,
                          REDISMODULE_SUBEVENT_REPLICA_CHANGE_ONLINE,
                          NULL);
    serverLog(LL_NOTICE,"Synchronization with replica %s succeeded",
        replicationGetSlaveName(slave));
    return 1;
}
+
/* This function should be called just after a replica received the RDB file
 * for the initial synchronization, and we are finally ready to send the
 * incremental stream of commands.
 *
 * It does a few things:
 * 1) Close the replica's connection async if it doesn't need replication
 *    commands buffer stream, since it actually isn't a valid replica.
 * 2) Make sure the writable event is re-installed, since when calling the SYNC
 *    command we had no replies and it was disabled, and then we could
 *    accumulate output buffer data without sending it to the replica so it
 *    won't get mixed with the RDB stream.
 *
 * Precondition (asserted): the client must not be an RDB-only replica,
 * those are disconnected by replicaPutOnline() instead. */
void replicaStartCommandStream(client *slave) {
    serverAssert(!(slave->flags & CLIENT_REPL_RDBONLY));
    /* Clear the flag: the deferred start (waiting for the first REPLCONF
     * ACK after a diskless transfer) is now satisfied. */
    slave->repl_start_cmd_stream_on_ack = 0;

    putClientInPendingWriteQueue(slave);
}
+
+/* We call this function periodically to remove an RDB file that was
+ * generated because of replication, in an instance that is otherwise
+ * without any persistence. We don't want instances without persistence
+ * to take RDB files around, this violates certain policies in certain
+ * environments. */
+void removeRDBUsedToSyncReplicas(void) {
+ /* If the feature is disabled, return ASAP but also clear the
+ * RDBGeneratedByReplication flag in case it was set. Otherwise if the
+ * feature was enabled, but gets disabled later with CONFIG SET, the
+ * flag may remain set to one: then next time the feature is re-enabled
+ * via CONFIG SET we have it set even if no RDB was generated
+ * because of replication recently. */
+ if (!server.rdb_del_sync_files) {
+ RDBGeneratedByReplication = 0;
+ return;
+ }
+
+ if (allPersistenceDisabled() && RDBGeneratedByReplication) {
+ client *slave;
+ listNode *ln;
+ listIter li;
+
+ int delrdb = 1;
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ slave = ln->value;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START ||
+ slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END ||
+ slave->replstate == SLAVE_STATE_SEND_BULK)
+ {
+ delrdb = 0;
+ break; /* No need to check the other replicas. */
+ }
+ }
+ if (delrdb) {
+ struct stat sb;
+ if (lstat(server.rdb_filename,&sb) != -1) {
+ RDBGeneratedByReplication = 0;
+ serverLog(LL_NOTICE,
+ "Removing the RDB file used to feed replicas "
+ "in a persistence-less instance");
+ bg_unlink(server.rdb_filename);
+ }
+ }
+ }
+}
+
+/* Close the repldbfd and reclaim the page cache if the client hold
+ * the last reference to replication DB */
+void closeRepldbfd(client *myself) {
+ listNode *ln;
+ listIter li;
+ int reclaim = 1;
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+ if (slave != myself && slave->replstate == SLAVE_STATE_SEND_BULK) {
+ reclaim = 0;
+ break;
+ }
+ }
+
+ if (reclaim) {
+ bioCreateCloseJob(myself->repldbfd, 0, 1);
+ } else {
+ close(myself->repldbfd);
+ }
+ myself->repldbfd = -1;
+}
+
/* Write handler used while transferring an on-disk RDB to a replica:
 * first sends the "$<len>\r\n" preamble, then streams the file in
 * PROTO_IOBUF_LEN chunks, and finally puts the replica online. */
void sendBulkToSlave(connection *conn) {
    client *slave = connGetPrivateData(conn);
    char buf[PROTO_IOBUF_LEN];
    ssize_t nwritten, buflen;

    /* Before sending the RDB file, we send the preamble as configured by the
     * replication process. Currently the preamble is just the bulk count of
     * the file in the form "$<length>\r\n". */
    if (slave->replpreamble) {
        nwritten = connWrite(conn,slave->replpreamble,sdslen(slave->replpreamble));
        if (nwritten == -1) {
            serverLog(LL_WARNING,
                "Write error sending RDB preamble to replica: %s",
                connGetLastError(conn));
            freeClient(slave);
            return;
        }
        atomicIncr(server.stat_net_repl_output_bytes, nwritten);
        /* Drop the bytes already written; keep the rest for the next call. */
        sdsrange(slave->replpreamble,nwritten,-1);
        if (sdslen(slave->replpreamble) == 0) {
            sdsfree(slave->replpreamble);
            slave->replpreamble = NULL;
            /* fall through sending data. */
        } else {
            return;
        }
    }

    /* If the preamble was already transferred, send the RDB bulk data. */
    lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
    buflen = read(slave->repldbfd,buf,PROTO_IOBUF_LEN);
    if (buflen <= 0) {
        serverLog(LL_WARNING,"Read error sending DB to replica: %s",
            (buflen == 0) ? "premature EOF" : strerror(errno));
        freeClient(slave);
        return;
    }
    if ((nwritten = connWrite(conn,buf,buflen)) == -1) {
        /* Only a real error (not EAGAIN-like, i.e. still connected)
         * kills the replica. */
        if (connGetState(conn) != CONN_STATE_CONNECTED) {
            serverLog(LL_WARNING,"Write error sending DB to replica: %s",
                connGetLastError(conn));
            freeClient(slave);
        }
        return;
    }
    slave->repldboff += nwritten;
    atomicIncr(server.stat_net_repl_output_bytes, nwritten);
    if (slave->repldboff == slave->repldbsize) {
        /* Whole file transferred: release the fd, remove this handler and
         * switch the replica to the command stream (or drop it if it was
         * an RDB-only client). */
        closeRepldbfd(slave);
        connSetWriteHandler(slave->conn,NULL);
        if (!replicaPutOnline(slave)) {
            freeClient(slave);
            return;
        }
        replicaStartCommandStream(slave);
    }
}
+
+/* Remove one write handler from the list of connections waiting to be writable
+ * during rdb pipe transfer. */
+void rdbPipeWriteHandlerConnRemoved(struct connection *conn) {
+ if (!connHasWriteHandler(conn))
+ return;
+ connSetWriteHandler(conn, NULL);
+ client *slave = connGetPrivateData(conn);
+ slave->repl_last_partial_write = 0;
+ server.rdb_pipe_numconns_writing--;
+ /* if there are no more writes for now for this conn, or write error: */
+ if (server.rdb_pipe_numconns_writing == 0) {
+ if (aeCreateFileEvent(server.el, server.rdb_pipe_read, AE_READABLE, rdbPipeReadHandler,NULL) == AE_ERR) {
+ serverPanic("Unrecoverable error creating server.rdb_pipe_read file event.");
+ }
+ }
+}
+
/* Called in diskless master during transfer of data from the rdb pipe, when
 * the replica becomes writable again.
 *
 * Tries to flush the remainder of server.rdb_pipe_buff (from this replica's
 * repldboff onwards); once the buffer is fully sent the write handler is
 * removed via rdbPipeWriteHandlerConnRemoved(). */
void rdbPipeWriteHandler(struct connection *conn) {
    serverAssert(server.rdb_pipe_bufflen>0);
    client *slave = connGetPrivateData(conn);
    ssize_t nwritten;
    if ((nwritten = connWrite(conn, server.rdb_pipe_buff + slave->repldboff,
                              server.rdb_pipe_bufflen - slave->repldboff)) == -1)
    {
        if (connGetState(conn) == CONN_STATE_CONNECTED)
            return; /* equivalent to EAGAIN */
        serverLog(LL_WARNING,"Write error sending DB to replica: %s",
            connGetLastError(conn));
        freeClient(slave);
        return;
    } else {
        slave->repldboff += nwritten;
        atomicIncr(server.stat_net_repl_output_bytes, nwritten);
        if (slave->repldboff < server.rdb_pipe_bufflen) {
            /* Partial write: remember when it happened (used for timeout
             * detection) and wait for the next writable event. */
            slave->repl_last_partial_write = server.unixtime;
            return; /* more data to write.. */
        }
    }
    /* Buffer fully flushed for this replica. */
    rdbPipeWriteHandlerConnRemoved(conn);
}
+
/* Called in diskless master, when there's data to read from the child's rdb pipe.
 * Reads chunks from the pipe and fans each chunk out to every replica
 * connection taking part in the socket-target BGSAVE. If some replica can't
 * absorb a whole chunk, reading from the pipe is suspended until all
 * lagging replicas drain (see rdbPipeWriteHandler). */
void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask) {
    UNUSED(mask);
    UNUSED(clientData);
    UNUSED(eventLoop);
    int i;
    /* Lazily allocate the shared fan-out buffer. */
    if (!server.rdb_pipe_buff)
        server.rdb_pipe_buff = zmalloc(PROTO_IOBUF_LEN);
    serverAssert(server.rdb_pipe_numconns_writing==0);

    while (1) {
        server.rdb_pipe_bufflen = read(fd, server.rdb_pipe_buff, PROTO_IOBUF_LEN);
        if (server.rdb_pipe_bufflen < 0) {
            if (errno == EAGAIN || errno == EWOULDBLOCK)
                return;
            /* Hard read error: the transfer can't continue, drop every
             * replica attached to this BGSAVE and kill the child. */
            serverLog(LL_WARNING,"Diskless rdb transfer, read error sending DB to replicas: %s", strerror(errno));
            for (i=0; i < server.rdb_pipe_numconns; i++) {
                connection *conn = server.rdb_pipe_conns[i];
                if (!conn)
                    continue;
                client *slave = connGetPrivateData(conn);
                freeClient(slave);
                server.rdb_pipe_conns[i] = NULL;
            }
            killRDBChild();
            return;
        }

        if (server.rdb_pipe_bufflen == 0) {
            /* EOF - write end was closed. */
            int stillUp = 0;
            aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE);
            for (i=0; i < server.rdb_pipe_numconns; i++)
            {
                connection *conn = server.rdb_pipe_conns[i];
                if (!conn)
                    continue;
                stillUp++;
            }
            serverLog(LL_NOTICE,"Diskless rdb transfer, done reading from pipe, %d replicas still up.", stillUp);
            /* Now that the replicas have finished reading, notify the child that it's safe to exit.
             * When the server detects the child has exited, it can mark the replica as online, and
             * start streaming the replication buffers. */
            close(server.rdb_child_exit_pipe);
            server.rdb_child_exit_pipe = -1;
            return;
        }

        int stillAlive = 0;
        for (i=0; i < server.rdb_pipe_numconns; i++)
        {
            ssize_t nwritten;
            connection *conn = server.rdb_pipe_conns[i];
            if (!conn)
                continue;

            client *slave = connGetPrivateData(conn);
            if ((nwritten = connWrite(conn, server.rdb_pipe_buff, server.rdb_pipe_bufflen)) == -1) {
                if (connGetState(conn) != CONN_STATE_CONNECTED) {
                    serverLog(LL_WARNING,"Diskless rdb transfer, write error sending DB to replica: %s",
                              connGetLastError(conn));
                    freeClient(slave);
                    server.rdb_pipe_conns[i] = NULL;
                    continue;
                }
                /* An error and still in connected state, is equivalent to EAGAIN */
                slave->repldboff = 0;
            } else {
                /* Note: when use diskless replication, 'repldboff' is the offset
                 * of 'rdb_pipe_buff' sent rather than the offset of entire RDB. */
                slave->repldboff = nwritten;
                atomicIncr(server.stat_net_repl_output_bytes, nwritten);
            }
            /* If we were unable to write all the data to one of the replicas,
             * setup write handler (and disable pipe read handler, below) */
            if (nwritten != server.rdb_pipe_bufflen) {
                slave->repl_last_partial_write = server.unixtime;
                server.rdb_pipe_numconns_writing++;
                connSetWriteHandler(conn, rdbPipeWriteHandler);
            }
            stillAlive++;
        }

        if (stillAlive == 0) {
            serverLog(LL_WARNING,"Diskless rdb transfer, last replica dropped, killing fork child.");
            killRDBChild();
        }
        /* Remove the pipe read handler if at least one write handler was set. */
        if (server.rdb_pipe_numconns_writing || stillAlive == 0) {
            aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE);
            break;
        }
    }
}
+
/* This function is called at the end of every background saving.
 *
 * The argument bgsaveerr is C_OK if the background saving succeeded
 * otherwise C_ERR is passed to the function.
 * The 'type' argument is the type of the child that terminated
 * (if it had a disk or socket target).
 *
 * For every replica in SLAVE_STATE_WAIT_BGSAVE_END this either:
 * - drops it (async) if the BGSAVE failed, or
 * - (socket target) marks it online pending a REPLCONF ACK, or
 * - (disk target) opens the RDB file and switches the replica to
 *   SLAVE_STATE_SEND_BULK so sendBulkToSlave() can stream it. */
void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
    listNode *ln;
    listIter li;

    /* Note: there's a chance we got here from within the REPLCONF ACK command
     * so we must avoid using freeClient, otherwise we'll crash on our way up. */

    listRewind(server.slaves,&li);
    while((ln = listNext(&li))) {
        client *slave = ln->value;

        /* Only replicas that were waiting for this BGSAVE are affected. */
        if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
            struct redis_stat buf;

            if (bgsaveerr != C_OK) {
                /* Async free: see the note above about REPLCONF ACK. */
                freeClientAsync(slave);
                serverLog(LL_WARNING,"SYNC failed. BGSAVE child returned an error");
                continue;
            }

            /* If this was an RDB on disk save, we have to prepare to send
             * the RDB from disk to the slave socket. Otherwise if this was
             * already an RDB -> Slaves socket transfer, used in the case of
             * diskless replication, our work is trivial, we can just put
             * the slave online. */
            if (type == RDB_CHILD_TYPE_SOCKET) {
                serverLog(LL_NOTICE,
                    "Streamed RDB transfer with replica %s succeeded (socket). Waiting for REPLCONF ACK from replica to enable streaming",
                        replicationGetSlaveName(slave));
                /* Note: we wait for a REPLCONF ACK message from the replica in
                 * order to really put it online (install the write handler
                 * so that the accumulated data can be transferred). However
                 * we change the replication state ASAP, since our slave
                 * is technically online now.
                 *
                 * So things work like that:
                 *
                 * 1. We end transferring the RDB file via socket.
                 * 2. The replica is put ONLINE but the write handler
                 *    is not installed.
                 * 3. The replica however goes really online, and pings us
                 *    back via REPLCONF ACK commands.
                 * 4. Now we finally install the write handler, and send
                 *    the buffers accumulated so far to the replica.
                 *
                 * But why we do that? Because the replica, when we stream
                 * the RDB directly via the socket, must detect the RDB
                 * EOF (end of file), that is a special random string at the
                 * end of the RDB (for streamed RDBs we don't know the length
                 * in advance). Detecting such final EOF string is much
                 * simpler and less CPU intensive if no more data is sent
                 * after such final EOF. So we don't want to glue the end of
                 * the RDB transfer with the start of the other replication
                 * data. */
                if (!replicaPutOnline(slave)) {
                    freeClientAsync(slave);
                    continue;
                }
                slave->repl_start_cmd_stream_on_ack = 1;
            } else {
                /* Disk target: open and stat the freshly saved RDB so we
                 * can announce its exact size in the bulk preamble. */
                if ((slave->repldbfd = open(server.rdb_filename,O_RDONLY)) == -1 ||
                    redis_fstat(slave->repldbfd,&buf) == -1) {
                    freeClientAsync(slave);
                    serverLog(LL_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
                    continue;
                }
                slave->repldboff = 0;
                slave->repldbsize = buf.st_size;
                slave->replstate = SLAVE_STATE_SEND_BULK;
                /* RESP bulk header: "$<len>\r\n" followed later by raw RDB bytes. */
                slave->replpreamble = sdscatprintf(sdsempty(),"$%lld\r\n",
                    (unsigned long long) slave->repldbsize);

                /* NOTE(review): the handler is cleared and then re-set;
                 * presumably the explicit NULL reset is needed so the
                 * following install takes effect cleanly — confirm against
                 * connSetWriteHandler() semantics. */
                connSetWriteHandler(slave->conn,NULL);
                if (connSetWriteHandler(slave->conn,sendBulkToSlave) == C_ERR) {
                    freeClientAsync(slave);
                    continue;
                }
            }
        }
    }
}
+
/* Change the current instance replication ID with a new, random one.
 * This will prevent successful PSYNCs between this master and other
 * slaves, so the command should be called when something happens that
 * alters the current story of the dataset.
 *
 * The ID is CONFIG_RUN_ID_SIZE random hex chars, NUL-terminated. */
void changeReplicationId(void) {
    getRandomHexChars(server.replid,CONFIG_RUN_ID_SIZE);
    /* getRandomHexChars() does not terminate the buffer; do it here. */
    server.replid[CONFIG_RUN_ID_SIZE] = '\0';
}
+
+/* Clear (invalidate) the secondary replication ID. This happens, for
+ * example, after a full resynchronization, when we start a new replication
+ * history. */
+void clearReplicationId2(void) {
+ memset(server.replid2,'0',sizeof(server.replid));
+ server.replid2[CONFIG_RUN_ID_SIZE] = '\0';
+ server.second_replid_offset = -1;
+}
+
/* Use the current replication ID / offset as secondary replication
 * ID, and change the current one in order to start a new history.
 * This should be used when an instance is switched from slave to master
 * so that it can serve PSYNC requests performed using the master
 * replication ID. */
void shiftReplicationId(void) {
    /* Copy BEFORE regenerating: replid must be preserved as replid2
     * before changeReplicationId() overwrites it below. */
    memcpy(server.replid2,server.replid,sizeof(server.replid));
    /* We set the second replid offset to the master offset + 1, since
     * the slave will ask for the first byte it has not yet received, so
     * we need to add one to the offset: for example if, as a slave, we are
     * sure we have the same history as the master for 50 bytes, after we
     * are turned into a master, we can accept a PSYNC request with offset
     * 51, since the slave asking has the same history up to the 50th
     * byte, and is asking for the new bytes starting at offset 51. */
    server.second_replid_offset = server.master_repl_offset+1;
    changeReplicationId();
    serverLog(LL_NOTICE,"Setting secondary replication ID to %s, valid up to offset: %lld. New replication ID is %s", server.replid2, server.second_replid_offset, server.replid);
}
+
+/* ----------------------------------- SLAVE -------------------------------- */
+
+/* Returns 1 if the given replication state is a handshake state,
+ * 0 otherwise. */
+int slaveIsInHandshakeState(void) {
+ return server.repl_state >= REPL_STATE_RECEIVE_PING_REPLY &&
+ server.repl_state <= REPL_STATE_RECEIVE_PSYNC_REPLY;
+}
+
+/* Avoid the master to detect the slave is timing out while loading the
+ * RDB file in initial synchronization. We send a single newline character
+ * that is valid protocol but is guaranteed to either be sent entirely or
+ * not, since the byte is indivisible.
+ *
+ * The function is called in two contexts: while we flush the current
+ * data with emptyDb(), and while we load the new data received as an
+ * RDB file from the master. */
+void replicationSendNewlineToMaster(void) {
+ static time_t newline_sent;
+ if (time(NULL) != newline_sent) {
+ newline_sent = time(NULL);
+ /* Pinging back in this stage is best-effort. */
+ if (server.repl_transfer_s) connWrite(server.repl_transfer_s, "\n", 1);
+ }
+}
+
+/* Callback used by emptyDb() while flushing away old data to load
+ * the new dataset received by the master and by discardTempDb()
+ * after loading succeeded or failed. */
+void replicationEmptyDbCallback(dict *d) {
+ UNUSED(d);
+ if (server.repl_state == REPL_STATE_TRANSFER)
+ replicationSendNewlineToMaster();
+}
+
/* Once we have a link with the master and the synchronization was
 * performed, this function materializes the master client we store
 * at server.master, starting from the specified file descriptor.
 *
 * 'conn' may be NULL (no read handler is installed in that case).
 * 'dbid' is the DB to select, or -1 to leave the selection untouched. */
void replicationCreateMasterClient(connection *conn, int dbid) {
    server.master = createClient(conn);
    if (conn)
        connSetReadHandler(server.master->conn, readQueryFromClient);

    /**
     * Important note:
     * The CLIENT_DENY_BLOCKING flag is deliberately NOT set here.
     * Commands like BLPOP should never arrive on the replication link
     * (blocking the master connection would likely deadlock and break
     * replication, and sending them here is considered a bug), so there
     * is no need to forbid blocking. Moreover, leaving blocking allowed
     * supports a legitimate use-case: a module may want to pass the
     * execution to a background thread and unblock the connection once
     * the execution is done. */
    server.master->flags |= CLIENT_MASTER;

    /* The master link never goes through AUTH and is fully trusted. */
    server.master->authenticated = 1;
    server.master->reploff = server.master_initial_offset;
    server.master->read_reploff = server.master->reploff;
    server.master->user = NULL; /* This client can do everything. */
    memcpy(server.master->replid, server.master_replid,
        sizeof(server.master_replid));
    /* If master offset is set to -1, this master is old and is not
     * PSYNC capable, so we flag it accordingly. */
    if (server.master->reploff == -1)
        server.master->flags |= CLIENT_PRE_PSYNC;
    if (dbid != -1) selectDb(server.master,dbid);
}
+
+/* This function will try to re-enable the AOF file after the
+ * master-replica synchronization: if it fails after multiple attempts
+ * the replica cannot be considered reliable and exists with an
+ * error. */
+void restartAOFAfterSYNC(void) {
+ unsigned int tries, max_tries = 10;
+ for (tries = 0; tries < max_tries; ++tries) {
+ if (startAppendOnly() == C_OK) break;
+ serverLog(LL_WARNING,
+ "Failed enabling the AOF after successful master synchronization! "
+ "Trying it again in one second.");
+ sleep(1);
+ }
+ if (tries == max_tries) {
+ serverLog(LL_WARNING,
+ "FATAL: this replica instance finished the synchronization with "
+ "its master, but the AOF can't be turned on. Exiting now.");
+ exit(1);
+ }
+}
+
+static int useDisklessLoad(void) {
+ /* compute boolean decision to use diskless load */
+ int enabled = server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB ||
+ (server.repl_diskless_load == REPL_DISKLESS_LOAD_WHEN_DB_EMPTY && dbTotalServerKeyCount()==0);
+
+ if (enabled) {
+ /* Check all modules handle read errors, otherwise it's not safe to use diskless load. */
+ if (!moduleAllDatatypesHandleErrors()) {
+ serverLog(LL_NOTICE,
+ "Skipping diskless-load because there are modules that don't handle read errors.");
+ enabled = 0;
+ }
+ /* Check all modules handle async replication, otherwise it's not safe to use diskless load. */
+ else if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB && !moduleAllModulesHandleReplAsyncLoad()) {
+ serverLog(LL_NOTICE,
+ "Skipping diskless-load because there are modules that are not aware of async replication.");
+ enabled = 0;
+ }
+ }
+ return enabled;
+}
+
/* Helper function for readSyncBulkPayload() to initialize tempDb
 * before socket-loading the new db from master. The tempDb may be populated
 * by swapMainDbWithTempDb or freed by disklessLoadDiscardTempDb later.
 * Thin wrapper: ownership of the returned db passes to the caller. */
redisDb *disklessLoadInitTempDb(void) {
    return initTempDb();
}
+
/* Helper function for readSyncBulkPayload() to discard our tempDb
 * when the loading succeeded or failed. The replicationEmptyDbCallback
 * keeps the master link alive while the discard runs. */
void disklessLoadDiscardTempDb(redisDb *tempDb) {
    discardTempDb(tempDb, replicationEmptyDbCallback);
}
+
/* If we know we got an entirely different data set from our master
 * we have no way to incrementally feed our replicas after that.
 * We want our replicas to resync with us as well, if we have any sub-replicas.
 * This is useful on readSyncBulkPayload in places where we just finished transferring db. */
void replicationAttachToNewMaster(void) {
    /* Replica starts to apply data from new master, we must discard the cached
     * master structure. */
    serverAssert(server.master == NULL);
    replicationDiscardCachedMaster();

    disconnectSlaves(); /* Force our replicas to resync with us as well. */
    freeReplicationBacklog(); /* Don't allow our chained replicas to PSYNC. */
}
+
/* Asynchronously read the SYNC payload we receive from a master.
 *
 * This is the read handler of the master connection during the initial
 * synchronization. It is re-entered by the event loop until the whole
 * bulk payload is received, then loads the dataset either from disk or
 * directly from the socket (diskless load) and finalizes the link. */
#define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */
void readSyncBulkPayload(connection *conn) {
    char buf[PROTO_IOBUF_LEN];
    ssize_t nread, readlen, nwritten;
    int use_diskless_load = useDisklessLoad();
    redisDb *diskless_load_tempDb = NULL;
    functionsLibCtx* temp_functions_lib_ctx = NULL;
    int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC :
                                                        EMPTYDB_NO_FLAGS;
    off_t left;

    /* Static vars used to hold the EOF mark, and the last bytes received
     * from the server: when they match, we reached the end of the transfer. */
    static char eofmark[CONFIG_RUN_ID_SIZE];
    static char lastbytes[CONFIG_RUN_ID_SIZE];
    static int usemark = 0;

    /* If repl_transfer_size == -1 we still have to read the bulk length
     * from the master reply. */
    if (server.repl_transfer_size == -1) {
        nread = connSyncReadLine(conn,buf,1024,server.repl_syncio_timeout*1000);
        if (nread == -1) {
            serverLog(LL_WARNING,
                "I/O error reading bulk count from MASTER: %s",
                connGetLastError(conn));
            goto error;
        } else {
            /* nread here is returned by connSyncReadLine(), which calls syncReadLine() and
             * convert "\r\n" to '\0' so 1 byte is lost. */
            atomicIncr(server.stat_net_repl_input_bytes, nread+1);
        }

        if (buf[0] == '-') {
            serverLog(LL_WARNING,
                "MASTER aborted replication with an error: %s",
                buf+1);
            goto error;
        } else if (buf[0] == '\0') {
            /* At this stage just a newline works as a PING in order to take
             * the connection live. So we refresh our last interaction
             * timestamp. */
            server.repl_transfer_lastio = server.unixtime;
            return;
        } else if (buf[0] != '$') {
            serverLog(LL_WARNING,"Bad protocol from MASTER, the first byte is not '$' (we received '%s'), are you sure the host and port are right?", buf);
            goto error;
        }

        /* There are two possible forms for the bulk payload. One is the
         * usual $<count> bulk format. The other is used for diskless transfers
         * when the master does not know beforehand the size of the file to
         * transfer. In the latter case, the following format is used:
         *
         * $EOF:<40 bytes delimiter>
         *
         * At the end of the file the announced delimiter is transmitted. The
         * delimiter is long and random enough that the probability of a
         * collision with the actual file content can be ignored. */
        if (strncmp(buf+1,"EOF:",4) == 0 && strlen(buf+5) >= CONFIG_RUN_ID_SIZE) {
            usemark = 1;
            memcpy(eofmark,buf+5,CONFIG_RUN_ID_SIZE);
            memset(lastbytes,0,CONFIG_RUN_ID_SIZE);
            /* Set any repl_transfer_size to avoid entering this code path
             * at the next call. */
            server.repl_transfer_size = 0;
            serverLog(LL_NOTICE,
                "MASTER <-> REPLICA sync: receiving streamed RDB from master with EOF %s",
                use_diskless_load? "to parser":"to disk");
        } else {
            usemark = 0;
            server.repl_transfer_size = strtol(buf+1,NULL,10);
            serverLog(LL_NOTICE,
                "MASTER <-> REPLICA sync: receiving %lld bytes from master %s",
                (long long) server.repl_transfer_size,
                use_diskless_load? "to parser":"to disk");
        }
        /* Header parsed; the payload itself arrives on later invocations. */
        return;
    }

    if (!use_diskless_load) {
        /* Read the data from the socket, store it to a file and search
         * for the EOF. */
        if (usemark) {
            readlen = sizeof(buf);
        } else {
            left = server.repl_transfer_size - server.repl_transfer_read;
            readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf);
        }

        nread = connRead(conn,buf,readlen);
        if (nread <= 0) {
            if (connGetState(conn) == CONN_STATE_CONNECTED) {
                /* equivalent to EAGAIN */
                return;
            }
            serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s",
                (nread == -1) ? connGetLastError(conn) : "connection lost");
            cancelReplicationHandshake(1);
            return;
        }
        atomicIncr(server.stat_net_repl_input_bytes, nread);

        /* When a mark is used, we want to detect EOF asap in order to avoid
         * writing the EOF mark into the file... */
        int eof_reached = 0;

        if (usemark) {
            /* Update the last bytes array, and check if it matches our
             * delimiter. */
            if (nread >= CONFIG_RUN_ID_SIZE) {
                memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,
                       CONFIG_RUN_ID_SIZE);
            } else {
                /* Fewer new bytes than the mark length: shift the rolling
                 * window left and append the new bytes at the end. */
                int rem = CONFIG_RUN_ID_SIZE-nread;
                memmove(lastbytes,lastbytes+nread,rem);
                memcpy(lastbytes+rem,buf,nread);
            }
            if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0)
                eof_reached = 1;
        }

        /* Update the last I/O time for the replication transfer (used in
         * order to detect timeouts during replication), and write what we
         * got from the socket to the dump file on disk. */
        server.repl_transfer_lastio = server.unixtime;
        if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) {
            serverLog(LL_WARNING,
                "Write error or short write writing to the DB dump file "
                "needed for MASTER <-> REPLICA synchronization: %s",
                (nwritten == -1) ? strerror(errno) : "short write");
            goto error;
        }
        server.repl_transfer_read += nread;

        /* Delete the last 40 bytes from the file if we reached EOF. */
        if (usemark && eof_reached) {
            if (ftruncate(server.repl_transfer_fd,
                server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1)
            {
                serverLog(LL_WARNING,
                    "Error truncating the RDB file received from the master "
                    "for SYNC: %s", strerror(errno));
                goto error;
            }
        }

        /* Sync data on disk from time to time, otherwise at the end of the
         * transfer we may suffer a big delay as the memory buffers are copied
         * into the actual disk. */
        if (server.repl_transfer_read >=
            server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC)
        {
            off_t sync_size = server.repl_transfer_read -
                              server.repl_transfer_last_fsync_off;
            rdb_fsync_range(server.repl_transfer_fd,
                server.repl_transfer_last_fsync_off, sync_size);
            server.repl_transfer_last_fsync_off += sync_size;
        }

        /* Check if the transfer is now complete */
        if (!usemark) {
            if (server.repl_transfer_read == server.repl_transfer_size)
                eof_reached = 1;
        }

        /* If the transfer is yet not complete, we need to read more, so
         * return ASAP and wait for the handler to be called again. */
        if (!eof_reached) return;
    }

    /* We reach this point in one of the following cases:
     *
     * 1. The replica is using diskless replication, that is, it reads data
     *    directly from the socket to the Redis memory, without using
     *    a temporary RDB file on disk. In that case we just block and
     *    read everything from the socket.
     *
     * 2. Or when we are done reading from the socket to the RDB file, in
     *    such case we want just to read the RDB file in memory. */

    /* We need to stop any AOF rewriting child before flushing and parsing
     * the RDB, otherwise we'll create a copy-on-write disaster. */
    if (server.aof_state != AOF_OFF) stopAppendOnly();
    /* Also try to stop save RDB child before flushing and parsing the RDB:
     * 1. Ensure background save doesn't overwrite synced data after being loaded.
     * 2. Avoid copy-on-write disaster. */
    if (server.child_type == CHILD_TYPE_RDB) {
        if (!use_diskless_load) {
            serverLog(LL_NOTICE,
                "Replica is about to load the RDB file received from the "
                "master, but there is a pending RDB child running. "
                "Killing process %ld and removing its temp file to avoid "
                "any race",
                (long) server.child_pid);
        }
        killRDBChild();
    }

    if (use_diskless_load && server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
        /* Initialize empty tempDb dictionaries. */
        diskless_load_tempDb = disklessLoadInitTempDb();
        temp_functions_lib_ctx = functionsLibCtxCreate();

        moduleFireServerEvent(REDISMODULE_EVENT_REPL_ASYNC_LOAD,
                              REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_STARTED,
                              NULL);
    } else {
        replicationAttachToNewMaster();

        serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data");
        emptyData(-1,empty_db_flags,replicationEmptyDbCallback);
    }

    /* Before loading the DB into memory we need to delete the readable
     * handler, otherwise it will get called recursively since
     * rdbLoad() will call the event loop to process events from time to
     * time for non blocking loading. */
    connSetReadHandler(conn, NULL);

    serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Loading DB in memory");
    rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
    if (use_diskless_load) {
        rio rdb;
        redisDb *dbarray;
        functionsLibCtx* functions_lib_ctx;
        int asyncLoading = 0;

        if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
            /* Async loading means we continue serving read commands during full resync, and
             * "swap" the new db with the old db only when loading is done.
             * It is enabled only on SWAPDB diskless replication when master replication ID hasn't changed,
             * because in that state the old content of the db represents a different point in time of the same
             * data set we're currently receiving from the master. */
            if (memcmp(server.replid, server.master_replid, CONFIG_RUN_ID_SIZE) == 0) {
                asyncLoading = 1;
            }
            dbarray = diskless_load_tempDb;
            functions_lib_ctx = temp_functions_lib_ctx;
        } else {
            dbarray = server.db;
            functions_lib_ctx = functionsLibCtxGetCurrent();
            functionsLibCtxClear(functions_lib_ctx);
        }

        rioInitWithConn(&rdb,conn,server.repl_transfer_size);

        /* Put the socket in blocking mode to simplify RDB transfer.
         * We'll restore it when the RDB is received. */
        connBlock(conn);
        connRecvTimeout(conn, server.repl_timeout*1000);
        startLoading(server.repl_transfer_size, RDBFLAGS_REPLICATION, asyncLoading);

        int loadingFailed = 0;
        rdbLoadingCtx loadingCtx = { .dbarray = dbarray, .functions_lib_ctx = functions_lib_ctx };
        if (rdbLoadRioWithLoadingCtx(&rdb,RDBFLAGS_REPLICATION,&rsi,&loadingCtx) != C_OK) {
            /* RDB loading failed. */
            serverLog(LL_WARNING,
                      "Failed trying to load the MASTER synchronization DB "
                      "from socket, check server logs.");
            loadingFailed = 1;
        } else if (usemark) {
            /* Verify the end mark is correct. */
            if (!rioRead(&rdb, buf, CONFIG_RUN_ID_SIZE) ||
                memcmp(buf, eofmark, CONFIG_RUN_ID_SIZE) != 0)
            {
                serverLog(LL_WARNING, "Replication stream EOF marker is broken");
                loadingFailed = 1;
            }
        }

        if (loadingFailed) {
            stopLoading(0);
            cancelReplicationHandshake(1);
            rioFreeConn(&rdb, NULL);

            if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
                /* Discard potentially partially loaded tempDb. */
                moduleFireServerEvent(REDISMODULE_EVENT_REPL_ASYNC_LOAD,
                                      REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_ABORTED,
                                      NULL);

                disklessLoadDiscardTempDb(diskless_load_tempDb);
                functionsLibCtxFree(temp_functions_lib_ctx);
                serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Discarding temporary DB in background");
            } else {
                /* Remove the half-loaded data in case we started with an empty replica. */
                emptyData(-1,empty_db_flags,replicationEmptyDbCallback);
            }

            /* Note that there's no point in restarting the AOF on SYNC
             * failure, it'll be restarted when sync succeeds or the replica
             * gets promoted. */
            return;
        }

        /* RDB loading succeeded if we reach this point. */
        if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
            /* We will soon swap main db with tempDb and replicas will start
             * to apply data from new master, we must discard the cached
             * master structure and force resync of sub-replicas. */
            replicationAttachToNewMaster();

            serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Swapping active DB with loaded DB");
            swapMainDbWithTempDb(diskless_load_tempDb);

            /* swap existing functions ctx with the temporary one */
            functionsLibCtxSwapWithCurrent(temp_functions_lib_ctx);

            moduleFireServerEvent(REDISMODULE_EVENT_REPL_ASYNC_LOAD,
                        REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_COMPLETED,
                        NULL);

            /* Delete the old db as it's useless now. */
            disklessLoadDiscardTempDb(diskless_load_tempDb);
            serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Discarding old DB in background");
        }

        /* Inform about db change, as replication was diskless and didn't cause a save. */
        server.dirty++;

        stopLoading(1);

        /* Cleanup and restore the socket to the original state to continue
         * with the normal replication. */
        rioFreeConn(&rdb, NULL);
        connNonBlock(conn);
        connRecvTimeout(conn,0);
    } else {

        /* Make sure the new file (also used for persistence) is fully synced
         * (not covered by earlier calls to rdb_fsync_range). */
        if (fsync(server.repl_transfer_fd) == -1) {
            serverLog(LL_WARNING,
                "Failed trying to sync the temp DB to disk in "
                "MASTER <-> REPLICA synchronization: %s",
                strerror(errno));
            cancelReplicationHandshake(1);
            return;
        }

        /* Rename rdb like renaming rewrite aof asynchronously. */
        int old_rdb_fd = open(server.rdb_filename,O_RDONLY|O_NONBLOCK);
        if (rename(server.repl_transfer_tmpfile,server.rdb_filename) == -1) {
            serverLog(LL_WARNING,
                "Failed trying to rename the temp DB into %s in "
                "MASTER <-> REPLICA synchronization: %s",
                server.rdb_filename, strerror(errno));
            cancelReplicationHandshake(1);
            if (old_rdb_fd != -1) close(old_rdb_fd);
            return;
        }
        /* Close old rdb asynchronously. */
        if (old_rdb_fd != -1) bioCreateCloseJob(old_rdb_fd, 0, 0);

        /* Sync the directory to ensure rename is persisted */
        if (fsyncFileDir(server.rdb_filename) == -1) {
            serverLog(LL_WARNING,
                "Failed trying to sync DB directory %s in "
                "MASTER <-> REPLICA synchronization: %s",
                server.rdb_filename, strerror(errno));
            cancelReplicationHandshake(1);
            return;
        }

        if (rdbLoad(server.rdb_filename,&rsi,RDBFLAGS_REPLICATION) != RDB_OK) {
            serverLog(LL_WARNING,
                "Failed trying to load the MASTER synchronization "
                "DB from disk, check server logs.");
            cancelReplicationHandshake(1);
            if (server.rdb_del_sync_files && allPersistenceDisabled()) {
                serverLog(LL_NOTICE,"Removing the RDB file obtained from "
                                    "the master. This replica has persistence "
                                    "disabled");
                bg_unlink(server.rdb_filename);
            }
            /* Note that there's no point in restarting the AOF on sync failure,
               it'll be restarted when sync succeeds or replica promoted. */
            return;
        }

        /* Cleanup. */
        if (server.rdb_del_sync_files && allPersistenceDisabled()) {
            serverLog(LL_NOTICE,"Removing the RDB file obtained from "
                                "the master. This replica has persistence "
                                "disabled");
            bg_unlink(server.rdb_filename);
        }

        zfree(server.repl_transfer_tmpfile);
        close(server.repl_transfer_fd);
        server.repl_transfer_fd = -1;
        server.repl_transfer_tmpfile = NULL;
    }

    /* Final setup of the connected slave <- master link */
    replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db);
    server.repl_state = REPL_STATE_CONNECTED;
    server.repl_down_since = 0;

    /* Fire the master link modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
                          REDISMODULE_SUBEVENT_MASTER_LINK_UP,
                          NULL);

    /* After a full resynchronization we use the replication ID and
     * offset of the master. The secondary ID / offset are cleared since
     * we are starting a new history. */
    memcpy(server.replid,server.master->replid,sizeof(server.replid));
    server.master_repl_offset = server.master->reploff;
    clearReplicationId2();

    /* Let's create the replication backlog if needed. Slaves need to
     * accumulate the backlog regardless of the fact they have sub-slaves
     * or not, in order to behave correctly if they are promoted to
     * masters after a failover. */
    if (server.repl_backlog == NULL) createReplicationBacklog();
    serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success");

    if (server.supervised_mode == SUPERVISED_SYSTEMD) {
        redisCommunicateSystemd("STATUS=MASTER <-> REPLICA sync: Finished with success. Ready to accept connections in read-write mode.\n");
    }

    /* Send the initial ACK immediately to put this replica in online state. */
    if (usemark) replicationSendAck();

    /* Restart the AOF subsystem now that we finished the sync. This
     * will trigger an AOF rewrite, and when done will start appending
     * to the new file. */
    if (server.aof_enabled) restartAOFAfterSYNC();
    return;

error:
    cancelReplicationHandshake(1);
    return;
}
+
+char *receiveSynchronousResponse(connection *conn) {
+ char buf[256];
+ /* Read the reply from the server. */
+ if (connSyncReadLine(conn,buf,sizeof(buf),server.repl_syncio_timeout*1000) == -1)
+ {
+ serverLog(LL_WARNING, "Failed to read response from the server: %s", connGetLastError(conn));
+ return NULL;
+ }
+ server.repl_transfer_lastio = server.unixtime;
+ return sdsnew(buf);
+}
+
+/* Send a pre-formatted multi-bulk command to the connection. */
+char* sendCommandRaw(connection *conn, sds cmd) {
+ if (connSyncWrite(conn,cmd,sdslen(cmd),server.repl_syncio_timeout*1000) == -1) {
+ return sdscatprintf(sdsempty(),"-Writing to master: %s",
+ connGetLastError(conn));
+ }
+ return NULL;
+}
+
+/* Compose a multi-bulk command and send it to the connection.
+ * Used to send AUTH and REPLCONF commands to the master before starting the
+ * replication.
+ *
+ * Takes a list of char* arguments, terminated by a NULL argument.
+ *
+ * The command returns an sds string representing the result of the
+ * operation. On error the first byte is a "-".
+ */
+char *sendCommand(connection *conn, ...) {
+ va_list ap;
+ sds cmd = sdsempty();
+ sds cmdargs = sdsempty();
+ size_t argslen = 0;
+ char *arg;
+
+ /* Create the command to send to the master, we use redis binary
+ * protocol to make sure correct arguments are sent. This function
+ * is not safe for all binary data. */
+ va_start(ap,conn);
+ while(1) {
+ arg = va_arg(ap, char*);
+ if (arg == NULL) break;
+ cmdargs = sdscatprintf(cmdargs,"$%zu\r\n%s\r\n",strlen(arg),arg);
+ argslen++;
+ }
+
+ cmd = sdscatprintf(cmd,"*%zu\r\n",argslen);
+ cmd = sdscatsds(cmd,cmdargs);
+ sdsfree(cmdargs);
+
+ va_end(ap);
+ char* err = sendCommandRaw(conn, cmd);
+ sdsfree(cmd);
+ if(err)
+ return err;
+ return NULL;
+}
+
+/* Compose a multi-bulk command and send it to the connection.
+ * Used to send AUTH and REPLCONF commands to the master before starting the
+ * replication.
+ *
+ * argv_lens is optional, when NULL, strlen is used.
+ *
+ * The command returns an sds string representing the result of the
+ * operation. On error the first byte is a "-".
+ */
+char *sendCommandArgv(connection *conn, int argc, char **argv, size_t *argv_lens) {
+ sds cmd = sdsempty();
+ char *arg;
+ int i;
+
+ /* Create the command to send to the master. */
+ cmd = sdscatfmt(cmd,"*%i\r\n",argc);
+ for (i=0; i<argc; i++) {
+ int len;
+ arg = argv[i];
+ len = argv_lens ? argv_lens[i] : strlen(arg);
+ cmd = sdscatfmt(cmd,"$%i\r\n",len);
+ cmd = sdscatlen(cmd,arg,len);
+ cmd = sdscatlen(cmd,"\r\n",2);
+ }
+ char* err = sendCommandRaw(conn, cmd);
+ sdsfree(cmd);
+ if (err)
+ return err;
+ return NULL;
+}
+
+/* Try a partial resynchronization with the master if we are about to reconnect.
+ * If there is no cached master structure, at least try to issue a
+ * "PSYNC ? -1" command in order to trigger a full resync using the PSYNC
+ * command in order to obtain the master replid and the master replication
+ * global offset.
+ *
+ * This function is designed to be called from syncWithMaster(), so the
+ * following assumptions are made:
+ *
+ * 1) We pass the function an already connected socket "fd".
+ * 2) This function does not close the file descriptor "fd". However in case
+ * of successful partial resynchronization, the function will reuse
+ * 'fd' as file descriptor of the server.master client structure.
+ *
+ * The function is split in two halves: if read_reply is 0, the function
+ * writes the PSYNC command on the socket, and a new function call is
+ * needed, with read_reply set to 1, in order to read the reply of the
+ * command. This is useful in order to support non blocking operations, so
+ * that we write, return into the event loop, and read when there are data.
+ *
+ * When read_reply is 0 the function returns PSYNC_WRITE_ERR if there
+ * was a write error, or PSYNC_WAIT_REPLY to signal we need another call
+ * with read_reply set to 1. However even when read_reply is set to 1
+ * the function may return PSYNC_WAIT_REPLY again to signal there were
+ * insufficient data to read to complete its work. We should re-enter
+ * into the event loop and wait in such a case.
+ *
+ * The function returns:
+ *
+ * PSYNC_CONTINUE: If the PSYNC command succeeded and we can continue.
+ * PSYNC_FULLRESYNC: If PSYNC is supported but a full resync is needed.
+ * In this case the master replid and global replication
+ * offset is saved.
+ * PSYNC_NOT_SUPPORTED: If the server does not understand PSYNC at all and
+ * the caller should fall back to SYNC.
+ * PSYNC_WRITE_ERROR: There was an error writing the command to the socket.
+ * PSYNC_WAIT_REPLY: Call again the function with read_reply set to 1.
+ * PSYNC_TRY_LATER: Master is currently in a transient error condition.
+ *
+ * Notable side effects:
+ *
+ * 1) As a side effect of the function call the function removes the readable
+ * event handler from "fd", unless the return value is PSYNC_WAIT_REPLY.
+ * 2) server.master_initial_offset is set to the right value according
+ * to the master reply. This will be used to populate the 'server.master'
+ * structure replication offset.
+ */
+
+#define PSYNC_WRITE_ERROR 0
+#define PSYNC_WAIT_REPLY 1
+#define PSYNC_CONTINUE 2
+#define PSYNC_FULLRESYNC 3
+#define PSYNC_NOT_SUPPORTED 4
+#define PSYNC_TRY_LATER 5
int slaveTryPartialResynchronization(connection *conn, int read_reply) {
    char *psync_replid;
    char psync_offset[32];
    sds reply;

    /* Writing half */
    if (!read_reply) {
        /* Initially set master_initial_offset to -1 to mark the current
         * master replid and offset as not valid. Later if we'll be able to do
         * a FULL resync using the PSYNC command we'll set the offset at the
         * right value, so that this information will be propagated to the
         * client structure representing the master into server.master. */
        server.master_initial_offset = -1;

        if (server.cached_master) {
            /* We have a cached master: request a partial resync starting
             * from the first byte we did not yet receive (reploff+1). */
            psync_replid = server.cached_master->replid;
            snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1);
            serverLog(LL_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_replid, psync_offset);
        } else {
            serverLog(LL_NOTICE,"Partial resynchronization not possible (no cached master)");
            /* "? -1" asks the master for a full resync while still letting
             * us learn its replication ID and offset from the reply. */
            psync_replid = "?";
            memcpy(psync_offset,"-1",3);
        }

        /* Issue the PSYNC command, if this is a master with a failover in
         * progress then send the failover argument to the replica to cause it
         * to become a master */
        if (server.failover_state == FAILOVER_IN_PROGRESS) {
            reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,"FAILOVER",NULL);
        } else {
            reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,NULL);
        }

        /* sendCommand() returns an error string on failure, NULL on success. */
        if (reply != NULL) {
            serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
            sdsfree(reply);
            connSetReadHandler(conn, NULL);
            return PSYNC_WRITE_ERROR;
        }
        return PSYNC_WAIT_REPLY;
    }

    /* Reading half */
    reply = receiveSynchronousResponse(conn);
    /* Master did not reply to PSYNC */
    if (reply == NULL) {
        connSetReadHandler(conn, NULL);
        serverLog(LL_WARNING, "Master did not reply to PSYNC, will try later");
        return PSYNC_TRY_LATER;
    }

    if (sdslen(reply) == 0) {
        /* The master may send empty newlines after it receives PSYNC
         * and before to reply, just to keep the connection alive. */
        sdsfree(reply);
        return PSYNC_WAIT_REPLY;
    }

    /* We got a full reply: stop the read handler installed during the
     * handshake (see the "Notable side effects" note above). */
    connSetReadHandler(conn, NULL);

    if (!strncmp(reply,"+FULLRESYNC",11)) {
        char *replid = NULL, *offset = NULL;

        /* FULL RESYNC, parse the reply in order to extract the replid
         * and the replication offset. Expected form:
         * "+FULLRESYNC <replid> <offset>". */
        replid = strchr(reply,' ');
        if (replid) {
            replid++;
            offset = strchr(replid,' ');
            if (offset) offset++;
        }
        if (!replid || !offset || (offset-replid-1) != CONFIG_RUN_ID_SIZE) {
            serverLog(LL_WARNING,
                "Master replied with wrong +FULLRESYNC syntax.");
            /* This is an unexpected condition, actually the +FULLRESYNC
             * reply means that the master supports PSYNC, but the reply
             * format seems wrong. To stay safe we blank the master
             * replid to make sure next PSYNCs will fail. */
            memset(server.master_replid,0,CONFIG_RUN_ID_SIZE+1);
        } else {
            memcpy(server.master_replid, replid, offset-replid-1);
            server.master_replid[CONFIG_RUN_ID_SIZE] = '\0';
            server.master_initial_offset = strtoll(offset,NULL,10);
            serverLog(LL_NOTICE,"Full resync from master: %s:%lld",
                server.master_replid,
                server.master_initial_offset);
        }
        sdsfree(reply);
        return PSYNC_FULLRESYNC;
    }

    if (!strncmp(reply,"+CONTINUE",9)) {
        /* Partial resync was accepted. */
        serverLog(LL_NOTICE,
            "Successful partial resynchronization with master.");

        /* Check the new replication ID advertised by the master. If it
         * changed, we need to set the new ID as primary ID, and set
         * secondary ID as the old master ID up to the current offset, so
         * that our sub-slaves will be able to PSYNC with us after a
         * disconnection. */
        char *start = reply+10;     /* Just past "+CONTINUE" and its separator. */
        char *end = reply+9;
        while(end[0] != '\r' && end[0] != '\n' && end[0] != '\0') end++;
        if (end-start == CONFIG_RUN_ID_SIZE) {
            char new[CONFIG_RUN_ID_SIZE+1];
            memcpy(new,start,CONFIG_RUN_ID_SIZE);
            new[CONFIG_RUN_ID_SIZE] = '\0';

            if (strcmp(new,server.cached_master->replid)) {
                /* Master ID changed. */
                serverLog(LL_NOTICE,"Master replication ID changed to %s",new);

                /* Set the old ID as our ID2, up to the current offset+1. */
                memcpy(server.replid2,server.cached_master->replid,
                    sizeof(server.replid2));
                server.second_replid_offset = server.master_repl_offset+1;

                /* Update the cached master ID and our own primary ID to the
                 * new one. */
                memcpy(server.replid,new,sizeof(server.replid));
                memcpy(server.cached_master->replid,new,sizeof(server.replid));

                /* Disconnect all the sub-slaves: they need to be notified. */
                disconnectSlaves();
            }
        }

        /* Setup the replication to continue. */
        sdsfree(reply);
        replicationResurrectCachedMaster(conn);

        /* If this instance was restarted and we read the metadata to
         * PSYNC from the persistence file, our replication backlog could
         * be still not initialized. Create it. */
        if (server.repl_backlog == NULL) createReplicationBacklog();
        return PSYNC_CONTINUE;
    }

    /* If we reach this point we received either an error (since the master does
     * not understand PSYNC or because it is in a special state and cannot
     * serve our request), or an unexpected reply from the master.
     *
     * Return PSYNC_NOT_SUPPORTED on errors we don't understand, otherwise
     * return PSYNC_TRY_LATER if we believe this is a transient error. */

    if (!strncmp(reply,"-NOMASTERLINK",13) ||
        !strncmp(reply,"-LOADING",8))
    {
        serverLog(LL_NOTICE,
            "Master is currently unable to PSYNC "
            "but should be in the future: %s", reply);
        sdsfree(reply);
        return PSYNC_TRY_LATER;
    }

    if (strncmp(reply,"-ERR",4)) {
        /* If it's not an error, log the unexpected event. */
        serverLog(LL_WARNING,
            "Unexpected reply to PSYNC from master: %s", reply);
    } else {
        serverLog(LL_NOTICE,
            "Master does not support PSYNC or is in "
            "error state (reply: %s)", reply);
    }
    sdsfree(reply);
    return PSYNC_NOT_SUPPORTED;
}
+
+/* This handler fires when the non blocking connect was able to
+ * establish a connection with the master. */
+void syncWithMaster(connection *conn) {
+ char tmpfile[256], *err = NULL;
+ int dfd = -1, maxtries = 5;
+ int psync_result;
+
+ /* If this event fired after the user turned the instance into a master
+ * with SLAVEOF NO ONE we must just return ASAP. */
+ if (server.repl_state == REPL_STATE_NONE) {
+ connClose(conn);
+ return;
+ }
+
+ /* Check for errors in the socket: after a non blocking connect() we
+ * may find that the socket is in error state. */
+ if (connGetState(conn) != CONN_STATE_CONNECTED) {
+ serverLog(LL_WARNING,"Error condition on socket for SYNC: %s",
+ connGetLastError(conn));
+ goto error;
+ }
+
+ /* Send a PING to check the master is able to reply without errors. */
+ if (server.repl_state == REPL_STATE_CONNECTING) {
+ serverLog(LL_NOTICE,"Non blocking connect for SYNC fired the event.");
+ /* Delete the writable event so that the readable event remains
+ * registered and we can wait for the PONG reply. */
+ connSetReadHandler(conn, syncWithMaster);
+ connSetWriteHandler(conn, NULL);
+ server.repl_state = REPL_STATE_RECEIVE_PING_REPLY;
+ /* Send the PING, don't check for errors at all, we have the timeout
+ * that will take care about this. */
+ err = sendCommand(conn,"PING",NULL);
+ if (err) goto write_error;
+ return;
+ }
+
+ /* Receive the PONG command. */
+ if (server.repl_state == REPL_STATE_RECEIVE_PING_REPLY) {
+ err = receiveSynchronousResponse(conn);
+
+ /* The master did not reply */
+ if (err == NULL) goto no_response_error;
+
+ /* We accept only two replies as valid, a positive +PONG reply
+ * (we just check for "+") or an authentication error.
+ * Note that older versions of Redis replied with "operation not
+ * permitted" instead of using a proper error code, so we test
+ * both. */
+ if (err[0] != '+' &&
+ strncmp(err,"-NOAUTH",7) != 0 &&
+ strncmp(err,"-NOPERM",7) != 0 &&
+ strncmp(err,"-ERR operation not permitted",28) != 0)
+ {
+ serverLog(LL_WARNING,"Error reply to PING from master: '%s'",err);
+ sdsfree(err);
+ goto error;
+ } else {
+ serverLog(LL_NOTICE,
+ "Master replied to PING, replication can continue...");
+ }
+ sdsfree(err);
+ err = NULL;
+ server.repl_state = REPL_STATE_SEND_HANDSHAKE;
+ }
+
+ if (server.repl_state == REPL_STATE_SEND_HANDSHAKE) {
+ /* AUTH with the master if required. */
+ if (server.masterauth) {
+ char *args[3] = {"AUTH",NULL,NULL};
+ size_t lens[3] = {4,0,0};
+ int argc = 1;
+ if (server.masteruser) {
+ args[argc] = server.masteruser;
+ lens[argc] = strlen(server.masteruser);
+ argc++;
+ }
+ args[argc] = server.masterauth;
+ lens[argc] = sdslen(server.masterauth);
+ argc++;
+ err = sendCommandArgv(conn, argc, args, lens);
+ if (err) goto write_error;
+ }
+
+ /* Set the slave port, so that Master's INFO command can list the
+ * slave listening port correctly. */
+ {
+ int port;
+ if (server.slave_announce_port)
+ port = server.slave_announce_port;
+ else if (server.tls_replication && server.tls_port)
+ port = server.tls_port;
+ else
+ port = server.port;
+ sds portstr = sdsfromlonglong(port);
+ err = sendCommand(conn,"REPLCONF",
+ "listening-port",portstr, NULL);
+ sdsfree(portstr);
+ if (err) goto write_error;
+ }
+
+ /* Set the slave ip, so that Master's INFO command can list the
+ * slave IP address port correctly in case of port forwarding or NAT.
+ * Skip REPLCONF ip-address if there is no slave-announce-ip option set. */
+ if (server.slave_announce_ip) {
+ err = sendCommand(conn,"REPLCONF",
+ "ip-address",server.slave_announce_ip, NULL);
+ if (err) goto write_error;
+ }
+
+ /* Inform the master of our (slave) capabilities.
+ *
+ * EOF: supports EOF-style RDB transfer for diskless replication.
+ * PSYNC2: supports PSYNC v2, so understands +CONTINUE <new repl ID>.
+ *
+ * The master will ignore capabilities it does not understand. */
+ err = sendCommand(conn,"REPLCONF",
+ "capa","eof","capa","psync2",NULL);
+ if (err) goto write_error;
+
+ server.repl_state = REPL_STATE_RECEIVE_AUTH_REPLY;
+ return;
+ }
+
+ if (server.repl_state == REPL_STATE_RECEIVE_AUTH_REPLY && !server.masterauth)
+ server.repl_state = REPL_STATE_RECEIVE_PORT_REPLY;
+
+ /* Receive AUTH reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_AUTH_REPLY) {
+ err = receiveSynchronousResponse(conn);
+ if (err == NULL) goto no_response_error;
+ if (err[0] == '-') {
+ serverLog(LL_WARNING,"Unable to AUTH to MASTER: %s",err);
+ sdsfree(err);
+ goto error;
+ }
+ sdsfree(err);
+ err = NULL;
+ server.repl_state = REPL_STATE_RECEIVE_PORT_REPLY;
+ return;
+ }
+
+ /* Receive REPLCONF listening-port reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_PORT_REPLY) {
+ err = receiveSynchronousResponse(conn);
+ if (err == NULL) goto no_response_error;
+ /* Ignore the error if any, not all the Redis versions support
+ * REPLCONF listening-port. */
+ if (err[0] == '-') {
+ serverLog(LL_NOTICE,"(Non critical) Master does not understand "
+ "REPLCONF listening-port: %s", err);
+ }
+ sdsfree(err);
+ server.repl_state = REPL_STATE_RECEIVE_IP_REPLY;
+ return;
+ }
+
+ if (server.repl_state == REPL_STATE_RECEIVE_IP_REPLY && !server.slave_announce_ip)
+ server.repl_state = REPL_STATE_RECEIVE_CAPA_REPLY;
+
+ /* Receive REPLCONF ip-address reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_IP_REPLY) {
+ err = receiveSynchronousResponse(conn);
+ if (err == NULL) goto no_response_error;
+ /* Ignore the error if any, not all the Redis versions support
+ * REPLCONF ip-address. */
+ if (err[0] == '-') {
+ serverLog(LL_NOTICE,"(Non critical) Master does not understand "
+ "REPLCONF ip-address: %s", err);
+ }
+ sdsfree(err);
+ server.repl_state = REPL_STATE_RECEIVE_CAPA_REPLY;
+ return;
+ }
+
+ /* Receive CAPA reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_CAPA_REPLY) {
+ err = receiveSynchronousResponse(conn);
+ if (err == NULL) goto no_response_error;
+ /* Ignore the error if any, not all the Redis versions support
+ * REPLCONF capa. */
+ if (err[0] == '-') {
+ serverLog(LL_NOTICE,"(Non critical) Master does not understand "
+ "REPLCONF capa: %s", err);
+ }
+ sdsfree(err);
+ err = NULL;
+ server.repl_state = REPL_STATE_SEND_PSYNC;
+ }
+
+ /* Try a partial resynchronization. If we don't have a cached master
+ * slaveTryPartialResynchronization() will at least try to use PSYNC
+ * to start a full resynchronization so that we get the master replid
+ * and the global offset, to try a partial resync at the next
+ * reconnection attempt. */
+ if (server.repl_state == REPL_STATE_SEND_PSYNC) {
+ if (slaveTryPartialResynchronization(conn,0) == PSYNC_WRITE_ERROR) {
+ err = sdsnew("Write error sending the PSYNC command.");
+ abortFailover("Write error to failover target");
+ goto write_error;
+ }
+ server.repl_state = REPL_STATE_RECEIVE_PSYNC_REPLY;
+ return;
+ }
+
+ /* If reached this point, we should be in REPL_STATE_RECEIVE_PSYNC_REPLY. */
+ if (server.repl_state != REPL_STATE_RECEIVE_PSYNC_REPLY) {
+ serverLog(LL_WARNING,"syncWithMaster(): state machine error, "
+ "state should be RECEIVE_PSYNC but is %d",
+ server.repl_state);
+ goto error;
+ }
+
+ psync_result = slaveTryPartialResynchronization(conn,1);
+ if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */
+
+ /* Check the status of the planned failover. We expect PSYNC_CONTINUE,
+ * but there is nothing technically wrong with a full resync which
+ * could happen in edge cases. */
+ if (server.failover_state == FAILOVER_IN_PROGRESS) {
+ if (psync_result == PSYNC_CONTINUE || psync_result == PSYNC_FULLRESYNC) {
+ clearFailoverState();
+ } else {
+ abortFailover("Failover target rejected psync request");
+ return;
+ }
+ }
+
+ /* If the master is in an transient error, we should try to PSYNC
+ * from scratch later, so go to the error path. This happens when
+ * the server is loading the dataset or is not connected with its
+ * master and so forth. */
+ if (psync_result == PSYNC_TRY_LATER) goto error;
+
+ /* Note: if PSYNC does not return WAIT_REPLY, it will take care of
+ * uninstalling the read handler from the file descriptor. */
+
+ if (psync_result == PSYNC_CONTINUE) {
+ serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Master accepted a Partial Resynchronization.");
+ if (server.supervised_mode == SUPERVISED_SYSTEMD) {
+ redisCommunicateSystemd("STATUS=MASTER <-> REPLICA sync: Partial Resynchronization accepted. Ready to accept connections in read-write mode.\n");
+ }
+ return;
+ }
+
+ /* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
+ * and the server.master_replid and master_initial_offset are
+ * already populated. */
+ if (psync_result == PSYNC_NOT_SUPPORTED) {
+ serverLog(LL_NOTICE,"Retrying with SYNC...");
+ if (connSyncWrite(conn,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
+ serverLog(LL_WARNING,"I/O error writing to MASTER: %s",
+ connGetLastError(conn));
+ goto error;
+ }
+ }
+
+ /* Prepare a suitable temp file for bulk transfer */
+ if (!useDisklessLoad()) {
+ while(maxtries--) {
+ snprintf(tmpfile,256,
+ "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid());
+ dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
+ if (dfd != -1) break;
+ sleep(1);
+ }
+ if (dfd == -1) {
+ serverLog(LL_WARNING,"Opening the temp file needed for MASTER <-> REPLICA synchronization: %s",strerror(errno));
+ goto error;
+ }
+ server.repl_transfer_tmpfile = zstrdup(tmpfile);
+ server.repl_transfer_fd = dfd;
+ }
+
+ /* Setup the non blocking download of the bulk file. */
+ if (connSetReadHandler(conn, readSyncBulkPayload)
+ == C_ERR)
+ {
+ char conninfo[CONN_INFO_LEN];
+ serverLog(LL_WARNING,
+ "Can't create readable event for SYNC: %s (%s)",
+ strerror(errno), connGetInfo(conn, conninfo, sizeof(conninfo)));
+ goto error;
+ }
+
+ server.repl_state = REPL_STATE_TRANSFER;
+ server.repl_transfer_size = -1;
+ server.repl_transfer_read = 0;
+ server.repl_transfer_last_fsync_off = 0;
+ server.repl_transfer_lastio = server.unixtime;
+ return;
+
+no_response_error: /* Handle receiveSynchronousResponse() error when master has no reply */
+ serverLog(LL_WARNING, "Master did not respond to command during SYNC handshake");
+ /* Fall through to regular error handling */
+
+error:
+ if (dfd != -1) close(dfd);
+ connClose(conn);
+ server.repl_transfer_s = NULL;
+ if (server.repl_transfer_fd != -1)
+ close(server.repl_transfer_fd);
+ if (server.repl_transfer_tmpfile)
+ zfree(server.repl_transfer_tmpfile);
+ server.repl_transfer_tmpfile = NULL;
+ server.repl_transfer_fd = -1;
+ server.repl_state = REPL_STATE_CONNECT;
+ return;
+
+write_error: /* Handle sendCommand() errors. */
+ serverLog(LL_WARNING,"Sending command to master in replication handshake: %s", err);
+ sdsfree(err);
+ goto error;
+}
+
+int connectWithMaster(void) {
+ server.repl_transfer_s = connCreate(connTypeOfReplication());
+ if (connConnect(server.repl_transfer_s, server.masterhost, server.masterport,
+ server.bind_source_addr, syncWithMaster) == C_ERR) {
+ serverLog(LL_WARNING,"Unable to connect to MASTER: %s",
+ connGetLastError(server.repl_transfer_s));
+ connClose(server.repl_transfer_s);
+ server.repl_transfer_s = NULL;
+ return C_ERR;
+ }
+
+
+ server.repl_transfer_lastio = server.unixtime;
+ server.repl_state = REPL_STATE_CONNECTING;
+ serverLog(LL_NOTICE,"MASTER <-> REPLICA sync started");
+ return C_OK;
+}
+
/* This function can be called when a non blocking connection is currently
 * in progress to undo it.
 * Never call this function directly, use cancelReplicationHandshake() instead.
 *
 * Closes the in-progress transfer socket and clears the reference so
 * later code does not touch a dead connection. */
void undoConnectWithMaster(void) {
    connClose(server.repl_transfer_s);
    server.repl_transfer_s = NULL;
}
+
+/* Abort the async download of the bulk dataset while SYNC-ing with master.
+ * Never call this function directly, use cancelReplicationHandshake() instead.
+ */
+void replicationAbortSyncTransfer(void) {
+ serverAssert(server.repl_state == REPL_STATE_TRANSFER);
+ undoConnectWithMaster();
+ if (server.repl_transfer_fd!=-1) {
+ close(server.repl_transfer_fd);
+ bg_unlink(server.repl_transfer_tmpfile);
+ zfree(server.repl_transfer_tmpfile);
+ server.repl_transfer_tmpfile = NULL;
+ server.repl_transfer_fd = -1;
+ }
+}
+
+/* This function aborts a non blocking replication attempt if there is one
+ * in progress, by canceling the non-blocking connect attempt or
+ * the initial bulk transfer.
+ *
+ * If there was a replication handshake in progress 1 is returned and
+ * the replication state (server.repl_state) set to REPL_STATE_CONNECT.
+ *
+ * Otherwise zero is returned and no operation is performed at all. */
+int cancelReplicationHandshake(int reconnect) {
+ if (server.repl_state == REPL_STATE_TRANSFER) {
+ replicationAbortSyncTransfer();
+ server.repl_state = REPL_STATE_CONNECT;
+ } else if (server.repl_state == REPL_STATE_CONNECTING ||
+ slaveIsInHandshakeState())
+ {
+ undoConnectWithMaster();
+ server.repl_state = REPL_STATE_CONNECT;
+ } else {
+ return 0;
+ }
+
+ if (!reconnect)
+ return 1;
+
+ /* try to re-connect without waiting for replicationCron, this is needed
+ * for the "diskless loading short read" test. */
+ serverLog(LL_NOTICE,"Reconnecting to MASTER %s:%d after failure",
+ server.masterhost, server.masterport);
+ connectWithMaster();
+
+ return 1;
+}
+
/* Set replication to the specified master address and port.
 *
 * Called by REPLICAOF/SLAVEOF and by cluster code. Tears down any previous
 * master link, updates server.masterhost/masterport, fires the relevant
 * module events and starts a new connection attempt. */
void replicationSetMaster(char *ip, int port) {
    /* Remember whether we were a master before touching masterhost: a
     * cached master is synthesized below only on master->replica switch. */
    int was_master = server.masterhost == NULL;

    sdsfree(server.masterhost);
    server.masterhost = NULL;
    if (server.master) {
        freeClient(server.master);
    }
    disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */

    /* Setting masterhost only after the call to freeClient since it calls
     * replicationHandleMasterDisconnection which can trigger a re-connect
     * directly from within that call. */
    server.masterhost = sdsnew(ip);
    server.masterport = port;

    /* Update oom_score_adj */
    setOOMScoreAdj(-1);

    /* Here we don't disconnect with replicas, since they may hopefully be able
     * to partially resync with us. We will disconnect with replicas and force
     * them to resync with us when changing replid on partially resync with new
     * master, or finishing transferring RDB and preparing loading DB on full
     * sync with new master. */

    cancelReplicationHandshake(0);
    /* Before destroying our master state, create a cached master using
     * our own parameters, to later PSYNC with the new master. */
    if (was_master) {
        replicationDiscardCachedMaster();
        replicationCacheMasterUsingMyself();
    }

    /* Fire the role change modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED,
                          REDISMODULE_EVENT_REPLROLECHANGED_NOW_REPLICA,
                          NULL);

    /* Fire the master link modules event. */
    if (server.repl_state == REPL_STATE_CONNECTED)
        moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
                              REDISMODULE_SUBEVENT_MASTER_LINK_DOWN,
                              NULL);

    server.repl_state = REPL_STATE_CONNECT;
    serverLog(LL_NOTICE,"Connecting to MASTER %s:%d",
        server.masterhost, server.masterport);
    connectWithMaster();
}
+
/* Cancel replication, setting the instance as a master itself.
 *
 * No-op when the instance is already a master. Otherwise drops the master
 * link and cached master, shifts the replication ID, disconnects slaves so
 * they learn the new ID, and restores master-mode bookkeeping. */
void replicationUnsetMaster(void) {
    if (server.masterhost == NULL) return; /* Nothing to do. */

    /* Fire the master link modules event. */
    if (server.repl_state == REPL_STATE_CONNECTED)
        moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
                              REDISMODULE_SUBEVENT_MASTER_LINK_DOWN,
                              NULL);

    /* Clear masterhost first, since the freeClient calls
     * replicationHandleMasterDisconnection which can attempt to re-connect. */
    sdsfree(server.masterhost);
    server.masterhost = NULL;
    if (server.master) freeClient(server.master);
    replicationDiscardCachedMaster();
    cancelReplicationHandshake(0);
    /* When a slave is turned into a master, the current replication ID
     * (that was inherited from the master at synchronization time) is
     * used as secondary ID up to the current offset, and a new replication
     * ID is created to continue with a new replication history. */
    shiftReplicationId();
    /* Disconnecting all the slaves is required: we need to inform slaves
     * of the replication ID change (see shiftReplicationId() call). However
     * the slaves will be able to partially resync with us, so it will be
     * a very fast reconnection. */
    disconnectSlaves();
    server.repl_state = REPL_STATE_NONE;

    /* We need to make sure the new master will start the replication stream
     * with a SELECT statement. This is forced after a full resync, but
     * with PSYNC version 2, there is no need for full resync after a
     * master switch. */
    server.slaveseldb = -1;

    /* Update oom_score_adj */
    setOOMScoreAdj(-1);

    /* Once we turn from slave to master, we consider the starting time without
     * slaves (that is used to count the replication backlog time to live) as
     * starting from now. Otherwise the backlog will be freed after a
     * failover if slaves do not connect immediately. */
    server.repl_no_slaves_since = server.unixtime;

    /* Reset down time so it'll be ready for when we turn into replica again. */
    server.repl_down_since = 0;

    /* Fire the role change modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED,
                          REDISMODULE_EVENT_REPLROLECHANGED_NOW_MASTER,
                          NULL);

    /* Restart the AOF subsystem in case we shut it down during a sync when
     * we were still a slave. */
    if (server.aof_enabled && server.aof_state == AOF_OFF) restartAOFAfterSYNC();
}
+
+/* This function is called when the slave lose the connection with the
+ * master into an unexpected way. */
+void replicationHandleMasterDisconnection(void) {
+ /* Fire the master link modules event. */
+ if (server.repl_state == REPL_STATE_CONNECTED)
+ moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
+ REDISMODULE_SUBEVENT_MASTER_LINK_DOWN,
+ NULL);
+
+ server.master = NULL;
+ server.repl_state = REPL_STATE_CONNECT;
+ server.repl_down_since = server.unixtime;
+ /* We lost connection with our master, don't disconnect slaves yet,
+ * maybe we'll be able to PSYNC with our master later. We'll disconnect
+ * the slaves only if we'll have to do a full resync with our master. */
+
+ /* Try to re-connect immediately rather than wait for replicationCron
+ * waiting 1 second may risk backlog being recycled. */
+ if (server.masterhost) {
+ serverLog(LL_NOTICE,"Reconnecting to MASTER %s:%d",
+ server.masterhost, server.masterport);
+ connectWithMaster();
+ }
+}
+
/* REPLICAOF <host> <port> | REPLICAOF NO ONE
 *
 * Turn the instance into a replica of the given master, or back into a
 * master with the special "NO ONE" arguments. Replies +OK on success. */
void replicaofCommand(client *c) {
    /* SLAVEOF is not allowed in cluster mode as replication is automatically
     * configured using the current address of the master node. */
    if (server.cluster_enabled) {
        addReplyError(c,"REPLICAOF not allowed in cluster mode.");
        return;
    }

    if (server.failover_state != NO_FAILOVER) {
        addReplyError(c,"REPLICAOF not allowed while failing over.");
        return;
    }

    /* The special host/port combination "NO" "ONE" turns the instance
     * into a master. Otherwise the new master address is set. */
    if (!strcasecmp(c->argv[1]->ptr,"no") &&
        !strcasecmp(c->argv[2]->ptr,"one")) {
        if (server.masterhost) {
            replicationUnsetMaster();
            sds client = catClientInfoString(sdsempty(),c);
            serverLog(LL_NOTICE,"MASTER MODE enabled (user request from '%s')",
                client);
            sdsfree(client);
        }
    } else {
        long port;

        if (c->flags & CLIENT_SLAVE)
        {
            /* If a client is already a replica they cannot run this command,
             * because it involves flushing all replicas (including this
             * client) */
            addReplyError(c, "Command is not valid when client is a replica.");
            return;
        }

        if (getRangeLongFromObjectOrReply(c, c->argv[2], 0, 65535, &port,
                                          "Invalid master port") != C_OK)
            return;

        /* Check if we are already attached to the specified master */
        if (server.masterhost && !strcasecmp(server.masterhost,c->argv[1]->ptr)
            && server.masterport == port) {
            serverLog(LL_NOTICE,"REPLICAOF would result into synchronization "
                                "with the master we are already connected "
                                "with. No operation performed.");
            addReplySds(c,sdsnew("+OK Already connected to specified "
                                 "master\r\n"));
            return;
        }
        /* There was no previous master or the user specified a different one,
         * we can continue. */
        replicationSetMaster(c->argv[1]->ptr, port);
        sds client = catClientInfoString(sdsempty(),c);
        serverLog(LL_NOTICE,"REPLICAOF %s:%d enabled (user request from '%s')",
            server.masterhost, server.masterport, client);
        sdsfree(client);
    }
    addReply(c,shared.ok);
}
+
/* ROLE command: provide information about the role of the instance
 * (master or slave) and additional information related to replication
 * in an easy to process format.
 *
 * Master reply: [ "master", <repl offset>, [[ip, port, ack-offset], ...] ]
 * Replica reply: [ "slave", <masterhost>, <masterport>, <state>, <reploff> ] */
void roleCommand(client *c) {
    /* In sentinel mode the ROLE reply is handled by sentinel code. */
    if (server.sentinel_mode) {
        sentinelRoleCommand(c);
        return;
    }

    if (server.masterhost == NULL) {
        listIter li;
        listNode *ln;
        void *mbcount;   /* Deferred length node for the slaves array. */
        int slaves = 0;

        addReplyArrayLen(c,3);
        addReplyBulkCBuffer(c,"master",6);
        addReplyLongLong(c,server.master_repl_offset);
        mbcount = addReplyDeferredLen(c);
        listRewind(server.slaves,&li);
        while((ln = listNext(&li))) {
            client *slave = ln->value;
            char ip[NET_IP_STR_LEN], *slaveaddr = slave->slave_addr;

            /* No announced address: fall back to the peer address of the
             * connection, skipping the slave if it can't be obtained. */
            if (!slaveaddr) {
                if (connAddrPeerName(slave->conn,ip,sizeof(ip),NULL) == -1)
                    continue;
                slaveaddr = ip;
            }
            /* Only fully synchronized replicas are listed. */
            if (slave->replstate != SLAVE_STATE_ONLINE) continue;
            addReplyArrayLen(c,3);
            addReplyBulkCString(c,slaveaddr);
            addReplyBulkLongLong(c,slave->slave_listening_port);
            addReplyBulkLongLong(c,slave->repl_ack_off);
            slaves++;
        }
        setDeferredArrayLen(c,mbcount,slaves);
    } else {
        char *slavestate = NULL;

        addReplyArrayLen(c,5);
        addReplyBulkCBuffer(c,"slave",5);
        addReplyBulkCString(c,server.masterhost);
        addReplyLongLong(c,server.masterport);
        if (slaveIsInHandshakeState()) {
            slavestate = "handshake";
        } else {
            switch(server.repl_state) {
            case REPL_STATE_NONE: slavestate = "none"; break;
            case REPL_STATE_CONNECT: slavestate = "connect"; break;
            case REPL_STATE_CONNECTING: slavestate = "connecting"; break;
            case REPL_STATE_TRANSFER: slavestate = "sync"; break;
            case REPL_STATE_CONNECTED: slavestate = "connected"; break;
            default: slavestate = "unknown"; break;
            }
        }
        addReplyBulkCString(c,slavestate);
        addReplyLongLong(c,server.master ? server.master->reploff : -1);
    }
}
+
+/* Send a REPLCONF ACK command to the master to inform it about the current
+ * processed offset. If we are not connected with a master, the command has
+ * no effects. */
+void replicationSendAck(void) {
+ client *c = server.master;
+
+ if (c != NULL) {
+ int send_fack = server.fsynced_reploff != -1;
+ c->flags |= CLIENT_MASTER_FORCE_REPLY;
+ addReplyArrayLen(c,send_fack ? 5 : 3);
+ addReplyBulkCString(c,"REPLCONF");
+ addReplyBulkCString(c,"ACK");
+ addReplyBulkLongLong(c,c->reploff);
+ if (send_fack) {
+ addReplyBulkCString(c,"FACK");
+ addReplyBulkLongLong(c,server.fsynced_reploff);
+ }
+ c->flags &= ~CLIENT_MASTER_FORCE_REPLY;
+ }
+}
+
+/* ---------------------- MASTER CACHING FOR PSYNC -------------------------- */
+
+/* In order to implement partial synchronization we need to be able to cache
+ * our master's client structure after a transient disconnection.
+ * It is cached into server.cached_master and flushed away using the following
+ * functions. */
+
/* This function is called by freeClient() in order to cache the master
 * client structure instead of destroying it. freeClient() will return
 * ASAP after this function returns, so every action needed to avoid problems
 * with a client that is really "suspended" has to be done by this function.
 *
 * The other functions that will deal with the cached master are:
 *
 * replicationDiscardCachedMaster() that will make sure to kill the client
 * as for some reason we don't want to use it in the future.
 *
 * replicationResurrectCachedMaster() that is used after a successful PSYNC
 * handshake in order to reactivate the cached master.
 */
void replicationCacheMaster(client *c) {
    /* NOTE: 'c' is the master client itself — the code below mixes
     * server.master-> and c-> accesses for the same object. */
    serverAssert(server.master != NULL && server.cached_master == NULL);
    serverLog(LL_NOTICE,"Caching the disconnected master state.");

    /* Unlink the client from the server structures. */
    unlinkClient(c);

    /* Reset the master client so that's ready to accept new commands:
     * we want to discard the non processed query buffers and non processed
     * offsets, including pending transactions, already populated arguments,
     * pending outputs to the master. */
    sdsclear(server.master->querybuf);
    server.master->qb_pos = 0;
    server.master->repl_applied = 0;
    /* Roll the read offset back to the applied offset: unprocessed bytes
     * in the query buffer were just discarded above. */
    server.master->read_reploff = server.master->reploff;
    if (c->flags & CLIENT_MULTI) discardTransaction(c);
    listEmpty(c->reply);
    c->sentlen = 0;
    c->reply_bytes = 0;
    c->bufpos = 0;
    resetClient(c);

    /* Save the master. Server.master will be set to null later by
     * replicationHandleMasterDisconnection(). */
    server.cached_master = server.master;

    /* Invalidate the Peer ID cache. */
    if (c->peerid) {
        sdsfree(c->peerid);
        c->peerid = NULL;
    }
    /* Invalidate the Sock Name cache. */
    if (c->sockname) {
        sdsfree(c->sockname);
        c->sockname = NULL;
    }

    /* Caching the master happens instead of the actual freeClient() call,
     * so make sure to adjust the replication state. This function will
     * also set server.master to NULL. */
    replicationHandleMasterDisconnection();
}
+
+/* This function is called when a master is turned into a slave, in order to
+ * create from scratch a cached master for the new client, that will allow
+ * to PSYNC with the slave that was promoted as the new master after a
+ * failover.
+ *
+ * Assuming this instance was previously the master instance of the new master,
+ * the new master will accept its replication ID, and potential also the
+ * current offset if no data was lost during the failover. So we use our
+ * current replication ID and offset in order to synthesize a cached master. */
+void replicationCacheMasterUsingMyself(void) {
+ serverLog(LL_NOTICE,
+ "Before turning into a replica, using my own master parameters "
+ "to synthesize a cached master: I may be able to synchronize with "
+ "the new master with just a partial transfer.");
+
+ /* This will be used to populate the field server.master->reploff
+ * by replicationCreateMasterClient(). We'll later set the created
+ * master as server.cached_master, so the replica will use such
+ * offset for PSYNC. */
+ server.master_initial_offset = server.master_repl_offset;
+
+ /* The master client we create can be set to any DBID, because
+ * the new master will start its replication stream with SELECT. */
+ replicationCreateMasterClient(NULL,-1);
+
+ /* Use our own ID / offset. */
+ memcpy(server.master->replid, server.replid, sizeof(server.replid));
+
+ /* Set as cached master. */
+ unlinkClient(server.master);
+ server.cached_master = server.master;
+ server.master = NULL;
+}
+
+/* Free a cached master, called when there are no longer the conditions for
+ * a partial resync on reconnection. */
+void replicationDiscardCachedMaster(void) {
+ if (server.cached_master == NULL) return;
+
+ serverLog(LL_NOTICE,"Discarding previously cached master state.");
+ server.cached_master->flags &= ~CLIENT_MASTER;
+ freeClient(server.cached_master);
+ server.cached_master = NULL;
+}
+
/* Turn the cached master into the current master, using the file descriptor
 * passed as argument as the socket for the new master.
 *
 * This function is called when successfully setup a partial resynchronization
 * so the stream of data that we'll receive will start from where this
 * master left. */
void replicationResurrectCachedMaster(connection *conn) {
    server.master = server.cached_master;
    server.cached_master = NULL;
    /* Attach the new connection to the resurrected client and make the
     * connection's private data point back at it. */
    server.master->conn = conn;
    connSetPrivateData(server.master->conn, server.master);
    /* Clear close-pending flags possibly set while the client was cached. */
    server.master->flags &= ~(CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP);
    server.master->authenticated = 1;
    server.master->lastinteraction = server.unixtime;
    server.repl_state = REPL_STATE_CONNECTED;
    server.repl_down_since = 0;

    /* Fire the master link modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
                          REDISMODULE_SUBEVENT_MASTER_LINK_UP,
                          NULL);

    /* Re-add to the list of clients. */
    linkClient(server.master);
    if (connSetReadHandler(server.master->conn, readQueryFromClient)) {
        serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the readable handler: %s", strerror(errno));
        freeClientAsync(server.master); /* Close ASAP. */
    }

    /* We may also need to install the write handler as well if there is
     * pending data in the write buffers. */
    if (clientHasPendingReplies(server.master)) {
        if (connSetWriteHandler(server.master->conn, sendReplyToClient)) {
            serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the writable handler: %s", strerror(errno));
            freeClientAsync(server.master); /* Close ASAP. */
        }
    }
}
+
+/* ------------------------- MIN-SLAVES-TO-WRITE --------------------------- */
+
+/* Recompute server.repl_good_slaves_count: the number of ONLINE replicas
+ * whose acknowledged lag is within min-slaves-max-lag. When the
+ * min-slaves-to-write feature is active, writes are refused if this count
+ * drops below the configured threshold. A no-op when either knob is 0. */
+void refreshGoodSlavesCount(void) {
+    listIter iter;
+    listNode *node;
+    int healthy = 0;
+
+    if (!server.repl_min_slaves_to_write ||
+        !server.repl_min_slaves_max_lag) return;
+
+    listRewind(server.slaves,&iter);
+    while((node = listNext(&iter)) != NULL) {
+        client *slave = node->value;
+        /* Lag is measured from the last REPLCONF ACK we received. */
+        time_t lag = server.unixtime - slave->repl_ack_time;
+
+        if (slave->replstate == SLAVE_STATE_ONLINE &&
+            lag <= server.repl_min_slaves_max_lag)
+        {
+            healthy++;
+        }
+    }
+    server.repl_good_slaves_count = healthy;
+}
+
+/* Return true if the good-replicas condition is satisfied, false otherwise.
+ * The condition trivially holds on replicas and whenever the
+ * min-slaves-to-write / min-slaves-max-lag feature is not configured. */
+int checkGoodReplicasStatus(void) {
+    if (server.masterhost) return 1;               /* Not a primary: always OK. */
+    if (!server.repl_min_slaves_max_lag) return 1; /* Max lag not configured. */
+    if (!server.repl_min_slaves_to_write) return 1; /* Quorum not configured. */
+    /* Otherwise require enough replicas within the allowed lag. */
+    return server.repl_good_slaves_count >= server.repl_min_slaves_to_write;
+}
+
+/* ----------------------- SYNCHRONOUS REPLICATION --------------------------
+ * Redis synchronous replication design can be summarized in points:
+ *
+ * - Redis masters have a global replication offset, used by PSYNC.
+ * - Masters increment the offset every time new commands are sent to slaves.
+ * - Slaves ping back masters with the offset processed so far.
+ *
+ * So synchronous replication adds a new WAIT command in the form:
+ *
+ * WAIT <num_replicas> <milliseconds_timeout>
+ *
+ * That returns the number of replicas that processed the query when
+ * we finally have at least num_replicas, or when the timeout was
+ * reached.
+ *
+ * The command is implemented in this way:
+ *
+ * - Every time a client processes a command, we remember the replication
+ * offset after sending that command to the slaves.
+ * - When WAIT is called, we ask slaves to send an acknowledgement ASAP.
+ * The client is blocked at the same time (see blocked.c).
+ * - Once we receive enough ACKs for a given offset or when the timeout
+ * is reached, the WAIT command is unblocked and the reply sent to the
+ * client.
+ */
+
+/* This just sets a flag so that we broadcast a REPLCONF GETACK command
+ * to all the slaves in the beforeSleep() function. Note that this way
+ * we "group" all the clients that want to wait for synchronous replication
+ * in a given event loop iteration, and send a single GETACK for them all. */
+void replicationRequestAckFromSlaves(void) {
+    server.get_ack_from_slaves = 1;
+}
+
+/* Count the ONLINE replicas whose acknowledged replication offset
+ * (repl_ack_off) has reached at least 'offset'. */
+int replicationCountAcksByOffset(long long offset) {
+    listIter iter;
+    listNode *node;
+    int acked = 0;
+
+    listRewind(server.slaves,&iter);
+    while((node = listNext(&iter)) != NULL) {
+        client *slave = node->value;
+
+        if (slave->replstate == SLAVE_STATE_ONLINE &&
+            slave->repl_ack_off >= offset)
+        {
+            acked++;
+        }
+    }
+    return acked;
+}
+
+/* Count the ONLINE replicas that acknowledged fsyncing their AOF up to
+ * at least the given replication 'offset' (repl_aof_off). */
+int replicationCountAOFAcksByOffset(long long offset) {
+    listIter iter;
+    listNode *node;
+    int acked = 0;
+
+    listRewind(server.slaves,&iter);
+    while((node = listNext(&iter)) != NULL) {
+        client *slave = node->value;
+
+        if (slave->replstate == SLAVE_STATE_ONLINE &&
+            slave->repl_aof_off >= offset)
+        {
+            acked++;
+        }
+    }
+    return acked;
+}
+
+/* WAIT <numreplicas> <timeout>
+ *
+ * Block until at least N replicas acknowledged the processing of our
+ * latest write command (and all the previous commands), or until the
+ * timeout in milliseconds elapses. */
+void waitCommand(client *c) {
+    mstime_t timeout;
+    long numreplicas, acks;
+    long long offset = c->woff;
+
+    /* WAIT makes no sense on a replica: its writes are local only. */
+    if (server.masterhost) {
+        addReplyError(c,"WAIT cannot be used with replica instances. Please also note that since Redis 4.0 if a replica is configured to be writable (which is not the default) writes to replicas are just local and are not propagated.");
+        return;
+    }
+
+    /* Parse arguments. */
+    if (getLongFromObjectOrReply(c,c->argv[1],&numreplicas,NULL) != C_OK)
+        return;
+    if (getTimeoutFromObjectOrReply(c,c->argv[2],&timeout,UNIT_MILLISECONDS)
+        != C_OK) return;
+
+    /* Fast path: enough replicas may have acknowledged already, or the
+     * client is not allowed to block at all. */
+    acks = replicationCountAcksByOffset(offset);
+    if (acks >= numreplicas || c->flags & CLIENT_DENY_BLOCKING) {
+        addReplyLongLong(c,acks);
+        return;
+    }
+
+    /* Otherwise block the client and put it into our list of clients
+     * waiting for ack from slaves. */
+    blockForReplication(c,timeout,offset,numreplicas);
+
+    /* Make sure that the server will send an ACK request to all the slaves
+     * before returning to the event loop. */
+    replicationRequestAckFromSlaves();
+}
+
+/* WAITAOF <numlocal> <numreplicas> <timeout>
+ *
+ * WAIT for N replicas and / or local master to acknowledge our latest
+ * write command got synced to the disk (AOF fsync). Replies with a
+ * two-element array: [acklocal, ackreplicas]. */
+void waitaofCommand(client *c) {
+    mstime_t timeout;
+    long numreplicas, numlocal, ackreplicas, acklocal;
+
+    /* Argument parsing. Note arguments are validated before the
+     * replica / appendonly checks, so syntax errors are reported first. */
+    if (getRangeLongFromObjectOrReply(c,c->argv[1],0,1,&numlocal,NULL) != C_OK)
+        return;
+    if (getPositiveLongFromObjectOrReply(c,c->argv[2],&numreplicas,NULL) != C_OK)
+        return;
+    if (getTimeoutFromObjectOrReply(c,c->argv[3],&timeout,UNIT_MILLISECONDS) != C_OK)
+        return;
+
+    if (server.masterhost) {
+        addReplyError(c,"WAITAOF cannot be used with replica instances. Please also note that writes to replicas are just local and are not propagated.");
+        return;
+    }
+    /* Asking to wait for the local AOF makes no sense with AOF disabled. */
+    if (numlocal && !server.aof_enabled) {
+        addReplyError(c, "WAITAOF cannot be used when numlocal is set but appendonly is disabled.");
+        return;
+    }
+
+    /* First try without blocking at all. acklocal is 0 or 1 depending on
+     * whether our own AOF fsynced offset already covers the write. */
+    ackreplicas = replicationCountAOFAcksByOffset(c->woff);
+    acklocal = server.fsynced_reploff >= c->woff;
+    if ((ackreplicas >= numreplicas && acklocal >= numlocal) || c->flags & CLIENT_DENY_BLOCKING) {
+        addReplyArrayLen(c,2);
+        addReplyLongLong(c,acklocal);
+        addReplyLongLong(c,ackreplicas);
+        return;
+    }
+
+    /* Otherwise block the client and put it into our list of clients
+     * waiting for ack from slaves. */
+    blockForAofFsync(c,timeout,c->woff,numlocal,numreplicas);
+
+    /* Make sure that the server will send an ACK request to all the slaves
+     * before returning to the event loop. */
+    replicationRequestAckFromSlaves();
+}
+
+/* Blocking-op specific cleanup invoked by unblockClient(): remove the
+ * client from the list of clients waiting for replica acks and update
+ * the unblock stats. Never call this directly; always go through
+ * unblockClient(). */
+void unblockClientWaitingReplicas(client *c) {
+    listNode *node = listSearchKey(server.clients_waiting_acks,c);
+
+    /* A blocked WAIT/WAITAOF client must be in the waiting list. */
+    serverAssert(node != NULL);
+    listDelNode(server.clients_waiting_acks,node);
+    updateStatsOnUnblock(c, 0, 0, 0);
+}
+
+/* Check if there are clients blocked in WAIT or WAITAOF that can be unblocked
+ * since we received enough ACKs from slaves.
+ *
+ * The last_* variables cache the result of the most recent (offset,
+ * replicas) check so clients with equal or weaker requirements can be
+ * unblocked without rescanning the replica list. */
+void processClientsWaitingReplicas(void) {
+    long long last_offset = 0;       /* Last satisfied WAIT offset. */
+    long long last_aof_offset = 0;   /* Last satisfied WAITAOF offset. */
+    int last_numreplicas = 0;        /* Ack count at last_offset. */
+    int last_aof_numreplicas = 0;    /* AOF ack count at last_aof_offset. */
+
+    listIter li;
+    listNode *ln;
+
+    listRewind(server.clients_waiting_acks,&li);
+    while((ln = listNext(&li))) {
+        int numlocal = 0;
+        int numreplicas = 0;
+
+        client *c = ln->value;
+        int is_wait_aof = c->bstate.btype == BLOCKED_WAITAOF;
+
+        /* AOF may have been disabled (config change) while the client was
+         * blocked: fail the WAITAOF instead of waiting forever. */
+        if (is_wait_aof && c->bstate.numlocal && !server.aof_enabled) {
+            addReplyError(c, "WAITAOF cannot be used when numlocal is set but appendonly is disabled.");
+            unblockClient(c, 1);
+            continue;
+        }
+
+        /* Every time we find a client that is satisfied for a given
+         * offset and number of replicas, we remember it so the next client
+         * may be unblocked without calling replicationCountAcksByOffset()
+         * or calling replicationCountAOFAcksByOffset()
+         * if the requested offset / replicas were equal or less. */
+        if (!is_wait_aof && last_offset && last_offset >= c->bstate.reploffset &&
+            last_numreplicas >= c->bstate.numreplicas)
+        {
+            numreplicas = last_numreplicas;
+        } else if (is_wait_aof && last_aof_offset && last_aof_offset >= c->bstate.reploffset &&
+                   last_aof_numreplicas >= c->bstate.numreplicas)
+        {
+            numreplicas = last_aof_numreplicas;
+        } else {
+            /* Cache miss: actually count the acknowledging replicas. */
+            numreplicas = is_wait_aof ?
+                replicationCountAOFAcksByOffset(c->bstate.reploffset) :
+                replicationCountAcksByOffset(c->bstate.reploffset);
+
+            /* Check if the number of replicas is satisfied. */
+            if (numreplicas < c->bstate.numreplicas) continue;
+
+            if (is_wait_aof) {
+                last_aof_offset = c->bstate.reploffset;
+                last_aof_numreplicas = numreplicas;
+            } else {
+                last_offset = c->bstate.reploffset;
+                last_numreplicas = numreplicas;
+            }
+        }
+
+        /* Check if the local constraint of WAITAOF is served */
+        if (is_wait_aof) {
+            numlocal = server.fsynced_reploff >= c->bstate.reploffset;
+            if (numlocal < c->bstate.numlocal) continue;
+        }
+
+        /* Reply before unblocking, because unblock client calls reqresAppendResponse */
+        if (is_wait_aof) {
+            /* WAITAOF has an array reply */
+            addReplyArrayLen(c, 2);
+            addReplyLongLong(c, numlocal);
+            addReplyLongLong(c, numreplicas);
+        } else {
+            addReplyLongLong(c, numreplicas);
+        }
+
+        unblockClient(c, 1);
+    }
+}
+
+/* Return the slave replication offset for this instance, that is the
+ * offset up to which we already processed the master replication stream.
+ * Returns 0 on masters and whenever no offset information is available. */
+long long replicationGetSlaveOffset(void) {
+    long long offset = 0;
+
+    if (server.masterhost != NULL) {
+        /* Prefer the live master link; fall back to the cached master. */
+        client *source = server.master ? server.master : server.cached_master;
+        if (source != NULL) offset = source->reploff;
+    }
+    /* offset may be -1 when the master does not support it at all, however
+     * this function is designed to return an offset that can express the
+     * amount of data processed by the master, so we return a positive
+     * integer. */
+    return (offset < 0) ? 0 : offset;
+}
+
+/* --------------------------- REPLICATION CRON ---------------------------- */
+
+/* Replication cron function, called 1 time per second. Handles handshake
+ * timeouts, reconnection, periodic ACK/PING traffic, stale replica
+ * disconnection, backlog reclamation and pending replication forks. */
+void replicationCron(void) {
+    static long long replication_cron_loops = 0;
+
+    /* Check failover status first, to see if we need to start
+     * handling the failover. */
+    updateFailoverStatus();
+
+    /* Non blocking connection timeout? */
+    if (server.masterhost &&
+        (server.repl_state == REPL_STATE_CONNECTING ||
+         slaveIsInHandshakeState()) &&
+        (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
+    {
+        serverLog(LL_WARNING,"Timeout connecting to the MASTER...");
+        cancelReplicationHandshake(1);
+    }
+
+    /* Bulk transfer I/O timeout? */
+    if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER &&
+        (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
+    {
+        serverLog(LL_WARNING,"Timeout receiving bulk data from MASTER... If the problem persists try to set the 'repl-timeout' parameter in redis.conf to a larger value.");
+        cancelReplicationHandshake(1);
+    }
+
+    /* Timed out master when we are an already connected slave?
+     * NOTE(review): assumes server.master is non-NULL whenever repl_state
+     * is REPL_STATE_CONNECTED — confirm this invariant holds elsewhere. */
+    if (server.masterhost && server.repl_state == REPL_STATE_CONNECTED &&
+        (time(NULL)-server.master->lastinteraction) > server.repl_timeout)
+    {
+        serverLog(LL_WARNING,"MASTER timeout: no data nor PING received...");
+        freeClient(server.master);
+    }
+
+    /* Check if we should connect to a MASTER */
+    if (server.repl_state == REPL_STATE_CONNECT) {
+        serverLog(LL_NOTICE,"Connecting to MASTER %s:%d",
+            server.masterhost, server.masterport);
+        connectWithMaster();
+    }
+
+    /* Send ACK to master from time to time.
+     * Note that we do not send periodic acks to masters that don't
+     * support PSYNC and replication offsets. */
+    if (server.masterhost && server.master &&
+        !(server.master->flags & CLIENT_PRE_PSYNC))
+        replicationSendAck();
+
+    /* If we have attached slaves, PING them from time to time.
+     * So slaves can implement an explicit timeout to masters, and will
+     * be able to detect a link disconnection even if the TCP connection
+     * will not actually go down. */
+    listIter li;
+    listNode *ln;
+    robj *ping_argv[1];
+
+    /* First, send PING according to ping_slave_period. */
+    if ((replication_cron_loops % server.repl_ping_slave_period) == 0 &&
+        listLength(server.slaves))
+    {
+        /* Note that we don't send the PING if the clients are paused during
+         * a Redis Cluster manual failover: the PING we send will otherwise
+         * alter the replication offsets of master and slave, and will no longer
+         * match the one stored into 'mf_master_offset' state. */
+        int manual_failover_in_progress =
+            ((server.cluster_enabled &&
+              server.cluster->mf_end) ||
+            server.failover_end_time) &&
+            isPausedActionsWithUpdate(PAUSE_ACTION_REPLICA);
+
+        if (!manual_failover_in_progress) {
+            ping_argv[0] = shared.ping;
+            replicationFeedSlaves(server.slaves, -1,
+                                  ping_argv, 1);
+        }
+    }
+
+    /* Second, send a newline to all the slaves in pre-synchronization
+     * stage, that is, slaves waiting for the master to create the RDB file.
+     *
+     * Also send a newline to all the chained slaves we have, if we lost
+     * connection from our master, to keep the slaves aware that their
+     * master is online. This is needed since sub-slaves only receive proxied
+     * data from top-level masters, so there is no explicit pinging in order
+     * to avoid altering the replication offsets. This special out of band
+     * pings (newlines) can be sent, they will have no effect in the offset.
+     *
+     * The newline will be ignored by the slave but will refresh the
+     * last interaction timer preventing a timeout. In this case we ignore the
+     * ping period and refresh the connection once per second since certain
+     * timeouts are set at a few seconds (example: PSYNC response). */
+    listRewind(server.slaves,&li);
+    while((ln = listNext(&li))) {
+        client *slave = ln->value;
+
+        int is_presync =
+            (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START ||
+            (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
+             server.rdb_child_type != RDB_CHILD_TYPE_SOCKET));
+
+        if (is_presync) {
+            /* Write result is deliberately ignored: a failed newline will
+             * surface as a link error on the normal replication path. */
+            connWrite(slave->conn, "\n", 1);
+        }
+    }
+
+    /* Disconnect timedout slaves. */
+    if (listLength(server.slaves)) {
+        listIter li;
+        listNode *ln;
+
+        listRewind(server.slaves,&li);
+        while((ln = listNext(&li))) {
+            client *slave = ln->value;
+
+            if (slave->replstate == SLAVE_STATE_ONLINE) {
+                /* Pre-PSYNC replicas never send ACKs, so they can't be
+                 * timed out on repl_ack_time. */
+                if (slave->flags & CLIENT_PRE_PSYNC)
+                    continue;
+                if ((server.unixtime - slave->repl_ack_time) > server.repl_timeout) {
+                    serverLog(LL_WARNING, "Disconnecting timedout replica (streaming sync): %s",
+                          replicationGetSlaveName(slave));
+                    freeClient(slave);
+                    continue;
+                }
+            }
+            /* We consider disconnecting only diskless replicas because disk-based replicas aren't fed
+             * by the fork child so if a disk-based replica is stuck it doesn't prevent the fork child
+             * from terminating. */
+            if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END && server.rdb_child_type == RDB_CHILD_TYPE_SOCKET) {
+                if (slave->repl_last_partial_write != 0 &&
+                    (server.unixtime - slave->repl_last_partial_write) > server.repl_timeout)
+                {
+                    serverLog(LL_WARNING, "Disconnecting timedout replica (full sync): %s",
+                          replicationGetSlaveName(slave));
+                    freeClient(slave);
+                    continue;
+                }
+            }
+        }
+    }
+
+    /* If this is a master without attached slaves and there is a replication
+     * backlog active, in order to reclaim memory we can free it after some
+     * (configured) time. Note that this cannot be done for slaves: slaves
+     * without sub-slaves attached should still accumulate data into the
+     * backlog, in order to reply to PSYNC queries if they are turned into
+     * masters after a failover. */
+    if (listLength(server.slaves) == 0 && server.repl_backlog_time_limit &&
+        server.repl_backlog && server.masterhost == NULL)
+    {
+        time_t idle = server.unixtime - server.repl_no_slaves_since;
+
+        if (idle > server.repl_backlog_time_limit) {
+            /* When we free the backlog, we always use a new
+             * replication ID and clear the ID2. This is needed
+             * because when there is no backlog, the master_repl_offset
+             * is not updated, but we would still retain our replication
+             * ID, leading to the following problem:
+             *
+             * 1. We are a master instance.
+             * 2. Our slave is promoted to master. It's repl-id-2 will
+             *    be the same as our repl-id.
+             * 3. We, yet as master, receive some updates, that will not
+             *    increment the master_repl_offset.
+             * 4. Later we are turned into a slave, connect to the new
+             *    master that will accept our PSYNC request by second
+             *    replication ID, but there will be data inconsistency
+             *    because we received writes. */
+            changeReplicationId();
+            clearReplicationId2();
+            freeReplicationBacklog();
+            serverLog(LL_NOTICE,
+                "Replication backlog freed after %d seconds "
+                "without connected replicas.",
+                (int) server.repl_backlog_time_limit);
+        }
+    }
+
+    replicationStartPendingFork();
+
+    /* Remove the RDB file used for replication if Redis is not running
+     * with any persistence. */
+    removeRDBUsedToSyncReplicas();
+
+    /* Sanity check replication buffer, the first block of replication buffer blocks
+     * must be referenced by someone, since it will be freed when not referenced,
+     * otherwise, server will OOM. also, its refcount must not be more than
+     * replicas number + 1(replication backlog). */
+    if (listLength(server.repl_buffer_blocks) > 0) {
+        replBufBlock *o = listNodeValue(listFirst(server.repl_buffer_blocks));
+        serverAssert(o->refcount > 0 &&
+                     o->refcount <= (int)listLength(server.slaves)+1);
+    }
+
+    /* Refresh the number of slaves with lag <= min-slaves-max-lag. */
+    refreshGoodSlavesCount();
+    replication_cron_loops++; /* Incremented with frequency 1 HZ. */
+}
+
+/* Decide whether a replication child (BGSAVE for replication) should be
+ * started now. Returns 1 and fills *mincapa_out / *req_out (when non-NULL)
+ * with the AND-ed capabilities and the requirements of the waiting
+ * replicas, otherwise returns 0. */
+int shouldStartChildReplication(int *mincapa_out, int *req_out) {
+    /* We should start a BGSAVE good for replication if we have slaves in
+     * WAIT_BGSAVE_START state.
+     *
+     * In case of diskless replication, we make sure to wait the specified
+     * number of seconds (according to configuration) so that other slaves
+     * have the time to arrive before we start streaming. */
+    if (!hasActiveChildProcess()) {
+        time_t idle, max_idle = 0;
+        int slaves_waiting = 0;
+        /* Initialize defensively: these are only assigned inside the loop
+         * when a waiting replica is found, and reading them otherwise
+         * would be undefined behavior (also silences
+         * -Wmaybe-uninitialized). */
+        int mincapa = 0;
+        int req = 0;
+        int first = 1;
+        listNode *ln;
+        listIter li;
+
+        listRewind(server.slaves,&li);
+        while((ln = listNext(&li))) {
+            client *slave = ln->value;
+            if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
+                if (first) {
+                    /* Get first slave's requirements */
+                    req = slave->slave_req;
+                } else if (req != slave->slave_req) {
+                    /* Skip slaves that don't match */
+                    continue;
+                }
+                idle = server.unixtime - slave->lastinteraction;
+                if (idle > max_idle) max_idle = idle;
+                slaves_waiting++;
+                mincapa = first ? slave->slave_capa : (mincapa & slave->slave_capa);
+                first = 0;
+            }
+        }
+
+        /* Fire immediately when diskless sync is off; otherwise wait for
+         * either enough replicas or the configured delay to pass. */
+        if (slaves_waiting &&
+            (!server.repl_diskless_sync ||
+             (server.repl_diskless_sync_max_replicas > 0 &&
+              slaves_waiting >= server.repl_diskless_sync_max_replicas) ||
+             max_idle >= server.repl_diskless_sync_delay))
+        {
+            if (mincapa_out)
+                *mincapa_out = mincapa;
+            if (req_out)
+                *req_out = req;
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Kick off a replication BGSAVE when the conditions checked by
+ * shouldStartChildReplication() are met. The started child may target a
+ * socket or the disk depending on configuration and on the waiting
+ * replicas' capabilities and requirements. */
+void replicationStartPendingFork(void) {
+    int mincapa = -1;
+    int req = -1;
+
+    if (!shouldStartChildReplication(&mincapa, &req)) return;
+
+    startBgsaveForReplication(mincapa, req);
+}
+
+/* Look up a connected replica by IP:PORT in the replica list. Returns the
+ * matching client, or NULL when none matches. */
+static client *findReplica(char *host, int port) {
+    listIter iter;
+    listNode *node;
+
+    listRewind(server.slaves,&iter);
+    while((node = listNext(&iter)) != NULL) {
+        client *replica = node->value;
+        char ip[NET_IP_STR_LEN];
+        char *replicaip = replica->slave_addr;
+
+        /* If the replica did not announce an address, fall back to the
+         * peer address of its connection. */
+        if (replicaip == NULL) {
+            if (connAddrPeerName(replica->conn, ip, sizeof(ip), NULL) == -1)
+                continue;
+            replicaip = ip;
+        }
+
+        if (port == replica->slave_listening_port &&
+            strcasecmp(host, replicaip) == 0)
+        {
+            return replica;
+        }
+    }
+
+    return NULL;
+}
+
+/* Map the current failover state to a human readable string (as exposed
+ * by INFO). */
+const char *getFailoverStateString(void) {
+    switch(server.failover_state) {
+    case NO_FAILOVER:            return "no-failover";
+    case FAILOVER_WAIT_FOR_SYNC: return "waiting-for-sync";
+    case FAILOVER_IN_PROGRESS:   return "failover-in-progress";
+    default:                     return "unknown";
+    }
+}
+
+/* Reset the internal failover configuration back to its idle state.
+ * Must be called after a failover either succeeds or fails, since it is
+ * also responsible for lifting the client pause. */
+void clearFailoverState(void) {
+    server.failover_state = NO_FAILOVER;
+    server.failover_end_time = 0;
+    server.force_failover = 0;
+    /* zfree(NULL) is a no-op, so no NULL check is needed here. */
+    zfree(server.target_replica_host);
+    server.target_replica_host = NULL;
+    server.target_replica_port = 0;
+    unpauseActions(PAUSE_DURING_FAILOVER);
+}
+
+/* Abort the failover currently in progress, if any, logging 'err' as the
+ * reason. If we already started acting as a replica of the failover
+ * target, promote ourselves back to master. */
+void abortFailover(const char *err) {
+    if (server.failover_state == NO_FAILOVER) return;
+
+    if (server.target_replica_host == NULL) {
+        serverLog(LL_NOTICE,"FAILOVER to any replica aborted: %s",err);
+    } else {
+        serverLog(LL_NOTICE,"FAILOVER to %s:%d aborted: %s",
+            server.target_replica_host,server.target_replica_port,err);
+    }
+
+    if (server.failover_state == FAILOVER_IN_PROGRESS)
+        replicationUnsetMaster();
+    clearFailoverState();
+}
+
+/*
+ * FAILOVER [TO <HOST> <PORT> [FORCE]] [ABORT] [TIMEOUT <timeout>]
+ *
+ * This command will coordinate a failover between the master and one
+ * of its replicas. The happy path contains the following steps:
+ * 1) The master will initiate a client pause write, to stop replication
+ *    traffic.
+ * 2) The master will periodically check if any of its replicas has
+ *    consumed the entire replication stream through acks.
+ * 3) Once any replica has caught up, the master will itself become a replica.
+ * 4) The master will send a PSYNC FAILOVER request to the target replica, which
+ *    if accepted will cause the replica to become the new master and start a sync.
+ *
+ * FAILOVER ABORT is the only way to abort a failover command, as replicaof
+ * will be disabled. This may be needed if the failover is unable to progress.
+ *
+ * The optional arguments [TO <HOST> <PORT>] allow designating a specific
+ * replica to be failed over to.
+ *
+ * FORCE flag indicates that even if the target replica is not caught up,
+ * failover to it anyway. This must be specified with a timeout and a target
+ * HOST and PORT.
+ *
+ * TIMEOUT <timeout> indicates how long should the primary wait for
+ * a replica to sync up before aborting. If not specified, the failover
+ * will attempt forever and must be manually aborted.
+ */
+void failoverCommand(client *c) {
+    if (server.cluster_enabled) {
+        addReplyError(c,"FAILOVER not allowed in cluster mode. "
+                        "Use CLUSTER FAILOVER command instead.");
+        return;
+    }
+
+    /* Handle special case for abort */
+    if ((c->argc == 2) && !strcasecmp(c->argv[1]->ptr,"abort")) {
+        if (server.failover_state == NO_FAILOVER) {
+            addReplyError(c, "No failover in progress.");
+            return;
+        }
+
+        abortFailover("Failover manually aborted");
+        addReply(c,shared.ok);
+        return;
+    }
+
+    long timeout_in_ms = 0;
+    int force_flag = 0;
+    long port = 0;
+    char *host = NULL;
+
+    /* Parse the command for syntax and arguments. Each option is accepted
+     * at most once (guarded by the timeout_in_ms == 0 / !host /
+     * !force_flag conditions). */
+    for (int j = 1; j < c->argc; j++) {
+        if (!strcasecmp(c->argv[j]->ptr,"timeout") && (j + 1 < c->argc) &&
+            timeout_in_ms == 0)
+        {
+            if (getLongFromObjectOrReply(c,c->argv[j + 1],
+                        &timeout_in_ms,NULL) != C_OK) return;
+            if (timeout_in_ms <= 0) {
+                addReplyError(c,"FAILOVER timeout must be greater than 0");
+                return;
+            }
+            j++;
+        } else if (!strcasecmp(c->argv[j]->ptr,"to") && (j + 2 < c->argc) &&
+            !host)
+        {
+            if (getLongFromObjectOrReply(c,c->argv[j + 2],&port,NULL) != C_OK)
+                return;
+            host = c->argv[j + 1]->ptr;
+            j += 2;
+        } else if (!strcasecmp(c->argv[j]->ptr,"force") && !force_flag) {
+            force_flag = 1;
+        } else {
+            addReplyErrorObject(c,shared.syntaxerr);
+            return;
+        }
+    }
+
+    if (server.failover_state != NO_FAILOVER) {
+        addReplyError(c,"FAILOVER already in progress.");
+        return;
+    }
+
+    if (server.masterhost) {
+        addReplyError(c,"FAILOVER is not valid when server is a replica.");
+        return;
+    }
+
+    if (listLength(server.slaves) == 0) {
+        addReplyError(c,"FAILOVER requires connected replicas.");
+        return;
+    }
+
+    /* FORCE only makes sense with a bounded wait and a known target. */
+    if (force_flag && (!timeout_in_ms || !host)) {
+        addReplyError(c,"FAILOVER with force option requires both a timeout "
+                        "and target HOST and IP.");
+        return;
+    }
+
+    /* If a replica address was provided, validate that it is connected. */
+    if (host) {
+        client *replica = findReplica(host, port);
+
+        if (replica == NULL) {
+            addReplyError(c,"FAILOVER target HOST and PORT is not "
+                            "a replica.");
+            return;
+        }
+
+        /* Check if requested replica is online */
+        if (replica->replstate != SLAVE_STATE_ONLINE) {
+            addReplyError(c,"FAILOVER target replica is not online.");
+            return;
+        }
+
+        server.target_replica_host = zstrdup(host);
+        server.target_replica_port = port;
+        serverLog(LL_NOTICE,"FAILOVER requested to %s:%ld.",host,port);
+    } else {
+        serverLog(LL_NOTICE,"FAILOVER requested to any replica.");
+    }
+
+    mstime_t now = commandTimeSnapshot();
+    if (timeout_in_ms) {
+        server.failover_end_time = now + timeout_in_ms;
+    }
+
+    server.force_failover = force_flag;
+    server.failover_state = FAILOVER_WAIT_FOR_SYNC;
+    /* Cluster failover will unpause eventually */
+    pauseActions(PAUSE_DURING_FAILOVER,
+                 LLONG_MAX,
+                 PAUSE_ACTIONS_CLIENT_WRITE_SET);
+    addReply(c,shared.ok);
+}
+
+/* Failover cron function, checks coordinated failover state.
+ *
+ * Implementation note: The current implementation calls replicationSetMaster()
+ * to start the failover request, this has some unintended side effects if the
+ * failover doesn't work like blocked clients will be unblocked and replicas will
+ * be disconnected. This could be optimized further.
+ */
+void updateFailoverStatus(void) {
+    if (server.failover_state != FAILOVER_WAIT_FOR_SYNC) return;
+    mstime_t now = server.mstime;
+
+    /* Check if failover operation has timed out */
+    if (server.failover_end_time && server.failover_end_time <= now) {
+        if (server.force_failover) {
+            serverLog(LL_NOTICE,
+                "FAILOVER to %s:%d time out exceeded, failing over.",
+                server.target_replica_host, server.target_replica_port);
+            server.failover_state = FAILOVER_IN_PROGRESS;
+            /* If timeout has expired force a failover if requested. */
+            replicationSetMaster(server.target_replica_host,
+                server.target_replica_port);
+            return;
+        } else {
+            /* Force was not requested, so timeout. */
+            abortFailover("Replica never caught up before timeout");
+            return;
+        }
+    }
+
+    /* Check to see if the replica has caught up so failover can start */
+    client *replica = NULL;
+    if (server.target_replica_host) {
+        replica = findReplica(server.target_replica_host,
+            server.target_replica_port);
+    } else {
+        listIter li;
+        listNode *ln;
+
+        listRewind(server.slaves,&li);
+        /* Find any replica that has matched our repl_offset. Note that
+         * 'replica' may exit the loop pointing at the last scanned (and
+         * non-matching) replica; the final offset re-check below makes
+         * this harmless. */
+        while((ln = listNext(&li))) {
+            replica = ln->value;
+            if (replica->repl_ack_off == server.master_repl_offset) {
+                char ip[NET_IP_STR_LEN], *replicaaddr = replica->slave_addr;
+
+                /* No announced address: use the connection peer address. */
+                if (!replicaaddr) {
+                    if (connAddrPeerName(replica->conn,ip,sizeof(ip),NULL) == -1)
+                        continue;
+                    replicaaddr = ip;
+                }
+
+                /* We are now failing over to this specific node */
+                server.target_replica_host = zstrdup(replicaaddr);
+                server.target_replica_port = replica->slave_listening_port;
+                break;
+            }
+        }
+    }
+
+    /* We've found a replica that is caught up */
+    if (replica && (replica->repl_ack_off == server.master_repl_offset)) {
+        server.failover_state = FAILOVER_IN_PROGRESS;
+        serverLog(LL_NOTICE,
+                "Failover target %s:%d is synced, failing over.",
+                server.target_replica_host, server.target_replica_port);
+        /* Designated replica is caught up, failover to it. */
+        replicationSetMaster(server.target_replica_host,
+            server.target_replica_port);
+    }
+}
diff --git a/src/resp_parser.c b/src/resp_parser.c
new file mode 100644
index 0000000..b92a74c
--- /dev/null
+++ b/src/resp_parser.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2009-2021, Redis Labs Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* ----------------------------------------------------------------------------------------
+ * A RESP parser for parsing replies returned by RM_Call or Lua's
+ * 'redis.call()'.
+ *
+ * The parser introduces callbacks that need to be set by the user. Each
+ * callback represents a different reply type. Each callback gets a p_ctx that
+ * was given to the parseReply function. The callbacks also give the protocol
+ * (underlying blob) of the current reply and the size.
+ *
+ * Some callbacks also get the parser object itself:
+ * - array_callback
+ * - set_callback
+ * - map_callback
+ *
+ * These callbacks need to continue parsing by calling parseReply a number of
+ * times, according to the supplied length. Subsequent parseReply calls may use
+ * a different p_ctx, which will be used for nested CallReply objects.
+ *
+ * These callbacks also do not receive a proto_len, which is not known at the
+ * time of parsing. Callers may calculate it themselves after parsing the
+ * entire collection.
+ *
+ * NOTE: This parser is designed to only handle replies generated by Redis
+ * itself. It does not perform many required validations and thus NOT SAFE FOR
+ * PARSING USER INPUT.
+ * ----------------------------------------------------------------------------------------
+ */
+
+#include "resp_parser.h"
+#include "server.h"
+
+/* Parse a RESP bulk string ('$<len>\r\n<payload>\r\n') at
+ * parser->curr_location, advance the parser past it, and invoke
+ * null_bulk_string_callback (len == -1) or bulk_string_callback.
+ * NOTE(review): strchr/string2ll results are unchecked -- relies on the
+ * file-level contract that input is Redis-generated, never user input. */
+static int parseBulk(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ long long bulklen;
+ parser->curr_location = p + 2; /* for \r\n */
+
+ string2ll(proto+1,p-proto-1,&bulklen);
+ if (bulklen == -1) {
+ parser->callbacks.null_bulk_string_callback(p_ctx, proto, parser->curr_location - proto);
+ } else {
+ const char *str = parser->curr_location;
+ parser->curr_location += bulklen;
+ parser->curr_location += 2; /* for \r\n */
+ parser->callbacks.bulk_string_callback(p_ctx, str, bulklen, proto, parser->curr_location - proto);
+ }
+
+ return C_OK;
+}
+
+/* Parse a RESP simple string ('+<str>\r\n') and hand the payload (without
+ * the '+' and CRLF) to simple_str_callback. */
+static int parseSimpleString(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ parser->curr_location = p + 2; /* for \r\n */
+ parser->callbacks.simple_str_callback(p_ctx, proto+1, p-proto-1, proto, parser->curr_location - proto);
+ return C_OK;
+}
+
+/* Parse a RESP error ('-<msg>\r\n') and hand the message to error_callback. */
+static int parseError(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ parser->curr_location = p + 2; /* for \r\n */
+ parser->callbacks.error_callback(p_ctx, proto+1, p-proto-1, proto, parser->curr_location - proto);
+ return C_OK;
+}
+
+/* Parse a RESP integer (':<num>\r\n'), decode it with string2ll and hand
+ * the value to long_callback. */
+static int parseLong(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ parser->curr_location = p + 2; /* for \r\n */
+ long long val;
+ string2ll(proto+1,p-proto-1,&val);
+ parser->callbacks.long_callback(p_ctx, val, proto, parser->curr_location - proto);
+ return C_OK;
+}
+
+/* Parse a RESP3 attribute header ('|<len>\r\n'). The attribute_callback is
+ * expected to consume the following 'len' key/value pairs by calling
+ * parseReply itself (no proto_len is passed, see the file header). */
+static int parseAttributes(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ long long len;
+ string2ll(proto+1,p-proto-1,&len);
+ p += 2;
+ parser->curr_location = p;
+ parser->callbacks.attribute_callback(parser, p_ctx, len, proto);
+ return C_OK;
+}
+
+/* Parse a RESP3 verbatim string ('=<len>\r\n<3-char-format>:<payload>\r\n').
+ * The first 4 payload bytes (format plus ':') are split off, so the callback
+ * gets the format pointer and the remaining payload of bulklen-4 bytes. */
+static int parseVerbatimString(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ long long bulklen;
+ parser->curr_location = p + 2; /* for \r\n */
+ string2ll(proto+1,p-proto-1,&bulklen);
+ const char *format = parser->curr_location;
+ parser->curr_location += bulklen;
+ parser->curr_location += 2; /* for \r\n */
+ parser->callbacks.verbatim_string_callback(p_ctx, format, format + 4, bulklen - 4, proto, parser->curr_location - proto);
+ return C_OK;
+}
+
+/* Parse a RESP3 big number ('(<digits>\r\n'); the textual form is passed
+ * through to the callback without numeric conversion. */
+static int parseBigNumber(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ parser->curr_location = p + 2; /* for \r\n */
+ parser->callbacks.big_number_callback(p_ctx, proto+1, p-proto-1, proto, parser->curr_location - proto);
+ return C_OK;
+}
+
+/* Parse a RESP3 null ('_\r\n'). */
+static int parseNull(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ parser->curr_location = p + 2; /* for \r\n */
+ parser->callbacks.null_callback(p_ctx, proto, parser->curr_location - proto);
+ return C_OK;
+}
+
+/* Parse a RESP3 double (',<repr>\r\n'), converting the textual form with
+ * strtod. A representation longer than MAX_LONG_DOUBLE_CHARS silently
+ * degrades to 0 -- NOTE(review): not expected from Redis-generated replies,
+ * but confirm this fallback is acceptable for all callers. */
+static int parseDouble(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ parser->curr_location = p + 2; /* for \r\n */
+ char buf[MAX_LONG_DOUBLE_CHARS+1];
+ size_t len = p-proto-1;
+ double d;
+ if (len <= MAX_LONG_DOUBLE_CHARS) {
+ memcpy(buf,proto+1,len);
+ buf[len] = '\0';
+ d = strtod(buf,NULL); /* We expect a valid representation. */
+ } else {
+ d = 0;
+ }
+ parser->callbacks.double_callback(p_ctx, d, proto, parser->curr_location - proto);
+ return C_OK;
+}
+
+/* Parse a RESP3 boolean ('#t\r\n' or '#f\r\n'); any byte other than 't'
+ * is reported as false. */
+static int parseBool(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ parser->curr_location = p + 2; /* for \r\n */
+ parser->callbacks.bool_callback(p_ctx, proto[1] == 't', proto, parser->curr_location - proto);
+ return C_OK;
+}
+
+/* Parse an array header ('*<len>\r\n'). A length of -1 is a RESP2 null
+ * array; otherwise array_callback must consume the 'len' elements by
+ * calling parseReply itself. */
+static int parseArray(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ long long len;
+ string2ll(proto+1,p-proto-1,&len);
+ p += 2;
+ parser->curr_location = p;
+ if (len == -1) {
+ parser->callbacks.null_array_callback(p_ctx, proto, parser->curr_location - proto);
+ } else {
+ parser->callbacks.array_callback(parser, p_ctx, len, proto);
+ }
+ return C_OK;
+}
+
+/* Parse a RESP3 set header ('~<len>\r\n'); set_callback must consume the
+ * 'len' elements by calling parseReply itself. */
+static int parseSet(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ long long len;
+ string2ll(proto+1,p-proto-1,&len);
+ p += 2;
+ parser->curr_location = p;
+ parser->callbacks.set_callback(parser, p_ctx, len, proto);
+ return C_OK;
+}
+
+/* Parse a RESP3 map header ('%<len>\r\n'); map_callback must consume the
+ * 'len' key/value pairs by calling parseReply itself. */
+static int parseMap(ReplyParser *parser, void *p_ctx) {
+ const char *proto = parser->curr_location;
+ char *p = strchr(proto+1,'\r');
+ long long len;
+ string2ll(proto+1,p-proto-1,&len);
+ p += 2;
+ parser->curr_location = p;
+ parser->callbacks.map_callback(parser, p_ctx, len, proto);
+ return C_OK;
+}
+
+/* Parse a reply pointed to by parser->curr_location, dispatching on the
+ * RESP type byte. Returns C_OK for a recognized type; on an unknown byte
+ * the optional 'error' callback is invoked and C_ERR is returned. */
+int parseReply(ReplyParser *parser, void *p_ctx) {
+ switch (parser->curr_location[0]) {
+ case '$': return parseBulk(parser, p_ctx);
+ case '+': return parseSimpleString(parser, p_ctx);
+ case '-': return parseError(parser, p_ctx);
+ case ':': return parseLong(parser, p_ctx);
+ case '*': return parseArray(parser, p_ctx);
+ case '~': return parseSet(parser, p_ctx);
+ case '%': return parseMap(parser, p_ctx);
+ case '#': return parseBool(parser, p_ctx);
+ case ',': return parseDouble(parser, p_ctx);
+ case '_': return parseNull(parser, p_ctx);
+ case '(': return parseBigNumber(parser, p_ctx);
+ case '=': return parseVerbatimString(parser, p_ctx);
+ case '|': return parseAttributes(parser, p_ctx);
+ default: if (parser->callbacks.error) parser->callbacks.error(p_ctx);
+ }
+ return C_ERR;
+}
diff --git a/src/resp_parser.h b/src/resp_parser.h
new file mode 100644
index 0000000..0b5c8e2
--- /dev/null
+++ b/src/resp_parser.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021, Redis Labs Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SRC_RESP_PARSER_H_
+#define SRC_RESP_PARSER_H_
+
+#include <stddef.h>
+
+typedef struct ReplyParser ReplyParser;
+
+/* User-supplied handlers, one per RESP reply type. Unset (NULL) callbacks
+ * for types that actually occur in the input will be called through a NULL
+ * pointer -- install a handler for every type the input may contain. */
+typedef struct ReplyParserCallbacks {
+ /* Called when the parser reaches a null mbulk ('*-1') */
+ void (*null_array_callback)(void *ctx, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches a null bulk ('$-1') (bulk len is -1) */
+ void (*null_bulk_string_callback)(void *ctx, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches a bulk ('$'), which is passed as 'str' along with its length 'len' */
+ void (*bulk_string_callback)(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches an error ('-'), which is passed as 'str' along with its length 'len' */
+ void (*error_callback)(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches a simple string ('+'), which is passed as 'str' along with its length 'len' */
+ void (*simple_str_callback)(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches a long long value (':'), which is passed as an argument 'val' */
+ void (*long_callback)(void *ctx, long long val, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches an array ('*'). The array length is passed as an argument 'len' */
+ void (*array_callback)(struct ReplyParser *parser, void *ctx, size_t len, const char *proto);
+
+ /* Called when the parser reaches a set ('~'). The set length is passed as an argument 'len' */
+ void (*set_callback)(struct ReplyParser *parser, void *ctx, size_t len, const char *proto);
+
+ /* Called when the parser reaches a map ('%'). The map length is passed as an argument 'len' */
+ void (*map_callback)(struct ReplyParser *parser, void *ctx, size_t len, const char *proto);
+
+ /* Called when the parser reaches a bool ('#'), which is passed as an argument 'val' */
+ void (*bool_callback)(void *ctx, int val, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches a double (','), which is passed as an argument 'val' */
+ void (*double_callback)(void *ctx, double val, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches a big number ('('), which is passed as 'str' along with its length 'len' */
+ void (*big_number_callback)(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches a string ('='), which is passed as 'str' along with its 'format' and length 'len' */
+ void (*verbatim_string_callback)(void *ctx, const char *format, const char *str, size_t len, const char *proto, size_t proto_len);
+
+ /* Called when the parser reaches an attribute ('|'). The attribute length is passed as an argument 'len' */
+ void (*attribute_callback)(struct ReplyParser *parser, void *ctx, size_t len, const char *proto);
+
+ /* Called when the parser reaches a null ('_') */
+ void (*null_callback)(void *ctx, const char *proto, size_t proto_len);
+
+ /* Called on an unrecognized type byte; optional (checked for NULL). */
+ void (*error)(void *ctx);
+} ReplyParserCallbacks;
+
+/* Parser state: a cursor into the raw reply plus the callback table. */
+struct ReplyParser {
+ /* The current location in the reply buffer, needs to be set to the beginning of the reply */
+ const char *curr_location;
+ /* Per-type handlers invoked as replies are parsed. */
+ ReplyParserCallbacks callbacks;
+};
+
+int parseReply(ReplyParser *parser, void *p_ctx);
+
+#endif /* SRC_RESP_PARSER_H_ */
diff --git a/src/rio.c b/src/rio.c
new file mode 100644
index 0000000..eaf88d2
--- /dev/null
+++ b/src/rio.c
@@ -0,0 +1,520 @@
+/* rio.c is a simple stream-oriented I/O abstraction that provides an interface
+ * to write code that can consume/produce data using different concrete input
+ * and output devices. For instance the same rdb.c code using the rio
+ * abstraction can be used to read and write the RDB format using in-memory
+ * buffers or files.
+ *
+ * A rio object provides the following methods:
+ * read: read from stream.
+ * write: write to stream.
+ * tell: get the current offset.
+ *
+ * It is also possible to set a 'checksum' method that is used by rio.c in order
+ * to compute a checksum of the data written or read, or to query the rio object
+ * for the current checksum.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "fmacros.h"
+#include "fpconv_dtoa.h"
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "rio.h"
+#include "util.h"
+#include "crc64.h"
+#include "config.h"
+#include "server.h"
+
+/* ------------------------- Buffer I/O implementation ----------------------- */
+
+/* Returns 1 or 0 for success/failure. Appends to the sds and always reports
+ * success -- NOTE(review): assumes sds allocation aborts on OOM rather than
+ * returning NULL; confirm against the project's sds build flags. */
+static size_t rioBufferWrite(rio *r, const void *buf, size_t len) {
+ r->io.buffer.ptr = sdscatlen(r->io.buffer.ptr,(char*)buf,len);
+ r->io.buffer.pos += len;
+ return 1;
+}
+
+/* Returns 1 or 0 for success/failure. All-or-nothing: either len bytes are
+ * copied out and the position advanced, or nothing is consumed. */
+static size_t rioBufferRead(rio *r, void *buf, size_t len) {
+ if (sdslen(r->io.buffer.ptr)-r->io.buffer.pos < len)
+ return 0; /* not enough buffer to return len bytes. */
+ memcpy(buf,r->io.buffer.ptr+r->io.buffer.pos,len);
+ r->io.buffer.pos += len;
+ return 1;
+}
+
+/* Returns read/write position in buffer. */
+static off_t rioBufferTell(rio *r) {
+ return r->io.buffer.pos;
+}
+
+/* Flushes any buffer to target device if applicable. Returns 1 on success
+ * and 0 on failures. */
+static int rioBufferFlush(rio *r) {
+ UNUSED(r);
+ return 1; /* Nothing to do, our write just appends to the buffer. */
+}
+
+/* Template rio object for in-memory (sds) streams; copied by
+ * rioInitWithBuffer before the io-specific fields are filled in. */
+static const rio rioBufferIO = {
+ rioBufferRead,
+ rioBufferWrite,
+ rioBufferTell,
+ rioBufferFlush,
+ NULL, /* update_checksum */
+ 0, /* current checksum */
+ 0, /* flags */
+ 0, /* bytes read or written */
+ 0, /* read/write chunk size */
+ { { NULL, 0 } } /* union for io-specific vars */
+};
+
+/* Initialize 'r' as a buffer-backed rio over the sds 's', positioned at the
+ * start. Writes may reallocate the sds; the current pointer afterwards is
+ * r->io.buffer.ptr, not the caller's original 's'. */
+void rioInitWithBuffer(rio *r, sds s) {
+ *r = rioBufferIO;
+ r->io.buffer.ptr = s;
+ r->io.buffer.pos = 0;
+}
+
+/* --------------------- Stdio file pointer implementation ------------------- */
+
+/* Returns 1 or 0 for success/failure.
+ *
+ * With autosync disabled, the whole buffer is handed to stdio in one fwrite.
+ * With autosync enabled, data is written in chunks no larger than the room
+ * left before the next sync point; whenever 'buffered' reaches the autosync
+ * threshold the stream is flushed and (on Linux with sync_file_range)
+ * writeout of that range is started asynchronously while waiting for the
+ * previous range, so at most one autosync window is ever in flight. On
+ * other platforms a blocking redis_fsync is issued instead. */
+static size_t rioFileWrite(rio *r, const void *buf, size_t len) {
+ if (!r->io.file.autosync) return fwrite(buf,len,1,r->io.file.fp);
+
+ size_t nwritten = 0;
+ /* Incrementally write data to the file, avoid a single write larger than
+ * the autosync threshold (so that the kernel's buffer cache never has too
+ * many dirty pages at once). */
+ while (len != nwritten) {
+ serverAssert(r->io.file.autosync > r->io.file.buffered);
+ size_t nalign = (size_t)(r->io.file.autosync - r->io.file.buffered);
+ size_t towrite = nalign > len-nwritten ? len-nwritten : nalign;
+
+ if (fwrite((char*)buf+nwritten,towrite,1,r->io.file.fp) == 0) return 0;
+ nwritten += towrite;
+ r->io.file.buffered += towrite;
+
+ if (r->io.file.buffered >= r->io.file.autosync) {
+ fflush(r->io.file.fp);
+
+ size_t processed = r->processed_bytes + nwritten;
+ serverAssert(processed % r->io.file.autosync == 0);
+ serverAssert(r->io.file.buffered == r->io.file.autosync);
+
+#if HAVE_SYNC_FILE_RANGE
+ /* Start writeout asynchronously. */
+ if (sync_file_range(fileno(r->io.file.fp),
+ processed - r->io.file.autosync, r->io.file.autosync,
+ SYNC_FILE_RANGE_WRITE) == -1)
+ return 0;
+
+ if (processed >= (size_t)r->io.file.autosync * 2) {
+ /* To keep the promise to 'autosync', we should make sure last
+ * asynchronous writeout persists into disk. This call may block
+ * if last writeout is not finished since disk is slow. */
+ if (sync_file_range(fileno(r->io.file.fp),
+ processed - r->io.file.autosync*2,
+ r->io.file.autosync, SYNC_FILE_RANGE_WAIT_BEFORE|
+ SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER) == -1)
+ return 0;
+ }
+#else
+ if (redis_fsync(fileno(r->io.file.fp)) == -1) return 0;
+#endif
+ if (r->io.file.reclaim_cache) {
+ /* In Linux sync_file_range just issue a writeback request to
+ * OS, and when posix_fadvise is called, the dirty page may
+ * still be in flushing, which means it would be ignored by
+ * posix_fadvise.
+ *
+ * So we posix_fadvise the whole file, and the writeback-ed
+ * pages will have other chances to be reclaimed. */
+ reclaimFilePageCache(fileno(r->io.file.fp), 0, 0);
+ }
+ r->io.file.buffered = 0;
+ }
+ }
+ return 1;
+}
+
+/* Returns 1 or 0 for success/failure. Uses fread with nmemb == 1, so it
+ * succeeds only when all 'len' bytes are read. */
+static size_t rioFileRead(rio *r, void *buf, size_t len) {
+ return fread(buf,len,1,r->io.file.fp);
+}
+
+/* Returns read/write position in file. */
+static off_t rioFileTell(rio *r) {
+ return ftello(r->io.file.fp);
+}
+
+/* Flushes any buffer to target device if applicable. Returns 1 on success
+ * and 0 on failures. */
+static int rioFileFlush(rio *r) {
+ return (fflush(r->io.file.fp) == 0) ? 1 : 0;
+}
+
+/* Template rio object for stdio FILE* streams; copied by rioInitWithFile. */
+static const rio rioFileIO = {
+ rioFileRead,
+ rioFileWrite,
+ rioFileTell,
+ rioFileFlush,
+ NULL, /* update_checksum */
+ 0, /* current checksum */
+ 0, /* flags */
+ 0, /* bytes read or written */
+ 0, /* read/write chunk size */
+ { { NULL, 0 } } /* union for io-specific vars */
+};
+
+/* Initialize 'r' as a file-backed rio around 'fp'. Autosync and cache
+ * reclaim are disabled by default; the caller keeps ownership of 'fp'. */
+void rioInitWithFile(rio *r, FILE *fp) {
+ *r = rioFileIO;
+ r->io.file.fp = fp;
+ r->io.file.buffered = 0;
+ r->io.file.autosync = 0;
+ r->io.file.reclaim_cache = 0;
+}
+
+/* ------------------- Connection implementation -------------------
+ * We use this RIO implementation when reading an RDB file directly from
+ * the connection to the memory via rdbLoadRio(), thus this implementation
+ * only implements reading from a connection that is, normally,
+ * just a socket. */
+
+/* Writing is not supported by the connection target; always fails. */
+static size_t rioConnWrite(rio *r, const void *buf, size_t len) {
+ UNUSED(r);
+ UNUSED(buf);
+ UNUSED(len);
+ return 0; /* Error, this target does not yet support writing. */
+}
+
+/* Returns 1 or 0 for success/failure.
+ *
+ * Reads exactly 'len' bytes through an internal sds buffer, growing or
+ * compacting it as needed and issuing blocking connRead calls until enough
+ * data is buffered. Fails with errno = EOVERFLOW when the request would
+ * exceed read_limit, and converts EWOULDBLOCK (SO_RCVTIMEO on a blocking
+ * socket) to ETIMEDOUT. A connRead of 0 (peer closed) also fails.
+ * NOTE(review): on success the function returns 'len', not 1, unlike the
+ * other read targets -- callers appear to only test for non-zero. */
+static size_t rioConnRead(rio *r, void *buf, size_t len) {
+ size_t avail = sdslen(r->io.conn.buf)-r->io.conn.pos;
+
+ /* If the buffer is too small for the entire request: realloc. */
+ if (sdslen(r->io.conn.buf) + sdsavail(r->io.conn.buf) < len)
+ r->io.conn.buf = sdsMakeRoomFor(r->io.conn.buf, len - sdslen(r->io.conn.buf));
+
+ /* If the remaining unused buffer is not large enough: memmove so that we
+ * can read the rest. */
+ if (len > avail && sdsavail(r->io.conn.buf) < len - avail) {
+ sdsrange(r->io.conn.buf, r->io.conn.pos, -1);
+ r->io.conn.pos = 0;
+ }
+
+ /* Make sure the caller didn't request to read past the limit.
+ * If they didn't we'll buffer till the limit, if they did, we'll
+ * return an error. */
+ if (r->io.conn.read_limit != 0 && r->io.conn.read_limit < r->io.conn.read_so_far + len) {
+ errno = EOVERFLOW;
+ return 0;
+ }
+
+ /* If we don't already have all the data in the sds, read more */
+ while (len > sdslen(r->io.conn.buf) - r->io.conn.pos) {
+ size_t buffered = sdslen(r->io.conn.buf) - r->io.conn.pos;
+ size_t needs = len - buffered;
+ /* Read either what's missing, or PROTO_IOBUF_LEN, the bigger of
+ * the two. */
+ size_t toread = needs < PROTO_IOBUF_LEN ? PROTO_IOBUF_LEN: needs;
+ if (toread > sdsavail(r->io.conn.buf)) toread = sdsavail(r->io.conn.buf);
+ if (r->io.conn.read_limit != 0 &&
+ r->io.conn.read_so_far + buffered + toread > r->io.conn.read_limit)
+ {
+ toread = r->io.conn.read_limit - r->io.conn.read_so_far - buffered;
+ }
+ int retval = connRead(r->io.conn.conn,
+ (char*)r->io.conn.buf + sdslen(r->io.conn.buf),
+ toread);
+ if (retval == 0) {
+ return 0;
+ } else if (retval < 0) {
+ if (connLastErrorRetryable(r->io.conn.conn)) continue;
+ if (errno == EWOULDBLOCK) errno = ETIMEDOUT;
+ return 0;
+ }
+ sdsIncrLen(r->io.conn.buf, retval);
+ }
+
+ memcpy(buf, (char*)r->io.conn.buf + r->io.conn.pos, len);
+ r->io.conn.read_so_far += len;
+ r->io.conn.pos += len;
+ return len;
+}
+
+/* Returns read/write position: total bytes consumed from the connection. */
+static off_t rioConnTell(rio *r) {
+ return r->io.conn.read_so_far;
+}
+
+/* Flushes any buffer to target device if applicable. Returns 1 on success
+ * and 0 on failures. NOTE(review): delegates to rioConnWrite, which always
+ * returns 0 -- so flushing a conn rio always reports failure. */
+static int rioConnFlush(rio *r) {
+ /* Our flush is implemented by the write method, that recognizes a
+ * buffer set to NULL with a count of zero as a flush request. */
+ return rioConnWrite(r,NULL,0);
+}
+
+/* Template rio object for read-only connection streams. */
+static const rio rioConnIO = {
+ rioConnRead,
+ rioConnWrite,
+ rioConnTell,
+ rioConnFlush,
+ NULL, /* update_checksum */
+ 0, /* current checksum */
+ 0, /* flags */
+ 0, /* bytes read or written */
+ 0, /* read/write chunk size */
+ { { NULL, 0 } } /* union for io-specific vars */
+};
+
+/* Create an RIO that implements a buffered read from an fd
+ * read_limit argument stops buffering when the reaching the limit
+ * (0 means unlimited). The internal sds buffer is pre-sized to
+ * PROTO_IOBUF_LEN but starts logically empty. */
+void rioInitWithConn(rio *r, connection *conn, size_t read_limit) {
+ *r = rioConnIO;
+ r->io.conn.conn = conn;
+ r->io.conn.pos = 0;
+ r->io.conn.read_limit = read_limit;
+ r->io.conn.read_so_far = 0;
+ r->io.conn.buf = sdsnewlen(NULL, PROTO_IOBUF_LEN);
+ sdsclear(r->io.conn.buf);
+}
+
+/* Release the RIO stream. Optionally returns the unread buffered data
+ * when the SDS pointer 'remaining' is passed (ownership of the sds then
+ * transfers to the caller); otherwise the buffer is freed. The connection
+ * itself is not closed. */
+void rioFreeConn(rio *r, sds *remaining) {
+ if (remaining && (size_t)r->io.conn.pos < sdslen(r->io.conn.buf)) {
+ if (r->io.conn.pos > 0) sdsrange(r->io.conn.buf, r->io.conn.pos, -1);
+ *remaining = r->io.conn.buf;
+ } else {
+ sdsfree(r->io.conn.buf);
+ if (remaining) *remaining = NULL;
+ }
+ r->io.conn.buf = NULL;
+}
+
+/* ------------------- File descriptor implementation ------------------
+ * This target is used to write the RDB file to pipe, when the master just
+ * streams the data to the replicas without creating an RDB on-disk image
+ * (diskless replication option).
+ * It only implements writes. */
+
+/* Returns 1 or 0 for success/failure.
+ *
+ * When buf is NULL and len is 0, the function performs a flush operation
+ * if there is some pending buffer, so this function is also used in order
+ * to implement rioFdFlush().
+ *
+ * Small writes (<= PROTO_IOBUF_LEN) accumulate in r->io.fd.buf and are only
+ * pushed to the fd once the buffer grows past the threshold; larger writes
+ * first flush any pending buffer (recursively) and then go to the fd
+ * directly. The write(2) loop retries on EINTR and maps EWOULDBLOCK (from
+ * SO_SNDTIMEO on the blocking socket) to ETIMEDOUT. */
+static size_t rioFdWrite(rio *r, const void *buf, size_t len) {
+ ssize_t retval;
+ unsigned char *p = (unsigned char*) buf;
+ int doflush = (buf == NULL && len == 0);
+
+ /* For small writes, we rather keep the data in user-space buffer, and flush
+ * it only when it grows. however for larger writes, we prefer to flush
+ * any pre-existing buffer, and write the new one directly without reallocs
+ * and memory copying. */
+ if (len > PROTO_IOBUF_LEN) {
+ /* First, flush any pre-existing buffered data. */
+ if (sdslen(r->io.fd.buf)) {
+ if (rioFdWrite(r, NULL, 0) == 0)
+ return 0;
+ }
+ /* Write the new data, keeping 'p' and 'len' from the input. */
+ } else {
+ if (len) {
+ r->io.fd.buf = sdscatlen(r->io.fd.buf,buf,len);
+ if (sdslen(r->io.fd.buf) > PROTO_IOBUF_LEN)
+ doflush = 1;
+ if (!doflush)
+ return 1;
+ }
+ /* Flushing the buffered data. set 'p' and 'len' accordingly. */
+ p = (unsigned char*) r->io.fd.buf;
+ len = sdslen(r->io.fd.buf);
+ }
+
+ size_t nwritten = 0;
+ while(nwritten != len) {
+ retval = write(r->io.fd.fd,p+nwritten,len-nwritten);
+ if (retval <= 0) {
+ if (retval == -1 && errno == EINTR) continue;
+ /* With blocking io, which is the sole user of this
+ * rio target, EWOULDBLOCK is returned only because of
+ * the SO_SNDTIMEO socket option, so we translate the error
+ * into one more recognizable by the user. */
+ if (retval == -1 && errno == EWOULDBLOCK) errno = ETIMEDOUT;
+ return 0; /* error. */
+ }
+ nwritten += retval;
+ }
+
+ r->io.fd.pos += len;
+ sdsclear(r->io.fd.buf);
+ return 1;
+}
+
+/* Reading is not supported by the fd target; always fails. */
+static size_t rioFdRead(rio *r, void *buf, size_t len) {
+ UNUSED(r);
+ UNUSED(buf);
+ UNUSED(len);
+ return 0; /* Error, this target does not support reading. */
+}
+
+/* Returns write position: total bytes flushed to the fd so far (data still
+ * pending in the user-space buffer is not counted). */
+static off_t rioFdTell(rio *r) {
+ return r->io.fd.pos;
+}
+
+/* Flushes any buffer to target device if applicable. Returns 1 on success
+ * and 0 on failures. */
+static int rioFdFlush(rio *r) {
+ /* Our flush is implemented by the write method, that recognizes a
+ * buffer set to NULL with a count of zero as a flush request. */
+ return rioFdWrite(r,NULL,0);
+}
+
+/* Template rio object for write-only file-descriptor streams. */
+static const rio rioFdIO = {
+ rioFdRead,
+ rioFdWrite,
+ rioFdTell,
+ rioFdFlush,
+ NULL, /* update_checksum */
+ 0, /* current checksum */
+ 0, /* flags */
+ 0, /* bytes read or written */
+ 0, /* read/write chunk size */
+ { { NULL, 0 } } /* union for io-specific vars */
+};
+
+/* Initialize 'r' as a write-only rio over 'fd' with an empty user-space
+ * buffer. The caller keeps ownership of the fd. */
+void rioInitWithFd(rio *r, int fd) {
+ *r = rioFdIO;
+ r->io.fd.fd = fd;
+ r->io.fd.pos = 0;
+ r->io.fd.buf = sdsempty();
+}
+
+/* release the rio stream. Frees only the auxiliary buffer; the fd itself
+ * is not closed and any unflushed data is discarded. */
+void rioFreeFd(rio *r) {
+ sdsfree(r->io.fd.buf);
+}
+
+/* ---------------------------- Generic functions ---------------------------- */
+
+/* This function can be installed both in memory and file streams when checksum
+ * computation is needed. Folds 'len' bytes of 'buf' into the running CRC64
+ * stored in r->cksum. */
+void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) {
+ r->cksum = crc64(r->cksum,buf,len);
+}
+
+/* Set the file-based rio object to auto-fsync every 'bytes' file written.
+ * By default this is set to zero that means no automatic file sync is
+ * performed.
+ *
+ * This feature is useful in a few contexts since when we rely on OS write
+ * buffers sometimes the OS buffers way too much, resulting in too many
+ * disk I/O concentrated in very little time. When we fsync in an explicit
+ * way instead the I/O pressure is more distributed across time.
+ * Silently ignored for non-file rio objects. */
+void rioSetAutoSync(rio *r, off_t bytes) {
+ if(r->write != rioFileIO.write) return;
+ r->io.file.autosync = bytes;
+}
+
+/* Set the file-based rio object to reclaim cache after every auto-sync.
+ * In the Linux implementation POSIX_FADV_DONTNEED skips the dirty
+ * pages, so if auto sync is unset this option will have no effect.
+ *
+ * This feature can reduce the cache footprint backed by the file.
+ * NOTE(review): unlike rioSetAutoSync, there is no guard that 'r' is a
+ * file rio -- calling this on another target writes into the union. */
+void rioSetReclaimCache(rio *r, int enabled) {
+ r->io.file.reclaim_cache = enabled;
+}
+
+/* Check the type of rio, identified by its installed read method. */
+uint8_t rioCheckType(rio *r) {
+ if (r->read == rioFileRead) {
+ return RIO_TYPE_FILE;
+ } else if (r->read == rioBufferRead) {
+ return RIO_TYPE_BUFFER;
+ } else if (r->read == rioConnRead) {
+ return RIO_TYPE_CONN;
+ } else {
+ /* r->read == rioFdRead */
+ return RIO_TYPE_FD;
+ }
+}
+
+/* --------------------------- Higher level interface --------------------------
+ *
+ * The following higher level functions use lower level rio.c functions to help
+ * generating the Redis protocol for the Append Only File. */
+
+/* Write multi bulk count in the format: "*<count>\r\n" (the prefix char is
+ * caller-supplied, e.g. '*' or '$'). Returns the number of bytes written,
+ * or 0 on error. */
+size_t rioWriteBulkCount(rio *r, char prefix, long count) {
+ char cbuf[128];
+ int clen;
+
+ cbuf[0] = prefix;
+ clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,count);
+ cbuf[clen++] = '\r';
+ cbuf[clen++] = '\n';
+ if (rioWrite(r,cbuf,clen) == 0) return 0;
+ return clen;
+}
+
+/* Write binary-safe string in the format: "$<count>\r\n<payload>\r\n".
+ * Returns the total number of bytes written, or 0 on error. */
+size_t rioWriteBulkString(rio *r, const char *buf, size_t len) {
+ size_t nwritten;
+
+ if ((nwritten = rioWriteBulkCount(r,'$',len)) == 0) return 0;
+ if (len > 0 && rioWrite(r,buf,len) == 0) return 0;
+ if (rioWrite(r,"\r\n",2) == 0) return 0;
+ return nwritten+len+2;
+}
+
+/* Write a long long value in format: "$<count>\r\n<payload>\r\n". */
+size_t rioWriteBulkLongLong(rio *r, long long l) {
+ char lbuf[32];
+ unsigned int llen;
+
+ llen = ll2string(lbuf,sizeof(lbuf),l);
+ return rioWriteBulkString(r,lbuf,llen);
+}
+
+/* Write a double value in the format: "$<count>\r\n<payload>\r\n".
+ * The buffer is NUL-terminated after conversion (presumably fpconv_dtoa
+ * does not terminate -- the terminator itself is not written to the
+ * stream, only 'dlen' payload bytes are). */
+size_t rioWriteBulkDouble(rio *r, double d) {
+ char dbuf[128];
+ unsigned int dlen;
+ dlen = fpconv_dtoa(d, dbuf);
+ dbuf[dlen] = '\0';
+ return rioWriteBulkString(r,dbuf,dlen);
+}
diff --git a/src/rio.h b/src/rio.h
new file mode 100644
index 0000000..9dd59d3
--- /dev/null
+++ b/src/rio.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2019, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef __REDIS_RIO_H
+#define __REDIS_RIO_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include "sds.h"
+#include "connection.h"
+
+#define RIO_FLAG_READ_ERROR (1<<0)
+#define RIO_FLAG_WRITE_ERROR (1<<1)
+
+#define RIO_TYPE_FILE (1<<0)
+#define RIO_TYPE_BUFFER (1<<1)
+#define RIO_TYPE_CONN (1<<2)
+#define RIO_TYPE_FD (1<<3)
+
+struct _rio {
+    /* Backend functions.
+     * Since these functions do not tolerate short writes or reads the return
+     * value is simplified to: zero on error, non zero on complete success. */
+    size_t (*read)(struct _rio *, void *buf, size_t len);
+    size_t (*write)(struct _rio *, const void *buf, size_t len);
+    off_t (*tell)(struct _rio *);
+    int (*flush)(struct _rio *);
+    /* The update_cksum method if not NULL is used to compute the checksum of
+     * all the data that was read or written so far. The method should be
+     * designed so that can be called with the current checksum, and the buf
+     * and len fields pointing to the new block of data to add to the checksum
+     * computation. */
+    void (*update_cksum)(struct _rio *, const void *buf, size_t len);
+
+    /* The current checksum and flags (see RIO_FLAG_*) */
+    uint64_t cksum, flags;
+
+    /* number of bytes read or written */
+    size_t processed_bytes;
+
+    /* maximum single read or write chunk size (0 = unlimited) */
+    size_t max_processing_chunk;
+
+    /* Backend-specific vars. Only the member matching the backend the rio
+     * was initialized with (see rioInitWith*) is valid. */
+    union {
+        /* In-memory buffer target. */
+        struct {
+            sds ptr;
+            off_t pos;
+        } buffer;
+        /* Stdio file pointer target. */
+        struct {
+            FILE *fp;
+            off_t buffered; /* Bytes written since last fsync. */
+            off_t autosync; /* fsync after 'autosync' bytes written. */
+            unsigned reclaim_cache:1; /* A flag to indicate reclaim cache after fsync */
+        } file;
+        /* Connection object (used to read from socket) */
+        struct {
+            connection *conn;   /* Connection */
+            off_t pos;          /* pos in buf that was returned */
+            sds buf;            /* buffered data */
+            size_t read_limit;  /* don't allow to buffer/read more than that */
+            size_t read_so_far; /* amount of data read from the rio (not buffered) */
+        } conn;
+        /* FD target (used to write to pipe). */
+        struct {
+            int fd;       /* File descriptor. */
+            off_t pos;
+            sds buf;
+        } fd;
+    } io;
+};
+
+typedef struct _rio rio;
+
+/* The following functions are our interface with the stream. They'll call the
+ * actual implementation of read / write / tell, and will update the checksum
+ * if needed. */
+
+/* Write 'len' bytes from 'buf' to the stream. Returns 1 on complete success,
+ * 0 on error (RIO_FLAG_WRITE_ERROR is also set and makes any further write
+ * fail fast). Writes are split in chunks of at most max_processing_chunk. */
+static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
+    if (r->flags & RIO_FLAG_WRITE_ERROR) return 0;
+    while (len) {
+        size_t bytes_to_write = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
+        /* On writes the checksum is updated before emitting the data. */
+        if (r->update_cksum) r->update_cksum(r,buf,bytes_to_write);
+        if (r->write(r,buf,bytes_to_write) == 0) {
+            r->flags |= RIO_FLAG_WRITE_ERROR;
+            return 0;
+        }
+        buf = (char*)buf + bytes_to_write;
+        len -= bytes_to_write;
+        r->processed_bytes += bytes_to_write;
+    }
+    return 1;
+}
+
+/* Read exactly 'len' bytes from the stream into 'buf'. Returns 1 on complete
+ * success, 0 on error (RIO_FLAG_READ_ERROR is also set and makes any further
+ * read fail fast). Reads are split in chunks of at most max_processing_chunk. */
+static inline size_t rioRead(rio *r, void *buf, size_t len) {
+    if (r->flags & RIO_FLAG_READ_ERROR) return 0;
+    while (len) {
+        size_t bytes_to_read = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
+        if (r->read(r,buf,bytes_to_read) == 0) {
+            r->flags |= RIO_FLAG_READ_ERROR;
+            return 0;
+        }
+        /* On reads the checksum is updated after the data is obtained. */
+        if (r->update_cksum) r->update_cksum(r,buf,bytes_to_read);
+        buf = (char*)buf + bytes_to_read;
+        len -= bytes_to_read;
+        r->processed_bytes += bytes_to_read;
+    }
+    return 1;
+}
+
+/* Return the current offset in the stream, as reported by the backend. */
+static inline off_t rioTell(rio *r) {
+    return r->tell(r);
+}
+
+/* Flush buffered data to the backend. Returns the backend's result
+ * (non-zero on success, zero on error). */
+static inline int rioFlush(rio *r) {
+    return r->flush(r);
+}
+
+/* This function allows to know if there was a read error in any past
+ * operation, since the rio stream was created or since the last call
+ * to rioClearErrors(). */
+static inline int rioGetReadError(rio *r) {
+    return (r->flags & RIO_FLAG_READ_ERROR) != 0;
+}
+
+/* Like rioGetReadError() but for write errors. */
+static inline int rioGetWriteError(rio *r) {
+    return (r->flags & RIO_FLAG_WRITE_ERROR) != 0;
+}
+
+/* Clear the read/write error sticky flags so the rio can be used again. */
+static inline void rioClearErrors(rio *r) {
+    r->flags &= ~(RIO_FLAG_READ_ERROR|RIO_FLAG_WRITE_ERROR);
+}
+
+/* Initialization against the different backends (see the io union). */
+void rioInitWithFile(rio *r, FILE *fp);
+void rioInitWithBuffer(rio *r, sds s);
+void rioInitWithConn(rio *r, connection *conn, size_t read_limit);
+void rioInitWithFd(rio *r, int fd);
+
+/* Release of backend-owned resources (the conn variant may return the
+ * still-buffered data to the caller). */
+void rioFreeFd(rio *r);
+void rioFreeConn(rio *r, sds* out_remainingBufferedData);
+
+/* Higher level helpers emitting Redis protocol (used for the AOF). */
+size_t rioWriteBulkCount(rio *r, char prefix, long count);
+size_t rioWriteBulkString(rio *r, const char *buf, size_t len);
+size_t rioWriteBulkLongLong(rio *r, long long l);
+size_t rioWriteBulkDouble(rio *r, double d);
+
+struct redisObject;
+int rioWriteBulkObject(rio *r, struct redisObject *obj);
+
+void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len);
+void rioSetAutoSync(rio *r, off_t bytes);
+void rioSetReclaimCache(rio *r, int enabled);
+uint8_t rioCheckType(rio *r);
+#endif
diff --git a/src/script.c b/src/script.c
new file mode 100644
index 0000000..6a798a6
--- /dev/null
+++ b/src/script.c
@@ -0,0 +1,578 @@
+/*
+ * Copyright (c) 2009-2021, Redis Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "script.h"
+#include "cluster.h"
+
+/* Mapping between script flag bits and their string names; the array is
+ * terminated by a {0, NULL} entry so callers can iterate without a length. */
+scriptFlag scripts_flags_def[] = {
+    {.flag = SCRIPT_FLAG_NO_WRITES, .str = "no-writes"},
+    {.flag = SCRIPT_FLAG_ALLOW_OOM, .str = "allow-oom"},
+    {.flag = SCRIPT_FLAG_ALLOW_STALE, .str = "allow-stale"},
+    {.flag = SCRIPT_FLAG_NO_CLUSTER, .str = "no-cluster"},
+    {.flag = SCRIPT_FLAG_ALLOW_CROSS_SLOT, .str = "allow-cross-slot-keys"},
+    {.flag = 0, .str = NULL}, /* flags array end */
+};
+
+/* On script invocation, holding the current run context */
+static scriptRunCtx *curr_run_ctx = NULL;
+
+/* Leave the "busy script" state: clear the timedout flag, end the blocking
+ * operation bookkeeping, and resume processing of the master link if any. */
+static void exitScriptTimedoutMode(scriptRunCtx *run_ctx) {
+    serverAssert(run_ctx == curr_run_ctx);
+    serverAssert(scriptIsTimedout());
+    run_ctx->flags &= ~SCRIPT_TIMEDOUT;
+    blockingOperationEnds();
+    /* if we are a replica and we have an active master, set it for continue processing */
+    if (server.masterhost && server.master) queueClientForReprocessing(server.master);
+}
+
+/* Enter the "busy script" state once the script exceeded the busy reply
+ * threshold; counterpart of exitScriptTimedoutMode(). */
+static void enterScriptTimedoutMode(scriptRunCtx *run_ctx) {
+    serverAssert(run_ctx == curr_run_ctx);
+    serverAssert(!scriptIsTimedout());
+    /* Mark script as timedout */
+    run_ctx->flags |= SCRIPT_TIMEDOUT;
+    blockingOperationStarts();
+}
+
+/* Return non-zero if a script is currently running AND has already been
+ * marked as timedout (exceeded the busy reply threshold). */
+int scriptIsTimedout(void) {
+    return scriptIsRunning() && (curr_run_ctx->flags & SCRIPT_TIMEDOUT);
+}
+
+/* Return the fake engine client commands are executed on.
+ * Must only be called while a script is running. */
+client* scriptGetClient(void) {
+    serverAssert(scriptIsRunning());
+    return curr_run_ctx->c;
+}
+
+/* Return the real client that invoked the script.
+ * Must only be called while a script is running. */
+client* scriptGetCaller(void) {
+    serverAssert(scriptIsRunning());
+    return curr_run_ctx->original_client;
+}
+
+/* Interrupt function for scripts: should be called from time to time while a
+ * script runs, to reply to some special commands (like ping) and to check if
+ * the run should be terminated. Returns SCRIPT_KILL when the script was
+ * marked for killing, SCRIPT_CONTINUE otherwise. */
+int scriptInterrupt(scriptRunCtx *run_ctx) {
+    if (run_ctx->flags & SCRIPT_TIMEDOUT) {
+        /* script already timedout
+           we just need to process some events and return */
+        processEventsWhileBlocked();
+        return (run_ctx->flags & SCRIPT_KILLED) ? SCRIPT_KILL : SCRIPT_CONTINUE;
+    }
+
+    long long elapsed = elapsedMs(run_ctx->start_time);
+    if (elapsed < server.busy_reply_threshold) {
+        return SCRIPT_CONTINUE;
+    }
+
+    serverLog(LL_WARNING,
+            "Slow script detected: still in execution after %lld milliseconds. "
+            "You can try killing the script using the %s command. Script name is: %s.",
+            elapsed, (run_ctx->flags & SCRIPT_EVAL_MODE) ? "SCRIPT KILL" : "FUNCTION KILL", run_ctx->funcname);
+
+    enterScriptTimedoutMode(run_ctx);
+    /* Once the script timeouts we reenter the event loop to permit others
+     * some commands execution. For this reason
+     * we need to mask the client executing the script from the event loop.
+     * If we don't do that the client may disconnect and could no longer be
+     * here when the EVAL command will return. */
+    protectClient(run_ctx->original_client);
+
+    processEventsWhileBlocked();
+
+    return (run_ctx->flags & SCRIPT_KILLED) ? SCRIPT_KILL : SCRIPT_CONTINUE;
+}
+
+/* Translate declared script flags into the command flags the calling
+ * EVAL/FCALL command should effectively run with. */
+uint64_t scriptFlagsToCmdFlags(uint64_t cmd_flags, uint64_t script_flags) {
+    /* If the script declared flags, clear the ones from the command and use the ones it declared.*/
+    cmd_flags &= ~(CMD_STALE | CMD_DENYOOM | CMD_WRITE);
+
+    /* NO_WRITES implies ALLOW_OOM */
+    if (!(script_flags & (SCRIPT_FLAG_ALLOW_OOM | SCRIPT_FLAG_NO_WRITES)))
+        cmd_flags |= CMD_DENYOOM;
+    if (!(script_flags & SCRIPT_FLAG_NO_WRITES))
+        cmd_flags |= CMD_WRITE;
+    if (script_flags & SCRIPT_FLAG_ALLOW_STALE)
+        cmd_flags |= CMD_STALE;
+
+    /* In addition the MAY_REPLICATE flag is set for these commands, but
+     * if we have flags we know if it's gonna do any writes or not. */
+    cmd_flags &= ~CMD_MAY_REPLICATE;
+
+    return cmd_flags;
+}
+
+/* Prepare the given run ctx for execution.
+ * Returns C_OK on success. Returns C_ERR if execution must not proceed, in
+ * which case an error reply was already sent to 'caller'. On success the
+ * global curr_run_ctx is set, so scriptResetRun() must be called when done. */
+int scriptPrepareForRun(scriptRunCtx *run_ctx, client *engine_client, client *caller, const char *funcname, uint64_t script_flags, int ro) {
+    serverAssert(!curr_run_ctx);
+    int client_allow_oom = !!(caller->flags & CLIENT_ALLOW_OOM);
+
+    /* True when we are a replica with a broken master link and serving
+     * stale data is disabled by configuration. */
+    int running_stale = server.masterhost &&
+            server.repl_state != REPL_STATE_CONNECTED &&
+            server.repl_serve_stale_data == 0;
+    int obey_client = mustObeyClient(caller);
+
+    if (!(script_flags & SCRIPT_FLAG_EVAL_COMPAT_MODE)) {
+        if ((script_flags & SCRIPT_FLAG_NO_CLUSTER) && server.cluster_enabled) {
+            addReplyError(caller, "Can not run script on cluster, 'no-cluster' flag is set.");
+            return C_ERR;
+        }
+
+        if (running_stale && !(script_flags & SCRIPT_FLAG_ALLOW_STALE)) {
+            addReplyError(caller, "-MASTERDOWN Link with MASTER is down, "
+                                  "replica-serve-stale-data is set to 'no' "
+                                  "and 'allow-stale' flag is not set on the script.");
+            return C_ERR;
+        }
+
+        if (!(script_flags & SCRIPT_FLAG_NO_WRITES)) {
+            /* Script may perform writes we need to verify:
+             * 1. we are not a readonly replica
+             * 2. no disk error detected
+             * 3. command is not `fcall_ro`/`eval[sha]_ro` */
+            if (server.masterhost && server.repl_slave_ro && !obey_client) {
+                addReplyError(caller, "-READONLY Can not run script with write flag on readonly replica");
+                return C_ERR;
+            }
+
+            /* Deny writes if we're unable to persist. */
+            int deny_write_type = writeCommandsDeniedByDiskError();
+            if (deny_write_type != DISK_ERROR_TYPE_NONE && !obey_client) {
+                if (deny_write_type == DISK_ERROR_TYPE_RDB)
+                    addReplyError(caller, "-MISCONF Redis is configured to save RDB snapshots, "
+                                          "but it's currently unable to persist to disk. "
+                                          "Writable scripts are blocked. Use 'no-writes' flag for read only scripts.");
+                else
+                    addReplyErrorFormat(caller, "-MISCONF Redis is configured to persist data to AOF, "
+                                                "but it's currently unable to persist to disk. "
+                                                "Writable scripts are blocked. Use 'no-writes' flag for read only scripts. "
+                                                "AOF error: %s", strerror(server.aof_last_write_errno));
+                return C_ERR;
+            }
+
+            if (ro) {
+                addReplyError(caller, "Can not execute a script with write flag using *_ro command.");
+                return C_ERR;
+            }
+
+            /* Don't accept write commands if there are not enough good slaves and
+             * user configured the min-slaves-to-write option. */
+            if (!checkGoodReplicasStatus()) {
+                addReplyErrorObject(caller, shared.noreplicaserr);
+                return C_ERR;
+            }
+        }
+
+        /* Check OOM state. the no-writes flag imply allow-oom. we tested it
+         * after the no-write error, so no need to mention it in the error reply. */
+        if (!client_allow_oom && server.pre_command_oom_state && server.maxmemory &&
+            !(script_flags & (SCRIPT_FLAG_ALLOW_OOM|SCRIPT_FLAG_NO_WRITES)))
+        {
+            addReplyError(caller, "-OOM allow-oom flag is not set on the script, "
+                                  "can not run it when used memory > 'maxmemory'");
+            return C_ERR;
+        }
+
+    } else {
+        /* Special handling for backwards compatibility (no shebang eval[sha]) mode */
+        if (running_stale) {
+            addReplyErrorObject(caller, shared.masterdownerr);
+            return C_ERR;
+        }
+    }
+
+    run_ctx->c = engine_client;
+    run_ctx->original_client = caller;
+    run_ctx->funcname = funcname;
+
+    client *script_client = run_ctx->c;
+    client *curr_client = run_ctx->original_client;
+
+    /* Select the right DB in the context of the Lua client */
+    selectDb(script_client, curr_client->db->id);
+    script_client->resp = 2; /* Default is RESP2, scripts can change it. */
+
+    /* If we are in MULTI context, flag Lua client as CLIENT_MULTI. */
+    if (curr_client->flags & CLIENT_MULTI) {
+        script_client->flags |= CLIENT_MULTI;
+    }
+
+    run_ctx->start_time = getMonotonicUs();
+
+    run_ctx->flags = 0;
+    run_ctx->repl_flags = PROPAGATE_AOF | PROPAGATE_REPL;
+
+    if (ro || (!(script_flags & SCRIPT_FLAG_EVAL_COMPAT_MODE) && (script_flags & SCRIPT_FLAG_NO_WRITES))) {
+        /* On fcall_ro or on functions that do not have the 'write'
+         * flag, we will not allow write commands. */
+        run_ctx->flags |= SCRIPT_READ_ONLY;
+    }
+    if (client_allow_oom || (!(script_flags & SCRIPT_FLAG_EVAL_COMPAT_MODE) && (script_flags & SCRIPT_FLAG_ALLOW_OOM))) {
+        /* Note: we don't need to test the no-writes flag here and set this run_ctx flag,
+         * since only write commands can be deny-oom. */
+        run_ctx->flags |= SCRIPT_ALLOW_OOM;
+    }
+
+    if ((script_flags & SCRIPT_FLAG_EVAL_COMPAT_MODE) || (script_flags & SCRIPT_FLAG_ALLOW_CROSS_SLOT)) {
+        run_ctx->flags |= SCRIPT_ALLOW_CROSS_SLOT;
+    }
+
+    /* set the curr_run_ctx so we can use it to kill the script if needed */
+    curr_run_ctx = run_ctx;
+
+    return C_OK;
+}
+
+/* Reset the given run ctx after execution. Counterpart of
+ * scriptPrepareForRun(): undoes the timedout/protected client state if needed
+ * and clears the global curr_run_ctx. */
+void scriptResetRun(scriptRunCtx *run_ctx) {
+    serverAssert(curr_run_ctx);
+
+    /* After the script done, remove the MULTI state. */
+    run_ctx->c->flags &= ~CLIENT_MULTI;
+
+    if (scriptIsTimedout()) {
+        exitScriptTimedoutMode(run_ctx);
+        /* Restore the client that was protected when the script timeout
+         * was detected. */
+        unprotectClient(run_ctx->original_client);
+    }
+
+    preventCommandPropagation(run_ctx->original_client);
+
+    /* unset curr_run_ctx so we will know there is no running script */
+    curr_run_ctx = NULL;
+}
+
+/* Return true if a script is currently running. */
+int scriptIsRunning(void) {
+    return curr_run_ctx != NULL;
+}
+
+/* Return the name of the currently running script/function.
+ * Must only be called while a script is running. */
+const char* scriptCurrFunction(void) {
+    serverAssert(scriptIsRunning());
+    return curr_run_ctx->funcname;
+}
+
+/* Return non-zero if the running script was invoked via legacy EVAL (as
+ * opposed to FUNCTION). Must only be called while a script is running. */
+int scriptIsEval(void) {
+    serverAssert(scriptIsRunning());
+    return curr_run_ctx->flags & SCRIPT_EVAL_MODE;
+}
+
+/* Kill the current running script, replying to 'c' with the outcome.
+ * 'is_eval' distinguishes SCRIPT KILL (non-zero) from FUNCTION KILL (zero);
+ * each may only kill the matching kind of script. A script that already
+ * performed writes, or one sent by the master, cannot be killed. */
+void scriptKill(client *c, int is_eval) {
+    if (!curr_run_ctx) {
+        addReplyError(c, "-NOTBUSY No scripts in execution right now.");
+        return;
+    }
+    if (mustObeyClient(curr_run_ctx->original_client)) {
+        addReplyError(c,
+                "-UNKILLABLE The busy script was sent by a master instance in the context of replication and cannot be killed.");
+        return;
+    }
+    if (curr_run_ctx->flags & SCRIPT_WRITE_DIRTY) {
+        addReplyError(c,
+                "-UNKILLABLE Sorry the script already executed write "
+                "commands against the dataset. You can either wait the "
+                "script termination or kill the server in a hard way "
+                "using the SHUTDOWN NOSAVE command.");
+        return;
+    }
+    if (is_eval && !(curr_run_ctx->flags & SCRIPT_EVAL_MODE)) {
+        /* Killing a function with 'SCRIPT KILL' is not allowed */
+        addReplyErrorObject(c, shared.slowscripterr);
+        return;
+    }
+    if (!is_eval && (curr_run_ctx->flags & SCRIPT_EVAL_MODE)) {
+        /* Killing an eval with 'FUNCTION KILL' is not allowed */
+        addReplyErrorObject(c, shared.slowevalerr);
+        return;
+    }
+    /* The flag is observed by scriptInterrupt() which returns SCRIPT_KILL. */
+    curr_run_ctx->flags |= SCRIPT_KILLED;
+    addReply(c, shared.ok);
+}
+
+/* Verify 'cmd' exists and 'argc' matches its arity. On failure sets '*err'
+ * (an sds the caller must free) and returns C_ERR. */
+static int scriptVerifyCommandArity(struct redisCommand *cmd, int argc, sds *err) {
+    if (!cmd || ((cmd->arity > 0 && cmd->arity != argc) || (argc < -cmd->arity))) {
+        if (cmd)
+            *err = sdsnew("Wrong number of args calling Redis command from script");
+        else
+            *err = sdsnew("Unknown Redis command called from script");
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* Verify client 'c' passes the ACL checks for its current command. On
+ * failure an ACL log entry is recorded, '*err' is set (an sds the caller
+ * must free) and C_ERR is returned. */
+static int scriptVerifyACL(client *c, sds *err) {
+    /* Check the ACLs. */
+    int acl_errpos;
+    int acl_retval = ACLCheckAllPerm(c, &acl_errpos);
+    if (acl_retval != ACL_OK) {
+        addACLLogEntry(c,acl_retval,ACL_LOG_CTX_LUA,acl_errpos,NULL,NULL);
+        sds msg = getAclErrorMessage(acl_retval, c->user, c->cmd, c->argv[acl_errpos]->ptr, 0);
+        *err = sdscatsds(sdsnew("ACL failure in script: "), msg);
+        sdsfree(msg);
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* Verify the current command may perform writes given the script and server
+ * state. On failure sets '*err' (an sds the caller must free) and returns
+ * C_ERR. Note: takes 'sds *err' like the sibling verification helpers
+ * (scriptVerifyACL/ClusterState/AllowStale) for consistency; all values
+ * assigned through it are sds strings. */
+static int scriptVerifyWriteCommandAllow(scriptRunCtx *run_ctx, sds *err) {
+
+    /* A write command, on an RO command or an RO script is rejected ASAP.
+     * Note: For scripts, we consider may-replicate commands as write commands.
+     * This also makes it possible to allow read-only scripts to be run during
+     * CLIENT PAUSE WRITE. */
+    if (run_ctx->flags & SCRIPT_READ_ONLY &&
+        (run_ctx->c->cmd->flags & (CMD_WRITE|CMD_MAY_REPLICATE)))
+    {
+        *err = sdsnew("Write commands are not allowed from read-only scripts.");
+        return C_ERR;
+    }
+
+    /* The other checks below are on the server state and are only relevant for
+     * write commands, return if this is not a write command. */
+    if (!(run_ctx->c->cmd->flags & CMD_WRITE))
+        return C_OK;
+
+    /* If the script already made a modification to the dataset, we can't
+     * fail it on unpredictable error state. */
+    if ((run_ctx->flags & SCRIPT_WRITE_DIRTY))
+        return C_OK;
+
+    /* Write commands are forbidden against read-only slaves, or if a
+     * command marked as non-deterministic was already called in the context
+     * of this script. */
+    int deny_write_type = writeCommandsDeniedByDiskError();
+
+    if (server.masterhost && server.repl_slave_ro &&
+        !mustObeyClient(run_ctx->original_client))
+    {
+        *err = sdsdup(shared.roslaveerr->ptr);
+        return C_ERR;
+    }
+
+    if (deny_write_type != DISK_ERROR_TYPE_NONE) {
+        *err = writeCommandsGetDiskErrorMessage(deny_write_type);
+        return C_ERR;
+    }
+
+    /* Don't accept write commands if there are not enough good slaves and
+     * user configured the min-slaves-to-write option. Note this only reachable
+     * for Eval scripts that didn't declare flags, see the other check in
+     * scriptPrepareForRun */
+    if (!checkGoodReplicasStatus()) {
+        *err = sdsdup(shared.noreplicaserr->ptr);
+        return C_ERR;
+    }
+
+    return C_OK;
+}
+
+/* Verify the current command may run given the OOM state observed before the
+ * script started. On failure sets '*err' (an sds the caller must free) and
+ * returns C_ERR. Note: takes 'sds *err' like the sibling verification
+ * helpers for consistency; the assigned value is an sds string. */
+static int scriptVerifyOOM(scriptRunCtx *run_ctx, sds *err) {
+    if (run_ctx->flags & SCRIPT_ALLOW_OOM) {
+        /* Allow running any command even if OOM reached */
+        return C_OK;
+    }
+
+    /* If we reached the memory limit configured via maxmemory, commands that
+     * could enlarge the memory usage are not allowed, but only if this is the
+     * first write in the context of this script, otherwise we can't stop
+     * in the middle. */
+
+    if (server.maxmemory &&                            /* Maxmemory is actually enabled. */
+        !mustObeyClient(run_ctx->original_client) &&   /* Don't care about mem for replicas or AOF. */
+        !(run_ctx->flags & SCRIPT_WRITE_DIRTY) &&      /* Script had no side effects so far. */
+        server.pre_command_oom_state &&                /* Detected OOM when script start. */
+        (run_ctx->c->cmd->flags & CMD_DENYOOM))
+    {
+        *err = sdsdup(shared.oomerr->ptr);
+        return C_ERR;
+    }
+
+    return C_OK;
+}
+
+/* Verify the current command only touches keys served by this cluster node,
+ * and that all accessed keys hash to a single slot unless cross-slot access
+ * was allowed. On failure sets '*err' (an sds the caller must free) and
+ * returns C_ERR. No-op outside cluster mode or for master/AOF clients. */
+static int scriptVerifyClusterState(scriptRunCtx *run_ctx, client *c, client *original_c, sds *err) {
+    if (!server.cluster_enabled || mustObeyClient(original_c)) {
+        return C_OK;
+    }
+    /* If this is a Redis Cluster node, we need to make sure the script is not
+     * trying to access non-local keys, with the exception of commands
+     * received from our master or when loading the AOF back in memory. */
+    int error_code;
+    /* Duplicate relevant flags in the script client. */
+    c->flags &= ~(CLIENT_READONLY | CLIENT_ASKING);
+    c->flags |= original_c->flags & (CLIENT_READONLY | CLIENT_ASKING);
+    int hashslot = -1;
+    if (getNodeByQuery(c, c->cmd, c->argv, c->argc, &hashslot, &error_code) != server.cluster->myself) {
+        if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) {
+            *err = sdsnew(
+                    "Script attempted to execute a write command while the "
+                    "cluster is down and readonly");
+        } else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
+            *err = sdsnew("Script attempted to execute a command while the "
+                          "cluster is down");
+        } else {
+            *err = sdsnew("Script attempted to access a non local key in a "
+                          "cluster node");
+        }
+        return C_ERR;
+    }
+
+    /* If the script declared keys in advance, the cross slot error would have
+     * already been thrown. This is only checking for cross slot keys being accessed
+     * that weren't pre-declared. */
+    if (hashslot != -1 && !(run_ctx->flags & SCRIPT_ALLOW_CROSS_SLOT)) {
+        if (original_c->slot == -1) {
+            original_c->slot = hashslot;
+        } else if (original_c->slot != hashslot) {
+            *err = sdsnew("Script attempted to access keys that do not hash to "
+                          "the same slot");
+            return C_ERR;
+        }
+    }
+    return C_OK;
+}
+
+/* Set the RESP protocol version (2 or 3) for a given run_ctx.
+ * Returns C_ERR for any other value. */
+int scriptSetResp(scriptRunCtx *run_ctx, int resp) {
+    if (resp != 2 && resp != 3) {
+        return C_ERR;
+    }
+
+    run_ctx->c->resp = resp;
+    return C_OK;
+}
+
+/* Set the propagation targets for a given run_ctx: any combination of
+ * PROPAGATE_AOF | PROPAGATE_REPL (including none). Returns C_ERR if other
+ * bits are set. */
+int scriptSetRepl(scriptRunCtx *run_ctx, int repl) {
+    if ((repl & ~(PROPAGATE_AOF | PROPAGATE_REPL)) != 0) {
+        return C_ERR;
+    }
+    run_ctx->repl_flags = repl;
+    return C_OK;
+}
+
+/* Verify the current command may run while the data may be stale (we are a
+ * replica with a broken master link). On failure sets '*err' (an sds the
+ * caller must free) and returns C_ERR. */
+static int scriptVerifyAllowStale(client *c, sds *err) {
+    if (!server.masterhost) {
+        /* Not a replica, stale is irrelevant */
+        return C_OK;
+    }
+
+    if (server.repl_state == REPL_STATE_CONNECTED) {
+        /* Link with the master is up, stale is irrelevant */
+        return C_OK;
+    }
+
+    if (server.repl_serve_stale_data == 1) {
+        /* Disconnected from the master, but serving stale data is allowed */
+        return C_OK;
+    }
+
+    if (c->cmd->flags & CMD_STALE) {
+        /* Command is allowed while stale */
+        return C_OK;
+    }
+
+    /* On stale replica, can not run the command */
+    *err = sdsnew("Can not execute the command on a stale replica");
+    return C_ERR;
+}
+
+/* Call a Redis command.
+ * The reply is written to the run_ctx client and it is
+ * up to the engine to take and parse.
+ * The err out variable is set only if error occurs and describes the error
+ * (an sds the caller must free).
+ * If err is set no reply is written to the run_ctx client. */
+void scriptCall(scriptRunCtx *run_ctx, sds *err) {
+    client *c = run_ctx->c;
+
+    /* Setup our fake client for command execution */
+    c->user = run_ctx->original_client->user;
+
+    /* Process module hooks */
+    moduleCallCommandFilters(c);
+
+    struct redisCommand *cmd = lookupCommand(c->argv, c->argc);
+    c->cmd = c->lastcmd = c->realcmd = cmd;
+    /* Note: cmd may be NULL here; arity check reports "Unknown Redis command". */
+    if (scriptVerifyCommandArity(cmd, c->argc, err) != C_OK) {
+        goto error;
+    }
+
+    /* There are commands that are not allowed inside scripts. */
+    if (!server.script_disable_deny_script && (cmd->flags & CMD_NOSCRIPT)) {
+        *err = sdsnew("This Redis command is not allowed from script");
+        goto error;
+    }
+
+    if (scriptVerifyAllowStale(c, err) != C_OK) {
+        goto error;
+    }
+
+    if (scriptVerifyACL(c, err) != C_OK) {
+        goto error;
+    }
+
+    if (scriptVerifyWriteCommandAllow(run_ctx, err) != C_OK) {
+        goto error;
+    }
+
+    if (scriptVerifyOOM(run_ctx, err) != C_OK) {
+        goto error;
+    }
+
+    if (cmd->flags & CMD_WRITE) {
+        /* signify that we already change the data in this execution */
+        run_ctx->flags |= SCRIPT_WRITE_DIRTY;
+    }
+
+    if (scriptVerifyClusterState(run_ctx, c, run_ctx->original_client, err) != C_OK) {
+        goto error;
+    }
+
+    /* Translate the run_ctx propagation targets to call() flags. */
+    int call_flags = CMD_CALL_NONE;
+    if (run_ctx->repl_flags & PROPAGATE_AOF) {
+        call_flags |= CMD_CALL_PROPAGATE_AOF;
+    }
+    if (run_ctx->repl_flags & PROPAGATE_REPL) {
+        call_flags |= CMD_CALL_PROPAGATE_REPL;
+    }
+    call(c, call_flags);
+    serverAssert((c->flags & CLIENT_BLOCKED) == 0);
+    return;
+
+error:
+    afterErrorReply(c, *err, sdslen(*err), 0);
+    incrCommandStatsOnError(cmd, ERROR_COMMAND_REJECTED);
+}
+
+/* Return how long the current script has been running, in milliseconds.
+ * Must only be called while a script is running. */
+long long scriptRunDuration(void) {
+    serverAssert(scriptIsRunning());
+    return elapsedMs(curr_run_ctx->start_time);
+}
diff --git a/src/script.h b/src/script.h
new file mode 100644
index 0000000..c487165
--- /dev/null
+++ b/src/script.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2009-2021, Redis Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SCRIPT_H_
+#define __SCRIPT_H_
+
+/*
+ * Script.c unit provides an API for functions and eval
+ * to interact with Redis. Interaction includes mostly
+ * executing commands, but also functionalities like calling
+ * Redis back on long scripts or check if the script was killed.
+ *
+ * The interaction is done using a scriptRunCtx object that
+ * need to be created by the user and initialized using scriptPrepareForRun.
+ *
+ * Detailed list of functionalities expose by the unit:
+ * 1. Calling commands (including all the validation checks such as
+ * acl, cluster, read only run, ...)
+ * 2. Set Resp
+ * 3. Set Replication method (AOF/REPLICATION/NONE)
+ * 4. Call Redis back to on long running scripts to allow Redis reply
+ * to clients and perform script kill
+ */
+
+/*
+ * scriptInterrupt function will return one of those value,
+ *
+ * - SCRIPT_KILL - kill the current running script.
+ * - SCRIPT_CONTINUE - keep running the current script.
+ */
+#define SCRIPT_KILL 1
+#define SCRIPT_CONTINUE 2
+
+/* runCtx flags */
+#define SCRIPT_WRITE_DIRTY (1ULL<<0) /* indicate that the current script already performed a write command */
+#define SCRIPT_TIMEDOUT (1ULL<<3) /* indicate that the current script timedout */
+#define SCRIPT_KILLED (1ULL<<4) /* indicate that the current script was marked to be killed */
+#define SCRIPT_READ_ONLY (1ULL<<5) /* indicate that the current script should only perform read commands */
+#define SCRIPT_ALLOW_OOM (1ULL<<6) /* indicate to allow any command even if OOM reached */
+#define SCRIPT_EVAL_MODE (1ULL<<7) /* Indicate that the current script called from legacy Lua */
+#define SCRIPT_ALLOW_CROSS_SLOT (1ULL<<8) /* Indicate that the current script may access keys from multiple slots */
+typedef struct scriptRunCtx scriptRunCtx;
+
+/* Holds the state of a single script invocation; allocated by the caller
+ * and initialized with scriptPrepareForRun(). */
+struct scriptRunCtx {
+    const char *funcname;     /* Script/function name (used in kill logs). */
+    client *c;                /* Fake engine client commands execute on. */
+    client *original_client;  /* Real client that invoked the script. */
+    int flags;                /* SCRIPT_* runtime flags (see above). */
+    int repl_flags;           /* PROPAGATE_AOF | PROPAGATE_REPL. */
+    monotime start_time;      /* Invocation start, for timeout/duration. */
+};
+
+/* Scripts flags */
+#define SCRIPT_FLAG_NO_WRITES (1ULL<<0)
+#define SCRIPT_FLAG_ALLOW_OOM (1ULL<<1)
+#define SCRIPT_FLAG_ALLOW_STALE (1ULL<<2)
+#define SCRIPT_FLAG_NO_CLUSTER (1ULL<<3)
+#define SCRIPT_FLAG_EVAL_COMPAT_MODE (1ULL<<4) /* EVAL Script backwards compatible behavior, no shebang provided */
+#define SCRIPT_FLAG_ALLOW_CROSS_SLOT (1ULL<<5)
+
+/* Defines a script flag: the bit value and its textual name. */
+typedef struct scriptFlag {
+    uint64_t flag;    /* One of the SCRIPT_FLAG_* bits. */
+    const char *str;  /* The flag's string form (as declared by the script). */
+} scriptFlag;
+
+extern scriptFlag scripts_flags_def[];
+
+uint64_t scriptFlagsToCmdFlags(uint64_t cmd_flags, uint64_t script_flags);
+int scriptPrepareForRun(scriptRunCtx *r_ctx, client *engine_client, client *caller, const char *funcname, uint64_t script_flags, int ro);
+void scriptResetRun(scriptRunCtx *r_ctx);
+int scriptSetResp(scriptRunCtx *r_ctx, int resp);
+int scriptSetRepl(scriptRunCtx *r_ctx, int repl);
+void scriptCall(scriptRunCtx *r_ctx, sds *err);
+int scriptInterrupt(scriptRunCtx *r_ctx);
+void scriptKill(client *c, int is_eval);
+int scriptIsRunning(void);
+const char* scriptCurrFunction(void);
+int scriptIsEval(void);
+int scriptIsTimedout(void);
+client* scriptGetClient(void);
+client* scriptGetCaller(void);
+long long scriptRunDuration(void);
+
+#endif /* __SCRIPT_H_ */
diff --git a/src/script_lua.c b/src/script_lua.c
new file mode 100644
index 0000000..8cdd805
--- /dev/null
+++ b/src/script_lua.c
@@ -0,0 +1,1722 @@
+/*
+ * Copyright (c) 2009-2021, Redis Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "script_lua.h"
+#include "fpconv_dtoa.h"
+
+#include "server.h"
+#include "sha1.h"
+#include "rand.h"
+#include "cluster.h"
+#include "monotonic.h"
+#include "resp_parser.h"
+#include "version.h"
+#include <lauxlib.h>
+#include <lualib.h>
+#include <ctype.h>
+#include <math.h>
+
+/* Globals that are added by the Lua libraries */
+static char *libraries_allow_list[] = {
+ "string",
+ "cjson",
+ "bit",
+ "cmsgpack",
+ "math",
+ "table",
+ "struct",
+ NULL,
+};
+
+/* Redis Lua API globals */
+static char *redis_api_allow_list[] = {
+ "redis",
+ "__redis__err__handler", /* error handler for eval, currently located on globals.
+ Should move to registry. */
+ NULL,
+};
+
+/* Lua builtins */
+static char *lua_builtins_allow_list[] = {
+ "xpcall",
+ "tostring",
+ "getfenv",
+ "setmetatable",
+ "next",
+ "assert",
+ "tonumber",
+ "rawequal",
+ "collectgarbage",
+ "getmetatable",
+ "rawset",
+ "pcall",
+ "coroutine",
+ "type",
+ "_G",
+ "select",
+ "unpack",
+ "gcinfo",
+ "pairs",
+ "rawget",
+ "loadstring",
+ "ipairs",
+ "_VERSION",
+ "setfenv",
+ "load",
+ "error",
+ NULL,
+};
+
+/* Lua builtins which are not documented on the Lua documentation */
+static char *lua_builtins_not_documented_allow_list[] = {
+ "newproxy",
+ NULL,
+};
+
+/* Lua builtins which are allowed on initialization but will be removed right after */
+static char *lua_builtins_removed_after_initialization_allow_list[] = {
+ "debug", /* debug will be set to nil after the error handler will be created */
+ NULL,
+};
+
+/* Those allow lists was created from the globals that was
+ * available to the user when the allow lists was first introduce.
+ * Because we do not want to break backward compatibility we keep
+ * all the globals. The allow lists will prevent us from accidentally
+ * creating unwanted globals in the future.
+ *
+ * Also notice that the allow list is only checked on start time,
+ * after that the global table is locked so not need to check anything.*/
+static char **allow_lists[] = {
+ libraries_allow_list,
+ redis_api_allow_list,
+ lua_builtins_allow_list,
+ lua_builtins_not_documented_allow_list,
+ lua_builtins_removed_after_initialization_allow_list,
+ NULL,
+};
+
+/* Deny list contains elements which we know we do not want to add to globals
+ * and there is no need to print a warning message form them. We will print a
+ * log message only if an element was added to the globals and the element is
+ * not on the allow list nor on the back list. */
+static char *deny_list[] = {
+ "dofile",
+ "loadfile",
+ "print",
+ NULL,
+};
+
+static int redis_math_random (lua_State *L);
+static int redis_math_randomseed (lua_State *L);
+static void redisProtocolToLuaType_Int(void *ctx, long long val, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_BulkString(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_NullBulkString(void *ctx, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_NullArray(void *ctx, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_Status(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_Error(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_Array(struct ReplyParser *parser, void *ctx, size_t len, const char *proto);
+static void redisProtocolToLuaType_Map(struct ReplyParser *parser, void *ctx, size_t len, const char *proto);
+static void redisProtocolToLuaType_Set(struct ReplyParser *parser, void *ctx, size_t len, const char *proto);
+static void redisProtocolToLuaType_Null(void *ctx, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_Bool(void *ctx, int val, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_Double(void *ctx, double d, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_BigNumber(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_VerbatimString(void *ctx, const char *format, const char *str, size_t len, const char *proto, size_t proto_len);
+static void redisProtocolToLuaType_Attribute(struct ReplyParser *parser, void *ctx, size_t len, const char *proto);
+static void luaReplyToRedisReply(client *c, client* script_client, lua_State *lua);
+
+/*
+ * Save the give pointer on Lua registry, used to save the Lua context and
+ * function context so we can retrieve them from lua_State.
+ */
+void luaSaveOnRegistry(lua_State* lua, const char* name, void* ptr) {
+ lua_pushstring(lua, name);
+ if (ptr) {
+ lua_pushlightuserdata(lua, ptr);
+ } else {
+ lua_pushnil(lua);
+ }
+ lua_settable(lua, LUA_REGISTRYINDEX);
+}
+
+/*
+ * Get a saved pointer from registry
+ */
+void* luaGetFromRegistry(lua_State* lua, const char* name) {
+ lua_pushstring(lua, name);
+ lua_gettable(lua, LUA_REGISTRYINDEX);
+
+ if (lua_isnil(lua, -1)) {
+ lua_pop(lua, 1); /* pops the value */
+ return NULL;
+ }
+ /* must be light user data */
+ serverAssert(lua_islightuserdata(lua, -1));
+
+ void* ptr = (void*) lua_topointer(lua, -1);
+ serverAssert(ptr);
+
+ /* pops the value */
+ lua_pop(lua, 1);
+
+ return ptr;
+}
+
+/* ---------------------------------------------------------------------------
+ * Redis reply to Lua type conversion functions.
+ * ------------------------------------------------------------------------- */
+
+/* Take a Redis reply in the Redis protocol format and convert it into a
+ * Lua type. Thanks to this function, and the introduction of not connected
+ * clients, it is trivial to implement the redis() lua function.
+ *
+ * Basically we take the arguments, execute the Redis command in the context
+ * of a non connected client, then take the generated reply and convert it
+ * into a suitable Lua type. With this trick the scripting feature does not
+ * need the introduction of a full Redis internals API. The script
+ * is like a normal client that bypasses all the slow I/O paths.
+ *
+ * Note: in this function we do not do any sanity check as the reply is
+ * generated by Redis directly. This allows us to go faster.
+ *
+ * Errors are returned as a table with a single 'err' field set to the
+ * error string.
+ */
+
+static const ReplyParserCallbacks DefaultLuaTypeParserCallbacks = {
+ .null_array_callback = redisProtocolToLuaType_NullArray,
+ .bulk_string_callback = redisProtocolToLuaType_BulkString,
+ .null_bulk_string_callback = redisProtocolToLuaType_NullBulkString,
+ .error_callback = redisProtocolToLuaType_Error,
+ .simple_str_callback = redisProtocolToLuaType_Status,
+ .long_callback = redisProtocolToLuaType_Int,
+ .array_callback = redisProtocolToLuaType_Array,
+ .set_callback = redisProtocolToLuaType_Set,
+ .map_callback = redisProtocolToLuaType_Map,
+ .bool_callback = redisProtocolToLuaType_Bool,
+ .double_callback = redisProtocolToLuaType_Double,
+ .null_callback = redisProtocolToLuaType_Null,
+ .big_number_callback = redisProtocolToLuaType_BigNumber,
+ .verbatim_string_callback = redisProtocolToLuaType_VerbatimString,
+ .attribute_callback = redisProtocolToLuaType_Attribute,
+ .error = NULL,
+};
+
+static void redisProtocolToLuaType(lua_State *lua, char* reply) {
+ ReplyParser parser = {.curr_location = reply, .callbacks = DefaultLuaTypeParserCallbacks};
+
+ parseReply(&parser, lua);
+}
+
+static void redisProtocolToLuaType_Int(void *ctx, long long val, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 1)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_pushnumber(lua,(lua_Number)val);
+}
+
+static void redisProtocolToLuaType_NullBulkString(void *ctx, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 1)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_pushboolean(lua,0);
+}
+
+static void redisProtocolToLuaType_NullArray(void *ctx, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 1)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_pushboolean(lua,0);
+}
+
+
+static void redisProtocolToLuaType_BulkString(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 1)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_pushlstring(lua,str,len);
+}
+
+static void redisProtocolToLuaType_Status(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 3)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_newtable(lua);
+ lua_pushstring(lua,"ok");
+ lua_pushlstring(lua,str,len);
+ lua_settable(lua,-3);
+}
+
+static void redisProtocolToLuaType_Error(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 3)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ sds err_msg = sdscatlen(sdsnew("-"), str, len);
+ luaPushErrorBuff(lua,err_msg);
+ /* push a field indicate to ignore updating the stats on this error
+ * because it was already updated when executing the command. */
+ lua_pushstring(lua,"ignore_error_stats_update");
+ lua_pushboolean(lua, 1);
+ lua_settable(lua,-3);
+}
+
+static void redisProtocolToLuaType_Map(struct ReplyParser *parser, void *ctx, size_t len, const char *proto) {
+ UNUSED(proto);
+ lua_State *lua = ctx;
+ if (lua) {
+ if (!lua_checkstack(lua, 3)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_newtable(lua);
+ lua_pushstring(lua, "map");
+ lua_newtable(lua);
+ }
+ for (size_t j = 0; j < len; j++) {
+ parseReply(parser,lua);
+ parseReply(parser,lua);
+ if (lua) lua_settable(lua,-3);
+ }
+ if (lua) lua_settable(lua,-3);
+}
+
+static void redisProtocolToLuaType_Set(struct ReplyParser *parser, void *ctx, size_t len, const char *proto) {
+ UNUSED(proto);
+
+ lua_State *lua = ctx;
+ if (lua) {
+ if (!lua_checkstack(lua, 3)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_newtable(lua);
+ lua_pushstring(lua, "set");
+ lua_newtable(lua);
+ }
+ for (size_t j = 0; j < len; j++) {
+ parseReply(parser,lua);
+ if (lua) {
+ if (!lua_checkstack(lua, 1)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic.
+ * Notice that here we need to check the stack again because the recursive
+ * call to redisProtocolToLuaType might have use the room allocated in the stack*/
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_pushboolean(lua,1);
+ lua_settable(lua,-3);
+ }
+ }
+ if (lua) lua_settable(lua,-3);
+}
+
+static void redisProtocolToLuaType_Array(struct ReplyParser *parser, void *ctx, size_t len, const char *proto) {
+ UNUSED(proto);
+
+ lua_State *lua = ctx;
+ if (lua){
+ if (!lua_checkstack(lua, 2)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_newtable(lua);
+ }
+ for (size_t j = 0; j < len; j++) {
+ if (lua) lua_pushnumber(lua,j+1);
+ parseReply(parser,lua);
+ if (lua) lua_settable(lua,-3);
+ }
+}
+
+static void redisProtocolToLuaType_Attribute(struct ReplyParser *parser, void *ctx, size_t len, const char *proto) {
+ UNUSED(proto);
+
+ /* Parse the attribute reply.
+ * Currently, we do not expose the attribute to the Lua script so
+ * we just need to continue parsing and ignore it (the NULL ensures that the
+ * reply will be ignored). */
+ for (size_t j = 0; j < len; j++) {
+ parseReply(parser,NULL);
+ parseReply(parser,NULL);
+ }
+
+ /* Parse the reply itself. */
+ parseReply(parser,ctx);
+}
+
+static void redisProtocolToLuaType_VerbatimString(void *ctx, const char *format, const char *str, size_t len, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 5)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_newtable(lua);
+ lua_pushstring(lua,"verbatim_string");
+ lua_newtable(lua);
+ lua_pushstring(lua,"string");
+ lua_pushlstring(lua,str,len);
+ lua_settable(lua,-3);
+ lua_pushstring(lua,"format");
+ lua_pushlstring(lua,format,3);
+ lua_settable(lua,-3);
+ lua_settable(lua,-3);
+}
+
+static void redisProtocolToLuaType_BigNumber(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 3)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_newtable(lua);
+ lua_pushstring(lua,"big_number");
+ lua_pushlstring(lua,str,len);
+ lua_settable(lua,-3);
+}
+
+static void redisProtocolToLuaType_Null(void *ctx, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 1)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_pushnil(lua);
+}
+
+static void redisProtocolToLuaType_Bool(void *ctx, int val, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 1)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_pushboolean(lua,val);
+}
+
+static void redisProtocolToLuaType_Double(void *ctx, double d, const char *proto, size_t proto_len) {
+ UNUSED(proto);
+ UNUSED(proto_len);
+ if (!ctx) {
+ return;
+ }
+
+ lua_State *lua = ctx;
+ if (!lua_checkstack(lua, 3)) {
+ /* Increase the Lua stack if needed, to make sure there is enough room
+ * to push elements to the stack. On failure, exit with panic. */
+ serverPanic("lua stack limit reach when parsing redis.call reply");
+ }
+ lua_newtable(lua);
+ lua_pushstring(lua,"double");
+ lua_pushnumber(lua,d);
+ lua_settable(lua,-3);
+}
+
+/* This function is used in order to push an error on the Lua stack in the
+ * format used by redis.pcall to return errors, which is a lua table
+ * with an "err" field set to the error string including the error code.
+ * Note that this table is never a valid reply by proper commands,
+ * since the returned tables are otherwise always indexed by integers, never by strings.
+ *
+ * The function takes ownership on the given err_buffer. */
+void luaPushErrorBuff(lua_State *lua, sds err_buffer) {
+ sds msg;
+ sds error_code;
+
+ /* If debugging is active and in step mode, log errors resulting from
+ * Redis commands. */
+ if (ldbIsEnabled()) {
+ ldbLog(sdscatprintf(sdsempty(),"<error> %s",err_buffer));
+ }
+
+ /* There are two possible formats for the received `error` string:
+ * 1) "-CODE msg": in this case we remove the leading '-' since we don't store it as part of the lua error format.
+ * 2) "msg": in this case we prepend a generic 'ERR' code since all error statuses need some error code.
+ * We support format (1) so this function can reuse the error messages used in other places in redis.
+ * We support format (2) so it'll be easy to pass descriptive errors to this function without worrying about format.
+ */
+ if (err_buffer[0] == '-') {
+ /* derive error code from the message */
+ char *err_msg = strstr(err_buffer, " ");
+ if (!err_msg) {
+ msg = sdsnew(err_buffer+1);
+ error_code = sdsnew("ERR");
+ } else {
+ *err_msg = '\0';
+ msg = sdsnew(err_msg+1);
+ error_code = sdsnew(err_buffer + 1);
+ }
+ sdsfree(err_buffer);
+ } else {
+ msg = err_buffer;
+ error_code = sdsnew("ERR");
+ }
+ /* Trim newline at end of string. If we reuse the ready-made Redis error objects (case 1 above) then we might
+ * have a newline that needs to be trimmed. In any case the lua Redis error table shouldn't end with a newline. */
+ msg = sdstrim(msg, "\r\n");
+ sds final_msg = sdscatfmt(error_code, " %s", msg);
+
+ lua_newtable(lua);
+ lua_pushstring(lua,"err");
+ lua_pushstring(lua, final_msg);
+ lua_settable(lua,-3);
+
+ sdsfree(msg);
+ sdsfree(final_msg);
+}
+
+void luaPushError(lua_State *lua, const char *error) {
+ luaPushErrorBuff(lua, sdsnew(error));
+}
+
+/* In case the error set into the Lua stack by luaPushError() was generated
+ * by the non-error-trapping version of redis.pcall(), which is redis.call(),
+ * this function will raise the Lua error so that the execution of the
+ * script will be halted. */
+int luaError(lua_State *lua) {
+ return lua_error(lua);
+}
+
+
+/* ---------------------------------------------------------------------------
+ * Lua reply to Redis reply conversion functions.
+ * ------------------------------------------------------------------------- */
+
+/* Reply to client 'c' converting the top element in the Lua stack to a
+ * Redis reply. As a side effect the element is consumed from the stack. */
+static void luaReplyToRedisReply(client *c, client* script_client, lua_State *lua) {
+ int t = lua_type(lua,-1);
+
+ if (!lua_checkstack(lua, 4)) {
+ /* Increase the Lua stack if needed to make sure there is enough room
+ * to push 4 elements to the stack. On failure, return error.
+ * Notice that we need, in the worst case, 4 elements because returning a map might
+ * require push 4 elements to the Lua stack.*/
+ addReplyErrorFormat(c, "reached lua stack limit");
+ lua_pop(lua,1); /* pop the element from the stack */
+ return;
+ }
+
+ switch(t) {
+ case LUA_TSTRING:
+ addReplyBulkCBuffer(c,(char*)lua_tostring(lua,-1),lua_strlen(lua,-1));
+ break;
+ case LUA_TBOOLEAN:
+ if (script_client->resp == 2)
+ addReply(c,lua_toboolean(lua,-1) ? shared.cone :
+ shared.null[c->resp]);
+ else
+ addReplyBool(c,lua_toboolean(lua,-1));
+ break;
+ case LUA_TNUMBER:
+ addReplyLongLong(c,(long long)lua_tonumber(lua,-1));
+ break;
+ case LUA_TTABLE:
+ /* We need to check if it is an array, an error, or a status reply.
+ * Error are returned as a single element table with 'err' field.
+ * Status replies are returned as single element table with 'ok'
+ * field. */
+
+ /* Handle error reply. */
+ /* we took care of the stack size on function start */
+ lua_pushstring(lua,"err");
+ lua_rawget(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TSTRING) {
+ lua_pop(lua, 1); /* pop the error message, we will use luaExtractErrorInformation to get error information */
+ errorInfo err_info = {0};
+ luaExtractErrorInformation(lua, &err_info);
+ addReplyErrorFormatEx(c,
+ err_info.ignore_err_stats_update? ERR_REPLY_FLAG_NO_STATS_UPDATE: 0,
+ "-%s",
+ err_info.msg);
+ luaErrorInformationDiscard(&err_info);
+ lua_pop(lua,1); /* pop the result table */
+ return;
+ }
+ lua_pop(lua,1); /* Discard field name pushed before. */
+
+ /* Handle status reply. */
+ lua_pushstring(lua,"ok");
+ lua_rawget(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TSTRING) {
+ sds ok = sdsnew(lua_tostring(lua,-1));
+ sdsmapchars(ok,"\r\n"," ",2);
+ addReplyStatusLength(c, ok, sdslen(ok));
+ sdsfree(ok);
+ lua_pop(lua,2);
+ return;
+ }
+ lua_pop(lua,1); /* Discard field name pushed before. */
+
+ /* Handle double reply. */
+ lua_pushstring(lua,"double");
+ lua_rawget(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TNUMBER) {
+ addReplyDouble(c,lua_tonumber(lua,-1));
+ lua_pop(lua,2);
+ return;
+ }
+ lua_pop(lua,1); /* Discard field name pushed before. */
+
+ /* Handle big number reply. */
+ lua_pushstring(lua,"big_number");
+ lua_rawget(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TSTRING) {
+ sds big_num = sdsnewlen(lua_tostring(lua,-1), lua_strlen(lua,-1));
+ sdsmapchars(big_num,"\r\n"," ",2);
+ addReplyBigNum(c,big_num,sdslen(big_num));
+ sdsfree(big_num);
+ lua_pop(lua,2);
+ return;
+ }
+ lua_pop(lua,1); /* Discard field name pushed before. */
+
+ /* Handle verbatim reply. */
+ lua_pushstring(lua,"verbatim_string");
+ lua_rawget(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TTABLE) {
+ lua_pushstring(lua,"format");
+ lua_rawget(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TSTRING){
+ char* format = (char*)lua_tostring(lua,-1);
+ lua_pushstring(lua,"string");
+ lua_rawget(lua,-3);
+ t = lua_type(lua,-1);
+ if (t == LUA_TSTRING){
+ size_t len;
+ char* str = (char*)lua_tolstring(lua,-1,&len);
+ addReplyVerbatim(c, str, len, format);
+ lua_pop(lua,4);
+ return;
+ }
+ lua_pop(lua,1);
+ }
+ lua_pop(lua,1);
+ }
+ lua_pop(lua,1); /* Discard field name pushed before. */
+
+ /* Handle map reply. */
+ lua_pushstring(lua,"map");
+ lua_rawget(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TTABLE) {
+ int maplen = 0;
+ void *replylen = addReplyDeferredLen(c);
+ /* we took care of the stack size on function start */
+ lua_pushnil(lua); /* Use nil to start iteration. */
+ while (lua_next(lua,-2)) {
+ /* Stack now: table, key, value */
+ lua_pushvalue(lua,-2); /* Dup key before consuming. */
+ luaReplyToRedisReply(c, script_client, lua); /* Return key. */
+ luaReplyToRedisReply(c, script_client, lua); /* Return value. */
+ /* Stack now: table, key. */
+ maplen++;
+ }
+ setDeferredMapLen(c,replylen,maplen);
+ lua_pop(lua,2);
+ return;
+ }
+ lua_pop(lua,1); /* Discard field name pushed before. */
+
+ /* Handle set reply. */
+ lua_pushstring(lua,"set");
+ lua_rawget(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TTABLE) {
+ int setlen = 0;
+ void *replylen = addReplyDeferredLen(c);
+ /* we took care of the stack size on function start */
+ lua_pushnil(lua); /* Use nil to start iteration. */
+ while (lua_next(lua,-2)) {
+ /* Stack now: table, key, true */
+ lua_pop(lua,1); /* Discard the boolean value. */
+ lua_pushvalue(lua,-1); /* Dup key before consuming. */
+ luaReplyToRedisReply(c, script_client, lua); /* Return key. */
+ /* Stack now: table, key. */
+ setlen++;
+ }
+ setDeferredSetLen(c,replylen,setlen);
+ lua_pop(lua,2);
+ return;
+ }
+ lua_pop(lua,1); /* Discard field name pushed before. */
+
+ /* Handle the array reply. */
+ void *replylen = addReplyDeferredLen(c);
+ int j = 1, mbulklen = 0;
+ while(1) {
+ /* we took care of the stack size on function start */
+ lua_pushnumber(lua,j++);
+ lua_rawget(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TNIL) {
+ lua_pop(lua,1);
+ break;
+ }
+ luaReplyToRedisReply(c, script_client, lua);
+ mbulklen++;
+ }
+ setDeferredArrayLen(c,replylen,mbulklen);
+ break;
+ default:
+ addReplyNull(c);
+ }
+ lua_pop(lua,1);
+}
+
+/* ---------------------------------------------------------------------------
+ * Lua redis.* functions implementations.
+ * ------------------------------------------------------------------------- */
+void freeLuaRedisArgv(robj **argv, int argc, int argv_len);
+
+/* Cached argv array across calls. */
+static robj **lua_argv = NULL;
+static int lua_argv_size = 0;
+
+/* Cache of recently used small arguments to avoid malloc calls. */
+static robj *lua_args_cached_objects[LUA_CMD_OBJCACHE_SIZE];
+static size_t lua_args_cached_objects_len[LUA_CMD_OBJCACHE_SIZE];
+
+static robj **luaArgsToRedisArgv(lua_State *lua, int *argc, int *argv_len) {
+ int j;
+ /* Require at least one argument */
+ *argc = lua_gettop(lua);
+ if (*argc == 0) {
+ luaPushError(lua, "Please specify at least one argument for this redis lib call");
+ return NULL;
+ }
+
+ /* Build the arguments vector (reuse a cached argv from last call) */
+ if (lua_argv_size < *argc) {
+ lua_argv = zrealloc(lua_argv,sizeof(robj*)* *argc);
+ lua_argv_size = *argc;
+ }
+ *argv_len = lua_argv_size;
+
+ for (j = 0; j < *argc; j++) {
+ char *obj_s;
+ size_t obj_len;
+ char dbuf[64];
+
+ if (lua_type(lua,j+1) == LUA_TNUMBER) {
+ /* We can't use lua_tolstring() for number -> string conversion
+ * since Lua uses a format specifier that loses precision. */
+ lua_Number num = lua_tonumber(lua,j+1);
+ obj_len = fpconv_dtoa((double)num, dbuf);
+ dbuf[obj_len] = '\0';
+ obj_s = dbuf;
+ } else {
+ obj_s = (char*)lua_tolstring(lua,j+1,&obj_len);
+ if (obj_s == NULL) break; /* Not a string. */
+ }
+ /* Try to use a cached object. */
+ if (j < LUA_CMD_OBJCACHE_SIZE && lua_args_cached_objects[j] &&
+ lua_args_cached_objects_len[j] >= obj_len)
+ {
+ sds s = lua_args_cached_objects[j]->ptr;
+ lua_argv[j] = lua_args_cached_objects[j];
+ lua_args_cached_objects[j] = NULL;
+ memcpy(s,obj_s,obj_len+1);
+ sdssetlen(s, obj_len);
+ } else {
+ lua_argv[j] = createStringObject(obj_s, obj_len);
+ }
+ }
+
+ /* Pop all arguments from the stack, we do not need them anymore
+ * and this way we guaranty we will have room on the stack for the result. */
+ lua_pop(lua, *argc);
+
+ /* Check if one of the arguments passed by the Lua script
+ * is not a string or an integer (lua_isstring() return true for
+ * integers as well). */
+ if (j != *argc) {
+ freeLuaRedisArgv(lua_argv, j, lua_argv_size);
+ luaPushError(lua, "Lua redis lib command arguments must be strings or integers");
+ return NULL;
+ }
+
+ return lua_argv;
+}
+
+void freeLuaRedisArgv(robj **argv, int argc, int argv_len) {
+ int j;
+ for (j = 0; j < argc; j++) {
+ robj *o = argv[j];
+
+ /* Try to cache the object in the lua_args_cached_objects array.
+ * The object must be small, SDS-encoded, and with refcount = 1
+ * (we must be the only owner) for us to cache it. */
+ if (j < LUA_CMD_OBJCACHE_SIZE &&
+ o->refcount == 1 &&
+ (o->encoding == OBJ_ENCODING_RAW ||
+ o->encoding == OBJ_ENCODING_EMBSTR) &&
+ sdslen(o->ptr) <= LUA_CMD_OBJCACHE_MAX_LEN)
+ {
+ sds s = o->ptr;
+ if (lua_args_cached_objects[j]) decrRefCount(lua_args_cached_objects[j]);
+ lua_args_cached_objects[j] = o;
+ lua_args_cached_objects_len[j] = sdsalloc(s);
+ } else {
+ decrRefCount(o);
+ }
+ }
+ if (argv != lua_argv || argv_len != lua_argv_size) {
+ /* The command changed argv, scrap the cache and start over. */
+ zfree(argv);
+ lua_argv = NULL;
+ lua_argv_size = 0;
+ }
+}
+
+static int luaRedisGenericCommand(lua_State *lua, int raise_error) {
+ int j;
+ scriptRunCtx* rctx = luaGetFromRegistry(lua, REGISTRY_RUN_CTX_NAME);
+ serverAssert(rctx); /* Only supported inside script invocation */
+ sds err = NULL;
+ client* c = rctx->c;
+ sds reply;
+
+ c->argv = luaArgsToRedisArgv(lua, &c->argc, &c->argv_len);
+ if (c->argv == NULL) {
+ return raise_error ? luaError(lua) : 1;
+ }
+
+ static int inuse = 0; /* Recursive calls detection. */
+
+ /* By using Lua debug hooks it is possible to trigger a recursive call
+ * to luaRedisGenericCommand(), which normally should never happen.
+ * To make this function reentrant is futile and makes it slower, but
+ * we should at least detect such a misuse, and abort. */
+ if (inuse) {
+ char *recursion_warning =
+ "luaRedisGenericCommand() recursive call detected. "
+ "Are you doing funny stuff with Lua debug hooks?";
+ serverLog(LL_WARNING,"%s",recursion_warning);
+ luaPushError(lua,recursion_warning);
+ return 1;
+ }
+ inuse++;
+
+ /* Log the command if debugging is active. */
+ if (ldbIsEnabled()) {
+ sds cmdlog = sdsnew("<redis>");
+ for (j = 0; j < c->argc; j++) {
+ if (j == 10) {
+ cmdlog = sdscatprintf(cmdlog," ... (%d more)",
+ c->argc-j-1);
+ break;
+ } else {
+ cmdlog = sdscatlen(cmdlog," ",1);
+ cmdlog = sdscatsds(cmdlog,c->argv[j]->ptr);
+ }
+ }
+ ldbLog(cmdlog);
+ }
+
+ scriptCall(rctx, &err);
+ if (err) {
+ luaPushError(lua, err);
+ sdsfree(err);
+ /* push a field indicate to ignore updating the stats on this error
+ * because it was already updated when executing the command. */
+ lua_pushstring(lua,"ignore_error_stats_update");
+ lua_pushboolean(lua, 1);
+ lua_settable(lua,-3);
+ goto cleanup;
+ }
+
+ /* Convert the result of the Redis command into a suitable Lua type.
+ * The first thing we need is to create a single string from the client
+ * output buffers. */
+ if (listLength(c->reply) == 0 && (size_t)c->bufpos < c->buf_usable_size) {
+ /* This is a fast path for the common case of a reply inside the
+ * client static buffer. Don't create an SDS string but just use
+ * the client buffer directly. */
+ c->buf[c->bufpos] = '\0';
+ reply = c->buf;
+ c->bufpos = 0;
+ } else {
+ reply = sdsnewlen(c->buf,c->bufpos);
+ c->bufpos = 0;
+ while(listLength(c->reply)) {
+ clientReplyBlock *o = listNodeValue(listFirst(c->reply));
+
+ reply = sdscatlen(reply,o->buf,o->used);
+ listDelNode(c->reply,listFirst(c->reply));
+ }
+ }
+ if (raise_error && reply[0] != '-') raise_error = 0;
+ redisProtocolToLuaType(lua,reply);
+
+ /* If the debugger is active, log the reply from Redis. */
+ if (ldbIsEnabled())
+ ldbLogRedisReply(reply);
+
+ if (reply != c->buf) sdsfree(reply);
+ c->reply_bytes = 0;
+
+cleanup:
+ /* Clean up. Command code may have changed argv/argc so we use the
+ * argv/argc of the client instead of the local variables. */
+ freeLuaRedisArgv(c->argv, c->argc, c->argv_len);
+ c->argc = c->argv_len = 0;
+ c->user = NULL;
+ c->argv = NULL;
+ resetClient(c);
+ inuse--;
+
+ if (raise_error) {
+ /* If we are here we should have an error in the stack, in the
+ * form of a table with an "err" field. Extract the string to
+ * return the plain error. */
+ return luaError(lua);
+ }
+ return 1;
+}
+
+/* Our implementation of Lua's pcall.
+ * We need this implementation for backward
+ * compatibility with older Redis versions.
+ *
+ * On Redis 7, the error object is a table,
+ * compared to older versions where the error
+ * object is a string. To keep backward
+ * compatibility we catch the table object
+ * and just return the error message. */
+static int luaRedisPcall(lua_State *lua) {
+ int argc = lua_gettop(lua);
+ lua_pushboolean(lua, 1); /* result place holder */
+ lua_insert(lua, 1);
+ if (lua_pcall(lua, argc - 1, LUA_MULTRET, 0)) {
+ /* Error */
+ lua_remove(lua, 1); /* remove the result place holder, now we have room for at least one element */
+ if (lua_istable(lua, -1)) {
+ lua_getfield(lua, -1, "err");
+ if (lua_isstring(lua, -1)) {
+ lua_replace(lua, -2); /* replace the error table with its plain 'err' message */
+ }
+ }
+ lua_pushboolean(lua, 0); /* push result */
+ lua_insert(lua, 1);
+ }
+ return lua_gettop(lua);
+
+}
+
+/* redis.call() -- execute a Redis command; on failure a Lua error is
+ * raised (raise_error=1), aborting the script unless caught by pcall. */
+static int luaRedisCallCommand(lua_State *lua) {
+ return luaRedisGenericCommand(lua,1);
+}
+
+/* redis.pcall() -- execute a Redis command; on failure the error is
+ * returned to the script as a table with an 'err' field instead of
+ * raising a Lua error (raise_error=0). */
+static int luaRedisPCallCommand(lua_State *lua) {
+ return luaRedisGenericCommand(lua,0);
+}
+
+/* redis.sha1hex(string)
+ *
+ * Exposes to scripts the same SHA1-hex digest function that is used to
+ * identify Lua scripts (EVALSHA). Returns a 40 char hex string. */
+static int luaRedisSha1hexCommand(lua_State *lua) {
+    char digest[41];
+
+    /* Exactly one argument is accepted. */
+    if (lua_gettop(lua) != 1) {
+        luaPushError(lua, "wrong number of arguments");
+        return luaError(lua);
+    }
+
+    size_t len;
+    const char *input = lua_tolstring(lua, 1, &len);
+    sha1hex(digest, (char*)input, len);
+    lua_pushstring(lua, digest);
+    return 1;
+}
+
+/* Returns a table with a single field 'field' set to the string value
+ * passed as argument. This helper function is handy when returning
+ * a Redis Protocol error or status reply from Lua:
+ *
+ * return redis.error_reply("ERR Some Error")
+ * return redis.status_reply("Some Status")
+ */
+static int luaRedisReturnSingleFieldTable(lua_State *lua, char *field) {
+ if (lua_gettop(lua) != 1 || lua_type(lua,-1) != LUA_TSTRING) {
+ luaPushError(lua, "wrong number or type of arguments");
+ return 1;
+ }
+
+ lua_newtable(lua);
+ lua_pushstring(lua, field);
+ lua_pushvalue(lua, -3); /* the caller's original string argument */
+ lua_settable(lua, -3); /* t[field] = <string>, pops key and value */
+ return 1;
+}
+
+/* redis.error_reply()
+ *
+ * Build an error reply object from the string argument, prepending the
+ * RESP '-' error prefix when the caller did not supply one. */
+static int luaRedisErrorReplyCommand(lua_State *lua) {
+    if (lua_gettop(lua) != 1 || lua_type(lua,-1) != LUA_TSTRING) {
+        luaPushError(lua, "wrong number or type of arguments");
+        return 1;
+    }
+
+    const char *msg = lua_tostring(lua, -1);
+    sds err_buff = (msg[0] == '-') ? sdsnew(msg)
+                                   : sdscatfmt(sdsempty(), "-%s", msg);
+    luaPushErrorBuff(lua, err_buff);
+    return 1;
+}
+
+/* redis.status_reply() -- wrap the string argument into {ok=<string>} so
+ * it is sent back to the client as a RESP status (simple string) reply. */
+static int luaRedisStatusReplyCommand(lua_State *lua) {
+ return luaRedisReturnSingleFieldTable(lua,"ok");
+}
+
+/* redis.set_repl()
+ *
+ * Set the propagation of write commands executed in the context of the
+ * script to on/off for AOF and replicas. */
+static int luaRedisSetReplCommand(lua_State *lua) {
+    scriptRunCtx *rctx = luaGetFromRegistry(lua, REGISTRY_RUN_CTX_NAME);
+    serverAssert(rctx); /* Only supported inside script invocation */
+
+    if (lua_gettop(lua) != 1) {
+        luaPushError(lua, "redis.set_repl() requires one argument.");
+        return luaError(lua);
+    }
+
+    int flags = lua_tonumber(lua, -1);
+    /* Reject any bit outside the AOF/replication propagation mask. */
+    if ((flags & ~(PROPAGATE_AOF|PROPAGATE_REPL)) != 0) {
+        luaPushError(lua, "Invalid replication flags. Use REPL_AOF, REPL_REPLICA, REPL_ALL or REPL_NONE.");
+        return luaError(lua);
+    }
+
+    scriptSetRepl(rctx, flags);
+    return 0;
+}
+
+/* redis.acl_check_cmd()
+ *
+ * Checks ACL permissions of the current user for the given command
+ * (with arguments). Returns true/false, or raises a Lua error when the
+ * command name itself is unknown. */
+static int luaRedisAclCheckCmdPermissionsCommand(lua_State *lua) {
+    scriptRunCtx *rctx = luaGetFromRegistry(lua, REGISTRY_RUN_CTX_NAME);
+    serverAssert(rctx); /* Only supported inside script invocation */
+
+    int argc, argv_len;
+    robj **argv = luaArgsToRedisArgv(lua, &argc, &argv_len);
+
+    /* Require at least one argument. */
+    if (argv == NULL) return luaError(lua);
+
+    struct redisCommand *cmd = lookupCommand(argv, argc);
+    if (cmd == NULL) {
+        luaPushError(lua, "Invalid command passed to redis.acl_check_cmd()");
+        freeLuaRedisArgv(argv, argc, argv_len);
+        return luaError(lua);
+    }
+
+    int keyidx;
+    int allowed =
+        ACLCheckAllUserCommandPerm(rctx->original_client->user, cmd,
+                                   argv, argc, &keyidx) == ACL_OK;
+    lua_pushboolean(lua, allowed);
+    freeLuaRedisArgv(argv, argc, argv_len);
+    return 1;
+}
+
+
+/* redis.log(level, ...)
+ *
+ * Emit a server log line at the given level, built by joining all the
+ * remaining arguments with single spaces. Non-string/number arguments
+ * are silently skipped. */
+static int luaLogCommand(lua_State *lua) {
+    int argc = lua_gettop(lua);
+
+    if (argc < 2) {
+        luaPushError(lua, "redis.log() requires two arguments or more.");
+        return luaError(lua);
+    }
+    /* Arguments sit at positive stack indexes 1..argc; the level comes first. */
+    if (!lua_isnumber(lua, 1)) {
+        luaPushError(lua, "First argument must be a number (log level).");
+        return luaError(lua);
+    }
+    int level = lua_tonumber(lua, 1);
+    if (level < LL_DEBUG || level > LL_WARNING) {
+        luaPushError(lua, "Invalid debug level.");
+        return luaError(lua);
+    }
+    if (level < server.verbosity) return 0;
+
+    /* Glue together all the remaining arguments. */
+    sds msg = sdsempty();
+    for (int i = 2; i <= argc; i++) {
+        size_t len;
+        const char *piece = lua_tolstring(lua, i, &len);
+        if (piece) {
+            if (i != 2) msg = sdscatlen(msg, " ", 1);
+            msg = sdscatlen(msg, piece, len);
+        }
+    }
+    serverLogRaw(level, msg);
+    sdsfree(msg);
+    return 0;
+}
+
+/* redis.setresp()
+ *
+ * Select the RESP protocol version (2 or 3) used to convert command
+ * replies into Lua values for the rest of the script run. */
+static int luaSetResp(lua_State *lua) {
+    scriptRunCtx *rctx = luaGetFromRegistry(lua, REGISTRY_RUN_CTX_NAME);
+    serverAssert(rctx); /* Only supported inside script invocation */
+
+    if (lua_gettop(lua) != 1) {
+        luaPushError(lua, "redis.setresp() requires one argument.");
+        return luaError(lua);
+    }
+
+    int resp = lua_tonumber(lua, -1);
+    if (resp != 2 && resp != 3) {
+        luaPushError(lua, "RESP version must be 2 or 3.");
+        return luaError(lua);
+    }
+    scriptSetResp(rctx, resp);
+    return 0;
+}
+
+/* ---------------------------------------------------------------------------
+ * Lua engine initialization and reset.
+ * ------------------------------------------------------------------------- */
+
+/* Load a single Lua library by calling its luaopen_* function with the
+ * library name as the sole argument (mimics what require() would do). */
+static void luaLoadLib(lua_State *lua, const char *libname, lua_CFunction luafunc) {
+ lua_pushcfunction(lua, luafunc);
+ lua_pushstring(lua, libname);
+ lua_call(lua, 1, 0);
+}
+
+LUALIB_API int (luaopen_cjson) (lua_State *L);
+LUALIB_API int (luaopen_struct) (lua_State *L);
+LUALIB_API int (luaopen_cmsgpack) (lua_State *L);
+LUALIB_API int (luaopen_bit) (lua_State *L);
+
+/* Load the set of Lua libraries allowed inside the scripting sandbox. */
+static void luaLoadLibraries(lua_State *lua) {
+    static const struct {
+        const char *name;
+        lua_CFunction loader;
+    } libs[] = {
+        {"", luaopen_base},
+        {LUA_TABLIBNAME, luaopen_table},
+        {LUA_STRLIBNAME, luaopen_string},
+        {LUA_MATHLIBNAME, luaopen_math},
+        {LUA_DBLIBNAME, luaopen_debug},
+        {"cjson", luaopen_cjson},
+        {"struct", luaopen_struct},
+        {"cmsgpack", luaopen_cmsgpack},
+        {"bit", luaopen_bit},
+    };
+
+    for (size_t i = 0; i < sizeof(libs)/sizeof(libs[0]); i++)
+        luaLoadLib(lua, libs[i].name, libs[i].loader);
+
+#if 0 /* Stuff that we don't load currently, for sandboxing concerns. */
+    luaLoadLib(lua, LUA_LOADLIBNAME, luaopen_package);
+    luaLoadLib(lua, LUA_OSLIBNAME, luaopen_os);
+#endif
+}
+
+/* Return a freshly-allocated sds copy of the string value located on the
+ * stack at the given index. Return NULL if the value is not a string
+ * (nor a number, which lua_isstring also accepts). Caller owns the sds. */
+sds luaGetStringSds(lua_State *lua, int index) {
+    if (!lua_isstring(lua, index)) return NULL;
+
+    size_t len;
+    const char *str = lua_tolstring(lua, index, &len);
+    return sdsnewlen(str, len);
+}
+
+static int luaProtectedTableError(lua_State *lua) {
+ int argc = lua_gettop(lua);
+ if (argc != 2) {
+ serverLog(LL_WARNING, "malicious code trying to call luaProtectedTableError with wrong arguments");
+ luaL_error(lua, "Wrong number of arguments to luaProtectedTableError");
+ }
+ if (!lua_isstring(lua, -1) && !lua_isnumber(lua, -1)) {
+ luaL_error(lua, "Second argument to luaProtectedTableError must be a string or number");
+ }
+ const char *variable_name = lua_tostring(lua, -1);
+ luaL_error(lua, "Script attempted to access nonexistent global variable '%s'", variable_name);
+ return 0;
+}
+
+/* Set a special metatable on the table on the top of the stack.
+ * The metatable will raise an error if the user tries to fetch
+ * a non-existing value.
+ *
+ * The function assumes the Lua stack has at least enough
+ * space to push 2 elements; it is up to the caller to verify
+ * this before calling this function. */
+void luaSetErrorMetatable(lua_State *lua) {
+ lua_newtable(lua); /* push metatable */
+ lua_pushcfunction(lua, luaProtectedTableError); /* push __index (get) error handler */
+ lua_setfield(lua, -2, "__index");
+ lua_setmetatable(lua, -2);
+}
+
+/* __newindex handler: only allow the creation of globals that appear on
+ * one of the allow lists; anything else is dropped, with a warning log
+ * unless the key is on the deny list (i.e. removed on purpose). */
+static int luaNewIndexAllowList(lua_State *lua) {
+    if (lua_gettop(lua) != 3) {
+        serverLog(LL_WARNING, "malicious code trying to call luaNewIndexAllowList with wrong arguments");
+        luaL_error(lua, "Wrong number of arguments to luaNewIndexAllowList");
+    }
+    if (!lua_istable(lua, -3)) {
+        luaL_error(lua, "first argument to luaNewIndexAllowList must be a table");
+    }
+    if (!lua_isstring(lua, -2) && !lua_isnumber(lua, -2)) {
+        luaL_error(lua, "Second argument to luaNewIndexAllowList must be a string or number");
+    }
+    const char *variable_name = lua_tostring(lua, -2);
+
+    /* Scan every allow list for the key. */
+    int allowed = 0;
+    for (char ***list = allow_lists; *list && !allowed; ++list) {
+        for (char **entry = *list; *entry; ++entry) {
+            if (strcmp(*entry, variable_name) == 0) {
+                allowed = 1;
+                break;
+            }
+        }
+    }
+
+    if (allowed) {
+        /* Perform the assignment (raw, bypassing metamethods). */
+        lua_rawset(lua, -3);
+        return 0;
+    }
+
+    /* Search the value on the deny list: if it is there we know that it was
+     * removed on purpose and there is no need to print a warning. */
+    int denied = 0;
+    for (char **entry = deny_list; *entry; ++entry) {
+        if (strcmp(*entry, variable_name) == 0) {
+            denied = 1;
+            break;
+        }
+    }
+    if (!denied) {
+        serverLog(LL_WARNING, "A key '%s' was added to Lua globals which is not on the globals allow list nor listed on the deny list.", variable_name);
+    }
+    return 0;
+}
+
+/* Set a metatable with a '__newindex' function that verifies that
+ * the new index appears on our globals allow list.
+ *
+ * The metatable is set on the table which is located on the top
+ * of the stack.
+ */
+void luaSetAllowListProtection(lua_State *lua) {
+ lua_newtable(lua); /* push metatable */
+ lua_pushcfunction(lua, luaNewIndexAllowList); /* push __newindex (set) handler */
+ lua_setfield(lua, -2, "__newindex");
+ lua_setmetatable(lua, -2);
+}
+
+/* Set the readonly flag on the table located on the top of the stack
+ * and recursively call this function on each table located inside the
+ * original table. Also, recursively call this function on the metatables. */
+void luaSetTableProtectionRecursively(lua_State *lua) {
+ /* This protects us from a loop in case we already visited the table.
+ * For example, globals has a '_G' key which is pointing back to globals. */
+ if (lua_isreadonlytable(lua, -1)) {
+ return;
+ }
+
+ /* protect the current table */
+ lua_enablereadonlytable(lua, -1, 1);
+
+ lua_checkstack(lua, 2); /* lua_next needs room for key + value */
+ lua_pushnil(lua); /* Use nil to start iteration. */
+ while (lua_next(lua,-2)) {
+ /* Stack now: table, key, value */
+ if (lua_istable(lua, -1)) {
+ luaSetTableProtectionRecursively(lua);
+ }
+ lua_pop(lua, 1); /* pop the value, keep the key for lua_next */
+ }
+
+ /* protect the metatable if exists */
+ if (lua_getmetatable(lua, -1)) {
+ luaSetTableProtectionRecursively(lua);
+ lua_pop(lua, 1); /* pop the metatable */
+ }
+}
+
+/* Add the REDIS_VERSION_NUM and REDIS_VERSION fields to the table
+ * currently on the top of the stack (the 'redis' API table). */
+void luaRegisterVersion(lua_State* lua) {
+ lua_pushstring(lua,"REDIS_VERSION_NUM");
+ lua_pushnumber(lua,REDIS_VERSION_NUM);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REDIS_VERSION");
+ lua_pushstring(lua,REDIS_VERSION);
+ lua_settable(lua,-3);
+}
+
+/* Register the redis.log() function and the LOG_* level constants on the
+ * table currently on the top of the stack. */
+void luaRegisterLogFunction(lua_State* lua) {
+ /* redis.log and log levels. */
+ lua_pushstring(lua,"log");
+ lua_pushcfunction(lua,luaLogCommand);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"LOG_DEBUG");
+ lua_pushnumber(lua,LL_DEBUG);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"LOG_VERBOSE");
+ lua_pushnumber(lua,LL_VERBOSE);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"LOG_NOTICE");
+ lua_pushnumber(lua,LL_NOTICE);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"LOG_WARNING");
+ lua_pushnumber(lua,LL_WARNING);
+ lua_settable(lua,-3);
+}
+
+/* Register the whole shared scripting API on a fresh Lua state:
+ * install the allow-list protection on globals, load the sandboxed
+ * libraries, replace pcall with our table-aware version, build the
+ * 'redis' table with the command helpers and constants, and swap
+ * math.random/math.randomseed for deterministic replacements. */
+void luaRegisterRedisAPI(lua_State* lua) {
+ lua_pushvalue(lua, LUA_GLOBALSINDEX);
+ luaSetAllowListProtection(lua);
+ lua_pop(lua, 1);
+
+ luaLoadLibraries(lua);
+
+ /* Replace the default pcall with our backward-compatible version. */
+ lua_pushcfunction(lua,luaRedisPcall);
+ lua_setglobal(lua, "pcall");
+
+ /* Register the redis commands table and fields */
+ lua_newtable(lua);
+
+ /* redis.call */
+ lua_pushstring(lua,"call");
+ lua_pushcfunction(lua,luaRedisCallCommand);
+ lua_settable(lua,-3);
+
+ /* redis.pcall */
+ lua_pushstring(lua,"pcall");
+ lua_pushcfunction(lua,luaRedisPCallCommand);
+ lua_settable(lua,-3);
+
+ luaRegisterLogFunction(lua);
+
+ luaRegisterVersion(lua);
+
+ /* redis.setresp */
+ lua_pushstring(lua,"setresp");
+ lua_pushcfunction(lua,luaSetResp);
+ lua_settable(lua,-3);
+
+ /* redis.sha1hex */
+ lua_pushstring(lua, "sha1hex");
+ lua_pushcfunction(lua, luaRedisSha1hexCommand);
+ lua_settable(lua, -3);
+
+ /* redis.error_reply and redis.status_reply */
+ lua_pushstring(lua, "error_reply");
+ lua_pushcfunction(lua, luaRedisErrorReplyCommand);
+ lua_settable(lua, -3);
+ lua_pushstring(lua, "status_reply");
+ lua_pushcfunction(lua, luaRedisStatusReplyCommand);
+ lua_settable(lua, -3);
+
+ /* redis.set_repl and associated flags. */
+ lua_pushstring(lua,"set_repl");
+ lua_pushcfunction(lua,luaRedisSetReplCommand);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_NONE");
+ lua_pushnumber(lua,PROPAGATE_NONE);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_AOF");
+ lua_pushnumber(lua,PROPAGATE_AOF);
+ lua_settable(lua,-3);
+
+ /* REPL_SLAVE is kept as a deprecated alias of REPL_REPLICA. */
+ lua_pushstring(lua,"REPL_SLAVE");
+ lua_pushnumber(lua,PROPAGATE_REPL);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_REPLICA");
+ lua_pushnumber(lua,PROPAGATE_REPL);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_ALL");
+ lua_pushnumber(lua,PROPAGATE_AOF|PROPAGATE_REPL);
+ lua_settable(lua,-3);
+
+ /* redis.acl_check_cmd */
+ lua_pushstring(lua,"acl_check_cmd");
+ lua_pushcfunction(lua,luaRedisAclCheckCmdPermissionsCommand);
+ lua_settable(lua,-3);
+
+ /* Finally set the table as 'redis' global var. */
+ lua_setglobal(lua,REDIS_API_NAME);
+
+ /* Replace math.random and math.randomseed with our implementations. */
+ lua_getglobal(lua,"math");
+
+ lua_pushstring(lua,"random");
+ lua_pushcfunction(lua,redis_math_random);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"randomseed");
+ lua_pushcfunction(lua,redis_math_randomseed);
+ lua_settable(lua,-3);
+
+ lua_setglobal(lua,"math");
+}
+
+/* Build a Lua array (a table with 1-based integer keys) from an array of
+ * Redis string objects and leave it on the top of the Lua stack. */
+static void luaCreateArray(lua_State *lua, robj **elev, int elec) {
+    lua_newtable(lua);
+    for (int j = 0; j < elec; j++) {
+        lua_pushlstring(lua, (char*)elev[j]->ptr, sdslen(elev[j]->ptr));
+        lua_rawseti(lua, -2, j+1);
+    }
+}
+
+/* ---------------------------------------------------------------------------
+ * Redis provided math.random
+ * ------------------------------------------------------------------------- */
+
+/* We replace math.random() with our implementation that is not affected
+ * by specific libc random() implementations and will output the same sequence
+ * (for the same seed) on every arch. */
+
+/* The following implementation is the one shipped with Lua itself but with
+ * rand() replaced by redisLrand48(). */
+static int redis_math_random (lua_State *L) {
+ /* the `%' avoids the (rare) case of r==1, and is needed also because on
+ some systems (SunOS!) `rand()' may return a value larger than RAND_MAX */
+ lua_Number r = (lua_Number)(redisLrand48()%REDIS_LRAND48_MAX) /
+ (lua_Number)REDIS_LRAND48_MAX;
+ switch (lua_gettop(L)) { /* check number of arguments */
+ case 0: { /* no arguments */
+ lua_pushnumber(L, r); /* Number between 0 and 1 */
+ break;
+ }
+ case 1: { /* only upper limit */
+ int u = luaL_checkint(L, 1);
+ luaL_argcheck(L, 1<=u, 1, "interval is empty");
+ lua_pushnumber(L, floor(r*u)+1); /* int between 1 and `u' */
+ break;
+ }
+ case 2: { /* lower and upper limits */
+ int l = luaL_checkint(L, 1);
+ int u = luaL_checkint(L, 2);
+ luaL_argcheck(L, l<=u, 2, "interval is empty");
+ lua_pushnumber(L, floor(r*(u-l+1))+l); /* int between `l' and `u' */
+ break;
+ }
+ default: return luaL_error(L, "wrong number of arguments");
+ }
+ return 1;
+}
+
+/* Replacement for math.randomseed(): seeds the deterministic
+ * redisLrand48() generator used by redis_math_random() above. */
+static int redis_math_randomseed (lua_State *L) {
+ redisSrand48(luaL_checkint(L, 1));
+ return 0;
+}
+
+/* This is the Lua script "count" hook that we use to detect scripts timeout. */
+static void luaMaskCountHook(lua_State *lua, lua_Debug *ar) {
+ UNUSED(ar);
+ scriptRunCtx* rctx = luaGetFromRegistry(lua, REGISTRY_RUN_CTX_NAME);
+ serverAssert(rctx); /* Only supported inside script invocation */
+ if (scriptInterrupt(rctx) == SCRIPT_KILL) {
+ serverLog(LL_NOTICE,"Lua script killed by user with SCRIPT KILL.");
+
+ /*
+ * Set the hook to invoke all the time so the user
+ * will not be able to catch the error with pcall and invoke
+ * pcall again, which would prevent the script from ever being killed
+ */
+ lua_sethook(lua, luaMaskCountHook, LUA_MASKLINE, 0);
+
+ luaPushError(lua,"Script killed by user with SCRIPT KILL...");
+ luaError(lua);
+ }
+}
+
+/* Release the sds strings held by an errorInfo previously populated by
+ * luaExtractErrorInformation(). NULL fields are skipped. */
+void luaErrorInformationDiscard(errorInfo *err_info) {
+ if (err_info->msg) sdsfree(err_info->msg);
+ if (err_info->source) sdsfree(err_info->source);
+ if (err_info->line) sdsfree(err_info->line);
+}
+
+/* Fill 'err_info' from the error object located at the top of the Lua
+ * stack (the value is left on the stack). A plain string error gets an
+ * "ERR " prefix; otherwise the value is treated as a table and the 'err',
+ * 'source', 'line' and 'ignore_error_stats_update' fields are extracted
+ * when present.
+ * NOTE(review): in the table path, fields missing from the table are left
+ * untouched, so the caller must zero-initialize 'err_info' first (as
+ * luaCallFunction does) — confirm for any new caller. */
+void luaExtractErrorInformation(lua_State *lua, errorInfo *err_info) {
+ if (lua_isstring(lua, -1)) {
+ err_info->msg = sdscatfmt(sdsempty(), "ERR %s", lua_tostring(lua, -1));
+ err_info->line = NULL;
+ err_info->source = NULL;
+ err_info->ignore_err_stats_update = 0;
+ return;
+ }
+
+ lua_getfield(lua, -1, "err");
+ if (lua_isstring(lua, -1)) {
+ err_info->msg = sdsnew(lua_tostring(lua, -1));
+ }
+ lua_pop(lua, 1);
+
+ lua_getfield(lua, -1, "source");
+ if (lua_isstring(lua, -1)) {
+ err_info->source = sdsnew(lua_tostring(lua, -1));
+ }
+ lua_pop(lua, 1);
+
+ lua_getfield(lua, -1, "line");
+ if (lua_isstring(lua, -1)) {
+ err_info->line = sdsnew(lua_tostring(lua, -1));
+ }
+ lua_pop(lua, 1);
+
+ lua_getfield(lua, -1, "ignore_error_stats_update");
+ if (lua_isboolean(lua, -1)) {
+ err_info->ignore_err_stats_update = lua_toboolean(lua, -1);
+ }
+ lua_pop(lua, 1);
+}
+
+/* Run the compiled script/function currently on the top of the Lua stack
+ * (with the error handler beneath it, see the stack-layout comment below),
+ * exposing the given keys/args to it, and convert the Lua result (or
+ * error) into a reply for the client of 'run_ctx'. */
+void luaCallFunction(scriptRunCtx* run_ctx, lua_State *lua, robj** keys, size_t nkeys, robj** args, size_t nargs, int debug_enabled) {
+ client* c = run_ctx->original_client;
+ int delhook = 0;
+
+ /* We must set it before we set the Lua hook, theoretically the
+ * Lua hook might be called whenever we run any Lua instruction
+ * such as 'luaSetGlobalArray' and we want the run_ctx to be available
+ * each time the Lua hook is invoked. */
+ luaSaveOnRegistry(lua, REGISTRY_RUN_CTX_NAME, run_ctx);
+
+ if (server.busy_reply_threshold > 0 && !debug_enabled) {
+ lua_sethook(lua,luaMaskCountHook,LUA_MASKCOUNT,100000);
+ delhook = 1;
+ } else if (debug_enabled) {
+ lua_sethook(lua,luaLdbLineHook,LUA_MASKLINE|LUA_MASKCOUNT,100000);
+ delhook = 1;
+ }
+
+ /* Populate the argv and keys table according to the arguments that
+ * EVAL received. */
+ luaCreateArray(lua,keys,nkeys);
+ /* On eval, keys and arguments are globals. */
+ if (run_ctx->flags & SCRIPT_EVAL_MODE){
+ /* open global protection to set KEYS */
+ lua_enablereadonlytable(lua, LUA_GLOBALSINDEX, 0);
+ lua_setglobal(lua,"KEYS");
+ lua_enablereadonlytable(lua, LUA_GLOBALSINDEX, 1);
+ }
+ luaCreateArray(lua,args,nargs);
+ if (run_ctx->flags & SCRIPT_EVAL_MODE){
+ /* open global protection to set ARGV */
+ lua_enablereadonlytable(lua, LUA_GLOBALSINDEX, 0);
+ lua_setglobal(lua,"ARGV");
+ lua_enablereadonlytable(lua, LUA_GLOBALSINDEX, 1);
+ }
+
+ /* At this point whether this script was never seen before or if it was
+ * already defined, we can call it.
+ * On eval mode, we have zero arguments and expect a single return value.
+ * In addition the error handler is located on position -2 on the Lua stack.
+ * On function mode, we pass 2 arguments (the keys and args tables),
+ * and the error handler is located on position -4 (stack: error_handler, callback, keys, args) */
+ int err;
+ if (run_ctx->flags & SCRIPT_EVAL_MODE) {
+ err = lua_pcall(lua,0,1,-2);
+ } else {
+ err = lua_pcall(lua,2,1,-4);
+ }
+
+ /* Call the Lua garbage collector from time to time to avoid a
+ * full cycle performed by Lua, which adds too much latency.
+ *
+ * The call is performed every LUA_GC_CYCLE_PERIOD executed commands
+ * (and for LUA_GC_CYCLE_PERIOD collection steps) because calling it
+ * for every command uses too much CPU. */
+ #define LUA_GC_CYCLE_PERIOD 50
+ {
+ static long gc_count = 0;
+
+ gc_count++;
+ if (gc_count == LUA_GC_CYCLE_PERIOD) {
+ lua_gc(lua,LUA_GCSTEP,LUA_GC_CYCLE_PERIOD);
+ gc_count = 0;
+ }
+ }
+
+ if (err) {
+ /* Error object is a table of the following format:
+ * {err='<error msg>', source='<source file>', line=<line>}
+ * We can construct the error message from this information */
+ if (!lua_istable(lua, -1)) {
+ const char *msg = "execution failure";
+ if (lua_isstring(lua, -1)) {
+ msg = lua_tostring(lua, -1);
+ }
+ addReplyErrorFormat(c,"Error running script %s, %.100s\n", run_ctx->funcname, msg);
+ } else {
+ errorInfo err_info = {0};
+ sds final_msg = sdsempty();
+ luaExtractErrorInformation(lua, &err_info);
+ final_msg = sdscatfmt(final_msg, "-%s",
+ err_info.msg);
+ if (err_info.line && err_info.source) {
+ final_msg = sdscatfmt(final_msg, " script: %s, on %s:%s.",
+ run_ctx->funcname,
+ err_info.source,
+ err_info.line);
+ }
+ addReplyErrorSdsEx(c, final_msg, err_info.ignore_err_stats_update? ERR_REPLY_FLAG_NO_STATS_UPDATE : 0);
+ luaErrorInformationDiscard(&err_info);
+ }
+ lua_pop(lua,1); /* Consume the Lua error */
+ } else {
+ /* On success convert the Lua return value into Redis protocol, and
+ * send it to the client. */
+ luaReplyToRedisReply(c, run_ctx->c, lua); /* Convert and consume the reply. */
+ }
+
+ /* Perform some cleanup that we need to do both on error and success. */
+ if (delhook) lua_sethook(lua,NULL,0,0); /* Disable hook */
+
+ /* remove run_ctx from registry, it's only applicable for the current script. */
+ luaSaveOnRegistry(lua, REGISTRY_RUN_CTX_NAME, NULL);
+}
+
+/* Return the memory used by the Lua interpreter, in bytes.
+ * LUA_GCCOUNT reports kilobytes, hence the multiplication by 1024. */
+unsigned long luaMemory(lua_State *lua) {
+ return lua_gc(lua, LUA_GCCOUNT, 0) * 1024LL;
+}
diff --git a/src/script_lua.h b/src/script_lua.h
new file mode 100644
index 0000000..4c2b348
--- /dev/null
+++ b/src/script_lua.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2009-2021, Redis Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SCRIPT_LUA_H_
+#define __SCRIPT_LUA_H_
+
+/*
+ * script_lua.c unit provides shared functionality between
+ * eval.c and function_lua.c. Functionality provided:
+ *
+ * * Execute Lua code, assuming that the code is located on
+ * the top of the Lua stack. In addition, parsing the execution
+ * result and convert it to the resp and reply to the client.
+ *
+ * * Run Redis commands from within the Lua code (Including
+ * parsing the reply and create a Lua object out of it).
+ *
+ * * Register Redis API to the Lua interpreter. Only shared
+ * API are registered (API that is only relevant on eval.c
+ * (like debugging) are registered on eval.c).
+ *
+ * Uses script.c for interaction back with Redis.
+ */
+
+#include "server.h"
+#include "script.h"
+#include <lua.h>
+#include <lauxlib.h>
+#include <lualib.h>
+
+#define REGISTRY_RUN_CTX_NAME "__RUN_CTX__"
+#define REGISTRY_SET_GLOBALS_PROTECTION_NAME "__GLOBAL_PROTECTION__"
+#define REDIS_API_NAME "redis"
+
+/* Error details extracted from a Lua error object, see
+ * luaExtractErrorInformation() / luaErrorInformationDiscard(). */
+typedef struct errorInfo {
+ sds msg; /* error message, may be NULL */
+ sds source; /* script source name, may be NULL */
+ sds line; /* line number as a string, may be NULL */
+ int ignore_err_stats_update; /* when set, do not count this error in the error stats */
+}errorInfo;
+
+void luaRegisterRedisAPI(lua_State* lua);
+sds luaGetStringSds(lua_State *lua, int index);
+void luaRegisterGlobalProtectionFunction(lua_State *lua);
+void luaSetErrorMetatable(lua_State *lua);
+void luaSetAllowListProtection(lua_State *lua);
+void luaSetTableProtectionRecursively(lua_State *lua);
+void luaRegisterLogFunction(lua_State* lua);
+void luaRegisterVersion(lua_State* lua);
+void luaPushErrorBuff(lua_State *lua, sds err_buff);
+void luaPushError(lua_State *lua, const char *error);
+int luaError(lua_State *lua);
+void luaSaveOnRegistry(lua_State* lua, const char* name, void* ptr);
+void* luaGetFromRegistry(lua_State* lua, const char* name);
+void luaCallFunction(scriptRunCtx* r_ctx, lua_State *lua, robj** keys, size_t nkeys, robj** args, size_t nargs, int debug_enabled);
+void luaExtractErrorInformation(lua_State *lua, errorInfo *err_info);
+void luaErrorInformationDiscard(errorInfo *err_info);
+unsigned long luaMemory(lua_State *lua);
+
+
+#endif /* __SCRIPT_LUA_H_ */
diff --git a/src/sds.c b/src/sds.c
new file mode 100644
index 0000000..2cc5b23
--- /dev/null
+++ b/src/sds.c
@@ -0,0 +1,1496 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+#include <limits.h>
+#include "sds.h"
+#include "sdsalloc.h"
+#include "util.h"
+
+const char *SDS_NOINIT = "SDS_NOINIT";
+
+static inline int sdsHdrSize(char type) {
+ switch(type&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return sizeof(struct sdshdr5);
+ case SDS_TYPE_8:
+ return sizeof(struct sdshdr8);
+ case SDS_TYPE_16:
+ return sizeof(struct sdshdr16);
+ case SDS_TYPE_32:
+ return sizeof(struct sdshdr32);
+ case SDS_TYPE_64:
+ return sizeof(struct sdshdr64);
+ }
+ return 0;
+}
+
+static inline char sdsReqType(size_t string_size) {
+ if (string_size < 1<<5)
+ return SDS_TYPE_5;
+ if (string_size < 1<<8)
+ return SDS_TYPE_8;
+ if (string_size < 1<<16)
+ return SDS_TYPE_16;
+#if (LONG_MAX == LLONG_MAX)
+ if (string_size < 1ll<<32)
+ return SDS_TYPE_32;
+ return SDS_TYPE_64;
+#else
+ return SDS_TYPE_32;
+#endif
+}
+
+static inline size_t sdsTypeMaxSize(char type) {
+ if (type == SDS_TYPE_5)
+ return (1<<5) - 1;
+ if (type == SDS_TYPE_8)
+ return (1<<8) - 1;
+ if (type == SDS_TYPE_16)
+ return (1<<16) - 1;
+#if (LONG_MAX == LLONG_MAX)
+ if (type == SDS_TYPE_32)
+ return (1ll<<32) - 1;
+#endif
+ return -1; /* this is equivalent to the max SDS_TYPE_64 or SDS_TYPE_32 */
+}
+
/* Create a new sds string with the content specified by the 'init' pointer
 * and 'initlen'.
 * If NULL is used for 'init' the string is initialized with zero bytes.
 * If SDS_NOINIT is used, the buffer is left uninitialized;
 *
 * The string is always null-terminated (all the sds strings are, always) so
 * even if you create an sds string with:
 *
 * mystring = sdsnewlen("abc",3);
 *
 * You can print the string with printf() as there is an implicit \0 at the
 * end of the string. However the string is binary safe and can contain
 * \0 characters in the middle, as the length is stored in the sds header.
 *
 * When 'trymalloc' is non-zero, the s_trymalloc_usable() allocator wrapper
 * is used instead of s_malloc_usable(); on failure NULL is returned. */
sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) {
    void *sh;
    sds s;
    char type = sdsReqType(initlen);
    /* Empty strings are usually created in order to append. Use type 8
     * since type 5 is not good at this. */
    if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
    int hdrlen = sdsHdrSize(type);
    unsigned char *fp; /* flags pointer. */
    size_t usable;

    assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */
    sh = trymalloc?
        s_trymalloc_usable(hdrlen+initlen+1, &usable) :
        s_malloc_usable(hdrlen+initlen+1, &usable);
    if (sh == NULL) return NULL;
    if (init==SDS_NOINIT)
        init = NULL;
    else if (!init)
        memset(sh, 0, hdrlen+initlen+1);
    /* The returned pointer is the character buffer; the header sits right
     * before it and the flags byte is the last byte of the header. */
    s = (char*)sh+hdrlen;
    fp = ((unsigned char*)s)-1;
    /* Store the allocator-reported usable space, capped to what the chosen
     * header type can represent in its 'alloc' field. */
    usable = usable-hdrlen-1;
    if (usable > sdsTypeMaxSize(type))
        usable = sdsTypeMaxSize(type);
    switch(type) {
        case SDS_TYPE_5: {
            /* Type 5 packs the length into the upper bits of the flags byte
             * and has no separate 'alloc' field. */
            *fp = type | (initlen << SDS_TYPE_BITS);
            break;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
    }
    if (initlen && init)
        memcpy(s, init, initlen);
    s[initlen] = '\0';
    return s;
}
+
/* Create a new sds string; see _sdsnewlen() for the full contract. */
sds sdsnewlen(const void *init, size_t initlen) {
    return _sdsnewlen(init, initlen, 0);
}

/* Like sdsnewlen(), but uses the "try" allocator wrapper, so NULL is
 * returned on allocation failure. */
sds sdstrynewlen(const void *init, size_t initlen) {
    return _sdsnewlen(init, initlen, 1);
}
+
/* Create an empty (zero length) sds string. Even in this case the string
 * always has an implicit null term. Useful as the starting point for a
 * chain of sdscat*() calls. */
sds sdsempty(void) {
    return sdsnewlen("",0);
}
+
/* Create a new sds string starting from a null terminated C string.
 * A NULL 'init' produces an empty sds string. */
sds sdsnew(const char *init) {
    size_t initlen = (init == NULL) ? 0 : strlen(init);
    return sdsnewlen(init, initlen);
}

/* Duplicate an sds string. Binary safe: the copied length comes from the
 * sds header, not from strlen(). */
sds sdsdup(const sds s) {
    return sdsnewlen(s, sdslen(s));
}
+
/* Free an sds string. No operation is performed if 's' is NULL.
 * The allocation begins at the header, so the pointer handed to s_free()
 * is rewound by the header size derived from the flags byte at s[-1]. */
void sdsfree(sds s) {
    if (s == NULL) return;
    s_free((char*)s-sdsHdrSize(s[-1]));
}
+
/* Set the sds string length to the length as obtained with strlen(), so
 * considering as content only up to the first null term character.
 *
 * This function is useful when the sds string is hacked manually in some
 * way, like in the following example:
 *
 * s = sdsnew("foobar");
 * s[2] = '\0';
 * sdsupdatelen(s);
 * printf("%d\n", sdslen(s));
 *
 * The output will be "2", but if we comment out the call to sdsupdatelen()
 * the output will be "6" as the string was modified but the logical length
 * remains 6 bytes.
 *
 * Note: any bytes after the first \0 stay in the buffer but are no longer
 * accounted for in the logical length. */
void sdsupdatelen(sds s) {
    size_t reallen = strlen(s);
    sdssetlen(s, reallen);
}
+
/* Modify an sds string in-place to make it empty (zero length).
 * However all the existing buffer is not discarded but set as free space
 * so that next append operations will not require allocations up to the
 * number of bytes previously available. O(1): only the header length and
 * the first byte are written. */
void sdsclear(sds s) {
    sdssetlen(s, 0);
    s[0] = '\0';
}
+
/* Enlarge the free space at the end of the sds string so that the caller
 * is sure that after calling this function can overwrite up to addlen
 * bytes after the end of the string, plus one more byte for nul term.
 * If there's already sufficient free space, this function returns without any
 * action, if there isn't sufficient free space, it'll allocate what's missing,
 * and possibly more:
 * When greedy is 1, enlarge more than needed, to avoid need for future reallocs
 * on incremental growth.
 * When greedy is 0, enlarge just enough so that there's free space for 'addlen'.
 *
 * Note: this does not change the *length* of the sds string as returned
 * by sdslen(), but only the free buffer space we have.
 *
 * Returns NULL if the underlying (re)allocation fails; in that case the
 * original string is still valid. */
sds _sdsMakeRoomFor(sds s, size_t addlen, int greedy) {
    void *sh, *newsh;
    size_t avail = sdsavail(s);
    size_t len, newlen, reqlen;
    char type, oldtype = s[-1] & SDS_TYPE_MASK;
    int hdrlen;
    size_t usable;

    /* Return ASAP if there is enough space left. */
    if (avail >= addlen) return s;

    len = sdslen(s);
    sh = (char*)s-sdsHdrSize(oldtype);
    reqlen = newlen = (len+addlen);
    assert(newlen > len); /* Catch size_t overflow */
    if (greedy == 1) {
        /* Greedy policy: double small strings, add a fixed chunk to large
         * ones (SDS_MAX_PREALLOC is the crossover point). */
        if (newlen < SDS_MAX_PREALLOC)
            newlen *= 2;
        else
            newlen += SDS_MAX_PREALLOC;
    }

    type = sdsReqType(newlen);

    /* Don't use type 5: the user is appending to the string and type 5 is
     * not able to remember empty space, so sdsMakeRoomFor() must be called
     * at every appending operation. */
    if (type == SDS_TYPE_5) type = SDS_TYPE_8;

    hdrlen = sdsHdrSize(type);
    assert(hdrlen + newlen + 1 > reqlen); /* Catch size_t overflow */
    if (oldtype==type) {
        /* Same header size: realloc in place and keep the existing header. */
        newsh = s_realloc_usable(sh, hdrlen+newlen+1, &usable);
        if (newsh == NULL) return NULL;
        s = (char*)newsh+hdrlen;
    } else {
        /* Since the header size changes, need to move the string forward,
         * and can't use realloc */
        newsh = s_malloc_usable(hdrlen+newlen+1, &usable);
        if (newsh == NULL) return NULL;
        memcpy((char*)newsh+hdrlen, s, len+1);
        s_free(sh);
        s = (char*)newsh+hdrlen;
        s[-1] = type;
        sdssetlen(s, len);
    }
    /* Record the usable space, capped to what the header type can hold. */
    usable = usable-hdrlen-1;
    if (usable > sdsTypeMaxSize(type))
        usable = sdsTypeMaxSize(type);
    sdssetalloc(s, usable);
    return s;
}
+
/* Enlarge the free space at the end of the sds string more than needed,
 * This is useful to avoid repeated re-allocations when repeatedly appending
 * to the sds. See _sdsMakeRoomFor() for the full contract. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    return _sdsMakeRoomFor(s, addlen, 1);
}

/* Unlike sdsMakeRoomFor(), this one just grows to the necessary size. */
sds sdsMakeRoomForNonGreedy(sds s, size_t addlen) {
    return _sdsMakeRoomFor(s, addlen, 0);
}
+
/* Reallocate the sds string so that it has no free space at the end. The
 * contained string remains not altered, but next concatenation operations
 * will require a reallocation.
 *
 * After the call, the passed sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call.
 *
 * Implemented as a shrink-to-fit via sdsResize(). */
sds sdsRemoveFreeSpace(sds s, int would_regrow) {
    return sdsResize(s, sdslen(s), would_regrow);
}
+
/* Resize the allocation, this can make the allocation bigger or smaller,
 * if the size is smaller than currently used len, the data will be truncated.
 *
 * The when the would_regrow argument is set to 1, it prevents the use of
 * SDS_TYPE_5, which is desired when the sds is likely to be changed again.
 *
 * The sdsAlloc size will be set to the requested size regardless of the actual
 * allocation size, this is done in order to avoid repeated calls to this
 * function when the caller detects that it has excess space.
 *
 * Returns NULL on allocation failure (original string still valid). */
sds sdsResize(sds s, size_t size, int would_regrow) {
    void *sh, *newsh;
    char type, oldtype = s[-1] & SDS_TYPE_MASK;
    int hdrlen, oldhdrlen = sdsHdrSize(oldtype);
    size_t len = sdslen(s);
    sh = (char*)s-oldhdrlen;

    /* Return ASAP if the size is already good. */
    if (sdsalloc(s) == size) return s;

    /* Truncate len if needed. */
    if (size < len) len = size;

    /* Check what would be the minimum SDS header that is just good enough to
     * fit this string. */
    type = sdsReqType(size);
    if (would_regrow) {
        /* Don't use type 5, it is not good for strings that are expected to grow back. */
        if (type == SDS_TYPE_5) type = SDS_TYPE_8;
    }
    hdrlen = sdsHdrSize(type);

    /* If the type is the same, or can hold the size in it with low overhead
     * (larger than SDS_TYPE_8), we just realloc(), letting the allocator
     * to do the copy only if really needed. Otherwise if the change is
     * huge, we manually reallocate the string to use the different header
     * type. */
    int use_realloc = (oldtype==type || (type < oldtype && type > SDS_TYPE_8));
    size_t newlen = use_realloc ? oldhdrlen+size+1 : hdrlen+size+1;

    if (use_realloc) {
        int alloc_already_optimal = 0;
        #if defined(USE_JEMALLOC)
            /* je_nallocx returns the expected allocation size for the newlen.
             * We aim to avoid calling realloc() when using Jemalloc if there is no
             * change in the allocation size, as it incurs a cost even if the
             * allocation size stays the same. */
            alloc_already_optimal = (je_nallocx(newlen, 0) == zmalloc_size(sh));
        #endif
        if (!alloc_already_optimal) {
            newsh = s_realloc(sh, newlen);
            if (newsh == NULL) return NULL;
            s = (char*)newsh+oldhdrlen;
        }
        /* If already optimal, 's' and its header are left untouched. */
    } else {
        newsh = s_malloc(newlen);
        if (newsh == NULL) return NULL;
        memcpy((char*)newsh+hdrlen, s, len);
        s_free(sh);
        s = (char*)newsh+hdrlen;
        s[-1] = type;
    }
    s[len] = 0;
    sdssetlen(s, len);
    /* Note: the requested size is stored, not the allocator's real size. */
    sdssetalloc(s, size);
    return s;
}
+
+/* Return the total size of the allocation of the specified sds string,
+ * including:
+ * 1) The sds header before the pointer.
+ * 2) The string.
+ * 3) The free buffer at the end if any.
+ * 4) The implicit null term.
+ */
+size_t sdsAllocSize(sds s) {
+ size_t alloc = sdsalloc(s);
+ return sdsHdrSize(s[-1])+alloc+1;
+}
+
+/* Return the pointer of the actual SDS allocation (normally SDS strings
+ * are referenced by the start of the string buffer). */
+void *sdsAllocPtr(sds s) {
+ return (void*) (s-sdsHdrSize(s[-1]));
+}
+
/* Increment the sds length and decrements the left free space at the
 * end of the string according to 'incr'. Also set the null term
 * in the new end of the string.
 *
 * This function is used in order to fix the string length after the
 * user calls sdsMakeRoomFor(), writes something after the end of
 * the current string, and finally needs to set the new length.
 *
 * Note: it is possible to use a negative increment in order to
 * right-trim the string.
 *
 * Usage example:
 *
 * Using sdsIncrLen() and sdsMakeRoomFor() it is possible to mount the
 * following schema, to cat bytes coming from the kernel to the end of an
 * sds string without copying into an intermediate buffer:
 *
 * oldlen = sdslen(s);
 * s = sdsMakeRoomFor(s, BUFFER_SIZE);
 * nread = read(fd, s+oldlen, BUFFER_SIZE);
 * ... check for nread <= 0 and handle it ...
 * sdsIncrLen(s, nread);
 *
 * The asserts verify that the increment stays within the allocated space
 * (and that a negative increment does not underflow the length). */
void sdsIncrLen(sds s, ssize_t incr) {
    unsigned char flags = s[-1];
    size_t len;
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5: {
            /* Type 5 keeps the length in the flags byte itself. */
            unsigned char *fp = ((unsigned char*)s)-1;
            unsigned char oldlen = SDS_TYPE_5_LEN(flags);
            assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr)));
            *fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS);
            len = oldlen+incr;
            break;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
            len = (sh->len += incr);
            break;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
            len = (sh->len += incr);
            break;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
            len = (sh->len += incr);
            break;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr)));
            len = (sh->len += incr);
            break;
        }
        default: len = 0; /* Just to avoid compilation warnings. */
    }
    s[len] = '\0';
}
+
+/* Grow the sds to have the specified length. Bytes that were not part of
+ * the original length of the sds will be set to zero.
+ *
+ * if the specified length is smaller than the current length, no operation
+ * is performed. */
+sds sdsgrowzero(sds s, size_t len) {
+ size_t curlen = sdslen(s);
+
+ if (len <= curlen) return s;
+ s = sdsMakeRoomFor(s,len-curlen);
+ if (s == NULL) return NULL;
+
+ /* Make sure added region doesn't contain garbage */
+ memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */
+ sdssetlen(s, len);
+ return s;
+}
+
+/* Append the specified binary-safe string pointed by 't' of 'len' bytes to the
+ * end of the specified sds string 's'.
+ *
+ * After the call, the passed sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscatlen(sds s, const void *t, size_t len) {
+ size_t curlen = sdslen(s);
+
+ s = sdsMakeRoomFor(s,len);
+ if (s == NULL) return NULL;
+ memcpy(s+curlen, t, len);
+ sdssetlen(s, curlen+len);
+ s[curlen+len] = '\0';
+ return s;
+}
+
/* Append the specified null terminated C string to the sds string 's'.
 * Not binary safe: 't' must be NUL-terminated.
 *
 * After the call, the passed sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call. */
sds sdscat(sds s, const char *t) {
    return sdscatlen(s, t, strlen(t));
}

/* Append the specified sds 't' to the existing sds 's'. Binary safe: the
 * length comes from the header of 't'.
 *
 * After the call, the modified sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call. */
sds sdscatsds(sds s, const sds t) {
    return sdscatlen(s, t, sdslen(t));
}
+
/* Destructively modify the sds string 's' to hold the specified binary
 * safe string pointed by 't' of length 'len' bytes.
 * The buffer is grown only when the current allocation cannot hold 'len'
 * bytes. Returns NULL on allocation failure. */
sds sdscpylen(sds s, const char *t, size_t len) {
    if (sdsalloc(s) < len) {
        s = sdsMakeRoomFor(s,len-sdslen(s));
        if (s == NULL) return NULL;
    }
    memcpy(s, t, len);
    s[len] = '\0';
    sdssetlen(s, len);
    return s;
}

/* Like sdscpylen() but 't' must be a null-terminated string so that the length
 * of the string is obtained with strlen(). */
sds sdscpy(sds s, const char *t) {
    return sdscpylen(s, t, strlen(t));
}
+
/* Create an sds string from a long long value. It is much faster than:
 *
 * sdscatprintf(sdsempty(),"%lld\n", value);
 *
 * Uses ll2string() (util.h) to format into a stack buffer of
 * LONG_STR_SIZE bytes, then copies exactly the produced length. */
sds sdsfromlonglong(long long value) {
    char buf[LONG_STR_SIZE];
    int len = ll2string(buf,sizeof(buf),value);

    return sdsnewlen(buf,len);
}
+
/* Like sdscatprintf() but gets va_list instead of being variadic.
 * Returns NULL on allocation or formatting failure; on success the
 * (possibly moved) sds string is returned. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
    va_list cpy;
    char staticbuf[1024], *buf = staticbuf, *t;
    size_t buflen = strlen(fmt)*2;
    int bufstrlen;

    /* We try to start using a static buffer for speed.
     * If not possible we revert to heap allocation. */
    if (buflen > sizeof(staticbuf)) {
        buf = s_malloc(buflen);
        if (buf == NULL) return NULL;
    } else {
        buflen = sizeof(staticbuf);
    }

    /* Alloc enough space for buffer and \0 after failing to
     * fit the string in the current buffer size. */
    while(1) {
        /* va_copy each attempt: a va_list cannot be reused after
         * vsnprintf() has consumed it. */
        va_copy(cpy,ap);
        bufstrlen = vsnprintf(buf, buflen, fmt, cpy);
        va_end(cpy);
        if (bufstrlen < 0) {
            /* Encoding/format error reported by vsnprintf(). */
            if (buf != staticbuf) s_free(buf);
            return NULL;
        }
        if (((size_t)bufstrlen) >= buflen) {
            /* Truncated: vsnprintf() told us the needed size; retry once
             * with an exactly-sized buffer. */
            if (buf != staticbuf) s_free(buf);
            buflen = ((size_t)bufstrlen) + 1;
            buf = s_malloc(buflen);
            if (buf == NULL) return NULL;
            continue;
        }
        break;
    }

    /* Finally concat the obtained string to the SDS string and return it. */
    t = sdscatlen(s, buf, bufstrlen);
    if (buf != staticbuf) s_free(buf);
    return t;
}
+
/* Append to the sds string 's' a string obtained using printf-alike format
 * specifier.
 *
 * After the call, the modified sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call.
 *
 * Example:
 *
 * s = sdsnew("Sum is: ");
 * s = sdscatprintf(s,"%d+%d = %d",a,b,a+b).
 *
 * Often you need to create a string from scratch with the printf-alike
 * format. When this is the need, just use sdsempty() as the target string:
 *
 * s = sdscatprintf(sdsempty(), "... your format ...", args);
 *
 * Thin variadic wrapper around sdscatvprintf(); may return NULL on
 * allocation failure. */
sds sdscatprintf(sds s, const char *fmt, ...) {
    va_list ap;
    char *t;
    va_start(ap, fmt);
    t = sdscatvprintf(s,fmt,ap);
    va_end(ap);
    return t;
}
+
/* This function is similar to sdscatprintf, but much faster as it does
 * not rely on sprintf() family functions implemented by the libc that
 * are often very slow. Moreover directly handling the sds string as
 * new data is concatenated provides a performance improvement.
 *
 * However this function only handles an incompatible subset of printf-alike
 * format specifiers:
 *
 * %s - C String
 * %S - SDS string
 * %i - signed int
 * %I - 64 bit signed integer (long long, int64_t)
 * %u - unsigned int
 * %U - 64 bit unsigned integer (unsigned long long, uint64_t)
 * %% - Verbatim "%" character.
 */
sds sdscatfmt(sds s, char const *fmt, ...) {
    size_t initlen = sdslen(s);
    const char *f = fmt;
    long i;
    va_list ap;

    /* To avoid continuous reallocations, let's start with a buffer that
     * can hold at least two times the format string itself. It's not the
     * best heuristic but seems to work in practice. */
    s = sdsMakeRoomFor(s, strlen(fmt)*2);
    va_start(ap,fmt);
    f = fmt; /* Next format specifier byte to process. */
    i = initlen; /* Position of the next byte to write to dest str. */
    while(*f) {
        char next, *str;
        size_t l;
        long long num;
        unsigned long long unum;

        /* Make sure there is always space for at least 1 char. */
        if (sdsavail(s)==0) {
            s = sdsMakeRoomFor(s,1);
        }

        switch(*f) {
        case '%':
            next = *(f+1);
            if (next == '\0') break; /* Trailing '%': emitted verbatim below. */
            f++;
            switch(next) {
            case 's':
            case 'S':
                str = va_arg(ap,char*);
                l = (next == 's') ? strlen(str) : sdslen(str);
                if (sdsavail(s) < l) {
                    s = sdsMakeRoomFor(s,l);
                }
                memcpy(s+i,str,l);
                sdsinclen(s,l);
                i += l;
                break;
            case 'i':
            case 'I':
                if (next == 'i')
                    num = va_arg(ap,int);
                else
                    num = va_arg(ap,long long);
                {
                    char buf[LONG_STR_SIZE];
                    l = ll2string(buf,sizeof(buf),num);
                    if (sdsavail(s) < l) {
                        s = sdsMakeRoomFor(s,l);
                    }
                    memcpy(s+i,buf,l);
                    sdsinclen(s,l);
                    i += l;
                }
                break;
            case 'u':
            case 'U':
                if (next == 'u')
                    unum = va_arg(ap,unsigned int);
                else
                    unum = va_arg(ap,unsigned long long);
                {
                    char buf[LONG_STR_SIZE];
                    l = ull2string(buf,sizeof(buf),unum);
                    if (sdsavail(s) < l) {
                        s = sdsMakeRoomFor(s,l);
                    }
                    memcpy(s+i,buf,l);
                    sdsinclen(s,l);
                    i += l;
                }
                break;
            default: /* Handle %% and generally %<unknown>. */
                s[i++] = next;
                sdsinclen(s,1);
                break;
            }
            break;
        default:
            s[i++] = *f;
            sdsinclen(s,1);
            break;
        }
        f++;
    }
    va_end(ap);

    /* Add null-term */
    s[i] = '\0';
    return s;
}
+
/* Remove the part of the string from left and from right composed just of
 * contiguous characters found in 'cset', that is a null terminated C string.
 *
 * After the call, the modified sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call.
 *
 * Example:
 *
 * s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
 * s = sdstrim(s,"Aa. :");
 * printf("%s\n", s);
 *
 * Output will be just "HelloWorld".
 *
 * Note: an empty input is safe -- 'end' points one before the buffer, so
 * both scan loops terminate immediately and len becomes 0. */
sds sdstrim(sds s, const char *cset) {
    char *end, *sp, *ep;
    size_t len;

    sp = s;                      /* first byte to keep, scanning rightward */
    ep = end = s+sdslen(s)-1;    /* last byte to keep, scanning leftward */
    while(sp <= end && strchr(cset, *sp)) sp++;
    while(ep > sp && strchr(cset, *ep)) ep--;
    len = (ep-sp)+1;
    if (s != sp) memmove(s, sp, len);
    s[len] = '\0';
    sdssetlen(s,len);
    return s;
}
+
+/* Changes the input string to be a subset of the original.
+ * It does not release the free space in the string, so a call to
+ * sdsRemoveFreeSpace may be wise after. */
+void sdssubstr(sds s, size_t start, size_t len) {
+ /* Clamp out of range input */
+ size_t oldlen = sdslen(s);
+ if (start >= oldlen) start = len = 0;
+ if (len > oldlen-start) len = oldlen-start;
+
+ /* Move the data */
+ if (len) memmove(s, s+start, len);
+ s[len] = 0;
+ sdssetlen(s,len);
+}
+
/* Turn the string into a smaller (or equal) string containing only the
 * substring specified by the 'start' and 'end' indexes.
 *
 * start and end can be negative, where -1 means the last character of the
 * string, -2 the penultimate character, and so forth.
 *
 * The interval is inclusive, so the start and end characters will be part
 * of the resulting string.
 *
 * The string is modified in-place.
 *
 * NOTE: this function can be misleading and can have unexpected behaviour,
 * specifically when you want the length of the new string to be 0.
 * Having start==end will result in a string with one character.
 * please consider using sdssubstr instead.
 *
 * Example:
 *
 * s = sdsnew("Hello World");
 * sdsrange(s,1,-1); => "ello World"
 */
void sdsrange(sds s, ssize_t start, ssize_t end) {
    size_t newlen, len = sdslen(s);
    if (len == 0) return;
    if (start < 0)
        start = len + start;
    if (end < 0)
        end = len + end;
    newlen = (start > end) ? 0 : (end-start)+1;
    /* A 'start' that is still negative here wraps to a huge size_t when
     * passed to sdssubstr(), whose clamp then yields an empty string. */
    sdssubstr(s, start, newlen);
}
+
+/* Apply tolower() to every character of the sds string 's'. */
+void sdstolower(sds s) {
+ size_t len = sdslen(s), j;
+
+ for (j = 0; j < len; j++) s[j] = tolower(s[j]);
+}
+
+/* Apply toupper() to every character of the sds string 's'. */
+void sdstoupper(sds s) {
+ size_t len = sdslen(s), j;
+
+ for (j = 0; j < len; j++) s[j] = toupper(s[j]);
+}
+
+/* Compare two sds strings s1 and s2 with memcmp().
+ *
+ * Return value:
+ *
+ * positive if s1 > s2.
+ * negative if s1 < s2.
+ * 0 if s1 and s2 are exactly the same binary string.
+ *
+ * If two strings share exactly the same prefix, but one of the two has
+ * additional characters, the longer string is considered to be greater than
+ * the smaller one. */
+int sdscmp(const sds s1, const sds s2) {
+ size_t l1, l2, minlen;
+ int cmp;
+
+ l1 = sdslen(s1);
+ l2 = sdslen(s2);
+ minlen = (l1 < l2) ? l1 : l2;
+ cmp = memcmp(s1,s2,minlen);
+ if (cmp == 0) return l1>l2? 1: (l1<l2? -1: 0);
+ return cmp;
+}
+
/* Split 's' with separator in 'sep'. An array
 * of sds strings is returned. *count will be set
 * by reference to the number of tokens returned.
 *
 * On out of memory, zero length string, zero length
 * separator, NULL is returned.
 *
 * Note that 'sep' is able to split a string using
 * a multi-character separator. For example
 * sdssplit("foo_-_bar","_-_"); will return two
 * elements "foo" and "bar".
 *
 * This version of the function is binary-safe but
 * requires length arguments. sdssplit() is just the
 * same function but for zero-terminated strings.
 *
 * The caller owns the result and must free it with sdsfreesplitres(). */
sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count) {
    int elements = 0, slots = 5;
    long start = 0, j;
    sds *tokens;

    if (seplen < 1 || len <= 0) {
        *count = 0;
        return NULL;
    }
    tokens = s_malloc(sizeof(sds)*slots);
    if (tokens == NULL) return NULL;

    /* Scan stops seplen-1 bytes early: a separator can no longer fit. */
    for (j = 0; j < (len-(seplen-1)); j++) {
        /* make sure there is room for the next element and the final one */
        if (slots < elements+2) {
            sds *newtokens;

            slots *= 2;
            newtokens = s_realloc(tokens,sizeof(sds)*slots);
            if (newtokens == NULL) goto cleanup;
            tokens = newtokens;
        }
        /* search the separator */
        if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) {
            tokens[elements] = sdsnewlen(s+start,j-start);
            if (tokens[elements] == NULL) goto cleanup;
            elements++;
            start = j+seplen;
            j = j+seplen-1; /* skip the separator */
        }
    }
    /* Add the final element. We are sure there is room in the tokens array. */
    tokens[elements] = sdsnewlen(s+start,len-start);
    if (tokens[elements] == NULL) goto cleanup;
    elements++;
    *count = elements;
    return tokens;

cleanup:
    {
        /* On failure, release every token created so far plus the array. */
        int i;
        for (i = 0; i < elements; i++) sdsfree(tokens[i]);
        s_free(tokens);
        *count = 0;
        return NULL;
    }
}
+
+/* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */
+void sdsfreesplitres(sds *tokens, int count) {
+ if (!tokens) return;
+ while(count--)
+ sdsfree(tokens[count]);
+ s_free(tokens);
+}
+
+/* Append to the sds string "s" an escaped string representation where
+ * all the non-printable characters (tested with isprint()) are turned into
+ * escapes in the form "\n\r\a...." or "\x<hex-number>".
+ *
+ * After the call, the modified sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscatrepr(sds s, const char *p, size_t len) {
+ s = sdsMakeRoomFor(s, len + 2);
+ s = sdscatlen(s,"\"",1);
+ while(len--) {
+ switch(*p) {
+ case '\\':
+ case '"':
+ s = sdscatprintf(s,"\\%c",*p);
+ break;
+ case '\n': s = sdscatlen(s,"\\n",2); break;
+ case '\r': s = sdscatlen(s,"\\r",2); break;
+ case '\t': s = sdscatlen(s,"\\t",2); break;
+ case '\a': s = sdscatlen(s,"\\a",2); break;
+ case '\b': s = sdscatlen(s,"\\b",2); break;
+ default:
+ if (isprint(*p))
+ s = sdscatlen(s, p, 1);
+ else
+ s = sdscatprintf(s,"\\x%02x",(unsigned char)*p);
+ break;
+ }
+ p++;
+ }
+ return sdscatlen(s,"\"",1);
+}
+
+/* Returns one if the string contains characters to be escaped
+ * by sdscatrepr(), zero otherwise.
+ *
+ * Typically, this should be used to help protect aggregated strings in a way
+ * that is compatible with sdssplitargs(). For this reason, also spaces will be
+ * treated as needing an escape.
+ */
+int sdsneedsrepr(const sds s) {
+ size_t len = sdslen(s);
+ const char *p = s;
+
+ while (len--) {
+ if (*p == '\\' || *p == '"' || *p == '\n' || *p == '\r' ||
+ *p == '\t' || *p == '\a' || *p == '\b' || !isprint(*p) || isspace(*p)) return 1;
+ p++;
+ }
+
+ return 0;
+}
+
/* Helper function for sdssplitargs() that returns non zero if 'c'
 * is a valid ASCII hexadecimal digit (0-9, a-f, A-F). */
int is_hex_digit(char c) {
    if (c >= '0' && c <= '9') return 1;
    if (c >= 'a' && c <= 'f') return 1;
    return (c >= 'A' && c <= 'F');
}
+
/* Helper function for sdssplitargs() that converts a hex digit into an
 * integer from 0 to 15. Non-hex input maps to 0, matching the original
 * switch's default branch. */
int hex_digit_to_int(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return 0;
}
+
+/* Split a line into arguments, where every argument can be in the
+ * following programming-language REPL-alike form:
+ *
+ * foo bar "newline are supported\n" and "\xff\x00otherstuff"
+ *
+ * The number of arguments is stored into *argc, and an array
+ * of sds is returned.
+ *
+ * The caller should free the resulting array of sds strings with
+ * sdsfreesplitres().
+ *
+ * Note that sdscatrepr() is able to convert back a string into
+ * a quoted string in the same format sdssplitargs() is able to parse.
+ *
+ * The function returns the allocated tokens on success, even when the
+ * input string is empty, or NULL if the input contains unbalanced
+ * quotes or closed quotes followed by non space characters
+ * as in: "foo"bar or "foo'
+ */
+sds *sdssplitargs(const char *line, int *argc) {
+    const char *p = line;
+    char *current = NULL;   /* token currently being accumulated, or NULL */
+    char **vector = NULL;   /* growing array of completed tokens */
+
+    /* NOTE: characters are cast to unsigned char before isspace() since
+     * passing a (possibly negative) plain char is undefined behavior
+     * (CERT STR37-C). */
+    *argc = 0;
+    while(1) {
+        /* skip blanks */
+        while(*p && isspace((unsigned char)*p)) p++;
+        if (*p) {
+            /* get a token */
+            int inq=0;  /* set to 1 if we are in "quotes" */
+            int insq=0; /* set to 1 if we are in 'single quotes' */
+            int done=0;
+
+            if (current == NULL) current = sdsempty();
+            while(!done) {
+                if (inq) {
+                    if (*p == '\\' && *(p+1) == 'x' &&
+                                             is_hex_digit(*(p+2)) &&
+                                             is_hex_digit(*(p+3)))
+                    {
+                        unsigned char byte;
+
+                        /* \xHH hex escape inside double quotes. */
+                        byte = (hex_digit_to_int(*(p+2))*16)+
+                                hex_digit_to_int(*(p+3));
+                        current = sdscatlen(current,(char*)&byte,1);
+                        p += 3;
+                    } else if (*p == '\\' && *(p+1)) {
+                        char c;
+
+                        p++;
+                        switch(*p) {
+                        case 'n': c = '\n'; break;
+                        case 'r': c = '\r'; break;
+                        case 't': c = '\t'; break;
+                        case 'b': c = '\b'; break;
+                        case 'a': c = '\a'; break;
+                        default: c = *p; break;
+                        }
+                        current = sdscatlen(current,&c,1);
+                    } else if (*p == '"') {
+                        /* closing quote must be followed by a space or
+                         * nothing at all. */
+                        if (*(p+1) && !isspace((unsigned char)*(p+1))) goto err;
+                        done=1;
+                    } else if (!*p) {
+                        /* unterminated quotes */
+                        goto err;
+                    } else {
+                        current = sdscatlen(current,p,1);
+                    }
+                } else if (insq) {
+                    if (*p == '\\' && *(p+1) == '\'') {
+                        /* \' is the only escape inside single quotes. */
+                        p++;
+                        current = sdscatlen(current,"'",1);
+                    } else if (*p == '\'') {
+                        /* closing quote must be followed by a space or
+                         * nothing at all. */
+                        if (*(p+1) && !isspace((unsigned char)*(p+1))) goto err;
+                        done=1;
+                    } else if (!*p) {
+                        /* unterminated quotes */
+                        goto err;
+                    } else {
+                        current = sdscatlen(current,p,1);
+                    }
+                } else {
+                    switch(*p) {
+                    case ' ':
+                    case '\n':
+                    case '\r':
+                    case '\t':
+                    case '\0':
+                        done=1;
+                        break;
+                    case '"':
+                        inq=1;
+                        break;
+                    case '\'':
+                        insq=1;
+                        break;
+                    default:
+                        current = sdscatlen(current,p,1);
+                        break;
+                    }
+                }
+                if (*p) p++;
+            }
+            /* add the token to the vector */
+            vector = s_realloc(vector,((*argc)+1)*sizeof(char*));
+            vector[*argc] = current;
+            (*argc)++;
+            current = NULL;
+        } else {
+            /* Even on empty input string return something not NULL. */
+            if (vector == NULL) vector = s_malloc(sizeof(void*));
+            return vector;
+        }
+    }
+
+err:
+    /* Release every completed token, the vector, and any partial token. */
+    while((*argc)--)
+        sdsfree(vector[*argc]);
+    s_free(vector);
+    if (current) sdsfree(current);
+    *argc = 0;
+    return NULL;
+}
+
+/* In-place character substitution: every occurrence in 's' of a
+ * character listed in 'from' is replaced by the character at the same
+ * index in 'to'. 'setlen' is the length of both sets.
+ *
+ * Example: sdsmapchars(mystring, "ho", "01", 2) turns the string
+ * "hello" into "0ell1".
+ *
+ * Returns the sds string pointer itself: the mapping never changes the
+ * string length, so no reallocation can happen. */
+sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen) {
+    size_t len = sdslen(s);
+
+    for (size_t pos = 0; pos < len; pos++) {
+        for (size_t k = 0; k < setlen; k++) {
+            if (s[pos] != from[k]) continue;
+            s[pos] = to[k];
+            break;
+        }
+    }
+    return s;
+}
+
+/* Join an array of C strings using the specified separator (also a C
+ * string). Returns the result as a newly allocated sds string. */
+sds sdsjoin(char **argv, int argc, char *sep) {
+    sds out = sdsempty();
+
+    for (int i = 0; i < argc; i++) {
+        /* Emit the separator before every element but the first. */
+        if (i > 0) out = sdscat(out,sep);
+        out = sdscat(out, argv[i]);
+    }
+    return out;
+}
+
+/* Like sdsjoin, but the elements are sds strings and the separator is
+ * a binary-safe buffer of 'seplen' bytes. */
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) {
+    sds out = sdsempty();
+
+    for (int i = 0; i < argc; i++) {
+        /* Emit the separator before every element but the first. */
+        if (i > 0) out = sdscatlen(out,sep,seplen);
+        out = sdscatsds(out, argv[i]);
+    }
+    return out;
+}
+
+/* Wrappers to the allocators used by SDS. Note that SDS will actually
+ * just use the macros defined into sdsalloc.h in order to avoid to pay
+ * the overhead of function calls. Here we define these wrappers only for
+ * the programs SDS is linked to, if they want to touch the SDS internals
+ * even if they use a different allocator. */
+/* These must stay in sync with the s_malloc/s_realloc/s_free macros
+ * defined in sdsalloc.h. */
+void *sds_malloc(size_t size) { return s_malloc(size); }
+void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
+void sds_free(void *ptr) { s_free(ptr); }
+
+/* Perform expansion of a template string and return the result as a newly
+ * allocated sds.
+ *
+ * Template variables are specified using curly brackets, e.g. {variable}.
+ * An opening bracket can be quoted by repeating it twice.
+ *
+ * 'cb_func' is invoked once per variable with the variable name (as an
+ * sds, freed by this function) and 'cb_arg'; it must return the
+ * substitution value, or NULL to signal an error.
+ *
+ * Returns NULL on a malformed template (a '{' at the very end, a variable
+ * with no closing '}') or when the callback returns NULL; otherwise the
+ * caller owns the returned sds.
+ */
+sds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg)
+{
+    sds res = sdsempty();
+    const char *p = template;
+
+    while (*p) {
+        /* Find next variable, copy everything until there */
+        const char *sv = strchr(p, '{');
+        if (!sv) {
+            /* Not found: copy till rest of template and stop */
+            res = sdscat(res, p);
+            break;
+        } else if (sv > p) {
+            /* Found: copy anything up to the beginning of the variable */
+            res = sdscatlen(res, p, sv - p);
+        }
+
+        /* Skip into variable name, handle premature end or quoting */
+        sv++;
+        if (!*sv) goto error; /* Premature end of template */
+        if (*sv == '{') {
+            /* Quoted '{' */
+            p = sv + 1;
+            res = sdscat(res, "{");
+            continue;
+        }
+
+        /* Find end of variable name, handle premature end of template */
+        const char *ev = strchr(sv, '}');
+        if (!ev) goto error;
+
+        /* Pass variable name to callback and obtain value. If callback failed,
+         * abort. */
+        sds varname = sdsnewlen(sv, ev - sv);
+        sds value = cb_func(varname, cb_arg);
+        sdsfree(varname);
+        if (!value) goto error;
+
+        /* Append value to result and continue */
+        res = sdscat(res, value);
+        sdsfree(value);
+        p = ev + 1;
+    }
+
+    return res;
+
+error:
+    /* Partial result is released; caller only ever sees NULL on error. */
+    sdsfree(res);
+    return NULL;
+}
+
+#ifdef REDIS_TEST
+#include <stdio.h>
+#include <limits.h>
+#include "testhelp.h"
+
+#define UNUSED(x) (void)(x)
+
+/* Template callback used by the sdstemplate() tests below: resolves only
+ * "variable1" and "variable2"; any other name returns NULL, which
+ * sdstemplate() treats as an error. */
+static sds sdsTestTemplateCallback(sds varname, void *arg) {
+    UNUSED(arg);
+    static const char *_var1 = "variable1";
+    static const char *_var2 = "variable2";
+
+    if (!strcmp(varname, _var1)) return sdsnew("value1");
+    else if (!strcmp(varname, _var2)) return sdsnew("value2");
+    else return NULL;
+}
+
+/* SDS self-test suite. Fixes mislabeled/misspelled test names:
+ * "sdsrezie" -> "sdsresize", and the aar-vs-bar comparison label. */
+int sdsTest(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    {
+        sds x = sdsnew("foo"), y;
+
+        test_cond("Create a string and obtain the length",
+            sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0);
+
+        sdsfree(x);
+        x = sdsnewlen("foo",2);
+        test_cond("Create a string with specified length",
+            sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0);
+
+        x = sdscat(x,"bar");
+        test_cond("Strings concatenation",
+            sdslen(x) == 5 && memcmp(x,"fobar\0",6) == 0);
+
+        x = sdscpy(x,"a");
+        test_cond("sdscpy() against an originally longer string",
+            sdslen(x) == 1 && memcmp(x,"a\0",2) == 0);
+
+        x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk");
+        test_cond("sdscpy() against an originally shorter string",
+            sdslen(x) == 33 &&
+            memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0);
+
+        sdsfree(x);
+        x = sdscatprintf(sdsempty(),"%d",123);
+        test_cond("sdscatprintf() seems working in the base case",
+            sdslen(x) == 3 && memcmp(x,"123\0",4) == 0);
+
+        sdsfree(x);
+        x = sdscatprintf(sdsempty(),"a%cb",0);
+        test_cond("sdscatprintf() seems working with \\0 inside of result",
+            sdslen(x) == 3 && memcmp(x,"a\0""b\0",4) == 0);
+
+        {
+            sdsfree(x);
+            char etalon[1024*1024];
+            for (size_t i = 0; i < sizeof(etalon); i++) {
+                etalon[i] = '0';
+            }
+            x = sdscatprintf(sdsempty(),"%0*d",(int)sizeof(etalon),0);
+            test_cond("sdscatprintf() can print 1MB",
+                sdslen(x) == sizeof(etalon) && memcmp(x,etalon,sizeof(etalon)) == 0);
+        }
+
+        sdsfree(x);
+        x = sdsnew("--");
+        x = sdscatfmt(x, "Hello %s World %I,%I--", "Hi!", LLONG_MIN,LLONG_MAX);
+        test_cond("sdscatfmt() seems working in the base case",
+            sdslen(x) == 60 &&
+            memcmp(x,"--Hello Hi! World -9223372036854775808,"
+                     "9223372036854775807--",60) == 0);
+        printf("[%s]\n",x);
+
+        sdsfree(x);
+        x = sdsnew("--");
+        x = sdscatfmt(x, "%u,%U--", UINT_MAX, ULLONG_MAX);
+        test_cond("sdscatfmt() seems working with unsigned numbers",
+            sdslen(x) == 35 &&
+            memcmp(x,"--4294967295,18446744073709551615--",35) == 0);
+
+        sdsfree(x);
+        x = sdsnew(" x ");
+        sdstrim(x," x");
+        test_cond("sdstrim() works when all chars match",
+            sdslen(x) == 0);
+
+        sdsfree(x);
+        x = sdsnew(" x ");
+        sdstrim(x," ");
+        test_cond("sdstrim() works when a single char remains",
+            sdslen(x) == 1 && x[0] == 'x');
+
+        sdsfree(x);
+        x = sdsnew("xxciaoyyy");
+        sdstrim(x,"xy");
+        test_cond("sdstrim() correctly trims characters",
+            sdslen(x) == 4 && memcmp(x,"ciao\0",5) == 0);
+
+        y = sdsdup(x);
+        sdsrange(y,1,1);
+        test_cond("sdsrange(...,1,1)",
+            sdslen(y) == 1 && memcmp(y,"i\0",2) == 0);
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,1,-1);
+        test_cond("sdsrange(...,1,-1)",
+            sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0);
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,-2,-1);
+        test_cond("sdsrange(...,-2,-1)",
+            sdslen(y) == 2 && memcmp(y,"ao\0",3) == 0);
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,2,1);
+        test_cond("sdsrange(...,2,1)",
+            sdslen(y) == 0 && memcmp(y,"\0",1) == 0);
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,1,100);
+        test_cond("sdsrange(...,1,100)",
+            sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0);
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,100,100);
+        test_cond("sdsrange(...,100,100)",
+            sdslen(y) == 0 && memcmp(y,"\0",1) == 0);
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,4,6);
+        test_cond("sdsrange(...,4,6)",
+            sdslen(y) == 0 && memcmp(y,"\0",1) == 0);
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,3,6);
+        test_cond("sdsrange(...,3,6)",
+            sdslen(y) == 1 && memcmp(y,"o\0",2) == 0);
+
+        sdsfree(y);
+        sdsfree(x);
+        x = sdsnew("foo");
+        y = sdsnew("foa");
+        test_cond("sdscmp(foo,foa)", sdscmp(x,y) > 0);
+
+        sdsfree(y);
+        sdsfree(x);
+        x = sdsnew("bar");
+        y = sdsnew("bar");
+        test_cond("sdscmp(bar,bar)", sdscmp(x,y) == 0);
+
+        sdsfree(y);
+        sdsfree(x);
+        x = sdsnew("aar");
+        y = sdsnew("bar");
+        test_cond("sdscmp(aar,bar)", sdscmp(x,y) < 0);
+
+        sdsfree(y);
+        sdsfree(x);
+        x = sdsnewlen("\a\n\0foo\r",7);
+        y = sdscatrepr(sdsempty(),x,sdslen(x));
+        test_cond("sdscatrepr(...data...)",
+            memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0);
+
+        {
+            unsigned int oldfree;
+            char *p;
+            int i;
+            size_t step = 10, j;
+
+            sdsfree(x);
+            sdsfree(y);
+            x = sdsnew("0");
+            test_cond("sdsnew() free/len buffers", sdslen(x) == 1 && sdsavail(x) == 0);
+
+            /* Run the test a few times in order to hit the first two
+             * SDS header types. */
+            for (i = 0; i < 10; i++) {
+                size_t oldlen = sdslen(x);
+                x = sdsMakeRoomFor(x,step);
+                int type = x[-1]&SDS_TYPE_MASK;
+
+                test_cond("sdsMakeRoomFor() len", sdslen(x) == oldlen);
+                if (type != SDS_TYPE_5) {
+                    test_cond("sdsMakeRoomFor() free", sdsavail(x) >= step);
+                    oldfree = sdsavail(x);
+                    UNUSED(oldfree);
+                }
+                p = x+oldlen;
+                for (j = 0; j < step; j++) {
+                    p[j] = 'A'+j;
+                }
+                sdsIncrLen(x,step);
+            }
+            test_cond("sdsMakeRoomFor() content",
+                memcmp("0ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",x,101) == 0);
+            test_cond("sdsMakeRoomFor() final length",sdslen(x)==101);
+
+            sdsfree(x);
+        }
+
+        /* Simple template */
+        x = sdstemplate("v1={variable1} v2={variable2}", sdsTestTemplateCallback, NULL);
+        test_cond("sdstemplate() normal flow",
+                  memcmp(x,"v1=value1 v2=value2",19) == 0);
+        sdsfree(x);
+
+        /* Template with callback error */
+        x = sdstemplate("v1={variable1} v3={doesnotexist}", sdsTestTemplateCallback, NULL);
+        test_cond("sdstemplate() with callback error", x == NULL);
+
+        /* Template with empty var name */
+        x = sdstemplate("v1={", sdsTestTemplateCallback, NULL);
+        test_cond("sdstemplate() with empty var name", x == NULL);
+
+        /* Template with truncated var name */
+        x = sdstemplate("v1={start", sdsTestTemplateCallback, NULL);
+        test_cond("sdstemplate() with truncated var name", x == NULL);
+
+        /* Template with quoting */
+        x = sdstemplate("v1={{{variable1}} {{} v2={variable2}", sdsTestTemplateCallback, NULL);
+        test_cond("sdstemplate() with quoting",
+                  memcmp(x,"v1={value1} {} v2=value2",24) == 0);
+        sdsfree(x);
+
+        /* Test sdsresize - extend */
+        x = sdsnew("1234567890123456789012345678901234567890");
+        x = sdsResize(x, 200, 1);
+        test_cond("sdsresize() expand len", sdslen(x) == 40);
+        test_cond("sdsresize() expand strlen", strlen(x) == 40);
+        test_cond("sdsresize() expand alloc", sdsalloc(x) == 200);
+        /* Test sdsresize - trim free space */
+        x = sdsResize(x, 80, 1);
+        test_cond("sdsresize() shrink len", sdslen(x) == 40);
+        test_cond("sdsresize() shrink strlen", strlen(x) == 40);
+        test_cond("sdsresize() shrink alloc", sdsalloc(x) == 80);
+        /* Test sdsresize - crop used space */
+        x = sdsResize(x, 30, 1);
+        test_cond("sdsresize() crop len", sdslen(x) == 30);
+        test_cond("sdsresize() crop strlen", strlen(x) == 30);
+        test_cond("sdsresize() crop alloc", sdsalloc(x) == 30);
+        /* Test sdsresize - extend to different class */
+        x = sdsResize(x, 400, 1);
+        test_cond("sdsresize() expand len", sdslen(x) == 30);
+        test_cond("sdsresize() expand strlen", strlen(x) == 30);
+        test_cond("sdsresize() expand alloc", sdsalloc(x) == 400);
+        /* Test sdsresize - shrink to different class */
+        x = sdsResize(x, 4, 1);
+        test_cond("sdsresize() crop len", sdslen(x) == 4);
+        test_cond("sdsresize() crop strlen", strlen(x) == 4);
+        test_cond("sdsresize() crop alloc", sdsalloc(x) == 4);
+        sdsfree(x);
+    }
+    return 0;
+}
+#endif
diff --git a/src/sds.h b/src/sds.h
new file mode 100644
index 0000000..208eaa2
--- /dev/null
+++ b/src/sds.h
@@ -0,0 +1,287 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SDS_H
+#define __SDS_H
+
+#define SDS_MAX_PREALLOC (1024*1024)
+extern const char *SDS_NOINIT;
+
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stdint.h>
+
+typedef char *sds;
+
+/* Note: sdshdr5 is never used, we just access the flags byte directly.
+ * However is here to document the layout of type 5 SDS strings. */
+struct __attribute__ ((__packed__)) sdshdr5 {
+    unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
+    char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr8 {
+    uint8_t len; /* used */
+    uint8_t alloc; /* excluding the header and null terminator */
+    unsigned char flags; /* 3 lsb of type, 5 unused bits */
+    char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr16 {
+    uint16_t len; /* used */
+    uint16_t alloc; /* excluding the header and null terminator */
+    unsigned char flags; /* 3 lsb of type, 5 unused bits */
+    char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr32 {
+    uint32_t len; /* used */
+    uint32_t alloc; /* excluding the header and null terminator */
+    unsigned char flags; /* 3 lsb of type, 5 unused bits */
+    char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr64 {
+    uint64_t len; /* used */
+    uint64_t alloc; /* excluding the header and null terminator */
+    unsigned char flags; /* 3 lsb of type, 5 unused bits */
+    char buf[];
+};
+
+/* Header type tags, stored in the 3 least significant bits of the
+ * 'flags' byte that sits immediately before the string buffer. */
+#define SDS_TYPE_5  0
+#define SDS_TYPE_8  1
+#define SDS_TYPE_16 2
+#define SDS_TYPE_32 3
+#define SDS_TYPE_64 4
+#define SDS_TYPE_MASK 7
+#define SDS_TYPE_BITS 3
+/* SDS_HDR_VAR declares a local 'sh' pointing at the header of 's';
+ * SDS_HDR yields the header pointer as an expression. */
+#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
+#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
+/* Type 5 packs the length into the 5 high bits of the flags byte. */
+#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
+
+/* Return the length in bytes of the sds string 's' in O(1): it is
+ * stored in the header (or, for type 5, in the flags byte itself). */
+static inline size_t sdslen(const sds s) {
+    const unsigned char flags = s[-1];
+    const unsigned char type = flags&SDS_TYPE_MASK;
+
+    if (type == SDS_TYPE_5) return SDS_TYPE_5_LEN(flags);
+    if (type == SDS_TYPE_8) return SDS_HDR(8,s)->len;
+    if (type == SDS_TYPE_16) return SDS_HDR(16,s)->len;
+    if (type == SDS_TYPE_32) return SDS_HDR(32,s)->len;
+    if (type == SDS_TYPE_64) return SDS_HDR(64,s)->len;
+    return 0;
+}
+
+/* Return how many bytes can be appended to 's' without reallocating
+ * (alloc - len). Type 5 headers keep no allocation info, so 0. */
+static inline size_t sdsavail(const sds s) {
+    const unsigned char type = ((unsigned char)s[-1])&SDS_TYPE_MASK;
+
+    if (type == SDS_TYPE_8) {
+        SDS_HDR_VAR(8,s);
+        return sh->alloc - sh->len;
+    }
+    if (type == SDS_TYPE_16) {
+        SDS_HDR_VAR(16,s);
+        return sh->alloc - sh->len;
+    }
+    if (type == SDS_TYPE_32) {
+        SDS_HDR_VAR(32,s);
+        return sh->alloc - sh->len;
+    }
+    if (type == SDS_TYPE_64) {
+        SDS_HDR_VAR(64,s);
+        return sh->alloc - sh->len;
+    }
+    return 0; /* SDS_TYPE_5 (and any unknown tag). */
+}
+
+/* Set the stored length of 's' to 'newlen'. Header bookkeeping only:
+ * the caller is responsible for keeping the buffer content (and its
+ * null terminator) consistent with the new length. */
+static inline void sdssetlen(sds s, size_t newlen) {
+    unsigned char *fp = ((unsigned char*)s)-1;
+    const unsigned char type = *fp & SDS_TYPE_MASK;
+
+    if (type == SDS_TYPE_5)
+        *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+    else if (type == SDS_TYPE_8)
+        SDS_HDR(8,s)->len = newlen;
+    else if (type == SDS_TYPE_16)
+        SDS_HDR(16,s)->len = newlen;
+    else if (type == SDS_TYPE_32)
+        SDS_HDR(32,s)->len = newlen;
+    else if (type == SDS_TYPE_64)
+        SDS_HDR(64,s)->len = newlen;
+}
+
+/* Increment the stored length of 's' by 'inc'. Header bookkeeping
+ * only; sdsIncrLen() is the user-level entry point. */
+static inline void sdsinclen(sds s, size_t inc) {
+    unsigned char *fp = ((unsigned char*)s)-1;
+    const unsigned char flags = *fp;
+    const unsigned char type = flags & SDS_TYPE_MASK;
+
+    if (type == SDS_TYPE_5) {
+        /* Length lives in the 5 high bits of the flags byte. */
+        unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
+        *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+    } else if (type == SDS_TYPE_8) {
+        SDS_HDR(8,s)->len += inc;
+    } else if (type == SDS_TYPE_16) {
+        SDS_HDR(16,s)->len += inc;
+    } else if (type == SDS_TYPE_32) {
+        SDS_HDR(32,s)->len += inc;
+    } else if (type == SDS_TYPE_64) {
+        SDS_HDR(64,s)->len += inc;
+    }
+}
+
+/* Return the total payload capacity of 's':
+ * sdsalloc() = sdsavail() + sdslen(). */
+static inline size_t sdsalloc(const sds s) {
+    const unsigned char flags = s[-1];
+    const unsigned char type = flags&SDS_TYPE_MASK;
+
+    if (type == SDS_TYPE_5) return SDS_TYPE_5_LEN(flags);
+    if (type == SDS_TYPE_8) return SDS_HDR(8,s)->alloc;
+    if (type == SDS_TYPE_16) return SDS_HDR(16,s)->alloc;
+    if (type == SDS_TYPE_32) return SDS_HDR(32,s)->alloc;
+    if (type == SDS_TYPE_64) return SDS_HDR(64,s)->alloc;
+    return 0;
+}
+
+/* Record 'newlen' as the allocated capacity of 's'. A no-op for
+ * type 5 strings, which store no total allocation info. */
+static inline void sdssetalloc(sds s, size_t newlen) {
+    const unsigned char type = ((unsigned char)s[-1])&SDS_TYPE_MASK;
+
+    if (type == SDS_TYPE_8)
+        SDS_HDR(8,s)->alloc = newlen;
+    else if (type == SDS_TYPE_16)
+        SDS_HDR(16,s)->alloc = newlen;
+    else if (type == SDS_TYPE_32)
+        SDS_HDR(32,s)->alloc = newlen;
+    else if (type == SDS_TYPE_64)
+        SDS_HDR(64,s)->alloc = newlen;
+    /* SDS_TYPE_5: nothing to do. */
+}
+
+/* Creation, duplication, release and basic (binary safe) mutation. */
+sds sdsnewlen(const void *init, size_t initlen);
+sds sdstrynewlen(const void *init, size_t initlen);
+sds sdsnew(const char *init);
+sds sdsempty(void);
+sds sdsdup(const sds s);
+void sdsfree(sds s);
+sds sdsgrowzero(sds s, size_t len);
+sds sdscatlen(sds s, const void *t, size_t len);
+sds sdscat(sds s, const char *t);
+sds sdscatsds(sds s, const sds t);
+sds sdscpylen(sds s, const char *t, size_t len);
+sds sdscpy(sds s, const char *t);
+
+sds sdscatvprintf(sds s, const char *fmt, va_list ap);
+#ifdef __GNUC__
+sds sdscatprintf(sds s, const char *fmt, ...)
+    __attribute__((format(printf, 2, 3)));
+#else
+sds sdscatprintf(sds s, const char *fmt, ...);
+#endif
+
+/* Trimming, ranges, comparison, splitting and other utilities. */
+sds sdscatfmt(sds s, char const *fmt, ...);
+sds sdstrim(sds s, const char *cset);
+void sdssubstr(sds s, size_t start, size_t len);
+void sdsrange(sds s, ssize_t start, ssize_t end);
+void sdsupdatelen(sds s);
+void sdsclear(sds s);
+int sdscmp(const sds s1, const sds s2);
+sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count);
+void sdsfreesplitres(sds *tokens, int count);
+void sdstolower(sds s);
+void sdstoupper(sds s);
+sds sdsfromlonglong(long long value);
+sds sdscatrepr(sds s, const char *p, size_t len);
+sds *sdssplitargs(const char *line, int *argc);
+sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
+sds sdsjoin(char **argv, int argc, char *sep);
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
+int sdsneedsrepr(const sds s);
+
+/* Callback for sdstemplate. The function gets called by sdstemplate
+ * every time a variable needs to be expanded. The variable name is
+ * provided as variable, and the callback is expected to return a
+ * substitution value. Returning a NULL indicates an error.
+ */
+typedef sds (*sdstemplate_callback_t)(const sds variable, void *arg);
+sds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg);
+
+/* Low level functions exposed to the user API */
+sds sdsMakeRoomFor(sds s, size_t addlen);
+sds sdsMakeRoomForNonGreedy(sds s, size_t addlen);
+void sdsIncrLen(sds s, ssize_t incr);
+sds sdsRemoveFreeSpace(sds s, int would_regrow);
+sds sdsResize(sds s, size_t size, int would_regrow);
+size_t sdsAllocSize(sds s);
+void *sdsAllocPtr(sds s);
+
+/* Export the allocator used by SDS to the program using SDS.
+ * Sometimes the program SDS is linked to, may use a different set of
+ * allocators, but may want to allocate or free things that SDS will
+ * respectively free or allocate. */
+void *sds_malloc(size_t size);
+void *sds_realloc(void *ptr, size_t size);
+void sds_free(void *ptr);
+
+#ifdef REDIS_TEST
+int sdsTest(int argc, char *argv[], int flags);
+#endif
+
+#endif
diff --git a/src/sdsalloc.h b/src/sdsalloc.h
new file mode 100644
index 0000000..a1c5584
--- /dev/null
+++ b/src/sdsalloc.h
@@ -0,0 +1,54 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* SDS allocator selection.
+ *
+ * This file is used in order to change the SDS allocator at compile time.
+ * Just define the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#ifndef __SDS_ALLOC_H__
+#define __SDS_ALLOC_H__
+
+#include "zmalloc.h"
+/* Map the SDS s_* allocator entry points onto the zmalloc family. */
+#define s_malloc zmalloc
+#define s_realloc zrealloc
+/* NOTE(review): the 'try' variants presumably return NULL on OOM rather
+ * than aborting — confirm against zmalloc.h. */
+#define s_trymalloc ztrymalloc
+#define s_tryrealloc ztryrealloc
+#define s_free zfree
+/* NOTE(review): the '_usable' variants appear to also report the usable
+ * allocation size — confirm against zmalloc.h. */
+#define s_malloc_usable zmalloc_usable
+#define s_realloc_usable zrealloc_usable
+#define s_trymalloc_usable ztrymalloc_usable
+#define s_tryrealloc_usable ztryrealloc_usable
+#define s_free_usable zfree_usable
+
+#endif
diff --git a/src/sentinel.c b/src/sentinel.c
new file mode 100644
index 0000000..238be90
--- /dev/null
+++ b/src/sentinel.c
@@ -0,0 +1,5484 @@
+/* Redis Sentinel implementation
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "hiredis.h"
+#if USE_OPENSSL == 1 /* BUILD_YES */
+#include "openssl/ssl.h"
+#include "hiredis_ssl.h"
+#endif
+#include "async.h"
+
+#include <ctype.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+
+extern char **environ;
+
+#if USE_OPENSSL == 1 /* BUILD_YES */
+extern SSL_CTX *redis_tls_ctx;
+extern SSL_CTX *redis_tls_client_ctx;
+#endif
+
+#define REDIS_SENTINEL_PORT 26379
+
+/* ======================== Sentinel global state =========================== */
+
+/* Address object, used to describe an ip:port pair. */
+typedef struct sentinelAddr {
+ char *hostname; /* Hostname OR address, as specified */
+ char *ip; /* Always a resolved address */
+ int port;
+} sentinelAddr;
+
+/* Flags describing the type and state of a monitored Sentinel Redis Instance. */
+#define SRI_MASTER (1<<0)
+#define SRI_SLAVE (1<<1)
+#define SRI_SENTINEL (1<<2)
+#define SRI_S_DOWN (1<<3) /* Subjectively down (no quorum). */
+#define SRI_O_DOWN (1<<4) /* Objectively down (confirmed by others). */
+#define SRI_MASTER_DOWN (1<<5) /* A Sentinel with this flag set thinks that
+ its master is down. */
+#define SRI_FAILOVER_IN_PROGRESS (1<<6) /* Failover is in progress for
+ this master. */
+#define SRI_PROMOTED (1<<7) /* Slave selected for promotion. */
+#define SRI_RECONF_SENT (1<<8) /* SLAVEOF <newmaster> sent. */
+#define SRI_RECONF_INPROG (1<<9) /* Slave synchronization in progress. */
+#define SRI_RECONF_DONE (1<<10) /* Slave synchronized with new master. */
+#define SRI_FORCE_FAILOVER (1<<11) /* Force failover with master up. */
+#define SRI_SCRIPT_KILL_SENT (1<<12) /* SCRIPT KILL already sent on -BUSY */
+#define SRI_MASTER_REBOOT (1<<13) /* Master was detected as rebooting */
+/* Note: when adding new flags, please check the flags section in addReplySentinelRedisInstance. */
+
+/* Note: times are in milliseconds. */
+#define SENTINEL_PING_PERIOD 1000
+
+static mstime_t sentinel_info_period = 10000;
+static mstime_t sentinel_ping_period = SENTINEL_PING_PERIOD;
+static mstime_t sentinel_ask_period = 1000;
+static mstime_t sentinel_publish_period = 2000;
+static mstime_t sentinel_default_down_after = 30000;
+static mstime_t sentinel_tilt_trigger = 2000;
+static mstime_t sentinel_tilt_period = SENTINEL_PING_PERIOD * 30;
+static mstime_t sentinel_slave_reconf_timeout = 10000;
+static mstime_t sentinel_min_link_reconnect_period = 15000;
+static mstime_t sentinel_election_timeout = 10000;
+static mstime_t sentinel_script_max_runtime = 60000; /* 60 seconds max exec time. */
+static mstime_t sentinel_script_retry_delay = 30000; /* 30 seconds between retries. */
+static mstime_t sentinel_default_failover_timeout = 60*3*1000;
+
+#define SENTINEL_HELLO_CHANNEL "__sentinel__:hello"
+#define SENTINEL_DEFAULT_SLAVE_PRIORITY 100
+#define SENTINEL_DEFAULT_PARALLEL_SYNCS 1
+#define SENTINEL_MAX_PENDING_COMMANDS 100
+
+#define SENTINEL_MAX_DESYNC 1000
+#define SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG 1
+#define SENTINEL_DEFAULT_RESOLVE_HOSTNAMES 0
+#define SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES 0
+
+/* Failover machine different states. */
+#define SENTINEL_FAILOVER_STATE_NONE 0 /* No failover in progress. */
+#define SENTINEL_FAILOVER_STATE_WAIT_START 1 /* Wait for failover_start_time*/
+#define SENTINEL_FAILOVER_STATE_SELECT_SLAVE 2 /* Select slave to promote */
+#define SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE 3 /* Slave -> Master */
+#define SENTINEL_FAILOVER_STATE_WAIT_PROMOTION 4 /* Wait slave to change role */
+#define SENTINEL_FAILOVER_STATE_RECONF_SLAVES 5 /* SLAVEOF newmaster */
+#define SENTINEL_FAILOVER_STATE_UPDATE_CONFIG 6 /* Monitor promoted slave. */
+
+#define SENTINEL_MASTER_LINK_STATUS_UP 0
+#define SENTINEL_MASTER_LINK_STATUS_DOWN 1
+
+/* Generic flags that can be used with different functions.
+ * They use higher bits to avoid colliding with the function specific
+ * flags. */
+#define SENTINEL_NO_FLAGS 0
+#define SENTINEL_GENERATE_EVENT (1<<16)
+#define SENTINEL_LEADER (1<<17)
+#define SENTINEL_OBSERVER (1<<18)
+
+/* Script execution flags and limits. */
+#define SENTINEL_SCRIPT_NONE 0
+#define SENTINEL_SCRIPT_RUNNING 1
+#define SENTINEL_SCRIPT_MAX_QUEUE 256
+#define SENTINEL_SCRIPT_MAX_RUNNING 16
+#define SENTINEL_SCRIPT_MAX_RETRY 10
+
+/* SENTINEL SIMULATE-FAILURE command flags. */
+#define SENTINEL_SIMFAILURE_NONE 0
+#define SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION (1<<0)
+#define SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION (1<<1)
+
+/* The link to a sentinelRedisInstance. When we have the same set of Sentinels
+ * monitoring many masters, we have different instances representing the
+ * same Sentinels, one per master, and we need to share the hiredis connections
+ * among them. Otherwise if 5 Sentinels are monitoring 100 masters we create
+ * 500 outgoing connections instead of 5.
+ *
+ * So this structure represents a reference counted link in terms of the two
+ * hiredis connections for commands and Pub/Sub, and the fields needed for
+ * failure detection, since the ping/pong time are now local to the link: if
+ * the link is available, the instance is available. This way we don't just
+ * have 5 connections instead of 500, we also send 5 pings instead of 500.
+ *
+ * Links are shared only for Sentinels: master and slave instances have
+ * a link with refcount = 1, always. */
+typedef struct instanceLink {
+ int refcount; /* Number of sentinelRedisInstance owners. */
+ int disconnected; /* Non-zero if we need to reconnect cc or pc. */
+ int pending_commands; /* Number of commands sent waiting for a reply. */
+ redisAsyncContext *cc; /* Hiredis context for commands. */
+ redisAsyncContext *pc; /* Hiredis context for Pub / Sub. */
+ mstime_t cc_conn_time; /* cc connection time. */
+ mstime_t pc_conn_time; /* pc connection time. */
+ mstime_t pc_last_activity; /* Last time we received any message. */
+ mstime_t last_avail_time; /* Last time the instance replied to ping with
+ a reply we consider valid. */
+ mstime_t act_ping_time; /* Time at which the last pending ping (no pong
+ received after it) was sent. This field is
+ set to 0 when a pong is received, and set again
+ to the current time if the value is 0 and a new
+ ping is sent. */
+ mstime_t last_ping_time; /* Time at which we sent the last ping. This is
+ only used to avoid sending too many pings
+ during failure. Idle time is computed using
+ the act_ping_time field. */
+ mstime_t last_pong_time; /* Last time the instance replied to ping,
+ whatever the reply was. That's used to check
+ if the link is idle and must be reconnected. */
+ mstime_t last_reconn_time; /* Last reconnection attempt performed when
+ the link was down. */
+} instanceLink;
+
+typedef struct sentinelRedisInstance {
+ int flags; /* See SRI_... defines */
+ char *name; /* Master name from the point of view of this sentinel. */
+ char *runid; /* Run ID of this instance, or unique ID if is a Sentinel.*/
+ uint64_t config_epoch; /* Configuration epoch. */
+ sentinelAddr *addr; /* Master host. */
+ instanceLink *link; /* Link to the instance, may be shared for Sentinels. */
+ mstime_t last_pub_time; /* Last time we sent hello via Pub/Sub. */
+ mstime_t last_hello_time; /* Only used if SRI_SENTINEL is set. Last time
+ we received a hello from this Sentinel
+ via Pub/Sub. */
+ mstime_t last_master_down_reply_time; /* Time of last reply to
+ SENTINEL is-master-down command. */
+ mstime_t s_down_since_time; /* Subjectively down since time. */
+ mstime_t o_down_since_time; /* Objectively down since time. */
+ mstime_t down_after_period; /* Consider it down after that period. */
+ mstime_t master_reboot_down_after_period; /* Consider master down after that period. */
+ mstime_t master_reboot_since_time; /* master reboot time since time. */
+ mstime_t info_refresh; /* Time at which we received INFO output from it. */
+ dict *renamed_commands; /* Commands renamed in this instance:
+ Sentinel will use the alternative commands
+ mapped on this table to send things like
+ SLAVEOF, CONFIG, INFO, ... */
+
+ /* Role and the first time we observed it.
+ * This is useful in order to delay replacing what the instance reports
+ * with our own configuration. We need to always wait some time in order
+ * to give a chance to the leader to report the new configuration before
+ * we do silly things. */
+ int role_reported;
+ mstime_t role_reported_time;
+ mstime_t slave_conf_change_time; /* Last time slave master addr changed. */
+
+ /* Master specific. */
+ dict *sentinels; /* Other sentinels monitoring the same master. */
+ dict *slaves; /* Slaves for this master instance. */
+ unsigned int quorum;/* Number of sentinels that need to agree on failure. */
+ int parallel_syncs; /* How many slaves to reconfigure at same time. */
+ char *auth_pass; /* Password to use for AUTH against master & replica. */
+ char *auth_user; /* Username for ACLs AUTH against master & replica. */
+
+ /* Slave specific. */
+ mstime_t master_link_down_time; /* Slave replication link down time. */
+ int slave_priority; /* Slave priority according to its INFO output. */
+ int replica_announced; /* Replica announcing according to its INFO output. */
+ mstime_t slave_reconf_sent_time; /* Time at which we sent SLAVE OF <new> */
+ struct sentinelRedisInstance *master; /* Master instance if it's slave. */
+ char *slave_master_host; /* Master host as reported by INFO */
+ int slave_master_port; /* Master port as reported by INFO */
+ int slave_master_link_status; /* Master link status as reported by INFO */
+ unsigned long long slave_repl_offset; /* Slave replication offset. */
+ /* Failover */
+ char *leader; /* If this is a master instance, this is the runid of
+ the Sentinel that should perform the failover. If
+ this is a Sentinel, this is the runid of the Sentinel
+ that this Sentinel voted as leader. */
+ uint64_t leader_epoch; /* Epoch of the 'leader' field. */
+ uint64_t failover_epoch; /* Epoch of the currently started failover. */
+ int failover_state; /* See SENTINEL_FAILOVER_STATE_* defines. */
+ mstime_t failover_state_change_time;
+ mstime_t failover_start_time; /* Last failover attempt start time. */
+ mstime_t failover_timeout; /* Max time to refresh failover state. */
+ mstime_t failover_delay_logged; /* For what failover_start_time value we
+ logged the failover delay. */
+ struct sentinelRedisInstance *promoted_slave; /* Promoted slave instance. */
+ /* Scripts executed to notify admin or reconfigure clients: when they
+ * are set to NULL no script is executed. */
+ char *notification_script;
+ char *client_reconfig_script;
+ sds info; /* cached INFO output */
+} sentinelRedisInstance;
+
+/* Main state. */
+struct sentinelState {
+ char myid[CONFIG_RUN_ID_SIZE+1]; /* This sentinel ID. */
+ uint64_t current_epoch; /* Current epoch. */
+ dict *masters; /* Dictionary of master sentinelRedisInstances.
+ Key is the instance name, value is the
+ sentinelRedisInstance structure pointer. */
+ int tilt; /* Are we in TILT mode? */
+ int running_scripts; /* Number of scripts in execution right now. */
+    mstime_t tilt_start_time; /* When TILT started. */
+ mstime_t previous_time; /* Last time we ran the time handler. */
+ list *scripts_queue; /* Queue of user scripts to execute. */
+ char *announce_ip; /* IP addr that is gossiped to other sentinels if
+ not NULL. */
+ int announce_port; /* Port that is gossiped to other sentinels if
+ non zero. */
+ unsigned long simfailure_flags; /* Failures simulation. */
+ int deny_scripts_reconfig; /* Allow SENTINEL SET ... to change script
+ paths at runtime? */
+ char *sentinel_auth_pass; /* Password to use for AUTH against other sentinel */
+ char *sentinel_auth_user; /* Username for ACLs AUTH against other sentinel. */
+ int resolve_hostnames; /* Support use of hostnames, assuming DNS is well configured. */
+ int announce_hostnames; /* Announce hostnames instead of IPs when we have them. */
+} sentinel;
+
+/* A script execution job. */
+typedef struct sentinelScriptJob {
+ int flags; /* Script job flags: SENTINEL_SCRIPT_* */
+ int retry_num; /* Number of times we tried to execute it. */
+ char **argv; /* Arguments to call the script. */
+ mstime_t start_time; /* Script execution time if the script is running,
+ otherwise 0 if we are allowed to retry the
+ execution at any time. If the script is not
+ running and it's not 0, it means: do not run
+ before the specified time. */
+ pid_t pid; /* Script execution pid. */
+} sentinelScriptJob;
+
+/* ======================= hiredis ae.c adapters =============================
+ * Note: this implementation is taken from hiredis/adapters/ae.h, however
+ * we have our modified copy for Sentinel in order to use our allocator
+ * and to have full control over how the adapter works. */
+
+typedef struct redisAeEvents {
+ redisAsyncContext *context;
+ aeEventLoop *loop;
+ int fd;
+ int reading, writing;
+} redisAeEvents;
+
+static void redisAeReadEvent(aeEventLoop *el, int fd, void *privdata, int mask) {
+ ((void)el); ((void)fd); ((void)mask);
+
+ redisAeEvents *e = (redisAeEvents*)privdata;
+ redisAsyncHandleRead(e->context);
+}
+
+static void redisAeWriteEvent(aeEventLoop *el, int fd, void *privdata, int mask) {
+ ((void)el); ((void)fd); ((void)mask);
+
+ redisAeEvents *e = (redisAeEvents*)privdata;
+ redisAsyncHandleWrite(e->context);
+}
+
+static void redisAeAddRead(void *privdata) {
+ redisAeEvents *e = (redisAeEvents*)privdata;
+ aeEventLoop *loop = e->loop;
+ if (!e->reading) {
+ e->reading = 1;
+ aeCreateFileEvent(loop,e->fd,AE_READABLE,redisAeReadEvent,e);
+ }
+}
+
+static void redisAeDelRead(void *privdata) {
+ redisAeEvents *e = (redisAeEvents*)privdata;
+ aeEventLoop *loop = e->loop;
+ if (e->reading) {
+ e->reading = 0;
+ aeDeleteFileEvent(loop,e->fd,AE_READABLE);
+ }
+}
+
+static void redisAeAddWrite(void *privdata) {
+ redisAeEvents *e = (redisAeEvents*)privdata;
+ aeEventLoop *loop = e->loop;
+ if (!e->writing) {
+ e->writing = 1;
+ aeCreateFileEvent(loop,e->fd,AE_WRITABLE,redisAeWriteEvent,e);
+ }
+}
+
+static void redisAeDelWrite(void *privdata) {
+ redisAeEvents *e = (redisAeEvents*)privdata;
+ aeEventLoop *loop = e->loop;
+ if (e->writing) {
+ e->writing = 0;
+ aeDeleteFileEvent(loop,e->fd,AE_WRITABLE);
+ }
+}
+
+static void redisAeCleanup(void *privdata) {
+ redisAeEvents *e = (redisAeEvents*)privdata;
+ redisAeDelRead(privdata);
+ redisAeDelWrite(privdata);
+ zfree(e);
+}
+
+static int redisAeAttach(aeEventLoop *loop, redisAsyncContext *ac) {
+ redisContext *c = &(ac->c);
+ redisAeEvents *e;
+
+ /* Nothing should be attached when something is already attached */
+ if (ac->ev.data != NULL)
+ return C_ERR;
+
+ /* Create container for context and r/w events */
+ e = (redisAeEvents*)zmalloc(sizeof(*e));
+ e->context = ac;
+ e->loop = loop;
+ e->fd = c->fd;
+ e->reading = e->writing = 0;
+
+ /* Register functions to start/stop listening for events */
+ ac->ev.addRead = redisAeAddRead;
+ ac->ev.delRead = redisAeDelRead;
+ ac->ev.addWrite = redisAeAddWrite;
+ ac->ev.delWrite = redisAeDelWrite;
+ ac->ev.cleanup = redisAeCleanup;
+ ac->ev.data = e;
+
+ return C_OK;
+}
+
+/* ============================= Prototypes ================================= */
+
+void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status);
+void sentinelDisconnectCallback(const redisAsyncContext *c, int status);
+void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata);
+sentinelRedisInstance *sentinelGetMasterByName(char *name);
+char *sentinelGetSubjectiveLeader(sentinelRedisInstance *master);
+char *sentinelGetObjectiveLeader(sentinelRedisInstance *master);
+void instanceLinkConnectionError(const redisAsyncContext *c);
+const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri);
+void sentinelAbortFailover(sentinelRedisInstance *ri);
+void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...);
+sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master);
+void sentinelScheduleScriptExecution(char *path, ...);
+void sentinelStartFailover(sentinelRedisInstance *master);
+void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata);
+int sentinelSendSlaveOf(sentinelRedisInstance *ri, const sentinelAddr *addr);
+char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch);
+int sentinelFlushConfig(void);
+void sentinelGenerateInitialMonitorEvents(void);
+int sentinelSendPing(sentinelRedisInstance *ri);
+int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master);
+sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid);
+void sentinelSimFailureCrash(void);
+
+/* ========================= Dictionary types =============================== */
+
+void releaseSentinelRedisInstance(sentinelRedisInstance *ri);
+
+void dictInstancesValDestructor (dict *d, void *obj) {
+ UNUSED(d);
+ releaseSentinelRedisInstance(obj);
+}
+
+/* Instance name (sds) -> instance (sentinelRedisInstance pointer)
+ *
+ * also used for: sentinelRedisInstance->sentinels dictionary that maps
+ * sentinels ip:port to last seen time in Pub/Sub hello message. */
+dictType instancesDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ dictInstancesValDestructor,/* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Instance runid (sds) -> votes (long casted to void*)
+ *
+ * This is useful into sentinelGetObjectiveLeader() function in order to
+ * count the votes and understand who is the leader. */
+dictType leaderVotesDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Instance renamed commands table. */
+dictType renamedCommandsDictType = {
+ dictSdsCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ dictSdsDestructor, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* =========================== Initialization =============================== */
+
+void sentinelSetCommand(client *c);
+void sentinelConfigGetCommand(client *c);
+void sentinelConfigSetCommand(client *c);
+
+/* this array is used for sentinel config lookup, which need to be loaded
+ * before monitoring masters config to avoid dependency issues */
+const char *preMonitorCfgName[] = {
+ "announce-ip",
+ "announce-port",
+ "deny-scripts-reconfig",
+ "sentinel-user",
+ "sentinel-pass",
+ "current-epoch",
+ "myid",
+ "resolve-hostnames",
+ "announce-hostnames"
+};
+
+/* This function overwrites a few normal Redis config default with Sentinel
+ * specific defaults. */
+void initSentinelConfig(void) {
+ server.port = REDIS_SENTINEL_PORT;
+ server.protected_mode = 0; /* Sentinel must be exposed. */
+}
+
+void freeSentinelLoadQueueEntry(void *item);
+
+/* Perform the Sentinel mode initialization. */
+void initSentinel(void) {
+ /* Initialize various data structures. */
+ sentinel.current_epoch = 0;
+ sentinel.masters = dictCreate(&instancesDictType);
+ sentinel.tilt = 0;
+ sentinel.tilt_start_time = 0;
+ sentinel.previous_time = mstime();
+ sentinel.running_scripts = 0;
+ sentinel.scripts_queue = listCreate();
+ sentinel.announce_ip = NULL;
+ sentinel.announce_port = 0;
+ sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
+ sentinel.deny_scripts_reconfig = SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG;
+ sentinel.sentinel_auth_pass = NULL;
+ sentinel.sentinel_auth_user = NULL;
+ sentinel.resolve_hostnames = SENTINEL_DEFAULT_RESOLVE_HOSTNAMES;
+ sentinel.announce_hostnames = SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES;
+ memset(sentinel.myid,0,sizeof(sentinel.myid));
+ server.sentinel_config = NULL;
+}
+
+/* This function is for checking whether sentinel config file has been set,
+ * also checking whether we have write permissions. */
+void sentinelCheckConfigFile(void) {
+ if (server.configfile == NULL) {
+ serverLog(LL_WARNING,
+ "Sentinel needs config file on disk to save state. Exiting...");
+ exit(1);
+ } else if (access(server.configfile,W_OK) == -1) {
+ serverLog(LL_WARNING,
+ "Sentinel config file %s is not writable: %s. Exiting...",
+ server.configfile,strerror(errno));
+ exit(1);
+ }
+}
+
+/* This function gets called when the server is in Sentinel mode, started,
+ * loaded the configuration, and is ready for normal operations. */
+void sentinelIsRunning(void) {
+ int j;
+
+ /* If this Sentinel has yet no ID set in the configuration file, we
+ * pick a random one and persist the config on disk. From now on this
+ * will be this Sentinel ID across restarts. */
+ for (j = 0; j < CONFIG_RUN_ID_SIZE; j++)
+ if (sentinel.myid[j] != 0) break;
+
+ if (j == CONFIG_RUN_ID_SIZE) {
+ /* Pick ID and persist the config. */
+ getRandomHexChars(sentinel.myid,CONFIG_RUN_ID_SIZE);
+ sentinelFlushConfig();
+ }
+
+ /* Log its ID to make debugging of issues simpler. */
+ serverLog(LL_NOTICE,"Sentinel ID is %s", sentinel.myid);
+
+ /* We want to generate a +monitor event for every configured master
+ * at startup. */
+ sentinelGenerateInitialMonitorEvents();
+}
+
+/* ============================== sentinelAddr ============================== */
+
+/* Create a sentinelAddr object and return it on success.
+ * On error NULL is returned and errno is set to:
+ * ENOENT: Can't resolve the hostname, unless accept_unresolved is non-zero.
+ * EINVAL: Invalid port number.
+ */
+sentinelAddr *createSentinelAddr(char *hostname, int port, int is_accept_unresolved) {
+ char ip[NET_IP_STR_LEN];
+ sentinelAddr *sa;
+
+ if (port < 0 || port > 65535) {
+ errno = EINVAL;
+ return NULL;
+ }
+ if (anetResolve(NULL,hostname,ip,sizeof(ip),
+ sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR) {
+ serverLog(LL_WARNING, "Failed to resolve hostname '%s'", hostname);
+ if (sentinel.resolve_hostnames && is_accept_unresolved) {
+ ip[0] = '\0';
+ }
+ else {
+ errno = ENOENT;
+ return NULL;
+ }
+ }
+ sa = zmalloc(sizeof(*sa));
+ sa->hostname = sdsnew(hostname);
+ sa->ip = sdsnew(ip);
+ sa->port = port;
+ return sa;
+}
+
+/* Return a duplicate of the source address. */
+sentinelAddr *dupSentinelAddr(sentinelAddr *src) {
+ sentinelAddr *sa;
+
+ sa = zmalloc(sizeof(*sa));
+ sa->hostname = sdsnew(src->hostname);
+ sa->ip = sdsnew(src->ip);
+ sa->port = src->port;
+ return sa;
+}
+
+/* Free a Sentinel address. Can't fail. */
+void releaseSentinelAddr(sentinelAddr *sa) {
+ sdsfree(sa->hostname);
+ sdsfree(sa->ip);
+ zfree(sa);
+}
+
+/* Return non-zero if the two addresses are equal, either by address
+ * or by hostname if they could not have been resolved.
+ */
+int sentinelAddrOrHostnameEqual(sentinelAddr *a, sentinelAddr *b) {
+ return a->port == b->port &&
+ (!strcmp(a->ip, b->ip) ||
+ !strcasecmp(a->hostname, b->hostname));
+}
+
+/* Return non-zero if a hostname matches an address. */
+int sentinelAddrEqualsHostname(sentinelAddr *a, char *hostname) {
+ char ip[NET_IP_STR_LEN];
+
+ /* Try resolve the hostname and compare it to the address */
+ if (anetResolve(NULL, hostname, ip, sizeof(ip),
+ sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR) {
+
+ /* If failed resolve then compare based on hostnames. That is our best effort as
+ * long as the server is unavailable for some reason. It is fine since Redis
+ * instance cannot have multiple hostnames for a given setup */
+ return !strcasecmp(sentinel.resolve_hostnames ? a->hostname : a->ip, hostname);
+ }
+ /* Compare based on address */
+ return !strcasecmp(a->ip, ip);
+}
+
+const char *announceSentinelAddr(const sentinelAddr *a) {
+ return sentinel.announce_hostnames ? a->hostname : a->ip;
+}
+
+/* Return an allocated sds with hostname/address:port. IPv6
+ * addresses are bracketed the same way anetFormatAddr() does.
+ */
+sds announceSentinelAddrAndPort(const sentinelAddr *a) {
+ const char *addr = announceSentinelAddr(a);
+ if (strchr(addr, ':') != NULL)
+ return sdscatprintf(sdsempty(), "[%s]:%d", addr, a->port);
+ else
+ return sdscatprintf(sdsempty(), "%s:%d", addr, a->port);
+}
+
+/* =========================== Events notification ========================== */
+
+/* Send an event to log, pub/sub, user notification script.
+ *
+ * 'level' is the log level for logging. Only LL_WARNING events will trigger
+ * the execution of the user notification script.
+ *
+ * 'type' is the message type, also used as a pub/sub channel name.
+ *
+ * 'ri', is the redis instance target of this event if applicable, and is
+ * used to obtain the path of the notification script to execute.
+ *
+ * The remaining arguments are printf-alike.
+ * If the format specifier starts with the two characters "%@" then ri is
+ * not NULL, and the message is prefixed with an instance identifier in the
+ * following format:
+ *
+ * <instance type> <instance name> <ip> <port>
+ *
+ * If the instance type is not master, then the additional string is
+ * added to specify the originating master:
+ *
+ * @ <master name> <master ip> <master port>
+ *
+ * Any other specifier after "%@" is processed by printf itself.
+ */
+void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
+ const char *fmt, ...) {
+ va_list ap;
+ char msg[LOG_MAX_LEN];
+ robj *channel, *payload;
+
+ /* Handle %@ */
+ if (fmt[0] == '%' && fmt[1] == '@') {
+ sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
+ NULL : ri->master;
+
+ if (master) {
+ snprintf(msg, sizeof(msg), "%s %s %s %d @ %s %s %d",
+ sentinelRedisInstanceTypeStr(ri),
+ ri->name, announceSentinelAddr(ri->addr), ri->addr->port,
+ master->name, announceSentinelAddr(master->addr), master->addr->port);
+ } else {
+ snprintf(msg, sizeof(msg), "%s %s %s %d",
+ sentinelRedisInstanceTypeStr(ri),
+ ri->name, announceSentinelAddr(ri->addr), ri->addr->port);
+ }
+ fmt += 2;
+ } else {
+ msg[0] = '\0';
+ }
+
+ /* Use vsprintf for the rest of the formatting if any. */
+ if (fmt[0] != '\0') {
+ va_start(ap, fmt);
+ vsnprintf(msg+strlen(msg), sizeof(msg)-strlen(msg), fmt, ap);
+ va_end(ap);
+ }
+
+ /* Log the message if the log level allows it to be logged. */
+ if (level >= server.verbosity)
+ serverLog(level,"%s %s",type,msg);
+
+ /* Publish the message via Pub/Sub if it's not a debugging one. */
+ if (level != LL_DEBUG) {
+ channel = createStringObject(type,strlen(type));
+ payload = createStringObject(msg,strlen(msg));
+ pubsubPublishMessage(channel,payload,0);
+ decrRefCount(channel);
+ decrRefCount(payload);
+ }
+
+ /* Call the notification script if applicable. */
+ if (level == LL_WARNING && ri != NULL) {
+ sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
+ ri : ri->master;
+ if (master && master->notification_script) {
+ sentinelScheduleScriptExecution(master->notification_script,
+ type,msg,NULL);
+ }
+ }
+}
+
+/* This function is called only at startup and is used to generate a
+ * +monitor event for every configured master. The same events are also
+ * generated when a master to monitor is added at runtime via the
+ * SENTINEL MONITOR command. */
+void sentinelGenerateInitialMonitorEvents(void) {
+ dictIterator *di;
+ dictEntry *de;
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+ sentinelEvent(LL_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
+ }
+ dictReleaseIterator(di);
+}
+
+/* ============================ script execution ============================ */
+
+/* Release a script job structure and all the associated data. */
+void sentinelReleaseScriptJob(sentinelScriptJob *sj) {
+ int j = 0;
+
+ while(sj->argv[j]) sdsfree(sj->argv[j++]);
+ zfree(sj->argv);
+ zfree(sj);
+}
+
+#define SENTINEL_SCRIPT_MAX_ARGS 16
+void sentinelScheduleScriptExecution(char *path, ...) {
+ va_list ap;
+ char *argv[SENTINEL_SCRIPT_MAX_ARGS+1];
+ int argc = 1;
+ sentinelScriptJob *sj;
+
+ va_start(ap, path);
+ while(argc < SENTINEL_SCRIPT_MAX_ARGS) {
+ argv[argc] = va_arg(ap,char*);
+ if (!argv[argc]) break;
+ argv[argc] = sdsnew(argv[argc]); /* Copy the string. */
+ argc++;
+ }
+ va_end(ap);
+ argv[0] = sdsnew(path);
+
+ sj = zmalloc(sizeof(*sj));
+ sj->flags = SENTINEL_SCRIPT_NONE;
+ sj->retry_num = 0;
+ sj->argv = zmalloc(sizeof(char*)*(argc+1));
+ sj->start_time = 0;
+ sj->pid = 0;
+ memcpy(sj->argv,argv,sizeof(char*)*(argc+1));
+
+ listAddNodeTail(sentinel.scripts_queue,sj);
+
+ /* Remove the oldest non running script if we already hit the limit. */
+ if (listLength(sentinel.scripts_queue) > SENTINEL_SCRIPT_MAX_QUEUE) {
+ listNode *ln;
+ listIter li;
+
+ listRewind(sentinel.scripts_queue,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ sj = ln->value;
+
+ if (sj->flags & SENTINEL_SCRIPT_RUNNING) continue;
+ /* The first node is the oldest as we add on tail. */
+ listDelNode(sentinel.scripts_queue,ln);
+ sentinelReleaseScriptJob(sj);
+ break;
+ }
+ serverAssert(listLength(sentinel.scripts_queue) <=
+ SENTINEL_SCRIPT_MAX_QUEUE);
+ }
+}
+
+/* Lookup a script in the scripts queue via pid, and returns the list node
+ * (so that we can easily remove it from the queue if needed). */
+listNode *sentinelGetScriptListNodeByPid(pid_t pid) {
+ listNode *ln;
+ listIter li;
+
+ listRewind(sentinel.scripts_queue,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ sentinelScriptJob *sj = ln->value;
+
+ if ((sj->flags & SENTINEL_SCRIPT_RUNNING) && sj->pid == pid)
+ return ln;
+ }
+ return NULL;
+}
+
/* Run pending scripts if we are not already at max number of running
 * scripts.
 *
 * A "pending" job is one without SENTINEL_SCRIPT_RUNNING set and whose
 * start_time is not in the future (a future start_time is how retries are
 * delayed, see sentinelCollectTerminatedScripts()). Each started job is
 * forked and exec'd; fork failures are reported as a fake signal 99. */
void sentinelRunPendingScripts(void) {
    listNode *ln;
    listIter li;
    mstime_t now = mstime();

    /* Find jobs that are not running and run them, from the top to the
     * tail of the queue, so we run older jobs first. */
    listRewind(sentinel.scripts_queue,&li);
    while (sentinel.running_scripts < SENTINEL_SCRIPT_MAX_RUNNING &&
           (ln = listNext(&li)) != NULL)
    {
        sentinelScriptJob *sj = ln->value;
        pid_t pid;

        /* Skip if already running. */
        if (sj->flags & SENTINEL_SCRIPT_RUNNING) continue;

        /* Skip if it's a retry, but not enough time has elapsed. */
        if (sj->start_time && sj->start_time > now) continue;

        /* Mark as running *before* forking: start_time now doubles as the
         * reference used by sentinelKillTimedoutScripts(). */
        sj->flags |= SENTINEL_SCRIPT_RUNNING;
        sj->start_time = mstime();
        sj->retry_num++;
        pid = fork();

        if (pid == -1) {
            /* Parent (fork error).
             * We report fork errors as signal 99, in order to unify the
             * reporting with other kind of errors. */
            sentinelEvent(LL_WARNING,"-script-error",NULL,
                          "%s %d %d", sj->argv[0], 99, 0);
            sj->flags &= ~SENTINEL_SCRIPT_RUNNING;
            sj->pid = 0;
        } else if (pid == 0) {
            /* Child: drop inherited connections, then replace ourselves
             * with the script binary. */
            connTypeCleanupAll();
            execve(sj->argv[0],sj->argv,environ);
            /* If we are here an error occurred. */
            _exit(2); /* Don't retry execution. */
        } else {
            /* Parent (fork succeeded): account for the new child. */
            sentinel.running_scripts++;
            sj->pid = pid;
            sentinelEvent(LL_DEBUG,"+script-child",NULL,"%ld",(long)pid);
        }
    }
}
+
+/* How much to delay the execution of a script that we need to retry after
+ * an error?
+ *
+ * We double the retry delay for every further retry we do. So for instance
+ * if RETRY_DELAY is set to 30 seconds and the max number of retries is 10
+ * starting from the second attempt to execute the script the delays are:
+ * 30 sec, 60 sec, 2 min, 4 min, 8 min, 16 min, 32 min, 64 min, 128 min. */
+mstime_t sentinelScriptRetryDelay(int retry_num) {
+ mstime_t delay = sentinel_script_retry_delay;
+
+ while (retry_num-- > 1) delay *= 2;
+ return delay;
+}
+
/* Check for scripts that terminated, and remove them from the queue if the
 * script terminated successfully. If instead the script was terminated by
 * a signal, or returned exit code "1", it is scheduled to run again if
 * the max number of retries did not already elapsed. */
void sentinelCollectTerminatedScripts(void) {
    int statloc;
    pid_t pid;

    /* Reap every terminated child without blocking (WNOHANG). */
    while ((pid = waitpid(-1, &statloc, WNOHANG)) > 0) {
        int exitcode = WEXITSTATUS(statloc);
        int bysignal = 0; /* 0 = normal exit, otherwise the signal number. */
        listNode *ln;
        sentinelScriptJob *sj;

        if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
        sentinelEvent(LL_DEBUG,"-script-child",NULL,"%ld %d %d",
            (long)pid, exitcode, bysignal);

        ln = sentinelGetScriptListNodeByPid(pid);
        if (ln == NULL) {
            /* Unknown pid: just log it. running_scripts is not decremented
             * since we never accounted for this child. */
            serverLog(LL_WARNING,"waitpid() returned a pid (%ld) we can't find in our scripts execution queue!", (long)pid);
            continue;
        }
        sj = ln->value;

        /* If the script was terminated by a signal or returns an
         * exit code of "1" (that means: please retry), we reschedule it
         * if the max number of retries is not already reached. */
        if ((bysignal || exitcode == 1) &&
            sj->retry_num != SENTINEL_SCRIPT_MAX_RETRY)
        {
            /* Back to the "scheduled" state: a start_time in the future is
             * the earliest moment the retry may run (see
             * sentinelRunPendingScripts()). */
            sj->flags &= ~SENTINEL_SCRIPT_RUNNING;
            sj->pid = 0;
            sj->start_time = mstime() +
                             sentinelScriptRetryDelay(sj->retry_num);
        } else {
            /* Otherwise let's remove the script, but log the event if the
             * execution did not terminated in the best of the ways. */
            if (bysignal || exitcode != 0) {
                sentinelEvent(LL_WARNING,"-script-error",NULL,
                              "%s %d %d", sj->argv[0], bysignal, exitcode);
            }
            listDelNode(sentinel.scripts_queue,ln);
            sentinelReleaseScriptJob(sj);
        }
        sentinel.running_scripts--;
    }
}
+
+/* Kill scripts in timeout, they'll be collected by the
+ * sentinelCollectTerminatedScripts() function. */
+void sentinelKillTimedoutScripts(void) {
+ listNode *ln;
+ listIter li;
+ mstime_t now = mstime();
+
+ listRewind(sentinel.scripts_queue,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ sentinelScriptJob *sj = ln->value;
+
+ if (sj->flags & SENTINEL_SCRIPT_RUNNING &&
+ (now - sj->start_time) > sentinel_script_max_runtime)
+ {
+ sentinelEvent(LL_WARNING,"-script-timeout",NULL,"%s %ld",
+ sj->argv[0], (long)sj->pid);
+ kill(sj->pid,SIGKILL);
+ }
+ }
+}
+
/* Implements SENTINEL PENDING-SCRIPTS command.
 *
 * Replies with one map per queued job exposing: argv, flags
 * ("running"/"scheduled"), pid, run-time OR run-delay, and retry-num. */
void sentinelPendingScriptsCommand(client *c) {
    listNode *ln;
    listIter li;

    addReplyArrayLen(c,listLength(sentinel.scripts_queue));
    listRewind(sentinel.scripts_queue,&li);
    while ((ln = listNext(&li)) != NULL) {
        sentinelScriptJob *sj = ln->value;
        int j = 0;

        addReplyMapLen(c,5);

        /* argv is a NULL terminated array: count first, then emit. */
        addReplyBulkCString(c,"argv");
        while (sj->argv[j]) j++;
        addReplyArrayLen(c,j);
        j = 0;
        while (sj->argv[j]) addReplyBulkCString(c,sj->argv[j++]);

        addReplyBulkCString(c,"flags");
        addReplyBulkCString(c,
            (sj->flags & SENTINEL_SCRIPT_RUNNING) ? "running" : "scheduled");

        addReplyBulkCString(c,"pid");
        addReplyBulkLongLong(c,sj->pid);

        if (sj->flags & SENTINEL_SCRIPT_RUNNING) {
            /* run-time: milliseconds elapsed since the job was started. */
            addReplyBulkCString(c,"run-time");
            addReplyBulkLongLong(c,mstime() - sj->start_time);
        } else {
            /* run-delay: milliseconds until a scheduled retry may run;
             * 0 when the job may run immediately. */
            mstime_t delay = sj->start_time ? (sj->start_time-mstime()) : 0;
            if (delay < 0) delay = 0;
            addReplyBulkCString(c,"run-delay");
            addReplyBulkLongLong(c,delay);
        }

        addReplyBulkCString(c,"retry-num");
        addReplyBulkLongLong(c,sj->retry_num);
    }
}
+
+/* This function calls, if any, the client reconfiguration script with the
+ * following parameters:
+ *
+ * <master-name> <role> <state> <from-ip> <from-port> <to-ip> <to-port>
+ *
+ * It is called every time a failover is performed.
+ *
+ * <state> is currently always "start".
+ * <role> is either "leader" or "observer".
+ *
+ * from/to fields are respectively master -> promoted slave addresses for
+ * "start" and "end". */
+void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, char *state, sentinelAddr *from, sentinelAddr *to) {
+ char fromport[32], toport[32];
+
+ if (master->client_reconfig_script == NULL) return;
+ ll2string(fromport,sizeof(fromport),from->port);
+ ll2string(toport,sizeof(toport),to->port);
+ sentinelScheduleScriptExecution(master->client_reconfig_script,
+ master->name,
+ (role == SENTINEL_LEADER) ? "leader" : "observer",
+ state, announceSentinelAddr(from), fromport,
+ announceSentinelAddr(to), toport, NULL);
+}
+
+/* =============================== instanceLink ============================= */
+
+/* Create a not yet connected link object. */
+instanceLink *createInstanceLink(void) {
+ instanceLink *link = zmalloc(sizeof(*link));
+
+ link->refcount = 1;
+ link->disconnected = 1;
+ link->pending_commands = 0;
+ link->cc = NULL;
+ link->pc = NULL;
+ link->cc_conn_time = 0;
+ link->pc_conn_time = 0;
+ link->last_reconn_time = 0;
+ link->pc_last_activity = 0;
+ /* We set the act_ping_time to "now" even if we actually don't have yet
+ * a connection with the node, nor we sent a ping.
+ * This is useful to detect a timeout in case we'll not be able to connect
+ * with the node at all. */
+ link->act_ping_time = mstime();
+ link->last_ping_time = 0;
+ link->last_avail_time = mstime();
+ link->last_pong_time = mstime();
+ return link;
+}
+
+/* Disconnect a hiredis connection in the context of an instance link. */
+void instanceLinkCloseConnection(instanceLink *link, redisAsyncContext *c) {
+ if (c == NULL) return;
+
+ if (link->cc == c) {
+ link->cc = NULL;
+ link->pending_commands = 0;
+ }
+ if (link->pc == c) link->pc = NULL;
+ c->data = NULL;
+ link->disconnected = 1;
+ redisAsyncFree(c);
+}
+
/* Decrement the refcount of a link object, if it drops to zero, actually
 * free it and return NULL. Otherwise don't do anything and return the pointer
 * to the object.
 *
 * If we are not going to free the link and ri is not NULL, we rebind all the
 * pending requests in link->cc (hiredis connection for commands) to a
 * callback that will just ignore them. This is useful to avoid processing
 * replies for an instance that no longer exists. */
instanceLink *releaseInstanceLink(instanceLink *link, sentinelRedisInstance *ri)
{
    serverAssert(link->refcount > 0);
    link->refcount--;
    if (link->refcount != 0) {
        if (ri && ri->link->cc) {
            /* This instance may have pending callbacks in the hiredis async
             * context, having as 'privdata' the instance that we are going to
             * free. Let's rewrite the callback list, directly exploiting
             * hiredis internal data structures, in order to bind them with
             * a callback that will ignore the reply at all. */
            redisCallback *cb;
            redisCallbackList *callbacks = &link->cc->replies;

            cb = callbacks->head;
            while(cb) {
                if (cb->privdata == ri) {
                    cb->fn = sentinelDiscardReplyCallback;
                    cb->privdata = NULL; /* Not strictly needed. */
                }
                cb = cb->next;
            }
        }
        return link; /* Other active users. */
    }

    /* Last reference dropped: close both connections and free the link. */
    instanceLinkCloseConnection(link,link->cc);
    instanceLinkCloseConnection(link,link->pc);
    zfree(link);
    return NULL;
}
+
/* This function will attempt to share the instance link we already have
 * for the same Sentinel in the context of a different master, with the
 * instance we are passing as argument.
 *
 * This way multiple Sentinel objects that refer all to the same physical
 * Sentinel instance but in the context of different masters will use
 * a single connection, will send a single PING per second for failure
 * detection and so forth.
 *
 * Return C_OK if a matching Sentinel was found in the context of a
 * different master and sharing was performed. Otherwise C_ERR
 * is returned. */
int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
    serverAssert(ri->flags & SRI_SENTINEL);
    dictIterator *di;
    dictEntry *de;

    if (ri->runid == NULL) return C_ERR; /* No way to identify it. */
    if (ri->link->refcount > 1) return C_ERR; /* Already shared. */

    di = dictGetIterator(sentinel.masters);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *master = dictGetVal(de), *match;
        /* We want to share with the same physical Sentinel referenced
         * in other masters, so skip our master. */
        if (master == ri->master) continue;
        match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
                                                       NULL,0,ri->runid);
        if (match == NULL) continue; /* No match. */
        if (match == ri) continue; /* Should never happen but... safer. */

        /* We identified a matching Sentinel, great! Let's free our link
         * and use the one of the matching Sentinel.
         * Note: releaseInstanceLink() is called with a NULL 'ri' because
         * our instance keeps existing, only its link is swapped. */
        releaseInstanceLink(ri->link,NULL);
        ri->link = match->link;
        match->link->refcount++;
        dictReleaseIterator(di);
        return C_OK;
    }
    dictReleaseIterator(di);
    return C_ERR;
}
+
+/* Disconnect the relevant master and its replicas. */
+void dropInstanceConnections(sentinelRedisInstance *ri) {
+ serverAssert(ri->flags & SRI_MASTER);
+
+ /* Disconnect with the master. */
+ instanceLinkCloseConnection(ri->link, ri->link->cc);
+ instanceLinkCloseConnection(ri->link, ri->link->pc);
+
+ /* Disconnect with all replicas. */
+ dictIterator *di;
+ dictEntry *de;
+ sentinelRedisInstance *repl_ri;
+ di = dictGetIterator(ri->slaves);
+ while ((de = dictNext(di)) != NULL) {
+ repl_ri = dictGetVal(de);
+ instanceLinkCloseConnection(repl_ri->link, repl_ri->link->cc);
+ instanceLinkCloseConnection(repl_ri->link, repl_ri->link->pc);
+ }
+ dictReleaseIterator(di);
+}
+
+/* Drop all connections to other sentinels. Returns the number of connections
+ * dropped.*/
+int sentinelDropConnections(void) {
+ dictIterator *di;
+ dictEntry *de;
+ int dropped = 0;
+
+ di = dictGetIterator(sentinel.masters);
+ while ((de = dictNext(di)) != NULL) {
+ dictIterator *sdi;
+ dictEntry *sde;
+
+ sentinelRedisInstance *ri = dictGetVal(de);
+ sdi = dictGetIterator(ri->sentinels);
+ while ((sde = dictNext(sdi)) != NULL) {
+ sentinelRedisInstance *si = dictGetVal(sde);
+ if (!si->link->disconnected) {
+ instanceLinkCloseConnection(si->link, si->link->pc);
+ instanceLinkCloseConnection(si->link, si->link->cc);
+ dropped++;
+ }
+ }
+ dictReleaseIterator(sdi);
+ }
+ dictReleaseIterator(di);
+
+ return dropped;
+}
+
/* When we detect a Sentinel to switch address (reporting a different IP/port
 * pair in Hello messages), let's update all the matching Sentinels in the
 * context of other masters as well and disconnect the links, so that everybody
 * will be updated.
 *
 * Return the number of updated Sentinel addresses. */
int sentinelUpdateSentinelAddressInAllMasters(sentinelRedisInstance *ri) {
    serverAssert(ri->flags & SRI_SENTINEL);
    dictIterator *di;
    dictEntry *de;
    int reconfigured = 0;

    di = dictGetIterator(sentinel.masters);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *master = dictGetVal(de), *match;
        match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
                                                       NULL,0,ri->runid);
        /* If there is no match, this master does not know about this
         * Sentinel, try with the next one. */
        if (match == NULL) continue;

        /* Disconnect the old links if connected.
         * Note: done even when match == ri (the check below comes after),
         * so 'ri' itself also reconnects to the new address. */
        if (match->link->cc != NULL)
            instanceLinkCloseConnection(match->link,match->link->cc);
        if (match->link->pc != NULL)
            instanceLinkCloseConnection(match->link,match->link->pc);

        if (match == ri) continue; /* Address already updated for it. */

        /* Update the address of the matching Sentinel by copying the address
         * of the Sentinel object that received the address update. */
        releaseSentinelAddr(match->addr);
        match->addr = dupSentinelAddr(ri->addr);
        reconfigured++;
    }
    dictReleaseIterator(di);
    if (reconfigured)
        sentinelEvent(LL_NOTICE,"+sentinel-address-update", ri,
                    "%@ %d additional matching instances", reconfigured);
    return reconfigured;
}
+
+/* This function is called when a hiredis connection reported an error.
+ * We set it to NULL and mark the link as disconnected so that it will be
+ * reconnected again.
+ *
+ * Note: we don't free the hiredis context as hiredis will do it for us
+ * for async connections. */
+void instanceLinkConnectionError(const redisAsyncContext *c) {
+ instanceLink *link = c->data;
+ int pubsub;
+
+ if (!link) return;
+
+ pubsub = (link->pc == c);
+ if (pubsub)
+ link->pc = NULL;
+ else
+ link->cc = NULL;
+ link->disconnected = 1;
+}
+
+/* Hiredis connection established / disconnected callbacks. We need them
+ * just to cleanup our link state. */
+void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status) {
+ if (status != C_OK) instanceLinkConnectionError(c);
+}
+
/* hiredis "disconnected" callback: regardless of the status, clear the
 * link state so the connection will be re-established. */
void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
    UNUSED(status);
    instanceLinkConnectionError(c);
}
+
+/* ========================== sentinelRedisInstance ========================= */
+
/* Create a redis instance, the following fields must be populated by the
 * caller if needed:
 * runid: set to NULL but will be populated once INFO output is received.
 * info_refresh: is set to 0 to mean that we never received INFO so far.
 *
 * If SRI_MASTER is set into initial flags the instance is added to
 * sentinel.masters table.
 *
 * if SRI_SLAVE or SRI_SENTINEL is set then 'master' must be not NULL and the
 * instance is added into master->slaves or master->sentinels table.
 *
 * If the instance is a slave, the name parameter is ignored and is created
 * automatically as ip/hostname:port.
 *
 * The function fails if hostname can't be resolved or port is out of range.
 * When this happens NULL is returned and errno is set accordingly to the
 * createSentinelAddr() function.
 *
 * The function may also fail and return NULL with errno set to EBUSY if
 * a master with the same name, a slave with the same address, or a sentinel
 * with the same ID already exists. */

sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *hostname, int port, int quorum, sentinelRedisInstance *master) {
    sentinelRedisInstance *ri;
    sentinelAddr *addr;
    dict *table = NULL;
    sds sdsname;

    /* Exactly one role flag must be present, and non-masters need a master. */
    serverAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL));
    serverAssert((flags & SRI_MASTER) || master != NULL);

    /* Check address validity. */
    addr = createSentinelAddr(hostname,port,1);
    if (addr == NULL) return NULL;

    /* For slaves use ip/host:port as name. */
    if (flags & SRI_SLAVE)
        sdsname = announceSentinelAddrAndPort(addr);
    else
        sdsname = sdsnew(name);

    /* Make sure the entry is not duplicated. This may happen when the same
     * name for a master is used multiple times inside the configuration or
     * if we try to add multiple times a slave or sentinel with same ip/port
     * to a master. */
    if (flags & SRI_MASTER) table = sentinel.masters;
    else if (flags & SRI_SLAVE) table = master->slaves;
    else if (flags & SRI_SENTINEL) table = master->sentinels;
    if (dictFind(table,sdsname)) {
        /* Duplicate: undo the allocations made so far. */
        releaseSentinelAddr(addr);
        sdsfree(sdsname);
        errno = EBUSY;
        return NULL;
    }

    /* Create the instance object. */
    ri = zmalloc(sizeof(*ri));
    /* Note that all the instances are started in the disconnected state,
     * the event loop will take care of connecting them. */
    ri->flags = flags;
    ri->name = sdsname;
    ri->runid = NULL;
    ri->config_epoch = 0;
    ri->addr = addr;
    ri->link = createInstanceLink();
    ri->last_pub_time = mstime();
    ri->last_hello_time = mstime();
    ri->last_master_down_reply_time = mstime();
    ri->s_down_since_time = 0;
    ri->o_down_since_time = 0;
    /* Replicas/sentinels inherit the master's down-after-period. */
    ri->down_after_period = master ? master->down_after_period : sentinel_default_down_after;
    ri->master_reboot_down_after_period = 0;
    ri->master_link_down_time = 0;
    ri->auth_pass = NULL;
    ri->auth_user = NULL;
    ri->slave_priority = SENTINEL_DEFAULT_SLAVE_PRIORITY;
    ri->replica_announced = 1;
    ri->slave_reconf_sent_time = 0;
    ri->slave_master_host = NULL;
    ri->slave_master_port = 0;
    ri->slave_master_link_status = SENTINEL_MASTER_LINK_STATUS_DOWN;
    ri->slave_repl_offset = 0;
    ri->sentinels = dictCreate(&instancesDictType);
    ri->quorum = quorum;
    ri->parallel_syncs = SENTINEL_DEFAULT_PARALLEL_SYNCS;
    ri->master = master;
    ri->slaves = dictCreate(&instancesDictType);
    ri->info_refresh = 0;
    ri->renamed_commands = dictCreate(&renamedCommandsDictType);

    /* Failover state. */
    ri->leader = NULL;
    ri->leader_epoch = 0;
    ri->failover_epoch = 0;
    ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
    ri->failover_state_change_time = 0;
    ri->failover_start_time = 0;
    ri->failover_timeout = sentinel_default_failover_timeout;
    ri->failover_delay_logged = 0;
    ri->promoted_slave = NULL;
    ri->notification_script = NULL;
    ri->client_reconfig_script = NULL;
    ri->info = NULL;

    /* Role */
    ri->role_reported = ri->flags & (SRI_MASTER|SRI_SLAVE);
    ri->role_reported_time = mstime();
    ri->slave_conf_change_time = mstime();

    /* Add into the right table. */
    dictAdd(table, ri->name, ri);
    return ri;
}
+
/* Release this instance and all its slaves, sentinels, hiredis connections.
 * This function does not take care of unlinking the instance from the main
 * masters table (if it is a master) or from its master sentinels/slaves table
 * if it is a slave or sentinel. */
void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
    /* Release all its slaves or sentinels if any. */
    dictRelease(ri->sentinels);
    dictRelease(ri->slaves);

    /* Disconnect the instance: drops our refcount on the (possibly shared)
     * link, and rebinds any pending hiredis callbacks referencing 'ri' so
     * they won't touch freed memory (see releaseInstanceLink()). */
    releaseInstanceLink(ri->link,ri);

    /* Free other resources. Fields never populated are still NULL here. */
    sdsfree(ri->name);
    sdsfree(ri->runid);
    sdsfree(ri->notification_script);
    sdsfree(ri->client_reconfig_script);
    sdsfree(ri->slave_master_host);
    sdsfree(ri->leader);
    sdsfree(ri->auth_pass);
    sdsfree(ri->auth_user);
    sdsfree(ri->info);
    releaseSentinelAddr(ri->addr);
    dictRelease(ri->renamed_commands);

    /* Clear state into the master if needed. */
    if ((ri->flags & SRI_SLAVE) && (ri->flags & SRI_PROMOTED) && ri->master)
        ri->master->promoted_slave = NULL;

    zfree(ri);
}
+
+/* Lookup a slave in a master Redis instance, by ip and port. */
+sentinelRedisInstance *sentinelRedisInstanceLookupSlave(
+ sentinelRedisInstance *ri, char *slave_addr, int port)
+{
+ sds key;
+ sentinelRedisInstance *slave;
+ sentinelAddr *addr;
+
+ serverAssert(ri->flags & SRI_MASTER);
+
+ /* We need to handle a slave_addr that is potentially a hostname.
+ * If that is the case, depending on configuration we either resolve
+ * it and use the IP address or fail.
+ */
+ addr = createSentinelAddr(slave_addr, port, 0);
+ if (!addr) return NULL;
+ key = announceSentinelAddrAndPort(addr);
+ releaseSentinelAddr(addr);
+
+ slave = dictFetchValue(ri->slaves,key);
+ sdsfree(key);
+ return slave;
+}
+
+/* Return the name of the type of the instance as a string. */
+const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri) {
+ if (ri->flags & SRI_MASTER) return "master";
+ else if (ri->flags & SRI_SLAVE) return "slave";
+ else if (ri->flags & SRI_SENTINEL) return "sentinel";
+ else return "unknown";
+}
+
/* This function remove the Sentinel with the specified ID from the
 * specified master.
 *
 * If "runid" is NULL the function returns ASAP.
 *
 * This function is useful because on Sentinels address switch, we want to
 * remove our old entry and add a new one for the same ID but with the new
 * address.
 *
 * The function returns 1 if the matching Sentinel was removed, otherwise
 * 0 if there was no Sentinel with this ID. */
int removeMatchingSentinelFromMaster(sentinelRedisInstance *master, char *runid) {
    dictIterator *di;
    dictEntry *de;
    int removed = 0;

    if (runid == NULL) return 0;

    /* Safe iterator: entries are deleted while iterating. */
    di = dictGetSafeIterator(master->sentinels);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);

        if (ri->runid && strcmp(ri->runid,runid) == 0) {
            dictDelete(master->sentinels,ri->name);
            removed++;
        }
    }
    dictReleaseIterator(di);
    return removed;
}
+
/* Search an instance with the same runid, ip and port into a dictionary
 * of instances. Return NULL if not found, otherwise return the instance
 * pointer.
 *
 * runid or addr can be NULL. In such a case the search is performed only
 * by the non-NULL field. */
sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *addr, int port, char *runid) {
    dictIterator *di;
    dictEntry *de;
    sentinelRedisInstance *instance = NULL;
    sentinelAddr *ri_addr = NULL;

    serverAssert(addr || runid); /* User must pass at least one search param. */
    if (addr != NULL) {
        /* Try to resolve addr. If hostnames are used, we're accepting an ri_addr
         * that contains an hostname only and can still be matched based on that.
         */
        ri_addr = createSentinelAddr(addr,port,1);
        if (!ri_addr) return NULL;
    }
    di = dictGetIterator(instances);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);

        /* When matching by runid, skip instances whose runid is still
         * unknown (not yet received via INFO). */
        if (runid && !ri->runid) continue;
        if ((runid == NULL || strcmp(ri->runid, runid) == 0) &&
            (addr == NULL || sentinelAddrOrHostnameEqual(ri->addr, ri_addr)))
        {
            instance = ri;
            break;
        }
    }
    dictReleaseIterator(di);
    if (ri_addr != NULL)
        releaseSentinelAddr(ri_addr);

    return instance;
}
+
+/* Master lookup by name */
+sentinelRedisInstance *sentinelGetMasterByName(char *name) {
+ sentinelRedisInstance *ri;
+ sds sdsname = sdsnew(name);
+
+ ri = dictFetchValue(sentinel.masters,sdsname);
+ sdsfree(sdsname);
+ return ri;
+}
+
/* Reset the state of a monitored master:
 * 1) Remove all slaves.
 * 2) Remove all sentinels.
 * 3) Remove most of the flags resulting from runtime operations.
 * 4) Reset timers to their default value. For example after a reset it will be
 *    possible to failover again the same master ASAP, without waiting the
 *    failover timeout delay.
 * 5) In the process of doing this undo the failover if in progress.
 * 6) Disconnect the connections with the master (will reconnect automatically).
 */

#define SENTINEL_RESET_NO_SENTINELS (1<<0)
void sentinelResetMaster(sentinelRedisInstance *ri, int flags) {
    serverAssert(ri->flags & SRI_MASTER);
    dictRelease(ri->slaves);
    ri->slaves = dictCreate(&instancesDictType);
    /* The set of known Sentinels is kept when the caller passes
     * SENTINEL_RESET_NO_SENTINELS (e.g. during an address switch). */
    if (!(flags & SENTINEL_RESET_NO_SENTINELS)) {
        dictRelease(ri->sentinels);
        ri->sentinels = dictCreate(&instancesDictType);
    }
    instanceLinkCloseConnection(ri->link,ri->link->cc);
    instanceLinkCloseConnection(ri->link,ri->link->pc);
    /* Keep only the SRI_MASTER bit, dropping every runtime flag
     * (SDOWN/ODOWN/failover related). */
    ri->flags &= SRI_MASTER;
    if (ri->leader) {
        sdsfree(ri->leader);
        ri->leader = NULL;
    }
    ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
    ri->failover_state_change_time = 0;
    ri->failover_start_time = 0; /* We can failover again ASAP. */
    ri->promoted_slave = NULL;
    sdsfree(ri->runid);
    sdsfree(ri->slave_master_host);
    ri->runid = NULL;
    ri->slave_master_host = NULL;
    /* Restart the link timers as if the link was just created. */
    ri->link->act_ping_time = mstime();
    ri->link->last_ping_time = 0;
    ri->link->last_avail_time = mstime();
    ri->link->last_pong_time = mstime();
    ri->role_reported_time = mstime();
    ri->role_reported = SRI_MASTER;
    if (flags & SENTINEL_GENERATE_EVENT)
        sentinelEvent(LL_WARNING,"+reset-master",ri,"%@");
}
+
+/* Call sentinelResetMaster() on every master with a name matching the specified
+ * pattern. */
+int sentinelResetMastersByPattern(char *pattern, int flags) {
+ dictIterator *di;
+ dictEntry *de;
+ int reset = 0;
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+
+ if (ri->name) {
+ if (stringmatch(pattern,ri->name,0)) {
+ sentinelResetMaster(ri,flags);
+ reset++;
+ }
+ }
+ }
+ dictReleaseIterator(di);
+ return reset;
+}
+
/* Reset the specified master with sentinelResetMaster(), and also change
 * the ip:port address, but take the name of the instance unmodified.
 *
 * This is used to handle the +switch-master event.
 *
 * The function returns C_ERR if the address can't be resolved for some
 * reason. Otherwise C_OK is returned. */
int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *hostname, int port) {
    sentinelAddr *oldaddr, *newaddr;
    sentinelAddr **slaves = NULL;
    int numslaves = 0, j;
    dictIterator *di;
    dictEntry *de;

    newaddr = createSentinelAddr(hostname,port,0);
    if (newaddr == NULL) return C_ERR;

    /* There can be only 0 or 1 slave that has the newaddr.
     * and It can add old master 1 more slave.
     * so It allocates dictSize(master->slaves) + 1 */
    slaves = zmalloc(sizeof(sentinelAddr*)*(dictSize(master->slaves) + 1));

    /* Don't include the one having the address we are switching to. */
    di = dictGetIterator(master->slaves);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *slave = dictGetVal(de);

        if (sentinelAddrOrHostnameEqual(slave->addr,newaddr)) continue;
        slaves[numslaves++] = dupSentinelAddr(slave->addr);
    }
    dictReleaseIterator(di);

    /* If we are switching to a different address, include the old address
     * as a slave as well, so that we'll be able to sense / reconfigure
     * the old master. */
    if (!sentinelAddrOrHostnameEqual(newaddr,master->addr)) {
        slaves[numslaves++] = dupSentinelAddr(master->addr);
    }

    /* Reset and switch address. The reset keeps the known Sentinels
     * (SENTINEL_RESET_NO_SENTINELS) but drops all the replicas, which are
     * re-added just below with the addresses saved above. */
    sentinelResetMaster(master,SENTINEL_RESET_NO_SENTINELS);
    oldaddr = master->addr;
    master->addr = newaddr;
    master->o_down_since_time = 0;
    master->s_down_since_time = 0;

    /* Add slaves back. */
    for (j = 0; j < numslaves; j++) {
        sentinelRedisInstance *slave;

        slave = createSentinelRedisInstance(NULL,SRI_SLAVE,slaves[j]->hostname,
                    slaves[j]->port, master->quorum, master);
        releaseSentinelAddr(slaves[j]);
        if (slave) sentinelEvent(LL_NOTICE,"+slave",slave,"%@");
    }
    zfree(slaves);

    /* Release the old address at the end so we are safe even if the function
     * gets the master->addr->ip and master->addr->port as arguments. */
    releaseSentinelAddr(oldaddr);
    sentinelFlushConfig();
    return C_OK;
}
+
+/* Return non-zero if there was no SDOWN or ODOWN error associated to this
+ * instance in the latest 'ms' milliseconds. */
+int sentinelRedisInstanceNoDownFor(sentinelRedisInstance *ri, mstime_t ms) {
+ mstime_t most_recent;
+
+ most_recent = ri->s_down_since_time;
+ if (ri->o_down_since_time > most_recent)
+ most_recent = ri->o_down_since_time;
+ return most_recent == 0 || (mstime() - most_recent) > ms;
+}
+
+/* Return the current master address, that is, its address or the address
+ * of the promoted slave if already operational. */
+sentinelAddr *sentinelGetCurrentMasterAddress(sentinelRedisInstance *master) {
+ /* If we are failing over the master, and the state is already
+ * SENTINEL_FAILOVER_STATE_RECONF_SLAVES or greater, it means that we
+ * already have the new configuration epoch in the master, and the
+ * slave acknowledged the configuration switch. Advertise the new
+ * address. */
+ if ((master->flags & SRI_FAILOVER_IN_PROGRESS) &&
+ master->promoted_slave &&
+ master->failover_state >= SENTINEL_FAILOVER_STATE_RECONF_SLAVES)
+ {
+ return master->promoted_slave->addr;
+ } else {
+ return master->addr;
+ }
+}
+
+/* This function sets the down_after_period field value in 'master' to all
+ * the slaves and sentinel instances connected to this master. */
+void sentinelPropagateDownAfterPeriod(sentinelRedisInstance *master) {
+ dictIterator *di;
+ dictEntry *de;
+ int j;
+ dict *d[] = {master->slaves, master->sentinels, NULL};
+
+ for (j = 0; d[j]; j++) {
+ di = dictGetIterator(d[j]);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+ ri->down_after_period = master->down_after_period;
+ }
+ dictReleaseIterator(di);
+ }
+}
+
+/* This function is used in order to send commands to Redis instances: the
+ * commands we send from Sentinel may be renamed, a common case is a master
+ * with CONFIG and SLAVEOF commands renamed for security concerns. In that
+ * case we check the ri->renamed_command table (or if the instance is a slave,
+ * we check the one of the master), and map the command that we should send
+ * to the set of renamed commands. However, if the command was not renamed,
+ * we just return "command" itself. */
+char *sentinelInstanceMapCommand(sentinelRedisInstance *ri, char *command) {
+ sds sc = sdsnew(command);
+ if (ri->master) ri = ri->master;
+ char *retval = dictFetchValue(ri->renamed_commands, sc);
+ sdsfree(sc);
+ return retval ? retval : command;
+}
+
+/* ============================ Config handling ============================= */
+
+/* Generalise handling create instance error. Use SRI_MASTER, SRI_SLAVE or
+ * SRI_SENTINEL as a role value. */
+const char *sentinelCheckCreateInstanceErrors(int role) {
+ switch(errno) {
+ case EBUSY:
+ switch (role) {
+ case SRI_MASTER:
+ return "Duplicate master name.";
+ case SRI_SLAVE:
+ return "Duplicate hostname and port for replica.";
+ case SRI_SENTINEL:
+ return "Duplicate runid for sentinel.";
+ default:
+ serverAssert(0);
+ break;
+ }
+ break;
+ case ENOENT:
+ return "Can't resolve instance hostname.";
+ case EINVAL:
+ return "Invalid port number.";
+ default:
+ return "Unknown Error for creating instances.";
+ }
+}
+
+/* init function for server.sentinel_config */
+void initializeSentinelConfig(void) {
+ server.sentinel_config = zmalloc(sizeof(struct sentinelConfig));
+ server.sentinel_config->monitor_cfg = listCreate();
+ server.sentinel_config->pre_monitor_cfg = listCreate();
+ server.sentinel_config->post_monitor_cfg = listCreate();
+ listSetFreeMethod(server.sentinel_config->monitor_cfg,freeSentinelLoadQueueEntry);
+ listSetFreeMethod(server.sentinel_config->pre_monitor_cfg,freeSentinelLoadQueueEntry);
+ listSetFreeMethod(server.sentinel_config->post_monitor_cfg,freeSentinelLoadQueueEntry);
+}
+
+/* destroy function for server.sentinel_config */
+void freeSentinelConfig(void) {
+ /* release these three config queues since we will not use it anymore */
+ listRelease(server.sentinel_config->pre_monitor_cfg);
+ listRelease(server.sentinel_config->monitor_cfg);
+ listRelease(server.sentinel_config->post_monitor_cfg);
+ zfree(server.sentinel_config);
+ server.sentinel_config = NULL;
+}
+
+/* Search config name in pre monitor config name array, return 1 if found,
+ * 0 if not found. */
+int searchPreMonitorCfgName(const char *name) {
+ for (unsigned int i = 0; i < sizeof(preMonitorCfgName)/sizeof(preMonitorCfgName[0]); i++) {
+ if (!strcasecmp(preMonitorCfgName[i],name)) return 1;
+ }
+ return 0;
+}
+
+/* free method for sentinelLoadQueueEntry when release the list */
+void freeSentinelLoadQueueEntry(void *item) {
+ struct sentinelLoadQueueEntry *entry = item;
+ sdsfreesplitres(entry->argv,entry->argc);
+ sdsfree(entry->line);
+ zfree(entry);
+}
+
+/* Queue one "sentinel ..." configuration line instead of applying it
+ * immediately. Deferring the parsing until the whole file has been read
+ * removes any dependency on the order of the options inside the config. */
+void queueSentinelConfig(sds *argv, int argc, int linenum, sds line) {
+    struct sentinelLoadQueueEntry *entry;
+    list *target;
+
+    /* Lazily create the queues on the first queued directive. */
+    if (server.sentinel_config == NULL) initializeSentinelConfig();
+
+    entry = zmalloc(sizeof(*entry));
+    entry->argc = argc;
+    entry->linenum = linenum;
+    entry->line = sdsdup(line);
+    entry->argv = zmalloc(sizeof(char*)*argc);
+    for (int j = 0; j < argc; j++)
+        entry->argv[j] = sdsdup(argv[j]);
+
+    /* Route the entry to the proper queue so that pre-monitor options,
+     * "monitor" lines and all remaining per-master options can later be
+     * applied in an order that satisfies their dependencies. */
+    if (strcasecmp(argv[0],"monitor") == 0)
+        target = server.sentinel_config->monitor_cfg;
+    else if (searchPreMonitorCfgName(argv[0]))
+        target = server.sentinel_config->pre_monitor_cfg;
+    else
+        target = server.sentinel_config->post_monitor_cfg;
+    listAddNodeTail(target,entry);
+}
+
+/* This function is used for loading the sentinel configuration from
+ * pre_monitor_cfg, monitor_cfg and post_monitor_cfg list */
+void loadSentinelConfigFromQueue(void) {
+ const char *err = NULL;
+ listIter li;
+ listNode *ln;
+ int linenum = 0;
+ sds line = NULL;
+ unsigned int j;
+
+ /* if there is no sentinel_config entry, we can return immediately */
+ if (server.sentinel_config == NULL) return;
+
+ /* The three queues, in the order they must be applied: options that must
+ * precede "monitor", the "monitor" lines that create the masters, then
+ * the per-master options that require the master to already exist. */
+ list *sentinel_configs[3] = {
+ server.sentinel_config->pre_monitor_cfg,
+ server.sentinel_config->monitor_cfg,
+ server.sentinel_config->post_monitor_cfg
+ };
+ /* loading from pre monitor config queue first to avoid dependency issues
+ * loading from monitor config queue
+ * loading from the post monitor config queue */
+ for (j = 0; j < sizeof(sentinel_configs) / sizeof(sentinel_configs[0]); j++) {
+ listRewind(sentinel_configs[j],&li);
+ while((ln = listNext(&li))) {
+ struct sentinelLoadQueueEntry *entry = ln->value;
+ err = sentinelHandleConfiguration(entry->argv,entry->argc);
+ if (err) {
+ /* Remember where the bad directive came from for the report
+ * below. 'line' is borrowed from the entry, which is fine
+ * since we exit(1) without ever freeing the queues. */
+ linenum = entry->linenum;
+ line = entry->line;
+ goto loaderr;
+ }
+ }
+ }
+
+ /* free sentinel_config when config loading is finished */
+ freeSentinelConfig();
+ return;
+
+loaderr:
+ /* Invalid configuration is fatal at startup: print the offending line
+ * and abort, matching the behavior of the generic config loader. */
+ fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR (Redis %s) ***\n",
+ REDIS_VERSION);
+ fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
+ fprintf(stderr, ">>> '%s'\n", line);
+ fprintf(stderr, "%s\n", err);
+ exit(1);
+}
+
+/* Apply one queued "sentinel" configuration directive.
+ *
+ * argv[0] is the option name, argv[1..argc-1] its arguments. Returns NULL
+ * on success, otherwise a static string describing the error; the caller
+ * treats a non-NULL return as a fatal configuration file error.
+ *
+ * Note: sds fields are freed before being overwritten so that a directive
+ * repeated in the config file does not leak the previous value
+ * (sdsfree(NULL) is a no-op, so the first assignment is unaffected). */
+const char *sentinelHandleConfiguration(char **argv, int argc) {
+    sentinelRedisInstance *ri;
+
+    if (!strcasecmp(argv[0],"monitor") && argc == 5) {
+        /* monitor <name> <host> <port> <quorum> */
+        int quorum = atoi(argv[4]);
+
+        if (quorum <= 0) return "Quorum must be 1 or greater.";
+        if (createSentinelRedisInstance(argv[1],SRI_MASTER,argv[2],
+                                        atoi(argv[3]),quorum,NULL) == NULL)
+        {
+            return sentinelCheckCreateInstanceErrors(SRI_MASTER);
+        }
+    } else if (!strcasecmp(argv[0],"down-after-milliseconds") && argc == 3) {
+        /* down-after-milliseconds <name> <milliseconds> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        ri->down_after_period = atoi(argv[2]);
+        if (ri->down_after_period <= 0)
+            return "negative or zero time parameter.";
+        sentinelPropagateDownAfterPeriod(ri);
+    } else if (!strcasecmp(argv[0],"failover-timeout") && argc == 3) {
+        /* failover-timeout <name> <milliseconds> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        ri->failover_timeout = atoi(argv[2]);
+        if (ri->failover_timeout <= 0)
+            return "negative or zero time parameter.";
+    } else if (!strcasecmp(argv[0],"parallel-syncs") && argc == 3) {
+        /* parallel-syncs <name> <count> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        ri->parallel_syncs = atoi(argv[2]);
+    } else if (!strcasecmp(argv[0],"notification-script") && argc == 3) {
+        /* notification-script <name> <path> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        if (access(argv[2],X_OK) == -1)
+            return "Notification script seems non existing or non executable.";
+        sdsfree(ri->notification_script);
+        ri->notification_script = sdsnew(argv[2]);
+    } else if (!strcasecmp(argv[0],"client-reconfig-script") && argc == 3) {
+        /* client-reconfig-script <name> <path> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        if (access(argv[2],X_OK) == -1)
+            return "Client reconfiguration script seems non existing or "
+                   "non executable.";
+        sdsfree(ri->client_reconfig_script);
+        ri->client_reconfig_script = sdsnew(argv[2]);
+    } else if (!strcasecmp(argv[0],"auth-pass") && argc == 3) {
+        /* auth-pass <name> <password> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        sdsfree(ri->auth_pass);
+        ri->auth_pass = sdsnew(argv[2]);
+    } else if (!strcasecmp(argv[0],"auth-user") && argc == 3) {
+        /* auth-user <name> <username> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        sdsfree(ri->auth_user);
+        ri->auth_user = sdsnew(argv[2]);
+    } else if (!strcasecmp(argv[0],"current-epoch") && argc == 2) {
+        /* current-epoch <epoch> -- epochs only move forward. */
+        unsigned long long current_epoch = strtoull(argv[1],NULL,10);
+        if (current_epoch > sentinel.current_epoch)
+            sentinel.current_epoch = current_epoch;
+    } else if (!strcasecmp(argv[0],"myid") && argc == 2) {
+        if (strlen(argv[1]) != CONFIG_RUN_ID_SIZE)
+            return "Malformed Sentinel id in myid option.";
+        memcpy(sentinel.myid,argv[1],CONFIG_RUN_ID_SIZE);
+    } else if (!strcasecmp(argv[0],"config-epoch") && argc == 3) {
+        /* config-epoch <name> <epoch> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        ri->config_epoch = strtoull(argv[2],NULL,10);
+        /* The following update of current_epoch is not really useful as
+         * now the current epoch is persisted on the config file, but
+         * we leave this check here for redundancy. */
+        if (ri->config_epoch > sentinel.current_epoch)
+            sentinel.current_epoch = ri->config_epoch;
+    } else if (!strcasecmp(argv[0],"leader-epoch") && argc == 3) {
+        /* leader-epoch <name> <epoch> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        ri->leader_epoch = strtoull(argv[2],NULL,10);
+    } else if ((!strcasecmp(argv[0],"known-slave") ||
+                !strcasecmp(argv[0],"known-replica")) && argc == 4)
+    {
+        sentinelRedisInstance *slave;
+
+        /* known-replica <name> <ip> <port> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,argv[2],
+                    atoi(argv[3]), ri->quorum, ri)) == NULL)
+        {
+            return sentinelCheckCreateInstanceErrors(SRI_SLAVE);
+        }
+    } else if (!strcasecmp(argv[0],"known-sentinel") &&
+               (argc == 4 || argc == 5)) {
+        sentinelRedisInstance *si;
+
+        if (argc == 5) { /* Ignore the old form without runid. */
+            /* known-sentinel <name> <ip> <port> [runid] */
+            ri = sentinelGetMasterByName(argv[1]);
+            if (!ri) return "No such master with specified name.";
+            if ((si = createSentinelRedisInstance(argv[4],SRI_SENTINEL,argv[2],
+                        atoi(argv[3]), ri->quorum, ri)) == NULL)
+            {
+                return sentinelCheckCreateInstanceErrors(SRI_SENTINEL);
+            }
+            si->runid = sdsnew(argv[4]);
+            sentinelTryConnectionSharing(si);
+        }
+    } else if (!strcasecmp(argv[0],"rename-command") && argc == 4) {
+        /* rename-command <name> <command> <renamed-command> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        sds oldcmd = sdsnew(argv[2]);
+        sds newcmd = sdsnew(argv[3]);
+        if (dictAdd(ri->renamed_commands,oldcmd,newcmd) != DICT_OK) {
+            /* dictAdd does not take ownership on failure. */
+            sdsfree(oldcmd);
+            sdsfree(newcmd);
+            return "Same command renamed multiple times with rename-command.";
+        }
+    } else if (!strcasecmp(argv[0],"announce-ip") && argc == 2) {
+        /* announce-ip <ip-address> */
+        if (strlen(argv[1])) {
+            sdsfree(sentinel.announce_ip);
+            sentinel.announce_ip = sdsnew(argv[1]);
+        }
+    } else if (!strcasecmp(argv[0],"announce-port") && argc == 2) {
+        /* announce-port <port> */
+        sentinel.announce_port = atoi(argv[1]);
+    } else if (!strcasecmp(argv[0],"deny-scripts-reconfig") && argc == 2) {
+        /* deny-scripts-reconfig <yes|no> */
+        if ((sentinel.deny_scripts_reconfig = yesnotoi(argv[1])) == -1) {
+            return "Please specify yes or no for the "
+                   "deny-scripts-reconfig options.";
+        }
+    } else if (!strcasecmp(argv[0],"sentinel-user") && argc == 2) {
+        /* sentinel-user <user-name> */
+        if (strlen(argv[1])) {
+            sdsfree(sentinel.sentinel_auth_user);
+            sentinel.sentinel_auth_user = sdsnew(argv[1]);
+        }
+    } else if (!strcasecmp(argv[0],"sentinel-pass") && argc == 2) {
+        /* sentinel-pass <password> */
+        if (strlen(argv[1])) {
+            sdsfree(sentinel.sentinel_auth_pass);
+            sentinel.sentinel_auth_pass = sdsnew(argv[1]);
+        }
+    } else if (!strcasecmp(argv[0],"resolve-hostnames") && argc == 2) {
+        /* resolve-hostnames <yes|no> */
+        if ((sentinel.resolve_hostnames = yesnotoi(argv[1])) == -1) {
+            return "Please specify yes or no for the resolve-hostnames option.";
+        }
+    } else if (!strcasecmp(argv[0],"announce-hostnames") && argc == 2) {
+        /* announce-hostnames <yes|no> */
+        if ((sentinel.announce_hostnames = yesnotoi(argv[1])) == -1) {
+            return "Please specify yes or no for the announce-hostnames option.";
+        }
+    } else if (!strcasecmp(argv[0],"master-reboot-down-after-period") && argc == 3) {
+        /* master-reboot-down-after-period <name> <milliseconds> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        ri->master_reboot_down_after_period = atoi(argv[2]);
+        if (ri->master_reboot_down_after_period < 0)
+            return "negative time parameter.";
+    } else {
+        return "Unrecognized sentinel configuration statement.";
+    }
+    return NULL;
+}
+
+/* Implements CONFIG REWRITE for "sentinel" option.
+ * This is used not just to rewrite the configuration given by the user
+ * (the configured masters) but also in order to retain the state of
+ * Sentinel across restarts: config epoch of masters, associated slaves
+ * and sentinel instances, and so forth. */
+void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
+ dictIterator *di, *di2;
+ dictEntry *de;
+ sds line;
+
+ /* NOTE(review): the last argument of rewriteConfigRewriteLine appears to
+ * be a "force write even if default" flag -- confirm against config.c. */
+
+ /* sentinel unique ID. */
+ line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid);
+ rewriteConfigRewriteLine(state,"sentinel myid",line,1);
+
+ /* sentinel deny-scripts-reconfig. */
+ line = sdscatprintf(sdsempty(), "sentinel deny-scripts-reconfig %s",
+ sentinel.deny_scripts_reconfig ? "yes" : "no");
+ rewriteConfigRewriteLine(state,"sentinel deny-scripts-reconfig",line,
+ sentinel.deny_scripts_reconfig != SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG);
+
+ /* sentinel resolve-hostnames.
+ * This must be included early in the file so it is already in effect
+ * when reading the file.
+ */
+ line = sdscatprintf(sdsempty(), "sentinel resolve-hostnames %s",
+ sentinel.resolve_hostnames ? "yes" : "no");
+ rewriteConfigRewriteLine(state,"sentinel resolve-hostnames",line,
+ sentinel.resolve_hostnames != SENTINEL_DEFAULT_RESOLVE_HOSTNAMES);
+
+ /* sentinel announce-hostnames. */
+ line = sdscatprintf(sdsempty(), "sentinel announce-hostnames %s",
+ sentinel.announce_hostnames ? "yes" : "no");
+ rewriteConfigRewriteLine(state,"sentinel announce-hostnames",line,
+ sentinel.announce_hostnames != SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES);
+
+ /* For every master emit a "sentinel monitor" config entry. */
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *master, *ri;
+ sentinelAddr *master_addr;
+
+ /* sentinel monitor */
+ master = dictGetVal(de);
+ master_addr = sentinelGetCurrentMasterAddress(master);
+ line = sdscatprintf(sdsempty(),"sentinel monitor %s %s %d %d",
+ master->name, announceSentinelAddr(master_addr), master_addr->port,
+ master->quorum);
+ rewriteConfigRewriteLine(state,"sentinel monitor",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+
+ /* sentinel down-after-milliseconds -- only when not default. */
+ if (master->down_after_period != sentinel_default_down_after) {
+ line = sdscatprintf(sdsempty(),
+ "sentinel down-after-milliseconds %s %ld",
+ master->name, (long) master->down_after_period);
+ rewriteConfigRewriteLine(state,"sentinel down-after-milliseconds",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+
+ /* sentinel failover-timeout -- only when not default. */
+ if (master->failover_timeout != sentinel_default_failover_timeout) {
+ line = sdscatprintf(sdsempty(),
+ "sentinel failover-timeout %s %ld",
+ master->name, (long) master->failover_timeout);
+ rewriteConfigRewriteLine(state,"sentinel failover-timeout",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+
+ }
+
+ /* sentinel parallel-syncs -- only when not default. */
+ if (master->parallel_syncs != SENTINEL_DEFAULT_PARALLEL_SYNCS) {
+ line = sdscatprintf(sdsempty(),
+ "sentinel parallel-syncs %s %d",
+ master->name, master->parallel_syncs);
+ rewriteConfigRewriteLine(state,"sentinel parallel-syncs",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+
+ /* sentinel notification-script */
+ if (master->notification_script) {
+ line = sdscatprintf(sdsempty(),
+ "sentinel notification-script %s %s",
+ master->name, master->notification_script);
+ rewriteConfigRewriteLine(state,"sentinel notification-script",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+
+ /* sentinel client-reconfig-script */
+ if (master->client_reconfig_script) {
+ line = sdscatprintf(sdsempty(),
+ "sentinel client-reconfig-script %s %s",
+ master->name, master->client_reconfig_script);
+ rewriteConfigRewriteLine(state,"sentinel client-reconfig-script",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+
+ /* sentinel auth-pass & auth-user */
+ if (master->auth_pass) {
+ line = sdscatprintf(sdsempty(),
+ "sentinel auth-pass %s %s",
+ master->name, master->auth_pass);
+ rewriteConfigRewriteLine(state,"sentinel auth-pass",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+
+ if (master->auth_user) {
+ line = sdscatprintf(sdsempty(),
+ "sentinel auth-user %s %s",
+ master->name, master->auth_user);
+ rewriteConfigRewriteLine(state,"sentinel auth-user",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+
+ /* sentinel master-reboot-down-after-period -- 0 means disabled. */
+ if (master->master_reboot_down_after_period != 0) {
+ line = sdscatprintf(sdsempty(),
+ "sentinel master-reboot-down-after-period %s %ld",
+ master->name, (long) master->master_reboot_down_after_period);
+ rewriteConfigRewriteLine(state,"sentinel master-reboot-down-after-period",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+
+ /* sentinel config-epoch */
+ line = sdscatprintf(sdsempty(),
+ "sentinel config-epoch %s %llu",
+ master->name, (unsigned long long) master->config_epoch);
+ rewriteConfigRewriteLine(state,"sentinel config-epoch",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+
+
+ /* sentinel leader-epoch */
+ line = sdscatprintf(sdsempty(),
+ "sentinel leader-epoch %s %llu",
+ master->name, (unsigned long long) master->leader_epoch);
+ rewriteConfigRewriteLine(state,"sentinel leader-epoch",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+
+ /* sentinel known-slave */
+ di2 = dictGetIterator(master->slaves);
+ while((de = dictNext(di2)) != NULL) {
+ sentinelAddr *slave_addr;
+
+ ri = dictGetVal(de);
+ slave_addr = ri->addr;
+
+ /* If master_addr (obtained using sentinelGetCurrentMasterAddress()
+ * so it may be the address of the promoted slave) is equal to this
+ * slave's address, a failover is in progress and the slave was
+ * already successfully promoted. So as the address of this slave
+ * we use the old master address instead. */
+ if (sentinelAddrOrHostnameEqual(slave_addr,master_addr))
+ slave_addr = master->addr;
+ line = sdscatprintf(sdsempty(),
+ "sentinel known-replica %s %s %d",
+ master->name, announceSentinelAddr(slave_addr), slave_addr->port);
+ /* try to replace any known-slave option first if found */
+ /* NOTE(review): this relies on rewriteConfigRewriteLine returning
+ * non-zero when it replaced an existing "known-slave" line (old
+ * option name kept for backward compatibility); the sdsdup copy
+ * is consumed on replacement, so only one of the two calls ever
+ * owns 'line' -- confirm the return contract against config.c. */
+ if (rewriteConfigRewriteLine(state, "sentinel known-slave", sdsdup(line), 0) == 0) {
+ rewriteConfigRewriteLine(state, "sentinel known-replica", line, 1);
+ } else {
+ sdsfree(line);
+ }
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+ dictReleaseIterator(di2);
+
+ /* sentinel known-sentinel -- only sentinels with a known runid. */
+ di2 = dictGetIterator(master->sentinels);
+ while((de = dictNext(di2)) != NULL) {
+ ri = dictGetVal(de);
+ if (ri->runid == NULL) continue;
+ line = sdscatprintf(sdsempty(),
+ "sentinel known-sentinel %s %s %d %s",
+ master->name, announceSentinelAddr(ri->addr), ri->addr->port, ri->runid);
+ rewriteConfigRewriteLine(state,"sentinel known-sentinel",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+ dictReleaseIterator(di2);
+
+ /* sentinel rename-command */
+ di2 = dictGetIterator(master->renamed_commands);
+ while((de = dictNext(di2)) != NULL) {
+ sds oldname = dictGetKey(de);
+ sds newname = dictGetVal(de);
+ line = sdscatprintf(sdsempty(),
+ "sentinel rename-command %s %s %s",
+ master->name, oldname, newname);
+ rewriteConfigRewriteLine(state,"sentinel rename-command",line,1);
+ /* rewriteConfigMarkAsProcessed is handled after the loop */
+ }
+ dictReleaseIterator(di2);
+ }
+
+ /* sentinel current-epoch is a global state valid for all the masters. */
+ line = sdscatprintf(sdsempty(),
+ "sentinel current-epoch %llu", (unsigned long long) sentinel.current_epoch);
+ rewriteConfigRewriteLine(state,"sentinel current-epoch",line,1);
+
+ /* sentinel announce-ip. */
+ if (sentinel.announce_ip) {
+ line = sdsnew("sentinel announce-ip ");
+ line = sdscatrepr(line, sentinel.announce_ip, sdslen(sentinel.announce_ip));
+ rewriteConfigRewriteLine(state,"sentinel announce-ip",line,1);
+ } else {
+ rewriteConfigMarkAsProcessed(state,"sentinel announce-ip");
+ }
+
+ /* sentinel announce-port. */
+ if (sentinel.announce_port) {
+ line = sdscatprintf(sdsempty(),"sentinel announce-port %d",
+ sentinel.announce_port);
+ rewriteConfigRewriteLine(state,"sentinel announce-port",line,1);
+ } else {
+ rewriteConfigMarkAsProcessed(state,"sentinel announce-port");
+ }
+
+ /* sentinel sentinel-user. */
+ if (sentinel.sentinel_auth_user) {
+ line = sdscatprintf(sdsempty(), "sentinel sentinel-user %s", sentinel.sentinel_auth_user);
+ rewriteConfigRewriteLine(state,"sentinel sentinel-user",line,1);
+ } else {
+ rewriteConfigMarkAsProcessed(state,"sentinel sentinel-user");
+ }
+
+ /* sentinel sentinel-pass. */
+ if (sentinel.sentinel_auth_pass) {
+ line = sdscatprintf(sdsempty(), "sentinel sentinel-pass %s", sentinel.sentinel_auth_pass);
+ rewriteConfigRewriteLine(state,"sentinel sentinel-pass",line,1);
+ } else {
+ rewriteConfigMarkAsProcessed(state,"sentinel sentinel-pass");
+ }
+
+ dictReleaseIterator(di);
+
+ /* NOTE: the purpose here is in case due to the state change, the config rewrite
+ does not handle the configs, however, previously the config was set in the config file,
+ rewriteConfigMarkAsProcessed should be put here to mark it as processed in order to
+ delete the old config entry.
+ */
+ rewriteConfigMarkAsProcessed(state,"sentinel monitor");
+ rewriteConfigMarkAsProcessed(state,"sentinel down-after-milliseconds");
+ rewriteConfigMarkAsProcessed(state,"sentinel failover-timeout");
+ rewriteConfigMarkAsProcessed(state,"sentinel parallel-syncs");
+ rewriteConfigMarkAsProcessed(state,"sentinel notification-script");
+ rewriteConfigMarkAsProcessed(state,"sentinel client-reconfig-script");
+ rewriteConfigMarkAsProcessed(state,"sentinel auth-pass");
+ rewriteConfigMarkAsProcessed(state,"sentinel auth-user");
+ rewriteConfigMarkAsProcessed(state,"sentinel config-epoch");
+ rewriteConfigMarkAsProcessed(state,"sentinel leader-epoch");
+ rewriteConfigMarkAsProcessed(state,"sentinel known-replica");
+ rewriteConfigMarkAsProcessed(state,"sentinel known-sentinel");
+ rewriteConfigMarkAsProcessed(state,"sentinel rename-command");
+ rewriteConfigMarkAsProcessed(state,"sentinel master-reboot-down-after-period");
+}
+
+/* Persist the current Sentinel state into the configuration file using
+ * the generic config rewriting engine.
+ *
+ * Returns C_OK on success, C_ERR on failure (a warning is logged). */
+int sentinelFlushConfig(void) {
+    int saved_hz = server.hz;
+
+    /* Pin server.hz to the default for the duration of the rewrite, then
+     * restore the caller-visible value. */
+    server.hz = CONFIG_DEFAULT_HZ;
+    int rewrite_status = rewriteConfig(server.configfile, 0);
+    server.hz = saved_hz;
+
+    if (rewrite_status != -1) {
+        serverLog(LL_NOTICE,"Sentinel new configuration saved on disk");
+        return C_OK;
+    }
+    serverLog(LL_WARNING,"WARNING: Sentinel was not able to save the new configuration on disk!!!: %s", strerror(errno));
+    return C_ERR;
+}
+
+/* Flush the configuration to disk via sentinelFlushConfig() and reply
+ * +OK or an error to the calling client accordingly. */
+static void sentinelFlushConfigAndReply(client *c) {
+    if (sentinelFlushConfig() != C_ERR) {
+        addReply(c, shared.ok);
+        return;
+    }
+    addReplyError(c, "Failed to save config file. Check server logs.");
+}
+
+/* ====================== hiredis connection handling ======================= */
+
+/* Send the AUTH command with the specified master password if needed.
+ * Note that for slaves the password set for the master is used.
+ *
+ * In case this Sentinel requires a password as well, via the "requirepass"
+ * configuration directive, we assume we should use the local password in
+ * order to authenticate when connecting with the other Sentinels as well.
+ * So basically all the Sentinels share the same password and use it to
+ * authenticate reciprocally.
+ *
+ * We don't check at all if the command was successfully transmitted
+ * to the instance as if it fails Sentinel will detect the instance down,
+ * will disconnect and reconnect the link and so forth. */
+void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
+    char *auth_pass = NULL, *auth_user = NULL;
+
+    /* Pick the credentials according to the instance type. */
+    if (ri->flags & SRI_MASTER) {
+        auth_pass = ri->auth_pass;
+        auth_user = ri->auth_user;
+    } else if (ri->flags & SRI_SLAVE) {
+        auth_pass = ri->master->auth_pass;
+        auth_user = ri->master->auth_user;
+    } else if (ri->flags & SRI_SENTINEL) {
+        if (sentinel.sentinel_auth_pass) {
+            /* If sentinel_auth_user is NULL, AUTH will use default user
+               with sentinel_auth_pass to authenticate */
+            auth_pass = sentinel.sentinel_auth_pass;
+            auth_user = sentinel.sentinel_auth_user;
+        } else {
+            /* Compatibility with old configs. requirepass is used
+             * for both incoming and outgoing authentication. */
+            auth_pass = server.requirepass;
+            auth_user = NULL;
+        }
+    }
+
+    /* Nothing to send when no password is configured. */
+    if (auth_pass == NULL) return;
+
+    int retval;
+    if (auth_user) {
+        /* ACL-style AUTH with two arguments: username and password. */
+        retval = redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "%s %s %s",
+            sentinelInstanceMapCommand(ri,"AUTH"), auth_user, auth_pass);
+    } else {
+        retval = redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "%s %s",
+            sentinelInstanceMapCommand(ri,"AUTH"), auth_pass);
+    }
+    if (retval == C_OK) ri->link->pending_commands++;
+}
+
+/* Use CLIENT SETNAME to name the connection in the Redis instance as
+ * sentinel-<first_8_chars_of_runid>-<connection_type>
+ * The connection type is "cmd" or "pubsub" as specified by 'type'.
+ *
+ * This makes it possible to list all the sentinel instances connected
+ * to a Redis server with CLIENT LIST, grepping for a specific name format. */
+void sentinelSetClientName(sentinelRedisInstance *ri, redisAsyncContext *c, char *type) {
+    char name[64];
+    int retval;
+
+    snprintf(name,sizeof(name),"sentinel-%.8s-%s",sentinel.myid,type);
+    retval = redisAsyncCommand(c, sentinelDiscardReplyCallback, ri,
+        "%s SETNAME %s",
+        sentinelInstanceMapCommand(ri,"CLIENT"),
+        name);
+    if (retval == C_OK) ri->link->pending_commands++;
+}
+
+/* Upgrade the (already connected) async context to TLS when the server is
+ * built with OpenSSL support; in a non-TLS build this is a no-op that
+ * always succeeds.
+ *
+ * Returns C_OK on success, C_ERR when no TLS context is available, the SSL
+ * object cannot be allocated, or the handshake cannot be initiated. */
+static int instanceLinkNegotiateTLS(redisAsyncContext *context) {
+#if USE_OPENSSL == 1 /* BUILD_YES */
+ if (!redis_tls_ctx) return C_ERR;
+ /* Prefer the dedicated client context when one was configured. */
+ SSL *ssl = SSL_new(redis_tls_client_ctx ? redis_tls_client_ctx : redis_tls_ctx);
+ if (!ssl) return C_ERR;
+
+ if (redisInitiateSSL(&context->c, ssl) == REDIS_ERR) {
+ SSL_free(ssl);
+ return C_ERR;
+ }
+#else
+ UNUSED(context);
+#endif
+ return C_OK;
+}
+
+/* Create the async connections for the instance link if the link
+ * is disconnected. Note that link->disconnected is true even if just
+ * one of the two links (commands and pub/sub) is missing. */
+void sentinelReconnectInstance(sentinelRedisInstance *ri) {
+    if (ri->link->disconnected == 0) return;
+    if (ri->addr->port == 0) return; /* port == 0 means invalid address. */
+    instanceLink *link = ri->link;
+    mstime_t now = mstime();
+
+    /* Rate limit reconnection attempts to one per ping period. */
+    if (now - ri->link->last_reconn_time < sentinel_ping_period) return;
+    ri->link->last_reconn_time = now;
+
+    /* Commands connection. */
+    if (link->cc == NULL) {
+        /* It might be that the instance is disconnected because it wasn't available earlier when the instance
+         * allocated, say during failover, and therefore we failed to resolve its ip.
+         * Another scenario is that the instance restarted with new ip, and we should resolve its new ip based on
+         * its hostname */
+        if (sentinel.resolve_hostnames) {
+            sentinelAddr *tryResolveAddr = createSentinelAddr(ri->addr->hostname, ri->addr->port, 0);
+            if (tryResolveAddr != NULL) {
+                releaseSentinelAddr(ri->addr);
+                ri->addr = tryResolveAddr;
+            }
+        }
+
+        link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,server.bind_source_addr);
+
+        if (link->cc && !link->cc->err) anetCloexec(link->cc->c.fd);
+        if (!link->cc) {
+            sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #Failed to establish connection");
+        } else if (!link->cc->err && server.tls_replication &&
+                (instanceLinkNegotiateTLS(link->cc) == C_ERR)) {
+            sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #Failed to initialize TLS");
+            instanceLinkCloseConnection(link,link->cc);
+        } else if (link->cc->err) {
+            sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #%s",
+                link->cc->errstr);
+            instanceLinkCloseConnection(link,link->cc);
+        } else {
+            /* Connection established: reset the per-link state, install
+             * the callbacks, authenticate and name the connection. */
+            link->pending_commands = 0;
+            link->cc_conn_time = mstime();
+            link->cc->data = link;
+            redisAeAttach(server.el,link->cc);
+            redisAsyncSetConnectCallback(link->cc,
+                    sentinelLinkEstablishedCallback);
+            redisAsyncSetDisconnectCallback(link->cc,
+                    sentinelDisconnectCallback);
+            sentinelSendAuthIfNeeded(ri,link->cc);
+            sentinelSetClientName(ri,link->cc,"cmd");
+
+            /* Send a PING ASAP when reconnecting. */
+            sentinelSendPing(ri);
+        }
+    }
+    /* Pub / Sub */
+    if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && link->pc == NULL) {
+        link->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,server.bind_source_addr);
+        if (link->pc && !link->pc->err) anetCloexec(link->pc->c.fd);
+        if (!link->pc) {
+            sentinelEvent(LL_DEBUG,"-pubsub-link-reconnection",ri,"%@ #Failed to establish connection");
+        } else if (!link->pc->err && server.tls_replication &&
+                (instanceLinkNegotiateTLS(link->pc) == C_ERR)) {
+            sentinelEvent(LL_DEBUG,"-pubsub-link-reconnection",ri,"%@ #Failed to initialize TLS");
+            /* BUGFIX: close the failed context just like the commands link
+             * branch above, otherwise link->pc remains non-NULL, the
+             * pub/sub link is never retried, and the context leaks. */
+            instanceLinkCloseConnection(link,link->pc);
+        } else if (link->pc->err) {
+            sentinelEvent(LL_DEBUG,"-pubsub-link-reconnection",ri,"%@ #%s",
+                link->pc->errstr);
+            instanceLinkCloseConnection(link,link->pc);
+        } else {
+            int retval;
+            link->pc_conn_time = mstime();
+            link->pc->data = link;
+            redisAeAttach(server.el,link->pc);
+            redisAsyncSetConnectCallback(link->pc,
+                    sentinelLinkEstablishedCallback);
+            redisAsyncSetDisconnectCallback(link->pc,
+                    sentinelDisconnectCallback);
+            sentinelSendAuthIfNeeded(ri,link->pc);
+            sentinelSetClientName(ri,link->pc,"pubsub");
+            /* Now we subscribe to the Sentinels "Hello" channel. */
+            retval = redisAsyncCommand(link->pc,
+                sentinelReceiveHelloMessages, ri, "%s %s",
+                sentinelInstanceMapCommand(ri,"SUBSCRIBE"),
+                SENTINEL_HELLO_CHANNEL);
+            if (retval != C_OK) {
+                /* If we can't subscribe, the Pub/Sub connection is useless
+                 * and we can simply disconnect it and try again. */
+                instanceLinkCloseConnection(link,link->pc);
+                return;
+            }
+        }
+    }
+    /* Clear the disconnected status only if we have both the connections
+     * (or just the commands connection if this is a sentinel instance). */
+    if (link->cc && (ri->flags & SRI_SENTINEL || link->pc))
+        link->disconnected = 0;
+}
+
+/* ======================== Redis instances pinging ======================== */
+
+/* Return non-zero if 'master' can currently be trusted, that is:
+ * 1) It is actually a master in the current configuration.
+ * 2) It reports itself as a master.
+ * 3) It is not in SDOWN nor ODOWN state.
+ * 4) We obtained last INFO no more than two times the INFO period time ago. */
+int sentinelMasterLooksSane(sentinelRedisInstance *master) {
+    if (!(master->flags & SRI_MASTER)) return 0;
+    if (master->role_reported != SRI_MASTER) return 0;
+    if (master->flags & (SRI_S_DOWN|SRI_O_DOWN)) return 0;
+    return (mstime() - master->info_refresh) < sentinel_info_period*2;
+}
+
+/* Process the INFO output from masters. */
+void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
+ sds *lines;
+ int numlines, j;
+ int role = 0;
+
+ /* cache full INFO output for instance */
+ sdsfree(ri->info);
+ ri->info = sdsnew(info);
+
+ /* The following fields must be reset to a given value in the case they
+ * are not found at all in the INFO output. */
+ ri->master_link_down_time = 0;
+
+ /* Process line by line. */
+ lines = sdssplitlen(info,strlen(info),"\r\n",2,&numlines);
+ for (j = 0; j < numlines; j++) {
+ sentinelRedisInstance *slave;
+ sds l = lines[j];
+
+ /* run_id:<40 hex chars>*/
+ if (sdslen(l) >= 47 && !memcmp(l,"run_id:",7)) {
+ if (ri->runid == NULL) {
+ ri->runid = sdsnewlen(l+7,40);
+ } else {
+ if (strncmp(ri->runid,l+7,40) != 0) {
+ sentinelEvent(LL_NOTICE,"+reboot",ri,"%@");
+
+ if (ri->flags & SRI_MASTER && ri->master_reboot_down_after_period != 0) {
+ ri->flags |= SRI_MASTER_REBOOT;
+ ri->master_reboot_since_time = mstime();
+ }
+
+ sdsfree(ri->runid);
+ ri->runid = sdsnewlen(l+7,40);
+ }
+ }
+ }
+
+ /* old versions: slave0:<ip>,<port>,<state>
+ * new versions: slave0:ip=127.0.0.1,port=9999,... */
+ if ((ri->flags & SRI_MASTER) &&
+ sdslen(l) >= 7 &&
+ !memcmp(l,"slave",5) && isdigit(l[5]))
+ {
+ char *ip, *port, *end;
+
+ if (strstr(l,"ip=") == NULL) {
+ /* Old format. */
+ ip = strchr(l,':'); if (!ip) continue;
+ ip++; /* Now ip points to start of ip address. */
+ port = strchr(ip,','); if (!port) continue;
+ *port = '\0'; /* nul term for easy access. */
+ port++; /* Now port points to start of port number. */
+ end = strchr(port,','); if (!end) continue;
+ *end = '\0'; /* nul term for easy access. */
+ } else {
+ /* New format. */
+ ip = strstr(l,"ip="); if (!ip) continue;
+ ip += 3; /* Now ip points to start of ip address. */
+ port = strstr(l,"port="); if (!port) continue;
+ port += 5; /* Now port points to start of port number. */
+ /* Nul term both fields for easy access. */
+ end = strchr(ip,','); if (end) *end = '\0';
+ end = strchr(port,','); if (end) *end = '\0';
+ }
+
+ /* Check if we already have this slave into our table,
+ * otherwise add it. */
+ if (sentinelRedisInstanceLookupSlave(ri,ip,atoi(port)) == NULL) {
+ if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,ip,
+ atoi(port), ri->quorum, ri)) != NULL)
+ {
+ sentinelEvent(LL_NOTICE,"+slave",slave,"%@");
+ sentinelFlushConfig();
+ }
+ }
+ }
+
+ /* master_link_down_since_seconds:<seconds> */
+ if (sdslen(l) >= 32 &&
+ !memcmp(l,"master_link_down_since_seconds",30))
+ {
+ ri->master_link_down_time = strtoll(l+31,NULL,10)*1000;
+ }
+
+ /* role:<role> */
+ if (sdslen(l) >= 11 && !memcmp(l,"role:master",11)) role = SRI_MASTER;
+ else if (sdslen(l) >= 10 && !memcmp(l,"role:slave",10)) role = SRI_SLAVE;
+
+ if (role == SRI_SLAVE) {
+ /* master_host:<host> */
+ if (sdslen(l) >= 12 && !memcmp(l,"master_host:",12)) {
+ if (ri->slave_master_host == NULL ||
+ strcasecmp(l+12,ri->slave_master_host))
+ {
+ sdsfree(ri->slave_master_host);
+ ri->slave_master_host = sdsnew(l+12);
+ ri->slave_conf_change_time = mstime();
+ }
+ }
+
+ /* master_port:<port> */
+ if (sdslen(l) >= 12 && !memcmp(l,"master_port:",12)) {
+ int slave_master_port = atoi(l+12);
+
+ if (ri->slave_master_port != slave_master_port) {
+ ri->slave_master_port = slave_master_port;
+ ri->slave_conf_change_time = mstime();
+ }
+ }
+
+ /* master_link_status:<status> */
+ if (sdslen(l) >= 19 && !memcmp(l,"master_link_status:",19)) {
+ ri->slave_master_link_status =
+ (strcasecmp(l+19,"up") == 0) ?
+ SENTINEL_MASTER_LINK_STATUS_UP :
+ SENTINEL_MASTER_LINK_STATUS_DOWN;
+ }
+
+ /* slave_priority:<priority> */
+ if (sdslen(l) >= 15 && !memcmp(l,"slave_priority:",15))
+ ri->slave_priority = atoi(l+15);
+
+ /* slave_repl_offset:<offset> */
+ if (sdslen(l) >= 18 && !memcmp(l,"slave_repl_offset:",18))
+ ri->slave_repl_offset = strtoull(l+18,NULL,10);
+
+ /* replica_announced:<announcement> */
+ if (sdslen(l) >= 18 && !memcmp(l,"replica_announced:",18))
+ ri->replica_announced = atoi(l+18);
+ }
+ }
+ ri->info_refresh = mstime();
+ sdsfreesplitres(lines,numlines);
+
+ /* ---------------------------- Acting half -----------------------------
+ * Some things will not happen if sentinel.tilt is true, but some will
+ * still be processed. */
+
+ /* Remember when the role changed. */
+ if (role != ri->role_reported) {
+ ri->role_reported_time = mstime();
+ ri->role_reported = role;
+ if (role == SRI_SLAVE) ri->slave_conf_change_time = mstime();
+ /* Log the event with +role-change if the new role is coherent or
+ * with -role-change if there is a mismatch with the current config. */
+ sentinelEvent(LL_VERBOSE,
+ ((ri->flags & (SRI_MASTER|SRI_SLAVE)) == role) ?
+ "+role-change" : "-role-change",
+ ri, "%@ new reported role is %s",
+ role == SRI_MASTER ? "master" : "slave");
+ }
+
+ /* None of the following conditions are processed when in tilt mode, so
+ * return asap. */
+ if (sentinel.tilt) return;
+
+ /* Handle master -> slave role switch. */
+ if ((ri->flags & SRI_MASTER) && role == SRI_SLAVE) {
+ /* Nothing to do, but masters claiming to be slaves are
+ * considered to be unreachable by Sentinel, so eventually
+ * a failover will be triggered. */
+ }
+
+ /* Handle slave -> master role switch. */
+ if ((ri->flags & SRI_SLAVE) && role == SRI_MASTER) {
+ /* If this is a promoted slave we can change state to the
+ * failover state machine. */
+ if ((ri->flags & SRI_PROMOTED) &&
+ (ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
+ (ri->master->failover_state ==
+ SENTINEL_FAILOVER_STATE_WAIT_PROMOTION))
+ {
+ /* Now that we are sure the slave was reconfigured as a master
+ * set the master configuration epoch to the epoch we won the
+ * election to perform this failover. This will force the other
+ * Sentinels to update their config (assuming there is not
+ * a newer one already available). */
+ ri->master->config_epoch = ri->master->failover_epoch;
+ ri->master->failover_state = SENTINEL_FAILOVER_STATE_RECONF_SLAVES;
+ ri->master->failover_state_change_time = mstime();
+ sentinelFlushConfig();
+ sentinelEvent(LL_WARNING,"+promoted-slave",ri,"%@");
+ if (sentinel.simfailure_flags &
+ SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION)
+ sentinelSimFailureCrash();
+ sentinelEvent(LL_WARNING,"+failover-state-reconf-slaves",
+ ri->master,"%@");
+ sentinelCallClientReconfScript(ri->master,SENTINEL_LEADER,
+ "start",ri->master->addr,ri->addr);
+ sentinelForceHelloUpdateForMaster(ri->master);
+ } else {
+ /* A slave turned into a master. We want to force our view and
+ * reconfigure as slave. Wait some time after the change before
+ * going forward, to receive new configs if any. */
+ mstime_t wait_time = sentinel_publish_period*4;
+
+ if (!(ri->flags & SRI_PROMOTED) &&
+ sentinelMasterLooksSane(ri->master) &&
+ sentinelRedisInstanceNoDownFor(ri,wait_time) &&
+ mstime() - ri->role_reported_time > wait_time)
+ {
+ int retval = sentinelSendSlaveOf(ri,ri->master->addr);
+ if (retval == C_OK)
+ sentinelEvent(LL_NOTICE,"+convert-to-slave",ri,"%@");
+ }
+ }
+ }
+
+ /* Handle slaves replicating to a different master address. */
+ if ((ri->flags & SRI_SLAVE) &&
+ role == SRI_SLAVE &&
+ (ri->slave_master_port != ri->master->addr->port ||
+ !sentinelAddrEqualsHostname(ri->master->addr, ri->slave_master_host)))
+ {
+ mstime_t wait_time = ri->master->failover_timeout;
+
+ /* Make sure the master is sane before reconfiguring this instance
+ * into a slave. */
+ if (sentinelMasterLooksSane(ri->master) &&
+ sentinelRedisInstanceNoDownFor(ri,wait_time) &&
+ mstime() - ri->slave_conf_change_time > wait_time)
+ {
+ int retval = sentinelSendSlaveOf(ri,ri->master->addr);
+ if (retval == C_OK)
+ sentinelEvent(LL_NOTICE,"+fix-slave-config",ri,"%@");
+ }
+ }
+
+ /* Detect if the slave that is in the process of being reconfigured
+ * changed state. */
+ if ((ri->flags & SRI_SLAVE) && role == SRI_SLAVE &&
+ (ri->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG)))
+ {
+ /* SRI_RECONF_SENT -> SRI_RECONF_INPROG. */
+ if ((ri->flags & SRI_RECONF_SENT) &&
+ ri->slave_master_host &&
+ sentinelAddrEqualsHostname(ri->master->promoted_slave->addr,
+ ri->slave_master_host) &&
+ ri->slave_master_port == ri->master->promoted_slave->addr->port)
+ {
+ ri->flags &= ~SRI_RECONF_SENT;
+ ri->flags |= SRI_RECONF_INPROG;
+ sentinelEvent(LL_NOTICE,"+slave-reconf-inprog",ri,"%@");
+ }
+
+ /* SRI_RECONF_INPROG -> SRI_RECONF_DONE */
+ if ((ri->flags & SRI_RECONF_INPROG) &&
+ ri->slave_master_link_status == SENTINEL_MASTER_LINK_STATUS_UP)
+ {
+ ri->flags &= ~SRI_RECONF_INPROG;
+ ri->flags |= SRI_RECONF_DONE;
+ sentinelEvent(LL_NOTICE,"+slave-reconf-done",ri,"%@");
+ }
+ }
+}
+
+/* Async callback for the periodic INFO command sent to masters and slaves.
+ * 'privdata' is the sentinelRedisInstance the INFO was sent to. On a valid
+ * bulk reply the raw INFO payload is handed to sentinelRefreshInstanceInfo()
+ * which updates the instance state. */
+void sentinelInfoReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
+ redisReply *r;
+
+ /* A NULL reply (connection dropped) or missing link: nothing to do,
+ * and no pending command to account for. */
+ if (!reply || !link) return;
+ link->pending_commands--;
+ r = reply;
+
+ /* INFO reply type is verbatim in resp3. Normally, sentinel will not use
+ * resp3 but this is required for testing (see logreqres.c). */
+ if (r->type == REDIS_REPLY_STRING || r->type == REDIS_REPLY_VERB)
+ sentinelRefreshInstanceInfo(ri,r->str);
+}
+
+/* Just discard the reply. We use this when we are not monitoring the return
+ * value of the command but its effects directly. The only bookkeeping done
+ * here is decrementing the link's pending command counter. */
+void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
+ instanceLink *link = c->data;
+ UNUSED(reply);
+ UNUSED(privdata);
+
+ if (link) link->pending_commands--;
+}
+
+/* Async callback for the periodic PING. Updates the link availability
+ * timestamps, clears the pending-ping flag on an acceptable reply, and
+ * reacts to a -BUSY error (instance stuck in a script) by sending
+ * SCRIPT KILL once while the instance is subjectively down. */
+void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
+ redisReply *r;
+
+ if (!reply || !link) return;
+ link->pending_commands--;
+ r = reply;
+
+ if (r->type == REDIS_REPLY_STATUS ||
+ r->type == REDIS_REPLY_ERROR) {
+ /* Update the "instance available" field only if this is an
+ * acceptable reply. */
+ if (strncmp(r->str,"PONG",4) == 0 ||
+ strncmp(r->str,"LOADING",7) == 0 ||
+ strncmp(r->str,"MASTERDOWN",10) == 0)
+ {
+ link->last_avail_time = mstime();
+ link->act_ping_time = 0; /* Flag the pong as received. */
+
+ /* A plain PONG from a rebooted master clears the reboot flag. */
+ if (ri->flags & SRI_MASTER_REBOOT && strncmp(r->str,"PONG",4) == 0)
+ ri->flags &= ~SRI_MASTER_REBOOT;
+
+ } else {
+ /* Send a SCRIPT KILL command if the instance appears to be
+ * down because of a busy script. */
+ if (strncmp(r->str,"BUSY",4) == 0 &&
+ (ri->flags & SRI_S_DOWN) &&
+ !(ri->flags & SRI_SCRIPT_KILL_SENT))
+ {
+ if (redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri,
+ "%s KILL",
+ sentinelInstanceMapCommand(ri,"SCRIPT")) == C_OK)
+ {
+ ri->link->pending_commands++;
+ }
+ /* Set even if the command could not be queued, so we try
+ * only once per S_DOWN period. */
+ ri->flags |= SRI_SCRIPT_KILL_SENT;
+ }
+ }
+ }
+ /* Note: last_pong_time is refreshed for any reply, acceptable or not,
+ * since it tracks link liveness rather than instance health. */
+ link->last_pong_time = mstime();
+}
+
+/* This is called when we get the reply about the PUBLISH command we send
+ * to the master to advertise this sentinel. 'privdata' is the instance the
+ * Hello message was published to. */
+void sentinelPublishReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
+ redisReply *r;
+
+ if (!reply || !link) return;
+ link->pending_commands--;
+ r = reply;
+
+ /* Only update pub_time if we actually published our message. Otherwise
+ * we'll retry again in 100 milliseconds. */
+ if (r->type != REDIS_REPLY_ERROR)
+ ri->last_pub_time = mstime();
+}
+
+/* Process a hello message received via Pub/Sub in master or slave instance,
+ * or sent directly to this sentinel via the (fake) PUBLISH command of Sentinel.
+ *
+ * If the master name specified in the message is not known, the message is
+ * discarded.
+ *
+ * Side effects: may add/remove Sentinel instances, bump the local
+ * current_epoch, and switch the master address when the sender has a
+ * newer configuration (flushing the config file in each case). */
+void sentinelProcessHelloMessage(char *hello, int hello_len) {
+ /* Format is composed of 8 tokens:
+ * 0=ip,1=port,2=runid,3=current_epoch,4=master_name,
+ * 5=master_ip,6=master_port,7=master_config_epoch. */
+ int numtokens, port, removed, master_port;
+ uint64_t current_epoch, master_config_epoch;
+ char **token = sdssplitlen(hello, hello_len, ",", 1, &numtokens);
+ sentinelRedisInstance *si, *master;
+
+ /* Malformed messages (wrong token count) are silently ignored. */
+ if (numtokens == 8) {
+ /* Obtain a reference to the master this hello message is about */
+ master = sentinelGetMasterByName(token[4]);
+ if (!master) goto cleanup; /* Unknown master, skip the message. */
+
+ /* First, try to see if we already have this sentinel. */
+ port = atoi(token[1]);
+ master_port = atoi(token[6]);
+ si = getSentinelRedisInstanceByAddrAndRunID(
+ master->sentinels,token[0],port,token[2]);
+ current_epoch = strtoull(token[3],NULL,10);
+ master_config_epoch = strtoull(token[7],NULL,10);
+
+ if (!si) {
+ /* If not, remove all the sentinels that have the same runid
+ * because there was an address change, and add the same Sentinel
+ * with the new address back. */
+ removed = removeMatchingSentinelFromMaster(master,token[2]);
+ if (removed) {
+ sentinelEvent(LL_NOTICE,"+sentinel-address-switch",master,
+ "%@ ip %s port %d for %s", token[0],port,token[2]);
+ } else {
+ /* Check if there is another Sentinel with the same address this
+ * new one is reporting. What we do if this happens is to set its
+ * port to 0, to signal the address is invalid. We'll update it
+ * later if we get an HELLO message. */
+ sentinelRedisInstance *other =
+ getSentinelRedisInstanceByAddrAndRunID(
+ master->sentinels, token[0],port,NULL);
+ if (other) {
+ /* If there is already other sentinel with same address (but
+ * different runid) then remove the old one across all masters */
+ sentinelEvent(LL_NOTICE,"+sentinel-invalid-addr",other,"%@");
+ dictIterator *di;
+ dictEntry *de;
+
+ /* Keep a copy of runid. 'other' about to be deleted in loop. */
+ sds runid_obsolete = sdsnew(other->runid);
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ /* NOTE: this 'master' intentionally shadows the outer
+ * 'master' — here we sweep every monitored master. */
+ sentinelRedisInstance *master = dictGetVal(de);
+ removeMatchingSentinelFromMaster(master, runid_obsolete);
+ }
+ dictReleaseIterator(di);
+ sdsfree(runid_obsolete);
+ }
+ }
+
+ /* Add the new sentinel. */
+ si = createSentinelRedisInstance(token[2],SRI_SENTINEL,
+ token[0],port,master->quorum,master);
+
+ if (si) {
+ if (!removed) sentinelEvent(LL_NOTICE,"+sentinel",si,"%@");
+ /* The runid is NULL after a new instance creation and
+ * for Sentinels we don't have a later chance to fill it,
+ * so do it now. */
+ si->runid = sdsnew(token[2]);
+ sentinelTryConnectionSharing(si);
+ if (removed) sentinelUpdateSentinelAddressInAllMasters(si);
+ sentinelFlushConfig();
+ }
+ }
+
+ /* Update local current_epoch if received current_epoch is greater.*/
+ if (current_epoch > sentinel.current_epoch) {
+ sentinel.current_epoch = current_epoch;
+ sentinelFlushConfig();
+ sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
+ (unsigned long long) sentinel.current_epoch);
+ }
+
+ /* Update master info if received configuration is newer. */
+ if (si && master->config_epoch < master_config_epoch) {
+ master->config_epoch = master_config_epoch;
+ if (master_port != master->addr->port ||
+ !sentinelAddrEqualsHostname(master->addr, token[5]))
+ {
+ sentinelAddr *old_addr;
+
+ sentinelEvent(LL_WARNING,"+config-update-from",si,"%@");
+ sentinelEvent(LL_WARNING,"+switch-master",
+ master,"%s %s %d %s %d",
+ master->name,
+ announceSentinelAddr(master->addr), master->addr->port,
+ token[5], master_port);
+
+ /* Keep the old address alive across the reset so the reconfig
+ * script receives both old and new addresses. */
+ old_addr = dupSentinelAddr(master->addr);
+ sentinelResetMasterAndChangeAddress(master, token[5], master_port);
+ sentinelCallClientReconfScript(master,
+ SENTINEL_OBSERVER,"start",
+ old_addr,master->addr);
+ releaseSentinelAddr(old_addr);
+ }
+ }
+
+ /* Update the state of the Sentinel. */
+ if (si) si->last_hello_time = mstime();
+ }
+
+cleanup:
+ sdsfreesplitres(token,numtokens);
+}
+
+
+/* This is our Pub/Sub callback for the Hello channel. It's useful in order
+ * to discover other sentinels attached at the same master. 'privdata' is
+ * the instance whose pubsub link delivered the message. */
+void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata) {
+ sentinelRedisInstance *ri = privdata;
+ redisReply *r;
+ UNUSED(c);
+
+ if (!reply || !ri) return;
+ r = reply;
+
+ /* Update the last activity in the pubsub channel. Note that since we
+ * receive our messages as well this timestamp can be used to detect
+ * if the link is probably disconnected even if it seems otherwise. */
+ ri->link->pc_last_activity = mstime();
+
+ /* Sanity check in the reply we expect, so that the code that follows
+ * can avoid to check for details.
+ * Note: Reply type is PUSH in resp3. Normally, sentinel will not use
+ * resp3 but this is required for testing (see logreqres.c). */
+ if ((r->type != REDIS_REPLY_ARRAY && r->type != REDIS_REPLY_PUSH) ||
+ r->elements != 3 ||
+ r->element[0]->type != REDIS_REPLY_STRING ||
+ r->element[1]->type != REDIS_REPLY_STRING ||
+ r->element[2]->type != REDIS_REPLY_STRING ||
+ strcmp(r->element[0]->str,"message") != 0) return;
+
+ /* We are not interested in meeting ourselves: skip messages that
+ * carry our own run ID. */
+ if (strstr(r->element[2]->str,sentinel.myid) != NULL) return;
+
+ sentinelProcessHelloMessage(r->element[2]->str, r->element[2]->len);
+}
+
+/* Send a "Hello" message via Pub/Sub to the specified 'ri' Redis
+ * instance in order to broadcast the current configuration for this
+ * master, and to advertise the existence of this Sentinel at the same time.
+ *
+ * The message has the following format:
+ *
+ * sentinel_ip,sentinel_port,sentinel_runid,current_epoch,
+ * master_name,master_ip,master_port,master_config_epoch.
+ *
+ * Returns C_OK if the PUBLISH was queued correctly, otherwise
+ * C_ERR is returned. */
+int sentinelSendHello(sentinelRedisInstance *ri) {
+ char ip[NET_IP_STR_LEN];
+ char payload[NET_IP_STR_LEN+1024];
+ int retval;
+ char *announce_ip;
+ int announce_port;
+ /* For a slave, advertise the configuration of its master. */
+ sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ? ri : ri->master;
+ sentinelAddr *master_addr = sentinelGetCurrentMasterAddress(master);
+
+ if (ri->link->disconnected) return C_ERR;
+
+ /* Use the specified announce address if specified, otherwise try to
+ * obtain our own IP address. */
+ if (sentinel.announce_ip) {
+ announce_ip = sentinel.announce_ip;
+ } else {
+ if (anetFdToString(ri->link->cc->c.fd,ip,sizeof(ip),NULL,0) == -1)
+ return C_ERR;
+ announce_ip = ip;
+ }
+ /* Port priority: explicit announce-port, then the TLS port when TLS
+ * replication is on, then the plain TCP port. */
+ if (sentinel.announce_port) announce_port = sentinel.announce_port;
+ else if (server.tls_replication && server.tls_port) announce_port = server.tls_port;
+ else announce_port = server.port;
+
+ /* Format and send the Hello message. */
+ snprintf(payload,sizeof(payload),
+ "%s,%d,%s,%llu," /* Info about this sentinel. */
+ "%s,%s,%d,%llu", /* Info about current master. */
+ announce_ip, announce_port, sentinel.myid,
+ (unsigned long long) sentinel.current_epoch,
+ /* --- */
+ master->name,announceSentinelAddr(master_addr),master_addr->port,
+ (unsigned long long) master->config_epoch);
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelPublishReplyCallback, ri, "%s %s %s",
+ sentinelInstanceMapCommand(ri,"PUBLISH"),
+ SENTINEL_HELLO_CHANNEL,payload);
+ if (retval != C_OK) return C_ERR;
+ ri->link->pending_commands++;
+ return C_OK;
+}
+
+/* Rewind last_pub_time for every instance in 'instances' so that the
+ * next periodic cycle publishes a Hello message as soon as possible. */
+void sentinelForceHelloUpdateDictOfRedisInstances(dict *instances) {
+ dictEntry *entry;
+ dictIterator *iter = dictGetSafeIterator(instances);
+
+ while ((entry = dictNext(iter)) != NULL) {
+ sentinelRedisInstance *instance = dictGetVal(entry);
+ mstime_t rewind = sentinel_publish_period+1;
+
+ /* Only rewind when it cannot make the timestamp negative. */
+ if (instance->last_pub_time >= rewind)
+ instance->last_pub_time -= rewind;
+ }
+ dictReleaseIterator(iter);
+}
+
+/* Force the delivery of a "Hello" message (see sentinelSendHello() top
+ * comment) to the master itself and to all Redis and Sentinel instances
+ * related to it, by rewinding their last_pub_time.
+ *
+ * Not strictly needed — updates are sent every SENTINEL_PUBLISH_PERIOD
+ * milliseconds anyway — but after a configuration upgrade it is a good
+ * idea to inform the other Sentinels ASAP.
+ *
+ * Returns C_ERR if 'master' is not actually a master, C_OK otherwise. */
+int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master) {
+ mstime_t rewind = sentinel_publish_period+1;
+
+ if ((master->flags & SRI_MASTER) == 0) return C_ERR;
+ if (master->last_pub_time >= rewind)
+ master->last_pub_time -= rewind;
+ sentinelForceHelloUpdateDictOfRedisInstances(master->sentinels);
+ sentinelForceHelloUpdateDictOfRedisInstances(master->slaves);
+ return C_OK;
+}
+
+/* Send a PING to the specified instance and refresh the act_ping_time
+ * if it is zero (that is, if we received a pong for the previous ping).
+ *
+ * On error zero is returned, and we can't consider the PING command
+ * queued in the connection. */
+int sentinelSendPing(sentinelRedisInstance *ri) {
+ int status = redisAsyncCommand(ri->link->cc,
+ sentinelPingReplyCallback, ri, "%s",
+ sentinelInstanceMapCommand(ri,"PING"));
+ if (status != C_OK) return 0;
+
+ ri->link->pending_commands++;
+ ri->link->last_ping_time = mstime();
+ /* We update the active ping time only if we received the pong for
+ * the previous ping, otherwise we are technically waiting since the
+ * first ping that did not receive a reply. */
+ if (ri->link->act_ping_time == 0)
+ ri->link->act_ping_time = ri->link->last_ping_time;
+ return 1;
+}
+
+/* Send periodic PING, INFO, and PUBLISH to the Hello channel to
+ * the specified master or slave instance (PING and PUBLISH also go to
+ * Sentinel instances). */
+void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
+ mstime_t now = mstime();
+ mstime_t info_period, ping_period;
+ int retval;
+
+ /* Return ASAP if the instance is not properly connected. */
+ if (ri->link->disconnected) return;
+
+ /* For INFO, PING, PUBLISH that are not critical commands to send we
+ * also have a limit of SENTINEL_MAX_PENDING_COMMANDS. We don't
+ * want to use a lot of memory just because a link is not working
+ * properly (note that anyway there is a redundant protection about this,
+ * that is, the link will be disconnected and reconnected if a long
+ * timeout condition is detected). */
+ if (ri->link->pending_commands >=
+ SENTINEL_MAX_PENDING_COMMANDS * ri->link->refcount) return;
+
+ /* If this is a slave of a master in O_DOWN condition we start sending
+ * it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
+ * period. In this state we want to closely monitor slaves in case they
+ * are turned into masters by another Sentinel, or by the sysadmin.
+ *
+ * Similarly we monitor the INFO output more often if the slave reports
+ * to be disconnected from the master, so that we can have a fresh
+ * disconnection time figure. */
+ if ((ri->flags & SRI_SLAVE) &&
+ ((ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)) ||
+ (ri->master_link_down_time != 0)))
+ {
+ info_period = 1000;
+ } else {
+ info_period = sentinel_info_period;
+ }
+
+ /* We ping instances every time the last received pong is older than
+ * the configured 'down-after-milliseconds' time, but every second
+ * anyway if 'down-after-milliseconds' is greater than 1 second. */
+ ping_period = ri->down_after_period;
+ if (ping_period > sentinel_ping_period) ping_period = sentinel_ping_period;
+
+ /* Send INFO to masters and slaves, not sentinels. */
+ if ((ri->flags & SRI_SENTINEL) == 0 &&
+ (ri->info_refresh == 0 ||
+ (now - ri->info_refresh) > info_period))
+ {
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelInfoReplyCallback, ri, "%s",
+ sentinelInstanceMapCommand(ri,"INFO"));
+ if (retval == C_OK) ri->link->pending_commands++;
+ }
+
+ /* Send PING to all the three kinds of instances. The last_ping_time
+ * check avoids resending while a recent ping is still in flight. */
+ if ((now - ri->link->last_pong_time) > ping_period &&
+ (now - ri->link->last_ping_time) > ping_period/2) {
+ sentinelSendPing(ri);
+ }
+
+ /* PUBLISH hello messages to all the three kinds of instances. */
+ if ((now - ri->last_pub_time) > sentinel_publish_period) {
+ sentinelSendHello(ri);
+ }
+}
+
+/* =========================== SENTINEL command ============================= */
+/* Insert every string of the NULL-terminated 'options' array into
+ * 'options_dict' as a key with a NULL value, freeing the copy when the
+ * key is already present. */
+static void populateDict(dict *options_dict, char **options) {
+ char **opt = options;
+
+ while (*opt != NULL) {
+ sds key = sdsnew(*opt);
+ if (dictAdd(options_dict, key, NULL) == DICT_ERR)
+ sdsfree(key);
+ opt++;
+ }
+}
+
+/* Return the configuration-string name of the current server.verbosity
+ * level, or "unknown" for an unrecognized value. */
+const char* getLogLevel(void) {
+ int level = server.verbosity;
+
+ if (level == LL_DEBUG) return "debug";
+ if (level == LL_VERBOSE) return "verbose";
+ if (level == LL_NOTICE) return "notice";
+ if (level == LL_WARNING) return "warning";
+ if (level == LL_NOTHING) return "nothing";
+ return "unknown";
+}
+
+/* SENTINEL CONFIG SET option value [option value ...]
+ *
+ * Validates every option/value pair first, then applies all of them, so a
+ * bad pair anywhere in the command means nothing is applied. Replies with
+ * an error on invalid/duplicate/missing arguments, otherwise flushes the
+ * config and replies via sentinelFlushConfigAndReply(). Changing the
+ * sentinel-user/sentinel-pass credentials drops all Sentinel connections
+ * so they reconnect with the new auth. */
+void sentinelConfigSetCommand(client *c) {
+ long long numval;
+ int drop_conns = 0;
+ char *option;
+ robj *val;
+ char *options[] = {
+ "announce-ip",
+ "sentinel-user",
+ "sentinel-pass",
+ "resolve-hostnames",
+ "announce-port",
+ "announce-hostnames",
+ "loglevel",
+ NULL};
+ /* Lazily-built set of valid option names, kept for the process
+ * lifetime (Redis is single-threaded, so no init race). */
+ static dict *options_dict = NULL;
+ if (!options_dict) {
+ options_dict = dictCreate(&stringSetDictType);
+ populateDict(options_dict, options);
+ }
+ /* Options already seen in this command, to reject duplicates. */
+ dict *set_configs = dictCreate(&stringSetDictType);
+
+ /* Validate arguments are valid */
+ for (int i = 3; i < c->argc; i++) {
+ option = c->argv[i]->ptr;
+
+ /* Validate option is valid */
+ if (dictFind(options_dict, option) == NULL) {
+ addReplyErrorFormat(c, "Invalid argument '%s' to SENTINEL CONFIG SET", option);
+ goto exit;
+ }
+
+ /* Check duplicates */
+ if (dictFind(set_configs, option) != NULL) {
+ addReplyErrorFormat(c, "Duplicate argument '%s' to SENTINEL CONFIG SET", option);
+ goto exit;
+ }
+
+ /* Cannot fail: we just checked the key is absent. Note dictAdd()
+ * returns DICT_OK/DICT_ERR, not C_OK/C_ERR. */
+ serverAssert(dictAdd(set_configs, sdsnew(option), NULL) == DICT_OK);
+
+ /* Validate argument */
+ if (i + 1 == c->argc) {
+ addReplyErrorFormat(c, "Missing argument '%s' value", option);
+ goto exit;
+ }
+ val = c->argv[++i];
+
+ if (!strcasecmp(option, "resolve-hostnames")) {
+ if ((yesnotoi(val->ptr)) == -1) goto badfmt;
+ } else if (!strcasecmp(option, "announce-hostnames")) {
+ if ((yesnotoi(val->ptr)) == -1) goto badfmt;
+ } else if (!strcasecmp(option, "announce-port")) {
+ if (getLongLongFromObject(val, &numval) == C_ERR ||
+ numval < 0 || numval > 65535) goto badfmt;
+ } else if (!strcasecmp(option, "loglevel")) {
+ if (!(!strcasecmp(val->ptr, "debug") || !strcasecmp(val->ptr, "verbose") ||
+ !strcasecmp(val->ptr, "notice") || !strcasecmp(val->ptr, "warning") ||
+ !strcasecmp(val->ptr, "nothing"))) goto badfmt;
+ }
+ }
+
+ /* Apply changes: every pair already passed validation above. */
+ for (int i = 3; i < c->argc; i++) {
+ int moreargs = (c->argc-1) - i;
+ option = c->argv[i]->ptr;
+ if (!strcasecmp(option, "loglevel") && moreargs > 0) {
+ val = c->argv[++i];
+ if (!strcasecmp(val->ptr, "debug"))
+ server.verbosity = LL_DEBUG;
+ else if (!strcasecmp(val->ptr, "verbose"))
+ server.verbosity = LL_VERBOSE;
+ else if (!strcasecmp(val->ptr, "notice"))
+ server.verbosity = LL_NOTICE;
+ else if (!strcasecmp(val->ptr, "warning"))
+ server.verbosity = LL_WARNING;
+ else if (!strcasecmp(val->ptr, "nothing"))
+ server.verbosity = LL_NOTHING;
+ } else if (!strcasecmp(option, "resolve-hostnames") && moreargs > 0) {
+ val = c->argv[++i];
+ numval = yesnotoi(val->ptr);
+ sentinel.resolve_hostnames = numval;
+ } else if (!strcasecmp(option, "announce-hostnames") && moreargs > 0) {
+ val = c->argv[++i];
+ numval = yesnotoi(val->ptr);
+ sentinel.announce_hostnames = numval;
+ } else if (!strcasecmp(option, "announce-ip") && moreargs > 0) {
+ val = c->argv[++i];
+ if (sentinel.announce_ip) sdsfree(sentinel.announce_ip);
+ sentinel.announce_ip = sdsnew(val->ptr);
+ } else if (!strcasecmp(option, "announce-port") && moreargs > 0) {
+ val = c->argv[++i];
+ getLongLongFromObject(val, &numval);
+ sentinel.announce_port = numval;
+ } else if (!strcasecmp(option, "sentinel-user") && moreargs > 0) {
+ val = c->argv[++i];
+ sdsfree(sentinel.sentinel_auth_user);
+ /* Empty string means "no user": store NULL. */
+ sentinel.sentinel_auth_user = sdslen(val->ptr) == 0 ?
+ NULL : sdsdup(val->ptr);
+ drop_conns = 1;
+ } else if (!strcasecmp(option, "sentinel-pass") && moreargs > 0) {
+ val = c->argv[++i];
+ sdsfree(sentinel.sentinel_auth_pass);
+ sentinel.sentinel_auth_pass = sdslen(val->ptr) == 0 ?
+ NULL : sdsdup(val->ptr);
+ drop_conns = 1;
+ } else {
+ /* Should never reach here: validation loop accepted it. */
+ serverAssert(0);
+ }
+ }
+
+ sentinelFlushConfigAndReply(c);
+
+ /* Drop Sentinel connections to initiate a reconnect if needed. */
+ if (drop_conns)
+ sentinelDropConnections();
+
+exit:
+ dictRelease(set_configs);
+ return;
+
+badfmt:
+ addReplyErrorFormat(c, "Invalid value '%s' to SENTINEL CONFIG SET '%s'",
+ (char *) val->ptr, option);
+ dictRelease(set_configs);
+}
+
+/* SENTINEL CONFIG GET <option> [<option> ...]
+ *
+ * Each argument may be a glob pattern; every global Sentinel option
+ * matching any pattern is emitted once as a name/value pair in a map
+ * reply. */
+void sentinelConfigGetCommand(client *c) {
+ char *pattern;
+ void *replylen = addReplyDeferredLen(c);
+ int matches = 0;
+ /* Create a dictionary to store the input configs,to avoid adding duplicate twice */
+ dict *d = dictCreate(&externalStringType);
+ for (int i = 3; i < c->argc; i++) {
+ pattern = c->argv[i]->ptr;
+ /* If the string doesn't contain glob patterns and available in dictionary, don't look further, just continue. */
+ if (!strpbrk(pattern, "[*?") && dictFind(d, pattern)) continue;
+ /* we want to print all the matched patterns and avoid printing duplicates twice */
+ if (stringmatch(pattern,"resolve-hostnames",1) && !dictFind(d, "resolve-hostnames")) {
+ addReplyBulkCString(c,"resolve-hostnames");
+ addReplyBulkCString(c,sentinel.resolve_hostnames ? "yes" : "no");
+ dictAdd(d, "resolve-hostnames", NULL);
+ matches++;
+ }
+ if (stringmatch(pattern, "announce-hostnames", 1) && !dictFind(d, "announce-hostnames")) {
+ addReplyBulkCString(c,"announce-hostnames");
+ addReplyBulkCString(c,sentinel.announce_hostnames ? "yes" : "no");
+ dictAdd(d, "announce-hostnames", NULL);
+ matches++;
+ }
+ if (stringmatch(pattern, "announce-ip", 1) && !dictFind(d, "announce-ip")) {
+ addReplyBulkCString(c,"announce-ip");
+ addReplyBulkCString(c,sentinel.announce_ip ? sentinel.announce_ip : "");
+ dictAdd(d, "announce-ip", NULL);
+ matches++;
+ }
+ if (stringmatch(pattern, "announce-port", 1) && !dictFind(d, "announce-port")) {
+ addReplyBulkCString(c, "announce-port");
+ addReplyBulkLongLong(c, sentinel.announce_port);
+ dictAdd(d, "announce-port", NULL);
+ matches++;
+ }
+ if (stringmatch(pattern, "sentinel-user", 1) && !dictFind(d, "sentinel-user")) {
+ addReplyBulkCString(c, "sentinel-user");
+ addReplyBulkCString(c, sentinel.sentinel_auth_user ? sentinel.sentinel_auth_user : "");
+ dictAdd(d, "sentinel-user", NULL);
+ matches++;
+ }
+ if (stringmatch(pattern, "sentinel-pass", 1) && !dictFind(d, "sentinel-pass")) {
+ addReplyBulkCString(c, "sentinel-pass");
+ addReplyBulkCString(c, sentinel.sentinel_auth_pass ? sentinel.sentinel_auth_pass : "");
+ dictAdd(d, "sentinel-pass", NULL);
+ matches++;
+ }
+ if (stringmatch(pattern, "loglevel", 1) && !dictFind(d, "loglevel")) {
+ addReplyBulkCString(c, "loglevel");
+ addReplyBulkCString(c, getLogLevel());
+ dictAdd(d, "loglevel", NULL);
+ matches++;
+ }
+ }
+ dictRelease(d);
+ /* 'matches' counts emitted pairs, matching the deferred map length. */
+ setDeferredMapLen(c, replylen, matches);
+}
+
+/* Return a human readable name for a failover state machine state,
+ * or "unknown" for an unrecognized value. */
+const char *sentinelFailoverStateStr(int state) {
+ if (state == SENTINEL_FAILOVER_STATE_NONE) return "none";
+ if (state == SENTINEL_FAILOVER_STATE_WAIT_START) return "wait_start";
+ if (state == SENTINEL_FAILOVER_STATE_SELECT_SLAVE) return "select_slave";
+ if (state == SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE) return "send_slaveof_noone";
+ if (state == SENTINEL_FAILOVER_STATE_WAIT_PROMOTION) return "wait_promotion";
+ if (state == SENTINEL_FAILOVER_STATE_RECONF_SLAVES) return "reconf_slaves";
+ if (state == SENTINEL_FAILOVER_STATE_UPDATE_CONFIG) return "update_config";
+ return "unknown";
+}
+
+/* Redis instance to Redis protocol representation: emit 'ri' as a map of
+ * field-name -> value pairs. Common fields are emitted first, followed by
+ * fields specific to masters, slaves, or sentinels according to ri->flags.
+ * 'fields' counts the emitted pairs to close the deferred map length. */
+void addReplySentinelRedisInstance(client *c, sentinelRedisInstance *ri) {
+ char *flags = sdsempty();
+ void *mbl;
+ int fields = 0;
+
+ mbl = addReplyDeferredLen(c);
+
+ addReplyBulkCString(c,"name");
+ addReplyBulkCString(c,ri->name);
+ fields++;
+
+ addReplyBulkCString(c,"ip");
+ addReplyBulkCString(c,announceSentinelAddr(ri->addr));
+ fields++;
+
+ addReplyBulkCString(c,"port");
+ addReplyBulkLongLong(c,ri->addr->port);
+ fields++;
+
+ addReplyBulkCString(c,"runid");
+ addReplyBulkCString(c,ri->runid ? ri->runid : "");
+ fields++;
+
+ /* Build the comma separated flags string from the instance state. */
+ addReplyBulkCString(c,"flags");
+ if (ri->flags & SRI_S_DOWN) flags = sdscat(flags,"s_down,");
+ if (ri->flags & SRI_O_DOWN) flags = sdscat(flags,"o_down,");
+ if (ri->flags & SRI_MASTER) flags = sdscat(flags,"master,");
+ if (ri->flags & SRI_SLAVE) flags = sdscat(flags,"slave,");
+ if (ri->flags & SRI_SENTINEL) flags = sdscat(flags,"sentinel,");
+ if (ri->link->disconnected) flags = sdscat(flags,"disconnected,");
+ if (ri->flags & SRI_MASTER_DOWN) flags = sdscat(flags,"master_down,");
+ if (ri->flags & SRI_FAILOVER_IN_PROGRESS)
+ flags = sdscat(flags,"failover_in_progress,");
+ if (ri->flags & SRI_PROMOTED) flags = sdscat(flags,"promoted,");
+ if (ri->flags & SRI_RECONF_SENT) flags = sdscat(flags,"reconf_sent,");
+ if (ri->flags & SRI_RECONF_INPROG) flags = sdscat(flags,"reconf_inprog,");
+ if (ri->flags & SRI_RECONF_DONE) flags = sdscat(flags,"reconf_done,");
+ if (ri->flags & SRI_FORCE_FAILOVER) flags = sdscat(flags,"force_failover,");
+ if (ri->flags & SRI_SCRIPT_KILL_SENT) flags = sdscat(flags,"script_kill_sent,");
+ if (ri->flags & SRI_MASTER_REBOOT) flags = sdscat(flags,"master_reboot,");
+
+ if (sdslen(flags) != 0) sdsrange(flags,0,-2); /* remove last "," */
+ addReplyBulkCString(c,flags);
+ sdsfree(flags);
+ fields++;
+
+ addReplyBulkCString(c,"link-pending-commands");
+ addReplyBulkLongLong(c,ri->link->pending_commands);
+ fields++;
+
+ addReplyBulkCString(c,"link-refcount");
+ addReplyBulkLongLong(c,ri->link->refcount);
+ fields++;
+
+ if (ri->flags & SRI_FAILOVER_IN_PROGRESS) {
+ addReplyBulkCString(c,"failover-state");
+ addReplyBulkCString(c,(char*)sentinelFailoverStateStr(ri->failover_state));
+ fields++;
+ }
+
+ /* Time-based fields are reported as milliseconds elapsed since the
+ * stored timestamp. */
+ addReplyBulkCString(c,"last-ping-sent");
+ addReplyBulkLongLong(c,
+ ri->link->act_ping_time ? (mstime() - ri->link->act_ping_time) : 0);
+ fields++;
+
+ addReplyBulkCString(c,"last-ok-ping-reply");
+ addReplyBulkLongLong(c,mstime() - ri->link->last_avail_time);
+ fields++;
+
+ addReplyBulkCString(c,"last-ping-reply");
+ addReplyBulkLongLong(c,mstime() - ri->link->last_pong_time);
+ fields++;
+
+ if (ri->flags & SRI_S_DOWN) {
+ addReplyBulkCString(c,"s-down-time");
+ addReplyBulkLongLong(c,mstime()-ri->s_down_since_time);
+ fields++;
+ }
+
+ if (ri->flags & SRI_O_DOWN) {
+ addReplyBulkCString(c,"o-down-time");
+ addReplyBulkLongLong(c,mstime()-ri->o_down_since_time);
+ fields++;
+ }
+
+ addReplyBulkCString(c,"down-after-milliseconds");
+ addReplyBulkLongLong(c,ri->down_after_period);
+ fields++;
+
+ /* Masters and Slaves */
+ if (ri->flags & (SRI_MASTER|SRI_SLAVE)) {
+ addReplyBulkCString(c,"info-refresh");
+ addReplyBulkLongLong(c,
+ ri->info_refresh ? (mstime() - ri->info_refresh) : 0);
+ fields++;
+
+ addReplyBulkCString(c,"role-reported");
+ addReplyBulkCString(c, (ri->role_reported == SRI_MASTER) ? "master" :
+ "slave");
+ fields++;
+
+ addReplyBulkCString(c,"role-reported-time");
+ addReplyBulkLongLong(c,mstime() - ri->role_reported_time);
+ fields++;
+ }
+
+ /* Only masters */
+ if (ri->flags & SRI_MASTER) {
+ addReplyBulkCString(c,"config-epoch");
+ addReplyBulkLongLong(c,ri->config_epoch);
+ fields++;
+
+ addReplyBulkCString(c,"num-slaves");
+ addReplyBulkLongLong(c,dictSize(ri->slaves));
+ fields++;
+
+ addReplyBulkCString(c,"num-other-sentinels");
+ addReplyBulkLongLong(c,dictSize(ri->sentinels));
+ fields++;
+
+ addReplyBulkCString(c,"quorum");
+ addReplyBulkLongLong(c,ri->quorum);
+ fields++;
+
+ addReplyBulkCString(c,"failover-timeout");
+ addReplyBulkLongLong(c,ri->failover_timeout);
+ fields++;
+
+ addReplyBulkCString(c,"parallel-syncs");
+ addReplyBulkLongLong(c,ri->parallel_syncs);
+ fields++;
+
+ /* Scripts are only reported when configured. */
+ if (ri->notification_script) {
+ addReplyBulkCString(c,"notification-script");
+ addReplyBulkCString(c,ri->notification_script);
+ fields++;
+ }
+
+ if (ri->client_reconfig_script) {
+ addReplyBulkCString(c,"client-reconfig-script");
+ addReplyBulkCString(c,ri->client_reconfig_script);
+ fields++;
+ }
+ }
+
+ /* Only slaves */
+ if (ri->flags & SRI_SLAVE) {
+ addReplyBulkCString(c,"master-link-down-time");
+ addReplyBulkLongLong(c,ri->master_link_down_time);
+ fields++;
+
+ addReplyBulkCString(c,"master-link-status");
+ addReplyBulkCString(c,
+ (ri->slave_master_link_status == SENTINEL_MASTER_LINK_STATUS_UP) ?
+ "ok" : "err");
+ fields++;
+
+ addReplyBulkCString(c,"master-host");
+ addReplyBulkCString(c,
+ ri->slave_master_host ? ri->slave_master_host : "?");
+ fields++;
+
+ addReplyBulkCString(c,"master-port");
+ addReplyBulkLongLong(c,ri->slave_master_port);
+ fields++;
+
+ addReplyBulkCString(c,"slave-priority");
+ addReplyBulkLongLong(c,ri->slave_priority);
+ fields++;
+
+ addReplyBulkCString(c,"slave-repl-offset");
+ addReplyBulkLongLong(c,ri->slave_repl_offset);
+ fields++;
+
+ addReplyBulkCString(c,"replica-announced");
+ addReplyBulkLongLong(c,ri->replica_announced);
+ fields++;
+ }
+
+ /* Only sentinels */
+ if (ri->flags & SRI_SENTINEL) {
+ addReplyBulkCString(c,"last-hello-message");
+ addReplyBulkLongLong(c,mstime() - ri->last_hello_time);
+ fields++;
+
+ addReplyBulkCString(c,"voted-leader");
+ addReplyBulkCString(c,ri->leader ? ri->leader : "?");
+ fields++;
+
+ addReplyBulkCString(c,"voted-leader-epoch");
+ addReplyBulkLongLong(c,ri->leader_epoch);
+ fields++;
+ }
+
+ setDeferredMapLen(c,mbl,fields);
+}
+
+/* SENTINEL DEBUG <param> <value> [<param> <value> ...]
+ *
+ * Parse option/value pairs starting at argv[2] and update the matching
+ * global Sentinel timing parameter. Every parameter here takes a strictly
+ * positive value in milliseconds. On the first invalid value the command
+ * replies with an error naming the offending argument and returns; note
+ * that pairs parsed before the bad one have already been applied. */
+void sentinelSetDebugConfigParameters(client *c){
+ int j;
+ int badarg = 0; /* Bad argument position for error reporting. */
+ char *option;
+
+ /* Process option - value pairs. */
+ for (j = 2; j < c->argc; j++) {
+ int moreargs = (c->argc-1) - j;
+ option = c->argv[j]->ptr;
+ long long ll;
+
+ if (!strcasecmp(option,"info-period") && moreargs > 0) {
+ /* info-period <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_info_period = ll;
+
+ } else if (!strcasecmp(option,"ping-period") && moreargs > 0) {
+ /* ping-period <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_ping_period = ll;
+
+ } else if (!strcasecmp(option,"ask-period") && moreargs > 0) {
+ /* ask-period <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_ask_period = ll;
+
+ } else if (!strcasecmp(option,"publish-period") && moreargs > 0) {
+ /* publish-period <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_publish_period = ll;
+
+ } else if (!strcasecmp(option,"default-down-after") && moreargs > 0) {
+ /* default-down-after <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_default_down_after = ll;
+
+ } else if (!strcasecmp(option,"tilt-trigger") && moreargs > 0) {
+ /* tilt-trigger <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_tilt_trigger = ll;
+
+ } else if (!strcasecmp(option,"tilt-period") && moreargs > 0) {
+ /* tilt-period <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_tilt_period = ll;
+
+ } else if (!strcasecmp(option,"slave-reconf-timeout") && moreargs > 0) {
+ /* slave-reconf-timeout <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_slave_reconf_timeout = ll;
+
+ } else if (!strcasecmp(option,"min-link-reconnect-period") && moreargs > 0) {
+ /* min-link-reconnect-period <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_min_link_reconnect_period = ll;
+
+ } else if (!strcasecmp(option,"default-failover-timeout") && moreargs > 0) {
+ /* default-failover-timeout <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_default_failover_timeout = ll;
+
+ } else if (!strcasecmp(option,"election-timeout") && moreargs > 0) {
+ /* election-timeout <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_election_timeout = ll;
+
+ } else if (!strcasecmp(option,"script-max-runtime") && moreargs > 0) {
+ /* script-max-runtime <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_script_max_runtime = ll;
+
+ } else if (!strcasecmp(option,"script-retry-delay") && moreargs > 0) {
+ /* script-retry-delay <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ sentinel_script_retry_delay = ll;
+
+ } else {
+ addReplyErrorFormat(c,"Unknown option or number of arguments for "
+ "SENTINEL DEBUG '%s'", option);
+ return;
+ }
+ }
+
+ addReply(c,shared.ok);
+ return;
+
+badfmt: /* Bad format errors */
+ addReplyErrorFormat(c,"Invalid argument '%s' for SENTINEL DEBUG '%s'",
+ (char*)c->argv[badarg]->ptr,option);
+
+ return;
+}
+
+/* SENTINEL DEBUG (with no further arguments): reply with a map of the
+ * current values of the configurable Sentinel timing parameters, all in
+ * milliseconds. The map length is emitted as a deferred reply and fixed up
+ * once every field has been added. */
+void addReplySentinelDebugInfo(client *c) {
+ void *mbl;
+ int fields = 0;
+
+ mbl = addReplyDeferredLen(c);
+
+ addReplyBulkCString(c,"INFO-PERIOD");
+ addReplyBulkLongLong(c,sentinel_info_period);
+ fields++;
+
+ addReplyBulkCString(c,"PING-PERIOD");
+ addReplyBulkLongLong(c,sentinel_ping_period);
+ fields++;
+
+ addReplyBulkCString(c,"ASK-PERIOD");
+ addReplyBulkLongLong(c,sentinel_ask_period);
+ fields++;
+
+ addReplyBulkCString(c,"PUBLISH-PERIOD");
+ addReplyBulkLongLong(c,sentinel_publish_period);
+ fields++;
+
+ addReplyBulkCString(c,"DEFAULT-DOWN-AFTER");
+ addReplyBulkLongLong(c,sentinel_default_down_after);
+ fields++;
+
+ addReplyBulkCString(c,"DEFAULT-FAILOVER-TIMEOUT");
+ addReplyBulkLongLong(c,sentinel_default_failover_timeout);
+ fields++;
+
+ addReplyBulkCString(c,"TILT-TRIGGER");
+ addReplyBulkLongLong(c,sentinel_tilt_trigger);
+ fields++;
+
+ addReplyBulkCString(c,"TILT-PERIOD");
+ addReplyBulkLongLong(c,sentinel_tilt_period);
+ fields++;
+
+ addReplyBulkCString(c,"SLAVE-RECONF-TIMEOUT");
+ addReplyBulkLongLong(c,sentinel_slave_reconf_timeout);
+ fields++;
+
+ addReplyBulkCString(c,"MIN-LINK-RECONNECT-PERIOD");
+ addReplyBulkLongLong(c,sentinel_min_link_reconnect_period);
+ fields++;
+
+ addReplyBulkCString(c,"ELECTION-TIMEOUT");
+ addReplyBulkLongLong(c,sentinel_election_timeout);
+ fields++;
+
+ addReplyBulkCString(c,"SCRIPT-MAX-RUNTIME");
+ addReplyBulkLongLong(c,sentinel_script_max_runtime);
+ fields++;
+
+ addReplyBulkCString(c,"SCRIPT-RETRY-DELAY");
+ addReplyBulkLongLong(c,sentinel_script_retry_delay);
+ fields++;
+
+ setDeferredMapLen(c,mbl,fields);
+}
+
+/* Emit every instance stored inside 'instances' to the client as a Redis
+ * protocol array. Replicas that are flagged as not announced are left out
+ * of the reply. */
+void addReplyDictOfRedisInstances(client *c, dict *instances) {
+ dictIterator *di;
+ dictEntry *de;
+ long emitted = 0;
+ void *replylen = addReplyDeferredLen(c);
+
+ di = dictGetIterator(instances);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *instance = dictGetVal(de);
+
+ /* Unannounced replicas are hidden from clients. */
+ if ((instance->flags & SRI_SLAVE) && !instance->replica_announced)
+ continue;
+ addReplySentinelRedisInstance(c,instance);
+ emitted++;
+ }
+ dictReleaseIterator(di);
+ setDeferredArrayLen(c, replylen, emitted);
+}
+
+/* Resolve 'name' inside sentinel.masters. When no master with that name is
+ * being monitored, an error is sent to the client and NULL is returned, so
+ * callers can simply bail out on NULL. */
+sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(client *c,
+ robj *name)
+{
+ sentinelRedisInstance *master = dictFetchValue(sentinel.masters,name->ptr);
+
+ if (master == NULL) {
+ addReplyError(c,"No such master with that name");
+ return NULL;
+ }
+ return master;
+}
+
+#define SENTINEL_ISQR_OK 0
+#define SENTINEL_ISQR_NOQUORUM (1<<0)
+#define SENTINEL_ISQR_NOAUTH (1<<1)
+/* Check, from this Sentinel's point of view, whether enough of the Sentinels
+ * monitoring 'master' are usable (not flagged SDOWN or ODOWN) to reach both
+ * the configured quorum and the voter majority needed to authorize a
+ * failover.
+ *
+ * Returns SENTINEL_ISQR_OK when both conditions hold, otherwise a bit mask:
+ *   SENTINEL_ISQR_NOQUORUM  usable Sentinels < master->quorum.
+ *   SENTINEL_ISQR_NOAUTH    usable Sentinels < majority of voters, i.e.
+ *                           the known Sentinels plus ourselves.
+ * If 'usableptr' is not NULL, the number of usable Sentinels is also
+ * returned there. */
+int sentinelIsQuorumReachable(sentinelRedisInstance *master, int *usableptr) {
+ dictIterator *di;
+ dictEntry *de;
+ int usable = 1; /* Number of usable Sentinels. Init to 1 to count myself. */
+ int result = SENTINEL_ISQR_OK;
+ int voters = dictSize(master->sentinels)+1; /* Known Sentinels + myself. */
+
+ di = dictGetIterator(master->sentinels);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+
+ /* A Sentinel we believe to be down cannot contribute votes. */
+ if (ri->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
+ usable++;
+ }
+ dictReleaseIterator(di);
+
+ if (usable < (int)master->quorum) result |= SENTINEL_ISQR_NOQUORUM;
+ if (usable < voters/2+1) result |= SENTINEL_ISQR_NOAUTH;
+ if (usableptr) *usableptr = usable;
+ return result;
+}
+
+/* SENTINEL <subcommand> [<arg> ...]
+ *
+ * Dispatcher for every SENTINEL subcommand. Each branch validates its own
+ * arity (jumping to numargserr on mismatch) and replies directly. */
+void sentinelCommand(client *c) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"CKQUORUM <master-name>",
+" Check if the current Sentinel configuration is able to reach the quorum",
+" needed to failover a master and the majority needed to authorize the",
+" failover.",
+"CONFIG SET param value [param value ...]",
+" Set a global Sentinel configuration parameter.",
+"CONFIG GET <param> [param param param ...]",
+" Get global Sentinel configuration parameter.",
+"DEBUG [<param> <value> ...]",
+" Show a list of configurable time parameters and their values (milliseconds).",
+" Or update current configurable parameters values (one or more).",
+"GET-MASTER-ADDR-BY-NAME <master-name>",
+" Return the ip and port number of the master with that name.",
+"FAILOVER <master-name>",
+" Manually failover a master node without asking for agreement from other",
+" Sentinels",
+"FLUSHCONFIG",
+" Force Sentinel to rewrite its configuration on disk, including the current",
+" Sentinel state.",
+"INFO-CACHE <master-name>",
+" Return last cached INFO output from masters and all its replicas.",
+"IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>",
+" Check if the master specified by ip:port is down from current Sentinel's",
+" point of view.",
+"MASTER <master-name>",
+" Show the state and info of the specified master.",
+"MASTERS",
+" Show a list of monitored masters and their state.",
+"MONITOR <name> <ip> <port> <quorum>",
+" Start monitoring a new master with the specified name, ip, port and quorum.",
+"MYID",
+" Return the ID of the Sentinel instance.",
+"PENDING-SCRIPTS",
+" Get pending scripts information.",
+"REMOVE <master-name>",
+" Remove master from Sentinel's monitor list.",
+"REPLICAS <master-name>",
+" Show a list of replicas for this master and their state.",
+"RESET <pattern>",
+" Reset masters for specific master name matching this pattern.",
+"SENTINELS <master-name>",
+" Show a list of Sentinel instances for this master and their state.",
+"SET <master-name> <option> <value> [<option> <value> ...]",
+" Set configuration parameters for certain masters.",
+"SIMULATE-FAILURE [CRASH-AFTER-ELECTION] [CRASH-AFTER-PROMOTION] [HELP]",
+" Simulate a Sentinel crash.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"masters")) {
+ /* SENTINEL MASTERS */
+ if (c->argc != 2) goto numargserr;
+ addReplyDictOfRedisInstances(c,sentinel.masters);
+ } else if (!strcasecmp(c->argv[1]->ptr,"master")) {
+ /* SENTINEL MASTER <name> */
+ sentinelRedisInstance *ri;
+
+ if (c->argc != 3) goto numargserr;
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
+ == NULL) return;
+ addReplySentinelRedisInstance(c,ri);
+ } else if (!strcasecmp(c->argv[1]->ptr,"slaves") ||
+ !strcasecmp(c->argv[1]->ptr,"replicas"))
+ {
+ /* SENTINEL REPLICAS <master-name> */
+ sentinelRedisInstance *ri;
+
+ if (c->argc != 3) goto numargserr;
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2])) == NULL)
+ return;
+ addReplyDictOfRedisInstances(c,ri->slaves);
+ } else if (!strcasecmp(c->argv[1]->ptr,"sentinels")) {
+ /* SENTINEL SENTINELS <master-name> */
+ sentinelRedisInstance *ri;
+
+ if (c->argc != 3) goto numargserr;
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2])) == NULL)
+ return;
+ addReplyDictOfRedisInstances(c,ri->sentinels);
+ } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) {
+ /* SENTINEL MYID */
+ addReplyBulkCBuffer(c,sentinel.myid,CONFIG_RUN_ID_SIZE);
+ } else if (!strcasecmp(c->argv[1]->ptr,"is-master-down-by-addr")) {
+ /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>
+ *
+ * Arguments:
+ *
+ * ip and port are the ip and port of the master we want to be
+ * checked by Sentinel. Note that the command will not check by
+ * name but just by master, in theory different Sentinels may monitor
+ * different masters with the same name.
+ *
+ * current-epoch is needed in order to understand if we are allowed
+ * to vote for a failover leader or not. Each Sentinel can vote just
+ * one time per epoch.
+ *
+ * runid is "*" if we are not seeking for a vote from the Sentinel
+ * in order to elect the failover leader. Otherwise it is set to the
+ * runid we want the Sentinel to vote if it did not already voted.
+ */
+ sentinelRedisInstance *ri;
+ long long req_epoch;
+ uint64_t leader_epoch = 0;
+ char *leader = NULL;
+ long port;
+ int isdown = 0;
+
+ if (c->argc != 6) goto numargserr;
+ if (getLongFromObjectOrReply(c,c->argv[3],&port,NULL) != C_OK ||
+ getLongLongFromObjectOrReply(c,c->argv[4],&req_epoch,NULL)
+ != C_OK)
+ return;
+ ri = getSentinelRedisInstanceByAddrAndRunID(sentinel.masters,
+ c->argv[2]->ptr,port,NULL);
+
+ /* It exists? Is actually a master? Is subjectively down? It's down.
+ * Note: if we are in tilt mode we always reply with "0". */
+ if (!sentinel.tilt && ri && (ri->flags & SRI_S_DOWN) &&
+ (ri->flags & SRI_MASTER))
+ isdown = 1;
+
+ /* Vote for the master (or fetch the previous vote) if the request
+ * includes a runid, otherwise the sender is not seeking for a vote. */
+ if (ri && ri->flags & SRI_MASTER && strcasecmp(c->argv[5]->ptr,"*")) {
+ leader = sentinelVoteLeader(ri,(uint64_t)req_epoch,
+ c->argv[5]->ptr,
+ &leader_epoch);
+ }
+
+ /* Reply with a three-elements multi-bulk reply:
+ * down state, leader, vote epoch. */
+ addReplyArrayLen(c,3);
+ addReply(c, isdown ? shared.cone : shared.czero);
+ addReplyBulkCString(c, leader ? leader : "*");
+ addReplyLongLong(c, (long long)leader_epoch);
+ if (leader) sdsfree(leader);
+ } else if (!strcasecmp(c->argv[1]->ptr,"reset")) {
+ /* SENTINEL RESET <pattern> */
+ if (c->argc != 3) goto numargserr;
+ addReplyLongLong(c,sentinelResetMastersByPattern(c->argv[2]->ptr,SENTINEL_GENERATE_EVENT));
+ } else if (!strcasecmp(c->argv[1]->ptr,"get-master-addr-by-name")) {
+ /* SENTINEL GET-MASTER-ADDR-BY-NAME <master-name> */
+ sentinelRedisInstance *ri;
+
+ if (c->argc != 3) goto numargserr;
+ ri = sentinelGetMasterByName(c->argv[2]->ptr);
+ if (ri == NULL) {
+ addReplyNullArray(c);
+ } else {
+ sentinelAddr *addr = sentinelGetCurrentMasterAddress(ri);
+
+ addReplyArrayLen(c,2);
+ addReplyBulkCString(c,announceSentinelAddr(addr));
+ addReplyBulkLongLong(c,addr->port);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"failover")) {
+ /* SENTINEL FAILOVER <master-name> */
+ sentinelRedisInstance *ri;
+
+ if (c->argc != 3) goto numargserr;
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2])) == NULL)
+ return;
+ if (ri->flags & SRI_FAILOVER_IN_PROGRESS) {
+ addReplyError(c,"-INPROG Failover already in progress");
+ return;
+ }
+ if (sentinelSelectSlave(ri) == NULL) {
+ addReplyError(c,"-NOGOODSLAVE No suitable replica to promote");
+ return;
+ }
+ serverLog(LL_NOTICE,"Executing user requested FAILOVER of '%s'",
+ ri->name);
+ sentinelStartFailover(ri);
+ ri->flags |= SRI_FORCE_FAILOVER;
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"pending-scripts")) {
+ /* SENTINEL PENDING-SCRIPTS */
+
+ if (c->argc != 2) goto numargserr;
+ sentinelPendingScriptsCommand(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"monitor")) {
+ /* SENTINEL MONITOR <name> <ip> <port> <quorum> */
+ sentinelRedisInstance *ri;
+ long quorum, port;
+ char ip[NET_IP_STR_LEN];
+
+ if (c->argc != 6) goto numargserr;
+ if (getLongFromObjectOrReply(c,c->argv[5],&quorum,"Invalid quorum")
+ != C_OK) return;
+ if (getLongFromObjectOrReply(c,c->argv[4],&port,"Invalid port")
+ != C_OK) return;
+
+ if (quorum <= 0) {
+ addReplyError(c, "Quorum must be 1 or greater.");
+ return;
+ }
+
+ /* If resolve-hostnames is used, actual DNS resolution may take place.
+ * Otherwise just validate address.
+ */
+ if (anetResolve(NULL,c->argv[3]->ptr,ip,sizeof(ip),
+ sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR) {
+ addReplyError(c, "Invalid IP address or hostname specified");
+ return;
+ }
+
+ /* Parameters are valid. Try to create the master instance. */
+ ri = createSentinelRedisInstance(c->argv[2]->ptr,SRI_MASTER,
+ c->argv[3]->ptr,port,quorum,NULL);
+ if (ri == NULL) {
+ addReplyError(c,sentinelCheckCreateInstanceErrors(SRI_MASTER));
+ } else {
+ sentinelFlushConfigAndReply(c);
+ sentinelEvent(LL_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"flushconfig")) {
+ if (c->argc != 2) goto numargserr;
+ sentinelFlushConfigAndReply(c);
+ return;
+ } else if (!strcasecmp(c->argv[1]->ptr,"remove")) {
+ /* SENTINEL REMOVE <name> */
+ sentinelRedisInstance *ri;
+
+ if (c->argc != 3) goto numargserr;
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
+ == NULL) return;
+ sentinelEvent(LL_WARNING,"-monitor",ri,"%@");
+ dictDelete(sentinel.masters,c->argv[2]->ptr);
+ sentinelFlushConfigAndReply(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"ckquorum")) {
+ /* SENTINEL CKQUORUM <name> */
+ sentinelRedisInstance *ri;
+ int usable;
+
+ if (c->argc != 3) goto numargserr;
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
+ == NULL) return;
+ int result = sentinelIsQuorumReachable(ri,&usable);
+ if (result == SENTINEL_ISQR_OK) {
+ addReplySds(c, sdscatfmt(sdsempty(),
+ "+OK %i usable Sentinels. Quorum and failover authorization "
+ "can be reached\r\n",usable));
+ } else {
+ sds e = sdscatfmt(sdsempty(),
+ "-NOQUORUM %i usable Sentinels. ",usable);
+ if (result & SENTINEL_ISQR_NOQUORUM)
+ e = sdscat(e,"Not enough available Sentinels to reach the"
+ " specified quorum for this master");
+ if (result & SENTINEL_ISQR_NOAUTH) {
+ if (result & SENTINEL_ISQR_NOQUORUM) e = sdscat(e,". ");
+ e = sdscat(e, "Not enough available Sentinels to reach the"
+ " majority and authorize a failover");
+ }
+ addReplyErrorSds(c,e);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"set")) {
+ sentinelSetCommand(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"config")) {
+ if (c->argc < 4) goto numargserr;
+ if (!strcasecmp(c->argv[2]->ptr,"set") && c->argc >= 5)
+ sentinelConfigSetCommand(c);
+ else if (!strcasecmp(c->argv[2]->ptr,"get") && c->argc >= 4)
+ sentinelConfigGetCommand(c);
+ else
+ addReplyError(c, "Only SENTINEL CONFIG GET <param> [<param> <param> ...]/ SET <param> <value> [<param> <value> ...] are supported.");
+ } else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
+ /* SENTINEL INFO-CACHE <name> */
+ if (c->argc < 2) goto numargserr;
+ mstime_t now = mstime();
+
+ /* Create an ad-hoc dictionary type so that we can iterate
+ * a dictionary composed of just the master groups the user
+ * requested. */
+ dictType copy_keeper = instancesDictType;
+ copy_keeper.valDestructor = NULL;
+ dict *masters_local = sentinel.masters;
+ if (c->argc > 2) {
+ masters_local = dictCreate(&copy_keeper);
+
+ for (int i = 2; i < c->argc; i++) {
+ sentinelRedisInstance *ri;
+ ri = sentinelGetMasterByName(c->argv[i]->ptr);
+ if (!ri) continue; /* ignore non-existing names */
+ dictAdd(masters_local, ri->name, ri);
+ }
+ }
+
+ /* Reply format:
+ * 1) master name
+ * 2) 1) 1) info cached ms
+ * 2) info from master
+ * 2) 1) info cached ms
+ * 2) info from replica1
+ * ...
+ * 3) other master name
+ * ...
+ * ...
+ */
+ addReplyArrayLen(c,dictSize(masters_local) * 2);
+
+ dictIterator *di;
+ dictEntry *de;
+ di = dictGetIterator(masters_local);
+ while ((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+ addReplyBulkCBuffer(c,ri->name,strlen(ri->name));
+ addReplyArrayLen(c,dictSize(ri->slaves) + 1); /* +1 for self */
+ addReplyArrayLen(c,2);
+ addReplyLongLong(c,
+ ri->info_refresh ? (now - ri->info_refresh) : 0);
+ if (ri->info)
+ addReplyBulkCBuffer(c,ri->info,sdslen(ri->info));
+ else
+ addReplyNull(c);
+
+ dictIterator *sdi;
+ dictEntry *sde;
+ sdi = dictGetIterator(ri->slaves);
+ while ((sde = dictNext(sdi)) != NULL) {
+ sentinelRedisInstance *sri = dictGetVal(sde);
+ addReplyArrayLen(c,2);
+ /* Fix: report the age of the replica's own cached INFO,
+ * testing sri->info_refresh (the replica), not the
+ * master's ri->info_refresh. */
+ addReplyLongLong(c,
+ sri->info_refresh ? (now - sri->info_refresh) : 0);
+ if (sri->info)
+ addReplyBulkCBuffer(c,sri->info,sdslen(sri->info));
+ else
+ addReplyNull(c);
+ }
+ dictReleaseIterator(sdi);
+ }
+ dictReleaseIterator(di);
+ if (masters_local != sentinel.masters) dictRelease(masters_local);
+ } else if (!strcasecmp(c->argv[1]->ptr,"simulate-failure")) {
+ /* SENTINEL SIMULATE-FAILURE [CRASH-AFTER-ELECTION] [CRASH-AFTER-PROMOTION] [HELP] */
+ int j;
+
+ sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
+ for (j = 2; j < c->argc; j++) {
+ if (!strcasecmp(c->argv[j]->ptr,"crash-after-election")) {
+ sentinel.simfailure_flags |=
+ SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION;
+ serverLog(LL_WARNING,"Failure simulation: this Sentinel "
+ "will crash after being successfully elected as failover "
+ "leader");
+ } else if (!strcasecmp(c->argv[j]->ptr,"crash-after-promotion")) {
+ sentinel.simfailure_flags |=
+ SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION;
+ serverLog(LL_WARNING,"Failure simulation: this Sentinel "
+ "will crash after promoting the selected replica to master");
+ } else if (!strcasecmp(c->argv[j]->ptr,"help")) {
+ addReplyArrayLen(c,2);
+ addReplyBulkCString(c,"crash-after-election");
+ addReplyBulkCString(c,"crash-after-promotion");
+ return;
+ } else {
+ addReplyError(c,"Unknown failure simulation specified");
+ return;
+ }
+ }
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"debug")) {
+ if (c->argc == 2)
+ addReplySentinelDebugInfo(c);
+ else
+ sentinelSetDebugConfigParameters(c);
+ }
+ else {
+ addReplySubcommandSyntaxError(c);
+ }
+ return;
+
+numargserr:
+ addReplyErrorArity(c);
+}
+
+void addInfoSectionsToDict(dict *section_dict, char **sections);
+
+/* INFO [<section> [<section> ...]]
+ *
+ * Sentinel flavour of the INFO command. Only the sections listed in
+ * 'sentinel_sections' are supported; any other requested section is silently
+ * dropped from the request. The generic part of the output is produced by
+ * genRedisInfoString(), and the "# Sentinel" section (tilt state, scripts
+ * queue, one line per monitored master) is appended here when requested. */
+void sentinelInfoCommand(client *c) {
+ char *sentinel_sections[] = {"server", "clients", "cpu", "stats", "sentinel", NULL};
+ int sec_all = 0, sec_everything = 0;
+ /* Dict of all supported sections, built once and kept for the lifetime
+ * of the process (intentionally never released). */
+ static dict *cached_all_info_sections = NULL;
+
+ /* Get requested section list. */
+ dict *sections_dict = genInfoSectionDict(c->argv+1, c->argc-1, sentinel_sections, &sec_all, &sec_everything);
+
+ /* Purge unsupported sections from the requested ones. */
+ dictEntry *de;
+ dictIterator *di = dictGetSafeIterator(sections_dict);
+ while((de = dictNext(di)) != NULL) {
+ int i;
+ sds sec = dictGetKey(de);
+ for (i=0; sentinel_sections[i]; i++)
+ if (!strcasecmp(sentinel_sections[i], sec))
+ break;
+ /* section not found? remove it */
+ if (!sentinel_sections[i])
+ dictDelete(sections_dict, sec);
+ }
+ dictReleaseIterator(di);
+
+ /* Insert explicit all sections (don't pass these vars to genRedisInfoString) */
+ if (sec_all || sec_everything) {
+ releaseInfoSectionDict(sections_dict);
+ /* We cache this dict as an optimization. */
+ if (!cached_all_info_sections) {
+ cached_all_info_sections = dictCreate(&stringSetDictType);
+ addInfoSectionsToDict(cached_all_info_sections, sentinel_sections);
+ }
+ sections_dict = cached_all_info_sections;
+ }
+
+ sds info = genRedisInfoString(sections_dict, 0, 0);
+ if (sec_all || (dictFind(sections_dict, "sentinel") != NULL)) {
+ dictIterator *di;
+ dictEntry *de;
+ int master_id = 0;
+
+ /* Add a blank-line separator only when generic sections precede us. */
+ if (sdslen(info) != 0)
+ info = sdscat(info,"\r\n");
+ info = sdscatprintf(info,
+ "# Sentinel\r\n"
+ "sentinel_masters:%lu\r\n"
+ "sentinel_tilt:%d\r\n"
+ "sentinel_tilt_since_seconds:%jd\r\n"
+ "sentinel_running_scripts:%d\r\n"
+ "sentinel_scripts_queue_length:%ld\r\n"
+ "sentinel_simulate_failure_flags:%lu\r\n",
+ dictSize(sentinel.masters),
+ sentinel.tilt,
+ sentinel.tilt ? (intmax_t)((mstime()-sentinel.tilt_start_time)/1000) : -1,
+ sentinel.running_scripts,
+ listLength(sentinel.scripts_queue),
+ sentinel.simfailure_flags);
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+ char *status = "ok";
+
+ /* ODOWN takes precedence over SDOWN in the reported status. */
+ if (ri->flags & SRI_O_DOWN) status = "odown";
+ else if (ri->flags & SRI_S_DOWN) status = "sdown";
+ info = sdscatprintf(info,
+ "master%d:name=%s,status=%s,address=%s:%d,"
+ "slaves=%lu,sentinels=%lu\r\n",
+ master_id++, ri->name, status,
+ announceSentinelAddr(ri->addr), ri->addr->port,
+ dictSize(ri->slaves),
+ dictSize(ri->sentinels)+1);
+ }
+ dictReleaseIterator(di);
+ }
+ /* Never release the shared cached dict; ad-hoc dicts are released. */
+ if (sections_dict != cached_all_info_sections)
+ releaseInfoSectionDict(sections_dict);
+ addReplyBulkSds(c, info);
+}
+
+/* ROLE, Sentinel flavour: reply with a two-element array holding the string
+ * "sentinel" and the list of currently monitored master names. */
+void sentinelRoleCommand(client *c) {
+ dictIterator *di;
+ dictEntry *de;
+
+ addReplyArrayLen(c,2);
+ addReplyBulkCBuffer(c,"sentinel",8);
+ addReplyArrayLen(c,dictSize(sentinel.masters));
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *master = dictGetVal(de);
+ addReplyBulkCString(c,master->name);
+ }
+ dictReleaseIterator(di);
+}
+
+/* SENTINEL SET <mastername> [<option> <value> ...]
+ *
+ * Update per-master configuration parameters. Options are processed left to
+ * right; each accepted option bumps 'changes' and is logged as a "+set"
+ * event (with the value redacted for auth-pass). On a bad value the command
+ * jumps to badfmt and replies with an error naming the argument; options
+ * applied before the failure remain in effect, and the configuration is
+ * still flushed to disk if anything changed. */
+void sentinelSetCommand(client *c) {
+ sentinelRedisInstance *ri;
+ int j, changes = 0;
+ int badarg = 0; /* Bad argument position for error reporting. */
+ char *option;
+ int redacted;
+
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
+ == NULL) return;
+
+ /* Process option - value pairs. */
+ for (j = 3; j < c->argc; j++) {
+ int moreargs = (c->argc-1) - j;
+ option = c->argv[j]->ptr;
+ long long ll;
+ int old_j = j; /* Used to know what to log as an event. */
+ redacted = 0;
+
+ if (!strcasecmp(option,"down-after-milliseconds") && moreargs > 0) {
+ /* down-after-milliseconds <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ ri->down_after_period = ll;
+ sentinelPropagateDownAfterPeriod(ri);
+ changes++;
+ } else if (!strcasecmp(option,"failover-timeout") && moreargs > 0) {
+ /* failover-timeout <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ ri->failover_timeout = ll;
+ changes++;
+ } else if (!strcasecmp(option,"parallel-syncs") && moreargs > 0) {
+ /* parallel-syncs <milliseconds> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ ri->parallel_syncs = ll;
+ changes++;
+ } else if (!strcasecmp(option,"notification-script") && moreargs > 0) {
+ /* notification-script <path> */
+ char *value = c->argv[++j]->ptr;
+ if (sentinel.deny_scripts_reconfig) {
+ addReplyError(c,
+ "Reconfiguration of scripts path is denied for "
+ "security reasons. Check the deny-scripts-reconfig "
+ "configuration directive in your Sentinel configuration");
+ goto seterr;
+ }
+
+ /* An empty path clears the script; otherwise it must exist and
+ * be executable by us. */
+ if (strlen(value) && access(value,X_OK) == -1) {
+ addReplyError(c,
+ "Notification script seems non existing or non executable");
+ goto seterr;
+ }
+ sdsfree(ri->notification_script);
+ ri->notification_script = strlen(value) ? sdsnew(value) : NULL;
+ changes++;
+ } else if (!strcasecmp(option,"client-reconfig-script") && moreargs > 0) {
+ /* client-reconfig-script <path> */
+ char *value = c->argv[++j]->ptr;
+ if (sentinel.deny_scripts_reconfig) {
+ addReplyError(c,
+ "Reconfiguration of scripts path is denied for "
+ "security reasons. Check the deny-scripts-reconfig "
+ "configuration directive in your Sentinel configuration");
+ goto seterr;
+ }
+
+ if (strlen(value) && access(value,X_OK) == -1) {
+ addReplyError(c,
+ "Client reconfiguration script seems non existing or "
+ "non executable");
+ goto seterr;
+ }
+ sdsfree(ri->client_reconfig_script);
+ ri->client_reconfig_script = strlen(value) ? sdsnew(value) : NULL;
+ changes++;
+ } else if (!strcasecmp(option,"auth-pass") && moreargs > 0) {
+ /* auth-pass <password> */
+ char *value = c->argv[++j]->ptr;
+ sdsfree(ri->auth_pass);
+ ri->auth_pass = strlen(value) ? sdsnew(value) : NULL;
+ /* Drop current connections so they are re-established with the
+ * new credentials. */
+ dropInstanceConnections(ri);
+ changes++;
+ redacted = 1;
+ } else if (!strcasecmp(option,"auth-user") && moreargs > 0) {
+ /* auth-user <username> */
+ char *value = c->argv[++j]->ptr;
+ sdsfree(ri->auth_user);
+ ri->auth_user = strlen(value) ? sdsnew(value) : NULL;
+ dropInstanceConnections(ri);
+ changes++;
+ } else if (!strcasecmp(option,"quorum") && moreargs > 0) {
+ /* quorum <count> */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ ri->quorum = ll;
+ changes++;
+ } else if (!strcasecmp(option,"rename-command") && moreargs > 1) {
+ /* rename-command <oldname> <newname> */
+ sds oldname = c->argv[++j]->ptr;
+ sds newname = c->argv[++j]->ptr;
+
+ /* Report whichever of the two names is empty. */
+ if ((sdslen(oldname) == 0) || (sdslen(newname) == 0)) {
+ badarg = sdslen(newname) ? j-1 : j;
+ goto badfmt;
+ }
+
+ /* Remove any older renaming for this command. */
+ dictDelete(ri->renamed_commands,oldname);
+
+ /* If the target name is the same as the source name there
+ * is no need to add an entry mapping to itself. */
+ if (!dictSdsKeyCaseCompare(ri->renamed_commands,oldname,newname)) {
+ oldname = sdsdup(oldname);
+ newname = sdsdup(newname);
+ dictAdd(ri->renamed_commands,oldname,newname);
+ }
+ changes++;
+ } else if (!strcasecmp(option,"master-reboot-down-after-period") && moreargs > 0) {
+ /* master-reboot-down-after-period <milliseconds>
+ * Note: zero is accepted here (ll < 0 check) unlike the other
+ * time options, which require a strictly positive value. */
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll < 0) {
+ badarg = j;
+ goto badfmt;
+ }
+ ri->master_reboot_down_after_period = ll;
+ changes++;
+ } else {
+ addReplyErrorFormat(c,"Unknown option or number of arguments for "
+ "SENTINEL SET '%s'", option);
+ goto seterr;
+ }
+
+ /* Log the event. */
+ int numargs = j-old_j+1;
+ switch(numargs) {
+ case 2:
+ sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s",(char*)c->argv[old_j]->ptr,
+ redacted ? "******" : (char*)c->argv[old_j+1]->ptr);
+ break;
+ case 3:
+ sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s %s",(char*)c->argv[old_j]->ptr,
+ (char*)c->argv[old_j+1]->ptr,
+ (char*)c->argv[old_j+2]->ptr);
+ break;
+ default:
+ sentinelEvent(LL_WARNING,"+set",ri,"%@ %s",(char*)c->argv[old_j]->ptr);
+ break;
+ }
+ }
+ if (changes) sentinelFlushConfigAndReply(c);
+ return;
+
+badfmt: /* Bad format errors */
+ addReplyErrorFormat(c,"Invalid argument '%s' for SENTINEL SET '%s'",
+ (char*)c->argv[badarg]->ptr,option);
+seterr:
+ /* TODO: Handle the case of both bad input and save error, possibly handling
+ * SENTINEL SET atomically. */
+ if (changes) sentinelFlushConfig();
+}
+
+/* PUBLISH, Sentinel flavour. Sentinels only exchange hello messages, so a
+ * publish to any channel other than SENTINEL_HELLO_CHANNEL is rejected.
+ *
+ * Supporting the PUBLISH verb here lets the hello-sending code treat all
+ * three instance kinds -- masters, slaves and sentinels -- uniformly. */
+void sentinelPublishCommand(client *c) {
+ int is_hello_channel =
+ strcmp(c->argv[1]->ptr,SENTINEL_HELLO_CHANNEL) == 0;
+
+ if (!is_hello_channel) {
+ addReplyError(c, "Only HELLO messages are accepted by Sentinel instances.");
+ return;
+ }
+ sentinelProcessHelloMessage(c->argv[2]->ptr,sdslen(c->argv[2]->ptr));
+ addReplyLongLong(c,1);
+}
+
+/* ===================== SENTINEL availability checks ======================= */
+
+/* Is this instance down from our point of view?
+ *
+ * Updates the SRI_S_DOWN (subjectively down) flag of 'ri' and, as a side
+ * effect, force-closes command/Pub-Sub links that look connected but show
+ * suspicious inactivity, so that a reconnection will be attempted. */
+void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
+    mstime_t elapsed = 0;
+
+    /* Time without a reply: measured from the oldest pending ping if one
+     * is in flight, otherwise (link down) from the last time the link was
+     * known to be available. */
+    if (ri->link->act_ping_time)
+        elapsed = mstime() - ri->link->act_ping_time;
+    else if (ri->link->disconnected)
+        elapsed = mstime() - ri->link->last_avail_time;
+
+    /* Check if we are in need for a reconnection of one of the
+     * links, because we are detecting low activity.
+     *
+     * 1) Check if the command link seems connected, was connected not less
+     *    than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have a
+     *    pending ping for more than half the timeout. */
+    if (ri->link->cc &&
+        (mstime() - ri->link->cc_conn_time) >
+        sentinel_min_link_reconnect_period &&
+        ri->link->act_ping_time != 0 && /* There is a pending ping... */
+        /* The pending ping is delayed, and we did not receive
+         * error replies as well. */
+        (mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
+        (mstime() - ri->link->last_pong_time) > (ri->down_after_period/2))
+    {
+        instanceLinkCloseConnection(ri->link,ri->link->cc);
+    }
+
+    /* 2) Check if the pubsub link seems connected, was connected not less
+     *    than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have no
+     *    activity in the Pub/Sub channel for more than
+     *    SENTINEL_PUBLISH_PERIOD * 3.
+     */
+    if (ri->link->pc &&
+        (mstime() - ri->link->pc_conn_time) >
+         sentinel_min_link_reconnect_period &&
+        (mstime() - ri->link->pc_last_activity) > (sentinel_publish_period*3))
+    {
+        instanceLinkCloseConnection(ri->link,ri->link->pc);
+    }
+
+    /* Update the SDOWN flag. We believe the instance is SDOWN if:
+     *
+     * 1) It is not replying.
+     * 2) We believe it is a master, it reports to be a slave for enough time
+     *    to meet the down_after_period, plus enough time to get two times
+     *    INFO report from the instance.
+     * 3) It reports rebooting as a master for longer than the configured
+     *    master-reboot-down-after-period. */
+    if (elapsed > ri->down_after_period ||
+        (ri->flags & SRI_MASTER &&
+        ri->role_reported == SRI_SLAVE &&
+        mstime() - ri->role_reported_time >
+        (ri->down_after_period+sentinel_info_period*2)) ||
+        (ri->flags & SRI_MASTER_REBOOT &&
+        mstime()-ri->master_reboot_since_time > ri->master_reboot_down_after_period))
+    {
+        /* Is subjectively down */
+        if ((ri->flags & SRI_S_DOWN) == 0) {
+            sentinelEvent(LL_WARNING,"+sdown",ri,"%@");
+            ri->s_down_since_time = mstime();
+            ri->flags |= SRI_S_DOWN;
+        }
+    } else {
+        /* Is subjectively up */
+        if (ri->flags & SRI_S_DOWN) {
+            sentinelEvent(LL_WARNING,"-sdown",ri,"%@");
+            /* SRI_SCRIPT_KILL_SENT is cleared as well — presumably so a
+             * SCRIPT KILL may be attempted again on the next SDOWN period;
+             * verify against the flag's other uses. */
+            ri->flags &= ~(SRI_S_DOWN|SRI_SCRIPT_KILL_SENT);
+        }
+    }
+}
+
+/* Is this instance down according to the configured quorum?
+ *
+ * Note that ODOWN is a weak quorum, it only means that enough Sentinels
+ * reported in a given time range that the instance was not reachable.
+ * However messages can be delayed so there are no strong guarantees about
+ * N instances agreeing at the same time about the down state. */
+void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
+    unsigned int votes = 0;
+    unsigned int reached = 0;
+
+    /* Votes are only counted while we consider the master SDOWN ourselves. */
+    if (master->flags & SRI_S_DOWN) {
+        votes = 1; /* Our own SDOWN judgement counts as one vote. */
+        dictIterator *it = dictGetIterator(master->sentinels);
+        dictEntry *entry;
+        while ((entry = dictNext(it)) != NULL) {
+            sentinelRedisInstance *peer = dictGetVal(entry);
+            if (peer->flags & SRI_MASTER_DOWN) votes++;
+        }
+        dictReleaseIterator(it);
+        reached = (votes >= master->quorum);
+    }
+
+    /* Raise or clear the O_DOWN flag according to the outcome, emitting
+     * the +odown/-odown events only on state transitions. */
+    if (reached && !(master->flags & SRI_O_DOWN)) {
+        sentinelEvent(LL_WARNING,"+odown",master,"%@ #quorum %d/%d",
+            votes, master->quorum);
+        master->flags |= SRI_O_DOWN;
+        master->o_down_since_time = mstime();
+    } else if (!reached && (master->flags & SRI_O_DOWN)) {
+        sentinelEvent(LL_WARNING,"-odown",master,"%@");
+        master->flags &= ~SRI_O_DOWN;
+    }
+}
+
+/* Receive the SENTINEL is-master-down-by-addr reply, see the
+ * sentinelAskMasterStateToOtherSentinels() function for more information.
+ *
+ * The expected reply is a three element array:
+ * 1) (integer) 1 if the replying Sentinel sees the master as down.
+ * 2) (string)  runid of the voted leader, or "*" if no vote was cast.
+ * 3) (integer) epoch of the vote, if any. */
+void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
+    sentinelRedisInstance *ri = privdata;
+    instanceLink *link = c->data;
+    redisReply *r;
+
+    if (!reply || !link) return;
+    link->pending_commands--;
+    r = reply;
+
+    /* Ignore every error or unexpected reply.
+     * Note that if the command returns an error for any reason we'll
+     * end clearing the SRI_MASTER_DOWN flag for timeout anyway. */
+    if (r->type == REDIS_REPLY_ARRAY && r->elements == 3 &&
+        r->element[0]->type == REDIS_REPLY_INTEGER &&
+        r->element[1]->type == REDIS_REPLY_STRING &&
+        r->element[2]->type == REDIS_REPLY_INTEGER)
+    {
+        ri->last_master_down_reply_time = mstime();
+        if (r->element[0]->integer == 1) {
+            ri->flags |= SRI_MASTER_DOWN;
+        } else {
+            ri->flags &= ~SRI_MASTER_DOWN;
+        }
+        if (strcmp(r->element[1]->str,"*")) {
+            /* If the runid in the reply is not "*" the Sentinel actually
+             * replied with a vote. */
+            sdsfree(ri->leader);
+            /* Log only when the vote's epoch differs from the last one seen. */
+            if ((long long)ri->leader_epoch != r->element[2]->integer)
+                serverLog(LL_NOTICE,
+                    "%s voted for %s %llu", ri->name,
+                    r->element[1]->str,
+                    (unsigned long long) r->element[2]->integer);
+            ri->leader = sdsnew(r->element[1]->str);
+            ri->leader_epoch = r->element[2]->integer;
+        }
+    }
+}
+
+/* If we think the master is down, we start sending
+ * SENTINEL IS-MASTER-DOWN-BY-ADDR requests to other sentinels
+ * in order to get the replies that allow to reach the quorum
+ * needed to mark the master in ODOWN state and trigger a failover.
+ *
+ * 'flags' may include SENTINEL_ASK_FORCED to bypass the per-peer
+ * rate limiting (used right after a failover is started). */
+#define SENTINEL_ASK_FORCED (1<<0)
+void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
+    dictIterator *di;
+    dictEntry *de;
+
+    di = dictGetIterator(master->sentinels);
+    while((de = dictNext(di)) != NULL) {
+        sentinelRedisInstance *ri = dictGetVal(de);
+        mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
+        char port[32];
+        int retval;
+
+        /* If the master state from other sentinel is too old, we clear it. */
+        if (elapsed > sentinel_ask_period*5) {
+            ri->flags &= ~SRI_MASTER_DOWN;
+            sdsfree(ri->leader);
+            ri->leader = NULL;
+        }
+
+        /* Only ask if master is down to other sentinels if:
+         *
+         * 1) We believe it is down, or there is a failover in progress.
+         * 2) Sentinel is connected.
+         * 3) We did not receive the info within SENTINEL_ASK_PERIOD ms. */
+        if ((master->flags & SRI_S_DOWN) == 0) continue;
+        if (ri->link->disconnected) continue;
+        if (!(flags & SENTINEL_ASK_FORCED) &&
+            mstime() - ri->last_master_down_reply_time < sentinel_ask_period)
+            continue;
+
+        /* Ask. The last argument is our runid when a failover is in
+         * progress (we are also requesting a vote), "*" otherwise (we only
+         * want the peer's down-state opinion). */
+        ll2string(port,sizeof(port),master->addr->port);
+        retval = redisAsyncCommand(ri->link->cc,
+                    sentinelReceiveIsMasterDownReply, ri,
+                    "%s is-master-down-by-addr %s %s %llu %s",
+                    sentinelInstanceMapCommand(ri,"SENTINEL"),
+                    announceSentinelAddr(master->addr), port,
+                    sentinel.current_epoch,
+                    (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
+                    sentinel.myid : "*");
+        if (retval == C_OK) ri->link->pending_commands++;
+    }
+    dictReleaseIterator(di);
+}
+
+/* =============================== FAILOVER ================================= */
+
+/* Deliberately terminate the process: invoked when the user requested a
+ * simulated failure via the SENTINEL simulate-failure command. */
+void sentinelSimFailureCrash(void) {
+    serverLog(LL_WARNING, "Sentinel CRASH because of SENTINEL simulate-failure");
+    exit(99);
+}
+
+/* Vote for the sentinel with 'req_runid' or return the old vote if already
+ * voted for the specified 'req_epoch' or one greater.
+ *
+ * If a vote is not available returns NULL, otherwise return the Sentinel
+ * runid and populate the leader_epoch with the epoch of the vote.
+ *
+ * The returned string is a freshly allocated sds copy: the caller is
+ * responsible for freeing it (see sentinelGetLeader()). */
+char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch) {
+    /* A request for a newer epoch bumps our own current epoch, which is
+     * immediately persisted to the config file. */
+    if (req_epoch > sentinel.current_epoch) {
+        sentinel.current_epoch = req_epoch;
+        sentinelFlushConfig();
+        sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
+            (unsigned long long) sentinel.current_epoch);
+    }
+
+    /* Grant the vote only if we did not already vote for this epoch. */
+    if (master->leader_epoch < req_epoch && sentinel.current_epoch <= req_epoch)
+    {
+        sdsfree(master->leader);
+        master->leader = sdsnew(req_runid);
+        master->leader_epoch = sentinel.current_epoch;
+        sentinelFlushConfig();
+        sentinelEvent(LL_WARNING,"+vote-for-leader",master,"%s %llu",
+            master->leader, (unsigned long long) master->leader_epoch);
+        /* If we did not vote for ourselves, set the master failover start
+         * time to now, in order to force a delay before we can start a
+         * failover for the same master. */
+        if (strcasecmp(master->leader,sentinel.myid))
+            master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
+    }
+
+    *leader_epoch = master->leader_epoch;
+    return master->leader ? sdsnew(master->leader) : NULL;
+}
+
+/* Per-candidate vote accumulator used during leader election. */
+struct sentinelLeader {
+    char *runid;           /* Candidate Sentinel runid. */
+    unsigned long votes;   /* Number of votes collected for this runid. */
+};
+
+/* Helper function for sentinelGetLeader(): increment the vote counter
+ * associated with 'runid' inside the 'counters' dict, creating the entry
+ * on first sight. Returns the updated vote count. */
+int sentinelLeaderIncr(dict *counters, char *runid) {
+    dictEntry *existing = NULL;
+    dictEntry *de = dictAddRaw(counters,runid,&existing);
+
+    if (existing == NULL) {
+        /* First vote seen for this runid. */
+        serverAssert(de != NULL);
+        dictSetUnsignedIntegerVal(de,1);
+        return 1;
+    }
+
+    uint64_t count = dictGetUnsignedIntegerVal(existing) + 1;
+    dictSetUnsignedIntegerVal(existing,count);
+    return count;
+}
+
+/* Scan all the Sentinels attached to this master to check if there
+ * is a leader for the specified epoch.
+ *
+ * To be a leader for a given epoch, we should have the majority of
+ * the Sentinels we know (ever seen since the last SENTINEL RESET) that
+ * reported the same instance as leader for the same epoch.
+ *
+ * Returns a newly allocated sds string with the leader runid (the caller
+ * must free it), or NULL if there is no winner. */
+char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
+    dict *counters;
+    dictIterator *di;
+    dictEntry *de;
+    unsigned int voters = 0, voters_quorum;
+    char *myvote;
+    char *winner = NULL;
+    uint64_t leader_epoch;
+    uint64_t max_votes = 0;
+
+    serverAssert(master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS));
+    counters = dictCreate(&leaderVotesDictType);
+
+    voters = dictSize(master->sentinels)+1; /* All the other sentinels and me.*/
+
+    /* Count other sentinels votes (only votes for the current epoch are
+     * valid). */
+    di = dictGetIterator(master->sentinels);
+    while((de = dictNext(di)) != NULL) {
+        sentinelRedisInstance *ri = dictGetVal(de);
+        if (ri->leader != NULL && ri->leader_epoch == sentinel.current_epoch)
+            sentinelLeaderIncr(counters,ri->leader);
+    }
+    dictReleaseIterator(di);
+
+    /* Check what's the winner. For the winner to win, it needs two conditions:
+     * 1) Absolute majority between voters (50% + 1).
+     * 2) And anyway at least master->quorum votes. */
+    di = dictGetIterator(counters);
+    while((de = dictNext(di)) != NULL) {
+        uint64_t votes = dictGetUnsignedIntegerVal(de);
+
+        if (votes > max_votes) {
+            max_votes = votes;
+            winner = dictGetKey(de);
+        }
+    }
+    dictReleaseIterator(di);
+
+    /* Count this Sentinel vote:
+     * if this Sentinel did not voted yet, either vote for the most
+     * common voted sentinel, or for itself if no vote exists at all. */
+    if (winner)
+        myvote = sentinelVoteLeader(master,epoch,winner,&leader_epoch);
+    else
+        myvote = sentinelVoteLeader(master,epoch,sentinel.myid,&leader_epoch);
+
+    if (myvote && leader_epoch == epoch) {
+        uint64_t votes = sentinelLeaderIncr(counters,myvote);
+
+        if (votes > max_votes) {
+            max_votes = votes;
+            winner = myvote;
+        }
+    }
+
+    voters_quorum = voters/2+1;
+    if (winner && (max_votes < voters_quorum || max_votes < master->quorum))
+        winner = NULL;
+
+    /* 'winner' may point into 'counters' or at 'myvote': duplicate it
+     * before releasing both. */
+    winner = winner ? sdsnew(winner) : NULL;
+    sdsfree(myvote);
+    dictRelease(counters);
+    return winner;
+}
+
+/* Send SLAVEOF to the specified instance, always followed by a
+ * CONFIG REWRITE command in order to store the new configuration on disk
+ * when possible (that is, if the Redis instance is recent enough to support
+ * config rewriting, and if the server was started with a configuration file).
+ *
+ * If Host is NULL the function sends "SLAVEOF NO ONE".
+ *
+ * The command returns C_OK if the SLAVEOF command was accepted for
+ * (later) delivery otherwise C_ERR. The command replies are just
+ * discarded. */
+int sentinelSendSlaveOf(sentinelRedisInstance *ri, const sentinelAddr *addr) {
+    char portstr[32];
+    const char *host;
+    int retval;
+
+    /* If host is NULL we send SLAVEOF NO ONE that will turn the instance
+     * into a master. The memcpy of 4 bytes includes the terminating NUL. */
+    if (!addr) {
+        host = "NO";
+        memcpy(portstr,"ONE",4);
+    } else {
+        host = announceSentinelAddr(addr);
+        ll2string(portstr,sizeof(portstr),addr->port);
+    }
+
+    /* In order to send SLAVEOF in a safe way, we send a transaction performing
+     * the following tasks:
+     * 1) Reconfigure the instance according to the specified host/port params.
+     * 2) Rewrite the configuration.
+     * 3) Disconnect all clients (but this one sending the command) in order
+     *    to trigger the ask-master-on-reconnection protocol for connected
+     *    clients.
+     *
+     * Note that we don't check the replies returned by commands, since we
+     * will observe instead the effects in the next INFO output.
+     *
+     * On the first C_ERR we bail out: pending_commands is only incremented
+     * for commands that were actually queued. */
+    retval = redisAsyncCommand(ri->link->cc,
+        sentinelDiscardReplyCallback, ri, "%s",
+        sentinelInstanceMapCommand(ri,"MULTI"));
+    if (retval == C_ERR) return retval;
+    ri->link->pending_commands++;
+
+    retval = redisAsyncCommand(ri->link->cc,
+        sentinelDiscardReplyCallback, ri, "%s %s %s",
+        sentinelInstanceMapCommand(ri,"SLAVEOF"),
+        host, portstr);
+    if (retval == C_ERR) return retval;
+    ri->link->pending_commands++;
+
+    retval = redisAsyncCommand(ri->link->cc,
+        sentinelDiscardReplyCallback, ri, "%s REWRITE",
+        sentinelInstanceMapCommand(ri,"CONFIG"));
+    if (retval == C_ERR) return retval;
+    ri->link->pending_commands++;
+
+    /* CLIENT KILL TYPE <type> is only supported starting from Redis 2.8.12,
+     * however sending it to an instance not understanding this command is not
+     * an issue because CLIENT is variadic command, so Redis will not
+     * recognized as a syntax error, and the transaction will not fail (but
+     * only the unsupported command will fail). */
+    for (int type = 0; type < 2; type++) {
+        retval = redisAsyncCommand(ri->link->cc,
+            sentinelDiscardReplyCallback, ri, "%s KILL TYPE %s",
+            sentinelInstanceMapCommand(ri,"CLIENT"),
+            type == 0 ? "normal" : "pubsub");
+        if (retval == C_ERR) return retval;
+        ri->link->pending_commands++;
+    }
+
+    retval = redisAsyncCommand(ri->link->cc,
+        sentinelDiscardReplyCallback, ri, "%s",
+        sentinelInstanceMapCommand(ri,"EXEC"));
+    if (retval == C_ERR) return retval;
+    ri->link->pending_commands++;
+
+    return C_OK;
+}
+
+/* Setup the master state to start a failover. */
+void sentinelStartFailover(sentinelRedisInstance *master) {
+    serverAssert(master->flags & SRI_MASTER);
+
+    master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
+    master->flags |= SRI_FAILOVER_IN_PROGRESS;
+    /* Each failover attempt runs in its own, freshly incremented epoch. */
+    master->failover_epoch = ++sentinel.current_epoch;
+    sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
+        (unsigned long long) sentinel.current_epoch);
+    sentinelEvent(LL_WARNING,"+try-failover",master,"%@");
+    /* Randomize the start time so Sentinels started together do not keep
+     * asking for votes at the same instant. */
+    master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
+    master->failover_state_change_time = mstime();
+}
+
+/* This function checks if there are the conditions to start the failover,
+ * that is:
+ *
+ * 1) Master must be in ODOWN condition.
+ * 2) No failover already in progress.
+ * 3) No failover already attempted recently.
+ *
+ * We still don't know if we'll win the election so it is possible that we
+ * start the failover but that we'll not be able to act.
+ *
+ * Return non-zero if a failover was started. */
+int sentinelStartFailoverIfNeeded(sentinelRedisInstance *master) {
+    /* We can't failover if the master is not in O_DOWN state. */
+    if (!(master->flags & SRI_O_DOWN)) return 0;
+
+    /* Failover already in progress? */
+    if (master->flags & SRI_FAILOVER_IN_PROGRESS) return 0;
+
+    /* Last failover attempt started too little time ago? */
+    if (mstime() - master->failover_start_time <
+        master->failover_timeout*2)
+    {
+        /* Log the next-attempt time only once per failover_start_time so
+         * the log is not flooded on every timer tick. */
+        if (master->failover_delay_logged != master->failover_start_time) {
+            time_t clock = (master->failover_start_time +
+                            master->failover_timeout*2) / 1000;
+            char ctimebuf[26]; /* ctime_r() requires at least 26 bytes. */
+
+            ctime_r(&clock,ctimebuf);
+            ctimebuf[24] = '\0'; /* Remove newline. */
+            master->failover_delay_logged = master->failover_start_time;
+            serverLog(LL_NOTICE,
+                "Next failover delay: I will not start a failover before %s",
+                ctimebuf);
+        }
+        return 0;
+    }
+
+    sentinelStartFailover(master);
+    return 1;
+}
+
+/* Select a suitable slave to promote. The current algorithm only uses
+ * the following parameters:
+ *
+ * 1) None of the following conditions: S_DOWN, O_DOWN, DISCONNECTED.
+ * 2) Last time the slave replied to ping no more than 5 times the PING period.
+ * 3) info_refresh not older than 3 times the INFO refresh period.
+ * 4) master_link_down_time no more than:
+ * (now - master->s_down_since_time) + (master->down_after_period * 10).
+ * Basically since the master is down from our POV, the slave reports
+ * to be disconnected no more than 10 times the configured down-after-period.
+ * This is pretty much black magic but the idea is, the master was not
+ * available so the slave may be lagging, but not over a certain time.
+ * Anyway we'll select the best slave according to replication offset.
+ * 5) Slave priority can't be zero, otherwise the slave is discarded.
+ *
+ * Among all the slaves matching the above conditions we select the slave
+ * with, in order of sorting key:
+ *
+ * - lower slave_priority.
+ * - bigger processed replication offset.
+ * - lexicographically smaller runid.
+ *
+ * Basically if runid is the same, the slave that processed more commands
+ * from the master is selected.
+ *
+ * The function returns the pointer to the selected slave, otherwise
+ * NULL if no suitable slave was found.
+ */
+
+/* Helper for sentinelSelectSlave(). qsort() comparator that orders
+ * suitable slaves "better first", so the best candidate ends up at
+ * index 0 of the sorted array. */
+int compareSlavesForPromotion(const void *a, const void *b) {
+    sentinelRedisInstance *sa = *(sentinelRedisInstance **)(void *)(uintptr_t)a;
+    sentinelRedisInstance *sb = *(sentinelRedisInstance **)(void *)(uintptr_t)b;
+
+    /* Primary key: lower slave_priority wins. */
+    if (sa->slave_priority != sb->slave_priority)
+        return sa->slave_priority - sb->slave_priority;
+
+    /* Secondary key: larger replication offset wins (the slave processed
+     * more data from the master). */
+    if (sa->slave_repl_offset != sb->slave_repl_offset)
+        return (sa->slave_repl_offset > sb->slave_repl_offset) ? -1 : 1;
+
+    /* Final tie break: lexicographically smaller runid wins. A NULL runid
+     * (old Redis versions not publishing it in INFO) is considered bigger
+     * than any other runid. */
+    char *ra = sa->runid, *rb = sb->runid;
+    if (ra == NULL && rb == NULL) return 0;
+    if (ra == NULL) return 1;  /* a > b */
+    if (rb == NULL) return -1; /* a < b */
+    return strcasecmp(ra, rb);
+}
+
+/* Select the best slave of 'master' to promote, applying the filters and
+ * sort order described in the comment above. Returns NULL if no suitable
+ * slave was found. */
+sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
+    /* Worst case: every slave is suitable, so size the array for all. */
+    sentinelRedisInstance **instance =
+        zmalloc(sizeof(instance[0])*dictSize(master->slaves));
+    sentinelRedisInstance *selected = NULL;
+    int instances = 0;
+    dictIterator *di;
+    dictEntry *de;
+    mstime_t max_master_down_time = 0;
+
+    /* Max allowed master_link_down_time reported by a candidate:
+     * (now - s_down_since_time) + down_after_period * 10. */
+    if (master->flags & SRI_S_DOWN)
+        max_master_down_time += mstime() - master->s_down_since_time;
+    max_master_down_time += master->down_after_period * 10;
+
+    di = dictGetIterator(master->slaves);
+
+    while((de = dictNext(di)) != NULL) {
+        sentinelRedisInstance *slave = dictGetVal(de);
+        mstime_t info_validity_time;
+
+        /* Apply the suitability filters (see the big comment above). */
+        if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
+        if (slave->link->disconnected) continue;
+        if (mstime() - slave->link->last_avail_time > sentinel_ping_period*5) continue;
+        if (slave->slave_priority == 0) continue;
+
+        /* If the master is in SDOWN state we get INFO for slaves every second.
+         * Otherwise we get it with the usual period so we need to account for
+         * a larger delay. */
+        if (master->flags & SRI_S_DOWN)
+            info_validity_time = sentinel_ping_period*5;
+        else
+            info_validity_time = sentinel_info_period*3;
+        if (mstime() - slave->info_refresh > info_validity_time) continue;
+        if (slave->master_link_down_time > max_master_down_time) continue;
+        instance[instances++] = slave;
+    }
+    dictReleaseIterator(di);
+    if (instances) {
+        /* Sort "better first" and take the top candidate. */
+        qsort(instance,instances,sizeof(sentinelRedisInstance*),
+            compareSlavesForPromotion);
+        selected = instance[0];
+    }
+    zfree(instance);
+    return selected;
+}
+
+/* ---------------- Failover state machine implementation ------------------- */
+/* WAIT_START state handler: check whether we won the leader election for
+ * the failover epoch, and either advance to SELECT_SLAVE, keep waiting,
+ * or abort after the election timeout. */
+void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
+    char *leader;
+    int isleader;
+
+    /* Check if we are the leader for the failover epoch. */
+    leader = sentinelGetLeader(ri, ri->failover_epoch);
+    isleader = leader && strcasecmp(leader,sentinel.myid) == 0;
+    sdsfree(leader);
+
+    /* If I'm not the leader, and it is not a forced failover via
+     * SENTINEL FAILOVER, then I can't continue with the failover. */
+    if (!isleader && !(ri->flags & SRI_FORCE_FAILOVER)) {
+        mstime_t election_timeout = sentinel_election_timeout;
+
+        /* The election timeout is the MIN between SENTINEL_ELECTION_TIMEOUT
+         * and the configured failover timeout. */
+        if (election_timeout > ri->failover_timeout)
+            election_timeout = ri->failover_timeout;
+        /* Abort the failover if I'm not the leader after some time. */
+        if (mstime() - ri->failover_start_time > election_timeout) {
+            sentinelEvent(LL_WARNING,"-failover-abort-not-elected",ri,"%@");
+            sentinelAbortFailover(ri);
+        }
+        return;
+    }
+    sentinelEvent(LL_WARNING,"+elected-leader",ri,"%@");
+    /* Honor a pending SENTINEL simulate-failure crash-after-election. */
+    if (sentinel.simfailure_flags & SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION)
+        sentinelSimFailureCrash();
+    ri->failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE;
+    ri->failover_state_change_time = mstime();
+    sentinelEvent(LL_WARNING,"+failover-state-select-slave",ri,"%@");
+}
+
+/* SELECT_SLAVE state handler: pick the best slave to promote, or abort
+ * the failover when no suitable candidate exists. */
+void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
+    sentinelRedisInstance *candidate = sentinelSelectSlave(ri);
+
+    /* No timeout handling in this state: we either abort right away or
+     * advance to the next failover state. */
+    if (candidate == NULL) {
+        sentinelEvent(LL_WARNING,"-failover-abort-no-good-slave",ri,"%@");
+        sentinelAbortFailover(ri);
+        return;
+    }
+
+    sentinelEvent(LL_WARNING,"+selected-slave",candidate,"%@");
+    candidate->flags |= SRI_PROMOTED;
+    ri->promoted_slave = candidate;
+    ri->failover_state = SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE;
+    ri->failover_state_change_time = mstime();
+    sentinelEvent(LL_NOTICE,"+failover-state-send-slaveof-noone",
+        candidate, "%@");
+}
+
+/* SEND_SLAVEOF_NOONE state handler: turn the promoted slave into a master
+ * by sending SLAVEOF NO ONE, retrying while it is disconnected until the
+ * failover timeout expires. */
+void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
+    int retval;
+
+    /* We can't send the command to the promoted slave if it is now
+     * disconnected. Retry again and again with this state until the timeout
+     * is reached, then abort the failover. */
+    if (ri->promoted_slave->link->disconnected) {
+        if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
+            sentinelEvent(LL_WARNING,"-failover-abort-slave-timeout",ri,"%@");
+            sentinelAbortFailover(ri);
+        }
+        return;
+    }
+
+    /* Send SLAVEOF NO ONE command to turn the slave into a master.
+     * We actually register a generic callback for this command as we don't
+     * really care about the reply. We check if it worked indirectly observing
+     * if INFO returns a different role (master instead of slave). */
+    retval = sentinelSendSlaveOf(ri->promoted_slave,NULL);
+    if (retval != C_OK) return;
+    sentinelEvent(LL_NOTICE, "+failover-state-wait-promotion",
+        ri->promoted_slave,"%@");
+    ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION;
+    ri->failover_state_change_time = mstime();
+}
+
+/* We actually wait for promotion indirectly, checking with INFO when the
+ * slave turns into a master: advancing to the next state is done by the
+ * INFO-parsing code, so this handler only enforces the timeout. */
+void sentinelFailoverWaitPromotion(sentinelRedisInstance *ri) {
+    mstime_t waited = mstime() - ri->failover_state_change_time;
+
+    if (waited <= ri->failover_timeout) return;
+    sentinelEvent(LL_WARNING,"-failover-abort-slave-timeout",ri,"%@");
+    sentinelAbortFailover(ri);
+}
+
+/* Check if the failover terminated: either every reachable slave was
+ * reconfigured, or the failover timeout elapsed. On termination the state
+ * advances to UPDATE_CONFIG; on timeout a best-effort SLAVEOF is also sent
+ * to all slaves still not reconfigured. */
+void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
+    int not_reconfigured = 0, timeout = 0;
+    dictIterator *di;
+    dictEntry *de;
+    mstime_t elapsed = mstime() - master->failover_state_change_time;
+
+    /* We can't consider failover finished if the promoted slave is
+     * not reachable. */
+    if (master->promoted_slave == NULL ||
+        master->promoted_slave->flags & SRI_S_DOWN) return;
+
+    /* The failover terminates once all the reachable slaves are properly
+     * configured. */
+    di = dictGetIterator(master->slaves);
+    while((de = dictNext(di)) != NULL) {
+        sentinelRedisInstance *slave = dictGetVal(de);
+
+        if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE)) continue;
+        if (slave->flags & SRI_S_DOWN) continue;
+        not_reconfigured++;
+    }
+    dictReleaseIterator(di);
+
+    /* Force end of failover on timeout. */
+    if (elapsed > master->failover_timeout) {
+        not_reconfigured = 0;
+        timeout = 1;
+        sentinelEvent(LL_WARNING,"+failover-end-for-timeout",master,"%@");
+    }
+
+    if (not_reconfigured == 0) {
+        sentinelEvent(LL_WARNING,"+failover-end",master,"%@");
+        master->failover_state = SENTINEL_FAILOVER_STATE_UPDATE_CONFIG;
+        master->failover_state_change_time = mstime();
+    }
+
+    /* If I'm the leader it is a good idea to send a best effort SLAVEOF
+     * command to all the slaves still not reconfigured to replicate with
+     * the new master. Note: 'di'/'de' are reused here (instead of being
+     * shadowed by inner declarations) since the first iterator was already
+     * released above. */
+    if (timeout) {
+        di = dictGetIterator(master->slaves);
+        while((de = dictNext(di)) != NULL) {
+            sentinelRedisInstance *slave = dictGetVal(de);
+            int retval;
+
+            if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE|SRI_RECONF_SENT)) continue;
+            if (slave->link->disconnected) continue;
+
+            retval = sentinelSendSlaveOf(slave,master->promoted_slave->addr);
+            if (retval == C_OK) {
+                sentinelEvent(LL_NOTICE,"+slave-reconf-sent-be",slave,"%@");
+                slave->flags |= SRI_RECONF_SENT;
+            }
+        }
+        dictReleaseIterator(di);
+    }
+}
+
+/* Send SLAVE OF <new master address> to all the remaining slaves that
+ * still don't appear to have the configuration updated, limiting the
+ * number of concurrent reconfigurations to master->parallel_syncs. */
+void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
+    dictIterator *di;
+    dictEntry *de;
+    int in_progress = 0;
+
+    /* First pass: count slaves whose reconfiguration is already underway. */
+    di = dictGetIterator(master->slaves);
+    while((de = dictNext(di)) != NULL) {
+        sentinelRedisInstance *slave = dictGetVal(de);
+
+        if (slave->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG))
+            in_progress++;
+    }
+    dictReleaseIterator(di);
+
+    /* Second pass: start new reconfigurations while we are below the
+     * parallel_syncs limit. */
+    di = dictGetIterator(master->slaves);
+    while(in_progress < master->parallel_syncs &&
+          (de = dictNext(di)) != NULL)
+    {
+        sentinelRedisInstance *slave = dictGetVal(de);
+        int retval;
+
+        /* Skip the promoted slave, and already configured slaves. */
+        if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE)) continue;
+
+        /* If too much time elapsed without the slave moving forward to
+         * the next state, consider it reconfigured even if it is not.
+         * Sentinels will detect the slave as misconfigured and fix its
+         * configuration later. */
+        if ((slave->flags & SRI_RECONF_SENT) &&
+            (mstime() - slave->slave_reconf_sent_time) >
+            sentinel_slave_reconf_timeout)
+        {
+            sentinelEvent(LL_NOTICE,"-slave-reconf-sent-timeout",slave,"%@");
+            slave->flags &= ~SRI_RECONF_SENT;
+            slave->flags |= SRI_RECONF_DONE;
+        }
+
+        /* Nothing to do for instances that are disconnected or already
+         * in RECONF_SENT state. */
+        if (slave->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG)) continue;
+        if (slave->link->disconnected) continue;
+
+        /* Send SLAVEOF <new master>. */
+        retval = sentinelSendSlaveOf(slave,master->promoted_slave->addr);
+        if (retval == C_OK) {
+            slave->flags |= SRI_RECONF_SENT;
+            slave->slave_reconf_sent_time = mstime();
+            sentinelEvent(LL_NOTICE,"+slave-reconf-sent",slave,"%@");
+            in_progress++;
+        }
+    }
+    dictReleaseIterator(di);
+
+    /* Check if all the slaves are reconfigured and handle timeout. */
+    sentinelFailoverDetectEnd(master);
+}
+
+/* Called when the master reached SENTINEL_FAILOVER_STATE_UPDATE_CONFIG:
+ * emit the +switch-master event and rewrite the master entry so it points
+ * at the promoted slave's address. */
+void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master) {
+    /* With no promoted slave the master itself is the reference, i.e. the
+     * address is unchanged. */
+    sentinelRedisInstance *ref =
+        master->promoted_slave ? master->promoted_slave : master;
+    sentinelAddr *old_addr = master->addr;
+    sentinelAddr *new_addr = ref->addr;
+
+    sentinelEvent(LL_WARNING,"+switch-master",master,"%s %s %d %s %d",
+        master->name, announceSentinelAddr(old_addr), old_addr->port,
+        announceSentinelAddr(new_addr), new_addr->port);
+
+    sentinelResetMasterAndChangeAddress(master,new_addr->hostname,new_addr->port);
+}
+
+/* Dispatch the per-state failover handler for a master with a failover
+ * in progress. Does nothing when no failover is running. */
+void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
+    serverAssert(ri->flags & SRI_MASTER);
+
+    if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS)) return;
+
+    switch(ri->failover_state) {
+        case SENTINEL_FAILOVER_STATE_WAIT_START:
+            sentinelFailoverWaitStart(ri);
+            break;
+        case SENTINEL_FAILOVER_STATE_SELECT_SLAVE:
+            sentinelFailoverSelectSlave(ri);
+            break;
+        case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE:
+            sentinelFailoverSendSlaveOfNoOne(ri);
+            break;
+        case SENTINEL_FAILOVER_STATE_WAIT_PROMOTION:
+            sentinelFailoverWaitPromotion(ri);
+            break;
+        case SENTINEL_FAILOVER_STATE_RECONF_SLAVES:
+            sentinelFailoverReconfNextSlave(ri);
+            break;
+        default:
+            /* SENTINEL_FAILOVER_STATE_UPDATE_CONFIG is handled by
+             * sentinelHandleDictOfRedisInstances() once the iteration over
+             * the instances is complete: nothing to do here. */
+            break;
+    }
+}
+
+/* Abort a failover in progress:
+ *
+ * This function can only be called before the promoted slave acknowledged
+ * the slave -> master switch. Otherwise the failover can't be aborted and
+ * will reach its end (possibly by timeout). */
+void sentinelAbortFailover(sentinelRedisInstance *ri) {
+    serverAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
+    serverAssert(ri->failover_state <= SENTINEL_FAILOVER_STATE_WAIT_PROMOTION);
+
+    /* Reset the failover state and undo the provisional promotion. */
+    ri->flags &= ~(SRI_FAILOVER_IN_PROGRESS|SRI_FORCE_FAILOVER);
+    ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
+    ri->failover_state_change_time = mstime();
+    if (ri->promoted_slave) {
+        ri->promoted_slave->flags &= ~SRI_PROMOTED;
+        ri->promoted_slave = NULL;
+    }
+}
+
+/* ======================== SENTINEL timer handler ==========================
+ * This is the "main" our Sentinel, being sentinel completely non blocking
+ * in design.
+ * -------------------------------------------------------------------------- */
+
+/* Perform scheduled operations for the specified Redis instance.
+ *
+ * The function is split in a monitoring half (always executed) and an
+ * acting half (skipped while in TILT mode, since the clock can't be
+ * trusted). */
+void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
+    /* ========== MONITORING HALF ============ */
+    /* Every kind of instance */
+    sentinelReconnectInstance(ri);
+    sentinelSendPeriodicCommands(ri);
+
+    /* ============== ACTING HALF ============= */
+    /* We don't proceed with the acting half if we are in TILT mode.
+     * TILT happens when we find something odd with the time, like a
+     * sudden change in the clock. */
+    if (sentinel.tilt) {
+        /* Exit TILT mode only after the full tilt period elapsed. */
+        if (mstime()-sentinel.tilt_start_time < sentinel_tilt_period) return;
+        sentinel.tilt = 0;
+        sentinelEvent(LL_WARNING,"-tilt",NULL,"#tilt mode exited");
+    }
+
+    /* Every kind of instance */
+    sentinelCheckSubjectivelyDown(ri);
+
+    /* Masters and slaves */
+    if (ri->flags & (SRI_MASTER|SRI_SLAVE)) {
+        /* Nothing so far. */
+    }
+
+    /* Only masters */
+    if (ri->flags & SRI_MASTER) {
+        sentinelCheckObjectivelyDown(ri);
+        /* When a failover just started, force an immediate round of
+         * is-master-down-by-addr questions to collect votes quickly. */
+        if (sentinelStartFailoverIfNeeded(ri))
+            sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_ASK_FORCED);
+        sentinelFailoverStateMachine(ri);
+        sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_NO_FLAGS);
+    }
+}
+
+/* Perform scheduled operations for all the instances in the dictionary.
+ * Recursively call the function against dictionaries of slaves. */
+void sentinelHandleDictOfRedisInstances(dict *instances) {
+    dictIterator *di;
+    dictEntry *de;
+    sentinelRedisInstance *switch_to_promoted = NULL;
+
+    /* There are a number of things we need to perform against every master. */
+    di = dictGetIterator(instances);
+    while((de = dictNext(di)) != NULL) {
+        sentinelRedisInstance *ri = dictGetVal(de);
+
+        sentinelHandleRedisInstance(ri);
+        if (ri->flags & SRI_MASTER) {
+            sentinelHandleDictOfRedisInstances(ri->slaves);
+            sentinelHandleDictOfRedisInstances(ri->sentinels);
+            /* Defer the master switch until the iteration is complete:
+             * at most one master may be in this state per call. */
+            if (ri->failover_state == SENTINEL_FAILOVER_STATE_UPDATE_CONFIG) {
+                switch_to_promoted = ri;
+            }
+        }
+    }
+    /* NOTE(review): the switch runs after dictNext() returned NULL but
+     * before the iterator is released; confirm that
+     * sentinelResetMasterAndChangeAddress() never mutates the 'instances'
+     * dict being iterated here. */
+    if (switch_to_promoted)
+        sentinelFailoverSwitchToPromotedSlave(switch_to_promoted);
+    dictReleaseIterator(di);
+}
+
+/* This function checks if we need to enter the TILT mode.
+ *
+ * The TILT mode is entered if we detect that between two invocations of the
+ * timer interrupt, a negative amount of time, or too much time has passed.
+ * Note that we expect that more or less just 100 milliseconds will pass
+ * if everything is fine. However we'll see a negative number or a
+ * difference bigger than SENTINEL_TILT_TRIGGER milliseconds if one of the
+ * following conditions happen:
+ *
+ * 1) The Sentinel process for some time is blocked, for every kind of
+ * random reason: the load is huge, the computer was frozen for some time
+ * in I/O or alike, the process was stopped by a signal. Everything.
+ * 2) The system clock was altered significantly.
+ *
+ * Under both this conditions we'll see everything as timed out and failing
+ * without good reasons. Instead we enter the TILT mode and wait
+ * for SENTINEL_TILT_PERIOD to elapse before starting to act again.
+ *
+ * During TILT time we still collect information, we just do not act. */
+/* Enter TILT mode when the time delta since the previous invocation is
+ * negative (clock moved backward) or much larger than the expected timer
+ * period (process stalled) — see the comment above for the rationale. */
+void sentinelCheckTiltCondition(void) {
+    mstime_t now = mstime();
+    mstime_t delta = now - sentinel.previous_time;
+
+    if (delta < 0 || delta > sentinel_tilt_trigger) {
+        sentinel.tilt = 1;
+        /* Reuse 'now' so tilt_start_time is consistent with the 'delta'
+         * computation instead of sampling the clock again. */
+        sentinel.tilt_start_time = now;
+        sentinelEvent(LL_WARNING,"+tilt",NULL,"#tilt mode entered");
+    }
+    sentinel.previous_time = now;
+}
+
+/* Sentinel timer handler: called periodically from the server cron to run
+ * all the scheduled Sentinel operations. */
+void sentinelTimer(void) {
+    /* Detect clock anomalies before acting on any instance. */
+    sentinelCheckTiltCondition();
+    /* Handle every monitored master, recursing into slaves and sentinels. */
+    sentinelHandleDictOfRedisInstances(sentinel.masters);
+    /* Notification/reconfiguration scripts lifecycle. */
+    sentinelRunPendingScripts();
+    sentinelCollectTerminatedScripts();
+    sentinelKillTimedoutScripts();
+
+    /* We continuously change the frequency of the Redis "timer interrupt"
+     * in order to desynchronize every Sentinel from every other.
+     * This non-determinism avoids that Sentinels started at the same time
+     * exactly continue to stay synchronized asking to be voted at the
+     * same time again and again (resulting in nobody likely winning the
+     * election because of split brain voting). */
+    server.hz = CONFIG_DEFAULT_HZ + rand() % CONFIG_DEFAULT_HZ;
+}
diff --git a/src/server.c b/src/server.c
new file mode 100644
index 0000000..438325f
--- /dev/null
+++ b/src/server.c
@@ -0,0 +1,7365 @@
+/*
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "monotonic.h"
+#include "cluster.h"
+#include "slowlog.h"
+#include "bio.h"
+#include "latency.h"
+#include "atomicvar.h"
+#include "mt19937-64.h"
+#include "functions.h"
+#include "hdr_histogram.h"
+#include "syscheck.h"
+
+#include <time.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <assert.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <arpa/inet.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+#include <sys/utsname.h>
+#include <locale.h>
+#include <sys/socket.h>
+
+#ifdef __linux__
+#include <sys/mman.h>
+#endif
+
+#if defined(HAVE_SYSCTL_KIPC_SOMAXCONN) || defined(HAVE_SYSCTL_KERN_SOMAXCONN)
+#include <sys/sysctl.h>
+#endif
+
+/* Our shared "common" objects */
+
+struct sharedObjectsStruct shared;
+
+/* Global vars that are actually used as constants. The following double
+ * values are used for double on-disk serialization, and are initialized
+ * at runtime to avoid strange compiler optimizations. */
+
+double R_Zero, R_PosInf, R_NegInf, R_Nan;
+
+/*================================= Globals ================================= */
+
+/* Global vars */
+struct redisServer server; /* Server global state */
+
+/*============================ Internal prototypes ========================== */
+
+/* Forward declarations for functions defined later in this file. */
+static inline int isShutdownInitiated(void);
+int isReadyToShutdown(void);
+int finishShutdown(void);
+const char *replstateToString(int replstate);
+
+/*============================ Utility functions ============================ */
+
+/* This macro tells if we are in the context of loading an AOF.
+ * AOF loading runs commands via a fake client whose id is the reserved
+ * value CLIENT_ID_AOF. */
+#define isAOFLoadingContext() \
+ ((server.current_client && server.current_client->id == CLIENT_ID_AOF) ? 1 : 0)
+
+/* We use a private localtime implementation which is fork-safe. The logging
+ * function of Redis may be called from other threads. */
+void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst);
+
+/* Low level logging. To use only for very big messages, otherwise
+ * serverLog() is to prefer.
+ *
+ * 'level' may have the LL_RAW flag OR-ed in, in which case the message
+ * is written verbatim, without the timestamp / pid / role prefix.
+ * When logging to a file the file is opened and closed on every call. */
+void serverLogRaw(int level, const char *msg) {
+ const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
+ const char *c = ".-*#";
+ FILE *fp;
+ char buf[64];
+ int rawmode = (level & LL_RAW);
+ int log_to_stdout = server.logfile[0] == '\0';
+
+ level &= 0xff; /* clear flags */
+ if (level < server.verbosity) return;
+
+ fp = log_to_stdout ? stdout : fopen(server.logfile,"a");
+ if (!fp) return;
+
+ if (rawmode) {
+ fprintf(fp,"%s",msg);
+ } else {
+ int off;
+ struct timeval tv;
+ int role_char;
+ pid_t pid = getpid();
+
+ gettimeofday(&tv,NULL);
+ struct tm tm;
+ nolocks_localtime(&tm,tv.tv_sec,server.timezone,server.daylight_active);
+ off = strftime(buf,sizeof(buf),"%d %b %Y %H:%M:%S.",&tm);
+ snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
+ /* The role character identifies who produced the log line. */
+ if (server.sentinel_mode) {
+ role_char = 'X'; /* Sentinel. */
+ } else if (pid != server.pid) {
+ role_char = 'C'; /* RDB / AOF writing child. */
+ } else {
+ role_char = (server.masterhost ? 'S':'M'); /* Slave or Master. */
+ }
+ fprintf(fp,"%d:%c %s %c %s\n",
+ (int)getpid(),role_char, buf,c[level],msg);
+ }
+ fflush(fp);
+
+ if (!log_to_stdout) fclose(fp);
+ if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
+}
+
+/* Like serverLogRaw() but with printf-alike support. This is the function that
+ * is used across the code. The raw version is only used in order to dump
+ * the INFO output on crash.
+ *
+ * Messages longer than LOG_MAX_LEN are silently truncated by vsnprintf(). */
+void _serverLog(int level, const char *fmt, ...) {
+ va_list ap;
+ char msg[LOG_MAX_LEN];
+
+ va_start(ap, fmt);
+ vsnprintf(msg, sizeof(msg), fmt, ap);
+ va_end(ap);
+
+ serverLogRaw(level,msg);
+}
+
+/* Log a fixed message without printf-alike capabilities, in a way that is
+ * safe to call from a signal handler.
+ *
+ * We actually use this only for signals that are not fatal from the point
+ * of view of Redis. Signals that are going to kill the server anyway and
+ * where we need printf-alike features are served by serverLog().
+ *
+ * Note: only open(2)/write(2)/close(2) are used here, no stdio, so the
+ * function sticks to async-signal-safe calls. */
+void serverLogFromHandler(int level, const char *msg) {
+ int fd;
+ int log_to_stdout = server.logfile[0] == '\0';
+ char buf[64];
+
+ if ((level&0xff) < server.verbosity || (log_to_stdout && server.daemonize))
+ return;
+ fd = log_to_stdout ? STDOUT_FILENO :
+ open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644);
+ if (fd == -1) return;
+ ll2string(buf,sizeof(buf),getpid());
+ if (write(fd,buf,strlen(buf)) == -1) goto err;
+ if (write(fd,":signal-handler (",17) == -1) goto err;
+ ll2string(buf,sizeof(buf),time(NULL));
+ if (write(fd,buf,strlen(buf)) == -1) goto err;
+ if (write(fd,") ",2) == -1) goto err;
+ if (write(fd,msg,strlen(msg)) == -1) goto err;
+ if (write(fd,"\n",1) == -1) goto err;
+err:
+ if (!log_to_stdout) close(fd);
+}
+
+/* Return the UNIX time in microseconds */
+long long ustime(void) {
+ struct timeval tv;
+ long long ust;
+
+ gettimeofday(&tv, NULL);
+ ust = ((long long)tv.tv_sec)*1000000;
+ ust += tv.tv_usec;
+ return ust;
+}
+
+/* Return the UNIX time in milliseconds (wall clock, not monotonic). */
+mstime_t mstime(void) {
+ return ustime()/1000;
+}
+
+/* Return the command time snapshot in milliseconds.
+ * The time the command started is the logical time it runs,
+ * and all the time readings during the execution time should
+ * reflect the same time.
+ * More details can be found in the comments below.
+ *
+ * Returns: server.cmd_time_snapshot, the value cached before each call(). */
+mstime_t commandTimeSnapshot(void) {
+ /* When we are in the middle of a command execution, we want to use a
+ * reference time that does not change: in that case we just use the
+ * cached time, that we update before each call in the call() function.
+ * This way we avoid that commands such as RPOPLPUSH or similar, that
+ * may re-open the same key multiple times, can invalidate an already
+ * open object in a next call, if the next call will see the key expired,
+ * while the first did not.
+ * This is specifically important in the context of scripts, where we
+ * pretend that time freezes. This way a key can expire only the first time
+ * it is accessed and not in the middle of the script execution, making
+ * propagation to slaves / AOF consistent. See issue #1525 for more info.
+ * Note that we cannot use the cached server.mstime because it can change
+ * in processEventsWhileBlocked etc. */
+ return server.cmd_time_snapshot;
+}
+
+/* After an RDB dump or AOF rewrite we exit from children using _exit() instead of
+ * exit(), because the latter may interact with the same file objects used by
+ * the parent process. However if we are testing the coverage normal exit() is
+ * used in order to obtain the right coverage information. */
+void exitFromChild(int retcode) {
+#ifdef COVERAGE_TEST
+ /* exit() runs atexit handlers so gcov can flush coverage counters. */
+ exit(retcode);
+#else
+ _exit(retcode);
+#endif
+}
+
+/*====================== Hash table type implementation ==================== */
+
+/* This is a hash table type that uses the SDS dynamic strings library as
+ * keys and redis objects as values (objects can hold SDS strings,
+ * lists, sets). */
+
+/* Value destructor: plain zfree() of the stored pointer. */
+void dictVanillaFree(dict *d, void *val)
+{
+ UNUSED(d);
+ zfree(val);
+}
+
+/* Value destructor for dicts whose values are adlist lists. */
+void dictListDestructor(dict *d, void *val)
+{
+ UNUSED(d);
+ listRelease((list*)val);
+}
+
+/* Binary-safe comparison of two SDS string keys. Returns 1 if equal. */
+int dictSdsKeyCompare(dict *d, const void *key1,
+ const void *key2)
+{
+ int l1,l2;
+ UNUSED(d);
+
+ l1 = sdslen((sds)key1);
+ l2 = sdslen((sds)key2);
+ if (l1 != l2) return 0;
+ return memcmp(key1, key2, l1) == 0;
+}
+
+/* A case insensitive version used for the command lookup table and other
+ * places where case insensitive non binary-safe comparison is needed. */
+int dictSdsKeyCaseCompare(dict *d, const void *key1,
+ const void *key2)
+{
+ UNUSED(d);
+ return strcasecmp(key1, key2) == 0;
+}
+
+/* Destructor for robj values: drop one reference. */
+void dictObjectDestructor(dict *d, void *val)
+{
+ UNUSED(d);
+ if (val == NULL) return; /* Lazy freeing will set value to NULL. */
+ decrRefCount(val);
+}
+
+/* Destructor for SDS string keys/values. */
+void dictSdsDestructor(dict *d, void *val)
+{
+ UNUSED(d);
+ sdsfree(val);
+}
+
+/* Key dup callback: deep-copy an SDS string key. */
+void *dictSdsDup(dict *d, const void *key) {
+ UNUSED(d);
+ return sdsdup((const sds) key);
+}
+
+/* Compare two robj keys by their underlying SDS string (binary safe). */
+int dictObjKeyCompare(dict *d, const void *key1,
+ const void *key2)
+{
+ const robj *o1 = key1, *o2 = key2;
+ return dictSdsKeyCompare(d, o1->ptr,o2->ptr);
+}
+
+/* Hash of a robj key, computed on its underlying SDS string. */
+uint64_t dictObjHash(const void *key) {
+ const robj *o = key;
+ return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
+}
+
+/* Binary-safe hash of an SDS string key. */
+uint64_t dictSdsHash(const void *key) {
+ return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
+}
+
+/* Case-insensitive hash of an SDS string key. */
+uint64_t dictSdsCaseHash(const void *key) {
+ return dictGenCaseHashFunction((unsigned char*)key, sdslen((char*)key));
+}
+
+/* Dict hash function for null terminated string */
+uint64_t dictCStrHash(const void *key) {
+ return dictGenHashFunction((unsigned char*)key, strlen((char*)key));
+}
+
+/* Dict case-insensitive hash function for null terminated string */
+uint64_t dictCStrCaseHash(const void *key) {
+ return dictGenCaseHashFunction((unsigned char*)key, strlen((char*)key));
+}
+
+/* Dict compare function for null terminated string */
+int dictCStrKeyCompare(dict *d, const void *key1, const void *key2) {
+ int l1,l2;
+ UNUSED(d);
+
+ l1 = strlen((char*)key1);
+ l2 = strlen((char*)key2);
+ if (l1 != l2) return 0;
+ return memcmp(key1, key2, l1) == 0;
+}
+
+/* Dict case insensitive compare function for null terminated string */
+int dictCStrKeyCaseCompare(dict *d, const void *key1, const void *key2) {
+ UNUSED(d);
+ return strcasecmp(key1, key2) == 0;
+}
+
+/* Compare two possibly-encoded robj keys. Integer-encoded objects store the
+ * number directly in the ptr field, so they can be compared by identity. */
+int dictEncObjKeyCompare(dict *d, const void *key1, const void *key2)
+{
+ robj *o1 = (robj*) key1, *o2 = (robj*) key2;
+ int cmp;
+
+ if (o1->encoding == OBJ_ENCODING_INT &&
+ o2->encoding == OBJ_ENCODING_INT)
+ return o1->ptr == o2->ptr;
+
+ /* Due to OBJ_STATIC_REFCOUNT, we avoid calling getDecodedObject() without
+ * good reasons, because it would incrRefCount() the object, which
+ * is invalid. So we check to make sure dictFind() works with static
+ * objects as well. */
+ if (o1->refcount != OBJ_STATIC_REFCOUNT) o1 = getDecodedObject(o1);
+ if (o2->refcount != OBJ_STATIC_REFCOUNT) o2 = getDecodedObject(o2);
+ cmp = dictSdsKeyCompare(d,o1->ptr,o2->ptr);
+ if (o1->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o1);
+ if (o2->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o2);
+ return cmp;
+}
+
+/* Hash of a possibly-encoded robj key: SDS-encoded objects hash the string,
+ * integer-encoded objects hash the decimal string representation so that
+ * "123" and the integer 123 hash to the same bucket. */
+uint64_t dictEncObjHash(const void *key) {
+ robj *o = (robj*) key;
+
+ if (sdsEncodedObject(o)) {
+ return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
+ } else if (o->encoding == OBJ_ENCODING_INT) {
+ char buf[32];
+ int len;
+
+ len = ll2string(buf,32,(long)o->ptr);
+ return dictGenHashFunction((unsigned char*)buf, len);
+ } else {
+ serverPanic("Unknown string encoding");
+ }
+}
+
+/* Return 1 if currently we allow dict to expand. Dict may allocate huge
+ * memory to contain hash buckets when dict expands, that may lead redis
+ * rejects user's requests or evicts some keys, we can stop dict to expand
+ * provisionally if used memory will be over maxmemory after dict expands,
+ * but to guarantee the performance of redis, we still allow dict to expand
+ * if dict load factor exceeds HASHTABLE_MAX_LOAD_FACTOR. */
+int dictExpandAllowed(size_t moreMem, double usedRatio) {
+ if (usedRatio <= HASHTABLE_MAX_LOAD_FACTOR) {
+ return !overMaxmemoryAfterAlloc(moreMem);
+ } else {
+ /* Load factor is too high: expand regardless of maxmemory. */
+ return 1;
+ }
+}
+
+/* Returns the size of the DB dict entry metadata in bytes. In cluster mode, the
+ * metadata is used for constructing a doubly linked list of the dict entries
+ * belonging to the same cluster slot. See the Slot to Key API in cluster.c. */
+size_t dbDictEntryMetadataSize(dict *d) {
+ UNUSED(d);
+ /* NOTICE: this also affects overhead_ht_slot_to_keys in getMemoryOverheadData.
+ * If we ever add non-cluster related data here, that code must be modified too. */
+ return server.cluster_enabled ? sizeof(clusterDictEntryMetadata) : 0;
+}
+
+/* Returns the size of the DB dict metadata in bytes. In cluster mode, we store
+ * a pointer to the db in the main db dict, used for updating the slot-to-key
+ * mapping when a dictEntry is reallocated. */
+size_t dbDictMetadataSize(void) {
+ return server.cluster_enabled ? sizeof(clusterDictMetadata) : 0;
+}
+
+/* dict callback: keep the slot-to-key mapping in sync when an entry moves. */
+void dbDictAfterReplaceEntry(dict *d, dictEntry *de) {
+ if (server.cluster_enabled) slotToKeyReplaceEntry(d, de);
+}
+
+/* Generic hash table type where keys are Redis Objects, Values
+ * dummy pointers. */
+dictType objectKeyPointerValueDictType = {
+ dictEncObjHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictEncObjKeyCompare, /* key compare */
+ dictObjectDestructor, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Like objectKeyPointerValueDictType(), but values can be destroyed, if
+ * not NULL, calling zfree(). */
+dictType objectKeyHeapPointerValueDictType = {
+ dictEncObjHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictEncObjKeyCompare, /* key compare */
+ dictObjectDestructor, /* key destructor */
+ dictVanillaFree, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Set dictionary type. Keys are SDS strings, values are not used.
+ * Note: uses designated initializers for the no_value/keys_are_odd
+ * optimization flags. */
+dictType setDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ .no_value = 1, /* no values in this dict */
+ .keys_are_odd = 1 /* an SDS string is always an odd pointer */
+};
+
+/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
+dictType zsetDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* Note: SDS string shared & freed by skiplist */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Db->dict, keys are sds strings, vals are Redis objects. */
+dictType dbDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ dictObjectDestructor, /* val destructor */
+ dictExpandAllowed, /* allow to expand */
+ .dictEntryMetadataBytes = dbDictEntryMetadataSize,
+ .dictMetadataBytes = dbDictMetadataSize,
+ .afterReplaceEntry = dbDictAfterReplaceEntry
+};
+
+/* Db->expires. Keys are shared with Db->dict, hence no key destructor. */
+dictType dbExpiresDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL, /* val destructor */
+ dictExpandAllowed /* allow to expand */
+};
+
+/* Command table. sds string -> command struct pointer. */
+dictType commandTableDictType = {
+ dictSdsCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Hash type hash table (note that small hashes are represented with listpacks) */
+dictType hashDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ dictSdsDestructor, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Dict type without destructor */
+dictType sdsReplyDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Keylist hash table type has unencoded redis objects as keys and
+ * lists as values. It's used for blocking operations (BLPOP) and to
+ * map swapped keys to a list of clients waiting for these keys to be loaded. */
+dictType keylistDictType = {
+ dictObjHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictObjKeyCompare, /* key compare */
+ dictObjectDestructor, /* key destructor */
+ dictListDestructor, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Modules system dictionary type. Keys are module name,
+ * values are pointer to RedisModule struct. */
+dictType modulesDictType = {
+ dictSdsCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Migrate cache dict type. */
+dictType migrateCacheDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Dict for case-insensitive search using null terminated C strings.
+ * The keys stored in dict are sds though. */
+dictType stringSetDictType = {
+ dictCStrCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictCStrKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Dict for case-insensitive search using null terminated C strings.
+ * The key and value do not have a destructor. */
+dictType externalStringType = {
+ dictCStrCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictCStrKeyCaseCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Dict for case-insensitive search using sds objects with a zmalloc
+ * allocated object as the value. */
+dictType sdsHashDictType = {
+ dictSdsCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ dictVanillaFree, /* val destructor */
+ NULL /* allow to expand */
+};
+
+/* Return non-zero if the hash table is sparse enough (fill percentage
+ * below HASHTABLE_MIN_FILL, integer arithmetic) to be worth shrinking. */
+int htNeedsResize(dict *dict) {
+ long long size, used;
+
+ size = dictSlots(dict);
+ used = dictSize(dict);
+ return (size > DICT_HT_INITIAL_SIZE &&
+ (used*100/size < HASHTABLE_MIN_FILL));
+}
+
+/* If the percentage of used slots in the HT reaches HASHTABLE_MIN_FILL
+ * we resize the hash table to save memory */
+void tryResizeHashTables(int dbid) {
+ if (htNeedsResize(server.db[dbid].dict))
+ dictResize(server.db[dbid].dict);
+ if (htNeedsResize(server.db[dbid].expires))
+ dictResize(server.db[dbid].expires);
+}
+
+/* Our hash table implementation performs rehashing incrementally while
+ * we write/read from the hash table. Still if the server is idle, the hash
+ * table will use two tables for a long time. So we try to use 1 millisecond
+ * of CPU time at every call of this function to perform some rehashing.
+ *
+ * 'dbid' selects which database's dictionaries to rehash.
+ * The function returns 1 if some rehashing was performed, otherwise 0
+ * is returned. */
+int incrementallyRehash(int dbid) {
+ /* Keys dictionary */
+ if (dictIsRehashing(server.db[dbid].dict)) {
+ dictRehashMilliseconds(server.db[dbid].dict,1);
+ return 1; /* already used our millisecond for this loop... */
+ }
+ /* Expires */
+ if (dictIsRehashing(server.db[dbid].expires)) {
+ dictRehashMilliseconds(server.db[dbid].expires,1);
+ return 1; /* already used our millisecond for this loop... */
+ }
+ return 0;
+}
+
+/* This function is called once a background process of some kind terminates,
+ * as we want to avoid resizing the hash tables when there is a child in order
+ * to play well with copy-on-write (otherwise when a resize happens lots of
+ * memory pages are copied). The goal of this function is to update the ability
+ * for dict.c to resize or rehash the tables accordingly to the fact we have an
+ * active fork child running. */
+void updateDictResizePolicy(void) {
+ if (server.in_fork_child != CHILD_TYPE_NONE)
+ /* We are the fork child: never resize. */
+ dictSetResizeEnabled(DICT_RESIZE_FORBID);
+ else if (hasActiveChildProcess())
+ /* A child exists: avoid resizes to limit copy-on-write. */
+ dictSetResizeEnabled(DICT_RESIZE_AVOID);
+ else
+ dictSetResizeEnabled(DICT_RESIZE_ENABLE);
+}
+
+/* Return a human readable name for a CHILD_TYPE_* constant. */
+const char *strChildType(int type) {
+ switch(type) {
+ case CHILD_TYPE_RDB: return "RDB";
+ case CHILD_TYPE_AOF: return "AOF";
+ case CHILD_TYPE_LDB: return "LDB";
+ case CHILD_TYPE_MODULE: return "MODULE";
+ default: return "Unknown";
+ }
+}
+
+/* Return true if there are active children processes doing RDB saving,
+ * AOF rewriting, or some side process spawned by a loaded module. */
+int hasActiveChildProcess(void) {
+ return server.child_pid != -1;
+}
+
+/* Reset all child-related server state after a fork child died, and notify
+ * modules about the event. */
+void resetChildState(void) {
+ server.child_type = CHILD_TYPE_NONE;
+ server.child_pid = -1;
+ server.stat_current_cow_peak = 0;
+ server.stat_current_cow_bytes = 0;
+ server.stat_current_cow_updated = 0;
+ server.stat_current_save_keys_processed = 0;
+ server.stat_module_progress = 0;
+ server.stat_current_save_keys_total = 0;
+ updateDictResizePolicy();
+ closeChildInfoPipe();
+ moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
+ REDISMODULE_SUBEVENT_FORK_CHILD_DIED,
+ NULL);
+}
+
+/* Return if child type is mutually exclusive with other fork children */
+int isMutuallyExclusiveChildType(int type) {
+ return type == CHILD_TYPE_RDB || type == CHILD_TYPE_AOF || type == CHILD_TYPE_MODULE;
+}
+
+/* Returns true when we're inside a long command that yielded to the event loop. */
+int isInsideYieldingLongCommand(void) {
+ return scriptIsTimedout() || server.busy_module_yield_flags;
+}
+
+/* Return true if this instance has persistence completely turned off:
+ * both RDB and AOF are disabled. */
+int allPersistenceDisabled(void) {
+ return server.saveparamslen == 0 && server.aof_state == AOF_OFF;
+}
+
+/* ======================= Cron: called every 100 ms ======================== */
+
+/* Add a sample to the instantaneous metric. This function computes the quotient
+ * of the increment of value and base, which is useful to record operation count
+ * per second, or the average time consumption of an operation.
+ *
+ * current_value - The dividend
+ * current_base - The divisor
+ *
+ * Note: the very first call only seeds last_sample_* and records no sample.
+ * */
+void trackInstantaneousMetric(int metric, long long current_value, long long current_base, long long factor) {
+ if (server.inst_metric[metric].last_sample_base > 0) {
+ long long base = current_base - server.inst_metric[metric].last_sample_base;
+ long long value = current_value - server.inst_metric[metric].last_sample_value;
+ long long avg = base > 0 ? (value * factor / base) : 0;
+ server.inst_metric[metric].samples[server.inst_metric[metric].idx] = avg;
+ server.inst_metric[metric].idx++;
+ server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES;
+ }
+ server.inst_metric[metric].last_sample_base = current_base;
+ server.inst_metric[metric].last_sample_value = current_value;
+}
+
+/* Return the mean of all the samples. */
+long long getInstantaneousMetric(int metric) {
+ int j;
+ long long sum = 0;
+
+ for (j = 0; j < STATS_METRIC_SAMPLES; j++)
+ sum += server.inst_metric[metric].samples[j];
+ return sum / STATS_METRIC_SAMPLES;
+}
+
+/* The client query buffer is an sds.c string that can end with a lot of
+ * free space not used, this function reclaims space if needed.
+ *
+ * The function always returns 0 as it never terminates the client. */
+int clientsCronResizeQueryBuffer(client *c) {
+ size_t querybuf_size = sdsalloc(c->querybuf);
+ time_t idletime = server.unixtime - c->lastinteraction;
+
+ /* Only resize the query buffer if the buffer is actually wasting at least a
+ * few kbytes */
+ if (sdsavail(c->querybuf) > 1024*4) {
+ /* There are two conditions to resize the query buffer: */
+ if (idletime > 2) {
+ /* 1) Query is idle for a long time. */
+ c->querybuf = sdsRemoveFreeSpace(c->querybuf, 1);
+ } else if (querybuf_size > PROTO_RESIZE_THRESHOLD && querybuf_size/2 > c->querybuf_peak) {
+ /* 2) Query buffer is too big for latest peak and is larger than
+ * resize threshold. Trim excess space but only up to a limit,
+ * not below the recent peak and current c->querybuf (which will
+ * be soon get used). If we're in the middle of a bulk then make
+ * sure not to resize to less than the bulk length. */
+ size_t resize = sdslen(c->querybuf);
+ if (resize < c->querybuf_peak) resize = c->querybuf_peak;
+ if (c->bulklen != -1 && resize < (size_t)c->bulklen + 2) resize = c->bulklen + 2;
+ c->querybuf = sdsResize(c->querybuf, resize, 1);
+ }
+ }
+
+ /* Reset the peak again to capture the peak memory usage in the next
+ * cycle. */
+ c->querybuf_peak = sdslen(c->querybuf);
+ /* We reset to either the current used, or currently processed bulk size,
+ * which ever is bigger. The +2 accounts for the trailing CRLF of the bulk. */
+ if (c->bulklen != -1 && (size_t)c->bulklen + 2 > c->querybuf_peak) c->querybuf_peak = c->bulklen + 2;
+ return 0;
+}
+
+/* The client output buffer can be adjusted to better fit the memory requirements.
+ *
+ * the logic is:
+ * in case the last observed peak size of the buffer equals the buffer size - we double the size
+ * in case the last observed peak size of the buffer is less than half the buffer size - we shrink by half.
+ * The buffer peak will be reset back to the buffer position every server.reply_buffer_peak_reset_time milliseconds
+ * The function always returns 0 as it never terminates the client. */
+int clientsCronResizeOutputBuffer(client *c, mstime_t now_ms) {
+
+ size_t new_buffer_size = 0;
+ char *oldbuf = NULL;
+ const size_t buffer_target_shrink_size = c->buf_usable_size/2;
+ const size_t buffer_target_expand_size = c->buf_usable_size*2;
+
+ /* in case the resizing is disabled return immediately */
+ if(!server.reply_buffer_resizing_enabled)
+ return 0;
+
+ if (buffer_target_shrink_size >= PROTO_REPLY_MIN_BYTES &&
+ c->buf_peak < buffer_target_shrink_size )
+ {
+ /* Shrink, but never below the observed peak (+1 so the peak fits). */
+ new_buffer_size = max(PROTO_REPLY_MIN_BYTES,c->buf_peak+1);
+ server.stat_reply_buffer_shrinks++;
+ } else if (buffer_target_expand_size < PROTO_REPLY_CHUNK_BYTES*2 &&
+ c->buf_peak == c->buf_usable_size)
+ {
+ /* The buffer was completely full: grow it, capped at the chunk size. */
+ new_buffer_size = min(PROTO_REPLY_CHUNK_BYTES,buffer_target_expand_size);
+ server.stat_reply_buffer_expands++;
+ }
+
+ serverAssertWithInfo(c, NULL, (!new_buffer_size) || (new_buffer_size >= (size_t)c->bufpos));
+
+ /* reset the peak value each server.reply_buffer_peak_reset_time seconds. in case the client will be idle
+ * it will start to shrink.
+ */
+ if (server.reply_buffer_peak_reset_time >=0 &&
+ now_ms - c->buf_peak_last_reset_time >= server.reply_buffer_peak_reset_time)
+ {
+ c->buf_peak = c->bufpos;
+ c->buf_peak_last_reset_time = now_ms;
+ }
+
+ if (new_buffer_size) {
+ /* Reallocate and preserve the pending reply bytes. */
+ oldbuf = c->buf;
+ c->buf = zmalloc_usable(new_buffer_size, &c->buf_usable_size);
+ memcpy(c->buf,oldbuf,c->bufpos);
+ zfree(oldbuf);
+ }
+ return 0;
+}
+
+/* This function is used in order to track clients using the biggest amount
+ * of memory in the latest few seconds. This way we can provide such information
+ * in the INFO output (clients section), without having to do an O(N) scan for
+ * all the clients.
+ *
+ * This is how it works. We have an array of CLIENTS_PEAK_MEM_USAGE_SLOTS slots
+ * where we track, for each, the biggest client output and input buffers we
+ * saw in that slot. Every slot corresponds to one of the latest seconds, since
+ * the array is indexed by doing UNIXTIME % CLIENTS_PEAK_MEM_USAGE_SLOTS.
+ *
+ * When we want to know what was recently the peak memory usage, we just scan
+ * such few slots searching for the maximum value. */
+#define CLIENTS_PEAK_MEM_USAGE_SLOTS 8
+size_t ClientsPeakMemInput[CLIENTS_PEAK_MEM_USAGE_SLOTS] = {0};
+size_t ClientsPeakMemOutput[CLIENTS_PEAK_MEM_USAGE_SLOTS] = {0};
+
+/* Record the input/output buffer usage of 'c' in the slot 'time_idx'. */
+int clientsCronTrackExpansiveClients(client *c, int time_idx) {
+ /* Input usage: query buffer plus argv vector and its elements. */
+ size_t in_usage = sdsZmallocSize(c->querybuf) + c->argv_len_sum +
+ (c->argv ? zmalloc_size(c->argv) : 0);
+ size_t out_usage = getClientOutputBufferMemoryUsage(c);
+
+ /* Track the biggest values observed so far in this slot. */
+ if (in_usage > ClientsPeakMemInput[time_idx]) ClientsPeakMemInput[time_idx] = in_usage;
+ if (out_usage > ClientsPeakMemOutput[time_idx]) ClientsPeakMemOutput[time_idx] = out_usage;
+
+ return 0; /* This function never terminates the client. */
+}
+
+/* All normal clients are placed in one of the "mem usage buckets" according
+ * to how much memory they currently use. We use this function to find the
+ * appropriate bucket based on a given memory usage value. The algorithm simply
+ * does a log2(mem) to get the bucket. This means, for example, that if a
+ * client's memory usage doubles it's moved up to the next bucket, if it's
+ * halved we move it down a bucket.
+ * For more details see CLIENT_MEM_USAGE_BUCKETS documentation in server.h. */
+static inline clientMemUsageBucket *getMemUsageBucket(size_t mem) {
+ int size_in_bits = 8*(int)sizeof(mem);
+ /* floor(log2(mem)) + 1 via count-leading-zeros; 0 maps to index 0. */
+ int clz = mem > 0 ? __builtin_clzl(mem) : size_in_bits;
+ int bucket_idx = size_in_bits - clz;
+ /* Clamp to the configured [MIN_LOG, MAX_LOG] range. */
+ if (bucket_idx > CLIENT_MEM_USAGE_BUCKET_MAX_LOG)
+ bucket_idx = CLIENT_MEM_USAGE_BUCKET_MAX_LOG;
+ else if (bucket_idx < CLIENT_MEM_USAGE_BUCKET_MIN_LOG)
+ bucket_idx = CLIENT_MEM_USAGE_BUCKET_MIN_LOG;
+ bucket_idx -= CLIENT_MEM_USAGE_BUCKET_MIN_LOG;
+ return &server.client_mem_usage_buckets[bucket_idx];
+}
+
+/*
+ * This method updates the client memory usage and update the
+ * server stats for client type.
+ *
+ * This method is called from the clientsCron to have updated
+ * stats for non CLIENT_TYPE_NORMAL/PUBSUB clients to accurately
+ * provide information around clients memory usage.
+ *
+ * It is also used in updateClientMemUsageAndBucket to have latest
+ * client memory usage information to place it into appropriate client memory
+ * usage bucket.
+ */
+void updateClientMemoryUsage(client *c) {
+ size_t mem = getClientMemoryUsage(c, NULL);
+ int type = getClientType(c);
+ /* Now that we have the memory used by the client, remove the old
+ * value from the old category, and add it back. */
+ server.stat_clients_type_memory[c->last_memory_type] -= c->last_memory_usage;
+ server.stat_clients_type_memory[type] += mem;
+ /* Remember what we added and where, to remove it next time. */
+ c->last_memory_type = type;
+ c->last_memory_usage = mem;
+}
+
+/* Return 1 if client eviction may apply to 'c': eviction must be enabled
+ * (maxmemory_clients != 0), the client must not be flagged no-evict, and
+ * only normal and pubsub clients are candidates. */
+int clientEvictionAllowed(client *c) {
+ if (server.maxmemory_clients == 0 || c->flags & CLIENT_NO_EVICT) {
+ return 0;
+ }
+ int type = getClientType(c);
+ return (type == CLIENT_TYPE_NORMAL || type == CLIENT_TYPE_PUBSUB);
+}
+
+
+/* This function is used to cleanup the client's previously tracked memory usage.
+ * This is called during incremental client memory usage tracking as well as
+ * used to reset when client to bucket allocation is not required when
+ * client eviction is disabled.
+ *
+ * The bucket's sum is always decremented; the client is unlinked from the
+ * bucket's list only when 'allow_eviction' is false. */
+void removeClientFromMemUsageBucket(client *c, int allow_eviction) {
+ if (c->mem_usage_bucket) {
+ c->mem_usage_bucket->mem_usage_sum -= c->last_memory_usage;
+ /* If this client can't be evicted then remove it from the mem usage
+ * buckets */
+ if (!allow_eviction) {
+ listDelNode(c->mem_usage_bucket->clients, c->mem_usage_bucket_node);
+ c->mem_usage_bucket = NULL;
+ c->mem_usage_bucket_node = NULL;
+ }
+ }
+}
+
+/* Called on specific clients when something changed their buffers, so we
+ * can track clients' memory and enforce clients' maxmemory in real time.
+ *
+ * This also adds the client to the correct memory usage bucket. Each bucket
+ * contains all clients with roughly the same amount of memory. This way we
+ * group together clients consuming about the same amount of memory and can
+ * quickly free them in case we reach maxmemory-clients (client eviction).
+ *
+ * Note: This function filters out clients of type no-evict, master or
+ * replica regardless of whether eviction is enabled or not, so the memory
+ * usage we get for these types of clients via the INFO command may be out
+ * of date.
+ *
+ * Returns 1 if client eviction for this client is allowed, 0 otherwise.
+ */
+int updateClientMemUsageAndBucket(client *c) {
+    /* Bucket lists are not thread safe; only the main thread may run this. */
+    serverAssert(io_threads_op == IO_THREADS_OP_IDLE);
+    int allow_eviction = clientEvictionAllowed(c);
+    /* Subtract the stale usage from the old bucket (and unlink the client
+     * from it entirely when eviction is not allowed). */
+    removeClientFromMemUsageBucket(c, allow_eviction);
+
+    if (!allow_eviction) {
+        return 0;
+    }
+
+    /* Update client memory usage. */
+    updateClientMemoryUsage(c);
+
+    /* Update the client in the mem usage buckets */
+    clientMemUsageBucket *bucket = getMemUsageBucket(c->last_memory_usage);
+    bucket->mem_usage_sum += c->last_memory_usage;
+    if (bucket != c->mem_usage_bucket) {
+        /* Usage crossed a bucket boundary: move the client from its old
+         * bucket (if any) to the tail of the new one. */
+        if (c->mem_usage_bucket)
+            listDelNode(c->mem_usage_bucket->clients,
+                        c->mem_usage_bucket_node);
+        c->mem_usage_bucket = bucket;
+        listAddNodeTail(bucket->clients, c);
+        c->mem_usage_bucket_node = listLast(bucket->clients);
+    }
+    return 1;
+}
+
+/* Return, via the two output pointers, the maximum input and output
+ * buffer sizes among the per-second samples tracked by
+ * clientsCronTrackExpansiveClients(). */
+void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
+    size_t max_in = 0, max_out = 0;
+    for (int slot = 0; slot < CLIENTS_PEAK_MEM_USAGE_SLOTS; slot++) {
+        size_t in = ClientsPeakMemInput[slot];
+        size_t out = ClientsPeakMemOutput[slot];
+        if (in > max_in) max_in = in;
+        if (out > max_out) max_out = out;
+    }
+    *in_usage = max_in;
+    *out_usage = max_out;
+}
+
+/* This function is called by serverCron() and is used in order to perform
+ * operations on clients that are important to perform constantly. For instance
+ * we use this function in order to disconnect clients after a timeout, including
+ * clients blocked in some blocking command with a non-zero timeout.
+ *
+ * The function makes some effort to process all the clients every second, even
+ * if this cannot be strictly guaranteed, since serverCron() may be called with
+ * an actual frequency lower than server.hz in case of latency events like slow
+ * commands.
+ *
+ * It is very important for this function, and the functions it calls, to be
+ * very fast: sometimes Redis has tens of thousands of connected clients, and the
+ * default server.hz value is 10, so sometimes here we need to process thousands
+ * of clients per second, turning this function into a source of latency.
+ */
+#define CLIENTS_CRON_MIN_ITERATIONS 5
+void clientsCron(void) {
+    /* Try to process at least numclients/server.hz of clients
+     * per call. Since normally (if there are no big latency events) this
+     * function is called server.hz times per second, in the average case we
+     * process all the clients in 1 second. */
+    int numclients = listLength(server.clients);
+    int iterations = numclients/server.hz;
+    mstime_t now = mstime();
+
+    /* Process at least a few clients while we are at it, even if we need
+     * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract
+     * of processing each client once per second. */
+    if (iterations < CLIENTS_CRON_MIN_ITERATIONS)
+        iterations = (numclients < CLIENTS_CRON_MIN_ITERATIONS) ?
+                     numclients : CLIENTS_CRON_MIN_ITERATIONS;
+
+
+    /* One peak-memory sample slot per second of wall clock. */
+    int curr_peak_mem_usage_slot = server.unixtime % CLIENTS_PEAK_MEM_USAGE_SLOTS;
+    /* Always zero the next sample, so that when we switch to that second, we'll
+     * only register samples that are greater in that second without considering
+     * the history of such slot.
+     *
+     * Note: our index may jump to any random position if serverCron() is not
+     * called for some reason with the normal frequency, for instance because
+     * some slow command is called taking multiple seconds to execute. In that
+     * case our array may end containing data which is potentially older
+     * than CLIENTS_PEAK_MEM_USAGE_SLOTS seconds: however this is not a problem
+     * since here we want just to track if "recently" there were very expansive
+     * clients from the POV of memory usage. */
+    int zeroidx = (curr_peak_mem_usage_slot+1) % CLIENTS_PEAK_MEM_USAGE_SLOTS;
+    ClientsPeakMemInput[zeroidx] = 0;
+    ClientsPeakMemOutput[zeroidx] = 0;
+
+
+    while(listLength(server.clients) && iterations--) {
+        client *c;
+        listNode *head;
+
+        /* Take the current head, process, and then rotate the head to tail.
+         * This way we can fairly iterate all clients step by step. */
+        head = listFirst(server.clients);
+        c = listNodeValue(head);
+        listRotateHeadToTail(server.clients);
+        /* The following functions do different service checks on the client.
+         * The protocol is that they return non-zero if the client was
+         * terminated. */
+        if (clientsCronHandleTimeout(c,now)) continue;
+        if (clientsCronResizeQueryBuffer(c)) continue;
+        if (clientsCronResizeOutputBuffer(c,now)) continue;
+
+        if (clientsCronTrackExpansiveClients(c, curr_peak_mem_usage_slot)) continue;
+
+        /* Iterating all the clients in getMemoryOverheadData() is too slow and
+         * in turn would make the INFO command too slow. So we perform this
+         * computation incrementally and track the (not instantaneous but updated
+         * to the second) total memory used by clients using clientsCron() in
+         * a more incremental way (depending on server.hz).
+         * If client eviction is enabled, update the bucket as well. */
+        if (!updateClientMemUsageAndBucket(c))
+            updateClientMemoryUsage(c);
+
+        if (closeClientOnOutputBufferLimitReached(c, 0)) continue;
+    }
+}
+
+/* This function handles 'background' operations we are required to do
+ * incrementally in Redis databases, such as active key expiring, resizing,
+ * rehashing. */
+void databasesCron(void) {
+    /* Expire keys by random sampling. Not required for slaves
+     * as master will synthesize DELs for us. */
+    if (server.active_expire_enabled) {
+        if (iAmMaster()) {
+            activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
+        } else {
+            /* Replicas only expire the writable-replica keys they own. */
+            expireSlaveKeys();
+        }
+    }
+
+    /* Defrag keys gradually. */
+    activeDefragCycle();
+
+    /* Perform hash tables rehashing if needed, but only if there are no
+     * other processes saving the DB on disk. Otherwise rehashing is bad
+     * as will cause a lot of copy-on-write of memory pages. */
+    if (!hasActiveChildProcess()) {
+        /* We use global counters so if we stop the computation at a given
+         * DB we'll be able to start from the successive in the next
+         * cron loop iteration. These persist across calls (function-static). */
+        static unsigned int resize_db = 0;
+        static unsigned int rehash_db = 0;
+        int dbs_per_call = CRON_DBS_PER_CALL;
+        int j;
+
+        /* Don't test more DBs than we have. */
+        if (dbs_per_call > server.dbnum) dbs_per_call = server.dbnum;
+
+        /* Resize */
+        for (j = 0; j < dbs_per_call; j++) {
+            tryResizeHashTables(resize_db % server.dbnum);
+            resize_db++;
+        }
+
+        /* Rehash */
+        if (server.activerehashing) {
+            for (j = 0; j < dbs_per_call; j++) {
+                int work_done = incrementallyRehash(rehash_db);
+                if (work_done) {
+                    /* If the function did some work, stop here, we'll do
+                     * more at the next cron loop. */
+                    break;
+                } else {
+                    /* If this db didn't need rehash, we'll try the next one. */
+                    rehash_db++;
+                    rehash_db %= server.dbnum;
+                }
+            }
+        }
+    }
+}
+
+/* Refresh the cached time fields (ustime, mstime, unixtime) from the
+ * given microsecond timestamp. server.unixtime is set atomically since
+ * it may be read from other threads. If 'update_daylight_info' is
+ * non-zero, also refresh the cached daylight saving flag. */
+static inline void updateCachedTimeWithUs(int update_daylight_info, const long long ustime) {
+    server.ustime = ustime;
+    server.mstime = server.ustime / 1000;
+    time_t unixtime = server.mstime / 1000;
+    atomicSet(server.unixtime, unixtime);
+
+    /* To get information about daylight saving time, we need to call
+     * localtime_r and cache the result. However calling localtime_r in this
+     * context is safe since we will never fork() while here, in the main
+     * thread. The logging function will call a thread safe version of
+     * localtime that has no locks. */
+    if (update_daylight_info) {
+        struct tm tm;
+        time_t ut = server.unixtime;
+        localtime_r(&ut,&tm);
+        server.daylight_active = tm.tm_isdst;
+    }
+}
+
+/* We take a cached value of the unix time in the global state because with
+ * virtual memory and aging we would otherwise need to store the current time
+ * in objects at every object access, and accuracy is not needed. Accessing
+ * a global var is a lot faster than calling time(NULL).
+ *
+ * This function should be fast because it is called at every command execution
+ * in call(), so it is possible to decide if to update the daylight saving
+ * info or not using the 'update_daylight_info' argument. Normally we update
+ * such info only when calling this function from serverCron() but not when
+ * calling it from call(). */
+void updateCachedTime(int update_daylight_info) {
+    const long long us = ustime();
+    updateCachedTimeWithUs(update_daylight_info, us);
+}
+
+/* Perform the required operations to enter an execution unit.
+ * If we are already inside an execution unit there is nothing to do
+ * beyond increasing the nesting level; otherwise we refresh the cached
+ * time so the same snapshot is used for the whole execution unit.
+ * update_cached_time - if 0, never touch the cached time.
+ * us - if not zero, use this timestamp; otherwise sample the clock. */
+void enterExecutionUnit(int update_cached_time, long long us) {
+    /* The post-increment must always run, even when we return early. */
+    int was_outermost = (server.execution_nesting++ == 0);
+    if (!was_outermost || !update_cached_time) return;
+    long long now = us ? us : ustime();
+    updateCachedTimeWithUs(0, now);
+    server.cmd_time_snapshot = server.mstime;
+}
+
+/* Leave the current execution unit by decreasing the nesting level. */
+void exitExecutionUnit(void) {
+    server.execution_nesting--;
+}
+
+/* Reap terminated child processes (RDB save, AOF rewrite, module forks)
+ * and dispatch the appropriate done-handler. Non-blocking: uses
+ * waitpid(WNOHANG), so it returns immediately if no child changed state. */
+void checkChildrenDone(void) {
+    int statloc = 0;
+    pid_t pid;
+
+    /* waitpid() returns 0 when no child has changed state; anything else
+     * is either a reaped pid or -1 on error. */
+    if ((pid = waitpid(-1, &statloc, WNOHANG)) != 0) {
+        int exitcode = WIFEXITED(statloc) ? WEXITSTATUS(statloc) : -1;
+        int bysignal = 0;
+
+        if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
+
+        /* sigKillChildHandler catches the signal and calls exit(), but we
+         * must make sure not to flag lastbgsave_status, etc incorrectly.
+         * We could directly terminate the child process via SIGUSR1
+         * without handling it */
+        if (exitcode == SERVER_CHILD_NOERROR_RETVAL) {
+            bysignal = SIGUSR1;
+            exitcode = 1;
+        }
+
+        if (pid == -1) {
+            serverLog(LL_WARNING,"waitpid() returned an error: %s. "
+                "child_type: %s, child_pid = %d",
+                strerror(errno),
+                strChildType(server.child_type),
+                (int) server.child_pid);
+        } else if (pid == server.child_pid) {
+            if (server.child_type == CHILD_TYPE_RDB) {
+                backgroundSaveDoneHandler(exitcode, bysignal);
+            } else if (server.child_type == CHILD_TYPE_AOF) {
+                backgroundRewriteDoneHandler(exitcode, bysignal);
+            } else if (server.child_type == CHILD_TYPE_MODULE) {
+                ModuleForkDoneHandler(exitcode, bysignal);
+            } else {
+                serverPanic("Unknown child type %d for child pid %d", server.child_type, server.child_pid);
+                /* Defensive: serverPanic() aborts, so this is unreachable. */
+                exit(1);
+            }
+            if (!bysignal && exitcode == 0) receiveChildInfo();
+            resetChildState();
+        } else {
+            /* Not our tracked child: it may be a Lua debugging session. */
+            if (!ldbRemoveChild(pid)) {
+                serverLog(LL_WARNING,
+                          "Warning, detected child with unmatched pid: %ld",
+                          (long) pid);
+            }
+        }
+
+        /* start any pending forks immediately. */
+        replicationStartPendingFork();
+    }
+}
+
+/* Called from serverCron() and whileBlockedCron() to update cached memory
+ * metrics (peak memory, RSS, allocator stats). */
+void cronUpdateMemoryStats(void) {
+    /* Record the max memory used since the server was started. Sample
+     * zmalloc_used_memory() once instead of twice for the comparison
+     * and the assignment. */
+    size_t zmalloc_used = zmalloc_used_memory();
+    if (zmalloc_used > server.stat_peak_memory)
+        server.stat_peak_memory = zmalloc_used;
+
+    run_with_period(100) {
+        /* Sample the RSS and other metrics here since this is a relatively slow call.
+         * We must sample the zmalloc_used at the same time we take the rss, otherwise
+         * the frag ratio calculation may be off (ratio of two samples at different times) */
+        server.cron_malloc_stats.process_rss = zmalloc_get_rss();
+        server.cron_malloc_stats.zmalloc_used = zmalloc_used_memory();
+        /* Sampling the allocator info can be slow too.
+         * The fragmentation ratio it'll show is potentially more accurate as
+         * it excludes other RSS pages such as: shared libraries, LUA and other non-zmalloc
+         * allocations, and allocator reserved pages that can be purged (all not actual frag) */
+        zmalloc_get_allocator_info(&server.cron_malloc_stats.allocator_allocated,
+                                   &server.cron_malloc_stats.allocator_active,
+                                   &server.cron_malloc_stats.allocator_resident);
+        /* in case the allocator isn't providing these stats, fake them so that
+         * fragmentation info still shows some (inaccurate metrics) */
+        if (!server.cron_malloc_stats.allocator_resident) {
+            /* LUA memory isn't part of zmalloc_used, but it is part of the process RSS,
+             * so we must deduct it in order to be able to calculate correct
+             * "allocator fragmentation" ratio */
+            size_t lua_memory = evalMemory();
+            server.cron_malloc_stats.allocator_resident = server.cron_malloc_stats.process_rss - lua_memory;
+        }
+        if (!server.cron_malloc_stats.allocator_active)
+            server.cron_malloc_stats.allocator_active = server.cron_malloc_stats.allocator_resident;
+        if (!server.cron_malloc_stats.allocator_allocated)
+            server.cron_malloc_stats.allocator_allocated = server.cron_malloc_stats.zmalloc_used;
+    }
+}
+
+/* This is our timer interrupt, called server.hz times per second.
+ * Here is where we do a number of things that need to be done asynchronously.
+ * For instance:
+ *
+ * - Active expired keys collection (it is also performed in a lazy way on
+ *   lookup).
+ * - Software watchdog.
+ * - Update some statistic.
+ * - Incremental rehashing of the DBs hash tables.
+ * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
+ * - Clients timeout of different kinds.
+ * - Replication reconnection.
+ * - Many more...
+ *
+ * Everything directly called here will be called server.hz times per second,
+ * so in order to throttle execution of things we want to do less frequently
+ * a macro is used: run_with_period(milliseconds) { .... }
+ *
+ * Returns the delay in milliseconds until the next invocation. */
+
+int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
+    int j;
+    UNUSED(eventLoop);
+    UNUSED(id);
+    UNUSED(clientData);
+
+    /* Software watchdog: deliver the SIGALRM that will reach the signal
+     * handler if we don't return here fast enough. */
+    if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
+
+    server.hz = server.config_hz;
+    /* Adapt the server.hz value to the number of configured clients. If we have
+     * many clients, we want to call serverCron() with an higher frequency. */
+    if (server.dynamic_hz) {
+        while (listLength(server.clients) / server.hz >
+               MAX_CLIENTS_PER_CLOCK_TICK)
+        {
+            server.hz *= 2;
+            if (server.hz > CONFIG_MAX_HZ) {
+                server.hz = CONFIG_MAX_HZ;
+                break;
+            }
+        }
+    }
+
+    /* for debug purposes: skip actual cron work if pause_cron is on */
+    if (server.pause_cron) return 1000/server.hz;
+
+    monotime cron_start = getMonotonicUs();
+
+    /* Refresh the per-second instantaneous metrics (ops/sec, net I/O,
+     * event loop cycles/duration). The network counters are updated by
+     * other threads, hence the atomic reads. */
+    run_with_period(100) {
+        long long stat_net_input_bytes, stat_net_output_bytes;
+        long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
+        atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
+        atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
+        atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
+        atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);
+        monotime current_time = getMonotonicUs();
+        long long factor = 1000000;  // us
+        trackInstantaneousMetric(STATS_METRIC_COMMAND, server.stat_numcommands, current_time, factor);
+        trackInstantaneousMetric(STATS_METRIC_NET_INPUT, stat_net_input_bytes + stat_net_repl_input_bytes,
+                                 current_time, factor);
+        trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT, stat_net_output_bytes + stat_net_repl_output_bytes,
+                                 current_time, factor);
+        trackInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION, stat_net_repl_input_bytes, current_time,
+                                 factor);
+        trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION, stat_net_repl_output_bytes,
+                                 current_time, factor);
+        trackInstantaneousMetric(STATS_METRIC_EL_CYCLE, server.duration_stats[EL_DURATION_TYPE_EL].cnt,
+                                 current_time, factor);
+        trackInstantaneousMetric(STATS_METRIC_EL_DURATION, server.duration_stats[EL_DURATION_TYPE_EL].sum,
+                                 server.duration_stats[EL_DURATION_TYPE_EL].cnt, 1);
+    }
+
+    /* We have just LRU_BITS bits per object for LRU information.
+     * So we use an (eventually wrapping) LRU clock.
+     *
+     * Note that even if the counter wraps it's not a big problem,
+     * everything will still work but some object will appear younger
+     * to Redis. However for this to happen a given object should never be
+     * touched for all the time needed to the counter to wrap, which is
+     * not likely.
+     *
+     * Note that you can change the resolution altering the
+     * LRU_CLOCK_RESOLUTION define. */
+    server.lruclock = getLRUClock();
+
+    cronUpdateMemoryStats();
+
+    /* We received a SIGTERM or SIGINT, shutting down here in a safe way, as it is
+     * not ok doing so inside the signal handler. */
+    if (server.shutdown_asap && !isShutdownInitiated()) {
+        int shutdownFlags = SHUTDOWN_NOFLAGS;
+        if (server.last_sig_received == SIGINT && server.shutdown_on_sigint)
+            shutdownFlags = server.shutdown_on_sigint;
+        else if (server.last_sig_received == SIGTERM && server.shutdown_on_sigterm)
+            shutdownFlags = server.shutdown_on_sigterm;
+
+        if (prepareForShutdown(shutdownFlags) == C_OK) exit(0);
+    } else if (isShutdownInitiated()) {
+        if (server.mstime >= server.shutdown_mstime || isReadyToShutdown()) {
+            if (finishShutdown() == C_OK) exit(0);
+            /* Shutdown failed. Continue running. An error has been logged. */
+        }
+    }
+
+    /* Show some info about non-empty databases */
+    if (server.verbosity <= LL_VERBOSE) {
+        run_with_period(5000) {
+            for (j = 0; j < server.dbnum; j++) {
+                long long size, used, vkeys;
+
+                size = dictSlots(server.db[j].dict);
+                used = dictSize(server.db[j].dict);
+                vkeys = dictSize(server.db[j].expires);
+                if (used || vkeys) {
+                    serverLog(LL_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
+                }
+            }
+        }
+    }
+
+    /* Show information about connected clients */
+    if (!server.sentinel_mode) {
+        run_with_period(5000) {
+            serverLog(LL_DEBUG,
+                "%lu clients connected (%lu replicas), %zu bytes in use",
+                listLength(server.clients)-listLength(server.slaves),
+                listLength(server.slaves),
+                zmalloc_used_memory());
+        }
+    }
+
+    /* We need to do a few operations on clients asynchronously. */
+    clientsCron();
+
+    /* Handle background operations on Redis databases. */
+    databasesCron();
+
+    /* Start a scheduled AOF rewrite if this was requested by the user while
+     * a BGSAVE was in progress. */
+    if (!hasActiveChildProcess() &&
+        server.aof_rewrite_scheduled &&
+        !aofRewriteLimited())
+    {
+        rewriteAppendOnlyFileBackground();
+    }
+
+    /* Check if a background saving or AOF rewrite in progress terminated. */
+    if (hasActiveChildProcess() || ldbPendingChildren())
+    {
+        run_with_period(1000) receiveChildInfo();
+        checkChildrenDone();
+    } else {
+        /* If there is not a background saving/rewrite in progress check if
+         * we have to save/rewrite now. */
+        for (j = 0; j < server.saveparamslen; j++) {
+            struct saveparam *sp = server.saveparams+j;
+
+            /* Save if we reached the given amount of changes,
+             * the given amount of seconds, and if the latest bgsave was
+             * successful or if, in case of an error, at least
+             * CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */
+            if (server.dirty >= sp->changes &&
+                server.unixtime-server.lastsave > sp->seconds &&
+                (server.unixtime-server.lastbgsave_try >
+                 CONFIG_BGSAVE_RETRY_DELAY ||
+                 server.lastbgsave_status == C_OK))
+            {
+                serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
+                    sp->changes, (int)sp->seconds);
+                rdbSaveInfo rsi, *rsiptr;
+                rsiptr = rdbPopulateSaveInfo(&rsi);
+                rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE);
+                break;
+            }
+        }
+
+        /* Trigger an AOF rewrite if needed. */
+        if (server.aof_state == AOF_ON &&
+            !hasActiveChildProcess() &&
+            server.aof_rewrite_perc &&
+            server.aof_current_size > server.aof_rewrite_min_size)
+        {
+            long long base = server.aof_rewrite_base_size ?
+                server.aof_rewrite_base_size : 1;
+            long long growth = (server.aof_current_size*100/base) - 100;
+            if (growth >= server.aof_rewrite_perc && !aofRewriteLimited()) {
+                serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
+                rewriteAppendOnlyFileBackground();
+            }
+        }
+    }
+    /* Just for the sake of defensive programming, to avoid forgetting to
+     * call this function when needed. */
+    updateDictResizePolicy();
+
+
+    /* AOF postponed flush: Try at every cron cycle if the slow fsync
+     * completed. */
+    if ((server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) &&
+        server.aof_flush_postponed_start)
+    {
+        flushAppendOnlyFile(0);
+    }
+
+    /* AOF write errors: in this case we have a buffer to flush as well and
+     * clear the AOF error in case of success to make the DB writable again,
+     * however to try every second is enough in case of 'hz' is set to
+     * a higher frequency. */
+    run_with_period(1000) {
+        if ((server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) &&
+            server.aof_last_write_status == C_ERR)
+        {
+            flushAppendOnlyFile(0);
+        }
+    }
+
+    /* Clear the paused actions state if needed. */
+    updatePausedActions();
+
+    /* Replication cron function -- used to reconnect to master,
+     * detect transfer failures, start background RDB transfers and so forth.
+     *
+     * If Redis is trying to failover then run the replication cron faster so
+     * progress on the handshake happens more quickly. */
+    if (server.failover_state != NO_FAILOVER) {
+        run_with_period(100) replicationCron();
+    } else {
+        run_with_period(1000) replicationCron();
+    }
+
+    /* Run the Redis Cluster cron. */
+    run_with_period(100) {
+        if (server.cluster_enabled) clusterCron();
+    }
+
+    /* Run the Sentinel timer if we are in sentinel mode. */
+    if (server.sentinel_mode) sentinelTimer();
+
+    /* Cleanup expired MIGRATE cached sockets. */
+    run_with_period(1000) {
+        migrateCloseTimedoutSockets();
+    }
+
+    /* Stop the I/O threads if we don't have enough pending work. */
+    stopThreadedIOIfNeeded();
+
+    /* Resize tracking keys table if needed. This is also done at every
+     * command execution, but we want to be sure that if the last command
+     * executed changes the value via CONFIG SET, the server will perform
+     * the operation even if completely idle. */
+    if (server.tracking_clients) trackingLimitUsedSlots();
+
+    /* Start a scheduled BGSAVE if the corresponding flag is set. This is
+     * useful when we are forced to postpone a BGSAVE because an AOF
+     * rewrite is in progress.
+     *
+     * Note: this code must be after the replicationCron() call above so
+     * make sure when refactoring this file to keep this order. This is useful
+     * because we want to give priority to RDB savings for replication. */
+    if (!hasActiveChildProcess() &&
+        server.rdb_bgsave_scheduled &&
+        (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
+         server.lastbgsave_status == C_OK))
+    {
+        rdbSaveInfo rsi, *rsiptr;
+        rsiptr = rdbPopulateSaveInfo(&rsi);
+        if (rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE) == C_OK)
+            server.rdb_bgsave_scheduled = 0;
+    }
+
+    run_with_period(100) {
+        if (moduleCount()) modulesCron();
+    }
+
+    /* Fire the cron loop modules event. */
+    RedisModuleCronLoopV1 ei = {REDISMODULE_CRON_LOOP_VERSION,server.hz};
+    moduleFireServerEvent(REDISMODULE_EVENT_CRON_LOOP,
+                          0,
+                          &ei);
+
+    server.cronloops++;
+
+    /* Record how long this cron run took; beforeSleep() folds this into
+     * the event-loop duration metrics. */
+    server.el_cron_duration = getMonotonicUs() - cron_start;
+
+    return 1000/server.hz;
+}
+
+
+/* Mark the beginning of a blocking operation (e.g. RDB/AOF loading or a
+ * blocked script). On the outermost entry, refresh the cached time and
+ * arm blocked_last_cron so whileBlockedCron() can pace itself. */
+void blockingOperationStarts(void) {
+    int was_nested = (server.blocking_op_nesting != 0);
+    server.blocking_op_nesting++;
+    if (was_nested) return;
+    updateCachedTime(0);
+    server.blocked_last_cron = server.mstime;
+}
+
+/* Mark the end of a blocking operation. When the outermost level is
+ * left, clear blocked_last_cron. */
+void blockingOperationEnds(void) {
+    server.blocking_op_nesting--;
+    if (server.blocking_op_nesting == 0) {
+        server.blocked_last_cron = 0;
+    }
+}
+
+/* This function fills in the role of serverCron during RDB or AOF loading, and
+ * also during blocked scripts.
+ * It attempts to do its duties at a similar rate as the configured server.hz,
+ * and updates cronloops variable so that similarly to serverCron, the
+ * run_with_period can be used. */
+void whileBlockedCron(void) {
+    /* Here we may want to perform some cron jobs (normally done server.hz times
+     * per second). */
+
+    /* Since this function depends on a call to blockingOperationStarts, let's
+     * make sure it was done. */
+    serverAssert(server.blocked_last_cron);
+
+    /* In case we were called too soon, leave right away. This way one time
+     * jobs after the loop below don't need an if. and we don't bother to start
+     * latency monitor if this function is called too often. */
+    if (server.blocked_last_cron >= server.mstime)
+        return;
+
+    mstime_t latency;
+    latencyStartMonitor(latency);
+
+    /* In some cases we may be called with big intervals, so we may need to do
+     * extra work here. This is because some of the functions in serverCron rely
+     * on the fact that it is performed every 10 ms or so. For instance, if
+     * activeDefragCycle needs to utilize 25% cpu, it will utilize 2.5ms, so we
+     * need to call it multiple times. */
+    long hz_ms = 1000/server.hz;
+    while (server.blocked_last_cron < server.mstime) {
+
+        /* Defrag keys gradually. */
+        activeDefragCycle();
+
+        /* Advance by one simulated cron tick. */
+        server.blocked_last_cron += hz_ms;
+
+        /* Increment cronloop so that run_with_period works. */
+        server.cronloops++;
+    }
+
+    /* Other cron jobs do not need to be done in a loop. No need to check
+     * server.blocked_last_cron since we have an early exit at the top. */
+
+    /* Update memory stats during loading (excluding blocked scripts) */
+    if (server.loading) cronUpdateMemoryStats();
+
+    latencyEndMonitor(latency);
+    latencyAddSampleIfNeeded("while-blocked-cron",latency);
+
+    /* We received a SIGTERM during loading, shutting down here in a safe way,
+     * as it isn't ok doing so inside the signal handler. */
+    if (server.shutdown_asap && server.loading) {
+        if (prepareForShutdown(SHUTDOWN_NOSAVE) == C_OK) exit(0);
+        serverLog(LL_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
+        server.shutdown_asap = 0;
+        server.last_sig_received = 0;
+    }
+}
+
+/* Broadcast "REPLCONF GETACK *" to all replicas so they report their
+ * replication offsets back to us. */
+static void sendGetackToReplicas(void) {
+    robj *argv[3] = {
+        shared.replconf,
+        shared.getack,
+        shared.special_asterick,  /* Not used argument. */
+    };
+    replicationFeedSlaves(server.slaves, -1, argv, 3);
+}
+
+extern int ProcessingEventsWhileBlocked;
+
+/* This function gets called every time Redis is entering the
+ * main loop of the event driven library, that is, before to sleep
+ * for ready file descriptors.
+ *
+ * Note: This function is (currently) called from two functions:
+ * 1. aeMain - The main server loop
+ * 2. processEventsWhileBlocked - Process clients during RDB/AOF load
+ *
+ * If it was called from processEventsWhileBlocked we don't want
+ * to perform all actions (For example, we don't want to expire
+ * keys), but we do need to perform some actions.
+ *
+ * The most important is freeClientsInAsyncFreeQueue but we also
+ * call some other low-risk functions. */
+void beforeSleep(struct aeEventLoop *eventLoop) {
+ UNUSED(eventLoop);
+
+ size_t zmalloc_used = zmalloc_used_memory();
+ if (zmalloc_used > server.stat_peak_memory)
+ server.stat_peak_memory = zmalloc_used;
+
+ /* Just call a subset of vital functions in case we are re-entering
+ * the event loop from processEventsWhileBlocked(). Note that in this
+ * case we keep track of the number of events we are processing, since
+ * processEventsWhileBlocked() wants to stop ASAP if there are no longer
+ * events to handle. */
+ if (ProcessingEventsWhileBlocked) {
+ uint64_t processed = 0;
+ processed += handleClientsWithPendingReadsUsingThreads();
+ processed += connTypeProcessPendingData();
+ if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
+ flushAppendOnlyFile(0);
+ processed += handleClientsWithPendingWrites();
+ processed += freeClientsInAsyncFreeQueue();
+ server.events_processed_while_blocked += processed;
+ return;
+ }
+
+ /* We should handle pending reads clients ASAP after event loop. */
+ handleClientsWithPendingReadsUsingThreads();
+
+ /* Handle pending data(typical TLS). (must be done before flushAppendOnlyFile) */
+ connTypeProcessPendingData();
+
+ /* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
+ aeSetDontWait(server.el, connTypeHasPendingData());
+
+ /* Call the Redis Cluster before sleep function. Note that this function
+ * may change the state of Redis Cluster (from ok to fail or vice versa),
+ * so it's a good idea to call it before serving the unblocked clients
+ * later in this function, must be done before blockedBeforeSleep. */
+ if (server.cluster_enabled) clusterBeforeSleep();
+
+ /* Handle blocked clients.
+ * must be done before flushAppendOnlyFile, in case of appendfsync=always,
+ * since the unblocked clients may write data. */
+ blockedBeforeSleep();
+
+ /* Record cron time in beforeSleep, which is the sum of active-expire, active-defrag and all other
+ * tasks done by cron and beforeSleep, but excluding read, write and AOF, that are counted by other
+ * sets of metrics. */
+ monotime cron_start_time_before_aof = getMonotonicUs();
+
+ /* Run a fast expire cycle (the called function will return
+ * ASAP if a fast cycle is not needed). */
+ if (server.active_expire_enabled && iAmMaster())
+ activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
+
+ if (moduleCount()) {
+ moduleFireServerEvent(REDISMODULE_EVENT_EVENTLOOP,
+ REDISMODULE_SUBEVENT_EVENTLOOP_BEFORE_SLEEP,
+ NULL);
+ }
+
+ /* Send all the slaves an ACK request if at least one client blocked
+ * during the previous event loop iteration. Note that we do this after
+ * processUnblockedClients(), so if there are multiple pipelined WAITs
+ * and the just unblocked WAIT gets blocked again, we don't have to wait
+ * a server cron cycle in absence of other event loop events. See #6623.
+ *
+ * We also don't send the ACKs while clients are paused, since it can
+ * increment the replication backlog, they'll be sent after the pause
+ * if we are still the master. */
+ if (server.get_ack_from_slaves && !isPausedActionsWithUpdate(PAUSE_ACTION_REPLICA)) {
+ sendGetackToReplicas();
+ server.get_ack_from_slaves = 0;
+ }
+
+ /* We may have received updates from clients about their current offset. NOTE:
+ * this can't be done where the ACK is received since failover will disconnect
+ * our clients. */
+ updateFailoverStatus();
+
+ /* Since we rely on current_client to send scheduled invalidation messages
+ * we have to flush them after each command, so when we get here, the list
+ * must be empty. */
+ serverAssert(listLength(server.tracking_pending_keys) == 0);
+ serverAssert(listLength(server.pending_push_messages) == 0);
+
+ /* Send the invalidation messages to clients participating to the
+ * client side caching protocol in broadcasting (BCAST) mode. */
+ trackingBroadcastInvalidationMessages();
+
+ /* Record time consumption of AOF writing. */
+ monotime aof_start_time = getMonotonicUs();
+ /* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing below. */
+ monotime duration_before_aof = aof_start_time - cron_start_time_before_aof;
+
+ /* Write the AOF buffer on disk,
+ * must be done before handleClientsWithPendingWritesUsingThreads,
+ * in case of appendfsync=always. */
+ if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
+ flushAppendOnlyFile(0);
+
+ /* Record time consumption of AOF writing. */
+ durationAddSample(EL_DURATION_TYPE_AOF, getMonotonicUs() - aof_start_time);
+
+ /* Update the fsynced replica offset.
+ * If an initial rewrite is in progress then not all data is guaranteed to have actually been
+ * persisted to disk yet, so we cannot update the field. We will wait for the rewrite to complete. */
+ if (server.aof_state == AOF_ON && server.fsynced_reploff != -1) {
+ long long fsynced_reploff_pending;
+ atomicGet(server.fsynced_reploff_pending, fsynced_reploff_pending);
+ server.fsynced_reploff = fsynced_reploff_pending;
+ }
+
+ /* Handle writes with pending output buffers. */
+ handleClientsWithPendingWritesUsingThreads();
+
+ /* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing above. */
+ monotime cron_start_time_after_write = getMonotonicUs();
+
+ /* Close clients that need to be closed asynchronous */
+ freeClientsInAsyncFreeQueue();
+
+ /* Incrementally trim replication backlog, 10 times the normal speed is
+ * to free replication backlog as much as possible. */
+ if (server.repl_backlog)
+ incrementalTrimReplicationBacklog(10*REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
+
+ /* Disconnect some clients if they are consuming too much memory. */
+ evictClients();
+
+ /* Record cron time in beforeSleep. */
+ monotime duration_after_write = getMonotonicUs() - cron_start_time_after_write;
+
+ /* Record eventloop latency. */
+ if (server.el_start > 0) {
+ monotime el_duration = getMonotonicUs() - server.el_start;
+ durationAddSample(EL_DURATION_TYPE_EL, el_duration);
+ }
+ server.el_cron_duration += duration_before_aof + duration_after_write;
+ durationAddSample(EL_DURATION_TYPE_CRON, server.el_cron_duration);
+ server.el_cron_duration = 0;
+ /* Record max command count per cycle. */
+ if (server.stat_numcommands > server.el_cmd_cnt_start) {
+ long long el_command_cnt = server.stat_numcommands - server.el_cmd_cnt_start;
+ if (el_command_cnt > server.el_cmd_cnt_max) {
+ server.el_cmd_cnt_max = el_command_cnt;
+ }
+ }
+
+ /* Before we are going to sleep, let the threads access the dataset by
+ * releasing the GIL. Redis main thread will not touch anything at this
+ * time. */
+ if (moduleCount()) moduleReleaseGIL();
+ /********************* WARNING ********************
+ * Do NOT add anything below moduleReleaseGIL !!! *
+ ***************************** ********************/
+}
+
/* This function is called immediately after the event loop multiplexing
 * API returned, and the control is going to soon return to Redis by invoking
 * the different events callbacks. */
void afterSleep(struct aeEventLoop *eventLoop) {
    UNUSED(eventLoop);
    /********************* WARNING ********************
     * Do NOT add anything above moduleAcquireGIL !!! *
     ***************************** ********************/
    if (!ProcessingEventsWhileBlocked) {
        /* Acquire the modules GIL so that their threads won't touch anything.
         * The latency monitor measures how long we waited for the GIL plus the
         * AFTER_SLEEP event delivery, and records it under "module-acquire-GIL"
         * when it exceeds the configured threshold. */
        if (moduleCount()) {
            mstime_t latency;
            latencyStartMonitor(latency);

            moduleAcquireGIL();
            moduleFireServerEvent(REDISMODULE_EVENT_EVENTLOOP,
                                  REDISMODULE_SUBEVENT_EVENTLOOP_AFTER_SLEEP,
                                  NULL);
            latencyEndMonitor(latency);
            latencyAddSampleIfNeeded("module-acquire-GIL",latency);
        }
        /* Set the eventloop start time, used by beforeSleep() to compute the
         * duration of the whole event loop cycle. */
        server.el_start = getMonotonicUs();
        /* Set the eventloop command count at start, used by beforeSleep() to
         * compute the max commands processed in a single cycle. */
        server.el_cmd_cnt_start = server.stat_numcommands;
    }

    /* Update the time cache. */
    updateCachedTime(1);

    /* Update command time snapshot in case it'll be required without a command
     * e.g. somehow used by module timers. Don't update it while yielding to a
     * blocked command, call() will handle that and restore the original time. */
    if (!ProcessingEventsWhileBlocked) {
        server.cmd_time_snapshot = server.mstime;
    }
}
+
+/* =========================== Server initialization ======================== */
+
+void createSharedObjects(void) {
+ int j;
+
+ /* Shared command responses */
+ shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
+ shared.emptybulk = createObject(OBJ_STRING,sdsnew("$0\r\n\r\n"));
+ shared.czero = createObject(OBJ_STRING,sdsnew(":0\r\n"));
+ shared.cone = createObject(OBJ_STRING,sdsnew(":1\r\n"));
+ shared.emptyarray = createObject(OBJ_STRING,sdsnew("*0\r\n"));
+ shared.pong = createObject(OBJ_STRING,sdsnew("+PONG\r\n"));
+ shared.queued = createObject(OBJ_STRING,sdsnew("+QUEUED\r\n"));
+ shared.emptyscan = createObject(OBJ_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"));
+ shared.space = createObject(OBJ_STRING,sdsnew(" "));
+ shared.plus = createObject(OBJ_STRING,sdsnew("+"));
+
+ /* Shared command error responses */
+ shared.wrongtypeerr = createObject(OBJ_STRING,sdsnew(
+ "-WRONGTYPE Operation against a key holding the wrong kind of value\r\n"));
+ shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
+ shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
+ "-ERR no such key\r\n"));
+ shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
+ "-ERR syntax error\r\n"));
+ shared.sameobjecterr = createObject(OBJ_STRING,sdsnew(
+ "-ERR source and destination objects are the same\r\n"));
+ shared.outofrangeerr = createObject(OBJ_STRING,sdsnew(
+ "-ERR index out of range\r\n"));
+ shared.noscripterr = createObject(OBJ_STRING,sdsnew(
+ "-NOSCRIPT No matching script. Please use EVAL.\r\n"));
+ shared.loadingerr = createObject(OBJ_STRING,sdsnew(
+ "-LOADING Redis is loading the dataset in memory\r\n"));
+ shared.slowevalerr = createObject(OBJ_STRING,sdsnew(
+ "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
+ shared.slowscripterr = createObject(OBJ_STRING,sdsnew(
+ "-BUSY Redis is busy running a script. You can only call FUNCTION KILL or SHUTDOWN NOSAVE.\r\n"));
+ shared.slowmoduleerr = createObject(OBJ_STRING,sdsnew(
+ "-BUSY Redis is busy running a module command.\r\n"));
+ shared.masterdownerr = createObject(OBJ_STRING,sdsnew(
+ "-MASTERDOWN Link with MASTER is down and replica-serve-stale-data is set to 'no'.\r\n"));
+ shared.bgsaveerr = createObject(OBJ_STRING,sdsnew(
+ "-MISCONF Redis is configured to save RDB snapshots, but it's currently unable to persist to disk. Commands that may modify the data set are disabled, because this instance is configured to report errors during writes if RDB snapshotting fails (stop-writes-on-bgsave-error option). Please check the Redis logs for details about the RDB error.\r\n"));
+ shared.roslaveerr = createObject(OBJ_STRING,sdsnew(
+ "-READONLY You can't write against a read only replica.\r\n"));
+ shared.noautherr = createObject(OBJ_STRING,sdsnew(
+ "-NOAUTH Authentication required.\r\n"));
+ shared.oomerr = createObject(OBJ_STRING,sdsnew(
+ "-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
+ shared.execaborterr = createObject(OBJ_STRING,sdsnew(
+ "-EXECABORT Transaction discarded because of previous errors.\r\n"));
+ shared.noreplicaserr = createObject(OBJ_STRING,sdsnew(
+ "-NOREPLICAS Not enough good replicas to write.\r\n"));
+ shared.busykeyerr = createObject(OBJ_STRING,sdsnew(
+ "-BUSYKEY Target key name already exists.\r\n"));
+
+ /* The shared NULL depends on the protocol version. */
+ shared.null[0] = NULL;
+ shared.null[1] = NULL;
+ shared.null[2] = createObject(OBJ_STRING,sdsnew("$-1\r\n"));
+ shared.null[3] = createObject(OBJ_STRING,sdsnew("_\r\n"));
+
+ shared.nullarray[0] = NULL;
+ shared.nullarray[1] = NULL;
+ shared.nullarray[2] = createObject(OBJ_STRING,sdsnew("*-1\r\n"));
+ shared.nullarray[3] = createObject(OBJ_STRING,sdsnew("_\r\n"));
+
+ shared.emptymap[0] = NULL;
+ shared.emptymap[1] = NULL;
+ shared.emptymap[2] = createObject(OBJ_STRING,sdsnew("*0\r\n"));
+ shared.emptymap[3] = createObject(OBJ_STRING,sdsnew("%0\r\n"));
+
+ shared.emptyset[0] = NULL;
+ shared.emptyset[1] = NULL;
+ shared.emptyset[2] = createObject(OBJ_STRING,sdsnew("*0\r\n"));
+ shared.emptyset[3] = createObject(OBJ_STRING,sdsnew("~0\r\n"));
+
+ for (j = 0; j < PROTO_SHARED_SELECT_CMDS; j++) {
+ char dictid_str[64];
+ int dictid_len;
+
+ dictid_len = ll2string(dictid_str,sizeof(dictid_str),j);
+ shared.select[j] = createObject(OBJ_STRING,
+ sdscatprintf(sdsempty(),
+ "*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
+ dictid_len, dictid_str));
+ }
+ shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
+ shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
+ shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
+ shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
+ shared.ssubscribebulk = createStringObject("$10\r\nssubscribe\r\n", 17);
+ shared.sunsubscribebulk = createStringObject("$12\r\nsunsubscribe\r\n", 19);
+ shared.smessagebulk = createStringObject("$8\r\nsmessage\r\n", 14);
+ shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
+ shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
+
+ /* Shared command names */
+ shared.del = createStringObject("DEL",3);
+ shared.unlink = createStringObject("UNLINK",6);
+ shared.rpop = createStringObject("RPOP",4);
+ shared.lpop = createStringObject("LPOP",4);
+ shared.lpush = createStringObject("LPUSH",5);
+ shared.rpoplpush = createStringObject("RPOPLPUSH",9);
+ shared.lmove = createStringObject("LMOVE",5);
+ shared.blmove = createStringObject("BLMOVE",6);
+ shared.zpopmin = createStringObject("ZPOPMIN",7);
+ shared.zpopmax = createStringObject("ZPOPMAX",7);
+ shared.multi = createStringObject("MULTI",5);
+ shared.exec = createStringObject("EXEC",4);
+ shared.hset = createStringObject("HSET",4);
+ shared.srem = createStringObject("SREM",4);
+ shared.xgroup = createStringObject("XGROUP",6);
+ shared.xclaim = createStringObject("XCLAIM",6);
+ shared.script = createStringObject("SCRIPT",6);
+ shared.replconf = createStringObject("REPLCONF",8);
+ shared.pexpireat = createStringObject("PEXPIREAT",9);
+ shared.pexpire = createStringObject("PEXPIRE",7);
+ shared.persist = createStringObject("PERSIST",7);
+ shared.set = createStringObject("SET",3);
+ shared.eval = createStringObject("EVAL",4);
+
+ /* Shared command argument */
+ shared.left = createStringObject("left",4);
+ shared.right = createStringObject("right",5);
+ shared.pxat = createStringObject("PXAT", 4);
+ shared.time = createStringObject("TIME",4);
+ shared.retrycount = createStringObject("RETRYCOUNT",10);
+ shared.force = createStringObject("FORCE",5);
+ shared.justid = createStringObject("JUSTID",6);
+ shared.entriesread = createStringObject("ENTRIESREAD",11);
+ shared.lastid = createStringObject("LASTID",6);
+ shared.default_username = createStringObject("default",7);
+ shared.ping = createStringObject("ping",4);
+ shared.setid = createStringObject("SETID",5);
+ shared.keepttl = createStringObject("KEEPTTL",7);
+ shared.absttl = createStringObject("ABSTTL",6);
+ shared.load = createStringObject("LOAD",4);
+ shared.createconsumer = createStringObject("CREATECONSUMER",14);
+ shared.getack = createStringObject("GETACK",6);
+ shared.special_asterick = createStringObject("*",1);
+ shared.special_equals = createStringObject("=",1);
+ shared.redacted = makeObjectShared(createStringObject("(redacted)",10));
+
+ for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
+ shared.integers[j] =
+ makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
+ initObjectLRUOrLFU(shared.integers[j]);
+ shared.integers[j]->encoding = OBJ_ENCODING_INT;
+ }
+ for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) {
+ shared.mbulkhdr[j] = createObject(OBJ_STRING,
+ sdscatprintf(sdsempty(),"*%d\r\n",j));
+ shared.bulkhdr[j] = createObject(OBJ_STRING,
+ sdscatprintf(sdsempty(),"$%d\r\n",j));
+ shared.maphdr[j] = createObject(OBJ_STRING,
+ sdscatprintf(sdsempty(),"%%%d\r\n",j));
+ shared.sethdr[j] = createObject(OBJ_STRING,
+ sdscatprintf(sdsempty(),"~%d\r\n",j));
+ }
+ /* The following two shared objects, minstring and maxstring, are not
+ * actually used for their value but as a special object meaning
+ * respectively the minimum possible string and the maximum possible
+ * string in string comparisons for the ZRANGEBYLEX command. */
+ shared.minstring = sdsnew("minstring");
+ shared.maxstring = sdsnew("maxstring");
+}
+
+void initServerClientMemUsageBuckets(void) {
+ if (server.client_mem_usage_buckets)
+ return;
+ server.client_mem_usage_buckets = zmalloc(sizeof(clientMemUsageBucket)*CLIENT_MEM_USAGE_BUCKETS);
+ for (int j = 0; j < CLIENT_MEM_USAGE_BUCKETS; j++) {
+ server.client_mem_usage_buckets[j].mem_usage_sum = 0;
+ server.client_mem_usage_buckets[j].clients = listCreate();
+ }
+}
+
+void freeServerClientMemUsageBuckets(void) {
+ if (!server.client_mem_usage_buckets)
+ return;
+ for (int j = 0; j < CLIENT_MEM_USAGE_BUCKETS; j++)
+ listRelease(server.client_mem_usage_buckets[j].clients);
+ zfree(server.client_mem_usage_buckets);
+ server.client_mem_usage_buckets = NULL;
+}
+
/* Initialize the server state with compile-time defaults. This runs before
 * the configuration file / command line arguments are parsed, so everything
 * set here may still be overridden later by the config system. */
void initServerConfig(void) {
    int j;
    char *default_bindaddr[CONFIG_DEFAULT_BINDADDR_COUNT] = CONFIG_DEFAULT_BINDADDR;

    initConfigValues();
    updateCachedTime(1);
    server.cmd_time_snapshot = server.mstime;
    getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE);
    server.runid[CONFIG_RUN_ID_SIZE] = '\0';
    changeReplicationId();
    clearReplicationId2();
    server.hz = CONFIG_DEFAULT_HZ; /* Initialize it ASAP, even if it may get
                                      updated later after loading the config.
                                      This value may be used before the server
                                      is initialized. */
    server.timezone = getTimeZone(); /* Initialized by tzset(). */
    server.configfile = NULL;
    server.executable = NULL;
    server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
    server.bindaddr_count = CONFIG_DEFAULT_BINDADDR_COUNT;
    for (j = 0; j < CONFIG_DEFAULT_BINDADDR_COUNT; j++)
        server.bindaddr[j] = zstrdup(default_bindaddr[j]);
    memset(server.listeners, 0x00, sizeof(server.listeners));
    server.active_expire_enabled = 1;
    server.lazy_expire_disabled = 0;
    server.skip_checksum_validation = 0;
    server.loading = 0;
    server.async_loading = 0;
    server.loading_rdb_used_mem = 0;

    /* AOF defaults: persistence is off until enabled by config. */
    server.aof_state = AOF_OFF;
    server.aof_rewrite_base_size = 0;
    server.aof_rewrite_scheduled = 0;
    server.aof_flush_sleep = 0;
    server.aof_last_fsync = time(NULL);
    server.aof_cur_timestamp = 0;
    atomicSet(server.aof_bio_fsync_status,C_OK);
    server.aof_rewrite_time_last = -1;
    server.aof_rewrite_time_start = -1;
    server.aof_lastbgrewrite_status = C_OK;
    server.aof_delayed_fsync = 0;
    server.aof_fd = -1;
    server.aof_selected_db = -1; /* Make sure the first time will not match */
    server.aof_flush_postponed_start = 0;
    server.aof_last_incr_size = 0;
    server.aof_last_incr_fsync_offset = 0;
    server.active_defrag_running = 0;
    server.notify_keyspace_events = 0;
    server.blocked_clients = 0;
    memset(server.blocked_clients_by_type,0,
           sizeof(server.blocked_clients_by_type));
    server.shutdown_asap = 0;
    server.shutdown_flags = 0;
    server.shutdown_mstime = 0;
    server.cluster_module_flags = CLUSTER_MODULE_FLAG_NONE;
    server.migrate_cached_sockets = dictCreate(&migrateCacheDictType);
    server.next_client_id = 1; /* Client IDs, start from 1. */
    server.page_size = sysconf(_SC_PAGESIZE);
    server.pause_cron = 0;

    /* Default latency tracking percentiles reported in INFO/LATENCY. */
    server.latency_tracking_info_percentiles_len = 3;
    server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*(server.latency_tracking_info_percentiles_len));
    server.latency_tracking_info_percentiles[0] = 50.0;  /* p50 */
    server.latency_tracking_info_percentiles[1] = 99.0;  /* p99 */
    server.latency_tracking_info_percentiles[2] = 99.9;  /* p999 */

    server.lruclock = getLRUClock();
    resetServerSaveParams();

    /* Default RDB save points, may be replaced by "save" config directives. */
    appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */
    appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */
    appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */

    /* Replication related */
    server.masterhost = NULL;
    server.masterport = 6379;
    server.master = NULL;
    server.cached_master = NULL;
    server.master_initial_offset = -1;
    server.repl_state = REPL_STATE_NONE;
    server.repl_transfer_tmpfile = NULL;
    server.repl_transfer_fd = -1;
    server.repl_transfer_s = NULL;
    server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
    server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
    server.master_repl_offset = 0;
    server.fsynced_reploff_pending = 0;

    /* Replication partial resync backlog */
    server.repl_backlog = NULL;
    server.repl_no_slaves_since = time(NULL);

    /* Failover related */
    server.failover_end_time = 0;
    server.force_failover = 0;
    server.target_replica_host = NULL;
    server.target_replica_port = 0;
    server.failover_state = NO_FAILOVER;

    /* Client output buffer limits */
    for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++)
        server.client_obuf_limits[j] = clientBufferLimitsDefaults[j];

    /* Linux OOM Score config */
    for (j = 0; j < CONFIG_OOM_COUNT; j++)
        server.oom_score_adj_values[j] = configOOMScoreAdjValuesDefaults[j];

    /* Double constants initialization */
    R_Zero = 0.0;
    R_PosInf = 1.0/R_Zero;
    R_NegInf = -1.0/R_Zero;
    R_Nan = R_Zero/R_Zero;

    /* Command table -- we initialize it here as it is part of the
     * initial configuration, since command names may be changed via
     * redis.conf using the rename-command directive. */
    server.commands = dictCreate(&commandTableDictType);
    server.orig_commands = dictCreate(&commandTableDictType);
    populateCommandTable();

    /* Debugging */
    server.watchdog_period = 0;
}
+
extern char **environ;

/* Restart the server, executing the same executable that started this
 * instance, with the same arguments and configuration file.
 *
 * The function is designed to directly call execve() so that the new
 * server instance will retain the PID of the previous one.
 *
 * The list of flags, that may be bitwise ORed together, alter the
 * behavior of this function:
 *
 * RESTART_SERVER_NONE No flags.
 * RESTART_SERVER_GRACEFULLY Do a proper shutdown before restarting.
 * RESTART_SERVER_CONFIG_REWRITE Rewrite the config file before restarting.
 *
 * 'delay' is a pause (in milliseconds) applied right before execve(),
 * useful to let the previous listening sockets be released by the OS.
 *
 * On success the function does not return, because the process turns into
 * a different process. On error C_ERR is returned. */
int restartServer(int flags, mstime_t delay) {
    int j;

    /* Check if we still have accesses to the executable that started this
     * server instance. */
    if (access(server.executable,X_OK) == -1) {
        serverLog(LL_WARNING,"Can't restart: this process has no "
                             "permissions to execute %s", server.executable);
        return C_ERR;
    }

    /* Config rewriting. */
    if (flags & RESTART_SERVER_CONFIG_REWRITE &&
        server.configfile &&
        rewriteConfig(server.configfile, 0) == -1)
    {
        serverLog(LL_WARNING,"Can't restart: configuration rewrite process "
                             "failed: %s", strerror(errno));
        return C_ERR;
    }

    /* Perform a proper shutdown. We don't wait for lagging replicas though. */
    if (flags & RESTART_SERVER_GRACEFULLY &&
        prepareForShutdown(SHUTDOWN_NOW) != C_OK)
    {
        serverLog(LL_WARNING,"Can't restart: error preparing for shutdown");
        return C_ERR;
    }

    /* Close all file descriptors, with the exception of stdin, stdout, stderr
     * which are useful if we restart a Redis server which is not daemonized.
     * The upper bound is a heuristic: enough to cover all client connections
     * plus internal file descriptors. */
    for (j = 3; j < (int)server.maxclients + 1024; j++) {
        /* Test the descriptor validity before closing it, otherwise
         * Valgrind issues a warning on close(). */
        if (fcntl(j,F_GETFD) != -1) close(j);
    }

    /* Execute the server with the original command line. */
    if (delay) usleep(delay*1000);
    zfree(server.exec_argv[0]);
    server.exec_argv[0] = zstrdup(server.executable);
    execve(server.executable,server.exec_argv,environ);

    /* execve() only returns on error. If that happened, there is nothing we
     * can do but exit: most of the server state was already torn down. */
    _exit(1);

    return C_ERR; /* Never reached. */
}
+
+/* This function will configure the current process's oom_score_adj according
+ * to user specified configuration. This is currently implemented on Linux
+ * only.
+ *
+ * A process_class value of -1 implies OOM_CONFIG_MASTER or OOM_CONFIG_REPLICA,
+ * depending on current role.
+ */
+int setOOMScoreAdj(int process_class) {
+ if (process_class == -1)
+ process_class = (server.masterhost ? CONFIG_OOM_REPLICA : CONFIG_OOM_MASTER);
+
+ serverAssert(process_class >= 0 && process_class < CONFIG_OOM_COUNT);
+
+#ifdef HAVE_PROC_OOM_SCORE_ADJ
+ /* The following statics are used to indicate Redis has changed the process's oom score.
+ * And to save the original score so we can restore it later if needed.
+ * We need this so when we disabled oom-score-adj (also during configuration rollback
+ * when another configuration parameter was invalid and causes a rollback after
+ * applying a new oom-score) we can return to the oom-score value from before our
+ * adjustments. */
+ static int oom_score_adjusted_by_redis = 0;
+ static int oom_score_adj_base = 0;
+
+ int fd;
+ int val;
+ char buf[64];
+
+ if (server.oom_score_adj != OOM_SCORE_ADJ_NO) {
+ if (!oom_score_adjusted_by_redis) {
+ oom_score_adjusted_by_redis = 1;
+ /* Backup base value before enabling Redis control over oom score */
+ fd = open("/proc/self/oom_score_adj", O_RDONLY);
+ if (fd < 0 || read(fd, buf, sizeof(buf)) < 0) {
+ serverLog(LL_WARNING, "Unable to read oom_score_adj: %s", strerror(errno));
+ if (fd != -1) close(fd);
+ return C_ERR;
+ }
+ oom_score_adj_base = atoi(buf);
+ close(fd);
+ }
+
+ val = server.oom_score_adj_values[process_class];
+ if (server.oom_score_adj == OOM_SCORE_RELATIVE)
+ val += oom_score_adj_base;
+ if (val > 1000) val = 1000;
+ if (val < -1000) val = -1000;
+ } else if (oom_score_adjusted_by_redis) {
+ oom_score_adjusted_by_redis = 0;
+ val = oom_score_adj_base;
+ }
+ else {
+ return C_OK;
+ }
+
+ snprintf(buf, sizeof(buf) - 1, "%d\n", val);
+
+ fd = open("/proc/self/oom_score_adj", O_WRONLY);
+ if (fd < 0 || write(fd, buf, strlen(buf)) < 0) {
+ serverLog(LL_WARNING, "Unable to write oom_score_adj: %s", strerror(errno));
+ if (fd != -1) close(fd);
+ return C_ERR;
+ }
+
+ close(fd);
+ return C_OK;
+#else
+ /* Unsupported */
+ return C_ERR;
+#endif
+}
+
/* This function will try to raise the max number of open files accordingly to
 * the configured max number of clients. It also reserves a number of file
 * descriptors (CONFIG_MIN_RESERVED_FDS) for extra operations of
 * persistence, listening sockets, log files and so forth.
 *
 * If it will not be possible to set the limit accordingly to the configured
 * max number of clients, the function will do the reverse setting
 * server.maxclients to the value that we can actually handle. */
void adjustOpenFilesLimit(void) {
    rlim_t maxfiles = server.maxclients+CONFIG_MIN_RESERVED_FDS;
    struct rlimit limit;

    if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
        serverLog(LL_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
                  strerror(errno));
        server.maxclients = 1024-CONFIG_MIN_RESERVED_FDS;
    } else {
        rlim_t oldlimit = limit.rlim_cur;

        /* Set the max number of files if the current limit is not enough
         * for our needs. */
        if (oldlimit < maxfiles) {
            rlim_t bestlimit;
            int setrlimit_error = 0;

            /* Try to set the file limit to match 'maxfiles' or at least
             * to the higher value supported less than maxfiles. */
            bestlimit = maxfiles;
            while(bestlimit > oldlimit) {
                rlim_t decr_step = 16;

                limit.rlim_cur = bestlimit;
                limit.rlim_max = bestlimit;
                if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
                setrlimit_error = errno;

                /* We failed to set file limit to 'bestlimit'. Try with a
                 * smaller limit decrementing by a few FDs per iteration.
                 * Guard the subtraction: rlim_t is unsigned, so going below
                 * decr_step would wrap around instead of reaching zero. */
                if (bestlimit < decr_step) {
                    bestlimit = oldlimit;
                    break;
                }
                bestlimit -= decr_step;
            }

            /* Assume that the limit we get initially is still valid if
             * our last try was even lower. */
            if (bestlimit < oldlimit) bestlimit = oldlimit;

            if (bestlimit < maxfiles) {
                unsigned int old_maxclients = server.maxclients;
                server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS;
                /* maxclients is unsigned so may overflow: in order
                 * to check if maxclients is now logically less than 1
                 * we test indirectly via bestlimit. */
                if (bestlimit <= CONFIG_MIN_RESERVED_FDS) {
                    serverLog(LL_WARNING,"Your current 'ulimit -n' "
                        "of %llu is not enough for the server to start. "
                        "Please increase your open file limit to at least "
                        "%llu. Exiting.",
                        (unsigned long long) oldlimit,
                        (unsigned long long) maxfiles);
                    exit(1);
                }
                serverLog(LL_WARNING,"You requested maxclients of %d "
                    "requiring at least %llu max file descriptors.",
                    old_maxclients,
                    (unsigned long long) maxfiles);
                serverLog(LL_WARNING,"Server can't set maximum open files "
                    "to %llu because of OS error: %s.",
                    (unsigned long long) maxfiles, strerror(setrlimit_error));
                serverLog(LL_WARNING,"Current maximum open files is %llu. "
                    "maxclients has been reduced to %d to compensate for "
                    "low ulimit. "
                    "If you need higher maxclients increase 'ulimit -n'.",
                    (unsigned long long) bestlimit, server.maxclients);
            } else {
                serverLog(LL_NOTICE,"Increased maximum number of open files "
                    "to %llu (it was originally set to %llu).",
                    (unsigned long long) maxfiles,
                    (unsigned long long) oldlimit);
            }
        }
    }
}
+
/* Check that server.tcp_backlog can be actually enforced in Linux according
 * to the value of /proc/sys/net/core/somaxconn, or warn about it. On BSD-like
 * systems the same check is done through the sysctl() interface; otherwise
 * fall back to the compile-time SOMAXCONN constant when available. This only
 * warns, it never changes the configured backlog. */
void checkTcpBacklogSettings(void) {
#if defined(HAVE_PROC_SOMAXCONN)
    /* Linux: the kernel silently caps listen() backlogs at somaxconn. */
    FILE *fp = fopen("/proc/sys/net/core/somaxconn","r");
    char buf[1024];
    if (!fp) return;
    if (fgets(buf,sizeof(buf),fp) != NULL) {
        int somaxconn = atoi(buf);
        if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
            serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
        }
    }
    fclose(fp);
#elif defined(HAVE_SYSCTL_KIPC_SOMAXCONN)
    /* FreeBSD-style: kern.ipc.somaxconn via a 3-level MIB. */
    int somaxconn, mib[3];
    size_t len = sizeof(int);

    mib[0] = CTL_KERN;
    mib[1] = KERN_IPC;
    mib[2] = KIPC_SOMAXCONN;

    if (sysctl(mib, 3, &somaxconn, &len, NULL, 0) == 0) {
        if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
            serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because kern.ipc.somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
        }
    }
#elif defined(HAVE_SYSCTL_KERN_SOMAXCONN)
    /* OpenBSD-style: kern.somaxconn via a 2-level MIB. */
    int somaxconn, mib[2];
    size_t len = sizeof(int);

    mib[0] = CTL_KERN;
    mib[1] = KERN_SOMAXCONN;

    if (sysctl(mib, 2, &somaxconn, &len, NULL, 0) == 0) {
        if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
            serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because kern.somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
        }
    }
#elif defined(SOMAXCONN)
    /* No runtime query available: fall back to the compile-time constant. */
    if (SOMAXCONN < server.tcp_backlog) {
        serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because SOMAXCONN is set to the lower value of %d.", server.tcp_backlog, SOMAXCONN);
    }
#endif
}
+
+void closeListener(connListener *sfd) {
+ int j;
+
+ for (j = 0; j < sfd->count; j++) {
+ if (sfd->fd[j] == -1) continue;
+
+ aeDeleteFileEvent(server.el, sfd->fd[j], AE_READABLE);
+ close(sfd->fd[j]);
+ }
+
+ sfd->count = 0;
+}
+
+/* Create an event handler for accepting new connections in TCP or TLS domain sockets.
+ * This works atomically for all socket fds */
+int createSocketAcceptHandler(connListener *sfd, aeFileProc *accept_handler) {
+ int j;
+
+ for (j = 0; j < sfd->count; j++) {
+ if (aeCreateFileEvent(server.el, sfd->fd[j], AE_READABLE, accept_handler,sfd) == AE_ERR) {
+ /* Rollback */
+ for (j = j-1; j >= 0; j--) aeDeleteFileEvent(server.el, sfd->fd[j], AE_READABLE);
+ return C_ERR;
+ }
+ }
+ return C_OK;
+}
+
/* Initialize a set of file descriptors to listen to the specified 'port'
 * binding the addresses specified in the Redis server configuration.
 *
 * The listening file descriptors are stored in the integer array 'fds'
 * and their number is set in '*count'. Actually @sfd should be 'listener',
 * for the historical reasons, let's keep 'sfd' here.
 *
 * The addresses to bind are specified in the global server.bindaddr array
 * and their number is server.bindaddr_count. If the server configuration
 * contains no specific addresses to bind, this function will try to
 * bind * (all addresses) for both the IPv4 and IPv6 protocols.
 *
 * On success the function returns C_OK.
 *
 * On error the function returns C_ERR. For the function to be on
 * error, at least one of the server.bindaddr addresses was
 * impossible to bind, or no bind addresses were specified in the server
 * configuration but the function is not able to bind * for at least
 * one of the IPv4 or IPv6 protocols. */
int listenToPort(connListener *sfd) {
    int j;
    int port = sfd->port;
    char **bindaddr = sfd->bindaddr;

    /* If we have no bind address, we don't listen on a TCP socket */
    if (sfd->bindaddr_count == 0) return C_OK;

    for (j = 0; j < sfd->bindaddr_count; j++) {
        char* addr = bindaddr[j];
        /* An address prefixed with '-' is optional: failing to bind it is
         * tolerated when the address is simply not available on this host. */
        int optional = *addr == '-';
        if (optional) addr++;
        if (strchr(addr,':')) {
            /* Bind IPv6 address. */
            sfd->fd[sfd->count] = anetTcp6Server(server.neterr,port,addr,server.tcp_backlog);
        } else {
            /* Bind IPv4 address. */
            sfd->fd[sfd->count] = anetTcpServer(server.neterr,port,addr,server.tcp_backlog);
        }
        if (sfd->fd[sfd->count] == ANET_ERR) {
            /* Save errno right away: serverLog() may clobber it. */
            int net_errno = errno;
            serverLog(LL_WARNING,
                "Warning: Could not create server TCP listening socket %s:%d: %s",
                addr, port, server.neterr);
            if (net_errno == EADDRNOTAVAIL && optional)
                continue;
            /* Unsupported protocol family errors are non fatal: e.g. an IPv6
             * bind failing on a host without IPv6 support. */
            if (net_errno == ENOPROTOOPT || net_errno == EPROTONOSUPPORT ||
                net_errno == ESOCKTNOSUPPORT || net_errno == EPFNOSUPPORT ||
                net_errno == EAFNOSUPPORT)
                continue;

            /* Rollback successful listens before exiting */
            closeListener(sfd);
            return C_ERR;
        }
        if (server.socket_mark_id > 0) anetSetSockMarkId(NULL, sfd->fd[sfd->count], server.socket_mark_id);
        anetNonBlock(NULL,sfd->fd[sfd->count]);
        anetCloexec(sfd->fd[sfd->count]);
        sfd->count++;
    }
    return C_OK;
}
+
/* Resets the stats that we expose via INFO or other means that we want
 * to reset via CONFIG RESETSTAT. The function is also used in order to
 * initialize these fields in initServer() at server startup.
 * Fields shared with I/O threads are reset through atomicSet(). */
void resetServerStats(void) {
    int j;

    server.stat_numcommands = 0;
    server.stat_numconnections = 0;
    server.stat_expiredkeys = 0;
    server.stat_expired_stale_perc = 0;
    server.stat_expired_time_cap_reached_count = 0;
    server.stat_expire_cycle_time_used = 0;
    server.stat_evictedkeys = 0;
    server.stat_evictedclients = 0;
    server.stat_total_eviction_exceeded_time = 0;
    server.stat_last_eviction_exceeded_time = 0;
    server.stat_keyspace_misses = 0;
    server.stat_keyspace_hits = 0;
    server.stat_active_defrag_hits = 0;
    server.stat_active_defrag_misses = 0;
    server.stat_active_defrag_key_hits = 0;
    server.stat_active_defrag_key_misses = 0;
    server.stat_active_defrag_scanned = 0;
    server.stat_total_active_defrag_time = 0;
    server.stat_last_active_defrag_time = 0;
    server.stat_fork_time = 0;
    server.stat_fork_rate = 0;
    server.stat_total_forks = 0;
    server.stat_rejected_conn = 0;
    server.stat_sync_full = 0;
    server.stat_sync_partial_ok = 0;
    server.stat_sync_partial_err = 0;
    server.stat_io_reads_processed = 0;
    atomicSet(server.stat_total_reads_processed, 0);
    server.stat_io_writes_processed = 0;
    atomicSet(server.stat_total_writes_processed, 0);
    /* Reset the instantaneous metric sample windows (ops/sec etc.). */
    for (j = 0; j < STATS_METRIC_COUNT; j++) {
        server.inst_metric[j].idx = 0;
        server.inst_metric[j].last_sample_base = 0;
        server.inst_metric[j].last_sample_value = 0;
        memset(server.inst_metric[j].samples,0,
               sizeof(server.inst_metric[j].samples));
    }
    server.stat_aof_rewrites = 0;
    server.stat_rdb_saves = 0;
    server.stat_aofrw_consecutive_failures = 0;
    atomicSet(server.stat_net_input_bytes, 0);
    atomicSet(server.stat_net_output_bytes, 0);
    atomicSet(server.stat_net_repl_input_bytes, 0);
    atomicSet(server.stat_net_repl_output_bytes, 0);
    server.stat_unexpected_error_replies = 0;
    server.stat_total_error_replies = 0;
    server.stat_dump_payload_sanitizations = 0;
    server.aof_delayed_fsync = 0;
    server.stat_reply_buffer_shrinks = 0;
    server.stat_reply_buffer_expands = 0;
    memset(server.duration_stats, 0, sizeof(durationStats) * EL_DURATION_TYPE_NUM);
    server.el_cmd_cnt_max = 0;
    lazyfreeResetStats();
}
+
+/* Make the thread killable at any time, so that kill threads functions
+ * can work reliably (default cancelability type is PTHREAD_CANCEL_DEFERRED).
+ * Needed for pthread_cancel used by the fast memory test used by the crash report. */
+void makeThreadKillable(void) {
+ /* Asynchronous cancel type lets pthread_cancel() interrupt this thread
+ * at any instruction, not only at cancellation points; enabling the
+ * cancel state makes cancellation requests take effect at all. The two
+ * attributes are independent, so the order of the calls does not matter. */
+ pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
+ pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+}
+
+/* One-time initialization of the global server state: signal handling,
+ * core data structures, the event loop, the databases, statistics and the
+ * serverCron timer. Exits the process on any unrecoverable failure. */
+void initServer(void) {
+ int j;
+
+ signal(SIGHUP, SIG_IGN);
+ signal(SIGPIPE, SIG_IGN);
+ setupSignalHandlers();
+ makeThreadKillable();
+
+ if (server.syslog_enabled) {
+ openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
+ server.syslog_facility);
+ }
+
+ /* Initialization after setting defaults from the config system. */
+ server.aof_state = server.aof_enabled ? AOF_ON : AOF_OFF;
+ server.fsynced_reploff = server.aof_enabled ? 0 : -1;
+ server.hz = server.config_hz;
+ server.pid = getpid();
+ server.in_fork_child = CHILD_TYPE_NONE;
+ server.main_thread_id = pthread_self();
+ server.current_client = NULL;
+ server.errors = raxNew();
+ server.execution_nesting = 0;
+ server.clients = listCreate();
+ server.clients_index = raxNew();
+ server.clients_to_close = listCreate();
+ server.slaves = listCreate();
+ server.monitors = listCreate();
+ server.clients_pending_write = listCreate();
+ server.clients_pending_read = listCreate();
+ server.clients_timeout_table = raxNew();
+ server.replication_allowed = 1;
+ server.slaveseldb = -1; /* Force to emit the first SELECT command. */
+ server.unblocked_clients = listCreate();
+ server.ready_keys = listCreate();
+ server.tracking_pending_keys = listCreate();
+ server.pending_push_messages = listCreate();
+ server.clients_waiting_acks = listCreate();
+ server.get_ack_from_slaves = 0;
+ server.paused_actions = 0;
+ memset(server.client_pause_per_purpose, 0,
+ sizeof(server.client_pause_per_purpose));
+ server.postponed_clients = listCreate();
+ server.events_processed_while_blocked = 0;
+ server.system_memory_size = zmalloc_get_memory_size();
+ server.blocked_last_cron = 0;
+ server.blocking_op_nesting = 0;
+ server.thp_enabled = 0;
+ server.cluster_drop_packet_filter = -1;
+ server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME;
+ server.reply_buffer_resizing_enabled = 1;
+ server.client_mem_usage_buckets = NULL;
+ resetReplicationBuffer();
+
+ /* Make sure the locale is set on startup based on the config file. */
+ if (setlocale(LC_COLLATE,server.locale_collate) == NULL) {
+ serverLog(LL_WARNING, "Failed to configure LOCALE for invalid locale name.");
+ exit(1);
+ }
+
+ createSharedObjects();
+ adjustOpenFilesLimit();
+ const char *clk_msg = monotonicInit();
+ serverLog(LL_NOTICE, "monotonic clock: %s", clk_msg);
+ /* The event loop is sized for maxclients plus a fixed reserve of file
+ * descriptors used internally (CONFIG_FDSET_INCR). */
+ server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
+ if (server.el == NULL) {
+ serverLog(LL_WARNING,
+ "Failed creating the event loop. Error message: '%s'",
+ strerror(errno));
+ exit(1);
+ }
+ server.db = zmalloc(sizeof(redisDb)*server.dbnum);
+
+ /* Create the Redis databases, and initialize other internal state. */
+ for (j = 0; j < server.dbnum; j++) {
+ server.db[j].dict = dictCreate(&dbDictType);
+ server.db[j].expires = dictCreate(&dbExpiresDictType);
+ server.db[j].expires_cursor = 0;
+ server.db[j].blocking_keys = dictCreate(&keylistDictType);
+ server.db[j].blocking_keys_unblock_on_nokey = dictCreate(&objectKeyPointerValueDictType);
+ server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType);
+ server.db[j].watched_keys = dictCreate(&keylistDictType);
+ server.db[j].id = j;
+ server.db[j].avg_ttl = 0;
+ server.db[j].defrag_later = listCreate();
+ server.db[j].slots_to_keys = NULL; /* Set by clusterInit later on if necessary. */
+ listSetFreeMethod(server.db[j].defrag_later,(void (*)(void*))sdsfree);
+ }
+ evictionPoolAlloc(); /* Initialize the LRU keys pool. */
+ server.pubsub_channels = dictCreate(&keylistDictType);
+ server.pubsub_patterns = dictCreate(&keylistDictType);
+ server.pubsubshard_channels = dictCreate(&keylistDictType);
+ server.cronloops = 0;
+ server.in_exec = 0;
+ server.busy_module_yield_flags = BUSY_MODULE_YIELD_NONE;
+ server.busy_module_yield_reply = NULL;
+ server.client_pause_in_transaction = 0;
+ server.child_pid = -1;
+ server.child_type = CHILD_TYPE_NONE;
+ server.rdb_child_type = RDB_CHILD_TYPE_NONE;
+ server.rdb_pipe_conns = NULL;
+ server.rdb_pipe_numconns = 0;
+ server.rdb_pipe_numconns_writing = 0;
+ server.rdb_pipe_buff = NULL;
+ server.rdb_pipe_bufflen = 0;
+ server.rdb_bgsave_scheduled = 0;
+ server.child_info_pipe[0] = -1;
+ server.child_info_pipe[1] = -1;
+ server.child_info_nread = 0;
+ server.aof_buf = sdsempty();
+ server.lastsave = time(NULL); /* At startup we consider the DB saved. */
+ server.lastbgsave_try = 0; /* At startup we never tried to BGSAVE. */
+ server.rdb_save_time_last = -1;
+ server.rdb_save_time_start = -1;
+ server.rdb_last_load_keys_expired = 0;
+ server.rdb_last_load_keys_loaded = 0;
+ server.dirty = 0;
+ resetServerStats();
+ /* A few stats we don't want to reset: server startup time, and peak mem. */
+ server.stat_starttime = time(NULL);
+ server.stat_peak_memory = 0;
+ server.stat_current_cow_peak = 0;
+ server.stat_current_cow_bytes = 0;
+ server.stat_current_cow_updated = 0;
+ server.stat_current_save_keys_processed = 0;
+ server.stat_current_save_keys_total = 0;
+ server.stat_rdb_cow_bytes = 0;
+ server.stat_aof_cow_bytes = 0;
+ server.stat_module_cow_bytes = 0;
+ server.stat_module_progress = 0;
+ for (int j = 0; j < CLIENT_TYPE_COUNT; j++)
+ server.stat_clients_type_memory[j] = 0;
+ server.stat_cluster_links_memory = 0;
+ server.cron_malloc_stats.zmalloc_used = 0;
+ server.cron_malloc_stats.process_rss = 0;
+ server.cron_malloc_stats.allocator_allocated = 0;
+ server.cron_malloc_stats.allocator_active = 0;
+ server.cron_malloc_stats.allocator_resident = 0;
+ server.lastbgsave_status = C_OK;
+ server.aof_last_write_status = C_OK;
+ server.aof_last_write_errno = 0;
+ server.repl_good_slaves_count = 0;
+ server.last_sig_received = 0;
+
+ /* Initiate acl info struct */
+ server.acl_info.invalid_cmd_accesses = 0;
+ server.acl_info.invalid_key_accesses = 0;
+ server.acl_info.user_auth_failures = 0;
+ server.acl_info.invalid_channel_accesses = 0;
+
+ /* Create the timer callback, this is our way to process many background
+ * operations incrementally, like clients timeout, eviction of unaccessed
+ * expired keys and so forth. */
+ if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
+ serverPanic("Can't create event loop timers.");
+ exit(1);
+ }
+
+ /* Register a readable event for the pipe used to awake the event loop
+ * from module threads. */
+ if (aeCreateFileEvent(server.el, server.module_pipe[0], AE_READABLE,
+ modulePipeReadable,NULL) == AE_ERR) {
+ serverPanic(
+ "Error registering the readable event for the module pipe.");
+ }
+
+ /* Register before and after sleep handlers (note this needs to be done
+ * before loading persistence since it is used by processEventsWhileBlocked. */
+ aeSetBeforeSleepProc(server.el,beforeSleep);
+ aeSetAfterSleepProc(server.el,afterSleep);
+
+ /* 32 bit instances are limited to 4GB of address space, so if there is
+ * no explicit limit in the user provided configuration we set a limit
+ * at 3 GB using maxmemory with 'noeviction' policy'. This avoids
+ * useless crashes of the Redis instance for out of memory. */
+ if (server.arch_bits == 32 && server.maxmemory == 0) {
+ serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
+ server.maxmemory = 3072LL*(1024*1024); /* 3 GB */
+ server.maxmemory_policy = MAXMEMORY_NO_EVICTION;
+ }
+
+ scriptingInit(1);
+ functionsInit();
+ slowlogInit();
+ latencyMonitorInit();
+
+ /* Initialize ACL default password if it exists */
+ ACLUpdateDefaultUserPassword(server.requirepass);
+
+ applyWatchdogPeriod();
+
+ if (server.maxmemory_clients != 0)
+ initServerClientMemUsageBuckets();
+}
+
+/* Configure and open all listening sockets (TCP, TLS, Unix socket) from
+ * the server configuration, then install the accept handlers. Exits the
+ * process if nothing could be listened on. */
+void initListeners(void) {
+ /* Setup listeners from server config for TCP/TLS/Unix */
+ int conn_index;
+ connListener *listener;
+ if (server.port != 0) {
+ conn_index = connectionIndexByType(CONN_TYPE_SOCKET);
+ if (conn_index < 0)
+ serverPanic("Failed finding connection listener of %s", CONN_TYPE_SOCKET);
+ listener = &server.listeners[conn_index];
+ listener->bindaddr = server.bindaddr;
+ listener->bindaddr_count = server.bindaddr_count;
+ listener->port = server.port;
+ listener->ct = connectionByType(CONN_TYPE_SOCKET);
+ }
+
+ /* TLS context must be configured when TLS is used anywhere (listener,
+ * replication link or cluster bus), even if tls_port itself is 0. */
+ if (server.tls_port || server.tls_replication || server.tls_cluster) {
+ ConnectionType *ct_tls = connectionTypeTls();
+ if (!ct_tls) {
+ serverLog(LL_WARNING, "Failed finding TLS support.");
+ exit(1);
+ }
+ if (connTypeConfigure(ct_tls, &server.tls_ctx_config, 1) == C_ERR) {
+ serverLog(LL_WARNING, "Failed to configure TLS. Check logs for more info.");
+ exit(1);
+ }
+ }
+
+ if (server.tls_port != 0) {
+ conn_index = connectionIndexByType(CONN_TYPE_TLS);
+ if (conn_index < 0)
+ serverPanic("Failed finding connection listener of %s", CONN_TYPE_TLS);
+ listener = &server.listeners[conn_index];
+ listener->bindaddr = server.bindaddr;
+ listener->bindaddr_count = server.bindaddr_count;
+ listener->port = server.tls_port;
+ listener->ct = connectionByType(CONN_TYPE_TLS);
+ }
+ if (server.unixsocket != NULL) {
+ conn_index = connectionIndexByType(CONN_TYPE_UNIX);
+ if (conn_index < 0)
+ serverPanic("Failed finding connection listener of %s", CONN_TYPE_UNIX);
+ listener = &server.listeners[conn_index];
+ listener->bindaddr = &server.unixsocket;
+ listener->bindaddr_count = 1;
+ listener->ct = connectionByType(CONN_TYPE_UNIX);
+ listener->priv = &server.unixsocketperm; /* Unix socket specified */
+ }
+
+ /* create all the configured listener, and add handler to start to accept */
+ int listen_fds = 0;
+ for (int j = 0; j < CONN_TYPE_MAX; j++) {
+ listener = &server.listeners[j];
+ /* Slots whose connection type was never filled in above are unused. */
+ if (listener->ct == NULL)
+ continue;
+
+ if (connListen(listener) == C_ERR) {
+ serverLog(LL_WARNING, "Failed listening on port %u (%s), aborting.", listener->port, listener->ct->get_type(NULL));
+ exit(1);
+ }
+
+ if (createSocketAcceptHandler(listener, connAcceptHandler(listener->ct)) != C_OK)
+ serverPanic("Unrecoverable error creating %s listener accept handler.", listener->ct->get_type(NULL));
+
+ listen_fds += listener->count;
+ }
+
+ if (listen_fds == 0) {
+ serverLog(LL_WARNING, "Configured to not listen anywhere, exiting.");
+ exit(1);
+ }
+}
+
+/* Some steps in server initialization need to be done last (after modules
+ * are loaded).
+ * Specifically, creation of threads due to a race bug in ld.so, in which
+ * Thread Local Storage initialization collides with dlopen call.
+ * see: https://sourceware.org/bugzilla/show_bug.cgi?id=19329 */
+void InitServerLast(void) {
+ /* Thread creation is deferred to this point because of the ld.so/TLS
+ * race described in the comment above. */
+ bioInit();
+ initThreadedIO();
+ set_jemalloc_bg_thread(server.jemalloc_bg_thread);
+ /* Baseline memory usage, sampled after all subsystems are up. */
+ server.initial_memory_usage = zmalloc_used_memory();
+}
+
+/* The purpose of this function is to try to "glue" consecutive range
+ * key specs in order to build the legacy (first,last,step) spec
+ * used by the COMMAND command.
+ * By far the most common case is just one range spec (e.g. SET)
+ * but some commands' ranges were split into two or more ranges
+ * in order to have different flags for different keys (e.g. SMOVE,
+ * first key is "RW ACCESS DELETE", second key is "RW INSERT").
+ *
+ * Additionally set the CMD_MOVABLE_KEYS flag for commands that may have key
+ * names in their arguments, but the legacy range spec doesn't cover all of them.
+ *
+ * This function uses very basic heuristics and is "best effort":
+ * 1. Only commands which have only "range" specs are considered.
+ * 2. Only range specs with keystep of 1 are considered.
+ * 3. The order of the range specs must be ascending (i.e.
+ * lastkey of spec[i] == firstkey-1 of spec[i+1]).
+ *
+ * This function will succeed on all native Redis commands and may
+ * fail on module commands, even if it only has "range" specs that
+ * could actually be "glued", in the following cases:
+ * 1. The order of "range" specs is not ascending (e.g. the spec for
+ * the key at index 2 was added before the spec of the key at
+ * index 1).
+ * 2. The "range" specs have keystep >1.
+ *
+ * If this functions fails it means that the legacy (first,last,step)
+ * spec used by COMMAND will show 0,0,0. This is not a dire situation
+ * because anyway the legacy (first,last,step) spec is to be deprecated
+ * and one should use the new key specs scheme.
+ */
+void populateCommandLegacyRangeSpec(struct redisCommand *c) {
+ memset(&c->legacy_range_key_spec, 0, sizeof(c->legacy_range_key_spec));
+
+ /* Set the movablekeys flag if we have a GETKEYS flag for modules.
+ * Note that for native redis commands, we always have keyspecs,
+ * with enough information to rely on for movablekeys. */
+ if (c->flags & CMD_MODULE_GETKEYS)
+ c->flags |= CMD_MOVABLE_KEYS;
+
+ /* no key-specs, no keys, exit. */
+ if (c->key_specs_num == 0) {
+ return;
+ }
+
+ if (c->key_specs_num == 1 &&
+ c->key_specs[0].begin_search_type == KSPEC_BS_INDEX &&
+ c->key_specs[0].find_keys_type == KSPEC_FK_RANGE)
+ {
+ /* Quick win, exactly one range spec. */
+ c->legacy_range_key_spec = c->key_specs[0];
+ /* If it has the incomplete flag, set the movablekeys flag on the command. */
+ if (c->key_specs[0].flags & CMD_KEY_INCOMPLETE)
+ c->flags |= CMD_MOVABLE_KEYS;
+ return;
+ }
+
+ /* General case: try to glue several consecutive range specs into one
+ * (firstkey,lastkey,step=1) triplet. */
+ int firstkey = INT_MAX, lastkey = 0;
+ int prev_lastkey = 0;
+ for (int i = 0; i < c->key_specs_num; i++) {
+ if (c->key_specs[i].begin_search_type != KSPEC_BS_INDEX ||
+ c->key_specs[i].find_keys_type != KSPEC_FK_RANGE)
+ {
+ /* Found an incompatible (non range) spec, skip it, and set the movablekeys flag. */
+ c->flags |= CMD_MOVABLE_KEYS;
+ continue;
+ }
+ if (c->key_specs[i].fk.range.keystep != 1 ||
+ (prev_lastkey && prev_lastkey != c->key_specs[i].bs.index.pos-1))
+ {
+ /* Found a range spec that's not plain (step of 1) or not consecutive to the previous one.
+ * Skip it, and we set the movablekeys flag. */
+ c->flags |= CMD_MOVABLE_KEYS;
+ continue;
+ }
+ if (c->key_specs[i].flags & CMD_KEY_INCOMPLETE) {
+ /* The spec we're using is incomplete, we can use it, but we also have to set the movablekeys flag. */
+ c->flags |= CMD_MOVABLE_KEYS;
+ }
+ firstkey = min(firstkey, c->key_specs[i].bs.index.pos);
+ /* Get the absolute index for lastkey (in the "range" spec, lastkey is relative to firstkey) */
+ int lastkey_abs_index = c->key_specs[i].fk.range.lastkey;
+ if (lastkey_abs_index >= 0)
+ lastkey_abs_index += c->key_specs[i].bs.index.pos;
+ /* For lastkey we use unsigned comparison to handle negative values correctly */
+ lastkey = max((unsigned)lastkey, (unsigned)lastkey_abs_index);
+ prev_lastkey = lastkey;
+ }
+
+ if (firstkey == INT_MAX) {
+ /* Couldn't find range specs, the legacy range spec will remain empty, and we set the movablekeys flag. */
+ c->flags |= CMD_MOVABLE_KEYS;
+ return;
+ }
+
+ /* Key indexes are 1-based in specs, so 0 would mean we glued nothing. */
+ serverAssert(firstkey != 0);
+ serverAssert(lastkey != 0);
+
+ c->legacy_range_key_spec.begin_search_type = KSPEC_BS_INDEX;
+ c->legacy_range_key_spec.bs.index.pos = firstkey;
+ c->legacy_range_key_spec.find_keys_type = KSPEC_FK_RANGE;
+ c->legacy_range_key_spec.fk.range.lastkey = lastkey < 0 ? lastkey : (lastkey-firstkey); /* in the "range" spec, lastkey is relative to firstkey */
+ c->legacy_range_key_spec.fk.range.keystep = 1;
+ c->legacy_range_key_spec.fk.range.limit = 0;
+}
+
+/* Build the "parent|subcommand" full name used as the canonical
+ * identifier of a subcommand (e.g. "config|get"). The returned sds
+ * string is newly allocated and owned by the caller. */
+sds catSubCommandFullname(const char *parent_name, const char *sub_name) {
+ sds fullname = sdsempty();
+ fullname = sdscatfmt(fullname, "%s|%s", parent_name, sub_name);
+ return fullname;
+}
+
+/* Attach 'subcommand' to 'parent' under the key 'declared_name',
+ * lazily creating the parent's subcommand dictionary on first use. */
+void commandAddSubcommand(struct redisCommand *parent, struct redisCommand *subcommand, const char *declared_name) {
+ if (!parent->subcommands_dict)
+ parent->subcommands_dict = dictCreate(&commandTableDictType);
+
+ subcommand->parent = parent; /* Assign the parent command */
+ subcommand->id = ACLGetCommandID(subcommand->fullname); /* Assign the ID used for ACL. */
+
+ /* Duplicate names would make dictAdd fail — that would be a bug in the
+ * command table, hence the hard assert. */
+ serverAssert(dictAdd(parent->subcommands_dict, sdsnew(declared_name), subcommand) == DICT_OK);
+}
+
+/* Set implicit ACl categories (see comment above the definition of
+ * struct redisCommand). */
+void setImplicitACLCategories(struct redisCommand *c) {
+ if (c->flags & CMD_WRITE)
+ c->acl_categories |= ACL_CATEGORY_WRITE;
+ /* Exclude scripting commands from the RO category. */
+ if (c->flags & CMD_READONLY && !(c->acl_categories & ACL_CATEGORY_SCRIPTING))
+ c->acl_categories |= ACL_CATEGORY_READ;
+ if (c->flags & CMD_ADMIN)
+ c->acl_categories |= ACL_CATEGORY_ADMIN|ACL_CATEGORY_DANGEROUS;
+ if (c->flags & CMD_PUBSUB)
+ c->acl_categories |= ACL_CATEGORY_PUBSUB;
+ if (c->flags & CMD_FAST)
+ c->acl_categories |= ACL_CATEGORY_FAST;
+ if (c->flags & CMD_BLOCKING)
+ c->acl_categories |= ACL_CATEGORY_BLOCKING;
+
+ /* If it's not @fast is @slow in this binary world.
+ * Note: this check must stay after the CMD_FAST mapping above. */
+ if (!(c->acl_categories & ACL_CATEGORY_FAST))
+ c->acl_categories |= ACL_CATEGORY_SLOW;
+}
+
+/* Recursively populate the command structure.
+ *
+ * On success, the function return C_OK. Otherwise C_ERR is returned and we won't
+ * add this command in the commands dict. */
+int populateCommandStructure(struct redisCommand *c) {
+ /* If the command marks with CMD_SENTINEL, it exists in sentinel. */
+ if (!(c->flags & CMD_SENTINEL) && server.sentinel_mode)
+ return C_ERR;
+
+ /* If the command marks with CMD_ONLY_SENTINEL, it only exists in sentinel. */
+ if (c->flags & CMD_ONLY_SENTINEL && !server.sentinel_mode)
+ return C_ERR;
+
+ /* Translate the command string flags description into an actual
+ * set of flags. */
+ setImplicitACLCategories(c);
+
+ /* We start with an unallocated histogram and only allocate memory when a command
+ * has been issued for the first time */
+ c->latency_histogram = NULL;
+
+ /* Handle the legacy range spec and the "movablekeys" flag (must be done after populating all key specs). */
+ populateCommandLegacyRangeSpec(c);
+
+ /* Assign the ID used for ACL. */
+ c->id = ACLGetCommandID(c->fullname);
+
+ /* Handle subcommands: recurse, skipping subcommands that are filtered
+ * out for the current mode (sentinel vs. regular). */
+ if (c->subcommands) {
+ for (int j = 0; c->subcommands[j].declared_name; j++) {
+ struct redisCommand *sub = c->subcommands+j;
+
+ sub->fullname = catSubCommandFullname(c->declared_name, sub->declared_name);
+ if (populateCommandStructure(sub) == C_ERR)
+ continue;
+
+ commandAddSubcommand(c, sub, sub->declared_name);
+ }
+ }
+
+ return C_OK;
+}
+
+extern struct redisCommand redisCommandTable[];
+
+/* Populates the Redis Command Table dict from the static table in commands.c
+ * which is auto generated from the json files in the commands folder. */
+void populateCommandTable(void) {
+ int j;
+ struct redisCommand *c;
+
+ /* The static table is terminated by an entry with a NULL name. */
+ for (j = 0;; j++) {
+ c = redisCommandTable + j;
+ if (c->declared_name == NULL)
+ break;
+
+ int retval1, retval2;
+
+ c->fullname = sdsnew(c->declared_name);
+ /* C_ERR means the command does not apply to the current mode
+ * (sentinel vs. regular server) and must not be registered. */
+ if (populateCommandStructure(c) == C_ERR)
+ continue;
+
+ retval1 = dictAdd(server.commands, sdsdup(c->fullname), c);
+ /* Populate an additional dictionary that will be unaffected
+ * by rename-command statements in redis.conf. */
+ retval2 = dictAdd(server.orig_commands, sdsdup(c->fullname), c);
+ serverAssert(retval1 == DICT_OK && retval2 == DICT_OK);
+ }
+}
+
+/* Reset the per-command usage counters (calls, CPU time, error counts)
+ * and release the latency histograms for every command in 'commands',
+ * recursing into subcommand dictionaries. */
+void resetCommandTableStats(dict* commands) {
+ struct redisCommand *c;
+ dictEntry *de;
+ dictIterator *di;
+
+ di = dictGetSafeIterator(commands);
+ while((de = dictNext(di)) != NULL) {
+ c = (struct redisCommand *) dictGetVal(de);
+ c->microseconds = 0;
+ c->calls = 0;
+ c->rejected_calls = 0;
+ c->failed_calls = 0;
+ /* Free the histogram; it is re-allocated lazily on next use. */
+ if(c->latency_histogram) {
+ hdr_close(c->latency_histogram);
+ c->latency_histogram = NULL;
+ }
+ if (c->subcommands_dict)
+ resetCommandTableStats(c->subcommands_dict);
+ }
+ dictReleaseIterator(di);
+}
+
+/* Drop all per-error-name counters and start from an empty table. */
+void resetErrorTableStats(void) {
+ raxFreeWithCallback(server.errors, zfree);
+ server.errors = raxNew();
+}
+
+/* ========================== Redis OP Array API ============================ */
+
+/* Append one operation to the ops array, growing the backing storage
+ * geometrically when needed. Ownership of 'argv' passes to the array
+ * (released later by redisOpArrayFree()). Returns the new op count. */
+int redisOpArrayAppend(redisOpArray *oa, int dbid, robj **argv, int argc, int target) {
+ int old_capacity = oa->capacity;
+
+ /* Start with 16 slots on first use, then double whenever full. */
+ if (oa->numops == 0)
+ oa->capacity = 16;
+ else if (oa->numops >= oa->capacity)
+ oa->capacity *= 2;
+ if (oa->capacity != old_capacity)
+ oa->ops = zrealloc(oa->ops, sizeof(redisOp)*oa->capacity);
+
+ redisOp *slot = &oa->ops[oa->numops++];
+ slot->dbid = dbid;
+ slot->argv = argv;
+ slot->argc = argc;
+ slot->target = target;
+ return oa->numops;
+}
+
+/* Release every queued op: drop the refcount of each argument object and
+ * free each argv vector. The ops array itself is intentionally kept
+ * allocated so it can be reused by future commands. */
+void redisOpArrayFree(redisOpArray *oa) {
+ while (oa->numops > 0) {
+ redisOp *op = &oa->ops[--oa->numops];
+ for (int j = 0; j < op->argc; j++)
+ decrRefCount(op->argv[j]);
+ zfree(op->argv);
+ }
+ serverAssert(oa->numops == 0);
+}
+
+/* ====================== Commands lookup and execution ===================== */
+
+/* Return non-zero if 's' names a registered container command, i.e. a
+ * command that has subcommands (like CONFIG or ACL). */
+int isContainerCommandBySds(sds s) {
+ struct redisCommand *cmd = dictFetchValue(server.commands, s);
+ return cmd != NULL && cmd->subcommands_dict != NULL;
+}
+
+/* Return the subcommand of 'container' named 'sub_name', or NULL if it
+ * does not exist. The caller must ensure subcommands_dict is non-NULL. */
+struct redisCommand *lookupSubcommand(struct redisCommand *container, sds sub_name) {
+ return dictFetchValue(container->subcommands_dict, sub_name);
+}
+
+/* Look up a command by argv and argc
+ *
+ * If `strict` is not 0 we expect argc to be exact (i.e. argc==2
+ * for a subcommand and argc==1 for a top-level command)
+ * `strict` should be used every time we want to look up a command
+ * name (e.g. in COMMAND INFO) rather than to find the command
+ * a user requested to execute (in processCommand).
+ */
+struct redisCommand *lookupCommandLogic(dict *commands, robj **argv, int argc, int strict) {
+ struct redisCommand *base_cmd = dictFetchValue(commands, argv[0]->ptr);
+ int has_subcommands = base_cmd && base_cmd->subcommands_dict;
+ if (argc == 1 || !has_subcommands) {
+ /* In strict mode a container-less name must be exactly one token. */
+ if (strict && argc != 1)
+ return NULL;
+ /* Note: It is possible that base_cmd->proc==NULL (e.g. CONFIG) */
+ return base_cmd;
+ } else { /* argc > 1 && has_subcommands */
+ if (strict && argc != 2)
+ return NULL;
+ /* Note: Currently we support just one level of subcommands */
+ return lookupSubcommand(base_cmd, argv[1]->ptr);
+ }
+}
+
+/* Non-strict lookup against the (possibly renamed) command table; this is
+ * the variant used on the command execution path. */
+struct redisCommand *lookupCommand(robj **argv, int argc) {
+ return lookupCommandLogic(server.commands,argv,argc,0);
+}
+
+/* Look up a command by its full name 's', which may be either "cmd" or
+ * "cmd|subcmd". Returns NULL for malformed names (empty, or more than
+ * one '|' separator). */
+struct redisCommand *lookupCommandBySdsLogic(dict *commands, sds s) {
+ int argc, j;
+ sds *strings = sdssplitlen(s,sdslen(s),"|",1,&argc);
+ if (strings == NULL)
+ return NULL;
+ if (argc < 1 || argc > 2) {
+ /* Currently we support just one level of subcommands */
+ sdsfreesplitres(strings,argc);
+ return NULL;
+ }
+
+ serverAssert(argc > 0); /* Avoid warning `-Wmaybe-uninitialized` in lookupCommandLogic() */
+ /* Build stack-allocated static string objects wrapping the split
+ * tokens, so we can reuse the robj-based lookup without heap churn. */
+ robj objects[argc];
+ robj *argv[argc];
+ for (j = 0; j < argc; j++) {
+ initStaticStringObject(objects[j],strings[j]);
+ argv[j] = &objects[j];
+ }
+
+ struct redisCommand *cmd = lookupCommandLogic(commands,argv,argc,1);
+ sdsfreesplitres(strings,argc);
+ return cmd;
+}
+
+/* Convenience wrapper: strict by-name lookup in the current command table. */
+struct redisCommand *lookupCommandBySds(sds s) {
+ return lookupCommandBySdsLogic(server.commands,s);
+}
+
+/* Same as lookupCommandBySdsLogic() but accepts a plain C string; the
+ * temporary sds copy is released before returning. */
+struct redisCommand *lookupCommandByCStringLogic(dict *commands, const char *s) {
+ sds name = sdsnew(s);
+ struct redisCommand *cmd = lookupCommandBySdsLogic(commands, name);
+ sdsfree(name);
+ return cmd;
+}
+
+/* Convenience wrapper: by-name lookup from a C string in the current table. */
+struct redisCommand *lookupCommandByCString(const char *s) {
+ return lookupCommandByCStringLogic(server.commands,s);
+}
+
+/* Lookup the command in the current table, if not found also check in
+ * the original table containing the original command names unaffected by
+ * redis.conf rename-command statement.
+ *
+ * This is used by functions rewriting the argument vector such as
+ * rewriteClientCommandVector() in order to set client->cmd pointer
+ * correctly even if the command was renamed. */
+struct redisCommand *lookupCommandOrOriginal(robj **argv, int argc) {
+ struct redisCommand *cmd = lookupCommandLogic(server.commands, argv, argc, 0);
+ if (cmd) return cmd;
+ /* Not found: fall back to the table of original names, which is not
+ * affected by rename-command directives. */
+ return lookupCommandLogic(server.orig_commands, argv, argc, 0);
+}
+
+/* Commands arriving from the master client or AOF client, should never be rejected. */
+int mustObeyClient(client *c) {
+ /* The fake AOF-loading client is recognized by its reserved ID. */
+ if (c->id == CLIENT_ID_AOF) return 1;
+ /* Otherwise only the master link client must be obeyed. */
+ return (c->flags & CLIENT_MASTER) != 0;
+}
+
+/* Decide whether a command with the given propagation 'target' flags
+ * should actually be propagated, given the current global state. */
+static int shouldPropagate(int target) {
+ /* Global conditions that veto any propagation at all. */
+ if (server.loading || !server.replication_allowed || target == PROPAGATE_NONE)
+ return 0;
+
+ /* AOF propagation requires the AOF to be enabled. */
+ if ((target & PROPAGATE_AOF) && server.aof_state != AOF_OFF)
+ return 1;
+
+ /* Replica propagation requires us to be a master with either a
+ * replication backlog or at least one connected replica. */
+ if ((target & PROPAGATE_REPL) &&
+ server.masterhost == NULL &&
+ (server.repl_backlog || listLength(server.slaves) != 0))
+ return 1;
+
+ return 0;
+}
+
+/* Propagate the specified command (in the context of the specified database id)
+ * to AOF and Slaves.
+ *
+ * flags are an xor between:
+ * + PROPAGATE_NONE (no propagation of command at all)
+ * + PROPAGATE_AOF (propagate into the AOF file if is enabled)
+ * + PROPAGATE_REPL (propagate into the replication link)
+ *
+ * This is an internal low-level function and should not be called!
+ *
+ * The API for propagating commands is alsoPropagate().
+ *
+ * dbid value of -1 is saved to indicate that the called do not want
+ * to replicate SELECT for this command (used for database neutral commands).
+ */
+static void propagateNow(int dbid, robj **argv, int argc, int target) {
+ if (!shouldPropagate(target))
+ return;
+
+ /* This needs to be unreachable since the dataset should be fixed during
+ * replica pause (otherwise data may be lost during a failover) */
+ serverAssert(!(isPausedActions(PAUSE_ACTION_REPLICA) &&
+ (!server.client_pause_in_transaction)));
+
+ /* Feed the AOF buffer and/or the replica output buffers as requested. */
+ if (server.aof_state != AOF_OFF && target & PROPAGATE_AOF)
+ feedAppendOnlyFile(dbid,argv,argc);
+ if (target & PROPAGATE_REPL)
+ replicationFeedSlaves(server.slaves,dbid,argv,argc);
+}
+
+/* Used inside commands to schedule the propagation of additional commands
+ * after the current command is propagated to AOF / Replication.
+ *
+ * dbid is the database ID the command should be propagated into.
+ * Arguments of the command to propagate are passed as an array of redis
+ * objects pointers of len 'argc', using the 'argv' vector.
+ *
+ * The function does not take a reference to the passed 'argv' vector,
+ * so it is up to the caller to release the passed argv (but it is usually
+ * stack allocated). The function automatically increments ref count of
+ * passed objects, so the caller does not need to. */
+/* Queue an extra command for propagation once the current execution unit
+ * completes. The argv vector is duplicated and each object's refcount is
+ * incremented, so the caller keeps ownership of its own 'argv'. */
+void alsoPropagate(int dbid, robj **argv, int argc, int target) {
+ if (!shouldPropagate(target))
+ return;
+
+ robj **copy = zmalloc(sizeof(robj*)*argc);
+ for (int j = 0; j < argc; j++) {
+ copy[j] = argv[j];
+ incrRefCount(copy[j]);
+ }
+ redisOpArrayAppend(&server.also_propagate,dbid,copy,argc,target);
+}
+
+/* It is possible to call the function forceCommandPropagation() inside a
+ * Redis command implementation in order to to force the propagation of a
+ * specific command execution into AOF / Replication. */
+void forceCommandPropagation(client *c, int flags) {
+ /* Only write / may-replicate commands are allowed to force propagation. */
+ serverAssert(c->cmd->flags & (CMD_WRITE | CMD_MAY_REPLICATE));
+ if (flags & PROPAGATE_REPL) c->flags |= CLIENT_FORCE_REPL;
+ if (flags & PROPAGATE_AOF) c->flags |= CLIENT_FORCE_AOF;
+}
+
+/* Avoid that the executed command is propagated at all. This way we
+ * are free to just propagate what we want using the alsoPropagate()
+ * API. */
+/* Suppress propagation of the currently executing command to both AOF
+ * and replicas (does not affect alsoPropagate() queued commands). */
+void preventCommandPropagation(client *c) {
+ c->flags |= CLIENT_PREVENT_PROP;
+}
+
+/* AOF specific version of preventCommandPropagation(). */
+/* AOF specific version of preventCommandPropagation(). */
+void preventCommandAOF(client *c) {
+ c->flags |= CLIENT_PREVENT_AOF_PROP;
+}
+
+/* Replication specific version of preventCommandPropagation(). */
+/* Replication specific version of preventCommandPropagation(). */
+void preventCommandReplication(client *c) {
+ c->flags |= CLIENT_PREVENT_REPL_PROP;
+}
+
+/* Log the last command a client executed into the slowlog. */
+/* Log the last command a client executed into the slowlog. */
+void slowlogPushCurrentCommand(client *c, struct redisCommand *cmd, ustime_t duration) {
+ /* Some commands may contain sensitive data that should not be available in the slowlog. */
+ if (cmd->flags & CMD_SKIP_SLOWLOG)
+ return;
+
+ /* If the command rewrote its own argument vector, log the arguments as
+ * the user originally typed them. */
+ robj **argv;
+ int argc;
+ if (c->original_argv) {
+ argv = c->original_argv;
+ argc = c->original_argc;
+ } else {
+ argv = c->argv;
+ argc = c->argc;
+ }
+ slowlogPushEntryIfNeeded(c,argv,argc,duration);
+}
+
+/* This function is called in order to update the total command histogram duration.
+ * The latency unit is nano-seconds.
+ * If needed it will allocate the histogram memory and trim the duration to the upper/lower tracking limits*/
+/* Record one latency sample (nanoseconds) into the command histogram,
+ * clamping it to the tracked range and lazily allocating the histogram
+ * on first use. */
+void updateCommandLatencyHistogram(struct hdr_histogram **latency_histogram, int64_t duration_hist) {
+ /* Clamp the sample to the histogram's tracking limits. */
+ if (duration_hist < LATENCY_HISTOGRAM_MIN_VALUE)
+ duration_hist = LATENCY_HISTOGRAM_MIN_VALUE;
+ else if (duration_hist > LATENCY_HISTOGRAM_MAX_VALUE)
+ duration_hist = LATENCY_HISTOGRAM_MAX_VALUE;
+
+ if (*latency_histogram == NULL)
+ hdr_init(LATENCY_HISTOGRAM_MIN_VALUE, LATENCY_HISTOGRAM_MAX_VALUE,
+ LATENCY_HISTOGRAM_PRECISION, latency_histogram);
+ hdr_record_value(*latency_histogram, duration_hist);
+}
+
+/* Handle the alsoPropagate() API to handle commands that want to propagate
+ * multiple separated commands. Note that alsoPropagate() is not affected
+ * by CLIENT_PREVENT_PROP flag. */
+static void propagatePendingCommands(void) {
+ if (server.also_propagate.numops == 0)
+ return;
+
+ int j;
+ redisOp *rop;
+
+ /* If we got here it means we have finished an execution-unit.
+ * If that unit has caused propagation of multiple commands, they
+ * should be propagated as a transaction */
+ int transaction = server.also_propagate.numops > 1;
+
+ /* In case a command that may modify random keys was run *directly*
+ * (i.e. not from within a script, MULTI/EXEC, RM_Call, etc.) we want
+ * to avoid using a transaction (much like active-expire) */
+ if (server.current_client &&
+ server.current_client->cmd &&
+ server.current_client->cmd->flags & CMD_TOUCHES_ARBITRARY_KEYS)
+ {
+ transaction = 0;
+ }
+
+ if (transaction) {
+ /* We use dbid=-1 to indicate we do not want to replicate SELECT.
+ * It'll be inserted together with the next command (inside the MULTI) */
+ propagateNow(-1,&shared.multi,1,PROPAGATE_AOF|PROPAGATE_REPL);
+ }
+
+ /* Emit the queued commands in the order they were accumulated. */
+ for (j = 0; j < server.also_propagate.numops; j++) {
+ rop = &server.also_propagate.ops[j];
+ serverAssert(rop->target);
+ propagateNow(rop->dbid,rop->argv,rop->argc,rop->target);
+ }
+
+ if (transaction) {
+ /* We use dbid=-1 to indicate we do not want to replicate select */
+ propagateNow(-1,&shared.exec,1,PROPAGATE_AOF|PROPAGATE_REPL);
+ }
+
+ /* Release the queued ops (the array itself is kept for reuse). */
+ redisOpArrayFree(&server.also_propagate);
+}
+
+/* Performs operations that should be performed after an execution unit ends.
+ * Execution unit is a code that should be done atomically.
+ * Execution units can be nested and are not necessarily starts with Redis command.
+ *
+ * For example the following is a logical unit:
+ * active expire ->
+ * trigger del notification of some module ->
+ * accessing a key ->
+ * trigger key miss notification of some other module
+ *
+ * What we want to achieve is that the entire execution unit will be done atomically,
+ * currently with respect to replication and post jobs, but in the future there might
+ * be other considerations. So we basically want the `postUnitOperations` to trigger
+ * after the entire chain finished. */
+void postExecutionUnitOperations(void) {
+ /* Do nothing while nested inside another execution unit; only the
+ * outermost unit triggers the post operations. */
+ if (server.execution_nesting)
+ return;
+
+ firePostExecutionUnitJobs();
+
+ /* If we are at the top-most call() and not inside a an active module
+ * context (e.g. within a module timer) we can propagate what we accumulated. */
+ propagatePendingCommands();
+
+ /* Module subsystem post-execution-unit logic */
+ modulePostExecutionUnitOperations();
+}
+
+/* Increment the command failure counters (either rejected_calls or failed_calls).
+ * The decision which counter to increment is done using the flags argument, options are:
+ * * ERROR_COMMAND_REJECTED - update rejected_calls
+ * * ERROR_COMMAND_FAILED - update failed_calls
+ *
+ * The function also reset the prev_err_count to make sure we will not count the same error
+ * twice, its possible to pass a NULL cmd value to indicate that the error was counted elsewhere.
+ *
+ * The function returns true if stats was updated and false if not. */
+int incrCommandStatsOnError(struct redisCommand *cmd, int flags) {
+ /* hold the prev error count captured on the last command execution */
+ static long long prev_err_count = 0;
+ int res = 0;
+ if (cmd) {
+ /* Only attribute an error to 'cmd' if new error replies were emitted
+ * since the last snapshot was taken (i.e. since the previous call). */
+ if ((server.stat_total_error_replies - prev_err_count) > 0) {
+ if (flags & ERROR_COMMAND_REJECTED) {
+ cmd->rejected_calls++;
+ res = 1;
+ } else if (flags & ERROR_COMMAND_FAILED) {
+ cmd->failed_calls++;
+ res = 1;
+ }
+ }
+ }
+ /* Always refresh the snapshot so the same errors are never counted twice.
+ * Calling with cmd==NULL does only this (count the error elsewhere). */
+ prev_err_count = server.stat_total_error_replies;
+ return res;
+}
+
+/* Call() is the core of Redis execution of a command.
+ *
+ * The following flags can be passed:
+ * CMD_CALL_NONE No flags.
+ * CMD_CALL_PROPAGATE_AOF Append command to AOF if it modified the dataset
+ * or if the client flags are forcing propagation.
+ * CMD_CALL_PROPAGATE_REPL Send command to slaves if it modified the dataset
+ * or if the client flags are forcing propagation.
+ * CMD_CALL_PROPAGATE Alias for PROPAGATE_AOF|PROPAGATE_REPL.
+ * CMD_CALL_FULL Alias for SLOWLOG|STATS|PROPAGATE.
+ *
+ * The exact propagation behavior depends on the client flags.
+ * Specifically:
+ *
+ * 1. If the client flags CLIENT_FORCE_AOF or CLIENT_FORCE_REPL are set
+ * and assuming the corresponding CMD_CALL_PROPAGATE_AOF/REPL is set
+ * in the call flags, then the command is propagated even if the
+ * dataset was not affected by the command.
+ * 2. If the client flags CLIENT_PREVENT_REPL_PROP or CLIENT_PREVENT_AOF_PROP
+ * are set, the propagation into AOF or to slaves is not performed even
+ * if the command modified the dataset.
+ *
+ * Note that regardless of the client flags, if CMD_CALL_PROPAGATE_AOF
+ * or CMD_CALL_PROPAGATE_REPL are not set, then respectively AOF or
+ * slaves propagation will never occur.
+ *
+ * Client flags are modified by the implementation of a given command
+ * using the following API:
+ *
+ * forceCommandPropagation(client *c, int flags);
+ * preventCommandPropagation(client *c);
+ * preventCommandAOF(client *c);
+ * preventCommandReplication(client *c);
+ *
+ */
+void call(client *c, int flags) {
+ long long dirty;
+ uint64_t client_old_flags = c->flags;
+ struct redisCommand *real_cmd = c->realcmd;
+ client *prev_client = server.executing_client;
+ server.executing_client = c;
+
+ /* When call() is issued during loading the AOF we don't want commands called
+ * from module, exec or LUA to go into the slowlog or to populate statistics. */
+ int update_command_stats = !isAOFLoadingContext();
+
+ /* We want to be aware of a client which is making a first time attempt to execute this command
+ * and a client which is reprocessing command again (after being unblocked).
+ * Blocked clients can be blocked in different places and not always it means the call() function has been
+ * called. For example this is required for avoiding double logging to monitors.*/
+ int reprocessing_command = flags & CMD_CALL_REPROCESSING;
+
+ /* Initialization: clear the flags that must be set by the command on
+ * demand, and initialize the array for additional commands propagation. */
+ c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
+
+ /* Redis core is in charge of propagation when the first entry point
+ * of call() is processCommand().
+ * The only other option to get to call() without having processCommand
+ * as an entry point is if a module triggers RM_Call outside of call()
+ * context (for example, in a timer).
+ * In that case, the module is in charge of propagation. */
+
+ /* Call the command. */
+ dirty = server.dirty;
+ long long old_master_repl_offset = server.master_repl_offset;
+ /* cmd==NULL here: only refresh the static snapshot of
+ * server.stat_total_error_replies inside incrCommandStatsOnError(), so
+ * that errors emitted by this very execution can be detected afterwards. */
+ incrCommandStatsOnError(NULL, 0);
+
+ const long long call_timer = ustime();
+ enterExecutionUnit(1, call_timer);
+
+ /* setting the CLIENT_EXECUTING_COMMAND flag so we will avoid
+ * sending client side caching message in the middle of a command reply.
+ * In case of blocking commands, the flag will be un-set only after successfully
+ * re-processing and unblock the client.*/
+ c->flags |= CLIENT_EXECUTING_COMMAND;
+
+ monotime monotonic_start = 0;
+ if (monotonicGetType() == MONOTONIC_CLOCK_HW)
+ monotonic_start = getMonotonicUs();
+
+ c->cmd->proc(c);
+
+ exitExecutionUnit();
+
+ /* In case client is blocked after trying to execute the command,
+ * it means the execution is not yet completed and we MIGHT reprocess the command in the future. */
+ if (!(c->flags & CLIENT_BLOCKED)) c->flags &= ~(CLIENT_EXECUTING_COMMAND);
+
+ /* In order to avoid performance implication due to querying the clock using a system call 3 times,
+ * we use a monotonic clock, when we are sure its cost is very low, and fall back to non-monotonic call otherwise. */
+ ustime_t duration;
+ if (monotonicGetType() == MONOTONIC_CLOCK_HW)
+ duration = getMonotonicUs() - monotonic_start;
+ else
+ duration = ustime() - call_timer;
+
+ c->duration += duration;
+ /* Number of keyspace changes made by this command; clamp at zero in case
+ * server.dirty decreased during the call (NOTE(review): confirm which
+ * flows can decrease it). */
+ dirty = server.dirty-dirty;
+ if (dirty < 0) dirty = 0;
+
+ /* Update failed command calls if required. */
+
+ if (!incrCommandStatsOnError(real_cmd, ERROR_COMMAND_FAILED) && c->deferred_reply_errors) {
+ /* When call is used from a module client, error stats, and total_error_replies
+ * isn't updated since these errors, if handled by the module, are internal,
+ * and not reflected to users. however, the commandstats does show these calls
+ * (made by RM_Call), so it should log if they failed or succeeded. */
+ real_cmd->failed_calls++;
+ }
+
+ /* After executing command, we will close the client after writing entire
+ * reply if it is set 'CLIENT_CLOSE_AFTER_COMMAND' flag. */
+ if (c->flags & CLIENT_CLOSE_AFTER_COMMAND) {
+ c->flags &= ~CLIENT_CLOSE_AFTER_COMMAND;
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ }
+
+ /* Note: the code below uses the real command that was executed
+ * c->cmd and c->lastcmd may be different, in case of MULTI-EXEC or
+ * re-written commands such as EXPIRE, GEOADD, etc. */
+
+ /* Record the latency this command induced on the main thread.
+ * unless instructed by the caller not to log. (happens when processing
+ * a MULTI-EXEC from inside an AOF). */
+ if (update_command_stats) {
+ char *latency_event = (real_cmd->flags & CMD_FAST) ?
+ "fast-command" : "command";
+ latencyAddSampleIfNeeded(latency_event,duration/1000);
+ if (server.execution_nesting == 0)
+ durationAddSample(EL_DURATION_TYPE_CMD, duration);
+ }
+
+ /* Log the command into the Slow log if needed.
+ * If the client is blocked we will handle slowlog when it is unblocked. */
+ if (update_command_stats && !(c->flags & CLIENT_BLOCKED))
+ slowlogPushCurrentCommand(c, real_cmd, c->duration);
+
+ /* Send the command to clients in MONITOR mode if applicable,
+ * since some administrative commands are considered too dangerous to be shown.
+ * Other exceptions is a client which is unblocked and retring to process the command
+ * or we are currently in the process of loading AOF. */
+ if (update_command_stats && !reprocessing_command &&
+ !(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN))) {
+ /* Prefer the original argv if the command was rewritten internally. */
+ robj **argv = c->original_argv ? c->original_argv : c->argv;
+ int argc = c->original_argv ? c->original_argc : c->argc;
+ replicationFeedMonitors(c,server.monitors,c->db->id,argv,argc);
+ }
+
+ /* Clear the original argv.
+ * If the client is blocked we will handle slowlog when it is unblocked. */
+ if (!(c->flags & CLIENT_BLOCKED))
+ freeClientOriginalArgv(c);
+
+ /* populate the per-command statistics that we show in INFO commandstats.
+ * If the client is blocked we will handle latency stats and duration when it is unblocked. */
+ if (update_command_stats && !(c->flags & CLIENT_BLOCKED)) {
+ real_cmd->calls++;
+ real_cmd->microseconds += c->duration;
+ if (server.latency_tracking_enabled && !(c->flags & CLIENT_BLOCKED))
+ updateCommandLatencyHistogram(&(real_cmd->latency_histogram), c->duration*1000);
+ }
+
+ /* The duration needs to be reset after each call except for a blocked command,
+ * which is expected to record and reset the duration after unblocking. */
+ if (!(c->flags & CLIENT_BLOCKED)) {
+ c->duration = 0;
+ }
+
+ /* Propagate the command into the AOF and replication link.
+ * We never propagate EXEC explicitly, it will be implicitly
+ * propagated if needed (see propagatePendingCommands).
+ * Also, module commands take care of themselves */
+ if (flags & CMD_CALL_PROPAGATE &&
+ (c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP &&
+ c->cmd->proc != execCommand &&
+ !(c->cmd->flags & CMD_MODULE))
+ {
+ int propagate_flags = PROPAGATE_NONE;
+
+ /* Check if the command operated changes in the data set. If so
+ * set for replication / AOF propagation. */
+ if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL);
+
+ /* If the client forced AOF / replication of the command, set
+ * the flags regardless of the command effects on the data set. */
+ if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
+ if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
+
+ /* However prevent AOF / replication propagation if the command
+ * implementation called preventCommandPropagation() or similar,
+ * or if we don't have the call() flags to do so. */
+ if (c->flags & CLIENT_PREVENT_REPL_PROP ||
+ c->flags & CLIENT_MODULE_PREVENT_REPL_PROP ||
+ !(flags & CMD_CALL_PROPAGATE_REPL))
+ propagate_flags &= ~PROPAGATE_REPL;
+ if (c->flags & CLIENT_PREVENT_AOF_PROP ||
+ c->flags & CLIENT_MODULE_PREVENT_AOF_PROP ||
+ !(flags & CMD_CALL_PROPAGATE_AOF))
+ propagate_flags &= ~PROPAGATE_AOF;
+
+ /* Call alsoPropagate() only if at least one of AOF / replication
+ * propagation is needed. */
+ if (propagate_flags != PROPAGATE_NONE)
+ alsoPropagate(c->db->id,c->argv,c->argc,propagate_flags);
+ }
+
+ /* Restore the old replication flags, since call() can be executed
+ * recursively. */
+ c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
+ c->flags |= client_old_flags &
+ (CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
+
+ /* If the client has keys tracking enabled for client side caching,
+ * make sure to remember the keys it fetched via this command. For read-only
+ * scripts, don't process the script, only the commands it executes. */
+ if ((c->cmd->flags & CMD_READONLY) && (c->cmd->proc != evalRoCommand)
+ && (c->cmd->proc != evalShaRoCommand) && (c->cmd->proc != fcallroCommand))
+ {
+ /* We use the tracking flag of the original external client that
+ * triggered the command, but we take the keys from the actual command
+ * being executed. */
+ if (server.current_client &&
+ (server.current_client->flags & CLIENT_TRACKING) &&
+ !(server.current_client->flags & CLIENT_TRACKING_BCAST))
+ {
+ trackingRememberKeys(server.current_client, c);
+ }
+ }
+
+ if (!(c->flags & CLIENT_BLOCKED))
+ server.stat_numcommands++;
+
+ /* Record peak memory after each command and before the eviction that runs
+ * before the next command. */
+ size_t zmalloc_used = zmalloc_used_memory();
+ if (zmalloc_used > server.stat_peak_memory)
+ server.stat_peak_memory = zmalloc_used;
+
+ /* Do some maintenance job and cleanup */
+ afterCommand(c);
+
+ /* Remember the replication offset of the client, right after its last
+ * command that resulted in propagation. */
+ if (old_master_repl_offset != server.master_repl_offset)
+ c->woff = server.master_repl_offset;
+
+ /* Client pause takes effect after a transaction has finished. This needs
+ * to be located after everything is propagated. */
+ if (!server.in_exec && server.client_pause_in_transaction) {
+ server.client_pause_in_transaction = 0;
+ }
+
+ server.executing_client = prev_client;
+}
+
+/* Used when a command that is ready for execution needs to be rejected, due to
+ * various pre-execution checks. it returns the appropriate error to the client.
+ * If there's a transaction is flags it as dirty, and if the command is EXEC,
+ * it aborts the transaction.
+ * The duration is reset, since we reject the command, and it did not record.
+ * Note: 'reply' is expected to end with \r\n */
+void rejectCommand(client *c, robj *reply) {
+ /* Mark an in-progress MULTI as dirty so a later EXEC will fail. */
+ flagTransaction(c);
+ /* The command never ran: discard any duration accumulated so far. */
+ c->duration = 0;
+ if (c->cmd) c->cmd->rejected_calls++;
+ if (c->cmd && c->cmd->proc == execCommand) {
+ /* Rejecting EXEC itself aborts the whole transaction. */
+ execCommandAbort(c, reply->ptr);
+ } else {
+ /* using addReplyError* rather than addReply so that the error can be logged. */
+ addReplyErrorObject(c, reply);
+ }
+}
+
+void rejectCommandSds(client *c, sds s) {
+ /* Same as rejectCommand() but takes ownership of the sds error string
+ * 's' and frees it on every path (directly or via addReplyErrorSds). */
+ flagTransaction(c);
+ c->duration = 0;
+ if (c->cmd) c->cmd->rejected_calls++;
+ if (c->cmd && c->cmd->proc == execCommand) {
+ execCommandAbort(c, s);
+ sdsfree(s);
+ } else {
+ /* The following frees 's'. */
+ addReplyErrorSds(c, s);
+ }
+}
+
+/* printf-style convenience wrapper around rejectCommandSds(). */
+void rejectCommandFormat(client *c, const char *fmt, ...) {
+ va_list args;
+ va_start(args,fmt);
+ sds msg = sdscatvprintf(sdsempty(),fmt,args);
+ va_end(args);
+ /* Newlines would produce invalid RESP protocol, and the format arguments
+ * may come straight from the user: squash any CR/LF into spaces. */
+ sdsmapchars(msg, "\r\n", " ", 2);
+ /* rejectCommandSds() takes ownership of 'msg'. */
+ rejectCommandSds(c, msg);
+}
+
+/* This is called after a command in call, we can do some maintenance job in it. */
+void afterCommand(client *c) {
+ UNUSED(c);
+ /* Should be done before trackingHandlePendingKeyInvalidations so that we
+ * reply to client before invalidating cache (makes more sense) */
+ postExecutionUnitOperations();
+
+ /* Flush pending tracking invalidations. */
+ trackingHandlePendingKeyInvalidations();
+
+ /* Flush other pending push messages. only when we are not in nested call.
+ * So the messages are not interleaved with transaction response. */
+ if (!server.execution_nesting)
+ listJoin(c->reply, server.pending_push_messages);
+}
+
+/* Check if c->cmd exists, fills `err` with details in case it doesn't.
+ * Return 1 if exists. */
+int commandCheckExistence(client *c, sds *err) {
+ if (c->cmd)
+ return 1;
+ /* Caller may pass NULL if it doesn't need an error description. */
+ if (!err)
+ return 0;
+ if (isContainerCommandBySds(c->argv[0]->ptr)) {
+ /* If we can't find the command but argv[0] by itself is a command
+ * it means we're dealing with an invalid subcommand. Print Help. */
+ /* NOTE(review): this branch reads argv[1], so it assumes argc >= 2
+ * whenever lookupCommand() failed on a container command — confirm
+ * with lookupCommand()'s contract. */
+ sds cmd = sdsnew((char *)c->argv[0]->ptr);
+ sdstoupper(cmd);
+ *err = sdsnew(NULL);
+ *err = sdscatprintf(*err, "unknown subcommand '%.128s'. Try %s HELP.",
+ (char *)c->argv[1]->ptr, cmd);
+ sdsfree(cmd);
+ } else {
+ /* Build a bounded (~128 byte) preview of the arguments for the error. */
+ sds args = sdsempty();
+ int i;
+ for (i=1; i < c->argc && sdslen(args) < 128; i++)
+ args = sdscatprintf(args, "'%.*s' ", 128-(int)sdslen(args), (char*)c->argv[i]->ptr);
+ *err = sdsnew(NULL);
+ *err = sdscatprintf(*err, "unknown command '%.128s', with args beginning with: %s",
+ (char*)c->argv[0]->ptr, args);
+ sdsfree(args);
+ }
+ /* Make sure there are no newlines in the string, otherwise invalid protocol
+ * is emitted (The args come from the user, they may contain any character). */
+ sdsmapchars(*err, "\r\n", " ", 2);
+ return 0;
+}
+
+/* Check if c->argc is valid for c->cmd, fills `err` with details in case it isn't.
+ * Return 1 if valid. */
+int commandCheckArity(client *c, sds *err) {
+ int arity = c->cmd->arity;
+ /* Positive arity: argc must match exactly.
+ * Negative arity: -arity is the minimum number of arguments. */
+ int valid = (arity > 0) ? (c->argc == arity) : (c->argc >= -arity);
+ if (valid) return 1;
+
+ if (err) {
+ *err = sdscatprintf(sdsnew(NULL),
+ "wrong number of arguments for '%s' command", c->cmd->fullname);
+ }
+ return 0;
+}
+
+/* If we're executing a script, try to extract a set of command flags from
+ * it, in case it declared them. Note this is just an attempt, we don't yet
+ * know the script command is well formed.*/
+uint64_t getCommandFlags(client *c) {
+ uint64_t flags = c->cmd->flags;
+
+ /* Functions and scripts may declare their own flags; ask the relevant
+ * subsystem to refine the static command-table flags. */
+ if (c->cmd->proc == fcallCommand || c->cmd->proc == fcallroCommand)
+ return fcallGetCommandFlags(c, flags);
+
+ if (c->cmd->proc == evalCommand || c->cmd->proc == evalRoCommand ||
+ c->cmd->proc == evalShaCommand || c->cmd->proc == evalShaRoCommand)
+ return evalGetCommandFlags(c, flags);
+
+ return flags;
+}
+
+/* If this function gets called we already read a whole
+ * command, arguments are in the client argv/argc fields.
+ * processCommand() execute the command or prepare the
+ * server for a bulk read from the client.
+ *
+ * If C_OK is returned the client is still alive and valid and
+ * other operations can be performed by the caller. Otherwise
+ * if C_ERR is returned the client was destroyed (i.e. after QUIT). */
+int processCommand(client *c) {
+ if (!scriptIsTimedout()) {
+ /* Both EXEC and scripts call call() directly so there should be
+ * no way in_exec or scriptIsRunning() is 1.
+ * That is unless lua_timedout, in which case client may run
+ * some commands. */
+ serverAssert(!server.in_exec);
+ serverAssert(!scriptIsRunning());
+ }
+
+ /* in case we are starting to ProcessCommand and we already have a command we assume
+ * this is a reprocessing of this command, so we do not want to perform some of the actions again. */
+ int client_reprocessing_command = c->cmd ? 1 : 0;
+
+ /* only run command filter if not reprocessing command */
+ if (!client_reprocessing_command) {
+ moduleCallCommandFilters(c);
+ reqresAppendRequest(c);
+ }
+
+ /* Handle possible security attacks. */
+ if (!strcasecmp(c->argv[0]->ptr,"host:") || !strcasecmp(c->argv[0]->ptr,"post")) {
+ securityWarningCommand(c);
+ return C_ERR;
+ }
+
+ /* If we're inside a module blocked context yielding that wants to avoid
+ * processing clients, postpone the command. */
+ if (server.busy_module_yield_flags != BUSY_MODULE_YIELD_NONE &&
+ !(server.busy_module_yield_flags & BUSY_MODULE_YIELD_CLIENTS))
+ {
+ blockPostponeClient(c);
+ return C_OK;
+ }
+
+ /* Now lookup the command and check ASAP about trivial error conditions
+ * such as wrong arity, bad command name and so forth.
+ * In case we are reprocessing a command after it was blocked,
+ * we do not have to repeat the same checks */
+ if (!client_reprocessing_command) {
+ c->cmd = c->lastcmd = c->realcmd = lookupCommand(c->argv,c->argc);
+ sds err;
+ if (!commandCheckExistence(c, &err)) {
+ rejectCommandSds(c, err);
+ return C_OK;
+ }
+ if (!commandCheckArity(c, &err)) {
+ rejectCommandSds(c, err);
+ return C_OK;
+ }
+
+
+ /* Check if the command is marked as protected and the relevant configuration allows it */
+ if (c->cmd->flags & CMD_PROTECTED) {
+ if ((c->cmd->proc == debugCommand && !allowProtectedAction(server.enable_debug_cmd, c)) ||
+ (c->cmd->proc == moduleCommand && !allowProtectedAction(server.enable_module_cmd, c)))
+ {
+ rejectCommandFormat(c,"%s command not allowed. If the %s option is set to \"local\", "
+ "you can run it from a local connection, otherwise you need to set this option "
+ "in the configuration file, and then restart the server.",
+ c->cmd->proc == debugCommand ? "DEBUG" : "MODULE",
+ c->cmd->proc == debugCommand ? "enable-debug-command" : "enable-module-command");
+ return C_OK;
+
+ }
+ }
+ }
+
+ /* Effective command flags: for EVAL/FCALL these may be refined by the
+ * flags the script itself declares. */
+ uint64_t cmd_flags = getCommandFlags(c);
+
+ /* For EXEC, each property also considers the union of the flags of the
+ * commands queued in the transaction (c->mstate). */
+ int is_read_command = (cmd_flags & CMD_READONLY) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_READONLY));
+ int is_write_command = (cmd_flags & CMD_WRITE) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE));
+ int is_denyoom_command = (cmd_flags & CMD_DENYOOM) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_DENYOOM));
+ int is_denystale_command = !(cmd_flags & CMD_STALE) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_STALE));
+ int is_denyloading_command = !(cmd_flags & CMD_LOADING) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_LOADING));
+ int is_may_replicate_command = (cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)));
+ int is_deny_async_loading_command = (cmd_flags & CMD_NO_ASYNC_LOADING) ||
+ (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_NO_ASYNC_LOADING));
+ int obey_client = mustObeyClient(c);
+
+ if (authRequired(c)) {
+ /* AUTH and HELLO and no auth commands are valid even in
+ * non-authenticated state. */
+ if (!(c->cmd->flags & CMD_NO_AUTH)) {
+ rejectCommand(c,shared.noautherr);
+ return C_OK;
+ }
+ }
+
+ if (c->flags & CLIENT_MULTI && c->cmd->flags & CMD_NO_MULTI) {
+ rejectCommandFormat(c,"Command not allowed inside a transaction");
+ return C_OK;
+ }
+
+ /* Check if the user can run this command according to the current
+ * ACLs. */
+ int acl_errpos;
+ int acl_retval = ACLCheckAllPerm(c,&acl_errpos);
+ if (acl_retval != ACL_OK) {
+ addACLLogEntry(c,acl_retval,(c->flags & CLIENT_MULTI) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL,acl_errpos,NULL,NULL);
+ sds msg = getAclErrorMessage(acl_retval, c->user, c->cmd, c->argv[acl_errpos]->ptr, 0);
+ rejectCommandFormat(c, "-NOPERM %s", msg);
+ sdsfree(msg);
+ return C_OK;
+ }
+
+ /* If cluster is enabled perform the cluster redirection here.
+ * However we don't perform the redirection if:
+ * 1) The sender of this command is our master.
+ * 2) The command has no key arguments. */
+ if (server.cluster_enabled &&
+ !mustObeyClient(c) &&
+ !(!(c->cmd->flags&CMD_MOVABLE_KEYS) && c->cmd->key_specs_num == 0 &&
+ c->cmd->proc != execCommand))
+ {
+ int error_code;
+ clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,
+ &c->slot,&error_code);
+ if (n == NULL || n != server.cluster->myself) {
+ /* A MOVED/ASK redirection aborts an EXEC entirely but only
+ * dirties a still-queuing transaction. */
+ if (c->cmd->proc == execCommand) {
+ discardTransaction(c);
+ } else {
+ flagTransaction(c);
+ }
+ clusterRedirectClient(c,n,c->slot,error_code);
+ c->cmd->rejected_calls++;
+ return C_OK;
+ }
+ }
+
+ /* Disconnect some clients if total clients memory is too high. We do this
+ * before key eviction, after the last command was executed and consumed
+ * some client output buffer memory. */
+ evictClients();
+ if (server.current_client == NULL) {
+ /* If we evicted ourself then abort processing the command */
+ return C_ERR;
+ }
+
+ /* Handle the maxmemory directive.
+ *
+ * Note that we do not want to reclaim memory if we are here re-entering
+ * the event loop since there is a busy Lua script running in timeout
+ * condition, to avoid mixing the propagation of scripts with the
+ * propagation of DELs due to eviction. */
+ if (server.maxmemory && !isInsideYieldingLongCommand()) {
+ int out_of_memory = (performEvictions() == EVICT_FAIL);
+
+ /* performEvictions may evict keys, so we need flush pending tracking
+ * invalidation keys. If we don't do this, we may get an invalidation
+ * message after we perform operation on the key, where in fact this
+ * message belongs to the old value of the key before it gets evicted.*/
+ trackingHandlePendingKeyInvalidations();
+
+ /* performEvictions may flush slave output buffers. This may result
+ * in a slave, that may be the active client, to be freed. */
+ if (server.current_client == NULL) return C_ERR;
+
+ int reject_cmd_on_oom = is_denyoom_command;
+ /* If client is in MULTI/EXEC context, queuing may consume an unlimited
+ * amount of memory, so we want to stop that.
+ * However, we never want to reject DISCARD, or even EXEC (unless it
+ * contains denied commands, in which case is_denyoom_command is already
+ * set. */
+ if (c->flags & CLIENT_MULTI &&
+ c->cmd->proc != execCommand &&
+ c->cmd->proc != discardCommand &&
+ c->cmd->proc != quitCommand &&
+ c->cmd->proc != resetCommand) {
+ reject_cmd_on_oom = 1;
+ }
+
+ if (out_of_memory && reject_cmd_on_oom) {
+ rejectCommand(c, shared.oomerr);
+ return C_OK;
+ }
+
+ /* Save out_of_memory result at command start, otherwise if we check OOM
+ * in the first write within script, memory used by lua stack and
+ * arguments might interfere. We need to save it for EXEC and module
+ * calls too, since these can call EVAL, but avoid saving it during an
+ * interrupted / yielding busy script / module. */
+ server.pre_command_oom_state = out_of_memory;
+ }
+
+ /* Make sure to use a reasonable amount of memory for client side
+ * caching metadata. */
+ if (server.tracking_clients) trackingLimitUsedSlots();
+
+ /* Don't accept write commands if there are problems persisting on disk
+ * unless coming from our master, in which case check the replica ignore
+ * disk write error config to either log or crash. */
+ int deny_write_type = writeCommandsDeniedByDiskError();
+ if (deny_write_type != DISK_ERROR_TYPE_NONE &&
+ (is_write_command || c->cmd->proc == pingCommand))
+ {
+ if (obey_client) {
+ if (!server.repl_ignore_disk_write_error && c->cmd->proc != pingCommand) {
+ serverPanic("Replica was unable to write command to disk.");
+ } else {
+ /* Rate-limit this warning to once per 10 seconds. */
+ static mstime_t last_log_time_ms = 0;
+ const mstime_t log_interval_ms = 10000;
+ if (server.mstime > last_log_time_ms + log_interval_ms) {
+ last_log_time_ms = server.mstime;
+ serverLog(LL_WARNING, "Replica is applying a command even though "
+ "it is unable to write to disk.");
+ }
+ }
+ } else {
+ sds err = writeCommandsGetDiskErrorMessage(deny_write_type);
+ /* remove the newline since rejectCommandSds adds it. */
+ sdssubstr(err, 0, sdslen(err)-2);
+ rejectCommandSds(c, err);
+ return C_OK;
+ }
+ }
+
+ /* Don't accept write commands if there are not enough good slaves and
+ * user configured the min-slaves-to-write option. */
+ if (is_write_command && !checkGoodReplicasStatus()) {
+ rejectCommand(c, shared.noreplicaserr);
+ return C_OK;
+ }
+
+ /* Don't accept write commands if this is a read only slave. But
+ * accept write commands if this is our master. */
+ if (server.masterhost && server.repl_slave_ro &&
+ !obey_client &&
+ is_write_command)
+ {
+ rejectCommand(c, shared.roslaveerr);
+ return C_OK;
+ }
+
+ /* Only allow a subset of commands in the context of Pub/Sub if the
+ * connection is in RESP2 mode. With RESP3 there are no limits. */
+ if ((c->flags & CLIENT_PUBSUB && c->resp == 2) &&
+ c->cmd->proc != pingCommand &&
+ c->cmd->proc != subscribeCommand &&
+ c->cmd->proc != ssubscribeCommand &&
+ c->cmd->proc != unsubscribeCommand &&
+ c->cmd->proc != sunsubscribeCommand &&
+ c->cmd->proc != psubscribeCommand &&
+ c->cmd->proc != punsubscribeCommand &&
+ c->cmd->proc != quitCommand &&
+ c->cmd->proc != resetCommand) {
+ rejectCommandFormat(c,
+ "Can't execute '%s': only (P|S)SUBSCRIBE / "
+ "(P|S)UNSUBSCRIBE / PING / QUIT / RESET are allowed in this context",
+ c->cmd->fullname);
+ return C_OK;
+ }
+
+ /* Only allow commands with flag "t", such as INFO, REPLICAOF and so on,
+ * when replica-serve-stale-data is no and we are a replica with a broken
+ * link with master. */
+ if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED &&
+ server.repl_serve_stale_data == 0 &&
+ is_denystale_command)
+ {
+ rejectCommand(c, shared.masterdownerr);
+ return C_OK;
+ }
+
+ /* Loading DB? Return an error if the command has not the
+ * CMD_LOADING flag. */
+ if (server.loading && !server.async_loading && is_denyloading_command) {
+ rejectCommand(c, shared.loadingerr);
+ return C_OK;
+ }
+
+ /* During async-loading, block certain commands. */
+ if (server.async_loading && is_deny_async_loading_command) {
+ rejectCommand(c,shared.loadingerr);
+ return C_OK;
+ }
+
+ /* when a busy job is being done (script / module)
+ * Only allow a limited number of commands.
+ * Note that we need to allow the transactions commands, otherwise clients
+ * sending a transaction with pipelining without error checking, may have
+ * the MULTI plus a few initial commands refused, then the timeout
+ * condition resolves, and the bottom-half of the transaction gets
+ * executed, see Github PR #7022. */
+ if (isInsideYieldingLongCommand() && !(c->cmd->flags & CMD_ALLOW_BUSY)) {
+ if (server.busy_module_yield_flags && server.busy_module_yield_reply) {
+ rejectCommandFormat(c, "-BUSY %s", server.busy_module_yield_reply);
+ } else if (server.busy_module_yield_flags) {
+ rejectCommand(c, shared.slowmoduleerr);
+ } else if (scriptIsEval()) {
+ rejectCommand(c, shared.slowevalerr);
+ } else {
+ rejectCommand(c, shared.slowscripterr);
+ }
+ return C_OK;
+ }
+
+ /* Prevent a replica from sending commands that access the keyspace.
+ * The main objective here is to prevent abuse of client pause check
+ * from which replicas are exempt. */
+ if ((c->flags & CLIENT_SLAVE) && (is_may_replicate_command || is_write_command || is_read_command)) {
+ rejectCommandFormat(c, "Replica can't interact with the keyspace");
+ return C_OK;
+ }
+
+ /* If the server is paused, block the client until
+ * the pause has ended. Replicas are never paused. */
+ if (!(c->flags & CLIENT_SLAVE) &&
+ ((isPausedActions(PAUSE_ACTION_CLIENT_ALL)) ||
+ ((isPausedActions(PAUSE_ACTION_CLIENT_WRITE)) && is_may_replicate_command)))
+ {
+ blockPostponeClient(c);
+ return C_OK;
+ }
+
+ /* Exec the command */
+ if (c->flags & CLIENT_MULTI &&
+ c->cmd->proc != execCommand &&
+ c->cmd->proc != discardCommand &&
+ c->cmd->proc != multiCommand &&
+ c->cmd->proc != watchCommand &&
+ c->cmd->proc != quitCommand &&
+ c->cmd->proc != resetCommand)
+ {
+ /* Inside MULTI: just queue the command instead of executing it. */
+ queueMultiCommand(c, cmd_flags);
+ addReply(c,shared.queued);
+ } else {
+ int flags = CMD_CALL_FULL;
+ if (client_reprocessing_command) flags |= CMD_CALL_REPROCESSING;
+ call(c,flags);
+ if (listLength(server.ready_keys) && !isInsideYieldingLongCommand())
+ handleClientsBlockedOnKeys();
+ }
+
+ return C_OK;
+}
+
+/* ====================== Error lookup and execution ===================== */
+
+void incrementErrorCount(const char *fullerr, size_t namelen) {
+ struct redisError *error = raxFind(server.errors,(unsigned char*)fullerr,namelen);
+ if (error == raxNotFound) {
+ error = zmalloc(sizeof(*error));
+ error->count = 0;
+ raxInsert(server.errors,(unsigned char*)fullerr,namelen,error,NULL);
+ }
+ error->count++;
+}
+
+/*================================== Shutdown =============================== */
+
+/* Close listening sockets. Also unlink the unix domain socket if
+ * unlink_unix_socket is non-zero. */
+void closeListeningSockets(int unlink_unix_socket) {
+ /* Close every fd of every configured connection-type listener. */
+ for (int i = 0; i < CONN_TYPE_MAX; i++) {
+ connListener *listener = &server.listeners[i];
+ if (listener->ct == NULL) continue;
+ for (int j = 0; j < listener->count; j++) close(listener->fd[j]);
+ }
+
+ /* The cluster bus has its own dedicated listener. */
+ if (server.cluster_enabled) {
+ for (int j = 0; j < server.clistener.count; j++)
+ close(server.clistener.fd[j]);
+ }
+
+ /* Optionally remove the unix domain socket file from the filesystem. */
+ if (unlink_unix_socket && server.unixsocket) {
+ serverLog(LL_NOTICE,"Removing the unix socket file.");
+ if (unlink(server.unixsocket) != 0)
+ serverLog(LL_WARNING,"Error removing the unix socket file: %s",strerror(errno));
+ }
+}
+
+/* Prepare for shutting down the server. Flags:
+ *
+ * - SHUTDOWN_SAVE: Save a database dump even if the server is configured not to
+ * save any dump.
+ *
+ * - SHUTDOWN_NOSAVE: Don't save any database dump even if the server is
+ * configured to save one.
+ *
+ * - SHUTDOWN_NOW: Don't wait for replicas to catch up before shutting down.
+ *
+ * - SHUTDOWN_FORCE: Ignore errors writing AOF and RDB files on disk, which
+ * would normally prevent a shutdown.
+ *
+ * Unless SHUTDOWN_NOW is set and if any replicas are lagging behind, C_ERR is
+ * returned and server.shutdown_mstime is set to a timestamp to allow a grace
+ * period for the replicas to catch up. This is checked and handled by
+ * serverCron() which completes the shutdown as soon as possible.
+ *
+ * If shutting down fails due to errors writing RDB or AOF files, C_ERR is
+ * returned and an error is logged. If the flag SHUTDOWN_FORCE is set, these
+ * errors are logged but ignored and C_OK is returned.
+ *
+ * On success, this function returns C_OK and then it's OK to call exit(0). */
+int prepareForShutdown(int flags) {
+ /* Idempotence guard: a second SHUTDOWN while one is pending is a no-op. */
+ if (isShutdownInitiated()) return C_ERR;
+
+ /* When SHUTDOWN is called while the server is loading a dataset in
+ * memory we need to make sure no attempt is performed to save
+ * the dataset on shutdown (otherwise it could overwrite the current DB
+ * with half-read data).
+ *
+ * Also when in Sentinel mode clear the SAVE flag and force NOSAVE. */
+ if (server.loading || server.sentinel_mode)
+ flags = (flags & ~SHUTDOWN_SAVE) | SHUTDOWN_NOSAVE;
+
+ server.shutdown_flags = flags;
+
+ serverLog(LL_NOTICE,"User requested shutdown...");
+ if (server.supervised_mode == SUPERVISED_SYSTEMD)
+ redisCommunicateSystemd("STOPPING=1\n");
+
+ /* If we have any replicas, let them catch up the replication offset before
+ * we shut down, to avoid data loss. */
+ if (!(flags & SHUTDOWN_NOW) &&
+ server.shutdown_timeout != 0 &&
+ !isReadyToShutdown())
+ {
+ /* Set the grace-period deadline; serverCron() completes the shutdown
+ * once replicas catch up or the deadline passes. */
+ server.shutdown_mstime = server.mstime + server.shutdown_timeout * 1000;
+ if (!isPausedActions(PAUSE_ACTION_REPLICA)) sendGetackToReplicas();
+ pauseActions(PAUSE_DURING_SHUTDOWN,
+ LLONG_MAX,
+ PAUSE_ACTIONS_CLIENT_WRITE_SET);
+ serverLog(LL_NOTICE, "Waiting for replicas before shutting down.");
+ return C_ERR;
+ }
+
+ return finishShutdown();
+}
+
+static inline int isShutdownInitiated(void) {
+ return server.shutdown_mstime != 0;
+}
+
+/* Returns 0 if there are any replicas which are lagging in replication which we
+ * need to wait for before shutting down. Returns 1 if we're ready to shut
+ * down now. */
+int isReadyToShutdown(void) {
+ if (listLength(server.slaves) == 0) return 1; /* No replicas. */
+
+ listIter li;
+ listNode *ln;
+ listRewind(server.slaves, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ client *replica = listNodeValue(ln);
+ if (replica->repl_ack_off != server.master_repl_offset) return 0;
+ }
+ return 1;
+}
+
/* Revert the state changes done by prepareForShutdown(): clear the shutdown
 * bookkeeping fields, reply to clients blocked on SHUTDOWN, and lift the
 * client-write pause installed for the replica grace period. */
static void cancelShutdown(void) {
    server.shutdown_asap = 0;
    server.shutdown_flags = 0;
    server.shutdown_mstime = 0;
    server.last_sig_received = 0;
    replyToClientsBlockedOnShutdown();
    unpauseActions(PAUSE_DURING_SHUTDOWN);
}
+
+/* Returns C_OK if shutdown was aborted and C_ERR if shutdown wasn't ongoing. */
+int abortShutdown(void) {
+ if (isShutdownInitiated()) {
+ cancelShutdown();
+ } else if (server.shutdown_asap) {
+ /* Signal handler has requested shutdown, but it hasn't been initiated
+ * yet. Just clear the flag. */
+ server.shutdown_asap = 0;
+ } else {
+ /* Shutdown neither initiated nor requested. */
+ return C_ERR;
+ }
+ serverLog(LL_NOTICE, "Shutdown manually aborted.");
+ return C_OK;
+}
+
/* The final step of the shutdown sequence. Returns C_OK if the shutdown
 * sequence was successful and it's OK to call exit(). If C_ERR is returned,
 * it's not safe to call exit(). */
int finishShutdown(void) {

    /* Decode the flags recorded by prepareForShutdown(). */
    int save = server.shutdown_flags & SHUTDOWN_SAVE;
    int nosave = server.shutdown_flags & SHUTDOWN_NOSAVE;
    int force = server.shutdown_flags & SHUTDOWN_FORCE;

    /* Log a warning for each replica that is lagging. */
    listIter replicas_iter;
    listNode *replicas_list_node;
    int num_replicas = 0, num_lagging_replicas = 0;
    listRewind(server.slaves, &replicas_iter);
    while ((replicas_list_node = listNext(&replicas_iter)) != NULL) {
        client *replica = listNodeValue(replicas_list_node);
        num_replicas++;
        if (replica->repl_ack_off != server.master_repl_offset) {
            num_lagging_replicas++;
            /* Lag in seconds is only meaningful for ONLINE replicas; other
             * states haven't started acking yet. */
            long lag = replica->replstate == SLAVE_STATE_ONLINE ?
                        time(NULL) - replica->repl_ack_time : 0;
            serverLog(LL_NOTICE,
                      "Lagging replica %s reported offset %lld behind master, lag=%ld, state=%s.",
                      replicationGetSlaveName(replica),
                      server.master_repl_offset - replica->repl_ack_off,
                      lag,
                      replstateToString(replica->replstate));
        }
    }
    if (num_replicas > 0) {
        serverLog(LL_NOTICE,
                  "%d of %d replicas are in sync when shutting down.",
                  num_replicas - num_lagging_replicas,
                  num_replicas);
    }

    /* Kill all the Lua debugger forked sessions. */
    ldbKillForkedSessions();

    /* Kill the saving child if there is a background saving in progress.
       We want to avoid race conditions, for instance our saving child may
       overwrite the synchronous saving did by SHUTDOWN. */
    if (server.child_type == CHILD_TYPE_RDB) {
        serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!");
        killRDBChild();
        /* Note that, in killRDBChild normally has backgroundSaveDoneHandler
         * doing it's cleanup, but in this case this code will not be reached,
         * so we need to call rdbRemoveTempFile which will close fd(in order
         * to unlink file actually) in background thread.
         * The temp rdb file fd may won't be closed when redis exits quickly,
         * but OS will close this fd when process exits. */
        rdbRemoveTempFile(server.child_pid, 0);
    }

    /* Kill module child if there is one. */
    if (server.child_type == CHILD_TYPE_MODULE) {
        serverLog(LL_WARNING,"There is a module fork child. Killing it!");
        TerminateModuleForkChild(server.child_pid,0);
    }

    /* Kill the AOF saving child as the AOF we already have may be longer
     * but contains the full dataset anyway. */
    if (server.child_type == CHILD_TYPE_AOF) {
        /* If we have AOF enabled but haven't written the AOF yet, don't
         * shutdown or else the dataset will be lost. */
        if (server.aof_state == AOF_WAIT_REWRITE) {
            if (force) {
                serverLog(LL_WARNING, "Writing initial AOF. Exit anyway.");
            } else {
                serverLog(LL_WARNING, "Writing initial AOF, can't exit.");
                if (server.supervised_mode == SUPERVISED_SYSTEMD)
                    redisCommunicateSystemd("STATUS=Writing initial AOF, can't exit.\n");
                goto error;
            }
        }
        serverLog(LL_WARNING,
                  "There is a child rewriting the AOF. Killing it!");
        killAppendOnlyChild();
    }
    if (server.aof_state != AOF_OFF) {
        /* Append only file: flush buffers and fsync() the AOF at exit */
        serverLog(LL_NOTICE,"Calling fsync() on the AOF file.");
        flushAppendOnlyFile(1);
        if (redis_fsync(server.aof_fd) == -1) {
            serverLog(LL_WARNING,"Fail to fsync the AOF file: %s.",
                      strerror(errno));
        }
    }

    /* Create a new RDB file before exiting. */
    if ((server.saveparamslen > 0 && !nosave) || save) {
        serverLog(LL_NOTICE,"Saving the final RDB snapshot before exiting.");
        if (server.supervised_mode == SUPERVISED_SYSTEMD)
            redisCommunicateSystemd("STATUS=Saving the final RDB snapshot\n");
        /* Snapshotting. Perform a SYNC SAVE and exit */
        rdbSaveInfo rsi, *rsiptr;
        rsiptr = rdbPopulateSaveInfo(&rsi);
        /* Keep the page cache since it's likely to restart soon */
        if (rdbSave(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_KEEP_CACHE) != C_OK) {
            /* Ooops.. error saving! The best we can do is to continue
             * operating. Note that if there was a background saving process,
             * in the next cron() Redis will be notified that the background
             * saving aborted, handling special stuff like slaves pending for
             * synchronization... */
            if (force) {
                serverLog(LL_WARNING,"Error trying to save the DB. Exit anyway.");
            } else {
                serverLog(LL_WARNING,"Error trying to save the DB, can't exit.");
                if (server.supervised_mode == SUPERVISED_SYSTEMD)
                    redisCommunicateSystemd("STATUS=Error trying to save the DB, can't exit.\n");
                goto error;
            }
        }
    }

    /* Free the AOF manifest. */
    if (server.aof_manifest) aofManifestFree(server.aof_manifest);

    /* Fire the shutdown modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_SHUTDOWN,0,NULL);

    /* Remove the pid file if possible and needed. */
    /* NOTE(review): assumes server.pidfile is non-NULL whenever daemonize is
     * set (presumably guaranteed by startup code) — unlink(NULL) would be
     * undefined behavior; confirm against the initialization path. */
    if (server.daemonize || server.pidfile) {
        serverLog(LL_NOTICE,"Removing the pid file.");
        unlink(server.pidfile);
    }

    /* Best effort flush of slave output buffers, so that we hopefully
     * send them pending writes. */
    flushSlavesOutputBuffers();

    /* Close the listening sockets. Apparently this allows faster restarts. */
    closeListeningSockets(1);

#if !defined(__sun)
    /* Unlock the cluster config file before shutdown */
    if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1) {
        flock(server.cluster_config_file_lock_fd, LOCK_UN|LOCK_NB);
    }
#endif /* __sun */


    serverLog(LL_WARNING,"%s is now ready to exit, bye bye...",
        server.sentinel_mode ? "Sentinel" : "Redis");
    return C_OK;

error:
    serverLog(LL_WARNING, "Errors trying to shut down the server. Check the logs for more information.");
    cancelShutdown();
    return C_ERR;
}
+
+/*================================== Commands =============================== */
+
+/* Sometimes Redis cannot accept write commands because there is a persistence
+ * error with the RDB or AOF file, and Redis is configured in order to stop
+ * accepting writes in such situation. This function returns if such a
+ * condition is active, and the type of the condition.
+ *
+ * Function return values:
+ *
+ * DISK_ERROR_TYPE_NONE: No problems, we can accept writes.
+ * DISK_ERROR_TYPE_AOF: Don't accept writes: AOF errors.
+ * DISK_ERROR_TYPE_RDB: Don't accept writes: RDB errors.
+ */
+int writeCommandsDeniedByDiskError(void) {
+ if (server.stop_writes_on_bgsave_err &&
+ server.saveparamslen > 0 &&
+ server.lastbgsave_status == C_ERR)
+ {
+ return DISK_ERROR_TYPE_RDB;
+ } else if (server.aof_state != AOF_OFF) {
+ if (server.aof_last_write_status == C_ERR) {
+ return DISK_ERROR_TYPE_AOF;
+ }
+ /* AOF fsync error. */
+ int aof_bio_fsync_status;
+ atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status);
+ if (aof_bio_fsync_status == C_ERR) {
+ atomicGet(server.aof_bio_fsync_errno,server.aof_last_write_errno);
+ return DISK_ERROR_TYPE_AOF;
+ }
+ }
+
+ return DISK_ERROR_TYPE_NONE;
+}
+
+sds writeCommandsGetDiskErrorMessage(int error_code) {
+ sds ret = NULL;
+ if (error_code == DISK_ERROR_TYPE_RDB) {
+ ret = sdsdup(shared.bgsaveerr->ptr);
+ } else {
+ ret = sdscatfmt(sdsempty(),
+ "-MISCONF Errors writing to the AOF file: %s\r\n",
+ strerror(server.aof_last_write_errno));
+ }
+ return ret;
+}
+
+/* The PING command. It works in a different way if the client is in
+ * in Pub/Sub mode. */
+void pingCommand(client *c) {
+ /* The command takes zero or one arguments. */
+ if (c->argc > 2) {
+ addReplyErrorArity(c);
+ return;
+ }
+
+ if (c->flags & CLIENT_PUBSUB && c->resp == 2) {
+ addReply(c,shared.mbulkhdr[2]);
+ addReplyBulkCBuffer(c,"pong",4);
+ if (c->argc == 1)
+ addReplyBulkCBuffer(c,"",0);
+ else
+ addReplyBulk(c,c->argv[1]);
+ } else {
+ if (c->argc == 1)
+ addReply(c,shared.pong);
+ else
+ addReplyBulk(c,c->argv[1]);
+ }
+}
+
/* The ECHO command: reply with a verbatim copy of the single argument. */
void echoCommand(client *c) {
    addReplyBulk(c,c->argv[1]);
}
+
+void timeCommand(client *c) {
+ addReplyArrayLen(c,2);
+ addReplyBulkLongLong(c, server.unixtime);
+ addReplyBulkLongLong(c, server.ustime-((long long)server.unixtime)*1000000);
+}
+
/* Maps one command/argument/key-spec flag bit to the name used for it in
 * COMMAND replies. Tables of these are terminated by a {0,NULL} entry. */
typedef struct replyFlagNames {
    uint64_t flag;
    const char *name;
} replyFlagNames;
+
+/* Helper function to output flags. */
+void addReplyCommandFlags(client *c, uint64_t flags, replyFlagNames *replyFlags) {
+ int count = 0, j=0;
+ /* Count them so we don't have to use deferred reply. */
+ while (replyFlags[j].name) {
+ if (flags & replyFlags[j].flag)
+ count++;
+ j++;
+ }
+
+ addReplySetLen(c, count);
+ j = 0;
+ while (replyFlags[j].name) {
+ if (flags & replyFlags[j].flag)
+ addReplyStatus(c, replyFlags[j].name);
+ j++;
+ }
+}
+
/* Reply with the command-level flags of 'cmd' (write, readonly, admin, ...).
 * Several internal flags are deliberately omitted from the table (see the
 * commented-out entries) so they never appear in COMMAND output. */
void addReplyFlagsForCommand(client *c, struct redisCommand *cmd) {
    replyFlagNames flagNames[] = {
        {CMD_WRITE,             "write"},
        {CMD_READONLY,          "readonly"},
        {CMD_DENYOOM,           "denyoom"},
        {CMD_MODULE,            "module"},
        {CMD_ADMIN,             "admin"},
        {CMD_PUBSUB,            "pubsub"},
        {CMD_NOSCRIPT,          "noscript"},
        {CMD_BLOCKING,          "blocking"},
        {CMD_LOADING,           "loading"},
        {CMD_STALE,             "stale"},
        {CMD_SKIP_MONITOR,      "skip_monitor"},
        {CMD_SKIP_SLOWLOG,      "skip_slowlog"},
        {CMD_ASKING,            "asking"},
        {CMD_FAST,              "fast"},
        {CMD_NO_AUTH,           "no_auth"},
        /* {CMD_MAY_REPLICATE,     "may_replicate"},, Hidden on purpose */
        /* {CMD_SENTINEL,          "sentinel"}, Hidden on purpose */
        /* {CMD_ONLY_SENTINEL,     "only_sentinel"}, Hidden on purpose */
        {CMD_NO_MANDATORY_KEYS, "no_mandatory_keys"},
        /* {CMD_PROTECTED,         "protected"}, Hidden on purpose */
        {CMD_NO_ASYNC_LOADING,  "no_async_loading"},
        {CMD_NO_MULTI,          "no_multi"},
        {CMD_MOVABLE_KEYS,      "movablekeys"},
        {CMD_ALLOW_BUSY,        "allow_busy"},
        /* {CMD_TOUCHES_ARBITRARY_KEYS,  "TOUCHES_ARBITRARY_KEYS"}, Hidden on purpose */
        {0,NULL}
    };
    addReplyCommandFlags(c, cmd->flags, flagNames);
}
+
/* Reply with the documentation flags of 'cmd', as used by COMMAND DOCS. */
void addReplyDocFlagsForCommand(client *c, struct redisCommand *cmd) {
    replyFlagNames docFlagNames[] = {
        {CMD_DOC_DEPRECATED,         "deprecated"},
        {CMD_DOC_SYSCMD,             "syscmd"},
        {0,NULL}
    };
    addReplyCommandFlags(c, cmd->doc_flags, docFlagNames);
}
+
+void addReplyFlagsForKeyArgs(client *c, uint64_t flags) {
+ replyFlagNames docFlagNames[] = {
+ {CMD_KEY_RO, "RO"},
+ {CMD_KEY_RW, "RW"},
+ {CMD_KEY_OW, "OW"},
+ {CMD_KEY_RM, "RM"},
+ {CMD_KEY_ACCESS, "access"},
+ {CMD_KEY_UPDATE, "update"},
+ {CMD_KEY_INSERT, "insert"},
+ {CMD_KEY_DELETE, "delete"},
+ {CMD_KEY_NOT_KEY, "not_key"},
+ {CMD_KEY_INCOMPLETE, "incomplete"},
+ {CMD_KEY_VARIABLE_FLAGS, "variable_flags"},
+ {0,NULL}
+ };
+ addReplyCommandFlags(c, flags, docFlagNames);
+}
+
/* String names for each argument type, indexed by the redisCommandArgType
 * enum value. Must match redisCommandArgType (same order, same count). */
const char *ARG_TYPE_STR[] = {
    "string",
    "integer",
    "double",
    "key",
    "pattern",
    "unix-time",
    "pure-token",
    "oneof",
    "block",
};
+
/* Reply with the flags of a single command argument (optional, multiple...),
 * as used by COMMAND DOCS when describing the argument tree. */
void addReplyFlagsForArg(client *c, uint64_t flags) {
    replyFlagNames argFlagNames[] = {
        {CMD_ARG_OPTIONAL,          "optional"},
        {CMD_ARG_MULTIPLE,          "multiple"},
        {CMD_ARG_MULTIPLE_TOKEN,    "multiple_token"},
        {0,NULL}
    };
    addReplyCommandFlags(c, flags, argFlagNames);
}
+
/* Reply with an array of maps describing each argument in 'args', recursing
 * into container arguments (oneof/block). Used by COMMAND DOCS. The map
 * length is pre-counted so no deferred reply is needed; the count logic below
 * must stay in sync with the fields actually emitted. */
void addReplyCommandArgList(client *c, struct redisCommandArg *args, int num_args) {
    addReplyArrayLen(c, num_args);
    for (int j = 0; j<num_args; j++) {
        /* Count our reply len so we don't have to use deferred reply. */
        int has_display_text = 1;
        long maplen = 2;
        if (args[j].key_spec_index != -1) maplen++;
        if (args[j].token) maplen++;
        if (args[j].summary) maplen++;
        if (args[j].since) maplen++;
        if (args[j].deprecated_since) maplen++;
        if (args[j].flags) maplen++;
        /* Container arguments (oneof/block) get a nested "arguments" field
         * instead of a "display_text" field. */
        if (args[j].type == ARG_TYPE_ONEOF || args[j].type == ARG_TYPE_BLOCK) {
            has_display_text = 0;
            maplen++;
        }
        if (has_display_text) maplen++;
        addReplyMapLen(c, maplen);

        addReplyBulkCString(c, "name");
        addReplyBulkCString(c, args[j].name);

        addReplyBulkCString(c, "type");
        addReplyBulkCString(c, ARG_TYPE_STR[args[j].type]);

        if (has_display_text) {
            addReplyBulkCString(c, "display_text");
            /* Fall back to the argument name when no display text is set. */
            addReplyBulkCString(c, args[j].display_text ? args[j].display_text : args[j].name);
        }
        if (args[j].key_spec_index != -1) {
            addReplyBulkCString(c, "key_spec_index");
            addReplyLongLong(c, args[j].key_spec_index);
        }
        if (args[j].token) {
            addReplyBulkCString(c, "token");
            addReplyBulkCString(c, args[j].token);
        }
        if (args[j].summary) {
            addReplyBulkCString(c, "summary");
            addReplyBulkCString(c, args[j].summary);
        }
        if (args[j].since) {
            addReplyBulkCString(c, "since");
            addReplyBulkCString(c, args[j].since);
        }
        if (args[j].deprecated_since) {
            addReplyBulkCString(c, "deprecated_since");
            addReplyBulkCString(c, args[j].deprecated_since);
        }
        if (args[j].flags) {
            addReplyBulkCString(c, "flags");
            addReplyFlagsForArg(c, args[j].flags);
        }
        if (args[j].type == ARG_TYPE_ONEOF || args[j].type == ARG_TYPE_BLOCK) {
            addReplyBulkCString(c, "arguments");
            addReplyCommandArgList(c, args[j].subargs, args[j].num_args);
        }
    }
}
+
+#ifdef LOG_REQ_RES
+
/* Recursively emit a jsonObject (the in-memory representation of a command's
 * reply schema) as a RESP map. Only compiled when LOG_REQ_RES is defined;
 * used by COMMAND DOCS to report the "reply_schema" field. */
void addReplyJson(client *c, struct jsonObject *rs) {
    addReplyMapLen(c, rs->length);

    for (int i = 0; i < rs->length; i++) {
        struct jsonObjectElement *curr = &rs->elements[i];
        addReplyBulkCString(c, curr->key);
        switch (curr->type) {
        case (JSON_TYPE_BOOLEAN):
            addReplyBool(c, curr->value.boolean);
            break;
        case (JSON_TYPE_INTEGER):
            addReplyLongLong(c, curr->value.integer);
            break;
        case (JSON_TYPE_STRING):
            addReplyBulkCString(c, curr->value.string);
            break;
        case (JSON_TYPE_OBJECT):
            /* Nested objects are emitted as nested maps. */
            addReplyJson(c, curr->value.object);
            break;
        case (JSON_TYPE_ARRAY):
            addReplyArrayLen(c, curr->value.array.length);
            for (int k = 0; k < curr->value.array.length; k++) {
                struct jsonObject *object = curr->value.array.objects[k];
                addReplyJson(c, object);
            }
            break;
        default:
            serverPanic("Invalid JSON type %d", curr->type);
        }
    }
}
+
+#endif
+
+void addReplyCommandHistory(client *c, struct redisCommand *cmd) {
+ addReplySetLen(c, cmd->num_history);
+ for (int j = 0; j<cmd->num_history; j++) {
+ addReplyArrayLen(c, 2);
+ addReplyBulkCString(c, cmd->history[j].since);
+ addReplyBulkCString(c, cmd->history[j].changes);
+ }
+}
+
+void addReplyCommandTips(client *c, struct redisCommand *cmd) {
+ addReplySetLen(c, cmd->num_tips);
+ for (int j = 0; j<cmd->num_tips; j++) {
+ addReplyBulkCString(c, cmd->tips[j]);
+ }
+}
+
+void addReplyCommandKeySpecs(client *c, struct redisCommand *cmd) {
+ addReplySetLen(c, cmd->key_specs_num);
+ for (int i = 0; i < cmd->key_specs_num; i++) {
+ int maplen = 3;
+ if (cmd->key_specs[i].notes) maplen++;
+
+ addReplyMapLen(c, maplen);
+
+ if (cmd->key_specs[i].notes) {
+ addReplyBulkCString(c, "notes");
+ addReplyBulkCString(c,cmd->key_specs[i].notes);
+ }
+
+ addReplyBulkCString(c, "flags");
+ addReplyFlagsForKeyArgs(c,cmd->key_specs[i].flags);
+
+ addReplyBulkCString(c, "begin_search");
+ switch (cmd->key_specs[i].begin_search_type) {
+ case KSPEC_BS_UNKNOWN:
+ addReplyMapLen(c, 2);
+ addReplyBulkCString(c, "type");
+ addReplyBulkCString(c, "unknown");
+
+ addReplyBulkCString(c, "spec");
+ addReplyMapLen(c, 0);
+ break;
+ case KSPEC_BS_INDEX:
+ addReplyMapLen(c, 2);
+ addReplyBulkCString(c, "type");
+ addReplyBulkCString(c, "index");
+
+ addReplyBulkCString(c, "spec");
+ addReplyMapLen(c, 1);
+ addReplyBulkCString(c, "index");
+ addReplyLongLong(c, cmd->key_specs[i].bs.index.pos);
+ break;
+ case KSPEC_BS_KEYWORD:
+ addReplyMapLen(c, 2);
+ addReplyBulkCString(c, "type");
+ addReplyBulkCString(c, "keyword");
+
+ addReplyBulkCString(c, "spec");
+ addReplyMapLen(c, 2);
+ addReplyBulkCString(c, "keyword");
+ addReplyBulkCString(c, cmd->key_specs[i].bs.keyword.keyword);
+ addReplyBulkCString(c, "startfrom");
+ addReplyLongLong(c, cmd->key_specs[i].bs.keyword.startfrom);
+ break;
+ default:
+ serverPanic("Invalid begin_search key spec type %d", cmd->key_specs[i].begin_search_type);
+ }
+
+ addReplyBulkCString(c, "find_keys");
+ switch (cmd->key_specs[i].find_keys_type) {
+ case KSPEC_FK_UNKNOWN:
+ addReplyMapLen(c, 2);
+ addReplyBulkCString(c, "type");
+ addReplyBulkCString(c, "unknown");
+
+ addReplyBulkCString(c, "spec");
+ addReplyMapLen(c, 0);
+ break;
+ case KSPEC_FK_RANGE:
+ addReplyMapLen(c, 2);
+ addReplyBulkCString(c, "type");
+ addReplyBulkCString(c, "range");
+
+ addReplyBulkCString(c, "spec");
+ addReplyMapLen(c, 3);
+ addReplyBulkCString(c, "lastkey");
+ addReplyLongLong(c, cmd->key_specs[i].fk.range.lastkey);
+ addReplyBulkCString(c, "keystep");
+ addReplyLongLong(c, cmd->key_specs[i].fk.range.keystep);
+ addReplyBulkCString(c, "limit");
+ addReplyLongLong(c, cmd->key_specs[i].fk.range.limit);
+ break;
+ case KSPEC_FK_KEYNUM:
+ addReplyMapLen(c, 2);
+ addReplyBulkCString(c, "type");
+ addReplyBulkCString(c, "keynum");
+
+ addReplyBulkCString(c, "spec");
+ addReplyMapLen(c, 3);
+ addReplyBulkCString(c, "keynumidx");
+ addReplyLongLong(c, cmd->key_specs[i].fk.keynum.keynumidx);
+ addReplyBulkCString(c, "firstkey");
+ addReplyLongLong(c, cmd->key_specs[i].fk.keynum.firstkey);
+ addReplyBulkCString(c, "keystep");
+ addReplyLongLong(c, cmd->key_specs[i].fk.keynum.keystep);
+ break;
+ default:
+ serverPanic("Invalid find_keys key spec type %d", cmd->key_specs[i].begin_search_type);
+ }
+ }
+}
+
+/* Reply with an array of sub-command using the provided reply callback. */
+void addReplyCommandSubCommands(client *c, struct redisCommand *cmd, void (*reply_function)(client*, struct redisCommand*), int use_map) {
+ if (!cmd->subcommands_dict) {
+ addReplySetLen(c, 0);
+ return;
+ }
+
+ if (use_map)
+ addReplyMapLen(c, dictSize(cmd->subcommands_dict));
+ else
+ addReplyArrayLen(c, dictSize(cmd->subcommands_dict));
+ dictEntry *de;
+ dictIterator *di = dictGetSafeIterator(cmd->subcommands_dict);
+ while((de = dictNext(di)) != NULL) {
+ struct redisCommand *sub = (struct redisCommand *)dictGetVal(de);
+ if (use_map)
+ addReplyBulkCBuffer(c, sub->fullname, sdslen(sub->fullname));
+ reply_function(c, sub);
+ }
+ dictReleaseIterator(di);
+}
+
/* Output the representation of a Redis command. Used by the COMMAND command and COMMAND INFO. */
void addReplyCommandInfo(client *c, struct redisCommand *cmd) {
    if (!cmd) {
        addReplyNull(c);
    } else {
        /* Derive the legacy firstkey/lastkey/keystep triple from the
         * command's legacy range key spec, when one exists. */
        int firstkey = 0, lastkey = 0, keystep = 0;
        if (cmd->legacy_range_key_spec.begin_search_type != KSPEC_BS_INVALID) {
            firstkey = cmd->legacy_range_key_spec.bs.index.pos;
            lastkey = cmd->legacy_range_key_spec.fk.range.lastkey;
            /* lastkey in the spec is relative to firstkey; convert it to an
             * absolute position. Negative values are left as-is. */
            if (lastkey >= 0)
                lastkey += firstkey;
            keystep = cmd->legacy_range_key_spec.fk.range.keystep;
        }

        addReplyArrayLen(c, 10);
        addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
        addReplyLongLong(c, cmd->arity);
        addReplyFlagsForCommand(c, cmd);
        addReplyLongLong(c, firstkey);
        addReplyLongLong(c, lastkey);
        addReplyLongLong(c, keystep);
        addReplyCommandCategories(c, cmd);
        addReplyCommandTips(c, cmd);
        addReplyCommandKeySpecs(c, cmd);
        addReplyCommandSubCommands(c, cmd, addReplyCommandInfo, 0);
    }
}
+
/* Output the representation of a Redis command. Used by the COMMAND DOCS.
 * The map length is pre-counted below; the counting must stay in sync with
 * the fields actually emitted afterwards. */
void addReplyCommandDocs(client *c, struct redisCommand *cmd) {
    /* Count our reply len so we don't have to use deferred reply. */
    long maplen = 1;
    if (cmd->summary) maplen++;
    if (cmd->since) maplen++;
    if (cmd->flags & CMD_MODULE) maplen++;
    if (cmd->complexity) maplen++;
    if (cmd->doc_flags) maplen++;
    if (cmd->deprecated_since) maplen++;
    if (cmd->replaced_by) maplen++;
    if (cmd->history) maplen++;
#ifdef LOG_REQ_RES
    if (cmd->reply_schema) maplen++;
#endif
    if (cmd->args) maplen++;
    if (cmd->subcommands_dict) maplen++;
    addReplyMapLen(c, maplen);

    if (cmd->summary) {
        addReplyBulkCString(c, "summary");
        addReplyBulkCString(c, cmd->summary);
    }
    if (cmd->since) {
        addReplyBulkCString(c, "since");
        addReplyBulkCString(c, cmd->since);
    }

    /* Always have the group, for module commands the group is always "module". */
    addReplyBulkCString(c, "group");
    addReplyBulkCString(c, commandGroupStr(cmd->group));

    if (cmd->complexity) {
        addReplyBulkCString(c, "complexity");
        addReplyBulkCString(c, cmd->complexity);
    }
    if (cmd->flags & CMD_MODULE) {
        addReplyBulkCString(c, "module");
        addReplyBulkCString(c, moduleNameFromCommand(cmd));
    }
    if (cmd->doc_flags) {
        addReplyBulkCString(c, "doc_flags");
        addReplyDocFlagsForCommand(c, cmd);
    }
    if (cmd->deprecated_since) {
        addReplyBulkCString(c, "deprecated_since");
        addReplyBulkCString(c, cmd->deprecated_since);
    }
    if (cmd->replaced_by) {
        addReplyBulkCString(c, "replaced_by");
        addReplyBulkCString(c, cmd->replaced_by);
    }
    if (cmd->history) {
        addReplyBulkCString(c, "history");
        addReplyCommandHistory(c, cmd);
    }
#ifdef LOG_REQ_RES
    if (cmd->reply_schema) {
        addReplyBulkCString(c, "reply_schema");
        addReplyJson(c, cmd->reply_schema);
    }
#endif
    if (cmd->args) {
        addReplyBulkCString(c, "arguments");
        addReplyCommandArgList(c, cmd->args, cmd->num_args);
    }
    if (cmd->subcommands_dict) {
        addReplyBulkCString(c, "subcommands");
        addReplyCommandSubCommands(c, cmd, addReplyCommandDocs, 1);
    }
}
+
/* Helper for COMMAND GETKEYS and GETKEYSANDFLAGS: looks up the command named
 * in argv[2..], validates its arity, extracts its keys via the key specs and
 * replies with them (with per-key flags too when 'with_flags' is set). */
void getKeysSubcommandImpl(client *c, int with_flags) {
    struct redisCommand *cmd = lookupCommand(c->argv+2,c->argc-2);
    getKeysResult result = GETKEYS_RESULT_INIT;
    int j;

    if (!cmd) {
        addReplyError(c,"Invalid command specified");
        return;
    } else if (!doesCommandHaveKeys(cmd)) {
        addReplyError(c,"The command has no key arguments");
        return;
    /* Arity check: a positive arity means an exact argument count, a
     * negative one a minimum count. */
    } else if ((cmd->arity > 0 && cmd->arity != c->argc-2) ||
               ((c->argc-2) < -cmd->arity))
    {
        addReplyError(c,"Invalid number of arguments specified for command");
        return;
    }

    if (!getKeysFromCommandWithSpecs(cmd,c->argv+2,c->argc-2,GET_KEYSPEC_DEFAULT,&result)) {
        if (cmd->flags & CMD_NO_MANDATORY_KEYS) {
            /* Commands without mandatory keys may legitimately yield none. */
            addReplyArrayLen(c,0);
        } else {
            addReplyError(c,"Invalid arguments specified for command");
        }
    } else {
        addReplyArrayLen(c,result.numkeys);
        for (j = 0; j < result.numkeys; j++) {
            if (!with_flags) {
                addReplyBulk(c,c->argv[result.keys[j].pos+2]);
            } else {
                addReplyArrayLen(c,2);
                addReplyBulk(c,c->argv[result.keys[j].pos+2]);
                addReplyFlagsForKeyArgs(c,result.keys[j].flags);
            }
        }
    }
    getKeysFreeResult(&result);
}
+
/* COMMAND GETKEYSANDFLAGS cmd arg1 arg2 ... — thin wrapper that requests
 * per-key flags from the shared implementation. */
void commandGetKeysAndFlagsCommand(client *c) {
    getKeysSubcommandImpl(c, 1);
}
+
/* COMMAND GETKEYS cmd arg1 arg2 ... — thin wrapper around the shared
 * implementation, without per-key flags. */
void getKeysSubcommand(client *c) {
    getKeysSubcommandImpl(c, 0);
}
+
+/* COMMAND (no args) */
+void commandCommand(client *c) {
+ dictIterator *di;
+ dictEntry *de;
+
+ addReplyArrayLen(c, dictSize(server.commands));
+ di = dictGetIterator(server.commands);
+ while ((de = dictNext(di)) != NULL) {
+ addReplyCommandInfo(c, dictGetVal(de));
+ }
+ dictReleaseIterator(di);
+}
+
/* COMMAND COUNT: reply with the number of registered commands. */
void commandCountCommand(client *c) {
    addReplyLongLong(c, dictSize(server.commands));
}
+
/* The filter kinds accepted by COMMAND LIST FILTERBY. */
typedef enum {
    COMMAND_LIST_FILTER_MODULE,
    COMMAND_LIST_FILTER_ACLCAT,
    COMMAND_LIST_FILTER_PATTERN,
} commandListFilterType;
+
/* A parsed COMMAND LIST FILTERBY clause. 'arg' is the filter operand given by
 * the client; 'cache' memoizes the resolved module handle or ACL category
 * flag so the lookup happens at most once per filter instance. */
typedef struct {
    commandListFilterType type;
    sds arg;
    struct {
        int valid; /* Set to 1 once 'u' has been populated. */
        union {
            uint64_t aclcat;
            void *module_handle;
        } u;
    } cache;
} commandListFilter;
+
+int shouldFilterFromCommandList(struct redisCommand *cmd, commandListFilter *filter) {
+ switch (filter->type) {
+ case (COMMAND_LIST_FILTER_MODULE):
+ if (!filter->cache.valid) {
+ filter->cache.u.module_handle = moduleGetHandleByName(filter->arg);
+ filter->cache.valid = 1;
+ }
+ return !moduleIsModuleCommand(filter->cache.u.module_handle, cmd);
+ case (COMMAND_LIST_FILTER_ACLCAT): {
+ if (!filter->cache.valid) {
+ filter->cache.u.aclcat = ACLGetCommandCategoryFlagByName(filter->arg);
+ filter->cache.valid = 1;
+ }
+ uint64_t cat = filter->cache.u.aclcat;
+ if (cat == 0)
+ return 1; /* Invalid ACL category */
+ return (!(cmd->acl_categories & cat));
+ break;
+ }
+ case (COMMAND_LIST_FILTER_PATTERN):
+ return !stringmatchlen(filter->arg, sdslen(filter->arg), cmd->fullname, sdslen(cmd->fullname), 1);
+ default:
+ serverPanic("Invalid filter type %d", filter->type);
+ }
+}
+
/* COMMAND LIST FILTERBY (MODULE <module-name>|ACLCAT <cat>|PATTERN <pattern>)
 * Emits every command in 'commands' (recursing into subcommand dicts) that
 * passes the filter, incrementing *numcmds for each emitted name.
 * NOTE(review): 'filter' is passed by value, so the memoized cache resolved
 * in a recursive call isn't shared with the caller — presumably an accepted
 * minor inefficiency; confirm before relying on the cache across levels. */
void commandListWithFilter(client *c, dict *commands, commandListFilter filter, int *numcmds) {
    dictEntry *de;
    dictIterator *di = dictGetIterator(commands);

    while ((de = dictNext(di)) != NULL) {
        struct redisCommand *cmd = dictGetVal(de);
        if (!shouldFilterFromCommandList(cmd,&filter)) {
            addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
            (*numcmds)++;
        }

        if (cmd->subcommands_dict) {
            commandListWithFilter(c, cmd->subcommands_dict, filter, numcmds);
        }
    }
    dictReleaseIterator(di);
}
+
+/* COMMAND LIST */
+void commandListWithoutFilter(client *c, dict *commands, int *numcmds) {
+ dictEntry *de;
+ dictIterator *di = dictGetIterator(commands);
+
+ while ((de = dictNext(di)) != NULL) {
+ struct redisCommand *cmd = dictGetVal(de);
+ addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
+ (*numcmds)++;
+
+ if (cmd->subcommands_dict) {
+ commandListWithoutFilter(c, cmd->subcommands_dict, numcmds);
+ }
+ }
+ dictReleaseIterator(di);
+}
+
/* COMMAND LIST [FILTERBY (MODULE <module-name>|ACLCAT <cat>|PATTERN <pattern>)] */
void commandListCommand(client *c) {

    /* Parse options. */
    int i = 2, got_filter = 0;
    commandListFilter filter = {0};
    for (; i < c->argc; i++) {
        int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
        char *opt = c->argv[i]->ptr;
        if (!strcasecmp(opt,"filterby") && moreargs == 2) {
            char *filtertype = c->argv[i+1]->ptr;
            if (!strcasecmp(filtertype,"module")) {
                filter.type = COMMAND_LIST_FILTER_MODULE;
            } else if (!strcasecmp(filtertype,"aclcat")) {
                filter.type = COMMAND_LIST_FILTER_ACLCAT;
            } else if (!strcasecmp(filtertype,"pattern")) {
                filter.type = COMMAND_LIST_FILTER_PATTERN;
            } else {
                addReplyErrorObject(c,shared.syntaxerr);
                return;
            }
            got_filter = 1;
            filter.arg = c->argv[i+2]->ptr;
            i += 2;
        } else {
            addReplyErrorObject(c,shared.syntaxerr);
            return;
        }
    }

    /* The number of listed commands isn't known up front (filters and
     * subcommand recursion), so a deferred length is used here. */
    int numcmds = 0;
    void *replylen = addReplyDeferredLen(c);

    if (got_filter) {
        commandListWithFilter(c, server.commands, filter, &numcmds);
    } else {
        commandListWithoutFilter(c, server.commands, &numcmds);
    }

    setDeferredArrayLen(c,replylen,numcmds);
}
+
+/* COMMAND INFO [<command-name> ...] */
+void commandInfoCommand(client *c) {
+ int i;
+
+ if (c->argc == 2) {
+ dictIterator *di;
+ dictEntry *de;
+ addReplyArrayLen(c, dictSize(server.commands));
+ di = dictGetIterator(server.commands);
+ while ((de = dictNext(di)) != NULL) {
+ addReplyCommandInfo(c, dictGetVal(de));
+ }
+ dictReleaseIterator(di);
+ } else {
+ addReplyArrayLen(c, c->argc-2);
+ for (i = 2; i < c->argc; i++) {
+ addReplyCommandInfo(c, lookupCommandBySds(c->argv[i]->ptr));
+ }
+ }
+}
+
/* COMMAND DOCS [command-name [command-name ...]] */
void commandDocsCommand(client *c) {
    int i;
    if (c->argc == 2) {
        /* Reply with an array of all commands */
        dictIterator *di;
        dictEntry *de;
        addReplyMapLen(c, dictSize(server.commands));
        di = dictGetIterator(server.commands);
        while ((de = dictNext(di)) != NULL) {
            struct redisCommand *cmd = dictGetVal(de);
            addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
            addReplyCommandDocs(c, cmd);
        }
        dictReleaseIterator(di);
    } else {
        /* Reply with an array of the requested commands (if we find them) */
        /* Unknown names are silently skipped, so the map length is deferred. */
        int numcmds = 0;
        void *replylen = addReplyDeferredLen(c);
        for (i = 2; i < c->argc; i++) {
            struct redisCommand *cmd = lookupCommandBySds(c->argv[i]->ptr);
            if (!cmd)
                continue;
            addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
            addReplyCommandDocs(c, cmd);
            numcmds++;
        }
        setDeferredMapLen(c,replylen,numcmds);
    }
}
+
/* COMMAND GETKEYS arg0 arg1 arg2 ... — dispatches to the GETKEYS helper. */
void commandGetKeysCommand(client *c) {
    getKeysSubcommand(c);
}
+
/* COMMAND HELP */
void commandHelpCommand(client *c) {
    /* NULL-terminated help text; rendered by addReplyHelp(). The strings are
     * part of the command output and must not be reworded casually. */
    const char *help[] = {
"(no subcommand)",
"    Return details about all Redis commands.",
"COUNT",
"    Return the total number of commands in this Redis server.",
"LIST",
"    Return a list of all commands in this Redis server.",
"INFO [<command-name> ...]",
"    Return details about multiple Redis commands.",
"    If no command names are given, documentation details for all",
"    commands are returned.",
"DOCS [<command-name> ...]",
"    Return documentation details about multiple Redis commands.",
"    If no command names are given, documentation details for all",
"    commands are returned.",
"GETKEYS <full-command>",
"    Return the keys from a full Redis command.",
"GETKEYSANDFLAGS <full-command>",
"    Return the keys and the access flags from a full Redis command.",
NULL
    };

    addReplyHelp(c, help);
}
+
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth. The result is written into 's'
 * (at most 'size' bytes, NUL terminated by snprintf). */
void bytesToHuman(char *s, size_t size, unsigned long long n) {
    /* Plain bytes. */
    if (n < 1024) {
        snprintf(s,size,"%lluB",n);
        return;
    }
    if (n < 1024ULL*1024) {
        snprintf(s,size,"%.2fK",(double)n/(1024));
        return;
    }
    if (n < 1024ULL*1024*1024) {
        snprintf(s,size,"%.2fM",(double)n/(1024*1024));
        return;
    }
    if (n < 1024ULL*1024*1024*1024) {
        snprintf(s,size,"%.2fG",(double)n/(1024LL*1024*1024));
        return;
    }
    if (n < 1024ULL*1024*1024*1024*1024) {
        snprintf(s,size,"%.2fT",(double)n/(1024LL*1024*1024*1024));
        return;
    }
    if (n < 1024ULL*1024*1024*1024*1024*1024) {
        snprintf(s,size,"%.2fP",(double)n/(1024LL*1024*1024*1024*1024));
        return;
    }
    /* Beyond the largest supported suffix: fall back to raw bytes. */
    snprintf(s,size,"%lluB",n);
}
+
/* Fill percentile distribution of latencies: appends to 'info' a line of the
 * form "latency_percentiles_usec_<name>:p50=...,p99=...\r\n" with one entry
 * per configured tracking percentile, values in milliseconds with 3 decimals
 * (histogram values are in nanosecond-scale units divided by 1000.0). */
sds fillPercentileDistributionLatencies(sds info, const char* histogram_name, struct hdr_histogram* histogram) {
    info = sdscatfmt(info,"latency_percentiles_usec_%s:",histogram_name);
    for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
        char fbuf[128];
        size_t len = snprintf(fbuf, sizeof(fbuf), "%f", server.latency_tracking_info_percentiles[j]);
        /* trimDoubleString presumably strips trailing zeros from the printed
         * percentile — TODO confirm against its definition. */
        trimDoubleString(fbuf, len);
        info = sdscatprintf(info,"p%s=%.3f", fbuf,
            ((double)hdr_value_at_percentile(histogram,server.latency_tracking_info_percentiles[j]))/1000.0f);
        if (j != server.latency_tracking_info_percentiles_len-1)
            info = sdscatlen(info,",",1);
    }
    info = sdscatprintf(info,"\r\n");
    return info;
}
+
+const char *replstateToString(int replstate) {
+ switch (replstate) {
+ case SLAVE_STATE_WAIT_BGSAVE_START:
+ case SLAVE_STATE_WAIT_BGSAVE_END:
+ return "wait_bgsave";
+ case SLAVE_STATE_SEND_BULK:
+ return "send_bulk";
+ case SLAVE_STATE_ONLINE:
+ return "online";
+ default:
+ return "";
+ }
+}
+
/* Characters we sanitize on INFO output to maintain expected format. */
static char unsafe_info_chars[] = "#:\n\r";
static char unsafe_info_chars_substs[] = "____"; /* Must be same length as above */

/* Returns a sanitized version of s that contains no unsafe info string chars.
 * If no unsafe characters are found, simply returns s. Caller needs to
 * free tmp if it is non-null on return.
 */
const char *getSafeInfoString(const char *s, size_t len, char **tmp) {
    const size_t nchars = sizeof(unsafe_info_chars)-1;
    *tmp = NULL;

    /* Common case: nothing to sanitize, hand back the input untouched. */
    if (mempbrk(s, len, unsafe_info_chars, nchars) == NULL)
        return s;

    /* Sanitize in a NUL terminated private copy owned by the caller
     * through *tmp. */
    char *copy = zmalloc(len + 1);
    memcpy(copy, s, len);
    copy[len] = '\0';
    *tmp = copy;
    return memmapchars(copy, len, unsafe_info_chars,
                       unsafe_info_chars_substs, nchars);
}
+
+sds genRedisInfoStringCommandStats(sds info, dict *commands) {
+ struct redisCommand *c;
+ dictEntry *de;
+ dictIterator *di;
+ di = dictGetSafeIterator(commands);
+ while((de = dictNext(di)) != NULL) {
+ char *tmpsafe;
+ c = (struct redisCommand *) dictGetVal(de);
+ if (c->calls || c->failed_calls || c->rejected_calls) {
+ info = sdscatprintf(info,
+ "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f"
+ ",rejected_calls=%lld,failed_calls=%lld\r\n",
+ getSafeInfoString(c->fullname, sdslen(c->fullname), &tmpsafe), c->calls, c->microseconds,
+ (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls),
+ c->rejected_calls, c->failed_calls);
+ if (tmpsafe != NULL) zfree(tmpsafe);
+ }
+ if (c->subcommands_dict) {
+ info = genRedisInfoStringCommandStats(info, c->subcommands_dict);
+ }
+ }
+ dictReleaseIterator(di);
+
+ return info;
+}
+
+/* Writes the ACL metrics to the info */
+sds genRedisInfoStringACLStats(sds info) {
+ info = sdscatprintf(info,
+ "acl_access_denied_auth:%lld\r\n"
+ "acl_access_denied_cmd:%lld\r\n"
+ "acl_access_denied_key:%lld\r\n"
+ "acl_access_denied_channel:%lld\r\n",
+ server.acl_info.user_auth_failures,
+ server.acl_info.invalid_cmd_accesses,
+ server.acl_info.invalid_key_accesses,
+ server.acl_info.invalid_channel_accesses);
+ return info;
+}
+
/* Append a "latency_percentiles_usec_<fullname>:" line to 'info' for every
 * command in 'commands' that has accumulated a latency histogram, recursing
 * into subcommand dictionaries. Returns the extended sds string. */
sds genRedisInfoStringLatencyStats(sds info, dict *commands) {
    struct redisCommand *c;
    dictEntry *de;
    dictIterator *di;
    di = dictGetSafeIterator(commands);
    while((de = dictNext(di)) != NULL) {
        char *tmpsafe;
        c = (struct redisCommand *) dictGetVal(de);
        if (c->latency_histogram) {
            /* Command names may contain characters unsafe for the INFO
             * format; tmpsafe owns the sanitized copy, if one was made. */
            info = fillPercentileDistributionLatencies(info,
                getSafeInfoString(c->fullname, sdslen(c->fullname), &tmpsafe),
                c->latency_histogram);
            if (tmpsafe != NULL) zfree(tmpsafe);
        }
        if (c->subcommands_dict) {
            info = genRedisInfoStringLatencyStats(info, c->subcommands_dict);
        }
    }
    dictReleaseIterator(di);

    return info;
}
+
+/* Takes a null terminated sections list, and adds them to the dict. */
+void addInfoSectionsToDict(dict *section_dict, char **sections) {
+ while (*sections) {
+ sds section = sdsnew(*sections);
+ if (dictAdd(section_dict, section, NULL)==DICT_ERR)
+ sdsfree(section);
+ sections++;
+ }
+}
+
+/* Cached copy of the default sections, as an optimization. */
+static dict *cached_default_info_sections = NULL;
+
+void releaseInfoSectionDict(dict *sec) {
+ if (sec != cached_default_info_sections)
+ dictRelease(sec);
+}
+
/* Create a dictionary with unique section names to be used by genRedisInfoString.
 * 'argv' and 'argc' are list of arguments for INFO.
 * 'defaults' is an optional null terminated list of default sections.
 * 'out_all' and 'out_everything' are optional flags, set to 1 when the
 * corresponding pseudo section name ("all" / "everything") was requested.
 * The resulting dictionary should be released with releaseInfoSectionDict. */
dict *genInfoSectionDict(robj **argv, int argc, char **defaults, int *out_all, int *out_everything) {
    char *default_sections[] = {
        "server", "clients", "memory", "persistence", "stats", "replication",
        "cpu", "module_list", "errorstats", "cluster", "keyspace", NULL};
    if (!defaults)
        defaults = default_sections;

    if (argc == 0) {
        /* In this case we know the dict is not gonna be modified, so we cache
         * it as an optimization for a common case. Callers must therefore
         * free the result only through releaseInfoSectionDict(), which
         * knows not to release this shared instance. */
        if (cached_default_info_sections)
            return cached_default_info_sections;
        cached_default_info_sections = dictCreate(&stringSetDictType);
        dictExpand(cached_default_info_sections, 16);
        addInfoSectionsToDict(cached_default_info_sections, defaults);
        return cached_default_info_sections;
    }

    dict *section_dict = dictCreate(&stringSetDictType);
    dictExpand(section_dict, min(argc,16));
    for (int i = 0; i < argc; i++) {
        /* "default", "all" and "everything" are pseudo section names: they
         * expand to default sections or set the output flags instead of
         * being stored verbatim. */
        if (!strcasecmp(argv[i]->ptr,"default")) {
            addInfoSectionsToDict(section_dict, defaults);
        } else if (!strcasecmp(argv[i]->ptr,"all")) {
            if (out_all) *out_all = 1;
        } else if (!strcasecmp(argv[i]->ptr,"everything")) {
            if (out_everything) *out_everything = 1;
            if (out_all) *out_all = 1;
        } else {
            /* Unknown names are stored as-is; genRedisInfoString simply
             * won't match them (modules may still claim them later). */
            sds section = sdsnew(argv[i]->ptr);
            if (dictAdd(section_dict, section, NULL) != DICT_OK)
                sdsfree(section);
        }
    }
    return section_dict;
}
+
/* Create the string returned by the INFO command. This is decoupled
 * by the INFO command itself as we need to report the same information
 * on memory corruption problems.
 *
 * 'section_dict' is a set of requested section names (see
 * genInfoSectionDict), 'all_sections' selects every built-in section and
 * 'everything' additionally selects all module sections. Returns a newly
 * allocated sds the caller must free. The exact field names and order are
 * part of the INFO output contract — do not reorder casually. */
sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
    sds info = sdsempty();
    time_t uptime = server.unixtime-server.stat_starttime;
    int j;
    /* 'sections' counts emitted sections; used both to put a blank line
     * between sections and, at the end, to detect unclaimed section names
     * that may belong to modules. */
    int sections = 0;
    if (everything) all_sections = 1;

    /* Server */
    if (all_sections || (dictFind(section_dict,"server") != NULL)) {
        static int call_uname = 1;
        static struct utsname name;
        char *mode;
        char *supervised;

        if (server.cluster_enabled) mode = "cluster";
        else if (server.sentinel_mode) mode = "sentinel";
        else mode = "standalone";

        if (server.supervised) {
            if (server.supervised_mode == SUPERVISED_UPSTART) supervised = "upstart";
            else if (server.supervised_mode == SUPERVISED_SYSTEMD) supervised = "systemd";
            else supervised = "unknown";
        } else {
            supervised = "no";
        }

        if (sections++) info = sdscat(info,"\r\n");

        if (call_uname) {
            /* Uname can be slow and is always the same output. Cache it. */
            uname(&name);
            call_uname = 0;
        }

        info = sdscatfmt(info,
            "# Server\r\n"
            "redis_version:%s\r\n"
            "redis_git_sha1:%s\r\n"
            "redis_git_dirty:%i\r\n"
            "redis_build_id:%s\r\n"
            "redis_mode:%s\r\n"
            "os:%s %s %s\r\n"
            "arch_bits:%i\r\n"
            "monotonic_clock:%s\r\n"
            "multiplexing_api:%s\r\n"
            "atomicvar_api:%s\r\n"
            "gcc_version:%i.%i.%i\r\n"
            "process_id:%I\r\n"
            "process_supervised:%s\r\n"
            "run_id:%s\r\n"
            "tcp_port:%i\r\n"
            "server_time_usec:%I\r\n"
            "uptime_in_seconds:%I\r\n"
            "uptime_in_days:%I\r\n"
            "hz:%i\r\n"
            "configured_hz:%i\r\n"
            "lru_clock:%u\r\n"
            "executable:%s\r\n"
            "config_file:%s\r\n"
            "io_threads_active:%i\r\n",
            REDIS_VERSION,
            redisGitSHA1(),
            strtol(redisGitDirty(),NULL,10) > 0,
            redisBuildIdString(),
            mode,
            name.sysname, name.release, name.machine,
            server.arch_bits,
            monotonicInfoString(),
            aeGetApiName(),
            REDIS_ATOMIC_API,
#ifdef __GNUC__
            __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
#else
            0,0,0,
#endif
            (int64_t) getpid(),
            supervised,
            server.runid,
            server.port ? server.port : server.tls_port,
            (int64_t)server.ustime,
            (int64_t)uptime,
            (int64_t)(uptime/(3600*24)),
            server.hz,
            server.config_hz,
            server.lruclock,
            server.executable ? server.executable : "",
            server.configfile ? server.configfile : "",
            server.io_threads_active);

        /* Conditional properties */
        if (isShutdownInitiated()) {
            info = sdscatfmt(info,
                "shutdown_in_milliseconds:%I\r\n",
                (int64_t)(server.shutdown_mstime - commandTimeSnapshot()));
        }

        /* get all the listeners information */
        info = getListensInfoString(info);
    }

    /* Clients */
    if (all_sections || (dictFind(section_dict,"clients") != NULL)) {
        size_t maxin, maxout;
        unsigned long blocking_keys, blocking_keys_on_nokey;
        getExpansiveClientsInfo(&maxin,&maxout);
        totalNumberOfBlockingKeys(&blocking_keys, &blocking_keys_on_nokey);
        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info,
            "# Clients\r\n"
            "connected_clients:%lu\r\n"
            "cluster_connections:%lu\r\n"
            "maxclients:%u\r\n"
            "client_recent_max_input_buffer:%zu\r\n"
            "client_recent_max_output_buffer:%zu\r\n"
            "blocked_clients:%d\r\n"
            "tracking_clients:%d\r\n"
            "clients_in_timeout_table:%llu\r\n"
            "total_blocking_keys:%lu\r\n"
            "total_blocking_keys_on_nokey:%lu\r\n",
            listLength(server.clients)-listLength(server.slaves),
            getClusterConnectionsCount(),
            server.maxclients,
            maxin, maxout,
            server.blocked_clients,
            server.tracking_clients,
            (unsigned long long) raxSize(server.clients_timeout_table),
            blocking_keys,
            blocking_keys_on_nokey);
    }

    /* Memory */
    if (all_sections || (dictFind(section_dict,"memory") != NULL)) {
        char hmem[64];
        char peak_hmem[64];
        char total_system_hmem[64];
        char used_memory_lua_hmem[64];
        char used_memory_vm_total_hmem[64];
        char used_memory_scripts_hmem[64];
        char used_memory_rss_hmem[64];
        char maxmemory_hmem[64];
        size_t zmalloc_used = zmalloc_used_memory();
        size_t total_system_mem = server.system_memory_size;
        const char *evict_policy = evictPolicyToString();
        long long memory_lua = evalMemory();
        long long memory_functions = functionsMemory();
        struct redisMemOverhead *mh = getMemoryOverheadData();

        /* Peak memory is updated from time to time by serverCron() so it
         * may happen that the instantaneous value is slightly bigger than
         * the peak value. This may confuse users, so we update the peak
         * if found smaller than the current memory usage. */
        if (zmalloc_used > server.stat_peak_memory)
            server.stat_peak_memory = zmalloc_used;

        bytesToHuman(hmem,sizeof(hmem),zmalloc_used);
        bytesToHuman(peak_hmem,sizeof(peak_hmem),server.stat_peak_memory);
        bytesToHuman(total_system_hmem,sizeof(total_system_hmem),total_system_mem);
        bytesToHuman(used_memory_lua_hmem,sizeof(used_memory_lua_hmem),memory_lua);
        bytesToHuman(used_memory_vm_total_hmem,sizeof(used_memory_vm_total_hmem),memory_functions + memory_lua);
        bytesToHuman(used_memory_scripts_hmem,sizeof(used_memory_scripts_hmem),mh->lua_caches + mh->functions_caches);
        bytesToHuman(used_memory_rss_hmem,sizeof(used_memory_rss_hmem),server.cron_malloc_stats.process_rss);
        bytesToHuman(maxmemory_hmem,sizeof(maxmemory_hmem),server.maxmemory);

        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info,
            "# Memory\r\n"
            "used_memory:%zu\r\n"
            "used_memory_human:%s\r\n"
            "used_memory_rss:%zu\r\n"
            "used_memory_rss_human:%s\r\n"
            "used_memory_peak:%zu\r\n"
            "used_memory_peak_human:%s\r\n"
            "used_memory_peak_perc:%.2f%%\r\n"
            "used_memory_overhead:%zu\r\n"
            "used_memory_startup:%zu\r\n"
            "used_memory_dataset:%zu\r\n"
            "used_memory_dataset_perc:%.2f%%\r\n"
            "allocator_allocated:%zu\r\n"
            "allocator_active:%zu\r\n"
            "allocator_resident:%zu\r\n"
            "total_system_memory:%lu\r\n"
            "total_system_memory_human:%s\r\n"
            "used_memory_lua:%lld\r\n" /* deprecated, renamed to used_memory_vm_eval */
            "used_memory_vm_eval:%lld\r\n"
            "used_memory_lua_human:%s\r\n" /* deprecated */
            "used_memory_scripts_eval:%lld\r\n"
            "number_of_cached_scripts:%lu\r\n"
            "number_of_functions:%lu\r\n"
            "number_of_libraries:%lu\r\n"
            "used_memory_vm_functions:%lld\r\n"
            "used_memory_vm_total:%lld\r\n"
            "used_memory_vm_total_human:%s\r\n"
            "used_memory_functions:%lld\r\n"
            "used_memory_scripts:%lld\r\n"
            "used_memory_scripts_human:%s\r\n"
            "maxmemory:%lld\r\n"
            "maxmemory_human:%s\r\n"
            "maxmemory_policy:%s\r\n"
            "allocator_frag_ratio:%.2f\r\n"
            "allocator_frag_bytes:%zu\r\n"
            "allocator_rss_ratio:%.2f\r\n"
            "allocator_rss_bytes:%zd\r\n"
            "rss_overhead_ratio:%.2f\r\n"
            "rss_overhead_bytes:%zd\r\n"
            "mem_fragmentation_ratio:%.2f\r\n"
            "mem_fragmentation_bytes:%zd\r\n"
            "mem_not_counted_for_evict:%zu\r\n"
            "mem_replication_backlog:%zu\r\n"
            "mem_total_replication_buffers:%zu\r\n"
            "mem_clients_slaves:%zu\r\n"
            "mem_clients_normal:%zu\r\n"
            "mem_cluster_links:%zu\r\n"
            "mem_aof_buffer:%zu\r\n"
            "mem_allocator:%s\r\n"
            "active_defrag_running:%d\r\n"
            "lazyfree_pending_objects:%zu\r\n"
            "lazyfreed_objects:%zu\r\n",
            zmalloc_used,
            hmem,
            server.cron_malloc_stats.process_rss,
            used_memory_rss_hmem,
            server.stat_peak_memory,
            peak_hmem,
            mh->peak_perc,
            mh->overhead_total,
            mh->startup_allocated,
            mh->dataset,
            mh->dataset_perc,
            server.cron_malloc_stats.allocator_allocated,
            server.cron_malloc_stats.allocator_active,
            server.cron_malloc_stats.allocator_resident,
            (unsigned long)total_system_mem,
            total_system_hmem,
            memory_lua,
            memory_lua,
            used_memory_lua_hmem,
            (long long) mh->lua_caches,
            dictSize(evalScriptsDict()),
            functionsNum(),
            functionsLibNum(),
            memory_functions,
            memory_functions + memory_lua,
            used_memory_vm_total_hmem,
            (long long) mh->functions_caches,
            (long long) mh->lua_caches + (long long) mh->functions_caches,
            used_memory_scripts_hmem,
            server.maxmemory,
            maxmemory_hmem,
            evict_policy,
            mh->allocator_frag,
            mh->allocator_frag_bytes,
            mh->allocator_rss,
            mh->allocator_rss_bytes,
            mh->rss_extra,
            mh->rss_extra_bytes,
            mh->total_frag, /* This is the total RSS overhead, including
                             fragmentation, but not just it. This field
                             (and the next one) is named like that just
                             for backward compatibility. */
            mh->total_frag_bytes,
            freeMemoryGetNotCountedMemory(),
            mh->repl_backlog,
            server.repl_buffer_mem,
            mh->clients_slaves,
            mh->clients_normal,
            mh->cluster_links,
            mh->aof_buffer,
            ZMALLOC_LIB,
            server.active_defrag_running,
            lazyfreeGetPendingObjectsCount(),
            lazyfreeGetFreedObjectsCount()
        );
        freeMemoryOverheadData(mh);
    }

    /* Persistence */
    if (all_sections || (dictFind(section_dict,"persistence") != NULL)) {
        if (sections++) info = sdscat(info,"\r\n");
        /* Fork progress: module forks report an explicit progress value,
         * RDB/AOF forks derive it from processed vs. total keys. */
        double fork_perc = 0;
        if (server.stat_module_progress) {
            fork_perc = server.stat_module_progress * 100;
        } else if (server.stat_current_save_keys_total) {
            fork_perc = ((double)server.stat_current_save_keys_processed / server.stat_current_save_keys_total) * 100;
        }
        int aof_bio_fsync_status;
        atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status);

        info = sdscatprintf(info,
            "# Persistence\r\n"
            "loading:%d\r\n"
            "async_loading:%d\r\n"
            "current_cow_peak:%zu\r\n"
            "current_cow_size:%zu\r\n"
            "current_cow_size_age:%lu\r\n"
            "current_fork_perc:%.2f\r\n"
            "current_save_keys_processed:%zu\r\n"
            "current_save_keys_total:%zu\r\n"
            "rdb_changes_since_last_save:%lld\r\n"
            "rdb_bgsave_in_progress:%d\r\n"
            "rdb_last_save_time:%jd\r\n"
            "rdb_last_bgsave_status:%s\r\n"
            "rdb_last_bgsave_time_sec:%jd\r\n"
            "rdb_current_bgsave_time_sec:%jd\r\n"
            "rdb_saves:%lld\r\n"
            "rdb_last_cow_size:%zu\r\n"
            "rdb_last_load_keys_expired:%lld\r\n"
            "rdb_last_load_keys_loaded:%lld\r\n"
            "aof_enabled:%d\r\n"
            "aof_rewrite_in_progress:%d\r\n"
            "aof_rewrite_scheduled:%d\r\n"
            "aof_last_rewrite_time_sec:%jd\r\n"
            "aof_current_rewrite_time_sec:%jd\r\n"
            "aof_last_bgrewrite_status:%s\r\n"
            "aof_rewrites:%lld\r\n"
            "aof_rewrites_consecutive_failures:%lld\r\n"
            "aof_last_write_status:%s\r\n"
            "aof_last_cow_size:%zu\r\n"
            "module_fork_in_progress:%d\r\n"
            "module_fork_last_cow_size:%zu\r\n",
            (int)(server.loading && !server.async_loading),
            (int)server.async_loading,
            server.stat_current_cow_peak,
            server.stat_current_cow_bytes,
            server.stat_current_cow_updated ? (unsigned long) elapsedMs(server.stat_current_cow_updated) / 1000 : 0,
            fork_perc,
            server.stat_current_save_keys_processed,
            server.stat_current_save_keys_total,
            server.dirty,
            server.child_type == CHILD_TYPE_RDB,
            (intmax_t)server.lastsave,
            (server.lastbgsave_status == C_OK) ? "ok" : "err",
            (intmax_t)server.rdb_save_time_last,
            (intmax_t)((server.child_type != CHILD_TYPE_RDB) ?
                -1 : time(NULL)-server.rdb_save_time_start),
            server.stat_rdb_saves,
            server.stat_rdb_cow_bytes,
            server.rdb_last_load_keys_expired,
            server.rdb_last_load_keys_loaded,
            server.aof_state != AOF_OFF,
            server.child_type == CHILD_TYPE_AOF,
            server.aof_rewrite_scheduled,
            (intmax_t)server.aof_rewrite_time_last,
            (intmax_t)((server.child_type != CHILD_TYPE_AOF) ?
                -1 : time(NULL)-server.aof_rewrite_time_start),
            (server.aof_lastbgrewrite_status == C_OK) ? "ok" : "err",
            server.stat_aof_rewrites,
            server.stat_aofrw_consecutive_failures,
            (server.aof_last_write_status == C_OK &&
                aof_bio_fsync_status == C_OK) ? "ok" : "err",
            server.stat_aof_cow_bytes,
            server.child_type == CHILD_TYPE_MODULE,
            server.stat_module_cow_bytes);

        if (server.aof_enabled) {
            info = sdscatprintf(info,
                "aof_current_size:%lld\r\n"
                "aof_base_size:%lld\r\n"
                "aof_pending_rewrite:%d\r\n"
                "aof_buffer_length:%zu\r\n"
                "aof_pending_bio_fsync:%lu\r\n"
                "aof_delayed_fsync:%lu\r\n",
                (long long) server.aof_current_size,
                (long long) server.aof_rewrite_base_size,
                server.aof_rewrite_scheduled,
                sdslen(server.aof_buf),
                bioPendingJobsOfType(BIO_AOF_FSYNC),
                server.aof_delayed_fsync);
        }

        if (server.loading) {
            double perc = 0;
            time_t eta, elapsed;
            off_t remaining_bytes = 1;

            if (server.loading_total_bytes) {
                perc = ((double)server.loading_loaded_bytes / server.loading_total_bytes) * 100;
                remaining_bytes = server.loading_total_bytes - server.loading_loaded_bytes;
            } else if(server.loading_rdb_used_mem) {
                perc = ((double)server.loading_loaded_bytes / server.loading_rdb_used_mem) * 100;
                remaining_bytes = server.loading_rdb_used_mem - server.loading_loaded_bytes;
                /* used mem is only a (bad) estimation of the rdb file size, avoid going over 100% */
                if (perc > 99.99) perc = 99.99;
                if (remaining_bytes < 1) remaining_bytes = 1;
            }

            elapsed = time(NULL)-server.loading_start_time;
            if (elapsed == 0) {
                eta = 1; /* A fake 1 second figure if we don't have
                            enough info */
            } else {
                eta = (elapsed*remaining_bytes)/(server.loading_loaded_bytes+1);
            }

            info = sdscatprintf(info,
                "loading_start_time:%jd\r\n"
                "loading_total_bytes:%llu\r\n"
                "loading_rdb_used_mem:%llu\r\n"
                "loading_loaded_bytes:%llu\r\n"
                "loading_loaded_perc:%.2f\r\n"
                "loading_eta_seconds:%jd\r\n",
                (intmax_t) server.loading_start_time,
                (unsigned long long) server.loading_total_bytes,
                (unsigned long long) server.loading_rdb_used_mem,
                (unsigned long long) server.loading_loaded_bytes,
                perc,
                (intmax_t)eta
            );
        }
    }

    /* Stats */
    if (all_sections || (dictFind(section_dict,"stats") != NULL)) {
        long long stat_total_reads_processed, stat_total_writes_processed;
        long long stat_net_input_bytes, stat_net_output_bytes;
        long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
        /* "current_*" times are ongoing intervals: add the elapsed time
         * since the event started, if one is in progress. */
        long long current_eviction_exceeded_time = server.stat_last_eviction_exceeded_time ?
            (long long) elapsedUs(server.stat_last_eviction_exceeded_time): 0;
        long long current_active_defrag_time = server.stat_last_active_defrag_time ?
            (long long) elapsedUs(server.stat_last_active_defrag_time): 0;
        atomicGet(server.stat_total_reads_processed, stat_total_reads_processed);
        atomicGet(server.stat_total_writes_processed, stat_total_writes_processed);
        atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
        atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
        atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
        atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);

        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info,
            "# Stats\r\n"
            "total_connections_received:%lld\r\n"
            "total_commands_processed:%lld\r\n"
            "instantaneous_ops_per_sec:%lld\r\n"
            "total_net_input_bytes:%lld\r\n"
            "total_net_output_bytes:%lld\r\n"
            "total_net_repl_input_bytes:%lld\r\n"
            "total_net_repl_output_bytes:%lld\r\n"
            "instantaneous_input_kbps:%.2f\r\n"
            "instantaneous_output_kbps:%.2f\r\n"
            "instantaneous_input_repl_kbps:%.2f\r\n"
            "instantaneous_output_repl_kbps:%.2f\r\n"
            "rejected_connections:%lld\r\n"
            "sync_full:%lld\r\n"
            "sync_partial_ok:%lld\r\n"
            "sync_partial_err:%lld\r\n"
            "expired_keys:%lld\r\n"
            "expired_stale_perc:%.2f\r\n"
            "expired_time_cap_reached_count:%lld\r\n"
            "expire_cycle_cpu_milliseconds:%lld\r\n"
            "evicted_keys:%lld\r\n"
            "evicted_clients:%lld\r\n"
            "total_eviction_exceeded_time:%lld\r\n"
            "current_eviction_exceeded_time:%lld\r\n"
            "keyspace_hits:%lld\r\n"
            "keyspace_misses:%lld\r\n"
            "pubsub_channels:%ld\r\n"
            "pubsub_patterns:%lu\r\n"
            "pubsubshard_channels:%lu\r\n"
            "latest_fork_usec:%lld\r\n"
            "total_forks:%lld\r\n"
            "migrate_cached_sockets:%ld\r\n"
            "slave_expires_tracked_keys:%zu\r\n"
            "active_defrag_hits:%lld\r\n"
            "active_defrag_misses:%lld\r\n"
            "active_defrag_key_hits:%lld\r\n"
            "active_defrag_key_misses:%lld\r\n"
            "total_active_defrag_time:%lld\r\n"
            "current_active_defrag_time:%lld\r\n"
            "tracking_total_keys:%lld\r\n"
            "tracking_total_items:%lld\r\n"
            "tracking_total_prefixes:%lld\r\n"
            "unexpected_error_replies:%lld\r\n"
            "total_error_replies:%lld\r\n"
            "dump_payload_sanitizations:%lld\r\n"
            "total_reads_processed:%lld\r\n"
            "total_writes_processed:%lld\r\n"
            "io_threaded_reads_processed:%lld\r\n"
            "io_threaded_writes_processed:%lld\r\n"
            "reply_buffer_shrinks:%lld\r\n"
            "reply_buffer_expands:%lld\r\n"
            "eventloop_cycles:%llu\r\n"
            "eventloop_duration_sum:%llu\r\n"
            "eventloop_duration_cmd_sum:%llu\r\n"
            "instantaneous_eventloop_cycles_per_sec:%llu\r\n"
            "instantaneous_eventloop_duration_usec:%llu\r\n",
            server.stat_numconnections,
            server.stat_numcommands,
            getInstantaneousMetric(STATS_METRIC_COMMAND),
            stat_net_input_bytes + stat_net_repl_input_bytes,
            stat_net_output_bytes + stat_net_repl_output_bytes,
            stat_net_repl_input_bytes,
            stat_net_repl_output_bytes,
            (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT)/1024,
            (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT)/1024,
            (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION)/1024,
            (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION)/1024,
            server.stat_rejected_conn,
            server.stat_sync_full,
            server.stat_sync_partial_ok,
            server.stat_sync_partial_err,
            server.stat_expiredkeys,
            server.stat_expired_stale_perc*100,
            server.stat_expired_time_cap_reached_count,
            server.stat_expire_cycle_time_used/1000,
            server.stat_evictedkeys,
            server.stat_evictedclients,
            (server.stat_total_eviction_exceeded_time + current_eviction_exceeded_time) / 1000,
            current_eviction_exceeded_time / 1000,
            server.stat_keyspace_hits,
            server.stat_keyspace_misses,
            dictSize(server.pubsub_channels),
            dictSize(server.pubsub_patterns),
            dictSize(server.pubsubshard_channels),
            server.stat_fork_time,
            server.stat_total_forks,
            dictSize(server.migrate_cached_sockets),
            getSlaveKeyWithExpireCount(),
            server.stat_active_defrag_hits,
            server.stat_active_defrag_misses,
            server.stat_active_defrag_key_hits,
            server.stat_active_defrag_key_misses,
            (server.stat_total_active_defrag_time + current_active_defrag_time) / 1000,
            current_active_defrag_time / 1000,
            (unsigned long long) trackingGetTotalKeys(),
            (unsigned long long) trackingGetTotalItems(),
            (unsigned long long) trackingGetTotalPrefixes(),
            server.stat_unexpected_error_replies,
            server.stat_total_error_replies,
            server.stat_dump_payload_sanitizations,
            stat_total_reads_processed,
            stat_total_writes_processed,
            server.stat_io_reads_processed,
            server.stat_io_writes_processed,
            server.stat_reply_buffer_shrinks,
            server.stat_reply_buffer_expands,
            server.duration_stats[EL_DURATION_TYPE_EL].cnt,
            server.duration_stats[EL_DURATION_TYPE_EL].sum,
            server.duration_stats[EL_DURATION_TYPE_CMD].sum,
            getInstantaneousMetric(STATS_METRIC_EL_CYCLE),
            getInstantaneousMetric(STATS_METRIC_EL_DURATION));
        info = genRedisInfoStringACLStats(info);
    }

    /* Replication */
    if (all_sections || (dictFind(section_dict,"replication") != NULL)) {
        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info,
            "# Replication\r\n"
            "role:%s\r\n",
            server.masterhost == NULL ? "master" : "slave");
        if (server.masterhost) {
            long long slave_repl_offset = 1;
            long long slave_read_repl_offset = 1;

            /* Prefer the live master link; fall back to the cached master
             * kept around for partial resynchronization. */
            if (server.master) {
                slave_repl_offset = server.master->reploff;
                slave_read_repl_offset = server.master->read_reploff;
            } else if (server.cached_master) {
                slave_repl_offset = server.cached_master->reploff;
                slave_read_repl_offset = server.cached_master->read_reploff;
            }

            info = sdscatprintf(info,
                "master_host:%s\r\n"
                "master_port:%d\r\n"
                "master_link_status:%s\r\n"
                "master_last_io_seconds_ago:%d\r\n"
                "master_sync_in_progress:%d\r\n"
                "slave_read_repl_offset:%lld\r\n"
                "slave_repl_offset:%lld\r\n"
                ,server.masterhost,
                server.masterport,
                (server.repl_state == REPL_STATE_CONNECTED) ?
                    "up" : "down",
                server.master ?
                ((int)(server.unixtime-server.master->lastinteraction)) : -1,
                server.repl_state == REPL_STATE_TRANSFER,
                slave_read_repl_offset,
                slave_repl_offset
            );

            if (server.repl_state == REPL_STATE_TRANSFER) {
                double perc = 0;
                if (server.repl_transfer_size) {
                    perc = ((double)server.repl_transfer_read / server.repl_transfer_size) * 100;
                }
                info = sdscatprintf(info,
                    "master_sync_total_bytes:%lld\r\n"
                    "master_sync_read_bytes:%lld\r\n"
                    "master_sync_left_bytes:%lld\r\n"
                    "master_sync_perc:%.2f\r\n"
                    "master_sync_last_io_seconds_ago:%d\r\n",
                    (long long) server.repl_transfer_size,
                    (long long) server.repl_transfer_read,
                    (long long) (server.repl_transfer_size - server.repl_transfer_read),
                    perc,
                    (int)(server.unixtime-server.repl_transfer_lastio)
                );
            }

            if (server.repl_state != REPL_STATE_CONNECTED) {
                info = sdscatprintf(info,
                    "master_link_down_since_seconds:%jd\r\n",
                    server.repl_down_since ?
                    (intmax_t)(server.unixtime-server.repl_down_since) : -1);
            }
            info = sdscatprintf(info,
                "slave_priority:%d\r\n"
                "slave_read_only:%d\r\n"
                "replica_announced:%d\r\n",
                server.slave_priority,
                server.repl_slave_ro,
                server.replica_announced);
        }

        info = sdscatprintf(info,
            "connected_slaves:%lu\r\n",
            listLength(server.slaves));

        /* If min-slaves-to-write is active, write the number of slaves
         * currently considered 'good'. */
        if (server.repl_min_slaves_to_write &&
            server.repl_min_slaves_max_lag) {
            info = sdscatprintf(info,
                "min_slaves_good_slaves:%d\r\n",
                server.repl_good_slaves_count);
        }

        if (listLength(server.slaves)) {
            int slaveid = 0;
            listNode *ln;
            listIter li;

            listRewind(server.slaves,&li);
            while((ln = listNext(&li))) {
                client *slave = listNodeValue(ln);
                char ip[NET_IP_STR_LEN], *slaveip = slave->slave_addr;
                int port;
                long lag = 0;

                /* No announced address: fall back to the peer address of
                 * the connection, skipping the replica if unavailable. */
                if (!slaveip) {
                    if (connAddrPeerName(slave->conn,ip,sizeof(ip),&port) == -1)
                        continue;
                    slaveip = ip;
                }
                /* Replicas in states with no printable name are omitted. */
                const char *state = replstateToString(slave->replstate);
                if (state[0] == '\0') continue;
                if (slave->replstate == SLAVE_STATE_ONLINE)
                    lag = time(NULL) - slave->repl_ack_time;

                info = sdscatprintf(info,
                    "slave%d:ip=%s,port=%d,state=%s,"
                    "offset=%lld,lag=%ld\r\n",
                    slaveid,slaveip,slave->slave_listening_port,state,
                    slave->repl_ack_off, lag);
                slaveid++;
            }
        }
        info = sdscatprintf(info,
            "master_failover_state:%s\r\n"
            "master_replid:%s\r\n"
            "master_replid2:%s\r\n"
            "master_repl_offset:%lld\r\n"
            "second_repl_offset:%lld\r\n"
            "repl_backlog_active:%d\r\n"
            "repl_backlog_size:%lld\r\n"
            "repl_backlog_first_byte_offset:%lld\r\n"
            "repl_backlog_histlen:%lld\r\n",
            getFailoverStateString(),
            server.replid,
            server.replid2,
            server.master_repl_offset,
            server.second_replid_offset,
            server.repl_backlog != NULL,
            server.repl_backlog_size,
            server.repl_backlog ? server.repl_backlog->offset : 0,
            server.repl_backlog ? server.repl_backlog->histlen : 0);
    }

    /* CPU */
    if (all_sections || (dictFind(section_dict,"cpu") != NULL)) {
        if (sections++) info = sdscat(info,"\r\n");

        struct rusage self_ru, c_ru;
        getrusage(RUSAGE_SELF, &self_ru);
        getrusage(RUSAGE_CHILDREN, &c_ru);
        info = sdscatprintf(info,
        "# CPU\r\n"
        "used_cpu_sys:%ld.%06ld\r\n"
        "used_cpu_user:%ld.%06ld\r\n"
        "used_cpu_sys_children:%ld.%06ld\r\n"
        "used_cpu_user_children:%ld.%06ld\r\n",
        (long)self_ru.ru_stime.tv_sec, (long)self_ru.ru_stime.tv_usec,
        (long)self_ru.ru_utime.tv_sec, (long)self_ru.ru_utime.tv_usec,
        (long)c_ru.ru_stime.tv_sec, (long)c_ru.ru_stime.tv_usec,
        (long)c_ru.ru_utime.tv_sec, (long)c_ru.ru_utime.tv_usec);
#ifdef RUSAGE_THREAD
        /* RUSAGE_THREAD is a nonstandard (Linux) extension: main-thread
         * CPU usage is only reported where it is available. */
        struct rusage m_ru;
        getrusage(RUSAGE_THREAD, &m_ru);
        info = sdscatprintf(info,
            "used_cpu_sys_main_thread:%ld.%06ld\r\n"
            "used_cpu_user_main_thread:%ld.%06ld\r\n",
            (long)m_ru.ru_stime.tv_sec, (long)m_ru.ru_stime.tv_usec,
            (long)m_ru.ru_utime.tv_sec, (long)m_ru.ru_utime.tv_usec);
#endif  /* RUSAGE_THREAD */
    }

    /* Modules */
    if (all_sections || (dictFind(section_dict,"module_list") != NULL) || (dictFind(section_dict,"modules") != NULL)) {
        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info,"# Modules\r\n");
        info = genModulesInfoString(info);
    }

    /* Command statistics */
    if (all_sections || (dictFind(section_dict,"commandstats") != NULL)) {
        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info, "# Commandstats\r\n");
        info = genRedisInfoStringCommandStats(info, server.commands);
    }

    /* Error statistics */
    if (all_sections || (dictFind(section_dict,"errorstats") != NULL)) {
        if (sections++) info = sdscat(info,"\r\n");
        info = sdscat(info, "# Errorstats\r\n");
        raxIterator ri;
        raxStart(&ri,server.errors);
        raxSeek(&ri,"^",NULL,0);
        struct redisError *e;
        while(raxNext(&ri)) {
            char *tmpsafe;
            e = (struct redisError *) ri.data;
            info = sdscatprintf(info,
                "errorstat_%.*s:count=%lld\r\n",
                (int)ri.key_len, getSafeInfoString((char *) ri.key, ri.key_len, &tmpsafe), e->count);
            if (tmpsafe != NULL) zfree(tmpsafe);
        }
        raxStop(&ri);
    }

    /* Latency by percentile distribution per command */
    if (all_sections || (dictFind(section_dict,"latencystats") != NULL)) {
        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info, "# Latencystats\r\n");
        if (server.latency_tracking_enabled) {
            info = genRedisInfoStringLatencyStats(info, server.commands);
        }
    }

    /* Cluster */
    if (all_sections || (dictFind(section_dict,"cluster") != NULL)) {
        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info,
        "# Cluster\r\n"
        "cluster_enabled:%d\r\n",
        server.cluster_enabled);
    }

    /* Key space */
    if (all_sections || (dictFind(section_dict,"keyspace") != NULL)) {
        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info, "# Keyspace\r\n");
        /* Databases with no keys and no expires are omitted entirely. */
        for (j = 0; j < server.dbnum; j++) {
            long long keys, vkeys;

            keys = dictSize(server.db[j].dict);
            vkeys = dictSize(server.db[j].expires);
            if (keys || vkeys) {
                info = sdscatprintf(info,
                    "db%d:keys=%lld,expires=%lld,avg_ttl=%lld\r\n",
                    j, keys, vkeys, server.db[j].avg_ttl);
            }
        }
    }

    /* Get info from modules.
     * Returned when the user asked for "everything", "modules", or a specific module section.
     * We're not aware of the module section names here, and we rather avoid the search when we can.
     * so we proceed if there's a requested section name that's not found yet, or when the user asked
     * for "all" with any additional section names. */
    if (everything || dictFind(section_dict, "modules") != NULL || sections < (int)dictSize(section_dict) ||
        (all_sections && dictSize(section_dict)))
    {

        info = modulesCollectInfo(info,
                                  everything || dictFind(section_dict, "modules") != NULL ? NULL: section_dict,
                                  0, /* not a crash report */
                                  sections);
    }

    /* "debug" is a hidden section: never included in "all"/"everything",
     * only when explicitly requested by name. */
    if (dictFind(section_dict, "debug") != NULL) {
        if (sections++) info = sdscat(info,"\r\n");
        info = sdscatprintf(info,
        "# Debug\r\n"
        "eventloop_duration_aof_sum:%llu\r\n"
        "eventloop_duration_cron_sum:%llu\r\n"
        "eventloop_duration_max:%llu\r\n"
        "eventloop_cmd_per_cycle_max:%lld\r\n",
        server.duration_stats[EL_DURATION_TYPE_AOF].sum,
        server.duration_stats[EL_DURATION_TYPE_CRON].sum,
        server.duration_stats[EL_DURATION_TYPE_EL].max,
        server.el_cmd_cnt_max);
    }

    return info;
}
+
+/* INFO [<section> [<section> ...]] */
+void infoCommand(client *c) {
+ if (server.sentinel_mode) {
+ sentinelInfoCommand(c);
+ return;
+ }
+ int all_sections = 0;
+ int everything = 0;
+ dict *sections_dict = genInfoSectionDict(c->argv+1, c->argc-1, NULL, &all_sections, &everything);
+ sds info = genRedisInfoString(sections_dict, all_sections, everything);
+ addReplyVerbatim(c,info,sdslen(info),"txt");
+ sdsfree(info);
+ releaseInfoSectionDict(sections_dict);
+ return;
+}
+
+void monitorCommand(client *c) {
+ if (c->flags & CLIENT_DENY_BLOCKING) {
+ /**
+ * A client that has CLIENT_DENY_BLOCKING flag on
+ * expects a reply per command and so can't execute MONITOR. */
+ addReplyError(c, "MONITOR isn't allowed for DENY BLOCKING client");
+ return;
+ }
+
+ /* ignore MONITOR if already slave or in monitor mode */
+ if (c->flags & CLIENT_SLAVE) return;
+
+ c->flags |= (CLIENT_SLAVE|CLIENT_MONITOR);
+ listAddNodeTail(server.monitors,c);
+ addReply(c,shared.ok);
+}
+
+/* =================================== Main! ================================ */
+
+int checkIgnoreWarning(const char *warning) {
+ int argc, j;
+ sds *argv = sdssplitargs(server.ignore_warnings, &argc);
+ if (argv == NULL)
+ return 0;
+
+ for (j = 0; j < argc; j++) {
+ char *flag = argv[j];
+ if (!strcasecmp(flag, warning))
+ break;
+ }
+ sdsfreesplitres(argv,argc);
+ return j < argc;
+}
+
+#ifdef __linux__
+#include <sys/prctl.h>
+/* since linux-3.5, kernel supports to set the state of the "THP disable" flag
+ * for the calling thread. PR_SET_THP_DISABLE is defined in linux/prctl.h */
+static int THPDisable(void) {
+ int ret = -EINVAL;
+
+ if (!server.disable_thp)
+ return ret;
+
+#ifdef PR_SET_THP_DISABLE
+ ret = prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0);
+#endif
+
+ return ret;
+}
+
/* Emit startup warnings about problematic Linux memory settings
 * (overcommit policy, Transparent Huge Pages), and attempt to disable
 * THP for this process when it is found enabled. Updates
 * server.thp_enabled to reflect the final state. */
void linuxMemoryWarnings(void) {
    sds err_msg = NULL;
    if (checkOvercommit(&err_msg) < 0) {
        serverLog(LL_WARNING,"WARNING %s", err_msg);
        sdsfree(err_msg);
    }
    if (checkTHPEnabled(&err_msg) < 0) {
        server.thp_enabled = 1;
        /* Only warn when we failed to disable THP for this process. */
        if (THPDisable() == 0) {
            server.thp_enabled = 0;
        } else {
            serverLog(LL_WARNING, "WARNING %s", err_msg);
        }
        sdsfree(err_msg);
    }
}
+#endif /* __linux__ */
+
+void createPidFile(void) {
+ /* If pidfile requested, but no pidfile defined, use
+ * default pidfile path */
+ if (!server.pidfile) server.pidfile = zstrdup(CONFIG_DEFAULT_PID_FILE);
+
+ /* Try to write the pid file in a best-effort way. */
+ FILE *fp = fopen(server.pidfile,"w");
+ if (fp) {
+ fprintf(fp,"%d\n",(int)getpid());
+ fclose(fp);
+ } else {
+ serverLog(LL_WARNING, "Failed to write PID file: %s", strerror(errno));
+ }
+}
+
+void daemonize(void) {
+ int fd;
+
+ if (fork() != 0) exit(0); /* parent exits */
+ setsid(); /* create a new session */
+
+ /* Every output goes to /dev/null. If Redis is daemonized but
+ * the 'logfile' is set to 'stdout' in the configuration file
+ * it will not log at all. */
+ if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
+ dup2(fd, STDIN_FILENO);
+ dup2(fd, STDOUT_FILENO);
+ dup2(fd, STDERR_FILENO);
+ if (fd > STDERR_FILENO) close(fd);
+ }
+}
+
+void version(void) {
+ printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d build=%llx\n",
+ REDIS_VERSION,
+ redisGitSHA1(),
+ atoi(redisGitDirty()) > 0,
+ ZMALLOC_LIB,
+ sizeof(long) == 4 ? 32 : 64,
+ (unsigned long long) redisBuildId());
+ exit(0);
+}
+
/* Print command line usage help to stderr and exit(1).
 * Invoked for -h / --help and on invalid invocations. */
void usage(void) {
    static const char *usage_lines[] = {
        "Usage: ./redis-server [/path/to/redis.conf] [options] [-]\n",
        " ./redis-server - (read config from stdin)\n",
        " ./redis-server -v or --version\n",
        " ./redis-server -h or --help\n",
        " ./redis-server --test-memory <megabytes>\n",
        " ./redis-server --check-system\n",
        "\n",
        "Examples:\n",
        " ./redis-server (run the server with default conf)\n",
        " echo 'maxmemory 128mb' | ./redis-server -\n",
        " ./redis-server /etc/redis/6379.conf\n",
        " ./redis-server --port 7777\n",
        " ./redis-server --port 7777 --replicaof 127.0.0.1 8888\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose -\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose\n\n",
        "Sentinel mode:\n",
        " ./redis-server /etc/sentinel.conf --sentinel\n",
    };
    for (size_t i = 0; i < sizeof(usage_lines)/sizeof(usage_lines[0]); i++)
        fputs(usage_lines[i], stderr);
    exit(1);
}
+
/* Log the startup banner: either the full ASCII-art logo or a one-line
 * mode/port notice, depending on whether stdout looks interactive.
 * NOTE: the snprintf argument order is dictated by the format string in
 * asciilogo.h; do not reorder. */
void redisAsciiArt(void) {
#include "asciilogo.h"
    char *buf = zmalloc(1024*16);
    char *mode;

    if (server.cluster_enabled) mode = "cluster";
    else if (server.sentinel_mode) mode = "sentinel";
    else mode = "standalone";

    /* Show the ASCII logo if: log file is stdout AND stdout is a
     * tty AND syslog logging is disabled. Also show logo if the user
     * forced us to do so via redis.conf. */
    int show_logo = ((!server.syslog_enabled &&
                      server.logfile[0] == '\0' &&
                      isatty(fileno(stdout))) ||
                     server.always_show_logo);

    if (!show_logo) {
        serverLog(LL_NOTICE,
            "Running mode=%s, port=%d.",
            mode, server.port ? server.port : server.tls_port
        );
    } else {
        /* When the TCP port is disabled, report the TLS port instead. */
        snprintf(buf,1024*16,ascii_logo,
            REDIS_VERSION,
            redisGitSHA1(),
            strtol(redisGitDirty(),NULL,10) > 0,
            (sizeof(long) == 8) ? "64" : "32",
            mode, server.port ? server.port : server.tls_port,
            (long) getpid()
        );
        serverLogRaw(LL_NOTICE|LL_RAW,buf);
    }
    zfree(buf);
}
+
+/* Get the server listener by type name */
+connListener *listenerByType(const char *typename) {
+ int conn_index;
+
+ conn_index = connectionIndexByType(typename);
+ if (conn_index < 0)
+ return NULL;
+
+ return &server.listeners[conn_index];
+}
+
/* Close original listener, re-create a new listener from the updated bind address & port.
 * Returns C_OK on success (including the "port disabled" case) and C_ERR
 * when the new listener cannot be bound; panics if the accept handler
 * cannot be installed. */
int changeListener(connListener *listener) {
    /* Close old servers */
    closeListener(listener);

    /* Just close the server if port disabled */
    if (listener->port == 0) {
        if (server.set_proc_title) redisSetProcTitle(NULL);
        return C_OK;
    }

    /* Re-create listener */
    if (connListen(listener) != C_OK) {
        return C_ERR;
    }

    /* Create event handlers */
    if (createSocketAcceptHandler(listener, listener->ct->accept_handler) != C_OK) {
        serverPanic("Unrecoverable error creating %s accept handler.", listener->ct->get_type(NULL));
    }

    /* The listen address may appear in the process title; refresh it. */
    if (server.set_proc_title) redisSetProcTitle(NULL);

    return C_OK;
}
+
/* SIGINT/SIGTERM handler: request a graceful shutdown by setting
 * server.shutdown_asap, to be acted upon by the event loop.
 * Runs in async-signal context, so only signal-safe helpers are used. */
static void sigShutdownHandler(int sig) {
    char *msg;

    switch (sig) {
    case SIGINT:
        msg = "Received SIGINT scheduling shutdown...";
        break;
    case SIGTERM:
        msg = "Received SIGTERM scheduling shutdown...";
        break;
    default:
        msg = "Received shutdown signal, scheduling shutdown...";
    };

    /* SIGINT is often delivered via Ctrl+C in an interactive session.
     * If we receive the signal the second time, we interpret this as
     * the user really wanting to quit ASAP without waiting to persist
     * on disk and without waiting for lagging replicas. */
    if (server.shutdown_asap && sig == SIGINT) {
        serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
        rdbRemoveTempFile(getpid(), 1);
        exit(1); /* Exit with an error since this was not a clean shutdown. */
    } else if (server.loading) {
        msg = "Received shutdown signal during loading, scheduling shutdown.";
    }

    serverLogFromHandler(LL_WARNING, msg);
    server.shutdown_asap = 1;
    server.last_sig_received = sig;
}
+
+void setupSignalHandlers(void) {
+ struct sigaction act;
+
+ /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
+ * Otherwise, sa_handler is used. */
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigShutdownHandler;
+ sigaction(SIGTERM, &act, NULL);
+ sigaction(SIGINT, &act, NULL);
+
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
+ act.sa_sigaction = sigsegvHandler;
+ if(server.crashlog_enabled) {
+ sigaction(SIGSEGV, &act, NULL);
+ sigaction(SIGBUS, &act, NULL);
+ sigaction(SIGFPE, &act, NULL);
+ sigaction(SIGILL, &act, NULL);
+ sigaction(SIGABRT, &act, NULL);
+ }
+ return;
+}
+
+void removeSignalHandlers(void) {
+ struct sigaction act;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_NODEFER | SA_RESETHAND;
+ act.sa_handler = SIG_DFL;
+ sigaction(SIGSEGV, &act, NULL);
+ sigaction(SIGBUS, &act, NULL);
+ sigaction(SIGFPE, &act, NULL);
+ sigaction(SIGILL, &act, NULL);
+ sigaction(SIGABRT, &act, NULL);
+}
+
/* This is the signal handler for children process. It is currently useful
 * in order to track the SIGUSR1, that we send to a child in order to terminate
 * it in a clean way, without the parent detecting an error and stop
 * accepting writes because of a write error condition. */
static void sigKillChildHandler(int sig) {
    UNUSED(sig);
    /* Module children are killed routinely, so log quietly for them. */
    int level = server.in_fork_child == CHILD_TYPE_MODULE? LL_VERBOSE: LL_WARNING;
    serverLogFromHandler(level, "Received SIGUSR1 in child, exiting now.");
    exitFromChild(SERVER_CHILD_NOERROR_RETVAL);
}
+
/* Install the SIGUSR1 handler in a freshly forked child so the parent
 * can terminate it cleanly (see sigKillChildHandler). */
void setupChildSignalHandlers(void) {
    struct sigaction act;

    /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
     * Otherwise, sa_handler is used. */
    sigemptyset(&act.sa_mask);
    act.sa_flags = 0;
    act.sa_handler = sigKillChildHandler;
    sigaction(SIGUSR1, &act, NULL);
}
+
/* After fork, the child process will inherit the resources
 * of the parent process, e.g. fd(socket or flock) etc.
 * should close the resources not used by the child process, so that if the
 * parent restarts it can bind/lock despite the child possibly still running. */
void closeChildUnusedResourceAfterFork(void) {
    closeListeningSockets(0);
    if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1)
        close(server.cluster_config_file_lock_fd); /* don't care if this fails */

    /* Clear server.pidfile, this is the parent pidfile which should not
     * be touched (or deleted) by the child (on exit / crash) */
    zfree(server.pidfile);
    server.pidfile = NULL;
}
+
/* purpose is one of CHILD_TYPE_ types.
 *
 * Fork a child process, taking care of all the server-side bookkeeping.
 * Returns the child pid in the parent, 0 in the child, or -1 on failure
 * (errno is EEXIST when a mutually exclusive child is already active,
 * otherwise fork()'s errno). */
int redisFork(int purpose) {
    if (isMutuallyExclusiveChildType(purpose)) {
        /* Only one RDB/AOF/module child may run at a time. */
        if (hasActiveChildProcess()) {
            errno = EEXIST;
            return -1;
        }

        openChildInfoPipe();
    }

    int childpid;
    long long start = ustime();
    if ((childpid = fork()) == 0) {
        /* Child.
         *
         * The order of setting things up follows some reasoning:
         * Setup signal handlers first because a signal could fire at any time.
         * Adjust OOM score before everything else to assist the OOM killer if
         * memory resources are low.
         */
        server.in_fork_child = purpose;
        setupChildSignalHandlers();
        setOOMScoreAdj(CONFIG_OOM_BGCHILD);
        updateDictResizePolicy();
        dismissMemoryInChild();
        closeChildUnusedResourceAfterFork();
        /* Close the reading part, so that if the parent crashes, the child will
         * get a write error and exit. */
        if (server.child_info_pipe[0] != -1)
            close(server.child_info_pipe[0]);
    } else {
        /* Parent */
        if (childpid == -1) {
            /* Preserve fork()'s errno across the pipe cleanup. */
            int fork_errno = errno;
            if (isMutuallyExclusiveChildType(purpose)) closeChildInfoPipe();
            errno = fork_errno;
            return -1;
        }

        server.stat_total_forks++;
        server.stat_fork_time = ustime()-start;
        server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
        latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);

        /* The child_pid and child_type are only for mutually exclusive children.
         * other child types should handle and store their pid's in dedicated variables.
         *
         * Today, we allows CHILD_TYPE_LDB to run in parallel with the other fork types:
         * - it isn't used for production, so it will not make the server be less efficient
         * - used for debugging, and we don't want to block it from running while other
         *   forks are running (like RDB and AOF) */
        if (isMutuallyExclusiveChildType(purpose)) {
            server.child_pid = childpid;
            server.child_type = purpose;
            server.stat_current_cow_peak = 0;
            server.stat_current_cow_bytes = 0;
            server.stat_current_cow_updated = 0;
            server.stat_current_save_keys_processed = 0;
            server.stat_module_progress = 0;
            server.stat_current_save_keys_total = dbTotalServerKeyCount();
        }

        updateDictResizePolicy();
        moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
                              REDISMODULE_SUBEVENT_FORK_CHILD_BORN,
                              NULL);
    }
    return childpid;
}
+
/* Convenience wrapper: report CoW info from a child without a key count
 * (keys=0) and without forcing a send (see sendChildInfoGeneric). */
void sendChildCowInfo(childInfoType info_type, char *pname) {
    sendChildInfoGeneric(info_type, 0, -1, pname);
}
+
/* Convenience wrapper: report child progress with the number of keys
 * processed so far (see sendChildInfoGeneric). */
void sendChildInfo(childInfoType info_type, size_t keys, char *pname) {
    sendChildInfoGeneric(info_type, keys, -1, pname);
}
+
/* Try to release pages back to the OS directly (bypassing the allocator),
 * in an effort to decrease CoW during fork. For small allocations, we can't
 * release any full page, so in an effort to avoid getting the size of the
 * allocation from the allocator (malloc_size) when we already know it's small,
 * we check the size_hint. If the size is not already known, passing a size_hint
 * of 0 will lead the checking the real size of the allocation.
 * Also please note that the size may be not accurate, so in order to make this
 * solution effective, the judgement for releasing memory pages should not be
 * too strict. */
void dismissMemory(void* ptr, size_t size_hint) {
    if (ptr == NULL) return;

    /* madvise(MADV_DONTNEED) can not release pages if the size of memory
     * is too small, we try to release only for the memory which the size
     * is more than half of page size. */
    if (size_hint && size_hint <= server.page_size/2) return;

    zmadvise_dontneed(ptr);
}
+
/* Dismiss big chunks of memory inside a client structure, see dismissMemory().
 * Small buffers are skipped: only spans likely to cover whole pages are
 * worth advising away. */
void dismissClientMemory(client *c) {
    /* Dismiss client query buffer and static reply buffer. */
    dismissMemory(c->buf, c->buf_usable_size);
    dismissSds(c->querybuf);
    /* Dismiss argv array only if we estimate it contains a big buffer. */
    if (c->argc && c->argv_len_sum/c->argc >= server.page_size) {
        for (int i = 0; i < c->argc; i++) {
            dismissObject(c->argv[i], 0);
        }
    }
    if (c->argc) dismissMemory(c->argv, c->argc*sizeof(robj*));

    /* Dismiss the reply array only if the average buffer size is bigger
     * than a page. */
    if (listLength(c->reply) &&
        c->reply_bytes/listLength(c->reply) >= server.page_size)
    {
        listIter li;
        listNode *ln;
        listRewind(c->reply, &li);
        while ((ln = listNext(&li))) {
            clientReplyBlock *bulk = listNodeValue(ln);
            /* Default bulk size is 16k, actually it has extra data, maybe it
             * occupies 20k according to jemalloc bin size if using jemalloc. */
            if (bulk) dismissMemory(bulk, bulk->size);
        }
    }
}
+
/* In the child process, we don't need some buffers anymore, and these are
 * likely to change in the parent when there's heavy write traffic.
 * We dismiss them right away, to avoid CoW.
 * see dismissMemeory(). */
void dismissMemoryInChild(void) {
    /* madvise(MADV_DONTNEED) may not work if Transparent Huge Pages is enabled. */
    if (server.thp_enabled) return;

    /* Currently we use zmadvise_dontneed only when we use jemalloc with Linux.
     * so we avoid these pointless loops when they're not going to do anything. */
#if defined(USE_JEMALLOC) && defined(__linux__)
    listIter li;
    listNode *ln;

    /* Dismiss replication buffer. We don't need to separately dismiss replication
     * backlog and replica' output buffer, because they just reference the global
     * replication buffer but don't cost real memory. */
    listRewind(server.repl_buffer_blocks, &li);
    while((ln = listNext(&li))) {
        replBufBlock *o = listNodeValue(ln);
        dismissMemory(o, o->size);
    }

    /* Dismiss all clients memory. */
    listRewind(server.clients, &li);
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);
        dismissClientMemory(c);
    }
#endif
}
+
+void memtest(size_t megabytes, int passes);
+
/* Returns 1 if there is --sentinel among the arguments or if
 * executable name contains "redis-sentinel", 0 otherwise. */
int checkForSentinelMode(int argc, char **argv, char *exec_name) {
    if (strstr(exec_name, "redis-sentinel") != NULL) return 1;

    int found = 0;
    for (int i = 1; i < argc && !found; i++) {
        if (strcmp(argv[i], "--sentinel") == 0) found = 1;
    }
    return found;
}
+
/* Function called at startup to load RDB or AOF file in memory.
 * Exits the process on fatal load errors. When loading from RDB, also
 * restores the replication ID/offset saved in the file so partial
 * resynchronizations remain possible after a restart. */
void loadDataFromDisk(void) {
    long long start = ustime();
    if (server.aof_state == AOF_ON) {
        int ret = loadAppendOnlyFiles(server.aof_manifest);
        if (ret == AOF_FAILED || ret == AOF_OPEN_ERR)
            exit(1);
        if (ret != AOF_NOT_EXIST)
            serverLog(LL_NOTICE, "DB loaded from append only file: %.3f seconds", (float)(ustime()-start)/1000000);
    } else {
        rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
        /* Set only when the RDB carried usable replication info. */
        int rsi_is_valid = 0;
        errno = 0; /* Prevent a stale value from affecting error checking */
        int rdb_flags = RDBFLAGS_NONE;
        if (iAmMaster()) {
            /* Master may delete expired keys when loading, we should
             * propagate expire to replication backlog. */
            createReplicationBacklog();
            rdb_flags |= RDBFLAGS_FEED_REPL;
        }
        int rdb_load_ret = rdbLoad(server.rdb_filename, &rsi, rdb_flags);
        if (rdb_load_ret == RDB_OK) {
            serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds",
                (float)(ustime()-start)/1000000);

            /* Restore the replication ID / offset from the RDB file. */
            if (rsi.repl_id_is_set &&
                rsi.repl_offset != -1 &&
                /* Note that older implementations may save a repl_stream_db
                 * of -1 inside the RDB file in a wrong way, see more
                 * information in function rdbPopulateSaveInfo. */
                rsi.repl_stream_db != -1)
            {
                rsi_is_valid = 1;
                if (!iAmMaster()) {
                    memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
                    server.master_repl_offset = rsi.repl_offset;
                    /* If this is a replica, create a cached master from this
                     * information, in order to allow partial resynchronizations
                     * with masters. */
                    replicationCacheMasterUsingMyself();
                    selectDb(server.cached_master,rsi.repl_stream_db);
                } else {
                    /* If this is a master, we can save the replication info
                     * as secondary ID and offset, in order to allow replicas
                     * to partial resynchronizations with masters. */
                    memcpy(server.replid2,rsi.repl_id,sizeof(server.replid));
                    server.second_replid_offset = rsi.repl_offset+1;
                    /* Rebase master_repl_offset from rsi.repl_offset. */
                    server.master_repl_offset += rsi.repl_offset;
                    serverAssert(server.repl_backlog);
                    server.repl_backlog->offset = server.master_repl_offset -
                              server.repl_backlog->histlen + 1;
                    rebaseReplicationBuffer(rsi.repl_offset);
                    server.repl_no_slaves_since = time(NULL);
                }
            }
        } else if (rdb_load_ret != RDB_NOT_EXIST) {
            serverLog(LL_WARNING, "Fatal error loading the DB, check server logs. Exiting.");
            exit(1);
        }

        /* We always create replication backlog if server is a master, we need
         * it because we put DELs in it when loading expired keys in RDB, but
         * if RDB doesn't have replication info or there is no rdb, it is not
         * possible to support partial resynchronization, to avoid extra memory
         * of replication backlog, we drop it. */
        if (!rsi_is_valid && server.repl_backlog)
            freeReplicationBacklog();
    }
}
+
/* zmalloc OOM handler: log the failed allocation size and abort via
 * serverPanic (produces a crash report; never returns). */
void redisOutOfMemoryHandler(size_t allocation_size) {
    serverLog(LL_WARNING,"Out Of Memory allocating %zu bytes!",
        allocation_size);
    serverPanic("Redis aborting for OUT OF MEMORY. Allocating %zu bytes!",
        allocation_size);
}
+
/* Callback for sdstemplate on proc-title-template. See redis.conf for
 * supported variables.
 * Returns a newly allocated sds with the variable's expansion, or NULL
 * for an unknown variable name (sdstemplate treats that as an error).
 * NOTE(review): ports are printed with %u; confirm server.port/tls_port
 * are unsigned — if they are plain ints this relies on benign conversion. */
static sds redisProcTitleGetVariable(const sds varname, void *arg)
{
    if (!strcmp(varname, "title")) {
        /* 'arg' carries the title string passed to expandProcTitleTemplate. */
        return sdsnew(arg);
    } else if (!strcmp(varname, "listen-addr")) {
        /* Prefer TCP/TLS address; fall back to the unix socket path. */
        if (server.port || server.tls_port)
            return sdscatprintf(sdsempty(), "%s:%u",
                                server.bindaddr_count ? server.bindaddr[0] : "*",
                                server.port ? server.port : server.tls_port);
        else
            return sdscatprintf(sdsempty(), "unixsocket:%s", server.unixsocket);
    } else if (!strcmp(varname, "server-mode")) {
        if (server.cluster_enabled) return sdsnew("[cluster]");
        else if (server.sentinel_mode) return sdsnew("[sentinel]");
        else return sdsempty();
    } else if (!strcmp(varname, "config-file")) {
        return sdsnew(server.configfile ? server.configfile : "-");
    } else if (!strcmp(varname, "port")) {
        return sdscatprintf(sdsempty(), "%u", server.port);
    } else if (!strcmp(varname, "tls-port")) {
        return sdscatprintf(sdsempty(), "%u", server.tls_port);
    } else if (!strcmp(varname, "unixsocket")) {
        return sdsnew(server.unixsocket);
    } else
        return NULL; /* Unknown variable name */
}
+
/* Expand the specified proc-title-template string and return a newly
 * allocated sds, or NULL on template error. The result is trimmed of
 * leading/trailing spaces left by empty variable expansions. */
static sds expandProcTitleTemplate(const char *template, const char *title) {
    sds res = sdstemplate(template, redisProcTitleGetVariable, (void *) title);
    if (!res)
        return NULL;
    return sdstrim(res, " ");
}
+/* Validate the specified template, returns 1 if valid or 0 otherwise. */
+int validateProcTitleTemplate(const char *template) {
+ int ok = 1;
+ sds res = expandProcTitleTemplate(template, "");
+ if (!res)
+ return 0;
+ if (sdslen(res) == 0) ok = 0;
+ sdsfree(res);
+ return ok;
+}
+
/* Set the process title from server.proc_title_template. When 'title' is
 * NULL, the executable name (exec_argv[0]) is used. No-op on platforms
 * without setproctitle support. Returns C_OK, or C_ERR on template
 * expansion failure (unlikely: the template is validated on config set). */
int redisSetProcTitle(char *title) {
#ifdef USE_SETPROCTITLE
    if (!title) title = server.exec_argv[0];
    sds proc_title = expandProcTitleTemplate(server.proc_title_template, title);
    if (!proc_title) return C_ERR; /* Not likely, proc_title_template is validated */

    setproctitle("%s", proc_title);
    sdsfree(proc_title);
#else
    UNUSED(title);
#endif

    return C_OK;
}
+
/* Pin the process to the CPUs described by 'cpulist' (e.g. "0-3,8").
 * No-op on platforms without CPU affinity support. */
void redisSetCpuAffinity(const char *cpulist) {
#ifdef USE_SETCPUAFFINITY
    setcpuaffinity(cpulist);
#else
    UNUSED(cpulist);
#endif
}
+
/* Send a notify message to systemd. Returns sd_notify return code which is
 * a positive number on success (0 means NOTIFY_SOCKET was not found,
 * negative is an sd_notify error; both are logged). Always returns 0
 * when built without libsystemd. */
int redisCommunicateSystemd(const char *sd_notify_msg) {
#ifdef HAVE_LIBSYSTEMD
    int ret = sd_notify(0, sd_notify_msg);

    if (ret == 0)
        serverLog(LL_WARNING, "systemd supervision error: NOTIFY_SOCKET not found!");
    else if (ret < 0)
        serverLog(LL_WARNING, "systemd supervision error: sd_notify: %d", ret);
    return ret;
#else
    UNUSED(sd_notify_msg);
    return 0;
#endif
}
+
+/* Attempt to set up upstart supervision. Returns 1 if successful. */
+static int redisSupervisedUpstart(void) {
+ const char *upstart_job = getenv("UPSTART_JOB");
+
+ if (!upstart_job) {
+ serverLog(LL_WARNING,
+ "upstart supervision requested, but UPSTART_JOB not found!");
+ return 0;
+ }
+
+ serverLog(LL_NOTICE, "supervised by upstart, will stop to signal readiness.");
+ raise(SIGSTOP);
+ unsetenv("UPSTART_JOB");
+ return 1;
+}
+
/* Attempt to set up systemd supervision. Returns 1 if successful
 * (i.e. the initial STATUS notification reached systemd), 0 otherwise. */
static int redisSupervisedSystemd(void) {
#ifndef HAVE_LIBSYSTEMD
    serverLog(LL_WARNING,
            "systemd supervision requested or auto-detected, but Redis is compiled without libsystemd support!");
    return 0;
#else
    if (redisCommunicateSystemd("STATUS=Redis is loading...\n") <= 0)
        return 0;
    serverLog(LL_NOTICE,
        "Supervised by systemd. Please make sure you set appropriate values for TimeoutStartSec and TimeoutStopSec in your service unit.");
    return 1;
#endif
}
+
+int redisIsSupervised(int mode) {
+ int ret = 0;
+
+ if (mode == SUPERVISED_AUTODETECT) {
+ if (getenv("UPSTART_JOB")) {
+ serverLog(LL_VERBOSE, "Upstart supervision detected.");
+ mode = SUPERVISED_UPSTART;
+ } else if (getenv("NOTIFY_SOCKET")) {
+ serverLog(LL_VERBOSE, "Systemd supervision detected.");
+ mode = SUPERVISED_SYSTEMD;
+ }
+ }
+
+ switch (mode) {
+ case SUPERVISED_UPSTART:
+ ret = redisSupervisedUpstart();
+ break;
+ case SUPERVISED_SYSTEMD:
+ ret = redisSupervisedSystemd();
+ break;
+ default:
+ break;
+ }
+
+ if (ret)
+ server.supervised_mode = mode;
+
+ return ret;
+}
+
+int iAmMaster(void) {
+ return ((!server.cluster_enabled && server.masterhost == NULL) ||
+ (server.cluster_enabled && nodeIsMaster(server.cluster->myself)));
+}
+
+#ifdef REDIS_TEST
+#include "testhelp.h"
+#include "intset.h" /* Compact integer set structure */
+
/* Counters used by the testhelp.h assertion macros.
 * NOTE(review): double-underscore identifiers are reserved for the
 * implementation in ISO C; kept as-is since testhelp.h references them. */
int __failed_tests = 0;
int __test_num = 0;

/* The flags are the following:
* --accurate:     Runs tests with more iterations.
* --large-memory: Enables tests that consume more than 100mb. */
typedef int redisTestProc(int argc, char **argv, int flags);
/* Registry entry binding a test name to its entry point; 'failed' is
 * filled in by main() when running the full suite. */
struct redisTest {
    char *name;
    redisTestProc *proc;
    int failed;
} redisTests[] = {
    {"ziplist", ziplistTest},
    {"quicklist", quicklistTest},
    {"intset", intsetTest},
    {"zipmap", zipmapTest},
    {"sha1test", sha1Test},
    {"util", utilTest},
    {"endianconv", endianconvTest},
    {"crc64", crc64Test},
    {"zmalloc", zmalloc_test},
    {"sds", sdsTest},
    {"dict", dictTest},
    {"listpack", listpackTest}
};
/* Look up a test entry point by (case insensitive) name, NULL if absent. */
redisTestProc *getTestProcByName(const char *name) {
    int numtests = sizeof(redisTests)/sizeof(struct redisTest);
    for (int j = 0; j < numtests; j++) {
        if (!strcasecmp(name,redisTests[j].name)) {
            return redisTests[j].proc;
        }
    }
    return NULL;
}
+#endif
+
+int main(int argc, char **argv) {
+ struct timeval tv;
+ int j;
+ char config_from_stdin = 0;
+
+#ifdef REDIS_TEST
+ if (argc >= 3 && !strcasecmp(argv[1], "test")) {
+ int flags = 0;
+ for (j = 3; j < argc; j++) {
+ char *arg = argv[j];
+ if (!strcasecmp(arg, "--accurate")) flags |= REDIS_TEST_ACCURATE;
+ else if (!strcasecmp(arg, "--large-memory")) flags |= REDIS_TEST_LARGE_MEMORY;
+ else if (!strcasecmp(arg, "--valgrind")) flags |= REDIS_TEST_VALGRIND;
+ }
+
+ if (!strcasecmp(argv[2], "all")) {
+ int numtests = sizeof(redisTests)/sizeof(struct redisTest);
+ for (j = 0; j < numtests; j++) {
+ redisTests[j].failed = (redisTests[j].proc(argc,argv,flags) != 0);
+ }
+
+ /* Report tests result */
+ int failed_num = 0;
+ for (j = 0; j < numtests; j++) {
+ if (redisTests[j].failed) {
+ failed_num++;
+ printf("[failed] Test - %s\n", redisTests[j].name);
+ } else {
+ printf("[ok] Test - %s\n", redisTests[j].name);
+ }
+ }
+
+ printf("%d tests, %d passed, %d failed\n", numtests,
+ numtests-failed_num, failed_num);
+
+ return failed_num == 0 ? 0 : 1;
+ } else {
+ redisTestProc *proc = getTestProcByName(argv[2]);
+ if (!proc) return -1; /* test not found */
+ return proc(argc,argv,flags);
+ }
+
+ return 0;
+ }
+#endif
+
+ /* We need to initialize our libraries, and the server configuration. */
+#ifdef INIT_SETPROCTITLE_REPLACEMENT
+ spt_init(argc, argv);
+#endif
+ tzset(); /* Populates 'timezone' global. */
+ zmalloc_set_oom_handler(redisOutOfMemoryHandler);
+
+ /* To achieve entropy, in case of containers, their time() and getpid() can
+ * be the same. But value of tv_usec is fast enough to make the difference */
+ gettimeofday(&tv,NULL);
+ srand(time(NULL)^getpid()^tv.tv_usec);
+ srandom(time(NULL)^getpid()^tv.tv_usec);
+ init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
+ crc64_init();
+
+ /* Store umask value. Because umask(2) only offers a set-and-get API we have
+ * to reset it and restore it back. We do this early to avoid a potential
+ * race condition with threads that could be creating files or directories.
+ */
+ umask(server.umask = umask(0777));
+
+ uint8_t hashseed[16];
+ getRandomBytes(hashseed,sizeof(hashseed));
+ dictSetHashFunctionSeed(hashseed);
+
+ char *exec_name = strrchr(argv[0], '/');
+ if (exec_name == NULL) exec_name = argv[0];
+ server.sentinel_mode = checkForSentinelMode(argc,argv, exec_name);
+ initServerConfig();
+ ACLInit(); /* The ACL subsystem must be initialized ASAP because the
+ basic networking code and client creation depends on it. */
+ moduleInitModulesSystem();
+ connTypeInitialize();
+
+ /* Store the executable path and arguments in a safe place in order
+ * to be able to restart the server later. */
+ server.executable = getAbsolutePath(argv[0]);
+ server.exec_argv = zmalloc(sizeof(char*)*(argc+1));
+ server.exec_argv[argc] = NULL;
+ for (j = 0; j < argc; j++) server.exec_argv[j] = zstrdup(argv[j]);
+
+ /* We need to init sentinel right now as parsing the configuration file
+ * in sentinel mode will have the effect of populating the sentinel
+ * data structures with master nodes to monitor. */
+ if (server.sentinel_mode) {
+ initSentinelConfig();
+ initSentinel();
+ }
+
+ /* Check if we need to start in redis-check-rdb/aof mode. We just execute
+ * the program main. However the program is part of the Redis executable
+ * so that we can easily execute an RDB check on loading errors. */
+ if (strstr(exec_name,"redis-check-rdb") != NULL)
+ redis_check_rdb_main(argc,argv,NULL);
+ else if (strstr(exec_name,"redis-check-aof") != NULL)
+ redis_check_aof_main(argc,argv);
+
+ if (argc >= 2) {
+ j = 1; /* First option to parse in argv[] */
+ sds options = sdsempty();
+
+ /* Handle special options --help and --version */
+ if (strcmp(argv[1], "-v") == 0 ||
+ strcmp(argv[1], "--version") == 0) version();
+ if (strcmp(argv[1], "--help") == 0 ||
+ strcmp(argv[1], "-h") == 0) usage();
+ if (strcmp(argv[1], "--test-memory") == 0) {
+ if (argc == 3) {
+ memtest(atoi(argv[2]),50);
+ exit(0);
+ } else {
+ fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
+ fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
+ exit(1);
+ }
+ } if (strcmp(argv[1], "--check-system") == 0) {
+ exit(syscheck() ? 0 : 1);
+ }
+ /* Parse command line options
+ * Precedence wise, File, stdin, explicit options -- last config is the one that matters.
+ *
+ * First argument is the config file name? */
+ if (argv[1][0] != '-') {
+ /* Replace the config file in server.exec_argv with its absolute path. */
+ server.configfile = getAbsolutePath(argv[1]);
+ zfree(server.exec_argv[1]);
+ server.exec_argv[1] = zstrdup(server.configfile);
+ j = 2; // Skip this arg when parsing options
+ }
+ sds *argv_tmp;
+ int argc_tmp;
+ int handled_last_config_arg = 1;
+ while(j < argc) {
+ /* Either first or last argument - Should we read config from stdin? */
+ if (argv[j][0] == '-' && argv[j][1] == '\0' && (j == 1 || j == argc-1)) {
+ config_from_stdin = 1;
+ }
+ /* All the other options are parsed and conceptually appended to the
+ * configuration file. For instance --port 6380 will generate the
+ * string "port 6380\n" to be parsed after the actual config file
+ * and stdin input are parsed (if they exist).
+ * Only consider that if the last config has at least one argument. */
+ else if (handled_last_config_arg && argv[j][0] == '-' && argv[j][1] == '-') {
+ /* Option name */
+ if (sdslen(options)) options = sdscat(options,"\n");
+ /* argv[j]+2 for removing the preceding `--` */
+ options = sdscat(options,argv[j]+2);
+ options = sdscat(options," ");
+
+ argv_tmp = sdssplitargs(argv[j], &argc_tmp);
+ if (argc_tmp == 1) {
+ /* Means that we only have one option name, like --port or "--port " */
+ handled_last_config_arg = 0;
+
+ if ((j != argc-1) && argv[j+1][0] == '-' && argv[j+1][1] == '-' &&
+ !strcasecmp(argv[j], "--save"))
+ {
+ /* Special case: handle some things like `--save --config value`.
+ * In this case, if next argument starts with `--`, we will reset
+ * handled_last_config_arg flag and append an empty "" config value
+ * to the options, so it will become `--save "" --config value`.
+ * We are doing it to be compatible with pre 7.0 behavior (which we
+ * break it in #10660, 7.0.1), since there might be users who generate
+ * a command line from an array and when it's empty that's what they produce. */
+ options = sdscat(options, "\"\"");
+ handled_last_config_arg = 1;
+ }
+ else if ((j == argc-1) && !strcasecmp(argv[j], "--save")) {
+ /* Special case: when empty save is the last argument.
+ * In this case, we append an empty "" config value to the options,
+ * so it will become `--save ""` and will follow the same reset thing. */
+ options = sdscat(options, "\"\"");
+ }
+ else if ((j != argc-1) && argv[j+1][0] == '-' && argv[j+1][1] == '-' &&
+ !strcasecmp(argv[j], "--sentinel"))
+ {
+ /* Special case: handle some things like `--sentinel --config value`.
+ * It is a pseudo config option with no value. In this case, if next
+ * argument starts with `--`, we will reset handled_last_config_arg flag.
+ * We are doing it to be compatible with pre 7.0 behavior (which we
+ * broke in #10660, 7.0.1). */
+ options = sdscat(options, "");
+ handled_last_config_arg = 1;
+ }
+ else if ((j == argc-1) && !strcasecmp(argv[j], "--sentinel")) {
+ /* Special case: when --sentinel is the last argument.
+ * It is a pseudo config option with no value. In this case, do nothing.
+ * We are doing it to be compatible with pre 7.0 behavior (which we
+ * broke in #10660, 7.0.1). */
+ options = sdscat(options, "");
+ }
+ } else {
+ /* Means that we are passing both config name and it's value in the same arg,
+ * like "--port 6380", so we need to reset handled_last_config_arg flag. */
+ handled_last_config_arg = 1;
+ }
+ sdsfreesplitres(argv_tmp, argc_tmp);
+ } else {
+ /* Option argument */
+ options = sdscatrepr(options,argv[j],strlen(argv[j]));
+ options = sdscat(options," ");
+ handled_last_config_arg = 1;
+ }
+ j++;
+ }
+
+ loadServerConfig(server.configfile, config_from_stdin, options);
+ if (server.sentinel_mode) loadSentinelConfigFromQueue();
+ sdsfree(options);
+ }
+ if (server.sentinel_mode) sentinelCheckConfigFile();
+
+ /* Do system checks */
+#ifdef __linux__
+ linuxMemoryWarnings();
+ sds err_msg = NULL;
+ if (checkXenClocksource(&err_msg) < 0) {
+ serverLog(LL_WARNING, "WARNING %s", err_msg);
+ sdsfree(err_msg);
+ }
+#if defined (__arm64__)
+ int ret;
+ if ((ret = checkLinuxMadvFreeForkBug(&err_msg)) <= 0) {
+ if (ret < 0) {
+ serverLog(LL_WARNING, "WARNING %s", err_msg);
+ sdsfree(err_msg);
+ } else
+ serverLog(LL_WARNING, "Failed to test the kernel for a bug that could lead to data corruption during background save. "
+ "Your system could be affected, please report this error.");
+ if (!checkIgnoreWarning("ARM64-COW-BUG")) {
+ serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. "
+ "Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
+ exit(1);
+ }
+ }
+#endif /* __arm64__ */
+#endif /* __linux__ */
+
+ /* Daemonize if needed */
+ server.supervised = redisIsSupervised(server.supervised_mode);
+ int background = server.daemonize && !server.supervised;
+ if (background) daemonize();
+
+ serverLog(LL_NOTICE, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo");
+ serverLog(LL_NOTICE,
+ "Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started",
+ REDIS_VERSION,
+ (sizeof(long) == 8) ? 64 : 32,
+ redisGitSHA1(),
+ strtol(redisGitDirty(),NULL,10) > 0,
+ (int)getpid());
+
+ if (argc == 1) {
+ serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/redis.conf", argv[0]);
+ } else {
+ serverLog(LL_NOTICE, "Configuration loaded");
+ }
+
+ initServer();
+ if (background || server.pidfile) createPidFile();
+ if (server.set_proc_title) redisSetProcTitle(NULL);
+ redisAsciiArt();
+ checkTcpBacklogSettings();
+ if (server.cluster_enabled) {
+ clusterInit();
+ }
+ if (!server.sentinel_mode) {
+ moduleInitModulesSystemLast();
+ moduleLoadFromQueue();
+ }
+ ACLLoadUsersAtStartup();
+ initListeners();
+ if (server.cluster_enabled) {
+ clusterInitListeners();
+ }
+ InitServerLast();
+
+ if (!server.sentinel_mode) {
+ /* Things not needed when running in Sentinel mode. */
+ serverLog(LL_NOTICE,"Server initialized");
+ aofLoadManifestFromDisk();
+ loadDataFromDisk();
+ aofOpenIfNeededOnServerStart();
+ aofDelHistoryFiles();
+ if (server.cluster_enabled) {
+ serverAssert(verifyClusterConfigWithData() == C_OK);
+ }
+
+ for (j = 0; j < CONN_TYPE_MAX; j++) {
+ connListener *listener = &server.listeners[j];
+ if (listener->ct == NULL)
+ continue;
+
+ serverLog(LL_NOTICE,"Ready to accept connections %s", listener->ct->get_type(NULL));
+ }
+
+ if (server.supervised_mode == SUPERVISED_SYSTEMD) {
+ if (!server.masterhost) {
+ redisCommunicateSystemd("STATUS=Ready to accept connections\n");
+ } else {
+ redisCommunicateSystemd("STATUS=Ready to accept connections in read-only mode. Waiting for MASTER <-> REPLICA sync\n");
+ }
+ redisCommunicateSystemd("READY=1\n");
+ }
+ } else {
+ sentinelIsRunning();
+ if (server.supervised_mode == SUPERVISED_SYSTEMD) {
+ redisCommunicateSystemd("STATUS=Ready to accept connections\n");
+ redisCommunicateSystemd("READY=1\n");
+ }
+ }
+
+ /* Warning the user about suspicious maxmemory setting. */
+ if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
+ serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
+ }
+
+ redisSetCpuAffinity(server.server_cpulist);
+ setOOMScoreAdj(-1);
+
+ aeMain(server.el);
+ aeDeleteEventLoop(server.el);
+ return 0;
+}
+
+/* The End */
diff --git a/src/server.h b/src/server.h
new file mode 100644
index 0000000..cb55503
--- /dev/null
+++ b/src/server.h
@@ -0,0 +1,3744 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __REDIS_H
+#define __REDIS_H
+
+#include "fmacros.h"
+#include "config.h"
+#include "solarisfixes.h"
+#include "rio.h"
+#include "atomicvar.h"
+#include "commands.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <time.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <lua.h>
+#include <signal.h>
+
+#ifdef HAVE_LIBSYSTEMD
+#include <systemd/sd-daemon.h>
+#endif
+
+/* Pre-C11 fallback for static_assert: expands to an extern array declaration
+ * whose size is -1 (a compile-time error) when `expr` is false, and 1 when it
+ * is true. The message argument `lit` is accepted for source compatibility
+ * with C11 _Static_assert but is otherwise ignored. */
+#ifndef static_assert
+#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1]
+#endif
+
+typedef long long mstime_t; /* millisecond time type. */
+typedef long long ustime_t; /* microsecond time type. */
+
+#include "ae.h" /* Event driven programming library */
+#include "sds.h" /* Dynamic safe strings */
+#include "dict.h" /* Hash tables */
+#include "adlist.h" /* Linked lists */
+#include "zmalloc.h" /* total memory usage aware version of malloc/free */
+#include "anet.h" /* Networking the easy way */
+#include "version.h" /* Version macro */
+#include "util.h" /* Misc functions useful in many places */
+#include "latency.h" /* Latency monitor API */
+#include "sparkline.h" /* ASCII graphs API */
+#include "quicklist.h" /* Lists are encoded as linked lists of
+ N-elements flat arrays */
+#include "rax.h" /* Radix tree */
+#include "connection.h" /* Connection abstraction */
+
+#define REDISMODULE_CORE 1
+typedef struct redisObject robj;
+#include "redismodule.h" /* Redis modules API defines. */
+
+/* Following includes allow test functions to be called from Redis main() */
+#include "zipmap.h"
+#include "ziplist.h" /* Compact list data structure */
+#include "sha1.h"
+#include "endianconv.h"
+#include "crc64.h"
+
+struct hdr_histogram;
+
+/* helpers */
+#define numElements(x) (sizeof(x)/sizeof((x)[0]))
+
+/* min/max */
+#undef min
+#undef max
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+
+/* Get the pointer of the outer struct from a member address */
+#define redis_member2struct(struct_name, member_name, member_addr) \
+ ((struct_name *)((char*)member_addr - offsetof(struct_name, member_name)))
+
+/* Error codes */
+#define C_OK 0
+#define C_ERR -1
+
+/* Static server configuration */
+#define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */
+#define CONFIG_MIN_HZ 1
+#define CONFIG_MAX_HZ 500
+#define MAX_CLIENTS_PER_CLOCK_TICK 200 /* HZ is adapted based on that. */
+#define CRON_DBS_PER_CALL 16
+#define NET_MAX_WRITES_PER_EVENT (1024*64)
+#define PROTO_SHARED_SELECT_CMDS 10
+#define OBJ_SHARED_INTEGERS 10000
+#define OBJ_SHARED_BULKHDR_LEN 32
+#define OBJ_SHARED_HDR_STRLEN(_len_) (((_len_) < 10) ? 4 : 5) /* see shared.mbulkhdr etc. */
+#define LOG_MAX_LEN 1024 /* Default maximum length of syslog messages.*/
+#define AOF_REWRITE_ITEMS_PER_CMD 64
+#define AOF_ANNOTATION_LINE_MAX_LEN 1024
+#define CONFIG_RUN_ID_SIZE 40
+#define RDB_EOF_MARK_SIZE 40
+#define CONFIG_REPL_BACKLOG_MIN_SIZE (1024*16) /* 16k */
+#define CONFIG_BGSAVE_RETRY_DELAY 5 /* Wait a few secs before trying again. */
+#define CONFIG_DEFAULT_PID_FILE "/var/run/redis.pid"
+#define CONFIG_DEFAULT_BINDADDR_COUNT 2
+#define CONFIG_DEFAULT_BINDADDR { "*", "-::*" }
+#define NET_HOST_STR_LEN 256 /* Longest valid hostname */
+#define NET_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46, but we need to be sure */
+#define NET_ADDR_STR_LEN (NET_IP_STR_LEN+32) /* Must be enough for ip:port */
+#define NET_HOST_PORT_STR_LEN (NET_HOST_STR_LEN+32) /* Must be enough for hostname:port */
+#define CONFIG_BINDADDR_MAX 16
+#define CONFIG_MIN_RESERVED_FDS 32
+#define CONFIG_DEFAULT_PROC_TITLE_TEMPLATE "{title} {listen-addr} {server-mode}"
+
+/* Bucket sizes for client eviction pools. Each bucket stores clients with
+ * memory usage of up to twice the size of the bucket below it. */
+#define CLIENT_MEM_USAGE_BUCKET_MIN_LOG 15 /* Bucket sizes start at up to 32KB (2^15) */
+#define CLIENT_MEM_USAGE_BUCKET_MAX_LOG 33 /* Bucket for largest clients: sizes above 4GB (2^32) */
+#define CLIENT_MEM_USAGE_BUCKETS (1+CLIENT_MEM_USAGE_BUCKET_MAX_LOG-CLIENT_MEM_USAGE_BUCKET_MIN_LOG)
+
+#define ACTIVE_EXPIRE_CYCLE_SLOW 0
+#define ACTIVE_EXPIRE_CYCLE_FAST 1
+
+/* Children process will exit with this status code to signal that the
+ * process terminated without an error: this is useful in order to kill
+ * a saving child (RDB or AOF one), without triggering in the parent the
+ * write protection that is normally turned on on write errors.
+ * Usually children that are terminated with SIGUSR1 will exit with this
+ * special code. */
+#define SERVER_CHILD_NOERROR_RETVAL 255
+
+/* Reading copy-on-write info is sometimes expensive and may slow down child
+ * processes that report it continuously. We measure the cost of obtaining it
+ * and hold back additional reading based on this factor. */
+#define CHILD_COW_DUTY_CYCLE 100
+
+/* Instantaneous metrics tracking. */
+#define STATS_METRIC_SAMPLES 16 /* Number of samples per metric. */
+#define STATS_METRIC_COMMAND 0 /* Number of commands executed. */
+#define STATS_METRIC_NET_INPUT 1 /* Bytes read to network. */
+#define STATS_METRIC_NET_OUTPUT 2 /* Bytes written to network. */
+#define STATS_METRIC_NET_INPUT_REPLICATION 3 /* Bytes read to network during replication. */
+#define STATS_METRIC_NET_OUTPUT_REPLICATION 4 /* Bytes written to network during replication. */
+#define STATS_METRIC_EL_CYCLE 5 /* Number of eventloop cycled. */
+#define STATS_METRIC_EL_DURATION 6 /* Eventloop duration. */
+#define STATS_METRIC_COUNT 7
+
+/* Protocol and I/O related defines */
+#define PROTO_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
+#define PROTO_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */
+#define PROTO_INLINE_MAX_SIZE (1024*64) /* Max size of inline reads */
+#define PROTO_MBULK_BIG_ARG (1024*32)
+#define PROTO_RESIZE_THRESHOLD (1024*32) /* Threshold for determining whether to resize query buffer */
+#define PROTO_REPLY_MIN_BYTES (1024) /* the lower limit on reply buffer size */
+#define REDIS_AUTOSYNC_BYTES (1024*1024*4) /* Sync file every 4MB. */
+
+#define REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME 5000 /* 5 seconds */
+
+/* When configuring the server eventloop, we setup it so that the total number
+ * of file descriptors we can handle are server.maxclients + RESERVED_FDS +
+ * a few more to stay safe. Since RESERVED_FDS defaults to 32, we add 96
+ * in order to make sure of not over provisioning more than 128 fds. */
+#define CONFIG_FDSET_INCR (CONFIG_MIN_RESERVED_FDS+96)
+
+/* OOM Score Adjustment classes. */
+#define CONFIG_OOM_MASTER 0
+#define CONFIG_OOM_REPLICA 1
+#define CONFIG_OOM_BGCHILD 2
+#define CONFIG_OOM_COUNT 3
+
+extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
+
+/* Hash table parameters */
+#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
+#define HASHTABLE_MAX_LOAD_FACTOR 1.618 /* Maximum hash table load factor. */
+
+/* Command flags. Please check the definition of struct redisCommand in this file
+ * for more information about the meaning of every flag. */
+#define CMD_WRITE (1ULL<<0)
+#define CMD_READONLY (1ULL<<1)
+#define CMD_DENYOOM (1ULL<<2)
+#define CMD_MODULE (1ULL<<3) /* Command exported by module. */
+#define CMD_ADMIN (1ULL<<4)
+#define CMD_PUBSUB (1ULL<<5)
+#define CMD_NOSCRIPT (1ULL<<6)
+#define CMD_BLOCKING (1ULL<<8) /* Has potential to block. */
+#define CMD_LOADING (1ULL<<9)
+#define CMD_STALE (1ULL<<10)
+#define CMD_SKIP_MONITOR (1ULL<<11)
+#define CMD_SKIP_SLOWLOG (1ULL<<12)
+#define CMD_ASKING (1ULL<<13)
+#define CMD_FAST (1ULL<<14)
+#define CMD_NO_AUTH (1ULL<<15)
+#define CMD_MAY_REPLICATE (1ULL<<16)
+#define CMD_SENTINEL (1ULL<<17)
+#define CMD_ONLY_SENTINEL (1ULL<<18)
+#define CMD_NO_MANDATORY_KEYS (1ULL<<19)
+#define CMD_PROTECTED (1ULL<<20)
+#define CMD_MODULE_GETKEYS (1ULL<<21) /* Use the modules getkeys interface. */
+#define CMD_MODULE_NO_CLUSTER (1ULL<<22) /* Deny on Redis Cluster. */
+#define CMD_NO_ASYNC_LOADING (1ULL<<23)
+#define CMD_NO_MULTI (1ULL<<24)
+#define CMD_MOVABLE_KEYS (1ULL<<25) /* The legacy range spec doesn't cover all keys.
+ * Populated by populateCommandLegacyRangeSpec. */
+#define CMD_ALLOW_BUSY ((1ULL<<26))
+#define CMD_MODULE_GETCHANNELS (1ULL<<27) /* Use the modules getchannels interface. */
+#define CMD_TOUCHES_ARBITRARY_KEYS (1ULL<<28)
+
+/* Command flags that describe ACLs categories. */
+#define ACL_CATEGORY_KEYSPACE (1ULL<<0)
+#define ACL_CATEGORY_READ (1ULL<<1)
+#define ACL_CATEGORY_WRITE (1ULL<<2)
+#define ACL_CATEGORY_SET (1ULL<<3)
+#define ACL_CATEGORY_SORTEDSET (1ULL<<4)
+#define ACL_CATEGORY_LIST (1ULL<<5)
+#define ACL_CATEGORY_HASH (1ULL<<6)
+#define ACL_CATEGORY_STRING (1ULL<<7)
+#define ACL_CATEGORY_BITMAP (1ULL<<8)
+#define ACL_CATEGORY_HYPERLOGLOG (1ULL<<9)
+#define ACL_CATEGORY_GEO (1ULL<<10)
+#define ACL_CATEGORY_STREAM (1ULL<<11)
+#define ACL_CATEGORY_PUBSUB (1ULL<<12)
+#define ACL_CATEGORY_ADMIN (1ULL<<13)
+#define ACL_CATEGORY_FAST (1ULL<<14)
+#define ACL_CATEGORY_SLOW (1ULL<<15)
+#define ACL_CATEGORY_BLOCKING (1ULL<<16)
+#define ACL_CATEGORY_DANGEROUS (1ULL<<17)
+#define ACL_CATEGORY_CONNECTION (1ULL<<18)
+#define ACL_CATEGORY_TRANSACTION (1ULL<<19)
+#define ACL_CATEGORY_SCRIPTING (1ULL<<20)
+
+/* Key-spec flags *
+ * -------------- */
+/* The following refer what the command actually does with the value or metadata
+ * of the key, and not necessarily the user data or how it affects it.
+ * Each key-spec must have exactly one of these. Any operation that's not
+ * distinctly deletion, overwrite or read-only would be marked as RW. */
+#define CMD_KEY_RO (1ULL<<0) /* Read-Only - Reads the value of the key, but
+ * doesn't necessarily returns it. */
+#define CMD_KEY_RW (1ULL<<1) /* Read-Write - Modifies the data stored in the
+ * value of the key or its metadata. */
+#define CMD_KEY_OW (1ULL<<2) /* Overwrite - Overwrites the data stored in
+ * the value of the key. */
+#define CMD_KEY_RM (1ULL<<3) /* Deletes the key. */
+/* The following refer to user data inside the value of the key, not the metadata
+ * like LRU, type, cardinality. It refers to the logical operation on the user's
+ * data (actual input strings / TTL), being used / returned / copied / changed,
+ * It doesn't refer to modification or returning of metadata (like type, count,
+ * presence of data). Any write that's not INSERT or DELETE, would be an UPDATE.
+ * Each key-spec may have one of the writes with or without access, or none: */
+#define CMD_KEY_ACCESS (1ULL<<4) /* Returns, copies or uses the user data from
+ * the value of the key. */
+#define CMD_KEY_UPDATE (1ULL<<5) /* Updates data to the value, new value may
+ * depend on the old value. */
+#define CMD_KEY_INSERT (1ULL<<6) /* Adds data to the value with no chance of
+ * modification or deletion of existing data. */
+#define CMD_KEY_DELETE (1ULL<<7) /* Explicitly deletes some content
+ * from the value of the key. */
+/* Other flags: */
+#define CMD_KEY_NOT_KEY (1ULL<<8) /* A 'fake' key that should be routed
+ * like a key in cluster mode but is
+ * excluded from other key checks. */
+#define CMD_KEY_INCOMPLETE (1ULL<<9) /* Means that the keyspec might not point
+ * out to all keys it should cover */
+#define CMD_KEY_VARIABLE_FLAGS (1ULL<<10) /* Means that some keys might have
+ * different flags depending on arguments */
+
+/* Key flags for when access type is unknown */
+#define CMD_KEY_FULL_ACCESS (CMD_KEY_RW | CMD_KEY_ACCESS | CMD_KEY_UPDATE)
+
+/* Key flags for how key is removed */
+#define DB_FLAG_KEY_NONE 0
+#define DB_FLAG_KEY_DELETED (1ULL<<0)
+#define DB_FLAG_KEY_EXPIRED (1ULL<<1)
+#define DB_FLAG_KEY_EVICTED (1ULL<<2)
+#define DB_FLAG_KEY_OVERWRITE (1ULL<<3)
+
+/* Channel flags share the same flag space as the key flags */
+#define CMD_CHANNEL_PATTERN (1ULL<<11) /* The argument is a channel pattern */
+#define CMD_CHANNEL_SUBSCRIBE (1ULL<<12) /* The command subscribes to channels */
+#define CMD_CHANNEL_UNSUBSCRIBE (1ULL<<13) /* The command unsubscribes to channels */
+#define CMD_CHANNEL_PUBLISH (1ULL<<14) /* The command publishes to channels. */
+
+/* AOF states */
+#define AOF_OFF 0 /* AOF is off */
+#define AOF_ON 1 /* AOF is on */
+#define AOF_WAIT_REWRITE 2 /* AOF waits rewrite to start appending */
+
+/* AOF return values for loadAppendOnlyFiles() and loadSingleAppendOnlyFile() */
+#define AOF_OK 0
+#define AOF_NOT_EXIST 1
+#define AOF_EMPTY 2
+#define AOF_OPEN_ERR 3
+#define AOF_FAILED 4
+#define AOF_TRUNCATED 5
+
+/* RDB return values for rdbLoad. */
+#define RDB_OK 0
+#define RDB_NOT_EXIST 1 /* RDB file doesn't exist. */
+#define RDB_FAILED 2 /* Failed to load the RDB file. */
+
+/* Command doc flags */
+#define CMD_DOC_NONE 0
+#define CMD_DOC_DEPRECATED (1<<0) /* Command is deprecated */
+#define CMD_DOC_SYSCMD (1<<1) /* System (internal) command */
+
+/* Client flags */
+#define CLIENT_SLAVE (1<<0) /* This client is a replica */
+#define CLIENT_MASTER (1<<1) /* This client is a master */
+#define CLIENT_MONITOR (1<<2) /* This client is a slave monitor, see MONITOR */
+#define CLIENT_MULTI (1<<3) /* This client is in a MULTI context */
+#define CLIENT_BLOCKED (1<<4) /* The client is waiting in a blocking operation */
+#define CLIENT_DIRTY_CAS (1<<5) /* Watched keys modified. EXEC will fail. */
+#define CLIENT_CLOSE_AFTER_REPLY (1<<6) /* Close after writing entire reply. */
+#define CLIENT_UNBLOCKED (1<<7) /* This client was unblocked and is stored in
+ server.unblocked_clients */
+#define CLIENT_SCRIPT (1<<8) /* This is a non connected client used by Lua */
+#define CLIENT_ASKING (1<<9) /* Client issued the ASKING command */
+#define CLIENT_CLOSE_ASAP (1<<10)/* Close this client ASAP */
+#define CLIENT_UNIX_SOCKET (1<<11) /* Client connected via Unix domain socket */
+#define CLIENT_DIRTY_EXEC (1<<12) /* EXEC will fail for errors while queueing */
+#define CLIENT_MASTER_FORCE_REPLY (1<<13) /* Queue replies even if is master */
+#define CLIENT_FORCE_AOF (1<<14) /* Force AOF propagation of current cmd. */
+#define CLIENT_FORCE_REPL (1<<15) /* Force replication of current cmd. */
+#define CLIENT_PRE_PSYNC (1<<16) /* Instance don't understand PSYNC. */
+#define CLIENT_READONLY (1<<17) /* Cluster client is in read-only state. */
+#define CLIENT_PUBSUB (1<<18) /* Client is in Pub/Sub mode. */
+#define CLIENT_PREVENT_AOF_PROP (1<<19) /* Don't propagate to AOF. */
+#define CLIENT_PREVENT_REPL_PROP (1<<20) /* Don't propagate to slaves. */
+#define CLIENT_PREVENT_PROP (CLIENT_PREVENT_AOF_PROP|CLIENT_PREVENT_REPL_PROP)
+#define CLIENT_PENDING_WRITE (1<<21) /* Client has output to send but a write
+ handler is yet not installed. */
+#define CLIENT_REPLY_OFF (1<<22) /* Don't send replies to client. */
+#define CLIENT_REPLY_SKIP_NEXT (1<<23) /* Set CLIENT_REPLY_SKIP for next cmd */
+#define CLIENT_REPLY_SKIP (1<<24) /* Don't send just this reply. */
+#define CLIENT_LUA_DEBUG (1<<25) /* Run EVAL in debug mode. */
+#define CLIENT_LUA_DEBUG_SYNC (1<<26) /* EVAL debugging without fork() */
+#define CLIENT_MODULE (1<<27) /* Non connected client used by some module. */
+#define CLIENT_PROTECTED (1<<28) /* Client should not be freed for now. */
+#define CLIENT_EXECUTING_COMMAND (1<<29) /* Indicates that the client is currently in the process of handling
+ a command. usually this will be marked only during call()
+ however, blocked clients might have this flag kept until they
+ will try to reprocess the command. */
+
+#define CLIENT_PENDING_COMMAND (1<<30) /* Indicates the client has a fully
+ * parsed command ready for execution. */
+#define CLIENT_TRACKING (1ULL<<31) /* Client enabled keys tracking in order to
+ perform client side caching. */
+#define CLIENT_TRACKING_BROKEN_REDIR (1ULL<<32) /* Target client is invalid. */
+#define CLIENT_TRACKING_BCAST (1ULL<<33) /* Tracking in BCAST mode. */
+#define CLIENT_TRACKING_OPTIN (1ULL<<34) /* Tracking in opt-in mode. */
+#define CLIENT_TRACKING_OPTOUT (1ULL<<35) /* Tracking in opt-out mode. */
+#define CLIENT_TRACKING_CACHING (1ULL<<36) /* CACHING yes/no was given,
+ depending on optin/optout mode. */
+#define CLIENT_TRACKING_NOLOOP (1ULL<<37) /* Don't send invalidation messages
+ about writes performed by myself.*/
+#define CLIENT_IN_TO_TABLE (1ULL<<38) /* This client is in the timeout table. */
+#define CLIENT_PROTOCOL_ERROR (1ULL<<39) /* Protocol error chatting with it. */
+#define CLIENT_CLOSE_AFTER_COMMAND (1ULL<<40) /* Close after executing commands
+ * and writing entire reply. */
+#define CLIENT_DENY_BLOCKING (1ULL<<41) /* Indicate that the client should not be blocked.
+ currently, turned on inside MULTI, Lua, RM_Call,
+ and AOF client */
+#define CLIENT_REPL_RDBONLY (1ULL<<42) /* This client is a replica that only wants
+ RDB without replication buffer. */
+#define CLIENT_NO_EVICT (1ULL<<43) /* This client is protected against client
+ memory eviction. */
+#define CLIENT_ALLOW_OOM (1ULL<<44) /* Client used by RM_Call is allowed to fully execute
+ scripts even when in OOM */
+#define CLIENT_NO_TOUCH (1ULL<<45) /* This client will not touch LFU/LRU stats. */
+#define CLIENT_PUSHING (1ULL<<46) /* This client is pushing notifications. */
+#define CLIENT_MODULE_AUTH_HAS_RESULT (1ULL<<47) /* Indicates a client in the middle of module based
+ auth had been authenticated from the Module. */
+#define CLIENT_MODULE_PREVENT_AOF_PROP (1ULL<<48) /* Module client do not want to propagate to AOF */
+#define CLIENT_MODULE_PREVENT_REPL_PROP (1ULL<<49) /* Module client do not want to propagate to replica */
+
+/* Client block type (btype field in client structure)
+ * if CLIENT_BLOCKED flag is set. */
+typedef enum blocking_type {
+    BLOCKED_NONE,    /* Not blocked, no CLIENT_BLOCKED flag set. */
+    BLOCKED_LIST,    /* Blocked on a list command: BLPOP & co. */
+    BLOCKED_WAIT,    /* WAIT command, waiting for synchronous replication. */
+    BLOCKED_WAITAOF, /* WAITAOF command, waiting for AOF file fsync. */
+    BLOCKED_MODULE,  /* Blocked by a loadable module. */
+    BLOCKED_STREAM,  /* Blocked on a stream read: XREAD. */
+    BLOCKED_ZSET,    /* Blocked on a sorted-set pop: BZPOP et al. */
+    BLOCKED_POSTPONE, /* Blocked by processCommand, re-try processing later. */
+    BLOCKED_SHUTDOWN, /* Blocked while SHUTDOWN waits (e.g. for replicas). */
+    BLOCKED_NUM,      /* Number of blocked states above (not itself a state). */
+    BLOCKED_END       /* Sentinel marking the end of the enumeration. */
+} blocking_type;
+
+/* Client request types */
+#define PROTO_REQ_INLINE 1
+#define PROTO_REQ_MULTIBULK 2
+
+/* Client classes for client limits, currently used only for
+ * the max-client-output-buffer limit implementation. */
+#define CLIENT_TYPE_NORMAL 0 /* Normal req-reply clients + MONITORs */
+#define CLIENT_TYPE_SLAVE 1 /* Slaves. */
+#define CLIENT_TYPE_PUBSUB 2 /* Clients subscribed to PubSub channels. */
+#define CLIENT_TYPE_MASTER 3 /* Master. */
+#define CLIENT_TYPE_COUNT 4 /* Total number of client types. */
+#define CLIENT_TYPE_OBUF_COUNT 3 /* Number of clients to expose to output
+ buffer configuration. Just the first
+ three: normal, slave, pubsub. */
+
+/* Slave replication state. Used in server.repl_state for slaves to remember
+ * what to do next. */
+typedef enum {
+    REPL_STATE_NONE = 0,            /* No active replication */
+    REPL_STATE_CONNECT,             /* Must connect to master */
+    REPL_STATE_CONNECTING,          /* Connecting to master */
+    /* --- Handshake states, must be ordered --- */
+    REPL_STATE_RECEIVE_PING_REPLY,  /* Wait for PING reply */
+    REPL_STATE_SEND_HANDSHAKE,      /* Send handshake sequence to master */
+    REPL_STATE_RECEIVE_AUTH_REPLY,  /* Wait for AUTH reply */
+    REPL_STATE_RECEIVE_PORT_REPLY,  /* Wait for REPLCONF listening-port reply */
+    REPL_STATE_RECEIVE_IP_REPLY,    /* Wait for REPLCONF ip-address reply */
+    REPL_STATE_RECEIVE_CAPA_REPLY,  /* Wait for REPLCONF capa reply */
+    REPL_STATE_SEND_PSYNC,          /* Send PSYNC */
+    REPL_STATE_RECEIVE_PSYNC_REPLY, /* Wait for PSYNC reply */
+    /* --- End of handshake states --- */
+    REPL_STATE_TRANSFER,            /* Receiving .rdb from master */
+    REPL_STATE_CONNECTED,           /* Connected to master */
+} repl_state;
+
+/* The state of an in progress coordinated failover */
+typedef enum {
+    NO_FAILOVER = 0,        /* No failover in progress. */
+    FAILOVER_WAIT_FOR_SYNC, /* Waiting for the target replica to catch up
+                             * with our replication offset. */
+    FAILOVER_IN_PROGRESS    /* Waiting for target replica to accept
+                             * PSYNC FAILOVER request. */
+} failover_state;
+
+/* State of slaves from the POV of the master. Used in client->replstate.
+ * In SEND_BULK and ONLINE state the slave receives new updates
+ * in its output queue. In the WAIT_BGSAVE states instead the server is waiting
+ * to start the next background saving in order to send updates to it. */
+#define SLAVE_STATE_WAIT_BGSAVE_START 6 /* We need to produce a new RDB file. */
+#define SLAVE_STATE_WAIT_BGSAVE_END 7 /* Waiting RDB file creation to finish. */
+#define SLAVE_STATE_SEND_BULK 8 /* Sending RDB file to slave. */
+#define SLAVE_STATE_ONLINE 9 /* RDB file transmitted, sending just updates. */
+#define SLAVE_STATE_RDB_TRANSMITTED 10 /* RDB file transmitted - This state is used only for
+ * a replica that only wants RDB without replication buffer */
+
+/* Slave capabilities. */
+#define SLAVE_CAPA_NONE 0
+#define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */
+#define SLAVE_CAPA_PSYNC2 (1<<1) /* Supports PSYNC2 protocol. */
+
+/* Slave requirements */
+#define SLAVE_REQ_NONE 0
+#define SLAVE_REQ_RDB_EXCLUDE_DATA (1 << 0) /* Exclude data from RDB */
+#define SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS (1 << 1) /* Exclude functions from RDB */
+/* Mask of all bits in the slave requirements bitfield that represent non-standard (filtered) RDB requirements */
+#define SLAVE_REQ_RDB_MASK (SLAVE_REQ_RDB_EXCLUDE_DATA | SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS)
+
+/* Synchronous read timeout - slave side */
+#define CONFIG_REPL_SYNCIO_TIMEOUT 5
+
+/* The default number of replication backlog blocks to trim per call. */
+#define REPL_BACKLOG_TRIM_BLOCKS_PER_CALL 64
+
+/* In order to quickly find the requested offset for PSYNC requests,
+ * we index some nodes in the replication buffer linked list into a rax. */
+#define REPL_BACKLOG_INDEX_PER_BLOCKS 64
+
+/* List related stuff */
+#define LIST_HEAD 0
+#define LIST_TAIL 1
+#define ZSET_MIN 0
+#define ZSET_MAX 1
+
+/* Sort operations */
+#define SORT_OP_GET 0
+
+/* Log levels */
+#define LL_DEBUG 0
+#define LL_VERBOSE 1
+#define LL_NOTICE 2
+#define LL_WARNING 3
+#define LL_NOTHING 4
+#define LL_RAW (1<<10) /* Modifier to log without timestamp */
+
+/* Supervision options */
+#define SUPERVISED_NONE 0
+#define SUPERVISED_AUTODETECT 1
+#define SUPERVISED_SYSTEMD 2
+#define SUPERVISED_UPSTART 3
+
+/* Anti-warning macro... */
+#define UNUSED(V) ((void) V)
+
+#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^64 elements */
+#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
+
+/* Append only defines */
+#define AOF_FSYNC_NO 0
+#define AOF_FSYNC_ALWAYS 1
+#define AOF_FSYNC_EVERYSEC 2
+
+/* Replication diskless load defines */
+#define REPL_DISKLESS_LOAD_DISABLED 0
+#define REPL_DISKLESS_LOAD_WHEN_DB_EMPTY 1
+#define REPL_DISKLESS_LOAD_SWAPDB 2
+
+/* TLS Client Authentication */
+#define TLS_CLIENT_AUTH_NO 0
+#define TLS_CLIENT_AUTH_YES 1
+#define TLS_CLIENT_AUTH_OPTIONAL 2
+
+/* Sanitize dump payload */
+#define SANITIZE_DUMP_NO 0
+#define SANITIZE_DUMP_YES 1
+#define SANITIZE_DUMP_CLIENTS 2
+
+/* Enable protected config/command */
+#define PROTECTED_ACTION_ALLOWED_NO 0
+#define PROTECTED_ACTION_ALLOWED_YES 1
+#define PROTECTED_ACTION_ALLOWED_LOCAL 2
+
+/* Sets operations codes */
+#define SET_OP_UNION 0
+#define SET_OP_DIFF 1
+#define SET_OP_INTER 2
+
+/* oom-score-adj defines */
+#define OOM_SCORE_ADJ_NO 0
+#define OOM_SCORE_RELATIVE 1
+#define OOM_SCORE_ADJ_ABSOLUTE 2
+
+/* Redis maxmemory strategies. Instead of using just incremental number
+ * for this defines, we use a set of flags so that testing for certain
+ * properties common to multiple policies is faster. */
+#define MAXMEMORY_FLAG_LRU (1<<0)
+#define MAXMEMORY_FLAG_LFU (1<<1)
+#define MAXMEMORY_FLAG_ALLKEYS (1<<2)
+#define MAXMEMORY_FLAG_NO_SHARED_INTEGERS \
+ (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU)
+
+#define MAXMEMORY_VOLATILE_LRU ((0<<8)|MAXMEMORY_FLAG_LRU)
+#define MAXMEMORY_VOLATILE_LFU ((1<<8)|MAXMEMORY_FLAG_LFU)
+#define MAXMEMORY_VOLATILE_TTL (2<<8)
+#define MAXMEMORY_VOLATILE_RANDOM (3<<8)
+#define MAXMEMORY_ALLKEYS_LRU ((4<<8)|MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_ALLKEYS)
+#define MAXMEMORY_ALLKEYS_LFU ((5<<8)|MAXMEMORY_FLAG_LFU|MAXMEMORY_FLAG_ALLKEYS)
+#define MAXMEMORY_ALLKEYS_RANDOM ((6<<8)|MAXMEMORY_FLAG_ALLKEYS)
+#define MAXMEMORY_NO_EVICTION (7<<8)
+
+/* Units */
+#define UNIT_SECONDS 0
+#define UNIT_MILLISECONDS 1
+
+/* SHUTDOWN flags */
+#define SHUTDOWN_NOFLAGS 0 /* No flags. */
+#define SHUTDOWN_SAVE 1 /* Force SAVE on SHUTDOWN even if no save
+ points are configured. */
+#define SHUTDOWN_NOSAVE 2 /* Don't SAVE on SHUTDOWN. */
+#define SHUTDOWN_NOW 4 /* Don't wait for replicas to catch up. */
+#define SHUTDOWN_FORCE 8 /* Don't let errors prevent shutdown. */
+
+/* Command call flags, see call() function */
+#define CMD_CALL_NONE 0
+#define CMD_CALL_PROPAGATE_AOF (1<<0)
+#define CMD_CALL_PROPAGATE_REPL (1<<1)
+#define CMD_CALL_REPROCESSING (1<<2)
+#define CMD_CALL_FROM_MODULE (1<<3) /* From RM_Call */
+#define CMD_CALL_PROPAGATE (CMD_CALL_PROPAGATE_AOF|CMD_CALL_PROPAGATE_REPL)
+#define CMD_CALL_FULL (CMD_CALL_PROPAGATE)
+
+/* Command propagation flags, see propagateNow() function */
+#define PROPAGATE_NONE 0
+#define PROPAGATE_AOF 1
+#define PROPAGATE_REPL 2
+
+/* Actions pause types */
+#define PAUSE_ACTION_CLIENT_WRITE (1<<0)
+#define PAUSE_ACTION_CLIENT_ALL (1<<1) /* must be bigger than PAUSE_ACTION_CLIENT_WRITE */
+#define PAUSE_ACTION_EXPIRE (1<<2)
+#define PAUSE_ACTION_EVICT (1<<3)
+#define PAUSE_ACTION_REPLICA (1<<4) /* pause replica traffic */
+
+/* common sets of actions to pause/unpause */
+#define PAUSE_ACTIONS_CLIENT_WRITE_SET (PAUSE_ACTION_CLIENT_WRITE|\
+ PAUSE_ACTION_EXPIRE|\
+ PAUSE_ACTION_EVICT|\
+ PAUSE_ACTION_REPLICA)
+#define PAUSE_ACTIONS_CLIENT_ALL_SET (PAUSE_ACTION_CLIENT_ALL|\
+ PAUSE_ACTION_EXPIRE|\
+ PAUSE_ACTION_EVICT|\
+ PAUSE_ACTION_REPLICA)
+
+/* Client pause purposes. Each purpose has its own end time and pause type. */
+typedef enum {
+ PAUSE_BY_CLIENT_COMMAND = 0,
+ PAUSE_DURING_SHUTDOWN,
+ PAUSE_DURING_FAILOVER,
+ NUM_PAUSE_PURPOSES /* This value is the number of purposes above. */
+} pause_purpose;
+
+/* A single pause request: which actions are paused, and until when. */
+typedef struct {
+    uint32_t paused_actions; /* Bitmask of actions */
+    mstime_t end;            /* Time (mstime) at which this pause expires. */
+} pause_event;
+
+/* Ways that a clusters endpoint can be described */
+typedef enum {
+ CLUSTER_ENDPOINT_TYPE_IP = 0, /* Show IP address */
+ CLUSTER_ENDPOINT_TYPE_HOSTNAME, /* Show hostname */
+ CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT /* Show NULL or empty */
+} cluster_endpoint_type;
+
+/* RDB active child save type. */
+#define RDB_CHILD_TYPE_NONE 0
+#define RDB_CHILD_TYPE_DISK 1 /* RDB is written to disk. */
+#define RDB_CHILD_TYPE_SOCKET 2 /* RDB is written to slave socket. */
+
+/* Keyspace changes notification classes. Every class is associated with a
+ * character for configuration purposes. */
+#define NOTIFY_KEYSPACE (1<<0) /* K */
+#define NOTIFY_KEYEVENT (1<<1) /* E */
+#define NOTIFY_GENERIC (1<<2) /* g */
+#define NOTIFY_STRING (1<<3) /* $ */
+#define NOTIFY_LIST (1<<4) /* l */
+#define NOTIFY_SET (1<<5) /* s */
+#define NOTIFY_HASH (1<<6) /* h */
+#define NOTIFY_ZSET (1<<7) /* z */
+#define NOTIFY_EXPIRED (1<<8) /* x */
+#define NOTIFY_EVICTED (1<<9) /* e */
+#define NOTIFY_STREAM (1<<10) /* t */
+#define NOTIFY_KEY_MISS (1<<11) /* m (Note: This one is excluded from NOTIFY_ALL on purpose) */
+#define NOTIFY_LOADED (1<<12) /* module only key space notification, indicate a key loaded from rdb */
+#define NOTIFY_MODULE (1<<13) /* d, module key space notification */
+#define NOTIFY_NEW (1<<14) /* n, new key notification */
+#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_MODULE) /* A flag */
+
+/* Using the following macro you can run code inside serverCron() with the
+ * specified period, specified in milliseconds.
+ * The actual resolution depends on server.hz. */
+#define run_with_period(_ms_) if (((_ms_) <= 1000/server.hz) || !(server.cronloops%((_ms_)/(1000/server.hz))))
+
+/* We can print the stacktrace, so our assert is defined this way: */
+#define serverAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_serverAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),redis_unreachable()))
+#define serverAssert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),redis_unreachable()))
+#define serverPanic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),redis_unreachable()
+
+/* latency histogram per command init settings */
+#define LATENCY_HISTOGRAM_MIN_VALUE 1L        /* >= 1 nanosec */
+#define LATENCY_HISTOGRAM_MAX_VALUE 1000000000L /* <= 1 sec */
+#define LATENCY_HISTOGRAM_PRECISION 2  /* Maintain a value precision of 2 significant digits across LATENCY_HISTOGRAM_MIN_VALUE and LATENCY_HISTOGRAM_MAX_VALUE range.
+                                        * Value quantization within the range will thus be no larger than 1/100th (or 1%) of any value.
+                                        * The total size per histogram should sit around 40 KiB. */
+
+/* Busy module flags, see busy_module_yield_flags */
+#define BUSY_MODULE_YIELD_NONE (0)
+#define BUSY_MODULE_YIELD_EVENTS (1<<0)
+#define BUSY_MODULE_YIELD_CLIENTS (1<<1)
+
+/*-----------------------------------------------------------------------------
+ * Data types
+ *----------------------------------------------------------------------------*/
+
+/* A redis object, that is a type able to hold a string / list / set */
+
+/* The actual Redis Object */
+#define OBJ_STRING 0 /* String object. */
+#define OBJ_LIST 1 /* List object. */
+#define OBJ_SET 2 /* Set object. */
+#define OBJ_ZSET 3 /* Sorted set object. */
+#define OBJ_HASH 4 /* Hash object. */
+
+/* The "module" object type is a special one that signals that the object
+ * is one directly managed by a Redis module. In this case the value points
+ * to a moduleValue struct, which contains the object value (which is only
+ * handled by the module itself) and the RedisModuleType struct which lists
+ * function pointers in order to serialize, deserialize, AOF-rewrite and
+ * free the object.
+ *
+ * Inside the RDB file, module types are encoded as OBJ_MODULE followed
+ * by a 64 bit module type ID, which has a 54 bits module-specific signature
+ * in order to dispatch the loading to the right module, plus a 10 bits
+ * encoding version. */
+#define OBJ_MODULE 5 /* Module object. */
+#define OBJ_STREAM 6 /* Stream object. */
+#define OBJ_TYPE_MAX 7 /* Maximum number of object types */
+
+/* Extract encver / signature from a module type ID. */
+#define REDISMODULE_TYPE_ENCVER_BITS 10
+#define REDISMODULE_TYPE_ENCVER_MASK ((1<<REDISMODULE_TYPE_ENCVER_BITS)-1)
+#define REDISMODULE_TYPE_ENCVER(id) ((id) & REDISMODULE_TYPE_ENCVER_MASK)
+#define REDISMODULE_TYPE_SIGN(id) (((id) & ~((uint64_t)REDISMODULE_TYPE_ENCVER_MASK)) >>REDISMODULE_TYPE_ENCVER_BITS)
+
+/* Bit flags for moduleTypeAuxSaveFunc */
+#define REDISMODULE_AUX_BEFORE_RDB (1<<0)
+#define REDISMODULE_AUX_AFTER_RDB (1<<1)
+
+struct RedisModule;
+struct RedisModuleIO;
+struct RedisModuleDigest;
+struct RedisModuleCtx;
+struct moduleLoadQueueEntry;
+struct RedisModuleKeyOptCtx;
+struct RedisModuleCommand;
+
+/* Each module type implementation should export a set of methods in order
+ * to serialize and deserialize the value in the RDB file, rewrite the AOF
+ * log, create the digest for "DEBUG DIGEST", and free the value when a key
+ * is deleted. */
+typedef void *(*moduleTypeLoadFunc)(struct RedisModuleIO *io, int encver);
+typedef void (*moduleTypeSaveFunc)(struct RedisModuleIO *io, void *value);
+typedef int (*moduleTypeAuxLoadFunc)(struct RedisModuleIO *rdb, int encver, int when);
+typedef void (*moduleTypeAuxSaveFunc)(struct RedisModuleIO *rdb, int when);
+typedef void (*moduleTypeRewriteFunc)(struct RedisModuleIO *io, struct redisObject *key, void *value);
+typedef void (*moduleTypeDigestFunc)(struct RedisModuleDigest *digest, void *value);
+typedef size_t (*moduleTypeMemUsageFunc)(const void *value);
+typedef void (*moduleTypeFreeFunc)(void *value);
+typedef size_t (*moduleTypeFreeEffortFunc)(struct redisObject *key, const void *value);
+typedef void (*moduleTypeUnlinkFunc)(struct redisObject *key, void *value);
+typedef void *(*moduleTypeCopyFunc)(struct redisObject *fromkey, struct redisObject *tokey, const void *value);
+typedef int (*moduleTypeDefragFunc)(struct RedisModuleDefragCtx *ctx, struct redisObject *key, void **value);
+typedef size_t (*moduleTypeMemUsageFunc2)(struct RedisModuleKeyOptCtx *ctx, const void *value, size_t sample_size);
+typedef void (*moduleTypeFreeFunc2)(struct RedisModuleKeyOptCtx *ctx, void *value);
+typedef size_t (*moduleTypeFreeEffortFunc2)(struct RedisModuleKeyOptCtx *ctx, const void *value);
+typedef void (*moduleTypeUnlinkFunc2)(struct RedisModuleKeyOptCtx *ctx, void *value);
+typedef void *(*moduleTypeCopyFunc2)(struct RedisModuleKeyOptCtx *ctx, const void *value);
+typedef int (*moduleTypeAuthCallback)(struct RedisModuleCtx *ctx, void *username, void *password, const char **err);
+
+
+/* The module type, which is referenced in each value of a given type, defines
+ * the methods and links to the module exporting the type. */
+typedef struct RedisModuleType {
+ uint64_t id; /* Higher 54 bits of type ID + 10 lower bits of encoding ver. */
+ struct RedisModule *module;
+ moduleTypeLoadFunc rdb_load;
+ moduleTypeSaveFunc rdb_save;
+ moduleTypeRewriteFunc aof_rewrite;
+ moduleTypeMemUsageFunc mem_usage;
+ moduleTypeDigestFunc digest;
+ moduleTypeFreeFunc free;
+ moduleTypeFreeEffortFunc free_effort;
+ moduleTypeUnlinkFunc unlink;
+ moduleTypeCopyFunc copy;
+ moduleTypeDefragFunc defrag;
+ moduleTypeAuxLoadFunc aux_load;
+ moduleTypeAuxSaveFunc aux_save;
+ moduleTypeMemUsageFunc2 mem_usage2;
+ moduleTypeFreeEffortFunc2 free_effort2;
+ moduleTypeUnlinkFunc2 unlink2;
+ moduleTypeCopyFunc2 copy2;
+ moduleTypeAuxSaveFunc aux_save2;
+ int aux_save_triggers;
+ char name[10]; /* 9 bytes name + null term. Charset: A-Z a-z 0-9 _- */
+} moduleType;
+
+/* In Redis objects 'robj' structures of type OBJ_MODULE, the value pointer
+ * is set to the following structure, referencing the moduleType structure
+ * in order to work with the value, and at the same time providing a raw
+ * pointer to the value, as created by the module commands operating with
+ * the module type.
+ *
+ * So for example in order to free such a value, it is possible to use
+ * the following code:
+ *
+ * if (robj->type == OBJ_MODULE) {
+ * moduleValue *mt = robj->ptr;
+ * mt->type->free(mt->value);
+ * zfree(mt); // We need to release this in-the-middle struct as well.
+ * }
+ */
+typedef struct moduleValue {
+ moduleType *type;
+ void *value;
+} moduleValue;
+
+/* This structure represents a module inside the system. */
+struct RedisModule {
+ void *handle; /* Module dlopen() handle. */
+ char *name; /* Module name. */
+ int ver; /* Module version. We use just progressive integers. */
+ int apiver; /* Module API version as requested during initialization.*/
+ list *types; /* Module data types. */
+ list *usedby; /* List of modules using APIs from this one. */
+ list *using; /* List of modules we use some APIs of. */
+ list *filters; /* List of filters the module has registered. */
+ list *module_configs; /* List of configurations the module has registered */
+ int configs_initialized; /* Have the module configurations been initialized? */
+ int in_call; /* RM_Call() nesting level */
+ int in_hook; /* Hooks callback nesting level for this module (0 or 1). */
+ int options; /* Module options and capabilities. */
+ int blocked_clients; /* Count of RedisModuleBlockedClient in this module. */
+ RedisModuleInfoFunc info_cb; /* Callback for module to add INFO fields. */
+ RedisModuleDefragFunc defrag_cb; /* Callback for global data defrag. */
+ struct moduleLoadQueueEntry *loadmod; /* Module load arguments for config rewrite. */
+ int num_commands_with_acl_categories; /* Number of commands in this module included in acl categories */
+ int onload; /* Flag to identify if the call is being made from Onload (0 or 1) */
+};
+typedef struct RedisModule RedisModule;
+
+/* This is a wrapper for the 'rio' streams used inside rdb.c in Redis, so that
+ * the user does not have to take the total count of the written bytes nor
+ * to care about error conditions. */
+struct RedisModuleIO {
+ size_t bytes; /* Bytes read / written so far. */
+ rio *rio; /* Rio stream. */
+ moduleType *type; /* Module type doing the operation. */
+ int error; /* True if error condition happened. */
+ struct RedisModuleCtx *ctx; /* Optional context, see RM_GetContextFromIO()*/
+ struct redisObject *key; /* Optional name of key processed */
+ int dbid; /* The dbid of the key being processed, -1 when unknown. */
+ sds pre_flush_buffer; /* A buffer that should be flushed before next write operation
+ * See rdbSaveSingleModuleAux for more details */
+};
+
+/* Macro to initialize an IO context. Note that the 'ver' field is populated
+ * inside rdb.c according to the version of the value to load.
+ * NOTE(review): the RedisModuleIO struct above does not declare a 'ver'
+ * field, so that part of this comment looks stale -- confirm against rdb.c. */
+#define moduleInitIOContext(iovar,mtype,rioptr,keyptr,db) do { \
+    iovar.rio = rioptr; \
+    iovar.type = mtype; \
+    iovar.bytes = 0; \
+    iovar.error = 0; \
+    iovar.key = keyptr; \
+    iovar.dbid = db; \
+    iovar.ctx = NULL; \
+    iovar.pre_flush_buffer = NULL; \
+} while(0)
+
+/* This is a structure used to export DEBUG DIGEST capabilities to Redis
+ * modules. We want to capture both the ordered and unordered elements of
+ * a data structure, so that a digest can be created in a way that correctly
+ * reflects the values. See the DEBUG DIGEST command implementation for more
+ * background. */
+struct RedisModuleDigest {
+ unsigned char o[20]; /* Ordered elements. */
+ unsigned char x[20]; /* Xored elements. */
+ struct redisObject *key; /* Optional name of key processed */
+ int dbid; /* The dbid of the key being processed */
+};
+
+/* Just start with a digest composed of all zero bytes. */
+#define moduleInitDigestContext(mdvar) do { \
+ memset(mdvar.o,0,sizeof(mdvar.o)); \
+ memset(mdvar.x,0,sizeof(mdvar.x)); \
+} while(0)
+
+/* Macro to check if the client is in the middle of module based authentication. */
+#define clientHasModuleAuthInProgress(c) ((c)->module_auth_ctx != NULL)
+
+/* Objects encoding. Some kind of objects like Strings and Hashes can be
+ * internally represented in multiple ways. The 'encoding' field of the object
+ * is set to one of this fields for this object. */
+#define OBJ_ENCODING_RAW 0 /* Raw representation */
+#define OBJ_ENCODING_INT 1 /* Encoded as integer */
+#define OBJ_ENCODING_HT 2 /* Encoded as hash table */
+#define OBJ_ENCODING_ZIPMAP 3 /* No longer used: old hash encoding. */
+#define OBJ_ENCODING_LINKEDLIST 4 /* No longer used: old list encoding. */
+#define OBJ_ENCODING_ZIPLIST 5 /* No longer used: old list/hash/zset encoding. */
+#define OBJ_ENCODING_INTSET 6 /* Encoded as intset */
+#define OBJ_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
+#define OBJ_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
+#define OBJ_ENCODING_QUICKLIST 9 /* Encoded as linked list of listpacks */
+#define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */
+#define OBJ_ENCODING_LISTPACK 11 /* Encoded as a listpack */
+
+#define LRU_BITS 24
+#define LRU_CLOCK_MAX ((1<<LRU_BITS)-1) /* Max value of obj->lru */
+#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
+
+#define OBJ_SHARED_REFCOUNT INT_MAX /* Global object never destroyed. */
+#define OBJ_STATIC_REFCOUNT (INT_MAX-1) /* Object allocated in the stack. */
+#define OBJ_FIRST_SPECIAL_REFCOUNT OBJ_STATIC_REFCOUNT
+struct redisObject {
+ unsigned type:4;
+ unsigned encoding:4;
+ unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
+ * LFU data (least significant 8 bits frequency
+ * and most significant 16 bits access time). */
+ int refcount;
+ void *ptr;
+};
+
+/* The string name for an object's type as listed above
+ * Native types are checked against the OBJ_STRING, OBJ_LIST, OBJ_* defines,
+ * and Module types have their registered name returned. */
+char *getObjectTypeName(robj*);
+
+/* Macro used to initialize a Redis object allocated on the stack.
+ * Note that this macro is taken near the structure definition to make sure
+ * we'll update it when the structure is changed, to avoid bugs like
+ * bug #85 introduced exactly in this way. */
+#define initStaticStringObject(_var,_ptr) do { \
+ _var.refcount = OBJ_STATIC_REFCOUNT; \
+ _var.type = OBJ_STRING; \
+ _var.encoding = OBJ_ENCODING_RAW; \
+ _var.ptr = _ptr; \
+} while(0)
+
+struct evictionPoolEntry; /* Defined in evict.c */
+
+/* This structure is used in order to represent the output buffer of a client,
+ * which is actually a linked list of blocks like that, that is: client->reply. */
+typedef struct clientReplyBlock {
+ size_t size, used;
+ char buf[];
+} clientReplyBlock;
+
+/* Replication buffer blocks is the list of replBufBlock.
+ *
+ * +--------------+ +--------------+ +--------------+
+ * | refcount = 1 | ... | refcount = 0 | ... | refcount = 2 |
+ * +--------------+ +--------------+ +--------------+
+ * | / \
+ * | / \
+ * | / \
+ * Repl Backlog Replica_A Replica_B
+ *
+ * Each replica or replication backlog increments only the refcount of the
+ * 'ref_repl_buf_node' which it points to. So when replica walks to the next
+ * node, it should first increase the next node's refcount, and when we trim
+ * the replication buffer nodes, we remove node always from the head node which
+ * refcount is 0. If the refcount of the head node is not 0, we must stop
+ * trimming and never iterate the next node. */
+
+/* Similar with 'clientReplyBlock', it is used for shared buffers between
+ * all replica clients and replication backlog. */
+typedef struct replBufBlock {
+ int refcount; /* Number of replicas or repl backlog using. */
+ long long id; /* The unique incremental number. */
+ long long repl_offset; /* Start replication offset of the block. */
+ size_t size, used;
+ char buf[];
+} replBufBlock;
+
+/* Opaque type for the Slot to Key API. */
+typedef struct clusterSlotToKeyMapping clusterSlotToKeyMapping;
+
+/* Redis database representation. There are multiple databases identified
+ * by integers from 0 (the default database) up to the max configured
+ * database. The database number is the 'id' field in the structure. */
+typedef struct redisDb {
+    dict *dict;                 /* The keyspace for this DB */
+    dict *expires;              /* Timeout of keys with a timeout set */
+    dict *blocking_keys;        /* Keys with clients waiting for data (BLPOP)*/
+    dict *blocking_keys_unblock_on_nokey;   /* Keys with clients waiting for
+                                             * data, and should be unblocked if key is deleted (XREADGROUP).
+                                             * This is a subset of blocking_keys*/
+    dict *ready_keys;           /* Blocked keys that received a PUSH */
+    dict *watched_keys;         /* WATCHED keys for MULTI/EXEC CAS */
+    int id;                     /* Database ID */
+    long long avg_ttl;          /* Average TTL, just for stats */
+    unsigned long expires_cursor; /* Cursor of the active expire cycle. */
+    list *defrag_later;         /* List of key names to attempt to defrag one by one, gradually. */
+    clusterSlotToKeyMapping *slots_to_keys; /* Array of slots to keys. Only used in cluster mode (db 0). */
+} redisDb;
+
+/* forward declaration for functions ctx */
+typedef struct functionsLibCtx functionsLibCtx;
+
+/* Holding object that needs to be populated during
+ * rdb loading. On loading end it is possible to decide
+ * whether or not to set those objects in their rightful place.
+ * For example: dbarray needs to be set as the main database on
+ * successful loading and dropped on failure. */
+typedef struct rdbLoadingCtx {
+    redisDb* dbarray;
+    functionsLibCtx* functions_lib_ctx;
+}rdbLoadingCtx;
+
+/* Client MULTI/EXEC state */
+typedef struct multiCmd {
+ robj **argv;
+ int argv_len;
+ int argc;
+ struct redisCommand *cmd;
+} multiCmd;
+
+typedef struct multiState {
+ multiCmd *commands; /* Array of MULTI commands */
+ int count; /* Total number of MULTI commands */
+ int cmd_flags; /* The accumulated command flags OR-ed together.
+ So if at least a command has a given flag, it
+ will be set in this field. */
+ int cmd_inv_flags; /* Same as cmd_flags, OR-ing the ~flags. so that it
+ is possible to know if all the commands have a
+ certain flag. */
+ size_t argv_len_sums; /* mem used by all commands arguments */
+ int alloc_count; /* total number of multiCmd struct memory reserved. */
+} multiState;
+
+/* This structure holds the blocking operation state for a client.
+ * The fields used depend on client->btype. */
+typedef struct blockingState {
+ /* Generic fields. */
+ blocking_type btype; /* Type of blocking op if CLIENT_BLOCKED. */
+ mstime_t timeout; /* Blocking operation timeout. If UNIX current time
+ * is > timeout then the operation timed out. */
+ int unblock_on_nokey; /* Whether to unblock the client when at least one of the keys
+ is deleted or does not exist anymore */
+ /* BLOCKED_LIST, BLOCKED_ZSET and BLOCKED_STREAM or any other Keys related blocking */
+ dict *keys; /* The keys we are blocked on */
+
+ /* BLOCKED_WAIT and BLOCKED_WAITAOF */
+ int numreplicas; /* Number of replicas we are waiting for ACK. */
+ int numlocal; /* Indication if WAITAOF is waiting for local fsync. */
+ long long reploffset; /* Replication offset to reach. */
+
+ /* BLOCKED_MODULE */
+ void *module_blocked_handle; /* RedisModuleBlockedClient structure.
+ which is opaque for the Redis core, only
+ handled in module.c. */
+
+ void *async_rm_call_handle; /* RedisModuleAsyncRMCallPromise structure.
+ which is opaque for the Redis core, only
+ handled in module.c. */
+} blockingState;
+
+/* The following structure represents a node in the server.ready_keys list,
+ * where we accumulate all the keys that had clients blocked with a blocking
+ * operation such as B[LR]POP, but received new data in the context of the
+ * last executed command.
+ *
+ * After the execution of every command or script, we iterate over this list to check
+ * if as a result we should serve data to clients blocked, unblocking them.
+ * Note that server.ready_keys will not have duplicates as there is a dictionary
+ * also called ready_keys in every structure representing a Redis database,
+ * where we make sure to remember if a given key was already added in the
+ * server.ready_keys list. */
+typedef struct readyList {
+    redisDb *db;
+    robj *key;
+} readyList;
+
+/* This structure represents a Redis user. This is useful for ACLs, the
+ * user is associated to the connection after the connection is authenticated.
+ * If there is no associated user, the connection uses the default user. */
+#define USER_COMMAND_BITS_COUNT 1024 /* The total number of command bits
+ in the user structure. The last valid
+ command ID we can set in the user
+ is USER_COMMAND_BITS_COUNT-1. */
+#define USER_FLAG_ENABLED (1<<0) /* The user is active. */
+#define USER_FLAG_DISABLED (1<<1) /* The user is disabled. */
+#define USER_FLAG_NOPASS (1<<2) /* The user requires no password, any
+ provided password will work. For the
+ default user, this also means that
+ no AUTH is needed, and every
+ connection is immediately
+ authenticated. */
+#define USER_FLAG_SANITIZE_PAYLOAD (1<<3) /* The user require a deep RESTORE
+ * payload sanitization. */
+#define USER_FLAG_SANITIZE_PAYLOAD_SKIP (1<<4) /* The user should skip the
+ * deep sanitization of RESTORE
+ * payload. */
+
+#define SELECTOR_FLAG_ROOT (1<<0) /* This is the root user permission
+ * selector. */
+#define SELECTOR_FLAG_ALLKEYS (1<<1) /* The user can mention any key. */
+#define SELECTOR_FLAG_ALLCOMMANDS (1<<2) /* The user can run all commands. */
+#define SELECTOR_FLAG_ALLCHANNELS (1<<3) /* The user can mention any Pub/Sub
+ channel. */
+
+typedef struct {
+    sds name;        /* The username as an SDS string. */
+    uint32_t flags;  /* See USER_FLAG_* */
+    list *passwords; /* A list of SDS valid passwords for this user. */
+    list *selectors; /* A list of selectors this user validates commands
+                        against. This list will always contain at least
+                        one selector for backwards compatibility. */
+    robj *acl_string; /* Cached string representation of this user's ACL rules. */
+} user;
+
+/* With multiplexing we need to take per-client state.
+ * Clients are taken in a linked list. */
+
+#define CLIENT_ID_AOF (UINT64_MAX) /* Reserved ID for the AOF client. If you
+ need more reserved IDs use UINT64_MAX-1,
+ -2, ... and so forth. */
+
+/* Replication backlog is not a separate memory, it just is one consumer of
+ * the global replication buffer. This structure records the reference of
+ * replication buffers. Since the replication buffer block list may be very long,
+ * it would cost much time to search replication offset on partial resync, so
+ * we use one rax tree to index some blocks every REPL_BACKLOG_INDEX_PER_BLOCKS
+ * to make searching offset from replication buffer blocks list faster. */
+typedef struct replBacklog {
+ listNode *ref_repl_buf_node; /* Referenced node of replication buffer blocks,
+ * see the definition of replBufBlock. */
+ size_t unindexed_count; /* The count from last creating index block. */
+ rax *blocks_index; /* The index of recorded blocks of replication
+ * buffer for quickly searching replication
+ * offset on partial resynchronization. */
+ long long histlen; /* Backlog actual data length */
+ long long offset; /* Replication "master offset" of first
+ * byte in the replication backlog buffer.*/
+} replBacklog;
+
+typedef struct {
+ list *clients;
+ size_t mem_usage_sum;
+} clientMemUsageBucket;
+
+#ifdef LOG_REQ_RES
+/* Structure used to log client's requests and their
+ * responses (see logreqres.c) */
+typedef struct {
+ /* General */
+ int argv_logged; /* 1 if the command was logged */
+ /* Vars for log buffer */
+ unsigned char *buf; /* Buffer holding the data (request and response) */
+ size_t used;
+ size_t capacity;
+ /* Vars for offsets within the client's reply */
+ struct {
+ /* General */
+ int saved; /* 1 if we already saved the offset (first time we call addReply*) */
+ /* Offset within the static reply buffer */
+ int bufpos;
+ /* Offset within the reply block list */
+ struct {
+ int index;
+ size_t used;
+ } last_node;
+ } offset;
+} clientReqResInfo;
+#endif
+
+typedef struct client {
+ uint64_t id; /* Client incremental unique ID. */
+ uint64_t flags; /* Client flags: CLIENT_* macros. */
+ connection *conn;
+ int resp; /* RESP protocol version. Can be 2 or 3. */
+ redisDb *db; /* Pointer to currently SELECTed DB. */
+ robj *name; /* As set by CLIENT SETNAME. */
+ robj *lib_name; /* The client library name as set by CLIENT SETINFO. */
+ robj *lib_ver; /* The client library version as set by CLIENT SETINFO. */
+ sds querybuf; /* Buffer we use to accumulate client queries. */
+ size_t qb_pos; /* The position we have read in querybuf. */
+ size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */
+ int argc; /* Num of arguments of current command. */
+ robj **argv; /* Arguments of current command. */
+ int argv_len; /* Size of argv array (may be more than argc) */
+ int original_argc; /* Num of arguments of original command if arguments were rewritten. */
+ robj **original_argv; /* Arguments of original command if arguments were rewritten. */
+ size_t argv_len_sum; /* Sum of lengths of objects in argv list. */
+ struct redisCommand *cmd, *lastcmd; /* Last command executed. */
+ struct redisCommand *realcmd; /* The original command that was executed by the client,
+ Used to update error stats in case the c->cmd was modified
+ during the command invocation (like on GEOADD for example). */
+ user *user; /* User associated with this connection. If the
+ user is set to NULL the connection can do
+ anything (admin). */
+ int reqtype; /* Request protocol type: PROTO_REQ_* */
+ int multibulklen; /* Number of multi bulk arguments left to read. */
+ long bulklen; /* Length of bulk argument in multi bulk request. */
+ list *reply; /* List of reply objects to send to the client. */
+ unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */
+ list *deferred_reply_errors; /* Used for module thread safe contexts. */
+ size_t sentlen; /* Amount of bytes already sent in the current
+ buffer or object being sent. */
+ time_t ctime; /* Client creation time. */
+ long duration; /* Current command duration. Used for measuring latency of blocking/non-blocking cmds */
+ int slot; /* The slot the client is executing against. Set to -1 if no slot is being used */
+ dictEntry *cur_script; /* Cached pointer to the dictEntry of the script being executed. */
+ time_t lastinteraction; /* Time of the last interaction, used for timeout */
+ time_t obuf_soft_limit_reached_time;
+ int authenticated; /* Needed when the default user requires auth. */
+ int replstate; /* Replication state if this is a slave. */
+ int repl_start_cmd_stream_on_ack; /* Install slave write handler on first ACK. */
+ int repldbfd; /* Replication DB file descriptor. */
+ off_t repldboff; /* Replication DB file offset. */
+ off_t repldbsize; /* Replication DB file size. */
+ sds replpreamble; /* Replication DB preamble. */
+ long long read_reploff; /* Read replication offset if this is a master. */
+ long long reploff; /* Applied replication offset if this is a master. */
+ long long repl_applied; /* Applied replication data count in querybuf, if this is a replica. */
+ long long repl_ack_off; /* Replication ack offset, if this is a slave. */
+ long long repl_aof_off; /* Replication AOF fsync ack offset, if this is a slave. */
+ long long repl_ack_time;/* Replication ack time, if this is a slave. */
+ long long repl_last_partial_write; /* The last time the server did a partial write from the RDB child pipe to this replica */
+ long long psync_initial_offset; /* FULLRESYNC reply offset other slaves
+ copying this slave output buffer
+ should use. */
+ char replid[CONFIG_RUN_ID_SIZE+1]; /* Master replication ID (if master). */
+ int slave_listening_port; /* As configured with: REPLCONF listening-port */
+ char *slave_addr; /* Optionally given by REPLCONF ip-address */
+ int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */
+ int slave_req; /* Slave requirements: SLAVE_REQ_* */
+ multiState mstate; /* MULTI/EXEC state */
+ blockingState bstate; /* blocking state */
+ long long woff; /* Last write global replication offset. */
+ list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
+ dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
+ dict *pubsub_patterns; /* patterns a client is interested in (PSUBSCRIBE) */
+ dict *pubsubshard_channels; /* shard level channels a client is interested in (SSUBSCRIBE) */
+ sds peerid; /* Cached peer ID. */
+ sds sockname; /* Cached connection target address. */
+ listNode *client_list_node; /* list node in client list */
+ listNode *postponed_list_node; /* list node within the postponed list */
+ listNode *pending_read_list_node; /* list node in clients pending read list */
+ void *module_blocked_client; /* Pointer to the RedisModuleBlockedClient associated with this
+ * client. This is set in case of module authentication before the
+ * unblocked client is reprocessed to handle reply callbacks. */
+ void *module_auth_ctx; /* Ongoing / attempted module based auth callback's ctx.
+ * This is only tracked within the context of the command attempting
+ * authentication. If not NULL, it means module auth is in progress. */
+ RedisModuleUserChangedFunc auth_callback; /* Module callback to execute
+ * when the authenticated user
+ * changes. */
+ void *auth_callback_privdata; /* Private data that is passed when the auth
+ * changed callback is executed. Opaque for
+ * Redis Core. */
+ void *auth_module; /* The module that owns the callback, which is used
+ * to disconnect the client if the module is
+ * unloaded for cleanup. Opaque for Redis Core.*/
+
+ /* If this client is in tracking mode and this field is non zero,
+ * invalidation messages for keys fetched by this client will be sent to
+ * the specified client ID. */
+ uint64_t client_tracking_redirection;
+ rax *client_tracking_prefixes; /* A dictionary of prefixes we are already
+ subscribed to in BCAST mode, in the
+ context of client side caching. */
+ /* In updateClientMemoryUsage() we track the memory usage of
+ * each client and add it to the sum of all the clients of a given type,
+ * however we need to remember what was the old contribution of each
+ * client, and in which category the client was, in order to remove it
+ * before adding it the new value. */
+ size_t last_memory_usage;
+ int last_memory_type;
+
+ listNode *mem_usage_bucket_node;
+ clientMemUsageBucket *mem_usage_bucket;
+
+ listNode *ref_repl_buf_node; /* Referenced node of replication buffer blocks,
+ * see the definition of replBufBlock. */
+ size_t ref_block_pos; /* Access position of referenced buffer block,
+ * i.e. the next offset to send. */
+
+ /* list node in clients_pending_write list */
+ listNode clients_pending_write_node;
+ /* Response buffer */
+ size_t buf_peak; /* Peak used size of buffer in last 5 sec interval. */
+ mstime_t buf_peak_last_reset_time; /* keeps the last time the buffer peak value was reset */
+ int bufpos;
+ size_t buf_usable_size; /* Usable size of buffer. */
+ char *buf;
+#ifdef LOG_REQ_RES
+ clientReqResInfo reqres;
+#endif
+} client;
+
+/* ACL information: counters of authentication failures and of command/key/
+ * channel accesses that were denied for lack of permission. */
+typedef struct aclInfo {
+    long long user_auth_failures; /* Auth failure counts on user level */
+    long long invalid_cmd_accesses; /* Invalid command accesses that user doesn't have permission to */
+    long long invalid_key_accesses; /* Invalid key accesses that user doesn't have permission to */
+    long long invalid_channel_accesses; /* Invalid channel accesses that user doesn't have permission to */
+} aclInfo;
+
+/* One RDB save point (see server.saveparams): trigger a snapshot when at
+ * least `changes` changes happened within `seconds` seconds — presumably
+ * mirroring the "save <seconds> <changes>" config directive. */
+struct saveparam {
+    time_t seconds; /* Length of the time window, in seconds. */
+    int changes;    /* Minimum number of changes required to trigger a save. */
+};
+
+/* One entry of server.loadmodule_queue: a module to load at startup,
+ * together with the arguments to pass to it. */
+struct moduleLoadQueueEntry {
+    sds path;    /* Filesystem path of the module to load. */
+    int argc;    /* Number of elements in argv. */
+    robj **argv; /* Arguments to pass to the module on load. */
+};
+
+/* A queued Sentinel configuration line, kept both tokenized and raw so it
+ * can be applied later in order (see struct sentinelConfig below). */
+struct sentinelLoadQueueEntry {
+    int argc;    /* Number of tokens in argv. */
+    sds *argv;   /* The configuration line split into tokens. */
+    int linenum; /* Line number in the config file (useful for error reporting). */
+    sds line;    /* The raw, unparsed configuration line. */
+};
+
+/* Deferred Sentinel configuration, partitioned relative to the monitor
+ * directives so the lines can be applied in a valid order. */
+struct sentinelConfig {
+    list *pre_monitor_cfg;  /* Lines to apply before the monitor directives. */
+    list *monitor_cfg;      /* The monitor directives themselves. */
+    list *post_monitor_cfg; /* Lines to apply after the monitor directives. */
+};
+
+/* Shared objects created once at startup: common replies, error messages,
+ * command-name bulks, small integers and protocol headers, so that hot code
+ * paths can reuse them instead of allocating fresh objects. */
+struct sharedObjectsStruct {
+    robj *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
+    *queued, *null[4], *nullarray[4], *emptymap[4], *emptyset[4],
+    *emptyarray, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
+    *outofrangeerr, *noscripterr, *loadingerr,
+    *slowevalerr, *slowscripterr, *slowmoduleerr, *bgsaveerr,
+    *masterdownerr, *roslaveerr, *execaborterr, *noautherr, *noreplicaserr,
+    *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
+    *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink,
+    *rpop, *lpop, *lpush, *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax,
+    *emptyscan, *multi, *exec, *left, *right, *hset, *srem, *xgroup, *xclaim,
+    *script, *replconf, *eval, *persist, *set, *pexpireat, *pexpire,
+    *time, *pxat, *absttl, *retrycount, *force, *justid, *entriesread,
+    *lastid, *ping, *setid, *keepttl, *load, *createconsumer,
+    *getack, *special_asterick, *special_equals, *default_username, *redacted,
+    *ssubscribebulk,*sunsubscribebulk, *smessagebulk,
+    *select[PROTO_SHARED_SELECT_CMDS],
+    *integers[OBJ_SHARED_INTEGERS],
+    *mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
+    *bulkhdr[OBJ_SHARED_BULKHDR_LEN],  /* "$<value>\r\n" */
+    *maphdr[OBJ_SHARED_BULKHDR_LEN],   /* "%<value>\r\n" */
+    *sethdr[OBJ_SHARED_BULKHDR_LEN];   /* "~<value>\r\n" */
+    sds minstring, maxstring; /* Shared extreme string sentinels. */
+};
+
+/* ZSETs use a specialized version of Skiplists */
+typedef struct zskiplistNode {
+    sds ele;      /* Member stored in this node. */
+    double score; /* Score the member is ordered by. */
+    struct zskiplistNode *backward; /* Previous node at level 0, for reverse iteration. */
+    struct zskiplistLevel {
+        struct zskiplistNode *forward; /* Next node at this level. */
+        unsigned long span; /* Number of level-0 links this forward pointer spans. */
+    } level[]; /* Flexible array member: one entry per level of this node. */
+} zskiplistNode;
+
+/* The skiplist itself: header/tail pointers plus bookkeeping. */
+typedef struct zskiplist {
+    struct zskiplistNode *header, *tail; /* Head node and tail node of the list. */
+    unsigned long length; /* Number of elements stored. */
+    int level;            /* Current highest level in use across all nodes. */
+} zskiplist;
+
+/* A sorted set keeps the same elements in two structures: a dict for
+ * by-member access and a skiplist for ordered/range operations. */
+typedef struct zset {
+    dict *dict;     /* Map from member to its score. */
+    zskiplist *zsl; /* Skiplist ordering members by score. */
+} zset;
+
+/* Output buffer limits for a class of clients: exceeding the hard limit, or
+ * staying above the soft limit for longer than soft_limit_seconds, makes the
+ * client subject to disconnection. */
+typedef struct clientBufferLimitsConfig {
+    unsigned long long hard_limit_bytes; /* Limit that must never be exceeded. */
+    unsigned long long soft_limit_bytes; /* Limit tolerated only for a bounded time. */
+    time_t soft_limit_seconds;           /* How long the soft limit may be exceeded. */
+} clientBufferLimitsConfig;
+
+/* Default limits, one entry per output-buffer client class. */
+extern clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT];
+
+/* The redisOp structure defines a Redis Operation, that is an instance of
+ * a command with an argument vector, database ID, propagation target
+ * (PROPAGATE_*), and command pointer.
+ *
+ * Currently only used to additionally propagate more commands to AOF/Replication
+ * after the propagation of the executed command. */
+typedef struct redisOp {
+    robj **argv;            /* Argument vector of the command to propagate. */
+    int argc, dbid, target; /* Argument count, database ID, propagation target (PROPAGATE_*). */
+} redisOp;
+
+/* Defines an array of Redis operations. There is an API to add to this
+ * structure in an easy way.
+ *
+ * int redisOpArrayAppend(redisOpArray *oa, int dbid, robj **argv, int argc, int target);
+ * void redisOpArrayFree(redisOpArray *oa);
+ */
+typedef struct redisOpArray {
+    redisOp *ops; /* Dynamically allocated array of operations. */
+    int numops;   /* Number of operations currently stored. */
+    int capacity; /* Allocated capacity of the ops array. */
+} redisOpArray;
+
+/* This structure is returned by the getMemoryOverheadData() function in
+ * order to return memory overhead information. All sizes are in bytes
+ * unless the field is a ratio/percentage (float fields). */
+struct redisMemOverhead {
+    size_t peak_allocated;    /* Peak memory ever allocated. */
+    size_t total_allocated;   /* Currently allocated memory. */
+    size_t startup_allocated; /* Memory allocated at server startup. */
+    size_t repl_backlog;
+    size_t clients_slaves;
+    size_t clients_normal;
+    size_t cluster_links;
+    size_t aof_buffer;
+    size_t lua_caches;
+    size_t functions_caches;
+    size_t overhead_total;    /* Sum of all the overhead sources above. */
+    size_t dataset;           /* Memory used by the data itself (net of overhead). */
+    size_t total_keys;        /* Total number of keys, across all DBs. */
+    size_t bytes_per_key;
+    float dataset_perc;       /* Dataset as a percentage of used memory. */
+    float peak_perc;          /* Used memory as a percentage of the peak. */
+    float total_frag;         /* Fragmentation ratios and their byte deltas below. */
+    ssize_t total_frag_bytes;
+    float allocator_frag;
+    ssize_t allocator_frag_bytes;
+    float allocator_rss;
+    ssize_t allocator_rss_bytes;
+    float rss_extra;
+    size_t rss_extra_bytes;
+    size_t num_dbs;           /* Number of entries in the db array below. */
+    struct {
+        size_t dbid;                     /* Database ID this entry refers to. */
+        size_t overhead_ht_main;         /* Overhead of the main keyspace hash table. */
+        size_t overhead_ht_expires;      /* Overhead of the expires hash table. */
+        size_t overhead_ht_slot_to_keys; /* Overhead of the slot-to-keys map (cluster). */
+    } *db;                    /* Per-database overhead details, num_dbs entries. */
+};
+
+/* Replication error behavior determines the replica behavior
+ * when it receives an error over the replication stream. In
+ * either case the error is logged. */
+typedef enum {
+    PROPAGATION_ERR_BEHAVIOR_IGNORE = 0,       /* Just log and keep going. */
+    PROPAGATION_ERR_BEHAVIOR_PANIC,            /* Panic on any propagation error. */
+    PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS /* Panic only on replicas. */
+} replicationErrorBehavior;
+
+/* This structure can be optionally passed to RDB save/load functions in
+ * order to implement additional functionalities, by storing and loading
+ * metadata to the RDB file.
+ *
+ * For example, to select a DB at load time, useful in
+ * replication in order to make sure that chained slaves (slaves of slaves)
+ * select the correct DB and are able to accept the stream coming from the
+ * top-level master. */
+typedef struct rdbSaveInfo {
+    /* Used saving and loading. */
+    int repl_stream_db;  /* DB to select in server.master client. */
+
+    /* Used only loading. */
+    int repl_id_is_set;  /* True if repl_id field is set. */
+    char repl_id[CONFIG_RUN_ID_SIZE+1];  /* Replication ID. */
+    long long repl_offset;  /* Replication offset. */
+} rdbSaveInfo;
+
+/* Initializer matching the field order above: stream DB -1 (none),
+ * repl_id not set, all-zero replication ID, offset -1. */
+#define RDB_SAVE_INFO_INIT {-1,0,"0000000000000000000000000000000000000000",-1}
+
+/* Memory usage snapshot, sampled in serverCron() (see cron_malloc_stats in
+ * struct redisServer). */
+struct malloc_stats {
+    size_t zmalloc_used;        /* Bytes currently allocated via zmalloc. */
+    size_t process_rss;         /* Resident set size of the process. */
+    size_t allocator_allocated; /* Allocated bytes as reported by the allocator. */
+    size_t allocator_active;    /* Active bytes as reported by the allocator. */
+    size_t allocator_resident;  /* Resident bytes as reported by the allocator. */
+};
+
+/*-----------------------------------------------------------------------------
+ * TLS Context Configuration
+ *----------------------------------------------------------------------------*/
+
+/* All the configurable knobs used to build the TLS context. String fields
+ * are owned config values; NULL/0 generally means "not configured". */
+typedef struct redisTLSContextConfig {
+    char *cert_file;            /* Server side and optionally client side cert file name */
+    char *key_file;             /* Private key filename for cert_file */
+    char *key_file_pass;        /* Optional password for key_file */
+    char *client_cert_file;     /* Certificate to use as a client; if none, use cert_file */
+    char *client_key_file;      /* Private key filename for client_cert_file */
+    char *client_key_file_pass; /* Optional password for client_key_file */
+    char *dh_params_file;       /* Optional Diffie-Hellman parameters file. */
+    char *ca_cert_file;         /* CA certificate bundle file, for peer verification. */
+    char *ca_cert_dir;          /* Directory of CA certificates, alternative to the bundle. */
+    char *protocols;            /* Allowed TLS protocol versions (config string). */
+    char *ciphers;              /* Cipher list (pre-TLSv1.3 ciphers). */
+    char *ciphersuites;         /* Ciphersuites (TLSv1.3). */
+    int prefer_server_ciphers;  /* Prefer the server's cipher order over the client's. */
+    int session_caching;        /* Enable TLS session caching. */
+    int session_cache_size;     /* Max number of cached sessions. */
+    int session_cache_timeout;  /* Session cache entry timeout. */
+} redisTLSContextConfig;
+
+/*-----------------------------------------------------------------------------
+ * AOF manifest definition
+ *----------------------------------------------------------------------------*/
+/* Role of an AOF file; the values are single-character tags. */
+typedef enum {
+    AOF_FILE_TYPE_BASE = 'b', /* BASE file */
+    AOF_FILE_TYPE_HIST = 'h', /* HISTORY file */
+    AOF_FILE_TYPE_INCR = 'i', /* INCR file */
+} aof_file_type;
+
+/* Metadata describing a single AOF file tracked by the manifest. */
+typedef struct {
+    sds file_name;           /* file name */
+    long long file_seq;      /* file sequence */
+    aof_file_type file_type; /* file type (BASE/HIST/INCR) */
+} aofInfo;
+
+/* In-memory representation of the AOF manifest: which BASE/INCR/HISTORY
+ * files make up the current append-only state. */
+typedef struct {
+    aofInfo     *base_aof_info;       /* BASE file information. NULL if there is no BASE file. */
+    list        *incr_aof_list;       /* INCR AOFs list. We may have multiple INCR AOF when rewrite fails. */
+    list        *history_aof_list;    /* HISTORY AOF list. When an AOFRW succeeds, the aofInfo contained in
+                                         `base_aof_info` and `incr_aof_list` is moved to this list. We
+                                         will delete these AOF files when the AOFRW finishes. */
+    long long   curr_base_file_seq;   /* The sequence number used by the current BASE file. */
+    long long   curr_incr_file_seq;   /* The sequence number used by the current INCR file. */
+    int         dirty;                /* 1 indicates that the aofManifest in memory is inconsistent with
+                                         disk, so we need to persist it immediately. */
+} aofManifest;
+
+/*-----------------------------------------------------------------------------
+ * Global server state
+ *----------------------------------------------------------------------------*/
+
+/* AIX defines hz to __hz, we don't use this define and in order to allow
+ * Redis build on AIX we need to undef it. */
+#ifdef _AIX
+#undef hz
+#endif
+
+/* Type of the currently active child process, stored in server.child_type
+ * alongside server.child_pid. NONE means no child is running. */
+#define CHILD_TYPE_NONE 0
+#define CHILD_TYPE_RDB 1    /* RDB background save. */
+#define CHILD_TYPE_AOF 2    /* AOF rewrite. */
+#define CHILD_TYPE_LDB 3    /* LDB (Lua debugger) forked session. */
+#define CHILD_TYPE_MODULE 4 /* Module initiated fork. */
+
+/* Kind of payload a child process reports to the parent, presumably over
+ * child_info_pipe (see struct redisServer) — confirm in childinfo.c. */
+typedef enum childInfoType {
+    CHILD_INFO_TYPE_CURRENT_INFO,    /* Ongoing progress/current info update. */
+    CHILD_INFO_TYPE_AOF_COW_SIZE,    /* Copy-on-write size report from the AOF child. */
+    CHILD_INFO_TYPE_RDB_COW_SIZE,    /* Copy-on-write size report from the RDB child. */
+    CHILD_INFO_TYPE_MODULE_COW_SIZE  /* Copy-on-write size report from a module child. */
+} childInfoType;
+
+struct redisServer {
+ /* General */
+ pid_t pid; /* Main process pid. */
+ pthread_t main_thread_id; /* Main thread id */
+ char *configfile; /* Absolute config file path, or NULL */
+ char *executable; /* Absolute executable file path. */
+ char **exec_argv; /* Executable argv vector (copy). */
+ int dynamic_hz; /* Change hz value depending on # of clients. */
+ int config_hz; /* Configured HZ value. May be different than
+ the actual 'hz' field value if dynamic-hz
+ is enabled. */
+ mode_t umask; /* The umask value of the process on startup */
+ int hz; /* serverCron() calls frequency in hertz */
+ int in_fork_child; /* indication that this is a fork child */
+ redisDb *db;
+ dict *commands; /* Command table */
+ dict *orig_commands; /* Command table before command renaming. */
+ aeEventLoop *el;
+ rax *errors; /* Errors table */
+ unsigned int lruclock; /* Clock for LRU eviction */
+ volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */
+ mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */
+ int last_sig_received; /* Indicates the last SIGNAL received, if any (e.g., SIGINT or SIGTERM). */
+ int shutdown_flags; /* Flags passed to prepareForShutdown(). */
+ int activerehashing; /* Incremental rehash in serverCron() */
+ int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */
+ char *pidfile; /* PID file path */
+ int arch_bits; /* 32 or 64 depending on sizeof(long) */
+ int cronloops; /* Number of times the cron function run */
+ char runid[CONFIG_RUN_ID_SIZE+1]; /* ID always different at every exec. */
+ int sentinel_mode; /* True if this instance is a Sentinel. */
+ size_t initial_memory_usage; /* Bytes used after initialization. */
+ int always_show_logo; /* Show logo even for non-stdout logging. */
+ int in_exec; /* Are we inside EXEC? */
+ int busy_module_yield_flags; /* Are we inside a busy module? (triggered by RM_Yield). see BUSY_MODULE_YIELD_ flags. */
+ const char *busy_module_yield_reply; /* When non-null, we are inside RM_Yield. */
+ char *ignore_warnings; /* Config: warnings that should be ignored. */
+ int client_pause_in_transaction; /* Was a client pause executed during this Exec? */
+ int thp_enabled; /* If true, THP is enabled. */
+ size_t page_size; /* The page size of OS. */
+ /* Modules */
+ dict *moduleapi; /* Exported core APIs dictionary for modules. */
+ dict *sharedapi; /* Like moduleapi but containing the APIs that
+ modules share with each other. */
+ dict *module_configs_queue; /* Dict that stores module configurations from .conf file until after modules are loaded during startup or arguments to loadex. */
+ list *loadmodule_queue; /* List of modules to load at startup. */
+ int module_pipe[2]; /* Pipe used to awake the event loop by module threads. */
+ pid_t child_pid; /* PID of current child */
+ int child_type; /* Type of current child */
+ /* Networking */
+ int port; /* TCP listening port */
+ int tls_port; /* TLS listening port */
+ int tcp_backlog; /* TCP listen() backlog */
+ char *bindaddr[CONFIG_BINDADDR_MAX]; /* Addresses we should bind to */
+ int bindaddr_count; /* Number of addresses in server.bindaddr[] */
+ char *bind_source_addr; /* Source address to bind on for outgoing connections */
+ char *unixsocket; /* UNIX socket path */
+ unsigned int unixsocketperm; /* UNIX socket permission (see mode_t) */
+ connListener listeners[CONN_TYPE_MAX]; /* TCP/Unix/TLS even more types */
+ uint32_t socket_mark_id; /* ID for listen socket marking */
+ connListener clistener; /* Cluster bus listener */
+ list *clients; /* List of active clients */
+ list *clients_to_close; /* Clients to close asynchronously */
+ list *clients_pending_write; /* There is to write or install handler. */
+ list *clients_pending_read; /* Client has pending read socket buffers. */
+ list *slaves, *monitors; /* List of slaves and MONITORs */
+ client *current_client; /* The client that triggered the command execution (External or AOF). */
+ client *executing_client; /* The client executing the current command (possibly script or module). */
+
+#ifdef LOG_REQ_RES
+ char *req_res_logfile; /* Path of log file for logging all requests and their replies. If NULL, no logging will be performed */
+ unsigned int client_default_resp;
+#endif
+
+ /* Stuff for client mem eviction */
+ clientMemUsageBucket* client_mem_usage_buckets;
+
+ rax *clients_timeout_table; /* Radix tree for blocked clients timeouts. */
+ int execution_nesting; /* Execution nesting level.
+ * e.g. call(), async module stuff (timers, events, etc.),
+ * cron stuff (active expire, eviction) */
+ rax *clients_index; /* Active clients dictionary by client ID. */
+ uint32_t paused_actions; /* Bitmask of actions that are currently paused */
+ list *postponed_clients; /* List of postponed clients */
+ pause_event client_pause_per_purpose[NUM_PAUSE_PURPOSES];
+ char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
+ dict *migrate_cached_sockets;/* MIGRATE cached sockets */
+ redisAtomic uint64_t next_client_id; /* Next client unique ID. Incremental. */
+ int protected_mode; /* Don't accept external connections. */
+ int io_threads_num; /* Number of IO threads to use. */
+ int io_threads_do_reads; /* Read and parse from IO threads? */
+ int io_threads_active; /* Is IO threads currently active? */
+ long long events_processed_while_blocked; /* processEventsWhileBlocked() */
+ int enable_protected_configs; /* Enable the modification of protected configs, see PROTECTED_ACTION_ALLOWED_* */
+ int enable_debug_cmd; /* Enable DEBUG commands, see PROTECTED_ACTION_ALLOWED_* */
+ int enable_module_cmd; /* Enable MODULE commands, see PROTECTED_ACTION_ALLOWED_* */
+
+ /* RDB / AOF loading information */
+ volatile sig_atomic_t loading; /* We are loading data from disk if true */
+ volatile sig_atomic_t async_loading; /* We are loading data without blocking the db being served */
+ off_t loading_total_bytes;
+ off_t loading_rdb_used_mem;
+ off_t loading_loaded_bytes;
+ time_t loading_start_time;
+ off_t loading_process_events_interval_bytes;
+ /* Fields used only for stats */
+ time_t stat_starttime; /* Server start time */
+ long long stat_numcommands; /* Number of processed commands */
+ long long stat_numconnections; /* Number of connections received */
+ long long stat_expiredkeys; /* Number of expired keys */
+ double stat_expired_stale_perc; /* Percentage of keys probably expired */
+ long long stat_expired_time_cap_reached_count; /* Early expire cycle stops.*/
+ long long stat_expire_cycle_time_used; /* Cumulative microseconds used. */
+ long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
+ long long stat_evictedclients; /* Number of evicted clients */
+ long long stat_total_eviction_exceeded_time; /* Total time over the memory limit, unit us */
+ monotime stat_last_eviction_exceeded_time; /* Timestamp of current eviction start, unit us */
+ long long stat_keyspace_hits; /* Number of successful lookups of keys */
+ long long stat_keyspace_misses; /* Number of failed lookups of keys */
+ long long stat_active_defrag_hits; /* number of allocations moved */
+ long long stat_active_defrag_misses; /* number of allocations scanned but not moved */
+ long long stat_active_defrag_key_hits; /* number of keys with moved allocations */
+ long long stat_active_defrag_key_misses;/* number of keys scanned and not moved */
+ long long stat_active_defrag_scanned; /* number of dictEntries scanned */
+ long long stat_total_active_defrag_time; /* Total time memory fragmentation over the limit, unit us */
+ monotime stat_last_active_defrag_time; /* Timestamp of current active defrag start */
+ size_t stat_peak_memory; /* Max used memory record */
+ long long stat_aof_rewrites; /* number of aof file rewrites performed */
+ long long stat_aofrw_consecutive_failures; /* The number of consecutive failures of aofrw */
+ long long stat_rdb_saves; /* number of rdb saves performed */
+ long long stat_fork_time; /* Time needed to perform latest fork() */
+ double stat_fork_rate; /* Fork rate in GB/sec. */
+ long long stat_total_forks; /* Total count of fork. */
+ long long stat_rejected_conn; /* Clients rejected because of maxclients */
+ long long stat_sync_full; /* Number of full resyncs with slaves. */
+ long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
+ long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */
+ list *slowlog; /* SLOWLOG list of commands */
+ long long slowlog_entry_id; /* SLOWLOG current entry ID */
+ long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
+ unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
+ struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */
+ redisAtomic long long stat_net_input_bytes; /* Bytes read from network. */
+ redisAtomic long long stat_net_output_bytes; /* Bytes written to network. */
+ redisAtomic long long stat_net_repl_input_bytes; /* Bytes read during replication, added to stat_net_input_bytes in 'info'. */
+ redisAtomic long long stat_net_repl_output_bytes; /* Bytes written during replication, added to stat_net_output_bytes in 'info'. */
+ size_t stat_current_cow_peak; /* Peak size of copy on write bytes. */
+ size_t stat_current_cow_bytes; /* Copy on write bytes while child is active. */
+ monotime stat_current_cow_updated; /* Last update time of stat_current_cow_bytes */
+ size_t stat_current_save_keys_processed; /* Processed keys while child is active. */
+ size_t stat_current_save_keys_total; /* Number of keys when child started. */
+ size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */
+ size_t stat_aof_cow_bytes; /* Copy on write bytes during AOF rewrite. */
+ size_t stat_module_cow_bytes; /* Copy on write bytes during module fork. */
+ double stat_module_progress; /* Module save progress. */
+ size_t stat_clients_type_memory[CLIENT_TYPE_COUNT];/* Mem usage by type */
+ size_t stat_cluster_links_memory; /* Mem usage by cluster links */
+ long long stat_unexpected_error_replies; /* Number of unexpected (aof-loading, replica to master, etc.) error replies */
+ long long stat_total_error_replies; /* Total number of issued error replies ( command + rejected errors ) */
+ long long stat_dump_payload_sanitizations; /* Number deep dump payloads integrity validations. */
+ long long stat_io_reads_processed; /* Number of read events processed by IO / Main threads */
+ long long stat_io_writes_processed; /* Number of write events processed by IO / Main threads */
+ redisAtomic long long stat_total_reads_processed; /* Total number of read events processed */
+ redisAtomic long long stat_total_writes_processed; /* Total number of write events processed */
+ /* The following two are used to track instantaneous metrics, like
+ * number of operations per second, network traffic. */
+ struct {
+ long long last_sample_base; /* The divisor of last sample window */
+ long long last_sample_value; /* The dividend of last sample window */
+ long long samples[STATS_METRIC_SAMPLES];
+ int idx;
+ } inst_metric[STATS_METRIC_COUNT];
+ long long stat_reply_buffer_shrinks; /* Total number of output buffer shrinks */
+ long long stat_reply_buffer_expands; /* Total number of output buffer expands */
+ monotime el_start;
+ /* The following two are used to record the max number of commands executed in one eventloop.
+ * Note that commands in transactions are also counted. */
+ long long el_cmd_cnt_start;
+ long long el_cmd_cnt_max;
+ /* The sum of active-expire, active-defrag and all other tasks done by cron and beforeSleep,
+ but excluding read, write and AOF, which are counted by other sets of metrics. */
+ monotime el_cron_duration;
+ durationStats duration_stats[EL_DURATION_TYPE_NUM];
+
+ /* Configuration */
+ int verbosity; /* Loglevel in redis.conf */
+ int maxidletime; /* Client timeout in seconds */
+ int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */
+ int active_expire_enabled; /* Can be disabled for testing purposes. */
+ int active_expire_effort; /* From 1 (default) to 10, active effort. */
+ int lazy_expire_disabled; /* If > 0, don't trigger lazy expire */
+ int active_defrag_enabled;
+ int sanitize_dump_payload; /* Enables deep sanitization for ziplist and listpack in RDB and RESTORE. */
+ int skip_checksum_validation; /* Disable checksum validation for RDB and RESTORE payload. */
+ int jemalloc_bg_thread; /* Enable jemalloc background thread */
+ size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */
+ int active_defrag_threshold_lower; /* minimum percentage of fragmentation to start active defrag */
+ int active_defrag_threshold_upper; /* maximum percentage of fragmentation at which we use maximum effort */
+ int active_defrag_cycle_min; /* minimal effort for defrag in CPU percentage */
+ int active_defrag_cycle_max; /* maximal effort for defrag in CPU percentage */
+ unsigned long active_defrag_max_scan_fields; /* maximum number of fields of set/hash/zset/list to process from within the main dict scan */
+ size_t client_max_querybuf_len; /* Limit for client query buffer length */
+ int dbnum; /* Total number of configured DBs */
+ int supervised; /* 1 if supervised, 0 otherwise. */
+ int supervised_mode; /* See SUPERVISED_* */
+ int daemonize; /* True if running as a daemon */
+ int set_proc_title; /* True if change proc title */
+ char *proc_title_template; /* Process title template format */
+ clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
+ int pause_cron; /* Don't run cron tasks (debug) */
+ int latency_tracking_enabled; /* 1 if extended latency tracking is enabled, 0 otherwise. */
+ double *latency_tracking_info_percentiles; /* Extended latency tracking info output percentile list configuration. */
+ int latency_tracking_info_percentiles_len;
+ /* AOF persistence */
+ int aof_enabled; /* AOF configuration */
+ int aof_state; /* AOF_(ON|OFF|WAIT_REWRITE) */
+ int aof_fsync; /* Kind of fsync() policy */
+ char *aof_filename; /* Basename of the AOF file and manifest file */
+ char *aof_dirname; /* Name of the AOF directory */
+ int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */
+ int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */
+ off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */
+ off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
+ off_t aof_current_size; /* AOF current size (Including BASE + INCRs). */
+ off_t aof_last_incr_size; /* The size of the latest incr AOF. */
+ off_t aof_last_incr_fsync_offset; /* AOF offset which is already requested to be synced to disk.
+ * Compare with the aof_last_incr_size. */
+ int aof_flush_sleep; /* Micros to sleep before flush. (used by tests) */
+ int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
+ sds aof_buf; /* AOF buffer, written before entering the event loop */
+ int aof_fd; /* File descriptor of currently selected AOF file */
+ int aof_selected_db; /* Currently selected DB in AOF */
+ time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
+ time_t aof_last_fsync; /* UNIX time of last fsync() */
+ time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
+ time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
+ time_t aof_cur_timestamp; /* Current record timestamp in AOF */
+ int aof_timestamp_enabled; /* Enable record timestamp in AOF */
+ int aof_lastbgrewrite_status; /* C_OK or C_ERR */
+ unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
+ int aof_rewrite_incremental_fsync;/* fsync incrementally while aof rewriting? */
+ int rdb_save_incremental_fsync; /* fsync incrementally while rdb saving? */
+ int aof_last_write_status; /* C_OK or C_ERR */
+ int aof_last_write_errno; /* Valid if aof write/fsync status is ERR */
+ int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */
+ int aof_use_rdb_preamble; /* Specify base AOF to use RDB encoding on AOF rewrites. */
+ redisAtomic int aof_bio_fsync_status; /* Status of AOF fsync in bio job. */
+ redisAtomic int aof_bio_fsync_errno; /* Errno of AOF fsync in bio job. */
+ aofManifest *aof_manifest; /* Used to track AOFs. */
+ int aof_disable_auto_gc; /* If disable automatically deleting HISTORY type AOFs?
+ default no. (for testings). */
+
+ /* RDB persistence */
+ long long dirty; /* Changes to DB from the last save */
+ long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
+ long long rdb_last_load_keys_expired; /* number of expired keys when loading RDB */
+ long long rdb_last_load_keys_loaded; /* number of loaded keys when loading RDB */
+ struct saveparam *saveparams; /* Save points array for RDB */
+ int saveparamslen; /* Number of saving points */
+ char *rdb_filename; /* Name of RDB file */
+ int rdb_compression; /* Use compression in RDB? */
+ int rdb_checksum; /* Use RDB checksum? */
+ int rdb_del_sync_files; /* Remove RDB files used only for SYNC if
+ the instance does not use persistence. */
+ time_t lastsave; /* Unix time of last successful save */
+ time_t lastbgsave_try; /* Unix time of last attempted bgsave */
+ time_t rdb_save_time_last; /* Time used by last RDB save run. */
+ time_t rdb_save_time_start; /* Current RDB save start time. */
+ int rdb_bgsave_scheduled; /* BGSAVE when possible if true. */
+ int rdb_child_type; /* Type of save by active child. */
+ int lastbgsave_status; /* C_OK or C_ERR */
+ int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
+ int rdb_pipe_read; /* RDB pipe used to transfer the rdb data */
+ /* to the parent process in diskless repl. */
+ int rdb_child_exit_pipe; /* Used by the diskless parent allow child exit. */
+ connection **rdb_pipe_conns; /* Connections which are currently the */
+ int rdb_pipe_numconns; /* target of diskless rdb fork child. */
+ int rdb_pipe_numconns_writing; /* Number of rdb conns with pending writes. */
+ char *rdb_pipe_buff; /* In diskless replication, this buffer holds data */
+ int rdb_pipe_bufflen; /* that was read from the rdb pipe. */
+ int rdb_key_save_delay; /* Delay in microseconds between keys while
+ * writing aof or rdb. (for testings). negative
+ * value means fractions of microseconds (on average). */
+ int key_load_delay; /* Delay in microseconds between keys while
+ * loading aof or rdb. (for testings). negative
+ * value means fractions of microseconds (on average). */
+ /* Pipe and data structures for child -> parent info sharing. */
+ int child_info_pipe[2]; /* Pipe used to write the child_info_data. */
+ int child_info_nread; /* Num of bytes of the last read from pipe */
+ /* Propagation of commands in AOF / replication */
+ redisOpArray also_propagate; /* Additional command to propagate. */
+ int replication_allowed; /* Are we allowed to replicate? */
+ /* Logging */
+ char *logfile; /* Path of log file */
+ int syslog_enabled; /* Is syslog enabled? */
+ char *syslog_ident; /* Syslog ident */
+ int syslog_facility; /* Syslog facility */
+ int crashlog_enabled; /* Enable signal handler for crashlog.
+ * disable for clean core dumps. */
+ int memcheck_enabled; /* Enable memory check on crash. */
+ int use_exit_on_panic; /* Use exit() on panic and assert rather than
+ * abort(). useful for Valgrind. */
+ /* Shutdown */
+ int shutdown_timeout; /* Graceful shutdown time limit in seconds. */
+ int shutdown_on_sigint; /* Shutdown flags configured for SIGINT. */
+ int shutdown_on_sigterm; /* Shutdown flags configured for SIGTERM. */
+
+ /* Replication (master) */
+ char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */
+ char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/
+ long long master_repl_offset; /* My current replication offset */
+ long long second_replid_offset; /* Accept offsets up to this for replid2. */
+ redisAtomic long long fsynced_reploff_pending;/* Largest replication offset to
+ * potentially have been fsynced, applied to
+ fsynced_reploff only when AOF state is AOF_ON
+ (not during the initial rewrite) */
+ long long fsynced_reploff; /* Largest replication offset that has been confirmed to be fsynced */
+ int slaveseldb; /* Last SELECTed DB in replication output */
+ int repl_ping_slave_period; /* Master pings the slave every N seconds */
+ replBacklog *repl_backlog; /* Replication backlog for partial syncs */
+ long long repl_backlog_size; /* Backlog circular buffer size */
+ time_t repl_backlog_time_limit; /* Time without slaves after the backlog
+ gets released. */
+ time_t repl_no_slaves_since; /* We have no slaves since that time.
+ Only valid if server.slaves len is 0. */
+ int repl_min_slaves_to_write; /* Min number of slaves to write. */
+ int repl_min_slaves_max_lag; /* Max lag of <count> slaves to write. */
+ int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */
+ int repl_diskless_sync; /* Master send RDB to slaves sockets directly. */
+ int repl_diskless_load; /* Slave parse RDB directly from the socket.
+ * see REPL_DISKLESS_LOAD_* enum */
+ int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
+ int repl_diskless_sync_max_replicas;/* Max replicas for diskless repl BGSAVE
+ * delay (start sooner if they all connect). */
+ size_t repl_buffer_mem; /* The memory of replication buffer. */
+ list *repl_buffer_blocks; /* Replication buffers blocks list
+ * (serving replica clients and repl backlog) */
+ /* Replication (slave) */
+ char *masteruser; /* AUTH with this user and masterauth with master */
+ sds masterauth; /* AUTH with this password with master */
+ char *masterhost; /* Hostname of master */
+ int masterport; /* Port of master */
+ int repl_timeout; /* Timeout after N seconds of master idle */
+ client *master; /* Client that is master for this slave */
+ client *cached_master; /* Cached master to be reused for PSYNC. */
+ int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
+ int repl_state; /* Replication status if the instance is a slave */
+ off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
+ off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
+ off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
+ connection *repl_transfer_s; /* Slave -> Master SYNC connection */
+ int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
+ char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
+ time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
+ int repl_serve_stale_data; /* Serve stale data when link is down? */
+ int repl_slave_ro; /* Slave is read only? */
+ int repl_slave_ignore_maxmemory; /* If true slaves do not evict. */
+ time_t repl_down_since; /* Unix time at which link with master went down */
+ int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
+ int slave_priority; /* Reported in INFO and used by Sentinel. */
+ int replica_announced; /* If true, replica is announced by Sentinel */
+ int slave_announce_port; /* Give the master this listening port. */
+ char *slave_announce_ip; /* Give the master this ip address. */
+ int propagation_error_behavior; /* Configures the behavior of the replica
+ * when it receives an error on the replication stream */
+ int repl_ignore_disk_write_error; /* Configures whether replicas panic when unable to
+ * persist writes to AOF. */
+ /* The following two fields is where we store master PSYNC replid/offset
+ * while the PSYNC is in progress. At the end we'll copy the fields into
+ * the server->master client structure. */
+ char master_replid[CONFIG_RUN_ID_SIZE+1]; /* Master PSYNC runid. */
+ long long master_initial_offset; /* Master PSYNC offset. */
+ int repl_slave_lazy_flush; /* Lazy FLUSHALL before loading DB? */
+ /* Synchronous replication. */
+ list *clients_waiting_acks; /* Clients waiting in WAIT or WAITAOF. */
+ int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */
+ /* Limits */
+ unsigned int maxclients; /* Max number of simultaneous clients */
+ unsigned long long maxmemory; /* Max number of memory bytes to use */
+ ssize_t maxmemory_clients; /* Memory limit for total client buffers */
+ int maxmemory_policy; /* Policy for key eviction */
+ int maxmemory_samples; /* Precision of random sampling */
+ int maxmemory_eviction_tenacity;/* Aggressiveness of eviction processing */
+ int lfu_log_factor; /* LFU logarithmic counter factor. */
+ int lfu_decay_time; /* LFU counter decay factor. */
+ long long proto_max_bulk_len; /* Protocol bulk length maximum size. */
+ int oom_score_adj_values[CONFIG_OOM_COUNT]; /* Linux oom_score_adj configuration */
+ int oom_score_adj; /* If true, oom_score_adj is managed */
+ int disable_thp; /* If true, disable THP by syscall */
+ /* Blocked clients */
+ unsigned int blocked_clients; /* # of clients executing a blocking cmd.*/
+ unsigned int blocked_clients_by_type[BLOCKED_NUM];
+ list *unblocked_clients; /* list of clients to unblock before next loop */
+ list *ready_keys; /* List of readyList structures for BLPOP & co */
+ /* Client side caching. */
+ unsigned int tracking_clients; /* # of clients with tracking enabled.*/
+ size_t tracking_table_max_keys; /* Max number of keys in tracking table. */
+ list *tracking_pending_keys; /* tracking invalidation keys pending to flush */
+ list *pending_push_messages; /* pending publish or other push messages to flush */
+ /* Sort parameters - qsort_r() is only available under BSD so we
+ * have to take this state global, in order to pass it to sortCompare() */
+ int sort_desc;
+ int sort_alpha;
+ int sort_bypattern;
+ int sort_store;
+ /* Zip structure config, see redis.conf for more information */
+ size_t hash_max_listpack_entries;
+ size_t hash_max_listpack_value;
+ size_t set_max_intset_entries;
+ size_t set_max_listpack_entries;
+ size_t set_max_listpack_value;
+ size_t zset_max_listpack_entries;
+ size_t zset_max_listpack_value;
+ size_t hll_sparse_max_bytes;
+ size_t stream_node_max_bytes;
+ long long stream_node_max_entries;
+ /* List parameters */
+ int list_max_listpack_size;
+ int list_compress_depth;
+ /* time cache */
+ redisAtomic time_t unixtime; /* Unix time sampled every cron cycle. */
+ time_t timezone; /* Cached timezone. As set by tzset(). */
+ int daylight_active; /* Currently in daylight saving time. */
+ mstime_t mstime; /* 'unixtime' in milliseconds. */
+ ustime_t ustime; /* 'unixtime' in microseconds. */
+ mstime_t cmd_time_snapshot; /* Time snapshot of the root execution nesting. */
+ size_t blocking_op_nesting; /* Nesting level of blocking operation, used to reset blocked_last_cron. */
+ long long blocked_last_cron; /* Indicate the mstime of the last time we did cron jobs from a blocking operation */
+ /* Pubsub */
+ dict *pubsub_channels; /* Map channels to list of subscribed clients */
+ dict *pubsub_patterns; /* A dict of pubsub_patterns */
+ int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is an
+ xor of NOTIFY_... flags. */
+ dict *pubsubshard_channels; /* Map shard channels to list of subscribed clients */
+ /* Cluster */
+ int cluster_enabled; /* Is cluster enabled? */
+ int cluster_port; /* Set the cluster port for a node. */
+ mstime_t cluster_node_timeout; /* Cluster node timeout. */
+ mstime_t cluster_ping_interval; /* A debug configuration for setting how often cluster nodes send ping messages. */
+ char *cluster_configfile; /* Cluster auto-generated config file name. */
+ struct clusterState *cluster; /* State of the cluster */
+ int cluster_migration_barrier; /* Cluster replicas migration barrier. */
+ int cluster_allow_replica_migration; /* Automatic replica migrations to orphaned masters and from empty masters */
+ int cluster_slave_validity_factor; /* Slave max data age for failover. */
+ int cluster_require_full_coverage; /* If true, put the cluster down if
+ there is at least an uncovered slot.*/
+ int cluster_slave_no_failover; /* Prevent slave from starting a failover
+ if the master is in failure state. */
+ char *cluster_announce_ip; /* IP address to announce on cluster bus. */
+ char *cluster_announce_hostname; /* hostname to announce on cluster bus. */
+ char *cluster_announce_human_nodename; /* Human readable node name assigned to a node. */
+ int cluster_preferred_endpoint_type; /* Use the announced hostname when available. */
+ int cluster_announce_port; /* base port to announce on cluster bus. */
+ int cluster_announce_tls_port; /* TLS port to announce on cluster bus. */
+ int cluster_announce_bus_port; /* bus port to announce on cluster bus. */
+ int cluster_module_flags; /* Set of flags that Redis modules are able
+ to set in order to suppress certain
+ native Redis Cluster features. Check the
+ REDISMODULE_CLUSTER_FLAG_*. */
+ int cluster_allow_reads_when_down; /* Are reads allowed when the cluster
+ is down? */
+ int cluster_config_file_lock_fd; /* cluster config fd, will be flocked. */
+ unsigned long long cluster_link_msg_queue_limit_bytes; /* Memory usage limit on individual link msg queue */
+ int cluster_drop_packet_filter; /* Debug config that allows tactically
+ * dropping packets of a specific type */
+ /* Scripting */
+ mstime_t busy_reply_threshold; /* Script / module timeout in milliseconds */
+ int pre_command_oom_state; /* OOM before command (script?) was started */
+ int script_disable_deny_script; /* Allow running commands marked "no-script" inside a script. */
+ /* Lazy free */
+ int lazyfree_lazy_eviction;
+ int lazyfree_lazy_expire;
+ int lazyfree_lazy_server_del;
+ int lazyfree_lazy_user_del;
+ int lazyfree_lazy_user_flush;
+ /* Latency monitor */
+ long long latency_monitor_threshold;
+ dict *latency_events;
+ /* ACLs */
+ char *acl_filename; /* ACL Users file. NULL if not configured. */
+ unsigned long acllog_max_len; /* Maximum length of the ACL LOG list. */
+ sds requirepass; /* Remember the cleartext password set with
+ the old "requirepass" directive for
+ backward compatibility with Redis <= 5. */
+ int acl_pubsub_default; /* Default ACL pub/sub channels flag */
+ aclInfo acl_info; /* ACL info */
+ /* Assert & bug reporting */
+ int watchdog_period; /* Software watchdog period in ms. 0 = off */
+ /* System hardware info */
+ size_t system_memory_size; /* Total memory in system as reported by OS */
+ /* TLS Configuration */
+ int tls_cluster;
+ int tls_replication;
+ int tls_auth_clients;
+ redisTLSContextConfig tls_ctx_config;
+ /* cpu affinity */
+ char *server_cpulist; /* cpu affinity list of redis server main/io thread. */
+ char *bio_cpulist; /* cpu affinity list of bio thread. */
+ char *aof_rewrite_cpulist; /* cpu affinity list of aof rewrite process. */
+ char *bgsave_cpulist; /* cpu affinity list of bgsave process. */
+ /* Sentinel config */
+ struct sentinelConfig *sentinel_config; /* sentinel config to load at startup time. */
+ /* Coordinate failover info */
+ mstime_t failover_end_time; /* Deadline for failover command. */
+ int force_failover; /* If true then failover will be forced at the
+ * deadline, otherwise failover is aborted. */
+ char *target_replica_host; /* Failover target host. If null during a
+ * failover then any replica can be used. */
+ int target_replica_port; /* Failover target port */
+ int failover_state; /* Failover state */
+ int cluster_allow_pubsubshard_when_down; /* Is pubsubshard allowed when the cluster
+ is down, doesn't affect pubsub global. */
+ long reply_buffer_peak_reset_time; /* The amount of time (in milliseconds) to wait between reply buffer peak resets */
+ int reply_buffer_resizing_enabled; /* Is reply buffer resizing enabled (1 by default) */
+ /* Local environment */
+ char *locale_collate;
+};
+
+#define MAX_KEYS_BUFFER 256
+
+/* A reference to a single key argument within a client's argv array. */
+typedef struct {
+    int pos; /* The position of the key within the client array */
+    int flags; /* The flags associated with the key access, see
+                  CMD_KEY_* for more information */
+} keyReference;
+
+/* A result structure for the various getkeys function calls. It lists the
+ * keys as indices to the provided argv. This functionality is also re-used
+ * for returning channel information.
+ */
+typedef struct {
+    keyReference keysbuf[MAX_KEYS_BUFFER]; /* Pre-allocated buffer, to save heap allocations */
+    keyReference *keys; /* Key indices array, points to keysbuf or heap */
+    int numkeys; /* Number of key indices returned */
+    int size; /* Available array size */
+} getKeysResult;
+#define GETKEYS_RESULT_INIT { {{0}}, NULL, 0, MAX_KEYS_BUFFER }
+
+/* Key specs definitions.
+ *
+ * Brief: This is a scheme that tries to describe the location
+ * of key arguments better than the old [first,last,step] scheme
+ * which is limited and doesn't fit many commands.
+ *
+ * There are two steps:
+ * 1. begin_search (BS): in which index should we start searching for keys?
+ * 2. find_keys (FK): relative to the output of BS, how can we tell which args are keys?
+ *
+ * There are two types of BS:
+ * 1. index: key args start at a constant index
+ * 2. keyword: key args start just after a specific keyword
+ *
+ * There are two kinds of FK:
+ * 1. range: keys end at a specific index (or relative to the last argument)
+ * 2. keynum: there's an arg that contains the number of key args somewhere before the keys themselves
+ */
+
+/* WARNING! Must be synced with generate-command-code.py and RedisModuleKeySpecBeginSearchType */
+typedef enum {
+    KSPEC_BS_INVALID = 0, /* Must be 0 */
+    KSPEC_BS_UNKNOWN,     /* Begin-search scheme not covered by the types below —
+                           * TODO confirm exact semantics at use sites. */
+    KSPEC_BS_INDEX,       /* Key args start at a constant index. */
+    KSPEC_BS_KEYWORD      /* Key args start just after a specific keyword. */
+} kspec_bs_type;
+
+/* WARNING! Must be synced with generate-command-code.py and RedisModuleKeySpecFindKeysType */
+typedef enum {
+    KSPEC_FK_INVALID = 0, /* Must be 0 */
+    KSPEC_FK_UNKNOWN,     /* Find-keys scheme not covered by the types below —
+                           * TODO confirm exact semantics at use sites. */
+    KSPEC_FK_RANGE,       /* Keys end at a specific index (or relative to the last argument). */
+    KSPEC_FK_KEYNUM       /* An argument before the keys holds the number of key args. */
+} kspec_fk_type;
+
+/* WARNING! This struct must match RedisModuleCommandKeySpec */
+typedef struct {
+    /* Declarative data */
+    const char *notes;
+    uint64_t flags;
+    kspec_bs_type begin_search_type;
+    union {
+        struct {
+            /* The index from which we start the search for keys */
+            int pos;
+        } index;
+        struct {
+            /* The keyword that indicates the beginning of key args */
+            const char *keyword;
+            /* An index in argv from which to start searching.
+             * Can be negative, which means start search from the end, in reverse
+             * (Example: -2 means to start in reverse from the penultimate arg) */
+            int startfrom;
+        } keyword;
+    } bs;
+    kspec_fk_type find_keys_type;
+    union {
+        /* NOTE: Indices in this struct are relative to the result of the begin_search step!
+         * These are: range.lastkey, keynum.keynumidx, keynum.firstkey */
+        struct {
+            /* Index of the last key.
+             * Can be negative, in which case it's not relative. -1 indicating till the last argument,
+             * -2 one before the last and so on. */
+            int lastkey;
+            /* How many args should we skip after finding a key, in order to find the next one. */
+            int keystep;
+            /* If lastkey is -1, we use limit to stop the search by a factor. 0 and 1 mean no limit.
+             * 2 means 1/2 of the remaining args, 3 means 1/3, and so on. */
+            int limit;
+        } range;
+        struct {
+            /* Index of the argument containing the number of keys to come */
+            int keynumidx;
+            /* Index of the first key (Usually it's just after keynumidx, in
+             * which case it should be set to keynumidx+1). */
+            int firstkey;
+            /* How many args should we skip after finding a key, in order to find the next one. */
+            int keystep;
+        } keynum;
+    } fk;
+} keySpec;
+
+#ifdef LOG_REQ_RES
+
+/* Must be synced with generate-command-code.py */
+/* Types a JSON value can take (used for a command's reply schema,
+ * see redisCommand.reply_schema). */
+typedef enum {
+    JSON_TYPE_STRING,
+    JSON_TYPE_INTEGER,
+    JSON_TYPE_BOOLEAN,
+    JSON_TYPE_OBJECT,
+    JSON_TYPE_ARRAY,
+} jsonType;
+
+/* A single key/value element of a jsonObject. 'type' selects which
+ * member of the 'value' union is valid. */
+typedef struct jsonObjectElement {
+    jsonType type;   /* Discriminator for the 'value' union below. */
+    const char *key; /* Element key within the enclosing object. */
+    union {
+        const char *string;
+        long long integer;
+        int boolean;
+        struct jsonObject *object;
+        struct {
+            struct jsonObject **objects;
+            int length; /* Number of entries in 'objects'. */
+        } array;
+    } value;
+} jsonObjectElement;
+
+/* A JSON object: a counted array of key/value elements. */
+typedef struct jsonObject {
+    struct jsonObjectElement *elements;
+    int length; /* Number of entries in 'elements'. */
+} jsonObject;
+
+#endif
+
+/* WARNING! This struct must match RedisModuleCommandHistoryEntry */
+/* One entry in a command's change history. */
+typedef struct {
+    const char *since;   /* Presumably the version the change appeared in —
+                          * confirm against the commands/*.json files. */
+    const char *changes; /* Description of the change. */
+} commandHistory;
+
+/* Must be synced with COMMAND_GROUP_STR and generate-command-code.py */
+/* Group a command belongs to (see redisCommand.group). */
+typedef enum {
+    COMMAND_GROUP_GENERIC,
+    COMMAND_GROUP_STRING,
+    COMMAND_GROUP_LIST,
+    COMMAND_GROUP_SET,
+    COMMAND_GROUP_SORTED_SET,
+    COMMAND_GROUP_HASH,
+    COMMAND_GROUP_PUBSUB,
+    COMMAND_GROUP_TRANSACTIONS,
+    COMMAND_GROUP_CONNECTION,
+    COMMAND_GROUP_SERVER,
+    COMMAND_GROUP_SCRIPTING,
+    COMMAND_GROUP_HYPERLOGLOG,
+    COMMAND_GROUP_CLUSTER,
+    COMMAND_GROUP_SENTINEL,
+    COMMAND_GROUP_GEO,
+    COMMAND_GROUP_STREAM,
+    COMMAND_GROUP_BITMAP,
+    COMMAND_GROUP_MODULE,
+} redisCommandGroup;
+
+/* Command implementation callback, executed for client 'c'. */
+typedef void redisCommandProc(client *c);
+/* Callback extracting key positions from argv into 'result'
+ * (see getKeysResult above). */
+typedef int redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+
+/* Redis command structure.
+ *
+ * Note that the command table is in commands.c and it is auto-generated.
+ *
+ * This is the meaning of the flags:
+ *
+ * CMD_WRITE: Write command (may modify the key space).
+ *
+ * CMD_READONLY: Commands just reading from keys without changing the content.
+ * Note that commands that don't read from the keyspace such as
+ * TIME, SELECT, INFO, administrative commands, and connection
+ * or transaction related commands (multi, exec, discard, ...)
+ * are not flagged as read-only commands, since they affect the
+ * server or the connection in other ways.
+ *
+ * CMD_DENYOOM: May increase memory usage once called. Don't allow if out
+ * of memory.
+ *
+ * CMD_ADMIN: Administrative command, like SAVE or SHUTDOWN.
+ *
+ * CMD_PUBSUB: Pub/Sub related command.
+ *
+ * CMD_NOSCRIPT: Command not allowed in scripts.
+ *
+ * CMD_BLOCKING: The command has the potential to block the client.
+ *
+ * CMD_LOADING: Allow the command while loading the database.
+ *
+ * CMD_NO_ASYNC_LOADING: Deny during async loading (when a replica uses diskless
+ * sync swapdb, and allows access to the old dataset)
+ *
+ * CMD_STALE: Allow the command while a slave has stale data but is not
+ * allowed to serve this data. Normally no command is accepted
+ * in this condition but just a few.
+ *
+ * CMD_SKIP_MONITOR: Do not automatically propagate the command on MONITOR.
+ *
+ * CMD_SKIP_SLOWLOG: Do not automatically propagate the command to the slowlog.
+ *
+ * CMD_ASKING: Perform an implicit ASKING for this command, so the
+ * command will be accepted in cluster mode if the slot is marked
+ * as 'importing'.
+ *
+ * CMD_FAST: Fast command: O(1) or O(log(N)) command that should never
+ * delay its execution as long as the kernel scheduler is giving
+ * us time. Note that commands that may trigger a DEL as a side
+ * effect (like SET) are not fast commands.
+ *
+ * CMD_NO_AUTH: Command doesn't require authentication
+ *
+ * CMD_MAY_REPLICATE: Command may produce replication traffic, but should be
+ * allowed under circumstances where write commands are disallowed.
+ *                    Examples include PUBLISH, which replicates pubsub messages, and
+ * EVAL, which may execute write commands, which are replicated,
+ * or may just execute read commands. A command can not be marked
+ * both CMD_WRITE and CMD_MAY_REPLICATE
+ *
+ * CMD_SENTINEL: This command is present in sentinel mode.
+ *
+ * CMD_ONLY_SENTINEL: This command is present only when in sentinel mode.
+ * And should be removed from redis.
+ *
+ * CMD_NO_MANDATORY_KEYS: The key arguments for this command are optional.
+ *
+ * CMD_NO_MULTI: The command is not allowed inside a transaction
+ *
+ * CMD_ALLOW_BUSY: The command can run while another command is running for
+ * a long time (timedout script, module command that yields)
+ *
+ * CMD_TOUCHES_ARBITRARY_KEYS: The command may touch (and cause lazy-expire)
+ * arbitrary key (i.e not provided in argv)
+ *
+ * The following additional flags are only used in order to put commands
+ * in a specific ACL category. Commands can have multiple ACL categories.
+ * See redis.conf for the exact meaning of each.
+ *
+ * @keyspace, @read, @write, @set, @sortedset, @list, @hash, @string, @bitmap,
+ * @hyperloglog, @stream, @admin, @fast, @slow, @pubsub, @blocking, @dangerous,
+ * @connection, @transaction, @scripting, @geo.
+ *
+ * Note that:
+ *
+ * 1) The read-only flag implies the @read ACL category.
+ * 2) The write flag implies the @write ACL category.
+ * 3) The fast flag implies the @fast ACL category.
+ * 4) The admin flag implies the @admin and @dangerous ACL category.
+ * 5) The pub-sub flag implies the @pubsub ACL category.
+ * 6) The lack of fast flag implies the @slow ACL category.
+ * 7) The non obvious "keyspace" category includes the commands
+ * that interact with keys without having anything to do with
+ * specific data structures, such as: DEL, RENAME, MOVE, SELECT,
+ * TYPE, EXPIRE*, PEXPIRE*, TTL, PTTL, ...
+ */
+struct redisCommand {
+    /* Declarative data */
+    const char *declared_name; /* A string representing the command declared_name.
+                                * It is a const char * for native commands and SDS for module commands. */
+    const char *summary; /* Summary of the command (optional). */
+    const char *complexity; /* Complexity description (optional). */
+    const char *since; /* Debut version of the command (optional). */
+    int doc_flags; /* Flags for documentation (see CMD_DOC_*). */
+    const char *replaced_by; /* In case the command is deprecated, this is the successor command. */
+    const char *deprecated_since; /* In case the command is deprecated, when did it happen? */
+    redisCommandGroup group; /* Command group */
+    commandHistory *history; /* History of the command */
+    int num_history; /* Length of history array. */
+    const char **tips; /* An array of strings that are meant to be tips for clients/proxies regarding this command */
+    int num_tips; /* Length of tips array. */
+    redisCommandProc *proc; /* Command implementation */
+    int arity; /* Number of arguments, it is possible to use -N to say >= N */
+    uint64_t flags; /* Command flags, see CMD_*. */
+    uint64_t acl_categories; /* ACL categories, see ACL_CATEGORY_*. */
+    keySpec *key_specs; /* Key specs array (see keySpec above). */
+    int key_specs_num; /* Length of key_specs array. */
+    /* Use a function to determine keys arguments in a command line.
+     * Used for Redis Cluster redirect (may be NULL) */
+    redisGetKeysProc *getkeys_proc;
+    int num_args; /* Length of args array. */
+    /* Array of subcommands (may be NULL) */
+    struct redisCommand *subcommands;
+    /* Array of arguments (may be NULL) */
+    struct redisCommandArg *args;
+#ifdef LOG_REQ_RES
+    /* Reply schema */
+    struct jsonObject *reply_schema;
+#endif
+
+    /* Runtime populated data */
+    long long microseconds, calls, rejected_calls, failed_calls;
+    int id;     /* Command ID. This is a progressive ID starting from 0 that
+                   is assigned at runtime, and is used in order to check
+                   ACLs. A connection is able to execute a given command if
+                   the user associated to the connection has this command
+                   bit set in the bitmap of allowed commands. */
+    sds fullname; /* A SDS string representing the command fullname. */
+    struct hdr_histogram* latency_histogram; /* Points to the command latency histogram (time unit: nanoseconds) */
+    keySpec legacy_range_key_spec; /* The legacy (first,last,step) key spec is
+                                    * still maintained (if applicable) so that
+                                    * we can still support the reply format of
+                                    * COMMAND INFO and COMMAND GETKEYS */
+    dict *subcommands_dict; /* A dictionary that holds the subcommands, the key is the subcommand sds name
+                             * (not the fullname), and the value is the redisCommand structure pointer. */
+    struct redisCommand *parent; /* Parent command, set for subcommands (presumably NULL otherwise). */
+    struct RedisModuleCommand *module_cmd; /* A pointer to the module command data (NULL if native command) */
+};
+
+/* Occurrence counter — the name suggests per-error-name statistics;
+ * confirm at the use sites (error stats tracking). */
+struct redisError {
+    long long count;
+};
+
+/* A function symbol: a name paired with its address. */
+struct redisFunctionSym {
+    char *name;
+    unsigned long pointer; /* Symbol address stored as an integer. */
+};
+
+/* An element being sorted: the object itself plus the value it is
+ * compared by (either a numeric score or a comparison object). */
+typedef struct _redisSortObject {
+    robj *obj;
+    union {
+        double score;  /* Used for numeric comparison. */
+        robj *cmpobj;  /* Used for object comparison — presumably the ALPHA
+                        * path; confirm in sortCompare(). */
+    } u;
+} redisSortObject;
+
+/* An operation attached to a SORT invocation (type + pattern). */
+typedef struct _redisSortOperation {
+    int type;      /* Operation type — confirm the constants used at call sites. */
+    robj *pattern; /* Pattern the operation applies to. */
+} redisSortOperation;
+
+/* Structure to hold list iteration abstraction. */
+typedef struct {
+    robj *subject;
+    unsigned char encoding;
+    unsigned char direction; /* Iteration direction */
+
+    /* Encoding-specific iterators; presumably only the one matching
+     * 'encoding' is in use — confirm in listTypeInitIterator(). */
+    unsigned char *lpi; /* listpack iterator */
+    quicklistIter *iter; /* quicklist iterator */
+} listTypeIterator;
+
+/* Structure for an entry while iterating over a list. */
+typedef struct {
+    listTypeIterator *li; /* The iterator this entry was produced by. */
+    unsigned char *lpe; /* Entry in listpack */
+    quicklistEntry entry; /* Entry in quicklist */
+} listTypeEntry;
+
+/* Structure to hold set iteration abstraction. */
+typedef struct {
+    robj *subject;
+    int encoding; /* Encoding of 'subject'; selects which iterator field is used. */
+    int ii; /* intset iterator */
+    dictIterator *di; /* hashtable iterator */
+    unsigned char *lpi; /* listpack iterator */
+} setTypeIterator;
+
+/* Structure to hold hash iteration abstraction. Note that iteration over
+ * hashes involves both fields and values. Because it is possible that
+ * not both are required, store pointers in the iterator to avoid
+ * unnecessary memory allocation for fields/values. */
+typedef struct {
+    robj *subject;
+    int encoding; /* Encoding of 'subject'; selects which iterator fields are used. */
+
+    /* Presumably current field/value positions for the listpack
+     * encoding — confirm in hashTypeNext(). */
+    unsigned char *fptr, *vptr;
+
+    dictIterator *di; /* hashtable iterator */
+    dictEntry *de;    /* Current hashtable entry. */
+} hashTypeIterator;
+
+#include "stream.h" /* Stream data type header file. */
+
+#define OBJ_HASH_KEY 1
+#define OBJ_HASH_VALUE 2
+
+#define IO_THREADS_OP_IDLE 0
+#define IO_THREADS_OP_READ 1
+#define IO_THREADS_OP_WRITE 2
+extern int io_threads_op;
+
+/*-----------------------------------------------------------------------------
+ * Extern declarations
+ *----------------------------------------------------------------------------*/
+
+extern struct redisServer server;
+extern struct sharedObjectsStruct shared;
+extern dictType objectKeyPointerValueDictType;
+extern dictType objectKeyHeapPointerValueDictType;
+extern dictType setDictType;
+extern dictType BenchmarkDictType;
+extern dictType zsetDictType;
+extern dictType dbDictType;
+extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
+extern dictType hashDictType;
+extern dictType stringSetDictType;
+extern dictType externalStringType;
+extern dictType sdsHashDictType;
+extern dictType dbExpiresDictType;
+extern dictType modulesDictType;
+extern dictType sdsReplyDictType;
+extern dict *modules;
+
+/*-----------------------------------------------------------------------------
+ * Functions prototypes
+ *----------------------------------------------------------------------------*/
+
+/* Command metadata */
+void populateCommandLegacyRangeSpec(struct redisCommand *c);
+
+/* Modules */
+void moduleInitModulesSystem(void);
+void moduleInitModulesSystemLast(void);
+void modulesCron(void);
+int moduleLoad(const char *path, void **argv, int argc, int is_loadex);
+int moduleUnload(sds name, const char **errmsg);
+void moduleLoadFromQueue(void);
+int moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int moduleGetCommandChannelsViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+moduleType *moduleTypeLookupModuleByID(uint64_t id);
+moduleType *moduleTypeLookupModuleByName(const char *name);
+moduleType *moduleTypeLookupModuleByNameIgnoreCase(const char *name);
+void moduleTypeNameByID(char *name, uint64_t moduleid);
+const char *moduleTypeModuleName(moduleType *mt);
+const char *moduleNameFromCommand(struct redisCommand *cmd);
+void moduleFreeContext(struct RedisModuleCtx *ctx);
+void moduleCallCommandUnblockedHandler(client *c);
+void unblockClientFromModule(client *c);
+void moduleHandleBlockedClients(void);
+void moduleBlockedClientTimedOut(client *c);
+void modulePipeReadable(aeEventLoop *el, int fd, void *privdata, int mask);
+size_t moduleCount(void);
+void moduleAcquireGIL(void);
+int moduleTryAcquireGIL(void);
+void moduleReleaseGIL(void);
+void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid);
+void firePostExecutionUnitJobs(void);
+void moduleCallCommandFilters(client *c);
+void modulePostExecutionUnitOperations(void);
+void ModuleForkDoneHandler(int exitcode, int bysignal);
+int TerminateModuleForkChild(int child_pid, int wait);
+ssize_t rdbSaveModulesAux(rio *rdb, int when);
+int moduleAllDatatypesHandleErrors(void);
+int moduleAllModulesHandleReplAsyncLoad(void);
+sds modulesCollectInfo(sds info, dict *sections_dict, int for_crash_report, int sections);
+void moduleFireServerEvent(uint64_t eid, int subid, void *data);
+void processModuleLoadingProgressEvent(int is_aof);
+int moduleTryServeClientBlockedOnKey(client *c, robj *key);
+void moduleUnblockClient(client *c);
+int moduleBlockedClientMayTimeout(client *c);
+int moduleClientIsBlockedOnKeys(client *c);
+void moduleNotifyUserChanged(client *c);
+void moduleNotifyKeyUnlink(robj *key, robj *val, int dbid, int flags);
+size_t moduleGetFreeEffort(robj *key, robj *val, int dbid);
+size_t moduleGetMemUsage(robj *key, robj *val, size_t sample_size, int dbid);
+robj *moduleTypeDupOrReply(client *c, robj *fromkey, robj *tokey, int todb, robj *value);
+int moduleDefragValue(robj *key, robj *obj, int dbid);
+int moduleLateDefrag(robj *key, robj *value, unsigned long *cursor, long long endtime, int dbid);
+void moduleDefragGlobals(void);
+void *moduleGetHandleByName(char *modulename);
+int moduleIsModuleCommand(void *module_handle, struct redisCommand *cmd);
+
+/* Utils */
+long long ustime(void);
+mstime_t mstime(void);
+mstime_t commandTimeSnapshot(void);
+void getRandomHexChars(char *p, size_t len);
+void getRandomBytes(unsigned char *p, size_t len);
+uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
+void exitFromChild(int retcode);
+long long redisPopcount(void *s, long count);
+int redisSetProcTitle(char *title);
+int validateProcTitleTemplate(const char *template);
+int redisCommunicateSystemd(const char *sd_notify_msg);
+void redisSetCpuAffinity(const char *cpulist);
+
+/* afterErrorReply flags */
+#define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL<<0) /* Indicating that we should not update
+ error stats after sending error reply */
+/* networking.c -- Networking and Client related operations */
+client *createClient(connection *conn);
+void freeClient(client *c);
+void freeClientAsync(client *c);
+void logInvalidUseAndFreeClientAsync(client *c, const char *fmt, ...);
+int beforeNextClient(client *c);
+void clearClientConnectionState(client *c);
+void resetClient(client *c);
+void freeClientOriginalArgv(client *c);
+void freeClientArgv(client *c);
+void sendReplyToClient(connection *conn);
+void *addReplyDeferredLen(client *c);
+void setDeferredArrayLen(client *c, void *node, long length);
+void setDeferredMapLen(client *c, void *node, long length);
+void setDeferredSetLen(client *c, void *node, long length);
+void setDeferredAttributeLen(client *c, void *node, long length);
+void setDeferredPushLen(client *c, void *node, long length);
+int processInputBuffer(client *c);
+void acceptCommonHandler(connection *conn, int flags, char *ip);
+void readQueryFromClient(connection *conn);
+int prepareClientToWrite(client *c);
+void addReplyNull(client *c);
+void addReplyNullArray(client *c);
+void addReplyBool(client *c, int b);
+void addReplyVerbatim(client *c, const char *s, size_t len, const char *ext);
+void addReplyProto(client *c, const char *s, size_t len);
+void AddReplyFromClient(client *c, client *src);
+void addReplyBulk(client *c, robj *obj);
+void addReplyBulkCString(client *c, const char *s);
+void addReplyBulkCBuffer(client *c, const void *p, size_t len);
+void addReplyBulkLongLong(client *c, long long ll);
+void addReply(client *c, robj *obj);
+void addReplyStatusLength(client *c, const char *s, size_t len);
+void addReplySds(client *c, sds s);
+void addReplyBulkSds(client *c, sds s);
+void setDeferredReplyBulkSds(client *c, void *node, sds s);
+void addReplyErrorObject(client *c, robj *err);
+void addReplyOrErrorObject(client *c, robj *reply);
+void afterErrorReply(client *c, const char *s, size_t len, int flags);
+void addReplyErrorFormatInternal(client *c, int flags, const char *fmt, va_list ap);
+void addReplyErrorSdsEx(client *c, sds err, int flags);
+void addReplyErrorSds(client *c, sds err);
+void addReplyErrorSdsSafe(client *c, sds err);
+void addReplyError(client *c, const char *err);
+void addReplyErrorArity(client *c);
+void addReplyErrorExpireTime(client *c);
+void addReplyStatus(client *c, const char *status);
+void addReplyDouble(client *c, double d);
+void addReplyLongLongWithPrefix(client *c, long long ll, char prefix);
+void addReplyBigNum(client *c, const char* num, size_t len);
+void addReplyHumanLongDouble(client *c, long double d);
+void addReplyLongLong(client *c, long long ll);
+void addReplyArrayLen(client *c, long length);
+void addReplyMapLen(client *c, long length);
+void addReplySetLen(client *c, long length);
+void addReplyAttributeLen(client *c, long length);
+void addReplyPushLen(client *c, long length);
+void addReplyHelp(client *c, const char **help);
+void addReplySubcommandSyntaxError(client *c);
+void addReplyLoadedModules(client *c);
+void copyReplicaOutputBuffer(client *dst, client *src);
+void addListRangeReply(client *c, robj *o, long start, long end, int reverse);
+void deferredAfterErrorReply(client *c, list *errors);
+size_t sdsZmallocSize(sds s);
+size_t getStringObjectSdsUsedMemory(robj *o);
+void freeClientReplyValue(void *o);
+void *dupClientReplyValue(void *o);
+char *getClientPeerId(client *client);
+char *getClientSockName(client *client);
+sds catClientInfoString(sds s, client *client);
+sds getAllClientsInfoString(int type);
+int clientSetName(client *c, robj *name, const char **err);
+void rewriteClientCommandVector(client *c, int argc, ...);
+void rewriteClientCommandArgument(client *c, int i, robj *newval);
+void replaceClientCommandVector(client *c, int argc, robj **argv);
+void redactClientCommandArgument(client *c, int argc);
+size_t getClientOutputBufferMemoryUsage(client *c);
+size_t getClientMemoryUsage(client *c, size_t *output_buffer_mem_usage);
+int freeClientsInAsyncFreeQueue(void);
+int closeClientOnOutputBufferLimitReached(client *c, int async);
+int getClientType(client *c);
+int getClientTypeByName(char *name);
+char *getClientTypeName(int class);
+void flushSlavesOutputBuffers(void);
+void disconnectSlaves(void);
+void evictClients(void);
+int listenToPort(connListener *fds);
+void pauseActions(pause_purpose purpose, mstime_t end, uint32_t actions_bitmask);
+void unpauseActions(pause_purpose purpose);
+uint32_t isPausedActions(uint32_t action_bitmask);
+uint32_t isPausedActionsWithUpdate(uint32_t action_bitmask);
+void updatePausedActions(void);
+void unblockPostponedClients(void);
+void processEventsWhileBlocked(void);
+void whileBlockedCron(void);
+void blockingOperationStarts(void);
+void blockingOperationEnds(void);
+int handleClientsWithPendingWrites(void);
+int handleClientsWithPendingWritesUsingThreads(void);
+int handleClientsWithPendingReadsUsingThreads(void);
+int stopThreadedIOIfNeeded(void);
+int clientHasPendingReplies(client *c);
+int updateClientMemUsageAndBucket(client *c);
+void removeClientFromMemUsageBucket(client *c, int allow_eviction);
+void unlinkClient(client *c);
+int writeToClient(client *c, int handler_installed);
+void linkClient(client *c);
+void protectClient(client *c);
+void unprotectClient(client *c);
+void initThreadedIO(void);
+client *lookupClientByID(uint64_t id);
+int authRequired(client *c);
+void putClientInPendingWriteQueue(client *c);
+
+/* logreqres.c - logging of requests and responses */
+void reqresReset(client *c, int free_buf);
+void reqresSaveClientReplyOffset(client *c);
+size_t reqresAppendRequest(client *c);
+size_t reqresAppendResponse(client *c);
+
+#ifdef __GNUC__
+void addReplyErrorFormatEx(client *c, int flags, const char *fmt, ...)
+ __attribute__((format(printf, 3, 4)));
+void addReplyErrorFormat(client *c, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+void addReplyStatusFormat(client *c, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+#else
+void addReplyErrorFormatEx(client *c, int flags, const char *fmt, ...);
+void addReplyErrorFormat(client *c, const char *fmt, ...);
+void addReplyStatusFormat(client *c, const char *fmt, ...);
+#endif
+
+/* Client side caching (tracking mode) */
+void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **prefix, size_t numprefix);
+void disableTracking(client *c);
+void trackingRememberKeys(client *tracking, client *executing);
+void trackingInvalidateKey(client *c, robj *keyobj, int bcast);
+void trackingScheduleKeyInvalidation(uint64_t client_id, robj *keyobj);
+void trackingHandlePendingKeyInvalidations(void);
+void trackingInvalidateKeysOnFlush(int async);
+void freeTrackingRadixTree(rax *rt);
+void freeTrackingRadixTreeAsync(rax *rt);
+void trackingLimitUsedSlots(void);
+uint64_t trackingGetTotalItems(void);
+uint64_t trackingGetTotalKeys(void);
+uint64_t trackingGetTotalPrefixes(void);
+void trackingBroadcastInvalidationMessages(void);
+int checkPrefixCollisionsOrReply(client *c, robj **prefix, size_t numprefix);
+
+/* List data type */
+void listTypePush(robj *subject, robj *value, int where);
+robj *listTypePop(robj *subject, int where);
+unsigned long listTypeLength(const robj *subject);
+listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction);
+void listTypeReleaseIterator(listTypeIterator *li);
+void listTypeSetIteratorDirection(listTypeIterator *li, listTypeEntry *entry, unsigned char direction);
+int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
+robj *listTypeGet(listTypeEntry *entry);
+unsigned char *listTypeGetValue(listTypeEntry *entry, size_t *vlen, long long *lval);
+void listTypeInsert(listTypeEntry *entry, robj *value, int where);
+void listTypeReplace(listTypeEntry *entry, robj *value);
+int listTypeEqual(listTypeEntry *entry, robj *o);
+void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry);
+robj *listTypeDup(robj *o);
+void listTypeDelRange(robj *o, long start, long stop);
+void popGenericCommand(client *c, int where);
+void listElementsRemoved(client *c, robj *key, int where, robj *o, long count, int signal, int *deleted);
+typedef enum {
+    LIST_CONV_AUTO,      /* let the conversion code pick the direction -- see listTypeTryConversion(); confirm in t_list.c */
+    LIST_CONV_GROWING,   /* hint: list is about to grow (presumably may upgrade encoding) -- confirm */
+    LIST_CONV_SHRINKING, /* hint: list just shrank (presumably may downgrade encoding) -- confirm */
+} list_conv_type;
+typedef void (*beforeConvertCB)(void *data);
+void listTypeTryConversion(robj *o, list_conv_type lct, beforeConvertCB fn, void *data);
+void listTypeTryConversionAppend(robj *o, robj **argv, int start, int end, beforeConvertCB fn, void *data);
+
+/* MULTI/EXEC/WATCH... */
+void unwatchAllKeys(client *c);
+void initClientMultiState(client *c);
+void freeClientMultiState(client *c);
+void queueMultiCommand(client *c, uint64_t cmd_flags);
+size_t multiStateMemOverhead(client *c);
+void touchWatchedKey(redisDb *db, robj *key);
+int isWatchedKeyExpired(client *c);
+void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with);
+void discardTransaction(client *c);
+void flagTransaction(client *c);
+void execCommandAbort(client *c, sds error);
+
+/* Redis object implementation */
+void decrRefCount(robj *o);
+void decrRefCountVoid(void *o);
+void incrRefCount(robj *o);
+robj *makeObjectShared(robj *o);
+void freeStringObject(robj *o);
+void freeListObject(robj *o);
+void freeSetObject(robj *o);
+void freeZsetObject(robj *o);
+void freeHashObject(robj *o);
+void dismissObject(robj *o, size_t dump_size);
+robj *createObject(int type, void *ptr);
+void initObjectLRUOrLFU(robj *o);
+robj *createStringObject(const char *ptr, size_t len);
+robj *createRawStringObject(const char *ptr, size_t len);
+robj *createEmbeddedStringObject(const char *ptr, size_t len);
+robj *tryCreateRawStringObject(const char *ptr, size_t len);
+robj *tryCreateStringObject(const char *ptr, size_t len);
+robj *dupStringObject(const robj *o);
+int isSdsRepresentableAsLongLong(sds s, long long *llval);
+int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
+robj *tryObjectEncoding(robj *o);
+robj *tryObjectEncodingEx(robj *o, int try_trim);
+robj *getDecodedObject(robj *o);
+size_t stringObjectLen(robj *o);
+robj *createStringObjectFromLongLong(long long value);
+robj *createStringObjectFromLongLongForValue(long long value);
+robj *createStringObjectFromLongLongWithSds(long long value);
+robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
+robj *createQuicklistObject(void);
+robj *createListListpackObject(void);
+robj *createSetObject(void);
+robj *createIntsetObject(void);
+robj *createSetListpackObject(void);
+robj *createHashObject(void);
+robj *createZsetObject(void);
+robj *createZsetListpackObject(void);
+robj *createStreamObject(void);
+robj *createModuleObject(moduleType *mt, void *value);
+int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg);
+int getPositiveLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg);
+int getRangeLongFromObjectOrReply(client *c, robj *o, long min, long max, long *target, const char *msg);
+int checkType(client *c, robj *o, int type);
+int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg);
+int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg);
+int getDoubleFromObject(const robj *o, double *target);
+int getLongLongFromObject(robj *o, long long *target);
+int getLongDoubleFromObject(robj *o, long double *target);
+int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg);
+int getIntFromObjectOrReply(client *c, robj *o, int *target, const char *msg);
+char *strEncoding(int encoding);
+int compareStringObjects(const robj *a, const robj *b);
+int collateStringObjects(const robj *a, const robj *b);
+int equalStringObjects(robj *a, robj *b);
+unsigned long long estimateObjectIdleTime(robj *o);
+void trimStringObjectIfNeeded(robj *o, int trim_small_values);
+#define sdsEncodedObject(objptr) (objptr->encoding == OBJ_ENCODING_RAW || objptr->encoding == OBJ_ENCODING_EMBSTR)
+
+/* Synchronous I/O with timeout */
+ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
+ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout);
+ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout);
+
+/* Replication */
+void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
+void replicationFeedStreamFromMasterStream(char *buf, size_t buflen);
+void resetReplicationBuffer(void);
+void feedReplicationBuffer(char *buf, size_t len);
+void freeReplicaReferencedReplBuffer(client *replica);
+void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc);
+void updateSlavesWaitingBgsave(int bgsaveerr, int type);
+void replicationCron(void);
+void replicationStartPendingFork(void);
+void replicationHandleMasterDisconnection(void);
+void replicationCacheMaster(client *c);
+void resizeReplicationBacklog(void);
+void replicationSetMaster(char *ip, int port);
+void replicationUnsetMaster(void);
+void refreshGoodSlavesCount(void);
+int checkGoodReplicasStatus(void);
+void processClientsWaitingReplicas(void);
+void unblockClientWaitingReplicas(client *c);
+int replicationCountAcksByOffset(long long offset);
+int replicationCountAOFAcksByOffset(long long offset);
+void replicationSendNewlineToMaster(void);
+long long replicationGetSlaveOffset(void);
+char *replicationGetSlaveName(client *c);
+long long getPsyncInitialOffset(void);
+int replicationSetupSlaveForFullResync(client *slave, long long offset);
+void changeReplicationId(void);
+void clearReplicationId2(void);
+void createReplicationBacklog(void);
+void freeReplicationBacklog(void);
+void replicationCacheMasterUsingMyself(void);
+void feedReplicationBacklog(void *ptr, size_t len);
+void incrementalTrimReplicationBacklog(size_t blocks);
+int canFeedReplicaReplBuffer(client *replica);
+void rebaseReplicationBuffer(long long base_repl_offset);
+void showLatestBacklog(void);
+void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
+void rdbPipeWriteHandlerConnRemoved(struct connection *conn);
+void clearFailoverState(void);
+void updateFailoverStatus(void);
+void abortFailover(const char *err);
+const char *getFailoverStateString(void);
+
+/* Generic persistence functions */
+void startLoadingFile(size_t size, char* filename, int rdbflags);
+void startLoading(size_t size, int rdbflags, int async);
+void loadingAbsProgress(off_t pos);
+void loadingIncrProgress(off_t size);
+void stopLoading(int success);
+void updateLoadingFileName(char* filename);
+void startSaving(int rdbflags);
+void stopSaving(int success);
+int allPersistenceDisabled(void);
+
+#define DISK_ERROR_TYPE_AOF 1 /* Don't accept writes: AOF errors. */
+#define DISK_ERROR_TYPE_RDB 2 /* Don't accept writes: RDB errors. */
+#define DISK_ERROR_TYPE_NONE 0 /* No problems, we can accept writes. */
+int writeCommandsDeniedByDiskError(void);
+sds writeCommandsGetDiskErrorMessage(int);
+
+/* RDB persistence */
+#include "rdb.h"
+void killRDBChild(void);
+int bg_unlink(const char *filename);
+
+/* AOF persistence */
+void flushAppendOnlyFile(int force);
+void feedAppendOnlyFile(int dictid, robj **argv, int argc);
+void aofRemoveTempFile(pid_t childpid);
+int rewriteAppendOnlyFileBackground(void);
+int loadAppendOnlyFiles(aofManifest *am);
+void stopAppendOnly(void);
+int startAppendOnly(void);
+void backgroundRewriteDoneHandler(int exitcode, int bysignal);
+void killAppendOnlyChild(void);
+void restartAOFAfterSYNC(void);
+void aofLoadManifestFromDisk(void);
+void aofOpenIfNeededOnServerStart(void);
+void aofManifestFree(aofManifest *am);
+int aofDelHistoryFiles(void);
+int aofRewriteLimited(void);
+
+/* Child info */
+void openChildInfoPipe(void);
+void closeChildInfoPipe(void);
+void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress, char *pname);
+void sendChildCowInfo(childInfoType info_type, char *pname);
+void sendChildInfo(childInfoType info_type, size_t keys, char *pname);
+void receiveChildInfo(void);
+
+/* Fork helpers */
+int redisFork(int purpose);
+int hasActiveChildProcess(void);
+void resetChildState(void);
+int isMutuallyExclusiveChildType(int type);
+
+/* acl.c -- Authentication related prototypes. */
+extern rax *Users;
+extern user *DefaultUser;
+void ACLInit(void);
+/* Return values for ACLCheckAllPerm(). */
+#define ACL_OK 0
+#define ACL_DENIED_CMD 1
+#define ACL_DENIED_KEY 2
+#define ACL_DENIED_AUTH 3 /* Only used for ACL LOG entries. */
+#define ACL_DENIED_CHANNEL 4 /* Only used for pub/sub commands */
+
+/* Context values for addACLLogEntry(). */
+#define ACL_LOG_CTX_TOPLEVEL 0
+#define ACL_LOG_CTX_LUA 1
+#define ACL_LOG_CTX_MULTI 2
+#define ACL_LOG_CTX_MODULE 3
+
+/* ACL key permission types */
+#define ACL_READ_PERMISSION (1<<0)
+#define ACL_WRITE_PERMISSION (1<<1)
+#define ACL_ALL_PERMISSION (ACL_READ_PERMISSION|ACL_WRITE_PERMISSION)
+
+/* Return codes for Authentication functions to indicate the result. */
+typedef enum {
+    AUTH_OK = 0,      /* Authentication succeeded. */
+    AUTH_ERR,         /* Authentication failed. */
+    AUTH_NOT_HANDLED, /* Handler declined to process the request (assumption: lets another mechanism try) -- confirm */
+    AUTH_BLOCKED      /* Result pending (assumption: asynchronous/blocking auth, e.g. via a module) -- confirm */
+} AuthResult;
+
+int ACLCheckUserCredentials(robj *username, robj *password);
+int ACLAuthenticateUser(client *c, robj *username, robj *password, robj **err);
+int checkModuleAuthentication(client *c, robj *username, robj *password, robj **err);
+void addAuthErrReply(client *c, robj *err);
+unsigned long ACLGetCommandID(sds cmdname);
+void ACLClearCommandID(void);
+user *ACLGetUserByName(const char *name, size_t namelen);
+int ACLUserCheckKeyPerm(user *u, const char *key, int keylen, int flags);
+int ACLUserCheckChannelPerm(user *u, sds channel, int literal);
+int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, int argc, int *idxptr);
+int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, int flags);
+int ACLCheckAllPerm(client *c, int *idxptr);
+int ACLSetUser(user *u, const char *op, ssize_t oplen);
+sds ACLStringSetUser(user *u, sds username, sds *argv, int argc);
+uint64_t ACLGetCommandCategoryFlagByName(const char *name);
+int ACLAppendUserForLoading(sds *argv, int argc, int *argc_err);
+const char *ACLSetUserStringError(void);
+int ACLLoadConfiguredUsers(void);
+robj *ACLDescribeUser(user *u);
+void ACLLoadUsersAtStartup(void);
+void addReplyCommandCategories(client *c, struct redisCommand *cmd);
+user *ACLCreateUnlinkedUser(void);
+void ACLFreeUserAndKillClients(user *u);
+void addACLLogEntry(client *c, int reason, int context, int argpos, sds username, sds object);
+sds getAclErrorMessage(int acl_res, user *user, struct redisCommand *cmd, sds errored_val, int verbose);
+void ACLUpdateDefaultUserPassword(sds password);
+sds genRedisInfoStringACLStats(sds info);
+void ACLRecomputeCommandBitsFromCommandRulesAllUsers(void);
+
+/* Sorted sets data type */
+
+/* Input flags. */
+#define ZADD_IN_NONE 0
+#define ZADD_IN_INCR (1<<0) /* Increment the score instead of setting it. */
+#define ZADD_IN_NX (1<<1) /* Don't touch elements not already existing. */
+#define ZADD_IN_XX (1<<2) /* Only touch elements already existing. */
+#define ZADD_IN_GT (1<<3) /* Only update existing when new scores are higher. */
+#define ZADD_IN_LT (1<<4) /* Only update existing when new scores are lower. */
+
+/* Output flags. */
+#define ZADD_OUT_NOP (1<<0) /* Operation not performed because of conditionals.*/
+#define ZADD_OUT_NAN (1<<1)     /* The operation yielded a NaN score; not performed. */
+#define ZADD_OUT_ADDED (1<<2) /* The element was new and was added. */
+#define ZADD_OUT_UPDATED (1<<3) /* The element already existed, score updated. */
+
+/* Struct to hold an inclusive/exclusive range spec by score comparison. */
+typedef struct {
+    double min, max;  /* score range boundaries */
+    int minex, maxex; /* are min or max exclusive? */
+} zrangespec;
+
+/* Struct to hold an inclusive/exclusive range spec by lexicographic comparison. */
+typedef struct {
+    sds min, max;     /* May be set to shared.(minstring|maxstring); presumably released via zslFreeLexRange() -- confirm */
+    int minex, maxex; /* are min or max exclusive? */
+} zlexrangespec;
+
+/* flags for incrCommandFailedCalls */
+#define ERROR_COMMAND_REJECTED (1<<0) /* Indicate to update the command rejected stats */
+#define ERROR_COMMAND_FAILED (1<<1) /* Indicate to update the command failed stats */
+
+zskiplist *zslCreate(void);
+void zslFree(zskiplist *zsl);
+zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele);
+unsigned char *zzlInsert(unsigned char *zl, sds ele, double score);
+int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node);
+zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range);
+zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range);
+double zzlGetScore(unsigned char *sptr);
+void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
+void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
+unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range);
+unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range);
+unsigned long zsetLength(const robj *zobj);
+void zsetConvert(robj *zobj, int encoding);
+void zsetConvertToListpackIfNeeded(robj *zobj, size_t maxelelen, size_t totelelen);
+int zsetScore(robj *zobj, sds member, double *score);
+unsigned long zslGetRank(zskiplist *zsl, double score, sds o);
+int zsetAdd(robj *zobj, double score, sds ele, int in_flags, int *out_flags, double *newscore);
+long zsetRank(robj *zobj, sds ele, int reverse, double *score);
+int zsetDel(robj *zobj, sds ele);
+robj *zsetDup(robj *o);
+void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey, long count, int use_nested_array, int reply_nil_when_empty, int *deleted);
+sds lpGetObject(unsigned char *sptr);
+int zslValueGteMin(double value, zrangespec *spec);
+int zslValueLteMax(double value, zrangespec *spec);
+void zslFreeLexRange(zlexrangespec *spec);
+int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec);
+unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec *range);
+unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range);
+zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range);
+zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range);
+int zzlLexValueGteMin(unsigned char *p, zlexrangespec *spec);
+int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec);
+int zslLexValueGteMin(sds value, zlexrangespec *spec);
+int zslLexValueLteMax(sds value, zlexrangespec *spec);
+
+/* Core functions */
+int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level);
+size_t freeMemoryGetNotCountedMemory(void);
+int overMaxmemoryAfterAlloc(size_t moremem);
+uint64_t getCommandFlags(client *c);
+int processCommand(client *c);
+int processPendingCommandAndInputBuffer(client *c);
+int processCommandAndResetClient(client *c);
+void setupSignalHandlers(void);
+void removeSignalHandlers(void);
+int createSocketAcceptHandler(connListener *sfd, aeFileProc *accept_handler);
+connListener *listenerByType(const char *typename);
+int changeListener(connListener *listener);
+void closeListener(connListener *listener);
+struct redisCommand *lookupSubcommand(struct redisCommand *container, sds sub_name);
+struct redisCommand *lookupCommand(robj **argv, int argc);
+struct redisCommand *lookupCommandBySdsLogic(dict *commands, sds s);
+struct redisCommand *lookupCommandBySds(sds s);
+struct redisCommand *lookupCommandByCStringLogic(dict *commands, const char *s);
+struct redisCommand *lookupCommandByCString(const char *s);
+struct redisCommand *lookupCommandOrOriginal(robj **argv, int argc);
+int commandCheckExistence(client *c, sds *err);
+int commandCheckArity(client *c, sds *err);
+void startCommandExecution(void);
+int incrCommandStatsOnError(struct redisCommand *cmd, int flags);
+void call(client *c, int flags);
+void alsoPropagate(int dbid, robj **argv, int argc, int target);
+void postExecutionUnitOperations(void);
+void redisOpArrayFree(redisOpArray *oa);
+void forceCommandPropagation(client *c, int flags);
+void preventCommandPropagation(client *c);
+void preventCommandAOF(client *c);
+void preventCommandReplication(client *c);
+void slowlogPushCurrentCommand(client *c, struct redisCommand *cmd, ustime_t duration);
+void updateCommandLatencyHistogram(struct hdr_histogram** latency_histogram, int64_t duration_hist);
+int prepareForShutdown(int flags);
+void replyToClientsBlockedOnShutdown(void);
+int abortShutdown(void);
+void afterCommand(client *c);
+int mustObeyClient(client *c);
+#ifdef __GNUC__
+void _serverLog(int level, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+#else
+void _serverLog(int level, const char *fmt, ...);
+#endif
+void serverLogRaw(int level, const char *msg);
+void serverLogFromHandler(int level, const char *msg);
+void usage(void);
+void updateDictResizePolicy(void);
+int htNeedsResize(dict *dict);
+void populateCommandTable(void);
+void resetCommandTableStats(dict* commands);
+void resetErrorTableStats(void);
+void adjustOpenFilesLimit(void);
+void incrementErrorCount(const char *fullerr, size_t namelen);
+void closeListeningSockets(int unlink_unix_socket);
+void updateCachedTime(int update_daylight_info);
+void enterExecutionUnit(int update_cached_time, long long us);
+void exitExecutionUnit(void);
+void resetServerStats(void);
+void activeDefragCycle(void);
+unsigned int getLRUClock(void);
+unsigned int LRU_CLOCK(void);
+const char *evictPolicyToString(void);
+struct redisMemOverhead *getMemoryOverheadData(void);
+void freeMemoryOverheadData(struct redisMemOverhead *mh);
+void checkChildrenDone(void);
+int setOOMScoreAdj(int process_class);
+void rejectCommandFormat(client *c, const char *fmt, ...);
+void *activeDefragAlloc(void *ptr);
+robj *activeDefragStringOb(robj* ob);
+void dismissSds(sds s);
+void dismissMemory(void* ptr, size_t size_hint);
+void dismissMemoryInChild(void);
+
+#define RESTART_SERVER_NONE 0
+#define RESTART_SERVER_GRACEFULLY (1<<0) /* Do proper shutdown. */
+#define RESTART_SERVER_CONFIG_REWRITE (1<<1) /* CONFIG REWRITE before restart.*/
+int restartServer(int flags, mstime_t delay);
+
+/* Set data type */
+robj *setTypeCreate(sds value, size_t size_hint);
+int setTypeAdd(robj *subject, sds value);
+int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
+int setTypeRemove(robj *subject, sds value);
+int setTypeRemoveAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
+int setTypeIsMember(robj *subject, sds value);
+int setTypeIsMemberAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
+setTypeIterator *setTypeInitIterator(robj *subject);
+void setTypeReleaseIterator(setTypeIterator *si);
+int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele);
+sds setTypeNextObject(setTypeIterator *si);
+int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele);
+unsigned long setTypeSize(const robj *subject);
+void setTypeConvert(robj *subject, int enc);
+int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic);
+robj *setTypeDup(robj *o);
+
+/* Hash data type */
+#define HASH_SET_TAKE_FIELD (1<<0)
+#define HASH_SET_TAKE_VALUE (1<<1)
+#define HASH_SET_COPY 0
+
+void hashTypeConvert(robj *o, int enc);
+void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
+int hashTypeExists(robj *o, sds key);
+int hashTypeDelete(robj *o, sds key);
+unsigned long hashTypeLength(const robj *o);
+hashTypeIterator *hashTypeInitIterator(robj *subject);
+void hashTypeReleaseIterator(hashTypeIterator *hi);
+int hashTypeNext(hashTypeIterator *hi);
+void hashTypeCurrentFromListpack(hashTypeIterator *hi, int what,
+ unsigned char **vstr,
+ unsigned int *vlen,
+ long long *vll);
+sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what);
+void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr, unsigned int *vlen, long long *vll);
+sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what);
+robj *hashTypeLookupWriteOrCreate(client *c, robj *key);
+robj *hashTypeGetValueObject(robj *o, sds field);
+int hashTypeSet(robj *o, sds field, sds value, int flags);
+robj *hashTypeDup(robj *o);
+
+/* Pub / Sub */
+int pubsubUnsubscribeAllChannels(client *c, int notify);
+int pubsubUnsubscribeShardAllChannels(client *c, int notify);
+void pubsubUnsubscribeShardChannels(robj **channels, unsigned int count);
+int pubsubUnsubscribeAllPatterns(client *c, int notify);
+int pubsubPublishMessage(robj *channel, robj *message, int sharded);
+int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded);
+void addReplyPubsubMessage(client *c, robj *channel, robj *msg, robj *message_bulk);
+int serverPubsubSubscriptionCount(void);
+int serverPubsubShardSubscriptionCount(void);
+size_t pubsubMemOverhead(client *c);
+
+/* Keyspace events notification */
+void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid);
+int keyspaceEventsStringToFlags(char *classes);
+sds keyspaceEventsFlagsToString(int flags);
+
+/* Configuration */
+/* Configuration Flags */
+#define MODIFIABLE_CONFIG 0 /* This is the implied default for a standard
+ * config, which is mutable. */
+#define IMMUTABLE_CONFIG (1ULL<<0) /* Can this value only be set at startup? */
+#define SENSITIVE_CONFIG (1ULL<<1) /* Does this value contain sensitive information */
+#define DEBUG_CONFIG (1ULL<<2) /* Values that are useful for debugging. */
+#define MULTI_ARG_CONFIG (1ULL<<3) /* This config receives multiple arguments. */
+#define HIDDEN_CONFIG (1ULL<<4) /* This config is hidden in `config get <pattern>` (used for tests/debugging) */
+#define PROTECTED_CONFIG (1ULL<<5) /* Becomes immutable if enable-protected-configs is enabled. */
+#define DENY_LOADING_CONFIG (1ULL<<6) /* This config is forbidden during loading. */
+#define ALIAS_CONFIG (1ULL<<7) /* For configs with multiple names, this flag is set on the alias. */
+#define MODULE_CONFIG (1ULL<<8) /* This config is a module config */
+#define VOLATILE_CONFIG (1ULL<<9) /* The config is a reference to the config data and not the config data itself (ex.
+ * a file name containing more configuration like a tls key). In this case we want
+ * to apply the configuration change even if the new config value is the same as
+ * the old. */
+
+#define INTEGER_CONFIG 0 /* No flags means a simple integer configuration */
+#define MEMORY_CONFIG (1<<0) /* Indicates if this value can be loaded as a memory value */
+#define PERCENT_CONFIG (1<<1) /* Indicates if this value can be loaded as a percent (and stored as a negative int) */
+#define OCTAL_CONFIG (1<<2) /* This value uses octal representation */
+
+/* Enum Configs contain an array of configEnum objects that match a string with an integer. */
+typedef struct configEnum {
+    char *name; /* textual form of the enum value */
+    int val;    /* integer value the string maps to */
+} configEnum;
+
+/* Type of configuration. */
+typedef enum {
+    BOOL_CONFIG,    /* boolean (yes/no) config */
+    NUMERIC_CONFIG, /* integer-valued config */
+    STRING_CONFIG,  /* C-string valued config */
+    SDS_CONFIG,     /* sds-string valued config */
+    ENUM_CONFIG,    /* value chosen from a configEnum table */
+    SPECIAL_CONFIG, /* custom get/set handling (assumption -- confirm in config.c) */
+} configType;
+
+void loadServerConfig(char *filename, char config_from_stdin, char *options);
+void appendServerSaveParams(time_t seconds, int changes);
+void resetServerSaveParams(void);
+struct rewriteConfigState; /* Forward declaration to export API. */
+int rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force);
+void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option);
+int rewriteConfig(char *path, int force_write);
+void initConfigValues(void);
+void removeConfig(sds name);
+sds getConfigDebugInfo(void);
+int allowProtectedAction(int config, client *c);
+void initServerClientMemUsageBuckets(void);
+void freeServerClientMemUsageBuckets(void);
+
+/* Module Configuration */
+typedef struct ModuleConfig ModuleConfig;
+int performModuleConfigSetFromName(sds name, sds value, const char **err);
+int performModuleConfigSetDefaultFromName(sds name, const char **err);
+void addModuleBoolConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val);
+void addModuleStringConfig(const char *module_name, const char *name, int flags, void *privdata, sds default_val);
+void addModuleEnumConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val, configEnum *enum_vals);
+void addModuleNumericConfig(const char *module_name, const char *name, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper);
+void addModuleConfigApply(list *module_configs, ModuleConfig *module_config);
+int moduleConfigApplyConfig(list *module_configs, const char **err, const char **err_arg_name);
+int getModuleBoolConfig(ModuleConfig *module_config);
+int setModuleBoolConfig(ModuleConfig *config, int val, const char **err);
+sds getModuleStringConfig(ModuleConfig *module_config);
+int setModuleStringConfig(ModuleConfig *config, sds strval, const char **err);
+int getModuleEnumConfig(ModuleConfig *module_config);
+int setModuleEnumConfig(ModuleConfig *config, int val, const char **err);
+long long getModuleNumericConfig(ModuleConfig *module_config);
+int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err);
+
+/* db.c -- Keyspace access API */
+int removeExpire(redisDb *db, robj *key);
+void deleteExpiredKeyAndPropagate(redisDb *db, robj *keyobj);
+void propagateDeletion(redisDb *db, robj *key, int lazy);
+int keyIsExpired(redisDb *db, robj *key);
+long long getExpire(redisDb *db, robj *key);
+void setExpire(client *c, redisDb *db, robj *key, long long when);
+int checkAlreadyExpired(long long when);
+robj *lookupKeyRead(redisDb *db, robj *key);
+robj *lookupKeyWrite(redisDb *db, robj *key);
+robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply);
+robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply);
+robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags);
+robj *lookupKeyWriteWithFlags(redisDb *db, robj *key, int flags);
+robj *objectCommandLookup(client *c, robj *key);
+robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply);
+int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
+ long long lru_clock, int lru_multiplier);
+#define LOOKUP_NONE 0
+#define LOOKUP_NOTOUCH (1<<0) /* Don't update LRU. */
+#define LOOKUP_NONOTIFY (1<<1) /* Don't trigger keyspace event on key misses. */
+#define LOOKUP_NOSTATS (1<<2) /* Don't update keyspace hits/misses counters. */
+#define LOOKUP_WRITE (1<<3) /* Delete expired keys even in replicas. */
+#define LOOKUP_NOEXPIRE (1<<4) /* Avoid deleting lazy expired keys. */
+#define LOOKUP_NOEFFECTS (LOOKUP_NONOTIFY | LOOKUP_NOSTATS | LOOKUP_NOTOUCH | LOOKUP_NOEXPIRE) /* Avoid any effects from fetching the key */
+
+void dbAdd(redisDb *db, robj *key, robj *val);
+int dbAddRDBLoad(redisDb *db, sds key, robj *val);
+void dbReplaceValue(redisDb *db, robj *key, robj *val);
+
+#define SETKEY_KEEPTTL 1
+#define SETKEY_NO_SIGNAL 2
+#define SETKEY_ALREADY_EXIST 4
+#define SETKEY_DOESNT_EXIST 8
+#define SETKEY_ADD_OR_UPDATE 16 /* Key most likely doesn't exist */
+void setKey(client *c, redisDb *db, robj *key, robj *val, int flags);
+robj *dbRandomKey(redisDb *db);
+int dbGenericDelete(redisDb *db, robj *key, int async, int flags);
+int dbSyncDelete(redisDb *db, robj *key);
+int dbDelete(redisDb *db, robj *key);
+robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o);
+
+#define EMPTYDB_NO_FLAGS 0 /* No flags. */
+#define EMPTYDB_ASYNC (1<<0) /* Reclaim memory in another thread. */
+#define EMPTYDB_NOFUNCTIONS (1<<1) /* Indicate not to flush the functions. */
+long long emptyData(int dbnum, int flags, void(callback)(dict*));
+long long emptyDbStructure(redisDb *dbarray, int dbnum, int async, void(callback)(dict*));
+void flushAllDataAndResetRDB(int flags);
+long long dbTotalServerKeyCount(void);
+redisDb *initTempDb(void);
+void discardTempDb(redisDb *tempDb, void(callback)(dict*));
+
+
+int selectDb(client *c, int id);
+void signalModifiedKey(client *c, redisDb *db, robj *key);
+void signalFlushedDb(int dbid, int async);
+void scanGenericCommand(client *c, robj *o, unsigned long cursor);
+int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor);
+int dbAsyncDelete(redisDb *db, robj *key);
+void emptyDbAsync(redisDb *db);
+size_t lazyfreeGetPendingObjectsCount(void);
+size_t lazyfreeGetFreedObjectsCount(void);
+void lazyfreeResetStats(void);
+void freeObjAsync(robj *key, robj *obj, int dbid);
+void freeReplicationBacklogRefMemAsync(list *blocks, rax *index);
+
+/* API to get key arguments from commands */
+#define GET_KEYSPEC_DEFAULT 0
+#define GET_KEYSPEC_INCLUDE_NOT_KEYS (1<<0) /* Consider 'fake' keys as keys */
+#define GET_KEYSPEC_RETURN_PARTIAL (1<<1) /* Return all keys that can be found */
+
+int getKeysFromCommandWithSpecs(struct redisCommand *cmd, robj **argv, int argc, int search_flags, getKeysResult *result);
+keyReference *getKeysPrepareResult(getKeysResult *result, int numkeys);
+int getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int doesCommandHaveKeys(struct redisCommand *cmd);
+int getChannelsFromCommand(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int doesCommandHaveChannelsWithFlags(struct redisCommand *cmd, int flags);
+void getKeysFreeResult(getKeysResult *result);
+int sintercardGetKeys(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result);
+int zunionInterDiffGetKeys(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result);
+int zunionInterDiffStoreGetKeys(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result);
+int evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int functionGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int sortROGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int lmpopGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int blmpopGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int zmpopGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int bzmpopGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int setGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+int bitfieldGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
+
+unsigned short crc16(const char *buf, int len);
+
+/* Sentinel */
+void initSentinelConfig(void);
+void initSentinel(void);
+void sentinelTimer(void);
+const char *sentinelHandleConfiguration(char **argv, int argc);
+void queueSentinelConfig(sds *argv, int argc, int linenum, sds line);
+void loadSentinelConfigFromQueue(void);
+void sentinelIsRunning(void);
+void sentinelCheckConfigFile(void);
+void sentinelCommand(client *c);
+void sentinelInfoCommand(client *c);
+void sentinelPublishCommand(client *c);
+void sentinelRoleCommand(client *c);
+
+/* redis-check-rdb & aof */
+int redis_check_rdb(char *rdbfilename, FILE *fp);
+int redis_check_rdb_main(int argc, char **argv, FILE *fp);
+int redis_check_aof_main(int argc, char **argv);
+
+/* Scripting */
+void scriptingInit(int setup);
+int ldbRemoveChild(pid_t pid);
+void ldbKillForkedSessions(void);
+int ldbPendingChildren(void);
+sds luaCreateFunction(client *c, robj *body);
+void luaLdbLineHook(lua_State *lua, lua_Debug *ar);
+void freeLuaScriptsAsync(dict *lua_scripts);
+void freeFunctionsAsync(functionsLibCtx *lib_ctx);
+int ldbIsEnabled(void);
+void ldbLog(sds entry);
+void ldbLogRedisReply(char *reply);
+void sha1hex(char *digest, char *script, size_t len);
+unsigned long evalMemory(void);
+dict* evalScriptsDict(void);
+unsigned long evalScriptsMemory(void);
+uint64_t evalGetCommandFlags(client *c, uint64_t orig_flags);
+uint64_t fcallGetCommandFlags(client *c, uint64_t orig_flags);
+int isInsideYieldingLongCommand(void);
+
+typedef struct luaScript {
+ uint64_t flags;
+ robj *body;
+} luaScript;
+/* Cache of recently used small arguments to avoid malloc calls. */
+#define LUA_CMD_OBJCACHE_SIZE 32
+#define LUA_CMD_OBJCACHE_MAX_LEN 64
+
+/* Blocked clients API */
+void processUnblockedClients(void);
+void initClientBlockingState(client *c);
+void blockClient(client *c, int btype);
+void unblockClient(client *c, int queue_for_reprocessing);
+void unblockClientOnTimeout(client *c);
+void unblockClientOnError(client *c, const char *err_str);
+void queueClientForReprocessing(client *c);
+void replyToBlockedClientTimedOut(client *c);
+int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int unit);
+void disconnectAllBlockedClients(void);
+void handleClientsBlockedOnKeys(void);
+void signalKeyAsReady(redisDb *db, robj *key, int type);
+void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, int unblock_on_nokey);
+void blockClientShutdown(client *c);
+void blockPostponeClient(client *c);
+void blockForReplication(client *c, mstime_t timeout, long long offset, long numreplicas);
+void blockForAofFsync(client *c, mstime_t timeout, long long offset, int numlocal, long numreplicas);
+void signalDeletedKeyAsReady(redisDb *db, robj *key, int type);
+void updateStatsOnUnblock(client *c, long blocked_us, long reply_us, int had_errors);
+void scanDatabaseForDeletedKeys(redisDb *emptied, redisDb *replaced_with);
/* Totals of keys with blocked clients, and of those whose waiters also
 * unblock on key deletion (unblock_on_nokey). */
void totalNumberOfBlockingKeys(unsigned long *blocking_keys, unsigned long *blocking_keys_on_nokey);
+void blockedBeforeSleep(void);
+
+/* timeout.c -- Blocked clients timeout and connections timeout. */
+void addClientToTimeoutTable(client *c);
+void removeClientFromTimeoutTable(client *c);
+void handleBlockedClientsTimeout(void);
+int clientsCronHandleTimeout(client *c, mstime_t now_ms);
+
+/* expire.c -- Handling of expired keys */
+void activeExpireCycle(int type);
+void expireSlaveKeys(void);
+void rememberSlaveKeyWithExpire(redisDb *db, robj *key);
+void flushSlaveKeysWithExpireList(void);
+size_t getSlaveKeyWithExpireCount(void);
+
+/* evict.c -- maxmemory handling and LRU eviction. */
+void evictionPoolAlloc(void);
+#define LFU_INIT_VAL 5
+unsigned long LFUGetTimeInMinutes(void);
+uint8_t LFULogIncr(uint8_t value);
+unsigned long LFUDecrAndReturn(robj *o);
+#define EVICT_OK 0
+#define EVICT_RUNNING 1
+#define EVICT_FAIL 2
+int performEvictions(void);
+void startEvictionTimeProc(void);
+
+/* Keys hashing / comparison functions for dict.c hash tables. */
+uint64_t dictSdsHash(const void *key);
+uint64_t dictSdsCaseHash(const void *key);
+int dictSdsKeyCompare(dict *d, const void *key1, const void *key2);
+int dictSdsKeyCaseCompare(dict *d, const void *key1, const void *key2);
+void dictSdsDestructor(dict *d, void *val);
+void dictListDestructor(dict *d, void *val);
+void *dictSdsDup(dict *d, const void *key);
+
+/* Git SHA1 */
+char *redisGitSHA1(void);
+char *redisGitDirty(void);
+uint64_t redisBuildId(void);
+const char *redisBuildIdRaw(void);
+char *redisBuildIdString(void);
+
+/* Commands prototypes */
+void authCommand(client *c);
+void pingCommand(client *c);
+void echoCommand(client *c);
+void commandCommand(client *c);
+void commandCountCommand(client *c);
+void commandListCommand(client *c);
+void commandInfoCommand(client *c);
+void commandGetKeysCommand(client *c);
+void commandGetKeysAndFlagsCommand(client *c);
+void commandHelpCommand(client *c);
+void commandDocsCommand(client *c);
+void setCommand(client *c);
+void setnxCommand(client *c);
+void setexCommand(client *c);
+void psetexCommand(client *c);
+void getCommand(client *c);
+void getexCommand(client *c);
+void getdelCommand(client *c);
+void delCommand(client *c);
+void unlinkCommand(client *c);
+void existsCommand(client *c);
+void setbitCommand(client *c);
+void getbitCommand(client *c);
+void bitfieldCommand(client *c);
+void bitfieldroCommand(client *c);
+void setrangeCommand(client *c);
+void getrangeCommand(client *c);
+void incrCommand(client *c);
+void decrCommand(client *c);
+void incrbyCommand(client *c);
+void decrbyCommand(client *c);
+void incrbyfloatCommand(client *c);
+void selectCommand(client *c);
+void swapdbCommand(client *c);
+void randomkeyCommand(client *c);
+void keysCommand(client *c);
+void scanCommand(client *c);
+void dbsizeCommand(client *c);
+void lastsaveCommand(client *c);
+void saveCommand(client *c);
+void bgsaveCommand(client *c);
+void bgrewriteaofCommand(client *c);
+void shutdownCommand(client *c);
+void slowlogCommand(client *c);
+void moveCommand(client *c);
+void copyCommand(client *c);
+void renameCommand(client *c);
+void renamenxCommand(client *c);
+void lpushCommand(client *c);
+void rpushCommand(client *c);
+void lpushxCommand(client *c);
+void rpushxCommand(client *c);
+void linsertCommand(client *c);
+void lpopCommand(client *c);
+void rpopCommand(client *c);
+void lmpopCommand(client *c);
+void llenCommand(client *c);
+void lindexCommand(client *c);
+void lrangeCommand(client *c);
+void ltrimCommand(client *c);
+void typeCommand(client *c);
+void lsetCommand(client *c);
+void saddCommand(client *c);
+void sremCommand(client *c);
+void smoveCommand(client *c);
+void sismemberCommand(client *c);
+void smismemberCommand(client *c);
+void scardCommand(client *c);
+void spopCommand(client *c);
+void srandmemberCommand(client *c);
+void sinterCommand(client *c);
+void sinterCardCommand(client *c);
+void sinterstoreCommand(client *c);
+void sunionCommand(client *c);
+void sunionstoreCommand(client *c);
+void sdiffCommand(client *c);
+void sdiffstoreCommand(client *c);
+void sscanCommand(client *c);
+void syncCommand(client *c);
+void flushdbCommand(client *c);
+void flushallCommand(client *c);
+void sortCommand(client *c);
+void sortroCommand(client *c);
+void lremCommand(client *c);
+void lposCommand(client *c);
+void rpoplpushCommand(client *c);
+void lmoveCommand(client *c);
+void infoCommand(client *c);
+void mgetCommand(client *c);
+void monitorCommand(client *c);
+void expireCommand(client *c);
+void expireatCommand(client *c);
+void pexpireCommand(client *c);
+void pexpireatCommand(client *c);
+void getsetCommand(client *c);
+void ttlCommand(client *c);
+void touchCommand(client *c);
+void pttlCommand(client *c);
+void expiretimeCommand(client *c);
+void pexpiretimeCommand(client *c);
+void persistCommand(client *c);
+void replicaofCommand(client *c);
+void roleCommand(client *c);
+void debugCommand(client *c);
+void msetCommand(client *c);
+void msetnxCommand(client *c);
+void zaddCommand(client *c);
+void zincrbyCommand(client *c);
+void zrangeCommand(client *c);
+void zrangebyscoreCommand(client *c);
+void zrevrangebyscoreCommand(client *c);
+void zrangebylexCommand(client *c);
+void zrevrangebylexCommand(client *c);
+void zcountCommand(client *c);
+void zlexcountCommand(client *c);
+void zrevrangeCommand(client *c);
+void zcardCommand(client *c);
+void zremCommand(client *c);
+void zscoreCommand(client *c);
+void zmscoreCommand(client *c);
+void zremrangebyscoreCommand(client *c);
+void zremrangebylexCommand(client *c);
+void zpopminCommand(client *c);
+void zpopmaxCommand(client *c);
+void zmpopCommand(client *c);
+void bzpopminCommand(client *c);
+void bzpopmaxCommand(client *c);
+void bzmpopCommand(client *c);
+void zrandmemberCommand(client *c);
+void multiCommand(client *c);
+void execCommand(client *c);
+void discardCommand(client *c);
+void blpopCommand(client *c);
+void brpopCommand(client *c);
+void blmpopCommand(client *c);
+void brpoplpushCommand(client *c);
+void blmoveCommand(client *c);
+void appendCommand(client *c);
+void strlenCommand(client *c);
+void zrankCommand(client *c);
+void zrevrankCommand(client *c);
+void hsetCommand(client *c);
+void hsetnxCommand(client *c);
+void hgetCommand(client *c);
+void hmgetCommand(client *c);
+void hdelCommand(client *c);
+void hlenCommand(client *c);
+void hstrlenCommand(client *c);
+void zremrangebyrankCommand(client *c);
+void zunionstoreCommand(client *c);
+void zinterstoreCommand(client *c);
+void zdiffstoreCommand(client *c);
+void zunionCommand(client *c);
+void zinterCommand(client *c);
+void zinterCardCommand(client *c);
+void zrangestoreCommand(client *c);
+void zdiffCommand(client *c);
+void zscanCommand(client *c);
+void hkeysCommand(client *c);
+void hvalsCommand(client *c);
+void hgetallCommand(client *c);
+void hexistsCommand(client *c);
+void hscanCommand(client *c);
+void hrandfieldCommand(client *c);
+void configSetCommand(client *c);
+void configGetCommand(client *c);
+void configResetStatCommand(client *c);
+void configRewriteCommand(client *c);
+void configHelpCommand(client *c);
+void hincrbyCommand(client *c);
+void hincrbyfloatCommand(client *c);
+void subscribeCommand(client *c);
+void unsubscribeCommand(client *c);
+void psubscribeCommand(client *c);
+void punsubscribeCommand(client *c);
+void publishCommand(client *c);
+void pubsubCommand(client *c);
+void spublishCommand(client *c);
+void ssubscribeCommand(client *c);
+void sunsubscribeCommand(client *c);
+void watchCommand(client *c);
+void unwatchCommand(client *c);
+void clusterCommand(client *c);
+void restoreCommand(client *c);
+void migrateCommand(client *c);
+void askingCommand(client *c);
+void readonlyCommand(client *c);
+void readwriteCommand(client *c);
+int verifyDumpPayload(unsigned char *p, size_t len, uint16_t *rdbver_ptr);
+void dumpCommand(client *c);
+void objectCommand(client *c);
+void memoryCommand(client *c);
+void clientCommand(client *c);
+void helloCommand(client *c);
+void clientSetinfoCommand(client *c);
+void evalCommand(client *c);
+void evalRoCommand(client *c);
+void evalShaCommand(client *c);
+void evalShaRoCommand(client *c);
+void scriptCommand(client *c);
+void fcallCommand(client *c);
+void fcallroCommand(client *c);
+void functionLoadCommand(client *c);
+void functionDeleteCommand(client *c);
+void functionKillCommand(client *c);
+void functionStatsCommand(client *c);
+void functionListCommand(client *c);
+void functionHelpCommand(client *c);
+void functionFlushCommand(client *c);
+void functionRestoreCommand(client *c);
+void functionDumpCommand(client *c);
+void timeCommand(client *c);
+void bitopCommand(client *c);
+void bitcountCommand(client *c);
+void bitposCommand(client *c);
+void replconfCommand(client *c);
+void waitCommand(client *c);
+void waitaofCommand(client *c);
+void georadiusbymemberCommand(client *c);
+void georadiusbymemberroCommand(client *c);
+void georadiusCommand(client *c);
+void georadiusroCommand(client *c);
+void geoaddCommand(client *c);
+void geohashCommand(client *c);
+void geoposCommand(client *c);
+void geodistCommand(client *c);
+void geosearchCommand(client *c);
+void geosearchstoreCommand(client *c);
+void pfselftestCommand(client *c);
+void pfaddCommand(client *c);
+void pfcountCommand(client *c);
+void pfmergeCommand(client *c);
+void pfdebugCommand(client *c);
+void latencyCommand(client *c);
+void moduleCommand(client *c);
+void securityWarningCommand(client *c);
+void xaddCommand(client *c);
+void xrangeCommand(client *c);
+void xrevrangeCommand(client *c);
+void xlenCommand(client *c);
+void xreadCommand(client *c);
+void xgroupCommand(client *c);
+void xsetidCommand(client *c);
+void xackCommand(client *c);
+void xpendingCommand(client *c);
+void xclaimCommand(client *c);
+void xautoclaimCommand(client *c);
+void xinfoCommand(client *c);
+void xdelCommand(client *c);
+void xtrimCommand(client *c);
+void lolwutCommand(client *c);
+void aclCommand(client *c);
+void lcsCommand(client *c);
+void quitCommand(client *c);
+void resetCommand(client *c);
+void failoverCommand(client *c);
+
+#if defined(__GNUC__)
+void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
+void free(void *ptr) __attribute__ ((deprecated));
+void *malloc(size_t size) __attribute__ ((deprecated));
+void *realloc(void *ptr, size_t size) __attribute__ ((deprecated));
+#endif
+
+/* Debugging stuff */
+void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line);
+void _serverAssert(const char *estr, const char *file, int line);
+#ifdef __GNUC__
+void _serverPanic(const char *file, int line, const char *msg, ...)
+ __attribute__ ((format (printf, 3, 4)));
+#else
+void _serverPanic(const char *file, int line, const char *msg, ...);
+#endif
+void serverLogObjectDebugInfo(const robj *o);
+void sigsegvHandler(int sig, siginfo_t *info, void *secret);
+const char *getSafeInfoString(const char *s, size_t len, char **tmp);
+dict *genInfoSectionDict(robj **argv, int argc, char **defaults, int *out_all, int *out_everything);
+void releaseInfoSectionDict(dict *sec);
+sds genRedisInfoString(dict *section_dict, int all_sections, int everything);
+sds genModulesInfoString(sds info);
+void applyWatchdogPeriod(void);
+void watchdogScheduleSignal(int period);
+void serverLogHexDump(int level, char *descr, void *value, size_t len);
+int memtest_preserving_test(unsigned long *m, size_t bytes, int passes);
+void mixDigest(unsigned char *digest, const void *ptr, size_t len);
+void xorDigest(unsigned char *digest, const void *ptr, size_t len);
+sds catSubCommandFullname(const char *parent_name, const char *sub_name);
+void commandAddSubcommand(struct redisCommand *parent, struct redisCommand *subcommand, const char *declared_name);
+void debugDelay(int usec);
+void killIOThreads(void);
+void killThreads(void);
+void makeThreadKillable(void);
+void swapMainDbWithTempDb(redisDb *tempDb);
+
+/* Use macro for checking log level to avoid evaluating arguments in cases log
+ * should be ignored due to low level. */
+#define serverLog(level, ...) do {\
+ if (((level)&0xff) < server.verbosity) break;\
+ _serverLog(level, __VA_ARGS__);\
+ } while(0)
+
+#define redisDebug(fmt, ...) \
+ printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
+#define redisDebugMark() \
+ printf("-- MARK %s:%d --\n", __FILE__, __LINE__)
+
+int iAmMaster(void);
+
+#define STRINGIFY_(x) #x
+#define STRINGIFY(x) STRINGIFY_(x)
+
+#endif
diff --git a/src/setcpuaffinity.c b/src/setcpuaffinity.c
new file mode 100644
index 0000000..77b1621
--- /dev/null
+++ b/src/setcpuaffinity.c
@@ -0,0 +1,155 @@
+/* ==========================================================================
+ * setcpuaffinity.c - Linux/BSD setcpuaffinity.
+ * --------------------------------------------------------------------------
+ * Copyright (C) 2020 zhenwei pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+ * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * ==========================================================================
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef __linux__
+#include <sched.h>
+#endif
+#ifdef __FreeBSD__
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#endif
+#ifdef __DragonFly__
+#include <pthread.h>
+#include <pthread_np.h>
+#endif
+#ifdef __NetBSD__
+#include <pthread.h>
+#include <sched.h>
+#endif
+#include "config.h"
+
+#ifdef USE_SETCPUAFFINITY
/* Return the substring that follows the first occurrence of 'sep' in 'q',
 * or NULL when 'q' is NULL or contains no such separator. */
static const char *next_token(const char *q, int sep) {
    const char *hit;

    if (q == NULL)
        return NULL;

    hit = strchr(q, sep);
    return hit ? hit + 1 : NULL;
}
+
/* Parse a non-negative decimal number at the start of 'str'.
 * On success store the value in '*result', point '*end' one past the last
 * digit consumed, and return 0. Return -1 when 'str' is NULL, empty, or
 * does not start with a digit.
 *
 * The leading character is cast to unsigned char before isdigit(): passing
 * a plain (possibly negative) char is undefined behavior (CERT STR37-C). */
static int next_num(const char *str, char **end, int *result) {
    if (!str || *str == '\0' || !isdigit((unsigned char)*str))
        return -1;

    *result = strtoul(str, end, 10);
    if (str == *end)
        return -1;

    return 0;
}
+
/* set current thread cpu affinity to cpu list, this function works like
 * taskset command (actually cpulist parsing logic reference to util-linux).
 * example of this function: "0,2,3", "0,2-3", "0-20:2". */
void setcpuaffinity(const char *cpulist) {
    const char *p, *q;
    char *end = NULL;
#ifdef __linux__
    cpu_set_t cpuset;
#endif
#if defined (__FreeBSD__) || defined(__DragonFly__)
    cpuset_t cpuset;
#endif
#ifdef __NetBSD__
    cpuset_t *cpuset;   /* NetBSD cpusets are heap allocated */
#endif

    if (!cpulist)
        return;

#ifndef __NetBSD__
    CPU_ZERO(&cpuset);
#else
    /* NOTE(review): cpuset_create() may return NULL (unchecked here), and
     * the early 'return's below leak the set on a parse error -- confirm
     * whether a cpuset_destroy() cleanup path is wanted. */
    cpuset = cpuset_create();
#endif

    /* Walk the comma separated tokens: 'p' points at the current token,
     * 'q' at the start of the next one (NULL after the last). */
    q = cpulist;
    while (p = q, q = next_token(q, ','), p) {
        int a, b, s;            /* range start, range end, stride */
        const char *c1, *c2;

        /* Every token must start with a number (a single cpu, or the
         * start of a range). A malformed token aborts the whole call,
         * leaving the thread affinity unchanged. */
        if (next_num(p, &end, &a) != 0)
            return;

        b = a;
        s = 1;
        p = end;

        /* A '-' belonging to this token (i.e. appearing before the next
         * ',') makes it a range "a-b"; a further ':' adds a stride,
         * "a-b:s", as in util-linux cpulist syntax. */
        c1 = next_token(p, '-');
        c2 = next_token(p, ',');

        if (c1 != NULL && (c2 == NULL || c1 < c2)) {
            if (next_num(c1, &end, &b) != 0)
                return;

            c1 = end && *end ? next_token(end, ':') : NULL;
            if (c1 != NULL && (c2 == NULL || c1 < c2)) {
                if (next_num(c1, &end, &s) != 0)
                    return;

                if (s == 0)     /* zero stride would never terminate */
                    return;
            }
        }

        if ((a > b))            /* reversed range is a parse error */
            return;

        /* Mark every cpu of the (possibly strided) range in the set. */
        while (a <= b) {
#ifndef __NetBSD__
            CPU_SET(a, &cpuset);
#else
            cpuset_set(a, cpuset);
#endif
            a += s;
        }
    }

    /* Trailing junk after the last parsed number invalidates the list. */
    if (end && *end)
        return;

    /* Apply the set to the calling thread via the platform API. */
#ifdef __linux__
    sched_setaffinity(0, sizeof(cpuset), &cpuset);
#endif
#ifdef __FreeBSD__
    cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(cpuset), &cpuset);
#endif
#ifdef __DragonFly__
    pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
#endif
#ifdef __NetBSD__
    pthread_setaffinity_np(pthread_self(), cpuset_size(cpuset), cpuset);
    cpuset_destroy(cpuset);
#endif
}
+
+#endif /* USE_SETCPUAFFINITY */
diff --git a/src/setproctitle.c b/src/setproctitle.c
new file mode 100644
index 0000000..0194023
--- /dev/null
+++ b/src/setproctitle.c
@@ -0,0 +1,331 @@
+/* ==========================================================================
+ * setproctitle.c - Linux/Darwin setproctitle.
+ * --------------------------------------------------------------------------
+ * Copyright (C) 2010 William Ahern
+ * Copyright (C) 2013 Salvatore Sanfilippo
+ * Copyright (C) 2013 Stam He
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+ * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * ==========================================================================
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stddef.h> /* NULL size_t */
+#include <stdarg.h> /* va_list va_start va_end */
+#include <stdlib.h> /* malloc(3) setenv(3) clearenv(3) setproctitle(3) getprogname(3) */
+#include <stdio.h> /* vsnprintf(3) snprintf(3) */
+
+#include <string.h> /* strlen(3) strchr(3) strdup(3) memset(3) memcpy(3) */
+
+#include <errno.h> /* errno program_invocation_name program_invocation_short_name */
+
+#if !defined(HAVE_SETPROCTITLE)
+#if (defined __NetBSD__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __DragonFly__)
+#define HAVE_SETPROCTITLE 1
+#else
+#define HAVE_SETPROCTITLE 0
+#endif
+#endif
+
+
+#if !HAVE_SETPROCTITLE
+#if (defined __linux || defined __APPLE__)
+
+#ifdef __GLIBC__
+#define HAVE_CLEARENV
+#endif
+
+extern char **environ;
+
/* Book-keeping for setproctitle(), filled in once by spt_init().
 * SPT.base != NULL is what arms setproctitle(). */
static struct {
    /* original value */
    const char *arg0;

    /* title space available */
    char *base, *end;

    /* pointer to original nul character within base */
    char *nul;

    _Bool reset;    /* set once the whole [base,end) area has been zeroed */
    int error;      /* errno recorded by spt_init()/setproctitle() failures */
} SPT;
+
+
+#ifndef SPT_MIN
+#define SPT_MIN(a, b) (((a) < (b))? (a) : (b))
+#endif
+
/* Return the smaller of two sizes. */
static inline size_t spt_min(size_t a, size_t b) {
    return (a < b) ? a : b;
} /* spt_min() */
+
+
/*
 * For discussion on the portability of the various methods, see
 * http://lists.freebsd.org/pipermail/freebsd-stable/2008-June/043136.html
 */
/* Empty the process environment. Returns 0 on success, an errno value on
 * allocation failure. Without clearenv(3), environ is simply re-pointed at
 * a freshly malloc'd empty vector; the old array and strings are left in
 * place on purpose (they may live in the original argv/stack area). */
int spt_clearenv(void) {
#ifdef HAVE_CLEARENV
    return clearenv();
#else
    extern char **environ;
    /* NOTE(review): 'tmp' is static but re-malloc'd on every call, so a
     * second call leaks the previous vector -- presumably only ever called
     * once from spt_copyenv(); confirm. */
    static char **tmp;

    if (!(tmp = malloc(sizeof *tmp)))
        return errno;

    tmp[0] = NULL;
    environ = tmp;

    return 0;
#endif
} /* spt_clearenv() */
+
+
/* Deep-copy the environment out of the region spt_init() wants to reuse
 * for the process title.
 *
 * 'envc' is the number of entries in 'oldenv'; 'oldenv' is the environ
 * array spt_init() observed. Returns 0 on success or an errno value.
 * Does nothing (returns 0) when 'environ' no longer points at 'oldenv',
 * since the strings are then already out of harm's way. */
static int spt_copyenv(int envc, char *oldenv[]) {
    extern char **environ;
    char **envcopy = NULL;
    char *eq;
    int i, error;
    size_t envsize; /* was int: (envc+1)*sizeof(char *) is a size_t and
                     * would be truncated by an int on LP64 platforms. */

    if (environ != oldenv)
        return 0;

    /* Copy environ into envcopy before clearing it. Shallow copy is
     * enough as clearenv() only clears the environ array.
     */
    envsize = (envc + 1) * sizeof(char *);
    envcopy = malloc(envsize);
    if (!envcopy)
        return ENOMEM;
    memcpy(envcopy, oldenv, envsize);

    /* Note that the state after clearenv() failure is undefined, but we'll
     * just assume an error means it was left unchanged.
     */
    if ((error = spt_clearenv())) {
        environ = oldenv;
        free(envcopy);
        return error;
    }

    /* Re-register every "NAME=value" entry via setenv(3), which allocates
     * fresh storage outside the argv/environ region. */
    for (i = 0; envcopy[i]; i++) {
        if (!(eq = strchr(envcopy[i], '=')))
            continue;   /* malformed entry without '=': skip it */

        /* Temporarily split the entry at '=' to pass NAME and value. */
        *eq = '\0';
        error = (0 != setenv(envcopy[i], eq + 1, 1))? errno : 0;
        *eq = '=';

        /* On error, do our best to restore state */
        if (error) {
#ifdef HAVE_CLEARENV
            /* We don't assume it is safe to free environ, so we
             * may leak it. As clearenv() was shallow using envcopy
             * here is safe.
             */
            environ = envcopy;
#else
            free(envcopy);
            free(environ); /* Safe to free, we have just alloc'd it */
            environ = oldenv;
#endif
            return error;
        }
    }

    free(envcopy);
    return 0;
} /* spt_copyenv() */
+
+
/* Deep-copy argv[1..] in place (argv[0] is preserved separately by
 * spt_init()) so that overwriting the original argv area for the title
 * cannot corrupt the argument strings. Returns 0 on success, errno on
 * strdup(3) failure.
 *
 * The loop keeps going past 'argc' while argv[i] is non-NULL, so any
 * extra pointers stored after the declared arguments are relocated too. */
static int spt_copyargs(int argc, char *argv[]) {
    char *tmp;
    int i;

    for (i = 1; i < argc || (i >= argc && argv[i]); i++) {
        if (!argv[i])
            continue;

        if (!(tmp = strdup(argv[i])))
            return errno;

        argv[i] = tmp;
    }

    return 0;
} /* spt_copyargs() */
+
/* Initialize and populate SPT to allow a future setproctitle()
 * call.
 *
 * As setproctitle() basically needs to overwrite argv[0], we're
 * trying to determine what is the largest contiguous block
 * starting at argv[0] we can use for this purpose.
 *
 * As this range will overwrite some or all of the argv and environ
 * strings, a deep copy of these two arrays is performed.
 */
void spt_init(int argc, char *argv[]) {
    char **envp = environ;
    char *base, *end, *nul, *tmp;
    int i, error, envc;

    /* No argv[0] at all: leave SPT.base NULL so setproctitle() no-ops. */
    if (!(base = argv[0]))
        return;

    /* We start with end pointing at the end of argv[0] */
    nul = &base[strlen(base)];
    end = nul + 1;

    /* Attempt to extend end as far as we can, while making sure
     * that the range between base and end is only allocated to
     * argv, or anything that immediately follows argv (presumably
     * envp).
     */
    for (i = 0; i < argc || (i >= argc && argv[i]); i++) {
        if (!argv[i] || argv[i] < end)
            continue;

        if (end >= argv[i] && end <= argv[i] + strlen(argv[i]))
            end = argv[i] + strlen(argv[i]) + 1;
    }

    /* In case the envp array was not an immediate extension to argv,
     * scan it explicitly.
     */
    for (i = 0; envp[i]; i++) {
        if (envp[i] < end)
            continue;

        if (end >= envp[i] && end <= envp[i] + strlen(envp[i]))
            end = envp[i] + strlen(envp[i]) + 1;
    }
    envc = i;   /* environment entry count, passed to spt_copyenv() */

    /* We're going to deep copy argv[], but argv[0] will still point to
     * the old memory for the purpose of updating the title so we need
     * to keep the original value elsewhere.
     */
    if (!(SPT.arg0 = strdup(argv[0])))
        goto syerr;

#if __linux__
    /* glibc's program_invocation_name points into the argv area; re-point
     * both globals at stable copies before that area is overwritten. */
    if (!(tmp = strdup(program_invocation_name)))
        goto syerr;

    program_invocation_name = tmp;

    if (!(tmp = strdup(program_invocation_short_name)))
        goto syerr;

    program_invocation_short_name = tmp;
#elif __APPLE__
    if (!(tmp = strdup(getprogname())))
        goto syerr;

    setprogname(tmp);
#endif

    /* Now make a full deep copy of the environment and argv[] */
    if ((error = spt_copyenv(envc, envp)))
        goto error;

    if ((error = spt_copyargs(argc, argv)))
        goto error;

    /* Publish the usable range; setting SPT.base arms setproctitle(). */
    SPT.nul = nul;
    SPT.base = base;
    SPT.end = end;

    return;
syerr:
    error = errno;
error:
    SPT.error = error;
} /* spt_init() */
+
+
#ifndef SPT_MAXTITLE
#define SPT_MAXTITLE 255
#endif

/* Format a new process title into argv[0]'s memory -- the [SPT.base,
 * SPT.end) range computed by spt_init(). A NULL 'fmt' restores the saved
 * original argv[0]. Failures are recorded in SPT.error, never reported. */
void setproctitle(const char *fmt, ...) {
    char buf[SPT_MAXTITLE + 1]; /* use buffer in case argv[0] is passed */
    va_list ap;
    char *nul;
    int len, error;

    /* spt_init() never ran or failed: nothing we can safely overwrite. */
    if (!SPT.base)
        return;

    if (fmt) {
        va_start(ap, fmt);
        len = vsnprintf(buf, sizeof buf, fmt, ap);
        va_end(ap);
    } else {
        len = snprintf(buf, sizeof buf, "%s", SPT.arg0);
    }

    if (len <= 0)
        { error = errno; goto error; }

    if (!SPT.reset) {
        /* First call: zero the entire usable area once. */
        memset(SPT.base, 0, SPT.end - SPT.base);
        SPT.reset = 1;
    } else {
        /* Later calls only need to clear as much as a title can occupy. */
        memset(SPT.base, 0, spt_min(sizeof buf, SPT.end - SPT.base));
    }

    /* Clamp to the available area, always leaving room for the NUL. */
    len = spt_min(len, spt_min(sizeof buf, SPT.end - SPT.base) - 1);
    memcpy(SPT.base, buf, len);
    nul = &SPT.base[len];

    if (nul < SPT.nul) {
        /* New title is shorter than the original argv[0]: the byte at the
         * original terminator position becomes '.' (presumably to mark the
         * title as truncated relative to argv[0] -- TODO confirm rationale). */
        *SPT.nul = '.';
    } else if (nul == SPT.nul && &nul[1] < SPT.end) {
        /* Exactly the original length: pad with a space and move the
         * terminator one byte further, staying inside the usable range. */
        *SPT.nul = ' ';
        *++nul = '\0';
    }

    return;
error:
    SPT.error = error;
} /* setproctitle() */
+
+
+#endif /* __linux || __APPLE__ */
+#endif /* !HAVE_SETPROCTITLE */
+
+#ifdef SETPROCTITLE_TEST_MAIN
+int main(int argc, char *argv[]) {
+ spt_init(argc, argv);
+
+ printf("SPT.arg0: [%p] '%s'\n", SPT.arg0, SPT.arg0);
+ printf("SPT.base: [%p] '%s'\n", SPT.base, SPT.base);
+ printf("SPT.end: [%p] (%d bytes after base)'\n", SPT.end, (int) (SPT.end - SPT.base));
+ return 0;
+}
+#endif
diff --git a/src/sha1.c b/src/sha1.c
new file mode 100644
index 0000000..4d8c140
--- /dev/null
+++ b/src/sha1.c
@@ -0,0 +1,239 @@
+
+/* from valgrind tests */
+
+/* ================ sha1.c ================ */
+/*
+SHA-1 in C
+By Steve Reid <steve@edmweb.com>
+100% Public Domain
+
+Test Vectors (from FIPS PUB 180-1)
+"abc"
+ A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+ 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+A million repetitions of "a"
+ 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+*/
+
+/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */
+/* #define SHA1HANDSOFF * Copies data before messing with it. */
+
+#define SHA1HANDSOFF
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include "solarisfixes.h"
+#include "sha1.h"
+#include "config.h"
+
+#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
+
+/* blk0() and blk() perform the initial expand. */
+/* I got the idea of expanding during the round function from SSLeay */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
+ |(rol(block->l[i],8)&0x00FF00FF))
+#elif BYTE_ORDER == BIG_ENDIAN
+#define blk0(i) block->l[i]
+#else
+#error "Endianness not defined!"
+#endif
+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
+ ^block->l[(i+2)&15]^block->l[i&15],1))
+
+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
+
+
+/* Hash a single 512-bit block. This is the core of the algorithm. */
+
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
+{
+ uint32_t a, b, c, d, e;
+ typedef union {
+ unsigned char c[64];
+ uint32_t l[16];
+ } CHAR64LONG16;
+#ifdef SHA1HANDSOFF
+ CHAR64LONG16 block[1]; /* use array to appear as a pointer */
+ memcpy(block, buffer, 64);
+#else
+ /* The following had better never be used because it causes the
+ * pointer-to-const buffer to be cast into a pointer to non-const.
+ * And the result is written through. I threw a "const" in, hoping
+ * this will cause a diagnostic.
+ */
+ CHAR64LONG16* block = (const CHAR64LONG16*)buffer;
+#endif
+ /* Copy context->state[] to working vars */
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
+ /* 4 rounds of 20 operations each. Loop unrolled. */
+ R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
+ R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
+ R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
+ R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
+ R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
+ R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
+ R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
+ R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
+ R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
+ R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
+ R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
+ R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
+ R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
+ R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
+ R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
+ R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
+ R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
+ R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
+ R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
+ R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+ /* Add the working vars back into context.state[] */
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ /* Wipe variables */
+ a = b = c = d = e = 0;
+#ifdef SHA1HANDSOFF
+ memset(block, '\0', sizeof(block));
+#endif
+}
+
+
+/* SHA1Init - Initialize new context */
+
+void SHA1Init(SHA1_CTX* context)
+{
+ /* SHA1 initialization constants */
+ context->state[0] = 0x67452301;
+ context->state[1] = 0xEFCDAB89;
+ context->state[2] = 0x98BADCFE;
+ context->state[3] = 0x10325476;
+ context->state[4] = 0xC3D2E1F0;
+ context->count[0] = context->count[1] = 0;
+}
+
+/* This source code is referenced from
+ * https://github.com/libevent/libevent/commit/e1d7d3e40a7fd50348d849046fbfd9bf976e643c */
+#if defined(__GNUC__) && __GNUC__ >= 12
+#pragma GCC diagnostic push
+/* Ignore the case where SHA1Transform() is called with a 'char *'; that code
+ * passes a buffer of 64 bytes anyway (at least for now). */
+#pragma GCC diagnostic ignored "-Wstringop-overread"
+#endif
+
+/* Run your data through this. */
+
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len)
+{
+ uint32_t i, j;
+
+ j = context->count[0];
+ if ((context->count[0] += len << 3) < j)
+ context->count[1]++;
+ context->count[1] += (len>>29);
+ j = (j >> 3) & 63;
+ if ((j + len) > 63) {
+ memcpy(&context->buffer[j], data, (i = 64-j));
+ SHA1Transform(context->state, context->buffer);
+ for ( ; i + 63 < len; i += 64) {
+ SHA1Transform(context->state, &data[i]);
+ }
+ j = 0;
+ }
+ else i = 0;
+ memcpy(&context->buffer[j], &data[i], len - i);
+}
+
+#if defined(__GNUC__) && __GNUC__ >= 12
+#pragma GCC diagnostic pop
+#endif
+
+/* Add padding and return the message digest. */
+
+void SHA1Final(unsigned char digest[20], SHA1_CTX* context)
+{
+ unsigned i;
+ unsigned char finalcount[8];
+ unsigned char c;
+
+#if 0 /* untested "improvement" by DHR */
+ /* Convert context->count to a sequence of bytes
+ * in finalcount. Second element first, but
+ * big-endian order within element.
+ * But we do it all backwards.
+ */
+ unsigned char *fcp = &finalcount[8];
+
+ for (i = 0; i < 2; i++)
+ {
+ uint32_t t = context->count[i];
+ int j;
+
+ for (j = 0; j < 4; t >>= 8, j++)
+ *--fcp = (unsigned char) t;
+ }
+#else
+ for (i = 0; i < 8; i++) {
+ finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]
+ >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
+ }
+#endif
+ c = 0200;
+ SHA1Update(context, &c, 1);
+ while ((context->count[0] & 504) != 448) {
+ c = 0000;
+ SHA1Update(context, &c, 1);
+ }
+ SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
+ for (i = 0; i < 20; i++) {
+ digest[i] = (unsigned char)
+ ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
+ }
+ /* Wipe variables */
+ memset(context, '\0', sizeof(*context));
+ memset(&finalcount, '\0', sizeof(finalcount));
+}
+/* ================ end of sha1.c ================ */
+
+#ifdef REDIS_TEST
+#define BUFSIZE 4096
+
+#define UNUSED(x) (void)(x)
+int sha1Test(int argc, char **argv, int flags)
+{
+ SHA1_CTX ctx;
+ unsigned char hash[20], buf[BUFSIZE];
+ int i;
+
+ UNUSED(argc);
+ UNUSED(argv);
+ UNUSED(flags);
+
+ for(i=0;i<BUFSIZE;i++)
+ buf[i] = i;
+
+ SHA1Init(&ctx);
+ for(i=0;i<1000;i++)
+ SHA1Update(&ctx, buf, BUFSIZE);
+ SHA1Final(hash, &ctx);
+
+ printf("SHA1=");
+ for(i=0;i<20;i++)
+ printf("%02x", hash[i]);
+ printf("\n");
+ return 0;
+}
+#endif
diff --git a/src/sha1.h b/src/sha1.h
new file mode 100644
index 0000000..a6cb6e8
--- /dev/null
+++ b/src/sha1.h
@@ -0,0 +1,27 @@
+#ifndef SHA1_H
+#define SHA1_H
+/* ================ sha1.h ================ */
+/*
+SHA-1 in C
+By Steve Reid <steve@edmweb.com>
+100% Public Domain
+*/
+
+typedef struct {
+ uint32_t state[5];
+ uint32_t count[2];
+ unsigned char buffer[64];
+} SHA1_CTX;
+
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);
+void SHA1Init(SHA1_CTX* context);
+/* The 'noinline' attribute is intended to prevent the `-Wstringop-overread`
+ * warning when using gcc-12 or later with LTO enabled. It may be removed once
+ * the bug [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80922] is fixed. */
+__attribute__((noinline)) void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len);
+void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
+
+#ifdef REDIS_TEST
+int sha1Test(int argc, char **argv, int flags);
+#endif
+#endif
diff --git a/src/sha256.c b/src/sha256.c
new file mode 100644
index 0000000..3b53c45
--- /dev/null
+++ b/src/sha256.c
@@ -0,0 +1,163 @@
+/*********************************************************************
+* Filename: sha256.c
+* Author: Brad Conte (brad AT bradconte.com)
+* Copyright:
+* Disclaimer: This code is presented "as is" without any guarantees.
+* Details: Implementation of the SHA-256 hashing algorithm.
+ SHA-256 is one of the three algorithms in the SHA2
+ specification. The others, SHA-384 and SHA-512, are not
+ offered in this implementation.
+ Algorithm specification can be found here:
+ * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf
+ This implementation uses little endian byte order.
+*********************************************************************/
+
+/*************************** HEADER FILES ***************************/
+#include <stdlib.h>
+#include <string.h>
+#include "sha256.h"
+
+/****************************** MACROS ******************************/
+#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
+#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b))))
+
+#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z)))
+#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
+#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
+#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3))
+#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))
+
+/**************************** VARIABLES *****************************/
+static const WORD k[64] = {
+ 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
+ 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
+ 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
+ 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
+ 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
+ 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
+ 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
+ 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+};
+
+/*********************** FUNCTION DEFINITIONS ***********************/
+void sha256_transform(SHA256_CTX *ctx, const BYTE data[])
+{
+ WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
+
+ for (i = 0, j = 0; i < 16; ++i, j += 4) {
+ m[i] = ((WORD) data[j + 0] << 24) |
+ ((WORD) data[j + 1] << 16) |
+ ((WORD) data[j + 2] << 8) |
+ ((WORD) data[j + 3]);
+ }
+
+ for ( ; i < 64; ++i)
+ m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16];
+
+ a = ctx->state[0];
+ b = ctx->state[1];
+ c = ctx->state[2];
+ d = ctx->state[3];
+ e = ctx->state[4];
+ f = ctx->state[5];
+ g = ctx->state[6];
+ h = ctx->state[7];
+
+ for (i = 0; i < 64; ++i) {
+ t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
+ t2 = EP0(a) + MAJ(a,b,c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + t1;
+ d = c;
+ c = b;
+ b = a;
+ a = t1 + t2;
+ }
+
+ ctx->state[0] += a;
+ ctx->state[1] += b;
+ ctx->state[2] += c;
+ ctx->state[3] += d;
+ ctx->state[4] += e;
+ ctx->state[5] += f;
+ ctx->state[6] += g;
+ ctx->state[7] += h;
+}
+
+void sha256_init(SHA256_CTX *ctx)
+{
+ ctx->datalen = 0;
+ ctx->bitlen = 0;
+ ctx->state[0] = 0x6a09e667;
+ ctx->state[1] = 0xbb67ae85;
+ ctx->state[2] = 0x3c6ef372;
+ ctx->state[3] = 0xa54ff53a;
+ ctx->state[4] = 0x510e527f;
+ ctx->state[5] = 0x9b05688c;
+ ctx->state[6] = 0x1f83d9ab;
+ ctx->state[7] = 0x5be0cd19;
+}
+
+void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len)
+{
+ WORD i;
+
+ for (i = 0; i < len; ++i) {
+ ctx->data[ctx->datalen] = data[i];
+ ctx->datalen++;
+ if (ctx->datalen == 64) {
+ sha256_transform(ctx, ctx->data);
+ ctx->bitlen += 512;
+ ctx->datalen = 0;
+ }
+ }
+}
+
+void sha256_final(SHA256_CTX *ctx, BYTE hash[])
+{
+ WORD i;
+
+ i = ctx->datalen;
+
+ // Pad whatever data is left in the buffer.
+ if (ctx->datalen < 56) {
+ ctx->data[i++] = 0x80;
+ while (i < 56)
+ ctx->data[i++] = 0x00;
+ }
+ else {
+ ctx->data[i++] = 0x80;
+ while (i < 64)
+ ctx->data[i++] = 0x00;
+ sha256_transform(ctx, ctx->data);
+ memset(ctx->data, 0, 56);
+ }
+
+ // Append to the padding the total message's length in bits and transform.
+ ctx->bitlen += ctx->datalen * 8;
+ ctx->data[63] = ctx->bitlen;
+ ctx->data[62] = ctx->bitlen >> 8;
+ ctx->data[61] = ctx->bitlen >> 16;
+ ctx->data[60] = ctx->bitlen >> 24;
+ ctx->data[59] = ctx->bitlen >> 32;
+ ctx->data[58] = ctx->bitlen >> 40;
+ ctx->data[57] = ctx->bitlen >> 48;
+ ctx->data[56] = ctx->bitlen >> 56;
+ sha256_transform(ctx, ctx->data);
+
+ // Since this implementation uses little endian byte ordering and SHA uses big endian,
+ // reverse all the bytes when copying the final state to the output hash.
+ for (i = 0; i < 4; ++i) {
+ hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff;
+ hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff;
+ hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff;
+ hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff;
+ hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff;
+ hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff;
+ hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff;
+ hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff;
+ }
+}
diff --git a/src/sha256.h b/src/sha256.h
new file mode 100644
index 0000000..a0d90a7
--- /dev/null
+++ b/src/sha256.h
@@ -0,0 +1,35 @@
+/*********************************************************************
+* Filename: sha256.h
+* Author: Brad Conte (brad AT bradconte.com)
+* Copyright:
+* Disclaimer: This code is presented "as is" without any guarantees.
+* Details: Defines the API for the corresponding SHA256 implementation.
+*********************************************************************/
+
+#ifndef SHA256_H
+#define SHA256_H
+
+/*************************** HEADER FILES ***************************/
+#include <stddef.h>
+#include <stdint.h>
+
+/****************************** MACROS ******************************/
+#define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest
+
+/**************************** DATA TYPES ****************************/
+typedef uint8_t BYTE; // 8-bit byte
+typedef uint32_t WORD; // 32-bit word
+
+typedef struct {
+ BYTE data[64];
+ WORD datalen;
+ unsigned long long bitlen;
+ WORD state[8];
+} SHA256_CTX;
+
+/*********************** FUNCTION DECLARATIONS **********************/
+void sha256_init(SHA256_CTX *ctx);
+void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len);
+void sha256_final(SHA256_CTX *ctx, BYTE hash[]);
+
+#endif // SHA256_H
diff --git a/src/siphash.c b/src/siphash.c
new file mode 100644
index 0000000..2713d89
--- /dev/null
+++ b/src/siphash.c
@@ -0,0 +1,373 @@
+/*
+ SipHash reference C implementation
+
+ Copyright (c) 2012-2016 Jean-Philippe Aumasson
+ <jeanphilippe.aumasson@gmail.com>
+ Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
+ Copyright (c) 2017 Salvatore Sanfilippo <antirez@gmail.com>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along
+ with this software. If not, see
+ <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+ ----------------------------------------------------------------------------
+
+ This version was modified by Salvatore Sanfilippo <antirez@gmail.com>
+ in the following ways:
+
+ 1. We use SipHash 1-2. This is not believed to be as strong as the
+ suggested 2-4 variant, but AFAIK there are not trivial attacks
+ against this reduced-rounds version, and it runs at the same speed
+ as Murmurhash2 that we used previously, while the 2-4 variant slowed
+ down Redis by a 4% figure more or less.
+ 2. Hard-code rounds in the hope the compiler can optimize it more
+      in this raw form. Anyway we always want the standard 2-4 variant.
+ 3. Modify the prototype and implementation so that the function directly
+ returns an uint64_t value, the hash itself, instead of receiving an
+ output buffer. This also means that the output size is set to 8 bytes
+ and the 16 bytes output code handling was removed.
+ 4. Provide a case insensitive variant to be used when hashing strings that
+ must be considered identical by the hash table regardless of the case.
+ If we don't have directly a case insensitive hash function, we need to
+ perform a text transformation in some temporary buffer, which is costly.
+ 5. Remove debugging code.
+ 6. Modified the original test.c file to be a stand-alone function testing
+ the function in the new form (returning an uint64_t) using just the
+ relevant test vector.
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+/* Fast tolower() alike function that does not care about locale
+ * but just returns a-z instead of A-Z. */
+int siptlw(int c) {
+ if (c >= 'A' && c <= 'Z') {
+ return c+('a'-'A');
+ } else {
+ return c;
+ }
+}
+
+#if defined(__has_attribute)
+#if __has_attribute(no_sanitize)
+#define NO_SANITIZE(sanitizer) __attribute__((no_sanitize(sanitizer)))
+#endif
+#endif
+
+#if !defined(NO_SANITIZE)
+#define NO_SANITIZE(sanitizer)
+#endif
+
+/* Test if the CPU is little endian and supports unaligned accesses.
+ * Two interesting conditions that speed up the function, both of which
+ * happen to hold on most x86 servers. */
+#if defined(__X86_64__) || defined(__x86_64__) || defined (__i386__) \
+ || defined (__aarch64__) || defined (__arm64__)
+#define UNALIGNED_LE_CPU
+#endif
+
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+
+#define U32TO8_LE(p, v) \
+ (p)[0] = (uint8_t)((v)); \
+ (p)[1] = (uint8_t)((v) >> 8); \
+ (p)[2] = (uint8_t)((v) >> 16); \
+ (p)[3] = (uint8_t)((v) >> 24);
+
+#define U64TO8_LE(p, v) \
+ U32TO8_LE((p), (uint32_t)((v))); \
+ U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
+
+#ifdef UNALIGNED_LE_CPU
+#define U8TO64_LE(p) (*((uint64_t*)(p)))
+#else
+#define U8TO64_LE(p) \
+ (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
+ ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
+ ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
+ ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
+#endif
+
+#define U8TO64_LE_NOCASE(p) \
+ (((uint64_t)(siptlw((p)[0]))) | \
+ ((uint64_t)(siptlw((p)[1])) << 8) | \
+ ((uint64_t)(siptlw((p)[2])) << 16) | \
+ ((uint64_t)(siptlw((p)[3])) << 24) | \
+ ((uint64_t)(siptlw((p)[4])) << 32) | \
+ ((uint64_t)(siptlw((p)[5])) << 40) | \
+ ((uint64_t)(siptlw((p)[6])) << 48) | \
+ ((uint64_t)(siptlw((p)[7])) << 56))
+
+#define SIPROUND \
+ do { \
+ v0 += v1; \
+ v1 = ROTL(v1, 13); \
+ v1 ^= v0; \
+ v0 = ROTL(v0, 32); \
+ v2 += v3; \
+ v3 = ROTL(v3, 16); \
+ v3 ^= v2; \
+ v0 += v3; \
+ v3 = ROTL(v3, 21); \
+ v3 ^= v0; \
+ v2 += v1; \
+ v1 = ROTL(v1, 17); \
+ v1 ^= v2; \
+ v2 = ROTL(v2, 32); \
+ } while (0)
+
+NO_SANITIZE("alignment")
+uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) {
+#ifndef UNALIGNED_LE_CPU
+ uint64_t hash;
+ uint8_t *out = (uint8_t*) &hash;
+#endif
+ uint64_t v0 = 0x736f6d6570736575ULL;
+ uint64_t v1 = 0x646f72616e646f6dULL;
+ uint64_t v2 = 0x6c7967656e657261ULL;
+ uint64_t v3 = 0x7465646279746573ULL;
+ uint64_t k0 = U8TO64_LE(k);
+ uint64_t k1 = U8TO64_LE(k + 8);
+ uint64_t m;
+ const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
+ const int left = inlen & 7;
+ uint64_t b = ((uint64_t)inlen) << 56;
+ v3 ^= k1;
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+ for (; in != end; in += 8) {
+ m = U8TO64_LE(in);
+ v3 ^= m;
+
+ SIPROUND;
+
+ v0 ^= m;
+ }
+
+ switch (left) {
+ case 7: b |= ((uint64_t)in[6]) << 48; /* fall-thru */
+ case 6: b |= ((uint64_t)in[5]) << 40; /* fall-thru */
+ case 5: b |= ((uint64_t)in[4]) << 32; /* fall-thru */
+ case 4: b |= ((uint64_t)in[3]) << 24; /* fall-thru */
+ case 3: b |= ((uint64_t)in[2]) << 16; /* fall-thru */
+ case 2: b |= ((uint64_t)in[1]) << 8; /* fall-thru */
+ case 1: b |= ((uint64_t)in[0]); break;
+ case 0: break;
+ }
+
+ v3 ^= b;
+
+ SIPROUND;
+
+ v0 ^= b;
+ v2 ^= 0xff;
+
+ SIPROUND;
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3;
+#ifndef UNALIGNED_LE_CPU
+ U64TO8_LE(out, b);
+ return hash;
+#else
+ return b;
+#endif
+}
+
+NO_SANITIZE("alignment")
+uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k)
+{
+#ifndef UNALIGNED_LE_CPU
+ uint64_t hash;
+ uint8_t *out = (uint8_t*) &hash;
+#endif
+ uint64_t v0 = 0x736f6d6570736575ULL;
+ uint64_t v1 = 0x646f72616e646f6dULL;
+ uint64_t v2 = 0x6c7967656e657261ULL;
+ uint64_t v3 = 0x7465646279746573ULL;
+ uint64_t k0 = U8TO64_LE(k);
+ uint64_t k1 = U8TO64_LE(k + 8);
+ uint64_t m;
+ const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
+ const int left = inlen & 7;
+ uint64_t b = ((uint64_t)inlen) << 56;
+ v3 ^= k1;
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+ for (; in != end; in += 8) {
+ m = U8TO64_LE_NOCASE(in);
+ v3 ^= m;
+
+ SIPROUND;
+
+ v0 ^= m;
+ }
+
+ switch (left) {
+ case 7: b |= ((uint64_t)siptlw(in[6])) << 48; /* fall-thru */
+ case 6: b |= ((uint64_t)siptlw(in[5])) << 40; /* fall-thru */
+ case 5: b |= ((uint64_t)siptlw(in[4])) << 32; /* fall-thru */
+ case 4: b |= ((uint64_t)siptlw(in[3])) << 24; /* fall-thru */
+ case 3: b |= ((uint64_t)siptlw(in[2])) << 16; /* fall-thru */
+ case 2: b |= ((uint64_t)siptlw(in[1])) << 8; /* fall-thru */
+ case 1: b |= ((uint64_t)siptlw(in[0])); break;
+ case 0: break;
+ }
+
+ v3 ^= b;
+
+ SIPROUND;
+
+ v0 ^= b;
+ v2 ^= 0xff;
+
+ SIPROUND;
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3;
+#ifndef UNALIGNED_LE_CPU
+ U64TO8_LE(out, b);
+ return hash;
+#else
+ return b;
+#endif
+}
+
+
+/* --------------------------------- TEST ------------------------------------ */
+
+#ifdef SIPHASH_TEST
+
+const uint8_t vectors_sip64[64][8] = {
+ { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
+ { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
+ { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
+ { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
+ { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
+ { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
+ { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
+ { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
+ { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
+ { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
+ { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
+ { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
+ { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
+ { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
+ { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
+ { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
+ { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
+ { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
+ { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
+ { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
+ { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
+ { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
+ { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
+ { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
+ { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
+ { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
+ { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
+ { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
+ { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
+ { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
+ { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
+ { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
+ { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
+ { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
+ { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
+ { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
+ { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
+ { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
+ { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
+ { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
+ { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
+ { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
+ { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
+ { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
+ { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
+ { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
+ { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
+ { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
+ { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
+ { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
+ { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
+ { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
+ { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
+ { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
+ { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
+ { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
+ { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
+ { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
+ { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
+ { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
+ { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
+ { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
+ { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
+ { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, },
+};
+
+
+/* Test siphash using a test vector. Returns 0 if the function passed
+ * all the tests, otherwise 1 is returned.
+ *
+ * IMPORTANT: The test vector is for SipHash 2-4. Before running
+ * the test revert back the siphash() function to 2-4 rounds since
+ * now it uses 1-2 rounds. */
+int siphash_test(void) {
+ uint8_t in[64], k[16];
+ int i;
+ int fails = 0;
+
+ for (i = 0; i < 16; ++i)
+ k[i] = i;
+
+ for (i = 0; i < 64; ++i) {
+ in[i] = i;
+ uint64_t hash = siphash(in, i, k);
+ const uint8_t *v = NULL;
+ v = (uint8_t *)vectors_sip64;
+ if (memcmp(&hash, v + (i * 8), 8)) {
+ /* printf("fail for %d bytes\n", i); */
+ fails++;
+ }
+ }
+
+ /* Run a few basic tests with the case insensitive version. */
+ uint64_t h1, h2;
+ h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
+ h2 = siphash_nocase((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
+ if (h1 != h2) fails++;
+
+ h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
+ h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
+ if (h1 != h2) fails++;
+
+ h1 = siphash((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
+ h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
+ if (h1 == h2) fails++;
+
+ if (!fails) return 0;
+ return 1;
+}
+
+int main(void) {
+ if (siphash_test() == 0) {
+ printf("SipHash test: OK\n");
+ return 0;
+ } else {
+ printf("SipHash test: FAILED\n");
+ return 1;
+ }
+}
+
+#endif
diff --git a/src/slowlog.c b/src/slowlog.c
new file mode 100644
index 0000000..4c31917
--- /dev/null
+++ b/src/slowlog.c
@@ -0,0 +1,206 @@
+/* Slowlog implements a system that is able to remember the latest N
+ * queries that took more than M microseconds to execute.
+ *
+ * The execution-time threshold a query must exceed to be logged in the
+ * slow log is set using the 'slowlog-log-slower-than' config directive,
+ * which is also readable and writable using the CONFIG SET/GET command.
+ *
+ * The slow queries log is actually not "logged" in the Redis log file
+ * but is accessible thanks to the SLOWLOG command.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "server.h"
+#include "slowlog.h"
+
+/* Create a new slowlog entry for the command described by 'argv'/'argc',
+ * executed by client 'c' in 'duration' microseconds.
+ *
+ * Ownership: this function retains everything it stores (it duplicates
+ * plain string objects and stores shared objects as-is); the caller keeps
+ * ownership of the passed argument vector. Release the returned entry
+ * with slowlogFreeEntry(). */
+slowlogEntry *slowlogCreateEntry(client *c, robj **argv, int argc, long long duration) {
+    slowlogEntry *se = zmalloc(sizeof(*se));
+    int j, slargc = argc;
+
+    /* Cap the number of stored arguments; the excess is summarized in the
+     * last slot inside the loop below. */
+    if (slargc > SLOWLOG_ENTRY_MAX_ARGC) slargc = SLOWLOG_ENTRY_MAX_ARGC;
+    se->argc = slargc;
+    se->argv = zmalloc(sizeof(robj*)*slargc);
+    for (j = 0; j < slargc; j++) {
+        /* Logging too many arguments is a useless memory waste, so we stop
+         * at SLOWLOG_ENTRY_MAX_ARGC, but use the last argument to specify
+         * how many remaining arguments there were in the original command. */
+        if (slargc != argc && j == slargc-1) {
+            se->argv[j] = createObject(OBJ_STRING,
+                sdscatprintf(sdsempty(),"... (%d more arguments)",
+                argc-slargc+1));
+        } else {
+            /* Trim too long strings as well... */
+            if (argv[j]->type == OBJ_STRING &&
+                sdsEncodedObject(argv[j]) &&
+                sdslen(argv[j]->ptr) > SLOWLOG_ENTRY_MAX_STRING)
+            {
+                sds s = sdsnewlen(argv[j]->ptr, SLOWLOG_ENTRY_MAX_STRING);
+
+                s = sdscatprintf(s,"... (%lu more bytes)",
+                    (unsigned long)
+                    sdslen(argv[j]->ptr) - SLOWLOG_ENTRY_MAX_STRING);
+                se->argv[j] = createObject(OBJ_STRING,s);
+            } else if (argv[j]->refcount == OBJ_SHARED_REFCOUNT) {
+                /* Shared objects have a constant refcount, so storing the
+                 * bare pointer without incrementing anything is safe. */
+                se->argv[j] = argv[j];
+            } else {
+                /* Here we need to duplicate the string objects composing the
+                 * argument vector of the command, because those may otherwise
+                 * end shared with string objects stored into keys. Having
+                 * shared objects between any part of Redis, and the data
+                 * structure holding the data, is a problem: FLUSHALL ASYNC
+                 * may release the shared string object and create a race. */
+                se->argv[j] = dupStringObject(argv[j]);
+            }
+        }
+    }
+    se->time = time(NULL);
+    se->duration = duration;
+    se->id = server.slowlog_entry_id++; /* Unique, incremental ID. */
+    se->peerid = sdsnew(getClientPeerId(c));
+    se->cname = c->name ? sdsnew(c->name->ptr) : sdsempty();
+    return se;
+}
+
+/* Release a slow log entry together with every object it retains. The
+ * parameter is typed void* so this function can be installed directly as
+ * the 'free' method of an adlist.c list. */
+void slowlogFreeEntry(void *septr) {
+    slowlogEntry *entry = septr;
+
+    for (int i = 0; i < entry->argc; i++)
+        decrRefCount(entry->argv[i]);
+    zfree(entry->argv);
+    sdsfree(entry->peerid);
+    sdsfree(entry->cname);
+    zfree(entry);
+}
+
+/* Set up the slow log. Must be called exactly once, at server startup. */
+void slowlogInit(void) {
+    server.slowlog_entry_id = 0;
+    server.slowlog = listCreate();
+    listSetFreeMethod(server.slowlog, slowlogFreeEntry);
+}
+
+/* Record the command in the slow log when its duration reaches the
+ * configured threshold, then trim the log to the configured max length. */
+void slowlogPushEntryIfNeeded(client *c, robj **argv, int argc, long long duration) {
+    if (server.slowlog_log_slower_than < 0) return; /* Slowlog disabled */
+
+    if (duration >= server.slowlog_log_slower_than) {
+        slowlogEntry *entry = slowlogCreateEntry(c, argv, argc, duration);
+        listAddNodeHead(server.slowlog, entry);
+    }
+
+    /* Evict the oldest entries until we are back within the limit. */
+    while (listLength(server.slowlog) > server.slowlog_max_len)
+        listDelNode(server.slowlog, listLast(server.slowlog));
+}
+
+/* Drop every entry currently stored in the slow log. */
+void slowlogReset(void) {
+    listNode *last;
+
+    while ((last = listLast(server.slowlog)) != NULL)
+        listDelNode(server.slowlog, last);
+}
+
+/* The SLOWLOG command. Implements all the subcommands needed to handle the
+ * Redis slow log: HELP, RESET, LEN and GET [<count>].
+ *
+ * Fix: the HELP text said "-1 mean all"; corrected to "-1 means all". */
+void slowlogCommand(client *c) {
+    if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+        const char *help[] = {
+"GET [<count>]",
+" Return top <count> entries from the slowlog (default: 10, -1 means all).",
+" Entries are made of:",
+" id, timestamp, time in microseconds, arguments array, client IP and port,",
+" client name",
+"LEN",
+" Return the length of the slowlog.",
+"RESET",
+" Reset the slowlog.",
+NULL
+        };
+        addReplyHelp(c, help);
+    } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"reset")) {
+        slowlogReset();
+        addReply(c,shared.ok);
+    } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"len")) {
+        addReplyLongLong(c,listLength(server.slowlog));
+    } else if ((c->argc == 2 || c->argc == 3) &&
+               !strcasecmp(c->argv[1]->ptr,"get"))
+    {
+        long count = 10;
+        listIter li;
+        listNode *ln;
+        slowlogEntry *se;
+
+        if (c->argc == 3) {
+            /* Consume count arg. */
+            if (getRangeLongFromObjectOrReply(c, c->argv[2], -1,
+                LONG_MAX, &count, "count should be greater than or equal to -1") != C_OK)
+                return;
+
+            if (count == -1) {
+                /* We treat -1 as a special value, which means to get all slow logs.
+                 * Simply set count to the length of server.slowlog.*/
+                count = listLength(server.slowlog);
+            }
+        }
+
+        /* Never reply with more entries than the log actually holds. */
+        if (count > (long)listLength(server.slowlog)) {
+            count = listLength(server.slowlog);
+        }
+        addReplyArrayLen(c, count);
+        listRewind(server.slowlog, &li);
+        while (count--) {
+            int j;
+
+            ln = listNext(&li);
+            se = ln->value;
+            /* Each entry is a 6 element array: id, unix time, duration,
+             * argument vector, peer address, client name. */
+            addReplyArrayLen(c,6);
+            addReplyLongLong(c,se->id);
+            addReplyLongLong(c,se->time);
+            addReplyLongLong(c,se->duration);
+            addReplyArrayLen(c,se->argc);
+            for (j = 0; j < se->argc; j++)
+                addReplyBulk(c,se->argv[j]);
+            addReplyBulkCBuffer(c,se->peerid,sdslen(se->peerid));
+            addReplyBulkCBuffer(c,se->cname,sdslen(se->cname));
+        }
+    } else {
+        addReplySubcommandSyntaxError(c);
+    }
+}
diff --git a/src/slowlog.h b/src/slowlog.h
new file mode 100644
index 0000000..6a00d12
--- /dev/null
+++ b/src/slowlog.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SLOWLOG_H__
+#define __SLOWLOG_H__
+
+/* Hard limits applied when an entry is recorded: commands with more
+ * arguments, or arguments longer than this, are truncated (see
+ * slowlogCreateEntry() in slowlog.c). */
+#define SLOWLOG_ENTRY_MAX_ARGC 32
+#define SLOWLOG_ENTRY_MAX_STRING 128
+
+/* This structure defines an entry inside the slow log list */
+typedef struct slowlogEntry {
+    robj **argv;        /* Logged (possibly truncated) argument vector. */
+    int argc;           /* Number of elements in argv. */
+    long long id;       /* Unique entry identifier. */
+    long long duration; /* Time spent by the query, in microseconds. */
+    time_t time;        /* Unix time at which the query was executed. */
+    sds cname;          /* Client name. */
+    sds peerid;         /* Client network address. */
+} slowlogEntry;
+
+/* Exported API */
+void slowlogInit(void);
+void slowlogPushEntryIfNeeded(client *c, robj **argv, int argc, long long duration);
+
+#endif /* __SLOWLOG_H__ */
diff --git a/src/socket.c b/src/socket.c
new file mode 100644
index 0000000..dad8e93
--- /dev/null
+++ b/src/socket.c
@@ -0,0 +1,471 @@
+/*
+ * Copyright (c) 2019, Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "connhelpers.h"
+
+/* The connections module provides a lean abstraction of network connections
+ * to avoid direct socket and async event management across the Redis code base.
+ *
+ * It does NOT provide advanced connection features commonly found in similar
+ * libraries such as complete in/out buffer management, throttling, etc. These
+ * functions remain in networking.c.
+ *
+ * The primary goal is to allow transparent handling of TCP and TLS based
+ * connections. To do so, connections have the following properties:
+ *
+ * 1. A connection may live before its corresponding socket exists. This
+ * allows various context and configuration setting to be handled before
+ * establishing the actual connection.
+ * 2. The caller may register/unregister logical read/write handlers to be
+ * called when the connection has data to read from/can accept writes.
+ * These logical handlers may or may not correspond to actual AE events,
+ * depending on the implementation (for TCP they are; for TLS they aren't).
+ */
+
+static ConnectionType CT_Socket;
+
+/* When a connection is created we must know its type already, but the
+ * underlying socket may or may not exist:
+ *
+ * - For accepted connections, it exists as we do not model the listen/accept
+ * part; So caller calls connCreateSocket() followed by connAccept().
+ * - For outgoing connections, the socket is created by the connection module
+ * itself; So caller calls connCreateSocket() followed by connConnect(),
+ * which registers a connect callback that fires on connected/error state
+ * (and after any transport level handshake was done).
+ *
+ * NOTE: An earlier version relied on connections being part of other structs
+ * and not independently allocated. This could lead to further optimizations
+ * like using container_of(), etc. However it was discontinued in favor of
+ * this approach for these reasons:
+ *
+ * 1. In some cases conns are created/handled outside the context of the
+ * containing struct, in which case it gets a bit awkward to copy them.
+ * 2. Future implementations may wish to allocate arbitrary data for the
+ * connection.
+ * 3. The container_of() approach is anyway risky because connections may
+ * be embedded in different structs, not just client.
+ */
+
+/* Allocate a zeroed socket-type connection with no underlying fd yet. */
+static connection *connCreateSocket(void) {
+    connection *conn = zcalloc(sizeof(*conn));
+
+    conn->type = &CT_Socket;
+    conn->fd = -1;
+    conn->iovcnt = IOV_MAX;
+    return conn;
+}
+
+/* Create a new socket-type connection that is already associated with
+ * an accepted connection.
+ *
+ * The socket is not ready for I/O until connAccept() was called and
+ * invoked the connection-level accept handler.
+ *
+ * Callers should use connGetState() and verify the created connection
+ * is not in an error state (which is not possible for a socket connection,
+ * but could be possible with other protocols).
+ */
+/* Wrap an already-accepted fd into a socket connection in the
+ * CONN_STATE_ACCEPTING state; I/O starts only after connAccept(). */
+static connection *connCreateAcceptedSocket(int fd, void *priv) {
+    UNUSED(priv);
+
+    connection *conn = connCreateSocket();
+    conn->state = CONN_STATE_ACCEPTING;
+    conn->fd = fd;
+    return conn;
+}
+
+/* Start a non-blocking connect to addr:port, optionally binding the local
+ * side to src_addr. 'connect_handler' is stored and the event loop watches
+ * the socket for writability, which signals that the connect attempt has
+ * completed (successfully or not); the outcome is resolved in
+ * connSocketEventHandler(). Returns C_ERR only on immediate failure. */
+static int connSocketConnect(connection *conn, const char *addr, int port, const char *src_addr,
+        ConnectionCallbackFunc connect_handler) {
+    int fd = anetTcpNonBlockBestEffortBindConnect(NULL,addr,port,src_addr);
+    if (fd == -1) {
+        conn->state = CONN_STATE_ERROR;
+        conn->last_errno = errno;
+        return C_ERR;
+    }
+
+    conn->fd = fd;
+    conn->state = CONN_STATE_CONNECTING;
+
+    conn->conn_handler = connect_handler;
+    aeCreateFileEvent(server.el, conn->fd, AE_WRITABLE,
+            conn->type->ae_handler, conn);
+
+    return C_OK;
+}
+
+/* ------ Pure socket connections ------- */
+
+/* A very incomplete list of implementation-specific calls. Much of the above shall
+ * move here as we implement additional connection types.
+ */
+
+/* Shut down both directions of the socket (no-op if there is no fd). */
+static void connSocketShutdown(connection *conn) {
+    if (conn->fd != -1)
+        shutdown(conn->fd, SHUT_RDWR);
+}
+
+/* Close the connection and free resources.
+ *
+ * The AE events are removed and the socket closed immediately. The
+ * connection object itself is freed right away only when no handler is
+ * currently running on it; otherwise the free is deferred via
+ * CONN_FLAG_CLOSE_SCHEDULED and performed when the handler returns. */
+static void connSocketClose(connection *conn) {
+    if (conn->fd != -1) {
+        aeDeleteFileEvent(server.el,conn->fd, AE_READABLE | AE_WRITABLE);
+        close(conn->fd);
+        conn->fd = -1;
+    }
+
+    /* If called from within a handler, schedule the close but
+     * keep the connection until the handler returns.
+     */
+    if (connHasRefs(conn)) {
+        conn->flags |= CONN_FLAG_CLOSE_SCHEDULED;
+        return;
+    }
+
+    zfree(conn);
+}
+
+static int connSocketWrite(connection *conn, const void *data, size_t data_len) {
+ int ret = write(conn->fd, data, data_len);
+ if (ret < 0 && errno != EAGAIN) {
+ conn->last_errno = errno;
+
+ /* Don't overwrite the state of a connection that is not already
+ * connected, not to mess with handler callbacks.
+ */
+ if (errno != EINTR && conn->state == CONN_STATE_CONNECTED)
+ conn->state = CONN_STATE_ERROR;
+ }
+
+ return ret;
+}
+
+static int connSocketWritev(connection *conn, const struct iovec *iov, int iovcnt) {
+ int ret = writev(conn->fd, iov, iovcnt);
+ if (ret < 0 && errno != EAGAIN) {
+ conn->last_errno = errno;
+
+ /* Don't overwrite the state of a connection that is not already
+ * connected, not to mess with handler callbacks.
+ */
+ if (errno != EINTR && conn->state == CONN_STATE_CONNECTED)
+ conn->state = CONN_STATE_ERROR;
+ }
+
+ return ret;
+}
+
+/* read(2) through the connection. A zero return from read() marks the
+ * connection as CLOSED (peer EOF); a hard error records errno and moves a
+ * fully-connected socket to the error state. */
+static int connSocketRead(connection *conn, void *buf, size_t buf_len) {
+    int nread = read(conn->fd, buf, buf_len);
+    if (nread == 0) {
+        conn->state = CONN_STATE_CLOSED;
+        return 0;
+    }
+    if (nread < 0 && errno != EAGAIN) {
+        conn->last_errno = errno;
+        /* Only a connection in the CONNECTED state is flagged as errored,
+         * so that connect/accept handler callbacks are not disturbed. */
+        if (errno != EINTR && conn->state == CONN_STATE_CONNECTED)
+            conn->state = CONN_STATE_ERROR;
+    }
+    return nread;
+}
+
+/* Complete the accept of a connection created in the ACCEPTING state and
+ * invoke the caller-supplied accept handler. */
+static int connSocketAccept(connection *conn, ConnectionCallbackFunc accept_handler) {
+    if (conn->state != CONN_STATE_ACCEPTING) return C_ERR;
+
+    int ret = C_OK;
+    conn->state = CONN_STATE_CONNECTED;
+
+    /* Hold a reference across the handler so the connection cannot be
+     * freed from under us while it runs. */
+    connIncrRefs(conn);
+    if (!callHandler(conn, accept_handler)) ret = C_ERR;
+    connDecrRefs(conn);
+
+    return ret;
+}
+
+/* Register a write handler, to be called when the connection is writable.
+ * If NULL, the existing handler is removed.
+ *
+ * The barrier flag indicates a write barrier is requested, resulting with
+ * CONN_FLAG_WRITE_BARRIER set. This will ensure that the write handler is
+ * always called before and not after the read handler in a single event
+ * loop.
+ *
+ * NOTE(review): when 'func' equals the current handler we return early,
+ * so a changed 'barrier' value is NOT applied in that case — confirm all
+ * callers either change the handler or keep the barrier constant. */
+static int connSocketSetWriteHandler(connection *conn, ConnectionCallbackFunc func, int barrier) {
+    if (func == conn->write_handler) return C_OK;
+
+    conn->write_handler = func;
+    /* Record (or clear) the write-before-read ordering request used by
+     * connSocketEventHandler(). */
+    if (barrier)
+        conn->flags |= CONN_FLAG_WRITE_BARRIER;
+    else
+        conn->flags &= ~CONN_FLAG_WRITE_BARRIER;
+    if (!conn->write_handler)
+        aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE);
+    else
+        if (aeCreateFileEvent(server.el,conn->fd,AE_WRITABLE,
+                    conn->type->ae_handler,conn) == AE_ERR) return C_ERR;
+    return C_OK;
+}
+
+/* Install 'func' as the logical read handler, or remove the current one
+ * when 'func' is NULL. The AE readable event tracks the handler. */
+static int connSocketSetReadHandler(connection *conn, ConnectionCallbackFunc func) {
+    if (conn->read_handler == func) return C_OK;
+
+    conn->read_handler = func;
+    if (func == NULL) {
+        aeDeleteFileEvent(server.el, conn->fd, AE_READABLE);
+        return C_OK;
+    }
+    if (aeCreateFileEvent(server.el, conn->fd, AE_READABLE,
+                          conn->type->ae_handler, conn) == AE_ERR)
+        return C_ERR;
+    return C_OK;
+}
+
+/* Human-readable description of the last errno recorded on this
+ * connection. The returned string is owned by strerror(). */
+static const char *connSocketGetLastError(connection *conn) {
+    return strerror(conn->last_errno);
+}
+
+/* AE event callback shared by all socket connections: first resolves an
+ * in-progress connect, then dispatches the logical read/write handlers
+ * according to the event mask and the write-barrier flag. */
+static void connSocketEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask)
+{
+    UNUSED(el);
+    UNUSED(fd);
+    connection *conn = clientData;
+
+    /* A CONNECTING socket becoming writable means the connect(2) attempt
+     * finished; query the pending socket error to learn the outcome. */
+    if (conn->state == CONN_STATE_CONNECTING &&
+            (mask & AE_WRITABLE) && conn->conn_handler) {
+
+        int conn_error = anetGetError(conn->fd);
+        if (conn_error) {
+            conn->last_errno = conn_error;
+            conn->state = CONN_STATE_ERROR;
+        } else {
+            conn->state = CONN_STATE_CONNECTED;
+        }
+
+        /* Writability was only watched to detect connect completion; drop
+         * the event unless a real write handler is installed. */
+        if (!conn->write_handler) aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE);
+
+        /* callHandler() returning zero means the connection must no longer
+         * be touched (a close was scheduled by the handler). */
+        if (!callHandler(conn, conn->conn_handler)) return;
+        conn->conn_handler = NULL;
+    }
+
+    /* Normally we execute the readable event first, and the writable
+     * event later. This is useful as sometimes we may be able
+     * to serve the reply of a query immediately after processing the
+     * query.
+     *
+     * However if WRITE_BARRIER is set in the mask, our application is
+     * asking us to do the reverse: never fire the writable event
+     * after the readable. In such a case, we invert the calls.
+     * This is useful when, for instance, we want to do things
+     * in the beforeSleep() hook, like fsync'ing a file to disk,
+     * before replying to a client. */
+    int invert = conn->flags & CONN_FLAG_WRITE_BARRIER;
+
+    int call_write = (mask & AE_WRITABLE) && conn->write_handler;
+    int call_read = (mask & AE_READABLE) && conn->read_handler;
+
+    /* Handle normal I/O flows */
+    if (!invert && call_read) {
+        if (!callHandler(conn, conn->read_handler)) return;
+    }
+    /* Fire the writable event. */
+    if (call_write) {
+        if (!callHandler(conn, conn->write_handler)) return;
+    }
+    /* If we have to invert the call, fire the readable event now
+     * after the writable one. */
+    if (invert && call_read) {
+        if (!callHandler(conn, conn->read_handler)) return;
+    }
+}
+
+/* AE callback for a listening socket: accept incoming TCP connections and
+ * hand each one to acceptCommonHandler(). */
+static void connSocketAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
+    char cip[NET_IP_STR_LEN];
+    int cport;
+    UNUSED(el);
+    UNUSED(mask);
+    UNUSED(privdata);
+
+    /* Accept at most MAX_ACCEPTS_PER_CALL connections per invocation so a
+     * connect storm cannot starve the rest of the event loop. */
+    for (int accepted = 0; accepted < MAX_ACCEPTS_PER_CALL; accepted++) {
+        int cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
+        if (cfd == ANET_ERR) {
+            if (errno != EWOULDBLOCK)
+                serverLog(LL_WARNING,
+                    "Accepting client connection: %s", server.neterr);
+            return;
+        }
+        serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
+        acceptCommonHandler(connCreateAcceptedSocket(cfd, NULL),0,cip);
+    }
+}
+
+/* Resolve the local (remote == 0) or peer (remote == 1) address of the
+ * socket into ip/port. */
+static int connSocketAddr(connection *conn, char *ip, size_t ip_len, int *port, int remote) {
+    if (anetFdToString(conn->fd, ip, ip_len, port, remote) != 0) {
+        conn->last_errno = errno;
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* Return 1 if the peer is a loopback address, 0 if not, and -1 when the
+ * peer address cannot be determined. */
+static int connSocketIsLocal(connection *conn) {
+    char ip[NET_IP_STR_LEN + 1] = {0};
+
+    if (connSocketAddr(conn, ip, sizeof(ip) - 1, NULL, 1) == C_ERR)
+        return -1;
+
+    /* Loopback means any 127.x.x.x IPv4 address, or the IPv6 ::1. */
+    if (strncmp(ip, "127.", 4) == 0) return 1;
+    return strcmp(ip, "::1") == 0;
+}
+
+/* Bind and listen on the addresses described by 'listener'; thin wrapper
+ * over listenToPort(). */
+static int connSocketListen(connListener *listener) {
+    return listenToPort(listener);
+}
+
+/* Establish an outgoing TCP connection to addr:port, waiting at most
+ * 'timeout' milliseconds for the connect to complete.
+ *
+ * On success the connection state is CONN_STATE_CONNECTED and C_OK is
+ * returned; on failure (including timeout) the state is CONN_STATE_ERROR,
+ * last_errno is set, and C_ERR is returned.
+ *
+ * Fix: the previous version set the error state on timeout but then
+ * unconditionally overwrote it with CONN_STATE_CONNECTED and returned
+ * C_OK, reporting success for a connection that never completed. */
+static int connSocketBlockingConnect(connection *conn, const char *addr, int port, long long timeout) {
+    int fd = anetTcpNonBlockConnect(NULL,addr,port);
+    if (fd == -1) {
+        conn->state = CONN_STATE_ERROR;
+        conn->last_errno = errno;
+        return C_ERR;
+    }
+
+    /* Store the fd on the connection first, so connClose() can release it
+     * even when the wait below times out. */
+    conn->fd = fd;
+
+    if ((aeWait(fd, AE_WRITABLE, timeout) & AE_WRITABLE) == 0) {
+        conn->state = CONN_STATE_ERROR;
+        conn->last_errno = ETIMEDOUT;
+        return C_ERR;
+    }
+
+    conn->state = CONN_STATE_CONNECTED;
+    return C_OK;
+}
+
+/* Connection-based versions of syncio.c functions.
+ * NOTE: This should ideally be refactored out in favor of pure async work.
+ */
+
+/* Blocking write of 'size' bytes with a timeout in milliseconds. */
+static ssize_t connSocketSyncWrite(connection *conn, char *ptr, ssize_t size, long long timeout) {
+    return syncWrite(conn->fd, ptr, size, timeout);
+}
+
+/* Blocking read of up to 'size' bytes with a timeout in milliseconds. */
+static ssize_t connSocketSyncRead(connection *conn, char *ptr, ssize_t size, long long timeout) {
+    return syncRead(conn->fd, ptr, size, timeout);
+}
+
+/* Blocking read of a single line (up to 'size' bytes) with a timeout in
+ * milliseconds. */
+static ssize_t connSocketSyncReadLine(connection *conn, char *ptr, ssize_t size, long long timeout) {
+    return syncReadLine(conn->fd, ptr, size, timeout);
+}
+
+/* Return the identifier string of this connection type. */
+static const char *connSocketGetType(connection *conn) {
+    UNUSED(conn);
+    return CONN_TYPE_SOCKET;
+}
+
+/* Method table binding the generic connection API to plain TCP sockets. */
+static ConnectionType CT_Socket = {
+    /* connection type */
+    .get_type = connSocketGetType,
+
+    /* connection type initialize & finalize & configure (not needed for
+     * plain sockets) */
+    .init = NULL,
+    .cleanup = NULL,
+    .configure = NULL,
+
+    /* ae & accept & listen & error & address handler */
+    .ae_handler = connSocketEventHandler,
+    .accept_handler = connSocketAcceptHandler,
+    .addr = connSocketAddr,
+    .is_local = connSocketIsLocal,
+    .listen = connSocketListen,
+
+    /* create/shutdown/close connection */
+    .conn_create = connCreateSocket,
+    .conn_create_accepted = connCreateAcceptedSocket,
+    .shutdown = connSocketShutdown,
+    .close = connSocketClose,
+
+    /* connect & accept */
+    .connect = connSocketConnect,
+    .blocking_connect = connSocketBlockingConnect,
+    .accept = connSocketAccept,
+
+    /* IO */
+    .write = connSocketWrite,
+    .writev = connSocketWritev,
+    .read = connSocketRead,
+    .set_write_handler = connSocketSetWriteHandler,
+    .set_read_handler = connSocketSetReadHandler,
+    .get_last_error = connSocketGetLastError,
+    .sync_write = connSocketSyncWrite,
+    .sync_read = connSocketSyncRead,
+    .sync_readline = connSocketSyncReadLine,
+
+    /* pending data: plain sockets never buffer internally, unlike TLS. */
+    .has_pending_data = NULL,
+    .process_pending_data = NULL,
+};
+
+/* Put the underlying socket in blocking mode; C_ERR if no socket yet. */
+int connBlock(connection *conn) {
+    return (conn->fd == -1) ? C_ERR : anetBlock(NULL, conn->fd);
+}
+
+/* Put the underlying socket in non-blocking mode; C_ERR if no socket yet. */
+int connNonBlock(connection *conn) {
+    return (conn->fd == -1) ? C_ERR : anetNonBlock(NULL, conn->fd);
+}
+
+/* Enable TCP_NODELAY on the underlying socket; C_ERR if no socket yet. */
+int connEnableTcpNoDelay(connection *conn) {
+    return (conn->fd == -1) ? C_ERR : anetEnableTcpNoDelay(NULL, conn->fd);
+}
+
+/* Disable TCP_NODELAY on the underlying socket; C_ERR if no socket yet. */
+int connDisableTcpNoDelay(connection *conn) {
+    return (conn->fd == -1) ? C_ERR : anetDisableTcpNoDelay(NULL, conn->fd);
+}
+
+/* Enable TCP keep-alive with the given interval; C_ERR if no socket yet. */
+int connKeepAlive(connection *conn, int interval) {
+    return (conn->fd == -1) ? C_ERR : anetKeepAlive(NULL, conn->fd, interval);
+}
+
+/* Set the socket send timeout to 'ms' milliseconds.
+ * Fix: guard against a connection with no socket yet (fd == -1), for
+ * consistency with connBlock()/connKeepAlive() and the other wrappers
+ * above, instead of passing -1 down to setsockopt(). */
+int connSendTimeout(connection *conn, long long ms) {
+    if (conn->fd == -1) return C_ERR;
+    return anetSendTimeout(NULL, conn->fd, ms);
+}
+
+/* Set the socket receive timeout to 'ms' milliseconds.
+ * Fix: guard against a connection with no socket yet (fd == -1), for
+ * consistency with connBlock()/connKeepAlive() and the other wrappers
+ * above, instead of passing -1 down to setsockopt(). */
+int connRecvTimeout(connection *conn, long long ms) {
+    if (conn->fd == -1) return C_ERR;
+    return anetRecvTimeout(NULL, conn->fd, ms);
+}
+
+/* Register the plain TCP socket connection type with the connection
+ * framework. */
+int RedisRegisterConnectionTypeSocket(void) {
+    return connTypeRegister(&CT_Socket);
+}
diff --git a/src/solarisfixes.h b/src/solarisfixes.h
new file mode 100644
index 0000000..3e53ba6
--- /dev/null
+++ b/src/solarisfixes.h
@@ -0,0 +1,54 @@
+/* Solaris specific fixes.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if defined(__sun)
+
+#if defined(__GNUC__)
+#include <math.h>
+/* Redefine the floating-point classification macros using GCC builtins.
+ * The __extension__ statement expressions evaluate the argument exactly
+ * once. NOTE(review): presumably the Solaris <math.h> versions are missing
+ * or unusable under gcc — confirm on an actual Solaris build. */
+
+/* NaN is the only value that compares unequal to itself. */
+#undef isnan
+#define isnan(x) \
+    __extension__({ __typeof (x) __x_a = (x); \
+    __builtin_expect(__x_a != __x_a, 0); })
+
+/* x - x is 0 for finite x, but NaN for NaN or infinite x. */
+#undef isfinite
+#define isfinite(x) \
+    __extension__ ({ __typeof (x) __x_f = (x); \
+    __builtin_expect(!isnan(__x_f - __x_f), 1); })
+
+/* Infinite = not NaN and not finite. */
+#undef isinf
+#define isinf(x) \
+    __extension__ ({ __typeof (x) __x_i = (x); \
+    __builtin_expect(!isnan(__x_i) && !isfinite(__x_i), 0); })
+
+/* BSD-style unsigned integer aliases used elsewhere in the code base. */
+#define u_int uint
+#define u_int32_t uint32_t
+#endif /* __GNUC__ */
+
+#endif /* __sun */
diff --git a/src/sort.c b/src/sort.c
new file mode 100644
index 0000000..77f4cbb
--- /dev/null
+++ b/src/sort.c
@@ -0,0 +1,619 @@
+/* SORT command and helper functions.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "server.h"
+#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
+#include <math.h> /* isnan() */
+
+zskiplistNode* zslGetElementByRank(zskiplist *zsl, unsigned long rank);
+
+/* Allocate and initialize a redisSortOperation describing a single SORT
+ * option (currently only SORT_OP_GET) together with its key pattern.
+ * The pattern object is referenced, not copied; the caller releases the
+ * returned structure with zfree() (sortCommandGeneric does this via the
+ * operations list free method). */
+redisSortOperation *createSortOperation(int type, robj *pattern) {
+    redisSortOperation *op;
+
+    op = zmalloc(sizeof(*op));
+    op->type = type;
+    op->pattern = pattern;
+    return op;
+}
+
+/* Return the value associated to the key with a name obtained using
+ * the following rules:
+ *
+ * 1) The first occurrence of '*' in 'pattern' is substituted with 'subst'.
+ *
+ * 2) If 'pattern' contains the "->" string, everything on the right of
+ * the arrow is treated as the name of a hash field, and the part on the
+ * left as the key name containing a hash. The value of the specified
+ * field is returned.
+ *
+ * 3) If 'pattern' equals "#", the function simply returns 'subst' itself so
+ * that the SORT command can be used like: SORT key GET # to retrieve
+ * the Set/List elements directly.
+ *
+ * The returned object will always have its refcount increased by 1
+ * when it is non-NULL (the caller must decrement it). NULL is returned
+ * when the pattern contains no '*', when the substituted key is missing
+ * or has the wrong type, or when the hash field does not exist. */
+robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
+    char *p, *f, *k;
+    sds spat, ssub;
+    robj *keyobj, *fieldobj = NULL, *o;
+    int prefixlen, sublen, postfixlen, fieldlen;
+
+    /* If the pattern is "#" return the substitution object itself in order
+     * to implement the "SORT ... GET #" feature. */
+    spat = pattern->ptr;
+    if (spat[0] == '#' && spat[1] == '\0') {
+        incrRefCount(subst);
+        return subst;
+    }
+
+    /* The substitution object may be specially encoded. If so we create
+     * a decoded object on the fly. Otherwise getDecodedObject will just
+     * increment the ref count, that we'll decrement later. */
+    subst = getDecodedObject(subst);
+    ssub = subst->ptr;
+
+    /* If we can't find '*' in the pattern we return NULL as to GET a
+     * fixed key does not make sense. */
+    p = strchr(spat,'*');
+    if (!p) {
+        decrRefCount(subst);
+        return NULL;
+    }
+
+    /* Find out if we're dealing with a hash dereference: the field name
+     * is everything after the first non-empty "->" found past the '*'. */
+    if ((f = strstr(p+1, "->")) != NULL && *(f+2) != '\0') {
+        fieldlen = sdslen(spat)-(f-spat)-2;
+        fieldobj = createStringObject(f+2,fieldlen);
+    } else {
+        fieldlen = 0;
+    }
+
+    /* Perform the '*' substitution: key = prefix + subst + postfix, where
+     * the postfix excludes the "->field" suffix when one was detected. */
+    prefixlen = p-spat;
+    sublen = sdslen(ssub);
+    postfixlen = sdslen(spat)-(prefixlen+1)-(fieldlen ? fieldlen+2 : 0);
+    keyobj = createStringObject(NULL,prefixlen+sublen+postfixlen);
+    k = keyobj->ptr;
+    memcpy(k,spat,prefixlen);
+    memcpy(k+prefixlen,ssub,sublen);
+    memcpy(k+prefixlen+sublen,p+1,postfixlen);
+    decrRefCount(subst); /* Incremented by getDecodedObject() */
+
+    /* Lookup substituted key */
+    o = lookupKeyRead(db, keyobj);
+    if (o == NULL) goto noobj;
+
+    if (fieldobj) {
+        if (o->type != OBJ_HASH) goto noobj;
+
+        /* Retrieve value from hash by the field name. The returned object
+         * is a new object with refcount already incremented. */
+        o = hashTypeGetValueObject(o, fieldobj->ptr);
+    } else {
+        if (o->type != OBJ_STRING) goto noobj;
+
+        /* Every object that this function returns needs to have its refcount
+         * increased. sortCommand decreases it again. */
+        incrRefCount(o);
+    }
+    decrRefCount(keyobj);
+    if (fieldobj) decrRefCount(fieldobj);
+    return o;
+
+noobj:
+    decrRefCount(keyobj);
+    if (fieldobj) decrRefCount(fieldobj);
+    return NULL;
+}
+
+/* sortCompare() is the comparison callback used by qsort()/pqsort() in
+ * sortCommand(). The portable qsort() interface carries no user-data
+ * pointer (qsort_r is a non-standard BSD extension), so the sorting
+ * parameters (sort_alpha, sort_bypattern, sort_store, sort_desc) are
+ * read from the global 'server' structure. */
+int sortCompare(const void *s1, const void *s2) {
+    const redisSortObject *a = s1, *b = s2;
+    int result;
+
+    if (server.sort_alpha) {
+        /* Alphanumeric sorting. */
+        if (server.sort_bypattern) {
+            robj *ca = a->u.cmpobj, *cb = b->u.cmpobj;
+
+            if (ca == NULL || cb == NULL) {
+                /* Missing compare objects sort before present ones;
+                 * two missing objects compare equal. */
+                result = (ca == cb) ? 0 : (ca == NULL ? -1 : 1);
+            } else if (server.sort_store) {
+                /* Binary comparison when the result will be stored. */
+                result = compareStringObjects(ca,cb);
+            } else {
+                /* Both compare objects are decoded string objects here,
+                 * so strcoll() can be applied to them directly. */
+                result = strcoll(ca->ptr,cb->ptr);
+            }
+        } else {
+            /* No BY pattern: compare the elements themselves. */
+            result = server.sort_store ?
+                compareStringObjects(a->obj,b->obj) :
+                collateStringObjects(a->obj,b->obj);
+        }
+    } else {
+        /* Numeric sorting using the precomputed scores. Equal scores
+         * (including NaN, for which both comparisons are false) fall back
+         * to a lexicographic comparison so SORT stays deterministic. */
+        if (a->u.score > b->u.score)
+            result = 1;
+        else if (a->u.score < b->u.score)
+            result = -1;
+        else
+            result = compareStringObjects(a->obj,b->obj);
+    }
+    return server.sort_desc ? -result : result;
+}
+
+/* The SORT command is the most complex command in Redis. Warning: this code
+ * is optimized for speed and a bit less for readability.
+ *
+ * Shared implementation for SORT and SORT_RO: when 'readonly' is non-zero
+ * the STORE option is not accepted by the parser below, making the command
+ * side-effect free. */
+void sortCommandGeneric(client *c, int readonly) {
+    list *operations;
+    unsigned int outputlen = 0;
+    int desc = 0, alpha = 0;
+    long limit_start = 0, limit_count = -1, start, end;
+    int j, dontsort = 0, vectorlen;
+    int getop = 0; /* GET operation counter */
+    int int_conversion_error = 0;
+    int syntax_error = 0;
+    robj *sortval, *sortby = NULL, *storekey = NULL;
+    redisSortObject *vector; /* Resulting vector to sort */
+    int user_has_full_key_access = 0; /* ACL - used in order to verify 'get' and 'by' options can be used */
+    /* Create a list of operations to perform for every sorted element.
+     * Operations can be GET */
+    operations = listCreate();
+    listSetFreeMethod(operations,zfree);
+    j = 2; /* options start at argv[2] */
+
+    user_has_full_key_access = ACLUserCheckCmdWithUnrestrictedKeyAccess(c->user, c->cmd, c->argv, c->argc, CMD_KEY_ACCESS);
+
+    /* The SORT command has an SQL-alike syntax, parse it */
+    while(j < c->argc) {
+        int leftargs = c->argc-j-1;
+        if (!strcasecmp(c->argv[j]->ptr,"asc")) {
+            desc = 0;
+        } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
+            desc = 1;
+        } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
+            alpha = 1;
+        } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
+            if ((getLongFromObjectOrReply(c, c->argv[j+1], &limit_start, NULL)
+                 != C_OK) ||
+                (getLongFromObjectOrReply(c, c->argv[j+2], &limit_count, NULL)
+                 != C_OK))
+            {
+                syntax_error++;
+                break;
+            }
+            j+=2;
+        } else if (readonly == 0 && !strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
+            storekey = c->argv[j+1];
+            j++;
+        } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
+            sortby = c->argv[j+1];
+            /* If the BY pattern does not contain '*', i.e. it is constant,
+             * we don't need to sort nor to lookup the weight keys. */
+            if (strchr(c->argv[j+1]->ptr,'*') == NULL) {
+                dontsort = 1;
+            } else {
+                /* If BY is specified with a real pattern, we can't accept
+                 * it in cluster mode. */
+                if (server.cluster_enabled) {
+                    addReplyError(c,"BY option of SORT denied in Cluster mode.");
+                    syntax_error++;
+                    break;
+                }
+                /* If BY is specified with a real pattern, we can't accept
+                 * it if no full ACL key access is applied for this command. */
+                if (!user_has_full_key_access) {
+                    addReplyError(c,"BY option of SORT denied due to insufficient ACL permissions.");
+                    syntax_error++;
+                    break;
+                }
+            }
+            j++;
+        } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
+            if (server.cluster_enabled) {
+                addReplyError(c,"GET option of SORT denied in Cluster mode.");
+                syntax_error++;
+                break;
+            }
+            if (!user_has_full_key_access) {
+                addReplyError(c,"GET option of SORT denied due to insufficient ACL permissions.");
+                syntax_error++;
+                break;
+            }
+            listAddNodeTail(operations,createSortOperation(
+                SORT_OP_GET,c->argv[j+1]));
+            getop++;
+            j++;
+        } else {
+            addReplyErrorObject(c,shared.syntaxerr);
+            syntax_error++;
+            break;
+        }
+        j++;
+    }
+
+    /* Handle syntax errors set during options parsing. */
+    if (syntax_error) {
+        listRelease(operations);
+        return;
+    }
+
+    /* Lookup the key to sort. It must be of the right types */
+    sortval = lookupKeyRead(c->db, c->argv[1]);
+    if (sortval && sortval->type != OBJ_SET &&
+                   sortval->type != OBJ_LIST &&
+                   sortval->type != OBJ_ZSET)
+    {
+        listRelease(operations);
+        addReplyErrorObject(c,shared.wrongtypeerr);
+        return;
+    }
+
+    /* Now we need to protect sortval incrementing its count, in the future
+     * SORT may have options able to overwrite/delete keys during the sorting
+     * and the sorted key itself may get destroyed */
+    if (sortval)
+        incrRefCount(sortval);
+    else
+        /* Missing key: sort an empty (freshly created) list instead. */
+        sortval = createQuicklistObject();
+
+
+    /* When sorting a set with no sort specified, we must sort the output
+     * so the result is consistent across scripting and replication.
+     *
+     * The other types (list, sorted set) will retain their native order
+     * even if no sort order is requested, so they remain stable across
+     * scripting and replication. */
+    if (dontsort &&
+        sortval->type == OBJ_SET &&
+        (storekey || c->flags & CLIENT_SCRIPT))
+    {
+        /* Force ALPHA sorting */
+        dontsort = 0;
+        alpha = 1;
+        sortby = NULL;
+    }
+
+    /* Destructively convert encoded sorted sets for SORT. */
+    if (sortval->type == OBJ_ZSET)
+        zsetConvert(sortval, OBJ_ENCODING_SKIPLIST);
+
+    /* Obtain the length of the object to sort. */
+    switch(sortval->type) {
+    case OBJ_LIST: vectorlen = listTypeLength(sortval); break;
+    case OBJ_SET: vectorlen =  setTypeSize(sortval); break;
+    case OBJ_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
+    default: vectorlen = 0; serverPanic("Bad SORT type"); /* Avoid GCC warning */
+    }
+
+    /* Perform LIMIT start,count sanity checking.
+     * And avoid integer overflow by limiting inputs to object sizes.
+     * Note that an out-of-range start deliberately leaves start > end,
+     * which the output loops below treat as an empty range. */
+    start = min(max(limit_start, 0), vectorlen);
+    limit_count = min(max(limit_count, -1), vectorlen);
+    end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
+    if (start >= vectorlen) {
+        start = vectorlen-1;
+        end = vectorlen-2;
+    }
+    if (end >= vectorlen) end = vectorlen-1;
+
+    /* Whenever possible, we load elements into the output array in a more
+     * direct way. This is possible if:
+     *
+     * 1) The object to sort is a sorted set or a list (internally sorted).
+     * 2) There is nothing to sort as dontsort is true (BY <constant string>).
+     *
+     * In this special case, if we have a LIMIT option that actually reduces
+     * the number of elements to fetch, we also optimize to just load the
+     * range we are interested in and allocating a vector that is big enough
+     * for the selected range length. */
+    if ((sortval->type == OBJ_ZSET || sortval->type == OBJ_LIST) &&
+        dontsort &&
+        (start != 0 || end != vectorlen-1))
+    {
+        vectorlen = end-start+1;
+    }
+
+    /* Load the sorting vector with all the objects to sort */
+    vector = zmalloc(sizeof(redisSortObject)*vectorlen);
+    j = 0;
+
+    if (sortval->type == OBJ_LIST && dontsort) {
+        /* Special handling for a list, if 'dontsort' is true.
+         * This makes sure we return elements in the list original
+         * ordering, accordingly to DESC / ASC options.
+         *
+         * Note that in this case we also handle LIMIT here in a direct
+         * way, just getting the required range, as an optimization. */
+        if (end >= start) {
+            listTypeIterator *li;
+            listTypeEntry entry;
+            li = listTypeInitIterator(sortval,
+                    desc ? (long)(listTypeLength(sortval) - start - 1) : start,
+                    desc ? LIST_HEAD : LIST_TAIL);
+
+            while(j < vectorlen && listTypeNext(li,&entry)) {
+                vector[j].obj = listTypeGet(&entry);
+                vector[j].u.score = 0;
+                vector[j].u.cmpobj = NULL;
+                j++;
+            }
+            listTypeReleaseIterator(li);
+            /* Fix start/end: output code is not aware of this optimization. */
+            end -= start;
+            start = 0;
+        }
+    } else if (sortval->type == OBJ_LIST) {
+        listTypeIterator *li = listTypeInitIterator(sortval,0,LIST_TAIL);
+        listTypeEntry entry;
+        while(listTypeNext(li,&entry)) {
+            vector[j].obj = listTypeGet(&entry);
+            vector[j].u.score = 0;
+            vector[j].u.cmpobj = NULL;
+            j++;
+        }
+        listTypeReleaseIterator(li);
+    } else if (sortval->type == OBJ_SET) {
+        setTypeIterator *si = setTypeInitIterator(sortval);
+        sds sdsele;
+        while((sdsele = setTypeNextObject(si)) != NULL) {
+            vector[j].obj = createObject(OBJ_STRING,sdsele);
+            vector[j].u.score = 0;
+            vector[j].u.cmpobj = NULL;
+            j++;
+        }
+        setTypeReleaseIterator(si);
+    } else if (sortval->type == OBJ_ZSET && dontsort) {
+        /* Special handling for a sorted set, if 'dontsort' is true.
+         * This makes sure we return elements in the sorted set original
+         * ordering, accordingly to DESC / ASC options.
+         *
+         * Note that in this case we also handle LIMIT here in a direct
+         * way, just getting the required range, as an optimization. */
+
+        zset *zs = sortval->ptr;
+        zskiplist *zsl = zs->zsl;
+        zskiplistNode *ln;
+        sds sdsele;
+        int rangelen = vectorlen;
+
+        /* Check if starting point is trivial, before doing log(N) lookup. */
+        if (desc) {
+            long zsetlen = dictSize(((zset*)sortval->ptr)->dict);
+
+            ln = zsl->tail;
+            if (start > 0)
+                ln = zslGetElementByRank(zsl,zsetlen-start);
+        } else {
+            ln = zsl->header->level[0].forward;
+            if (start > 0)
+                ln = zslGetElementByRank(zsl,start+1);
+        }
+
+        while(rangelen--) {
+            serverAssertWithInfo(c,sortval,ln != NULL);
+            sdsele = ln->ele;
+            vector[j].obj = createStringObject(sdsele,sdslen(sdsele));
+            vector[j].u.score = 0;
+            vector[j].u.cmpobj = NULL;
+            j++;
+            ln = desc ? ln->backward : ln->level[0].forward;
+        }
+        /* Fix start/end: output code is not aware of this optimization. */
+        end -= start;
+        start = 0;
+    } else if (sortval->type == OBJ_ZSET) {
+        dict *set = ((zset*)sortval->ptr)->dict;
+        dictIterator *di;
+        dictEntry *setele;
+        sds sdsele;
+        di = dictGetIterator(set);
+        while((setele = dictNext(di)) != NULL) {
+            sdsele =  dictGetKey(setele);
+            vector[j].obj = createStringObject(sdsele,sdslen(sdsele));
+            vector[j].u.score = 0;
+            vector[j].u.cmpobj = NULL;
+            j++;
+        }
+        dictReleaseIterator(di);
+    } else {
+        serverPanic("Unknown type");
+    }
+    serverAssertWithInfo(c,sortval,j == vectorlen);
+
+    /* Now it's time to load the right scores in the sorting vector */
+    if (!dontsort) {
+        for (j = 0; j < vectorlen; j++) {
+            robj *byval;
+            if (sortby) {
+                /* lookup value to sort by */
+                byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
+                if (!byval) continue;
+            } else {
+                /* use object itself to sort by */
+                byval = vector[j].obj;
+            }
+
+            if (alpha) {
+                if (sortby) vector[j].u.cmpobj = getDecodedObject(byval);
+            } else {
+                if (sdsEncodedObject(byval)) {
+                    char *eptr;
+
+                    /* NOTE(review): errno is not reset to 0 before strtod(),
+                     * so a stale ERANGE could flag a false conversion error
+                     * here — confirm against upstream behavior. */
+                    vector[j].u.score = strtod(byval->ptr,&eptr);
+                    if (eptr[0] != '\0' || errno == ERANGE ||
+                        isnan(vector[j].u.score))
+                    {
+                        int_conversion_error = 1;
+                    }
+                } else if (byval->encoding == OBJ_ENCODING_INT) {
+                    /* Don't need to decode the object if it's
+                     * integer-encoded (the only encoding supported) so
+                     * far. We can just cast it */
+                    vector[j].u.score = (long)byval->ptr;
+                } else {
+                    serverAssertWithInfo(c,sortval,1 != 1);
+                }
+            }
+
+            /* when the object was retrieved using lookupKeyByPattern,
+             * its refcount needs to be decreased. */
+            if (sortby) {
+                decrRefCount(byval);
+            }
+        }
+
+        /* Publish the sort parameters for sortCompare() via globals (qsort
+         * has no user-data argument). */
+        server.sort_desc = desc;
+        server.sort_alpha = alpha;
+        server.sort_bypattern = sortby ? 1 : 0;
+        server.sort_store = storekey ? 1 : 0;
+        /* With BY plus a restrictive LIMIT only the [start,end] range needs
+         * to be ordered, so a partial qsort is used. */
+        if (sortby && (start != 0 || end != vectorlen-1))
+            pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
+        else
+            qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
+    }
+
+    /* Send command output to the output buffer, performing the specified
+     * GET/DEL/INCR/DECR operations if any. */
+    outputlen = getop ? getop*(end-start+1) : end-start+1;
+    if (int_conversion_error) {
+        addReplyError(c,"One or more scores can't be converted into double");
+    } else if (storekey == NULL) {
+        /* STORE option not specified, sent the sorting result to client */
+        addReplyArrayLen(c,outputlen);
+        for (j = start; j <= end; j++) {
+            listNode *ln;
+            listIter li;
+
+            if (!getop) addReplyBulk(c,vector[j].obj);
+            listRewind(operations,&li);
+            while((ln = listNext(&li))) {
+                redisSortOperation *sop = ln->value;
+                robj *val = lookupKeyByPattern(c->db,sop->pattern,
+                    vector[j].obj);
+
+                if (sop->type == SORT_OP_GET) {
+                    if (!val) {
+                        addReplyNull(c);
+                    } else {
+                        addReplyBulk(c,val);
+                        decrRefCount(val);
+                    }
+                } else {
+                    /* Always fails */
+                    serverAssertWithInfo(c,sortval,sop->type == SORT_OP_GET);
+                }
+            }
+        }
+    } else {
+        /* We can't predict the size and encoding of the stored list, we
+         * assume it's a large list and then convert it at the end if needed. */
+        robj *sobj = createQuicklistObject();
+
+        /* STORE option specified, set the sorting result as a List object */
+        for (j = start; j <= end; j++) {
+            listNode *ln;
+            listIter li;
+
+            if (!getop) {
+                listTypePush(sobj,vector[j].obj,LIST_TAIL);
+            } else {
+                listRewind(operations,&li);
+                while((ln = listNext(&li))) {
+                    redisSortOperation *sop = ln->value;
+                    robj *val = lookupKeyByPattern(c->db,sop->pattern,
+                        vector[j].obj);
+
+                    if (sop->type == SORT_OP_GET) {
+                        if (!val) val = createStringObject("",0);
+
+                        /* listTypePush does an incrRefCount, so we should take care
+                         * care of the incremented refcount caused by either
+                         * lookupKeyByPattern or createStringObject("",0) */
+                        listTypePush(sobj,val,LIST_TAIL);
+                        decrRefCount(val);
+                    } else {
+                        /* Always fails */
+                        serverAssertWithInfo(c,sortval,sop->type == SORT_OP_GET);
+                    }
+                }
+            }
+        }
+        if (outputlen) {
+            listTypeTryConversion(sobj,LIST_CONV_AUTO,NULL,NULL);
+            setKey(c,c->db,storekey,sobj,0);
+            notifyKeyspaceEvent(NOTIFY_LIST,"sortstore",storekey,
+                                c->db->id);
+            server.dirty += outputlen;
+        } else if (dbDelete(c->db,storekey)) {
+            /* Empty result: storing deletes the destination key instead. */
+            signalModifiedKey(c,c->db,storekey);
+            notifyKeyspaceEvent(NOTIFY_GENERIC,"del",storekey,c->db->id);
+            server.dirty++;
+        }
+        decrRefCount(sobj);
+        addReplyLongLong(c,outputlen);
+    }
+
+    /* Cleanup */
+    for (j = 0; j < vectorlen; j++)
+        decrRefCount(vector[j].obj);
+
+    decrRefCount(sortval);
+    listRelease(operations);
+    for (j = 0; j < vectorlen; j++) {
+        if (alpha && vector[j].u.cmpobj)
+            decrRefCount(vector[j].u.cmpobj);
+    }
+    zfree(vector);
+}
+
+/* SORT wrapper function for read-only mode. */
+/* SORT_RO: read-only variant of SORT. Delegates to the generic
+ * implementation with the STORE option disabled, so it has no side
+ * effects and can run on replicas. */
+void sortroCommand(client *c) {
+    sortCommandGeneric(c,1);
+}
+
+/* SORT: full variant, STORE allowed. Delegates to the generic
+ * implementation in writable mode. */
+void sortCommand(client *c) {
+    sortCommandGeneric(c,0);
+}
diff --git a/src/sparkline.c b/src/sparkline.c
new file mode 100644
index 0000000..4c0f2b8
--- /dev/null
+++ b/src/sparkline.c
@@ -0,0 +1,179 @@
+/* sparkline.c -- ASCII Sparklines
+ * This code is modified from http://github.com/antirez/aspark and adapted
+ * in order to return SDS strings instead of outputting directly to
+ * the terminal.
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * Copyright(C) 2011-2014 Salvatore Sanfilippo <antirez@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+#include <math.h>
+
+/* This is the charset used to display the graphs, but multiple rows are used
+ * to increase the resolution. 'charset' draws line-style sparklines,
+ * 'charset_fill' draws filled-area ones (selected via SPARKLINE_FILL). */
+static char charset[] = "_-`";
+static char charset_fill[] = "_o#";
+static int charset_len = sizeof(charset)-1; /* Exclude the NUL terminator. */
+static int label_margin_top = 1; /* Blank rows between graph and labels. */
+
+/* ----------------------------------------------------------------------------
+ * Sequences are arrays of samples we use to represent data to turn
+ * into sparklines. This is the API in order to generate a sparkline:
+ *
+ * struct sequence *seq = createSparklineSequence();
+ * sparklineSequenceAddSample(seq, 10, NULL);
+ * sparklineSequenceAddSample(seq, 20, NULL);
+ * sparklineSequenceAddSample(seq, 30, "last sample label");
+ * sds output = sparklineRender(sdsempty(), seq, 80, 4, SPARKLINE_FILL);
+ * freeSparklineSequence(seq);
+ * ------------------------------------------------------------------------- */
+
+/* Create a new, empty sequence of samples. The caller releases it with
+ * freeSparklineSequence(). */
+struct sequence *createSparklineSequence(void) {
+    struct sequence *s;
+
+    s = zmalloc(sizeof(*s));
+    s->length = 0;
+    s->labels = 0;
+    s->samples = NULL;
+    s->min = s->max = 0.0f;
+    return s;
+}
+
+/* Append a sample to the sequence, keeping the running min/max up to
+ * date. 'label' may be NULL or empty for an unlabeled sample; otherwise
+ * it is duplicated on the heap and owned by the sequence. */
+void sparklineSequenceAddSample(struct sequence *seq, double value, char *label) {
+    char *owned = (label && label[0] != '\0') ? zstrdup(label) : NULL;
+
+    if (seq->length == 0) {
+        /* First sample defines both bounds. */
+        seq->min = seq->max = value;
+    } else if (value < seq->min) {
+        seq->min = value;
+    } else if (value > seq->max) {
+        seq->max = value;
+    }
+    seq->samples = zrealloc(seq->samples,sizeof(struct sample)*(seq->length+1));
+    seq->samples[seq->length].value = value;
+    seq->samples[seq->length].label = owned;
+    seq->length++;
+    if (owned) seq->labels++;
+}
+
+/* Free a sequence together with every sample label it owns. */
+void freeSparklineSequence(struct sequence *seq) {
+    int i;
+
+    for (i = 0; i < seq->length; i++)
+        zfree(seq->samples[i].label); /* zfree(NULL) is a no-op. */
+    zfree(seq->samples);
+    zfree(seq);
+}
+
+/* ----------------------------------------------------------------------------
+ * ASCII rendering of sequence
+ * ------------------------------------------------------------------------- */
+
+/* Render part of a sequence, so that render_sequence() can call this function
+ * with different parts in order to create the full output without overflowing
+ * the current terminal columns.
+ *
+ * 'rows' graph rows are emitted first (each row resolving charset_len
+ * vertical steps), then label rows if any sample in the range carries a
+ * label. Each emitted row is appended to 'output' followed by a newline,
+ * and the grown sds string is returned. */
+sds sparklineRenderRange(sds output, struct sequence *seq, int rows, int offset, int len, int flags) {
+    int j;
+    double relmax = seq->max - seq->min;
+    int steps = charset_len*rows; /* Total vertical resolution. */
+    int row = 0;
+    char *chars = zmalloc(len);
+    int loop = 1;
+    int opt_fill = flags & SPARKLINE_FILL;
+    int opt_log = flags & SPARKLINE_LOG_SCALE;
+
+    if (opt_log) {
+        relmax = log(relmax+1);
+    } else if (relmax == 0) {
+        /* All samples equal: avoid a division by zero below. */
+        relmax = 1;
+    }
+
+    /* Emit one output row per iteration until neither graph characters
+     * nor label characters remain to be printed. */
+    while(loop) {
+        loop = 0;
+        memset(chars,' ',len);
+        for (j = 0; j < len; j++) {
+            struct sample *s = &seq->samples[j+offset];
+            double relval = s->value - seq->min;
+            int step;
+
+            if (opt_log) relval = log(relval+1);
+            /* NOTE(review): the cast binds to (relval*steps) only, so this
+             * is int-truncation followed by a double division — confirm
+             * this matches the intended rounding. */
+            step = (int) (relval*steps)/relmax;
+            if (step < 0) step = 0;
+            if (step >= steps) step = steps-1;
+
+            if (row < rows) {
+                /* Print the character needed to create the sparkline */
+                int charidx = step-((rows-row-1)*charset_len);
+                loop = 1;
+                if (charidx >= 0 && charidx < charset_len) {
+                    chars[j] = opt_fill ? charset_fill[charidx] :
+                                          charset[charidx];
+                } else if(opt_fill && charidx >= charset_len) {
+                    /* Fill mode: column passes through this row entirely. */
+                    chars[j] = '|';
+                }
+            } else {
+                /* Labels spacing */
+                if (seq->labels && row-rows < label_margin_top) {
+                    loop = 1;
+                    break;
+                }
+                /* Print the label if needed. */
+                if (s->label) {
+                    int label_len = strlen(s->label);
+                    int label_char = row - rows - label_margin_top;
+
+                    if (label_len > label_char) {
+                        loop = 1;
+                        chars[j] = s->label[label_char];
+                    }
+                }
+            }
+        }
+        if (loop) {
+            row++;
+            output = sdscatlen(output,chars,len);
+            output = sdscatlen(output,"\n",1);
+        }
+    }
+    zfree(chars);
+    return output;
+}
+
+/* Turn a whole sequence into its ASCII representation: the samples are
+ * split into chunks of at most 'columns' entries, each rendered with
+ * sparklineRenderRange() and separated by a newline. The grown sds
+ * string is returned. */
+sds sparklineRender(sds output, struct sequence *seq, int columns, int rows, int flags) {
+    int off;
+
+    for (off = 0; off < seq->length; off += columns) {
+        int chunk = seq->length - off;
+
+        if (chunk > columns) chunk = columns;
+        if (off != 0) output = sdscatlen(output,"\n",1);
+        output = sparklineRenderRange(output,seq,rows,off,chunk,flags);
+    }
+    return output;
+}
+
diff --git a/src/sparkline.h b/src/sparkline.h
new file mode 100644
index 0000000..6025d2b
--- /dev/null
+++ b/src/sparkline.h
@@ -0,0 +1,56 @@
+/* sparkline.h -- ASCII Sparklines header file
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * Copyright(C) 2011-2014 Salvatore Sanfilippo <antirez@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SPARKLINE_H
+#define __SPARKLINE_H
+
+/* A sequence is composed of many "samples": a numeric value plus an
+ * optional heap-allocated label owned by the sequence. */
+struct sample {
+    double value;   /* The sample value. */
+    char *label;    /* Optional label, NULL if none. */
+};
+
+struct sequence {
+    int length;             /* Number of samples. */
+    int labels;             /* Number of samples carrying a label. */
+    struct sample *samples; /* Array of 'length' samples. */
+    double min, max;        /* Observed value range (both 0 when empty). */
+};
+
+#define SPARKLINE_NO_FLAGS 0
+#define SPARKLINE_FILL 1      /* Fill the area under the curve. */
+#define SPARKLINE_LOG_SCALE 2 /* Use logarithmic scale. */
+
+struct sequence *createSparklineSequence(void);
+void sparklineSequenceAddSample(struct sequence *seq, double value, char *label);
+void freeSparklineSequence(struct sequence *seq);
+sds sparklineRenderRange(sds output, struct sequence *seq, int rows, int offset, int len, int flags);
+sds sparklineRender(sds output, struct sequence *seq, int columns, int rows, int flags);
+
+#endif /* __SPARKLINE_H */
diff --git a/src/stream.h b/src/stream.h
new file mode 100644
index 0000000..bfc1654
--- /dev/null
+++ b/src/stream.h
@@ -0,0 +1,147 @@
+#ifndef STREAM_H
+#define STREAM_H
+
+#include "rax.h"
+#include "listpack.h"
+
+/* Stream item ID: a 128 bit number composed of a milliseconds time and
+ * a sequence counter. IDs generated in the same millisecond (or in a past
+ * millisecond if the clock jumped backward) will use the millisecond time
+ * of the latest generated ID and an incremented sequence. */
+typedef struct streamID {
+    uint64_t ms;        /* Unix time in milliseconds. */
+    uint64_t seq;       /* Sequence number. */
+} streamID;
+
+/* The stream object: entries live in a radix tree whose nodes point to
+ * listpacks (see the streamIterator comment below about the
+ * "radix tree + listpack" representation). */
+typedef struct stream {
+    rax *rax;               /* The radix tree holding the stream. */
+    uint64_t length;        /* Current number of elements inside this stream. */
+    streamID last_id;       /* Zero if there are yet no items. */
+    streamID first_id;      /* The first non-tombstone entry, zero if empty. */
+    streamID max_deleted_entry_id;  /* The maximal ID that was deleted. */
+    uint64_t entries_added; /* All time count of elements added. */
+    rax *cgroups;           /* Consumer groups dictionary: name -> streamCG */
+} stream;
+
+/* We define an iterator to iterate stream items in an abstract way, without
+ * caring about the radix tree + listpack representation. Technically speaking
+ * the iterator is only used inside streamReplyWithRange(), so could just
+ * be implemented inside the function, but practically there is the AOF
+ * rewriting code that also needs to iterate the stream to emit the XADD
+ * commands.
+ *
+ * Typical usage (see the prototypes below): streamIteratorStart(), then a
+ * loop of streamIteratorGetID() + streamIteratorGetField(), and finally
+ * streamIteratorStop(). */
+typedef struct streamIterator {
+    stream *stream;         /* The stream we are iterating. */
+    streamID master_id;     /* ID of the master entry at listpack head. */
+    uint64_t master_fields_count;       /* Master entries # of fields. */
+    unsigned char *master_fields_start; /* Master entries start in listpack. */
+    unsigned char *master_fields_ptr;   /* Master field to emit next. */
+    int entry_flags;        /* Flags of entry we are emitting. */
+    int rev;                /* True if iterating end to start (reverse). */
+    int skip_tombstones;    /* True if not emitting tombstone entries. */
+    uint64_t start_key[2];  /* Start key as 128 bit big endian. */
+    uint64_t end_key[2];    /* End key as 128 bit big endian. */
+    raxIterator ri;         /* Rax iterator. */
+    unsigned char *lp;      /* Current listpack. */
+    unsigned char *lp_ele;  /* Current listpack cursor. */
+    unsigned char *lp_flags; /* Current entry flags pointer. */
+    /* Buffers used to hold the string of lpGet() when the element is
+     * integer encoded, so that there is no string representation of the
+     * element inside the listpack itself. */
+    unsigned char field_buf[LP_INTBUF_SIZE];
+    unsigned char value_buf[LP_INTBUF_SIZE];
+} streamIterator;
+
+/* Consumer group. */
+typedef struct streamCG {
+    streamID last_id;       /* Last delivered (not acknowledged) ID for this
+                               group. Consumers that will just ask for more
+                               messages will be served with IDs > than this. */
+    long long entries_read; /* In a perfect world (CG starts at 0-0, no dels, no
+                               XGROUP SETID, ...), this is the total number of
+                               group reads. In the real world, the reasoning behind
+                               this value is detailed at the top comment of
+                               streamEstimateDistanceFromFirstEverEntry(). */
+    rax *pel;               /* Pending entries list. This is a radix tree that
+                               has every message delivered to consumers (without
+                               the NOACK option) that was yet not acknowledged
+                               as processed. The key of the radix tree is the
+                               ID as a 64 bit big endian number, while the
+                               associated value is a streamNACK structure.*/
+    rax *consumers;         /* A radix tree representing the consumers by name
+                               and their associated representation in the form
+                               of streamConsumer structures. */
+} streamCG;
+
+/* A specific consumer in a consumer group. */
+typedef struct streamConsumer {
+    mstime_t seen_time;     /* Last time this consumer tried to perform an action (attempted reading/claiming). */
+    mstime_t active_time;   /* Last time this consumer was active (successful reading/claiming). */
+    sds name;               /* Consumer name. This is how the consumer
+                               will be identified in the consumer group
+                               protocol. Case sensitive. */
+    rax *pel;               /* Consumer specific pending entries list: all
+                               the pending messages delivered to this
+                               consumer not yet acknowledged. Keys are
+                               big endian message IDs, while values are
+                               the same streamNACK structure referenced
+                               in the "pel" of the consumer group structure
+                               itself, so the value is shared. */
+} streamConsumer;
+
+/* Pending (yet not acknowledged) message in a consumer group. */
+typedef struct streamNACK {
+    mstime_t delivery_time;     /* Last time this message was delivered. */
+    uint64_t delivery_count;    /* Number of times this message was delivered.*/
+    streamConsumer *consumer;   /* The consumer this message was delivered to
+                                   in the last delivery. */
+} streamNACK;
+
+/* Stream propagation information, passed to functions in order to propagate
+ * XCLAIM commands to AOF and slaves. */
+typedef struct streamPropInfo {
+    robj *keyname;      /* Name of the stream key the command targets. */
+    robj *groupname;    /* Consumer group name used in the propagated command. */
+} streamPropInfo;
+
+/* Prototypes of exported APIs.
+ *
+ * NOTE(review): this header is not self-contained; it relies on robj, sds,
+ * mstime_t and client being declared by the includer (i.e. server.h) --
+ * confirm include order before reusing it elsewhere. */
+struct client;
+
+/* Flags for streamCreateConsumer */
+#define SCC_DEFAULT 0
+#define SCC_NO_NOTIFY (1<<0) /* Do not notify key space if consumer created */
+#define SCC_NO_DIRTIFY (1<<1) /* Do not dirty++ if consumer created */
+
+/* NOTE(review): sentinel apparently marking an unknown/invalid
+ * 'entries_read' value (see streamCG.entries_read) -- confirm in t_stream.c. */
+#define SCG_INVALID_ENTRIES_READ -1
+
+/* Stream lifecycle. */
+stream *streamNew(void);
+void freeStream(stream *s);
+unsigned long streamLength(const robj *subject);
+/* Reply to 'c' with the entries in [start,end]; also used for delivery to
+ * consumer groups when 'group'/'consumer' are non-NULL. */
+size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count, int rev, streamCG *group, streamConsumer *consumer, int flags, streamPropInfo *spi);
+/* Iteration (see the streamIterator comment above for the protocol). */
+void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev);
+int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields);
+void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen);
+void streamIteratorRemoveEntry(streamIterator *si, streamID *current);
+void streamIteratorStop(streamIterator *si);
+/* Consumer groups / consumers / NACK bookkeeping. */
+streamCG *streamLookupCG(stream *s, sds groupname);
+streamConsumer *streamLookupConsumer(streamCG *cg, sds name);
+streamConsumer *streamCreateConsumer(streamCG *cg, sds name, robj *key, int dbid, int flags);
+streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id, long long entries_read);
+streamNACK *streamCreateNACK(streamConsumer *consumer);
+/* 128 bit ID helpers (decode/compare/increment/decrement). */
+void streamDecodeID(void *buf, streamID *id);
+int streamCompareID(streamID *a, streamID *b);
+void streamFreeNACK(streamNACK *na);
+int streamIncrID(streamID *id);
+int streamDecrID(streamID *id);
+void streamPropagateConsumerCreation(client *c, robj *key, robj *groupname, sds consumername);
+/* Duplication, validation, parsing and mutation. */
+robj *streamDup(robj *o);
+int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep);
+int streamParseID(const robj *o, streamID *id);
+robj *createObjectFromStreamID(streamID *id);
+int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_id, streamID *use_id, int seq_given);
+int streamDeleteItem(stream *s, streamID *id);
+void streamGetEdgeID(stream *s, int first, int skip_tombstones, streamID *edge_id);
+long long streamEstimateDistanceFromFirstEverEntry(stream *s, streamID *id);
+/* Trimming; return the number of entries removed. */
+int64_t streamTrimByLength(stream *s, long long maxlen, int approx);
+int64_t streamTrimByID(stream *s, streamID minid, int approx);
+
+#endif
diff --git a/src/strl.c b/src/strl.c
new file mode 100644
index 0000000..f73cf79
--- /dev/null
+++ b/src/strl.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 1998, 2015 Todd C. Miller <millert@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <string.h>
+
+/*
+ * Copy string src to buffer dst of size dsize. At most dsize-1
+ * chars will be copied. Always NUL terminates (unless dsize == 0).
+ * Returns strlen(src); if retval >= dsize, truncation occurred.
+ *
+ * This is the canonical OpenBSD strlcpy() implementation, namespaced
+ * as redis_strlcpy() to avoid clashing with libc-provided versions.
+ */
+size_t
+redis_strlcpy(char *dst, const char *src, size_t dsize)
+{
+    const char *osrc = src;     /* Remember start of src to compute its length. */
+    size_t nleft = dsize;
+
+    /* Copy as many bytes as will fit. */
+    if (nleft != 0) {
+        while (--nleft != 0) {
+            /* The NUL is copied too; stop once it has been written. */
+            if ((*dst++ = *src++) == '\0')
+                break;
+        }
+    }
+
+    /* Not enough room in dst, add NUL and traverse rest of src. */
+    if (nleft == 0) {
+        if (dsize != 0)
+            *dst = '\0';        /* NUL-terminate dst */
+        /* Walk to the end of src so the return value equals strlen(src). */
+        while (*src++)
+            ;
+    }
+
+    /* src now points one past the NUL, hence the -1. */
+    return(src - osrc - 1);     /* count does not include NUL */
+}
+
+/*
+ * Appends src to string dst of size dsize (unlike strncat, dsize is the
+ * full size of dst, not space left). At most dsize-1 characters
+ * will be copied. Always NUL terminates (unless dsize <= strlen(dst)).
+ * Returns strlen(src) + MIN(dsize, strlen(initial dst)).
+ * If retval >= dsize, truncation occurred.
+ *
+ * This is the canonical OpenBSD strlcat() implementation, namespaced
+ * as redis_strlcat() to avoid clashing with libc-provided versions.
+ */
+size_t
+redis_strlcat(char *dst, const char *src, size_t dsize)
+{
+    const char *odst = dst;     /* Original dst, to compute its length. */
+    const char *osrc = src;     /* Original src, to compute bytes appended. */
+    size_t n = dsize;
+    size_t dlen;
+
+    /* Find the end of dst and adjust bytes left but don't go past end. */
+    while (n-- != 0 && *dst != '\0')
+        dst++;
+    dlen = dst - odst;
+    n = dsize - dlen;
+
+    /* No room left at all (dst already fills/overflows the buffer):
+     * just report the length the concatenation would have needed. */
+    if (n-- == 0)
+        return(dlen + strlen(src));
+    while (*src != '\0') {
+        /* Copy while space remains; keep walking src either way so the
+         * return value reflects the untruncated length. */
+        if (n != 0) {
+            *dst++ = *src;
+            n--;
+        }
+        src++;
+    }
+    *dst = '\0';
+
+    return(dlen + (src - osrc));        /* count does not include NUL */
+}
+
+
+
+
+
diff --git a/src/syncio.c b/src/syncio.c
new file mode 100644
index 0000000..b2843d5
--- /dev/null
+++ b/src/syncio.c
@@ -0,0 +1,145 @@
+/* Synchronous socket and file I/O operations useful across the core.
+ *
+ * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/* ----------------- Blocking sockets I/O with timeouts --------------------- */
+
+/* Redis performs most of the I/O in a nonblocking way, with the exception
+ * of the SYNC command where the slave does it in a blocking way, and
+ * the MIGRATE command that must be blocking in order to be atomic from the
+ * point of view of the two instances (one migrating the key and one receiving
+ * the key). This is why we need the following blocking I/O functions.
+ *
+ * All the functions take the timeout in milliseconds. */
+
+#define SYNCIO__RESOLUTION 10 /* Resolution in milliseconds */
+
+/* Write the specified payload to 'fd'. If writing the whole payload will be
+ * done within 'timeout' milliseconds the operation succeeds and 'size' is
+ * returned. Otherwise the operation fails, -1 is returned, and an unspecified
+ * partial write could be performed against the file descriptor.
+ * On timeout errno is set to ETIMEDOUT. */
+ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) {
+    ssize_t nwritten, ret = size;       /* 'ret' preserves the full size to return. */
+    long long start = mstime();
+    long long remaining = timeout;
+
+    while(1) {
+        /* Wait budget passed to aeWait(): the remaining time, but never
+         * below the 10ms resolution. NOTE(review): assumes aeWait()
+         * returns as soon as the fd becomes writable, so passing the
+         * whole remaining time does not delay progress -- confirm in ae.c. */
+        long long wait = (remaining > SYNCIO__RESOLUTION) ?
+                          remaining : SYNCIO__RESOLUTION;
+        long long elapsed;
+
+        /* Optimistically try to write before checking if the file descriptor
+         * is actually writable. At worst we get EAGAIN. */
+        nwritten = write(fd,ptr,size);
+        if (nwritten == -1) {
+            if (errno != EAGAIN) return -1;
+        } else {
+            ptr += nwritten;
+            size -= nwritten;
+        }
+        if (size == 0) return ret;
+
+        /* Wait */
+        aeWait(fd,AE_WRITABLE,wait);
+        elapsed = mstime() - start;
+        if (elapsed >= timeout) {
+            errno = ETIMEDOUT;
+            return -1;
+        }
+        remaining = timeout - elapsed;
+    }
+}
+
+/* Read the specified amount of bytes from 'fd'. If all the bytes are read
+ * within 'timeout' milliseconds the operation succeeds and 'size' is returned.
+ * Otherwise the operation fails, -1 is returned, and an unspecified amount of
+ * data could be read from the file descriptor.
+ * On timeout errno is set to ETIMEDOUT; EOF before 'size' bytes returns -1. */
+ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout) {
+    ssize_t nread, totread = 0;
+    long long start = mstime();
+    long long remaining = timeout;
+
+    if (size == 0) return 0;
+    while(1) {
+        /* Wait budget for aeWait(); see the matching comment in syncWrite(). */
+        long long wait = (remaining > SYNCIO__RESOLUTION) ?
+                          remaining : SYNCIO__RESOLUTION;
+        long long elapsed;
+
+        /* Optimistically try to read before checking if the file descriptor
+         * is actually readable. At worst we get EAGAIN. */
+        nread = read(fd,ptr,size);
+        if (nread == 0) return -1; /* short read: peer closed before 'size' bytes. */
+        if (nread == -1) {
+            if (errno != EAGAIN) return -1;
+        } else {
+            ptr += nread;
+            size -= nread;
+            totread += nread;
+        }
+        if (size == 0) return totread;
+
+        /* Wait */
+        aeWait(fd,AE_READABLE,wait);
+        elapsed = mstime() - start;
+        if (elapsed >= timeout) {
+            errno = ETIMEDOUT;
+            return -1;
+        }
+        remaining = timeout - elapsed;
+    }
+}
+
+/* Read a line making sure that every char will not require more than 'timeout'
+ * milliseconds to be read. The terminating "\n" (and an optional "\r" right
+ * before it) is consumed but never stored; at most size-1 characters plus
+ * the null terminator are written to 'ptr'.
+ *
+ * On success the number of bytes read is returned, otherwise -1.
+ * On success the string is always correctly terminated with a 0 byte. */
+ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout) {
+    ssize_t nread = 0;
+
+    /* Guard against a zero (or negative) sized buffer: without this the
+     * decrement below would make 'size' negative, the loop condition would
+     * stay true and the writes would overrun the caller's buffer. This
+     * mirrors the size == 0 guard in syncRead(). */
+    if (size <= 0) return -1;
+
+    size--;       /* Reserve room for the terminating null byte. */
+    *ptr = '\0';  /* Keep the contract "always terminated" even for size == 1. */
+    while(size) {
+        char c;
+
+        if (syncRead(fd,&c,1,timeout) == -1) return -1;
+        if (c == '\n') {
+            /* End of line: terminate and strip the CR of a CRLF pair. */
+            *ptr = '\0';
+            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
+            return nread;
+        } else {
+            /* Store the byte keeping the buffer terminated at all times. */
+            *ptr++ = c;
+            *ptr = '\0';
+            nread++;
+        }
+        size--;
+    }
+    return nread;
+}
diff --git a/src/syscheck.c b/src/syscheck.c
new file mode 100644
index 0000000..0ea3a25
--- /dev/null
+++ b/src/syscheck.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2022, Redis Ltd.
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "fmacros.h"
+#include "config.h"
+#include "syscheck.h"
+#include "sds.h"
+#include "anet.h"
+
+#include <time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/wait.h>
+
+#ifdef __linux__
+#include <sys/mman.h>
+#endif
+
+
+#ifdef __linux__
+/* Read the first line of the sysfs file at 'path', stripped of trailing
+ * spaces/newlines, as a newly allocated sds string the caller must free.
+ * Returns NULL if the file cannot be opened or read. */
+static sds read_sysfs_line(char *path) {
+    sds line = NULL;
+    char tmp[256];
+    FILE *fp = fopen(path, "r");
+
+    if (fp != NULL) {
+        if (fgets(tmp, sizeof(tmp), fp) != NULL)
+            line = sdstrim(sdsnew(tmp), " \n");
+        fclose(fp);
+    }
+    return line;
+}
+
+/* Verify our clocksource implementation doesn't go through a system call (uses vdso).
+ * Going through a system call to check the time degrades Redis performance.
+ * Returns 1 when the check passes, -1 (with *error_msg set to a new sds the
+ * caller must free) when it fails, 0 when a system call failed and the check
+ * is inconclusive. */
+static int checkClocksource(sds *error_msg) {
+    unsigned long test_time_us, system_hz;
+    struct timespec ts;
+    unsigned long long start_us;
+    struct rusage ru_start, ru_end;
+
+    /* NOTE(review): sysconf() can return -1 on error; stored in an unsigned
+     * long that would make test_time_us 0 and the loop exit immediately,
+     * silently passing the check -- consider verifying the return value. */
+    system_hz = sysconf(_SC_CLK_TCK);
+
+    if (getrusage(RUSAGE_SELF, &ru_start) != 0)
+        return 0;
+    if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) {
+        return 0;
+    }
+    start_us = (ts.tv_sec * 1000000 + ts.tv_nsec / 1000);
+
+    /* clock_gettime() busy loop of 5 times system tick (for a system_hz of 100 this is 50ms)
+     * Using system_hz is required to ensure accurate measurements from getrusage().
+     * If our clocksource is configured correctly (vdso) this will result in no system calls.
+     * If our clocksource is inefficient it'll waste most of the busy loop in the kernel. */
+    test_time_us = 5 * 1000000 / system_hz;
+    while (1) {
+        unsigned long long d;
+        if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
+            return 0;
+        d = (ts.tv_sec * 1000000 + ts.tv_nsec / 1000) - start_us;
+        if (d >= test_time_us) break;
+    }
+    if (getrusage(RUSAGE_SELF, &ru_end) != 0)
+        return 0;
+
+    /* CPU time (microseconds) spent in kernel and user space across the busy loop. */
+    long long stime_us = (ru_end.ru_stime.tv_sec * 1000000 + ru_end.ru_stime.tv_usec) - (ru_start.ru_stime.tv_sec * 1000000 + ru_start.ru_stime.tv_usec);
+    long long utime_us = (ru_end.ru_utime.tv_sec * 1000000 + ru_end.ru_utime.tv_usec) - (ru_start.ru_utime.tv_sec * 1000000 + ru_start.ru_utime.tv_usec);
+
+    /* If more than 10% of the process time was in system calls we probably have an inefficient clocksource, print a warning */
+    if (stime_us * 10 > stime_us + utime_us) {
+        sds avail = read_sysfs_line("/sys/devices/system/clocksource/clocksource0/available_clocksource");
+        sds curr = read_sysfs_line("/sys/devices/system/clocksource/clocksource0/current_clocksource");
+        *error_msg = sdscatprintf(sdsempty(),
+            "Slow system clocksource detected. This can result in degraded performance. "
+            "Consider changing the system's clocksource. "
+            "Current clocksource: %s. Available clocksources: %s. "
+            "For example: run the command 'echo tsc > /sys/devices/system/clocksource/clocksource0/current_clocksource' as root. "
+            "To permanently change the system's clocksource you'll need to set the 'clocksource=' kernel command line parameter.",
+            curr ? curr : "", avail ? avail : "");
+        sdsfree(avail);
+        sdsfree(curr);
+        return -1;
+    } else {
+        return 1;
+    }
+}
+
+/* Verify we're not using the `xen` clocksource. The xen hypervisor's default
+ * clocksource is slow and affects Redis's performance. This has been measured
+ * on ec2 xen based instances; ec2 recommends using the non-default tsc clock
+ * source for these instances.
+ * Returns 1 on pass, -1 (setting *error_msg) on failure, 0 when the current
+ * clocksource cannot be determined. */
+int checkXenClocksource(sds *error_msg) {
+    sds clocksource = read_sysfs_line("/sys/devices/system/clocksource/clocksource0/current_clocksource");
+
+    if (clocksource == NULL) return 0;  /* sysfs unreadable: inconclusive. */
+
+    int result = 1;
+    if (!strcmp(clocksource, "xen")) {
+        *error_msg = sdsnew(
+            "Your system is configured to use the 'xen' clocksource which might lead to degraded performance. "
+            "Check the result of the [slow-clocksource] system check: run 'redis-server --check-system' to check if "
+            "the system's clocksource isn't degrading performance.");
+        result = -1;
+    }
+    sdsfree(clocksource);
+    return result;
+}
+
+/* Verify overcommit is enabled.
+ * When overcommit memory is disabled Linux will kill the forked child of a
+ * background save if we don't have enough free memory to satisfy double the
+ * current memory usage even though the forked child uses copy-on-write to
+ * reduce its actual memory usage.
+ * Returns 1 on pass, -1 (setting *error_msg) on failure, 0 when /proc is
+ * unreadable and the check is inconclusive. */
+int checkOvercommit(sds *error_msg) {
+    char line[64];
+    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
+
+    if (fp == NULL) return 0;
+
+    /* Read the single policy digit; treat a read failure as inconclusive. */
+    char *got = fgets(line,64,fp);
+    fclose(fp);
+    if (got == NULL) return 0;
+
+    if (strtol(line, NULL, 10) == 1) return 1;
+
+    *error_msg = sdsnew(
+        "Memory overcommit must be enabled! Without it, a background save or replication may fail under low memory condition. "
+#if defined(USE_JEMALLOC)
+        "Being disabled, it can also cause failures without low memory condition, see https://github.com/jemalloc/jemalloc/issues/1328. "
+#endif
+        "To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the "
+        "command 'sysctl vm.overcommit_memory=1' for this to take effect.");
+    return -1;
+}
+
+/* Make sure transparent huge pages aren't always enabled. When they are this
+ * can cause copy-on-write logic to consume much more memory and reduce
+ * performance during forks.
+ * Returns 1 on pass, -1 (setting *error_msg) on failure, 0 when the sysfs
+ * file is unreadable and the check is inconclusive. */
+int checkTHPEnabled(sds *error_msg) {
+    char line[1024];
+    FILE *fp = fopen("/sys/kernel/mm/transparent_hugepage/enabled","r");
+
+    if (fp == NULL) return 0;
+
+    char *got = fgets(line,sizeof(line),fp);
+    fclose(fp);
+    if (got == NULL) return 0;
+
+    /* The active policy is bracketed, e.g. "always madvise [never]". */
+    if (strstr(line,"[always]") == NULL) return 1;
+
+    *error_msg = sdsnew(
+        "You have Transparent Huge Pages (THP) support enabled in your kernel. "
+        "This will create latency and memory usage issues with Redis. "
+        "To fix this issue run the command 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled' as root, "
+        "and add it to your /etc/rc.local in order to retain the setting after a reboot. "
+        "Redis must be restarted after THP is disabled (set to 'madvise' or 'never').");
+    return -1;
+}
+
+#ifdef __arm64__
+/* Get size in kilobytes of the Shared_Dirty pages of the calling process for the
+ * memory map corresponding to the provided address, or -1 on error. */
+static int smapsGetSharedDirty(unsigned long addr) {
+    int ret, in_mapping = 0, val = -1;
+    unsigned long from, to;
+    char buf[64];   /* Long lines are read in chunks, but the "addr-addr" and
+                       "Shared_Dirty:" lines of interest fit in 64 bytes. */
+    FILE *f;
+
+    f = fopen("/proc/self/smaps", "r");
+    if (!f) return -1;
+
+    while (1) {
+        if (!fgets(buf, sizeof(buf), f))
+            break;
+
+        /* A "from-to" address range line starts a new mapping section;
+         * track whether 'addr' falls inside the current one. */
+        ret = sscanf(buf, "%lx-%lx", &from, &to);
+        if (ret == 2)
+            in_mapping = from <= addr && addr < to;
+
+        /* Only the Shared_Dirty line of the matching mapping is wanted. */
+        if (in_mapping && !memcmp(buf, "Shared_Dirty:", 13)) {
+            sscanf(buf, "%*s %d", &val);
+            /* If parsing fails, we remain with val == -1 */
+            break;
+        }
+    }
+
+    fclose(f);
+    return val;
+}
+
+/* Older arm64 Linux kernels have a bug that could lead to data corruption
+ * during background save in certain scenarios. This function checks if the
+ * kernel is affected.
+ * The bug was fixed in commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b
+ * titled: "arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()"
+ *
+ * Returns 1 when the kernel looks unaffected, -1 (with *error_msg set) when
+ * the bug is detected, and 0 when the check could not be completed. */
+int checkLinuxMadvFreeForkBug(sds *error_msg) {
+    int ret, pipefd[2] = { -1, -1 };
+    pid_t pid;
+    char *p = NULL, *q;
+    int res = 1;
+    long page_size = sysconf(_SC_PAGESIZE);
+    long map_size = 3 * page_size;
+
+    /* Create a memory map that's in our full control (not one used by the allocator). */
+    p = mmap(NULL, map_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    if (p == MAP_FAILED) {
+        return 0;
+    }
+
+    /* 'q' is the middle page; the two surrounding pages only exist to pin
+     * the VMA boundaries. */
+    q = p + page_size;
+
+    /* Split the memory map in 3 pages by setting their protection as RO|RW|RO to prevent
+     * Linux from merging this memory map with adjacent VMAs. */
+    ret = mprotect(q, page_size, PROT_READ | PROT_WRITE);
+    if (ret < 0) {
+        res = 0;
+        goto exit;
+    }
+
+    /* Write to the page once to make it resident */
+    *(volatile char*)q = 0;
+
+    /* Tell the kernel that this page is free to be reclaimed. */
+#ifndef MADV_FREE
+#define MADV_FREE 8
+#endif
+    ret = madvise(q, page_size, MADV_FREE);
+    if (ret < 0) {
+        /* MADV_FREE is not available on older kernels that are presumably
+         * not affected. */
+        if (errno == EINVAL) goto exit;
+
+        res = 0;
+        goto exit;
+    }
+
+    /* Write to the page after being marked for freeing, this is supposed to take
+     * ownership of that page again. */
+    *(volatile char*)q = 0;
+
+    /* Create a pipe for the child to return the info to the parent.
+     * NOTE(review): anetPipe() is Redis' pipe(2) wrapper; flags (0,0)
+     * presumably mean blocking ends -- confirm in anet.c. */
+    ret = anetPipe(pipefd, 0, 0);
+    if (ret < 0) {
+        res = 0;
+        goto exit;
+    }
+
+    /* Fork the process. */
+    pid = fork();
+    if (pid < 0) {
+        res = 0;
+        goto exit;
+    } else if (!pid) {
+        /* Child: check if the page is marked as dirty, page_size in kb.
+         * A value of 0 means the kernel is affected by the bug. */
+        ret = smapsGetSharedDirty((unsigned long) q);
+        if (!ret)
+            res = -1;
+        else if (ret == -1) /* Failed to read */
+            res = 0;
+
+        /* The child never returns: it reports 'res' through the pipe and exits. */
+        ret = write(pipefd[1], &res, sizeof(res)); /* Assume success, ignore return value*/
+        exit(0);
+    } else {
+        /* Read the result from the child. */
+        ret = read(pipefd[0], &res, sizeof(res));
+        if (ret < 0) {
+            res = 0;
+        }
+
+        /* Reap the child pid. */
+        waitpid(pid, NULL, 0);
+    }
+
+exit:
+    /* Cleanup */
+    if (pipefd[0] != -1) close(pipefd[0]);
+    if (pipefd[1] != -1) close(pipefd[1]);
+    if (p != NULL) munmap(p, map_size);
+
+    if (res == -1)
+        *error_msg = sdsnew(
+            "Your kernel has a bug that could lead to data corruption during background save. "
+            "Please upgrade to the latest stable kernel.");
+
+    return res;
+}
+#endif /* __arm64__ */
+#endif /* __linux__ */
+
+/*
+ * Standard system check interface:
+ * Each check has a name `name` and a function pointer `check_fn`.
+ * `check_fn` should return:
+ *     -1 in case the check fails.
+ *     1 in case the check passes.
+ *     0 in case the check could not be completed (usually because of some unexpected failed system call).
+ * When (and only when) the check fails and -1 is returned, an error description is placed in a new sds
+ * pointed to by the single `sds*` argument to `check_fn`. This message should be freed by the caller
+ * via `sdsfree()`.
+ */
+typedef struct {
+    const char *name;       /* Human readable check name, printed in the report. */
+    int (*check_fn)(sds*);  /* The check implementation; contract above. */
+} check;
+
+/* Registry of all checks; terminated by the {NULL, NULL} entry. */
+check checks[] = {
+#ifdef __linux__
+    {.name = "slow-clocksource", .check_fn = checkClocksource},
+    {.name = "xen-clocksource", .check_fn = checkXenClocksource},
+    {.name = "overcommit", .check_fn = checkOvercommit},
+    {.name = "THP", .check_fn = checkTHPEnabled},
+#ifdef __arm64__
+    {.name = "madvise-free-fork-bug", .check_fn = checkLinuxMadvFreeForkBug},
+#endif
+#endif
+    {.name = NULL, .check_fn = NULL}
+};
+
+/* Performs various system checks, printing a per-check report to stdout.
+ * Returns 0 if any check fails, 1 otherwise. */
+int syscheck(void) {
+    int all_passed = 1;
+    sds msg = NULL;
+
+    for (check *c = checks; c->check_fn != NULL; c++) {
+        int status = c->check_fn(&msg);
+        printf("[%s]...", c->name);
+        switch (status) {
+        case 0:
+            /* The check could not be completed. */
+            printf("skipped\n");
+            break;
+        case 1:
+            printf("OK\n");
+            break;
+        default:
+            /* Check failed: report the message the check allocated. */
+            printf("WARNING:\n");
+            printf("%s\n", msg);
+            sdsfree(msg);
+            all_passed = 0;
+            break;
+        }
+    }
+
+    return all_passed;
+}
diff --git a/src/syscheck.h b/src/syscheck.h
new file mode 100644
index 0000000..096f0f5
--- /dev/null
+++ b/src/syscheck.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022, Redis Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SYSCHECK_H
+#define __SYSCHECK_H
+
+#include "sds.h"
+#include "config.h"
+
+/* Run all registered system checks, printing a report to stdout.
+ * Returns 1 if every check passed (or was skipped), 0 if any failed. */
+int syscheck(void);
+
+/* Individual checks (Linux only). Each returns 1 (pass), -1 (fail, with
+ * *error_msg set to a new sds the caller must sdsfree()), or 0 when the
+ * check could not be completed. */
+#ifdef __linux__
+int checkXenClocksource(sds *error_msg);
+int checkTHPEnabled(sds *error_msg);
+int checkOvercommit(sds *error_msg);
+#ifdef __arm64__
+int checkLinuxMadvFreeForkBug(sds *error_msg);
+#endif
+#endif
+
+#endif
diff --git a/src/t_hash.c b/src/t_hash.c
new file mode 100644
index 0000000..b199d8c
--- /dev/null
+++ b/src/t_hash.c
@@ -0,0 +1,1163 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include <math.h>
+
+/*-----------------------------------------------------------------------------
+ * Hash type API
+ *----------------------------------------------------------------------------*/
+
+/* Check the length of a number of objects to see if we need to convert a
+ * listpack to a real hash. Note that we only check string encoded objects
+ * as their string length can be queried in constant time. */
+void hashTypeTryConversion(robj *o, robj **argv, int start, int end) {
+    int i;
+    size_t sum = 0;
+
+    /* Only a listpack-encoded hash can need conversion. */
+    if (o->encoding != OBJ_ENCODING_LISTPACK) return;
+
+    /* We guess that most of the values in the input are unique, so
+     * if there are enough arguments we create a pre-sized hash, which
+     * might over allocate memory if there are duplicates. */
+    size_t new_fields = (end - start + 1) / 2;
+    if (new_fields > server.hash_max_listpack_entries) {
+        hashTypeConvert(o, OBJ_ENCODING_HT);
+        dictExpand(o->ptr, new_fields);
+        return;
+    }
+
+    for (i = start; i <= end; i++) {
+        /* Only sds-encoded objects expose their length in O(1);
+         * other encodings (e.g. integers) are skipped by design. */
+        if (!sdsEncodedObject(argv[i]))
+            continue;
+        size_t len = sdslen(argv[i]->ptr);
+        if (len > server.hash_max_listpack_value) {
+            hashTypeConvert(o, OBJ_ENCODING_HT);
+            return;
+        }
+        sum += len;
+    }
+    /* Also convert if the total payload would not fit safely in a listpack. */
+    if (!lpSafeToAdd(o->ptr, sum))
+        hashTypeConvert(o, OBJ_ENCODING_HT);
+}
+
+/* Get the value from a listpack encoded hash, identified by field.
+ * Returns -1 when the field cannot be found, 0 on success. On success the
+ * value is returned either as a string via *vstr/*vlen, or as an integer
+ * in *vll (in which case *vstr is left as set by lpGetValue()). */
+int hashTypeGetFromListpack(robj *o, sds field,
+                            unsigned char **vstr,
+                            unsigned int *vlen,
+                            long long *vll)
+{
+    unsigned char *zl, *fptr = NULL, *vptr = NULL;
+
+    serverAssert(o->encoding == OBJ_ENCODING_LISTPACK);
+
+    zl = o->ptr;
+    fptr = lpFirst(zl);
+    if (fptr != NULL) {
+        /* Fields and values alternate in the listpack; the trailing 1
+         * makes lpFind() skip the value entry after each field. */
+        fptr = lpFind(zl, fptr, (unsigned char*)field, sdslen(field), 1);
+        if (fptr != NULL) {
+            /* Grab pointer to the value (fptr points to the field) */
+            vptr = lpNext(zl, fptr);
+            serverAssert(vptr != NULL);
+        }
+    }
+
+    if (vptr != NULL) {
+        *vstr = lpGetValue(vptr, vlen, vll);
+        return 0;
+    }
+
+    return -1;
+}
+
+/* Get the value from a hash table encoded hash, identified by field.
+ * Returns NULL when the field cannot be found, otherwise the SDS value
+ * is returned. The returned sds is owned by the hash; the caller must not
+ * free or modify it. */
+sds hashTypeGetFromHashTable(robj *o, sds field) {
+    dictEntry *de;
+
+    serverAssert(o->encoding == OBJ_ENCODING_HT);
+
+    de = dictFind(o->ptr, field);
+    if (de == NULL) return NULL;
+    return dictGetVal(de);
+}
+
+/* Higher level function of hashTypeGet*() that returns the hash value
+ * associated with the specified field. If the field is found C_OK
+ * is returned, otherwise C_ERR. The returned object is returned by
+ * reference in either *vstr and *vlen if it's returned in string form,
+ * or stored in *vll if it's returned as a number.
+ *
+ * If *vll is populated *vstr is set to NULL, so the caller
+ * can always check the function return by checking the return value
+ * for C_OK and checking if vll (or vstr) is NULL. */
+int hashTypeGetValue(robj *o, sds field, unsigned char **vstr, unsigned int *vlen, long long *vll) {
+    if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        /* Pre-clear *vstr so "number" results are distinguishable. */
+        *vstr = NULL;
+        if (hashTypeGetFromListpack(o, field, vstr, vlen, vll) == 0)
+            return C_OK;
+    } else if (o->encoding == OBJ_ENCODING_HT) {
+        sds value;
+        if ((value = hashTypeGetFromHashTable(o, field)) != NULL) {
+            /* HT values are always sds strings, never numbers. */
+            *vstr = (unsigned char*) value;
+            *vlen = sdslen(value);
+            return C_OK;
+        }
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+    return C_ERR;
+}
+
+/* Like hashTypeGetValue() but returns a Redis object, which is useful for
+ * interaction with the hash type outside t_hash.c.
+ * The function returns NULL if the field is not found in the hash. Otherwise
+ * a newly allocated string object with the value is returned: the caller
+ * owns the new object and must decrRefCount() it. */
+robj *hashTypeGetValueObject(robj *o, sds field) {
+    unsigned char *vstr;
+    unsigned int vlen;
+    long long vll;
+
+    if (hashTypeGetValue(o,field,&vstr,&vlen,&vll) == C_ERR) return NULL;
+    /* String form takes precedence; otherwise the value was an integer. */
+    if (vstr) return createStringObject((char*)vstr,vlen);
+    else return createStringObjectFromLongLong(vll);
+}
+
+/* Higher level function using hashTypeGet*() to return the length of the
+ * object associated with the requested field, or 0 if the field does not
+ * exist. For integer-encoded values the length is the number of digits of
+ * the decimal representation (sign included, per sdigits10()). */
+size_t hashTypeGetValueLength(robj *o, sds field) {
+    size_t len = 0;
+    unsigned char *vstr = NULL;
+    unsigned int vlen = UINT_MAX;
+    long long vll = LLONG_MAX;
+
+    if (hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_OK)
+        len = vstr ? vlen : sdigits10(vll);
+
+    return len;
+}
+
+/* Test if the specified field exists in the given hash. Returns 1 if the field
+ * exists, and 0 when it doesn't. The output parameters of hashTypeGetValue()
+ * are ignored; only the C_OK/C_ERR result matters here. */
+int hashTypeExists(robj *o, sds field) {
+    unsigned char *vstr = NULL;
+    unsigned int vlen = UINT_MAX;
+    long long vll = LLONG_MAX;
+
+    return hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_OK;
+}
+
+/* Add a new field, overwrite the old with the new value if it already exists.
+ * Return 0 on insert and 1 on update.
+ *
+ * By default, the key and value SDS strings are copied if needed, so the
+ * caller retains ownership of the strings passed. However this behavior
+ * can be effected by passing appropriate flags (possibly bitwise OR-ed):
+ *
+ * HASH_SET_TAKE_FIELD -- The SDS field ownership passes to the function.
+ * HASH_SET_TAKE_VALUE -- The SDS value ownership passes to the function.
+ *
+ * When the flags are used the caller does not need to release the passed
+ * SDS string(s). It's up to the function to use the string to create a new
+ * entry or to free the SDS string before returning to the caller.
+ *
+ * HASH_SET_COPY corresponds to no flags passed, and means the default
+ * semantics of copying the values if needed.
+ *
+ */
+#define HASH_SET_TAKE_FIELD (1<<0)
+#define HASH_SET_TAKE_VALUE (1<<1)
+#define HASH_SET_COPY 0
+/* Add a new field, overwrite the old with the new value if it already exists.
+ * Return 0 on insert and 1 on update. See the comment block above for the
+ * HASH_SET_* ownership-transfer semantics of 'flags'. */
+int hashTypeSet(robj *o, sds field, sds value, int flags) {
+    int update = 0;
+
+    /* Check if the field is too long for listpack, and convert before adding the item.
+     * This is needed for HINCRBY* case since in other commands this is handled early by
+     * hashTypeTryConversion, so this check will be a NOP. */
+    if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        if (sdslen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value)
+            hashTypeConvert(o, OBJ_ENCODING_HT);
+    }
+
+    if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *zl, *fptr, *vptr;
+
+        zl = o->ptr;
+        fptr = lpFirst(zl);
+        if (fptr != NULL) {
+            fptr = lpFind(zl, fptr, (unsigned char*)field, sdslen(field), 1);
+            if (fptr != NULL) {
+                /* Grab pointer to the value (fptr points to the field) */
+                vptr = lpNext(zl, fptr);
+                serverAssert(vptr != NULL);
+                update = 1;
+
+                /* Replace value */
+                zl = lpReplace(zl, &vptr, (unsigned char*)value, sdslen(value));
+            }
+        }
+
+        if (!update) {
+            /* Push new field/value pair onto the tail of the listpack */
+            zl = lpAppend(zl, (unsigned char*)field, sdslen(field));
+            zl = lpAppend(zl, (unsigned char*)value, sdslen(value));
+        }
+        /* lpReplace()/lpAppend() may reallocate: store back the new pointer. */
+        o->ptr = zl;
+
+        /* Check if the listpack needs to be converted to a hash table */
+        if (hashTypeLength(o) > server.hash_max_listpack_entries)
+            hashTypeConvert(o, OBJ_ENCODING_HT);
+    } else if (o->encoding == OBJ_ENCODING_HT) {
+        dict *ht = o->ptr;
+        dictEntry *de, *existing;
+        sds v;
+        /* Decide whether we take ownership of 'value' or copy it. Taken
+         * strings are NULL-ed locally so the cleanup below won't free them. */
+        if (flags & HASH_SET_TAKE_VALUE) {
+            v = value;
+            value = NULL;
+        } else {
+            v = sdsdup(value);
+        }
+        de = dictAddRaw(ht, field, &existing);
+        if (de) {
+            /* New entry: the dict key currently aliases 'field'; either keep
+             * it (ownership taken) or replace it with a private copy. */
+            dictSetVal(ht, de, v);
+            if (flags & HASH_SET_TAKE_FIELD) {
+                field = NULL;
+            } else {
+                dictSetKey(ht, de, sdsdup(field));
+            }
+        } else {
+            /* Field already present: free the old value and install the new. */
+            sdsfree(dictGetVal(existing));
+            dictSetVal(ht, existing, v);
+            update = 1;
+        }
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+
+    /* Free SDS strings we did not referenced elsewhere if the flags
+     * want this function to be responsible. */
+    if (flags & HASH_SET_TAKE_FIELD && field) sdsfree(field);
+    if (flags & HASH_SET_TAKE_VALUE && value) sdsfree(value);
+    return update;
+}
+
+/* Delete an element from a hash.
+ * Return 1 on deleted and 0 on not found. */
+int hashTypeDelete(robj *o, sds field) {
+    int deleted = 0;
+
+    if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *zl, *fptr;
+
+        zl = o->ptr;
+        fptr = lpFirst(zl);
+        if (fptr != NULL) {
+            fptr = lpFind(zl, fptr, (unsigned char*)field, sdslen(field), 1);
+            if (fptr != NULL) {
+                /* Delete both of the key and the value. */
+                zl = lpDeleteRangeWithEntry(zl,&fptr,2);
+                o->ptr = zl;
+                deleted = 1;
+            }
+        }
+    } else if (o->encoding == OBJ_ENCODING_HT) {
+        /* dictDelete() returns DICT_OK/DICT_ERR: compare against DICT_OK
+         * rather than C_OK, which only worked because both constants
+         * happen to be 0. */
+        if (dictDelete((dict*)o->ptr, field) == DICT_OK) {
+            deleted = 1;
+
+            /* Always check if the dictionary needs a resize after a delete. */
+            if (htNeedsResize(o->ptr)) dictResize(o->ptr);
+        }
+
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+    return deleted;
+}
+
+/* Return the number of elements (field/value pairs) in a hash. */
+unsigned long hashTypeLength(const robj *o) {
+    unsigned long length = ULONG_MAX;
+
+    if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        /* Each pair occupies two consecutive listpack entries. */
+        length = lpLength(o->ptr) / 2;
+    } else if (o->encoding == OBJ_ENCODING_HT) {
+        length = dictSize((const dict*)o->ptr);
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+    return length;
+}
+
+/* Create an iterator over 'subject'. Must be released with
+ * hashTypeReleaseIterator(). The cursor starts before the first entry;
+ * call hashTypeNext() to advance to it. */
+hashTypeIterator *hashTypeInitIterator(robj *subject) {
+    hashTypeIterator *hi = zmalloc(sizeof(hashTypeIterator));
+    hi->subject = subject;
+    hi->encoding = subject->encoding;
+
+    if (hi->encoding == OBJ_ENCODING_LISTPACK) {
+        /* NULL fptr/vptr means "not yet positioned" (see hashTypeNext()). */
+        hi->fptr = NULL;
+        hi->vptr = NULL;
+    } else if (hi->encoding == OBJ_ENCODING_HT) {
+        hi->di = dictGetIterator(subject->ptr);
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+    return hi;
+}
+
+/* Release an iterator created by hashTypeInitIterator(). Only the HT
+ * encoding allocates an inner dict iterator that needs freeing. */
+void hashTypeReleaseIterator(hashTypeIterator *hi) {
+    if (hi->encoding == OBJ_ENCODING_HT)
+        dictReleaseIterator(hi->di);
+    zfree(hi);
+}
+
+/* Move to the next entry in the hash. Return C_OK when the next entry
+ * could be found and C_ERR when the iterator reaches the end. */
+int hashTypeNext(hashTypeIterator *hi) {
+    if (hi->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *zl;
+        unsigned char *fptr, *vptr;
+
+        zl = hi->subject->ptr;
+        fptr = hi->fptr;
+        vptr = hi->vptr;
+
+        if (fptr == NULL) {
+            /* Initialize cursor */
+            serverAssert(vptr == NULL);
+            fptr = lpFirst(zl);
+        } else {
+            /* Advance cursor: the next field follows the current value. */
+            serverAssert(vptr != NULL);
+            fptr = lpNext(zl, vptr);
+        }
+        if (fptr == NULL) return C_ERR;
+
+        /* Grab pointer to the value (fptr points to the field) */
+        vptr = lpNext(zl, fptr);
+        serverAssert(vptr != NULL);
+
+        /* fptr, vptr now point to the first or next pair */
+        hi->fptr = fptr;
+        hi->vptr = vptr;
+    } else if (hi->encoding == OBJ_ENCODING_HT) {
+        if ((hi->de = dictNext(hi->di)) == NULL) return C_ERR;
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+    return C_OK;
+}
+
+/* Get the field or value at iterator cursor, for an iterator on a hash value
+ * encoded as a listpack. Prototype is similar to `hashTypeGetFromListpack`.
+ * 'what' selects OBJ_HASH_KEY or OBJ_HASH_VALUE. */
+void hashTypeCurrentFromListpack(hashTypeIterator *hi, int what,
+                                 unsigned char **vstr,
+                                 unsigned int *vlen,
+                                 long long *vll)
+{
+    serverAssert(hi->encoding == OBJ_ENCODING_LISTPACK);
+
+    if (what & OBJ_HASH_KEY) {
+        *vstr = lpGetValue(hi->fptr, vlen, vll);
+    } else {
+        *vstr = lpGetValue(hi->vptr, vlen, vll);
+    }
+}
+
+/* Get the field or value at iterator cursor, for an iterator on a hash value
+ * encoded as a hash table. Prototype is similar to
+ * `hashTypeGetFromHashTable`. The returned sds is owned by the hash. */
+sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what) {
+    serverAssert(hi->encoding == OBJ_ENCODING_HT);
+
+    if (what & OBJ_HASH_KEY) {
+        return dictGetKey(hi->de);
+    } else {
+        return dictGetVal(hi->de);
+    }
+}
+
+/* Higher level function of hashTypeCurrent*() that returns the hash value
+ * at current iterator position.
+ *
+ * The returned element is returned by reference in either *vstr and *vlen if
+ * it's returned in string form, or stored in *vll if it's returned as
+ * a number.
+ *
+ * If *vll is populated *vstr is set to NULL, so the caller
+ * can always check the function return by checking the return value
+ * type checking if vstr == NULL. */
+void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr, unsigned int *vlen, long long *vll) {
+    if (hi->encoding == OBJ_ENCODING_LISTPACK) {
+        *vstr = NULL;
+        hashTypeCurrentFromListpack(hi, what, vstr, vlen, vll);
+    } else if (hi->encoding == OBJ_ENCODING_HT) {
+        /* HT entries are always sds strings, so only vstr/vlen are filled. */
+        sds ele = hashTypeCurrentFromHashTable(hi, what);
+        *vstr = (unsigned char*) ele;
+        *vlen = sdslen(ele);
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+}
+
+/* Return the key or value at the current iterator position as a new
+ * SDS string. The caller owns the returned string and must sdsfree() it. */
+sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what) {
+    unsigned char *vstr;
+    unsigned int vlen;
+    long long vll;
+
+    hashTypeCurrentObject(hi,what,&vstr,&vlen,&vll);
+    if (vstr) return sdsnewlen(vstr,vlen);
+    /* Integer-encoded listpack entry: render it as a decimal string. */
+    return sdsfromlonglong(vll);
+}
+
+/* Look up 'key' for writing, creating an empty hash in the keyspace when it
+ * does not exist. Returns NULL (after replying with a type error to the
+ * client, via checkType()) if the key holds a non-hash value. */
+robj *hashTypeLookupWriteOrCreate(client *c, robj *key) {
+    robj *o = lookupKeyWrite(c->db,key);
+    if (checkType(c,o,OBJ_HASH)) return NULL;
+
+    if (o == NULL) {
+        o = createHashObject();
+        dbAdd(c->db,key,o);
+    }
+    return o;
+}
+
+
+/* Convert a listpack-encoded hash to the target encoding 'enc'. Only
+ * OBJ_ENCODING_HT is a real conversion; converting to LISTPACK is a no-op. */
+void hashTypeConvertListpack(robj *o, int enc) {
+    serverAssert(o->encoding == OBJ_ENCODING_LISTPACK);
+
+    if (enc == OBJ_ENCODING_LISTPACK) {
+        /* Nothing to do... */
+
+    } else if (enc == OBJ_ENCODING_HT) {
+        hashTypeIterator *hi;
+        dict *dict;
+        int ret;
+
+        hi = hashTypeInitIterator(o);
+        dict = dictCreate(&hashDictType);
+
+        /* Presize the dict to avoid rehashing */
+        dictExpand(dict,hashTypeLength(o));
+
+        while (hashTypeNext(hi) != C_ERR) {
+            sds key, value;
+
+            key = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
+            value = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
+            ret = dictAdd(dict, key, value);
+            if (ret != DICT_OK) {
+                /* Duplicate fields mean the listpack is corrupted: dump it
+                 * for forensics and abort the server. */
+                sdsfree(key); sdsfree(value); /* Needed for gcc ASAN */
+                hashTypeReleaseIterator(hi);  /* Needed for gcc ASAN */
+                serverLogHexDump(LL_WARNING,"listpack with dup elements dump",
+                    o->ptr,lpBytes(o->ptr));
+                serverPanic("Listpack corruption detected");
+            }
+        }
+        hashTypeReleaseIterator(hi);
+        /* Swap the representation: free the listpack, install the dict. */
+        zfree(o->ptr);
+        o->encoding = OBJ_ENCODING_HT;
+        o->ptr = dict;
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+}
+
+/* Convert a hash object to the given encoding. Only the
+ * listpack -> hash table direction is implemented. */
+void hashTypeConvert(robj *o, int enc) {
+    if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        hashTypeConvertListpack(o, enc);
+    } else if (o->encoding == OBJ_ENCODING_HT) {
+        /* Downgrading an HT back to a listpack is intentionally unsupported. */
+        serverPanic("Not implemented");
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+}
+
+/* This is a helper function for the COPY command.
+ * Duplicate a hash object, with the guarantee that the returned object
+ * has the same encoding as the original one.
+ *
+ * The resulting object always has refcount set to 1 */
+robj *hashTypeDup(robj *o) {
+    robj *hobj;
+    hashTypeIterator *hi;
+
+    serverAssert(o->type == OBJ_HASH);
+
+    if(o->encoding == OBJ_ENCODING_LISTPACK) {
+        /* Listpacks are a single allocation: a flat memcpy clones them. */
+        unsigned char *zl = o->ptr;
+        size_t sz = lpBytes(zl);
+        unsigned char *new_zl = zmalloc(sz);
+        memcpy(new_zl, zl, sz);
+        hobj = createObject(OBJ_HASH, new_zl);
+        hobj->encoding = OBJ_ENCODING_LISTPACK;
+    } else if(o->encoding == OBJ_ENCODING_HT){
+        dict *d = dictCreate(&hashDictType);
+        /* Presize to avoid incremental rehashing while copying. */
+        dictExpand(d, dictSize((const dict*)o->ptr));
+
+        hi = hashTypeInitIterator(o);
+        while (hashTypeNext(hi) != C_ERR) {
+            sds field, value;
+            sds newfield, newvalue;
+            /* Extract a field-value pair from an original hash object.*/
+            field = hashTypeCurrentFromHashTable(hi, OBJ_HASH_KEY);
+            value = hashTypeCurrentFromHashTable(hi, OBJ_HASH_VALUE);
+            /* Deep-copy both strings so the two hashes share no memory. */
+            newfield = sdsdup(field);
+            newvalue = sdsdup(value);
+
+            /* Add a field-value pair to a new hash object. */
+            dictAdd(d,newfield,newvalue);
+        }
+        hashTypeReleaseIterator(hi);
+
+        hobj = createObject(OBJ_HASH, d);
+        hobj->encoding = OBJ_ENCODING_HT;
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+    return hobj;
+}
+
+/* Create a new sds string from the listpack entry: from the string bytes
+ * when sval is set, otherwise from the integer lval. Caller owns the result. */
+sds hashSdsFromListpackEntry(listpackEntry *e) {
+    return e->sval ? sdsnewlen(e->sval, e->slen) : sdsfromlonglong(e->lval);
+}
+
+/* Reply with bulk string from the listpack entry: string form when sval is
+ * set, otherwise the integer lval as a bulk. */
+void hashReplyFromListpackEntry(client *c, listpackEntry *e) {
+    if (e->sval)
+        addReplyBulkCBuffer(c, e->sval, e->slen);
+    else
+        addReplyBulkLongLong(c, e->lval);
+}
+
+/* Return random element from a non empty hash.
+ * 'key' and 'val' will be set to hold the element.
+ * The memory in them is not to be freed or modified by the caller.
+ * 'val' can be NULL in which case it's not extracted.
+ * 'hashsize' is only used by the listpack path (passed to lpRandomPair()). */
+void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpackEntry *key, listpackEntry *val) {
+    if (hashobj->encoding == OBJ_ENCODING_HT) {
+        dictEntry *de = dictGetFairRandomKey(hashobj->ptr);
+        sds s = dictGetKey(de);
+        key->sval = (unsigned char*)s;
+        key->slen = sdslen(s);
+        if (val) {
+            sds s = dictGetVal(de);
+            val->sval = (unsigned char*)s;
+            val->slen = sdslen(s);
+        }
+    } else if (hashobj->encoding == OBJ_ENCODING_LISTPACK) {
+        lpRandomPair(hashobj->ptr, hashsize, key, val);
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+}
+
+
+/*-----------------------------------------------------------------------------
+ * Hash type commands
+ *----------------------------------------------------------------------------*/
+
+/* HSETNX key field value -- set field only when it does not already exist.
+ * Replies 1 when set, 0 when the field was already present. */
+void hsetnxCommand(client *c) {
+    robj *o;
+    if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
+
+    if (hashTypeExists(o, c->argv[2]->ptr)) {
+        addReply(c, shared.czero);
+    } else {
+        /* Only convert/modify when we are actually going to insert. */
+        hashTypeTryConversion(o,c->argv,2,3);
+        hashTypeSet(o,c->argv[2]->ptr,c->argv[3]->ptr,HASH_SET_COPY);
+        addReply(c, shared.cone);
+        signalModifiedKey(c,c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
+        server.dirty++;
+    }
+}
+
+/* HSET/HMSET key field value [field value ...] -- shared implementation.
+ * HSET replies with the number of newly created fields, HMSET with +OK. */
+void hsetCommand(client *c) {
+    int i, created = 0;
+    robj *o;
+
+    /* Arguments must come in field/value pairs after the key. */
+    if ((c->argc % 2) == 1) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
+    hashTypeTryConversion(o,c->argv,2,c->argc-1);
+
+    for (i = 2; i < c->argc; i += 2)
+        created += !hashTypeSet(o,c->argv[i]->ptr,c->argv[i+1]->ptr,HASH_SET_COPY);
+
+    /* HMSET (deprecated) and HSET return value is different. */
+    char *cmdname = c->argv[0]->ptr;
+    if (cmdname[1] == 's' || cmdname[1] == 'S') {
+        /* HSET */
+        addReplyLongLong(c, created);
+    } else {
+        /* HMSET */
+        addReply(c, shared.ok);
+    }
+    signalModifiedKey(c,c->db,c->argv[1]);
+    notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
+    server.dirty += (c->argc - 2)/2;
+}
+
+/* HINCRBY key field increment -- add a signed integer to a hash field,
+ * creating key and/or field (as 0) when missing. Replies with the new value. */
+void hincrbyCommand(client *c) {
+    long long value, incr, oldvalue;
+    robj *o;
+    sds new;
+    unsigned char *vstr;
+    unsigned int vlen;
+
+    if (getLongLongFromObjectOrReply(c,c->argv[3],&incr,NULL) != C_OK) return;
+    if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
+    if (hashTypeGetValue(o,c->argv[2]->ptr,&vstr,&vlen,&value) == C_OK) {
+        if (vstr) {
+            if (string2ll((char*)vstr,vlen,&value) == 0) {
+                addReplyError(c,"hash value is not an integer");
+                return;
+            }
+        } /* Else hashTypeGetValue() already stored it into &value */
+    } else {
+        /* Missing field behaves as if it were 0. */
+        value = 0;
+    }
+
+    /* Detect signed overflow before performing the addition (UB otherwise). */
+    oldvalue = value;
+    if ((incr < 0 && oldvalue < 0 && incr < (LLONG_MIN-oldvalue)) ||
+        (incr > 0 && oldvalue > 0 && incr > (LLONG_MAX-oldvalue))) {
+        addReplyError(c,"increment or decrement would overflow");
+        return;
+    }
+    value += incr;
+    new = sdsfromlonglong(value);
+    /* TAKE_VALUE: hashTypeSet() now owns 'new', no free needed here. */
+    hashTypeSet(o,c->argv[2]->ptr,new,HASH_SET_TAKE_VALUE);
+    addReplyLongLong(c,value);
+    signalModifiedKey(c,c->db,c->argv[1]);
+    notifyKeyspaceEvent(NOTIFY_HASH,"hincrby",c->argv[1],c->db->id);
+    server.dirty++;
+}
+
+/* HINCRBYFLOAT key field increment -- add a long double to a hash field.
+ * Replies with the new value as a bulk string. */
+void hincrbyfloatCommand(client *c) {
+    long double value, incr;
+    long long ll;
+    robj *o;
+    sds new;
+    unsigned char *vstr;
+    unsigned int vlen;
+
+    if (getLongDoubleFromObjectOrReply(c,c->argv[3],&incr,NULL) != C_OK) return;
+    /* Reject non-finite increments up front. */
+    if (isnan(incr) || isinf(incr)) {
+        addReplyError(c,"value is NaN or Infinity");
+        return;
+    }
+    if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
+    if (hashTypeGetValue(o,c->argv[2]->ptr,&vstr,&vlen,&ll) == C_OK) {
+        if (vstr) {
+            if (string2ld((char*)vstr,vlen,&value) == 0) {
+                addReplyError(c,"hash value is not a float");
+                return;
+            }
+        } else {
+            /* Integer-encoded current value: widen it to long double. */
+            value = (long double)ll;
+        }
+    } else {
+        /* Missing field behaves as if it were 0. */
+        value = 0;
+    }
+
+    value += incr;
+    if (isnan(value) || isinf(value)) {
+        addReplyError(c,"increment would produce NaN or Infinity");
+        return;
+    }
+
+    char buf[MAX_LONG_DOUBLE_CHARS];
+    int len = ld2string(buf,sizeof(buf),value,LD_STR_HUMAN);
+    new = sdsnewlen(buf,len);
+    /* TAKE_VALUE: hashTypeSet() now owns 'new', no free needed here. */
+    hashTypeSet(o,c->argv[2]->ptr,new,HASH_SET_TAKE_VALUE);
+    addReplyBulkCBuffer(c,buf,len);
+    signalModifiedKey(c,c->db,c->argv[1]);
+    notifyKeyspaceEvent(NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id);
+    server.dirty++;
+
+    /* Always replicate HINCRBYFLOAT as an HSET command with the final value
+     * in order to make sure that differences in float precision or formatting
+     * will not create differences in replicas or after an AOF restart. */
+    robj *newobj;
+    newobj = createRawStringObject(buf,len);
+    rewriteClientCommandArgument(c,0,shared.hset);
+    rewriteClientCommandArgument(c,3,newobj);
+    decrRefCount(newobj);
+}
+
+/* Append to the client reply the value of 'field' in hash 'o', or a null
+ * reply when the hash ('o' == NULL) or the field does not exist. */
+static void addHashFieldToReply(client *c, robj *o, sds field) {
+    if (o == NULL) {
+        addReplyNull(c);
+        return;
+    }
+
+    unsigned char *vstr = NULL;
+    unsigned int vlen = UINT_MAX;
+    long long vll = LLONG_MAX;
+
+    if (hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_OK) {
+        if (vstr) {
+            addReplyBulkCBuffer(c, vstr, vlen);
+        } else {
+            addReplyBulkLongLong(c, vll);
+        }
+    } else {
+        addReplyNull(c);
+    }
+}
+
+/* HGET key field -- reply with the value of a single field, or null. */
+void hgetCommand(client *c) {
+    robj *o;
+
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) == NULL ||
+        checkType(c,o,OBJ_HASH)) return;
+
+    addHashFieldToReply(c, o, c->argv[2]->ptr);
+}
+
+/* HMGET key field [field ...] -- reply with an array of values, null for
+ * every missing field. */
+void hmgetCommand(client *c) {
+    robj *o;
+    int i;
+
+    /* Don't abort when the key cannot be found. Non-existing keys are empty
+     * hashes, where HMGET should respond with a series of null bulks. */
+    o = lookupKeyRead(c->db, c->argv[1]);
+    if (checkType(c,o,OBJ_HASH)) return;
+
+    addReplyArrayLen(c, c->argc-2);
+    for (i = 2; i < c->argc; i++) {
+        /* addHashFieldToReply() handles o == NULL by replying null. */
+        addHashFieldToReply(c, o, c->argv[i]->ptr);
+    }
+}
+
+/* HDEL key field [field ...] -- delete fields, removing the key entirely
+ * when the hash becomes empty. Replies with the number of fields removed. */
+void hdelCommand(client *c) {
+    robj *o;
+    int j, deleted = 0, keyremoved = 0;
+
+    if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+        checkType(c,o,OBJ_HASH)) return;
+
+    for (j = 2; j < c->argc; j++) {
+        if (hashTypeDelete(o,c->argv[j]->ptr)) {
+            deleted++;
+            if (hashTypeLength(o) == 0) {
+                /* Empty hashes are not kept around: drop the key and stop
+                 * scanning further arguments. */
+                dbDelete(c->db,c->argv[1]);
+                keyremoved = 1;
+                break;
+            }
+        }
+    }
+    if (deleted) {
+        signalModifiedKey(c,c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id);
+        if (keyremoved)
+            notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],
+                                c->db->id);
+        server.dirty += deleted;
+    }
+    addReplyLongLong(c,deleted);
+}
+
+/* HLEN key -- reply with the number of fields (0 for a missing key). */
+void hlenCommand(client *c) {
+    robj *o;
+
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+        checkType(c,o,OBJ_HASH)) return;
+
+    addReplyLongLong(c,hashTypeLength(o));
+}
+
+/* HSTRLEN key field -- reply with the value's string length (0 when the key
+ * or field is missing). */
+void hstrlenCommand(client *c) {
+    robj *o;
+
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+        checkType(c,o,OBJ_HASH)) return;
+    addReplyLongLong(c,hashTypeGetValueLength(o,c->argv[2]->ptr));
+}
+
+/* Append the field or value ('what') at the iterator's current position to
+ * the client reply, as a bulk string or bulk integer. */
+static void addHashIteratorCursorToReply(client *c, hashTypeIterator *hi, int what) {
+    if (hi->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *vstr = NULL;
+        unsigned int vlen = UINT_MAX;
+        long long vll = LLONG_MAX;
+
+        hashTypeCurrentFromListpack(hi, what, &vstr, &vlen, &vll);
+        if (vstr)
+            addReplyBulkCBuffer(c, vstr, vlen);
+        else
+            addReplyBulkLongLong(c, vll);
+    } else if (hi->encoding == OBJ_ENCODING_HT) {
+        sds value = hashTypeCurrentFromHashTable(hi, what);
+        addReplyBulkCBuffer(c, value, sdslen(value));
+    } else {
+        serverPanic("Unknown hash encoding");
+    }
+}
+
+/* Shared implementation of HKEYS/HVALS/HGETALL. 'flags' is a bitmask of
+ * OBJ_HASH_KEY/OBJ_HASH_VALUE selecting which halves of each pair to emit. */
+void genericHgetallCommand(client *c, int flags) {
+    robj *o;
+    hashTypeIterator *hi;
+    int length, count = 0;
+
+    /* Missing keys reply with an empty map (HGETALL) or empty array. */
+    robj *emptyResp = (flags & OBJ_HASH_KEY && flags & OBJ_HASH_VALUE) ?
+        shared.emptymap[c->resp] : shared.emptyarray;
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],emptyResp))
+        == NULL || checkType(c,o,OBJ_HASH)) return;
+
+    /* We return a map if the user requested keys and values, like in the
+     * HGETALL case. Otherwise to use a flat array makes more sense. */
+    length = hashTypeLength(o);
+    if (flags & OBJ_HASH_KEY && flags & OBJ_HASH_VALUE) {
+        addReplyMapLen(c, length);
+    } else {
+        addReplyArrayLen(c, length);
+    }
+
+    hi = hashTypeInitIterator(o);
+    while (hashTypeNext(hi) != C_ERR) {
+        if (flags & OBJ_HASH_KEY) {
+            addHashIteratorCursorToReply(c, hi, OBJ_HASH_KEY);
+            count++;
+        }
+        if (flags & OBJ_HASH_VALUE) {
+            addHashIteratorCursorToReply(c, hi, OBJ_HASH_VALUE);
+            count++;
+        }
+    }
+
+    hashTypeReleaseIterator(hi);
+
+    /* Make sure we returned the right number of elements. */
+    if (flags & OBJ_HASH_KEY && flags & OBJ_HASH_VALUE) count /= 2;
+    serverAssert(count == length);
+}
+
+/* HKEYS key -- reply with all field names. */
+void hkeysCommand(client *c) {
+    genericHgetallCommand(c,OBJ_HASH_KEY);
+}
+
+/* HVALS key -- reply with all values. */
+void hvalsCommand(client *c) {
+    genericHgetallCommand(c,OBJ_HASH_VALUE);
+}
+
+/* HGETALL key -- reply with all field/value pairs. */
+void hgetallCommand(client *c) {
+    genericHgetallCommand(c,OBJ_HASH_KEY|OBJ_HASH_VALUE);
+}
+
+/* HEXISTS key field -- reply 1 if the field exists, 0 otherwise. */
+void hexistsCommand(client *c) {
+    robj *o;
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+        checkType(c,o,OBJ_HASH)) return;
+
+    addReply(c, hashTypeExists(o,c->argv[2]->ptr) ? shared.cone : shared.czero);
+}
+
+/* HSCAN key cursor [MATCH pattern] [COUNT count] -- incremental iteration;
+ * option parsing and emission are delegated to scanGenericCommand(). */
+void hscanCommand(client *c) {
+    robj *o;
+    unsigned long cursor;
+
+    if (parseScanCursorOrReply(c,c->argv[2],&cursor) == C_ERR) return;
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan)) == NULL ||
+        checkType(c,o,OBJ_HASH)) return;
+    scanGenericCommand(c,o,cursor);
+}
+
+/* Emit 'count' sampled entries to the client. 'vals' may be NULL when only
+ * fields were requested; with RESP3 each field/value pair is wrapped in a
+ * two-element array. */
+static void hrandfieldReplyWithListpack(client *c, unsigned int count, listpackEntry *keys, listpackEntry *vals) {
+    for (unsigned long i = 0; i < count; i++) {
+        if (vals && c->resp > 2)
+            addReplyArrayLen(c,2);
+        if (keys[i].sval)
+            addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
+        else
+            addReplyBulkLongLong(c, keys[i].lval);
+        if (vals) {
+            if (vals[i].sval)
+                addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
+            else
+                addReplyBulkLongLong(c, vals[i].lval);
+        }
+    }
+}
+
+/* How many times bigger should be the hash compared to the requested size
+ * for us to not use the "remove elements" strategy? Read later in the
+ * implementation for more info. */
+#define HRANDFIELD_SUB_STRATEGY_MUL 3
+
+/* If client is trying to ask for a very large number of random elements,
+ * queuing may consume an unlimited amount of memory, so we want to limit
+ * the number of randoms per time. */
+#define HRANDFIELD_RANDOM_SAMPLE_LIMIT 1000
+
+/* Implementation of HRANDFIELD when an explicit <count> was given.
+ * 'l' is the raw count argument (negative means "with repetition"),
+ * 'withvalues' selects whether values accompany each field. The strategy
+ * (CASE 1..4 below) is picked from the sign of 'l' and the ratio between
+ * the requested count and the hash size. */
+void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
+    unsigned long count, size;
+    int uniq = 1;
+    robj *hash;
+
+    if ((hash = lookupKeyReadOrReply(c,c->argv[1],shared.emptyarray))
+        == NULL || checkType(c,hash,OBJ_HASH)) return;
+    size = hashTypeLength(hash);
+
+    /* Negative count requests possibly-repeated elements. */
+    if(l >= 0) {
+        count = (unsigned long) l;
+    } else {
+        count = -l;
+        uniq = 0;
+    }
+
+    /* If count is zero, serve it ASAP to avoid special cases later. */
+    if (count == 0) {
+        addReply(c,shared.emptyarray);
+        return;
+    }
+
+    /* CASE 1: The count was negative, so the extraction method is just:
+     * "return N random elements" sampling the whole set every time.
+     * This case is trivial and can be served without auxiliary data
+     * structures. This case is the only one that also needs to return the
+     * elements in random order. */
+    if (!uniq || count == 1) {
+        if (withvalues && c->resp == 2)
+            addReplyArrayLen(c, count*2);
+        else
+            addReplyArrayLen(c, count);
+        if (hash->encoding == OBJ_ENCODING_HT) {
+            sds key, value;
+            while (count--) {
+                dictEntry *de = dictGetFairRandomKey(hash->ptr);
+                key = dictGetKey(de);
+                value = dictGetVal(de);
+                if (withvalues && c->resp > 2)
+                    addReplyArrayLen(c,2);
+                addReplyBulkCBuffer(c, key, sdslen(key));
+                if (withvalues)
+                    addReplyBulkCBuffer(c, value, sdslen(value));
+                /* Stop early when the client is being closed (e.g. output
+                 * buffer limit reached). */
+                if (c->flags & CLIENT_CLOSE_ASAP)
+                    break;
+            }
+        } else if (hash->encoding == OBJ_ENCODING_LISTPACK) {
+            listpackEntry *keys, *vals = NULL;
+            unsigned long limit, sample_count;
+
+            /* Sample in bounded batches so memory use stays limited even
+             * for huge counts. */
+            limit = count > HRANDFIELD_RANDOM_SAMPLE_LIMIT ? HRANDFIELD_RANDOM_SAMPLE_LIMIT : count;
+            keys = zmalloc(sizeof(listpackEntry)*limit);
+            if (withvalues)
+                vals = zmalloc(sizeof(listpackEntry)*limit);
+            while (count) {
+                sample_count = count > limit ? limit : count;
+                count -= sample_count;
+                lpRandomPairs(hash->ptr, sample_count, keys, vals);
+                hrandfieldReplyWithListpack(c, sample_count, keys, vals);
+                if (c->flags & CLIENT_CLOSE_ASAP)
+                    break;
+            }
+            zfree(keys);
+            zfree(vals);
+        }
+        return;
+    }
+
+    /* Initiate reply count, RESP3 responds with nested array, RESP2 with flat one. */
+    long reply_size = count < size ? count : size;
+    if (withvalues && c->resp == 2)
+        addReplyArrayLen(c, reply_size*2);
+    else
+        addReplyArrayLen(c, reply_size);
+
+    /* CASE 2:
+     * The number of requested elements is greater than the number of
+     * elements inside the hash: simply return the whole hash. */
+    if(count >= size) {
+        hashTypeIterator *hi = hashTypeInitIterator(hash);
+        while (hashTypeNext(hi) != C_ERR) {
+            if (withvalues && c->resp > 2)
+                addReplyArrayLen(c,2);
+            addHashIteratorCursorToReply(c, hi, OBJ_HASH_KEY);
+            if (withvalues)
+                addHashIteratorCursorToReply(c, hi, OBJ_HASH_VALUE);
+        }
+        hashTypeReleaseIterator(hi);
+        return;
+    }
+
+    /* CASE 2.5 listpack only. Sampling unique elements, in non-random order.
+     * Listpack encoded hashes are meant to be relatively small, so
+     * HRANDFIELD_SUB_STRATEGY_MUL isn't necessary and we rather not make
+     * copies of the entries. Instead, we emit them directly to the output
+     * buffer.
+     *
+     * And it is inefficient to repeatedly pick one random element from a
+     * listpack in CASE 4. So we use this instead. */
+    if (hash->encoding == OBJ_ENCODING_LISTPACK) {
+        listpackEntry *keys, *vals = NULL;
+        keys = zmalloc(sizeof(listpackEntry)*count);
+        if (withvalues)
+            vals = zmalloc(sizeof(listpackEntry)*count);
+        serverAssert(lpRandomPairsUnique(hash->ptr, count, keys, vals) == count);
+        hrandfieldReplyWithListpack(c, count, keys, vals);
+        zfree(keys);
+        zfree(vals);
+        return;
+    }
+
+    /* CASE 3:
+     * The number of elements inside the hash is not greater than
+     * HRANDFIELD_SUB_STRATEGY_MUL times the number of requested elements.
+     * In this case we create a hash from scratch with all the elements, and
+     * subtract random elements to reach the requested number of elements.
+     *
+     * This is done because if the number of requested elements is just
+     * a bit less than the number of elements in the hash, the natural approach
+     * used into CASE 4 is highly inefficient. */
+    if (count*HRANDFIELD_SUB_STRATEGY_MUL > size) {
+        /* Hashtable encoding (generic implementation) */
+        dict *d = dictCreate(&sdsReplyDictType);
+        dictExpand(d, size);
+        hashTypeIterator *hi = hashTypeInitIterator(hash);
+
+        /* Add all the elements into the temporary dictionary. */
+        while ((hashTypeNext(hi)) != C_ERR) {
+            int ret = DICT_ERR;
+            sds key, value = NULL;
+
+            key = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
+            if (withvalues)
+                value = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
+            ret = dictAdd(d, key, value);
+
+            serverAssert(ret == DICT_OK);
+        }
+        serverAssert(dictSize(d) == size);
+        hashTypeReleaseIterator(hi);
+
+        /* Remove random elements to reach the right count. */
+        while (size > count) {
+            dictEntry *de;
+            de = dictGetFairRandomKey(d);
+            /* Unlink first, then free key/value, then free the entry: the
+             * key is still referenced by the entry while we free it. */
+            dictUnlink(d,dictGetKey(de));
+            sdsfree(dictGetKey(de));
+            sdsfree(dictGetVal(de));
+            dictFreeUnlinkedEntry(d,de);
+            size--;
+        }
+
+        /* Reply with what's in the dict and release memory */
+        dictIterator *di;
+        dictEntry *de;
+        di = dictGetIterator(d);
+        while ((de = dictNext(di)) != NULL) {
+            sds key = dictGetKey(de);
+            sds value = dictGetVal(de);
+            if (withvalues && c->resp > 2)
+                addReplyArrayLen(c,2);
+            addReplyBulkSds(c, key);
+            if (withvalues)
+                addReplyBulkSds(c, value);
+        }
+
+        dictReleaseIterator(di);
+        dictRelease(d);
+    }
+
+    /* CASE 4: We have a big hash compared to the requested number of elements.
+     * In this case we can simply get random elements from the hash and add
+     * to the temporary hash, trying to eventually get enough unique elements
+     * to reach the specified count. */
+    else {
+        /* Hashtable encoding (generic implementation) */
+        unsigned long added = 0;
+        listpackEntry key, value;
+        /* The dict is used only as a "seen fields" set for deduplication. */
+        dict *d = dictCreate(&hashDictType);
+        dictExpand(d, count);
+        while(added < count) {
+            hashTypeRandomElement(hash, size, &key, withvalues? &value : NULL);
+
+            /* Try to add the object to the dictionary. If it already exists
+             * free it, otherwise increment the number of objects we have
+             * in the result dictionary. */
+            sds skey = hashSdsFromListpackEntry(&key);
+            if (dictAdd(d,skey,NULL) != DICT_OK) {
+                sdsfree(skey);
+                continue;
+            }
+            added++;
+
+            /* We can reply right away, so that we don't need to store the value in the dict. */
+            if (withvalues && c->resp > 2)
+                addReplyArrayLen(c,2);
+            hashReplyFromListpackEntry(c, &key);
+            if (withvalues)
+                hashReplyFromListpackEntry(c, &value);
+        }
+
+        /* Release memory */
+        dictRelease(d);
+    }
+}
+
+/* HRANDFIELD key [<count> [WITHVALUES]] */
+void hrandfieldCommand(client *c) {
+    long l;
+    int withvalues = 0;
+    robj *hash;
+    listpackEntry ele;
+
+    if (c->argc >= 3) {
+        if (getRangeLongFromObjectOrReply(c,c->argv[2],-LONG_MAX,LONG_MAX,&l,NULL) != C_OK) return;
+        /* Only the optional WITHVALUES modifier is accepted after <count>. */
+        if (c->argc > 4 || (c->argc == 4 && strcasecmp(c->argv[3]->ptr,"withvalues"))) {
+            addReplyErrorObject(c,shared.syntaxerr);
+            return;
+        } else if (c->argc == 4) {
+            withvalues = 1;
+            /* With values each element becomes two reply items; keep the
+             * doubled count representable in a long. */
+            if (l < -LONG_MAX/2 || l > LONG_MAX/2) {
+                addReplyError(c,"value is out of range");
+                return;
+            }
+        }
+        hrandfieldWithCountCommand(c, l, withvalues);
+        return;
+    }
+
+    /* Handle variant without <count> argument. Reply with simple bulk string */
+    if ((hash = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))== NULL ||
+        checkType(c,hash,OBJ_HASH)) {
+        return;
+    }
+
+    hashTypeRandomElement(hash,hashTypeLength(hash),&ele,NULL);
+    hashReplyFromListpackEntry(c, &ele);
+}
diff --git a/src/t_list.c b/src/t_list.c
new file mode 100644
index 0000000..dc16606
--- /dev/null
+++ b/src/t_list.c
@@ -0,0 +1,1388 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/*-----------------------------------------------------------------------------
+ * List API
+ *----------------------------------------------------------------------------*/
+
+/* Check the length and size of a number of objects that will be added to list to see
+ * if we need to convert a listpack to a quicklist. Note that we only check string
+ * encoded objects as their string length can be queried in constant time.
+ *
+ * If callback is given the function is called in order for caller to do some work
+ * before the list conversion. */
+static void listTypeTryConvertListpack(robj *o, robj **argv, int start, int end,
+                                       beforeConvertCB fn, void *data)
+{
+    serverAssert(o->encoding == OBJ_ENCODING_LISTPACK);
+
+    size_t add_bytes = 0;   /* Bytes the new elements would add. */
+    size_t add_length = 0;  /* Number of new elements. */
+
+    if (argv) {
+        /* Sum only sds-encoded argument sizes; int-encoded objects are
+         * skipped because their serialized length isn't known in O(1). */
+        for (int i = start; i <= end; i++) {
+            if (!sdsEncodedObject(argv[i]))
+                continue;
+            add_bytes += sdslen(argv[i]->ptr);
+        }
+        add_length = end - start + 1;
+    }
+
+    if (quicklistNodeExceedsLimit(server.list_max_listpack_size,
+            lpBytes(o->ptr) + add_bytes, lpLength(o->ptr) + add_length))
+    {
+        /* Invoke callback before conversion. */
+        if (fn) fn(data);
+
+        quicklist *ql = quicklistCreate();
+        quicklistSetOptions(ql, server.list_max_listpack_size, server.list_compress_depth);
+
+        /* Append listpack to quicklist if it's not empty, otherwise release it. */
+        if (lpLength(o->ptr))
+            quicklistAppendListpack(ql, o->ptr);
+        else
+            lpFree(o->ptr);
+        o->ptr = ql;
+        o->encoding = OBJ_ENCODING_QUICKLIST;
+    }
+}
+
+/* Check the length and size of a quicklist to see if we need to convert it to listpack.
+ *
+ * 'shrinking' is 1 means that the conversion is due to a list shrinking, to avoid
+ * frequent conversions of quicklist and listpack due to frequent insertion and
+ * deletion, we don't convert quicklist to listpack until its length or size is
+ * below half of the limit (hysteresis).
+ *
+ * If callback is given the function is called in order for caller to do some work
+ * before the list conversion. */
+static void listTypeTryConvertQuicklist(robj *o, int shrinking, beforeConvertCB fn, void *data) {
+    serverAssert(o->encoding == OBJ_ENCODING_QUICKLIST);
+
+    size_t sz_limit;
+    unsigned int count_limit;
+    quicklist *ql = o->ptr;
+
+    /* A quicklist can be converted to listpack only if it has only one packed node. */
+    if (ql->len != 1 || ql->head->container != QUICKLIST_NODE_CONTAINER_PACKED)
+        return;
+
+    /* Check the length or size of the quicklist is below the limit. */
+    quicklistNodeLimit(server.list_max_listpack_size, &sz_limit, &count_limit);
+    if (shrinking) {
+        /* Stricter (halved) thresholds when shrinking, so repeated
+         * push/pop around the limit doesn't flip encodings each time. */
+        sz_limit /= 2;
+        count_limit /= 2;
+    }
+    if (ql->head->sz > sz_limit || ql->count > count_limit) return;
+
+    /* Invoke callback before conversion. */
+    if (fn) fn(data);
+
+    /* Extract the listpack from the unique quicklist node,
+     * then reset it and release the quicklist. */
+    o->ptr = ql->head->entry;
+    ql->head->entry = NULL;
+    quicklistRelease(ql);
+    o->encoding = OBJ_ENCODING_LISTPACK;
+}
+
+/* Check if the list needs to be converted to appropriate encoding due to
+ * growing, shrinking or other cases.
+ *
+ * 'lct' can be one of the following values:
+ * LIST_CONV_AUTO      - Used after we built a new list, and we want to let the
+ *                       function decide on the best encoding for that list.
+ * LIST_CONV_GROWING   - Used before or right after adding elements to the list,
+ *                       in which case we are likely to only consider converting
+ *                       from listpack to quicklist.
+ *                       'argv' is only used in this case to calculate the size
+ *                       of a number of objects that will be added to list.
+ * LIST_CONV_SHRINKING - Used after removing an element from the list, in which case we
+ *                       wanna consider converting from quicklist to listpack. When we
+ *                       know we're shrinking, we use a lower (more strict) threshold in
+ *                       order to avoid repeated conversions on every list change. */
+static void listTypeTryConversionRaw(robj *o, list_conv_type lct,
+                                     robj **argv, int start, int end,
+                                     beforeConvertCB fn, void *data)
+{
+    if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+        if (lct == LIST_CONV_GROWING) return; /* Growing has nothing to do with quicklist */
+        listTypeTryConvertQuicklist(o, lct == LIST_CONV_SHRINKING, fn, data);
+    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        if (lct == LIST_CONV_SHRINKING) return; /* Shrinking has nothing to do with listpack */
+        listTypeTryConvertListpack(o, argv, start, end, fn, data);
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+}
+
+/* This is just a wrapper for listTypeTryConversionRaw() that is
+ * able to try conversion without passing 'argv' (i.e. when no new
+ * elements are about to be added). */
+void listTypeTryConversion(robj *o, list_conv_type lct, beforeConvertCB fn, void *data) {
+    listTypeTryConversionRaw(o, lct, NULL, 0, 0, fn, data);
+}
+
+/* This is just a wrapper for listTypeTryConversionRaw() that is
+ * able to try conversion before adding elements to the list.
+ * argv[start..end] (inclusive) are the objects about to be appended. */
+void listTypeTryConversionAppend(robj *o, robj **argv, int start, int end,
+                                 beforeConvertCB fn, void *data)
+{
+    listTypeTryConversionRaw(o, LIST_CONV_GROWING, argv, start, end, fn, data);
+}
+
+/* The function pushes an element to the specified list object 'subject',
+ * at head or tail position as specified by 'where' (LIST_HEAD/LIST_TAIL).
+ *
+ * There is no need for the caller to increment the refcount of 'value' as
+ * the function takes care of it if needed. */
+void listTypePush(robj *subject, robj *value, int where) {
+    if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
+        int pos = (where == LIST_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL;
+        if (value->encoding == OBJ_ENCODING_INT) {
+            /* Int-encoded values have no sds buffer: render to a local
+             * buffer before pushing. */
+            char buf[32];
+            ll2string(buf, 32, (long)value->ptr);
+            quicklistPush(subject->ptr, buf, strlen(buf), pos);
+        } else {
+            quicklistPush(subject->ptr, value->ptr, sdslen(value->ptr), pos);
+        }
+    } else if (subject->encoding == OBJ_ENCODING_LISTPACK) {
+        /* lp* calls may reallocate the listpack, so the pointer is
+         * reassigned. */
+        if (value->encoding == OBJ_ENCODING_INT) {
+            subject->ptr = (where == LIST_HEAD) ?
+                lpPrependInteger(subject->ptr, (long)value->ptr) :
+                lpAppendInteger(subject->ptr, (long)value->ptr);
+        } else {
+            subject->ptr = (where == LIST_HEAD) ?
+                lpPrepend(subject->ptr, value->ptr, sdslen(value->ptr)) :
+                lpAppend(subject->ptr, value->ptr, sdslen(value->ptr));
+        }
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+}
+
+/* Callback passed to quicklistPopCustom(): wraps the raw popped bytes
+ * into a string robj owned by the caller. */
+void *listPopSaver(unsigned char *data, size_t sz) {
+    return createStringObject((char*)data,sz);
+}
+
+/* Pop one element from the head or tail of 'subject' and return it as a
+ * new string object (refcount 1), or NULL if the list is empty. The
+ * element is removed from the list; the caller owns the returned object. */
+robj *listTypePop(robj *subject, int where) {
+    robj *value = NULL;
+
+    if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
+        long long vlong;
+        int ql_where = where == LIST_HEAD ? QUICKLIST_HEAD : QUICKLIST_TAIL;
+        /* listPopSaver fills 'value' for string entries; integer entries
+         * come back in 'vlong' instead. */
+        if (quicklistPopCustom(subject->ptr, ql_where, (unsigned char **)&value,
+                               NULL, &vlong, listPopSaver)) {
+            if (!value)
+                value = createStringObjectFromLongLong(vlong);
+        }
+    } else if (subject->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *p;
+        unsigned char *vstr;
+        int64_t vlen;
+        unsigned char intbuf[LP_INTBUF_SIZE];
+
+        p = (where == LIST_HEAD) ? lpFirst(subject->ptr) : lpLast(subject->ptr);
+        if (p) {
+            vstr = lpGet(p, &vlen, intbuf);
+            value = createStringObject((char*)vstr, vlen);
+            subject->ptr = lpDelete(subject->ptr, p, NULL);
+        }
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+    return value;
+}
+
+/* Return the number of elements in the list, regardless of encoding. */
+unsigned long listTypeLength(const robj *subject) {
+    if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
+        return quicklistCount(subject->ptr);
+    } else if (subject->encoding == OBJ_ENCODING_LISTPACK) {
+        return lpLength(subject->ptr);
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+}
+
+/* Initialize an iterator at the specified index. Negative indexes count
+ * from the tail, as in lpSeek()/quicklistGetIteratorAtIdx(). The returned
+ * iterator must be freed with listTypeReleaseIterator(). */
+listTypeIterator *listTypeInitIterator(robj *subject, long index,
+                                       unsigned char direction) {
+    listTypeIterator *li = zmalloc(sizeof(listTypeIterator));
+    li->subject = subject;
+    li->encoding = subject->encoding;
+    li->direction = direction;
+    li->iter = NULL;
+    /* LIST_HEAD means start at TAIL and move *towards* head.
+     * LIST_TAIL means start at HEAD and move *towards* tail. */
+    if (li->encoding == OBJ_ENCODING_QUICKLIST) {
+        int iter_direction = direction == LIST_HEAD ? AL_START_TAIL : AL_START_HEAD;
+        li->iter = quicklistGetIteratorAtIdx(li->subject->ptr,
+                                             iter_direction, index);
+    } else if (li->encoding == OBJ_ENCODING_LISTPACK) {
+        li->lpi = lpSeek(subject->ptr, index);
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+    return li;
+}
+
+/* Sets the direction of an iterator. 'entry' must be the entry the
+ * iterator last returned; it is used to re-anchor the listpack iterator
+ * position when the direction flips. No-op if direction is unchanged. */
+void listTypeSetIteratorDirection(listTypeIterator *li, listTypeEntry *entry, unsigned char direction) {
+    if (li->direction == direction) return;
+
+    li->direction = direction;
+    if (li->encoding == OBJ_ENCODING_QUICKLIST) {
+        int dir = direction == LIST_HEAD ? AL_START_TAIL : AL_START_HEAD;
+        quicklistSetDirection(li->iter, dir);
+    } else if (li->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *lp = li->subject->ptr;
+        /* Note that the iterator for listpack always points to the next of the current entry,
+         * so we need to update position of the iterator depending on the direction. */
+        li->lpi = (direction == LIST_TAIL) ? lpNext(lp, entry->lpe) : lpPrev(lp, entry->lpe);
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+}
+
+/* Clean up the iterator. Only quicklist iterators hold extra resources;
+ * the listpack position is just a pointer into the subject's listpack. */
+void listTypeReleaseIterator(listTypeIterator *li) {
+    if (li->encoding == OBJ_ENCODING_QUICKLIST)
+        quicklistReleaseIterator(li->iter);
+    zfree(li);
+}
+
+/* Stores pointer to current the entry in the provided entry structure
+ * and advances the position of the iterator. Returns 1 when the current
+ * entry is in fact an entry, 0 otherwise (iteration finished). */
+int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
+    /* Protect from converting when iterating */
+    serverAssert(li->subject->encoding == li->encoding);
+
+    entry->li = li;
+    if (li->encoding == OBJ_ENCODING_QUICKLIST) {
+        return quicklistNext(li->iter, &entry->entry);
+    } else if (li->encoding == OBJ_ENCODING_LISTPACK) {
+        /* Return the current position, then advance one step in the
+         * iteration direction. */
+        entry->lpe = li->lpi;
+        if (entry->lpe != NULL) {
+            li->lpi = (li->direction == LIST_TAIL) ?
+                lpNext(li->subject->ptr,li->lpi) : lpPrev(li->subject->ptr,li->lpi);
+            return 1;
+        }
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+    return 0;
+}
+
+/* Get entry value at the current position of the iterator.
+ * When the function returns NULL, it populates the integer value by
+ * reference in 'lval'. Otherwise a pointer to the string is returned,
+ * and 'vlen' is set to the length of the string. The returned pointer
+ * is borrowed from the underlying structure (not a copy). */
+unsigned char *listTypeGetValue(listTypeEntry *entry, size_t *vlen, long long *lval) {
+    unsigned char *vstr = NULL;
+    if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
+        if (entry->entry.value) {
+            vstr = entry->entry.value;
+            *vlen = entry->entry.sz;
+        } else {
+            *lval = entry->entry.longval;
+        }
+    } else if (entry->li->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned int slen;
+        vstr = lpGetValue(entry->lpe, &slen, lval);
+        *vlen = slen;
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+    return vstr;
+}
+
+/* Return the entry at the current position of the iterator as a new
+ * string object (refcount 1); the caller owns the returned object. */
+robj *listTypeGet(listTypeEntry *entry) {
+    unsigned char *vstr;
+    size_t vlen;
+    long long lval;
+
+    vstr = listTypeGetValue(entry, &vlen, &lval);
+    if (vstr)
+        return createStringObject((char *)vstr, vlen);
+    else
+        return createStringObjectFromLongLong(lval);
+}
+
+/* Insert 'value' before (LIST_HEAD) or after (LIST_TAIL) the entry the
+ * iterator currently points at. 'value' refcount is handled internally. */
+void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
+    robj *subject = entry->li->subject;
+    /* Ensure we have an sds representation to copy from. */
+    value = getDecodedObject(value);
+    sds str = value->ptr;
+    size_t len = sdslen(str);
+
+    if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
+        if (where == LIST_TAIL) {
+            quicklistInsertAfter(entry->li->iter, &entry->entry, str, len);
+        } else if (where == LIST_HEAD) {
+            quicklistInsertBefore(entry->li->iter, &entry->entry, str, len);
+        }
+    } else if (entry->li->encoding == OBJ_ENCODING_LISTPACK) {
+        int lpw = (where == LIST_TAIL) ? LP_AFTER : LP_BEFORE;
+        /* entry->lpe is refreshed since the listpack may be reallocated. */
+        subject->ptr = lpInsertString(subject->ptr, (unsigned char *)str,
+                                      len, entry->lpe, lpw, &entry->lpe);
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+    decrRefCount(value);
+}
+
+/* Replaces entry at the current position of the iterator with 'value'.
+ * 'value' refcount is handled internally. */
+void listTypeReplace(listTypeEntry *entry, robj *value) {
+    robj *subject = entry->li->subject;
+    value = getDecodedObject(value);
+    sds str = value->ptr;
+    size_t len = sdslen(str);
+
+    if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
+        quicklistReplaceEntry(entry->li->iter, &entry->entry, str, len);
+    } else if (entry->li->encoding == OBJ_ENCODING_LISTPACK) {
+        /* entry->lpe is refreshed since the listpack may be reallocated. */
+        subject->ptr = lpReplace(subject->ptr, &entry->lpe, (unsigned char *)str, len);
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+
+    decrRefCount(value);
+}
+
+/* Replace entry at offset 'index' by 'value'.
+ *
+ * Returns 1 if replace happened.
+ * Returns 0 if replace failed (index out of range) and no changes happened. */
+int listTypeReplaceAtIndex(robj *o, int index, robj *value) {
+    value = getDecodedObject(value);
+    sds vstr = value->ptr;
+    size_t vlen = sdslen(vstr);
+    int replaced = 0;
+
+    if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+        quicklist *ql = o->ptr;
+        replaced = quicklistReplaceAtIndex(ql, index, vstr, vlen);
+    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *p = lpSeek(o->ptr,index);
+        if (p) {
+            o->ptr = lpReplace(o->ptr, &p, (unsigned char *)vstr, vlen);
+            replaced = 1;
+        }
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+
+    decrRefCount(value);
+    return replaced;
+}
+
+/* Compare the given object with the entry at the current position.
+ * Returns non-zero if equal. 'o' must be sds-encoded (asserted). */
+int listTypeEqual(listTypeEntry *entry, robj *o) {
+    serverAssertWithInfo(NULL,o,sdsEncodedObject(o));
+    if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
+        return quicklistCompare(&entry->entry,o->ptr,sdslen(o->ptr));
+    } else if (entry->li->encoding == OBJ_ENCODING_LISTPACK) {
+        return lpCompare(entry->lpe,o->ptr,sdslen(o->ptr));
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+}
+
+/* Delete the element pointed to, keeping the iterator valid so that
+ * iteration can continue in its current direction. */
+void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry) {
+    if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
+        quicklistDelEntry(iter->iter, &entry->entry);
+    } else if (entry->li->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *p = entry->lpe;
+        /* lpDelete updates 'p' to the entry following the deleted one
+         * (or NULL if it was the last). */
+        iter->subject->ptr = lpDelete(iter->subject->ptr,p,&p);
+
+        /* Update position of the iterator depending on the direction */
+        if (iter->direction == LIST_TAIL)
+            iter->lpi = p;
+        else {
+            if (p) {
+                iter->lpi = lpPrev(iter->subject->ptr,p);
+            } else {
+                /* We deleted the last element, so we need to set the
+                 * iterator to the last element. */
+                iter->lpi = lpLast(iter->subject->ptr);
+            }
+        }
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+}
+
+/* This is a helper function for the COPY command.
+ * Duplicate a list object, with the guarantee that the returned object
+ * has the same encoding as the original one.
+ *
+ * The resulting object always has refcount set to 1 */
+robj *listTypeDup(robj *o) {
+    robj *lobj;
+
+    serverAssert(o->type == OBJ_LIST);
+
+    switch (o->encoding) {
+        case OBJ_ENCODING_LISTPACK:
+            lobj = createObject(OBJ_LIST, lpDup(o->ptr));
+            break;
+        case OBJ_ENCODING_QUICKLIST:
+            lobj = createObject(OBJ_LIST, quicklistDup(o->ptr));
+            break;
+        default:
+            serverPanic("Unknown list encoding");
+            break;
+    }
+    /* createObject() defaults the encoding; force it to match the source. */
+    lobj->encoding = o->encoding;
+    return lobj;
+}
+
+/* Delete a range of 'count' elements from the list starting at 'start'.
+ * A negative 'start' counts from the tail. */
+void listTypeDelRange(robj *subject, long start, long count) {
+    if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
+        quicklistDelRange(subject->ptr, start, count);
+    } else if (subject->encoding == OBJ_ENCODING_LISTPACK) {
+        subject->ptr = lpDeleteRange(subject->ptr, start, count);
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+}
+
+/*-----------------------------------------------------------------------------
+ * List Commands
+ *----------------------------------------------------------------------------*/
+
+/* Implements LPUSH/RPUSH/LPUSHX/RPUSHX.
+ * 'xx': push only if key already exists (the *X variants); otherwise
+ * the key is created on demand. Replies with the new list length. */
+void pushGenericCommand(client *c, int where, int xx) {
+    int j;
+
+    robj *lobj = lookupKeyWrite(c->db, c->argv[1]);
+    if (checkType(c,lobj,OBJ_LIST)) return;
+    if (!lobj) {
+        if (xx) {
+            /* LPUSHX/RPUSHX on a missing key: reply 0, push nothing. */
+            addReply(c, shared.czero);
+            return;
+        }
+
+        lobj = createListListpackObject();
+        dbAdd(c->db,c->argv[1],lobj);
+    }
+
+    /* Convert to quicklist up-front if the new elements would exceed the
+     * listpack limits; avoids converting mid-push. */
+    listTypeTryConversionAppend(lobj,c->argv,2,c->argc-1,NULL,NULL);
+    for (j = 2; j < c->argc; j++) {
+        listTypePush(lobj,c->argv[j],where);
+        server.dirty++;
+    }
+
+    addReplyLongLong(c, listTypeLength(lobj));
+
+    char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
+    signalModifiedKey(c,c->db,c->argv[1]);
+    notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
+}
+
+/* LPUSH <key> <element> [<element> ...]
+ * Push at head; create the key if missing. */
+void lpushCommand(client *c) {
+    pushGenericCommand(c,LIST_HEAD,0);
+}
+
+/* RPUSH <key> <element> [<element> ...]
+ * Push at tail; create the key if missing. */
+void rpushCommand(client *c) {
+    pushGenericCommand(c,LIST_TAIL,0);
+}
+
+/* LPUSHX <key> <element> [<element> ...]
+ * Push at head only if the key already exists. */
+void lpushxCommand(client *c) {
+    pushGenericCommand(c,LIST_HEAD,1);
+}
+
+/* RPUSHX <key> <element> [<element> ...]
+ * Push at tail only if the key already exists. */
+void rpushxCommand(client *c) {
+    pushGenericCommand(c,LIST_TAIL,1);
+}
+
+/* LINSERT <key> (BEFORE|AFTER) <pivot> <element>
+ *
+ * Replies with the new list length on success, -1 if the pivot was not
+ * found, or 0 if the key does not exist. */
+void linsertCommand(client *c) {
+    int where;
+    robj *subject;
+    listTypeIterator *iter;
+    listTypeEntry entry;
+    int inserted = 0;
+
+    if (strcasecmp(c->argv[2]->ptr,"after") == 0) {
+        where = LIST_TAIL;
+    } else if (strcasecmp(c->argv[2]->ptr,"before") == 0) {
+        where = LIST_HEAD;
+    } else {
+        addReplyErrorObject(c,shared.syntaxerr);
+        return;
+    }
+
+    if ((subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+        checkType(c,subject,OBJ_LIST)) return;
+
+    /* We're not sure if this value can be inserted yet, but we cannot
+     * convert the list inside the iterator. We don't want to loop over
+     * the list twice (once to see if the value can be inserted and once
+     * to do the actual insert), so we assume this value can be inserted
+     * and convert the listpack to a regular list if necessary. */
+    listTypeTryConversionAppend(subject,c->argv,4,4,NULL,NULL);
+
+    /* Seek pivot from head to tail */
+    iter = listTypeInitIterator(subject,0,LIST_TAIL);
+    while (listTypeNext(iter,&entry)) {
+        if (listTypeEqual(&entry,c->argv[3])) {
+            /* First match only: insert and stop scanning. */
+            listTypeInsert(&entry,c->argv[4],where);
+            inserted = 1;
+            break;
+        }
+    }
+    listTypeReleaseIterator(iter);
+
+    if (inserted) {
+        signalModifiedKey(c,c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_LIST,"linsert",
+                            c->argv[1],c->db->id);
+        server.dirty++;
+    } else {
+        /* Notify client of a failed insert */
+        addReplyLongLong(c,-1);
+        return;
+    }
+
+    addReplyLongLong(c,listTypeLength(subject));
+}
+
+/* LLEN <key>
+ * Replies with the list length, or 0 when the key does not exist. */
+void llenCommand(client *c) {
+    robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
+    if (o == NULL || checkType(c,o,OBJ_LIST)) return;
+    addReplyLongLong(c,listTypeLength(o));
+}
+
+/* LINDEX <key> <index>
+ * Replies with the element at <index> (negative counts from the tail),
+ * or nil when the key is missing or the index is out of range. */
+void lindexCommand(client *c) {
+    robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]);
+    if (o == NULL || checkType(c,o,OBJ_LIST)) return;
+    long index;
+
+    if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != C_OK))
+        return;
+
+    /* Seek directly to the index; a single listTypeNext() tells us
+     * whether it exists. */
+    listTypeIterator *iter = listTypeInitIterator(o,index,LIST_TAIL);
+    listTypeEntry entry;
+    unsigned char *vstr;
+    size_t vlen;
+    long long lval;
+
+    if (listTypeNext(iter,&entry)) {
+        vstr = listTypeGetValue(&entry,&vlen,&lval);
+        if (vstr) {
+            addReplyBulkCBuffer(c, vstr, vlen);
+        } else {
+            addReplyBulkLongLong(c, lval);
+        }
+    } else {
+        addReplyNull(c);
+    }
+
+    listTypeReleaseIterator(iter);
+}
+
+/* LSET <key> <index> <element>
+ * Replaces the element at <index>; errors if the key is missing or the
+ * index is out of range. */
+void lsetCommand(client *c) {
+    robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
+    if (o == NULL || checkType(c,o,OBJ_LIST)) return;
+    long index;
+    robj *value = c->argv[3];
+
+    if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != C_OK))
+        return;
+
+    /* The replacement may be larger than the old element: check the
+     * growing conversion first. */
+    listTypeTryConversionAppend(o,c->argv,3,3,NULL,NULL);
+    if (listTypeReplaceAtIndex(o,index,value)) {
+        addReply(c,shared.ok);
+        signalModifiedKey(c,c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_LIST,"lset",c->argv[1],c->db->id);
+        server.dirty++;
+
+        /* We might replace a big item with a small one or vice versa, but we've
+         * already handled the growing case in listTypeTryConversionAppend()
+         * above, so here we just need to try the conversion for shrinking. */
+        listTypeTryConversion(o,LIST_CONV_SHRINKING,NULL,NULL);
+    } else {
+        addReplyErrorObject(c,shared.outofrangeerr);
+    }
+}
+
+/* A helper function like addListRangeReply, more details see below.
+ * The difference is that here we are returning nested arrays, like:
+ * 1) keyname
+ * 2) 1) element1
+ *    2) element2
+ *
+ * And also actually pop out from the list by calling listElementsRemoved.
+ * We maintain the server.dirty and notifications there.
+ *
+ * 'deleted' is an optional output argument to get an indication
+ * if the key got deleted by this function. */
+void listPopRangeAndReplyWithKey(client *c, robj *o, robj *key, int where, long count, int signal, int *deleted) {
+    long llen = listTypeLength(o);
+    long rangelen = (count > llen) ? llen : count;  /* Clamp to list length. */
+    /* For tail pops, the range is expressed with negative indexes and
+     * replied in reverse so elements appear in pop order. */
+    long rangestart = (where == LIST_HEAD) ? 0 : -rangelen;
+    long rangeend = (where == LIST_HEAD) ? rangelen - 1 : -1;
+    int reverse = (where == LIST_HEAD) ? 0 : 1;
+
+    /* We return key-name just once, and an array of elements */
+    addReplyArrayLen(c, 2);
+    addReplyBulk(c, key);
+    addListRangeReply(c, o, rangestart, rangeend, reverse);
+
+    /* Pop these elements. */
+    listTypeDelRange(o, rangestart, rangelen);
+    /* Maintain the notifications and dirty. */
+    listElementsRemoved(c, key, where, o, rangelen, signal, deleted);
+}
+
+/* Extracted from `addListRangeReply()` to reply with a quicklist list.
+ * Note that the purpose is to make the methods small so that the
+ * code in the loop can be inlined better to improve performance. */
+void addListQuicklistRangeReply(client *c, robj *o, int from, int rangelen, int reverse) {
+    /* Return the result in form of a multi-bulk reply */
+    addReplyArrayLen(c,rangelen);
+
+    /* Reversed ranges start at the range's end and walk towards the head. */
+    int direction = reverse ? AL_START_TAIL : AL_START_HEAD;
+    quicklistIter *iter = quicklistGetIteratorAtIdx(o->ptr, direction, from);
+    while(rangelen--) {
+        quicklistEntry qe;
+        serverAssert(quicklistNext(iter, &qe)); /* fail on corrupt data */
+        if (qe.value) {
+            addReplyBulkCBuffer(c,qe.value,qe.sz);
+        } else {
+            addReplyBulkLongLong(c,qe.longval);
+        }
+    }
+    quicklistReleaseIterator(iter);
+}
+
+/* Extracted from `addListRangeReply()` to reply with a listpack list.
+ * Note that the purpose is to make the methods small so that the
+ * code in the loop can be inlined better to improve performance. */
+void addListListpackRangeReply(client *c, robj *o, int from, int rangelen, int reverse) {
+    unsigned char *p = lpSeek(o->ptr, from);
+    unsigned char *vstr;
+    unsigned int vlen;
+    long long lval;
+
+    /* Return the result in form of a multi-bulk reply */
+    addReplyArrayLen(c,rangelen);
+
+    while(rangelen--) {
+        serverAssert(p); /* fail on corrupt data */
+        vstr = lpGetValue(p, &vlen, &lval);
+        if (vstr) {
+            addReplyBulkCBuffer(c,vstr,vlen);
+        } else {
+            addReplyBulkLongLong(c,lval);
+        }
+        /* Walk backwards for reversed ranges, forwards otherwise. */
+        p = reverse ? lpPrev(o->ptr,p) : lpNext(o->ptr,p);
+    }
+}
+
+/* A helper for replying with a list's range between the inclusive start and end
+ * indexes as multi-bulk, with support for negative indexes. Note that start
+ * must be less than end or an empty array is returned. When the reverse
+ * argument is set to a non-zero value, the reply is reversed so that elements
+ * are returned from end to start. */
+void addListRangeReply(client *c, robj *o, long start, long end, int reverse) {
+    long rangelen, llen = listTypeLength(o);
+
+    /* Convert negative indexes. */
+    if (start < 0) start = llen+start;
+    if (end < 0) end = llen+end;
+    if (start < 0) start = 0;
+
+    /* Invariant: start >= 0, so this test will be true when end < 0.
+     * The range is empty when start > end or start >= length. */
+    if (start > end || start >= llen) {
+        addReply(c,shared.emptyarray);
+        return;
+    }
+    if (end >= llen) end = llen-1;
+    rangelen = (end-start)+1;
+
+    /* A reversed reply starts iterating from the end of the range. */
+    int from = reverse ? end : start;
+    if (o->encoding == OBJ_ENCODING_QUICKLIST)
+        addListQuicklistRangeReply(c, o, from, rangelen, reverse);
+    else if (o->encoding == OBJ_ENCODING_LISTPACK)
+        addListListpackRangeReply(c, o, from, rangelen, reverse);
+    else
+        serverPanic("Unknown list encoding");
+}
+
+/* A housekeeping helper for list elements popping tasks: fires keyspace
+ * notifications, deletes the key when the list became empty, tries the
+ * shrink conversion otherwise, and bumps server.dirty by 'count'.
+ *
+ * If 'signal' is 0, skip calling signalModifiedKey().
+ *
+ * 'deleted' is an optional output argument to get an indication
+ * if the key got deleted by this function. */
+void listElementsRemoved(client *c, robj *key, int where, robj *o, long count, int signal, int *deleted) {
+    char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
+
+    notifyKeyspaceEvent(NOTIFY_LIST, event, key, c->db->id);
+    if (listTypeLength(o) == 0) {
+        if (deleted) *deleted = 1;
+
+        dbDelete(c->db, key);
+        notifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, c->db->id);
+    } else {
+        /* List shrank: maybe convert back to listpack. */
+        listTypeTryConversion(o, LIST_CONV_SHRINKING, NULL, NULL);
+        if (deleted) *deleted = 0;
+    }
+    if (signal) signalModifiedKey(c, c->db, key);
+    server.dirty += count;
+}
+
+/* Implements the generic list pop operation for LPOP/RPOP.
+ * The where argument specifies which end of the list is operated on. An
+ * optional count may be provided as the third argument of the client's
+ * command. Without count the reply is a bulk string (or nil); with count
+ * it is an array (or nil array). */
+void popGenericCommand(client *c, int where) {
+    int hascount = (c->argc == 3);
+    long count = 0;
+    robj *value;
+
+    if (c->argc > 3) {
+        addReplyErrorArity(c);
+        return;
+    } else if (hascount) {
+        /* Parse the optional count argument. */
+        if (getPositiveLongFromObjectOrReply(c,c->argv[2],&count,NULL) != C_OK)
+            return;
+    }
+
+    /* The missing-key reply shape depends on whether COUNT was given. */
+    robj *o = lookupKeyWriteOrReply(c, c->argv[1], hascount ? shared.nullarray[c->resp]: shared.null[c->resp]);
+    if (o == NULL || checkType(c, o, OBJ_LIST))
+        return;
+
+    if (hascount && !count) {
+        /* Fast exit path. */
+        addReply(c,shared.emptyarray);
+        return;
+    }
+
+    if (!count) {
+        /* Pop a single element. This is POP's original behavior that replies
+         * with a bulk string. */
+        value = listTypePop(o,where);
+        serverAssert(value != NULL);
+        addReplyBulk(c,value);
+        decrRefCount(value);
+        listElementsRemoved(c,c->argv[1],where,o,1,1,NULL);
+    } else {
+        /* Pop a range of elements. An addition to the original POP command,
+         *  which replies with a multi-bulk. */
+        long llen = listTypeLength(o);
+        long rangelen = (count > llen) ? llen : count;
+        long rangestart = (where == LIST_HEAD) ? 0 : -rangelen;
+        long rangeend = (where == LIST_HEAD) ? rangelen - 1 : -1;
+        int reverse = (where == LIST_HEAD) ? 0 : 1;
+
+        addListRangeReply(c,o,rangestart,rangeend,reverse);
+        listTypeDelRange(o,rangestart,rangelen);
+        listElementsRemoved(c,c->argv[1],where,o,rangelen,1,NULL);
+    }
+}
+
+/* Like popGenericCommand but work with multiple keys.
+ * Take multiple keys and return multiple elements from just one key:
+ * the first key (in the given order) holding a non-empty list wins.
+ *
+ * 'numkeys' the number of keys.
+ * 'count' is the number of elements requested to pop.
+ *
+ * Always reply with array. */
+void mpopGenericCommand(client *c, robj **keys, int numkeys, int where, long count) {
+    int j;
+    robj *o;
+    robj *key;
+
+    for (j = 0; j < numkeys; j++) {
+        key = keys[j];
+        o = lookupKeyWrite(c->db, key);
+
+        /* Non-existing key, move to next key. */
+        if (o == NULL) continue;
+
+        if (checkType(c, o, OBJ_LIST)) return;
+
+        long llen = listTypeLength(o);
+        /* Empty list, move to next key. */
+        if (llen == 0) continue;
+
+        /* Pop a range of elements in a nested arrays way. */
+        listPopRangeAndReplyWithKey(c, o, key, where, count, 1, NULL);
+
+        /* Replicate it as [LR]POP COUNT so replicas deterministically pop
+         * from the same single key. */
+        robj *count_obj = createStringObjectFromLongLong((count > llen) ? llen : count);
+        rewriteClientCommandVector(c, 3,
+                                   (where == LIST_HEAD) ? shared.lpop : shared.rpop,
+                                   key, count_obj);
+        decrRefCount(count_obj);
+        return;
+    }
+
+    /* Look like we are not able to pop up any elements. */
+    addReplyNullArray(c);
+}
+
+/* LPOP <key> [count]
+ * Pop from the head of the list. */
+void lpopCommand(client *c) {
+    popGenericCommand(c,LIST_HEAD);
+}
+
+/* RPOP <key> [count]
+ * Pop from the tail of the list. */
+void rpopCommand(client *c) {
+    popGenericCommand(c,LIST_TAIL);
+}
+
+/* LRANGE <key> <start> <stop>
+ * Replies with the inclusive range of elements; negative indexes count
+ * from the tail. Missing key yields an empty array. */
+void lrangeCommand(client *c) {
+    robj *o;
+    long start, end;
+
+    if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
+        (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
+
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyarray)) == NULL
+        || checkType(c,o,OBJ_LIST)) return;
+
+    addListRangeReply(c,o,start,end,0);
+}
+
+/* LTRIM <key> <start> <stop>
+ * Trims the list so that only the inclusive [start, stop] range remains
+ * (negative indexes count from the tail). An empty resulting range
+ * deletes the key. Always replies OK. */
+void ltrimCommand(client *c) {
+    robj *o;
+    long start, end, llen, ltrim, rtrim;
+
+    if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
+        (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
+
+    if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.ok)) == NULL ||
+        checkType(c,o,OBJ_LIST)) return;
+    llen = listTypeLength(o);
+
+    /* convert negative indexes */
+    if (start < 0) start = llen+start;
+    if (end < 0) end = llen+end;
+    if (start < 0) start = 0;
+
+    /* Invariant: start >= 0, so this test will be true when end < 0.
+     * The range is empty when start > end or start >= length. */
+    if (start > end || start >= llen) {
+        /* Out of range start or start > end result in empty list */
+        ltrim = llen;
+        rtrim = 0;
+    } else {
+        if (end >= llen) end = llen-1;
+        /* ltrim = elements removed from the head, rtrim = from the tail. */
+        ltrim = start;
+        rtrim = llen-end-1;
+    }
+
+    /* Remove list elements to perform the trim */
+    if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+        quicklistDelRange(o->ptr,0,ltrim);
+        quicklistDelRange(o->ptr,-rtrim,rtrim);
+    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        o->ptr = lpDeleteRange(o->ptr,0,ltrim);
+        o->ptr = lpDeleteRange(o->ptr,-rtrim,rtrim);
+    } else {
+        serverPanic("Unknown list encoding");
+    }
+
+    notifyKeyspaceEvent(NOTIFY_LIST,"ltrim",c->argv[1],c->db->id);
+    if (listTypeLength(o) == 0) {
+        dbDelete(c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+    } else {
+        /* List shrank: maybe convert back to listpack. */
+        listTypeTryConversion(o,LIST_CONV_SHRINKING,NULL,NULL);
+    }
+    signalModifiedKey(c,c->db,c->argv[1]);
+    server.dirty += (ltrim + rtrim);
+    addReply(c,shared.ok);
+}
+
/* LPOS key element [RANK rank] [COUNT num-matches] [MAXLEN len]
 *
 * The "rank" is the position of the match, so if it is 1, the first match
 * is returned, if it is 2 the second match is returned and so forth.
 * It is 1 by default. If negative has the same meaning but the search is
 * performed starting from the end of the list.
 *
 * If COUNT is given, instead of returning the single element, a list of
 * all the matching elements up to "num-matches" are returned. COUNT can
 * be combined with RANK in order to return only the elements starting
 * from the Nth. If COUNT is zero, all the matching elements are returned.
 *
 * MAXLEN tells the command to scan a max of len elements. If zero (the
 * default), all the elements in the list are scanned if needed.
 *
 * The returned elements indexes are always referring to what LINDEX
 * would return. So first element from head is 0, and so forth. */
void lposCommand(client *c) {
    robj *o, *ele;
    ele = c->argv[2];
    int direction = LIST_TAIL;
    long rank = 1, count = -1, maxlen = 0; /* Count -1: option not given. */

    /* Parse the optional arguments. */
    for (int j = 3; j < c->argc; j++) {
        char *opt = c->argv[j]->ptr;
        int moreargs = (c->argc-1)-j;

        if (!strcasecmp(opt,"RANK") && moreargs) {
            j++;
            if (getRangeLongFromObjectOrReply(c, c->argv[j], -LONG_MAX, LONG_MAX, &rank, NULL) != C_OK)
                return;
            if (rank == 0) {
                addReplyError(c,"RANK can't be zero: use 1 to start from "
                                "the first match, 2 from the second ... "
                                "or use negative to start from the end of the list");
                return;
            }
        } else if (!strcasecmp(opt,"COUNT") && moreargs) {
            j++;
            if (getPositiveLongFromObjectOrReply(c, c->argv[j], &count,
                "COUNT can't be negative") != C_OK)
                return;
        } else if (!strcasecmp(opt,"MAXLEN") && moreargs) {
            j++;
            if (getPositiveLongFromObjectOrReply(c, c->argv[j], &maxlen,
                "MAXLEN can't be negative") != C_OK)
                return;
        } else {
            addReplyErrorObject(c,shared.syntaxerr);
            return;
        }
    }

    /* A negative rank means start from the tail. */
    if (rank < 0) {
        rank = -rank;
        direction = LIST_HEAD;
    }

    /* We return NULL or an empty array if there is no such key (or
     * if we find no matches, depending on the presence of the COUNT option. */
    if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) {
        if (count != -1)
            addReply(c,shared.emptyarray);
        else
            addReply(c,shared.null[c->resp]);
        return;
    }
    if (checkType(c,o,OBJ_LIST)) return;

    /* If we got the COUNT option, prepare to emit an array. */
    void *arraylenptr = NULL;
    if (count != -1) arraylenptr = addReplyDeferredLen(c);

    /* Seek the element. When scanning from the tail we start the
     * iterator at index -1 (last element) and walk towards the head. */
    listTypeIterator *li;
    li = listTypeInitIterator(o,direction == LIST_HEAD ? -1 : 0,direction);
    listTypeEntry entry;
    long llen = listTypeLength(o);
    long index = 0, matches = 0, matchindex = -1, arraylen = 0;
    while (listTypeNext(li,&entry) && (maxlen == 0 || index < maxlen)) {
        if (listTypeEqual(&entry,ele)) {
            matches++;
            /* 'index' counts iterator steps; convert to a head-relative
             * (LINDEX-style) index when iterating from the tail. */
            matchindex = (direction == LIST_TAIL) ? index : llen - index - 1;
            if (matches >= rank) {
                if (arraylenptr) {
                    arraylen++;
                    addReplyLongLong(c,matchindex);
                    /* COUNT 0 means "all matches", so only break when the
                     * requested number of matches was emitted. */
                    if (count && matches-rank+1 >= count) break;
                } else {
                    break;
                }
            }
        }
        index++;
        /* Reset every iteration: after the loop, matchindex != -1 only if
         * we broke out right on a match. */
        matchindex = -1; /* Remember if we exit the loop without a match. */
    }
    listTypeReleaseIterator(li);

    /* Reply to the client. Note that arraylenptr is not NULL only if
     * the COUNT option was selected. */
    if (arraylenptr != NULL) {
        setDeferredArrayLen(c,arraylenptr,arraylen);
    } else {
        if (matchindex != -1)
            addReplyLongLong(c,matchindex);
        else
            addReply(c,shared.null[c->resp]);
    }
}
+
+/* LREM <key> <count> <element> */
+void lremCommand(client *c) {
+ robj *subject, *obj;
+ obj = c->argv[3];
+ long toremove;
+ long removed = 0;
+
+ if ((getLongFromObjectOrReply(c, c->argv[2], &toremove, NULL) != C_OK))
+ return;
+
+ subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
+ if (subject == NULL || checkType(c,subject,OBJ_LIST)) return;
+
+ listTypeIterator *li;
+ if (toremove < 0) {
+ toremove = -toremove;
+ li = listTypeInitIterator(subject,-1,LIST_HEAD);
+ } else {
+ li = listTypeInitIterator(subject,0,LIST_TAIL);
+ }
+
+ listTypeEntry entry;
+ while (listTypeNext(li,&entry)) {
+ if (listTypeEqual(&entry,obj)) {
+ listTypeDelete(li, &entry);
+ server.dirty++;
+ removed++;
+ if (toremove && removed == toremove) break;
+ }
+ }
+ listTypeReleaseIterator(li);
+
+ if (removed) {
+ signalModifiedKey(c,c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_LIST,"lrem",c->argv[1],c->db->id);
+ }
+
+ if (listTypeLength(subject) == 0) {
+ dbDelete(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+ } else if (removed) {
+ listTypeTryConversion(subject,LIST_CONV_SHRINKING,NULL,NULL);
+ }
+
+ addReplyLongLong(c,removed);
+}
+
+void lmoveHandlePush(client *c, robj *dstkey, robj *dstobj, robj *value,
+ int where) {
+ /* Create the list if the key does not exist */
+ if (!dstobj) {
+ dstobj = createListListpackObject();
+ dbAdd(c->db,dstkey,dstobj);
+ }
+ signalModifiedKey(c,c->db,dstkey);
+ listTypeTryConversionAppend(dstobj,&value,0,0,NULL,NULL);
+ listTypePush(dstobj,value,where);
+ notifyKeyspaceEvent(NOTIFY_LIST,
+ where == LIST_HEAD ? "lpush" : "rpush",
+ dstkey,
+ c->db->id);
+ /* Always send the pushed value to the client. */
+ addReplyBulk(c,value);
+}
+
+int getListPositionFromObjectOrReply(client *c, robj *arg, int *position) {
+ if (strcasecmp(arg->ptr,"right") == 0) {
+ *position = LIST_TAIL;
+ } else if (strcasecmp(arg->ptr,"left") == 0) {
+ *position = LIST_HEAD;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+robj *getStringObjectFromListPosition(int position) {
+ if (position == LIST_HEAD) {
+ return shared.left;
+ } else {
+ // LIST_TAIL
+ return shared.right;
+ }
+}
+
/* Implements LMOVE/RPOPLPUSH and the final (non-blocking) step of their
 * blocking variants: pop one element from the 'wherefrom' end of the source
 * list and push it onto the 'whereto' end of the destination list. */
void lmoveGenericCommand(client *c, int wherefrom, int whereto) {
    robj *sobj, *value;
    /* Missing source key: reply with a null and return. */
    if ((sobj = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp]))
        == NULL || checkType(c,sobj,OBJ_LIST)) return;

    if (listTypeLength(sobj) == 0) {
        /* This may only happen after loading very old RDB files. Recent
         * versions of Redis delete keys of empty lists. */
        addReplyNull(c);
    } else {
        /* dobj may be NULL; lmoveHandlePush() creates the key in that case. */
        robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
        robj *touchedkey = c->argv[1];

        if (checkType(c,dobj,OBJ_LIST)) return;
        value = listTypePop(sobj,wherefrom);
        serverAssert(value); /* assertion for valgrind (avoid NPD) */
        /* Push to the destination first, then clean up the source key
         * (possibly deleting it if it became empty). */
        lmoveHandlePush(c,c->argv[2],dobj,value,whereto);
        listElementsRemoved(c,touchedkey,wherefrom,sobj,1,1,NULL);

        /* listTypePop returns an object with its refcount incremented */
        decrRefCount(value);

        /* When invoked from a blocking variant, propagate the equivalent
         * non-blocking command to replicas/AOF instead. */
        if (c->cmd->proc == blmoveCommand) {
            rewriteClientCommandVector(c,5,shared.lmove,
                c->argv[1],c->argv[2],c->argv[3],c->argv[4]);
        } else if (c->cmd->proc == brpoplpushCommand) {
            rewriteClientCommandVector(c,3,shared.rpoplpush,
                c->argv[1],c->argv[2]);
        }
    }
}
+
+/* LMOVE <source> <destination> (LEFT|RIGHT) (LEFT|RIGHT) */
+void lmoveCommand(client *c) {
+ int wherefrom, whereto;
+ if (getListPositionFromObjectOrReply(c,c->argv[3],&wherefrom)
+ != C_OK) return;
+ if (getListPositionFromObjectOrReply(c,c->argv[4],&whereto)
+ != C_OK) return;
+ lmoveGenericCommand(c, wherefrom, whereto);
+}
+
+/* This is the semantic of this command:
+ * RPOPLPUSH srclist dstlist:
+ * IF LLEN(srclist) > 0
+ * element = RPOP srclist
+ * LPUSH dstlist element
+ * RETURN element
+ * ELSE
+ * RETURN nil
+ * END
+ * END
+ *
+ * The idea is to be able to get an element from a list in a reliable way
+ * since the element is not just returned but pushed against another list
+ * as well. This command was originally proposed by Ezra Zygmuntowicz.
+ */
+void rpoplpushCommand(client *c) {
+ lmoveGenericCommand(c, LIST_TAIL, LIST_HEAD);
+}
+
/* Blocking RPOP/LPOP/LMPOP
 *
 * 'numkeys' is the number of keys.
 * 'timeout_idx' parameter position of block timeout.
 * 'where' LIST_HEAD for LEFT, LIST_TAIL for RIGHT.
 * 'count' is the number of elements requested to pop, or -1 for plain single pop.
 *
 * When count is -1, a reply of a single bulk-string will be used.
 * When count > 0, an array reply will be used. */
void blockingPopGenericCommand(client *c, robj **keys, int numkeys, int where, int timeout_idx, long count) {
    robj *o;
    robj *key;
    mstime_t timeout;
    int j;

    if (getTimeoutFromObjectOrReply(c,c->argv[timeout_idx],&timeout,UNIT_SECONDS)
        != C_OK) return;

    /* Traverse all input keys, we take action only based on one key:
     * the first key (in argument order) holding a non-empty list. */
    for (j = 0; j < numkeys; j++) {
        key = keys[j];
        o = lookupKeyWrite(c->db, key);

        /* Non-existing key, move to next key. */
        if (o == NULL) continue;

        if (checkType(c, o, OBJ_LIST)) return;

        long llen = listTypeLength(o);
        /* Empty list, move to next key. */
        if (llen == 0) continue;

        if (count != -1) {
            /* BLMPOP, non empty list, like a normal [LR]POP with count option.
             * The difference here we pop a range of elements in a nested arrays way. */
            listPopRangeAndReplyWithKey(c, o, key, where, count, 1, NULL);

            /* Replicate it as [LR]POP COUNT. The count is clamped to the
             * list length so replicas apply exactly what happened here. */
            robj *count_obj = createStringObjectFromLongLong((count > llen) ? llen : count);
            rewriteClientCommandVector(c, 3,
                (where == LIST_HEAD) ? shared.lpop : shared.rpop,
                key, count_obj);
            decrRefCount(count_obj);
            return;
        }

        /* Non empty list, this is like a normal [LR]POP. */
        robj *value = listTypePop(o,where);
        serverAssert(value != NULL);

        /* BLPOP/BRPOP reply is a two-element array: [key, value]. */
        addReplyArrayLen(c,2);
        addReplyBulk(c,key);
        addReplyBulk(c,value);
        decrRefCount(value);
        listElementsRemoved(c,key,where,o,1,1,NULL);

        /* Replicate it as an [LR]POP instead of B[LR]POP. */
        rewriteClientCommandVector(c,2,
            (where == LIST_HEAD) ? shared.lpop : shared.rpop,
            key);
        return;
    }

    /* If we are not allowed to block the client, the only thing
     * we can do is treating it as a timeout (even with timeout 0). */
    if (c->flags & CLIENT_DENY_BLOCKING) {
        addReplyNullArray(c);
        return;
    }

    /* If the keys do not exist we must block */
    blockForKeys(c,BLOCKED_LIST,keys,numkeys,timeout,0);
}
+
+/* BLPOP <key> [<key> ...] <timeout> */
+void blpopCommand(client *c) {
+ blockingPopGenericCommand(c,c->argv+1,c->argc-2,LIST_HEAD,c->argc-1,-1);
+}
+
+/* BRPOP <key> [<key> ...] <timeout> */
+void brpopCommand(client *c) {
+ blockingPopGenericCommand(c,c->argv+1,c->argc-2,LIST_TAIL,c->argc-1,-1);
+}
+
+void blmoveGenericCommand(client *c, int wherefrom, int whereto, mstime_t timeout) {
+ robj *key = lookupKeyWrite(c->db, c->argv[1]);
+ if (checkType(c,key,OBJ_LIST)) return;
+
+ if (key == NULL) {
+ if (c->flags & CLIENT_DENY_BLOCKING) {
+ /* Blocking against an empty list when blocking is not allowed
+ * returns immediately. */
+ addReplyNull(c);
+ } else {
+ /* The list is empty and the client blocks. */
+ blockForKeys(c,BLOCKED_LIST,c->argv + 1,1,timeout,0);
+ }
+ } else {
+ /* The list exists and has elements, so
+ * the regular lmoveCommand is executed. */
+ serverAssertWithInfo(c,key,listTypeLength(key) > 0);
+ lmoveGenericCommand(c,wherefrom,whereto);
+ }
+}
+
+/* BLMOVE <source> <destination> (LEFT|RIGHT) (LEFT|RIGHT) <timeout> */
+void blmoveCommand(client *c) {
+ mstime_t timeout;
+ int wherefrom, whereto;
+ if (getListPositionFromObjectOrReply(c,c->argv[3],&wherefrom)
+ != C_OK) return;
+ if (getListPositionFromObjectOrReply(c,c->argv[4],&whereto)
+ != C_OK) return;
+ if (getTimeoutFromObjectOrReply(c,c->argv[5],&timeout,UNIT_SECONDS)
+ != C_OK) return;
+ blmoveGenericCommand(c,wherefrom,whereto,timeout);
+}
+
+/* BRPOPLPUSH <source> <destination> <timeout> */
+void brpoplpushCommand(client *c) {
+ mstime_t timeout;
+ if (getTimeoutFromObjectOrReply(c,c->argv[3],&timeout,UNIT_SECONDS)
+ != C_OK) return;
+ blmoveGenericCommand(c, LIST_TAIL, LIST_HEAD, timeout);
+}
+
/* LMPOP/BLMPOP
 *
 * 'numkeys_idx' parameter position of key number.
 * 'is_block' this indicates whether it is a blocking variant. */
void lmpopGenericCommand(client *c, int numkeys_idx, int is_block) {
    long j;
    long numkeys = 0; /* Number of keys. */
    int where = 0; /* HEAD for LEFT, TAIL for RIGHT. */
    long count = -1; /* Reply will consist of up to count elements, depending on the list's length. */

    /* Parse the numkeys. */
    if (getRangeLongFromObjectOrReply(c, c->argv[numkeys_idx], 1, LONG_MAX,
                                      &numkeys, "numkeys should be greater than 0") != C_OK)
        return;

    /* Parse the where. where_idx: the index of where in the c->argv.
     * The LEFT|RIGHT token sits right after the 'numkeys' key names. */
    long where_idx = numkeys_idx + numkeys + 1;
    if (where_idx >= c->argc) {
        addReplyErrorObject(c, shared.syntaxerr);
        return;
    }
    if (getListPositionFromObjectOrReply(c, c->argv[where_idx], &where) != C_OK)
        return;

    /* Parse the optional arguments. */
    for (j = where_idx + 1; j < c->argc; j++) {
        char *opt = c->argv[j]->ptr;
        int moreargs = (c->argc - 1) - j;

        /* The 'count == -1' guard rejects a repeated COUNT option. */
        if (count == -1 && !strcasecmp(opt, "COUNT") && moreargs) {
            j++;
            if (getRangeLongFromObjectOrReply(c, c->argv[j], 1, LONG_MAX,
                                              &count,"count should be greater than 0") != C_OK)
                return;
        } else {
            addReplyErrorObject(c, shared.syntaxerr);
            return;
        }
    }

    /* COUNT defaults to a single element per pop. */
    if (count == -1) count = 1;

    if (is_block) {
        /* BLOCK. We will handle CLIENT_DENY_BLOCKING flag in blockingPopGenericCommand.
         * For BLMPOP the timeout is argv[1]. */
        blockingPopGenericCommand(c, c->argv+numkeys_idx+1, numkeys, where, 1, count);
    } else {
        /* NON-BLOCK */
        mpopGenericCommand(c, c->argv+numkeys_idx+1, numkeys, where, count);
    }
}
+
+/* LMPOP numkeys <key> [<key> ...] (LEFT|RIGHT) [COUNT count] */
+void lmpopCommand(client *c) {
+ lmpopGenericCommand(c, 1, 0);
+}
+
+/* BLMPOP timeout numkeys <key> [<key> ...] (LEFT|RIGHT) [COUNT count] */
+void blmpopCommand(client *c) {
+ lmpopGenericCommand(c, 2, 1);
+}
diff --git a/src/t_set.c b/src/t_set.c
new file mode 100644
index 0000000..ff7dc8f
--- /dev/null
+++ b/src/t_set.c
@@ -0,0 +1,1680 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "intset.h" /* Compact integer set structure */
+
+/*-----------------------------------------------------------------------------
+ * Set Commands
+ *----------------------------------------------------------------------------*/
+
+void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
+ robj *dstkey, int op);
+
+/* Factory method to return a set that *can* hold "value". When the object has
+ * an integer-encodable value, an intset will be returned. Otherwise a listpack
+ * or a regular hash table.
+ *
+ * The size hint indicates approximately how many items will be added which is
+ * used to determine the initial representation. */
+robj *setTypeCreate(sds value, size_t size_hint) {
+ if (isSdsRepresentableAsLongLong(value,NULL) == C_OK && size_hint <= server.set_max_intset_entries)
+ return createIntsetObject();
+ if (size_hint <= server.set_max_listpack_entries)
+ return createSetListpackObject();
+
+ /* We may oversize the set by using the hint if the hint is not accurate,
+ * but we will assume this is acceptable to maximize performance. */
+ robj *o = createSetObject();
+ dictExpand(o->ptr, size_hint);
+ return o;
+}
+
+/* Check if the existing set should be converted to another encoding based off the
+ * the size hint. */
+void setTypeMaybeConvert(robj *set, size_t size_hint) {
+ if ((set->encoding == OBJ_ENCODING_LISTPACK && size_hint > server.set_max_listpack_entries)
+ || (set->encoding == OBJ_ENCODING_INTSET && size_hint > server.set_max_intset_entries))
+ {
+ setTypeConvertAndExpand(set, OBJ_ENCODING_HT, size_hint, 1);
+ }
+}
+
+/* Return the maximum number of entries to store in an intset. */
+static size_t intsetMaxEntries(void) {
+ size_t max_entries = server.set_max_intset_entries;
+ /* limit to 1G entries due to intset internals. */
+ if (max_entries >= 1<<30) max_entries = 1<<30;
+ return max_entries;
+}
+
+/* Converts intset to HT if it contains too many entries. */
+static void maybeConvertIntset(robj *subject) {
+ serverAssert(subject->encoding == OBJ_ENCODING_INTSET);
+ if (intsetLen(subject->ptr) > intsetMaxEntries())
+ setTypeConvert(subject,OBJ_ENCODING_HT);
+}
+
/* When you know all set elements are integers, call this to convert the set to
 * an intset. No conversion happens if the set contains too many entries for an
 * intset.
 *
 * Caller contract: every element must be integer-encodable; a non-numeric
 * element trips the string2ll assertion below. */
static void maybeConvertToIntset(robj *set) {
    if (set->encoding == OBJ_ENCODING_INTSET) return; /* already intset */
    if (setTypeSize(set) > intsetMaxEntries()) return; /* can't use intset */
    intset *is = intsetNew();
    char *str;
    size_t len;
    int64_t llval;
    setTypeIterator *si = setTypeInitIterator(set);
    while (setTypeNext(si, &str, &len, &llval) != -1) {
        if (str) {
            /* If the element is returned as a string, we may be able to convert
             * it to integer. This happens for OBJ_ENCODING_HT. */
            serverAssert(string2ll(str, len, (long long *)&llval));
        }
        uint8_t success = 0;
        /* Sets hold no duplicates, so every intsetAdd must succeed. */
        is = intsetAdd(is, llval, &success);
        serverAssert(success);
    }
    setTypeReleaseIterator(si);
    freeSetObject(set); /* frees the internals but not robj itself */
    set->ptr = is;
    set->encoding = OBJ_ENCODING_INTSET;
}
+
+/* Add the specified sds value into a set.
+ *
+ * If the value was already member of the set, nothing is done and 0 is
+ * returned, otherwise the new element is added and 1 is returned. */
+int setTypeAdd(robj *subject, sds value) {
+ return setTypeAddAux(subject, value, sdslen(value), 0, 1);
+}
+
/* Add member. This function is optimized for the different encodings. The
 * value can be provided as an sds string (indicated by passing str_is_sds =
 * 1), as string and length (str_is_sds = 0) or as an integer in which case str
 * is set to NULL and llval is provided instead.
 *
 * May convert the set's encoding as a side effect when a size or value
 * threshold is crossed.
 *
 * Returns 1 if the value was added and 0 if it was already a member. */
int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds) {
    char tmpbuf[LONG_STR_SIZE];
    if (!str) {
        /* Integer input: an intset takes it directly; all other encodings
         * store strings, so stringify into the local buffer. */
        if (set->encoding == OBJ_ENCODING_INTSET) {
            uint8_t success = 0;
            set->ptr = intsetAdd(set->ptr, llval, &success);
            if (success) maybeConvertIntset(set);
            return success;
        }
        /* Convert int to string. */
        len = ll2string(tmpbuf, sizeof tmpbuf, llval);
        str = tmpbuf;
        str_is_sds = 0;
    }

    serverAssert(str);
    if (set->encoding == OBJ_ENCODING_HT) {
        /* Avoid duping the string if it is an sds string. */
        sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
        dict *ht = set->ptr;
        /* NULL position means the key already exists in the dict. */
        void *position = dictFindPositionForInsert(ht, sdsval, NULL);
        if (position) {
            /* Key doesn't already exist in the set. Add it but dup the key. */
            if (sdsval == str) sdsval = sdsdup(sdsval);
            dictInsertAtPosition(ht, sdsval, position);
        } else if (sdsval != str) {
            /* String is already a member. Free our temporary sds copy. */
            sdsfree(sdsval);
        }
        return (position != NULL);
    } else if (set->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *lp = set->ptr;
        unsigned char *p = lpFirst(lp);
        if (p != NULL)
            p = lpFind(lp, p, (unsigned char*)str, len, 0);
        if (p == NULL) {
            /* Not found. */
            if (lpLength(lp) < server.set_max_listpack_entries &&
                len <= server.set_max_listpack_value &&
                lpSafeToAdd(lp, len))
            {
                if (str == tmpbuf) {
                    /* This came in as integer so we can avoid parsing it again.
                     * TODO: Create and use lpFindInteger; don't go via string. */
                    lp = lpAppendInteger(lp, llval);
                } else {
                    lp = lpAppend(lp, (unsigned char*)str, len);
                }
                set->ptr = lp;
            } else {
                /* Size limit is reached. Convert to hashtable and add. */
                setTypeConvertAndExpand(set, OBJ_ENCODING_HT, lpLength(lp) + 1, 1);
                serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK);
            }
            return 1;
        }
        /* Found: fall through to the final 'return 0' below. */
    } else if (set->encoding == OBJ_ENCODING_INTSET) {
        long long value;
        if (string2ll(str, len, &value)) {
            /* Numeric string: stays an intset (possibly upgraded to HT by
             * maybeConvertIntset when it grows too large). */
            uint8_t success = 0;
            set->ptr = intsetAdd(set->ptr,value,&success);
            if (success) {
                maybeConvertIntset(set);
                return 1;
            }
        } else {
            /* Check if listpack encoding is safe not to cross any threshold. */
            size_t maxelelen = 0, totsize = 0;
            unsigned long n = intsetLen(set->ptr);
            if (n != 0) {
                /* Worst-case per-element sizes derived from the extreme
                 * values stored in the intset. */
                size_t elelen1 = sdigits10(intsetMax(set->ptr));
                size_t elelen2 = sdigits10(intsetMin(set->ptr));
                maxelelen = max(elelen1, elelen2);
                size_t s1 = lpEstimateBytesRepeatedInteger(intsetMax(set->ptr), n);
                size_t s2 = lpEstimateBytesRepeatedInteger(intsetMin(set->ptr), n);
                totsize = max(s1, s2);
            }
            if (intsetLen((const intset*)set->ptr) < server.set_max_listpack_entries &&
                len <= server.set_max_listpack_value &&
                maxelelen <= server.set_max_listpack_value &&
                lpSafeToAdd(NULL, totsize + len))
            {
                /* In the "safe to add" check above we assumed all elements in
                 * the intset are of size maxelelen. This is an upper bound. */
                setTypeConvertAndExpand(set, OBJ_ENCODING_LISTPACK,
                                        intsetLen(set->ptr) + 1, 1);
                unsigned char *lp = set->ptr;
                lp = lpAppend(lp, (unsigned char *)str, len);
                lp = lpShrinkToFit(lp);
                set->ptr = lp;
                return 1;
            } else {
                setTypeConvertAndExpand(set, OBJ_ENCODING_HT,
                                        intsetLen(set->ptr) + 1, 1);
                /* The set *was* an intset and this value is not integer
                 * encodable, so dictAdd should always work. */
                serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK);
                return 1;
            }
        }
    } else {
        serverPanic("Unknown set encoding");
    }
    /* The value was already a member. */
    return 0;
}
+
+/* Deletes a value provided as an sds string from the set. Returns 1 if the
+ * value was deleted and 0 if it was not a member of the set. */
+int setTypeRemove(robj *setobj, sds value) {
+ return setTypeRemoveAux(setobj, value, sdslen(value), 0, 1);
+}
+
+/* Remove a member. This function is optimized for the different encodings. The
+ * value can be provided as an sds string (indicated by passing str_is_sds =
+ * 1), as string and length (str_is_sds = 0) or as an integer in which case str
+ * is set to NULL and llval is provided instead.
+ *
+ * Returns 1 if the value was deleted and 0 if it was not a member of the set. */
+int setTypeRemoveAux(robj *setobj, char *str, size_t len, int64_t llval, int str_is_sds) {
+ char tmpbuf[LONG_STR_SIZE];
+ if (!str) {
+ if (setobj->encoding == OBJ_ENCODING_INTSET) {
+ int success;
+ setobj->ptr = intsetRemove(setobj->ptr,llval,&success);
+ return success;
+ }
+ len = ll2string(tmpbuf, sizeof tmpbuf, llval);
+ str = tmpbuf;
+ str_is_sds = 0;
+ }
+
+ if (setobj->encoding == OBJ_ENCODING_HT) {
+ sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
+ int deleted = (dictDelete(setobj->ptr, sdsval) == DICT_OK);
+ if (deleted && htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
+ if (sdsval != str) sdsfree(sdsval); /* free temp copy */
+ return deleted;
+ } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = setobj->ptr;
+ unsigned char *p = lpFirst(lp);
+ if (p == NULL) return 0;
+ p = lpFind(lp, p, (unsigned char*)str, len, 0);
+ if (p != NULL) {
+ lp = lpDelete(lp, p, NULL);
+ setobj->ptr = lp;
+ return 1;
+ }
+ } else if (setobj->encoding == OBJ_ENCODING_INTSET) {
+ long long llval;
+ if (string2ll(str, len, &llval)) {
+ int success;
+ setobj->ptr = intsetRemove(setobj->ptr,llval,&success);
+ if (success) return 1;
+ }
+ } else {
+ serverPanic("Unknown set encoding");
+ }
+ return 0;
+}
+
+/* Check if an sds string is a member of the set. Returns 1 if the value is a
+ * member of the set and 0 if it isn't. */
+int setTypeIsMember(robj *subject, sds value) {
+ return setTypeIsMemberAux(subject, value, sdslen(value), 0, 1);
+}
+
+/* Membership checking optimized for the different encodings. The value can be
+ * provided as an sds string (indicated by passing str_is_sds = 1), as string
+ * and length (str_is_sds = 0) or as an integer in which case str is set to NULL
+ * and llval is provided instead.
+ *
+ * Returns 1 if the value is a member of the set and 0 if it isn't. */
+int setTypeIsMemberAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds) {
+ char tmpbuf[LONG_STR_SIZE];
+ if (!str) {
+ if (set->encoding == OBJ_ENCODING_INTSET)
+ return intsetFind(set->ptr, llval);
+ len = ll2string(tmpbuf, sizeof tmpbuf, llval);
+ str = tmpbuf;
+ str_is_sds = 0;
+ }
+
+ if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ return p && lpFind(lp, p, (unsigned char*)str, len, 0);
+ } else if (set->encoding == OBJ_ENCODING_INTSET) {
+ long long llval;
+ return string2ll(str, len, &llval) && intsetFind(set->ptr, llval);
+ } else if (set->encoding == OBJ_ENCODING_HT && str_is_sds) {
+ return dictFind(set->ptr, (sds)str) != NULL;
+ } else if (set->encoding == OBJ_ENCODING_HT) {
+ sds sdsval = sdsnewlen(str, len);
+ int result = dictFind(set->ptr, sdsval) != NULL;
+ sdsfree(sdsval);
+ return result;
+ } else {
+ serverPanic("Unknown set encoding");
+ }
+}
+
+setTypeIterator *setTypeInitIterator(robj *subject) {
+ setTypeIterator *si = zmalloc(sizeof(setTypeIterator));
+ si->subject = subject;
+ si->encoding = subject->encoding;
+ if (si->encoding == OBJ_ENCODING_HT) {
+ si->di = dictGetIterator(subject->ptr);
+ } else if (si->encoding == OBJ_ENCODING_INTSET) {
+ si->ii = 0;
+ } else if (si->encoding == OBJ_ENCODING_LISTPACK) {
+ si->lpi = NULL;
+ } else {
+ serverPanic("Unknown set encoding");
+ }
+ return si;
+}
+
+void setTypeReleaseIterator(setTypeIterator *si) {
+ if (si->encoding == OBJ_ENCODING_HT)
+ dictReleaseIterator(si->di);
+ zfree(si);
+}
+
/* Move to the next entry in the set. Returns the object at the current
 * position, as a string or as an integer.
 *
 * Since set elements can be internally be stored as SDS strings, char buffers or
 * simple arrays of integers, setTypeNext returns the encoding of the
 * set object you are iterating, and will populate the appropriate pointers
 * (str and len) or (llele) depending on whether the value is stored as a string
 * or as an integer internally.
 *
 * If OBJ_ENCODING_HT is returned, then str points to an sds string and can be
 * used as such. If OBJ_ENCODING_INTSET, then llele is populated and str is
 * pointed to NULL. If OBJ_ENCODING_LISTPACK is returned, the value can be
 * either a string or an integer. If *str is not NULL, then str and len are
 * populated with the string content and length. Otherwise, llele populated with
 * an integer value.
 *
 * Note that str, len and llele pointers should all be passed and cannot
 * be NULL since the function will try to defensively populate the non
 * used field with values which are easy to trap if misused.
 *
 * When there are no more elements -1 is returned. */
int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele) {
    if (si->encoding == OBJ_ENCODING_HT) {
        dictEntry *de = dictNext(si->di);
        if (de == NULL) return -1;
        *str = dictGetKey(de);
        *len = sdslen(*str);
        *llele = -123456789; /* Not needed. Defensive. */
    } else if (si->encoding == OBJ_ENCODING_INTSET) {
        /* si->ii is the next index to read; post-increment advances it. */
        if (!intsetGet(si->subject->ptr,si->ii++,llele))
            return -1;
        *str = NULL;
    } else if (si->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *lp = si->subject->ptr;
        unsigned char *lpi = si->lpi;
        /* NULL cursor means iteration has not started yet. */
        if (lpi == NULL) {
            lpi = lpFirst(lp);
        } else {
            lpi = lpNext(lp, lpi);
        }
        if (lpi == NULL) return -1;
        si->lpi = lpi;
        unsigned int l;
        /* lpGetValue returns NULL for integer-encoded entries, in which
         * case the integer is stored through the last argument. */
        *str = (char *)lpGetValue(lpi, &l, (long long *)llele);
        *len = (size_t)l;
    } else {
        serverPanic("Wrong set encoding in setTypeNext");
    }
    return si->encoding;
}
+
+/* The not copy on write friendly version but easy to use version
+ * of setTypeNext() is setTypeNextObject(), returning new SDS
+ * strings. So if you don't retain a pointer to this object you should call
+ * sdsfree() against it.
+ *
+ * This function is the way to go for write operations where COW is not
+ * an issue. */
+sds setTypeNextObject(setTypeIterator *si) {
+ int64_t intele;
+ char *str;
+ size_t len;
+
+ if (setTypeNext(si, &str, &len, &intele) == -1) return NULL;
+ if (str != NULL) return sdsnewlen(str, len);
+ return sdsfromlonglong(intele);
+}
+
/* Return random element from a non empty set.
 * The returned element can be an int64_t value if the set is encoded
 * as an "intset" blob of integers, or an string.
 *
 * The caller provides three pointers to be populated with the right
 * object. The return value of the function is the object->encoding
 * field of the object and can be used by the caller to check if the
 * int64_t pointer or the str and len pointers were populated, as for
 * setTypeNext. If OBJ_ENCODING_HT is returned, str is pointed to a
 * string which is actually an sds string and it can be used as such.
 *
 * Note that both the str, len and llele pointers should be passed and cannot
 * be NULL. If str is set to NULL, the value is an integer stored in llele. */
int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele) {
    if (setobj->encoding == OBJ_ENCODING_HT) {
        /* dictGetFairRandomKey trades speed for a more uniform
         * distribution than the plain random-key variant. */
        dictEntry *de = dictGetFairRandomKey(setobj->ptr);
        *str = dictGetKey(de);
        *len = sdslen(*str);
        *llele = -123456789; /* Not needed. Defensive. */
    } else if (setobj->encoding == OBJ_ENCODING_INTSET) {
        *llele = intsetRandom(setobj->ptr);
        *str = NULL; /* Not needed. Defensive. */
    } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) {
        /* Uniform pick by index using rand(); listpack sets are small by
         * configuration so a linear lpSeek is acceptable. */
        unsigned char *lp = setobj->ptr;
        int r = rand() % lpLength(lp);
        unsigned char *p = lpSeek(lp, r);
        unsigned int l;
        /* NULL str here means the entry is integer-encoded in llele. */
        *str = (char *)lpGetValue(p, &l, (long long *)llele);
        *len = (size_t)l;
    } else {
        serverPanic("Unknown set encoding");
    }
    return setobj->encoding;
}
+
/* Pops a random element and returns it as an object.
 * The set must be non empty (the listpack path asserts nothing: lpNextRandom
 * with count 1 is expected to return a valid entry). The returned object
 * is owned by the caller. */
robj *setTypePopRandom(robj *set) {
    robj *obj;
    if (set->encoding == OBJ_ENCODING_LISTPACK) {
        /* Find random and delete it without re-seeking the listpack:
         * the pointer returned by lpNextRandom is reused for lpDelete. */
        unsigned int i = 0;
        unsigned char *p = lpNextRandom(set->ptr, lpFirst(set->ptr), &i, 1, 0);
        unsigned int len = 0; /* initialize to silence warning */
        long long llele = 0; /* initialize to silence warning */
        char *str = (char *)lpGetValue(p, &len, &llele);
        if (str)
            obj = createStringObject(str, len);
        else
            obj = createStringObjectFromLongLong(llele);
        set->ptr = lpDelete(set->ptr, p, NULL);
    } else {
        /* Other encodings: pick a random element, copy it into an object,
         * then remove it by value. */
        char *str;
        size_t len = 0;
        int64_t llele = 0;
        int encoding = setTypeRandomElement(set, &str, &len, &llele);
        if (str)
            obj = createStringObject(str, len);
        else
            obj = createStringObjectFromLongLong(llele);
        /* For the HT encoding 'str' is an sds string, so str_is_sds=1. */
        setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT);
    }
    return obj;
}
+
+/* Return the number of elements stored in a set object, for any of the
+ * three supported encodings. */
+unsigned long setTypeSize(const robj *subject) {
+    switch (subject->encoding) {
+    case OBJ_ENCODING_HT:
+        return dictSize((const dict*)subject->ptr);
+    case OBJ_ENCODING_INTSET:
+        return intsetLen((const intset*)subject->ptr);
+    case OBJ_ENCODING_LISTPACK:
+        return lpLength((unsigned char *)subject->ptr);
+    default:
+        serverPanic("Unknown set encoding");
+    }
+}
+
+/* Convert the set to specified encoding. The resulting dict (when converting
+ * to a hash table) is presized to hold the number of elements in the original
+ * set. On out-of-memory this panics, since it delegates to
+ * setTypeConvertAndExpand() with panic=1. */
+void setTypeConvert(robj *setobj, int enc) {
+    setTypeConvertAndExpand(setobj, enc, setTypeSize(setobj), 1);
+}
+
+/* Converts a set to the specified encoding, pre-sizing it for 'cap' elements.
+ * The 'panic' argument controls whether to panic on OOM (panic=1) or return
+ * C_ERR on OOM (panic=0). If panic=1 is given, this function always returns
+ * C_OK.
+ *
+ * Only conversions to OBJ_ENCODING_HT and OBJ_ENCODING_LISTPACK are
+ * supported; any other target encoding panics. */
+int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic) {
+    setTypeIterator *si;
+    /* Converting to the current encoding (or converting a non-set) is a
+     * programming error. */
+    serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET &&
+                             setobj->encoding != enc);
+
+    if (enc == OBJ_ENCODING_HT) {
+        dict *d = dictCreate(&setDictType);
+        sds element;
+
+        /* Presize the dict to avoid rehashing */
+        if (panic) {
+            dictExpand(d, cap);
+        } else if (dictTryExpand(d, cap) != DICT_OK) {
+            /* Non-panicking OOM path: release the partial dict and report
+             * the failure to the caller; the original set is untouched. */
+            dictRelease(d);
+            return C_ERR;
+        }
+
+        /* To add the elements we extract integers and create redis objects */
+        si = setTypeInitIterator(setobj);
+        while ((element = setTypeNextObject(si)) != NULL) {
+            /* Elements of a set are unique, so dictAdd must succeed. */
+            serverAssert(dictAdd(d,element,NULL) == DICT_OK);
+        }
+        setTypeReleaseIterator(si);
+
+        freeSetObject(setobj); /* frees the internals but not setobj itself */
+        setobj->encoding = OBJ_ENCODING_HT;
+        setobj->ptr = d;
+    } else if (enc == OBJ_ENCODING_LISTPACK) {
+        /* Preallocate the minimum two bytes per element (enc/value + backlen) */
+        size_t estcap = cap * 2;
+        if (setobj->encoding == OBJ_ENCODING_INTSET && setTypeSize(setobj) > 0) {
+            /* If we're converting from intset, we have a better estimate:
+             * size everything as if it were the widest of the min/max
+             * integers actually stored. */
+            size_t s1 = lpEstimateBytesRepeatedInteger(intsetMin(setobj->ptr), cap);
+            size_t s2 = lpEstimateBytesRepeatedInteger(intsetMax(setobj->ptr), cap);
+            estcap = max(s1, s2);
+        }
+        unsigned char *lp = lpNew(estcap);
+        char *str;
+        size_t len;
+        int64_t llele;
+        si = setTypeInitIterator(setobj);
+        while (setTypeNext(si, &str, &len, &llele) != -1) {
+            if (str != NULL)
+                lp = lpAppend(lp, (unsigned char *)str, len);
+            else
+                lp = lpAppendInteger(lp, llele);
+        }
+        setTypeReleaseIterator(si);
+
+        freeSetObject(setobj); /* frees the internals but not setobj itself */
+        setobj->encoding = OBJ_ENCODING_LISTPACK;
+        setobj->ptr = lp;
+    } else {
+        serverPanic("Unsupported set conversion");
+    }
+    return C_OK;
+}
+
+/* This is a helper function for the COPY command.
+ * Duplicate a set object, with the guarantee that the returned object
+ * has the same encoding as the original one.
+ *
+ * The resulting object always has refcount set to 1 */
+robj *setTypeDup(robj *o) {
+    robj *set;
+    setTypeIterator *si;
+
+    serverAssert(o->type == OBJ_SET);
+
+    /* Create a new set object that have the same encoding as the original object's encoding */
+    if (o->encoding == OBJ_ENCODING_INTSET) {
+        /* Intsets are a single contiguous allocation: a raw byte copy is a
+         * complete deep copy. */
+        intset *is = o->ptr;
+        size_t size = intsetBlobLen(is);
+        intset *newis = zmalloc(size);
+        memcpy(newis,is,size);
+        set = createObject(OBJ_SET, newis);
+        set->encoding = OBJ_ENCODING_INTSET;
+    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        /* Same for listpacks: copy the whole blob byte-for-byte. */
+        unsigned char *lp = o->ptr;
+        size_t sz = lpBytes(lp);
+        unsigned char *new_lp = zmalloc(sz);
+        memcpy(new_lp, lp, sz);
+        set = createObject(OBJ_SET, new_lp);
+        set->encoding = OBJ_ENCODING_LISTPACK;
+    } else if (o->encoding == OBJ_ENCODING_HT) {
+        /* Hash table sets are rebuilt element by element; the dict is
+         * presized so no rehashing happens during the copy. */
+        set = createSetObject();
+        dict *d = o->ptr;
+        dictExpand(set->ptr, dictSize(d));
+        si = setTypeInitIterator(o);
+        char *str;
+        size_t len;
+        int64_t intobj;
+        while (setTypeNext(si, &str, &len, &intobj) != -1) {
+            /* NOTE(review): for HT encoding 'str' is presumably the sds
+             * element returned by the iterator — confirm setTypeNext. */
+            setTypeAdd(set, (sds)str);
+        }
+        setTypeReleaseIterator(si);
+    } else {
+        serverPanic("Unknown set encoding");
+    }
+    return set;
+}
+
+/* SADD key member [member ...]
+ * Adds every given member to the set stored at 'key', creating the key if
+ * it does not exist. Replies with the number of members actually added. */
+void saddCommand(client *c) {
+    int added = 0;
+    robj *set = lookupKeyWrite(c->db,c->argv[1]);
+    if (checkType(c,set,OBJ_SET)) return;
+
+    if (set != NULL) {
+        /* Existing key: make sure the encoding can hold the new total. */
+        setTypeMaybeConvert(set, c->argc - 2);
+    } else {
+        /* Missing key: create a set sized for all supplied members. */
+        set = setTypeCreate(c->argv[2]->ptr, c->argc - 2);
+        dbAdd(c->db,c->argv[1],set);
+    }
+
+    for (int j = 2; j < c->argc; j++)
+        added += setTypeAdd(set,c->argv[j]->ptr) ? 1 : 0;
+
+    if (added != 0) {
+        signalModifiedKey(c,c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[1],c->db->id);
+    }
+    server.dirty += added;
+    addReplyLongLong(c,added);
+}
+
+/* SREM key member [member ...]
+ * Removes the given members from the set; the key is deleted once the set
+ * becomes empty. Replies with the number of members actually removed. */
+void sremCommand(client *c) {
+    robj *set;
+    int deleted = 0, keyremoved = 0;
+
+    if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+        checkType(c,set,OBJ_SET)) return;
+
+    for (int j = 2; j < c->argc; j++) {
+        if (!setTypeRemove(set,c->argv[j]->ptr)) continue;
+        deleted++;
+        if (setTypeSize(set) == 0) {
+            /* Empty sets are removed from the keyspace entirely. */
+            dbDelete(c->db,c->argv[1]);
+            keyremoved = 1;
+            break;
+        }
+    }
+    if (deleted) {
+        signalModifiedKey(c,c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
+        if (keyremoved)
+            notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],
+                                c->db->id);
+        server.dirty += deleted;
+    }
+    addReplyLongLong(c,deleted);
+}
+
+/* SMOVE source destination member
+ * Atomically moves 'member' from the set at 'source' to the set at
+ * 'destination'. Replies 1 when the element was moved, 0 when it was not
+ * a member of 'source' (or 'source' does not exist). */
+void smoveCommand(client *c) {
+    robj *srcset, *dstset, *ele;
+    srcset = lookupKeyWrite(c->db,c->argv[1]);
+    dstset = lookupKeyWrite(c->db,c->argv[2]);
+    ele = c->argv[3];
+
+    /* If the source key does not exist return 0 */
+    if (srcset == NULL) {
+        addReply(c,shared.czero);
+        return;
+    }
+
+    /* If the source key has the wrong type, or the destination key
+     * is set and has the wrong type, return with an error. */
+    if (checkType(c,srcset,OBJ_SET) ||
+        checkType(c,dstset,OBJ_SET)) return;
+
+    /* If srcset and dstset are equal, SMOVE is a no-op */
+    if (srcset == dstset) {
+        addReply(c,setTypeIsMember(srcset,ele->ptr) ?
+            shared.cone : shared.czero);
+        return;
+    }
+
+    /* If the element cannot be removed from the src set, return 0. */
+    if (!setTypeRemove(srcset,ele->ptr)) {
+        addReply(c,shared.czero);
+        return;
+    }
+    notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
+
+    /* Remove the src set from the database when empty */
+    if (setTypeSize(srcset) == 0) {
+        dbDelete(c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+    }
+
+    /* Create the destination set when it doesn't exist */
+    if (!dstset) {
+        dstset = setTypeCreate(ele->ptr, 1);
+        dbAdd(c->db,c->argv[2],dstset);
+    }
+
+    /* The removal from the source set already counts as one change. */
+    signalModifiedKey(c,c->db,c->argv[1]);
+    server.dirty++;
+
+    /* An extra key has changed when ele was successfully added to dstset */
+    if (setTypeAdd(dstset,ele->ptr)) {
+        server.dirty++;
+        signalModifiedKey(c,c->db,c->argv[2]);
+        notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[2],c->db->id);
+    }
+    addReply(c,shared.cone);
+}
+
+/* SISMEMBER key member
+ * Replies 1 when 'member' belongs to the set at 'key', 0 otherwise.
+ * A missing key behaves as an empty set. */
+void sismemberCommand(client *c) {
+    robj *set;
+
+    if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+        checkType(c,set,OBJ_SET)) return;
+
+    addReply(c, setTypeIsMember(set,c->argv[2]->ptr) ? shared.cone
+                                                     : shared.czero);
+}
+
+/* SMISMEMBER key member [member ...]
+ * Replies with an array of 0/1 flags, one per queried member, in query
+ * order. */
+void smismemberCommand(client *c) {
+    /* Don't abort when the key cannot be found. Non-existing keys are empty
+     * sets, where SMISMEMBER should respond with a series of zeros. */
+    robj *set = lookupKeyRead(c->db,c->argv[1]);
+    if (set && checkType(c,set,OBJ_SET)) return;
+
+    addReplyArrayLen(c,c->argc - 2);
+
+    for (int j = 2; j < c->argc; j++) {
+        int member = set != NULL && setTypeIsMember(set,c->argv[j]->ptr);
+        addReply(c, member ? shared.cone : shared.czero);
+    }
+}
+
+/* SCARD key
+ * Replies with the cardinality of the set (0 for a missing key). */
+void scardCommand(client *c) {
+    robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
+    if (o == NULL || checkType(c,o,OBJ_SET)) return;
+
+    addReplyLongLong(c,setTypeSize(o));
+}
+
+/* Handle the "SPOP key <count>" variant. The normal version of the
+ * command is handled by the spopCommand() function itself. */
+
+/* How many times bigger should be the set compared to the remaining size
+ * for us to use the "create new set" strategy? Read later in the
+ * implementation for more info. */
+#define SPOP_MOVE_STRATEGY_MUL 5
+
+/* SPOP key <count> implementation.
+ *
+ * Pops up to <count> random members, choosing among three strategies
+ * depending on how <count> compares to the set size. The command is never
+ * propagated verbatim: it is rewritten as DEL/UNLINK (whole-set case) or
+ * as batches of SREM, so replicas remove exactly the same members. */
+void spopWithCountCommand(client *c) {
+    long l;
+    unsigned long count, size;
+    robj *set;
+
+    /* Get the count argument */
+    if (getPositiveLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return;
+    count = (unsigned long) l;
+
+    /* Make sure a key with the name inputted exists, and that it's type is
+     * indeed a set. Otherwise, return nil */
+    if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.emptyset[c->resp]))
+        == NULL || checkType(c,set,OBJ_SET)) return;
+
+    /* If count is zero, serve an empty set ASAP to avoid special
+     * cases later. */
+    if (count == 0) {
+        addReply(c,shared.emptyset[c->resp]);
+        return;
+    }
+
+    size = setTypeSize(set);
+
+    /* Generate an SPOP keyspace notification */
+    notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
+    server.dirty += (count >= size) ? size : count;
+
+    /* CASE 1:
+     * The number of requested elements is greater than or equal to
+     * the number of elements inside the set: simply return the whole set. */
+    if (count >= size) {
+        /* We just return the entire set */
+        sunionDiffGenericCommand(c,c->argv+1,1,NULL,SET_OP_UNION);
+
+        /* Delete the set as it is now empty */
+        dbDelete(c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+
+        /* todo: Move the spop notification to be executed after the command logic. */
+
+        /* Propagate this command as a DEL or UNLINK operation */
+        robj *aux = server.lazyfree_lazy_server_del ? shared.unlink : shared.del;
+        rewriteClientCommandVector(c, 2, aux, c->argv[1]);
+        signalModifiedKey(c,c->db,c->argv[1]);
+        return;
+    }
+
+    /* Case 2 and 3 require to replicate SPOP as a set of SREM commands.
+     * Prepare our replication argument vector. Also send the array length
+     * which is common to both the code paths. The SREMs are emitted in
+     * batches of at most 1024 members per propagated command. */
+    unsigned long batchsize = count > 1024 ? 1024 : count;
+    robj **propargv = zmalloc(sizeof(robj *) * (2 + batchsize));
+    propargv[0] = shared.srem;
+    propargv[1] = c->argv[1];
+    unsigned long propindex = 2;
+    addReplySetLen(c,count);
+
+    /* Common iteration vars. */
+    char *str;
+    size_t len;
+    int64_t llele;
+    unsigned long remaining = size-count; /* Elements left after SPOP. */
+
+    /* If we are here, the number of requested elements is less than the
+     * number of elements inside the set. Also we are sure that count < size.
+     * Use two different strategies.
+     *
+     * CASE 2: The number of elements to return is small compared to the
+     * set size. We can just extract random elements and return them to
+     * the set. */
+    if (remaining*SPOP_MOVE_STRATEGY_MUL > count &&
+        set->encoding == OBJ_ENCODING_LISTPACK)
+    {
+        /* Specialized case for listpack. Traverse it only once. */
+        unsigned char *lp = set->ptr;
+        unsigned char *p = lpFirst(lp);
+        unsigned int index = 0;
+        /* Collect the entry pointers first and batch-delete at the end,
+         * so the listpack is not reallocated while we iterate it. */
+        unsigned char **ps = zmalloc(sizeof(char *) * count);
+        for (unsigned long i = 0; i < count; i++) {
+            p = lpNextRandom(lp, p, &index, count - i, 0);
+            unsigned int len;
+            str = (char *)lpGetValue(p, &len, (long long *)&llele);
+
+            if (str) {
+                addReplyBulkCBuffer(c, str, len);
+                propargv[propindex++] = createStringObject(str, len);
+            } else {
+                addReplyBulkLongLong(c, llele);
+                propargv[propindex++] = createStringObjectFromLongLong(llele);
+            }
+            /* Replicate/AOF this command as an SREM operation */
+            if (propindex == 2 + batchsize) {
+                alsoPropagate(c->db->id, propargv, propindex, PROPAGATE_AOF | PROPAGATE_REPL);
+                for (unsigned long j = 2; j < propindex; j++) {
+                    decrRefCount(propargv[j]);
+                }
+                propindex = 2;
+            }
+
+            /* Store pointer for later deletion and move to next. */
+            ps[i] = p;
+            p = lpNext(lp, p);
+            index++;
+        }
+        lp = lpBatchDelete(lp, ps, count);
+        zfree(ps);
+        set->ptr = lp;
+    } else if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
+        /* CASE 2 for intset/dict encodings: pop one random member at a
+         * time. */
+        for (unsigned long i = 0; i < count; i++) {
+            propargv[propindex] = setTypePopRandom(set);
+            addReplyBulk(c, propargv[propindex]);
+            propindex++;
+            /* Replicate/AOF this command as an SREM operation */
+            if (propindex == 2 + batchsize) {
+                alsoPropagate(c->db->id, propargv, propindex, PROPAGATE_AOF | PROPAGATE_REPL);
+                for (unsigned long j = 2; j < propindex; j++) {
+                    decrRefCount(propargv[j]);
+                }
+                propindex = 2;
+            }
+        }
+    } else {
+        /* CASE 3: The number of elements to return is very big, approaching
+         * the size of the set itself. After some time extracting random elements
+         * from such a set becomes computationally expensive, so we use
+         * a different strategy, we extract random elements that we don't
+         * want to return (the elements that will remain part of the set),
+         * creating a new set as we do this (that will be stored as the original
+         * set). Then we return the elements left in the original set and
+         * release it. */
+        robj *newset = NULL;
+
+        /* Create a new set with just the remaining elements. */
+        if (set->encoding == OBJ_ENCODING_LISTPACK) {
+            /* Specialized case for listpack. Traverse it only once. */
+            newset = createSetListpackObject();
+            unsigned char *lp = set->ptr;
+            unsigned char *p = lpFirst(lp);
+            unsigned int index = 0;
+            unsigned char **ps = zmalloc(sizeof(char *) * remaining);
+            for (unsigned long i = 0; i < remaining; i++) {
+                p = lpNextRandom(lp, p, &index, remaining - i, 0);
+                unsigned int len;
+                str = (char *)lpGetValue(p, &len, (long long *)&llele);
+                setTypeAddAux(newset, str, len, llele, 0);
+                ps[i] = p;
+                p = lpNext(lp, p);
+                index++;
+            }
+            lp = lpBatchDelete(lp, ps, remaining);
+            zfree(ps);
+            set->ptr = lp;
+        } else {
+            /* Move the elements that will stay into 'newset', leaving the
+             * popped ones inside 'set'. */
+            while(remaining--) {
+                int encoding = setTypeRandomElement(set, &str, &len, &llele);
+                if (!newset) {
+                    newset = str ? createSetListpackObject() : createIntsetObject();
+                }
+                setTypeAddAux(newset, str, len, llele, encoding == OBJ_ENCODING_HT);
+                setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT);
+            }
+        }
+
+        /* Transfer the old set to the client. */
+        setTypeIterator *si;
+        si = setTypeInitIterator(set);
+        while (setTypeNext(si, &str, &len, &llele) != -1) {
+            if (str == NULL) {
+                addReplyBulkLongLong(c,llele);
+                propargv[propindex++] = createStringObjectFromLongLong(llele);
+            } else {
+                addReplyBulkCBuffer(c, str, len);
+                propargv[propindex++] = createStringObject(str, len);
+            }
+            /* Replicate/AOF this command as an SREM operation */
+            if (propindex == 2 + batchsize) {
+                alsoPropagate(c->db->id, propargv, propindex, PROPAGATE_AOF | PROPAGATE_REPL);
+                for (unsigned long i = 2; i < propindex; i++) {
+                    decrRefCount(propargv[i]);
+                }
+                propindex = 2;
+            }
+        }
+        setTypeReleaseIterator(si);
+
+        /* Assign the new set as the key value. */
+        dbReplaceValue(c->db,c->argv[1],newset);
+    }
+
+    /* Replicate/AOF the remaining elements as an SREM operation */
+    if (propindex != 2) {
+        alsoPropagate(c->db->id, propargv, propindex, PROPAGATE_AOF | PROPAGATE_REPL);
+        for (unsigned long i = 2; i < propindex; i++) {
+            decrRefCount(propargv[i]);
+        }
+        propindex = 2;
+    }
+    zfree(propargv);
+
+    /* Don't propagate the command itself even if we incremented the
+     * dirty counter. We don't want to propagate an SPOP command since
+     * we propagated the command as a set of SREMs operations using
+     * the alsoPropagate() API. */
+    preventCommandPropagation(c);
+    signalModifiedKey(c,c->db,c->argv[1]);
+}
+
+/* SPOP key [count]
+ * Removes and returns one random member of the set (the <count> variant is
+ * dispatched to spopWithCountCommand()). The command is rewritten as SREM
+ * for replication/AOF so replicas remove the very same member. */
+void spopCommand(client *c) {
+    robj *set, *ele;
+
+    if (c->argc == 3) {
+        spopWithCountCommand(c);
+        return;
+    } else if (c->argc > 3) {
+        addReplyErrorObject(c,shared.syntaxerr);
+        return;
+    }
+
+    /* Make sure a key with the name inputted exists, and that it's type is
+     * indeed a set */
+    if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp]))
+        == NULL || checkType(c,set,OBJ_SET)) return;
+
+    /* Pop a random element from the set */
+    ele = setTypePopRandom(set);
+
+    notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
+
+    /* Replicate/AOF this command as an SREM operation */
+    rewriteClientCommandVector(c,3,shared.srem,c->argv[1],ele);
+
+    /* Add the element to the reply */
+    addReplyBulk(c, ele);
+    /* Drop our reference; the rewritten command vector presumably holds
+     * its own (NOTE(review): confirm rewriteClientCommandVector retains). */
+    decrRefCount(ele);
+
+    /* Delete the set if it's empty */
+    if (setTypeSize(set) == 0) {
+        dbDelete(c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+    }
+
+    /* Set has been modified */
+    signalModifiedKey(c,c->db,c->argv[1]);
+    server.dirty++;
+}
+
+/* handle the "SRANDMEMBER key <count>" variant. The normal version of the
+ * command is handled by the srandmemberCommand() function itself. */
+
+/* How many times bigger should be the set compared to the requested size
+ * for us to don't use the "remove elements" strategy? Read later in the
+ * implementation for more info. */
+#define SRANDMEMBER_SUB_STRATEGY_MUL 3
+
+/* If client is trying to ask for a very large number of random elements,
+ * queuing may consume an unlimited amount of memory, so we want to limit
+ * the number of randoms per time. */
+#define SRANDFIELD_RANDOM_SAMPLE_LIMIT 1000
+
+/* SRANDMEMBER key <count> implementation.
+ *
+ * count > 0: return up to 'count' distinct members (all of them if count
+ * exceeds the set size). count < 0: return |count| members, possibly with
+ * repetitions. Read-only: the set is never modified. */
+void srandmemberWithCountCommand(client *c) {
+    long l;
+    unsigned long count, size;
+    int uniq = 1;
+    robj *set;
+    char *str;
+    size_t len;
+    int64_t llele;
+
+    dict *d;
+
+    if (getRangeLongFromObjectOrReply(c,c->argv[2],-LONG_MAX,LONG_MAX,&l,NULL) != C_OK) return;
+    if (l >= 0) {
+        count = (unsigned long) l;
+    } else {
+        /* A negative count means: return the same elements multiple times
+         * (i.e. don't remove the extracted element after every extraction). */
+        count = -l;
+        uniq = 0;
+    }
+
+    if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptyarray))
+        == NULL || checkType(c,set,OBJ_SET)) return;
+    size = setTypeSize(set);
+
+    /* If count is zero, serve it ASAP to avoid special cases later. */
+    if (count == 0) {
+        addReply(c,shared.emptyarray);
+        return;
+    }
+
+    /* CASE 1: The count was negative, so the extraction method is just:
+     * "return N random elements" sampling the whole set every time.
+     * This case is trivial and can be served without auxiliary data
+     * structures. This case is the only one that also needs to return the
+     * elements in random order. */
+    if (!uniq || count == 1) {
+        addReplyArrayLen(c,count);
+
+        if (set->encoding == OBJ_ENCODING_LISTPACK && count > 1) {
+            /* Specialized case for listpack, traversing it only once.
+             * Sampling is done in chunks of at most
+             * SRANDFIELD_RANDOM_SAMPLE_LIMIT to bound memory usage. */
+            unsigned long limit, sample_count;
+            limit = count > SRANDFIELD_RANDOM_SAMPLE_LIMIT ? SRANDFIELD_RANDOM_SAMPLE_LIMIT : count;
+            listpackEntry *entries = zmalloc(limit * sizeof(listpackEntry));
+            while (count) {
+                sample_count = count > limit ? limit : count;
+                count -= sample_count;
+                lpRandomEntries(set->ptr, sample_count, entries);
+                for (unsigned long i = 0; i < sample_count; i++) {
+                    if (entries[i].sval)
+                        addReplyBulkCBuffer(c, entries[i].sval, entries[i].slen);
+                    else
+                        addReplyBulkLongLong(c, entries[i].lval);
+                }
+                /* Stop early if the client is being closed (e.g. output
+                 * buffer limit reached). */
+                if (c->flags & CLIENT_CLOSE_ASAP)
+                    break;
+            }
+            zfree(entries);
+            return;
+        }
+
+        while(count--) {
+            setTypeRandomElement(set, &str, &len, &llele);
+            if (str == NULL) {
+                addReplyBulkLongLong(c,llele);
+            } else {
+                addReplyBulkCBuffer(c, str, len);
+            }
+            if (c->flags & CLIENT_CLOSE_ASAP)
+                break;
+        }
+        return;
+    }
+
+    /* CASE 2:
+     * The number of requested elements is greater than the number of
+     * elements inside the set: simply return the whole set. */
+    if (count >= size) {
+        setTypeIterator *si;
+        addReplyArrayLen(c,size);
+        si = setTypeInitIterator(set);
+        while (setTypeNext(si, &str, &len, &llele) != -1) {
+            if (str == NULL) {
+                addReplyBulkLongLong(c,llele);
+            } else {
+                addReplyBulkCBuffer(c, str, len);
+            }
+            size--;
+        }
+        setTypeReleaseIterator(si);
+        serverAssert(size==0);
+        return;
+    }
+
+    /* CASE 2.5 listpack only. Sampling unique elements, in non-random order.
+     * Listpack encoded sets are meant to be relatively small, so
+     * SRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make
+     * copies of the entries. Instead, we emit them directly to the output
+     * buffer.
+     *
+     * And it is inefficient to repeatedly pick one random element from a
+     * listpack in CASE 4. So we use this instead. */
+    if (set->encoding == OBJ_ENCODING_LISTPACK) {
+        unsigned char *lp = set->ptr;
+        unsigned char *p = lpFirst(lp);
+        unsigned int i = 0;
+        addReplyArrayLen(c, count);
+        while (count) {
+            p = lpNextRandom(lp, p, &i, count--, 0);
+            unsigned int len;
+            str = (char *)lpGetValue(p, &len, (long long *)&llele);
+            if (str == NULL) {
+                addReplyBulkLongLong(c, llele);
+            } else {
+                addReplyBulkCBuffer(c, str, len);
+            }
+            p = lpNext(lp, p);
+            i++;
+        }
+        return;
+    }
+
+    /* For CASE 3 and CASE 4 we need an auxiliary dictionary.
+     * The dict owns its sds keys; they are freed by dictRelease at the
+     * end (sdsReplyDictType). */
+    d = dictCreate(&sdsReplyDictType);
+
+    /* CASE 3:
+     * The number of elements inside the set is not greater than
+     * SRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements.
+     * In this case we create a set from scratch with all the elements, and
+     * subtract random elements to reach the requested number of elements.
+     *
+     * This is done because if the number of requested elements is just
+     * a bit less than the number of elements in the set, the natural approach
+     * used into CASE 4 is highly inefficient. */
+    if (count*SRANDMEMBER_SUB_STRATEGY_MUL > size) {
+        setTypeIterator *si;
+
+        /* Add all the elements into the temporary dictionary. */
+        si = setTypeInitIterator(set);
+        dictExpand(d, size);
+        while (setTypeNext(si, &str, &len, &llele) != -1) {
+            int retval = DICT_ERR;
+
+            if (str == NULL) {
+                retval = dictAdd(d,sdsfromlonglong(llele),NULL);
+            } else {
+                retval = dictAdd(d, sdsnewlen(str, len), NULL);
+            }
+            serverAssert(retval == DICT_OK);
+        }
+        setTypeReleaseIterator(si);
+        serverAssert(dictSize(d) == size);
+
+        /* Remove random elements to reach the right count. */
+        while (size > count) {
+            dictEntry *de;
+            de = dictGetFairRandomKey(d);
+            /* Unlink first, then free the key, then the entry: the entry
+             * must stay valid while we still read its key. */
+            dictUnlink(d,dictGetKey(de));
+            sdsfree(dictGetKey(de));
+            dictFreeUnlinkedEntry(d,de);
+            size--;
+        }
+    }
+
+    /* CASE 4: We have a big set compared to the requested number of elements.
+     * In this case we can simply get random elements from the set and add
+     * to the temporary set, trying to eventually get enough unique elements
+     * to reach the specified count. */
+    else {
+        unsigned long added = 0;
+        sds sdsele;
+
+        dictExpand(d, count);
+        while (added < count) {
+            setTypeRandomElement(set, &str, &len, &llele);
+            if (str == NULL) {
+                sdsele = sdsfromlonglong(llele);
+            } else {
+                sdsele = sdsnewlen(str, len);
+            }
+            /* Try to add the object to the dictionary. If it already exists
+             * free it, otherwise increment the number of objects we have
+             * in the result dictionary. */
+            if (dictAdd(d,sdsele,NULL) == DICT_OK)
+                added++;
+            else
+                sdsfree(sdsele);
+        }
+    }
+
+    /* CASE 3 & 4: send the result to the user. */
+    {
+        dictIterator *di;
+        dictEntry *de;
+
+        addReplyArrayLen(c,count);
+        di = dictGetIterator(d);
+        while((de = dictNext(di)) != NULL)
+            addReplyBulkSds(c,dictGetKey(de));
+        dictReleaseIterator(di);
+        dictRelease(d);
+    }
+}
+
+/* SRANDMEMBER <key> [<count>]
+ * Without <count>: reply with one random member as a bulk string (or nil
+ * for a missing key). With <count>: delegate to the array-reply variant. */
+void srandmemberCommand(client *c) {
+    robj *set;
+    char *str;
+    size_t len;
+    int64_t llele;
+
+    if (c->argc == 3) {
+        srandmemberWithCountCommand(c);
+        return;
+    }
+    if (c->argc > 3) {
+        addReplyErrorObject(c,shared.syntaxerr);
+        return;
+    }
+
+    /* Handle variant without <count> argument. Reply with simple bulk string */
+    if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))
+        == NULL || checkType(c,set,OBJ_SET)) return;
+
+    setTypeRandomElement(set, &str, &len, &llele);
+    if (str != NULL)
+        addReplyBulkCBuffer(c, str, len);
+    else
+        addReplyBulkLongLong(c,llele);
+}
+
+/* qsort() comparator ordering robj* set pointers by increasing
+ * cardinality. */
+int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
+    unsigned long c1 = setTypeSize(*(robj**)s1);
+    unsigned long c2 = setTypeSize(*(robj**)s2);
+    if (c1 > c2) return 1;
+    if (c1 < c2) return -1;
+    return 0;
+}
+
+/* This is used by SDIFF and in this case we can receive NULL that should
+ * be handled as empty sets. Orders by decreasing cardinality. */
+int qsortCompareSetsByRevCardinality(const void *s1, const void *s2) {
+    robj *o1 = *(robj**)s1, *o2 = *(robj**)s2;
+    unsigned long first = o1 ? setTypeSize(o1) : 0;
+    unsigned long second = o2 ? setTypeSize(o2) : 0;
+
+    /* Inverted comparison on purpose: bigger sets sort first. */
+    if (second > first) return 1;
+    if (second < first) return -1;
+    return 0;
+}
+
+/* SINTER / SMEMBERS / SINTERSTORE / SINTERCARD
+ *
+ * 'cardinality_only' work for SINTERCARD, only return the cardinality
+ * with minimum processing and memory overheads.
+ *
+ * 'limit' work for SINTERCARD, stop searching after reaching the limit.
+ * Passing a 0 means unlimited.
+ *
+ * When 'dstkey' is not NULL (SINTERSTORE) the result is stored there and
+ * the reply is the resulting cardinality; otherwise the members are sent
+ * to the client directly. */
+void sinterGenericCommand(client *c, robj **setkeys,
+                          unsigned long setnum, robj *dstkey,
+                          int cardinality_only, unsigned long limit) {
+    robj **sets = zmalloc(sizeof(robj*)*setnum);
+    setTypeIterator *si;
+    robj *dstset = NULL;
+    char *str;
+    size_t len;
+    int64_t intobj;
+    void *replylen = NULL;
+    unsigned long j, cardinality = 0;
+    int encoding, empty = 0;
+
+    for (j = 0; j < setnum; j++) {
+        robj *setobj = lookupKeyRead(c->db, setkeys[j]);
+        if (!setobj) {
+            /* A NULL is considered an empty set */
+            empty += 1;
+            sets[j] = NULL;
+            continue;
+        }
+        if (checkType(c,setobj,OBJ_SET)) {
+            zfree(sets);
+            return;
+        }
+        sets[j] = setobj;
+    }
+
+    /* Set intersection with an empty set always results in an empty set.
+     * Return ASAP if there is an empty set. */
+    if (empty > 0) {
+        zfree(sets);
+        if (dstkey) {
+            /* SINTERSTORE with an empty result deletes the destination. */
+            if (dbDelete(c->db,dstkey)) {
+                signalModifiedKey(c,c->db,dstkey);
+                notifyKeyspaceEvent(NOTIFY_GENERIC,"del",dstkey,c->db->id);
+                server.dirty++;
+            }
+            addReply(c,shared.czero);
+        } else if (cardinality_only) {
+            addReplyLongLong(c,cardinality);
+        } else {
+            addReply(c,shared.emptyset[c->resp]);
+        }
+        return;
+    }
+
+    /* Sort sets from the smallest to largest, this will improve our
+     * algorithm's performance */
+    qsort(sets,setnum,sizeof(robj*),qsortCompareSetsByCardinality);
+
+    /* The first thing we should output is the total number of elements...
+     * since this is a multi-bulk write, but at this stage we don't know
+     * the intersection set size, so we use a trick, append an empty object
+     * to the output list and save the pointer to later modify it with the
+     * right length */
+    if (dstkey) {
+        /* If we have a target key where to store the resulting set
+         * create this key with an empty set inside */
+        if (sets[0]->encoding == OBJ_ENCODING_INTSET) {
+            /* The first set is an intset, so the result is an intset too. The
+             * elements are inserted in ascending order which is efficient in an
+             * intset. */
+            dstset = createIntsetObject();
+        } else if (sets[0]->encoding == OBJ_ENCODING_LISTPACK) {
+            /* To avoid many reallocs, we estimate that the result is a listpack
+             * of approximately the same size as the first set. Then we shrink
+             * it or possibly convert it to intset in the end. */
+            unsigned char *lp = lpNew(lpBytes(sets[0]->ptr));
+            dstset = createObject(OBJ_SET, lp);
+            dstset->encoding = OBJ_ENCODING_LISTPACK;
+        } else {
+            /* We start off with a listpack, since it's more efficient to append
+             * to than an intset. Later we can convert it to intset or a
+             * hashtable. */
+            dstset = createSetListpackObject();
+        }
+    } else if (!cardinality_only) {
+        replylen = addReplyDeferredLen(c);
+    }
+
+    /* Iterate all the elements of the first (smallest) set, and test
+     * the element against all the other sets, if at least one set does
+     * not include the element it is discarded */
+    int only_integers = 1;
+    si = setTypeInitIterator(sets[0]);
+    while((encoding = setTypeNext(si, &str, &len, &intobj)) != -1) {
+        for (j = 1; j < setnum; j++) {
+            /* The same key given twice trivially contains the element. */
+            if (sets[j] == sets[0]) continue;
+            if (!setTypeIsMemberAux(sets[j], str, len, intobj,
+                                    encoding == OBJ_ENCODING_HT))
+                break;
+        }
+
+        /* Only take action when all sets contain the member */
+        if (j == setnum) {
+            if (cardinality_only) {
+                cardinality++;
+
+                /* We stop the searching after reaching the limit. */
+                if (limit && cardinality >= limit)
+                    break;
+            } else if (!dstkey) {
+                if (str != NULL)
+                    addReplyBulkCBuffer(c, str, len);
+                else
+                    addReplyBulkLongLong(c,intobj);
+                cardinality++;
+            } else {
+                if (str && only_integers) {
+                    /* It may be an integer although we got it as a string. */
+                    if (encoding == OBJ_ENCODING_HT &&
+                        string2ll(str, len, (long long *)&intobj))
+                    {
+                        if (dstset->encoding == OBJ_ENCODING_LISTPACK ||
+                            dstset->encoding == OBJ_ENCODING_INTSET)
+                        {
+                            /* Adding it as an integer is more efficient. */
+                            str = NULL;
+                        }
+                    } else {
+                        /* It's not an integer */
+                        only_integers = 0;
+                    }
+                }
+                setTypeAddAux(dstset, str, len, intobj, encoding == OBJ_ENCODING_HT);
+            }
+        }
+    }
+    setTypeReleaseIterator(si);
+
+    if (cardinality_only) {
+        addReplyLongLong(c,cardinality);
+    } else if (dstkey) {
+        /* Store the resulting set into the target, if the intersection
+         * is not an empty set. */
+        if (setTypeSize(dstset) > 0) {
+            if (only_integers) maybeConvertToIntset(dstset);
+            if (dstset->encoding == OBJ_ENCODING_LISTPACK) {
+                /* We allocated too much memory when we created it to avoid
+                 * frequent reallocs. Therefore, we shrink it now. */
+                dstset->ptr = lpShrinkToFit(dstset->ptr);
+            }
+            setKey(c,c->db,dstkey,dstset,0);
+            addReplyLongLong(c,setTypeSize(dstset));
+            notifyKeyspaceEvent(NOTIFY_SET,"sinterstore",
+                                dstkey,c->db->id);
+            server.dirty++;
+        } else {
+            addReply(c,shared.czero);
+            if (dbDelete(c->db,dstkey)) {
+                server.dirty++;
+                signalModifiedKey(c,c->db,dstkey);
+                notifyKeyspaceEvent(NOTIFY_GENERIC,"del",dstkey,c->db->id);
+            }
+        }
+        decrRefCount(dstset);
+    } else {
+        /* Patch the deferred multi-bulk length now that we know it. */
+        setDeferredSetLen(c,replylen,cardinality);
+    }
+    zfree(sets);
+}
+
+/* SINTER key [key ...]
+ * Replies with the members of the intersection of all the given sets. */
+void sinterCommand(client *c) {
+    sinterGenericCommand(c, c->argv+1, c->argc-1, NULL, 0, 0);
+}
+
+/* SINTERCARD numkeys key [key ...] [LIMIT limit]
+ * Replies with the cardinality of the intersection, optionally stopping
+ * the computation once 'limit' common members have been found. */
+void sinterCardCommand(client *c) {
+    long numkeys = 0; /* Number of keys. */
+    long limit = 0;   /* 0 means no limit. */
+
+    if (getRangeLongFromObjectOrReply(c, c->argv[1], 1, LONG_MAX,
+                                      &numkeys, "numkeys should be greater than 0") != C_OK)
+        return;
+    if (numkeys > (c->argc - 2)) {
+        addReplyError(c, "Number of keys can't be greater than number of args");
+        return;
+    }
+
+    /* Parse the optional trailing arguments; LIMIT is the only one known. */
+    for (long j = 2 + numkeys; j < c->argc; j++) {
+        char *opt = c->argv[j]->ptr;
+        int moreargs = (c->argc - 1) - j;
+
+        if (strcasecmp(opt, "LIMIT") || !moreargs) {
+            addReplyErrorObject(c, shared.syntaxerr);
+            return;
+        }
+        j++;
+        if (getPositiveLongFromObjectOrReply(c, c->argv[j], &limit,
+                                             "LIMIT can't be negative") != C_OK)
+            return;
+    }
+
+    sinterGenericCommand(c, c->argv+2, numkeys, NULL, 1, limit);
+}
+
+/* SINTERSTORE destination key [key ...]
+ * Stores the intersection of the given sets at 'destination' and replies
+ * with the resulting cardinality. */
+void sinterstoreCommand(client *c) {
+    sinterGenericCommand(c, c->argv+2, c->argc-2, c->argv[1], 0, 0);
+}
+
+/* Generic implementation behind SUNION, SUNIONSTORE, SDIFF and SDIFFSTORE.
+ *
+ * 'setkeys'/'setnum' describe the input set keys, 'op' is SET_OP_UNION or
+ * SET_OP_DIFF, and 'dstkey' is NULL for the reply variants or the
+ * destination key for the STORE variants. */
+void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
+                              robj *dstkey, int op) {
+    robj **sets = zmalloc(sizeof(robj*)*setnum);
+    setTypeIterator *si;
+    robj *dstset = NULL;
+    char *str;
+    size_t len;
+    int64_t llval;
+    int encoding;
+    int j, cardinality = 0;
+    int diff_algo = 1;
+    int sameset = 0;
+
+    /* Resolve the input keys. Missing keys behave as empty sets (NULL). */
+    for (j = 0; j < setnum; j++) {
+        robj *setobj = lookupKeyRead(c->db, setkeys[j]);
+        if (!setobj) {
+            sets[j] = NULL;
+            continue;
+        }
+        if (checkType(c,setobj,OBJ_SET)) {
+            zfree(sets);
+            return;
+        }
+        sets[j] = setobj;
+        /* If the first set object appears again among the other keys, a
+         * DIFF result is known to be empty without any computation. */
+        if (j > 0 && sets[0] == sets[j]) {
+            sameset = 1;
+        }
+    }
+
+    /* Select what DIFF algorithm to use.
+     *
+     * Algorithm 1 is O(N*M) where N is the size of the element first set
+     * and M the total number of sets.
+     *
+     * Algorithm 2 is O(N) where N is the total number of elements in all
+     * the sets.
+     *
+     * We compute what is the best bet with the current input here. */
+    if (op == SET_OP_DIFF && sets[0] && !sameset) {
+        long long algo_one_work = 0, algo_two_work = 0;
+
+        for (j = 0; j < setnum; j++) {
+            if (sets[j] == NULL) continue;
+
+            algo_one_work += setTypeSize(sets[0]);
+            algo_two_work += setTypeSize(sets[j]);
+        }
+
+        /* Algorithm 1 has better constant times and performs less operations
+         * if there are elements in common. Give it some advantage. */
+        algo_one_work /= 2;
+        diff_algo = (algo_one_work <= algo_two_work) ? 1 : 2;
+
+        if (diff_algo == 1 && setnum > 1) {
+            /* With algorithm 1 it is better to order the sets to subtract
+             * by decreasing size, so that we are more likely to find
+             * duplicated elements ASAP. */
+            qsort(sets+1,setnum-1,sizeof(robj*),
+                  qsortCompareSetsByRevCardinality);
+        }
+    }
+
+    /* We need a temp set object to store our union/diff. If the dstkey
+     * is not NULL (that is, we are inside an SUNIONSTORE/SDIFFSTORE operation) then
+     * this set object will be the resulting object to set into the target key. */
+    dstset = createIntsetObject();
+
+    if (op == SET_OP_UNION) {
+        /* Union is trivial, just add every element of every set to the
+         * temporary set. */
+        for (j = 0; j < setnum; j++) {
+            if (!sets[j]) continue; /* non existing keys are like empty sets */
+
+            si = setTypeInitIterator(sets[j]);
+            while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
+                cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
+            }
+            setTypeReleaseIterator(si);
+        }
+    } else if (op == SET_OP_DIFF && sameset) {
+        /* At least one of the sets is the same one (same key) as the first one, result must be empty. */
+    } else if (op == SET_OP_DIFF && sets[0] && diff_algo == 1) {
+        /* DIFF Algorithm 1:
+         *
+         * We perform the diff by iterating all the elements of the first set,
+         * and only adding it to the target set if the element does not exist
+         * into all the other sets.
+         *
+         * This way we perform at max N*M operations, where N is the size of
+         * the first set, and M the number of sets. */
+        si = setTypeInitIterator(sets[0]);
+        while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
+            for (j = 1; j < setnum; j++) {
+                if (!sets[j]) continue; /* no key is an empty set. */
+                if (sets[j] == sets[0]) break; /* same set! */
+                if (setTypeIsMemberAux(sets[j], str, len, llval,
+                                       encoding == OBJ_ENCODING_HT))
+                    break;
+            }
+            if (j == setnum) {
+                /* There is no other set with this element. Add it. */
+                cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
+            }
+        }
+        setTypeReleaseIterator(si);
+    } else if (op == SET_OP_DIFF && sets[0] && diff_algo == 2) {
+        /* DIFF Algorithm 2:
+         *
+         * Add all the elements of the first set to the auxiliary set.
+         * Then remove all the elements of all the next sets from it.
+         *
+         * This is O(N) where N is the sum of all the elements in every
+         * set. */
+        for (j = 0; j < setnum; j++) {
+            if (!sets[j]) continue; /* non existing keys are like empty sets */
+
+            si = setTypeInitIterator(sets[j]);
+            while((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
+                if (j == 0) {
+                    cardinality += setTypeAddAux(dstset, str, len, llval,
+                                                 encoding == OBJ_ENCODING_HT);
+                } else {
+                    cardinality -= setTypeRemoveAux(dstset, str, len, llval,
+                                                    encoding == OBJ_ENCODING_HT);
+                }
+            }
+            setTypeReleaseIterator(si);
+
+            /* Exit if result set is empty as any additional removal
+             * of elements will have no effect. */
+            if (cardinality == 0) break;
+        }
+    }
+
+    /* Output the content of the resulting set, if not in STORE mode */
+    if (!dstkey) {
+        addReplySetLen(c,cardinality);
+        si = setTypeInitIterator(dstset);
+        while (setTypeNext(si, &str, &len, &llval) != -1) {
+            if (str)
+                addReplyBulkCBuffer(c, str, len);
+            else
+                addReplyBulkLongLong(c, llval);
+        }
+        setTypeReleaseIterator(si);
+        /* Release the temporary set, possibly in a background thread
+         * depending on the lazy free configuration. */
+        server.lazyfree_lazy_server_del ? freeObjAsync(NULL, dstset, -1) :
+                                          decrRefCount(dstset);
+    } else {
+        /* If we have a target key where to store the resulting set
+         * create this key with the result set inside */
+        if (setTypeSize(dstset) > 0) {
+            setKey(c,c->db,dstkey,dstset,0);
+            addReplyLongLong(c,setTypeSize(dstset));
+            notifyKeyspaceEvent(NOTIFY_SET,
+                op == SET_OP_UNION ? "sunionstore" : "sdiffstore",
+                dstkey,c->db->id);
+            server.dirty++;
+        } else {
+            /* Empty result: reply zero and remove any previous value
+             * stored at the destination key. */
+            addReply(c,shared.czero);
+            if (dbDelete(c->db,dstkey)) {
+                server.dirty++;
+                signalModifiedKey(c,c->db,dstkey);
+                notifyKeyspaceEvent(NOTIFY_GENERIC,"del",dstkey,c->db->id);
+            }
+        }
+        decrRefCount(dstset);
+    }
+    zfree(sets);
+}
+
+/* SUNION key [key ...]
+ * Thin wrapper: reply variant, no destination key. */
+void sunionCommand(client *c) {
+    sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_UNION);
+}
+
+/* SUNIONSTORE destination key [key ...]
+ * Thin wrapper: stores the union into c->argv[1]. */
+void sunionstoreCommand(client *c) {
+    sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_UNION);
+}
+
+/* SDIFF key [key ...]
+ * Thin wrapper: reply variant, no destination key. */
+void sdiffCommand(client *c) {
+    sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_DIFF);
+}
+
+/* SDIFFSTORE destination key [key ...]
+ * Thin wrapper: stores the difference into c->argv[1]. */
+void sdiffstoreCommand(client *c) {
+    sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_DIFF);
+}
+
+/* SSCAN key cursor [MATCH pattern] [COUNT count]
+ * Incrementally iterate the members of a set. Delegates the actual
+ * cursor-based iteration to scanGenericCommand(). */
+void sscanCommand(client *c) {
+    unsigned long cursor;
+    robj *set;
+
+    /* Validate the cursor argument first; an invalid cursor already
+     * produced an error reply. */
+    if (parseScanCursorOrReply(c,c->argv[2],&cursor) == C_ERR) return;
+
+    /* Missing key: the empty-scan reply was already sent. */
+    set = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan);
+    if (set == NULL) return;
+
+    /* Wrong type: the type error reply was already sent. */
+    if (checkType(c,set,OBJ_SET)) return;
+
+    scanGenericCommand(c,set,cursor);
+}
diff --git a/src/t_stream.c b/src/t_stream.c
new file mode 100644
index 0000000..5fcb631
--- /dev/null
+++ b/src/t_stream.c
@@ -0,0 +1,4038 @@
+/*
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "endianconv.h"
+#include "stream.h"
+
+/* Every stream item inside the listpack, has a flags field that is used to
+ * mark the entry as deleted, or having the same field as the "master"
+ * entry at the start of the listpack. */
+#define STREAM_ITEM_FLAG_NONE 0             /* No special flags. */
+#define STREAM_ITEM_FLAG_DELETED (1<<0)     /* Entry is deleted. Skip it. */
+#define STREAM_ITEM_FLAG_SAMEFIELDS (1<<1)  /* Same fields as master entry. */
+
+/* For stream commands that require multiple IDs
+ * when the number of IDs is less than 'STREAMID_STATIC_VECTOR_LEN',
+ * avoid malloc allocation. */
+#define STREAMID_STATIC_VECTOR_LEN 8
+
+/* Max pre-allocation for listpack. This is done to avoid abuse of a user
+ * setting stream_node_max_bytes to a huge number. */
+#define STREAM_LISTPACK_MAX_PRE_ALLOCATE 4096
+
+/* Don't let listpacks grow too big, even if the user config allows it.
+ * doing so can lead to an overflow (trying to store more than 32bit length
+ * into the listpack header), or actually an assertion since lpInsert
+ * will return NULL. */
+#define STREAM_LISTPACK_MAX_SIZE (1<<30)
+
+/* Forward declarations of helpers defined later in this file. */
+void streamFreeCG(streamCG *cg);
+void streamFreeNACK(streamNACK *na);
+size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer);
+int streamParseStrictIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int *seq_given);
+int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq);
+
+/* -----------------------------------------------------------------------
+ * Low level stream encoding: a radix tree of listpacks.
+ * ----------------------------------------------------------------------- */
+
+/* Allocate and initialize an empty stream data structure: an empty radix
+ * tree of listpacks, zeroed IDs/counters, and no consumer groups. */
+stream *streamNew(void) {
+    stream *s = zmalloc(sizeof(*s));
+    s->rax = raxNew();
+    s->length = 0;
+    s->entries_added = 0;
+    s->first_id.ms = s->first_id.seq = 0;
+    s->last_id.ms = s->last_id.seq = 0;
+    s->max_deleted_entry_id.ms = s->max_deleted_entry_id.seq = 0;
+    s->cgroups = NULL; /* Lazily created to save memory when not used. */
+    return s;
+}
+
+/* Free a stream, including the listpacks stored inside the radix tree. */
+void freeStream(stream *s) {
+    /* Each rax node payload is a listpack; free them via callback. */
+    raxFreeWithCallback(s->rax,(void(*)(void*))lpFree);
+    /* Consumer groups rax exists only if groups were ever created. */
+    if (s->cgroups)
+        raxFreeWithCallback(s->cgroups,(void(*)(void*))streamFreeCG);
+    zfree(s);
+}
+
+/* Return the length (number of non-deleted entries) of a stream object. */
+unsigned long streamLength(const robj *subject) {
+    stream *s = subject->ptr;
+    return s->length;
+}
+
+/* Advance 'id' to its successor stream ID.
+ * If 'id' is the maximal possible ID it is wrapped around to 0-0 and
+ * C_ERR is returned, otherwise C_OK. */
+int streamIncrID(streamID *id) {
+    /* Common case: the sequence part has room to grow. */
+    if (id->seq != UINT64_MAX) {
+        id->seq++;
+        return C_OK;
+    }
+    /* Sequence overflow: carry into the milliseconds part. */
+    if (id->ms != UINT64_MAX) {
+        id->ms++;
+        id->seq = 0;
+        return C_OK;
+    }
+    /* 'id' was the last representable stream ID: wrap to 0-0. */
+    id->ms = id->seq = 0;
+    return C_ERR;
+}
+
+/* Move 'id' to its predecessor stream ID.
+ * If 'id' is the minimal possible ID it wraps to UINT64_MAX-UINT64_MAX
+ * and C_ERR is returned, otherwise C_OK. */
+int streamDecrID(streamID *id) {
+    /* Common case: the sequence part has room to shrink. */
+    if (id->seq != 0) {
+        id->seq--;
+        return C_OK;
+    }
+    /* Sequence underflow: borrow from the milliseconds part. */
+    if (id->ms != 0) {
+        id->ms--;
+        id->seq = UINT64_MAX;
+        return C_OK;
+    }
+    /* 'id' was the first representable stream ID: wrap to the maximum. */
+    id->ms = id->seq = UINT64_MAX;
+    return C_ERR;
+}
+
+/* Generate the next stream item ID given the previous one. If the current
+ * milliseconds Unix time is greater than the previous one, just use this
+ * as time part and start with sequence part of zero. Otherwise we use the
+ * previous time (and never go backward) and increment the sequence. */
+void streamNextID(streamID *last_id, streamID *new_id) {
+    uint64_t ms = commandTimeSnapshot();
+    if (ms > last_id->ms) {
+        /* Clock moved forward: fresh millisecond, sequence restarts at 0. */
+        new_id->ms = ms;
+        new_id->seq = 0;
+    } else {
+        /* Same or older clock reading: keep the previous time and bump the
+         * sequence (streamIncrID carries into ms on sequence overflow). */
+        *new_id = *last_id;
+        streamIncrID(new_id);
+    }
+}
+
+/* This is a helper function for the COPY command.
+ * Duplicate a Stream object, with the guarantee that the returned object
+ * has the same encoding as the original one.
+ *
+ * The resulting object always has refcount set to 1 */
+robj *streamDup(robj *o) {
+    robj *sobj;
+
+    serverAssert(o->type == OBJ_STREAM);
+
+    switch (o->encoding) {
+        case OBJ_ENCODING_STREAM:
+            sobj = createStreamObject();
+            break;
+        default:
+            serverPanic("Wrong encoding.");
+            break;
+    }
+
+    stream *s;
+    stream *new_s;
+    s = o->ptr;
+    new_s = sobj->ptr;
+
+    /* Copy the radix tree of listpacks: every node's listpack is duplicated
+     * byte for byte and inserted under the same 128 bit key. */
+    raxIterator ri;
+    uint64_t rax_key[2];
+    raxStart(&ri, s->rax);
+    raxSeek(&ri, "^", NULL, 0);
+    size_t lp_bytes = 0;        /* Total bytes in the listpack. */
+    unsigned char *lp = NULL;   /* listpack pointer. */
+    /* Get a reference to the listpack node. */
+    while (raxNext(&ri)) {
+        lp = ri.data;
+        lp_bytes = lpBytes(lp);
+        unsigned char *new_lp = zmalloc(lp_bytes);
+        memcpy(new_lp, lp, lp_bytes);
+        memcpy(rax_key, ri.key, sizeof(rax_key));
+        raxInsert(new_s->rax, (unsigned char *)&rax_key, sizeof(rax_key),
+                  new_lp, NULL);
+    }
+    new_s->length = s->length;
+    new_s->first_id = s->first_id;
+    new_s->last_id = s->last_id;
+    new_s->max_deleted_entry_id = s->max_deleted_entry_id;
+    new_s->entries_added = s->entries_added;
+    raxStop(&ri);
+
+    if (s->cgroups == NULL) return sobj;
+
+    /* Consumer Groups */
+    raxIterator ri_cgroups;
+    raxStart(&ri_cgroups, s->cgroups);
+    raxSeek(&ri_cgroups, "^", NULL, 0);
+    while (raxNext(&ri_cgroups)) {
+        streamCG *cg = ri_cgroups.data;
+        streamCG *new_cg = streamCreateCG(new_s, (char *)ri_cgroups.key,
+                                          ri_cgroups.key_len, &cg->last_id,
+                                          cg->entries_read);
+
+        serverAssert(new_cg != NULL);
+
+        /* Consumer Group PEL: NACKs are created with a NULL consumer here;
+         * the owner pointer is fixed up below while copying each consumer's
+         * own PEL. */
+        raxIterator ri_cg_pel;
+        raxStart(&ri_cg_pel,cg->pel);
+        raxSeek(&ri_cg_pel,"^",NULL,0);
+        while(raxNext(&ri_cg_pel)){
+            streamNACK *nack = ri_cg_pel.data;
+            streamNACK *new_nack = streamCreateNACK(NULL);
+            new_nack->delivery_time = nack->delivery_time;
+            new_nack->delivery_count = nack->delivery_count;
+            raxInsert(new_cg->pel, ri_cg_pel.key, sizeof(streamID), new_nack, NULL);
+        }
+        raxStop(&ri_cg_pel);
+
+        /* Consumers */
+        raxIterator ri_consumers;
+        raxStart(&ri_consumers, cg->consumers);
+        raxSeek(&ri_consumers, "^", NULL, 0);
+        while (raxNext(&ri_consumers)) {
+            streamConsumer *consumer = ri_consumers.data;
+            streamConsumer *new_consumer;
+            new_consumer = zmalloc(sizeof(*new_consumer));
+            new_consumer->name = sdsdup(consumer->name);
+            new_consumer->pel = raxNew();
+            raxInsert(new_cg->consumers,(unsigned char *)new_consumer->name,
+                      sdslen(new_consumer->name), new_consumer, NULL);
+            new_consumer->seen_time = consumer->seen_time;
+            new_consumer->active_time = consumer->active_time;
+
+            /* Consumer PEL: shares the very same streamNACK objects inserted
+             * in the group PEL above (the same pointer lives in both raxes). */
+            raxIterator ri_cpel;
+            raxStart(&ri_cpel, consumer->pel);
+            raxSeek(&ri_cpel, "^", NULL, 0);
+            while (raxNext(&ri_cpel)) {
+                streamNACK *new_nack = raxFind(new_cg->pel,ri_cpel.key,sizeof(streamID));
+
+                serverAssert(new_nack != raxNotFound);
+
+                new_nack->consumer = new_consumer;
+                raxInsert(new_consumer->pel,ri_cpel.key,sizeof(streamID),new_nack,NULL);
+            }
+            raxStop(&ri_cpel);
+        }
+        raxStop(&ri_consumers);
+    }
+    raxStop(&ri_cgroups);
+    return sobj;
+}
+
+/* This is a wrapper function for lpGet() to directly get an integer value
+ * from the listpack (that may store numbers as a string), converting
+ * the string if needed.
+ * The 'valid' argument is an optional output parameter to get an indication
+ * if the record was valid, when this parameter is NULL, the function will
+ * fail with an assertion. */
+static inline int64_t lpGetIntegerIfValid(unsigned char *ele, int *valid) {
+    int64_t v;
+    unsigned char *e = lpGet(ele,&v,NULL);
+    if (e == NULL) {
+        /* lpGet() returned the element directly as the integer 'v'. */
+        if (valid)
+            *valid = 1;
+        return v;
+    }
+    /* The following code path should never be used for how listpacks work:
+     * they should always be able to store an int64_t value in integer
+     * encoded form. However the implementation may change. */
+    long long ll;
+    int ret = string2ll((char*)e,v,&ll);
+    if (valid)
+        *valid = ret;
+    else
+        serverAssert(ret != 0);
+    v = ll;
+    return v;
+}
+
+/* Shorthand for callers that require the record to be a valid integer. */
+#define lpGetInteger(ele) lpGetIntegerIfValid(ele, NULL)
+
+/* Get an edge streamID of a given listpack.
+ * 'master_id' is an input param, used to build the 'edge_id' output param.
+ * Returns 1 on success, 0 when 'lp' is NULL or holds no entries besides
+ * the master entry. */
+int lpGetEdgeStreamID(unsigned char *lp, int first, streamID *master_id, streamID *edge_id)
+{
+    if (lp == NULL)
+        return 0;
+
+    unsigned char *lp_ele;
+
+    /* We need to seek either the first or the last entry depending
+     * on the direction of the iteration. */
+    if (first) {
+        /* Get the master fields count. */
+        lp_ele = lpFirst(lp);        /* Seek items count */
+        lp_ele = lpNext(lp, lp_ele); /* Seek deleted count. */
+        lp_ele = lpNext(lp, lp_ele); /* Seek num fields. */
+        int64_t master_fields_count = lpGetInteger(lp_ele);
+        lp_ele = lpNext(lp, lp_ele); /* Seek first field. */
+
+        /* If we are iterating in normal order, skip the master fields
+         * to seek the first actual entry. */
+        for (int64_t i = 0; i < master_fields_count; i++)
+            lp_ele = lpNext(lp, lp_ele);
+
+        /* If we are going forward, skip the previous entry's
+         * lp-count field (or in case of the master entry, the zero
+         * term field) */
+        lp_ele = lpNext(lp, lp_ele);
+        if (lp_ele == NULL)
+            return 0;
+    } else {
+        /* If we are iterating in reverse direction, just seek the
+         * last part of the last entry in the listpack (that is, the
+         * fields count). */
+        lp_ele = lpLast(lp);
+
+        /* If we are going backward, read the number of elements this
+         * entry is composed of, and jump backward N times to seek
+         * its start. */
+        int64_t lp_count = lpGetInteger(lp_ele);
+        if (lp_count == 0) /* We reached the master entry. */
+            return 0;
+
+        while (lp_count--)
+            lp_ele = lpPrev(lp, lp_ele);
+    }
+
+    lp_ele = lpNext(lp, lp_ele); /* Seek ID (lp_ele currently points to 'flags'). */
+
+    /* Get the ID: it is encoded as difference between the master
+     * ID and this entry ID. */
+    streamID id = *master_id;
+    id.ms += lpGetInteger(lp_ele);
+    lp_ele = lpNext(lp, lp_ele);
+    id.seq += lpGetInteger(lp_ele);
+    *edge_id = id;
+    return 1;
+}
+
+/* Debugging function to log the full content of a listpack. Useful
+ * for development and debugging. */
+void streamLogListpackContent(unsigned char *lp) {
+    unsigned char *p = lpFirst(lp);
+    while(p) {
+        unsigned char buf[LP_INTBUF_SIZE];
+        int64_t v;
+        unsigned char *ele = lpGet(p,&v,buf);
+        /* NOTE(review): presumably lpGet() renders integer elements into
+         * 'buf' so 'ele' is always printable with length 'v' — confirm
+         * against listpack.c. */
+        serverLog(LL_WARNING,"- [%d] '%.*s'", (int)v, (int)v, ele);
+        p = lpNext(lp,p);
+    }
+}
+
+/* Serialize a stream ID into 'buf' as a 128 bit big endian number
+ * (ms first, then seq), so encoded IDs sort lexicographically. */
+void streamEncodeID(void *buf, streamID *id) {
+    uint64_t encoded[2] = { htonu64(id->ms), htonu64(id->seq) };
+    memcpy(buf,encoded,sizeof(encoded));
+}
+
+/* Inverse of streamEncodeID(): parse the 128 bit big-endian encoded ID
+ * pointed to by 'buf' into the 'id' structure passed by reference. */
+void streamDecodeID(void *buf, streamID *id) {
+    uint64_t encoded[2];
+    memcpy(encoded,buf,sizeof(encoded));
+    id->ms = ntohu64(encoded[0]);
+    id->seq = ntohu64(encoded[1]);
+}
+
+/* Three-way comparison of stream IDs: -1 if a < b, 0 if a == b, 1 if a > b.
+ * The ms part is the major key, the seq part breaks ties. */
+int streamCompareID(streamID *a, streamID *b) {
+    if (a->ms != b->ms)
+        return (a->ms > b->ms) ? 1 : -1;
+    if (a->seq != b->seq)
+        return (a->seq > b->seq) ? 1 : -1;
+    return 0;
+}
+
+/* Retrieves the ID of the stream edge entry. An edge is either the first or
+ * the last ID in the stream, and may be a tombstone. To filter out tombstones,
+ * set the 'skip_tombstones' argument to 1. */
+void streamGetEdgeID(stream *s, int first, int skip_tombstones, streamID *edge_id)
+{
+    streamIterator si;
+    int64_t numfields;
+    streamIteratorStart(&si,s,NULL,NULL,!first);
+    si.skip_tombstones = skip_tombstones;
+    int found = streamIteratorGetID(&si,edge_id,&numfields);
+    if (!found) {
+        /* No entry found (empty stream, or all tombstones when skipping):
+         * report the maximal ID as "first" and the minimal ID as "last". */
+        streamID min_id = {0, 0}, max_id = {UINT64_MAX, UINT64_MAX};
+        *edge_id = first ? max_id : min_id;
+    }
+    streamIteratorStop(&si);
+}
+
+/* Adds a new item into the stream 's' having the specified number of
+ * field-value pairs as specified in 'numfields' and stored into 'argv'.
+ * Returns the new entry ID populating the 'added_id' structure.
+ *
+ * If 'use_id' is not NULL, the ID is not auto-generated by the function,
+ * but instead the passed ID is used to add the new entry. In this case
+ * adding the entry may fail as specified later in this comment.
+ *
+ * When 'use_id' is used alongside with a zero 'seq-given', the sequence
+ * part of the passed ID is ignored and the function will attempt to use an
+ * auto-generated sequence.
+ *
+ * The function returns C_OK if the item was added, this is always true
+ * if the ID was generated by the function. However the function may return
+ * C_ERR in several cases:
+ * 1. If an ID was given via 'use_id', but adding it failed since the
+ *    current top ID is greater or equal. errno will be set to EDOM.
+ * 2. If a size of a single element or the sum of the elements is too big to
+ *    be stored into the stream. errno will be set to ERANGE. */
+int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_id, streamID *use_id, int seq_given) {
+
+    /* Generate the new entry ID. */
+    streamID id;
+    if (use_id) {
+        if (seq_given) {
+            id = *use_id;
+        } else {
+            /* The automatically generated sequence can be either zero (new
+             * timestamps) or the incremented sequence of the last ID. In the
+             * latter case, we need to prevent an overflow/advancing forward
+             * in time. */
+            if (s->last_id.ms == use_id->ms) {
+                if (s->last_id.seq == UINT64_MAX) {
+                    errno = EDOM;
+                    return C_ERR;
+                }
+                id = s->last_id;
+                id.seq++;
+            } else {
+                id = *use_id;
+            }
+        }
+    } else {
+        streamNextID(&s->last_id,&id);
+    }
+
+    /* Check that the new ID is greater than the last entry ID
+     * or return an error. Automatically generated IDs might
+     * overflow (and wrap-around) when incrementing the sequence
+     * part. */
+    if (streamCompareID(&id,&s->last_id) <= 0) {
+        errno = EDOM;
+        return C_ERR;
+    }
+
+    /* Avoid overflow when trying to add an element to the stream (listpack
+     * can only host up to 32bit length strings, and also a total listpack size
+     * can't be bigger than 32bit length. */
+    size_t totelelen = 0;
+    for (int64_t i = 0; i < numfields*2; i++) {
+        sds ele = argv[i]->ptr;
+        totelelen += sdslen(ele);
+    }
+    if (totelelen > STREAM_LISTPACK_MAX_SIZE) {
+        errno = ERANGE;
+        return C_ERR;
+    }
+
+    /* Add the new entry. */
+    raxIterator ri;
+    raxStart(&ri,s->rax);
+    raxSeek(&ri,"$",NULL,0);
+
+    size_t lp_bytes = 0;        /* Total bytes in the tail listpack. */
+    unsigned char *lp = NULL;   /* Tail listpack pointer. */
+
+    if (!raxEOF(&ri)) {
+        /* Get a reference to the tail node listpack. */
+        lp = ri.data;
+        lp_bytes = lpBytes(lp);
+    }
+    raxStop(&ri);
+    /* NOTE(review): 'ri.key', 'ri.key_len' and 'ri.data' are still read
+     * below after raxStop(); this relies on raxStop() leaving those fields
+     * intact — confirm against rax.c. */
+
+    /* We have to add the key into the radix tree in lexicographic order,
+     * to do so we consider the ID as a single 128 bit number written in
+     * big endian, so that the most significant bytes are the first ones. */
+    uint64_t rax_key[2];    /* Key in the radix tree containing the listpack.*/
+    streamID master_id;     /* ID of the master entry in the listpack. */
+
+    /* Create a new listpack and radix tree node if needed. Note that when
+     * a new listpack is created, we populate it with a "master entry". This
+     * is just a set of fields that is taken as references in order to compress
+     * the stream entries that we'll add inside the listpack.
+     *
+     * Note that while we use the first added entry fields to create
+     * the master entry, the first added entry is NOT represented in the master
+     * entry, which is a stand alone object. But of course, the first entry
+     * will compress well because it's used as reference.
+     *
+     * The master entry is composed like in the following example:
+     *
+     * +-------+---------+------------+---------+--/--+---------+---------+-+
+     * | count | deleted | num-fields | field_1 | field_2 | ... | field_N |0|
+     * +-------+---------+------------+---------+--/--+---------+---------+-+
+     *
+     * count and deleted just represent respectively the total number of
+     * entries inside the listpack that are valid, and marked as deleted
+     * (deleted flag in the entry flags set). So the total number of items
+     * actually inside the listpack (both deleted and not) is count+deleted.
+     *
+     * The real entries will be encoded with an ID that is just the
+     * millisecond and sequence difference compared to the key stored at
+     * the radix tree node containing the listpack (delta encoding), and
+     * if the fields of the entry are the same as the master entry fields, the
+     * entry flags will specify this fact and the entry fields and number
+     * of fields will be omitted (see later in the code of this function).
+     *
+     * The "0" entry at the end is the same as the 'lp-count' entry in the
+     * regular stream entries (see below), and marks the fact that there are
+     * no more entries, when we scan the stream from right to left. */
+
+    /* First of all, check if we can append to the current macro node or
+     * if we need to switch to the next one. 'lp' will be set to NULL if
+     * the current node is full. */
+    if (lp != NULL) {
+        int new_node = 0;
+        size_t node_max_bytes = server.stream_node_max_bytes;
+        if (node_max_bytes == 0 || node_max_bytes > STREAM_LISTPACK_MAX_SIZE)
+            node_max_bytes = STREAM_LISTPACK_MAX_SIZE;
+        if (lp_bytes + totelelen >= node_max_bytes) {
+            new_node = 1;
+        } else if (server.stream_node_max_entries) {
+            unsigned char *lp_ele = lpFirst(lp);
+            /* Count both live entries and deleted ones. */
+            int64_t count = lpGetInteger(lp_ele) + lpGetInteger(lpNext(lp,lp_ele));
+            if (count >= server.stream_node_max_entries) new_node = 1;
+        }
+
+        if (new_node) {
+            /* Shrink extra pre-allocated memory */
+            lp = lpShrinkToFit(lp);
+            if (ri.data != lp)
+                raxInsert(s->rax,ri.key,ri.key_len,lp,NULL);
+            lp = NULL;
+        }
+    }
+
+    int flags = STREAM_ITEM_FLAG_NONE;
+    if (lp == NULL) {
+        master_id = id;
+        streamEncodeID(rax_key,&id);
+        /* Create the listpack having the master entry ID and fields.
+         * Pre-allocate some bytes when creating listpack to avoid realloc on
+         * every XADD. Since listpack.c uses malloc_size, it'll grow in steps,
+         * and won't realloc on every XADD.
+         * When listpack reaches max number of entries, we'll shrink the
+         * allocation to fit the data. */
+        size_t prealloc = STREAM_LISTPACK_MAX_PRE_ALLOCATE;
+        if (server.stream_node_max_bytes > 0 && server.stream_node_max_bytes < prealloc) {
+            prealloc = server.stream_node_max_bytes;
+        }
+        lp = lpNew(prealloc);
+        lp = lpAppendInteger(lp,1); /* One item, the one we are adding. */
+        lp = lpAppendInteger(lp,0); /* Zero deleted so far. */
+        lp = lpAppendInteger(lp,numfields);
+        for (int64_t i = 0; i < numfields; i++) {
+            sds field = argv[i*2]->ptr;
+            lp = lpAppend(lp,(unsigned char*)field,sdslen(field));
+        }
+        lp = lpAppendInteger(lp,0); /* Master entry zero terminator. */
+        raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL);
+        /* The first entry we insert, has obviously the same fields of the
+         * master entry. */
+        flags |= STREAM_ITEM_FLAG_SAMEFIELDS;
+    } else {
+        serverAssert(ri.key_len == sizeof(rax_key));
+        memcpy(rax_key,ri.key,sizeof(rax_key));
+
+        /* Read the master ID from the radix tree key. */
+        streamDecodeID(rax_key,&master_id);
+        unsigned char *lp_ele = lpFirst(lp);
+
+        /* Update count and skip the deleted fields. */
+        int64_t count = lpGetInteger(lp_ele);
+        lp = lpReplaceInteger(lp,&lp_ele,count+1);
+        lp_ele = lpNext(lp,lp_ele); /* seek deleted. */
+        lp_ele = lpNext(lp,lp_ele); /* seek master entry num fields. */
+
+        /* Check if the entry we are adding, have the same fields
+         * as the master entry. */
+        int64_t master_fields_count = lpGetInteger(lp_ele);
+        lp_ele = lpNext(lp,lp_ele);
+        if (numfields == master_fields_count) {
+            int64_t i;
+            for (i = 0; i < master_fields_count; i++) {
+                sds field = argv[i*2]->ptr;
+                int64_t e_len;
+                unsigned char buf[LP_INTBUF_SIZE];
+                unsigned char *e = lpGet(lp_ele,&e_len,buf);
+                /* Stop if there is a mismatch. */
+                if (sdslen(field) != (size_t)e_len ||
+                    memcmp(e,field,e_len) != 0) break;
+                lp_ele = lpNext(lp,lp_ele);
+            }
+            /* All fields are the same! We can compress the field names
+             * setting a single bit in the flags. */
+            if (i == master_fields_count) flags |= STREAM_ITEM_FLAG_SAMEFIELDS;
+        }
+    }
+
+    /* Populate the listpack with the new entry. We use the following
+     * encoding:
+     *
+     * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+
+     * |flags|entry-id|num-fields|field-1|value-1|...|field-N|value-N|lp-count|
+     * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+
+     *
+     * However if the SAMEFIELD flag is set, we have just to populate
+     * the entry with the values, so it becomes:
+     *
+     * +-----+--------+-------+-/-+-------+--------+
+     * |flags|entry-id|value-1|...|value-N|lp-count|
+     * +-----+--------+-------+-/-+-------+--------+
+     *
+     * The entry-id field is actually two separated fields: the ms
+     * and seq difference compared to the master entry.
+     *
+     * The lp-count field is a number that states the number of listpack pieces
+     * that compose the entry, so that it's possible to travel the entry
+     * in reverse order: we can just start from the end of the listpack, read
+     * the entry, and jump back N times to seek the "flags" field to read
+     * the stream full entry. */
+    lp = lpAppendInteger(lp,flags);
+    lp = lpAppendInteger(lp,id.ms - master_id.ms);
+    lp = lpAppendInteger(lp,id.seq - master_id.seq);
+    if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS))
+        lp = lpAppendInteger(lp,numfields);
+    for (int64_t i = 0; i < numfields; i++) {
+        sds field = argv[i*2]->ptr, value = argv[i*2+1]->ptr;
+        if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS))
+            lp = lpAppend(lp,(unsigned char*)field,sdslen(field));
+        lp = lpAppend(lp,(unsigned char*)value,sdslen(value));
+    }
+    /* Compute and store the lp-count field. */
+    int64_t lp_count = numfields;
+    lp_count += 3; /* Add the 3 fixed fields flags + ms-diff + seq-diff. */
+    if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) {
+        /* If the item is not compressed, it also has the fields other than
+         * the values, and an additional num-fields field. */
+        lp_count += numfields+1;
+    }
+    lp = lpAppendInteger(lp,lp_count);
+
+    /* Insert back into the tree in order to update the listpack pointer. */
+    if (ri.data != lp)
+        raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL);
+    s->length++;
+    s->entries_added++;
+    s->last_id = id;
+    if (s->length == 1) s->first_id = id;
+    if (added_id) *added_id = id;
+    return C_OK;
+}
+
+/* Parsed options shared by the XADD and XTRIM commands. */
+typedef struct {
+    /* XADD options */
+    streamID id; /* User-provided ID, for XADD only. */
+    int id_given; /* Was an ID different than "*" specified? for XADD only. */
+    int seq_given; /* Was an ID different than "ms-*" specified? for XADD only. */
+    int no_mkstream; /* if set to 1 do not create new stream */
+
+    /* XADD + XTRIM common options */
+    int trim_strategy; /* TRIM_STRATEGY_* */
+    int trim_strategy_arg_idx; /* Index of the count in MAXLEN/MINID, for rewriting. */
+    int approx_trim; /* If 1 only delete whole radix tree nodes, so
+                      * the trim argument is not applied verbatim. */
+    long long limit; /* Maximum amount of entries to trim. If 0, no limitation
+                      * on the amount of trimming work is enforced. */
+    /* TRIM_STRATEGY_MAXLEN options */
+    long long maxlen; /* After trimming, leave stream at this length. */
+    /* TRIM_STRATEGY_MINID options */
+    streamID minid; /* Trim by ID (No stream entries with ID < 'minid' will remain) */
+} streamAddTrimArgs;
+
+#define TRIM_STRATEGY_NONE 0
+#define TRIM_STRATEGY_MAXLEN 1
+#define TRIM_STRATEGY_MINID 2
+
+/* Trim the stream 's' according to args->trim_strategy, and return the
+ * number of elements removed from the stream. The 'approx' option, if non-zero,
+ * specifies that the trimming must be performed in an approximated way in
+ * order to maximize performances. This means that the stream may contain
+ * entries with IDs < 'id' in case of MINID (or more elements than 'maxlen'
+ * in case of MAXLEN), and elements are only removed if we can remove
+ * a *whole* node of the radix tree. The elements are removed from the head
+ * of the stream (older elements).
+ *
+ * The function may return zero if:
+ *
+ * 1) The minimal entry ID of the stream is already < 'id' (MINID); or
+ * 2) The stream is already shorter or equal to the specified max length (MAXLEN); or
+ * 3) The 'approx' option is true and the head node did not have enough elements
+ * to be deleted.
+ *
+ * args->limit is the maximum number of entries to delete. The purpose is to
+ * prevent this function from taking too long.
+ * If 'limit' is 0 then we do not limit the number of deleted entries.
+ * Much like the 'approx', if 'limit' is smaller than the number of entries
+ * that should be trimmed, there is a chance we will still have entries with
+ * IDs < 'id' (or number of elements >= maxlen in case of MAXLEN).
+ */
+int64_t streamTrim(stream *s, streamAddTrimArgs *args) {
+    size_t maxlen = args->maxlen; /* Used by TRIM_STRATEGY_MAXLEN. */
+    streamID *id = &args->minid;  /* Used by TRIM_STRATEGY_MINID. */
+    int approx = args->approx_trim;
+    int64_t limit = args->limit;  /* Zero means "no limit on deletions". */
+    int trim_strategy = args->trim_strategy;
+
+    if (trim_strategy == TRIM_STRATEGY_NONE)
+        return 0;
+
+    raxIterator ri;
+    raxStart(&ri,s->rax);
+    raxSeek(&ri,"^",NULL,0); /* Seek the first (oldest) node: trimming always
+                              * removes from the head of the stream. */
+
+    int64_t deleted = 0;
+    while (raxNext(&ri)) {
+        /* MAXLEN: stop as soon as the stream is short enough. */
+        if (trim_strategy == TRIM_STRATEGY_MAXLEN && s->length <= maxlen)
+            break;
+
+        unsigned char *lp = ri.data, *p = lpFirst(lp);
+        int64_t entries = lpGetInteger(p); /* First header element of the
+                                            * listpack is the count of valid
+                                            * entries in this node. */
+
+        /* Check if we exceeded the amount of work we could do */
+        if (limit && (deleted + entries) > limit)
+            break;
+
+        /* Check if we can remove the whole node. */
+        int remove_node;
+        streamID master_id = {0}; /* For MINID */
+        if (trim_strategy == TRIM_STRATEGY_MAXLEN) {
+            remove_node = s->length - entries >= maxlen;
+        } else {
+            /* Read the master ID from the radix tree key. */
+            streamDecodeID(ri.key, &master_id);
+
+            /* Read last ID. */
+            streamID last_id = {0,0};
+            lpGetEdgeStreamID(lp, 0, &master_id, &last_id);
+
+            /* We can remove the entire node if its last ID < 'id' */
+            remove_node = streamCompareID(&last_id, id) < 0;
+        }
+
+        if (remove_node) {
+            lpFree(lp);
+            raxRemove(s->rax,ri.key,ri.key_len,NULL);
+            raxSeek(&ri,">=",ri.key,ri.key_len); /* Removing invalidated the
+                                                  * iterator: re-seek it. */
+            s->length -= entries;
+            deleted += entries;
+            continue;
+        }
+
+        /* If we cannot remove a whole element, and approx is true,
+         * stop here. */
+        if (approx) break;
+
+        /* Now we have to trim entries from within 'lp' */
+        int64_t deleted_from_lp = 0;
+
+        p = lpNext(lp, p); /* Skip deleted field. */
+        p = lpNext(lp, p); /* Skip num-of-fields in the master entry. */
+
+        /* Skip all the master fields. */
+        int64_t master_fields_count = lpGetInteger(p);
+        p = lpNext(lp,p); /* Seek the first field. */
+        for (int64_t j = 0; j < master_fields_count; j++)
+            p = lpNext(lp,p); /* Skip all master fields. */
+        p = lpNext(lp,p); /* Skip the zero master entry terminator. */
+
+        /* 'p' is now pointing to the first entry inside the listpack.
+         * We have to run entry after entry, marking entries as deleted
+         * if they are already not deleted. */
+        while (p) {
+            /* We keep a copy of p (which points to the flags part) in order
+             * to update it after (and if) we actually remove the entry */
+            unsigned char *pcopy = p;
+
+            int64_t flags = lpGetInteger(p);
+            p = lpNext(lp, p); /* Skip flags. */
+            int64_t to_skip;
+
+            int64_t ms_delta = lpGetInteger(p);
+            p = lpNext(lp, p); /* Skip ID ms delta */
+            int64_t seq_delta = lpGetInteger(p);
+            p = lpNext(lp, p); /* Skip ID seq delta */
+
+            streamID currid = {0}; /* For MINID */
+            if (trim_strategy == TRIM_STRATEGY_MINID) {
+                /* Entry IDs are stored as deltas from the node master ID. */
+                currid.ms = master_id.ms + ms_delta;
+                currid.seq = master_id.seq + seq_delta;
+            }
+
+            int stop;
+            if (trim_strategy == TRIM_STRATEGY_MAXLEN) {
+                stop = s->length <= maxlen;
+            } else {
+                /* Following IDs will definitely be greater because the rax
+                 * tree is sorted, no point of continuing. */
+                stop = streamCompareID(&currid, id) >= 0;
+            }
+            if (stop)
+                break;
+
+            if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
+                to_skip = master_fields_count;
+            } else {
+                to_skip = lpGetInteger(p); /* Get num-fields. */
+                p = lpNext(lp,p); /* Skip num-fields. */
+                to_skip *= 2; /* Fields and values. */
+            }
+
+            while(to_skip--) p = lpNext(lp,p); /* Skip the whole entry. */
+            p = lpNext(lp,p); /* Skip the final lp-count field. */
+
+            /* Mark the entry as deleted. */
+            if (!(flags & STREAM_ITEM_FLAG_DELETED)) {
+                /* lpReplaceInteger() may reallocate the listpack: save the
+                 * offset of 'p' so we can rebase it on the new pointer. */
+                intptr_t delta = p - lp;
+                flags |= STREAM_ITEM_FLAG_DELETED;
+                lp = lpReplaceInteger(lp, &pcopy, flags);
+                deleted_from_lp++;
+                s->length--;
+                p = lp + delta;
+            }
+        }
+        deleted += deleted_from_lp;
+
+        /* Now we update the entries/deleted counters. */
+        p = lpFirst(lp);
+        lp = lpReplaceInteger(lp,&p,entries-deleted_from_lp);
+        p = lpNext(lp,p); /* Skip deleted field. */
+        int64_t marked_deleted = lpGetInteger(p);
+        lp = lpReplaceInteger(lp,&p,marked_deleted+deleted_from_lp);
+        p = lpNext(lp,p); /* Skip num-of-fields in the master entry. */
+
+        /* Here we should perform garbage collection in case at this point
+         * there are too many entries deleted inside the listpack. */
+        entries -= deleted_from_lp;
+        marked_deleted += deleted_from_lp;
+        if (entries + marked_deleted > 10 && marked_deleted > entries/2) {
+            /* TODO: perform a garbage collection. */
+        }
+
+        /* Update the listpack with the new pointer. */
+        raxInsert(s->rax,ri.key,ri.key_len,lp,NULL);
+
+        break; /* If we are here, there was enough to delete in the current
+                  node, so no need to go to the next node. */
+    }
+    raxStop(&ri);
+
+    /* Update the stream's first ID after the trimming. */
+    if (s->length == 0) {
+        /* The stream is now empty: reset the first ID to 0-0. */
+        s->first_id.ms = 0;
+        s->first_id.seq = 0;
+    } else if (deleted) {
+        streamGetEdgeID(s,1,1,&s->first_id);
+    }
+
+    return deleted;
+}
+
+/* Trim the stream 's' down to at most 'maxlen' entries, returning the
+ * number of entries removed. When 'approx' is non-zero the trimming is
+ * approximated (whole-node granularity) and the amount of work is capped
+ * at 100 rax nodes worth of entries, as elsewhere in this file. */
+int64_t streamTrimByLength(stream *s, long long maxlen, int approx) {
+    streamAddTrimArgs args;
+    memset(&args, 0, sizeof(args));
+    args.trim_strategy = TRIM_STRATEGY_MAXLEN;
+    args.approx_trim = approx;
+    args.maxlen = maxlen;
+    args.limit = approx ? 100 * server.stream_node_max_entries : 0;
+    return streamTrim(s, &args);
+}
+
+/* Trim the stream 's' removing entries with IDs lower than 'minid',
+ * returning the number of entries removed. When 'approx' is non-zero the
+ * trimming is approximated (whole-node granularity) and the amount of work
+ * is capped at 100 rax nodes worth of entries, as elsewhere in this file. */
+int64_t streamTrimByID(stream *s, streamID minid, int approx) {
+    streamAddTrimArgs args;
+    memset(&args, 0, sizeof(args));
+    args.trim_strategy = TRIM_STRATEGY_MINID;
+    args.approx_trim = approx;
+    args.minid = minid;
+    args.limit = approx ? 100 * server.stream_node_max_entries : 0;
+    return streamTrim(s, &args);
+}
+
+/* Parse the arguments of XADD/XTRIM.
+ *
+ * See streamAddTrimArgs for more details about the arguments handled.
+ *
+ * This function returns the position of the ID argument (relevant only to XADD).
+ * On error -1 is returned and a reply is sent. */
+static int streamParseAddOrTrimArgsOrReply(client *c, streamAddTrimArgs *args, int xadd) {
+    /* Initialize arguments to defaults */
+    memset(args, 0, sizeof(*args));
+
+    /* Parse options. */
+    int i = 2; /* This is the first argument position where we could
+                  find an option, or the ID. */
+    int limit_given = 0; /* True when an explicit LIMIT option was parsed. */
+    for (; i < c->argc; i++) {
+        int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
+        char *opt = c->argv[i]->ptr;
+        if (xadd && opt[0] == '*' && opt[1] == '\0') {
+            /* This is just a fast path for the common case of auto-ID
+             * creation. */
+            break;
+        } else if (!strcasecmp(opt,"maxlen") && moreargs) {
+            /* MAXLEN [~|=] <count> */
+            if (args->trim_strategy != TRIM_STRATEGY_NONE) {
+                addReplyError(c,"syntax error, MAXLEN and MINID options at the same time are not compatible");
+                return -1;
+            }
+            args->approx_trim = 0;
+            char *next = c->argv[i+1]->ptr;
+            /* Check for the form MAXLEN ~ <count>. */
+            if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') {
+                args->approx_trim = 1;
+                i++;
+            } else if (moreargs >= 2 && next[0] == '=' && next[1] == '\0') {
+                /* MAXLEN = <count> is the explicit exact-trimming form. */
+                i++;
+            }
+            if (getLongLongFromObjectOrReply(c,c->argv[i+1],&args->maxlen,NULL)
+                != C_OK) return -1;
+
+            if (args->maxlen < 0) {
+                addReplyError(c,"The MAXLEN argument must be >= 0.");
+                return -1;
+            }
+            i++;
+            args->trim_strategy = TRIM_STRATEGY_MAXLEN;
+            args->trim_strategy_arg_idx = i; /* Position of <count>, used by
+                                              * callers when rewriting. */
+        } else if (!strcasecmp(opt,"minid") && moreargs) {
+            /* MINID [~|=] <id> */
+            if (args->trim_strategy != TRIM_STRATEGY_NONE) {
+                addReplyError(c,"syntax error, MAXLEN and MINID options at the same time are not compatible");
+                return -1;
+            }
+            args->approx_trim = 0;
+            char *next = c->argv[i+1]->ptr;
+            /* Check for the form MINID ~ <id> */
+            if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') {
+                args->approx_trim = 1;
+                i++;
+            } else if (moreargs >= 2 && next[0] == '=' && next[1] == '\0') {
+                i++;
+            }
+
+            if (streamParseStrictIDOrReply(c,c->argv[i+1],&args->minid,0,NULL) != C_OK)
+                return -1;
+
+            i++;
+            args->trim_strategy = TRIM_STRATEGY_MINID;
+            args->trim_strategy_arg_idx = i;
+        } else if (!strcasecmp(opt,"limit") && moreargs) {
+            /* Note about LIMIT: If it was not provided by the caller we set
+             * it to 100*server.stream_node_max_entries, and that's to prevent the
+             * trimming from taking too long, on the expense of not deleting entries
+             * that should be trimmed.
+             * If user wanted exact trimming (i.e. no '~') we never limit the number
+             * of trimmed entries */
+            if (getLongLongFromObjectOrReply(c,c->argv[i+1],&args->limit,NULL) != C_OK)
+                return -1;
+
+            if (args->limit < 0) {
+                addReplyError(c,"The LIMIT argument must be >= 0.");
+                return -1;
+            }
+            limit_given = 1;
+            i++;
+        } else if (xadd && !strcasecmp(opt,"nomkstream")) {
+            args->no_mkstream = 1;
+        } else if (xadd) {
+            /* If we are here is a syntax error or a valid ID. */
+            if (streamParseStrictIDOrReply(c,c->argv[i],&args->id,0,&args->seq_given) != C_OK)
+                return -1;
+            args->id_given = 1;
+            break;
+        } else {
+            addReplyErrorObject(c,shared.syntaxerr);
+            return -1;
+        }
+    }
+
+    /* LIMIT is only meaningful together with a trimming strategy. */
+    if (args->limit && args->trim_strategy == TRIM_STRATEGY_NONE) {
+        addReplyError(c,"syntax error, LIMIT cannot be used without specifying a trimming strategy");
+        return -1;
+    }
+
+    if (!xadd && args->trim_strategy == TRIM_STRATEGY_NONE) {
+        addReplyError(c,"syntax error, XTRIM must be called with a trimming strategy");
+        return -1;
+    }
+
+    if (mustObeyClient(c)) {
+        /* If command came from master or from AOF we must not enforce maxnodes
+         * (The maxlen/minid argument was re-written to make sure there's no
+         * inconsistency). */
+        args->limit = 0;
+    } else {
+        /* We need to set the limit (only if we got '~') */
+        if (limit_given) {
+            if (!args->approx_trim) {
+                /* LIMIT was provided without ~ */
+                addReplyError(c,"syntax error, LIMIT cannot be used without the special ~ option");
+                return -1;
+            }
+        } else {
+            /* User didn't provide LIMIT, we must set it. */
+            if (args->approx_trim) {
+                /* In order to prevent from trimming to do too much work and
+                 * cause latency spikes we limit the amount of work it can do.
+                 * We have to cap args->limit from both sides in case
+                 * stream_node_max_entries is 0 or too big (could cause overflow)
+                 */
+                args->limit = 100 * server.stream_node_max_entries; /* Maximum 100 rax nodes. */
+                if (args->limit <= 0) args->limit = 10000;
+                if (args->limit > 1000000) args->limit = 1000000;
+            } else {
+                /* No LIMIT for exact trimming */
+                args->limit = 0;
+            }
+        }
+    }
+
+    return i;
+}
+
+/* Initialize the stream iterator, so that we can call iterating functions
+ * to get the next items. This requires a corresponding streamIteratorStop()
+ * at the end. The 'rev' parameter controls the direction. If it's zero the
+ * iteration is from the start to the end element (inclusive), otherwise
+ * if rev is non-zero, the iteration is reversed.
+ *
+ * Once the iterator is initialized, we iterate like this:
+ *
+ *  streamIterator myiterator;
+ *  streamIteratorStart(&myiterator,...);
+ *  int64_t numfields;
+ *  while(streamIteratorGetID(&myiterator,&ID,&numfields)) {
+ *      while(numfields--) {
+ *          unsigned char *key, *value;
+ *          size_t key_len, value_len;
+ *          streamIteratorGetField(&myiterator,&key,&value,&key_len,&value_len);
+ *
+ *          ... do what you want with key and value ...
+ *      }
+ *  }
+ *  streamIteratorStop(&myiterator); */
+void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev) {
+    /* Initialize the iterator and translates the iteration start/stop
+     * elements into a 128 bit big-endian number. A NULL 'start' means the
+     * minimum possible ID (0-0), a NULL 'end' the maximum possible one. */
+    if (start) {
+        streamEncodeID(si->start_key,start);
+    } else {
+        si->start_key[0] = 0;
+        si->start_key[1] = 0;
+    }
+
+    if (end) {
+        streamEncodeID(si->end_key,end);
+    } else {
+        si->end_key[0] = UINT64_MAX;
+        si->end_key[1] = UINT64_MAX;
+    }
+
+    /* Seek the correct node in the radix tree. */
+    raxStart(&si->ri,s->rax);
+    if (!rev) {
+        if (start && (start->ms || start->seq)) {
+            /* Seek the node that may contain 'start': the node keys are the
+             * big-endian master IDs, so "<=" finds the candidate node. */
+            raxSeek(&si->ri,"<=",(unsigned char*)si->start_key,
+                    sizeof(si->start_key));
+            if (raxEOF(&si->ri)) raxSeek(&si->ri,"^",NULL,0);
+        } else {
+            raxSeek(&si->ri,"^",NULL,0); /* Seek the first node. */
+        }
+    } else {
+        if (end && (end->ms || end->seq)) {
+            raxSeek(&si->ri,"<=",(unsigned char*)si->end_key,
+                    sizeof(si->end_key));
+            if (raxEOF(&si->ri)) raxSeek(&si->ri,"$",NULL,0);
+        } else {
+            raxSeek(&si->ri,"$",NULL,0); /* Seek the last node. */
+        }
+    }
+    si->stream = s;
+    si->lp = NULL;     /* There is no current listpack right now. */
+    si->lp_ele = NULL; /* Current listpack cursor. */
+    si->rev = rev;     /* Direction, if non-zero reversed, from end to start. */
+    si->skip_tombstones = 1; /* By default tombstones aren't emitted. */
+}
+
+/* Return 1 and store the current item ID at 'id' if there are still
+ * elements within the iteration range, otherwise return 0 in order to
+ * signal the iteration terminated. */
+int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
+    while(1) { /* Will stop when element > stop_key or end of radix tree. */
+        /* If the current listpack is set to NULL, this is the start of the
+         * iteration or the previous listpack was completely iterated.
+         * Go to the next node. */
+        if (si->lp == NULL || si->lp_ele == NULL) {
+            if (!si->rev && !raxNext(&si->ri)) return 0;
+            else if (si->rev && !raxPrev(&si->ri)) return 0;
+            serverAssert(si->ri.key_len == sizeof(streamID));
+            /* Get the master ID. */
+            streamDecodeID(si->ri.key,&si->master_id);
+            /* Get the master fields count. */
+            si->lp = si->ri.data;
+            si->lp_ele = lpFirst(si->lp);           /* Seek items count */
+            si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek deleted count. */
+            si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek num fields. */
+            si->master_fields_count = lpGetInteger(si->lp_ele);
+            si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek first field. */
+            si->master_fields_start = si->lp_ele;
+            /* We are now pointing to the first field of the master entry.
+             * We need to seek either the first or the last entry depending
+             * on the direction of the iteration. */
+            if (!si->rev) {
+                /* If we are iterating in normal order, skip the master fields
+                 * to seek the first actual entry. */
+                for (uint64_t i = 0; i < si->master_fields_count; i++)
+                    si->lp_ele = lpNext(si->lp,si->lp_ele);
+            } else {
+                /* If we are iterating in reverse direction, just seek the
+                 * last part of the last entry in the listpack (that is, the
+                 * fields count). */
+                si->lp_ele = lpLast(si->lp);
+            }
+        } else if (si->rev) {
+            /* If we are iterating in the reverse order, and this is not
+             * the first entry emitted for this listpack, then we already
+             * emitted the current entry, and have to go back to the previous
+             * one. The lp-count field tells how many listpack elements the
+             * current entry occupies. */
+            int64_t lp_count = lpGetInteger(si->lp_ele);
+            while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele);
+            /* Seek lp-count of prev entry. */
+            si->lp_ele = lpPrev(si->lp,si->lp_ele);
+        }
+
+        /* For every radix tree node, iterate the corresponding listpack,
+         * returning elements when they are within range. */
+        while(1) {
+            if (!si->rev) {
+                /* If we are going forward, skip the previous entry
+                 * lp-count field (or in case of the master entry, the zero
+                 * term field) */
+                si->lp_ele = lpNext(si->lp,si->lp_ele);
+                if (si->lp_ele == NULL) break; /* End of listpack: next node. */
+            } else {
+                /* If we are going backward, read the number of elements this
+                 * entry is composed of, and jump backward N times to seek
+                 * its start. */
+                int64_t lp_count = lpGetInteger(si->lp_ele);
+                if (lp_count == 0) { /* We reached the master entry. */
+                    si->lp = NULL;
+                    si->lp_ele = NULL;
+                    break;
+                }
+                while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele);
+            }
+
+            /* Get the flags entry. */
+            si->lp_flags = si->lp_ele; /* Saved so RemoveEntry() can flag it. */
+            int64_t flags = lpGetInteger(si->lp_ele);
+            si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek ID. */
+
+            /* Get the ID: it is encoded as difference between the master
+             * ID and this entry ID. */
+            *id = si->master_id;
+            id->ms += lpGetInteger(si->lp_ele);
+            si->lp_ele = lpNext(si->lp,si->lp_ele);
+            id->seq += lpGetInteger(si->lp_ele);
+            si->lp_ele = lpNext(si->lp,si->lp_ele);
+            unsigned char buf[sizeof(streamID)];
+            streamEncodeID(buf,id); /* Big-endian form: memcmp() gives ID order. */
+
+            /* The number of entries is here or not depending on the
+             * flags. */
+            if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
+                *numfields = si->master_fields_count;
+            } else {
+                *numfields = lpGetInteger(si->lp_ele);
+                si->lp_ele = lpNext(si->lp,si->lp_ele);
+            }
+            serverAssert(*numfields>=0);
+
+            /* If current >= start, and the entry is not marked as
+             * deleted or tombstones are included, emit it. */
+            if (!si->rev) {
+                if (memcmp(buf,si->start_key,sizeof(streamID)) >= 0 &&
+                    (!si->skip_tombstones || !(flags & STREAM_ITEM_FLAG_DELETED)))
+                {
+                    if (memcmp(buf,si->end_key,sizeof(streamID)) > 0)
+                        return 0; /* We are already out of range. */
+                    si->entry_flags = flags;
+                    if (flags & STREAM_ITEM_FLAG_SAMEFIELDS)
+                        si->master_fields_ptr = si->master_fields_start;
+                    return 1; /* Valid item returned. */
+                }
+            } else {
+                if (memcmp(buf,si->end_key,sizeof(streamID)) <= 0 &&
+                    (!si->skip_tombstones || !(flags & STREAM_ITEM_FLAG_DELETED)))
+                {
+                    if (memcmp(buf,si->start_key,sizeof(streamID)) < 0)
+                        return 0; /* We are already out of range. */
+                    si->entry_flags = flags;
+                    if (flags & STREAM_ITEM_FLAG_SAMEFIELDS)
+                        si->master_fields_ptr = si->master_fields_start;
+                    return 1; /* Valid item returned. */
+                }
+            }
+
+            /* If we do not emit, we have to discard if we are going
+             * forward, or seek the previous entry if we are going
+             * backward. */
+            if (!si->rev) {
+                int64_t to_discard = (flags & STREAM_ITEM_FLAG_SAMEFIELDS) ?
+                                      *numfields : *numfields*2;
+                for (int64_t i = 0; i < to_discard; i++)
+                    si->lp_ele = lpNext(si->lp,si->lp_ele);
+            } else {
+                int64_t prev_times = 4; /* flag + id ms + id seq + one more to
+                                           go back to the previous entry "count"
+                                           field. */
+                /* If the entry was not flagged SAMEFIELD we also read the
+                 * number of fields, so go back one more. */
+                if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) prev_times++;
+                while(prev_times--) si->lp_ele = lpPrev(si->lp,si->lp_ele);
+            }
+        }
+
+        /* End of listpack reached. Try the next/prev radix tree node. */
+    }
+}
+
+/* Get the field and value of the current item we are iterating. This should
+ * be called immediately after streamIteratorGetID(), and for each field
+ * according to the number of fields returned by streamIteratorGetID().
+ * The function populates the field and value pointers and the corresponding
+ * lengths by reference, that are valid until the next iterator call, assuming
+ * no one touches the stream meanwhile. */
+void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen) {
+    if (si->entry_flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
+        /* The entry shares the master entry's field names: read the field
+         * from the master fields cursor instead of the entry itself. */
+        *fieldptr = lpGet(si->master_fields_ptr,fieldlen,si->field_buf);
+        si->master_fields_ptr = lpNext(si->lp,si->master_fields_ptr);
+    } else {
+        *fieldptr = lpGet(si->lp_ele,fieldlen,si->field_buf);
+        si->lp_ele = lpNext(si->lp,si->lp_ele);
+    }
+    /* The value is always stored in the entry itself. */
+    *valueptr = lpGet(si->lp_ele,valuelen,si->value_buf);
+    si->lp_ele = lpNext(si->lp,si->lp_ele);
+}
+
+/* Remove the current entry from the stream: can be called after the
+ * GetID() API or after any GetField() call, however we need to iterate
+ * a valid entry while calling this function. Moreover the function
+ * requires the entry ID we are currently iterating, that was previously
+ * returned by GetID().
+ *
+ * Note that after calling this function, next calls to GetField() can't
+ * be performed: the entry is now deleted. Instead the iterator will
+ * automatically re-seek to the next entry, so the caller should continue
+ * with GetID(). */
+void streamIteratorRemoveEntry(streamIterator *si, streamID *current) {
+    unsigned char *lp = si->lp;
+    int64_t aux;
+
+    /* We do not really delete the entry here. Instead we mark it as
+     * deleted by flagging it, and also incrementing the count of the
+     * deleted entries in the listpack header.
+     *
+     * We start flagging: */
+    int64_t flags = lpGetInteger(si->lp_flags);
+    flags |= STREAM_ITEM_FLAG_DELETED;
+    lp = lpReplaceInteger(lp,&si->lp_flags,flags); /* May reallocate 'lp'. */
+
+    /* Change the valid/deleted entries count in the master entry. */
+    unsigned char *p = lpFirst(lp);
+    aux = lpGetInteger(p); /* Count of still-valid entries in this node. */
+
+    if (aux == 1) {
+        /* If this is the last element in the listpack, we can remove the whole
+         * node. */
+        lpFree(lp);
+        raxRemove(si->stream->rax,si->ri.key,si->ri.key_len,NULL);
+    } else {
+        /* In the base case we alter the counters of valid/deleted entries. */
+        lp = lpReplaceInteger(lp,&p,aux-1);
+        p = lpNext(lp,p); /* Seek deleted field. */
+        aux = lpGetInteger(p);
+        lp = lpReplaceInteger(lp,&p,aux+1);
+
+        /* Update the listpack with the new pointer. */
+        if (si->lp != lp)
+            raxInsert(si->stream->rax,si->ri.key,si->ri.key_len,lp,NULL);
+    }
+
+    /* Update the number of entries counter. */
+    si->stream->length--;
+
+    /* Re-seek the iterator to fix the now messed up state: restart it on the
+     * sub-range that is still left to iterate, keeping the same direction. */
+    streamID start, end;
+    if (si->rev) {
+        streamDecodeID(si->start_key,&start);
+        end = *current;
+    } else {
+        start = *current;
+        streamDecodeID(si->end_key,&end);
+    }
+    streamIteratorStop(si);
+    streamIteratorStart(si,si->stream,&start,&end,si->rev);
+
+    /* TODO: perform a garbage collection here if the ratio between
+     * deleted and valid goes over a certain limit. */
+}
+
+/* Stop the stream iterator. The only cleanup we need is to free the rax
+ * iterator, since the stream iterator itself is supposed to be stack
+ * allocated. Must be paired with every streamIteratorStart() call. */
+void streamIteratorStop(streamIterator *si) {
+    raxStop(&si->ri);
+}
+
+/* Return 1 if `id` exists in `s` (and not marked as deleted) */
+int streamEntryExists(stream *s, streamID *id) {
+    streamIterator si;
+    streamID found_id;
+    int64_t numfields;
+
+    /* Iterate the single-ID range [id,id]: the iterator skips tombstones
+     * by default, so getting an element back means the entry exists. */
+    streamIteratorStart(&si,s,id,id,0);
+    int exists = streamIteratorGetID(&si,&found_id,&numfields);
+    streamIteratorStop(&si);
+
+    if (!exists) return 0;
+    serverAssert(streamCompareID(id,&found_id) == 0);
+    return 1;
+}
+
+/* Delete the specified item ID from the stream, returning 1 if the item
+ * was deleted 0 otherwise (if it does not exist). */
+int streamDeleteItem(stream *s, streamID *id) {
+    streamIterator si;
+    streamID found_id;
+    int64_t numfields;
+    int deleted = 0;
+
+    /* Seek the exact ID and, when it is found, flag it as a tombstone. */
+    streamIteratorStart(&si,s,id,id,0);
+    if (streamIteratorGetID(&si,&found_id,&numfields)) {
+        streamIteratorRemoveEntry(&si,&found_id);
+        deleted = 1;
+    }
+    streamIteratorStop(&si);
+    return deleted;
+}
+
+/* Get the last valid (non-tombstone) streamID of 's'.
+ *
+ * The stream is iterated in reverse with tombstones skipped (the iterator's
+ * default), so the first ID returned is the greatest non-deleted one.
+ * Panics if the stream claims to be non-empty but no entry can be found
+ * (stream corruption). */
+void streamLastValidID(stream *s, streamID *maxid)
+{
+    streamIterator si;
+    streamIteratorStart(&si,s,NULL,NULL,1); /* rev=1: start from the end. */
+    int64_t numfields;
+    if (!streamIteratorGetID(&si,maxid,&numfields) && s->length)
+        serverPanic("Corrupt stream, length is %llu, but no max id", (unsigned long long)s->length);
+    streamIteratorStop(&si);
+}
+
+/* Maximum size for a stream ID string. In theory 20*2+1 should be enough,
+ * But to avoid chance for off by one issues and null-term, in case this will
+ * be used as parsing buffer, we use a slightly larger buffer. On the other
+ * hand considering sds header is gonna add 4 bytes, we wanna keep below the
+ * allocator's 48 bytes bin. */
+#define STREAM_ID_STR_LEN 44
+
+/* Create a new sds string with the <ms>-<seq> textual representation of
+ * 'id'. The returned sds is owned by the caller. */
+sds createStreamIDString(streamID *id) {
+    /* Optimization: pre-allocate a big enough buffer to avoid reallocs. */
+    sds str = sdsnewlen(SDS_NOINIT, STREAM_ID_STR_LEN);
+    sdssetlen(str, 0); /* Keep the allocation, but start as an empty string. */
+    return sdscatfmt(str,"%U-%U", id->ms,id->seq);
+}
+
+/* Emit a reply in the client output buffer by formatting a Stream ID
+ * in the standard <ms>-<seq> format, using the simple string protocol
+ * of REPL. */
+void addReplyStreamID(client *c, streamID *id) {
+    sds idstr = createStreamIDString(id);
+    /* Ownership of the sds is transferred to the reply machinery. */
+    addReplyBulkSds(c,idstr);
+}
+
+/* Like addReplyStreamID(), but fills a previously created deferred reply
+ * node 'dr' with the <ms>-<seq> representation of 'id'. */
+void setDeferredReplyStreamID(client *c, void *dr, streamID *id) {
+    sds idstr = createStreamIDString(id);
+    setDeferredReplyBulkSds(c, dr, idstr);
+}
+
+/* Similar to the above function, but just creates an object, usually useful
+ * for replication purposes to create arguments. */
+robj *createObjectFromStreamID(streamID *id) {
+    sds idstr = createStreamIDString(id);
+    return createObject(OBJ_STRING, idstr);
+}
+
+/* Returns non-zero if the ID is 0-0. */
+int streamIDEqZero(streamID *id) {
+    return id->ms == 0 && id->seq == 0;
+}
+
+/* A helper that returns non-zero if the range from 'start' to 'end'
+ * contains a tombstone.
+ *
+ * NOTE: this assumes that the caller had verified that 'start' is less than
+ * 's->last_id'. */
+int streamRangeHasTombstones(stream *s, streamID *start, streamID *end) {
+    streamID start_id, end_id;
+
+    if (!s->length || streamIDEqZero(&s->max_deleted_entry_id)) {
+        /* The stream is empty or has no tombstones. */
+        return 0;
+    }
+
+    if (streamCompareID(&s->first_id,&s->max_deleted_entry_id) > 0) {
+        /* The latest tombstone is before the first entry. */
+        return 0;
+    }
+
+    /* A NULL 'start' means the minimum possible ID. */
+    if (start) {
+        start_id = *start;
+    } else {
+        start_id.ms = 0;
+        start_id.seq = 0;
+    }
+
+    /* A NULL 'end' means the maximum possible ID. */
+    if (end) {
+        end_id = *end;
+    } else {
+        end_id.ms = UINT64_MAX;
+        end_id.seq = UINT64_MAX;
+    }
+
+    if (streamCompareID(&start_id,&s->max_deleted_entry_id) <= 0 &&
+        streamCompareID(&s->max_deleted_entry_id,&end_id) <= 0)
+    {
+        /* start_id <= max_deleted_entry_id <= end_id: The range does include a tombstone. */
+        return 1;
+    }
+
+    /* The range doesn't include a tombstone. */
+    return 0;
+}
+
+/* Replies with a consumer group's current lag, that is the number of messages
+ * in the stream that are yet to be delivered. In case that the lag isn't
+ * available due to fragmentation, the reply to the client is a null. */
+void streamReplyWithCGLag(client *c, stream *s, streamCG *cg) {
+    int valid = 0;      /* True when a lag value could be computed. */
+    long long lag = 0;
+
+    if (!s->entries_added) {
+        /* The lag of a newly-initialized stream is 0. */
+        lag = 0;
+        valid = 1;
+    } else if (cg->entries_read != SCG_INVALID_ENTRIES_READ && !streamRangeHasTombstones(s,&cg->last_id,NULL)) {
+        /* No fragmentation ahead means that the group's logical reads counter
+         * is valid for performing the lag calculation. */
+        lag = (long long)s->entries_added - cg->entries_read;
+        valid = 1;
+    } else {
+        /* Attempt to retrieve the group's last ID logical read counter. */
+        long long entries_read = streamEstimateDistanceFromFirstEverEntry(s,&cg->last_id);
+        if (entries_read != SCG_INVALID_ENTRIES_READ) {
+            /* A valid counter was obtained. */
+            lag = (long long)s->entries_added - entries_read;
+            valid = 1;
+        }
+    }
+
+    if (valid) {
+        addReplyLongLong(c,lag);
+    } else {
+        addReplyNull(c); /* The lag can't be computed reliably. */
+    }
+}
+
+/* This function returns a value that is the ID's logical read counter, or its
+ * distance (the number of entries) from the first entry ever to have been added
+ * to the stream.
+ *
+ * A counter is returned only in one of the following cases:
+ * 1. The ID is the same as the stream's last ID. In this case, the returned
+ *    is the same as the stream's entries_added counter.
+ * 2. The ID equals that of the currently first entry in the stream, and the
+ *    stream has no tombstones. The returned value, in this case, is the result
+ *    of subtracting the stream's length from its added_entries, incremented by
+ *    one.
+ * 3. The ID less than the stream's first current entry's ID, and there are no
+ *    tombstones. Here the estimated counter is the result of subtracting the
+ *    stream's length from its added_entries.
+ * 4. The stream's added_entries is zero, meaning that no entries were ever
+ *    added.
+ *
+ * The special return value of ULLONG_MAX signals that the counter's value isn't
+ * obtainable. It is returned in these cases:
+ * 1. The provided ID, if it even exists, is somewhere between the stream's
+ *    current first and last entries' IDs, or in the future.
+ * 2. The stream contains one or more tombstones. */
+long long streamEstimateDistanceFromFirstEverEntry(stream *s, streamID *id) {
+    /* The counter of any ID in an empty, never-before-used stream is 0. */
+    if (!s->entries_added) {
+        return 0;
+    }
+
+    /* In the empty stream, if the ID is smaller or equal to the last ID,
+     * it can set to the current added_entries value. */
+    if (!s->length && streamCompareID(id,&s->last_id) < 1) { /* id <= last_id */
+        return s->entries_added;
+    }
+
+    int cmp_last = streamCompareID(id,&s->last_id);
+    if (cmp_last == 0) {
+        /* Return the exact counter of the last entry in the stream. */
+        return s->entries_added;
+    } else if (cmp_last > 0) {
+        /* The counter of a future ID is unknown. */
+        return SCG_INVALID_ENTRIES_READ;
+    }
+
+    int cmp_id_first = streamCompareID(id,&s->first_id);
+    int cmp_xdel_first = streamCompareID(&s->max_deleted_entry_id,&s->first_id);
+    if (streamIDEqZero(&s->max_deleted_entry_id) || cmp_xdel_first < 0) {
+        /* There's definitely no fragmentation ahead. */
+        if (cmp_id_first < 0) {
+            /* Return the estimated counter. */
+            return s->entries_added - s->length;
+        } else if (cmp_id_first == 0) {
+            /* Return the exact counter of the first entry in the stream. */
+            return s->entries_added - s->length + 1;
+        }
+    }
+
+    /* The ID is either before an XDEL that fragments the stream or an arbitrary
+     * ID. Either case, so we can't make a prediction. */
+    return SCG_INVALID_ENTRIES_READ;
+}
+
+/* As a result of an explicit XCLAIM or XREADGROUP command, new entries
+ * are created in the pending list of the stream and consumers. We need
+ * to propagate this changes in the form of XCLAIM commands. */
+void streamPropagateXCLAIM(client *c, robj *key, streamCG *group, robj *groupname, robj *id, streamNACK *nack) {
+    /* We need to generate an XCLAIM that will work in a idempotent fashion:
+     *
+     * XCLAIM <key> <group> <consumer> 0 <id> TIME <milliseconds-unix-time>
+     *        RETRYCOUNT <count> FORCE JUSTID LASTID <id>.
+     *
+     * Note that JUSTID is useful in order to avoid that XCLAIM will do
+     * useless work in the slave side, trying to fetch the stream item. */
+    robj *argv[14];
+    argv[0] = shared.xclaim;
+    argv[1] = key;
+    argv[2] = groupname;
+    argv[3] = createStringObject(nack->consumer->name,sdslen(nack->consumer->name));
+    argv[4] = shared.integers[0]; /* The '0' argument in the format above. */
+    argv[5] = id;
+    argv[6] = shared.time;
+    argv[7] = createStringObjectFromLongLong(nack->delivery_time);
+    argv[8] = shared.retrycount;
+    argv[9] = createStringObjectFromLongLong(nack->delivery_count);
+    argv[10] = shared.force;
+    argv[11] = shared.justid;
+    argv[12] = shared.lastid;
+    argv[13] = createObjectFromStreamID(&group->last_id);
+
+    alsoPropagate(c->db->id,argv,14,PROPAGATE_AOF|PROPAGATE_REPL);
+
+    /* Release only the objects created above: the remaining argv entries
+     * are either shared objects or borrowed from the caller. */
+    decrRefCount(argv[3]);
+    decrRefCount(argv[7]);
+    decrRefCount(argv[9]);
+    decrRefCount(argv[13]);
+}
+
+/* We need this when we want to propagate the new last-id of a consumer group
+ * that was consumed by XREADGROUP with the NOACK option: in that case we can't
+ * propagate the last ID just using the XCLAIM LASTID option, so we emit
+ *
+ *  XGROUP SETID <key> <groupname> <id> ENTRIESREAD <entries_read>
+ */
+void streamPropagateGroupID(client *c, robj *key, streamCG *group, robj *groupname) {
+    robj *argv[7];
+    argv[0] = shared.xgroup;
+    argv[1] = shared.setid;
+    argv[2] = key;
+    argv[3] = groupname;
+    argv[4] = createObjectFromStreamID(&group->last_id);
+    argv[5] = shared.entriesread;
+    argv[6] = createStringObjectFromLongLong(group->entries_read);
+
+    alsoPropagate(c->db->id,argv,7,PROPAGATE_AOF|PROPAGATE_REPL);
+
+    /* Release only the objects created above: the remaining argv entries
+     * are either shared objects or borrowed from the caller. */
+    decrRefCount(argv[4]);
+    decrRefCount(argv[6]);
+}
+
+/* We need this when we want to propagate creation of consumer that was created
+ * by XREADGROUP with the NOACK option. In that case, the only way to create
+ * the consumer at the replica is by using XGROUP CREATECONSUMER (see issue #7140)
+ *
+ *  XGROUP CREATECONSUMER <key> <groupname> <consumername>
+ */
+void streamPropagateConsumerCreation(client *c, robj *key, robj *groupname, sds consumername) {
+    robj *argv[5];
+    argv[0] = shared.xgroup;
+    argv[1] = shared.createconsumer;
+    argv[2] = key;
+    argv[3] = groupname;
+    argv[4] = createObject(OBJ_STRING,sdsdup(consumername)); /* Own copy of the name. */
+
+    alsoPropagate(c->db->id,argv,5,PROPAGATE_AOF|PROPAGATE_REPL);
+
+    /* Release only the object created above: the remaining argv entries
+     * are either shared objects or borrowed from the caller. */
+    decrRefCount(argv[4]);
+}
+
+/* Send the stream items in the specified range to the client 'c'. The range
+ * the client will receive is between start and end inclusive, if 'count' is
+ * non zero, no more than 'count' elements are sent.
+ *
+ * The 'end' pointer can be NULL to mean that we want all the elements from
+ * 'start' till the end of the stream. If 'rev' is non zero, elements are
+ * produced in reversed order from end to start.
+ *
+ * The function returns the number of entries emitted.
+ *
+ * If group and consumer are not NULL, the function performs additional work:
+ * 1. It updates the last delivered ID in the group in case we are
+ * sending IDs greater than the current last ID.
+ * 2. If the requested IDs are already assigned to some other consumer, the
+ * function will not return it to the client.
+ * 3. An entry in the pending list will be created for every entry delivered
+ * for the first time to this consumer.
+ * 4. The group's read counter is incremented if it is already valid and there
+ * are no future tombstones, or is invalidated (set to 0) otherwise. If the
+ * counter is invalid to begin with, we try to obtain it for the last
+ * delivered ID.
+ *
+ * The behavior may be modified passing non-zero flags:
+ *
+ * STREAM_RWR_NOACK: Do not create PEL entries, that is, the point "3" above
+ * is not performed.
+ * STREAM_RWR_RAWENTRIES: Do not emit array boundaries, but just the entries,
+ * and return the number of entries emitted as usually.
+ * This is used when the function is just used in order
+ * to emit data and there is some higher level logic.
+ *
+ * The final argument 'spi' (stream propagation info pointer) is a structure
+ * filled with information needed to propagate the command execution to AOF
+ * and slaves, in the case a consumer group was passed: we need to generate
+ * XCLAIM commands to create the pending list into AOF/slaves in that case.
+ *
+ * If 'spi' is set to NULL no propagation will happen even if the group was
+ * given, but currently such a feature is never used by the code base that
+ * will always pass 'spi' and propagate when a group is passed.
+ *
+ * Note that this function is recursive in certain cases. When it's called
+ * with a non NULL group and consumer argument, it may call
+ * streamReplyWithRangeFromConsumerPEL() in order to get entries from the
+ * consumer pending entries list. However such a function will then call
+ * streamReplyWithRange() in order to emit single entries (found in the
+ * PEL by ID) to the client. This is the use case for the STREAM_RWR_RAWENTRIES
+ * flag.
+ */
+#define STREAM_RWR_NOACK (1<<0)      /* Do not create entries in the PEL. */
+#define STREAM_RWR_RAWENTRIES (1<<1) /* Do not emit protocol for array
+                                        boundaries, just the entries. */
+#define STREAM_RWR_HISTORY (1<<2)    /* Only serve consumer local PEL. */
+size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count, int rev, streamCG *group, streamConsumer *consumer, int flags, streamPropInfo *spi) {
+    void *arraylen_ptr = NULL;
+    size_t arraylen = 0;
+    streamIterator si;
+    int64_t numfields;
+    streamID id;
+    /* True when the group last_id advanced under NOACK: in that case no
+     * XCLAIM is emitted, so the new last ID must be propagated explicitly
+     * at the end (see streamPropagateGroupID() below). */
+    int propagate_last_id = 0;
+    int noack = flags & STREAM_RWR_NOACK;
+
+    /* If the client is asking for some history, we serve it using a
+     * different function, so that we return entries *solely* from its
+     * own PEL. This ensures each consumer will always and only see
+     * the history of messages delivered to it and not yet confirmed
+     * as delivered. */
+    if (group && (flags & STREAM_RWR_HISTORY)) {
+        return streamReplyWithRangeFromConsumerPEL(c,s,start,end,count,
+                                                   consumer);
+    }
+
+    if (!(flags & STREAM_RWR_RAWENTRIES))
+        arraylen_ptr = addReplyDeferredLen(c);
+    streamIteratorStart(&si,s,start,end,rev);
+    while(streamIteratorGetID(&si,&id,&numfields)) {
+        /* Update the group last_id if needed. */
+        if (group && streamCompareID(&id,&group->last_id) > 0) {
+            if (group->entries_read != SCG_INVALID_ENTRIES_READ && !streamRangeHasTombstones(s,&id,NULL)) {
+                /* A valid counter and no future tombstones mean we can
+                 * increment the read counter to keep tracking the group's
+                 * progress. */
+                group->entries_read++;
+            } else if (s->entries_added) {
+                /* The group's counter may be invalid, so we try to obtain it. */
+                group->entries_read = streamEstimateDistanceFromFirstEverEntry(s,&id);
+            }
+            group->last_id = id;
+            /* Group last ID should be propagated only if NOACK was
+             * specified, otherwise the last id will be included
+             * in the propagation of XCLAIM itself. */
+            if (noack) propagate_last_id = 1;
+        }
+
+        /* Emit a two elements array for each item. The first is
+         * the ID, the second is an array of field-value pairs. */
+        addReplyArrayLen(c,2);
+        addReplyStreamID(c,&id);
+
+        addReplyArrayLen(c,numfields*2);
+
+        /* Emit the field-value pairs. */
+        while(numfields--) {
+            unsigned char *key, *value;
+            int64_t key_len, value_len;
+            streamIteratorGetField(&si,&key,&value,&key_len,&value_len);
+            addReplyBulkCBuffer(c,key,key_len);
+            addReplyBulkCBuffer(c,value,value_len);
+        }
+
+        /* If a group is passed, we need to create an entry in the
+         * PEL (pending entries list) of this group *and* this consumer.
+         *
+         * Note that we cannot be sure about the fact the message is not
+         * already owned by another consumer, because the admin is able
+         * to change the consumer group last delivered ID using the
+         * XGROUP SETID command. So if we find that there is already
+         * a NACK for the entry, we need to associate it to the new
+         * consumer. */
+        if (group && !noack) {
+            unsigned char buf[sizeof(streamID)];
+            streamEncodeID(buf,&id);
+
+            /* Try to add a new NACK. Most of the time this will work and
+             * will not require extra lookups. We'll fix the problem later
+             * if we find that there is already a entry for this ID. */
+            streamNACK *nack = streamCreateNACK(consumer);
+            int group_inserted =
+                raxTryInsert(group->pel,buf,sizeof(buf),nack,NULL);
+            int consumer_inserted =
+                raxTryInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
+
+            /* Now we can check if the entry was already busy, and
+             * in that case reassign the entry to the new consumer,
+             * or update it if the consumer is the same as before. */
+            if (group_inserted == 0) {
+                /* The same NACK was inserted in both trees above, so the
+                 * freshly created one is discarded and the existing NACK
+                 * (found in the group PEL) is re-owned by this consumer. */
+                streamFreeNACK(nack);
+                nack = raxFind(group->pel,buf,sizeof(buf));
+                serverAssert(nack != raxNotFound);
+                raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL);
+                /* Update the consumer and NACK metadata. */
+                nack->consumer = consumer;
+                nack->delivery_time = commandTimeSnapshot();
+                nack->delivery_count = 1;
+                /* Add the entry in the new consumer local PEL. */
+                raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
+            } else if (group_inserted == 1 && consumer_inserted == 0) {
+                serverPanic("NACK half-created. Should not be possible.");
+            }
+
+            /* Delivering an entry counts as activity for this consumer. */
+            consumer->active_time = commandTimeSnapshot();
+
+            /* Propagate as XCLAIM. */
+            if (spi) {
+                robj *idarg = createObjectFromStreamID(&id);
+                streamPropagateXCLAIM(c,spi->keyname,group,spi->groupname,idarg,nack);
+                decrRefCount(idarg);
+            }
+        }
+
+        arraylen++;
+        if (count && count == arraylen) break;
+    }
+
+    if (spi && propagate_last_id)
+        streamPropagateGroupID(c,spi->keyname,group,spi->groupname);
+
+    streamIteratorStop(&si);
+    if (arraylen_ptr) setDeferredArrayLen(c,arraylen_ptr,arraylen);
+    return arraylen;
+}
+
+/* This is a helper function for streamReplyWithRange() when called with
+ * group and consumer arguments, but with a range that is referring to already
+ * delivered messages. In this case we just emit messages that are already
+ * in the history of the consumer, fetching the IDs from its PEL.
+ *
+ * Note that this function does not have a 'rev' argument because it's not
+ * possible to iterate in reverse using a group. Basically this function
+ * is only called as a result of the XREADGROUP command.
+ *
+ * This function is more expensive because it needs to inspect the PEL and then
+ * seek into the radix tree of the messages in order to emit the full message
+ * to the client. However clients only reach this code path when they are
+ * fetching the history of already retrieved messages, which is rare. */
+size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer) {
+    raxIterator ri;
+    unsigned char startkey[sizeof(streamID)];
+    unsigned char endkey[sizeof(streamID)];
+    streamEncodeID(startkey,start);
+    if (end) streamEncodeID(endkey,end);
+
+    size_t arraylen = 0;
+    void *arraylen_ptr = addReplyDeferredLen(c);
+    raxStart(&ri,consumer->pel);
+    raxSeek(&ri,">=",startkey,sizeof(startkey));
+    while(raxNext(&ri) && (!count || arraylen < count)) {
+        /* Stop once past the requested end ID. We must compare against the
+         * big-endian encoded 'endkey', which is the same representation
+         * used for the rax keys: comparing against the raw streamID struct
+         * bytes ('end') would yield wrong results on little-endian hosts. */
+        if (end && memcmp(ri.key,endkey,ri.key_len) > 0) break;
+        streamID thisid;
+        streamDecodeID(ri.key,&thisid);
+        if (streamReplyWithRange(c,s,&thisid,&thisid,1,0,NULL,NULL,
+                                 STREAM_RWR_RAWENTRIES,NULL) == 0)
+        {
+            /* Note that we may have a not acknowledged entry in the PEL
+             * about a message that's no longer here because was removed
+             * by the user by other means. In that case we signal it emitting
+             * the ID but then a NULL entry for the fields. */
+            addReplyArrayLen(c,2);
+            addReplyStreamID(c,&thisid);
+            addReplyNullArray(c);
+        } else {
+            /* The entry is still in the stream: refresh its delivery
+             * metadata since we are delivering it again right now. */
+            streamNACK *nack = ri.data;
+            nack->delivery_time = commandTimeSnapshot();
+            nack->delivery_count++;
+        }
+        arraylen++;
+    }
+    raxStop(&ri);
+    setDeferredArrayLen(c,arraylen_ptr,arraylen);
+    return arraylen;
+}
+
+/* -----------------------------------------------------------------------
+ * Stream commands implementation
+ * ----------------------------------------------------------------------- */
+
+/* Look up the stream at 'key' for writing and return the corresponding
+ * object. When the key is missing and 'no_create' is zero, an empty stream
+ * is created and added to the database; with 'no_create' non-zero a NULL
+ * reply is sent instead. Returns NULL (after replying to the client) on a
+ * wrong-type key or on a missing key with 'no_create' set. */
+robj *streamTypeLookupWriteOrCreate(client *c, robj *key, int no_create) {
+    robj *o = lookupKeyWrite(c->db,key);
+    if (checkType(c,o,OBJ_STREAM)) return NULL;
+    if (o != NULL) return o;
+    if (no_create) {
+        addReplyNull(c);
+        return NULL;
+    }
+    o = createStreamObject();
+    dbAdd(c->db,key,o);
+    return o;
+}
+
+/* Parse a stream ID in the format given by clients to Redis, that is
+ * <ms>-<seq>, and converts it into a streamID structure. If
+ * the specified ID is invalid C_ERR is returned and an error is reported
+ * to the client, otherwise C_OK is returned. The ID may be in incomplete
+ * form, just stating the milliseconds time part of the stream. In such a case
+ * the missing part is set according to the value of 'missing_seq' parameter.
+ *
+ * The IDs "-" and "+" specify respectively the minimum and maximum IDs
+ * that can be represented. If 'strict' is set to 1, "-" and "+" will be
+ * treated as an invalid ID.
+ *
+ * The ID form <ms>-* specifies a milliseconds-only ID, leaving the sequence part
+ * to be autogenerated. When a non-NULL 'seq_given' argument is provided, this
+ * form is accepted and the argument is set to 0 unless the sequence part is
+ * specified.
+ *
+ * If 'c' is set to NULL, no reply is sent to the client. */
+int streamGenericParseIDOrReply(client *c, const robj *o, streamID *id, uint64_t missing_seq, int strict, int *seq_given) {
+    char buf[128];
+    size_t len = sdslen(o->ptr);
+    if (len >= sizeof(buf)) goto invalid;
+    memcpy(buf,o->ptr,len+1); /* Copy the null terminator as well. */
+
+    /* "-" and "+" are only valid when 'strict' is not set. */
+    int is_special = (buf[0] == '-' || buf[0] == '+') && buf[1] == '\0';
+    if (strict && is_special) goto invalid;
+
+    if (seq_given != NULL) *seq_given = 1;
+
+    if (is_special) {
+        /* "-" maps to the minimum representable ID, "+" to the maximum. */
+        uint64_t v = (buf[0] == '-') ? 0 : UINT64_MAX;
+        id->ms = v;
+        id->seq = v;
+        return C_OK;
+    }
+
+    /* Parse the <ms>[-<seq>] form. */
+    unsigned long long ms, seq;
+    char *dot = strchr(buf,'-');
+    if (dot) *dot = '\0';
+    if (!string2ull(buf,&ms)) goto invalid;
+    if (!dot) {
+        seq = missing_seq;
+    } else if (seq_given != NULL && dot[1] == '*' && dot[2] == '\0') {
+        /* The <ms>-* form: the sequence is left for auto generation. */
+        seq = 0;
+        *seq_given = 0;
+    } else if (!string2ull(dot+1,&seq)) {
+        goto invalid;
+    }
+    id->ms = ms;
+    id->seq = seq;
+    return C_OK;
+
+invalid:
+    if (c) addReplyError(c,"Invalid stream ID specified as stream "
+                           "command argument");
+    return C_ERR;
+}
+
+/* Wrapper for streamGenericParseIDOrReply() used by module API:
+ * non-strict parse, no client reply, missing sequence defaults to 0. */
+int streamParseID(const robj *o, streamID *id) {
+    return streamGenericParseIDOrReply(NULL,o,id,0,0,NULL);
+}
+
+/* Wrapper for streamGenericParseIDOrReply() with 'strict' argument set to
+ * 0, to be used when - and + are acceptable IDs. On parse error C_ERR is
+ * returned and an error reply is sent to the client. */
+int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) {
+    return streamGenericParseIDOrReply(c,o,id,missing_seq,0,NULL);
+}
+
+/* Wrapper for streamGenericParseIDOrReply() with 'strict' argument set to
+ * 1, to be used when we want to return an error if the special IDs + or -
+ * are provided. When 'seq_given' is non-NULL, the <ms>-* form is accepted
+ * and the flag reports whether the sequence part was explicit. */
+int streamParseStrictIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int *seq_given) {
+    return streamGenericParseIDOrReply(c,o,id,missing_seq,1,seq_given);
+}
+
+/* Helper for parsing a stream ID used as a range query endpoint. When the
+ * 'exclude' argument is NULL the interval is treated as closed (inclusive)
+ * and streamParseIDOrReply() is used. Otherwise '*exclude' is set when the
+ * ID carries the "(" open-interval prefix, and in that case the remainder
+ * is parsed with streamParseStrictIDOrReply(). Returns C_OK / C_ERR, and
+ * on error a reply was already sent to the client. */
+int streamParseIntervalIDOrReply(client *c, robj *o, streamID *id, int *exclude, uint64_t missing_seq) {
+    char *str = o->ptr;
+    size_t len = sdslen(str);
+    int res;
+
+    if (exclude != NULL) *exclude = (len > 1 && str[0] == '(');
+    if (exclude != NULL && *exclude) {
+        /* Strip the "(" prefix and parse the rest as a strict ID. */
+        robj *tmp = createStringObject(str+1,len-1);
+        res = streamParseStrictIDOrReply(c,tmp,id,missing_seq,NULL);
+        decrRefCount(tmp);
+    } else {
+        res = streamParseIDOrReply(c,o,id,missing_seq);
+    }
+    return (res == C_ERR) ? C_ERR : C_OK;
+}
+
+/* Replace the "~" (approximate trim) argument at position 'idx' of the
+ * current command with "=", so the propagated command trims exactly. */
+void streamRewriteApproxSpecifier(client *c, int idx) {
+    rewriteClientCommandArgument(c,idx,shared.special_equals);
+}
+
+/* We propagate MAXLEN/MINID ~ <count> as MAXLEN/MINID = <actual-value>,
+ * otherwise trimming is no longer deterministic on replicas / AOF. The
+ * argument at 'idx' is rewritten with the post-trim stream length (MAXLEN)
+ * or the post-trim first entry ID (MINID). */
+void streamRewriteTrimArgument(client *c, stream *s, int trim_strategy, int idx) {
+    robj *newarg;
+    if (trim_strategy != TRIM_STRATEGY_MAXLEN) {
+        streamID first;
+        streamGetEdgeID(s,1,0,&first);
+        newarg = createObjectFromStreamID(&first);
+    } else {
+        newarg = createStringObjectFromLongLong(s->length);
+    }
+    rewriteClientCommandArgument(c,idx,newarg);
+    decrRefCount(newarg);
+}
+
+/* XADD key [(MAXLEN [~|=] <count> | MINID [~|=] <id>) [LIMIT <entries>]] [NOMKSTREAM] <ID or *> [field value] [field value] ... */
+void xaddCommand(client *c) {
+    /* Parse options. */
+    streamAddTrimArgs parsed_args;
+    int idpos = streamParseAddOrTrimArgsOrReply(c, &parsed_args, 1);
+    if (idpos < 0)
+        return; /* streamParseAddOrTrimArgsOrReply already replied. */
+    int field_pos = idpos+1; /* The ID is always one argument before the first field */
+
+    /* Check arity: at least one field-value pair, in an even number of
+     * remaining arguments. */
+    if ((c->argc - field_pos) < 2 || ((c->argc-field_pos) % 2) == 1) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    /* Return ASAP if minimal ID (0-0) was given so we avoid possibly creating
+     * a new stream and have streamAppendItem fail, leaving an empty key in the
+     * database. */
+    if (parsed_args.id_given && parsed_args.seq_given &&
+        parsed_args.id.ms == 0 && parsed_args.id.seq == 0)
+    {
+        addReplyError(c,"The ID specified in XADD must be greater than 0-0");
+        return;
+    }
+
+    /* Lookup the stream at key. */
+    robj *o;
+    stream *s;
+    if ((o = streamTypeLookupWriteOrCreate(c,c->argv[1],parsed_args.no_mkstream)) == NULL) return;
+    s = o->ptr;
+
+    /* Return ASAP if the stream has reached the last possible ID */
+    if (s->last_id.ms == UINT64_MAX && s->last_id.seq == UINT64_MAX) {
+        addReplyError(c,"The stream has exhausted the last possible ID, "
+                        "unable to add more items");
+        return;
+    }
+
+    /* Append using the low level function and return the ID. */
+    errno = 0;
+    streamID id;
+    if (streamAppendItem(s,c->argv+field_pos,(c->argc-field_pos)/2,
+        &id,parsed_args.id_given ? &parsed_args.id : NULL,parsed_args.seq_given) == C_ERR)
+    {
+        /* streamAppendItem() sets errno on failure: EDOM means the given
+         * ID is not greater than the current top entry's ID. */
+        serverAssert(errno != 0);
+        if (errno == EDOM)
+            addReplyError(c,"The ID specified in XADD is equal or smaller than "
+                            "the target stream top item");
+        else
+            addReplyError(c,"Elements are too large to be stored");
+        return;
+    }
+    sds replyid = createStreamIDString(&id);
+    addReplyBulkCBuffer(c, replyid, sdslen(replyid));
+
+    signalModifiedKey(c,c->db,c->argv[1]);
+    notifyKeyspaceEvent(NOTIFY_STREAM,"xadd",c->argv[1],c->db->id);
+    server.dirty++;
+
+    /* Trim if needed. */
+    if (parsed_args.trim_strategy != TRIM_STRATEGY_NONE) {
+        if (streamTrim(s, &parsed_args)) {
+            notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id);
+        }
+        if (parsed_args.approx_trim) {
+            /* In case our trimming was limited (by LIMIT or by ~) we must
+             * re-write the relevant trim argument to make sure there will be
+             * no inconsistencies in AOF loading or in the replica.
+             * It's enough to check only args->approx because there is no
+             * way LIMIT is given without the ~ option. */
+            streamRewriteApproxSpecifier(c,parsed_args.trim_strategy_arg_idx-1);
+            streamRewriteTrimArgument(c,s,parsed_args.trim_strategy,parsed_args.trim_strategy_arg_idx);
+        }
+    }
+
+    /* Let's rewrite the ID argument with the one actually generated for
+     * AOF/replication propagation. */
+    if (!parsed_args.id_given || !parsed_args.seq_given) {
+        /* Note: createObject() takes ownership of 'replyid' here. */
+        robj *idarg = createObject(OBJ_STRING, replyid);
+        rewriteClientCommandArgument(c, idpos, idarg);
+        decrRefCount(idarg);
+    } else {
+        sdsfree(replyid);
+    }
+
+    /* We need to signal to blocked clients that there is new data on this
+     * stream. */
+    signalKeyAsReady(c->db, c->argv[1], OBJ_STREAM);
+}
+
+/* XRANGE/XREVRANGE actual implementation.
+ * The 'start' and 'end' IDs are parsed as follows:
+ *   Incomplete 'start' has its sequence set to 0, and 'end' to UINT64_MAX.
+ *   "-" and "+" mean the minimal and maximal ID values, respectively.
+ *   The "(" prefix means an open (exclusive) range, so XRANGE stream (1-0 (2-0
+ *   will match anything from 1-1 and 1-UINT64_MAX.
+ */
+void xrangeGenericCommand(client *c, int rev) {
+    robj *o;
+    stream *s;
+    streamID startid, endid;
+    long long count = -1; /* -1 means: no COUNT option given. */
+    robj *startarg = rev ? c->argv[3] : c->argv[2];
+    robj *endarg = rev ? c->argv[2] : c->argv[3];
+    int startex = 0, endex = 0; /* Exclusive ("(" prefixed) endpoints. */
+
+    /* Parse start and end IDs. */
+    if (streamParseIntervalIDOrReply(c,startarg,&startid,&startex,0) != C_OK)
+        return;
+    if (startex && streamIncrID(&startid) != C_OK) {
+        /* An exclusive start equal to the maximum ID can't be incremented. */
+        addReplyError(c,"invalid start ID for the interval");
+        return;
+    }
+    if (streamParseIntervalIDOrReply(c,endarg,&endid,&endex,UINT64_MAX) != C_OK)
+        return;
+    if (endex && streamDecrID(&endid) != C_OK) {
+        /* An exclusive end equal to the minimum ID can't be decremented. */
+        addReplyError(c,"invalid end ID for the interval");
+        return;
+    }
+
+    /* Parse the COUNT option if any. */
+    if (c->argc > 4) {
+        for (int j = 4; j < c->argc; j++) {
+            int additional = c->argc-j-1;
+            if (strcasecmp(c->argv[j]->ptr,"COUNT") == 0 && additional >= 1) {
+                if (getLongLongFromObjectOrReply(c,c->argv[j+1],&count,NULL)
+                    != C_OK) return;
+                if (count < 0) count = 0;
+                j++; /* Consume additional arg. */
+            } else {
+                addReplyErrorObject(c,shared.syntaxerr);
+                return;
+            }
+        }
+    }
+
+    /* Return the specified range to the user. */
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyarray)) == NULL ||
+         checkType(c,o,OBJ_STREAM)) return;
+
+    s = o->ptr;
+
+    if (count == 0) {
+        /* An explicit COUNT of 0 is served as a null reply. */
+        addReplyNullArray(c);
+    } else {
+        /* streamReplyWithRange() interprets a count of 0 as "no limit". */
+        if (count == -1) count = 0;
+        streamReplyWithRange(c,s,&startid,&endid,count,rev,NULL,NULL,0,NULL);
+    }
+}
+
+/* XRANGE key start end [COUNT <n>] */
+void xrangeCommand(client *c) {
+    xrangeGenericCommand(c,0); /* rev=0: iterate from start to end. */
+}
+
+/* XREVRANGE key end start [COUNT <n>] */
+void xrevrangeCommand(client *c) {
+    xrangeGenericCommand(c,1); /* rev=1: iterate from end to start. */
+}
+
+/* XLEN <key>
+ *
+ * Reply with the number of entries inside the stream, or 0 when the key
+ * does not exist. */
+void xlenCommand(client *c) {
+    robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
+    if (o == NULL || checkType(c,o,OBJ_STREAM)) return;
+    stream *s = o->ptr;
+    addReplyLongLong(c,s->length);
+}
+
+/* XREAD [BLOCK <milliseconds>] [COUNT <count>] STREAMS key_1 key_2 ... key_N
+ *       ID_1 ID_2 ... ID_N
+ *
+ * This function also implements the XREADGROUP command, which is like XREAD
+ * but accepting the [GROUP group-name consumer-name] additional option.
+ * This is useful because while XREAD is a read command and can be called
+ * on slaves, XREADGROUP is not. */
+#define XREAD_BLOCKED_DEFAULT_COUNT 1000
+void xreadCommand(client *c) {
+    long long timeout = -1; /* -1 means, no BLOCK argument given. */
+    long long count = 0;
+    int streams_count = 0;
+    int streams_arg = 0;
+    int noack = 0; /* True if NOACK option was specified. */
+    streamID static_ids[STREAMID_STATIC_VECTOR_LEN];
+    streamID *ids = static_ids;
+    streamCG **groups = NULL;
+    int xreadgroup = sdslen(c->argv[0]->ptr) == 10; /* XREAD or XREADGROUP? */
+    robj *groupname = NULL;
+    robj *consumername = NULL;
+
+    /* Parse arguments. */
+    for (int i = 1; i < c->argc; i++) {
+        int moreargs = c->argc-i-1;
+        char *o = c->argv[i]->ptr;
+        if (!strcasecmp(o,"BLOCK") && moreargs) {
+            if (c->flags & CLIENT_SCRIPT) {
+                /*
+                 * Although the CLIENT_DENY_BLOCKING flag should protect from blocking the client
+                 * on Lua/MULTI/RM_Call we want special treatment for Lua to keep backward compatibility.
+                 * There is no sense to use BLOCK option within Lua. */
+                addReplyErrorFormat(c, "%s command is not allowed with BLOCK option from scripts", (char *)c->argv[0]->ptr);
+                return;
+            }
+            i++;
+            if (getTimeoutFromObjectOrReply(c,c->argv[i],&timeout,
+                UNIT_MILLISECONDS) != C_OK) return;
+        } else if (!strcasecmp(o,"COUNT") && moreargs) {
+            i++;
+            if (getLongLongFromObjectOrReply(c,c->argv[i],&count,NULL) != C_OK)
+                return;
+            if (count < 0) count = 0;
+        } else if (!strcasecmp(o,"STREAMS") && moreargs) {
+            streams_arg = i+1;
+            streams_count = (c->argc-streams_arg);
+            if ((streams_count % 2) != 0) {
+                char symbol = xreadgroup ? '>' : '$';
+                addReplyErrorFormat(c,"Unbalanced '%s' list of streams: "
+                                      "for each stream key an ID or '%c' must be "
+                                      "specified.", c->cmd->fullname,symbol);
+                return;
+            }
+            streams_count /= 2; /* We have two arguments for each stream. */
+            break;
+        } else if (!strcasecmp(o,"GROUP") && moreargs >= 2) {
+            if (!xreadgroup) {
+                addReplyError(c,"The GROUP option is only supported by "
+                                "XREADGROUP. You called XREAD instead.");
+                return;
+            }
+            groupname = c->argv[i+1];
+            consumername = c->argv[i+2];
+            i += 2;
+        } else if (!strcasecmp(o,"NOACK")) {
+            if (!xreadgroup) {
+                addReplyError(c,"The NOACK option is only supported by "
+                                "XREADGROUP. You called XREAD instead.");
+                return;
+            }
+            noack = 1;
+        } else {
+            addReplyErrorObject(c,shared.syntaxerr);
+            return;
+        }
+    }
+
+    /* STREAMS option is mandatory. */
+    if (streams_arg == 0) {
+        addReplyErrorObject(c,shared.syntaxerr);
+        return;
+    }
+
+    /* If the user specified XREADGROUP then it must also
+     * provide the GROUP option. */
+    if (xreadgroup && groupname == NULL) {
+        addReplyError(c,"Missing GROUP option for XREADGROUP");
+        return;
+    }
+
+    /* Parse the IDs and resolve the group name. */
+    if (streams_count > STREAMID_STATIC_VECTOR_LEN)
+        ids = zmalloc(sizeof(streamID)*streams_count);
+    if (groupname) groups = zmalloc(sizeof(streamCG*)*streams_count);
+
+    for (int i = streams_arg + streams_count; i < c->argc; i++) {
+        /* Specifying "$" as last-known-id means that the client wants to be
+         * served with just the messages that will arrive into the stream
+         * starting from now. */
+        int id_idx = i - streams_arg - streams_count;
+        robj *key = c->argv[i-streams_count];
+        robj *o = lookupKeyRead(c->db,key);
+        if (checkType(c,o,OBJ_STREAM)) goto cleanup;
+        streamCG *group = NULL;
+
+        /* If a group was specified, then we need to be sure that the
+         * key and group actually exist. */
+        if (groupname) {
+            if (o == NULL ||
+                (group = streamLookupCG(o->ptr,groupname->ptr)) == NULL)
+            {
+                addReplyErrorFormat(c, "-NOGROUP No such key '%s' or consumer "
+                                       "group '%s' in XREADGROUP with GROUP "
+                                       "option",
+                                    (char*)key->ptr,(char*)groupname->ptr);
+                goto cleanup;
+            }
+            groups[id_idx] = group;
+        }
+
+        if (strcmp(c->argv[i]->ptr,"$") == 0) {
+            if (xreadgroup) {
+                addReplyError(c,"The $ ID is meaningless in the context of "
+                                "XREADGROUP: you want to read the history of "
+                                "this consumer by specifying a proper ID, or "
+                                "use the > ID to get new messages. The $ ID would "
+                                "just return an empty result set.");
+                goto cleanup;
+            }
+            if (o) {
+                stream *s = o->ptr;
+                ids[id_idx] = s->last_id;
+            } else {
+                ids[id_idx].ms = 0;
+                ids[id_idx].seq = 0;
+            }
+            continue;
+        } else if (strcmp(c->argv[i]->ptr,">") == 0) {
+            if (!xreadgroup) {
+                addReplyError(c,"The > ID can be specified only when calling "
+                                "XREADGROUP using the GROUP <group> "
+                                "<consumer> option.");
+                goto cleanup;
+            }
+            /* We use just the maximum ID to signal this is a ">" ID, anyway
+             * the code handling the blocking clients will have to update the
+             * ID later in order to match the changing consumer group last ID. */
+            ids[id_idx].ms = UINT64_MAX;
+            ids[id_idx].seq = UINT64_MAX;
+            continue;
+        }
+        if (streamParseStrictIDOrReply(c,c->argv[i],ids+id_idx,0,NULL) != C_OK)
+            goto cleanup;
+    }
+
+    /* Try to serve the client synchronously. */
+    size_t arraylen = 0;
+    void *arraylen_ptr = NULL;
+    for (int i = 0; i < streams_count; i++) {
+        robj *o = lookupKeyRead(c->db,c->argv[streams_arg+i]);
+        if (o == NULL) continue;
+        stream *s = o->ptr;
+        streamID *gt = ids+i; /* ID must be greater than this. */
+        int serve_synchronously = 0;
+        int serve_history = 0; /* True for XREADGROUP with ID != ">". */
+        streamConsumer *consumer = NULL; /* Unused if XREAD */
+        streamPropInfo spi = {c->argv[streams_arg+i],groupname}; /* Unused if XREAD */
+
+        /* Check if there are the conditions to serve the client
+         * synchronously. */
+        if (groups) {
+            /* If the consumer is blocked on a group, we always serve it
+             * synchronously (serving its local history) if the ID specified
+             * was not the special ">" ID. */
+            if (gt->ms != UINT64_MAX ||
+                gt->seq != UINT64_MAX)
+            {
+                serve_synchronously = 1;
+                serve_history = 1;
+            } else if (s->length) {
+                /* We also want to serve a consumer in a consumer group
+                 * synchronously in case the group top item delivered is smaller
+                 * than what the stream has inside. */
+                streamID maxid, *last = &groups[i]->last_id;
+                streamLastValidID(s, &maxid);
+                if (streamCompareID(&maxid, last) > 0) {
+                    serve_synchronously = 1;
+                    *gt = *last;
+                }
+            }
+            consumer = streamLookupConsumer(groups[i],consumername->ptr);
+            if (consumer == NULL) {
+                consumer = streamCreateConsumer(groups[i],consumername->ptr,
+                                                c->argv[streams_arg+i],
+                                                c->db->id,SCC_DEFAULT);
+                /* With NOACK no XCLAIM is propagated, so the consumer
+                 * creation must be replicated explicitly. */
+                if (noack)
+                    streamPropagateConsumerCreation(c,spi.keyname,
+                                                    spi.groupname,
+                                                    consumer->name);
+            }
+            consumer->seen_time = commandTimeSnapshot();
+        } else if (s->length) {
+            /* For consumers without a group, we serve synchronously if we can
+             * actually provide at least one item from the stream. */
+            streamID maxid;
+            streamLastValidID(s, &maxid);
+            if (streamCompareID(&maxid, gt) > 0) {
+                serve_synchronously = 1;
+            }
+        }
+
+        if (serve_synchronously) {
+            arraylen++;
+            if (arraylen == 1) arraylen_ptr = addReplyDeferredLen(c);
+            /* streamReplyWithRange() handles the 'start' ID as inclusive,
+             * so start from the next ID, since we want only messages with
+             * IDs greater than start. */
+            streamID start = *gt;
+            streamIncrID(&start);
+
+            /* Emit the two elements sub-array consisting of the name
+             * of the stream and the data we extracted from it. */
+            if (c->resp == 2) addReplyArrayLen(c,2);
+            addReplyBulk(c,c->argv[streams_arg+i]);
+
+            int flags = 0;
+            if (noack) flags |= STREAM_RWR_NOACK;
+            if (serve_history) flags |= STREAM_RWR_HISTORY;
+            streamReplyWithRange(c,s,&start,NULL,count,0,
+                                 groups ? groups[i] : NULL,
+                                 consumer, flags, &spi);
+            /* Serving a consumer group modifies group state (last ID and/or
+             * the PEL), so account for that in the dirty counter. */
+            if (groups) server.dirty++;
+        }
+    }
+
+    /* We replied synchronously! Set the top array len and return to caller. */
+    if (arraylen) {
+        if (c->resp == 2)
+            setDeferredArrayLen(c,arraylen_ptr,arraylen);
+        else
+            setDeferredMapLen(c,arraylen_ptr,arraylen);
+        goto cleanup;
+    }
+
+    /* Block if needed. */
+    if (timeout != -1) {
+        /* If we are not allowed to block the client, the only thing
+         * we can do is treating it as a timeout (even with timeout 0). */
+        if (c->flags & CLIENT_DENY_BLOCKING) {
+            addReplyNullArray(c);
+            goto cleanup;
+        }
+        /* Change any '$' to the current last ID of its stream: when we
+         * unblock on arriving data the command is re-processed, and if '$'
+         * stayed in place we would spin-block forever. */
+        for (int id_idx = 0; id_idx < streams_count; id_idx++) {
+            int arg_idx = id_idx + streams_arg + streams_count;
+            if (strcmp(c->argv[arg_idx]->ptr,"$") == 0) {
+                robj *argv_streamid = createObjectFromStreamID(&ids[id_idx]);
+                rewriteClientCommandArgument(c, arg_idx, argv_streamid);
+                decrRefCount(argv_streamid);
+            }
+        }
+        blockForKeys(c, BLOCKED_STREAM, c->argv+streams_arg, streams_count, timeout, xreadgroup);
+        goto cleanup;
+    }
+
+    /* No BLOCK option, nor any stream we can serve. Reply as with a
+     * timeout happened. */
+    addReplyNullArray(c);
+    /* Continue to cleanup... */
+
+cleanup: /* Cleanup. */
+
+    /* The command is propagated (in the READGROUP form) as a side effect
+     * of calling lower level APIs. So stop any implicit propagation. */
+    preventCommandPropagation(c);
+    if (ids != static_ids) zfree(ids);
+    zfree(groups);
+}
+
+/* -----------------------------------------------------------------------
+ * Low level implementation of consumer groups
+ * ----------------------------------------------------------------------- */
+
+/* Allocate and initialize a NACK (not-yet-acknowledged) entry: the
+ * delivery counter starts at 1, the delivery time is now, and ownership
+ * is assigned to 'consumer'. */
+streamNACK *streamCreateNACK(streamConsumer *consumer) {
+    streamNACK *nack = zmalloc(sizeof(*nack));
+    nack->consumer = consumer;
+    nack->delivery_count = 1;
+    nack->delivery_time = commandTimeSnapshot();
+    return nack;
+}
+
+/* Free a NACK entry. Note that the entry is not unlinked from any PEL
+ * radix tree here; the caller is in charge of that (see streamDelConsumer
+ * and the raxFreeWithCallback usage in streamFreeCG). */
+void streamFreeNACK(streamNACK *na) {
+    zfree(na);
+}
+
+/* Release a consumer structure: its name and its PEL radix tree.
+ *
+ * The NACKs referenced by the consumer PEL are NOT freed here, since they
+ * are shared with the group global PEL. When deleting a live consumer (as
+ * opposed to destroying the whole stream) the caller must handle those
+ * pending entries first. */
+void streamFreeConsumer(streamConsumer *sc) {
+    raxFree(sc->pel);
+    sdsfree(sc->name);
+    zfree(sc);
+}
+
+/* Create the consumer group 'name' inside stream 's', with the provided
+ * last delivered ID and entries-read counter. Returns NULL (with no side
+ * effects) when a group with the same name already exists, otherwise a
+ * pointer to the newly created group. */
+streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id, long long entries_read) {
+    if (s->cgroups == NULL) s->cgroups = raxNew();
+    if (raxFind(s->cgroups,(unsigned char*)name,namelen) != raxNotFound)
+        return NULL;
+
+    streamCG *group = zmalloc(sizeof(*group));
+    group->pel = raxNew();
+    group->consumers = raxNew();
+    group->last_id = *id;
+    group->entries_read = entries_read;
+    raxInsert(s->cgroups,(unsigned char*)name,namelen,group,NULL);
+    return group;
+}
+
+/* Free a consumer group and all its associated data: the group PEL (and
+ * the NACK entries it owns) and every consumer, whose local PELs share
+ * the same NACKs and are freed without value callbacks. */
+void streamFreeCG(streamCG *cg) {
+    raxFreeWithCallback(cg->pel,(void(*)(void*))streamFreeNACK);
+    raxFreeWithCallback(cg->consumers,(void(*)(void*))streamFreeConsumer);
+    zfree(cg);
+}
+
+/* Return the consumer group named 'groupname' inside the stream 's', or
+ * NULL if no such group exists (or the stream has no groups at all). */
+streamCG *streamLookupCG(stream *s, sds groupname) {
+    if (s->cgroups == NULL) return NULL;
+    void *found = raxFind(s->cgroups,(unsigned char*)groupname,
+                          sdslen(groupname));
+    return (found == raxNotFound) ? NULL : (streamCG*)found;
+}
+
+/* Create a consumer named 'name' inside the group 'cg' and return it.
+ * Returns NULL if a consumer with that name already exists. On success
+ * the keyspace is notified and the dirty counter incremented, unless the
+ * SCC_NO_NOTIFY / SCC_NO_DIRTIFY flags are given. */
+streamConsumer *streamCreateConsumer(streamCG *cg, sds name, robj *key, int dbid, int flags) {
+    if (cg == NULL) return NULL;
+    streamConsumer *consumer = zmalloc(sizeof(*consumer));
+    if (raxTryInsert(cg->consumers,(unsigned char*)name,
+                     sdslen(name),consumer,NULL) == 0)
+    {
+        /* The name is already taken: abort without side effects. */
+        zfree(consumer);
+        return NULL;
+    }
+    consumer->name = sdsdup(name);
+    consumer->pel = raxNew();
+    consumer->active_time = -1;
+    consumer->seen_time = commandTimeSnapshot();
+    if (!(flags & SCC_NO_DIRTIFY)) server.dirty++;
+    if (!(flags & SCC_NO_NOTIFY))
+        notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-createconsumer",key,dbid);
+    return consumer;
+}
+
+/* Return the consumer named 'name' in the group 'cg', or NULL if there
+ * is no such consumer (or 'cg' itself is NULL). */
+streamConsumer *streamLookupConsumer(streamCG *cg, sds name) {
+    if (cg == NULL) return NULL;
+    void *found = raxFind(cg->consumers,(unsigned char*)name,sdslen(name));
+    return (found == raxNotFound) ? NULL : (streamConsumer*)found;
+}
+
+/* Delete the consumer specified in the consumer group 'cg'. The pending
+ * entries owned by the consumer are removed from the group global PEL as
+ * well and freed, so its in-flight messages are dropped, not reassigned. */
+void streamDelConsumer(streamCG *cg, streamConsumer *consumer) {
+    /* Iterate all the consumer pending messages, deleting every corresponding
+     * entry from the global entry. */
+    raxIterator ri;
+    raxStart(&ri,consumer->pel);
+    raxSeek(&ri,"^",NULL,0);
+    while(raxNext(&ri)) {
+        streamNACK *nack = ri.data;
+        raxRemove(cg->pel,ri.key,ri.key_len,NULL);
+        streamFreeNACK(nack);
+    }
+    raxStop(&ri);
+
+    /* Deallocate the consumer. */
+    raxRemove(cg->consumers,(unsigned char*)consumer->name,
+              sdslen(consumer->name),NULL);
+    streamFreeConsumer(consumer);
+}
+
+/* -----------------------------------------------------------------------
+ * Consumer groups commands
+ * ----------------------------------------------------------------------- */
+
+/* XGROUP CREATE <key> <groupname> <id or $> [MKSTREAM] [ENTRIESREAD entries_read]
+ * XGROUP SETID <key> <groupname> <id or $> [ENTRIESREAD entries_read]
+ * XGROUP DESTROY <key> <groupname>
+ * XGROUP CREATECONSUMER <key> <groupname> <consumer>
+ * XGROUP DELCONSUMER <key> <groupname> <consumername> */
+void xgroupCommand(client *c) {
+ stream *s = NULL;
+ sds grpname = NULL;
+ streamCG *cg = NULL;
+ char *opt = c->argv[1]->ptr; /* Subcommand name. */
+ int mkstream = 0;
+ long long entries_read = SCG_INVALID_ENTRIES_READ;
+ robj *o;
+
+ /* Everything but the "HELP" option requires a key and group name. */
+ if (c->argc >= 4) {
+ /* Parse optional arguments for CREATE and SETID */
+ int i = 5; /* Options start right after <key> <groupname> <id>. */
+ int create_subcmd = !strcasecmp(opt,"CREATE");
+ int setid_subcmd = !strcasecmp(opt,"SETID");
+ while (i < c->argc) {
+ if (create_subcmd && !strcasecmp(c->argv[i]->ptr,"MKSTREAM")) {
+ mkstream = 1;
+ i++;
+ } else if ((create_subcmd || setid_subcmd) && !strcasecmp(c->argv[i]->ptr,"ENTRIESREAD") && i + 1 < c->argc) {
+ if (getLongLongFromObjectOrReply(c,c->argv[i+1],&entries_read,NULL) != C_OK)
+ return;
+ /* NOTE(review): this check also accepts 0, while the error text
+ * says "positive" — consider "non-negative or -1". */
+ if (entries_read < 0 && entries_read != SCG_INVALID_ENTRIES_READ) {
+ addReplyError(c,"value for ENTRIESREAD must be positive or -1");
+ return;
+ }
+ i += 2;
+ } else {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+ }
+
+ /* Look up the stream: a missing key is tolerated here because CREATE
+ * with MKSTREAM can create it later. */
+ o = lookupKeyWrite(c->db,c->argv[2]);
+ if (o) {
+ if (checkType(c,o,OBJ_STREAM)) return;
+ s = o->ptr;
+ }
+ grpname = c->argv[3]->ptr;
+ }
+
+ /* Check for missing key/group. */
+ if (c->argc >= 4 && !mkstream) {
+ /* At this point key must exist, or there is an error. */
+ if (s == NULL) {
+ addReplyError(c,
+ "The XGROUP subcommand requires the key to exist. "
+ "Note that for CREATE you may want to use the MKSTREAM "
+ "option to create an empty stream automatically.");
+ return;
+ }
+
+ /* Certain subcommands require the group to exist. */
+ if ((cg = streamLookupCG(s,grpname)) == NULL &&
+ (!strcasecmp(opt,"SETID") ||
+ !strcasecmp(opt,"CREATECONSUMER") ||
+ !strcasecmp(opt,"DELCONSUMER")))
+ {
+ addReplyErrorFormat(c, "-NOGROUP No such consumer group '%s' "
+ "for key name '%s'",
+ (char*)grpname, (char*)c->argv[2]->ptr);
+ return;
+ }
+ }
+
+ /* Dispatch the different subcommands. */
+ if (c->argc == 2 && !strcasecmp(opt,"HELP")) {
+ const char *help[] = {
+"CREATE <key> <groupname> <id|$> [option]",
+" Create a new consumer group. Options are:",
+" * MKSTREAM",
+" Create the empty stream if it does not exist.",
+" * ENTRIESREAD entries_read",
+" Set the group's entries_read counter (internal use).",
+"CREATECONSUMER <key> <groupname> <consumer>",
+" Create a new consumer in the specified group.",
+"DELCONSUMER <key> <groupname> <consumer>",
+" Remove the specified consumer.",
+"DESTROY <key> <groupname>",
+" Remove the specified group.",
+"SETID <key> <groupname> <id|$> [ENTRIESREAD entries_read]",
+" Set the current group ID and entries_read counter.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(opt,"CREATE") && (c->argc >= 5 && c->argc <= 8)) {
+ streamID id;
+ /* "$" means: start delivering only new entries, i.e. the group's
+ * last_id is the stream's current last ID (0-0 for a new stream). */
+ if (!strcmp(c->argv[4]->ptr,"$")) {
+ if (s) {
+ id = s->last_id;
+ } else {
+ id.ms = 0;
+ id.seq = 0;
+ }
+ } else if (streamParseStrictIDOrReply(c,c->argv[4],&id,0,NULL) != C_OK) {
+ return;
+ }
+
+ /* Handle the MKSTREAM option now that the command can no longer fail. */
+ if (s == NULL) {
+ serverAssert(mkstream);
+ o = createStreamObject();
+ dbAdd(c->db,c->argv[2],o);
+ s = o->ptr;
+ signalModifiedKey(c,c->db,c->argv[2]);
+ }
+
+ streamCG *cg = streamCreateCG(s,grpname,sdslen(grpname),&id,entries_read);
+ if (cg) {
+ addReply(c,shared.ok);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-create",
+ c->argv[2],c->db->id);
+ } else {
+ addReplyError(c,"-BUSYGROUP Consumer Group name already exists");
+ }
+ } else if (!strcasecmp(opt,"SETID") && (c->argc == 5 || c->argc == 7)) {
+ streamID id;
+ if (!strcmp(c->argv[4]->ptr,"$")) {
+ id = s->last_id;
+ } else if (streamParseIDOrReply(c,c->argv[4],&id,0) != C_OK) {
+ return;
+ }
+ /* entries_read is SCG_INVALID_ENTRIES_READ unless ENTRIESREAD was
+ * given, so a plain SETID resets the counter to "unknown". */
+ cg->last_id = id;
+ cg->entries_read = entries_read;
+ addReply(c,shared.ok);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-setid",c->argv[2],c->db->id);
+ } else if (!strcasecmp(opt,"DESTROY") && c->argc == 4) {
+ if (cg) {
+ raxRemove(s->cgroups,(unsigned char*)grpname,sdslen(grpname),NULL);
+ streamFreeCG(cg);
+ addReply(c,shared.cone);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-destroy",
+ c->argv[2],c->db->id);
+ /* We want to unblock any XREADGROUP consumers with -NOGROUP. */
+ signalKeyAsReady(c->db,c->argv[2],OBJ_STREAM);
+ } else {
+ addReply(c,shared.czero);
+ }
+ } else if (!strcasecmp(opt,"CREATECONSUMER") && c->argc == 5) {
+ /* streamCreateConsumer returns NULL if the consumer already exists,
+ * which maps to the 0 reply. */
+ streamConsumer *created = streamCreateConsumer(cg,c->argv[4]->ptr,c->argv[2],
+ c->db->id,SCC_DEFAULT);
+ addReplyLongLong(c,created ? 1 : 0);
+ } else if (!strcasecmp(opt,"DELCONSUMER") && c->argc == 5) {
+ long long pending = 0;
+ streamConsumer *consumer = streamLookupConsumer(cg,c->argv[4]->ptr);
+ if (consumer) {
+ /* Delete the consumer and returns the number of pending messages
+ * that were yet associated with such a consumer. */
+ pending = raxSize(consumer->pel);
+ streamDelConsumer(cg,consumer);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-delconsumer",
+ c->argv[2],c->db->id);
+ }
+ addReplyLongLong(c,pending);
+ } else {
+ addReplySubcommandSyntaxError(c);
+ }
+}
+
+/* XSETID <stream> <id> [ENTRIESADDED entries_added] [MAXDELETEDID max_deleted_entry_id]
+ *
+ * Set the internal "last ID", "added entries" and "maximal deleted entry ID"
+ * of a stream. */
+void xsetidCommand(client *c) {
+ streamID id, max_xdel_id = {0, 0};
+ long long entries_added = -1; /* -1 means ENTRIESADDED not given. */
+
+ if (streamParseStrictIDOrReply(c,c->argv[2],&id,0,NULL) != C_OK)
+ return;
+
+ /* Parse all options before touching the key, so syntax errors are
+ * reported first and the command has all-or-nothing semantics. */
+ int i = 3;
+ while (i < c->argc) {
+ int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
+ char *opt = c->argv[i]->ptr;
+ if (!strcasecmp(opt,"ENTRIESADDED") && moreargs) {
+ if (getLongLongFromObjectOrReply(c,c->argv[i+1],&entries_added,NULL) != C_OK) {
+ return;
+ /* NOTE(review): 0 passes this check, but the message says
+ * "positive" — "non-negative" would be more accurate. */
+ } else if (entries_added < 0) {
+ addReplyError(c,"entries_added must be positive");
+ return;
+ }
+ i += 2;
+ } else if (!strcasecmp(opt,"MAXDELETEDID") && moreargs) {
+ if (streamParseStrictIDOrReply(c,c->argv[i+1],&max_xdel_id,0,NULL) != C_OK) {
+ return;
+ } else if (streamCompareID(&id,&max_xdel_id) < 0) {
+ addReplyError(c,"The ID specified in XSETID is smaller than the provided max_deleted_entry_id");
+ return;
+ }
+ i += 2;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
+ if (o == NULL || checkType(c,o,OBJ_STREAM)) return;
+ stream *s = o->ptr;
+
+ /* The new last ID may never go below the current maximal tombstone. */
+ if (streamCompareID(&id,&s->max_deleted_entry_id) < 0) {
+ addReplyError(c,"The ID specified in XSETID is smaller than current max_deleted_entry_id");
+ return;
+ }
+
+ /* If the stream has at least one item, we want to check that the user
+ * is setting a last ID that is equal or greater than the current top
+ * item, otherwise the fundamental ID monotonicity assumption is violated. */
+ if (s->length > 0) {
+ streamID maxid;
+ streamLastValidID(s,&maxid);
+
+ if (streamCompareID(&id,&maxid) < 0) {
+ addReplyError(c,"The ID specified in XSETID is smaller than the target stream top item");
+ return;
+ }
+
+ /* If an entries_added was provided, it can't be lower than the length. */
+ if (entries_added != -1 && s->length > (uint64_t)entries_added) {
+ addReplyError(c,"The entries_added specified in XSETID is smaller than the target stream length");
+ return;
+ }
+ }
+
+ /* All checks passed: apply the new metadata. Options that were not
+ * given (sentinel values) leave the existing fields untouched. */
+ s->last_id = id;
+ if (entries_added != -1)
+ s->entries_added = entries_added;
+ if (!streamIDEqZero(&max_xdel_id))
+ s->max_deleted_entry_id = max_xdel_id;
+ addReply(c,shared.ok);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xsetid",c->argv[1],c->db->id);
+}
+
+/* XACK <key> <group> <id> <id> ... <id>
+ * Acknowledge a message as processed. In practical terms we just check the
+ * pending entries list (PEL) of the group, and delete the PEL entry both from
+ * the group and the consumer (pending messages are referenced in both places).
+ *
+ * Return value of the command is the number of messages successfully
+ * acknowledged, that is, the IDs we were actually able to resolve in the PEL.
+ */
+void xackCommand(client *c) {
+ streamCG *group = NULL;
+ robj *o = lookupKeyRead(c->db,c->argv[1]);
+ if (o) {
+ if (checkType(c,o,OBJ_STREAM)) return; /* Type error. */
+ group = streamLookupCG(o->ptr,c->argv[2]->ptr);
+ }
+
+ /* No key or group? Nothing to ack. */
+ if (o == NULL || group == NULL) {
+ addReply(c,shared.czero);
+ return;
+ }
+
+ /* Start parsing the IDs, so that we abort ASAP if there is a syntax
+ * error: the return value of this command cannot be an error in case
+ * the client successfully acknowledged some messages, so it should be
+ * executed in a "all or nothing" fashion. */
+ streamID static_ids[STREAMID_STATIC_VECTOR_LEN];
+ streamID *ids = static_ids;
+ int id_count = c->argc-3;
+ /* Use the stack vector for the common case, heap for many IDs. */
+ if (id_count > STREAMID_STATIC_VECTOR_LEN)
+ ids = zmalloc(sizeof(streamID)*id_count);
+ for (int j = 3; j < c->argc; j++) {
+ if (streamParseStrictIDOrReply(c,c->argv[j],&ids[j-3],0,NULL) != C_OK) goto cleanup;
+ }
+
+ int acknowledged = 0;
+ for (int j = 3; j < c->argc; j++) {
+ unsigned char buf[sizeof(streamID)];
+ streamEncodeID(buf,&ids[j-3]);
+
+ /* Lookup the ID in the group PEL: it will have a reference to the
+ * NACK structure that will have a reference to the consumer, so that
+ * we are able to remove the entry from both PELs. */
+ streamNACK *nack = raxFind(group->pel,buf,sizeof(buf));
+ if (nack != raxNotFound) {
+ raxRemove(group->pel,buf,sizeof(buf),NULL);
+ raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL);
+ streamFreeNACK(nack);
+ acknowledged++;
+ server.dirty++;
+ }
+ }
+ addReplyLongLong(c,acknowledged);
+cleanup:
+ if (ids != static_ids) zfree(ids);
+}
+
+/* XPENDING <key> <group> [[IDLE <idle>] <start> <stop> <count> [<consumer>]]
+ *
+ * If start and stop are omitted, the command just outputs information about
+ * the amount of pending messages for the key/group pair, together with
+ * the minimum and maximum ID of pending messages.
+ *
+ * If start and stop are provided instead, the pending messages are returned
+ * with information about the current owner, number of deliveries and last
+ * delivery time and so forth. */
+void xpendingCommand(client *c) {
+ int justinfo = c->argc == 3; /* Without the range just outputs general
+ information about the PEL. */
+ robj *key = c->argv[1];
+ robj *groupname = c->argv[2];
+ robj *consumername = NULL;
+ streamID startid, endid;
+ long long count = 0;
+ long long minidle = 0;
+ int startex = 0, endex = 0; /* Exclusive '(' start / end flags. */
+
+ /* Start and stop, and the consumer, can be omitted. Also the IDLE modifier. */
+ if (c->argc != 3 && (c->argc < 6 || c->argc > 9)) {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Parse start/end/count arguments ASAP if needed, in order to report
+ * syntax errors before any other error. */
+ if (c->argc >= 6) {
+ int startidx = 3; /* Without IDLE */
+
+ if (!strcasecmp(c->argv[3]->ptr, "IDLE")) {
+ if (getLongLongFromObjectOrReply(c, c->argv[4], &minidle, NULL) == C_ERR)
+ return;
+ if (c->argc < 8) {
+ /* If IDLE was provided we must have at least 'start end count' */
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ /* Search for rest of arguments after 'IDLE <idle>' */
+ startidx += 2;
+ }
+
+ /* count argument. */
+ if (getLongLongFromObjectOrReply(c,c->argv[startidx+2],&count,NULL) == C_ERR)
+ return;
+ if (count < 0) count = 0;
+
+ /* start and end arguments: exclusive bounds are normalized into
+ * inclusive ones by incrementing/decrementing the ID. */
+ if (streamParseIntervalIDOrReply(c,c->argv[startidx],&startid,&startex,0) != C_OK)
+ return;
+ if (startex && streamIncrID(&startid) != C_OK) {
+ addReplyError(c,"invalid start ID for the interval");
+ return;
+ }
+ if (streamParseIntervalIDOrReply(c,c->argv[startidx+1],&endid,&endex,UINT64_MAX) != C_OK)
+ return;
+ if (endex && streamDecrID(&endid) != C_OK) {
+ addReplyError(c,"invalid end ID for the interval");
+ return;
+ }
+
+ if (startidx+3 < c->argc) {
+ /* 'consumer' was provided */
+ consumername = c->argv[startidx+3];
+ }
+ }
+
+ /* Lookup the key and the group inside the stream. */
+ robj *o = lookupKeyRead(c->db,c->argv[1]);
+ streamCG *group;
+
+ /* NOTE(review): checkType is called with a possibly-NULL 'o' here;
+ * presumably it tolerates NULL — confirm against its definition. */
+ if (checkType(c,o,OBJ_STREAM)) return;
+ if (o == NULL ||
+ (group = streamLookupCG(o->ptr,groupname->ptr)) == NULL)
+ {
+ addReplyErrorFormat(c, "-NOGROUP No such key '%s' or consumer "
+ "group '%s'",
+ (char*)key->ptr,(char*)groupname->ptr);
+ return;
+ }
+
+ /* XPENDING <key> <group> variant. */
+ if (justinfo) {
+ addReplyArrayLen(c,4);
+ /* Total number of messages in the PEL. */
+ addReplyLongLong(c,raxSize(group->pel));
+ /* First and last IDs. */
+ if (raxSize(group->pel) == 0) {
+ addReplyNull(c); /* Start. */
+ addReplyNull(c); /* End. */
+ addReplyNullArray(c); /* Clients. */
+ } else {
+ /* Start. */
+ raxIterator ri;
+ raxStart(&ri,group->pel);
+ raxSeek(&ri,"^",NULL,0);
+ raxNext(&ri);
+ streamDecodeID(ri.key,&startid);
+ addReplyStreamID(c,&startid);
+
+ /* End. */
+ raxSeek(&ri,"$",NULL,0);
+ raxNext(&ri);
+ streamDecodeID(ri.key,&endid);
+ addReplyStreamID(c,&endid);
+ raxStop(&ri);
+
+ /* Consumers with pending messages. Consumers with an empty PEL
+ * are skipped, hence the deferred array length. */
+ raxStart(&ri,group->consumers);
+ raxSeek(&ri,"^",NULL,0);
+ void *arraylen_ptr = addReplyDeferredLen(c);
+ size_t arraylen = 0;
+ while(raxNext(&ri)) {
+ streamConsumer *consumer = ri.data;
+ if (raxSize(consumer->pel) == 0) continue;
+ addReplyArrayLen(c,2);
+ addReplyBulkCBuffer(c,ri.key,ri.key_len);
+ addReplyBulkLongLong(c,raxSize(consumer->pel));
+ arraylen++;
+ }
+ setDeferredArrayLen(c,arraylen_ptr,arraylen);
+ raxStop(&ri);
+ }
+ } else { /* <start>, <stop> and <count> provided, return actual pending entries (not just info) */
+ streamConsumer *consumer = NULL;
+ if (consumername) {
+ consumer = streamLookupConsumer(group,consumername->ptr);
+
+ /* If a consumer name was mentioned but it does not exist, we can
+ * just return an empty array. */
+ if (consumer == NULL) {
+ addReplyArrayLen(c,0);
+ return;
+ }
+ }
+
+ /* Iterate either the consumer's private PEL or the group PEL. */
+ rax *pel = consumer ? consumer->pel : group->pel;
+ unsigned char startkey[sizeof(streamID)];
+ unsigned char endkey[sizeof(streamID)];
+ raxIterator ri;
+ mstime_t now = commandTimeSnapshot();
+
+ /* PEL keys are the fixed-size binary encoding of the stream ID, so
+ * rax ordering presumably matches ID ordering — the memcmp below
+ * relies on that. */
+ streamEncodeID(startkey,&startid);
+ streamEncodeID(endkey,&endid);
+ raxStart(&ri,pel);
+ raxSeek(&ri,">=",startkey,sizeof(startkey));
+ void *arraylen_ptr = addReplyDeferredLen(c);
+ size_t arraylen = 0;
+
+ while(count && raxNext(&ri) && memcmp(ri.key,endkey,ri.key_len) <= 0) {
+ streamNACK *nack = ri.data;
+
+ /* Entries skipped by the IDLE filter do not consume 'count'. */
+ if (minidle) {
+ mstime_t this_idle = now - nack->delivery_time;
+ if (this_idle < minidle) continue;
+ }
+
+ arraylen++;
+ count--;
+ addReplyArrayLen(c,4);
+
+ /* Entry ID. */
+ streamID id;
+ streamDecodeID(ri.key,&id);
+ addReplyStreamID(c,&id);
+
+ /* Consumer name. */
+ addReplyBulkCBuffer(c,nack->consumer->name,
+ sdslen(nack->consumer->name));
+
+ /* Milliseconds elapsed since last delivery. */
+ mstime_t elapsed = now - nack->delivery_time;
+ if (elapsed < 0) elapsed = 0;
+ addReplyLongLong(c,elapsed);
+
+ /* Number of deliveries. */
+ addReplyLongLong(c,nack->delivery_count);
+ }
+ raxStop(&ri);
+ setDeferredArrayLen(c,arraylen_ptr,arraylen);
+ }
+}
+
+/* XCLAIM <key> <group> <consumer> <min-idle-time> <ID-1> <ID-2>
+ * [IDLE <milliseconds>] [TIME <mstime>] [RETRYCOUNT <count>]
+ * [FORCE] [JUSTID]
+ *
+ * Changes ownership of one or multiple messages in the Pending Entries List
+ * of a given stream consumer group.
+ *
+ * If the message ID (among the specified ones) exists, and its idle
+ * time greater or equal to <min-idle-time>, then the message new owner
+ * becomes the specified <consumer>. If the minimum idle time specified
+ * is zero, messages are claimed regardless of their idle time.
+ *
+ * All the messages that cannot be found inside the pending entries list
+ * are ignored, but in case the FORCE option is used. In that case we
+ * create the NACK (representing a not yet acknowledged message) entry in
+ * the consumer group PEL.
+ *
+ * This command creates the consumer as side effect if it does not yet
+ * exists. Moreover the command reset the idle time of the message to 0,
+ * even if by using the IDLE or TIME options, the user can control the
+ * new idle time.
+ *
+ * The options at the end can be used in order to specify more attributes
+ * to set in the representation of the pending message:
+ *
+ * 1. IDLE <ms>:
+ * Set the idle time (last time it was delivered) of the message.
+ * If IDLE is not specified, an IDLE of 0 is assumed, that is,
+ * the time count is reset because the message has now a new
+ * owner trying to process it.
+ *
+ * 2. TIME <ms-unix-time>:
+ * This is the same as IDLE but instead of a relative amount of
+ * milliseconds, it sets the idle time to a specific unix time
+ * (in milliseconds). This is useful in order to rewrite the AOF
+ * file generating XCLAIM commands.
+ *
+ * 3. RETRYCOUNT <count>:
+ * Set the retry counter to the specified value. This counter is
+ * incremented every time a message is delivered again. Normally
+ * XCLAIM does not alter this counter, which is just served to clients
+ * when the XPENDING command is called: this way clients can detect
+ * anomalies, like messages that are never processed for some reason
+ * after a big number of delivery attempts.
+ *
+ * 4. FORCE:
+ * Creates the pending message entry in the PEL even if certain
+ * specified IDs are not already in the PEL assigned to a different
+ * client. However the message must be exist in the stream, otherwise
+ * the IDs of non existing messages are ignored.
+ *
+ * 5. JUSTID:
+ * Return just an array of IDs of messages successfully claimed,
+ * without returning the actual message.
+ *
+ * 6. LASTID <id>:
+ * Update the consumer group last ID with the specified ID if the
+ * current last ID is smaller than the provided one.
+ * This is used for replication / AOF, so that when we read from a
+ * consumer group, the XCLAIM that gets propagated to give ownership
+ * to the consumer, is also used in order to update the group current
+ * ID.
+ *
+ * The command returns an array of messages that the user
+ * successfully claimed, so that the caller is able to understand
+ * what messages it is now in charge of. */
+void xclaimCommand(client *c) {
+ streamCG *group = NULL;
+ robj *o = lookupKeyRead(c->db,c->argv[1]);
+ long long minidle; /* Minimum idle time argument. */
+ long long retrycount = -1; /* -1 means RETRYCOUNT option not given. */
+ mstime_t deliverytime = -1; /* -1 means IDLE/TIME options not given. */
+ int force = 0;
+ int justid = 0;
+
+ if (o) {
+ if (checkType(c,o,OBJ_STREAM)) return; /* Type error. */
+ group = streamLookupCG(o->ptr,c->argv[2]->ptr);
+ }
+
+ /* No key or group? Send an error given that the group creation
+ * is mandatory. */
+ if (o == NULL || group == NULL) {
+ addReplyErrorFormat(c,"-NOGROUP No such key '%s' or "
+ "consumer group '%s'", (char*)c->argv[1]->ptr,
+ (char*)c->argv[2]->ptr);
+ return;
+ }
+
+ if (getLongLongFromObjectOrReply(c,c->argv[4],&minidle,
+ "Invalid min-idle-time argument for XCLAIM")
+ != C_OK) return;
+ if (minidle < 0) minidle = 0;
+
+ /* Start parsing the IDs, so that we abort ASAP if there is a syntax
+ * error: the return value of this command cannot be an error in case
+ * the client successfully claimed some message, so it should be
+ * executed in a "all or nothing" fashion. */
+ int j;
+ streamID static_ids[STREAMID_STATIC_VECTOR_LEN];
+ streamID *ids = static_ids;
+ int id_count = c->argc-5;
+ if (id_count > STREAMID_STATIC_VECTOR_LEN)
+ ids = zmalloc(sizeof(streamID)*id_count);
+ for (j = 5; j < c->argc; j++) {
+ /* A NULL client is passed so that a parse failure does not reply
+ * with an error: the first non-ID argument starts the options. */
+ if (streamParseStrictIDOrReply(NULL,c->argv[j],&ids[j-5],0,NULL) != C_OK) break;
+ }
+ int last_id_arg = j-1; /* Next time we iterate the IDs we now the range. */
+
+ /* If we stopped because some IDs cannot be parsed, perhaps they
+ * are trailing options. */
+ mstime_t now = commandTimeSnapshot();
+ streamID last_id = {0,0};
+ int propagate_last_id = 0;
+ for (; j < c->argc; j++) {
+ int moreargs = (c->argc-1) - j; /* Number of additional arguments. */
+ char *opt = c->argv[j]->ptr;
+ if (!strcasecmp(opt,"FORCE")) {
+ force = 1;
+ } else if (!strcasecmp(opt,"JUSTID")) {
+ justid = 1;
+ } else if (!strcasecmp(opt,"IDLE") && moreargs) {
+ j++;
+ if (getLongLongFromObjectOrReply(c,c->argv[j],&deliverytime,
+ "Invalid IDLE option argument for XCLAIM")
+ != C_OK) goto cleanup;
+ /* IDLE is relative: convert it to an absolute delivery time. */
+ deliverytime = now - deliverytime;
+ } else if (!strcasecmp(opt,"TIME") && moreargs) {
+ j++;
+ if (getLongLongFromObjectOrReply(c,c->argv[j],&deliverytime,
+ "Invalid TIME option argument for XCLAIM")
+ != C_OK) goto cleanup;
+ } else if (!strcasecmp(opt,"RETRYCOUNT") && moreargs) {
+ j++;
+ if (getLongLongFromObjectOrReply(c,c->argv[j],&retrycount,
+ "Invalid RETRYCOUNT option argument for XCLAIM")
+ != C_OK) goto cleanup;
+ } else if (!strcasecmp(opt,"LASTID") && moreargs) {
+ j++;
+ if (streamParseStrictIDOrReply(c,c->argv[j],&last_id,0,NULL) != C_OK) goto cleanup;
+ } else {
+ addReplyErrorFormat(c,"Unrecognized XCLAIM option '%s'",opt);
+ goto cleanup;
+ }
+ }
+
+ /* Advance the group's last_id only forward (LASTID semantics). */
+ if (streamCompareID(&last_id,&group->last_id) > 0) {
+ group->last_id = last_id;
+ propagate_last_id = 1;
+ }
+
+ if (deliverytime != -1) {
+ /* If a delivery time was passed, either with IDLE or TIME, we
+ * do some sanity check on it, and set the deliverytime to now
+ * (which is a sane choice usually) if the value is bogus.
+ * To raise an error here is not wise because clients may compute
+ * the idle time doing some math starting from their local time,
+ * and this is not a good excuse to fail in case, for instance,
+ * the computer time is a bit in the future from our POV. */
+ if (deliverytime < 0 || deliverytime > now) deliverytime = now;
+ } else {
+ /* If no IDLE/TIME option was passed, we want the last delivery
+ * time to be now, so that the idle time of the message will be
+ * zero. */
+ deliverytime = now;
+ }
+
+ /* Do the actual claiming. The destination consumer is created on the
+ * fly if it does not exist yet. */
+ streamConsumer *consumer = streamLookupConsumer(group,c->argv[3]->ptr);
+ if (consumer == NULL) {
+ consumer = streamCreateConsumer(group,c->argv[3]->ptr,c->argv[1],c->db->id,SCC_DEFAULT);
+ }
+ consumer->seen_time = commandTimeSnapshot();
+
+ void *arraylenptr = addReplyDeferredLen(c);
+ size_t arraylen = 0;
+ for (int j = 5; j <= last_id_arg; j++) {
+ streamID id = ids[j-5];
+ unsigned char buf[sizeof(streamID)];
+ streamEncodeID(buf,&id);
+
+ /* Lookup the ID in the group PEL. */
+ streamNACK *nack = raxFind(group->pel,buf,sizeof(buf));
+
+ /* Item must exist for us to transfer it to another consumer. */
+ if (!streamEntryExists(o->ptr,&id)) {
+ /* Clear this entry from the PEL, it no longer exists */
+ if (nack != raxNotFound) {
+ /* Propagate this change (we are going to delete the NACK). */
+ streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],c->argv[j],nack);
+ propagate_last_id = 0; /* Will be propagated by XCLAIM itself. */
+ server.dirty++;
+ /* Release the NACK */
+ raxRemove(group->pel,buf,sizeof(buf),NULL);
+ raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL);
+ streamFreeNACK(nack);
+ }
+ continue;
+ }
+
+ /* If FORCE is passed, let's check if at least the entry
+ * exists in the Stream. In such case, we'll create a new
+ * entry in the PEL from scratch, so that XCLAIM can also
+ * be used to create entries in the PEL. Useful for AOF
+ * and replication of consumer groups. */
+ if (force && nack == raxNotFound) {
+ /* Create the NACK. */
+ nack = streamCreateNACK(NULL);
+ raxInsert(group->pel,buf,sizeof(buf),nack,NULL);
+ }
+
+ if (nack != raxNotFound) {
+ /* We need to check if the minimum idle time requested
+ * by the caller is satisfied by this entry.
+ *
+ * Note that the nack could be created by FORCE, in this
+ * case there was no pre-existing entry and minidle should
+ * be ignored, but in that case nack->consumer is NULL. */
+ if (nack->consumer && minidle) {
+ mstime_t this_idle = now - nack->delivery_time;
+ if (this_idle < minidle) continue;
+ }
+
+ if (nack->consumer != consumer) {
+ /* Remove the entry from the old consumer.
+ * Note that nack->consumer is NULL if we created the
+ * NACK above because of the FORCE option. */
+ if (nack->consumer)
+ raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL);
+ }
+ nack->delivery_time = deliverytime;
+ /* Set the delivery attempts counter if given, otherwise
+ * autoincrement unless JUSTID option provided */
+ if (retrycount >= 0) {
+ nack->delivery_count = retrycount;
+ } else if (!justid) {
+ nack->delivery_count++;
+ }
+ if (nack->consumer != consumer) {
+ /* Add the entry in the new consumer local PEL. */
+ raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
+ nack->consumer = consumer;
+ }
+ /* Send the reply for this entry. */
+ if (justid) {
+ addReplyStreamID(c,&id);
+ } else {
+ serverAssert(streamReplyWithRange(c,o->ptr,&id,&id,1,0,NULL,NULL,STREAM_RWR_RAWENTRIES,NULL) == 1);
+ }
+ arraylen++;
+
+ consumer->active_time = commandTimeSnapshot();
+
+ /* Propagate this change. */
+ streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],c->argv[j],nack);
+ propagate_last_id = 0; /* Will be propagated by XCLAIM itself. */
+ server.dirty++;
+ }
+ }
+ /* If LASTID advanced the group but no XCLAIM was propagated, emit an
+ * explicit group-ID update for replicas/AOF. */
+ if (propagate_last_id) {
+ streamPropagateGroupID(c,c->argv[1],group,c->argv[2]);
+ server.dirty++;
+ }
+ setDeferredArrayLen(c,arraylenptr,arraylen);
+ /* The command propagates itself via the explicit XCLAIM/group-ID
+ * propagation above, so verbatim replication is suppressed. */
+ preventCommandPropagation(c);
+cleanup:
+ if (ids != static_ids) zfree(ids);
+}
+
+/* XAUTOCLAIM <key> <group> <consumer> <min-idle-time> <start> [COUNT <count>] [JUSTID]
+ *
+ * Changes ownership of one or multiple messages in the Pending Entries List
+ * of a given stream consumer group.
+ *
+ * For each PEL entry, if its idle time greater or equal to <min-idle-time>,
+ * then the message new owner becomes the specified <consumer>.
+ * If the minimum idle time specified is zero, messages are claimed
+ * regardless of their idle time.
+ *
+ * This command creates the consumer as side effect if it does not yet
+ * exists. Moreover the command reset the idle time of the message to 0.
+ *
+ * The command returns an array of messages that the user
+ * successfully claimed, so that the caller is able to understand
+ * what messages it is now in charge of. */
+void xautoclaimCommand(client *c) {
+ streamCG *group = NULL;
+ robj *o = lookupKeyRead(c->db,c->argv[1]);
+ long long minidle; /* Minimum idle time argument, in milliseconds. */
+ long count = 100; /* Maximum entries to claim. */
+ /* Scan at most count*attempts_factor PEL entries, so a large PEL full
+ * of non-matching entries cannot stall the server. */
+ const unsigned attempts_factor = 10;
+ streamID startid;
+ int startex;
+ int justid = 0;
+
+ /* Parse idle/start/end/count arguments ASAP if needed, in order to report
+ * syntax errors before any other error. */
+ if (getLongLongFromObjectOrReply(c,c->argv[4],&minidle,"Invalid min-idle-time argument for XAUTOCLAIM") != C_OK)
+ return;
+ if (minidle < 0) minidle = 0;
+
+ if (streamParseIntervalIDOrReply(c,c->argv[5],&startid,&startex,0) != C_OK)
+ return;
+ if (startex && streamIncrID(&startid) != C_OK) {
+ addReplyError(c,"invalid start ID for the interval");
+ return;
+ }
+
+ int j = 6; /* options start at argv[6] */
+ while(j < c->argc) {
+ int moreargs = (c->argc-1) - j; /* Number of additional arguments. */
+ char *opt = c->argv[j]->ptr;
+ if (!strcasecmp(opt,"COUNT") && moreargs) {
+ /* Cap COUNT so that count*attempts_factor and the deleted_ids
+ * allocation below cannot overflow. */
+ long max_count = LONG_MAX / (max(sizeof(streamID), attempts_factor));
+ if (getRangeLongFromObjectOrReply(c,c->argv[j+1],1,max_count,&count,"COUNT must be > 0") != C_OK)
+ return;
+ j++;
+ } else if (!strcasecmp(opt,"JUSTID")) {
+ justid = 1;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ j++;
+ }
+
+ if (o) {
+ if (checkType(c,o,OBJ_STREAM))
+ return; /* Type error. */
+ group = streamLookupCG(o->ptr,c->argv[2]->ptr);
+ }
+
+ /* No key or group? Send an error given that the group creation
+ * is mandatory. */
+ if (o == NULL || group == NULL) {
+ addReplyErrorFormat(c,"-NOGROUP No such key '%s' or consumer group '%s'",
+ (char*)c->argv[1]->ptr,
+ (char*)c->argv[2]->ptr);
+ return;
+ }
+
+ /* 'count' entries at most are deleted per call (count-- on each
+ * deletion below), so this buffer can never overflow. */
+ streamID *deleted_ids = ztrymalloc(count * sizeof(streamID));
+ if (!deleted_ids) {
+ addReplyError(c, "Insufficient memory, failed allocating transient memory, COUNT too high.");
+ return;
+ }
+
+ /* Do the actual claiming. */
+ streamConsumer *consumer = streamLookupConsumer(group,c->argv[3]->ptr);
+ if (consumer == NULL) {
+ consumer = streamCreateConsumer(group,c->argv[3]->ptr,c->argv[1],c->db->id,SCC_DEFAULT);
+ }
+ consumer->seen_time = commandTimeSnapshot();
+
+ long long attempts = count * attempts_factor;
+
+ addReplyArrayLen(c, 3); /* We add another reply later */
+ void *endidptr = addReplyDeferredLen(c); /* reply[0] */
+ void *arraylenptr = addReplyDeferredLen(c); /* reply[1] */
+
+ unsigned char startkey[sizeof(streamID)];
+ streamEncodeID(startkey,&startid);
+ raxIterator ri;
+ raxStart(&ri,group->pel);
+ raxSeek(&ri,">=",startkey,sizeof(startkey));
+ size_t arraylen = 0;
+ mstime_t now = commandTimeSnapshot();
+ int deleted_id_num = 0;
+ while (attempts-- && count && raxNext(&ri)) {
+ streamNACK *nack = ri.data;
+
+ streamID id;
+ streamDecodeID(ri.key, &id);
+
+ /* Item must exist for us to transfer it to another consumer. */
+ if (!streamEntryExists(o->ptr,&id)) {
+ /* Propagate this change (we are going to delete the NACK). */
+ robj *idstr = createObjectFromStreamID(&id);
+ streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],idstr,nack);
+ decrRefCount(idstr);
+ server.dirty++;
+ /* Clear this entry from the PEL, it no longer exists */
+ raxRemove(group->pel,ri.key,ri.key_len,NULL);
+ raxRemove(nack->consumer->pel,ri.key,ri.key_len,NULL);
+ streamFreeNACK(nack);
+ /* Remember the ID for later */
+ deleted_ids[deleted_id_num++] = id;
+ /* Re-seek: the removal invalidated the iterator's position. */
+ raxSeek(&ri,">=",ri.key,ri.key_len);
+ count--; /* Count is a limit of the command response size. */
+ continue;
+ }
+
+ if (minidle) {
+ mstime_t this_idle = now - nack->delivery_time;
+ if (this_idle < minidle)
+ continue;
+ }
+
+ if (nack->consumer != consumer) {
+ /* Remove the entry from the old consumer.
+ * Note that nack->consumer is NULL if we created the
+ * NACK above because of the FORCE option. */
+ if (nack->consumer)
+ raxRemove(nack->consumer->pel,ri.key,ri.key_len,NULL);
+ }
+
+ /* Update the consumer and idle time. */
+ nack->delivery_time = now;
+ /* Increment the delivery attempts counter unless JUSTID option provided */
+ if (!justid)
+ nack->delivery_count++;
+
+ if (nack->consumer != consumer) {
+ /* Add the entry in the new consumer local PEL. */
+ raxInsert(consumer->pel,ri.key,ri.key_len,nack,NULL);
+ nack->consumer = consumer;
+ }
+
+ /* Send the reply for this entry. */
+ if (justid) {
+ addReplyStreamID(c,&id);
+ } else {
+ serverAssert(streamReplyWithRange(c,o->ptr,&id,&id,1,0,NULL,NULL,STREAM_RWR_RAWENTRIES,NULL) == 1);
+ }
+ arraylen++;
+ count--;
+
+ consumer->active_time = commandTimeSnapshot();
+
+ /* Propagate this change. */
+ robj *idstr = createObjectFromStreamID(&id);
+ streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],idstr,nack);
+ decrRefCount(idstr);
+ server.dirty++;
+ }
+
+ /* We need to return the next entry as a cursor for the next XAUTOCLAIM call */
+ raxNext(&ri);
+
+ streamID endid;
+ if (raxEOF(&ri)) {
+ /* 0-0 signals the caller that the scan wrapped and is complete. */
+ endid.ms = endid.seq = 0;
+ } else {
+ streamDecodeID(ri.key, &endid);
+ }
+ raxStop(&ri);
+
+ setDeferredArrayLen(c,arraylenptr,arraylen);
+ setDeferredReplyStreamID(c,endidptr,&endid);
+
+ addReplyArrayLen(c, deleted_id_num); /* reply[2] */
+ for (int i = 0; i < deleted_id_num; i++) {
+ addReplyStreamID(c, &deleted_ids[i]);
+ }
+ zfree(deleted_ids);
+
+ /* Changes were propagated explicitly via XCLAIM above; suppress
+ * verbatim replication of XAUTOCLAIM itself. */
+ preventCommandPropagation(c);
+}
+
+/* XDEL <key> [<ID1> <ID2> ... <IDN>]
+ *
+ * Removes the specified entries from the stream. Returns the number
+ * of items actually deleted, that may be different from the number
+ * of IDs passed in case certain IDs do not exist. */
+void xdelCommand(client *c) {
+ robj *o;
+
+ /* Missing key replies 0 (nothing deleted); wrong type replies an error. */
+ if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL
+ || checkType(c,o,OBJ_STREAM)) return;
+ stream *s = o->ptr;
+
+ /* We need to sanity check the IDs passed to start. Even if not
+ * a big issue, it is not great that the command is only partially
+ * executed because at some point an invalid ID is parsed. */
+ streamID static_ids[STREAMID_STATIC_VECTOR_LEN];
+ streamID *ids = static_ids;
+ int id_count = c->argc-2;
+ /* Fall back to a heap vector only when the stack one is too small. */
+ if (id_count > STREAMID_STATIC_VECTOR_LEN)
+ ids = zmalloc(sizeof(streamID)*id_count);
+ for (int j = 2; j < c->argc; j++) {
+ if (streamParseStrictIDOrReply(c,c->argv[j],&ids[j-2],0,NULL) != C_OK) goto cleanup;
+ }
+
+ /* Actually apply the command. */
+ int deleted = 0;
+ int first_entry = 0;
+ for (int j = 2; j < c->argc; j++) {
+ streamID *id = &ids[j-2];
+ if (streamDeleteItem(s,id)) {
+ /* We want to know if the first entry in the stream was deleted
+ * so we can later set the new one. */
+ if (streamCompareID(id,&s->first_id) == 0) {
+ first_entry = 1;
+ }
+ /* Update the stream's maximal tombstone if needed. */
+ if (streamCompareID(id,&s->max_deleted_entry_id) > 0) {
+ s->max_deleted_entry_id = *id;
+ }
+ deleted++;
+ };
+ }
+
+ /* Update the stream's first ID. */
+ if (deleted) {
+ if (s->length == 0) {
+ /* Emptied stream: reset first_id to 0-0. */
+ s->first_id.ms = 0;
+ s->first_id.seq = 0;
+ } else if (first_entry) {
+ /* The old head was removed: look up the new smallest entry. */
+ streamGetEdgeID(s,1,1,&s->first_id);
+ }
+ }
+
+ /* Propagate the write if needed. */
+ if (deleted) {
+ signalModifiedKey(c,c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xdel",c->argv[1],c->db->id);
+ server.dirty += deleted;
+ }
+ addReplyLongLong(c,deleted);
+cleanup:
+ if (ids != static_ids) zfree(ids);
+}
+
+/* General form: XTRIM <key> [... options ...]
+ *
+ * List of options:
+ *
+ * Trim strategies:
+ *
+ * MAXLEN [~|=] <count> -- Trim so that the stream will be capped at
+ * the specified length. Use ~ before the
+ * count in order to demand approximated trimming
+ * (like XADD MAXLEN option).
+ * MINID [~|=] <id> -- Trim so that the stream will not contain entries
+ * with IDs smaller than 'id'. Use ~ before the
+ * count in order to demand approximated trimming
+ * (like XADD MINID option).
+ *
+ * Other options:
+ *
+ * LIMIT <entries> -- The maximum number of entries to trim.
+ * 0 means unlimited. Unless specified, it is set
+ * to a default of 100*server.stream_node_max_entries,
+ * and that's in order to keep the trimming time sane.
+ * Has meaning only if `~` was provided.
+ */
+void xtrimCommand(client *c) {
+ robj *o;
+
+ /* Argument parsing. Parsing happens before the key lookup so syntax
+ * errors are reported even for missing keys. */
+ streamAddTrimArgs parsed_args;
+ if (streamParseAddOrTrimArgsOrReply(c, &parsed_args, 0) < 0)
+ return; /* streamParseAddOrTrimArgsOrReply already replied. */
+
+ /* If the key does not exist, we are ok returning zero, that is, the
+ * number of elements removed from the stream. */
+ if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL
+ || checkType(c,o,OBJ_STREAM)) return;
+ stream *s = o->ptr;
+
+ /* Perform the trimming. */
+ int64_t deleted = streamTrim(s, &parsed_args);
+ if (deleted) {
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id);
+ if (parsed_args.approx_trim) {
+ /* In case our trimming was limited (by LIMIT or by ~) we must
+ * re-write the relevant trim argument to make sure there will be
+ * no inconsistencies in AOF loading or in the replica.
+ * It's enough to check only args->approx because there is no
+ * way LIMIT is given without the ~ option. */
+ streamRewriteApproxSpecifier(c,parsed_args.trim_strategy_arg_idx-1);
+ streamRewriteTrimArgument(c,s,parsed_args.trim_strategy,parsed_args.trim_strategy_arg_idx);
+ }
+
+ /* Propagate the write. */
+ signalModifiedKey(c, c->db,c->argv[1]);
+ server.dirty += deleted;
+ }
+ addReplyLongLong(c,deleted);
+}
+
+/* Helper function for xinfoCommand.
+ * Handles the variants of XINFO STREAM: with no options only a summary plus
+ * the first/last entries are emitted; with FULL [COUNT <count>] the entries,
+ * consumer groups, PELs and consumers are dumped (COUNT bounds each listing,
+ * 0 meaning unlimited). */
+void xinfoReplyWithStreamInfo(client *c, stream *s) {
+ int full = 1;
+ long long count = 10; /* Default COUNT is 10 so we don't block the server */
+ robj **optv = c->argv + 3; /* Options start after XINFO STREAM <key> */
+ int optc = c->argc - 3;
+
+ /* Parse options. */
+ if (optc == 0) {
+ full = 0;
+ } else {
+ /* Valid options are [FULL] or [FULL COUNT <count>] */
+ if (optc != 1 && optc != 3) {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+
+ /* First option must be "FULL" */
+ if (strcasecmp(optv[0]->ptr,"full")) {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+
+ if (optc == 3) {
+ /* Second option must be "COUNT" */
+ if (strcasecmp(optv[1]->ptr,"count")) {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+ if (getLongLongFromObjectOrReply(c,optv[2],&count,NULL) == C_ERR)
+ return;
+ /* A negative COUNT falls back to the default of 10. */
+ if (count < 0) count = 10;
+ }
+ }
+
+ /* 7 fields are common to both forms; the non-FULL form adds groups,
+ * first-entry and last-entry (10), the FULL form adds entries and
+ * groups (9). */
+ addReplyMapLen(c,full ? 9 : 10);
+ addReplyBulkCString(c,"length");
+ addReplyLongLong(c,s->length);
+ addReplyBulkCString(c,"radix-tree-keys");
+ addReplyLongLong(c,raxSize(s->rax));
+ addReplyBulkCString(c,"radix-tree-nodes");
+ addReplyLongLong(c,s->rax->numnodes);
+ addReplyBulkCString(c,"last-generated-id");
+ addReplyStreamID(c,&s->last_id);
+ addReplyBulkCString(c,"max-deleted-entry-id");
+ addReplyStreamID(c,&s->max_deleted_entry_id);
+ addReplyBulkCString(c,"entries-added");
+ addReplyLongLong(c,s->entries_added);
+ addReplyBulkCString(c,"recorded-first-entry-id");
+ addReplyStreamID(c,&s->first_id);
+
+ if (!full) {
+ /* XINFO STREAM <key> */
+
+ addReplyBulkCString(c,"groups");
+ addReplyLongLong(c,s->cgroups ? raxSize(s->cgroups) : 0);
+
+ /* To emit the first/last entry we use streamReplyWithRange(). */
+ int emitted;
+ streamID start, end;
+ start.ms = start.seq = 0;
+ end.ms = end.seq = UINT64_MAX;
+ addReplyBulkCString(c,"first-entry");
+ emitted = streamReplyWithRange(c,s,&start,&end,1,0,NULL,NULL,
+ STREAM_RWR_RAWENTRIES,NULL);
+ if (!emitted) addReplyNull(c);
+ addReplyBulkCString(c,"last-entry");
+ emitted = streamReplyWithRange(c,s,&start,&end,1,1,NULL,NULL,
+ STREAM_RWR_RAWENTRIES,NULL);
+ if (!emitted) addReplyNull(c);
+ } else {
+ /* XINFO STREAM <key> FULL [COUNT <count>] */
+
+ /* Stream entries */
+ addReplyBulkCString(c,"entries");
+ streamReplyWithRange(c,s,NULL,NULL,count,0,NULL,NULL,0,NULL);
+
+ /* Consumer groups */
+ addReplyBulkCString(c,"groups");
+ if (s->cgroups == NULL) {
+ addReplyArrayLen(c,0);
+ } else {
+ addReplyArrayLen(c,raxSize(s->cgroups));
+ raxIterator ri_cgroups;
+ raxStart(&ri_cgroups,s->cgroups);
+ raxSeek(&ri_cgroups,"^",NULL,0);
+ while(raxNext(&ri_cgroups)) {
+ streamCG *cg = ri_cgroups.data;
+ addReplyMapLen(c,7);
+
+ /* Name */
+ addReplyBulkCString(c,"name");
+ addReplyBulkCBuffer(c,ri_cgroups.key,ri_cgroups.key_len);
+
+ /* Last delivered ID */
+ addReplyBulkCString(c,"last-delivered-id");
+ addReplyStreamID(c,&cg->last_id);
+
+ /* Read counter of the last delivered ID */
+ addReplyBulkCString(c,"entries-read");
+ if (cg->entries_read != SCG_INVALID_ENTRIES_READ) {
+ addReplyLongLong(c,cg->entries_read);
+ } else {
+ addReplyNull(c);
+ }
+
+ /* Group lag */
+ addReplyBulkCString(c,"lag");
+ streamReplyWithCGLag(c,s,cg);
+
+ /* Group PEL count */
+ addReplyBulkCString(c,"pel-count");
+ addReplyLongLong(c,raxSize(cg->pel));
+
+ /* Group PEL, at most 'count' entries (0 = unlimited). */
+ addReplyBulkCString(c,"pending");
+ long long arraylen_cg_pel = 0;
+ void *arrayptr_cg_pel = addReplyDeferredLen(c);
+ raxIterator ri_cg_pel;
+ raxStart(&ri_cg_pel,cg->pel);
+ raxSeek(&ri_cg_pel,"^",NULL,0);
+ while(raxNext(&ri_cg_pel) && (!count || arraylen_cg_pel < count)) {
+ streamNACK *nack = ri_cg_pel.data;
+ addReplyArrayLen(c,4);
+
+ /* Entry ID. */
+ streamID id;
+ streamDecodeID(ri_cg_pel.key,&id);
+ addReplyStreamID(c,&id);
+
+ /* Consumer name. */
+ serverAssert(nack->consumer); /* assertion for valgrind (avoid NPD) */
+ addReplyBulkCBuffer(c,nack->consumer->name,
+ sdslen(nack->consumer->name));
+
+ /* Last delivery. */
+ addReplyLongLong(c,nack->delivery_time);
+
+ /* Number of deliveries. */
+ addReplyLongLong(c,nack->delivery_count);
+
+ arraylen_cg_pel++;
+ }
+ setDeferredArrayLen(c,arrayptr_cg_pel,arraylen_cg_pel);
+ raxStop(&ri_cg_pel);
+
+ /* Consumers */
+ addReplyBulkCString(c,"consumers");
+ addReplyArrayLen(c,raxSize(cg->consumers));
+ raxIterator ri_consumers;
+ raxStart(&ri_consumers,cg->consumers);
+ raxSeek(&ri_consumers,"^",NULL,0);
+ while(raxNext(&ri_consumers)) {
+ streamConsumer *consumer = ri_consumers.data;
+ addReplyMapLen(c,5);
+
+ /* Consumer name */
+ addReplyBulkCString(c,"name");
+ addReplyBulkCBuffer(c,consumer->name,sdslen(consumer->name));
+
+ /* Seen-time */
+ addReplyBulkCString(c,"seen-time");
+ addReplyLongLong(c,consumer->seen_time);
+
+ /* Active-time */
+ addReplyBulkCString(c,"active-time");
+ addReplyLongLong(c,consumer->active_time);
+
+ /* Consumer PEL count */
+ addReplyBulkCString(c,"pel-count");
+ addReplyLongLong(c,raxSize(consumer->pel));
+
+ /* Consumer PEL, at most 'count' entries (0 = unlimited). */
+ addReplyBulkCString(c,"pending");
+ long long arraylen_cpel = 0;
+ void *arrayptr_cpel = addReplyDeferredLen(c);
+ raxIterator ri_cpel;
+ raxStart(&ri_cpel,consumer->pel);
+ raxSeek(&ri_cpel,"^",NULL,0);
+ while(raxNext(&ri_cpel) && (!count || arraylen_cpel < count)) {
+ streamNACK *nack = ri_cpel.data;
+ addReplyArrayLen(c,3);
+
+ /* Entry ID. */
+ streamID id;
+ streamDecodeID(ri_cpel.key,&id);
+ addReplyStreamID(c,&id);
+
+ /* Last delivery. */
+ addReplyLongLong(c,nack->delivery_time);
+
+ /* Number of deliveries. */
+ addReplyLongLong(c,nack->delivery_count);
+
+ arraylen_cpel++;
+ }
+ setDeferredArrayLen(c,arrayptr_cpel,arraylen_cpel);
+ raxStop(&ri_cpel);
+ }
+ raxStop(&ri_consumers);
+ }
+ raxStop(&ri_cgroups);
+ }
+ }
+}
+
+/* XINFO CONSUMERS <key> <group>
+ * XINFO GROUPS <key>
+ * XINFO STREAM <key> [FULL [COUNT <count>]]
+ * XINFO HELP. */
+void xinfoCommand(client *c) {
+ stream *s = NULL;
+ char *opt;
+ robj *key;
+
+ /* HELP is special. Handle it ASAP. */
+ if (!strcasecmp(c->argv[1]->ptr,"HELP")) {
+ const char *help[] = {
+"CONSUMERS <key> <groupname>",
+" Show consumers of <groupname>.",
+"GROUPS <key>",
+" Show the stream consumer groups.",
+"STREAM <key> [FULL [COUNT <count>]]",
+" Show information about the stream.",
+NULL
+ };
+ addReplyHelp(c, help);
+ return;
+ }
+
+ /* With the exception of HELP handled before any other sub commands, all
+ * the ones are in the form of "<subcommand> <key>". */
+ opt = c->argv[1]->ptr;
+ key = c->argv[2];
+
+ /* Lookup the key now, this is common for all the subcommands but HELP. */
+ robj *o = lookupKeyReadOrReply(c,key,shared.nokeyerr);
+ if (o == NULL || checkType(c,o,OBJ_STREAM)) return;
+ s = o->ptr;
+
+ /* Dispatch the different subcommands. */
+ if (!strcasecmp(opt,"CONSUMERS") && c->argc == 4) {
+ /* XINFO CONSUMERS <key> <group>. */
+ streamCG *cg = streamLookupCG(s,c->argv[3]->ptr);
+ if (cg == NULL) {
+ addReplyErrorFormat(c, "-NOGROUP No such consumer group '%s' "
+ "for key name '%s'",
+ (char*)c->argv[3]->ptr, (char*)key->ptr);
+ return;
+ }
+
+ addReplyArrayLen(c,raxSize(cg->consumers));
+ raxIterator ri;
+ raxStart(&ri,cg->consumers);
+ raxSeek(&ri,"^",NULL,0);
+ mstime_t now = commandTimeSnapshot();
+ while(raxNext(&ri)) {
+ streamConsumer *consumer = ri.data;
+ /* An active_time of -1 (never active) is reported as-is. */
+ mstime_t inactive = consumer->active_time != -1 ? now - consumer->active_time : consumer->active_time;
+ mstime_t idle = now - consumer->seen_time;
+ if (idle < 0) idle = 0;
+
+ addReplyMapLen(c,4);
+ addReplyBulkCString(c,"name");
+ addReplyBulkCBuffer(c,consumer->name,sdslen(consumer->name));
+ addReplyBulkCString(c,"pending");
+ addReplyLongLong(c,raxSize(consumer->pel));
+ addReplyBulkCString(c,"idle");
+ addReplyLongLong(c,idle);
+ addReplyBulkCString(c,"inactive");
+ addReplyLongLong(c,inactive);
+ }
+ raxStop(&ri);
+ } else if (!strcasecmp(opt,"GROUPS") && c->argc == 3) {
+ /* XINFO GROUPS <key>. */
+ if (s->cgroups == NULL) {
+ addReplyArrayLen(c,0);
+ return;
+ }
+
+ addReplyArrayLen(c,raxSize(s->cgroups));
+ raxIterator ri;
+ raxStart(&ri,s->cgroups);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ streamCG *cg = ri.data;
+ addReplyMapLen(c,6);
+ addReplyBulkCString(c,"name");
+ addReplyBulkCBuffer(c,ri.key,ri.key_len);
+ addReplyBulkCString(c,"consumers");
+ addReplyLongLong(c,raxSize(cg->consumers));
+ addReplyBulkCString(c,"pending");
+ addReplyLongLong(c,raxSize(cg->pel));
+ addReplyBulkCString(c,"last-delivered-id");
+ addReplyStreamID(c,&cg->last_id);
+ addReplyBulkCString(c,"entries-read");
+ if (cg->entries_read != SCG_INVALID_ENTRIES_READ) {
+ addReplyLongLong(c,cg->entries_read);
+ } else {
+ addReplyNull(c);
+ }
+ addReplyBulkCString(c,"lag");
+ streamReplyWithCGLag(c,s,cg);
+ }
+ raxStop(&ri);
+ } else if (!strcasecmp(opt,"STREAM")) {
+ /* XINFO STREAM <key> [FULL [COUNT <count>]]. */
+ xinfoReplyWithStreamInfo(c,s);
+ } else {
+ addReplySubcommandSyntaxError(c);
+ }
+}
+
+/* Validate the integrity stream listpack entries structure. Both in term of a
+ * valid listpack, but also that the structure of the entries matches a valid
+ * stream. return 1 if valid 0 if not valid. */
+/* Validate the integrity stream listpack entries structure. Both in term of a
+ * valid listpack, but also that the structure of the entries matches a valid
+ * stream. return 1 if valid 0 if not valid. */
+int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep) {
+ int valid_record;
+ unsigned char *p, *next;
+
+ /* Since we don't want to run validation of all records twice, we'll
+ * run the listpack validation of just the header and do the rest here. */
+ if (!lpValidateIntegrity(lp, size, 0, NULL, NULL))
+ return 0;
+
+ /* In non-deep mode we just validated the listpack header (encoded size) */
+ if (!deep) return 1;
+
+ /* Walk the listpack: 'p' is the record being read, 'next' the one after. */
+ next = p = lpValidateFirst(lp);
+ if (!lpValidateNext(lp, &next, size)) return 0;
+ if (!p) return 0;
+
+ /* entry count */
+ int64_t entry_count = lpGetIntegerIfValid(p, &valid_record);
+ if (!valid_record) return 0;
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+
+ /* deleted */
+ int64_t deleted_count = lpGetIntegerIfValid(p, &valid_record);
+ if (!valid_record) return 0;
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+
+ /* num-of-fields */
+ int64_t master_fields = lpGetIntegerIfValid(p, &valid_record);
+ if (!valid_record) return 0;
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+
+ /* the field names */
+ for (int64_t j = 0; j < master_fields; j++) {
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+ }
+
+ /* the zero master entry terminator. */
+ int64_t zero = lpGetIntegerIfValid(p, &valid_record);
+ if (!valid_record || zero != 0) return 0;
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+
+ /* Deleted entries are still physically present in the listpack, so we
+ * must walk over them too. */
+ entry_count += deleted_count;
+ while (entry_count--) {
+ if (!p) return 0;
+ /* Each entry carries 3 mandatory extra records besides the fields:
+ * flags, id-ms-diff and id-seq-diff (lp-count itself is added below). */
+ int64_t fields = master_fields, extra_fields = 3;
+ int64_t flags = lpGetIntegerIfValid(p, &valid_record);
+ if (!valid_record) return 0;
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+
+ /* entry id */
+ lpGetIntegerIfValid(p, &valid_record);
+ if (!valid_record) return 0;
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+ lpGetIntegerIfValid(p, &valid_record);
+ if (!valid_record) return 0;
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+
+ if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) {
+ /* num-of-fields */
+ fields = lpGetIntegerIfValid(p, &valid_record);
+ if (!valid_record) return 0;
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+
+ /* the field names */
+ for (int64_t j = 0; j < fields; j++) {
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+ }
+
+ extra_fields += fields + 1;
+ }
+
+ /* the values */
+ for (int64_t j = 0; j < fields; j++) {
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+ }
+
+ /* lp-count: must match the number of records this entry occupies. */
+ int64_t lp_count = lpGetIntegerIfValid(p, &valid_record);
+ if (!valid_record) return 0;
+ if (lp_count != fields + extra_fields) return 0;
+ p = next; if (!lpValidateNext(lp, &next, size)) return 0;
+ }
+
+ /* After the last entry nothing must remain in the listpack. */
+ if (next)
+ return 0;
+
+ return 1;
+}
diff --git a/src/t_string.c b/src/t_string.c
new file mode 100644
index 0000000..2bce3ac
--- /dev/null
+++ b/src/t_string.c
@@ -0,0 +1,951 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include <math.h> /* isnan(), isinf() */
+
+/* Forward declarations */
+int getGenericCommand(client *c);
+
+/*-----------------------------------------------------------------------------
+ * String Commands
+ *----------------------------------------------------------------------------*/
+
+/* Check that a string of 'size' bytes extended by 'append' bytes stays within
+ * the configured proto-max-bulk-len limit. Returns C_OK if allowed, otherwise
+ * replies with an error and returns C_ERR. */
+static int checkStringLength(client *c, long long size, long long append) {
+ /* Masters and AOF-loading clients must apply the write unconditionally. */
+ if (mustObeyClient(c))
+ return C_OK;
+ /* 'uint64_t' cast is there just to prevent undefined behavior on overflow */
+ long long total = (uint64_t)size + append;
+ /* Test configured max-bulk-len representing a limit of the biggest string object,
+ * and also test for overflow. */
+ if (total > server.proto_max_bulk_len || total < size || total < append) {
+ addReplyError(c,"string exceeds maximum allowed size (proto-max-bulk-len)");
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+/* The setGenericCommand() function implements the SET operation with different
+ * options and variants. This function is called in order to implement the
+ * following commands: SET, SETEX, PSETEX, SETNX, GETSET.
+ *
+ * 'flags' changes the behavior of the command (NX, XX or GET, see below).
+ *
+ * 'expire' represents an expire to set in form of a Redis object as passed
+ * by the user. It is interpreted according to the specified 'unit'.
+ *
+ * 'ok_reply' and 'abort_reply' is what the function will reply to the client
+ * if the operation is performed, or when it is not because of NX or
+ * XX flags.
+ *
+ * If ok_reply is NULL "+OK" is used.
+ * If abort_reply is NULL, "$-1" is used. */
+
+#define OBJ_NO_FLAGS 0
+#define OBJ_SET_NX (1<<0) /* Set if key not exists. */
+#define OBJ_SET_XX (1<<1) /* Set if key exists. */
+#define OBJ_EX (1<<2) /* Set if time in seconds is given */
+#define OBJ_PX (1<<3) /* Set if time in ms in given */
+#define OBJ_KEEPTTL (1<<4) /* Set and keep the ttl */
+#define OBJ_SET_GET (1<<5) /* Set if want to get key before set */
+#define OBJ_EXAT (1<<6) /* Set if timestamp in second is given */
+#define OBJ_PXAT (1<<7) /* Set if timestamp in ms is given */
+#define OBJ_PERSIST (1<<8) /* Set if we need to remove the ttl */
+
+/* Forward declaration */
+static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds);
+
+void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
+ long long milliseconds = 0; /* initialized to avoid any harmness warning */
+ int found = 0;
+ int setkey_flags = 0;
+
+ /* Validate/convert the expire first, before any reply is emitted. */
+ if (expire && getExpireMillisecondsOrReply(c, expire, flags, unit, &milliseconds) != C_OK) {
+ return;
+ }
+
+ /* SET ... GET replies with the old value (or nil) before setting. */
+ if (flags & OBJ_SET_GET) {
+ if (getGenericCommand(c) == C_ERR) return;
+ }
+
+ found = (lookupKeyWrite(c->db,key) != NULL);
+
+ /* NX/XX abort: with GET the old value was already sent, so no extra reply. */
+ if ((flags & OBJ_SET_NX && found) ||
+ (flags & OBJ_SET_XX && !found))
+ {
+ if (!(flags & OBJ_SET_GET)) {
+ addReply(c, abort_reply ? abort_reply : shared.null[c->resp]);
+ }
+ return;
+ }
+
+ /* When expire is not NULL, we avoid deleting the TTL so it can be updated later instead of being deleted and then created again. */
+ setkey_flags |= ((flags & OBJ_KEEPTTL) || expire) ? SETKEY_KEEPTTL : 0;
+ setkey_flags |= found ? SETKEY_ALREADY_EXIST : SETKEY_DOESNT_EXIST;
+
+ setKey(c,c->db,key,val,setkey_flags);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id);
+
+ if (expire) {
+ setExpire(c,c->db,key,milliseconds);
+ /* Propagate as SET Key Value PXAT millisecond-timestamp if there is
+ * EX/PX/EXAT flag. */
+ if (!(flags & OBJ_PXAT)) {
+ robj *milliseconds_obj = createStringObjectFromLongLong(milliseconds);
+ rewriteClientCommandVector(c, 5, shared.set, key, val, shared.pxat, milliseconds_obj);
+ decrRefCount(milliseconds_obj);
+ }
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
+ }
+
+ if (!(flags & OBJ_SET_GET)) {
+ addReply(c, ok_reply ? ok_reply : shared.ok);
+ }
+
+ /* Propagate without the GET argument (Isn't needed if we had expire since in that case we completely re-written the command argv) */
+ if ((flags & OBJ_SET_GET) && !expire) {
+ int argc = 0;
+ int j;
+ robj **argv = zmalloc((c->argc-1)*sizeof(robj*));
+ for (j=0; j < c->argc; j++) {
+ char *a = c->argv[j]->ptr;
+ /* Skip GET which may be repeated multiple times. */
+ if (j >= 3 &&
+ (a[0] == 'g' || a[0] == 'G') &&
+ (a[1] == 'e' || a[1] == 'E') &&
+ (a[2] == 't' || a[2] == 'T') && a[3] == '\0')
+ continue;
+ argv[argc++] = c->argv[j];
+ incrRefCount(c->argv[j]);
+ }
+ replaceClientCommandVector(c, argc, argv);
+ }
+}
+
+/*
+ * Extract the `expire` argument of a given GET/SET command as an absolute timestamp in milliseconds.
+ *
+ * "client" is the client that sent the `expire` argument.
+ * "expire" is the `expire` argument to be extracted.
+ * "flags" represents the behavior of the command (e.g. PX or EX).
+ * "unit" is the original unit of the given `expire` argument (e.g. UNIT_SECONDS).
+ * "milliseconds" is output argument.
+ *
+ * If return C_OK, "milliseconds" output argument will be set to the resulting absolute timestamp.
+ * If return C_ERR, an error reply has been added to the given client.
+ */
+static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds) {
+ int ret = getLongLongFromObjectOrReply(c, expire, milliseconds, NULL);
+ if (ret != C_OK) {
+ return ret;
+ }
+
+ if (*milliseconds <= 0 || (unit == UNIT_SECONDS && *milliseconds > LLONG_MAX / 1000)) {
+ /* Negative value provided or multiplication is gonna overflow. */
+ addReplyErrorExpireTime(c);
+ return C_ERR;
+ }
+
+ if (unit == UNIT_SECONDS) *milliseconds *= 1000;
+
+ /* EX/PX are relative: convert to an absolute timestamp. EXAT/PXAT are
+ * already absolute. */
+ if ((flags & OBJ_PX) || (flags & OBJ_EX)) {
+ *milliseconds += commandTimeSnapshot();
+ }
+
+ if (*milliseconds <= 0) {
+ /* Overflow detected. */
+ addReplyErrorExpireTime(c);
+ return C_ERR;
+ }
+
+ return C_OK;
+}
+
+#define COMMAND_GET 0
+#define COMMAND_SET 1
+/*
+ * The parseExtendedStringArgumentsOrReply() function performs the common validation for extended
+ * string arguments used in SET and GET command.
+ *
+ * Get specific commands - PERSIST/DEL
+ * Set specific commands - XX/NX/GET
+ * Common commands - EX/EXAT/PX/PXAT/KEEPTTL
+ *
+ * Function takes pointers to client, flags, unit, pointer to pointer of expire obj if needed
+ * to be determined and command_type which can be COMMAND_GET or COMMAND_SET.
+ *
+ * If there are any syntax violations C_ERR is returned else C_OK is returned.
+ *
+ * Input flags are updated upon parsing the arguments. Unit and expire are updated if there are any
+ * EX/EXAT/PX/PXAT arguments. Unit is updated to millisecond if PX/PXAT is set.
+ */
+int parseExtendedStringArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, int command_type) {
+
+ /* GET options start after the key (argv[2]); SET after the value (argv[3]). */
+ int j = command_type == COMMAND_GET ? 2 : 3;
+ for (; j < c->argc; j++) {
+ char *opt = c->argv[j]->ptr;
+ robj *next = (j == c->argc-1) ? NULL : c->argv[j+1];
+
+ if ((opt[0] == 'n' || opt[0] == 'N') &&
+ (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
+ !(*flags & OBJ_SET_XX) && (command_type == COMMAND_SET))
+ {
+ /* NX: set only if the key does not exist. Excludes XX. */
+ *flags |= OBJ_SET_NX;
+ } else if ((opt[0] == 'x' || opt[0] == 'X') &&
+ (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
+ !(*flags & OBJ_SET_NX) && (command_type == COMMAND_SET))
+ {
+ /* XX: set only if the key already exists. Excludes NX. */
+ *flags |= OBJ_SET_XX;
+ } else if ((opt[0] == 'g' || opt[0] == 'G') &&
+ (opt[1] == 'e' || opt[1] == 'E') &&
+ (opt[2] == 't' || opt[2] == 'T') && opt[3] == '\0' &&
+ (command_type == COMMAND_SET))
+ {
+ /* GET: return the old value before setting (SET only). */
+ *flags |= OBJ_SET_GET;
+ } else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
+ !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && (command_type == COMMAND_SET))
+ {
+ /* KEEPTTL: keep the existing TTL. Excludes any expire option. */
+ *flags |= OBJ_KEEPTTL;
+ } else if (!strcasecmp(opt,"PERSIST") && (command_type == COMMAND_GET) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
+ !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) &&
+ !(*flags & OBJ_KEEPTTL))
+ {
+ /* PERSIST: drop the TTL (GETEX only). Excludes any expire option. */
+ *flags |= OBJ_PERSIST;
+ } else if ((opt[0] == 'e' || opt[0] == 'E') &&
+ (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
+ !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EXAT) && !(*flags & OBJ_PX) &&
+ !(*flags & OBJ_PXAT) && next)
+ {
+ /* EX <seconds>: relative expire in seconds; consumes 'next'. */
+ *flags |= OBJ_EX;
+ *expire = next;
+ j++;
+ } else if ((opt[0] == 'p' || opt[0] == 'P') &&
+ (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
+ !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
+ !(*flags & OBJ_PXAT) && next)
+ {
+ /* PX <milliseconds>: relative expire in ms; consumes 'next'. */
+ *flags |= OBJ_PX;
+ *unit = UNIT_MILLISECONDS;
+ *expire = next;
+ j++;
+ } else if ((opt[0] == 'e' || opt[0] == 'E') &&
+ (opt[1] == 'x' || opt[1] == 'X') &&
+ (opt[2] == 'a' || opt[2] == 'A') &&
+ (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' &&
+ !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_PX) &&
+ !(*flags & OBJ_PXAT) && next)
+ {
+ /* EXAT <seconds-timestamp>: absolute expire; consumes 'next'. */
+ *flags |= OBJ_EXAT;
+ *expire = next;
+ j++;
+ } else if ((opt[0] == 'p' || opt[0] == 'P') &&
+ (opt[1] == 'x' || opt[1] == 'X') &&
+ (opt[2] == 'a' || opt[2] == 'A') &&
+ (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' &&
+ !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
+ !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
+ !(*flags & OBJ_PX) && next)
+ {
+ /* PXAT <ms-timestamp>: absolute expire in ms; consumes 'next'. */
+ *flags |= OBJ_PXAT;
+ *unit = UNIT_MILLISECONDS;
+ *expire = next;
+ j++;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return C_ERR;
+ }
+ }
+ return C_OK;
+}
+
+/* SET key value [NX] [XX] [KEEPTTL] [GET] [EX <seconds>] [PX <milliseconds>]
+ * [EXAT <seconds-timestamp>][PXAT <milliseconds-timestamp>] */
+void setCommand(client *c) {
+ robj *expire = NULL;
+ int unit = UNIT_SECONDS;
+ int flags = OBJ_NO_FLAGS;
+
+ /* Parse the extended options; on syntax error it already replied. */
+ if (parseExtendedStringArgumentsOrReply(c,&flags,&unit,&expire,COMMAND_SET) != C_OK) {
+ return;
+ }
+
+ /* Try to store the value in a more memory-efficient encoding. */
+ c->argv[2] = tryObjectEncoding(c->argv[2]);
+ setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL);
+}
+
+/* SETNX key value -- set only if the key does not exist; replies 1/0. */
+void setnxCommand(client *c) {
+ c->argv[2] = tryObjectEncoding(c->argv[2]);
+ setGenericCommand(c,OBJ_SET_NX,c->argv[1],c->argv[2],NULL,0,shared.cone,shared.czero);
+}
+
+/* SETEX key seconds value -- set with a TTL expressed in seconds. */
+void setexCommand(client *c) {
+ c->argv[3] = tryObjectEncoding(c->argv[3]);
+ setGenericCommand(c,OBJ_EX,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
+}
+
+/* PSETEX key milliseconds value -- set with a TTL expressed in milliseconds. */
+void psetexCommand(client *c) {
+ c->argv[3] = tryObjectEncoding(c->argv[3]);
+ setGenericCommand(c,OBJ_PX,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
+}
+
+/* Reply with the string value of c->argv[1] (or nil if missing).
+ * Returns C_OK on success/missing key, C_ERR on a wrong-type error,
+ * so callers (GET/GETDEL/GETSET/SET..GET) know whether to proceed. */
+int getGenericCommand(client *c) {
+ robj *o;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) == NULL)
+ return C_OK;
+
+ if (checkType(c,o,OBJ_STRING)) {
+ return C_ERR;
+ }
+
+ addReplyBulk(c,o);
+ return C_OK;
+}
+
+/* GET key -- thin wrapper around getGenericCommand(). */
+void getCommand(client *c) {
+ getGenericCommand(c);
+}
+
+/*
+ * GETEX <key> [PERSIST][EX seconds][PX milliseconds][EXAT seconds-timestamp][PXAT milliseconds-timestamp]
+ *
+ * The getexCommand() function implements extended options and variants of the GET command. Unlike GET
+ * command this command is not read-only.
+ *
+ * The default behavior when no options are specified is same as GET and does not alter any TTL.
+ *
+ * Only one of the below options can be used at a given time.
+ *
+ * 1. PERSIST removes any TTL associated with the key.
+ * 2. EX Set expiry TTL in seconds.
+ * 3. PX Set expiry TTL in milliseconds.
+ * 4. EXAT Same like EX instead of specifying the number of seconds representing the TTL
+ * (time to live), it takes an absolute Unix timestamp
+ * 5. PXAT Same like PX instead of specifying the number of milliseconds representing the TTL
+ * (time to live), it takes an absolute Unix timestamp
+ *
+ * Command would either return the bulk string, error or nil.
+ */
+void getexCommand(client *c) {
+ robj *expire = NULL;
+ int unit = UNIT_SECONDS;
+ int flags = OBJ_NO_FLAGS;
+
+ if (parseExtendedStringArgumentsOrReply(c,&flags,&unit,&expire,COMMAND_GET) != C_OK) {
+ return;
+ }
+
+ robj *o;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) == NULL)
+ return;
+
+ if (checkType(c,o,OBJ_STRING)) {
+ return;
+ }
+
+ /* Validate the expiration time value first */
+ long long milliseconds = 0;
+ if (expire && getExpireMillisecondsOrReply(c, expire, flags, unit, &milliseconds) != C_OK) {
+ return;
+ }
+
+ /* We need to do this before we expire the key or delete it */
+ addReplyBulk(c,o);
+
+ /* This command is never propagated as is. It is either propagated as PEXPIRE[AT],DEL,UNLINK or PERSIST.
+ * This why it doesn't need special handling in feedAppendOnlyFile to convert relative expire time to absolute one. */
+ if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(milliseconds)) {
+ /* When PXAT/EXAT absolute timestamp is specified, there can be a chance that timestamp
+ * has already elapsed so delete the key in that case. */
+ int deleted = dbGenericDelete(c->db, c->argv[1], server.lazyfree_lazy_expire, DB_FLAG_KEY_EXPIRED);
+ serverAssert(deleted);
+ /* Propagate as UNLINK when lazy expiration is configured, DEL otherwise. */
+ robj *aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
+ rewriteClientCommandVector(c,2,aux,c->argv[1]);
+ signalModifiedKey(c, c->db, c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
+ server.dirty++;
+ } else if (expire) {
+ setExpire(c,c->db,c->argv[1],milliseconds);
+ /* Propagate as PXEXPIREAT millisecond-timestamp if there is
+ * EX/PX/EXAT/PXAT flag and the key has not expired. */
+ robj *milliseconds_obj = createStringObjectFromLongLong(milliseconds);
+ rewriteClientCommandVector(c,3,shared.pexpireat,c->argv[1],milliseconds_obj);
+ decrRefCount(milliseconds_obj);
+ signalModifiedKey(c, c->db, c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",c->argv[1],c->db->id);
+ server.dirty++;
+ } else if (flags & OBJ_PERSIST) {
+ /* Only report a write (and propagate PERSIST) if a TTL was removed. */
+ if (removeExpire(c->db, c->argv[1])) {
+ signalModifiedKey(c, c->db, c->argv[1]);
+ rewriteClientCommandVector(c, 2, shared.persist, c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"persist",c->argv[1],c->db->id);
+ server.dirty++;
+ }
+ }
+}
+
+/* GETDEL key -- reply with the value (via getGenericCommand) then delete
+ * the key. Deletion is synchronous here (dbSyncDelete). */
+void getdelCommand(client *c) {
+ if (getGenericCommand(c) == C_ERR) return;
+ if (dbSyncDelete(c->db, c->argv[1])) {
+ /* Propagate as DEL command */
+ rewriteClientCommandVector(c,2,shared.del,c->argv[1]);
+ signalModifiedKey(c, c->db, c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
+ server.dirty++;
+ }
+}
+
+/* GETSET key value
+ *
+ * Reply with the old string value of 'key' (or nil) and overwrite it with
+ * 'value'. The reply is emitted by getGenericCommand() BEFORE setKey()
+ * replaces the stored object; do not reorder. Replicated as SET. */
+void getsetCommand(client *c) {
+    if (getGenericCommand(c) == C_ERR) return;
+    c->argv[2] = tryObjectEncoding(c->argv[2]);
+    setKey(c,c->db,c->argv[1],c->argv[2],0);
+    notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[1],c->db->id);
+    server.dirty++;
+
+    /* Propagate as SET command */
+    rewriteClientCommandArgument(c,0,shared.set);
+}
+
+/* SETRANGE key offset value
+ *
+ * Overwrite part of the string at 'key' starting at 'offset', zero-padding
+ * the gap if the string is shorter than offset. Replies with the resulting
+ * string length. An empty 'value' never creates or grows the key. */
+void setrangeCommand(client *c) {
+    robj *o;
+    long offset;
+    sds value = c->argv[3]->ptr;
+
+    if (getLongFromObjectOrReply(c,c->argv[2],&offset,NULL) != C_OK)
+        return;
+
+    if (offset < 0) {
+        addReplyError(c,"offset is out of range");
+        return;
+    }
+
+    o = lookupKeyWrite(c->db,c->argv[1]);
+    if (o == NULL) {
+        /* Return 0 when setting nothing on a non-existing string */
+        if (sdslen(value) == 0) {
+            addReply(c,shared.czero);
+            return;
+        }
+
+        /* Return when the resulting string exceeds allowed size */
+        if (checkStringLength(c,offset,sdslen(value)) != C_OK)
+            return;
+
+        /* sdsnewlen(NULL, len) gives a zeroed buffer: bytes before 'offset'
+         * are the required NUL padding. */
+        o = createObject(OBJ_STRING,sdsnewlen(NULL, offset+sdslen(value)));
+        dbAdd(c->db,c->argv[1],o);
+    } else {
+        size_t olen;
+
+        /* Key exists, check type */
+        if (checkType(c,o,OBJ_STRING))
+            return;
+
+        /* Return existing string length when setting nothing */
+        olen = stringObjectLen(o);
+        if (sdslen(value) == 0) {
+            addReplyLongLong(c,olen);
+            return;
+        }
+
+        /* Return when the resulting string exceeds allowed size */
+        if (checkStringLength(c,offset,sdslen(value)) != C_OK)
+            return;
+
+        /* Create a copy when the object is shared or encoded. */
+        o = dbUnshareStringValue(c->db,c->argv[1],o);
+    }
+
+    if (sdslen(value) > 0) {
+        /* Grow (zero-filled) if needed, then splice the new bytes in. */
+        o->ptr = sdsgrowzero(o->ptr,offset+sdslen(value));
+        memcpy((char*)o->ptr+offset,value,sdslen(value));
+        signalModifiedKey(c,c->db,c->argv[1]);
+        notifyKeyspaceEvent(NOTIFY_STRING,
+            "setrange",c->argv[1],c->db->id);
+        server.dirty++;
+    }
+    addReplyLongLong(c,sdslen(o->ptr));
+}
+
+/* GETRANGE key start end
+ *
+ * Reply with the substring of the string value at 'key' delimited by the
+ * inclusive, possibly negative (counted from the end) indexes start/end. */
+void getrangeCommand(client *c) {
+    robj *o;
+    long long start, end;
+    /* NOTE(review): local 'strlen' shadows the libc function; kept as-is
+     * to stay byte-compatible, but worth renaming upstream. */
+    char *str, llbuf[32];
+    size_t strlen;
+
+    if (getLongLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK)
+        return;
+    if (getLongLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK)
+        return;
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptybulk)) == NULL ||
+        checkType(c,o,OBJ_STRING)) return;
+
+    /* Materialize int-encoded values into a stack buffer so that the
+     * slicing below works on a plain char range in both encodings. */
+    if (o->encoding == OBJ_ENCODING_INT) {
+        str = llbuf;
+        strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
+    } else {
+        str = o->ptr;
+        strlen = sdslen(str);
+    }
+
+    /* Convert negative indexes */
+    if (start < 0 && end < 0 && start > end) {
+        addReply(c,shared.emptybulk);
+        return;
+    }
+    if (start < 0) start = strlen+start;
+    if (end < 0) end = strlen+end;
+    if (start < 0) start = 0;
+    if (end < 0) end = 0;
+    /* 'end' is non-negative here, so the unsigned cast is safe. */
+    if ((unsigned long long)end >= strlen) end = strlen-1;
+
+    /* Precondition: end >= 0 && end < strlen, so the only condition where
+     * nothing can be returned is: start > end. */
+    if (start > end || strlen == 0) {
+        addReply(c,shared.emptybulk);
+    } else {
+        addReplyBulkCBuffer(c,(char*)str+start,end-start+1);
+    }
+}
+
+/* MGET key [key ...]
+ *
+ * Reply with an array holding, for each requested key, its string value,
+ * or nil when the key is missing or holds a non-string value. */
+void mgetCommand(client *c) {
+    addReplyArrayLen(c,c->argc-1);
+    for (int argidx = 1; argidx < c->argc; argidx++) {
+        robj *val = lookupKeyRead(c->db,c->argv[argidx]);
+        if (val != NULL && val->type == OBJ_STRING) {
+            addReplyBulk(c,val);
+        } else {
+            /* Missing key or wrong type: both answered with nil. */
+            addReplyNull(c);
+        }
+    }
+}
+
+/* Shared implementation of MSET and MSETNX.
+ *
+ * 'nx' != 0 selects the MSETNX semantics: set nothing and reply 0 if any
+ * of the given keys already exists; otherwise set all pairs and reply 1.
+ * With nx == 0 (MSET) all pairs are set unconditionally and OK is sent. */
+void msetGenericCommand(client *c, int nx) {
+    int j;
+
+    if ((c->argc % 2) == 0) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    /* Handle the NX flag. The MSETNX semantic is to return zero and don't
+     * set anything if at least one key already exists. */
+    if (nx) {
+        for (j = 1; j < c->argc; j += 2) {
+            if (lookupKeyWrite(c->db,c->argv[j]) != NULL) {
+                addReply(c, shared.czero);
+                return;
+            }
+        }
+    }
+
+    int setkey_flags = nx ? SETKEY_DOESNT_EXIST : 0;
+    for (j = 1; j < c->argc; j += 2) {
+        c->argv[j+1] = tryObjectEncoding(c->argv[j+1]);
+        setKey(c, c->db, c->argv[j], c->argv[j + 1], setkey_flags);
+        notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[j],c->db->id);
+        /* In MSETNX, It could be that we're overriding the same key, we can't be sure it doesn't exist. */
+        if (nx)
+            setkey_flags = SETKEY_ADD_OR_UPDATE;
+    }
+    server.dirty += (c->argc-1)/2;
+    addReply(c, nx ? shared.cone : shared.ok);
+}
+
+/* MSET key value [key value ...] */
+void msetCommand(client *c) {
+    msetGenericCommand(c,0);
+}
+
+/* MSETNX key value [key value ...] */
+void msetnxCommand(client *c) {
+    msetGenericCommand(c,1);
+}
+
+/* Shared implementation of INCR, DECR, INCRBY and DECRBY.
+ *
+ * Adds 'incr' (possibly negative) to the integer value stored at argv[1],
+ * creating the key at 'incr' if missing, with an explicit signed-overflow
+ * check performed BEFORE the addition (signed overflow is UB in C). */
+void incrDecrCommand(client *c, long long incr) {
+    long long value, oldvalue;
+    robj *o, *new;
+
+    o = lookupKeyWrite(c->db,c->argv[1]);
+    if (checkType(c,o,OBJ_STRING)) return;
+    /* A NULL object parses as 0, so a missing key starts from zero. */
+    if (getLongLongFromObjectOrReply(c,o,&value,NULL) != C_OK) return;
+
+    oldvalue = value;
+    if ((incr < 0 && oldvalue < 0 && incr < (LLONG_MIN-oldvalue)) ||
+        (incr > 0 && oldvalue > 0 && incr > (LLONG_MAX-oldvalue))) {
+        addReplyError(c,"increment or decrement would overflow");
+        return;
+    }
+    value += incr;
+
+    /* Fast path: mutate the object in place, but only when it is not a
+     * shared object (refcount == 1), is int-encoded, and the new value is
+     * neither one of the shared small integers nor outside long range. */
+    if (o && o->refcount == 1 && o->encoding == OBJ_ENCODING_INT &&
+        (value < 0 || value >= OBJ_SHARED_INTEGERS) &&
+        value >= LONG_MIN && value <= LONG_MAX)
+    {
+        new = o;
+        o->ptr = (void*)((long)value);
+    } else {
+        new = createStringObjectFromLongLongForValue(value);
+        if (o) {
+            dbReplaceValue(c->db,c->argv[1],new);
+        } else {
+            dbAdd(c->db,c->argv[1],new);
+        }
+    }
+    signalModifiedKey(c,c->db,c->argv[1]);
+    /* All four commands fire the same "incrby" event. */
+    notifyKeyspaceEvent(NOTIFY_STRING,"incrby",c->argv[1],c->db->id);
+    server.dirty++;
+    addReplyLongLong(c, value);
+}
+
+/* INCR key */
+void incrCommand(client *c) {
+    incrDecrCommand(c,1);
+}
+
+/* DECR key */
+void decrCommand(client *c) {
+    incrDecrCommand(c,-1);
+}
+
+/* INCRBY key increment */
+void incrbyCommand(client *c) {
+    long long incr;
+
+    if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != C_OK) return;
+    incrDecrCommand(c,incr);
+}
+
+/* DECRBY key decrement */
+void decrbyCommand(client *c) {
+    long long incr;
+
+    if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != C_OK) return;
+    /* Overflow check: negating LLONG_MIN will cause an overflow */
+    if (incr == LLONG_MIN) {
+        addReplyError(c, "decrement would overflow");
+        return;
+    }
+    incrDecrCommand(c,-incr);
+}
+
+/* INCRBYFLOAT key increment
+ *
+ * Adds the long double 'increment' to the value at 'key' (0 if missing),
+ * rejecting results that are NaN or infinite. */
+void incrbyfloatCommand(client *c) {
+    long double incr, value;
+    robj *o, *new;
+
+    o = lookupKeyWrite(c->db,c->argv[1]);
+    if (checkType(c,o,OBJ_STRING)) return;
+    if (getLongDoubleFromObjectOrReply(c,o,&value,NULL) != C_OK ||
+        getLongDoubleFromObjectOrReply(c,c->argv[2],&incr,NULL) != C_OK)
+        return;
+
+    value += incr;
+    if (isnan(value) || isinf(value)) {
+        addReplyError(c,"increment would produce NaN or Infinity");
+        return;
+    }
+    new = createStringObjectFromLongDouble(value,1);
+    if (o)
+        dbReplaceValue(c->db,c->argv[1],new);
+    else
+        dbAdd(c->db,c->argv[1],new);
+    signalModifiedKey(c,c->db,c->argv[1]);
+    notifyKeyspaceEvent(NOTIFY_STRING,"incrbyfloat",c->argv[1],c->db->id);
+    server.dirty++;
+    addReplyBulk(c,new);
+
+    /* Always replicate INCRBYFLOAT as a SET command with the final value
+     * in order to make sure that differences in float precision or formatting
+     * will not create differences in replicas or after an AOF restart. */
+    rewriteClientCommandArgument(c,0,shared.set);
+    rewriteClientCommandArgument(c,2,new);
+    rewriteClientCommandArgument(c,3,shared.keepttl);
+}
+
+/* APPEND key value
+ *
+ * Append 'value' to the string at 'key', creating the key if missing.
+ * Replies with the total length of the string after the append. */
+void appendCommand(client *c) {
+    size_t totlen;
+    robj *o, *append;
+
+    o = lookupKeyWrite(c->db,c->argv[1]);
+    if (o == NULL) {
+        /* Create the key: the argument object itself is stored, so it
+         * needs an extra reference on top of the client's. */
+        c->argv[2] = tryObjectEncoding(c->argv[2]);
+        dbAdd(c->db,c->argv[1],c->argv[2]);
+        incrRefCount(c->argv[2]);
+        totlen = stringObjectLen(c->argv[2]);
+    } else {
+        /* Key exists, check type */
+        if (checkType(c,o,OBJ_STRING))
+            return;
+
+        /* "append" is an argument, so always an sds */
+        append = c->argv[2];
+        if (checkStringLength(c,stringObjectLen(o),sdslen(append->ptr)) != C_OK)
+            return;
+
+        /* Append the value */
+        o = dbUnshareStringValue(c->db,c->argv[1],o);
+        o->ptr = sdscatlen(o->ptr,append->ptr,sdslen(append->ptr));
+        totlen = sdslen(o->ptr);
+    }
+    signalModifiedKey(c,c->db,c->argv[1]);
+    notifyKeyspaceEvent(NOTIFY_STRING,"append",c->argv[1],c->db->id);
+    server.dirty++;
+    addReplyLongLong(c,totlen);
+}
+
+/* STRLEN key
+ *
+ * Reply with the length of the string stored at 'key', or 0 when the key
+ * does not exist. Wrong-type keys get the standard type error. */
+void strlenCommand(client *c) {
+    robj *val = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
+    if (val == NULL) return;
+    if (checkType(c,val,OBJ_STRING)) return;
+    addReplyLongLong(c,stringObjectLen(val));
+}
+
+/* LCS key1 key2 [LEN] [IDX] [MINMATCHLEN <len>] [WITHMATCHLEN] */
+/* Longest Common Subsequence of the two string keys, computed with the
+ * classic O(alen*blen) dynamic-programming table. Depending on the options
+ * it replies with the LCS string itself, its length (LEN), or the matching
+ * index ranges in both strings (IDX [WITHMATCHLEN]). The table size is
+ * bounded by proto-max-bulk-len and allocation is overflow-checked. */
+void lcsCommand(client *c) {
+    uint32_t i, j;
+    long long minmatchlen = 0;
+    sds a = NULL, b = NULL;
+    int getlen = 0, getidx = 0, withmatchlen = 0;
+    robj *obja = NULL, *objb = NULL;
+
+    obja = lookupKeyRead(c->db,c->argv[1]);
+    objb = lookupKeyRead(c->db,c->argv[2]);
+    if ((obja && obja->type != OBJ_STRING) ||
+        (objb && objb->type != OBJ_STRING))
+    {
+        addReplyError(c,
+            "The specified keys must contain string values");
+        /* Don't cleanup the objects, we need to do that
+         * only after calling getDecodedObject(). */
+        obja = NULL;
+        objb = NULL;
+        goto cleanup;
+    }
+    /* From here on obja/objb hold our own references (decoded copies or
+     * fresh empty strings) and must be released at 'cleanup'. */
+    obja = obja ? getDecodedObject(obja) : createStringObject("",0);
+    objb = objb ? getDecodedObject(objb) : createStringObject("",0);
+    a = obja->ptr;
+    b = objb->ptr;
+
+    for (j = 3; j < (uint32_t)c->argc; j++) {
+        char *opt = c->argv[j]->ptr;
+        int moreargs = (c->argc-1) - j;
+
+        if (!strcasecmp(opt,"IDX")) {
+            getidx = 1;
+        } else if (!strcasecmp(opt,"LEN")) {
+            getlen = 1;
+        } else if (!strcasecmp(opt,"WITHMATCHLEN")) {
+            withmatchlen = 1;
+        } else if (!strcasecmp(opt,"MINMATCHLEN") && moreargs) {
+            if (getLongLongFromObjectOrReply(c,c->argv[j+1],&minmatchlen,NULL)
+                != C_OK) goto cleanup;
+            if (minmatchlen < 0) minmatchlen = 0;
+            j++;
+        } else {
+            addReplyErrorObject(c,shared.syntaxerr);
+            goto cleanup;
+        }
+    }
+
+    /* Complain if the user passed ambiguous parameters. */
+    if (getlen && getidx) {
+        addReplyError(c,
+            "If you want both the length and indexes, please just use IDX.");
+        goto cleanup;
+    }
+
+    /* Detect string truncation or later overflows. */
+    if (sdslen(a) >= UINT32_MAX-1 || sdslen(b) >= UINT32_MAX-1) {
+        addReplyError(c, "String too long for LCS");
+        goto cleanup;
+    }
+
+    /* Compute the LCS using the vanilla dynamic programming technique of
+     * building a table of LCS(x,y) substrings. */
+    uint32_t alen = sdslen(a);
+    uint32_t blen = sdslen(b);
+
+    /* Setup an uint32_t array to store at LCS[i,j] the length of the
+     * LCS A0..i-1, B0..j-1. Note that we have a linear array here, so
+     * we index it as LCS[j+(blen+1)*i] */
+    #define LCS(A,B) lcs[(B)+((A)*(blen+1))]
+
+    /* Try to allocate the LCS table, and abort on overflow or insufficient memory.
+     * The division check guards against lcssize*sizeof(uint32_t) wrapping. */
+    unsigned long long lcssize = (unsigned long long)(alen+1)*(blen+1); /* Can't overflow due to the size limits above. */
+    unsigned long long lcsalloc = lcssize * sizeof(uint32_t);
+    uint32_t *lcs = NULL;
+    if (lcsalloc < SIZE_MAX && lcsalloc / lcssize == sizeof(uint32_t)) {
+        if (lcsalloc > (size_t)server.proto_max_bulk_len) {
+            addReplyError(c, "Insufficient memory, transient memory for LCS exceeds proto-max-bulk-len");
+            goto cleanup;
+        }
+        lcs = ztrymalloc(lcsalloc);
+    }
+    if (!lcs) {
+        addReplyError(c, "Insufficient memory, failed allocating transient memory for LCS");
+        goto cleanup;
+    }
+
+    /* Start building the LCS table. */
+    for (uint32_t i = 0; i <= alen; i++) {
+        for (uint32_t j = 0; j <= blen; j++) {
+            if (i == 0 || j == 0) {
+                /* If one substring has length of zero, the
+                 * LCS length is zero. */
+                LCS(i,j) = 0;
+            } else if (a[i-1] == b[j-1]) {
+                /* The len LCS (and the LCS itself) of two
+                 * sequences with the same final character, is the
+                 * LCS of the two sequences without the last char
+                 * plus that last char. */
+                LCS(i,j) = LCS(i-1,j-1)+1;
+            } else {
+                /* If the last character is different, take the longest
+                 * between the LCS of the first string and the second
+                 * minus the last char, and the reverse. */
+                uint32_t lcs1 = LCS(i-1,j);
+                uint32_t lcs2 = LCS(i,j-1);
+                LCS(i,j) = lcs1 > lcs2 ? lcs1 : lcs2;
+            }
+        }
+    }
+
+    /* Store the actual LCS string in "result" if needed. We create
+     * it backward, but the length is already known, we store it into idx. */
+    uint32_t idx = LCS(alen,blen);
+    sds result = NULL;        /* Resulting LCS string. */
+    void *arraylenptr = NULL; /* Deferred length of the array for IDX. */
+    uint32_t arange_start = alen, /* alen signals that values are not set. */
+             arange_end = 0,
+             brange_start = 0,
+             brange_end = 0;
+
+    /* Do we need to compute the actual LCS string? Allocate it in that case. */
+    int computelcs = getidx || !getlen;
+    if (computelcs) result = sdsnewlen(SDS_NOINIT,idx);
+
+    /* Start with a deferred array if we have to emit the ranges. */
+    uint32_t arraylen = 0;  /* Number of ranges emitted in the array. */
+    if (getidx) {
+        addReplyMapLen(c,2);
+        addReplyBulkCString(c,"matches");
+        arraylenptr = addReplyDeferredLen(c);
+    }
+
+    /* Backtrack through the table from (alen,blen) reconstructing the LCS
+     * string and, for IDX, the contiguous match ranges. */
+    i = alen, j = blen;
+    while (computelcs && i > 0 && j > 0) {
+        int emit_range = 0;
+        if (a[i-1] == b[j-1]) {
+            /* If there is a match, store the character and reduce
+             * the indexes to look for a new match. */
+            result[idx-1] = a[i-1];
+
+            /* Track the current range. */
+            if (arange_start == alen) {
+                arange_start = i-1;
+                arange_end = i-1;
+                brange_start = j-1;
+                brange_end = j-1;
+            } else {
+                /* Let's see if we can extend the range backward since
+                 * it is contiguous. */
+                if (arange_start == i && brange_start == j) {
+                    arange_start--;
+                    brange_start--;
+                } else {
+                    emit_range = 1;
+                }
+            }
+            /* Emit the range if we matched with the first byte of
+             * one of the two strings. We'll exit the loop ASAP. */
+            if (arange_start == 0 || brange_start == 0) emit_range = 1;
+            idx--; i--; j--;
+        } else {
+            /* Otherwise reduce i and j depending on the largest
+             * LCS between, to understand what direction we need to go. */
+            uint32_t lcs1 = LCS(i-1,j);
+            uint32_t lcs2 = LCS(i,j-1);
+            if (lcs1 > lcs2)
+                i--;
+            else
+                j--;
+            if (arange_start != alen) emit_range = 1;
+        }
+
+        /* Emit the current range if needed. */
+        uint32_t match_len = arange_end - arange_start + 1;
+        if (emit_range) {
+            if (minmatchlen == 0 || match_len >= minmatchlen) {
+                /* Note that we also emit the ranges here when getlen
+                 * is set but getidx is not: the deferred pointer is NULL
+                 * in that case so nothing is actually sent. */
+                if (arraylenptr) {
+                    addReplyArrayLen(c,2+withmatchlen);
+                    addReplyArrayLen(c,2);
+                    addReplyLongLong(c,arange_start);
+                    addReplyLongLong(c,arange_end);
+                    addReplyArrayLen(c,2);
+                    addReplyLongLong(c,brange_start);
+                    addReplyLongLong(c,brange_end);
+                    if (withmatchlen) addReplyLongLong(c,match_len);
+                    arraylen++;
+                }
+            }
+            arange_start = alen; /* Restart at the next match. */
+        }
+    }
+
+    /* Signal modified key, increment dirty, ... */
+
+    /* Reply depending on the given options. */
+    if (arraylenptr) {
+        addReplyBulkCString(c,"len");
+        addReplyLongLong(c,LCS(alen,blen));
+        setDeferredArrayLen(c,arraylenptr,arraylen);
+    } else if (getlen) {
+        addReplyLongLong(c,LCS(alen,blen));
+    } else {
+        /* Ownership of 'result' is transferred to the reply. */
+        addReplyBulkSds(c,result);
+        result = NULL;
+    }
+
+    /* Cleanup. */
+    sdsfree(result);
+    zfree(lcs);
+
+cleanup:
+    if (obja) decrRefCount(obja);
+    if (objb) decrRefCount(objb);
+    return;
+}
+
diff --git a/src/t_zset.c b/src/t_zset.c
new file mode 100644
index 0000000..7717a4a
--- /dev/null
+++ b/src/t_zset.c
@@ -0,0 +1,4460 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-----------------------------------------------------------------------------
+ * Sorted set API
+ *----------------------------------------------------------------------------*/
+
+/* ZSETs are ordered sets using two data structures to hold the same elements
+ * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
+ * data structure.
+ *
+ * The elements are added to a hash table mapping Redis objects to scores.
+ * At the same time the elements are added to a skip list mapping scores
+ * to Redis objects (so objects are sorted by scores in this "view").
+ *
+ * Note that the SDS string representing the element is the same in both
+ * the hash table and skiplist in order to save memory. What we do in order
+ * to manage the shared SDS string more easily is to free the SDS string
+ * only in zslFreeNode(). The dictionary has no value free method set.
+ * So we should always remove an element from the dictionary, and later from
+ * the skiplist.
+ *
+ * This skiplist implementation is almost a C translation of the original
+ * algorithm described by William Pugh in "Skip Lists: A Probabilistic
+ * Alternative to Balanced Trees", modified in three ways:
+ * a) this implementation allows for repeated scores.
+ * b) the comparison is not just by key (our 'score') but by satellite data.
+ * c) there is a back pointer, so it's a doubly linked list with the back
+ * pointers being only at "level 1". This allows to traverse the list
+ * from tail to head, useful for ZREVRANGE. */
+
+#include "server.h"
+#include "intset.h" /* Compact integer set structure */
+#include <math.h>
+
+/*-----------------------------------------------------------------------------
+ * Skiplist implementation of the low level API
+ *----------------------------------------------------------------------------*/
+
+int zslLexValueGteMin(sds value, zlexrangespec *spec);
+int zslLexValueLteMax(sds value, zlexrangespec *spec);
+void zsetConvertAndExpand(robj *zobj, int encoding, unsigned long cap);
+
+/* Allocate a skiplist node able to hold 'level' forward pointers.
+ * Ownership of the SDS string 'ele' passes to the node. */
+zskiplistNode *zslCreateNode(int level, double score, sds ele) {
+    size_t bytes = sizeof(zskiplistNode) + level*sizeof(struct zskiplistLevel);
+    zskiplistNode *node = zmalloc(bytes);
+    node->score = score;
+    node->ele = ele;
+    return node;
+}
+
+/* Create a new, empty skiplist. The header node is a sentinel with
+ * ZSKIPLIST_MAXLEVEL levels, a NULL element and score 0; it is never
+ * returned to callers as a real element. */
+zskiplist *zslCreate(void) {
+    int j;
+    zskiplist *zsl;
+
+    zsl = zmalloc(sizeof(*zsl));
+    zsl->level = 1;
+    zsl->length = 0;
+    zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
+    for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
+        zsl->header->level[j].forward = NULL;
+        zsl->header->level[j].span = 0;
+    }
+    zsl->header->backward = NULL;
+    zsl->tail = NULL;
+    return zsl;
+}
+
+/* Free the specified skiplist node. The referenced SDS string representation
+ * of the element is freed too, unless node->ele is set to NULL before calling
+ * this function (sdsfree(NULL) is a no-op). */
+void zslFreeNode(zskiplistNode *node) {
+    sdsfree(node->ele);
+    zfree(node);
+}
+
+/* Free a whole skiplist, including every node's SDS element. */
+void zslFree(zskiplist *zsl) {
+    /* The first real node is read from the header BEFORE the header is
+     * freed below, so freeing the header first is safe. */
+    zskiplistNode *node = zsl->header->level[0].forward, *next;
+
+    zfree(zsl->header);
+    while(node) {
+        next = node->level[0].forward;
+        zslFreeNode(node);
+        node = next;
+    }
+    zfree(zsl);
+}
+
+/* Return a random level in [1, ZSKIPLIST_MAXLEVEL] for a new skiplist
+ * node, following a power-law-like distribution: each extra level is
+ * granted with probability ZSKIPLIST_P. */
+int zslRandomLevel(void) {
+    static const int threshold = ZSKIPLIST_P*RAND_MAX;
+    int lvl = 1;
+    while (random() < threshold)
+        lvl += 1;
+    /* Clamp to the maximum number of levels a node may have. */
+    if (lvl < ZSKIPLIST_MAXLEVEL) return lvl;
+    return ZSKIPLIST_MAXLEVEL;
+}
+
+/* Insert a new node in the skiplist. Assumes the element does not already
+ * exist (up to the caller to enforce that). The skiplist takes ownership
+ * of the passed SDS string 'ele'.
+ *
+ * update[i] is the rightmost node at level i that precedes the insertion
+ * point; rank[i] is its 0-based rank. Both are needed to splice the new
+ * node in and to fix the span counters. Returns the new node. */
+zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
+    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+    unsigned long rank[ZSKIPLIST_MAXLEVEL];
+    int i, level;
+
+    serverAssert(!isnan(score));
+    x = zsl->header;
+    for (i = zsl->level-1; i >= 0; i--) {
+        /* store rank that is crossed to reach the insert position */
+        rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
+        while (x->level[i].forward &&
+                (x->level[i].forward->score < score ||
+                    (x->level[i].forward->score == score &&
+                    sdscmp(x->level[i].forward->ele,ele) < 0)))
+        {
+            rank[i] += x->level[i].span;
+            x = x->level[i].forward;
+        }
+        update[i] = x;
+    }
+    /* we assume the element is not already inside, since we allow duplicated
+     * scores, reinserting the same element should never happen since the
+     * caller of zslInsert() should test in the hash table if the element is
+     * already inside or not. */
+    level = zslRandomLevel();
+    if (level > zsl->level) {
+        /* New levels start at the header with a span covering the whole
+         * list; they will be adjusted by the splice below. */
+        for (i = zsl->level; i < level; i++) {
+            rank[i] = 0;
+            update[i] = zsl->header;
+            update[i]->level[i].span = zsl->length;
+        }
+        zsl->level = level;
+    }
+    x = zslCreateNode(level,score,ele);
+    for (i = 0; i < level; i++) {
+        x->level[i].forward = update[i]->level[i].forward;
+        update[i]->level[i].forward = x;
+
+        /* update span covered by update[i] as x is inserted here */
+        x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]);
+        update[i]->level[i].span = (rank[0] - rank[i]) + 1;
+    }
+
+    /* increment span for untouched levels */
+    for (i = level; i < zsl->level; i++) {
+        update[i]->level[i].span++;
+    }
+
+    x->backward = (update[0] == zsl->header) ? NULL : update[0];
+    if (x->level[0].forward)
+        x->level[0].forward->backward = x;
+    else
+        zsl->tail = x;
+    zsl->length++;
+    return x;
+}
+
+/* Internal function used by zslDelete, zslDeleteRangeByScore and
+ * zslDeleteRangeByRank. Unlinks 'x' given the 'update' vector of
+ * predecessors (as built by the callers' search loops), fixing spans and
+ * shrinking zsl->level if top levels become empty. Does NOT free 'x'. */
+void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
+    int i;
+    for (i = 0; i < zsl->level; i++) {
+        if (update[i]->level[i].forward == x) {
+            update[i]->level[i].span += x->level[i].span - 1;
+            update[i]->level[i].forward = x->level[i].forward;
+        } else {
+            /* x does not reach level i: only one hop disappears. */
+            update[i]->level[i].span -= 1;
+        }
+    }
+    if (x->level[0].forward) {
+        x->level[0].forward->backward = x->backward;
+    } else {
+        zsl->tail = x->backward;
+    }
+    while(zsl->level > 1 && zsl->header->level[zsl->level-1].forward == NULL)
+        zsl->level--;
+    zsl->length--;
+}
+
+/* Delete an element with matching score/element from the skiplist.
+ * The function returns 1 if the node was found and deleted, otherwise
+ * 0 is returned.
+ *
+ * If 'node' is NULL the deleted node is freed by zslFreeNode(), otherwise
+ * it is not freed (but just unlinked) and *node is set to the node pointer,
+ * so that it is possible for the caller to reuse the node (including the
+ * referenced SDS string at node->ele). */
+int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) {
+    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+    int i;
+
+    x = zsl->header;
+    for (i = zsl->level-1; i >= 0; i--) {
+        while (x->level[i].forward &&
+                (x->level[i].forward->score < score ||
+                    (x->level[i].forward->score == score &&
+                     sdscmp(x->level[i].forward->ele,ele) < 0)))
+        {
+            x = x->level[i].forward;
+        }
+        update[i] = x;
+    }
+    /* We may have multiple elements with the same score, what we need
+     * is to find the element with both the right score and object. */
+    x = x->level[0].forward;
+    if (x && score == x->score && sdscmp(x->ele,ele) == 0) {
+        zslDeleteNode(zsl, x, update);
+        if (!node)
+            zslFreeNode(x);
+        else
+            *node = x;
+        return 1;
+    }
+    return 0; /* not found */
+}
+
+/* Update the score of an element inside the sorted set skiplist.
+ * Note that the element must exist and must match 'score'.
+ * This function does not update the score in the hash table side, the
+ * caller should take care of it.
+ *
+ * Note that this function attempts to just update the node, in case after
+ * the score update, the node would be exactly at the same position.
+ * Otherwise the skiplist is modified by removing and re-adding a new
+ * element, which is more costly.
+ *
+ * The function returns the updated element skiplist node pointer. */
+zskiplistNode *zslUpdateScore(zskiplist *zsl, double curscore, sds ele, double newscore) {
+    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+    int i;
+
+    /* We need to seek to element to update to start: this is useful anyway,
+     * we'll have to update or remove it. */
+    x = zsl->header;
+    for (i = zsl->level-1; i >= 0; i--) {
+        while (x->level[i].forward &&
+                (x->level[i].forward->score < curscore ||
+                    (x->level[i].forward->score == curscore &&
+                     sdscmp(x->level[i].forward->ele,ele) < 0)))
+        {
+            x = x->level[i].forward;
+        }
+        update[i] = x;
+    }
+
+    /* Jump to our element: note that this function assumes that the
+     * element with the matching score exists. */
+    x = x->level[0].forward;
+    serverAssert(x && curscore == x->score && sdscmp(x->ele,ele) == 0);
+
+    /* If the node, after the score update, would be still exactly
+     * at the same position, we can just update the score without
+     * actually removing and re-inserting the element in the skiplist. */
+    if ((x->backward == NULL || x->backward->score < newscore) &&
+        (x->level[0].forward == NULL || x->level[0].forward->score > newscore))
+    {
+        x->score = newscore;
+        return x;
+    }
+
+    /* No way to reuse the old node: we need to remove and insert a new
+     * one at a different place. */
+    zslDeleteNode(zsl, x, update);
+    zskiplistNode *newnode = zslInsert(zsl,newscore,x->ele);
+    /* We reused the old node x->ele SDS string, free the node now
+     * since zslInsert created a new one. */
+    x->ele = NULL;
+    zslFreeNode(x);
+    return newnode;
+}
+
+/* Return non-zero when 'value' satisfies the lower bound of 'spec',
+ * honoring exclusivity (minex). */
+int zslValueGteMin(double value, zrangespec *spec) {
+    if (spec->minex) return value > spec->min;
+    return value >= spec->min;
+}
+
+/* Return non-zero when 'value' satisfies the upper bound of 'spec',
+ * honoring exclusivity (maxex). */
+int zslValueLteMax(double value, zrangespec *spec) {
+    if (spec->maxex) return value < spec->max;
+    return value <= spec->max;
+}
+
+/* Return non-zero if at least one element of the skiplist falls inside
+ * the given score range, zero otherwise. */
+int zslIsInRange(zskiplist *zsl, zrangespec *range) {
+    /* Ranges that can never match anything. */
+    if (range->min > range->max) return 0;
+    if (range->min == range->max && (range->minex || range->maxex)) return 0;
+
+    /* The highest score must reach the lower bound... */
+    zskiplistNode *last = zsl->tail;
+    if (last == NULL || !zslValueGteMin(last->score,range)) return 0;
+
+    /* ...and the lowest score must not exceed the upper bound. */
+    zskiplistNode *first = zsl->header->level[0].forward;
+    if (first == NULL || !zslValueLteMax(first->score,range)) return 0;
+
+    return 1;
+}
+
+/* Find the first node that is contained in the specified range.
+ * Returns NULL when no element is contained in the range. */
+zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range) {
+    zskiplistNode *x;
+    int i;
+
+    /* If everything is out of range, return early. */
+    if (!zslIsInRange(zsl,range)) return NULL;
+
+    x = zsl->header;
+    for (i = zsl->level-1; i >= 0; i--) {
+        /* Go forward while *OUT* of range. */
+        while (x->level[i].forward &&
+            !zslValueGteMin(x->level[i].forward->score,range))
+                x = x->level[i].forward;
+    }
+
+    /* This is an inner range, so the next node cannot be NULL. */
+    x = x->level[0].forward;
+    serverAssert(x != NULL);
+
+    /* Check if score <= max. */
+    if (!zslValueLteMax(x->score,range)) return NULL;
+    return x;
+}
+
+/* Find the last node that is contained in the specified range.
+ * Returns NULL when no element is contained in the range. */
+zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range) {
+    zskiplistNode *x;
+    int i;
+
+    /* If everything is out of range, return early. */
+    if (!zslIsInRange(zsl,range)) return NULL;
+
+    x = zsl->header;
+    for (i = zsl->level-1; i >= 0; i--) {
+        /* Go forward while *IN* range. */
+        while (x->level[i].forward &&
+            zslValueLteMax(x->level[i].forward->score,range))
+                x = x->level[i].forward;
+    }
+
+    /* This is an inner range, so this node cannot be NULL. */
+    serverAssert(x != NULL);
+
+    /* Check if score >= min. */
+    if (!zslValueGteMin(x->score,range)) return NULL;
+    return x;
+}
+
+/* Delete all the elements with score between min and max from the skiplist.
+ * Both min and max can be inclusive or exclusive (see range->minex and
+ * range->maxex). When inclusive a score >= min && score <= max is deleted.
+ * Note that this function takes the reference to the hash table view of the
+ * sorted set, in order to remove the elements from the hash table too.
+ * Returns the number of elements removed. */
+unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dict) {
+    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+    unsigned long removed = 0;
+    int i;
+
+    x = zsl->header;
+    for (i = zsl->level-1; i >= 0; i--) {
+        while (x->level[i].forward &&
+            !zslValueGteMin(x->level[i].forward->score, range))
+                x = x->level[i].forward;
+        update[i] = x;
+    }
+
+    /* Current node is the last with score < or <= min. */
+    x = x->level[0].forward;
+
+    /* Delete nodes while in range. The dict entry shares the SDS string
+     * with the node, so the dict entry is removed before the node (and
+     * its string) are freed. */
+    while (x && zslValueLteMax(x->score, range)) {
+        zskiplistNode *next = x->level[0].forward;
+        zslDeleteNode(zsl,x,update);
+        dictDelete(dict,x->ele);
+        zslFreeNode(x); /* Here is where x->ele is actually released. */
+        removed++;
+        x = next;
+    }
+    return removed;
+}
+
+/* Delete all the elements with lexicographic value between the min and max
+ * of 'range' from the skiplist (and from the companion hash table 'dict').
+ * Assumes all elements share the same score, as required for lex ranges.
+ * Returns the number of elements removed. */
+unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *dict) {
+    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+    unsigned long removed = 0;
+    int i;
+
+
+    x = zsl->header;
+    for (i = zsl->level-1; i >= 0; i--) {
+        while (x->level[i].forward &&
+            !zslLexValueGteMin(x->level[i].forward->ele,range))
+                x = x->level[i].forward;
+        update[i] = x;
+    }
+
+    /* Current node is the last with score < or <= min. */
+    x = x->level[0].forward;
+
+    /* Delete nodes while in range. */
+    while (x && zslLexValueLteMax(x->ele,range)) {
+        zskiplistNode *next = x->level[0].forward;
+        zslDeleteNode(zsl,x,update);
+        dictDelete(dict,x->ele);
+        zslFreeNode(x); /* Here is where x->ele is actually released. */
+        removed++;
+        x = next;
+    }
+    return removed;
+}
+
+/* Delete all the elements with rank between start and end from the skiplist.
+ * Start and end are inclusive. Note that start and end need to be 1-based.
+ * Elements are removed from the companion hash table 'dict' as well.
+ * Returns the number of elements removed. */
+unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) {
+    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+    unsigned long traversed = 0, removed = 0;
+    int i;
+
+    x = zsl->header;
+    for (i = zsl->level-1; i >= 0; i--) {
+        while (x->level[i].forward && (traversed + x->level[i].span) < start) {
+            traversed += x->level[i].span;
+            x = x->level[i].forward;
+        }
+        update[i] = x;
+    }
+
+    /* 'traversed' now tracks the 1-based rank of the node under deletion. */
+    traversed++;
+    x = x->level[0].forward;
+    while (x && traversed <= end) {
+        zskiplistNode *next = x->level[0].forward;
+        zslDeleteNode(zsl,x,update);
+        dictDelete(dict,x->ele);
+        zslFreeNode(x);
+        removed++;
+        traversed++;
+        x = next;
+    }
+    return removed;
+}
+
+/* Find the rank for an element by both score and key.
+ * Returns 0 when the element cannot be found, rank otherwise.
+ * Note that the rank is 1-based due to the span of zsl->header to the
+ * first element. */
+unsigned long zslGetRank(zskiplist *zsl, double score, sds ele) {
+    zskiplistNode *x;
+    unsigned long rank = 0;
+    int i;
+
+    x = zsl->header;
+    for (i = zsl->level-1; i >= 0; i--) {
+        /* Note the '<= 0' comparison: advancing onto the target element
+         * itself is what accumulates its own span into 'rank'. */
+        while (x->level[i].forward &&
+            (x->level[i].forward->score < score ||
+                (x->level[i].forward->score == score &&
+                 sdscmp(x->level[i].forward->ele,ele) <= 0))) {
+            rank += x->level[i].span;
+            x = x->level[i].forward;
+        }
+
+        /* x might be equal to zsl->header, so test if obj is non-NULL */
+        if (x->ele && x->score == score && sdscmp(x->ele,ele) == 0) {
+            return rank;
+        }
+    }
+    return 0;
+}
+
/* Finds an element by its rank. The rank argument needs to be 1-based. */
zskiplistNode* zslGetElementByRank(zskiplist *zsl, unsigned long rank) {
    zskiplistNode *x;
    unsigned long traversed = 0;
    int i;

    x = zsl->header;
    for (i = zsl->level-1; i >= 0; i--) {
        /* Advance on this level as long as doing so does not overshoot
         * the requested rank. */
        while (x->level[i].forward && (traversed + x->level[i].span) <= rank)
        {
            traversed += x->level[i].span;
            x = x->level[i].forward;
        }
        if (traversed == rank) {
            return x;
        }
    }
    return NULL; /* Rank out of range. */
}
+
+/* Populate the rangespec according to the objects min and max. */
+static int zslParseRange(robj *min, robj *max, zrangespec *spec) {
+ char *eptr;
+ spec->minex = spec->maxex = 0;
+
+ /* Parse the min-max interval. If one of the values is prefixed
+ * by the "(" character, it's considered "open". For instance
+ * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
+ * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
+ if (min->encoding == OBJ_ENCODING_INT) {
+ spec->min = (long)min->ptr;
+ } else {
+ if (((char*)min->ptr)[0] == '(') {
+ spec->min = strtod((char*)min->ptr+1,&eptr);
+ if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR;
+ spec->minex = 1;
+ } else {
+ spec->min = strtod((char*)min->ptr,&eptr);
+ if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR;
+ }
+ }
+ if (max->encoding == OBJ_ENCODING_INT) {
+ spec->max = (long)max->ptr;
+ } else {
+ if (((char*)max->ptr)[0] == '(') {
+ spec->max = strtod((char*)max->ptr+1,&eptr);
+ if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR;
+ spec->maxex = 1;
+ } else {
+ spec->max = strtod((char*)max->ptr,&eptr);
+ if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR;
+ }
+ }
+
+ return C_OK;
+}
+
+/* ------------------------ Lexicographic ranges ---------------------------- */
+
/* Parse max or min argument of ZRANGEBYLEX.
 * (foo means foo (open interval)
 * [foo means foo (closed interval)
 * - means the min string possible
 * + means the max string possible
 *
 * If the string is valid the *dest pointer is set to the redis object
 * that will be used for the comparison, and ex will be set to 0 or 1
 * respectively if the item is exclusive or inclusive. C_OK will be
 * returned.
 *
 * If the string is not a valid range C_ERR is returned, and the value
 * of *dest and *ex is undefined. */
int zslParseLexRangeItem(robj *item, sds *dest, int *ex) {
    char *c = item->ptr;

    switch(c[0]) {
    case '+':
        if (c[1] != '\0') return C_ERR;
        /* The shared sentinel compares greater than every real element in
         * sdscmplex(), so the exclusive flag is effectively irrelevant here. */
        *ex = 1;
        *dest = shared.maxstring;
        return C_OK;
    case '-':
        if (c[1] != '\0') return C_ERR;
        *ex = 1;
        *dest = shared.minstring;
        return C_OK;
    case '(':
        *ex = 1;
        /* Newly allocated: the caller must release it via zslFreeLexRange(). */
        *dest = sdsnewlen(c+1,sdslen(c)-1);
        return C_OK;
    case '[':
        *ex = 0;
        *dest = sdsnewlen(c+1,sdslen(c)-1);
        return C_OK;
    default:
        return C_ERR;
    }
}
+
+/* Free a lex range structure, must be called only after zslParseLexRange()
+ * populated the structure with success (C_OK returned). */
+void zslFreeLexRange(zlexrangespec *spec) {
+ if (spec->min != shared.minstring &&
+ spec->min != shared.maxstring) sdsfree(spec->min);
+ if (spec->max != shared.minstring &&
+ spec->max != shared.maxstring) sdsfree(spec->max);
+}
+
+/* Populate the lex rangespec according to the objects min and max.
+ *
+ * Return C_OK on success. On error C_ERR is returned.
+ * When OK is returned the structure must be freed with zslFreeLexRange(),
+ * otherwise no release is needed. */
+int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec) {
+ /* The range can't be valid if objects are integer encoded.
+ * Every item must start with ( or [. */
+ if (min->encoding == OBJ_ENCODING_INT ||
+ max->encoding == OBJ_ENCODING_INT) return C_ERR;
+
+ spec->min = spec->max = NULL;
+ if (zslParseLexRangeItem(min, &spec->min, &spec->minex) == C_ERR ||
+ zslParseLexRangeItem(max, &spec->max, &spec->maxex) == C_ERR) {
+ zslFreeLexRange(spec);
+ return C_ERR;
+ } else {
+ return C_OK;
+ }
+}
+
/* This is just a wrapper to sdscmp() that is able to
 * handle shared.minstring and shared.maxstring as the equivalent of
 * -inf and +inf for strings.
 *
 * Note the order of the tests matters: identity is checked first so that
 * minstring == minstring (and maxstring == maxstring) yields 0, before the
 * sentinel checks force -1 / 1 for every other combination. */
int sdscmplex(sds a, sds b) {
    if (a == b) return 0;
    if (a == shared.minstring || b == shared.maxstring) return -1;
    if (a == shared.maxstring || b == shared.minstring) return 1;
    return sdscmp(a,b);
}
+
+int zslLexValueGteMin(sds value, zlexrangespec *spec) {
+ return spec->minex ?
+ (sdscmplex(value,spec->min) > 0) :
+ (sdscmplex(value,spec->min) >= 0);
+}
+
+int zslLexValueLteMax(sds value, zlexrangespec *spec) {
+ return spec->maxex ?
+ (sdscmplex(value,spec->max) < 0) :
+ (sdscmplex(value,spec->max) <= 0);
+}
+
/* Returns if there is a part of the zset is in the lex range. */
int zslIsInLexRange(zskiplist *zsl, zlexrangespec *range) {
    zskiplistNode *x;

    /* Test for ranges that will always be empty. */
    int cmp = sdscmplex(range->min,range->max);
    if (cmp > 0 || (cmp == 0 && (range->minex || range->maxex)))
        return 0;
    /* The greatest element must reach the min bound... */
    x = zsl->tail;
    if (x == NULL || !zslLexValueGteMin(x->ele,range))
        return 0;
    /* ...and the smallest element must not exceed the max bound. */
    x = zsl->header->level[0].forward;
    if (x == NULL || !zslLexValueLteMax(x->ele,range))
        return 0;
    return 1;
}
+
/* Find the first node that is contained in the specified lex range.
 * Returns NULL when no element is contained in the range. */
zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range) {
    zskiplistNode *x;
    int i;

    /* If everything is out of range, return early. */
    if (!zslIsInLexRange(zsl,range)) return NULL;

    x = zsl->header;
    for (i = zsl->level-1; i >= 0; i--) {
        /* Go forward while *OUT* of range. */
        while (x->level[i].forward &&
            !zslLexValueGteMin(x->level[i].forward->ele,range))
                x = x->level[i].forward;
    }

    /* This is an inner range, so the next node cannot be NULL. */
    x = x->level[0].forward;
    serverAssert(x != NULL);

    /* Check if score <= max. */
    if (!zslLexValueLteMax(x->ele,range)) return NULL;
    return x;
}
+
/* Find the last node that is contained in the specified range.
 * Returns NULL when no element is contained in the range. */
zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range) {
    zskiplistNode *x;
    int i;

    /* If everything is out of range, return early. */
    if (!zslIsInLexRange(zsl,range)) return NULL;

    x = zsl->header;
    for (i = zsl->level-1; i >= 0; i--) {
        /* Go forward while *IN* range. */
        while (x->level[i].forward &&
            zslLexValueLteMax(x->level[i].forward->ele,range))
                x = x->level[i].forward;
    }

    /* This is an inner range, so this node cannot be NULL. */
    serverAssert(x != NULL);

    /* Check if score >= min. */
    if (!zslLexValueGteMin(x->ele,range)) return NULL;
    return x;
}
+
+/*-----------------------------------------------------------------------------
+ * Listpack-backed sorted set API
+ *----------------------------------------------------------------------------*/
+
/* Convert a (not necessarily NUL-terminated) buffer of 'vlen' bytes into
 * a double. Input longer than 127 bytes is silently truncated before the
 * conversion, matching the original behavior. */
double zzlStrtod(unsigned char *vstr, unsigned int vlen) {
    char buf[128];
    unsigned int n = vlen;

    if (n >= sizeof(buf)) n = sizeof(buf) - 1;
    memcpy(buf, vstr, n);
    buf[n] = '\0';
    return strtod(buf, NULL);
}
+
/* Return the score of the listpack entry pointed to by 'sptr'. The score
 * may be stored either as a string (converted with strtod) or directly as
 * an integer. */
double zzlGetScore(unsigned char *sptr) {
    unsigned char *vstr;
    unsigned int vlen;
    long long vlong;
    double score;

    serverAssert(sptr != NULL);
    vstr = lpGetValue(sptr,&vlen,&vlong);

    if (vstr) {
        score = zzlStrtod(vstr,vlen);
    } else {
        /* Integer-encoded entry: vlong holds the value. */
        score = vlong;
    }

    return score;
}
+
/* Return a listpack element as an SDS string.
 * The returned string is newly allocated: the caller is responsible for
 * releasing it with sdsfree(). */
sds lpGetObject(unsigned char *sptr) {
    unsigned char *vstr;
    unsigned int vlen;
    long long vlong;

    serverAssert(sptr != NULL);
    vstr = lpGetValue(sptr,&vlen,&vlong);

    if (vstr) {
        return sdsnewlen((char*)vstr,vlen);
    } else {
        /* Integer-encoded entry: render it as a string. */
        return sdsfromlonglong(vlong);
    }
}
+
/* Compare element in sorted set with given element.
 * Returns a negative, zero or positive value following memcmp()/strcmp()
 * conventions. Integer-encoded listpack entries are rendered into a local
 * buffer first so the comparison is always bytewise. */
int zzlCompareElements(unsigned char *eptr, unsigned char *cstr, unsigned int clen) {
    unsigned char *vstr;
    unsigned int vlen;
    long long vlong;
    unsigned char vbuf[32];
    int minlen, cmp;

    vstr = lpGetValue(eptr,&vlen,&vlong);
    if (vstr == NULL) {
        /* Store string representation of long long in buf. */
        vlen = ll2string((char*)vbuf,sizeof(vbuf),vlong);
        vstr = vbuf;
    }

    /* Compare the common prefix; when equal the shorter string sorts
     * first (vlen-clen encodes that ordering). */
    minlen = (vlen < clen) ? vlen : clen;
    cmp = memcmp(vstr,cstr,minlen);
    if (cmp == 0) return vlen-clen;
    return cmp;
}
+
/* Return the number of (member,score) pairs stored in the listpack:
 * entries are laid out in pairs, so it is half the listpack length. */
unsigned int zzlLength(unsigned char *zl) {
    unsigned int entries = lpLength(zl);
    return entries / 2;
}
+
+/* Move to next entry based on the values in eptr and sptr. Both are set to
+ * NULL when there is no next entry. */
+void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) {
+ unsigned char *_eptr, *_sptr;
+ serverAssert(*eptr != NULL && *sptr != NULL);
+
+ _eptr = lpNext(zl,*sptr);
+ if (_eptr != NULL) {
+ _sptr = lpNext(zl,_eptr);
+ serverAssert(_sptr != NULL);
+ } else {
+ /* No next entry. */
+ _sptr = NULL;
+ }
+
+ *eptr = _eptr;
+ *sptr = _sptr;
+}
+
+/* Move to the previous entry based on the values in eptr and sptr. Both are
+ * set to NULL when there is no prev entry. */
+void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) {
+ unsigned char *_eptr, *_sptr;
+ serverAssert(*eptr != NULL && *sptr != NULL);
+
+ _sptr = lpPrev(zl,*eptr);
+ if (_sptr != NULL) {
+ _eptr = lpPrev(zl,_sptr);
+ serverAssert(_eptr != NULL);
+ } else {
+ /* No previous entry. */
+ _eptr = NULL;
+ }
+
+ *eptr = _eptr;
+ *sptr = _sptr;
+}
+
/* Returns if there is a part of the zset is in range. Should only be used
 * internally by zzlFirstInRange and zzlLastInRange. */
int zzlIsInRange(unsigned char *zl, zrangespec *range) {
    unsigned char *p;
    double score;

    /* Test for ranges that will always be empty. */
    if (range->min > range->max ||
            (range->min == range->max && (range->minex || range->maxex)))
        return 0;

    /* The greatest score must reach the min bound... */
    p = lpSeek(zl,-1); /* Last score. */
    if (p == NULL) return 0; /* Empty sorted set */
    score = zzlGetScore(p);
    if (!zslValueGteMin(score,range))
        return 0;

    /* ...and the smallest score must not exceed the max bound. */
    p = lpSeek(zl,1); /* First score. */
    serverAssert(p != NULL);
    score = zzlGetScore(p);
    if (!zslValueLteMax(score,range))
        return 0;

    return 1;
}
+
/* Find pointer to the first element contained in the specified range.
 * Returns NULL when no element is contained in the range. */
unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range) {
    unsigned char *eptr = lpSeek(zl,0), *sptr; /* First member. */
    double score;

    /* If everything is out of range, return early. */
    if (!zzlIsInRange(zl,range)) return NULL;

    /* Scan forward: entries are sorted by score, so the first member whose
     * score reaches the min bound is the candidate. */
    while (eptr != NULL) {
        sptr = lpNext(zl,eptr);
        serverAssert(sptr != NULL);

        score = zzlGetScore(sptr);
        if (zslValueGteMin(score,range)) {
            /* Check if score <= max. */
            if (zslValueLteMax(score,range))
                return eptr;
            return NULL;
        }

        /* Move to next element. */
        eptr = lpNext(zl,sptr);
    }

    return NULL;
}
+
/* Find pointer to the last element contained in the specified range.
 * Returns NULL when no element is contained in the range. */
unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range) {
    unsigned char *eptr = lpSeek(zl,-2), *sptr; /* Last member. */
    double score;

    /* If everything is out of range, return early. */
    if (!zzlIsInRange(zl,range)) return NULL;

    /* Scan backwards: the first member (from the tail) whose score is
     * within the max bound is the candidate. */
    while (eptr != NULL) {
        sptr = lpNext(zl,eptr);
        serverAssert(sptr != NULL);

        score = zzlGetScore(sptr);
        if (zslValueLteMax(score,range)) {
            /* Check if score >= min. */
            if (zslValueGteMin(score,range))
                return eptr;
            return NULL;
        }

        /* Move to previous element by moving to the score of previous element.
         * When this returns NULL, we know there also is no element. */
        sptr = lpPrev(zl,eptr);
        if (sptr != NULL)
            serverAssert((eptr = lpPrev(zl,sptr)) != NULL);
        else
            eptr = NULL;
    }

    return NULL;
}
+
+int zzlLexValueGteMin(unsigned char *p, zlexrangespec *spec) {
+ sds value = lpGetObject(p);
+ int res = zslLexValueGteMin(value,spec);
+ sdsfree(value);
+ return res;
+}
+
+int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec) {
+ sds value = lpGetObject(p);
+ int res = zslLexValueLteMax(value,spec);
+ sdsfree(value);
+ return res;
+}
+
/* Returns if there is a part of the zset is in range. Should only be used
 * internally by zzlFirstInLexRange and zzlLastInLexRange. */
int zzlIsInLexRange(unsigned char *zl, zlexrangespec *range) {
    unsigned char *p;

    /* Test for ranges that will always be empty. */
    int cmp = sdscmplex(range->min,range->max);
    if (cmp > 0 || (cmp == 0 && (range->minex || range->maxex)))
        return 0;

    /* The greatest element must reach the min bound... */
    p = lpSeek(zl,-2); /* Last element. */
    if (p == NULL) return 0;
    if (!zzlLexValueGteMin(p,range))
        return 0;

    /* ...and the smallest element must not exceed the max bound. */
    p = lpSeek(zl,0); /* First element. */
    serverAssert(p != NULL);
    if (!zzlLexValueLteMax(p,range))
        return 0;

    return 1;
}
+
/* Find pointer to the first element contained in the specified lex range.
 * Returns NULL when no element is contained in the range. */
unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec *range) {
    unsigned char *eptr = lpSeek(zl,0), *sptr; /* First member. */

    /* If everything is out of range, return early. */
    if (!zzlIsInLexRange(zl,range)) return NULL;

    while (eptr != NULL) {
        if (zzlLexValueGteMin(eptr,range)) {
            /* Check if score <= max. */
            if (zzlLexValueLteMax(eptr,range))
                return eptr;
            return NULL;
        }

        /* Move to next element. */
        sptr = lpNext(zl,eptr); /* This element score. Skip it. */
        serverAssert(sptr != NULL);
        eptr = lpNext(zl,sptr); /* Next element. */
    }

    return NULL;
}
+
/* Find pointer to the last element contained in the specified lex range.
 * Returns NULL when no element is contained in the range. */
unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range) {
    unsigned char *eptr = lpSeek(zl,-2), *sptr; /* Last member. */

    /* If everything is out of range, return early. */
    if (!zzlIsInLexRange(zl,range)) return NULL;

    while (eptr != NULL) {
        if (zzlLexValueLteMax(eptr,range)) {
            /* Check if score >= min. */
            if (zzlLexValueGteMin(eptr,range))
                return eptr;
            return NULL;
        }

        /* Move to previous element by moving to the score of previous element.
         * When this returns NULL, we know there also is no element. */
        sptr = lpPrev(zl,eptr);
        if (sptr != NULL)
            serverAssert((eptr = lpPrev(zl,sptr)) != NULL);
        else
            eptr = NULL;
    }

    return NULL;
}
+
+unsigned char *zzlFind(unsigned char *lp, sds ele, double *score) {
+ unsigned char *eptr, *sptr;
+
+ if ((eptr = lpFirst(lp)) == NULL) return NULL;
+ eptr = lpFind(lp, eptr, (unsigned char*)ele, sdslen(ele), 1);
+ if (eptr) {
+ sptr = lpNext(lp,eptr);
+ serverAssert(sptr != NULL);
+
+ /* Matching element, pull out score. */
+ if (score != NULL) *score = zzlGetScore(sptr);
+ return eptr;
+ }
+
+ return NULL;
+}
+
/* Delete (element,score) pair from listpack. Use local copy of eptr because we
 * don't want to modify the one given as argument. */
unsigned char *zzlDelete(unsigned char *zl, unsigned char *eptr) {
    unsigned char *p = eptr; /* lpDeleteRangeWithEntry() updates the copy. */
    return lpDeleteRangeWithEntry(zl,&p,2);
}
+
/* Insert the (ele,score) pair immediately before the listpack entry 'eptr',
 * or append it at the tail when 'eptr' is NULL. The score is stored as a
 * raw integer when double2ll() reports it fits a long long, otherwise as a
 * d2string()-formatted string. Returns the (possibly reallocated) listpack. */
unsigned char *zzlInsertAt(unsigned char *zl, unsigned char *eptr, sds ele, double score) {
    unsigned char *sptr;
    char scorebuf[MAX_D2STRING_CHARS];
    int scorelen = 0;
    long long lscore;
    int score_is_long = double2ll(score, &lscore);
    if (!score_is_long)
        scorelen = d2string(scorebuf,sizeof(scorebuf),score);
    if (eptr == NULL) {
        zl = lpAppend(zl,(unsigned char*)ele,sdslen(ele));
        if (score_is_long)
            zl = lpAppendInteger(zl,lscore);
        else
            zl = lpAppend(zl,(unsigned char*)scorebuf,scorelen);
    } else {
        /* Insert member before the element 'eptr'. */
        zl = lpInsertString(zl,(unsigned char*)ele,sdslen(ele),eptr,LP_BEFORE,&sptr);

        /* Insert score after the member. */
        if (score_is_long)
            zl = lpInsertInteger(zl,lscore,sptr,LP_AFTER,NULL);
        else
            zl = lpInsertString(zl,(unsigned char*)scorebuf,scorelen,sptr,LP_AFTER,NULL);
    }
    return zl;
}
+
/* Insert (element,score) pair in listpack. This function assumes the element is
 * not yet present in the list. The insertion position is chosen to keep the
 * listpack ordered by (score, member). Returns the (possibly reallocated)
 * listpack. */
unsigned char *zzlInsert(unsigned char *zl, sds ele, double score) {
    unsigned char *eptr = lpSeek(zl,0), *sptr;
    double s;

    while (eptr != NULL) {
        sptr = lpNext(zl,eptr);
        serverAssert(sptr != NULL);
        s = zzlGetScore(sptr);

        if (s > score) {
            /* First element with score larger than score for element to be
             * inserted. This means we should take its spot in the list to
             * maintain ordering. */
            zl = zzlInsertAt(zl,eptr,ele,score);
            break;
        } else if (s == score) {
            /* Ensure lexicographical ordering for elements. */
            if (zzlCompareElements(eptr,(unsigned char*)ele,sdslen(ele)) > 0) {
                zl = zzlInsertAt(zl,eptr,ele,score);
                break;
            }
        }

        /* Move to next element. */
        eptr = lpNext(zl,sptr);
    }

    /* Push on tail of list when it was not yet inserted. */
    if (eptr == NULL)
        zl = zzlInsertAt(zl,NULL,ele,score);
    return zl;
}
+
/* Delete all the (member,score) pairs whose score falls inside 'range'.
 * When 'deleted' is not NULL the number of deleted pairs is stored there.
 * Returns the (possibly reallocated) listpack. */
unsigned char *zzlDeleteRangeByScore(unsigned char *zl, zrangespec *range, unsigned long *deleted) {
    unsigned char *eptr, *sptr;
    double score;
    unsigned long num = 0;

    if (deleted != NULL) *deleted = 0;

    eptr = zzlFirstInRange(zl,range);
    if (eptr == NULL) return zl;

    /* When the tail of the listpack is deleted, eptr will be NULL. */
    while (eptr && (sptr = lpNext(zl,eptr)) != NULL) {
        score = zzlGetScore(sptr);
        if (zslValueLteMax(score,range)) {
            /* Delete both the element and the score. */
            zl = lpDeleteRangeWithEntry(zl,&eptr,2);
            num++;
        } else {
            /* No longer in range. */
            break;
        }
    }

    if (deleted != NULL) *deleted = num;
    return zl;
}
+
/* Delete all the (member,score) pairs whose member falls inside the lex
 * 'range'. When 'deleted' is not NULL the number of deleted pairs is stored
 * there. Returns the (possibly reallocated) listpack. */
unsigned char *zzlDeleteRangeByLex(unsigned char *zl, zlexrangespec *range, unsigned long *deleted) {
    unsigned char *eptr, *sptr;
    unsigned long num = 0;

    if (deleted != NULL) *deleted = 0;

    eptr = zzlFirstInLexRange(zl,range);
    if (eptr == NULL) return zl;

    /* When the tail of the listpack is deleted, eptr will be NULL. */
    while (eptr && (sptr = lpNext(zl,eptr)) != NULL) {
        if (zzlLexValueLteMax(eptr,range)) {
            /* Delete both the element and the score. */
            zl = lpDeleteRangeWithEntry(zl,&eptr,2);
            num++;
        } else {
            /* No longer in range. */
            break;
        }
    }

    if (deleted != NULL) *deleted = num;
    return zl;
}
+
/* Delete all the elements with rank between start and end from the listpack.
 * Start and end are inclusive. Note that start and end need to be 1-based.
 * Each rank covers a (member,score) pair, hence the factor of 2. */
unsigned char *zzlDeleteRangeByRank(unsigned char *zl, unsigned int start, unsigned int end, unsigned long *deleted) {
    unsigned int num = (end-start)+1;
    if (deleted) *deleted = num;
    zl = lpDeleteRange(zl,2*(start-1),2*num);
    return zl;
}
+
+/*-----------------------------------------------------------------------------
+ * Common sorted set API
+ *----------------------------------------------------------------------------*/
+
+unsigned long zsetLength(const robj *zobj) {
+ unsigned long length = 0;
+ if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
+ length = zzlLength(zobj->ptr);
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ length = ((const zset*)zobj->ptr)->zsl->length;
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ return length;
+}
+
/* Factory method to return a zset.
 *
 * The size hint indicates approximately how many items will be added,
 * and the value len hint indicates the approximate individual size of the added elements,
 * they are used to determine the initial representation.
 *
 * If the hints are not known, an underestimation or 0 is suitable. */
robj *zsetTypeCreate(size_t size_hint, size_t val_len_hint) {
    /* Small sets with short values start as a listpack. */
    if (size_hint <= server.zset_max_listpack_entries &&
        val_len_hint <= server.zset_max_listpack_value)
    {
        return createZsetListpackObject();
    }

    /* Otherwise start directly with the skiplist encoding, presizing the
     * dict so the expected inserts do not trigger rehashing. */
    robj *zobj = createZsetObject();
    zset *zs = zobj->ptr;
    dictExpand(zs->dict, size_hint);
    return zobj;
}
+
/* Check if the existing zset should be converted to another encoding based on
 * the size hint. Only the listpack -> skiplist direction is handled here. */
void zsetTypeMaybeConvert(robj *zobj, size_t size_hint) {
    if (zobj->encoding == OBJ_ENCODING_LISTPACK &&
        size_hint > server.zset_max_listpack_entries)
    {
        zsetConvertAndExpand(zobj, OBJ_ENCODING_SKIPLIST, size_hint);
    }
}
+
+/* Convert the zset to specified encoding. The zset dict (when converting
+ * to a skiplist) is presized to hold the number of elements in the original
+ * zset. */
+void zsetConvert(robj *zobj, int encoding) {
+ zsetConvertAndExpand(zobj, encoding, zsetLength(zobj));
+}
+
/* Converts a zset to the specified encoding, pre-sizing it for 'cap' elements.
 * No-op when the object already uses the target encoding; panics on any
 * encoding combination other than listpack <-> skiplist. */
void zsetConvertAndExpand(robj *zobj, int encoding, unsigned long cap) {
    zset *zs;
    zskiplistNode *node, *next;
    sds ele;
    double score;

    if (zobj->encoding == encoding) return;
    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *zl = zobj->ptr;
        unsigned char *eptr, *sptr;
        unsigned char *vstr;
        unsigned int vlen;
        long long vlong;

        if (encoding != OBJ_ENCODING_SKIPLIST)
            serverPanic("Unknown target encoding");

        zs = zmalloc(sizeof(*zs));
        zs->dict = dictCreate(&zsetDictType);
        zs->zsl = zslCreate();

        /* Presize the dict to avoid rehashing */
        dictExpand(zs->dict, cap);

        eptr = lpSeek(zl,0);
        if (eptr != NULL) {
            sptr = lpNext(zl,eptr);
            serverAssertWithInfo(NULL,zobj,sptr != NULL);
        }

        /* Walk the (member,score) pairs, inserting each into both the
         * skiplist and the dict. The SDS string is shared between the two. */
        while (eptr != NULL) {
            score = zzlGetScore(sptr);
            vstr = lpGetValue(eptr,&vlen,&vlong);
            if (vstr == NULL)
                ele = sdsfromlonglong(vlong);
            else
                ele = sdsnewlen((char*)vstr,vlen);

            node = zslInsert(zs->zsl,score,ele);
            serverAssert(dictAdd(zs->dict,ele,&node->score) == DICT_OK);
            zzlNext(zl,&eptr,&sptr);
        }

        zfree(zobj->ptr);
        zobj->ptr = zs;
        zobj->encoding = OBJ_ENCODING_SKIPLIST;
    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        unsigned char *zl = lpNew(0);

        if (encoding != OBJ_ENCODING_LISTPACK)
            serverPanic("Unknown target encoding");

        /* Approach similar to zslFree(), since we want to free the skiplist at
         * the same time as creating the listpack. */
        zs = zobj->ptr;
        dictRelease(zs->dict);
        node = zs->zsl->header->level[0].forward;
        zfree(zs->zsl->header);
        zfree(zs->zsl);

        /* Nodes are visited in order, so appending at the tail keeps the
         * listpack sorted. */
        while (node) {
            zl = zzlInsertAt(zl,NULL,node->ele,node->score);
            next = node->level[0].forward;
            zslFreeNode(node);
            node = next;
        }

        zfree(zs);
        zobj->ptr = zl;
        zobj->encoding = OBJ_ENCODING_LISTPACK;
    } else {
        serverPanic("Unknown sorted set encoding");
    }
}
+
+/* Convert the sorted set object into a listpack if it is not already a listpack
+ * and if the number of elements and the maximum element size and total elements size
+ * are within the expected ranges. */
+void zsetConvertToListpackIfNeeded(robj *zobj, size_t maxelelen, size_t totelelen) {
+ if (zobj->encoding == OBJ_ENCODING_LISTPACK) return;
+ zset *zset = zobj->ptr;
+
+ if (zset->zsl->length <= server.zset_max_listpack_entries &&
+ maxelelen <= server.zset_max_listpack_value &&
+ lpSafeToAdd(NULL, totelelen))
+ {
+ zsetConvert(zobj,OBJ_ENCODING_LISTPACK);
+ }
+}
+
+/* Return (by reference) the score of the specified member of the sorted set
+ * storing it into *score. If the element does not exist C_ERR is returned
+ * otherwise C_OK is returned and *score is correctly populated.
+ * If 'zobj' or 'member' is NULL, C_ERR is returned. */
+int zsetScore(robj *zobj, sds member, double *score) {
+ if (!zobj || !member) return C_ERR;
+
+ if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
+ if (zzlFind(zobj->ptr, member, score) == NULL) return C_ERR;
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ dictEntry *de = dictFind(zs->dict, member);
+ if (de == NULL) return C_ERR;
+ *score = *(double*)dictGetVal(de);
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ return C_OK;
+}
+
/* Add a new element or update the score of an existing element in a sorted
 * set, regardless of its encoding.
 *
 * The set of flags change the command behavior.
 *
 * The input flags are the following:
 *
 * ZADD_INCR: Increment the current element score by 'score' instead of updating
 *            the current element score. If the element does not exist, we
 *            assume 0 as previous score.
 * ZADD_NX:   Perform the operation only if the element does not exist.
 * ZADD_XX:   Perform the operation only if the element already exist.
 * ZADD_GT:   Perform the operation on existing elements only if the new score is
 *            greater than the current score.
 * ZADD_LT:   Perform the operation on existing elements only if the new score is
 *            less than the current score.
 *
 * When ZADD_INCR is used, the new score of the element is stored in
 * '*newscore' if 'newscore' is not NULL.
 *
 * The returned flags are the following:
 *
 * ZADD_NAN:     The resulting score is not a number.
 * ZADD_ADDED:   The element was added (not present before the call).
 * ZADD_UPDATED: The element score was updated.
 * ZADD_NOP:     No operation was performed because of NX or XX.
 *
 * Return value:
 *
 * The function returns 1 on success, and sets the appropriate flags
 * ADDED or UPDATED to signal what happened during the operation (note that
 * none could be set if we re-added an element using the same score it used
 * to have, or in the case a zero increment is used).
 *
 * The function returns 0 on error, currently only when the increment
 * produces a NAN condition, or when the 'score' value is NAN since the
 * start.
 *
 * The command as a side effect of adding a new element may convert the sorted
 * set internal encoding from listpack to hashtable+skiplist.
 *
 * Memory management of 'ele':
 *
 * The function does not take ownership of the 'ele' SDS string, but copies
 * it if needed. */
int zsetAdd(robj *zobj, double score, sds ele, int in_flags, int *out_flags, double *newscore) {
    /* Turn options into simple to check vars. */
    int incr = (in_flags & ZADD_IN_INCR) != 0;
    int nx = (in_flags & ZADD_IN_NX) != 0;
    int xx = (in_flags & ZADD_IN_XX) != 0;
    int gt = (in_flags & ZADD_IN_GT) != 0;
    int lt = (in_flags & ZADD_IN_LT) != 0;
    *out_flags = 0; /* We'll return our response flags. */
    double curscore;

    /* NaN as input is an error regardless of all the other parameters. */
    if (isnan(score)) {
        *out_flags = ZADD_OUT_NAN;
        return 0;
    }

    /* Update the sorted set according to its encoding. */
    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *eptr;

        if ((eptr = zzlFind(zobj->ptr,ele,&curscore)) != NULL) {
            /* NX? Return, same element already exists. */
            if (nx) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }

            /* Prepare the score for the increment if needed. */
            if (incr) {
                score += curscore;
                if (isnan(score)) {
                    *out_flags |= ZADD_OUT_NAN;
                    return 0;
                }
            }

            /* GT/LT? Only update if score is greater/less than current. */
            if ((lt && score >= curscore) || (gt && score <= curscore)) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }

            if (newscore) *newscore = score;

            /* Remove and re-insert when score changed. */
            if (score != curscore) {
                zobj->ptr = zzlDelete(zobj->ptr,eptr);
                zobj->ptr = zzlInsert(zobj->ptr,ele,score);
                *out_flags |= ZADD_OUT_UPDATED;
            }
            return 1;
        } else if (!xx) {
            /* check if the element is too large or the list
             * becomes too long *before* executing zzlInsert. */
            if (zzlLength(zobj->ptr)+1 > server.zset_max_listpack_entries ||
                sdslen(ele) > server.zset_max_listpack_value ||
                !lpSafeToAdd(zobj->ptr, sdslen(ele)))
            {
                /* No return here: after the conversion the skiplist branch
                 * below performs the actual insertion. */
                zsetConvertAndExpand(zobj, OBJ_ENCODING_SKIPLIST, zsetLength(zobj) + 1);
            } else {
                zobj->ptr = zzlInsert(zobj->ptr,ele,score);
                if (newscore) *newscore = score;
                *out_flags |= ZADD_OUT_ADDED;
                return 1;
            }
        } else {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }
    }

    /* Note that the above block handling listpack would have either returned or
     * converted the key to skiplist. */
    if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zobj->ptr;
        zskiplistNode *znode;
        dictEntry *de;

        de = dictFind(zs->dict,ele);
        if (de != NULL) {
            /* NX? Return, same element already exists. */
            if (nx) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }

            curscore = *(double*)dictGetVal(de);

            /* Prepare the score for the increment if needed. */
            if (incr) {
                score += curscore;
                if (isnan(score)) {
                    *out_flags |= ZADD_OUT_NAN;
                    return 0;
                }
            }

            /* GT/LT? Only update if score is greater/less than current. */
            if ((lt && score >= curscore) || (gt && score <= curscore)) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }

            if (newscore) *newscore = score;

            /* Remove and re-insert when score changes. */
            if (score != curscore) {
                znode = zslUpdateScore(zs->zsl,curscore,ele,score);
                /* Note that we did not removed the original element from
                 * the hash table representing the sorted set, so we just
                 * update the score. */
                dictSetVal(zs->dict, de, &znode->score); /* Update score ptr. */
                *out_flags |= ZADD_OUT_UPDATED;
            }
            return 1;
        } else if (!xx) {
            /* Copy 'ele': the caller keeps ownership of its own string. */
            ele = sdsdup(ele);
            znode = zslInsert(zs->zsl,score,ele);
            serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
            *out_flags |= ZADD_OUT_ADDED;
            if (newscore) *newscore = score;
            return 1;
        } else {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }
    return 0; /* Never reached. */
}
+
/* Deletes the element 'ele' from the sorted set encoded as a skiplist+dict,
 * returning 1 if the element existed and was deleted, 0 otherwise (the
 * element was not there). It does not resize the dict after deleting the
 * element. */
static int zsetRemoveFromSkiplist(zset *zs, sds ele) {
    dictEntry *de;
    double score;

    /* Unlink (instead of delete) so we can still read the score before
     * the entry is released. */
    de = dictUnlink(zs->dict,ele);
    if (de != NULL) {
        /* Get the score in order to delete from the skiplist later. */
        score = *(double*)dictGetVal(de);

        /* Delete from the hash table and later from the skiplist.
         * Note that the order is important: deleting from the skiplist
         * actually releases the SDS string representing the element,
         * which is shared between the skiplist and the hash table, so
         * we need to delete from the skiplist as the final step. */
        dictFreeUnlinkedEntry(zs->dict,de);

        /* Delete from skiplist. */
        int retval = zslDelete(zs->zsl,score,ele,NULL);
        serverAssert(retval);

        return 1;
    }

    return 0;
}
+
+/* Delete the element 'ele' from the sorted set, returning 1 if the element
+ * existed and was deleted, 0 otherwise (the element was not there). */
+int zsetDel(robj *zobj, sds ele) {
+ if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *eptr;
+
+ if ((eptr = zzlFind(zobj->ptr,ele,NULL)) != NULL) {
+ zobj->ptr = zzlDelete(zobj->ptr,eptr);
+ return 1;
+ }
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ if (zsetRemoveFromSkiplist(zs, ele)) {
+ if (htNeedsResize(zs->dict)) dictResize(zs->dict);
+ return 1;
+ }
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ return 0; /* No such element found. */
+}
+
/* Given a sorted set object returns the 0-based rank of the object or
 * -1 if the object does not exist.
 *
 * For rank we mean the position of the element in the sorted collection
 * of elements. So the first element has rank 0, the second rank 1, and so
 * forth up to length-1 elements.
 *
 * If 'reverse' is false, the rank is returned considering as first element
 * the one with the lowest score. Otherwise if 'reverse' is non-zero
 * the rank is computed considering as element with rank 0 the one with
 * the highest score.
 *
 * When the element is found and 'output_score' is not NULL, its score is
 * stored through it. */
long zsetRank(robj *zobj, sds ele, int reverse, double *output_score) {
    unsigned long llen;
    unsigned long rank;

    llen = zsetLength(zobj);

    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *zl = zobj->ptr;
        unsigned char *eptr, *sptr;

        eptr = lpSeek(zl,0);
        serverAssert(eptr != NULL);
        sptr = lpNext(zl,eptr);
        serverAssert(sptr != NULL);

        /* Linear scan: the listpack has no rank index, so count pairs
         * until the member matches. */
        rank = 1;
        while(eptr != NULL) {
            if (lpCompare(eptr,(unsigned char*)ele,sdslen(ele)))
                break;
            rank++;
            zzlNext(zl,&eptr,&sptr);
        }

        if (eptr != NULL) {
            if (output_score)
                *output_score = zzlGetScore(sptr);
            /* Convert the 1-based forward rank to the requested 0-based
             * (possibly reversed) rank. */
            if (reverse)
                return llen-rank;
            else
                return rank-1;
        } else {
            return -1;
        }
    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zobj->ptr;
        zskiplist *zsl = zs->zsl;
        dictEntry *de;
        double score;

        de = dictFind(zs->dict,ele);
        if (de != NULL) {
            score = *(double*)dictGetVal(de);
            rank = zslGetRank(zsl,score,ele);
            /* Existing elements always have a rank. */
            serverAssert(rank != 0);
            if (output_score)
                *output_score = score;
            if (reverse)
                return llen-rank;
            else
                return rank-1;
        } else {
            return -1;
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }
}
+
+/* This is a helper function for the COPY command.
+ * Duplicate a sorted set object, with the guarantee that the returned object
+ * has the same encoding as the original one.
+ *
+ * The resulting object always has refcount set to 1 */
+robj *zsetDup(robj *o) {
+    robj *zobj;
+    zset *zs;
+    zset *new_zs;
+
+    serverAssert(o->type == OBJ_ZSET);
+
+    /* Create a new sorted set object that have the same encoding as the original object's encoding */
+    if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        /* Listpacks are a single contiguous allocation: a plain byte copy
+         * is a complete duplicate. */
+        unsigned char *zl = o->ptr;
+        size_t sz = lpBytes(zl);
+        unsigned char *new_zl = zmalloc(sz);
+        memcpy(new_zl, zl, sz);
+        zobj = createObject(OBJ_ZSET, new_zl);
+        zobj->encoding = OBJ_ENCODING_LISTPACK;
+    } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
+        zobj = createZsetObject();
+        zs = o->ptr;
+        new_zs = zobj->ptr;
+        /* Pre-size the dict to avoid rehashing while inserting. */
+        dictExpand(new_zs->dict,dictSize(zs->dict));
+        zskiplist *zsl = zs->zsl;
+        zskiplistNode *ln;
+        sds ele;
+        long llen = zsetLength(o);
+
+        /* We copy the skiplist elements from the greatest to the
+         * smallest (that's trivial since the elements are already ordered in
+         * the skiplist): this improves the load process, since the next loaded
+         * element will always be the smaller, so adding to the skiplist
+         * will always immediately stop at the head, making the insertion
+         * O(1) instead of O(log(N)). */
+        ln = zsl->tail;
+        while (llen--) {
+            ele = ln->ele;
+            /* The sds element is shared between skiplist and dict: duplicate
+             * once and store the same pointer in both. */
+            sds new_ele = sdsdup(ele);
+            zskiplistNode *znode = zslInsert(new_zs->zsl,ln->score,new_ele);
+            dictAdd(new_zs->dict,new_ele,&znode->score);
+            ln = ln->backward;
+        }
+    } else {
+        serverPanic("Unknown sorted set encoding");
+    }
+    return zobj;
+}
+
+/* Create a new sds string from the listpack entry.
+ * The entry may hold either a string ('sval'/'slen') or an integer ('lval');
+ * the returned sds is newly allocated and owned by the caller. */
+sds zsetSdsFromListpackEntry(listpackEntry *e) {
+    return e->sval ? sdsnewlen(e->sval, e->slen) : sdsfromlonglong(e->lval);
+}
+
+/* Reply with bulk string from the listpack entry.
+ * Strings are emitted verbatim; integer-encoded entries are emitted as
+ * their decimal representation, avoiding an intermediate sds allocation. */
+void zsetReplyFromListpackEntry(client *c, listpackEntry *e) {
+    if (e->sval)
+        addReplyBulkCBuffer(c, e->sval, e->slen);
+    else
+        addReplyBulkLongLong(c, e->lval);
+}
+
+
+/* Return random element from a non empty zset.
+ * 'key' and 'val' will be set to hold the element.
+ * The memory in `key` is not to be freed or modified by the caller.
+ * 'score' can be NULL in which case it's not extracted.
+ * 'zsetsize' must be the current length of the sorted set (used by the
+ * listpack random-pair helper). */
+void zsetTypeRandomElement(robj *zsetobj, unsigned long zsetsize, listpackEntry *key, double *score) {
+    if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) {
+        zset *zs = zsetobj->ptr;
+        dictEntry *de = dictGetFairRandomKey(zs->dict);
+        sds s = dictGetKey(de);
+        /* The returned buffer points into the dict's own sds key. */
+        key->sval = (unsigned char*)s;
+        key->slen = sdslen(s);
+        if (score)
+            *score = *(double*)dictGetVal(de);
+    } else if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) {
+        listpackEntry val;
+        lpRandomPair(zsetobj->ptr, zsetsize, key, &val);
+        if (score) {
+            /* The score half of the pair may be stored as a string or as
+             * an integer inside the listpack. */
+            if (val.sval) {
+                *score = zzlStrtod(val.sval,val.slen);
+            } else {
+                *score = (double)val.lval;
+            }
+        }
+    } else {
+        serverPanic("Unknown zset encoding");
+    }
+}
+
+/*-----------------------------------------------------------------------------
+ * Sorted set commands
+ *----------------------------------------------------------------------------*/
+
+/* This generic command implements both ZADD and ZINCRBY.
+ * 'flags' carries the caller-forced input flags (e.g. ZADD_IN_INCR for
+ * ZINCRBY); additional flags are parsed from the command arguments. */
+void zaddGenericCommand(client *c, int flags) {
+    static char *nanerr = "resulting score is not a number (NaN)";
+    robj *key = c->argv[1];
+    robj *zobj;
+    sds ele;
+    double score = 0, *scores = NULL;
+    int j, elements, ch = 0;
+    int scoreidx = 0;
+    /* The following vars are used in order to track what the command actually
+     * did during the execution, to reply to the client and to trigger the
+     * notification of keyspace change. */
+    int added = 0;      /* Number of new elements added. */
+    int updated = 0;    /* Number of elements with updated score. */
+    int processed = 0;  /* Number of elements processed, may remain zero with
+                           options like XX. */
+
+    /* Parse options. At the end 'scoreidx' is set to the argument position
+     * of the score of the first score-element pair. */
+    scoreidx = 2;
+    while(scoreidx < c->argc) {
+        char *opt = c->argv[scoreidx]->ptr;
+        if (!strcasecmp(opt,"nx")) flags |= ZADD_IN_NX;
+        else if (!strcasecmp(opt,"xx")) flags |= ZADD_IN_XX;
+        else if (!strcasecmp(opt,"ch")) ch = 1; /* Return num of elements added or updated. */
+        else if (!strcasecmp(opt,"incr")) flags |= ZADD_IN_INCR;
+        else if (!strcasecmp(opt,"gt")) flags |= ZADD_IN_GT;
+        else if (!strcasecmp(opt,"lt")) flags |= ZADD_IN_LT;
+        else break;
+        scoreidx++;
+    }
+
+    /* Turn options into simple to check vars. */
+    int incr = (flags & ZADD_IN_INCR) != 0;
+    int nx = (flags & ZADD_IN_NX) != 0;
+    int xx = (flags & ZADD_IN_XX) != 0;
+    int gt = (flags & ZADD_IN_GT) != 0;
+    int lt = (flags & ZADD_IN_LT) != 0;
+
+    /* After the options, we expect to have an even number of args, since
+     * we expect any number of score-element pairs. */
+    elements = c->argc-scoreidx;
+    if (elements % 2 || !elements) {
+        addReplyErrorObject(c,shared.syntaxerr);
+        return;
+    }
+    elements /= 2; /* Now this holds the number of score-element pairs. */
+
+    /* Check for incompatible options. */
+    if (nx && xx) {
+        addReplyError(c,
+            "XX and NX options at the same time are not compatible");
+        return;
+    }
+
+    if ((gt && nx) || (lt && nx) || (gt && lt)) {
+        addReplyError(c,
+            "GT, LT, and/or NX options at the same time are not compatible");
+        return;
+    }
+    /* Note that XX is compatible with either GT or LT */
+
+    if (incr && elements > 1) {
+        addReplyError(c,
+            "INCR option supports a single increment-element pair");
+        return;
+    }
+
+    /* Start parsing all the scores, we need to emit any syntax error
+     * before executing additions to the sorted set, as the command should
+     * either execute fully or nothing at all. */
+    scores = zmalloc(sizeof(double)*elements);
+    for (j = 0; j < elements; j++) {
+        if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
+            != C_OK) goto cleanup;
+    }
+
+    /* Lookup the key and create the sorted set if does not exist. */
+    zobj = lookupKeyWrite(c->db,key);
+    if (checkType(c,zobj,OBJ_ZSET)) goto cleanup;
+    if (zobj == NULL) {
+        if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
+        zobj = zsetTypeCreate(elements, sdslen(c->argv[scoreidx+1]->ptr));
+        dbAdd(c->db,key,zobj);
+    } else {
+        /* Existing key: may need conversion to skiplist if the number of
+         * elements to add exceeds the listpack limits. */
+        zsetTypeMaybeConvert(zobj, elements);
+    }
+
+    for (j = 0; j < elements; j++) {
+        double newscore;
+        score = scores[j];
+        int retflags = 0;
+
+        ele = c->argv[scoreidx+1+j*2]->ptr;
+        int retval = zsetAdd(zobj, score, ele, flags, &retflags, &newscore);
+        if (retval == 0) {
+            addReplyError(c,nanerr);
+            goto cleanup;
+        }
+        if (retflags & ZADD_OUT_ADDED) added++;
+        if (retflags & ZADD_OUT_UPDATED) updated++;
+        if (!(retflags & ZADD_OUT_NOP)) processed++;
+        /* Remember the resulting score: for INCR it is the value replied
+         * to the client below. */
+        score = newscore;
+    }
+    server.dirty += (added+updated);
+
+reply_to_client:
+    if (incr) { /* ZINCRBY or INCR option. */
+        if (processed)
+            addReplyDouble(c,score);
+        else
+            addReplyNull(c);
+    } else { /* ZADD. */
+        addReplyLongLong(c,ch ? added+updated : added);
+    }
+
+cleanup:
+    zfree(scores);
+    if (added || updated) {
+        signalModifiedKey(c,c->db,key);
+        notifyKeyspaceEvent(NOTIFY_ZSET,
+            incr ? "zincr" : "zadd", key, c->db->id);
+    }
+}
+
+/* ZADD key [NX|XX] [GT|LT] [CH] [INCR] score member [score member ...] */
+void zaddCommand(client *c) {
+    zaddGenericCommand(c,ZADD_IN_NONE);
+}
+
+/* ZINCRBY key increment member (ZADD with the INCR flag forced on). */
+void zincrbyCommand(client *c) {
+    zaddGenericCommand(c,ZADD_IN_INCR);
+}
+
+/* ZREM key member [member ...]
+ * Removes the given members, replying with the number actually deleted.
+ * The key itself is deleted as soon as the set becomes empty. */
+void zremCommand(client *c) {
+    robj *key = c->argv[1];
+    robj *zobj;
+    int deleted = 0, keyremoved = 0, j;
+
+    if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL ||
+        checkType(c,zobj,OBJ_ZSET)) return;
+
+    for (j = 2; j < c->argc; j++) {
+        if (zsetDel(zobj,c->argv[j]->ptr)) deleted++;
+        /* Stop early once the set is empty: remaining members cannot
+         * exist anymore. */
+        if (zsetLength(zobj) == 0) {
+            dbDelete(c->db,key);
+            keyremoved = 1;
+            break;
+        }
+    }
+
+    if (deleted) {
+        notifyKeyspaceEvent(NOTIFY_ZSET,"zrem",key,c->db->id);
+        if (keyremoved)
+            notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
+        signalModifiedKey(c,c->db,key);
+        server.dirty += deleted;
+    }
+    addReplyLongLong(c,deleted);
+}
+
+/* Range flavor shared by the ZRANGE* and ZREMRANGEBY* implementations. */
+typedef enum {
+    ZRANGE_AUTO = 0,  /* Not yet determined (resolved from arguments). */
+    ZRANGE_RANK,      /* Range by 0-based position (start/stop indexes). */
+    ZRANGE_SCORE,     /* Range by score interval (min/max, possibly open). */
+    ZRANGE_LEX,       /* Range by lexicographical interval. */
+} zrange_type;
+
+/* Implements ZREMRANGEBYRANK, ZREMRANGEBYSCORE, ZREMRANGEBYLEX commands.
+ * 'rangetype' selects how c->argv[2]/c->argv[3] are interpreted. */
+void zremrangeGenericCommand(client *c, zrange_type rangetype) {
+    robj *key = c->argv[1];
+    robj *zobj;
+    int keyremoved = 0;
+    unsigned long deleted = 0;
+    zrangespec range;
+    zlexrangespec lexrange;
+    long start, end, llen;
+    char *notify_type = NULL;
+
+    /* Step 1: Parse the range. */
+    if (rangetype == ZRANGE_RANK) {
+        notify_type = "zremrangebyrank";
+        if ((getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK) ||
+            (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK))
+            return;
+    } else if (rangetype == ZRANGE_SCORE) {
+        notify_type = "zremrangebyscore";
+        if (zslParseRange(c->argv[2],c->argv[3],&range) != C_OK) {
+            addReplyError(c,"min or max is not a float");
+            return;
+        }
+    } else if (rangetype == ZRANGE_LEX) {
+        notify_type = "zremrangebylex";
+        /* On success 'lexrange' may own allocated strings: it must be
+         * released via zslFreeLexRange() in the cleanup path. */
+        if (zslParseLexRange(c->argv[2],c->argv[3],&lexrange) != C_OK) {
+            addReplyError(c,"min or max not valid string range item");
+            return;
+        }
+    } else {
+        serverPanic("unknown rangetype %d", (int)rangetype);
+    }
+
+    /* Step 2: Lookup & range sanity checks if needed. */
+    if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL ||
+        checkType(c,zobj,OBJ_ZSET)) goto cleanup;
+
+    if (rangetype == ZRANGE_RANK) {
+        /* Sanitize indexes. Negative indexes count from the tail. */
+        llen = zsetLength(zobj);
+        if (start < 0) start = llen+start;
+        if (end < 0) end = llen+end;
+        if (start < 0) start = 0;
+
+        /* Invariant: start >= 0, so this test will be true when end < 0.
+         * The range is empty when start > end or start >= length. */
+        if (start > end || start >= llen) {
+            addReply(c,shared.czero);
+            goto cleanup;
+        }
+        if (end >= llen) end = llen-1;
+    }
+
+    /* Step 3: Perform the range deletion operation.
+     * Note: the zzl/zsl rank helpers use 1-based ranks, hence start+1/end+1. */
+    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
+        switch(rangetype) {
+        case ZRANGE_AUTO:
+        case ZRANGE_RANK:
+            zobj->ptr = zzlDeleteRangeByRank(zobj->ptr,start+1,end+1,&deleted);
+            break;
+        case ZRANGE_SCORE:
+            zobj->ptr = zzlDeleteRangeByScore(zobj->ptr,&range,&deleted);
+            break;
+        case ZRANGE_LEX:
+            zobj->ptr = zzlDeleteRangeByLex(zobj->ptr,&lexrange,&deleted);
+            break;
+        }
+        if (zzlLength(zobj->ptr) == 0) {
+            dbDelete(c->db,key);
+            keyremoved = 1;
+        }
+    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+        zset *zs = zobj->ptr;
+        switch(rangetype) {
+        case ZRANGE_AUTO:
+        case ZRANGE_RANK:
+            deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict);
+            break;
+        case ZRANGE_SCORE:
+            deleted = zslDeleteRangeByScore(zs->zsl,&range,zs->dict);
+            break;
+        case ZRANGE_LEX:
+            deleted = zslDeleteRangeByLex(zs->zsl,&lexrange,zs->dict);
+            break;
+        }
+        if (htNeedsResize(zs->dict)) dictResize(zs->dict);
+        if (dictSize(zs->dict) == 0) {
+            dbDelete(c->db,key);
+            keyremoved = 1;
+        }
+    } else {
+        serverPanic("Unknown sorted set encoding");
+    }
+
+    /* Step 4: Notifications and reply. */
+    if (deleted) {
+        signalModifiedKey(c,c->db,key);
+        notifyKeyspaceEvent(NOTIFY_ZSET,notify_type,key,c->db->id);
+        if (keyremoved)
+            notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
+    }
+    server.dirty += deleted;
+    addReplyLongLong(c,deleted);
+
+cleanup:
+    if (rangetype == ZRANGE_LEX) zslFreeLexRange(&lexrange);
+}
+
+/* ZREMRANGEBYRANK key start stop */
+void zremrangebyrankCommand(client *c) {
+    zremrangeGenericCommand(c,ZRANGE_RANK);
+}
+
+/* ZREMRANGEBYSCORE key min max */
+void zremrangebyscoreCommand(client *c) {
+    zremrangeGenericCommand(c,ZRANGE_SCORE);
+}
+
+/* ZREMRANGEBYLEX key min max */
+void zremrangebylexCommand(client *c) {
+    zremrangeGenericCommand(c,ZRANGE_LEX);
+}
+
+/* One input source (a set or a sorted set) for the ZUNION/ZINTER/ZDIFF
+ * family of commands, together with an encoding-specific iterator. */
+typedef struct {
+    robj *subject;  /* The source object, or NULL for a missing key. */
+    int type;       /* Set, sorted set */
+    int encoding;   /* Encoding of 'subject' at iterator init time. */
+    double weight;  /* Multiplier applied to scores from this source. */
+
+    union {
+        /* Set iterators. */
+        union _iterset {
+            struct {
+                intset *is;
+                int ii;      /* Current index inside the intset. */
+            } is;
+            struct {
+                dict *dict;
+                dictIterator *di;
+                dictEntry *de;  /* Current entry, NULL at end. */
+            } ht;
+            struct {
+                unsigned char *lp;
+                unsigned char *p;  /* Current listpack element, NULL at end. */
+            } lp;
+        } set;
+
+        /* Sorted set iterators. */
+        union _iterzset {
+            struct {
+                unsigned char *zl;
+                unsigned char *eptr, *sptr;  /* Element and score pointers. */
+            } zl;
+            struct {
+                zset *zs;
+                zskiplistNode *node;  /* Current node, NULL at end. */
+            } sl;
+        } zset;
+    } iter;
+} zsetopsrc;
+
+
+/* Use dirty flags for pointers that need to be cleaned up in the next
+ * iteration over the zsetopval. The dirty flag for the long long value is
+ * special, since long long values don't need cleanup. Instead, it means that
+ * we already checked that "ell" holds a long long, or tried to convert another
+ * representation into a long long value. When this was successful,
+ * OPVAL_VALID_LL is set as well. */
+#define OPVAL_DIRTY_SDS 1
+#define OPVAL_DIRTY_LL 2
+#define OPVAL_VALID_LL 4
+
+/* Store value retrieved from the iterator.
+ * An element can be represented as an sds ('ele'), a raw buffer
+ * ('estr'/'elen') or a long long ('ell'), depending on the source. */
+typedef struct {
+    int flags;
+    unsigned char _buf[32]; /* Private buffer. */
+    sds ele;                /* sds form, may be owned (OPVAL_DIRTY_SDS). */
+    unsigned char *estr;    /* Raw string form, not owned. */
+    unsigned int elen;      /* Length of 'estr'. */
+    long long ell;          /* Integer form, see OPVAL_*_LL flags. */
+    double score;           /* Score associated with the element. */
+} zsetopval;
+
+/* Short names for the anonymous iterator unions declared in zsetopsrc. */
+typedef union _iterset iterset;
+typedef union _iterzset iterzset;
+
+/* Initialize the encoding-specific iterator of 'op'.
+ * A NULL subject (missing key) is a valid empty source: no-op here. */
+void zuiInitIterator(zsetopsrc *op) {
+    if (op->subject == NULL)
+        return;
+
+    if (op->type == OBJ_SET) {
+        iterset *it = &op->iter.set;
+        if (op->encoding == OBJ_ENCODING_INTSET) {
+            it->is.is = op->subject->ptr;
+            it->is.ii = 0;
+        } else if (op->encoding == OBJ_ENCODING_HT) {
+            it->ht.dict = op->subject->ptr;
+            it->ht.di = dictGetIterator(op->subject->ptr);
+            it->ht.de = dictNext(it->ht.di);
+        } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            it->lp.lp = op->subject->ptr;
+            it->lp.p = lpFirst(it->lp.lp);
+        } else {
+            serverPanic("Unknown set encoding");
+        }
+    } else if (op->type == OBJ_ZSET) {
+        /* Sorted sets are traversed in reverse order to optimize for
+         * the insertion of the elements in a new list as in
+         * ZDIFF/ZINTER/ZUNION */
+        iterzset *it = &op->iter.zset;
+        if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            /* Seek to the last element (-2 skips the trailing score). */
+            it->zl.zl = op->subject->ptr;
+            it->zl.eptr = lpSeek(it->zl.zl,-2);
+            if (it->zl.eptr != NULL) {
+                it->zl.sptr = lpNext(it->zl.zl,it->zl.eptr);
+                serverAssert(it->zl.sptr != NULL);
+            }
+        } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
+            it->sl.zs = op->subject->ptr;
+            it->sl.node = it->sl.zs->zsl->tail;
+        } else {
+            serverPanic("Unknown sorted set encoding");
+        }
+    } else {
+        serverPanic("Unsupported type");
+    }
+}
+
+/* Release iterator resources acquired by zuiInitIterator().
+ * Only the hash-table encoding allocates anything (a dict iterator);
+ * the other encodings iterate in place. */
+void zuiClearIterator(zsetopsrc *op) {
+    if (op->subject == NULL)
+        return;
+
+    if (op->type == OBJ_SET) {
+        iterset *it = &op->iter.set;
+        if (op->encoding == OBJ_ENCODING_INTSET) {
+            UNUSED(it); /* skip */
+        } else if (op->encoding == OBJ_ENCODING_HT) {
+            dictReleaseIterator(it->ht.di);
+        } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            UNUSED(it);
+        } else {
+            serverPanic("Unknown set encoding");
+        }
+    } else if (op->type == OBJ_ZSET) {
+        iterzset *it = &op->iter.zset;
+        if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            UNUSED(it); /* skip */
+        } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
+            UNUSED(it); /* skip */
+        } else {
+            serverPanic("Unknown sorted set encoding");
+        }
+    } else {
+        serverPanic("Unsupported type");
+    }
+}
+
+/* Free the sds stored in 'val' if it is owned by the zsetopval
+ * (OPVAL_DIRTY_SDS set), and clear the dirty flag. */
+void zuiDiscardDirtyValue(zsetopval *val) {
+    if (val->flags & OPVAL_DIRTY_SDS) {
+        sdsfree(val->ele);
+        val->ele = NULL;
+        val->flags &= ~OPVAL_DIRTY_SDS;
+    }
+}
+
+/* Return the cardinality of the source, 0 for a missing key. */
+unsigned long zuiLength(zsetopsrc *op) {
+    if (op->subject == NULL)
+        return 0;
+
+    if (op->type == OBJ_SET) {
+        return setTypeSize(op->subject);
+    } else if (op->type == OBJ_ZSET) {
+        if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            return zzlLength(op->subject->ptr);
+        } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
+            zset *zs = op->subject->ptr;
+            return zs->zsl->length;
+        } else {
+            serverPanic("Unknown sorted set encoding");
+        }
+    } else {
+        serverPanic("Unsupported type");
+    }
+}
+
+/* Check if the current value is valid. If so, store it in the passed structure
+ * and move to the next element. If not valid, this means we have reached the
+ * end of the structure and can abort.
+ * Returns 1 when a value was produced, 0 at end of iteration.
+ * Plain sets have no scores, so their elements get an implicit score of 1.0. */
+int zuiNext(zsetopsrc *op, zsetopval *val) {
+    if (op->subject == NULL)
+        return 0;
+
+    /* Release any sds owned by the previous iteration before resetting. */
+    zuiDiscardDirtyValue(val);
+
+    memset(val,0,sizeof(zsetopval));
+
+    if (op->type == OBJ_SET) {
+        iterset *it = &op->iter.set;
+        if (op->encoding == OBJ_ENCODING_INTSET) {
+            int64_t ell;
+
+            if (!intsetGet(it->is.is,it->is.ii,&ell))
+                return 0;
+            val->ell = ell;
+            val->score = 1.0;
+
+            /* Move to next element. */
+            it->is.ii++;
+        } else if (op->encoding == OBJ_ENCODING_HT) {
+            if (it->ht.de == NULL)
+                return 0;
+            val->ele = dictGetKey(it->ht.de);
+            val->score = 1.0;
+
+            /* Move to next element. */
+            it->ht.de = dictNext(it->ht.di);
+        } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            if (it->lp.p == NULL)
+                return 0;
+            val->estr = lpGetValue(it->lp.p, &val->elen, &val->ell);
+            val->score = 1.0;
+
+            /* Move to next element. */
+            it->lp.p = lpNext(it->lp.lp, it->lp.p);
+        } else {
+            serverPanic("Unknown set encoding");
+        }
+    } else if (op->type == OBJ_ZSET) {
+        iterzset *it = &op->iter.zset;
+        if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            /* No need to check both, but better be explicit. */
+            if (it->zl.eptr == NULL || it->zl.sptr == NULL)
+                return 0;
+            val->estr = lpGetValue(it->zl.eptr,&val->elen,&val->ell);
+            val->score = zzlGetScore(it->zl.sptr);
+
+            /* Move to next element (going backwards, see zuiInitIterator). */
+            zzlPrev(it->zl.zl,&it->zl.eptr,&it->zl.sptr);
+        } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
+            if (it->sl.node == NULL)
+                return 0;
+            val->ele = it->sl.node->ele;
+            val->score = it->sl.node->score;
+
+            /* Move to next element. (going backwards, see zuiInitIterator) */
+            it->sl.node = it->sl.node->backward;
+        } else {
+            serverPanic("Unknown sorted set encoding");
+        }
+    } else {
+        serverPanic("Unsupported type");
+    }
+    return 1;
+}
+
+/* Try to obtain a long long representation of 'val', caching the result:
+ * OPVAL_DIRTY_LL marks that a conversion was attempted, OPVAL_VALID_LL that
+ * it succeeded. Returns non-zero when 'val->ell' holds a valid value. */
+int zuiLongLongFromValue(zsetopval *val) {
+    if (!(val->flags & OPVAL_DIRTY_LL)) {
+        val->flags |= OPVAL_DIRTY_LL;
+
+        if (val->ele != NULL) {
+            if (string2ll(val->ele,sdslen(val->ele),&val->ell))
+                val->flags |= OPVAL_VALID_LL;
+        } else if (val->estr != NULL) {
+            if (string2ll((char*)val->estr,val->elen,&val->ell))
+                val->flags |= OPVAL_VALID_LL;
+        } else {
+            /* The long long was already set, flag as valid. */
+            val->flags |= OPVAL_VALID_LL;
+        }
+    }
+    return val->flags & OPVAL_VALID_LL;
+}
+
+/* Return an sds view of 'val', creating (and caching) one from the raw
+ * buffer or long long form if needed. A created sds is owned by the
+ * zsetopval (OPVAL_DIRTY_SDS) and freed by zuiDiscardDirtyValue(). */
+sds zuiSdsFromValue(zsetopval *val) {
+    if (val->ele == NULL) {
+        if (val->estr != NULL) {
+            val->ele = sdsnewlen((char*)val->estr,val->elen);
+        } else {
+            val->ele = sdsfromlonglong(val->ell);
+        }
+        val->flags |= OPVAL_DIRTY_SDS;
+    }
+    return val->ele;
+}
+
+/* This is different from zuiSdsFromValue since returns a new SDS string
+ * which is up to the caller to free. */
+sds zuiNewSdsFromValue(zsetopval *val) {
+    if (val->flags & OPVAL_DIRTY_SDS) {
+        /* We have already one to return! Transfer ownership to the caller
+         * instead of duplicating it. */
+        sds ele = val->ele;
+        val->flags &= ~OPVAL_DIRTY_SDS;
+        val->ele = NULL;
+        return ele;
+    } else if (val->ele) {
+        return sdsdup(val->ele);
+    } else if (val->estr) {
+        return sdsnewlen((char*)val->estr,val->elen);
+    } else {
+        return sdsfromlonglong(val->ell);
+    }
+}
+
+/* Ensure 'val' has a raw buffer representation ('estr'/'elen'), using the
+ * sds if present or rendering the long long into the private buffer.
+ * Always returns 1. */
+int zuiBufferFromValue(zsetopval *val) {
+    if (val->estr == NULL) {
+        if (val->ele != NULL) {
+            val->elen = sdslen(val->ele);
+            val->estr = (unsigned char*)val->ele;
+        } else {
+            val->elen = ll2string((char*)val->_buf,sizeof(val->_buf),val->ell);
+            val->estr = val->_buf;
+        }
+    }
+    return 1;
+}
+
+/* Find value pointed to by val in the source pointer to by op. When found,
+ * return 1 and store its score in target. Return 0 otherwise.
+ * Plain set members are reported with a score of 1.0. */
+int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
+    if (op->subject == NULL)
+        return 0;
+
+    if (op->type == OBJ_SET) {
+        char *str = val->ele ? val->ele : (char *)val->estr;
+        size_t len = val->ele ? sdslen(val->ele) : val->elen;
+        if (setTypeIsMemberAux(op->subject, str, len, val->ell, val->ele != NULL)) {
+            *score = 1.0;
+            return 1;
+        } else {
+            return 0;
+        }
+    } else if (op->type == OBJ_ZSET) {
+        /* Sorted set lookups require the sds form of the element. */
+        zuiSdsFromValue(val);
+
+        if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            if (zzlFind(op->subject->ptr,val->ele,score) != NULL) {
+                /* Score is already set by zzlFind. */
+                return 1;
+            } else {
+                return 0;
+            }
+        } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
+            zset *zs = op->subject->ptr;
+            dictEntry *de;
+            if ((de = dictFind(zs->dict,val->ele)) != NULL) {
+                *score = *(double*)dictGetVal(de);
+                return 1;
+            } else {
+                return 0;
+            }
+        } else {
+            serverPanic("Unknown sorted set encoding");
+        }
+    } else {
+        serverPanic("Unsupported type");
+    }
+}
+
+/* qsort() comparator ordering zsetopsrc entries by increasing cardinality. */
+int zuiCompareByCardinality(const void *s1, const void *s2) {
+    unsigned long first = zuiLength((zsetopsrc*)s1);
+    unsigned long second = zuiLength((zsetopsrc*)s2);
+    if (first > second) return 1;
+    if (first < second) return -1;
+    return 0;
+}
+
+/* qsort() comparator ordering zsetopsrc entries by decreasing cardinality. */
+static int zuiCompareByRevCardinality(const void *s1, const void *s2) {
+    return zuiCompareByCardinality(s1, s2) * -1;
+}
+
+/* AGGREGATE option values for ZUNION/ZINTER and their STORE variants. */
+#define REDIS_AGGR_SUM 1
+#define REDIS_AGGR_MIN 2
+#define REDIS_AGGR_MAX 3
+/* Dict entries with a NULL value represent the implicit 1.0 score of
+ * plain-set members. */
+#define zunionInterDictValue(_e) (dictGetVal(_e) == NULL ? 1.0 : *(double*)dictGetVal(_e))
+
+/* Fold 'val' into '*target' according to the requested aggregate mode. */
+inline static void zunionInterAggregate(double *target, double val, int aggregate) {
+    if (aggregate == REDIS_AGGR_SUM) {
+        *target = *target + val;
+        /* The result of adding two doubles is NaN when one variable
+         * is +inf and the other is -inf. When these numbers are added,
+         * we maintain the convention of the result being 0.0. */
+        if (isnan(*target)) *target = 0.0;
+    } else if (aggregate == REDIS_AGGR_MIN) {
+        *target = val < *target ? val : *target;
+    } else if (aggregate == REDIS_AGGR_MAX) {
+        *target = val > *target ? val : *target;
+    } else {
+        /* safety net */
+        serverPanic("Unknown ZUNION/INTER aggregate type");
+    }
+}
+
+/* Scan all keys of 'd' (sds elements) and return the length of the longest
+ * one. If 'totallen' is not NULL, the summed length of all elements is
+ * accumulated into it (it is not reset here). */
+static size_t zsetDictGetMaxElementLength(dict *d, size_t *totallen) {
+    dictIterator *di;
+    dictEntry *de;
+    size_t maxelelen = 0;
+
+    di = dictGetIterator(d);
+
+    while((de = dictNext(di)) != NULL) {
+        sds ele = dictGetKey(de);
+        if (sdslen(ele) > maxelelen) maxelelen = sdslen(ele);
+        if (totallen)
+            (*totallen) += sdslen(ele);
+    }
+
+    dictReleaseIterator(di);
+
+    return maxelelen;
+}
+
+/* ZDIFF helper: compute src[0] minus src[1..setnum-1] into 'dstzset',
+ * updating '*maxelelen' / '*totelelen' with the result elements' lengths. */
+static void zdiffAlgorithm1(zsetopsrc *src, long setnum, zset *dstzset, size_t *maxelelen, size_t *totelelen) {
+    /* DIFF Algorithm 1:
+     *
+     * We perform the diff by iterating all the elements of the first set,
+     * and only adding it to the target set if the element does not exist
+     * into all the other sets.
+     *
+     * This way we perform at max N*M operations, where N is the size of
+     * the first set, and M the number of sets.
+     *
+     * There is also a O(K*log(K)) cost for adding the resulting elements
+     * to the target set, where K is the final size of the target set.
+     *
+     * The final complexity of this algorithm is O(N*M + K*log(K)). */
+    int j;
+    zsetopval zval;
+    zskiplistNode *znode;
+    sds tmp;
+
+    /* With algorithm 1 it is better to order the sets to subtract
+     * by decreasing size, so that we are more likely to find
+     * duplicated elements ASAP. */
+    qsort(src+1,setnum-1,sizeof(zsetopsrc),zuiCompareByRevCardinality);
+
+    memset(&zval, 0, sizeof(zval));
+    zuiInitIterator(&src[0]);
+    while (zuiNext(&src[0],&zval)) {
+        double value;
+        int exists = 0;
+
+        for (j = 1; j < setnum; j++) {
+            /* It is not safe to access the zset we are
+             * iterating, so explicitly check for equal object.
+             * This check isn't really needed anymore since we already
+             * check for a duplicate set in the zsetChooseDiffAlgorithm
+             * function, but we're leaving it for future-proofing. */
+            if (src[j].subject == src[0].subject ||
+                zuiFind(&src[j],&zval,&value)) {
+                exists = 1;
+                break;
+            }
+        }
+
+        if (!exists) {
+            /* The same sds is shared between the skiplist node and the
+             * dict key (ownership transferred by zuiNewSdsFromValue). */
+            tmp = zuiNewSdsFromValue(&zval);
+            znode = zslInsert(dstzset->zsl,zval.score,tmp);
+            dictAdd(dstzset->dict,tmp,&znode->score);
+            if (sdslen(tmp) > *maxelelen) *maxelelen = sdslen(tmp);
+            (*totelelen) += sdslen(tmp);
+        }
+    }
+    zuiClearIterator(&src[0]);
+}
+
+
+/* ZDIFF helper (alternative strategy): build src[0] into 'dstzset', then
+ * remove members of the remaining sets from it. */
+static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t *maxelelen, size_t *totelelen) {
+    /* DIFF Algorithm 2:
+     *
+     * Add all the elements of the first set to the auxiliary set.
+     * Then remove all the elements of all the next sets from it.
+     *
+     * This is O(L + (N-K)log(N)) where L is the sum of all the elements in every
+     * set, N is the size of the first set, and K is the size of the result set.
+     *
+     * Note that from the (L-N) dict searches, (N-K) got to the zsetRemoveFromSkiplist
+     * which costs log(N)
+     *
+     * There is also a O(K) cost at the end for finding the largest element
+     * size, but this doesn't change the algorithm complexity since K < L, and
+     * O(2L) is the same as O(L). */
+    int j;
+    int cardinality = 0;
+    zsetopval zval;
+    zskiplistNode *znode;
+    sds tmp;
+
+    for (j = 0; j < setnum; j++) {
+        if (zuiLength(&src[j]) == 0) continue;
+
+        memset(&zval, 0, sizeof(zval));
+        zuiInitIterator(&src[j]);
+        while (zuiNext(&src[j],&zval)) {
+            if (j == 0) {
+                /* First set: insert every element into the accumulator. */
+                tmp = zuiNewSdsFromValue(&zval);
+                znode = zslInsert(dstzset->zsl,zval.score,tmp);
+                dictAdd(dstzset->dict,tmp,&znode->score);
+                cardinality++;
+            } else {
+                /* Subsequent sets: subtract their members. */
+                tmp = zuiSdsFromValue(&zval);
+                if (zsetRemoveFromSkiplist(dstzset, tmp)) {
+                    cardinality--;
+                }
+            }
+
+            /* Exit if result set is empty as any additional removal
+             * of elements will have no effect. */
+            if (cardinality == 0) break;
+        }
+        zuiClearIterator(&src[j]);
+
+        if (cardinality == 0) break;
+    }
+
+    /* Resize dict if needed after removing multiple elements */
+    if (htNeedsResize(dstzset->dict)) dictResize(dstzset->dict);
+
+    /* Using this algorithm, we can't calculate the max element as we go,
+     * we have to iterate through all elements to find the max one after. */
+    *maxelelen = zsetDictGetMaxElementLength(dstzset->dict, totelelen);
+}
+
+/* Estimate which ZDIFF strategy is cheaper for the given inputs.
+ * Returns 1 or 2 (the algorithm number), or 0 when the result is trivially
+ * empty because some later set equals the first one. */
+static int zsetChooseDiffAlgorithm(zsetopsrc *src, long setnum) {
+    int j;
+
+    /* Select what DIFF algorithm to use.
+     *
+     * Algorithm 1 is O(N*M + K*log(K)) where N is the size of the
+     * first set, M the total number of sets, and K is the size of the
+     * result set.
+     *
+     * Algorithm 2 is O(L + (N-K)log(N)) where L is the total number of elements
+     * in all the sets, N is the size of the first set, and K is the size of the
+     * result set.
+     *
+     * We compute what is the best bet with the current input here. */
+    long long algo_one_work = 0;
+    long long algo_two_work = 0;
+
+    for (j = 0; j < setnum; j++) {
+        /* If any other set is equal to the first set, there is nothing to be
+         * done, since we would remove all elements anyway. */
+        if (j > 0 && src[0].subject == src[j].subject) {
+            return 0;
+        }
+
+        /* N per set for algorithm 1, each set's own size for algorithm 2. */
+        algo_one_work += zuiLength(&src[0]);
+        algo_two_work += zuiLength(&src[j]);
+    }
+
+    /* Algorithm 1 has better constant times and performs less operations
+     * if there are elements in common. Give it some advantage. */
+    algo_one_work /= 2;
+    return (algo_one_work <= algo_two_work) ? 1 : 2;
+}
+
+/* Compute the difference of src[0..setnum-1] into 'dstzset', dispatching
+ * to the algorithm picked by zsetChooseDiffAlgorithm(). A return of 0 from
+ * the chooser means the result is empty and nothing is done. */
+static void zdiff(zsetopsrc *src, long setnum, zset *dstzset, size_t *maxelelen, size_t *totelelen) {
+    /* Skip everything if the smallest input is empty. */
+    if (zuiLength(&src[0]) > 0) {
+        int diff_algo = zsetChooseDiffAlgorithm(src, setnum);
+        if (diff_algo == 1) {
+            zdiffAlgorithm1(src, setnum, dstzset, maxelelen, totelelen);
+        } else if (diff_algo == 2) {
+            zdiffAlgorithm2(src, setnum, dstzset, maxelelen, totelelen);
+        } else if (diff_algo != 0) {
+            serverPanic("Unknown algorithm");
+        }
+    }
+}
+
+/* Dict type used as a temporary score accumulator (e.g. by ZUNION): keys
+ * are borrowed sds strings and values are doubles owned elsewhere, so no
+ * dup/destructor callbacks are installed. */
+dictType setAccumulatorDictType = {
+    dictSdsHash,               /* hash function */
+    NULL,                      /* key dup */
+    NULL,                      /* val dup */
+    dictSdsKeyCompare,         /* key compare */
+    NULL,                      /* key destructor */
+    NULL,                      /* val destructor */
+    NULL                       /* allow to expand */
+};
+
+/* The zunionInterDiffGenericCommand() function is called in order to implement the
+ * following commands: ZUNION, ZINTER, ZDIFF, ZUNIONSTORE, ZINTERSTORE, ZDIFFSTORE,
+ * ZINTERCARD.
+ *
+ * 'numkeysIndex' parameter position of key number. for ZUNION/ZINTER/ZDIFF command,
+ * this value is 1, for ZUNIONSTORE/ZINTERSTORE/ZDIFFSTORE command, this value is 2.
+ *
+ * 'op' SET_OP_INTER, SET_OP_UNION or SET_OP_DIFF.
+ *
+ * 'cardinality_only' is currently only applicable when 'op' is SET_OP_INTER.
+ * Work for SINTERCARD, only return the cardinality with minimum processing and memory overheads.
+ */
+void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, int op,
+ int cardinality_only) {
+ int i, j;
+ long setnum;
+ int aggregate = REDIS_AGGR_SUM;
+ zsetopsrc *src;
+ zsetopval zval;
+ sds tmp;
+ size_t maxelelen = 0, totelelen = 0;
+ robj *dstobj = NULL;
+ zset *dstzset = NULL;
+ zskiplistNode *znode;
+ int withscores = 0;
+ unsigned long cardinality = 0;
+ long limit = 0; /* Stop searching after reaching the limit. 0 means unlimited. */
+
+ /* expect setnum input keys to be given */
+ if ((getLongFromObjectOrReply(c, c->argv[numkeysIndex], &setnum, NULL) != C_OK))
+ return;
+
+ if (setnum < 1) {
+ addReplyErrorFormat(c,
+ "at least 1 input key is needed for '%s' command", c->cmd->fullname);
+ return;
+ }
+
+ /* test if the expected number of keys would overflow */
+ if (setnum > (c->argc-(numkeysIndex+1))) {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+
+ /* read keys to be used for input */
+ src = zcalloc(sizeof(zsetopsrc) * setnum);
+ for (i = 0, j = numkeysIndex+1; i < setnum; i++, j++) {
+ robj *obj = lookupKeyRead(c->db, c->argv[j]);
+ if (obj != NULL) {
+ if (obj->type != OBJ_ZSET && obj->type != OBJ_SET) {
+ zfree(src);
+ addReplyErrorObject(c,shared.wrongtypeerr);
+ return;
+ }
+
+ src[i].subject = obj;
+ src[i].type = obj->type;
+ src[i].encoding = obj->encoding;
+ } else {
+ src[i].subject = NULL;
+ }
+
+ /* Default all weights to 1. */
+ src[i].weight = 1.0;
+ }
+
+ /* parse optional extra arguments */
+ if (j < c->argc) {
+ int remaining = c->argc - j;
+
+ while (remaining) {
+ if (op != SET_OP_DIFF && !cardinality_only &&
+ remaining >= (setnum + 1) &&
+ !strcasecmp(c->argv[j]->ptr,"weights"))
+ {
+ j++; remaining--;
+ for (i = 0; i < setnum; i++, j++, remaining--) {
+ if (getDoubleFromObjectOrReply(c,c->argv[j],&src[i].weight,
+ "weight value is not a float") != C_OK)
+ {
+ zfree(src);
+ return;
+ }
+ }
+ } else if (op != SET_OP_DIFF && !cardinality_only &&
+ remaining >= 2 &&
+ !strcasecmp(c->argv[j]->ptr,"aggregate"))
+ {
+ j++; remaining--;
+ if (!strcasecmp(c->argv[j]->ptr,"sum")) {
+ aggregate = REDIS_AGGR_SUM;
+ } else if (!strcasecmp(c->argv[j]->ptr,"min")) {
+ aggregate = REDIS_AGGR_MIN;
+ } else if (!strcasecmp(c->argv[j]->ptr,"max")) {
+ aggregate = REDIS_AGGR_MAX;
+ } else {
+ zfree(src);
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ j++; remaining--;
+ } else if (remaining >= 1 &&
+ !dstkey && !cardinality_only &&
+ !strcasecmp(c->argv[j]->ptr,"withscores"))
+ {
+ j++; remaining--;
+ withscores = 1;
+ } else if (cardinality_only && remaining >= 2 &&
+ !strcasecmp(c->argv[j]->ptr, "limit"))
+ {
+ j++; remaining--;
+ if (getPositiveLongFromObjectOrReply(c, c->argv[j], &limit,
+ "LIMIT can't be negative") != C_OK)
+ {
+ zfree(src);
+ return;
+ }
+ j++; remaining--;
+ } else {
+ zfree(src);
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+ }
+
+ if (op != SET_OP_DIFF) {
+ /* sort sets from the smallest to largest, this will improve our
+ * algorithm's performance */
+ qsort(src,setnum,sizeof(zsetopsrc),zuiCompareByCardinality);
+ }
+
+ /* We need a temp zset object to store our union/inter/diff. If the dstkey
+ * is not NULL (that is, we are inside an ZUNIONSTORE/ZINTERSTORE/ZDIFFSTORE operation) then
+ * this zset object will be the resulting object to zset into the target key.
+ * In SINTERCARD case, we don't need the temp obj, so we can avoid creating it. */
+ if (!cardinality_only) {
+ dstobj = createZsetObject();
+ dstzset = dstobj->ptr;
+ }
+ memset(&zval, 0, sizeof(zval));
+
+ if (op == SET_OP_INTER) {
+ /* Skip everything if the smallest input is empty. */
+ if (zuiLength(&src[0]) > 0) {
+ /* Precondition: as src[0] is non-empty and the inputs are ordered
+ * by size, all src[i > 0] are non-empty too. */
+ zuiInitIterator(&src[0]);
+ while (zuiNext(&src[0],&zval)) {
+ double score, value;
+
+ score = src[0].weight * zval.score;
+ if (isnan(score)) score = 0;
+
+ for (j = 1; j < setnum; j++) {
+ /* It is not safe to access the zset we are
+ * iterating, so explicitly check for equal object. */
+ if (src[j].subject == src[0].subject) {
+ value = zval.score*src[j].weight;
+ zunionInterAggregate(&score,value,aggregate);
+ } else if (zuiFind(&src[j],&zval,&value)) {
+ value *= src[j].weight;
+ zunionInterAggregate(&score,value,aggregate);
+ } else {
+ break;
+ }
+ }
+
+ /* Only continue when present in every input. */
+ if (j == setnum && cardinality_only) {
+ cardinality++;
+
+ /* We stop the searching after reaching the limit. */
+ if (limit && cardinality >= (unsigned long)limit) {
+ /* Cleanup before we break the zuiNext loop. */
+ zuiDiscardDirtyValue(&zval);
+ break;
+ }
+ } else if (j == setnum) {
+ tmp = zuiNewSdsFromValue(&zval);
+ znode = zslInsert(dstzset->zsl,score,tmp);
+ dictAdd(dstzset->dict,tmp,&znode->score);
+ totelelen += sdslen(tmp);
+ if (sdslen(tmp) > maxelelen) maxelelen = sdslen(tmp);
+ }
+ }
+ zuiClearIterator(&src[0]);
+ }
+ } else if (op == SET_OP_UNION) {
+ dict *accumulator = dictCreate(&setAccumulatorDictType);
+ dictIterator *di;
+ dictEntry *de, *existing;
+ double score;
+
+ if (setnum) {
+ /* Our union is at least as large as the largest set.
+ * Resize the dictionary ASAP to avoid useless rehashing. */
+ dictExpand(accumulator,zuiLength(&src[setnum-1]));
+ }
+
+ /* Step 1: Create a dictionary of elements -> aggregated-scores
+ * by iterating one sorted set after the other. */
+ for (i = 0; i < setnum; i++) {
+ if (zuiLength(&src[i]) == 0) continue;
+
+ zuiInitIterator(&src[i]);
+ while (zuiNext(&src[i],&zval)) {
+ /* Initialize value */
+ score = src[i].weight * zval.score;
+ if (isnan(score)) score = 0;
+
+ /* Search for this element in the accumulating dictionary. */
+ de = dictAddRaw(accumulator,zuiSdsFromValue(&zval),&existing);
+ /* If we don't have it, we need to create a new entry. */
+ if (!existing) {
+ tmp = zuiNewSdsFromValue(&zval);
+ /* Remember the longest single element encountered,
+ * to understand if it's possible to convert to listpack
+ * at the end. */
+ totelelen += sdslen(tmp);
+ if (sdslen(tmp) > maxelelen) maxelelen = sdslen(tmp);
+ /* Update the element with its initial score. */
+ dictSetKey(accumulator, de, tmp);
+ dictSetDoubleVal(de,score);
+ } else {
+ /* Update the score with the score of the new instance
+ * of the element found in the current sorted set.
+ *
+ * Here we access directly the dictEntry double
+ * value inside the union as it is a big speedup
+ * compared to using the getDouble/setDouble API. */
+ double *existing_score_ptr = dictGetDoubleValPtr(existing);
+ zunionInterAggregate(existing_score_ptr, score, aggregate);
+ }
+ }
+ zuiClearIterator(&src[i]);
+ }
+
+ /* Step 2: convert the dictionary into the final sorted set. */
+ di = dictGetIterator(accumulator);
+
+ /* We now are aware of the final size of the resulting sorted set,
+ * let's resize the dictionary embedded inside the sorted set to the
+ * right size, in order to save rehashing time. */
+ dictExpand(dstzset->dict,dictSize(accumulator));
+
+ while((de = dictNext(di)) != NULL) {
+ sds ele = dictGetKey(de);
+ score = dictGetDoubleVal(de);
+ znode = zslInsert(dstzset->zsl,score,ele);
+ dictAdd(dstzset->dict,ele,&znode->score);
+ }
+ dictReleaseIterator(di);
+ dictRelease(accumulator);
+ } else if (op == SET_OP_DIFF) {
+ zdiff(src, setnum, dstzset, &maxelelen, &totelelen);
+ } else {
+ serverPanic("Unknown operator");
+ }
+
+ if (dstkey) {
+ if (dstzset->zsl->length) {
+ zsetConvertToListpackIfNeeded(dstobj, maxelelen, totelelen);
+ setKey(c, c->db, dstkey, dstobj, 0);
+ addReplyLongLong(c, zsetLength(dstobj));
+ notifyKeyspaceEvent(NOTIFY_ZSET,
+ (op == SET_OP_UNION) ? "zunionstore" :
+ (op == SET_OP_INTER ? "zinterstore" : "zdiffstore"),
+ dstkey, c->db->id);
+ server.dirty++;
+ } else {
+ addReply(c, shared.czero);
+ if (dbDelete(c->db, dstkey)) {
+ signalModifiedKey(c, c->db, dstkey);
+ notifyKeyspaceEvent(NOTIFY_GENERIC, "del", dstkey, c->db->id);
+ server.dirty++;
+ }
+ }
+ decrRefCount(dstobj);
+ } else if (cardinality_only) {
+ addReplyLongLong(c, cardinality);
+ } else {
+ unsigned long length = dstzset->zsl->length;
+ zskiplist *zsl = dstzset->zsl;
+ zskiplistNode *zn = zsl->header->level[0].forward;
+ /* In case of WITHSCORES, respond with a single array in RESP2, and
+ * nested arrays in RESP3. We can't use a map response type since the
+ * client library needs to know to respect the order. */
+ if (withscores && c->resp == 2)
+ addReplyArrayLen(c, length*2);
+ else
+ addReplyArrayLen(c, length);
+
+ while (zn != NULL) {
+ if (withscores && c->resp > 2) addReplyArrayLen(c,2);
+ addReplyBulkCBuffer(c,zn->ele,sdslen(zn->ele));
+ if (withscores) addReplyDouble(c,zn->score);
+ zn = zn->level[0].forward;
+ }
+ server.lazyfree_lazy_server_del ? freeObjAsync(NULL, dstobj, -1) :
+ decrRefCount(dstobj);
+ }
+ zfree(src);
+}
+
/* ZUNIONSTORE destination numkeys key [key ...]
 *             [WEIGHTS weight [weight ...]] [AGGREGATE SUM|MIN|MAX]
 * Stores the weighted union of the given sorted sets into `destination`. */
void zunionstoreCommand(client *c) {
    /* argv[1] is the destination key; numkeys sits at argv[2]. */
    zunionInterDiffGenericCommand(c, c->argv[1], 2, SET_OP_UNION, 0);
}
+
/* ZINTERSTORE destination numkeys key [key ...]
 *             [WEIGHTS weight [weight ...]] [AGGREGATE SUM|MIN|MAX]
 * Stores the weighted intersection of the given sorted sets into `destination`. */
void zinterstoreCommand(client *c) {
    /* argv[1] is the destination key; numkeys sits at argv[2]. */
    zunionInterDiffGenericCommand(c, c->argv[1], 2, SET_OP_INTER, 0);
}
+
/* ZDIFFSTORE destination numkeys key [key ...]
 * Stores the difference of the first set against all the others into
 * `destination`. WEIGHTS/AGGREGATE are not accepted for DIFF. */
void zdiffstoreCommand(client *c) {
    /* argv[1] is the destination key; numkeys sits at argv[2]. */
    zunionInterDiffGenericCommand(c, c->argv[1], 2, SET_OP_DIFF, 0);
}
+
/* ZUNION numkeys key [key ...]
 *        [WEIGHTS weight [weight ...]] [AGGREGATE SUM|MIN|MAX] [WITHSCORES]
 * Non-storing variant: replies with the union directly (dstkey == NULL). */
void zunionCommand(client *c) {
    zunionInterDiffGenericCommand(c, NULL, 1, SET_OP_UNION, 0);
}
+
/* ZINTER numkeys key [key ...]
 *        [WEIGHTS weight [weight ...]] [AGGREGATE SUM|MIN|MAX] [WITHSCORES]
 * Non-storing variant: replies with the intersection directly (dstkey == NULL). */
void zinterCommand(client *c) {
    zunionInterDiffGenericCommand(c, NULL, 1, SET_OP_INTER, 0);
}
+
/* ZINTERCARD numkeys key [key ...] [LIMIT limit]
 * Replies only with the cardinality of the intersection; the last argument
 * (cardinality_only=1) makes the generic routine skip building a result zset
 * and allows early exit once LIMIT is reached. */
void zinterCardCommand(client *c) {
    zunionInterDiffGenericCommand(c, NULL, 1, SET_OP_INTER, 1);
}
+
/* ZDIFF numkeys key [key ...] [WITHSCORES]
 * Non-storing variant: replies with the difference directly (dstkey == NULL). */
void zdiffCommand(client *c) {
    zunionInterDiffGenericCommand(c, NULL, 1, SET_OP_DIFF, 0);
}
+
/* Traversal order for a ZRANGE-family invocation. AUTO is resolved later:
 * it becomes REVERSE if the REV option was given, FORWARD otherwise. */
typedef enum {
    ZRANGE_DIRECTION_AUTO = 0,
    ZRANGE_DIRECTION_FORWARD,
    ZRANGE_DIRECTION_REVERSE
} zrange_direction;

/* Destination of the emitted range: a client reply (ZRANGE and friends) or
 * an internal zset object (ZRANGESTORE). */
typedef enum {
    ZRANGE_CONSUMER_TYPE_CLIENT = 0,
    ZRANGE_CONSUMER_TYPE_INTERNAL
} zrange_consumer_type;

typedef struct zrange_result_handler zrange_result_handler;

/* Called once before any element is emitted; `length` is the exact result
 * count when known in advance, or -1 when it is not. */
typedef void (*zrangeResultBeginFunction)(zrange_result_handler *c, long length);
/* Called once after the last element, with the final result count. */
typedef void (*zrangeResultFinalizeFunction)(
    zrange_result_handler *c, size_t result_count);
/* Emit one element given as a byte buffer, plus its score. */
typedef void (*zrangeResultEmitCBufferFunction)(
    zrange_result_handler *c, const void *p, size_t len, double score);
/* Emit one element given as an integer (used for listpack entries that
 * decode to a long long), plus its score. */
typedef void (*zrangeResultEmitLongLongFunction)(
    zrange_result_handler *c, long long ll, double score);

void zrangeGenericCommand (zrange_result_handler *handler, int argc_start, int store,
    zrange_type rangetype, zrange_direction direction);

/* Interface struct for ZRANGE/ZRANGESTORE generic implementation.
 * There is one implementation of this interface that sends a RESP reply to clients.
 * and one implementation that stores the range result into a zset object. */
struct zrange_result_handler {
    zrange_consumer_type type;      /* Which implementation is wired in. */
    client *client;                 /* The requesting client. */
    robj *dstkey;                   /* ZRANGESTORE destination key (else NULL). */
    robj *dstobj;                   /* Accumulated zset for the store variant. */
    void *userdata;                 /* Client variant: deferred reply-length node. */
    int withscores;                 /* Non-zero when scores are emitted too. */
    int should_emit_array_length;   /* RESP3: wrap each element/score pair in a 2-array. */
    zrangeResultBeginFunction beginResultEmission;
    zrangeResultFinalizeFunction finalizeResultEmission;
    zrangeResultEmitCBufferFunction emitResultFromCBuffer;
    zrangeResultEmitLongLongFunction emitResultFromLongLong;
};
+
+/* Result handler methods for responding the ZRANGE to clients.
+ * length can be used to provide the result length in advance (avoids deferred reply overhead).
+ * length can be set to -1 if the result length is not know in advance.
+ */
+static void zrangeResultBeginClient(zrange_result_handler *handler, long length) {
+ if (length > 0) {
+ /* In case of WITHSCORES, respond with a single array in RESP2, and
+ * nested arrays in RESP3. We can't use a map response type since the
+ * client library needs to know to respect the order. */
+ if (handler->withscores && (handler->client->resp == 2)) {
+ length *= 2;
+ }
+ addReplyArrayLen(handler->client, length);
+ handler->userdata = NULL;
+ return;
+ }
+ handler->userdata = addReplyDeferredLen(handler->client);
+}
+
+static void zrangeResultEmitCBufferToClient(zrange_result_handler *handler,
+ const void *value, size_t value_length_in_bytes, double score)
+{
+ if (handler->should_emit_array_length) {
+ addReplyArrayLen(handler->client, 2);
+ }
+
+ addReplyBulkCBuffer(handler->client, value, value_length_in_bytes);
+
+ if (handler->withscores) {
+ addReplyDouble(handler->client, score);
+ }
+}
+
+static void zrangeResultEmitLongLongToClient(zrange_result_handler *handler,
+ long long value, double score)
+{
+ if (handler->should_emit_array_length) {
+ addReplyArrayLen(handler->client, 2);
+ }
+
+ addReplyBulkLongLong(handler->client, value);
+
+ if (handler->withscores) {
+ addReplyDouble(handler->client, score);
+ }
+}
+
+static void zrangeResultFinalizeClient(zrange_result_handler *handler,
+ size_t result_count)
+{
+ /* If the reply size was know at start there's nothing left to do */
+ if (!handler->userdata)
+ return;
+ /* In case of WITHSCORES, respond with a single array in RESP2, and
+ * nested arrays in RESP3. We can't use a map response type since the
+ * client library needs to know to respect the order. */
+ if (handler->withscores && (handler->client->resp == 2)) {
+ result_count *= 2;
+ }
+
+ setDeferredArrayLen(handler->client, handler->userdata, result_count);
+}
+
/* Result handler methods for storing the ZRANGESTORE to a zset. */
static void zrangeResultBeginStore(zrange_result_handler *handler, long length)
{
    /* `length` may be -1 when the result size is unknown; it is forwarded to
     * zsetTypeCreate — presumably as a pre-sizing hint (TODO: confirm in the
     * zset type helpers). */
    handler->dstobj = zsetTypeCreate(length, 0);
}
+
+static void zrangeResultEmitCBufferForStore(zrange_result_handler *handler,
+ const void *value, size_t value_length_in_bytes, double score)
+{
+ double newscore;
+ int retflags = 0;
+ sds ele = sdsnewlen(value, value_length_in_bytes);
+ int retval = zsetAdd(handler->dstobj, score, ele, ZADD_IN_NONE, &retflags, &newscore);
+ sdsfree(ele);
+ serverAssert(retval);
+}
+
+static void zrangeResultEmitLongLongForStore(zrange_result_handler *handler,
+ long long value, double score)
+{
+ double newscore;
+ int retflags = 0;
+ sds ele = sdsfromlonglong(value);
+ int retval = zsetAdd(handler->dstobj, score, ele, ZADD_IN_NONE, &retflags, &newscore);
+ sdsfree(ele);
+ serverAssert(retval);
+}
+
/* Finalize the ZRANGESTORE variant: commit the accumulated zset to the
 * destination key (or delete the key when the result is empty), reply with
 * the result count, and fire the matching keyspace notifications. */
static void zrangeResultFinalizeStore(zrange_result_handler *handler, size_t result_count)
{
    if (result_count) {
        setKey(handler->client, handler->client->db, handler->dstkey, handler->dstobj, 0);
        addReplyLongLong(handler->client, result_count);
        notifyKeyspaceEvent(NOTIFY_ZSET, "zrangestore", handler->dstkey, handler->client->db->id);
        server.dirty++;
    } else {
        /* Empty result: storing nothing means removing any previous value
         * that lived at the destination key. */
        addReply(handler->client, shared.czero);
        if (dbDelete(handler->client->db, handler->dstkey)) {
            signalModifiedKey(handler->client, handler->client->db, handler->dstkey);
            notifyKeyspaceEvent(NOTIFY_GENERIC, "del", handler->dstkey, handler->client->db->id);
            server.dirty++;
        }
    }
    /* Release our reference to dstobj; the same setKey + decrRefCount
     * pattern is used by the union/inter/diff store path above. */
    decrRefCount(handler->dstobj);
}
+
+/* Initialize the consumer interface type with the requested type. */
+static void zrangeResultHandlerInit(zrange_result_handler *handler,
+ client *client, zrange_consumer_type type)
+{
+ memset(handler, 0, sizeof(*handler));
+
+ handler->client = client;
+
+ switch (type) {
+ case ZRANGE_CONSUMER_TYPE_CLIENT:
+ handler->beginResultEmission = zrangeResultBeginClient;
+ handler->finalizeResultEmission = zrangeResultFinalizeClient;
+ handler->emitResultFromCBuffer = zrangeResultEmitCBufferToClient;
+ handler->emitResultFromLongLong = zrangeResultEmitLongLongToClient;
+ break;
+
+ case ZRANGE_CONSUMER_TYPE_INTERNAL:
+ handler->beginResultEmission = zrangeResultBeginStore;
+ handler->finalizeResultEmission = zrangeResultFinalizeStore;
+ handler->emitResultFromCBuffer = zrangeResultEmitCBufferForStore;
+ handler->emitResultFromLongLong = zrangeResultEmitLongLongForStore;
+ break;
+ }
+}
+
/* Request that scores be emitted along with elements. Under RESP3 each
 * element/score pair is additionally wrapped in its own 2-element array
 * (see the emit callbacks above). */
static void zrangeResultHandlerScoreEmissionEnable(zrange_result_handler *handler) {
    handler->withscores = 1;
    handler->should_emit_array_length = (handler->client->resp > 2);
}
+
/* Record the destination key for the ZRANGESTORE variant. The handler does
 * not take a reference; the key object stays owned by the client argv. */
static void zrangeResultHandlerDestinationKeySet (zrange_result_handler *handler,
    robj *dstkey)
{
    handler->dstkey = dstkey;
}
+
/* This command implements ZRANGE, ZREVRANGE (and the rank flavor of
 * ZRANGESTORE). Emits, through `handler`, every element whose 0-based rank
 * falls in [start, end] inclusive; negative indexes count from the tail.
 * When `reverse` is non-zero, ranks are taken from the high end. */
void genericZrangebyrankCommand(zrange_result_handler *handler,
    robj *zobj, long start, long end, int withscores, int reverse) {

    client *c = handler->client;
    long llen;
    long rangelen;
    size_t result_cardinality;

    /* Sanitize indexes. */
    llen = zsetLength(zobj);
    if (start < 0) start = llen+start;
    if (end < 0) end = llen+end;
    if (start < 0) start = 0;


    /* Invariant: start >= 0, so this test will be true when end < 0.
     * The range is empty when start > end or start >= length. */
    if (start > end || start >= llen) {
        handler->beginResultEmission(handler, 0);
        handler->finalizeResultEmission(handler, 0);
        return;
    }
    if (end >= llen) end = llen-1;
    rangelen = (end-start)+1;
    /* rangelen is decremented by the iteration loops below, so remember the
     * result count separately for the finalize call. */
    result_cardinality = rangelen;

    handler->beginResultEmission(handler, rangelen);
    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *zl = zobj->ptr;
        unsigned char *eptr, *sptr;
        unsigned char *vstr;
        unsigned int vlen;
        long long vlong;
        double score = 0.0;

        /* Listpack entries alternate element,score so rank N lives at entry
         * 2*N (or -2-2*N when seeking from the tail for a reverse walk). */
        if (reverse)
            eptr = lpSeek(zl,-2-(2*start));
        else
            eptr = lpSeek(zl,2*start);

        serverAssertWithInfo(c,zobj,eptr != NULL);
        sptr = lpNext(zl,eptr);

        while (rangelen--) {
            serverAssertWithInfo(c,zobj,eptr != NULL && sptr != NULL);
            vstr = lpGetValue(eptr,&vlen,&vlong);

            if (withscores) /* don't bother to extract the score if it's gonna be ignored. */
                score = zzlGetScore(sptr);

            /* lpGetValue returns NULL vstr for integer-encoded entries. */
            if (vstr == NULL) {
                handler->emitResultFromLongLong(handler, vlong, score);
            } else {
                handler->emitResultFromCBuffer(handler, vstr, vlen, score);
            }

            if (reverse)
                zzlPrev(zl,&eptr,&sptr);
            else
                zzlNext(zl,&eptr,&sptr);
        }

    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zobj->ptr;
        zskiplist *zsl = zs->zsl;
        zskiplistNode *ln;

        /* Check if starting point is trivial, before doing log(N) lookup. */
        if (reverse) {
            ln = zsl->tail;
            if (start > 0)
                ln = zslGetElementByRank(zsl,llen-start);
        } else {
            ln = zsl->header->level[0].forward;
            if (start > 0)
                ln = zslGetElementByRank(zsl,start+1);
        }

        while(rangelen--) {
            serverAssertWithInfo(c,zobj,ln != NULL);
            sds ele = ln->ele;
            handler->emitResultFromCBuffer(handler, ele, sdslen(ele), ln->score);
            ln = reverse ? ln->backward : ln->level[0].forward;
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }

    handler->finalizeResultEmission(handler, result_cardinality);
}
+
/* ZRANGESTORE <dst> <src> <min> <max> [BYSCORE | BYLEX] [REV] [LIMIT offset count]
 * Like ZRANGE, but stores the result into <dst> instead of replying with it. */
void zrangestoreCommand (client *c) {
    robj *dstkey = c->argv[1];
    zrange_result_handler handler;
    zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_INTERNAL);
    zrangeResultHandlerDestinationKeySet(&handler, dstkey);
    /* Source key starts at argv[2]; store=1 selects the store code paths. */
    zrangeGenericCommand(&handler, 2, 1, ZRANGE_AUTO, ZRANGE_DIRECTION_AUTO);
}
+
/* ZRANGE <key> <min> <max> [BYSCORE | BYLEX] [REV] [WITHSCORES] [LIMIT offset count]
 * Unified range command; range type and direction are resolved from the
 * optional BYSCORE/BYLEX/REV arguments (hence the _AUTO values). */
void zrangeCommand(client *c) {
    zrange_result_handler handler;
    zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
    zrangeGenericCommand(&handler, 1, 0, ZRANGE_AUTO, ZRANGE_DIRECTION_AUTO);
}
+
/* ZREVRANGE <key> <start> <stop> [WITHSCORES]
 * Rank-based range walked from the highest rank downwards. */
void zrevrangeCommand(client *c) {
    zrange_result_handler handler;
    zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
    zrangeGenericCommand(&handler, 1, 0, ZRANGE_RANK, ZRANGE_DIRECTION_REVERSE);
}
+
/* This command implements ZRANGEBYSCORE, ZREVRANGEBYSCORE (and the BYSCORE
 * flavor of ZRANGE/ZRANGESTORE). Emits, through `handler`, the elements with
 * score inside `range`, skipping the first `offset` in-range elements and
 * emitting at most `limit` of them. A negative `limit` never reaches zero in
 * the `limit--` tests below, so it acts as "unlimited"; a negative `offset`
 * drains the whole range and yields an empty result. */
void genericZrangebyscoreCommand(zrange_result_handler *handler,
    zrangespec *range, robj *zobj, long offset, long limit,
    int reverse) {
    unsigned long rangelen = 0;

    /* The result length is not known in advance (-1), so the client handler
     * falls back to a deferred reply length. */
    handler->beginResultEmission(handler, -1);

    /* For invalid offset, return directly. */
    if (offset > 0 && offset >= (long)zsetLength(zobj)) {
        handler->finalizeResultEmission(handler, 0);
        return;
    }

    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *zl = zobj->ptr;
        unsigned char *eptr, *sptr;
        unsigned char *vstr;
        unsigned int vlen;
        long long vlong;

        /* If reversed, get the last node in range as starting point. */
        if (reverse) {
            eptr = zzlLastInRange(zl,range);
        } else {
            eptr = zzlFirstInRange(zl,range);
        }

        /* Get score pointer for the first element. */
        if (eptr)
            sptr = lpNext(zl,eptr);

        /* If there is an offset, just traverse the number of elements without
         * checking the score because that is done in the next loop. */
        while (eptr && offset--) {
            if (reverse) {
                zzlPrev(zl,&eptr,&sptr);
            } else {
                zzlNext(zl,&eptr,&sptr);
            }
        }

        while (eptr && limit--) {
            double score = zzlGetScore(sptr);

            /* Abort when the node is no longer in range. Only one boundary
             * needs rechecking: the walk started inside the range. */
            if (reverse) {
                if (!zslValueGteMin(score,range)) break;
            } else {
                if (!zslValueLteMax(score,range)) break;
            }

            vstr = lpGetValue(eptr,&vlen,&vlong);
            rangelen++;
            if (vstr == NULL) {
                handler->emitResultFromLongLong(handler, vlong, score);
            } else {
                handler->emitResultFromCBuffer(handler, vstr, vlen, score);
            }

            /* Move to next node */
            if (reverse) {
                zzlPrev(zl,&eptr,&sptr);
            } else {
                zzlNext(zl,&eptr,&sptr);
            }
        }
    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zobj->ptr;
        zskiplist *zsl = zs->zsl;
        zskiplistNode *ln;

        /* If reversed, get the last node in range as starting point. */
        if (reverse) {
            ln = zslLastInRange(zsl,range);
        } else {
            ln = zslFirstInRange(zsl,range);
        }

        /* If there is an offset, just traverse the number of elements without
         * checking the score because that is done in the next loop. */
        while (ln && offset--) {
            if (reverse) {
                ln = ln->backward;
            } else {
                ln = ln->level[0].forward;
            }
        }

        while (ln && limit--) {
            /* Abort when the node is no longer in range. */
            if (reverse) {
                if (!zslValueGteMin(ln->score,range)) break;
            } else {
                if (!zslValueLteMax(ln->score,range)) break;
            }

            rangelen++;
            handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele), ln->score);

            /* Move to next node */
            if (reverse) {
                ln = ln->backward;
            } else {
                ln = ln->level[0].forward;
            }
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }

    handler->finalizeResultEmission(handler, rangelen);
}
+
/* ZRANGEBYSCORE <key> <min> <max> [WITHSCORES] [LIMIT offset count]
 * Deprecated in favor of ZRANGE ... BYSCORE; kept for compatibility. */
void zrangebyscoreCommand(client *c) {
    zrange_result_handler handler;
    zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
    zrangeGenericCommand(&handler, 1, 0, ZRANGE_SCORE, ZRANGE_DIRECTION_FORWARD);
}
+
/* ZREVRANGEBYSCORE <key> <max> <min> [WITHSCORES] [LIMIT offset count]
 * Note the argument order: [max, min]; zrangeGenericCommand swaps the
 * indexes for reverse score ranges. */
void zrevrangebyscoreCommand(client *c) {
    zrange_result_handler handler;
    zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
    zrangeGenericCommand(&handler, 1, 0, ZRANGE_SCORE, ZRANGE_DIRECTION_REVERSE);
}
+
/* ZCOUNT <key> <min> <max>
 * Replies with the number of elements whose score is within [min, max]. For
 * the skiplist encoding the count is derived from ranks in O(log N) instead
 * of iterating the range. */
void zcountCommand(client *c) {
    robj *key = c->argv[1];
    robj *zobj;
    zrangespec range;
    unsigned long count = 0;

    /* Parse the range arguments */
    if (zslParseRange(c->argv[2],c->argv[3],&range) != C_OK) {
        addReplyError(c,"min or max is not a float");
        return;
    }

    /* Lookup the sorted set */
    if ((zobj = lookupKeyReadOrReply(c, key, shared.czero)) == NULL ||
        checkType(c, zobj, OBJ_ZSET)) return;

    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *zl = zobj->ptr;
        unsigned char *eptr, *sptr;
        double score;

        /* Use the first element in range as the starting point */
        eptr = zzlFirstInRange(zl,&range);

        /* No "first" element */
        if (eptr == NULL) {
            addReply(c, shared.czero);
            return;
        }

        /* First element is in range */
        sptr = lpNext(zl,eptr);
        score = zzlGetScore(sptr);
        serverAssertWithInfo(c,zobj,zslValueLteMax(score,&range));

        /* Iterate over elements in range: only the upper bound needs to be
         * rechecked since we started at the first in-range element. */
        while (eptr) {
            score = zzlGetScore(sptr);

            /* Abort when the node is no longer in range. */
            if (!zslValueLteMax(score,&range)) {
                break;
            } else {
                count++;
                zzlNext(zl,&eptr,&sptr);
            }
        }
    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zobj->ptr;
        zskiplist *zsl = zs->zsl;
        zskiplistNode *zn;
        unsigned long rank;

        /* Find first element in range */
        zn = zslFirstInRange(zsl, &range);

        /* Use rank of first element, if any, to determine preliminary count:
         * everything from the first in-range element to the tail. */
        if (zn != NULL) {
            rank = zslGetRank(zsl, zn->score, zn->ele);
            count = (zsl->length - (rank - 1));

            /* Find last element in range */
            zn = zslLastInRange(zsl, &range);

            /* Use rank of last element, if any, to determine the actual
             * count by subtracting the elements past the range's end. */
            if (zn != NULL) {
                rank = zslGetRank(zsl, zn->score, zn->ele);
                count -= (zsl->length - rank);
            }
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }

    addReplyLongLong(c, count);
}
+
/* ZLEXCOUNT <key> <min> <max>
 * Replies with the number of elements inside the lexicographic range. The
 * parsed `range` may own allocated boundaries, so zslFreeLexRange must run
 * on every exit path after a successful parse. */
void zlexcountCommand(client *c) {
    robj *key = c->argv[1];
    robj *zobj;
    zlexrangespec range;
    unsigned long count = 0;

    /* Parse the range arguments */
    if (zslParseLexRange(c->argv[2],c->argv[3],&range) != C_OK) {
        addReplyError(c,"min or max not valid string range item");
        return;
    }

    /* Lookup the sorted set */
    if ((zobj = lookupKeyReadOrReply(c, key, shared.czero)) == NULL ||
        checkType(c, zobj, OBJ_ZSET))
    {
        zslFreeLexRange(&range);
        return;
    }

    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *zl = zobj->ptr;
        unsigned char *eptr, *sptr;

        /* Use the first element in range as the starting point */
        eptr = zzlFirstInLexRange(zl,&range);

        /* No "first" element */
        if (eptr == NULL) {
            zslFreeLexRange(&range);
            addReply(c, shared.czero);
            return;
        }

        /* First element is in range */
        sptr = lpNext(zl,eptr);
        serverAssertWithInfo(c,zobj,zzlLexValueLteMax(eptr,&range));

        /* Iterate over elements in range: only the upper bound needs to be
         * rechecked since we started at the first in-range element. */
        while (eptr) {
            /* Abort when the node is no longer in range. */
            if (!zzlLexValueLteMax(eptr,&range)) {
                break;
            } else {
                count++;
                zzlNext(zl,&eptr,&sptr);
            }
        }
    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zobj->ptr;
        zskiplist *zsl = zs->zsl;
        zskiplistNode *zn;
        unsigned long rank;

        /* Find first element in range */
        zn = zslFirstInLexRange(zsl, &range);

        /* Use rank of first element, if any, to determine preliminary count:
         * everything from the first in-range element to the tail. */
        if (zn != NULL) {
            rank = zslGetRank(zsl, zn->score, zn->ele);
            count = (zsl->length - (rank - 1));

            /* Find last element in range */
            zn = zslLastInLexRange(zsl, &range);

            /* Use rank of last element, if any, to determine the actual
             * count by subtracting the elements past the range's end. */
            if (zn != NULL) {
                rank = zslGetRank(zsl, zn->score, zn->ele);
                count -= (zsl->length - rank);
            }
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }

    zslFreeLexRange(&range);
    addReplyLongLong(c, count);
}
+
/* This command implements ZRANGEBYLEX, ZREVRANGEBYLEX (and the BYLEX flavor
 * of ZRANGE/ZRANGESTORE). Emits, through `handler`, the elements inside the
 * lexicographic `range`, skipping the first `offset` in-range elements and
 * emitting at most `limit` of them (negative limit = unlimited, as the
 * `limit--` tests never reach zero). `withscores` only controls whether the
 * score is decoded for emission. */
void genericZrangebylexCommand(zrange_result_handler *handler,
    zlexrangespec *range, robj *zobj, int withscores, long offset, long limit,
    int reverse)
{
    unsigned long rangelen = 0;

    /* Result length unknown in advance (-1): deferred reply length. */
    handler->beginResultEmission(handler, -1);

    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *zl = zobj->ptr;
        unsigned char *eptr, *sptr;
        unsigned char *vstr;
        unsigned int vlen;
        long long vlong;

        /* If reversed, get the last node in range as starting point. */
        if (reverse) {
            eptr = zzlLastInLexRange(zl,range);
        } else {
            eptr = zzlFirstInLexRange(zl,range);
        }

        /* Get score pointer for the first element. */
        if (eptr)
            sptr = lpNext(zl,eptr);

        /* If there is an offset, just traverse the number of elements without
         * checking the score because that is done in the next loop. */
        while (eptr && offset--) {
            if (reverse) {
                zzlPrev(zl,&eptr,&sptr);
            } else {
                zzlNext(zl,&eptr,&sptr);
            }
        }

        while (eptr && limit--) {
            double score = 0;
            if (withscores) /* don't bother to extract the score if it's gonna be ignored. */
                score = zzlGetScore(sptr);

            /* Abort when the node is no longer in range. Only one boundary
             * needs rechecking: the walk started inside the range. */
            if (reverse) {
                if (!zzlLexValueGteMin(eptr,range)) break;
            } else {
                if (!zzlLexValueLteMax(eptr,range)) break;
            }

            vstr = lpGetValue(eptr,&vlen,&vlong);
            rangelen++;
            if (vstr == NULL) {
                handler->emitResultFromLongLong(handler, vlong, score);
            } else {
                handler->emitResultFromCBuffer(handler, vstr, vlen, score);
            }

            /* Move to next node */
            if (reverse) {
                zzlPrev(zl,&eptr,&sptr);
            } else {
                zzlNext(zl,&eptr,&sptr);
            }
        }
    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zobj->ptr;
        zskiplist *zsl = zs->zsl;
        zskiplistNode *ln;

        /* If reversed, get the last node in range as starting point. */
        if (reverse) {
            ln = zslLastInLexRange(zsl,range);
        } else {
            ln = zslFirstInLexRange(zsl,range);
        }

        /* If there is an offset, just traverse the number of elements without
         * checking the score because that is done in the next loop. */
        while (ln && offset--) {
            if (reverse) {
                ln = ln->backward;
            } else {
                ln = ln->level[0].forward;
            }
        }

        while (ln && limit--) {
            /* Abort when the node is no longer in range. */
            if (reverse) {
                if (!zslLexValueGteMin(ln->ele,range)) break;
            } else {
                if (!zslLexValueLteMax(ln->ele,range)) break;
            }

            rangelen++;
            handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele), ln->score);

            /* Move to next node */
            if (reverse) {
                ln = ln->backward;
            } else {
                ln = ln->level[0].forward;
            }
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }

    handler->finalizeResultEmission(handler, rangelen);
}
+
/* ZRANGEBYLEX <key> <min> <max> [LIMIT offset count]
 * Deprecated in favor of ZRANGE ... BYLEX; kept for compatibility. */
void zrangebylexCommand(client *c) {
    zrange_result_handler handler;
    zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
    zrangeGenericCommand(&handler, 1, 0, ZRANGE_LEX, ZRANGE_DIRECTION_FORWARD);
}
+
/* ZREVRANGEBYLEX <key> <max> <min> [LIMIT offset count]
 * Note the argument order: [max, min]; zrangeGenericCommand swaps the
 * indexes for reverse lex ranges. */
void zrevrangebylexCommand(client *c) {
    zrange_result_handler handler;
    zrangeResultHandlerInit(&handler, c, ZRANGE_CONSUMER_TYPE_CLIENT);
    zrangeGenericCommand(&handler, 1, 0, ZRANGE_LEX, ZRANGE_DIRECTION_REVERSE);
}
+
+/**
+ * This function handles ZRANGE and ZRANGESTORE, and also the deprecated
+ * Z[REV]RANGE[BYSCORE|BYLEX] commands.
+ *
+ * The simple ZRANGE and ZRANGESTORE can take _AUTO in rangetype and direction,
+ * other command pass explicit value.
+ *
+ * The argc_start points to the src key argument, so following syntax is like:
+ * <src> <min> <max> [BYSCORE | BYLEX] [REV] [WITHSCORES] [LIMIT offset count]
+ */
+void zrangeGenericCommand(zrange_result_handler *handler, int argc_start, int store,
+ zrange_type rangetype, zrange_direction direction)
+{
+ client *c = handler->client;
+ robj *key = c->argv[argc_start];
+ robj *zobj;
+ zrangespec range;
+ zlexrangespec lexrange;
+ int minidx = argc_start + 1;
+ int maxidx = argc_start + 2;
+
+ /* Options common to all */
+ long opt_start = 0;
+ long opt_end = 0;
+ int opt_withscores = 0;
+ long opt_offset = 0;
+ long opt_limit = -1;
+
+ /* Step 1: Skip the <src> <min> <max> args and parse remaining optional arguments. */
+ for (int j=argc_start + 3; j < c->argc; j++) {
+ int leftargs = c->argc-j-1;
+ if (!store && !strcasecmp(c->argv[j]->ptr,"withscores")) {
+ opt_withscores = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
+ if ((getLongFromObjectOrReply(c, c->argv[j+1], &opt_offset, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[j+2], &opt_limit, NULL) != C_OK))
+ {
+ return;
+ }
+ j += 2;
+ } else if (direction == ZRANGE_DIRECTION_AUTO &&
+ !strcasecmp(c->argv[j]->ptr,"rev"))
+ {
+ direction = ZRANGE_DIRECTION_REVERSE;
+ } else if (rangetype == ZRANGE_AUTO &&
+ !strcasecmp(c->argv[j]->ptr,"bylex"))
+ {
+ rangetype = ZRANGE_LEX;
+ } else if (rangetype == ZRANGE_AUTO &&
+ !strcasecmp(c->argv[j]->ptr,"byscore"))
+ {
+ rangetype = ZRANGE_SCORE;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ /* Use defaults if not overridden by arguments. */
+ if (direction == ZRANGE_DIRECTION_AUTO)
+ direction = ZRANGE_DIRECTION_FORWARD;
+ if (rangetype == ZRANGE_AUTO)
+ rangetype = ZRANGE_RANK;
+
+ /* Check for conflicting arguments. */
+ if (opt_limit != -1 && rangetype == ZRANGE_RANK) {
+ addReplyError(c,"syntax error, LIMIT is only supported in combination with either BYSCORE or BYLEX");
+ return;
+ }
+ if (opt_withscores && rangetype == ZRANGE_LEX) {
+ addReplyError(c,"syntax error, WITHSCORES not supported in combination with BYLEX");
+ return;
+ }
+
+ if (direction == ZRANGE_DIRECTION_REVERSE &&
+ ((ZRANGE_SCORE == rangetype) || (ZRANGE_LEX == rangetype)))
+ {
+ /* Range is given as [max,min] */
+ int tmp = maxidx;
+ maxidx = minidx;
+ minidx = tmp;
+ }
+
+ /* Step 2: Parse the range. */
+ switch (rangetype) {
+ case ZRANGE_AUTO:
+ case ZRANGE_RANK:
+ /* Z[REV]RANGE, ZRANGESTORE [REV]RANGE */
+ if ((getLongFromObjectOrReply(c, c->argv[minidx], &opt_start,NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[maxidx], &opt_end,NULL) != C_OK))
+ {
+ return;
+ }
+ break;
+
+ case ZRANGE_SCORE:
+ /* Z[REV]RANGEBYSCORE, ZRANGESTORE [REV]RANGEBYSCORE */
+ if (zslParseRange(c->argv[minidx], c->argv[maxidx], &range) != C_OK) {
+ addReplyError(c, "min or max is not a float");
+ return;
+ }
+ break;
+
+ case ZRANGE_LEX:
+ /* Z[REV]RANGEBYLEX, ZRANGESTORE [REV]RANGEBYLEX */
+ if (zslParseLexRange(c->argv[minidx], c->argv[maxidx], &lexrange) != C_OK) {
+ addReplyError(c, "min or max not valid string range item");
+ return;
+ }
+ break;
+ }
+
+ if (opt_withscores || store) {
+ zrangeResultHandlerScoreEmissionEnable(handler);
+ }
+
+ /* Step 3: Lookup the key and get the range. */
+ zobj = lookupKeyRead(c->db, key);
+ if (zobj == NULL) {
+ if (store) {
+ handler->beginResultEmission(handler, -1);
+ handler->finalizeResultEmission(handler, 0);
+ } else {
+ addReply(c, shared.emptyarray);
+ }
+ goto cleanup;
+ }
+
+ if (checkType(c,zobj,OBJ_ZSET)) goto cleanup;
+
+ /* Step 4: Pass this to the command-specific handler. */
+ switch (rangetype) {
+ case ZRANGE_AUTO:
+ case ZRANGE_RANK:
+ genericZrangebyrankCommand(handler, zobj, opt_start, opt_end,
+ opt_withscores || store, direction == ZRANGE_DIRECTION_REVERSE);
+ break;
+
+ case ZRANGE_SCORE:
+ genericZrangebyscoreCommand(handler, &range, zobj, opt_offset,
+ opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
+ break;
+
+ case ZRANGE_LEX:
+ genericZrangebylexCommand(handler, &lexrange, zobj, opt_withscores || store,
+ opt_offset, opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
+ break;
+ }
+
+ /* Instead of returning here, we'll just fall-through the clean-up. */
+
+cleanup:
+
+ if (rangetype == ZRANGE_LEX) {
+ zslFreeLexRange(&lexrange);
+ }
+}
+
+void zcardCommand(client *c) {
+ robj *key = c->argv[1];
+ robj *zobj;
+
+ if ((zobj = lookupKeyReadOrReply(c,key,shared.czero)) == NULL ||
+ checkType(c,zobj,OBJ_ZSET)) return;
+
+ addReplyLongLong(c,zsetLength(zobj));
+}
+
+/* ZSCORE key member
+ *
+ * Reply with the score of 'member' in the sorted set, or NIL when
+ * either the key or the member does not exist. */
+void zscoreCommand(client *c) {
+    robj *key = c->argv[1];
+    robj *zobj;
+    double score;
+
+    if ((zobj = lookupKeyReadOrReply(c,key,shared.null[c->resp])) == NULL ||
+        checkType(c,zobj,OBJ_ZSET)) return;
+
+    if (zsetScore(zobj,c->argv[2]->ptr,&score) == C_ERR) {
+        addReplyNull(c);
+    } else {
+        addReplyDouble(c,score);
+    }
+}
+
+/* ZMSCORE key member [member ...]
+ *
+ * Reply with an array of scores, one entry per requested member, in the
+ * order the members were given. Missing members (or a missing key) get a
+ * NIL element instead of a score. */
+void zmscoreCommand(client *c) {
+    robj *key = c->argv[1];
+    robj *zobj;
+    double score;
+    /* A missing key is not an error here: it is handled below as if the
+     * set were empty, so every member lookup replies NIL. */
+    zobj = lookupKeyRead(c->db,key);
+    if (checkType(c,zobj,OBJ_ZSET)) return;
+
+    addReplyArrayLen(c,c->argc - 2);
+    for (int j = 2; j < c->argc; j++) {
+        /* Treat a missing set the same way as an empty set */
+        if (zobj == NULL || zsetScore(zobj,c->argv[j]->ptr,&score) == C_ERR) {
+            addReplyNull(c);
+        } else {
+            addReplyDouble(c,score);
+        }
+    }
+}
+
+/* ZRANK/ZREVRANK key member [WITHSCORE]
+ *
+ * Common implementation for ZRANK and ZREVRANK. 'reverse' selects the
+ * direction: 0 ranks from the lowest score, 1 from the highest.
+ *
+ * Without WITHSCORE the reply is the integer rank, or NIL if the key or
+ * member is missing. With WITHSCORE the reply is a two element array of
+ * [rank, score], or a NIL array when missing. */
+void zrankGenericCommand(client *c, int reverse) {
+    robj *key = c->argv[1];
+    robj *ele = c->argv[2];
+    robj *zobj;
+    robj* reply;
+    long rank;
+    int opt_withscore = 0;
+    double score;
+
+    if (c->argc > 4) {
+        addReplyErrorArity(c);
+        return;
+    }
+    if (c->argc > 3) {
+        if (!strcasecmp(c->argv[3]->ptr, "withscore")) {
+            opt_withscore = 1;
+        } else {
+            addReplyErrorObject(c, shared.syntaxerr);
+            return;
+        }
+    }
+    /* The "missing key" reply shape depends on WITHSCORE: NIL array vs NIL. */
+    reply = opt_withscore ? shared.nullarray[c->resp] : shared.null[c->resp];
+    if ((zobj = lookupKeyReadOrReply(c, key, reply)) == NULL || checkType(c, zobj, OBJ_ZSET)) {
+        return;
+    }
+    serverAssertWithInfo(c, ele, sdsEncodedObject(ele));
+    /* Only ask zsetRank for the score when we actually need to emit it. */
+    rank = zsetRank(zobj, ele->ptr, reverse, opt_withscore ? &score : NULL);
+    if (rank >= 0) {
+        if (opt_withscore) {
+            addReplyArrayLen(c, 2);
+        }
+        addReplyLongLong(c, rank);
+        if (opt_withscore) {
+            addReplyDouble(c, score);
+        }
+    } else {
+        /* Member not found in the sorted set. */
+        if (opt_withscore) {
+            addReplyNullArray(c);
+        } else {
+            addReplyNull(c);
+        }
+    }
+}
+
+/* ZRANK key member [WITHSCORE] -- rank by ascending score. */
+void zrankCommand(client *c) {
+    zrankGenericCommand(c, 0);
+}
+
+/* ZREVRANK key member [WITHSCORE] -- rank by descending score. */
+void zrevrankCommand(client *c) {
+    zrankGenericCommand(c, 1);
+}
+
+/* ZSCAN key cursor [MATCH pattern] [COUNT count]
+ *
+ * Incrementally iterate the sorted set; the heavy lifting (cursor
+ * handling, MATCH/COUNT options) lives in scanGenericCommand(). */
+void zscanCommand(client *c) {
+    robj *o;
+    unsigned long cursor;
+
+    if (parseScanCursorOrReply(c,c->argv[2],&cursor) == C_ERR) return;
+    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan)) == NULL ||
+        checkType(c,o,OBJ_ZSET)) return;
+    scanGenericCommand(c,o,cursor);
+}
+
+/* This command implements the generic zpop operation, used by:
+ * ZPOPMIN, ZPOPMAX, BZPOPMIN, BZPOPMAX and ZMPOP. This function is also used
+ * inside blocked.c in the unblocking stage of BZPOPMIN, BZPOPMAX and BZMPOP.
+ *
+ * If 'emitkey' is true also the key name is emitted, useful for the blocking
+ * behavior of BZPOP[MIN|MAX], since we can block into multiple keys.
+ * Or in ZMPOP/BZMPOP, because we also can take multiple keys.
+ *
+ * 'count' is the number of elements requested to pop, or -1 for plain single pop.
+ *
+ * 'use_nested_array' when false it generates a flat array (with or without key name).
+ * When true, it generates a nested 2 level array of field + score pairs, or 3 level when emitkey is set.
+ *
+ * 'reply_nil_when_empty' when true we reply a NIL if we are not able to pop up any elements.
+ * Like in ZMPOP/BZMPOP we reply with a structured nested array containing key name
+ * and member + score pairs. In these commands, we reply with null when we have no result.
+ * Otherwise in ZPOPMIN/ZPOPMAX we reply an empty array by default.
+ *
+ * 'deleted' is an optional output argument to get an indication
+ * if the key got deleted by this function.
+ * */
+void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey,
+                        long count, int use_nested_array, int reply_nil_when_empty, int *deleted) {
+    int idx;
+    robj *key = NULL;
+    robj *zobj = NULL;
+    sds ele;
+    double score;
+
+    if (deleted) *deleted = 0;
+
+    /* Check type and break on the first error, otherwise identify candidate. */
+    idx = 0;
+    while (idx < keyc) {
+        key = keyv[idx++];
+        zobj = lookupKeyWrite(c->db,key);
+        if (!zobj) continue;
+        if (checkType(c,zobj,OBJ_ZSET)) return;
+        break;
+    }
+
+    /* No candidate for zpopping, return empty. */
+    if (!zobj) {
+        if (reply_nil_when_empty) {
+            addReplyNullArray(c);
+        } else {
+            addReply(c,shared.emptyarray);
+        }
+        return;
+    }
+
+    if (count == 0) {
+        /* ZPOPMIN/ZPOPMAX with count 0. */
+        addReply(c, shared.emptyarray);
+        return;
+    }
+
+    long result_count = 0;
+
+    /* When count is -1, we need to correct it to 1 for plain single pop. */
+    if (count == -1) count = 1;
+
+    long llen = zsetLength(zobj);
+    long rangelen = (count > llen) ? llen : count;
+
+    /* Emit the reply header up front: the number of elements to pop is
+     * already known, so the multi-bulk length can be committed here. */
+    if (!use_nested_array && !emitkey) {
+        /* ZPOPMIN/ZPOPMAX with or without COUNT option in RESP2. */
+        addReplyArrayLen(c, rangelen * 2);
+    } else if (use_nested_array && !emitkey) {
+        /* ZPOPMIN/ZPOPMAX with COUNT option in RESP3. */
+        addReplyArrayLen(c, rangelen);
+    } else if (!use_nested_array && emitkey) {
+        /* BZPOPMIN/BZPOPMAX in RESP2 and RESP3. */
+        addReplyArrayLen(c, rangelen * 2 + 1);
+        addReplyBulk(c, key);
+    } else if (use_nested_array && emitkey) {
+        /* ZMPOP/BZMPOP in RESP2 and RESP3. */
+        addReplyArrayLen(c, 2);
+        addReplyBulk(c, key);
+        addReplyArrayLen(c, rangelen);
+    }
+
+    /* Remove the element. */
+    do {
+        if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
+            unsigned char *zl = zobj->ptr;
+            unsigned char *eptr, *sptr;
+            unsigned char *vstr;
+            unsigned int vlen;
+            long long vlong;
+
+            /* Get the first or last element in the sorted set. */
+            eptr = lpSeek(zl,where == ZSET_MAX ? -2 : 0);
+            serverAssertWithInfo(c,zobj,eptr != NULL);
+            vstr = lpGetValue(eptr,&vlen,&vlong);
+            if (vstr == NULL)
+                ele = sdsfromlonglong(vlong);
+            else
+                ele = sdsnewlen(vstr,vlen);
+
+            /* Get the score. */
+            sptr = lpNext(zl,eptr);
+            serverAssertWithInfo(c,zobj,sptr != NULL);
+            score = zzlGetScore(sptr);
+        } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+            zset *zs = zobj->ptr;
+            zskiplist *zsl = zs->zsl;
+            zskiplistNode *zln;
+
+            /* Get the first or last element in the sorted set. */
+            zln = (where == ZSET_MAX ? zsl->tail :
+                                       zsl->header->level[0].forward);
+
+            /* There must be an element in the sorted set. */
+            serverAssertWithInfo(c,zobj,zln != NULL);
+            ele = sdsdup(zln->ele);
+            score = zln->score;
+        } else {
+            serverPanic("Unknown sorted set encoding");
+        }
+
+        serverAssertWithInfo(c,zobj,zsetDel(zobj,ele));
+        server.dirty++;
+
+        if (result_count == 0) { /* Do this only for the first iteration. */
+            char *events[2] = {"zpopmin","zpopmax"};
+            notifyKeyspaceEvent(NOTIFY_ZSET,events[where],key,c->db->id);
+            signalModifiedKey(c,c->db,key);
+        }
+
+        if (use_nested_array) {
+            addReplyArrayLen(c,2);
+        }
+        addReplyBulkCBuffer(c,ele,sdslen(ele));
+        addReplyDouble(c,score);
+        sdsfree(ele);
+        ++result_count;
+    } while(--rangelen);
+
+    /* Remove the key, if indeed needed. */
+    if (zsetLength(zobj) == 0) {
+        if (deleted) *deleted = 1;
+
+        dbDelete(c->db,key);
+        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
+    }
+
+    if (c->cmd->proc == zmpopCommand) {
+        /* Always replicate it as ZPOP[MIN|MAX] with COUNT option instead of ZMPOP. */
+        robj *count_obj = createStringObjectFromLongLong((count > llen) ? llen : count);
+        rewriteClientCommandVector(c, 3,
+                                   (where == ZSET_MAX) ? shared.zpopmax : shared.zpopmin,
+                                   key, count_obj);
+        decrRefCount(count_obj);
+    }
+}
+
+/* ZPOPMIN/ZPOPMAX key [<count>]
+ *
+ * Common implementation: parses the optional count and dispatches to
+ * genericZpopCommand() with the proper reply shape for the protocol
+ * version in use. */
+void zpopMinMaxCommand(client *c, int where) {
+    if (c->argc > 3) {
+        addReplyErrorObject(c,shared.syntaxerr);
+        return;
+    }
+
+    long count = -1; /* -1 for plain single pop. */
+    if (c->argc == 3 && getPositiveLongFromObjectOrReply(c, c->argv[2], &count, NULL) != C_OK)
+        return;
+
+    /* Respond with a single (flat) array in RESP2 or if count is -1
+     * (returning a single element). In RESP3, when count > 0 use nested array. */
+    int use_nested_array = (c->resp > 2 && count != -1);
+
+    genericZpopCommand(c, &c->argv[1], 1, where, 0, count, use_nested_array, 0, NULL);
+}
+
+/* ZPOPMIN key [<count>] -- pop the element(s) with the lowest score. */
+void zpopminCommand(client *c) {
+    zpopMinMaxCommand(c, ZSET_MIN);
+}
+
+/* ZPOPMAX key [<count>] -- pop the element(s) with the highest score. */
+void zpopmaxCommand(client *c) {
+    zpopMinMaxCommand(c, ZSET_MAX);
+}
+
+/* BZPOPMIN, BZPOPMAX, BZMPOP actual implementation.
+ *
+ * 'numkeys' is the number of keys.
+ *
+ * 'timeout_idx' parameter position of block timeout.
+ *
+ * 'where' ZSET_MIN or ZSET_MAX.
+ *
+ * 'count' is the number of elements requested to pop, or -1 for plain single pop.
+ *
+ * 'use_nested_array' when false it generates a flat array (with or without key name).
+ * When true, it generates a nested 3 level array of keyname, field + score pairs.
+ * */
+void blockingGenericZpopCommand(client *c, robj **keys, int numkeys, int where,
+                                int timeout_idx, long count, int use_nested_array, int reply_nil_when_empty) {
+    robj *o;
+    robj *key;
+    mstime_t timeout;
+    int j;
+
+    if (getTimeoutFromObjectOrReply(c,c->argv[timeout_idx],&timeout,UNIT_SECONDS)
+        != C_OK) return;
+
+    /* Scan the keys in order: the first non-empty zset is served
+     * immediately, exactly like the non blocking variant. */
+    for (j = 0; j < numkeys; j++) {
+        key = keys[j];
+        o = lookupKeyWrite(c->db,key);
+        /* Non-existing key, move to next key. */
+        if (o == NULL) continue;
+
+        if (checkType(c,o,OBJ_ZSET)) return;
+
+        long llen = zsetLength(o);
+        /* Empty zset, move to next key. */
+        if (llen == 0) continue;
+
+        /* Non empty zset, this is like a normal ZPOP[MIN|MAX]. */
+        genericZpopCommand(c, &key, 1, where, 1, count, use_nested_array, reply_nil_when_empty, NULL);
+
+        /* Replicate the non-blocking equivalent so replicas/AOF never see
+         * the blocking form of the command. */
+        if (count == -1) {
+            /* Replicate it as ZPOP[MIN|MAX] instead of BZPOP[MIN|MAX]. */
+            rewriteClientCommandVector(c,2,
+                                       (where == ZSET_MAX) ? shared.zpopmax : shared.zpopmin,
+                                       key);
+        } else {
+            /* Replicate it as ZPOP[MIN|MAX] with COUNT option. */
+            robj *count_obj = createStringObjectFromLongLong((count > llen) ? llen : count);
+            rewriteClientCommandVector(c, 3,
+                                       (where == ZSET_MAX) ? shared.zpopmax : shared.zpopmin,
+                                       key, count_obj);
+            decrRefCount(count_obj);
+        }
+
+        return;
+    }
+
+    /* If we are not allowed to block the client and the zset is empty the only thing
+     * we can do is treating it as a timeout (even with timeout 0). */
+    if (c->flags & CLIENT_DENY_BLOCKING) {
+        addReplyNullArray(c);
+        return;
+    }
+
+    /* If the keys do not exist we must block */
+    blockForKeys(c,BLOCKED_ZSET,keys,numkeys,timeout,0);
+}
+
+/* BZPOPMIN key [key ...] timeout -- blocking pop of the lowest score. */
+void bzpopminCommand(client *c) {
+    blockingGenericZpopCommand(c, c->argv+1, c->argc-2, ZSET_MIN, c->argc-1, -1, 0, 0);
+}
+
+/* BZPOPMAX key [key ...] timeout -- blocking pop of the highest score. */
+void bzpopmaxCommand(client *c) {
+    blockingGenericZpopCommand(c, c->argv+1, c->argc-2, ZSET_MAX, c->argc-1, -1, 0, 0);
+}
+
+/* Emit 'count' sampled listpack entries to the client.
+ *
+ * 'keys' holds the member names; 'vals' the corresponding score entries,
+ * or NULL when WITHSCORES was not requested. Each entry carries either a
+ * string (sval/slen) or an integer (lval). In RESP3 each member+score
+ * pair is wrapped in a 2 element array. */
+static void zrandmemberReplyWithListpack(client *c, unsigned int count, listpackEntry *keys, listpackEntry *vals) {
+    for (unsigned long i = 0; i < count; i++) {
+        if (vals && c->resp > 2)
+            addReplyArrayLen(c,2);
+        if (keys[i].sval)
+            addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
+        else
+            addReplyBulkLongLong(c, keys[i].lval);
+        if (vals) {
+            if (vals[i].sval) {
+                addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
+            } else
+                addReplyDouble(c, vals[i].lval);
+        }
+    }
+}
+
+/* How many times bigger should be the zset compared to the requested size
+ * for us to not use the "remove elements" strategy? Read later in the
+ * implementation for more info. */
+#define ZRANDMEMBER_SUB_STRATEGY_MUL 3
+
+/* If client is trying to ask for a very large number of random elements,
+ * queuing may consume an unlimited amount of memory, so we want to limit
+ * the number of randoms per time. */
+#define ZRANDMEMBER_RANDOM_SAMPLE_LIMIT 1000
+
+/* ZRANDMEMBER with the <count> argument.
+ *
+ * 'l' is the signed count: positive means "up to l distinct elements",
+ * negative means "exactly -l elements, repetitions allowed". Four
+ * strategies are used depending on the sign of the count, the encoding,
+ * and the ratio between the requested count and the zset size. */
+void zrandmemberWithCountCommand(client *c, long l, int withscores) {
+    unsigned long count, size;
+    int uniq = 1;
+    robj *zsetobj;
+
+    if ((zsetobj = lookupKeyReadOrReply(c, c->argv[1], shared.emptyarray))
+        == NULL || checkType(c, zsetobj, OBJ_ZSET)) return;
+    size = zsetLength(zsetobj);
+
+    if(l >= 0) {
+        count = (unsigned long) l;
+    } else {
+        count = -l;
+        uniq = 0;
+    }
+
+    /* If count is zero, serve it ASAP to avoid special cases later. */
+    if (count == 0) {
+        addReply(c,shared.emptyarray);
+        return;
+    }
+
+    /* CASE 1: The count was negative, so the extraction method is just:
+     * "return N random elements" sampling the whole set every time.
+     * This case is trivial and can be served without auxiliary data
+     * structures. This case is the only one that also needs to return the
+     * elements in random order. */
+    if (!uniq || count == 1) {
+        if (withscores && c->resp == 2)
+            addReplyArrayLen(c, count*2);
+        else
+            addReplyArrayLen(c, count);
+        if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) {
+            zset *zs = zsetobj->ptr;
+            while (count--) {
+                dictEntry *de = dictGetFairRandomKey(zs->dict);
+                sds key = dictGetKey(de);
+                if (withscores && c->resp > 2)
+                    addReplyArrayLen(c,2);
+                addReplyBulkCBuffer(c, key, sdslen(key));
+                if (withscores)
+                    addReplyDouble(c, *(double*)dictGetVal(de));
+                /* Stop filling the output buffer for a dying client. */
+                if (c->flags & CLIENT_CLOSE_ASAP)
+                    break;
+            }
+        } else if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) {
+            listpackEntry *keys, *vals = NULL;
+            unsigned long limit, sample_count;
+            /* Sample in batches of at most ZRANDMEMBER_RANDOM_SAMPLE_LIMIT
+             * entries so a huge count cannot allocate unbounded memory. */
+            limit = count > ZRANDMEMBER_RANDOM_SAMPLE_LIMIT ? ZRANDMEMBER_RANDOM_SAMPLE_LIMIT : count;
+            keys = zmalloc(sizeof(listpackEntry)*limit);
+            if (withscores)
+                vals = zmalloc(sizeof(listpackEntry)*limit);
+            while (count) {
+                sample_count = count > limit ? limit : count;
+                count -= sample_count;
+                lpRandomPairs(zsetobj->ptr, sample_count, keys, vals);
+                zrandmemberReplyWithListpack(c, sample_count, keys, vals);
+                if (c->flags & CLIENT_CLOSE_ASAP)
+                    break;
+            }
+            zfree(keys);
+            zfree(vals);
+        }
+        return;
+    }
+
+    zsetopsrc src;
+    zsetopval zval;
+    src.subject = zsetobj;
+    src.type = zsetobj->type;
+    src.encoding = zsetobj->encoding;
+    zuiInitIterator(&src);
+    memset(&zval, 0, sizeof(zval));
+
+    /* Initiate reply count, RESP3 responds with nested array, RESP2 with flat one. */
+    long reply_size = count < size ? count : size;
+    if (withscores && c->resp == 2)
+        addReplyArrayLen(c, reply_size*2);
+    else
+        addReplyArrayLen(c, reply_size);
+
+    /* CASE 2:
+     * The number of requested elements is greater than the number of
+     * elements inside the zset: simply return the whole zset. */
+    if (count >= size) {
+        while (zuiNext(&src, &zval)) {
+            if (withscores && c->resp > 2)
+                addReplyArrayLen(c,2);
+            addReplyBulkSds(c, zuiNewSdsFromValue(&zval));
+            if (withscores)
+                addReplyDouble(c, zval.score);
+        }
+        zuiClearIterator(&src);
+        return;
+    }
+
+    /* CASE 2.5 listpack only. Sampling unique elements, in non-random order.
+     * Listpack encoded zsets are meant to be relatively small, so
+     * ZRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make
+     * copies of the entries. Instead, we emit them directly to the output
+     * buffer.
+     *
+     * And it is inefficient to repeatedly pick one random element from a
+     * listpack in CASE 4. So we use this instead. */
+    if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) {
+        listpackEntry *keys, *vals = NULL;
+        keys = zmalloc(sizeof(listpackEntry)*count);
+        if (withscores)
+            vals = zmalloc(sizeof(listpackEntry)*count);
+        serverAssert(lpRandomPairsUnique(zsetobj->ptr, count, keys, vals) == count);
+        zrandmemberReplyWithListpack(c, count, keys, vals);
+        zfree(keys);
+        zfree(vals);
+        zuiClearIterator(&src);
+        return;
+    }
+
+    /* CASE 3:
+     * The number of elements inside the zset is not greater than
+     * ZRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements.
+     * In this case we create a dict from scratch with all the elements, and
+     * subtract random elements to reach the requested number of elements.
+     *
+     * This is done because if the number of requested elements is just
+     * a bit less than the number of elements in the set, the natural approach
+     * used into CASE 4 is highly inefficient. */
+    if (count*ZRANDMEMBER_SUB_STRATEGY_MUL > size) {
+        /* Hashtable encoding (generic implementation) */
+        dict *d = dictCreate(&sdsReplyDictType);
+        dictExpand(d, size);
+        /* Add all the elements into the temporary dictionary. */
+        while (zuiNext(&src, &zval)) {
+            sds key = zuiNewSdsFromValue(&zval);
+            dictEntry *de = dictAddRaw(d, key, NULL);
+            serverAssert(de);
+            if (withscores)
+                dictSetDoubleVal(de, zval.score);
+        }
+        serverAssert(dictSize(d) == size);
+
+        /* Remove random elements to reach the right count. */
+        while (size > count) {
+            dictEntry *de;
+            de = dictGetFairRandomKey(d);
+            dictUnlink(d,dictGetKey(de));
+            sdsfree(dictGetKey(de));
+            dictFreeUnlinkedEntry(d,de);
+            size--;
+        }
+
+        /* Reply with what's in the dict and release memory */
+        dictIterator *di;
+        dictEntry *de;
+        di = dictGetIterator(d);
+        while ((de = dictNext(di)) != NULL) {
+            if (withscores && c->resp > 2)
+                addReplyArrayLen(c,2);
+            addReplyBulkSds(c, dictGetKey(de));
+            if (withscores)
+                addReplyDouble(c, dictGetDoubleVal(de));
+        }
+
+        dictReleaseIterator(di);
+        dictRelease(d);
+    }
+
+    /* CASE 4: We have a big zset compared to the requested number of elements.
+     * In this case we can simply get random elements from the zset and add
+     * to the temporary set, trying to eventually get enough unique elements
+     * to reach the specified count. */
+    else {
+        /* Hashtable encoding (generic implementation) */
+        unsigned long added = 0;
+        dict *d = dictCreate(&hashDictType);
+        dictExpand(d, count);
+
+        while (added < count) {
+            listpackEntry key;
+            double score;
+            zsetTypeRandomElement(zsetobj, size, &key, withscores ? &score: NULL);
+
+            /* Try to add the object to the dictionary. If it already exists
+             * free it, otherwise increment the number of objects we have
+             * in the result dictionary. */
+            sds skey = zsetSdsFromListpackEntry(&key);
+            if (dictAdd(d,skey,NULL) != DICT_OK) {
+                sdsfree(skey);
+                continue;
+            }
+            added++;
+
+            if (withscores && c->resp > 2)
+                addReplyArrayLen(c,2);
+            zsetReplyFromListpackEntry(c, &key);
+            if (withscores)
+                addReplyDouble(c, score);
+        }
+
+        /* Release memory */
+        dictRelease(d);
+    }
+    zuiClearIterator(&src);
+}
+
+/* ZRANDMEMBER key [<count> [WITHSCORES]]
+ *
+ * Without <count>: reply with one random member as a plain bulk string.
+ * With <count>: delegate to zrandmemberWithCountCommand(). */
+void zrandmemberCommand(client *c) {
+    long l;
+    int withscores = 0;
+    robj *zset;
+    listpackEntry ele;
+
+    if (c->argc >= 3) {
+        if (getRangeLongFromObjectOrReply(c,c->argv[2],-LONG_MAX,LONG_MAX,&l,NULL) != C_OK) return;
+        if (c->argc > 4 || (c->argc == 4 && strcasecmp(c->argv[3]->ptr,"withscores"))) {
+            addReplyErrorObject(c,shared.syntaxerr);
+            return;
+        } else if (c->argc == 4) {
+            withscores = 1;
+            /* WITHSCORES doubles the reply length, so restrict the count
+             * range to keep the computed reply size from overflowing. */
+            if (l < -LONG_MAX/2 || l > LONG_MAX/2) {
+                addReplyError(c,"value is out of range");
+                return;
+            }
+        }
+        zrandmemberWithCountCommand(c, l, withscores);
+        return;
+    }
+
+    /* Handle variant without <count> argument. Reply with simple bulk string */
+    if ((zset = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))== NULL ||
+        checkType(c,zset,OBJ_ZSET)) {
+        return;
+    }
+
+    zsetTypeRandomElement(zset, zsetLength(zset), &ele,NULL);
+    zsetReplyFromListpackEntry(c,&ele);
+}
+
+/* ZMPOP/BZMPOP
+ *
+ * Common argument parsing for ZMPOP and BZMPOP:
+ *   [timeout] numkeys key [<key> ...] MIN|MAX [COUNT count]
+ *
+ * 'numkeys_idx' parameter position of key number.
+ * 'is_block' this indicates whether it is a blocking variant. */
+void zmpopGenericCommand(client *c, int numkeys_idx, int is_block) {
+    long j;
+    long numkeys = 0; /* Number of keys. */
+    int where = 0; /* ZSET_MIN or ZSET_MAX. */
+    long count = -1; /* Reply will consist of up to count elements, depending on the zset's length. */
+
+    /* Parse the numkeys. */
+    if (getRangeLongFromObjectOrReply(c, c->argv[numkeys_idx], 1, LONG_MAX,
+                                      &numkeys, "numkeys should be greater than 0") != C_OK)
+        return;
+
+    /* Parse the where. where_idx: the index of where in the c->argv. */
+    long where_idx = numkeys_idx + numkeys + 1;
+    if (where_idx >= c->argc) {
+        addReplyErrorObject(c, shared.syntaxerr);
+        return;
+    }
+    if (!strcasecmp(c->argv[where_idx]->ptr, "MIN")) {
+        where = ZSET_MIN;
+    } else if (!strcasecmp(c->argv[where_idx]->ptr, "MAX")) {
+        where = ZSET_MAX;
+    } else {
+        addReplyErrorObject(c, shared.syntaxerr);
+        return;
+    }
+
+    /* Parse the optional arguments. */
+    for (j = where_idx + 1; j < c->argc; j++) {
+        char *opt = c->argv[j]->ptr;
+        int moreargs = (c->argc - 1) - j;
+
+        /* COUNT may appear at most once (count == -1 until it is seen). */
+        if (count == -1 && !strcasecmp(opt, "COUNT") && moreargs) {
+            j++;
+            if (getRangeLongFromObjectOrReply(c, c->argv[j], 1, LONG_MAX,
+                                              &count,"count should be greater than 0") != C_OK)
+                return;
+        } else {
+            addReplyErrorObject(c, shared.syntaxerr);
+            return;
+        }
+    }
+
+    if (count == -1) count = 1;
+
+    if (is_block) {
+        /* BLOCK. We will handle CLIENT_DENY_BLOCKING flag in blockingGenericZpopCommand. */
+        blockingGenericZpopCommand(c, c->argv+numkeys_idx+1, numkeys, where, 1, count, 1, 1);
+    } else {
+        /* NON-BLOCK */
+        genericZpopCommand(c, c->argv+numkeys_idx+1, numkeys, where, 1, count, 1, 1, NULL);
+    }
+}
+
+/* ZMPOP numkeys key [<key> ...] MIN|MAX [COUNT count] -- numkeys is argv[1]. */
+void zmpopCommand(client *c) {
+    zmpopGenericCommand(c, 1, 0);
+}
+
+/* BZMPOP timeout numkeys key [<key> ...] MIN|MAX [COUNT count] -- numkeys is argv[2]. */
+void bzmpopCommand(client *c) {
+    zmpopGenericCommand(c, 2, 1);
+}
diff --git a/src/testhelp.h b/src/testhelp.h
new file mode 100644
index 0000000..d5be80e
--- /dev/null
+++ b/src/testhelp.h
@@ -0,0 +1,62 @@
+/* This is a really minimal testing framework for C.
+ *
+ * Example:
+ *
+ * test_cond("Check if 1 == 1", 1==1)
+ * test_cond("Check if 5 > 10", 5 > 10)
+ * test_report()
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2010-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __TESTHELP_H
+#define __TESTHELP_H
+
+/* Bit flags passed to the self-test entry points to tune their behavior. */
+#define REDIS_TEST_ACCURATE (1<<0)
+#define REDIS_TEST_LARGE_MEMORY (1<<1)
+#define REDIS_TEST_VALGRIND (1<<2)
+
+/* Counters shared by every translation unit that runs tests; they must
+ * be defined exactly once in a .c file. */
+extern int __failed_tests;
+extern int __test_num;
+
+/* Evaluate condition '_c' and print a numbered PASSED/FAILED line,
+ * updating the global counters. */
+#define test_cond(descr,_c) do { \
+    __test_num++; printf("%d - %s: ", __test_num, descr); \
+    if(_c) printf("PASSED\n"); else {printf("FAILED\n"); __failed_tests++;} \
+} while(0)
+/* Print the final pass/fail summary; exit(1) when any test failed. */
+#define test_report() do { \
+    printf("%d tests, %d passed, %d failed\n", __test_num, \
+           __test_num-__failed_tests, __failed_tests); \
+    if (__failed_tests) { \
+        printf("=== WARNING === We have failed tests here...\n"); \
+        exit(1); \
+    } \
+} while(0)
+
+#endif
diff --git a/src/timeout.c b/src/timeout.c
new file mode 100644
index 0000000..eb971dc
--- /dev/null
+++ b/src/timeout.c
@@ -0,0 +1,202 @@
+/* Copyright (c) 2009-2020, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "cluster.h"
+
+#include <math.h>
+
+/* ========================== Clients timeouts ============================= */
+
+/* Check if this blocked client timedout (does nothing if the client is
+ * not blocked right now). If so send a reply, unblock it, and return 1.
+ * Otherwise 0 is returned and no operation is performed. */
+int checkBlockedClientTimeout(client *c, mstime_t now) {
+    /* Nothing to do unless the client is currently blocked. */
+    if (!(c->flags & CLIENT_BLOCKED)) return 0;
+    /* A zero timeout means "block forever" and never expires. */
+    if (c->bstate.timeout == 0) return 0;
+    /* Timeout still in the future: not expired yet. */
+    if (c->bstate.timeout >= now) return 0;
+
+    /* Handle blocking operation specific timeout. */
+    unblockClientOnTimeout(c);
+    return 1;
+}
+
+/* Check for timeouts. Returns non-zero if the client was terminated.
+ * The function gets the current time in milliseconds as argument since
+ * it gets called multiple times in a loop, so calling gettimeofday() for
+ * each iteration would be costly without any actual gain. */
+int clientsCronHandleTimeout(client *c, mstime_t now_ms) {
+    time_t now = now_ms/1000;
+
+    if (server.maxidletime &&
+        /* This handles the idle clients connection timeout if set. */
+        !(c->flags & CLIENT_SLAVE) &&    /* No timeout for slaves and monitors */
+        !mustObeyClient(c) &&            /* No timeout for masters and AOF */
+        !(c->flags & CLIENT_BLOCKED) &&  /* No timeout for BLPOP */
+        !(c->flags & CLIENT_PUBSUB) &&   /* No timeout for Pub/Sub clients */
+        (now - c->lastinteraction > server.maxidletime))
+    {
+        serverLog(LL_VERBOSE,"Closing idle client");
+        /* freeClient() invalidates 'c': return 1 so the caller stops
+         * touching this client. */
+        freeClient(c);
+        return 1;
+    } else if (c->flags & CLIENT_BLOCKED) {
+        /* Cluster: handle unblock & redirect of clients blocked
+         * into keys no longer served by this server. */
+        if (server.cluster_enabled) {
+            if (clusterRedirectBlockedClientIfNeeded(c))
+                unblockClientOnError(c, NULL);
+        }
+    }
+    return 0;
+}
+
+/* For blocked clients timeouts we populate a radix tree of 128 bit keys
+ * composed as such:
+ *
+ * [8 byte big endian expire time]+[8 byte client ID]
+ *
+ * We don't do any cleanup in the Radix tree: when we run the clients that
+ * reached the timeout already, if they are no longer existing or no longer
+ * blocked with such timeout, we just go forward.
+ *
+ * Every time a client blocks with a timeout, we add the client in
+ * the tree. In beforeSleep() we call handleBlockedClientsTimeout() to run
+ * the tree and unblock the clients. */
+
+#define CLIENT_ST_KEYLEN 16 /* 8 bytes mstime + 8 bytes client ID. */
+
+/* Given client ID and timeout, write the resulting radix tree key in buf.
+ *
+ * The 16 byte key is [8 byte big endian timeout][8 byte client pointer];
+ * the big endian timeout makes the radix tree iterate keys in expire
+ * time order. 'buf' must be at least CLIENT_ST_KEYLEN bytes. */
+void encodeTimeoutKey(unsigned char *buf, uint64_t timeout, client *c) {
+    timeout = htonu64(timeout);
+    memcpy(buf,&timeout,sizeof(timeout));
+    memcpy(buf+8,&c,sizeof(c));
+    if (sizeof(c) == 4) memset(buf+12,0,4); /* Zero padding for 32bit target. */
+}
+
+/* Given a key encoded with encodeTimeoutKey(), resolve the fields and write
+ * the timeout into *toptr and the client pointer into *cptr. */
+void decodeTimeoutKey(unsigned char *buf, uint64_t *toptr, client **cptr) {
+    /* First 8 bytes: big endian timeout, converted back to host order. */
+    memcpy(toptr,buf,sizeof(*toptr));
+    *toptr = ntohu64(*toptr);
+    /* Last 8 bytes: the raw client pointer. */
+    memcpy(cptr,buf+8,sizeof(*cptr));
+}
+
+/* Add the specified client id / timeout as a key in the radix tree we use
+ * to handle blocked clients timeouts. The client is not added to the list
+ * if its timeout is zero (block forever). */
+void addClientToTimeoutTable(client *c) {
+    if (c->bstate.timeout == 0) return;
+    uint64_t timeout = c->bstate.timeout;
+    unsigned char buf[CLIENT_ST_KEYLEN];
+    encodeTimeoutKey(buf,timeout,c);
+    /* Flag the client only when the insert actually happened, so that
+     * removeClientFromTimeoutTable() knows there is something to remove. */
+    if (raxTryInsert(server.clients_timeout_table,buf,sizeof(buf),NULL,NULL))
+        c->flags |= CLIENT_IN_TO_TABLE;
+}
+
+/* Remove the client from the table when it is unblocked for reasons
+ * different than timing out. */
+void removeClientFromTimeoutTable(client *c) {
+    if (!(c->flags & CLIENT_IN_TO_TABLE)) return;
+    c->flags &= ~CLIENT_IN_TO_TABLE;
+    /* Rebuild the exact key used at insertion time to locate the entry. */
+    uint64_t timeout = c->bstate.timeout;
+    unsigned char buf[CLIENT_ST_KEYLEN];
+    encodeTimeoutKey(buf,timeout,c);
+    raxRemove(server.clients_timeout_table,buf,sizeof(buf),NULL);
+}
+
+/* This function is called in beforeSleep() in order to unblock clients
+ * that are waiting in blocking operations with a timeout set. */
+void handleBlockedClientsTimeout(void) {
+    if (raxSize(server.clients_timeout_table) == 0) return;
+    uint64_t now = mstime();
+    raxIterator ri;
+    raxStart(&ri,server.clients_timeout_table);
+    raxSeek(&ri,"^",NULL,0);
+
+    while(raxNext(&ri)) {
+        uint64_t timeout;
+        client *c;
+        decodeTimeoutKey(ri.key,&timeout,&c);
+        /* Keys are big endian timeouts, so iteration is in expire time
+         * order: the first non-expired entry means we can stop. */
+        if (timeout >= now) break; /* All the timeouts are in the future. */
+        c->flags &= ~CLIENT_IN_TO_TABLE;
+        checkBlockedClientTimeout(c,now);
+        /* Removing invalidates the iterator, so re-seek from the start;
+         * the tree only shrinks here so this terminates. */
+        raxRemove(server.clients_timeout_table,ri.key,ri.key_len,NULL);
+        raxSeek(&ri,"^",NULL,0);
+    }
+    raxStop(&ri);
+}
+
+/* Get a timeout value from an object and store it into 'timeout'.
+ * The final timeout is always stored as milliseconds as a time where the
+ * timeout will expire, however the parsing is performed according to
+ * the 'unit' that can be seconds or milliseconds.
+ *
+ * Note that if the timeout is zero (usually from the point of view of
+ * commands API this means no timeout) the value stored into 'timeout'
+ * is zero.
+ *
+ * Returns C_OK on success; on any parse/range error an error reply is
+ * sent to the client and C_ERR is returned. */
+int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int unit) {
+    long long tval;
+    long double ftval;
+    mstime_t now = commandTimeSnapshot();
+
+    if (unit == UNIT_SECONDS) {
+        /* Seconds may be fractional (e.g. "0.1"), so parse as long double
+         * and round up to whole milliseconds. */
+        if (getLongDoubleFromObjectOrReply(c,object,&ftval,
+            "timeout is not a float or out of range") != C_OK)
+            return C_ERR;
+
+        ftval *= 1000.0;  /* seconds => millisec */
+        if (ftval > LLONG_MAX) {
+            addReplyError(c, "timeout is out of range");
+            return C_ERR;
+        }
+        tval = (long long) ceill(ftval);
+    } else {
+        if (getLongLongFromObjectOrReply(c,object,&tval,
+            "timeout is not an integer or out of range") != C_OK)
+            return C_ERR;
+    }
+
+    if (tval < 0) {
+        addReplyError(c,"timeout is negative");
+        return C_ERR;
+    }
+
+    if (tval > 0) {
+        /* Convert the relative timeout to an absolute deadline, guarding
+         * against overflow of 'tval + now'. */
+        if (tval > LLONG_MAX - now) {
+            addReplyError(c,"timeout is out of range"); /* 'tval+now' would overflow */
+            return C_ERR;
+        }
+        tval += now;
+    }
+    *timeout = tval;
+
+    return C_OK;
+}
diff --git a/src/tls.c b/src/tls.c
new file mode 100644
index 0000000..e709c99
--- /dev/null
+++ b/src/tls.c
@@ -0,0 +1,1204 @@
+/*
+ * Copyright (c) 2019, Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define REDISMODULE_CORE_MODULE /* A module that's part of the redis core, uses server.h too. */
+
+#include "server.h"
+#include "connhelpers.h"
+#include "adlist.h"
+
+#if (USE_OPENSSL == 1 /* BUILD_YES */ ) || ((USE_OPENSSL == 2 /* BUILD_MODULE */) && (BUILD_TLS_MODULE == 2))
+
+#include <openssl/conf.h>
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <openssl/rand.h>
+#include <openssl/pem.h>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+#include <openssl/decoder.h>
+#endif
+#include <sys/uio.h>
+#include <arpa/inet.h>
+
+#define REDIS_TLS_PROTO_TLSv1 (1<<0)
+#define REDIS_TLS_PROTO_TLSv1_1 (1<<1)
+#define REDIS_TLS_PROTO_TLSv1_2 (1<<2)
+#define REDIS_TLS_PROTO_TLSv1_3 (1<<3)
+
+/* Use safe defaults */
+#ifdef TLS1_3_VERSION
+#define REDIS_TLS_PROTO_DEFAULT (REDIS_TLS_PROTO_TLSv1_2|REDIS_TLS_PROTO_TLSv1_3)
+#else
+#define REDIS_TLS_PROTO_DEFAULT (REDIS_TLS_PROTO_TLSv1_2)
+#endif
+
+SSL_CTX *redis_tls_ctx = NULL;
+SSL_CTX *redis_tls_client_ctx = NULL;
+
+/* Parse a space separated tls-protocols configuration string (e.g.
+ * "TLSv1.2 TLSv1.3", case-insensitive) into a REDIS_TLS_PROTO_* bitmask.
+ * A NULL string yields REDIS_TLS_PROTO_DEFAULT; an unknown token, or
+ * TLSv1.3 on an OpenSSL build without TLS 1.3 support, returns -1. */
+static int parseProtocolsConfig(const char *str) {
+    int i, count = 0;
+    int protocols = 0;
+
+    if (!str) return REDIS_TLS_PROTO_DEFAULT;
+    sds *tokens = sdssplitlen(str, strlen(str), " ", 1, &count);
+
+    if (!tokens) {
+        serverLog(LL_WARNING, "Invalid tls-protocols configuration string");
+        return -1;
+    }
+    for (i = 0; i < count; i++) {
+        if (!strcasecmp(tokens[i], "tlsv1")) protocols |= REDIS_TLS_PROTO_TLSv1;
+        else if (!strcasecmp(tokens[i], "tlsv1.1")) protocols |= REDIS_TLS_PROTO_TLSv1_1;
+        else if (!strcasecmp(tokens[i], "tlsv1.2")) protocols |= REDIS_TLS_PROTO_TLSv1_2;
+        else if (!strcasecmp(tokens[i], "tlsv1.3")) {
+#ifdef TLS1_3_VERSION
+            protocols |= REDIS_TLS_PROTO_TLSv1_3;
+#else
+            serverLog(LL_WARNING, "TLSv1.3 is specified in tls-protocols but not supported by OpenSSL.");
+            protocols = -1;
+            break;
+#endif
+        } else {
+            serverLog(LL_WARNING, "Invalid tls-protocols specified. "
+                    "Use a combination of 'TLSv1', 'TLSv1.1', 'TLSv1.2' and 'TLSv1.3'.");
+            protocols = -1;
+            break;
+        }
+    }
+    sdsfreesplitres(tokens, count);
+
+    return protocols;
+}
+
+/* list of connections with pending data already read from the socket, but not
+ * served to the reader yet. */
+static list *pending_list = NULL;
+
+/**
+ * OpenSSL global initialization and locking handling callbacks.
+ * Note that this is only required for OpenSSL < 1.1.0.
+ */
+
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+#define USE_CRYPTO_LOCKS
+#endif
+
+#ifdef USE_CRYPTO_LOCKS
+
+static pthread_mutex_t *openssl_locks;
+
+/* CRYPTO_set_locking_callback() callback for OpenSSL < 1.1.0: lock or
+ * unlock the pre-allocated mutex identified by 'lock_id' depending on
+ * whether CRYPTO_LOCK is set in 'mode'. File/line args are unused. */
+static void sslLockingCallback(int mode, int lock_id, const char *f, int line) {
+    pthread_mutex_t *mt = openssl_locks + lock_id;
+
+    if (mode & CRYPTO_LOCK) {
+        pthread_mutex_lock(mt);
+    } else {
+        pthread_mutex_unlock(mt);
+    }
+
+    (void)f;
+    (void)line;
+}
+
+/* Allocate and register the OpenSSL locking callback and its mutex
+ * array (required for thread safety on OpenSSL < 1.1.0). If another
+ * component already installed a callback, leave it untouched. */
+static void initCryptoLocks(void) {
+    unsigned i, nlocks;
+    if (CRYPTO_get_locking_callback() != NULL) {
+        /* Someone already set the callback before us. Don't destroy it! */
+        return;
+    }
+    nlocks = CRYPTO_num_locks();
+    openssl_locks = zmalloc(sizeof(*openssl_locks) * nlocks);
+    for (i = 0; i < nlocks; i++) {
+        pthread_mutex_init(openssl_locks + i, NULL);
+    }
+    CRYPTO_set_locking_callback(sslLockingCallback);
+}
+#endif /* USE_CRYPTO_LOCKS */
+
+/* One-time global OpenSSL initialization: load openssl.cnf, install
+ * locking callbacks where needed, seed the PRNG and create the list of
+ * connections with pending buffered data. Called once at startup. */
+static void tlsInit(void) {
+    /* Enable configuring OpenSSL using the standard openssl.cnf
+     * OPENSSL_config()/OPENSSL_init_crypto() should be the first
+     * call to the OpenSSL* library.
+     *  - OPENSSL_config() should be used for OpenSSL versions < 1.1.0
+     *  - OPENSSL_init_crypto() should be used for OpenSSL versions >= 1.1.0
+     */
+    #if OPENSSL_VERSION_NUMBER < 0x10100000L
+    OPENSSL_config(NULL);
+    SSL_load_error_strings();
+    SSL_library_init();
+    #elif OPENSSL_VERSION_NUMBER < 0x10101000L
+    OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CONFIG, NULL);
+    #else
+    /* OPENSSL_INIT_ATFORK makes the PRNG fork-safe (1.1.1+ only). */
+    OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CONFIG|OPENSSL_INIT_ATFORK, NULL);
+    #endif
+
+#ifdef USE_CRYPTO_LOCKS
+    initCryptoLocks();
+#endif
+
+    if (!RAND_poll()) {
+        serverLog(LL_WARNING, "OpenSSL: Failed to seed random number generator.");
+    }
+
+    pending_list = listCreate();
+}
+
+/* Release the server and client SSL contexts and tear down global
+ * OpenSSL state. Counterpart of tlsInit()/tlsConfigure(). */
+static void tlsCleanup(void) {
+    if (redis_tls_ctx) {
+        SSL_CTX_free(redis_tls_ctx);
+        redis_tls_ctx = NULL;
+    }
+    if (redis_tls_client_ctx) {
+        SSL_CTX_free(redis_tls_client_ctx);
+        redis_tls_client_ctx = NULL;
+    }
+
+    #if OPENSSL_VERSION_NUMBER >= 0x10100000L && !defined(LIBRESSL_VERSION_NUMBER)
+    // unavailable on LibreSSL
+    OPENSSL_cleanup();
+    #endif
+}
+
+/* Callback for passing a keyfile password stored as an sds to OpenSSL.
+ * 'u' is the password set via SSL_CTX_set_default_passwd_cb_userdata().
+ * Returns the password length copied into 'buf', or -1 when no password
+ * is configured or it does not fit in 'size' bytes. */
+static int tlsPasswordCallback(char *buf, int size, int rwflag, void *u) {
+    UNUSED(rwflag);
+
+    const char *pass = u;
+    size_t pass_len;
+
+    if (!pass) return -1;
+    pass_len = strlen(pass);
+    if (pass_len > (size_t) size) return -1;
+    memcpy(buf, pass, pass_len);
+
+    return (int) pass_len;
+}
+
+/* Create a *base* SSL_CTX using the SSL configuration provided. The base context
+ * includes everything that's common for both client-side and server-side connections:
+ * protocol versions, certificate/key material, CA verification locations and
+ * cipher configuration. Returns NULL on any failure (logged).
+ *
+ * NOTE(review): the SSL_CTX_new() result is used without a NULL check; an
+ * allocation failure here would crash on the first SSL_CTX_set_options()
+ * call — confirm whether an explicit check is wanted. */
+static SSL_CTX *createSSLContext(redisTLSContextConfig *ctx_config, int protocols, int client) {
+    const char *cert_file = client ? ctx_config->client_cert_file : ctx_config->cert_file;
+    const char *key_file = client ? ctx_config->client_key_file : ctx_config->key_file;
+    const char *key_file_pass = client ? ctx_config->client_key_file_pass : ctx_config->key_file_pass;
+    char errbuf[256];
+    SSL_CTX *ctx = NULL;
+
+    /* SSLv23_method() negotiates the highest mutually supported version;
+     * unwanted versions are masked out with SSL_OP_NO_* below. */
+    ctx = SSL_CTX_new(SSLv23_method());
+
+    SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3);
+
+#ifdef SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS
+    SSL_CTX_set_options(ctx, SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS);
+#endif
+
+    if (!(protocols & REDIS_TLS_PROTO_TLSv1))
+        SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1);
+    if (!(protocols & REDIS_TLS_PROTO_TLSv1_1))
+        SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1);
+#ifdef SSL_OP_NO_TLSv1_2
+    if (!(protocols & REDIS_TLS_PROTO_TLSv1_2))
+        SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_2);
+#endif
+#ifdef SSL_OP_NO_TLSv1_3
+    if (!(protocols & REDIS_TLS_PROTO_TLSv1_3))
+        SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_3);
+#endif
+
+#ifdef SSL_OP_NO_COMPRESSION
+    SSL_CTX_set_options(ctx, SSL_OP_NO_COMPRESSION);
+#endif
+
+    SSL_CTX_set_mode(ctx, SSL_MODE_ENABLE_PARTIAL_WRITE|SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER);
+    SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL);
+
+    SSL_CTX_set_default_passwd_cb(ctx, tlsPasswordCallback);
+    SSL_CTX_set_default_passwd_cb_userdata(ctx, (void *) key_file_pass);
+
+    if (SSL_CTX_use_certificate_chain_file(ctx, cert_file) <= 0) {
+        ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+        serverLog(LL_WARNING, "Failed to load certificate: %s: %s", cert_file, errbuf);
+        goto error;
+    }
+
+    if (SSL_CTX_use_PrivateKey_file(ctx, key_file, SSL_FILETYPE_PEM) <= 0) {
+        ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+        serverLog(LL_WARNING, "Failed to load private key: %s: %s", key_file, errbuf);
+        goto error;
+    }
+
+    if ((ctx_config->ca_cert_file || ctx_config->ca_cert_dir) &&
+        SSL_CTX_load_verify_locations(ctx, ctx_config->ca_cert_file, ctx_config->ca_cert_dir) <= 0) {
+        ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+        serverLog(LL_WARNING, "Failed to configure CA certificate(s) file/directory: %s", errbuf);
+        goto error;
+    }
+
+    if (ctx_config->ciphers && !SSL_CTX_set_cipher_list(ctx, ctx_config->ciphers)) {
+        serverLog(LL_WARNING, "Failed to configure ciphers: %s", ctx_config->ciphers);
+        goto error;
+    }
+
+#ifdef TLS1_3_VERSION
+    /* TLS 1.3 uses a separate ciphersuite configuration API. */
+    if (ctx_config->ciphersuites && !SSL_CTX_set_ciphersuites(ctx, ctx_config->ciphersuites)) {
+        serverLog(LL_WARNING, "Failed to configure ciphersuites: %s", ctx_config->ciphersuites);
+        goto error;
+    }
+#endif
+
+    return ctx;
+
+error:
+    if (ctx) SSL_CTX_free(ctx);
+    return NULL;
+}
+
+/* Attempt to configure/reconfigure TLS. This operation is atomic and will
+ * leave the SSL_CTX unchanged if fails.
+ * @priv: config of redisTLSContextConfig.
+ * @reconfigure: if true, ignore the previous configure; if false, only
+ *               configure from @ctx_config if redis_tls_ctx is NULL.
+ *
+ * Builds the new server (and optionally client) contexts fully before
+ * swapping them in, so any error path keeps the previous contexts live.
+ * Returns C_OK / C_ERR. */
+static int tlsConfigure(void *priv, int reconfigure) {
+    redisTLSContextConfig *ctx_config = (redisTLSContextConfig *)priv;
+    char errbuf[256];
+    SSL_CTX *ctx = NULL;
+    SSL_CTX *client_ctx = NULL;
+
+    if (!reconfigure && redis_tls_ctx) {
+        return C_OK;
+    }
+
+    if (!ctx_config->cert_file) {
+        serverLog(LL_WARNING, "No tls-cert-file configured!");
+        goto error;
+    }
+
+    if (!ctx_config->key_file) {
+        serverLog(LL_WARNING, "No tls-key-file configured!");
+        goto error;
+    }
+
+    /* Peer verification needs a trust store whenever we authenticate
+     * clients or act as a TLS client ourselves (cluster/replication). */
+    if (((server.tls_auth_clients != TLS_CLIENT_AUTH_NO) || server.tls_cluster || server.tls_replication) &&
+        !ctx_config->ca_cert_file && !ctx_config->ca_cert_dir) {
+        serverLog(LL_WARNING, "Either tls-ca-cert-file or tls-ca-cert-dir must be specified when tls-cluster, tls-replication or tls-auth-clients are enabled!");
+        goto error;
+    }
+
+    int protocols = parseProtocolsConfig(ctx_config->protocols);
+    if (protocols == -1) goto error;
+
+    /* Create server side/general context */
+    ctx = createSSLContext(ctx_config, protocols, 0);
+    if (!ctx) goto error;
+
+    if (ctx_config->session_caching) {
+        SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_SERVER);
+        SSL_CTX_sess_set_cache_size(ctx, ctx_config->session_cache_size);
+        SSL_CTX_set_timeout(ctx, ctx_config->session_cache_timeout);
+        SSL_CTX_set_session_id_context(ctx, (void *) "redis", 5);
+    } else {
+        SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_OFF);
+    }
+
+#ifdef SSL_OP_NO_CLIENT_RENEGOTIATION
+    SSL_CTX_set_options(ctx, SSL_OP_NO_CLIENT_RENEGOTIATION);
+#endif
+
+    if (ctx_config->prefer_server_ciphers)
+        SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);
+
+#if ((OPENSSL_VERSION_NUMBER < 0x30000000L) && defined(SSL_CTX_set_ecdh_auto))
+    SSL_CTX_set_ecdh_auto(ctx, 1);
+#endif
+    SSL_CTX_set_options(ctx, SSL_OP_SINGLE_DH_USE);
+
+    if (ctx_config->dh_params_file) {
+        FILE *dhfile = fopen(ctx_config->dh_params_file, "r");
+        if (!dhfile) {
+            serverLog(LL_WARNING, "Failed to load %s: %s", ctx_config->dh_params_file, strerror(errno));
+            goto error;
+        }
+
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L)
+        /* OpenSSL 3.x: the DH_* API is deprecated; decode the PEM DH
+         * params into an EVP_PKEY via the OSSL_DECODER interface. */
+        EVP_PKEY *pkey = NULL;
+        OSSL_DECODER_CTX *dctx = OSSL_DECODER_CTX_new_for_pkey(
+            &pkey, "PEM", NULL, "DH", OSSL_KEYMGMT_SELECT_DOMAIN_PARAMETERS, NULL, NULL);
+        if (!dctx) {
+            serverLog(LL_WARNING, "No decoder for DH params.");
+            fclose(dhfile);
+            goto error;
+        }
+        if (!OSSL_DECODER_from_fp(dctx, dhfile)) {
+            serverLog(LL_WARNING, "%s: failed to read DH params.", ctx_config->dh_params_file);
+            OSSL_DECODER_CTX_free(dctx);
+            fclose(dhfile);
+            goto error;
+        }
+
+        OSSL_DECODER_CTX_free(dctx);
+        fclose(dhfile);
+
+        if (SSL_CTX_set0_tmp_dh_pkey(ctx, pkey) <= 0) {
+            ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+            serverLog(LL_WARNING, "Failed to load DH params file: %s: %s", ctx_config->dh_params_file, errbuf);
+            EVP_PKEY_free(pkey);
+            goto error;
+        }
+        /* Not freeing pkey, it is owned by OpenSSL now */
+#else
+        DH *dh = PEM_read_DHparams(dhfile, NULL, NULL, NULL);
+        fclose(dhfile);
+        if (!dh) {
+            serverLog(LL_WARNING, "%s: failed to read DH params.", ctx_config->dh_params_file);
+            goto error;
+        }
+
+        if (SSL_CTX_set_tmp_dh(ctx, dh) <= 0) {
+            ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+            serverLog(LL_WARNING, "Failed to load DH params file: %s: %s", ctx_config->dh_params_file, errbuf);
+            DH_free(dh);
+            goto error;
+        }
+
+        DH_free(dh);
+#endif
+    } else {
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L)
+        SSL_CTX_set_dh_auto(ctx, 1);
+#endif
+    }
+
+    /* If a client-side certificate is configured, create an explicit client context */
+    if (ctx_config->client_cert_file && ctx_config->client_key_file) {
+        client_ctx = createSSLContext(ctx_config, protocols, 1);
+        if (!client_ctx) goto error;
+    }
+
+    /* Atomic swap: free the old contexts only after the new ones are
+     * fully built. SSL_CTX_free(NULL) is a no-op. */
+    SSL_CTX_free(redis_tls_ctx);
+    SSL_CTX_free(redis_tls_client_ctx);
+    redis_tls_ctx = ctx;
+    redis_tls_client_ctx = client_ctx;
+
+    return C_OK;
+
+error:
+    if (ctx) SSL_CTX_free(ctx);
+    if (client_ctx) SSL_CTX_free(client_ctx);
+    return C_ERR;
+}
+
+#ifdef TLS_DEBUGGING
+#define TLSCONN_DEBUG(fmt, ...) \
+ serverLog(LL_DEBUG, "TLSCONN: " fmt, __VA_ARGS__)
+#else
+#define TLSCONN_DEBUG(fmt, ...)
+#endif
+
+static ConnectionType CT_TLS;
+
+/* Normal socket connections have a simple events/handler correlation.
+ *
+ * With TLS connections we need to handle cases where during a logical read
+ * or write operation, the SSL library asks to block for the opposite
+ * socket operation.
+ *
+ * When this happens, we need to do two things:
+ * 1. Make sure we register for the event.
+ * 2. Make sure we know which handler needs to execute when the
+ * event fires. That is, if we notify the caller of a write operation
+ * that it blocks, and SSL asks for a read, we need to trigger the
+ * write handler again on the next read event.
+ *
+ */
+
+typedef enum {
+ WANT_READ = 1,
+ WANT_WRITE
+} WantIOType;
+
+#define TLS_CONN_FLAG_READ_WANT_WRITE (1<<0)
+#define TLS_CONN_FLAG_WRITE_WANT_READ (1<<1)
+#define TLS_CONN_FLAG_FD_SET (1<<2)
+
+typedef struct tls_connection {
+ connection c;
+ int flags;
+ SSL *ssl;
+ char *ssl_error;
+ listNode *pending_list_node;
+} tls_connection;
+
+/* Allocate a tls_connection with a fresh SSL object. Client-side
+ * connections use the dedicated client context when one was configured,
+ * otherwise both sides share redis_tls_ctx. The fd is not set yet. */
+static connection *createTLSConnection(int client_side) {
+    SSL_CTX *ctx = redis_tls_ctx;
+    if (client_side && redis_tls_client_ctx)
+        ctx = redis_tls_client_ctx;
+    tls_connection *conn = zcalloc(sizeof(tls_connection));
+    conn->c.type = &CT_TLS;
+    conn->c.fd = -1;
+    conn->c.iovcnt = IOV_MAX;
+    conn->ssl = SSL_new(ctx);
+    return (connection *) conn;
+}
+
+/* ConnectionType 'conn_create' entry point: new outgoing (client-side)
+ * TLS connection. */
+static connection *connCreateTLS(void) {
+    return createTLSConnection(1);
+}
+
+/* Fetch the latest OpenSSL error and store it in the connection as a
+ * human-readable string (replacing any previous one); clears last_errno
+ * since the failure is an SSL-level error, not a syscall error. */
+static void updateTLSError(tls_connection *conn) {
+    conn->c.last_errno = 0;
+    if (conn->ssl_error) zfree(conn->ssl_error);
+    conn->ssl_error = zmalloc(512);
+    ERR_error_string_n(ERR_get_error(), conn->ssl_error, 512);
+}
+
+/* Create a new TLS connection that is already associated with
+ * an accepted underlying file descriptor.
+ *
+ * The socket is not ready for I/O until connAccept() was called and
+ * invoked the connection-level accept handler.
+ *
+ * Callers should use connGetState() and verify the created connection
+ * is not in an error state.
+ *
+ * 'priv' points to an int holding the TLS_CLIENT_AUTH_* mode that
+ * selects the SSL_VERIFY_* policy for this connection. */
+static connection *connCreateAcceptedTLS(int fd, void *priv) {
+    int require_auth = *(int *)priv;
+    tls_connection *conn = (tls_connection *) createTLSConnection(0);
+    conn->c.fd = fd;
+    conn->c.state = CONN_STATE_ACCEPTING;
+
+    if (!conn->ssl) {
+        /* SSL_new() failed; surface the OpenSSL error via the conn. */
+        updateTLSError(conn);
+        conn->c.state = CONN_STATE_ERROR;
+        return (connection *) conn;
+    }
+
+    switch (require_auth) {
+        case TLS_CLIENT_AUTH_NO:
+            SSL_set_verify(conn->ssl, SSL_VERIFY_NONE, NULL);
+            break;
+        case TLS_CLIENT_AUTH_OPTIONAL:
+            SSL_set_verify(conn->ssl, SSL_VERIFY_PEER, NULL);
+            break;
+        default: /* TLS_CLIENT_AUTH_YES, also fall-secure */
+            SSL_set_verify(conn->ssl, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL);
+            break;
+    }
+
+    SSL_set_fd(conn->ssl, conn->c.fd);
+    SSL_set_accept_state(conn->ssl);
+
+    return (connection *) conn;
+}
+
+static void tlsEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask);
+static void updateSSLEvent(tls_connection *conn);
+
+/* Process the return code received from OpenSSL.
+ * Update the want parameter with expected I/O.
+ * Update the connection's error state if a real error has occurred.
+ * Returns an SSL error code, or 0 if no further handling is required
+ * (i.e. the operation simply needs to be retried when 'want' is ready).
+ */
+static int handleSSLReturnCode(tls_connection *conn, int ret_value, WantIOType *want) {
+    if (ret_value <= 0) {
+        int ssl_err = SSL_get_error(conn->ssl, ret_value);
+        switch (ssl_err) {
+            case SSL_ERROR_WANT_WRITE:
+                *want = WANT_WRITE;
+                return 0;
+            case SSL_ERROR_WANT_READ:
+                *want = WANT_READ;
+                return 0;
+            case SSL_ERROR_SYSCALL:
+                /* Syscall-level failure: record errno; a NULL ssl_error
+                 * with errno==0 indicates an unexpected EOF. */
+                conn->c.last_errno = errno;
+                if (conn->ssl_error) zfree(conn->ssl_error);
+                conn->ssl_error = errno ? zstrdup(strerror(errno)) : NULL;
+                break;
+            default:
+                /* Error! */
+                updateTLSError(conn);
+                break;
+        }
+
+        return ssl_err;
+    }
+
+    return 0;
+}
+
+/* Handle OpenSSL return code following SSL_write() or SSL_read():
+ *
+ * - Updates conn state and last_errno.
+ * - If update_event is nonzero, calls updateSSLEvent() when necessary.
+ *
+ * Returns ret_value, or -1 on error or dropped connection.
+ */
+static int updateStateAfterSSLIO(tls_connection *conn, int ret_value, int update_event) {
+    /* If system call was interrupted, there's no need to go through the full
+     * OpenSSL error handling and just report this for the caller to retry the
+     * operation.
+     */
+    if (errno == EINTR) {
+        conn->c.last_errno = EINTR;
+        return -1;
+    }
+
+    if (ret_value <= 0) {
+        WantIOType want = 0;
+        int ssl_err;
+        if (!(ssl_err = handleSSLReturnCode(conn, ret_value, &want))) {
+            /* Not a real error: remember which opposite event the current
+             * logical operation is blocked on, then report EAGAIN. */
+            if (want == WANT_READ) conn->flags |= TLS_CONN_FLAG_WRITE_WANT_READ;
+            if (want == WANT_WRITE) conn->flags |= TLS_CONN_FLAG_READ_WANT_WRITE;
+            if (update_event) updateSSLEvent(conn);
+            errno = EAGAIN;
+            return -1;
+        } else {
+            /* Clean shutdown (ZERO_RETURN) or EOF (SYSCALL with errno==0)
+             * close the connection; anything else is an error state. */
+            if (ssl_err == SSL_ERROR_ZERO_RETURN ||
+                    ((ssl_err == SSL_ERROR_SYSCALL && !errno))) {
+                conn->c.state = CONN_STATE_CLOSED;
+                return -1;
+            } else {
+                conn->c.state = CONN_STATE_ERROR;
+                return -1;
+            }
+        }
+    }
+
+    return ret_value;
+}
+
+/* Register exactly the single ae event the SSL handshake asked for
+ * (WANT_READ or WANT_WRITE), dropping the opposite one. Used during
+ * connect/accept, where the R/W handlers are not meaningful yet. */
+static void registerSSLEvent(tls_connection *conn, WantIOType want) {
+    int mask = aeGetFileEvents(server.el, conn->c.fd);
+
+    switch (want) {
+        case WANT_READ:
+            if (mask & AE_WRITABLE) aeDeleteFileEvent(server.el, conn->c.fd, AE_WRITABLE);
+            if (!(mask & AE_READABLE)) aeCreateFileEvent(server.el, conn->c.fd, AE_READABLE,
+                        tlsEventHandler, conn);
+            break;
+        case WANT_WRITE:
+            if (mask & AE_READABLE) aeDeleteFileEvent(server.el, conn->c.fd, AE_READABLE);
+            if (!(mask & AE_WRITABLE)) aeCreateFileEvent(server.el, conn->c.fd, AE_WRITABLE,
+                        tlsEventHandler, conn);
+            break;
+        default:
+            serverAssert(0);
+            break;
+    }
+}
+
+/* Reconcile the registered ae events with what the connection currently
+ * needs: an installed read/write handler, or an SSL operation blocked on
+ * the opposite direction (the WANT flags). */
+static void updateSSLEvent(tls_connection *conn) {
+    int mask = aeGetFileEvents(server.el, conn->c.fd);
+    int need_read = conn->c.read_handler || (conn->flags & TLS_CONN_FLAG_WRITE_WANT_READ);
+    int need_write = conn->c.write_handler || (conn->flags & TLS_CONN_FLAG_READ_WANT_WRITE);
+
+    if (need_read && !(mask & AE_READABLE))
+        aeCreateFileEvent(server.el, conn->c.fd, AE_READABLE, tlsEventHandler, conn);
+    if (!need_read && (mask & AE_READABLE))
+        aeDeleteFileEvent(server.el, conn->c.fd, AE_READABLE);
+
+    if (need_write && !(mask & AE_WRITABLE))
+        aeCreateFileEvent(server.el, conn->c.fd, AE_WRITABLE, tlsEventHandler, conn);
+    if (!need_write && (mask & AE_WRITABLE))
+        aeDeleteFileEvent(server.el, conn->c.fd, AE_WRITABLE);
+}
+
+/* Core TLS event dispatcher, driven by tlsEventHandler() and by
+ * tlsProcessPendingData(). Depending on the connection state it advances
+ * the handshake (SSL_connect/SSL_accept) or fires the application read
+ * and write handlers, honoring the WANT_* flags and the WRITE_BARRIER
+ * inversion, then re-registers ae events via updateSSLEvent(). */
+static void tlsHandleEvent(tls_connection *conn, int mask) {
+    int ret, conn_error;
+
+    /* Note: this function has no 'fd' parameter; use conn->c.fd for
+     * debug tracing. */
+    TLSCONN_DEBUG("tlsEventHandler(): fd=%d, state=%d, mask=%d, r=%d, w=%d, flags=%d",
+            conn->c.fd, conn->c.state, mask, conn->c.read_handler != NULL, conn->c.write_handler != NULL,
+            conn->flags);
+
+    ERR_clear_error();
+
+    switch (conn->c.state) {
+        case CONN_STATE_CONNECTING:
+            conn_error = anetGetError(conn->c.fd);
+            if (conn_error) {
+                conn->c.last_errno = conn_error;
+                conn->c.state = CONN_STATE_ERROR;
+            } else {
+                if (!(conn->flags & TLS_CONN_FLAG_FD_SET)) {
+                    SSL_set_fd(conn->ssl, conn->c.fd);
+                    conn->flags |= TLS_CONN_FLAG_FD_SET;
+                }
+                ret = SSL_connect(conn->ssl);
+                if (ret <= 0) {
+                    WantIOType want = 0;
+                    if (!handleSSLReturnCode(conn, ret, &want)) {
+                        registerSSLEvent(conn, want);
+
+                        /* Avoid hitting UpdateSSLEvent, which knows nothing
+                         * of what SSL_connect() wants and instead looks at our
+                         * R/W handlers.
+                         */
+                        return;
+                    }
+
+                    /* If not handled, it's an error */
+                    conn->c.state = CONN_STATE_ERROR;
+                } else {
+                    conn->c.state = CONN_STATE_CONNECTED;
+                }
+            }
+
+            if (!callHandler((connection *) conn, conn->c.conn_handler)) return;
+            conn->c.conn_handler = NULL;
+            break;
+        case CONN_STATE_ACCEPTING:
+            ret = SSL_accept(conn->ssl);
+            if (ret <= 0) {
+                WantIOType want = 0;
+                if (!handleSSLReturnCode(conn, ret, &want)) {
+                    /* Avoid hitting UpdateSSLEvent, which knows nothing
+                     * of what SSL_accept() wants and instead looks at our
+                     * R/W handlers.
+                     */
+                    registerSSLEvent(conn, want);
+                    return;
+                }
+
+                /* If not handled, it's an error */
+                conn->c.state = CONN_STATE_ERROR;
+            } else {
+                conn->c.state = CONN_STATE_CONNECTED;
+            }
+
+            if (!callHandler((connection *) conn, conn->c.conn_handler)) return;
+            conn->c.conn_handler = NULL;
+            break;
+        case CONN_STATE_CONNECTED:
+        {
+            int call_read = ((mask & AE_READABLE) && conn->c.read_handler) ||
+                ((mask & AE_WRITABLE) && (conn->flags & TLS_CONN_FLAG_READ_WANT_WRITE));
+            int call_write = ((mask & AE_WRITABLE) && conn->c.write_handler) ||
+                ((mask & AE_READABLE) && (conn->flags & TLS_CONN_FLAG_WRITE_WANT_READ));
+
+            /* Normally we execute the readable event first, and the writable
+             * event later. This is useful as sometimes we may be able
+             * to serve the reply of a query immediately after processing the
+             * query.
+             *
+             * However if WRITE_BARRIER is set in the mask, our application is
+             * asking us to do the reverse: never fire the writable event
+             * after the readable. In such a case, we invert the calls.
+             * This is useful when, for instance, we want to do things
+             * in the beforeSleep() hook, like fsynching a file to disk,
+             * before replying to a client. */
+            int invert = conn->c.flags & CONN_FLAG_WRITE_BARRIER;
+
+            if (!invert && call_read) {
+                conn->flags &= ~TLS_CONN_FLAG_READ_WANT_WRITE;
+                if (!callHandler((connection *) conn, conn->c.read_handler)) return;
+            }
+
+            /* Fire the writable event. */
+            if (call_write) {
+                conn->flags &= ~TLS_CONN_FLAG_WRITE_WANT_READ;
+                if (!callHandler((connection *) conn, conn->c.write_handler)) return;
+            }
+
+            /* If we have to invert the call, fire the readable event now
+             * after the writable one. */
+            if (invert && call_read) {
+                conn->flags &= ~TLS_CONN_FLAG_READ_WANT_WRITE;
+                if (!callHandler((connection *) conn, conn->c.read_handler)) return;
+            }
+
+            /* If SSL has pending data, already read from the socket, we're at
+             * risk of not calling the read handler again, make sure to add it
+             * to a list of pending connection that should be handled anyway. */
+            if ((mask & AE_READABLE)) {
+                if (SSL_pending(conn->ssl) > 0) {
+                    if (!conn->pending_list_node) {
+                        listAddNodeTail(pending_list, conn);
+                        conn->pending_list_node = listLast(pending_list);
+                    }
+                } else if (conn->pending_list_node) {
+                    listDelNode(pending_list, conn->pending_list_node);
+                    conn->pending_list_node = NULL;
+                }
+            }
+
+            break;
+        }
+        default:
+            break;
+    }
+
+    updateSSLEvent(conn);
+}
+
+/* ae file event callback: unwrap the tls_connection from clientData and
+ * delegate to tlsHandleEvent(). */
+static void tlsEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask) {
+    UNUSED(el);
+    UNUSED(fd);
+    tls_connection *conn = clientData;
+    tlsHandleEvent(conn, mask);
+}
+
+/* Listening-socket accept callback: accept up to MAX_ACCEPTS_PER_CALL
+ * pending TCP connections and wrap each one in an accepted TLS
+ * connection handed to acceptCommonHandler(). */
+static void tlsAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
+    int cport, cfd, max = MAX_ACCEPTS_PER_CALL;
+    char cip[NET_IP_STR_LEN];
+    UNUSED(el);
+    UNUSED(mask);
+    UNUSED(privdata);
+
+    while(max--) {
+        cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
+        if (cfd == ANET_ERR) {
+            if (errno != EWOULDBLOCK)
+                serverLog(LL_WARNING,
+                    "Accepting client connection: %s", server.neterr);
+            return;
+        }
+        serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
+        acceptCommonHandler(connCreateAcceptedTLS(cfd, &server.tls_auth_clients),0,cip);
+    }
+}
+
+/* Resolve the local or remote address of the underlying socket. */
+static int connTLSAddr(connection *conn, char *ip, size_t ip_len, int *port, int remote) {
+    return anetFdToString(conn->fd, ip, ip_len, port, remote);
+}
+
+/* TLS runs over TCP, so locality is determined by the TCP layer. */
+static int connTLSIsLocal(connection *conn) {
+    return connectionTypeTcp()->is_local(conn);
+}
+
+/* Bind/listen on the configured addresses; plain TCP at this stage,
+ * TLS is negotiated per-connection on accept. */
+static int connTLSListen(connListener *listener) {
+    return listenToPort(listener);
+}
+
+/* Shut down the TLS session (sending close_notify only if the handshake
+ * completed), free the SSL object, then shut down the TCP layer. */
+static void connTLSShutdown(connection *conn_) {
+    tls_connection *conn = (tls_connection *) conn_;
+
+    if (conn->ssl) {
+        if (conn->c.state == CONN_STATE_CONNECTED)
+            SSL_shutdown(conn->ssl);
+        SSL_free(conn->ssl);
+        conn->ssl = NULL;
+    }
+
+    connectionTypeTcp()->shutdown(conn_);
+}
+
+/* Close the connection: tear down the SSL session and its stored error
+ * string, unlink the connection from the pending-data list, then let
+ * the TCP layer close the socket and free the connection. */
+static void connTLSClose(connection *conn_) {
+    tls_connection *conn = (tls_connection *) conn_;
+
+    if (conn->ssl) {
+        if (conn->c.state == CONN_STATE_CONNECTED)
+            SSL_shutdown(conn->ssl);
+        SSL_free(conn->ssl);
+        conn->ssl = NULL;
+    }
+
+    if (conn->ssl_error) {
+        zfree(conn->ssl_error);
+        conn->ssl_error = NULL;
+    }
+
+    if (conn->pending_list_node) {
+        listDelNode(pending_list, conn->pending_list_node);
+        conn->pending_list_node = NULL;
+    }
+
+    connectionTypeTcp()->close(conn_);
+}
+
+/* Connection-level accept: try to complete the TLS handshake on an
+ * accepted connection. If OpenSSL needs more I/O, register for the
+ * needed event and finish asynchronously; the accept_handler is invoked
+ * once the handshake completes. Returns C_OK unless the connection was
+ * not in ACCEPTING state or the handshake failed outright. */
+static int connTLSAccept(connection *_conn, ConnectionCallbackFunc accept_handler) {
+    tls_connection *conn = (tls_connection *) _conn;
+    int ret;
+
+    if (conn->c.state != CONN_STATE_ACCEPTING) return C_ERR;
+    ERR_clear_error();
+
+    /* Try to accept */
+    conn->c.conn_handler = accept_handler;
+    ret = SSL_accept(conn->ssl);
+
+    if (ret <= 0) {
+        WantIOType want = 0;
+        if (!handleSSLReturnCode(conn, ret, &want)) {
+            registerSSLEvent(conn, want);   /* We'll fire back */
+            return C_OK;
+        } else {
+            conn->c.state = CONN_STATE_ERROR;
+            return C_ERR;
+        }
+    }
+
+    conn->c.state = CONN_STATE_CONNECTED;
+    if (!callHandler((connection *) conn, conn->c.conn_handler)) return C_OK;
+    conn->c.conn_handler = NULL;
+
+    return C_OK;
+}
+
+/* Initiate a non-blocking outgoing connection. Only the TCP connect is
+ * started here; the TLS handshake begins in the event handler once the
+ * socket becomes writable (CONN_STATE_CONNECTING path). */
+static int connTLSConnect(connection *conn_, const char *addr, int port, const char *src_addr, ConnectionCallbackFunc connect_handler) {
+    tls_connection *conn = (tls_connection *) conn_;
+    unsigned char addr_buf[sizeof(struct in6_addr)];
+
+    if (conn->c.state != CONN_STATE_NONE) return C_ERR;
+    ERR_clear_error();
+
+    /* Check whether addr is an IP address, if not, use the value for Server Name Indication */
+    if (inet_pton(AF_INET, addr, addr_buf) != 1 && inet_pton(AF_INET6, addr, addr_buf) != 1) {
+        SSL_set_tlsext_host_name(conn->ssl, addr);
+    }
+
+    /* Initiate Socket connection first */
+    if (connectionTypeTcp()->connect(conn_, addr, port, src_addr, connect_handler) == C_ERR) return C_ERR;
+
+    /* Return now, once the socket is connected we'll initiate
+     * TLS connection from the event handler.
+     */
+    return C_OK;
+}
+
+/* Non-blocking write through SSL_write(). Returns bytes written, or -1
+ * with errno=EAGAIN when the operation must be retried (see
+ * updateStateAfterSSLIO()). */
+static int connTLSWrite(connection *conn_, const void *data, size_t data_len) {
+    tls_connection *conn = (tls_connection *) conn_;
+    int ret;
+
+    if (conn->c.state != CONN_STATE_CONNECTED) return -1;
+    ERR_clear_error();
+    ret = SSL_write(conn->ssl, data, data_len);
+    return updateStateAfterSSLIO(conn, ret, 1);
+}
+
+/* Vectored write emulation: OpenSSL has no writev, so either coalesce
+ * small buffers into one stack buffer and write once, or (for large
+ * totals) issue one SSL_write() per buffer to avoid big memcpys. */
+static int connTLSWritev(connection *conn_, const struct iovec *iov, int iovcnt) {
+    if (iovcnt == 1) return connTLSWrite(conn_, iov[0].iov_base, iov[0].iov_len);
+
+    /* Accumulate the amount of bytes of each buffer and check if it exceeds NET_MAX_WRITES_PER_EVENT. */
+    size_t iov_bytes_len = 0;
+    for (int i = 0; i < iovcnt; i++) {
+        iov_bytes_len += iov[i].iov_len;
+        if (iov_bytes_len > NET_MAX_WRITES_PER_EVENT) break;
+    }
+
+    /* The amount of all buffers is greater than NET_MAX_WRITES_PER_EVENT,
+     * which is not worth doing so much memory copying to reduce system calls,
+     * therefore, invoke connTLSWrite() multiple times to avoid memory copies. */
+    if (iov_bytes_len > NET_MAX_WRITES_PER_EVENT) {
+        ssize_t tot_sent = 0;
+        for (int i = 0; i < iovcnt; i++) {
+            ssize_t sent = connTLSWrite(conn_, iov[i].iov_base, iov[i].iov_len);
+            if (sent <= 0) return tot_sent > 0 ? tot_sent : sent;
+            tot_sent += sent;
+            /* Short write: stop and report what was sent so far. */
+            if ((size_t) sent != iov[i].iov_len) break;
+        }
+        return tot_sent;
+    }
+
+    /* The amount of all buffers is less than NET_MAX_WRITES_PER_EVENT,
+     * which is worth doing more memory copies in exchange for fewer system calls,
+     * so concatenate these scattered buffers into a contiguous piece of memory
+     * and send it away by one call to connTLSWrite(). */
+    char buf[iov_bytes_len];
+    size_t offset = 0;
+    for (int i = 0; i < iovcnt; i++) {
+        memcpy(buf + offset, iov[i].iov_base, iov[i].iov_len);
+        offset += iov[i].iov_len;
+    }
+    return connTLSWrite(conn_, buf, iov_bytes_len);
+}
+
+/* Non-blocking read through SSL_read(). Returns bytes read, or -1 with
+ * errno=EAGAIN / connection state updated (see updateStateAfterSSLIO()). */
+static int connTLSRead(connection *conn_, void *buf, size_t buf_len) {
+    tls_connection *conn = (tls_connection *) conn_;
+    int ret;
+
+    if (conn->c.state != CONN_STATE_CONNECTED) return -1;
+    ERR_clear_error();
+    ret = SSL_read(conn->ssl, buf, buf_len);
+    return updateStateAfterSSLIO(conn, ret, 1);
+}
+
+/* Return the stored SSL error string, or NULL when the last failure was
+ * a plain syscall error (reported via last_errno instead). */
+static const char *connTLSGetLastError(connection *conn_) {
+    tls_connection *conn = (tls_connection *) conn_;
+
+    if (conn->ssl_error) return conn->ssl_error;
+    return NULL;
+}
+
+/* Install (or clear, when func is NULL) the write handler, record the
+ * write-barrier preference, and re-sync the registered ae events. */
+static int connTLSSetWriteHandler(connection *conn, ConnectionCallbackFunc func, int barrier) {
+    conn->write_handler = func;
+    if (barrier)
+        conn->flags |= CONN_FLAG_WRITE_BARRIER;
+    else
+        conn->flags &= ~CONN_FLAG_WRITE_BARRIER;
+    updateSSLEvent((tls_connection *) conn);
+    return C_OK;
+}
+
+/* Install (or clear, when func is NULL) the read handler and re-sync
+ * the registered ae events. */
+static int connTLSSetReadHandler(connection *conn, ConnectionCallbackFunc func) {
+    conn->read_handler = func;
+    updateSSLEvent((tls_connection *) conn);
+    return C_OK;
+}
+
+/* Switch the socket to blocking mode with the given send/recv timeout
+ * (milliseconds), for the synchronous I/O helpers below. */
+static void setBlockingTimeout(tls_connection *conn, long long timeout) {
+    anetBlock(NULL, conn->c.fd);
+    anetSendTimeout(NULL, conn->c.fd, timeout);
+    anetRecvTimeout(NULL, conn->c.fd, timeout);
+}
+
+/* Restore non-blocking mode and clear the send/recv timeouts. */
+static void unsetBlockingTimeout(tls_connection *conn) {
+    anetNonBlock(NULL, conn->c.fd);
+    anetSendTimeout(NULL, conn->c.fd, 0);
+    anetRecvTimeout(NULL, conn->c.fd, 0);
+}
+
+/* Synchronous connect: blocking TCP connect followed by a blocking TLS
+ * handshake. The timeout applies separately to the socket connect and
+ * to each handshake send/recv, so the total wait may exceed 'timeout'. */
+static int connTLSBlockingConnect(connection *conn_, const char *addr, int port, long long timeout) {
+    tls_connection *conn = (tls_connection *) conn_;
+    int ret;
+
+    if (conn->c.state != CONN_STATE_NONE) return C_ERR;
+
+    /* Initiate socket blocking connect first */
+    if (connectionTypeTcp()->blocking_connect(conn_, addr, port, timeout) == C_ERR) return C_ERR;
+
+    /* Initiate TLS connection now. We set up a send/recv timeout on the socket,
+     * which means the specified timeout will not be enforced accurately. */
+    SSL_set_fd(conn->ssl, conn->c.fd);
+    setBlockingTimeout(conn, timeout);
+
+    if ((ret = SSL_connect(conn->ssl)) <= 0) {
+        conn->c.state = CONN_STATE_ERROR;
+        return C_ERR;
+    }
+    unsetBlockingTimeout(conn);
+
+    conn->c.state = CONN_STATE_CONNECTED;
+    return C_OK;
+}
+
+/* Synchronous (blocking) write with a timeout. Partial-write mode is
+ * temporarily disabled so SSL_write() only returns once the full buffer
+ * was written, then re-enabled for normal non-blocking operation. */
+static ssize_t connTLSSyncWrite(connection *conn_, char *ptr, ssize_t size, long long timeout) {
+    tls_connection *conn = (tls_connection *) conn_;
+
+    setBlockingTimeout(conn, timeout);
+    SSL_clear_mode(conn->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
+    ERR_clear_error();
+    int ret = SSL_write(conn->ssl, ptr, size);
+    ret = updateStateAfterSSLIO(conn, ret, 0);
+    SSL_set_mode(conn->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
+    unsetBlockingTimeout(conn);
+
+    return ret;
+}
+
+/* Synchronous (blocking) read with a timeout. */
+static ssize_t connTLSSyncRead(connection *conn_, char *ptr, ssize_t size, long long timeout) {
+    tls_connection *conn = (tls_connection *) conn_;
+
+    setBlockingTimeout(conn, timeout);
+    ERR_clear_error();
+    int ret = SSL_read(conn->ssl, ptr, size);
+    ret = updateStateAfterSSLIO(conn, ret, 0);
+    unsetBlockingTimeout(conn);
+
+    return ret;
+}
+
+/* Synchronous line read: reads one byte at a time until a '\n' is found or
+ * 'size'-1 bytes were stored. The newline is not stored; a preceding '\r'
+ * is stripped. The buffer is always NUL-terminated. Returns the number of
+ * bytes stored (terminator excluded) or -1 on I/O error. */
+static ssize_t connTLSSyncReadLine(connection *conn_, char *ptr, ssize_t size, long long timeout) {
+    tls_connection *conn = (tls_connection *) conn_;
+    ssize_t nread = 0;
+
+    setBlockingTimeout(conn, timeout);
+
+    /* Reserve one byte for the NUL terminator. */
+    size--;
+    while(size) {
+        char c;
+
+        ERR_clear_error();
+        int ret = SSL_read(conn->ssl, &c, 1);
+        ret = updateStateAfterSSLIO(conn, ret, 0);
+        if (ret <= 0) {
+            nread = -1;
+            goto exit;
+        }
+        if (c == '\n') {
+            /* Terminate the line; drop an optional '\r' just before it. */
+            *ptr = '\0';
+            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
+            goto exit;
+        } else {
+            /* Keep the buffer NUL-terminated after every appended byte. */
+            *ptr++ = c;
+            *ptr = '\0';
+            nread++;
+        }
+        size--;
+    }
+exit:
+    unsetBlockingTimeout(conn);
+    return nread;
+}
+
+/* The connection type name is a constant; the instance is irrelevant. */
+static const char *connTLSGetType(connection *conn_) {
+    (void) conn_;
+    return CONN_TYPE_TLS;
+}
+
+/* Report whether any TLS connection still holds buffered records that must
+ * be served outside of the normal socket-readable notification. */
+static int tlsHasPendingData(void) {
+    return pending_list ? listLength(pending_list) > 0 : 0;
+}
+
+/* Serve every connection in the pending list by firing a synthetic
+ * readable event, so data already buffered inside OpenSSL is consumed.
+ * Returns the number of connections that were in the list when the scan
+ * started (the count is sampled first since handlers may alter the list). */
+static int tlsProcessPendingData(void) {
+    listIter li;
+    listNode *ln;
+
+    int processed = listLength(pending_list);
+    listRewind(pending_list,&li);
+    while((ln = listNext(&li))) {
+        tls_connection *conn = listNodeValue(ln);
+        tlsHandleEvent(conn, AE_READABLE);
+    }
+    return processed;
+}
+
+/* Fetch the peer certificate used for authentication on the specified
+ * connection and return it as a PEM-encoded sds, or NULL if the connection
+ * is not a TLS connection, has no peer certificate, or encoding fails.
+ * The caller owns the returned sds. */
+static sds connTLSGetPeerCert(connection *conn_) {
+    tls_connection *conn = (tls_connection *) conn_;
+    if ((conn_->type != connectionTypeTls()) || !conn->ssl) return NULL;
+
+    /* SSL_get_peer_certificate() returns a reference-counted X509 object:
+     * it must be released with X509_free() on every path below, otherwise
+     * the certificate is leaked on each call. */
+    X509 *cert = SSL_get_peer_certificate(conn->ssl);
+    if (!cert) return NULL;
+
+    BIO *bio = BIO_new(BIO_s_mem());
+    if (bio == NULL || !PEM_write_bio_X509(bio, cert)) {
+        if (bio != NULL) BIO_free(bio);
+        X509_free(cert);
+        return NULL;
+    }
+
+    const char *bio_ptr;
+    long long bio_len = BIO_get_mem_data(bio, &bio_ptr);
+    sds cert_pem = sdsnewlen(bio_ptr, bio_len);
+    BIO_free(bio);
+    X509_free(cert);
+
+    return cert_pem;
+}
+
+/* Method table registered with the connection abstraction layer: each entry
+ * maps a generic connection operation to its TLS implementation. */
+static ConnectionType CT_TLS = {
+    /* connection type */
+    .get_type = connTLSGetType,
+
+    /* connection type initialize & finalize & configure */
+    .init = tlsInit,
+    .cleanup = tlsCleanup,
+    .configure = tlsConfigure,
+
+    /* ae & accept & listen & error & address handler */
+    .ae_handler = tlsEventHandler,
+    .accept_handler = tlsAcceptHandler,
+    .addr = connTLSAddr,
+    .is_local = connTLSIsLocal,
+    .listen = connTLSListen,
+
+    /* create/shutdown/close connection */
+    .conn_create = connCreateTLS,
+    .conn_create_accepted = connCreateAcceptedTLS,
+    .shutdown = connTLSShutdown,
+    .close = connTLSClose,
+
+    /* connect & accept */
+    .connect = connTLSConnect,
+    .blocking_connect = connTLSBlockingConnect,
+    .accept = connTLSAccept,
+
+    /* IO */
+    .read = connTLSRead,
+    .write = connTLSWrite,
+    .writev = connTLSWritev,
+    .set_write_handler = connTLSSetWriteHandler,
+    .set_read_handler = connTLSSetReadHandler,
+    .get_last_error = connTLSGetLastError,
+    .sync_write = connTLSSyncWrite,
+    .sync_read = connTLSSyncRead,
+    .sync_readline = connTLSSyncReadLine,
+
+    /* pending data */
+    .has_pending_data = tlsHasPendingData,
+    .process_pending_data = tlsProcessPendingData,
+
+    /* TLS specified methods */
+    .get_peer_cert = connTLSGetPeerCert,
+};
+
+/* Register the TLS connection type with the server's connection framework
+ * (built-in build, i.e. USE_OPENSSL defined). */
+int RedisRegisterConnectionTypeTLS(void) {
+    return connTypeRegister(&CT_TLS);
+}
+
+#else /* USE_OPENSSL */
+
+/* Fallback when Redis is built without OpenSSL: report TLS as not built in
+ * and fail registration. */
+int RedisRegisterConnectionTypeTLS(void) {
+    serverLog(LL_VERBOSE, "Connection type %s not builtin", CONN_TYPE_TLS);
+    return C_ERR;
+}
+}
+
+#endif
+
+#if BUILD_TLS_MODULE == 2 /* BUILD_MODULE */
+
+#include "release.h"
+
+/* Module entry point used when TLS support is built as a loadable module
+ * instead of into the server binary. Registers CT_TLS after validating the
+ * module was built together with this redis-server and that we are still
+ * at server startup. */
+int RedisModule_OnLoad(void *ctx, RedisModuleString **argv, int argc) {
+    UNUSED(argv);
+    UNUSED(argc);
+
+    /* Connection modules must be part of the same build as redis. */
+    if (strcmp(REDIS_BUILD_ID_RAW, redisBuildIdRaw())) {
+        serverLog(LL_NOTICE, "Connection type %s was not built together with the redis-server used.", CONN_TYPE_TLS);
+        return REDISMODULE_ERR;
+    }
+
+    if (RedisModule_Init(ctx,"tls",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR)
+        return REDISMODULE_ERR;
+
+    /* Connection modules are available only at bootup. */
+    if ((RedisModule_GetContextFlags(ctx) & REDISMODULE_CTX_FLAGS_SERVER_STARTUP) == 0) {
+        serverLog(LL_NOTICE, "Connection type %s can be loaded only during bootup", CONN_TYPE_TLS);
+        return REDISMODULE_ERR;
+    }
+
+    RedisModule_SetModuleOptions(ctx, REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD);
+
+    if(connTypeRegister(&CT_TLS) != C_OK)
+        return REDISMODULE_ERR;
+
+    return REDISMODULE_OK;
+}
+
+/* Connection types cannot be deregistered, so unloading is always refused. */
+int RedisModule_OnUnload(void *arg) {
+    UNUSED(arg);
+    serverLog(LL_NOTICE, "Connection type %s can not be unloaded", CONN_TYPE_TLS);
+    return REDISMODULE_ERR;
+}
+#endif
diff --git a/src/tracking.c b/src/tracking.c
new file mode 100644
index 0000000..5a9b114
--- /dev/null
+++ b/src/tracking.c
@@ -0,0 +1,660 @@
+/* tracking.c - Client side caching: keys tracking and invalidation
+ *
+ * Copyright (c) 2019, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/* The tracking table is constituted by a radix tree of keys, each pointing
+ * to a radix tree of client IDs, used to track the clients that may have
+ * certain keys in their local, client side, cache.
+ *
+ * When a client enables tracking with "CLIENT TRACKING on", each key served to
+ * the client is remembered in the table mapping the keys to the client IDs.
+ * Later, when a key is modified, all the clients that may have local copy
+ * of such key will receive an invalidation message.
+ *
+ * Clients will normally take frequently requested objects in memory, removing
+ * them when invalidation messages are received. */
+rax *TrackingTable = NULL;            /* Key name -> rax of client IDs. */
+rax *PrefixTable = NULL;              /* Prefix -> bcastState (BCAST mode). */
+uint64_t TrackingTableTotalItems = 0; /* Total number of IDs stored across
+                                         the whole tracking table. This gives
+                                         a hint about the total memory we
+                                         are using server side for CSC. */
+robj *TrackingChannelName;            /* Shared "__redis__:invalidate" channel
+                                         name, created lazily on the first
+                                         tracking client. */
+
+/* This is the structure that we have as value of the PrefixTable, and
+ * represents the list of keys modified, and the list of clients that need
+ * to be notified, for a given prefix. */
+typedef struct bcastState {
+    rax *keys;    /* Keys modified in the current event loop cycle. The value
+                     stored for each key is the client pointer that last
+                     modified it (used for NOLOOP). */
+    rax *clients; /* Clients subscribed to the notification events for this
+                     prefix; keyed by raw client pointer bytes. */
+} bcastState;
+
+/* Remove the tracking state from the client 'c'. Note that there is not much
+ * to do for us here, if not to decrement the counter of the clients in
+ * tracking mode, because we just store the ID of the client in the tracking
+ * table, so we'll remove the ID reference in a lazy way. Otherwise when a
+ * client with many entries in the table is removed, it would cost a lot of
+ * time to do the cleanup. */
+void disableTracking(client *c) {
+    /* If this client is in broadcasting mode, we need to unsubscribe it
+     * from all the prefixes it is registered to. */
+    if (c->flags & CLIENT_TRACKING_BCAST) {
+        raxIterator ri;
+        raxStart(&ri,c->client_tracking_prefixes);
+        raxSeek(&ri,"^",NULL,0);
+        while(raxNext(&ri)) {
+            bcastState *bs = raxFind(PrefixTable,ri.key,ri.key_len);
+            /* Every prefix the client registered must exist in the global
+             * PrefixTable, otherwise the two structures are out of sync. */
+            serverAssert(bs != raxNotFound);
+            raxRemove(bs->clients,(unsigned char*)&c,sizeof(c),NULL);
+            /* Was it the last client? Remove the prefix from the
+             * table. */
+            if (raxSize(bs->clients) == 0) {
+                raxFree(bs->clients);
+                raxFree(bs->keys);
+                zfree(bs);
+                raxRemove(PrefixTable,ri.key,ri.key_len,NULL);
+            }
+        }
+        raxStop(&ri);
+        raxFree(c->client_tracking_prefixes);
+        c->client_tracking_prefixes = NULL;
+    }
+
+    /* Clear flags and adjust the count. */
+    if (c->flags & CLIENT_TRACKING) {
+        server.tracking_clients--;
+        c->flags &= ~(CLIENT_TRACKING|CLIENT_TRACKING_BROKEN_REDIR|
+                      CLIENT_TRACKING_BCAST|CLIENT_TRACKING_OPTIN|
+                      CLIENT_TRACKING_OPTOUT|CLIENT_TRACKING_CACHING|
+                      CLIENT_TRACKING_NOLOOP);
+    }
+}
+
+/* Return 1 when the shorter of the two strings is a byte-wise prefix of
+ * the longer one (i.e. they match over the common length), 0 otherwise. */
+static int stringCheckPrefix(unsigned char *s1, size_t s1_len, unsigned char *s2, size_t s2_len) {
+    size_t common = (s1_len < s2_len) ? s1_len : s2_len;
+    return memcmp(s1, s2, common) == 0;
+}
+
+/* Check if any of the provided prefixes collide with one another or
+ * with an existing prefix for the client. A collision is defined as two
+ * prefixes that will emit an invalidation for the same key. If no prefix
+ * collision is found, 1 is returned, otherwise 0 is returned and the client
+ * has an error emitted describing the error. */
+int checkPrefixCollisionsOrReply(client *c, robj **prefixes, size_t numprefix) {
+    for (size_t i = 0; i < numprefix; i++) {
+        /* Check input list has no overlap with existing prefixes. */
+        if (c->client_tracking_prefixes) {
+            raxIterator ri;
+            raxStart(&ri,c->client_tracking_prefixes);
+            raxSeek(&ri,"^",NULL,0);
+            while(raxNext(&ri)) {
+                if (stringCheckPrefix(ri.key,ri.key_len,
+                    prefixes[i]->ptr,sdslen(prefixes[i]->ptr)))
+                {
+                    sds collision = sdsnewlen(ri.key,ri.key_len);
+                    addReplyErrorFormat(c,
+                        "Prefix '%s' overlaps with an existing prefix '%s'. "
+                        "Prefixes for a single client must not overlap.",
+                        (unsigned char *)prefixes[i]->ptr,
+                        (unsigned char *)collision);
+                    sdsfree(collision);
+                    raxStop(&ri);
+                    return 0;
+                }
+            }
+            raxStop(&ri);
+        }
+        /* Check input has no overlap with itself. */
+        for (size_t j = i + 1; j < numprefix; j++) {
+            if (stringCheckPrefix(prefixes[i]->ptr,sdslen(prefixes[i]->ptr),
+                prefixes[j]->ptr,sdslen(prefixes[j]->ptr)))
+            {
+                addReplyErrorFormat(c,
+                    "Prefix '%s' overlaps with another provided prefix '%s'. "
+                    "Prefixes for a single client must not overlap.",
+                    (unsigned char *)prefixes[i]->ptr,
+                    (unsigned char *)prefixes[j]->ptr);
+                /* Must return 0 (failure) here. The previous 'return i;'
+                 * returned a truthy value for any i > 0, making callers that
+                 * treat non-zero as success accept overlapping prefixes. */
+                return 0;
+            }
+        }
+    }
+    return 1;
+}
+
+/* Set the client 'c' to track the prefix 'prefix'. If the client 'c' is
+ * already registered for the specified prefix, no operation is performed. */
+void enableBcastTrackingForPrefix(client *c, char *prefix, size_t plen) {
+    bcastState *bs = raxFind(PrefixTable,(unsigned char*)prefix,plen);
+    /* If this is the first client subscribing to such prefix, create
+     * the prefix in the table. */
+    if (bs == raxNotFound) {
+        bs = zmalloc(sizeof(*bs));
+        bs->keys = raxNew();
+        bs->clients = raxNew();
+        raxInsert(PrefixTable,(unsigned char*)prefix,plen,bs,NULL);
+    }
+    /* raxTryInsert() returns non-zero only when the client was not already
+     * registered, so the per-client prefix list is updated exactly once. */
+    if (raxTryInsert(bs->clients,(unsigned char*)&c,sizeof(c),NULL,NULL)) {
+        if (c->client_tracking_prefixes == NULL)
+            c->client_tracking_prefixes = raxNew();
+        raxInsert(c->client_tracking_prefixes,
+                  (unsigned char*)prefix,plen,NULL,NULL);
+    }
+}
+
+/* Enable the tracking state for the client 'c', and as a side effect allocates
+ * the tracking table if needed. If the 'redirect_to' argument is non zero, the
+ * invalidation messages for this client will be sent to the client ID
+ * specified by the 'redirect_to' argument. Note that if such client will
+ * eventually get freed, we'll send a message to the original client to
+ * inform it of the condition. Multiple clients can redirect the invalidation
+ * messages to the same client ID. */
+void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **prefix, size_t numprefix) {
+    if (!(c->flags & CLIENT_TRACKING)) server.tracking_clients++;
+    c->flags |= CLIENT_TRACKING;
+    /* Reset the mode-specific flags first: they are re-applied from
+     * 'options' below, so a repeated CLIENT TRACKING call fully redefines
+     * the tracking mode. */
+    c->flags &= ~(CLIENT_TRACKING_BROKEN_REDIR|CLIENT_TRACKING_BCAST|
+                  CLIENT_TRACKING_OPTIN|CLIENT_TRACKING_OPTOUT|
+                  CLIENT_TRACKING_NOLOOP);
+    c->client_tracking_redirection = redirect_to;
+
+    /* This may be the first client we ever enable. Create the tracking
+     * table if it does not exist. */
+    if (TrackingTable == NULL) {
+        TrackingTable = raxNew();
+        PrefixTable = raxNew();
+        TrackingChannelName = createStringObject("__redis__:invalidate",20);
+    }
+
+    /* For broadcasting, set the list of prefixes in the client. */
+    if (options & CLIENT_TRACKING_BCAST) {
+        c->flags |= CLIENT_TRACKING_BCAST;
+        /* No prefixes given means "all keys": register the empty prefix. */
+        if (numprefix == 0) enableBcastTrackingForPrefix(c,"",0);
+        for (size_t j = 0; j < numprefix; j++) {
+            sds sdsprefix = prefix[j]->ptr;
+            enableBcastTrackingForPrefix(c,sdsprefix,sdslen(sdsprefix));
+        }
+    }
+
+    /* Set the remaining flags that don't need any special handling. */
+    c->flags |= options & (CLIENT_TRACKING_OPTIN|CLIENT_TRACKING_OPTOUT|
+                           CLIENT_TRACKING_NOLOOP);
+}
+
+/* This function is called after the execution of a readonly command in the
+ * case the client 'c' has keys tracking enabled and the tracking is not
+ * in BCAST mode. It will populate the tracking invalidation table according
+ * to the keys the user fetched, so that Redis will know what are the clients
+ * that should receive an invalidation message when certain groups of keys
+ * are modified.
+ *
+ * 'tracking' is the client with tracking enabled, 'executing' is the client
+ * that actually executed the command (they differ under redirection). */
+void trackingRememberKeys(client *tracking, client *executing) {
+    /* Return if we are in optin/out mode and the right CACHING command
+     * was/wasn't given in order to modify the default behavior. */
+    uint64_t optin = tracking->flags & CLIENT_TRACKING_OPTIN;
+    uint64_t optout = tracking->flags & CLIENT_TRACKING_OPTOUT;
+    uint64_t caching_given = tracking->flags & CLIENT_TRACKING_CACHING;
+    if ((optin && !caching_given) || (optout && caching_given)) return;
+
+    getKeysResult result = GETKEYS_RESULT_INIT;
+    int numkeys = getKeysFromCommand(executing->cmd,executing->argv,executing->argc,&result);
+    if (!numkeys) {
+        getKeysFreeResult(&result);
+        return;
+    }
+    /* Shard channels are treated as special keys for client
+     * library to rely on `COMMAND` command to discover the node
+     * to connect to. These channels don't need to be tracked. */
+    if (executing->cmd->flags & CMD_PUBSUB) {
+        /* Free the key result on this early-return path too, otherwise the
+         * buffer allocated by getKeysFromCommand() is leaked. */
+        getKeysFreeResult(&result);
+        return;
+    }
+
+    keyReference *keys = result.keys;
+
+    for(int j = 0; j < numkeys; j++) {
+        int idx = keys[j].pos;
+        sds sdskey = executing->argv[idx]->ptr;
+        rax *ids = raxFind(TrackingTable,(unsigned char*)sdskey,sdslen(sdskey));
+        if (ids == raxNotFound) {
+            ids = raxNew();
+            int inserted = raxTryInsert(TrackingTable,(unsigned char*)sdskey,
+                                        sdslen(sdskey),ids, NULL);
+            serverAssert(inserted == 1);
+        }
+        /* Count the item only when the client ID was not already present. */
+        if (raxTryInsert(ids,(unsigned char*)&tracking->id,sizeof(tracking->id),NULL,NULL))
+            TrackingTableTotalItems++;
+    }
+    getKeysFreeResult(&result);
+}
+
+/* Given a key name, this function sends an invalidation message in the
+ * proper channel (depending on RESP version: PubSub or Push message) and
+ * to the proper client (in case of redirection), in the context of the
+ * client 'c' with tracking enabled.
+ *
+ * In case the 'proto' argument is non zero, the function will assume that
+ * 'keyname' points to a buffer of 'keylen' bytes already expressed in the
+ * form of Redis RESP protocol. This is used for:
+ * - In BCAST mode, to send an array of invalidated keys to all
+ *   applicable clients
+ * - Following a flush command, to send a single RESP NULL to indicate
+ *   that all keys are now invalid. */
+void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
+    /* Save the flags so CLIENT_PUSHING is cleared on exit only if it was
+     * not already set before entering this function. */
+    uint64_t old_flags = c->flags;
+    c->flags |= CLIENT_PUSHING;
+
+    int using_redirection = 0;
+    if (c->client_tracking_redirection) {
+        client *redir = lookupClientByID(c->client_tracking_redirection);
+        if (!redir) {
+            c->flags |= CLIENT_TRACKING_BROKEN_REDIR;
+            /* We need to signal to the original connection that we
+             * are unable to send invalidation messages to the redirected
+             * connection, because the client no longer exist. */
+            if (c->resp > 2) {
+                addReplyPushLen(c,2);
+                addReplyBulkCBuffer(c,"tracking-redir-broken",21);
+                addReplyLongLong(c,c->client_tracking_redirection);
+            }
+            if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+            return;
+        }
+        /* Switch to the redirection target: from here on 'c' is the client
+         * that actually receives the invalidation message. */
+        if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+        c = redir;
+        using_redirection = 1;
+        old_flags = c->flags;
+        c->flags |= CLIENT_PUSHING;
+    }
+
+    /* Only send such info for clients in RESP version 3 or more. However
+     * if redirection is active, and the connection we redirect to is
+     * in Pub/Sub mode, we can support the feature with RESP 2 as well,
+     * by sending Pub/Sub messages in the __redis__:invalidate channel. */
+    if (c->resp > 2) {
+        addReplyPushLen(c,2);
+        addReplyBulkCBuffer(c,"invalidate",10);
+    } else if (using_redirection && c->flags & CLIENT_PUBSUB) {
+        /* We use a static object to speedup things, however we assume
+         * that addReplyPubsubMessage() will not take a reference. */
+        addReplyPubsubMessage(c,TrackingChannelName,NULL,shared.messagebulk);
+    } else {
+        /* If are here, the client is not using RESP3, nor is
+         * redirecting to another client. We can't send anything to
+         * it since RESP2 does not support push messages in the same
+         * connection. */
+        if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+        return;
+    }
+
+    /* Send the "value" part, which is the array of keys. */
+    if (proto) {
+        addReplyProto(c,keyname,keylen);
+    } else {
+        addReplyArrayLen(c,1);
+        addReplyBulkCBuffer(c,keyname,keylen);
+    }
+    updateClientMemUsageAndBucket(c);
+    if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+}
+
+/* This function is called when a key is modified in Redis and in the case
+ * we have at least one client with the BCAST mode enabled.
+ * Its goal is to set the key in the right broadcast state if the key
+ * matches one or more prefixes in the prefix table. Later when we
+ * return to the event loop, we'll send invalidation messages to the
+ * clients subscribed to each prefix. */
+void trackingRememberKeyToBroadcast(client *c, char *keyname, size_t keylen) {
+    raxIterator ri;
+    raxStart(&ri,PrefixTable);
+    raxSeek(&ri,"^",NULL,0);
+    while(raxNext(&ri)) {
+        /* A prefix longer than the key can never match it; the empty
+         * prefix (key_len == 0) matches every key. */
+        if (ri.key_len > keylen) continue;
+        if (ri.key_len != 0 && memcmp(ri.key,keyname,ri.key_len) != 0)
+            continue;
+        bcastState *bs = ri.data;
+        /* We insert the client pointer as associated value in the radix
+         * tree. This way we know who was the client that did the last
+         * change to the key, and can avoid sending the notification in the
+         * case the client is in NOLOOP mode. */
+        raxInsert(bs->keys,(unsigned char*)keyname,keylen,c,NULL);
+    }
+    raxStop(&ri);
+}
+
+/* This function is called from signalModifiedKey() or other places in Redis
+ * when a key changes value. In the context of keys tracking, our task here is
+ * to send a notification to every client that may have keys about such caching
+ * slot.
+ *
+ * Note that 'c' may be NULL in case the operation was performed outside the
+ * context of a client modifying the database (for instance when we delete a
+ * key because of expire).
+ *
+ * The last argument 'bcast' tells the function if it should also schedule
+ * the key for broadcasting to clients in BCAST mode. This is the case when
+ * the function is called from the Redis core once a key is modified, however
+ * we also call the function in order to evict keys in the key table in case
+ * of memory pressure: in that case the key didn't really change, so we want
+ * just to notify the clients that are in the table for this key, that would
+ * otherwise miss the fact we are no longer tracking the key for them. */
+void trackingInvalidateKey(client *c, robj *keyobj, int bcast) {
+    if (TrackingTable == NULL) return;
+
+    unsigned char *key = (unsigned char*)keyobj->ptr;
+    size_t keylen = sdslen(keyobj->ptr);
+
+    if (bcast && raxSize(PrefixTable) > 0)
+        trackingRememberKeyToBroadcast(c,(char *)key,keylen);
+
+    rax *ids = raxFind(TrackingTable,key,keylen);
+    if (ids == raxNotFound) return;
+
+    raxIterator ri;
+    raxStart(&ri,ids);
+    raxSeek(&ri,"^",NULL,0);
+    while(raxNext(&ri)) {
+        /* The rax key is the raw bytes of the client ID; copy it out to
+         * avoid unaligned access. */
+        uint64_t id;
+        memcpy(&id,ri.key,sizeof(id));
+        client *target = lookupClientByID(id);
+        /* Note that if the client is in BCAST mode, we don't want to
+         * send invalidation messages that were pending in the case
+         * previously the client was not in BCAST mode. This can happen if
+         * TRACKING is enabled normally, and then the client switches to
+         * BCAST mode. */
+        if (target == NULL ||
+            !(target->flags & CLIENT_TRACKING)||
+            target->flags & CLIENT_TRACKING_BCAST)
+        {
+            continue;
+        }
+
+        /* If the client enabled the NOLOOP mode, don't send notifications
+         * about keys changed by the client itself. */
+        if (target->flags & CLIENT_TRACKING_NOLOOP &&
+            target == server.current_client)
+        {
+            continue;
+        }
+
+        /* If target is current client and it's executing a command, we need
+         * to schedule key invalidation, as the invalidation messages may be
+         * interleaved with the command response and should come after it. */
+        if (target == server.current_client && (server.current_client->flags & CLIENT_EXECUTING_COMMAND)) {
+            incrRefCount(keyobj);
+            listAddNodeTail(server.tracking_pending_keys, keyobj);
+        } else {
+            sendTrackingMessage(target,(char *)keyobj->ptr,sdslen(keyobj->ptr),0);
+        }
+    }
+    raxStop(&ri);
+
+    /* Free the tracking table: we'll create the radix tree and populate it
+     * again if more keys will be modified in this caching slot. */
+    TrackingTableTotalItems -= raxSize(ids);
+    raxFree(ids);
+    raxRemove(TrackingTable,(unsigned char*)key,keylen,NULL);
+}
+
+/* Deliver the invalidation messages that trackingInvalidateKey() and
+ * trackingInvalidateKeysOnFlush() queued for the current client while it
+ * was executing a command. A NULL list entry means "all keys invalid" and
+ * is sent as a pre-encoded RESP NULL. */
+void trackingHandlePendingKeyInvalidations(void) {
+    if (!listLength(server.tracking_pending_keys)) return;
+
+    /* Flush pending invalidation messages only when we are not in nested call.
+     * So the messages are not interleaved with transaction response. */
+    if (server.execution_nesting) return;
+
+    listNode *ln;
+    listIter li;
+
+    listRewind(server.tracking_pending_keys,&li);
+    while ((ln = listNext(&li)) != NULL) {
+        robj *key = listNodeValue(ln);
+        /* current_client maybe freed, so we need to send invalidation
+         * message only when current_client is still alive */
+        if (server.current_client != NULL) {
+            if (key != NULL) {
+                sendTrackingMessage(server.current_client,(char *)key->ptr,sdslen(key->ptr),0);
+            } else {
+                sendTrackingMessage(server.current_client,shared.null[server.current_client->resp]->ptr,
+                                    sdslen(shared.null[server.current_client->resp]->ptr),1);
+            }
+        }
+        /* Drop the reference taken when the key was queued, even when the
+         * message could not be delivered. */
+        if (key != NULL) decrRefCount(key);
+    }
+    listEmpty(server.tracking_pending_keys);
+}
+
+/* This function is called when one or all the Redis databases are
+ * flushed. Caching keys are not specific for each DB but are global:
+ * currently what we do is send a special notification to clients with
+ * tracking enabled, sending a RESP NULL, which means, "all the keys",
+ * in order to avoid flooding clients with many invalidation messages
+ * for all the keys they may hold.
+ */
+/* Callback used by raxFreeWithCallback(): each value stored in the
+ * tracking table is itself a rax of client IDs. */
+void freeTrackingRadixTreeCallback(void *rt) {
+    raxFree(rt);
+}
+
+/* Free the whole tracking table, including the per-key client-ID trees. */
+void freeTrackingRadixTree(rax *rt) {
+    raxFreeWithCallback(rt,freeTrackingRadixTreeCallback);
+}
+
+/* A RESP NULL is sent to indicate that all keys are invalid.
+ * 'async' selects whether the old tracking table is freed lazily in a
+ * background thread or synchronously. */
+void trackingInvalidateKeysOnFlush(int async) {
+    if (server.tracking_clients) {
+        listNode *ln;
+        listIter li;
+        listRewind(server.clients,&li);
+        while ((ln = listNext(&li)) != NULL) {
+            client *c = listNodeValue(ln);
+            if (c->flags & CLIENT_TRACKING) {
+                if (c == server.current_client) {
+                    /* We use a special NULL to indicate that we should send null */
+                    listAddNodeTail(server.tracking_pending_keys,NULL);
+                } else {
+                    sendTrackingMessage(c,shared.null[c->resp]->ptr,sdslen(shared.null[c->resp]->ptr),1);
+                }
+            }
+        }
+    }
+
+    /* In case of FLUSHALL, reclaim all the memory used by tracking. */
+    if (TrackingTable) {
+        if (async) {
+            freeTrackingRadixTreeAsync(TrackingTable);
+        } else {
+            freeTrackingRadixTree(TrackingTable);
+        }
+        /* Start over with an empty table and a zeroed item counter. */
+        TrackingTable = raxNew();
+        TrackingTableTotalItems = 0;
+    }
+}
+
+/* Tracking forces Redis to remember information about which client may have
+ * certain keys. In workloads where there are a lot of reads, but keys are
+ * hardly modified, the amount of information we have to remember server side
+ * could be a lot, with the number of keys being totally not bound.
+ *
+ * So Redis allows the user to configure a maximum number of keys for the
+ * invalidation table. This function makes sure that we don't go over the
+ * specified fill rate: if we are over, we can just evict information about
+ * a random key, and send invalidation messages to clients like if the key was
+ * modified. */
+void trackingLimitUsedSlots(void) {
+    /* Persists across calls: grows the eviction effort each consecutive
+     * time we fail to get back under the limit. */
+    static unsigned int timeout_counter = 0;
+    if (TrackingTable == NULL) return;
+    if (server.tracking_table_max_keys == 0) return; /* No limits set. */
+    size_t max_keys = server.tracking_table_max_keys;
+    if (raxSize(TrackingTable) <= max_keys) {
+        timeout_counter = 0;
+        return; /* Limit not reached. */
+    }
+
+    /* We have to invalidate a few keys to reach the limit again. The effort
+     * we do here is proportional to the number of times we entered this
+     * function and found that we are still over the limit. */
+    int effort = 100 * (timeout_counter+1);
+
+    /* We just remove one key after another by using a random walk. */
+    raxIterator ri;
+    raxStart(&ri,TrackingTable);
+    while(effort > 0) {
+        effort--;
+        raxSeek(&ri,"^",NULL,0);
+        raxRandomWalk(&ri,0);
+        if (raxEOF(&ri)) break;
+        /* bcast=0: the key did not really change, we only stop tracking it. */
+        robj *keyobj = createStringObject((char*)ri.key,ri.key_len);
+        trackingInvalidateKey(NULL,keyobj,0);
+        decrRefCount(keyobj);
+        if (raxSize(TrackingTable) <= max_keys) {
+            timeout_counter = 0;
+            raxStop(&ri);
+            return; /* Return ASAP: we are again under the limit. */
+        }
+    }
+
+    /* If we reach this point, we were not able to go under the configured
+     * limit using the maximum effort we had for this run. */
+    raxStop(&ri);
+    timeout_counter++;
+}
+
+/* Generate Redis protocol for an array containing all the key names
+ * in the 'keys' radix tree. If the client is not NULL, the list will not
+ * include keys that were modified the last time by this client, in order
+ * to implement the NOLOOP option.
+ *
+ * If 'c' is given and every key was last modified by 'c' (so the array
+ * would be empty), NULL is returned instead. */
+sds trackingBuildBroadcastReply(client *c, rax *keys) {
+    raxIterator ri;
+    uint64_t count;
+
+    if (c == NULL) {
+        count = raxSize(keys);
+    } else {
+        /* First pass: count the keys not last-touched by 'c', since the
+         * RESP array header needs the element count up front. */
+        count = 0;
+        raxStart(&ri,keys);
+        raxSeek(&ri,"^",NULL,0);
+        while(raxNext(&ri)) {
+            if (ri.data != c) count++;
+        }
+        raxStop(&ri);
+
+        if (count == 0) return NULL;
+    }
+
+    /* Create the array reply with the list of keys once, then send
+     * it to all the clients subscribed to this prefix. */
+    char buf[32];
+    size_t len = ll2string(buf,sizeof(buf),count);
+    sds proto = sdsempty();
+    /* Rough pre-allocation (~15 bytes per key) to limit reallocations. */
+    proto = sdsMakeRoomFor(proto,count*15);
+    proto = sdscatlen(proto,"*",1);
+    proto = sdscatlen(proto,buf,len);
+    proto = sdscatlen(proto,"\r\n",2);
+    raxStart(&ri,keys);
+    raxSeek(&ri,"^",NULL,0);
+    while(raxNext(&ri)) {
+        if (c && ri.data == c) continue;
+        /* Append each key as a RESP bulk string: $<len>\r\n<key>\r\n */
+        len = ll2string(buf,sizeof(buf),ri.key_len);
+        proto = sdscatlen(proto,"$",1);
+        proto = sdscatlen(proto,buf,len);
+        proto = sdscatlen(proto,"\r\n",2);
+        proto = sdscatlen(proto,ri.key,ri.key_len);
+        proto = sdscatlen(proto,"\r\n",2);
+    }
+    raxStop(&ri);
+    return proto;
+}
+
+/* This function will run the prefixes of clients in BCAST mode and
+ * keys that were modified about each prefix, and will send the
+ * notifications to each client in each prefix. */
+void trackingBroadcastInvalidationMessages(void) {
+    raxIterator ri, ri2;
+
+    /* Return ASAP if there is nothing to do here. */
+    if (TrackingTable == NULL || !server.tracking_clients) return;
+
+    raxStart(&ri,PrefixTable);
+    raxSeek(&ri,"^",NULL,0);
+
+    /* For each prefix... */
+    while(raxNext(&ri)) {
+        bcastState *bs = ri.data;
+
+        if (raxSize(bs->keys)) {
+            /* Generate the common protocol for all the clients that are
+             * not using the NOLOOP option. */
+            sds proto = trackingBuildBroadcastReply(NULL,bs->keys);
+
+            /* Send this array of keys to every client in the list. */
+            raxStart(&ri2,bs->clients);
+            raxSeek(&ri2,"^",NULL,0);
+            while(raxNext(&ri2)) {
+                /* The rax key is the raw bytes of the client pointer. */
+                client *c;
+                memcpy(&c,ri2.key,sizeof(c));
+                if (c->flags & CLIENT_TRACKING_NOLOOP) {
+                    /* This client may have certain keys excluded. */
+                    sds adhoc = trackingBuildBroadcastReply(c,bs->keys);
+                    if (adhoc) {
+                        sendTrackingMessage(c,adhoc,sdslen(adhoc),1);
+                        sdsfree(adhoc);
+                    }
+                } else {
+                    sendTrackingMessage(c,proto,sdslen(proto),1);
+                }
+            }
+            raxStop(&ri2);
+
+            /* Clean up: we can remove everything from this state, because we
+             * want to only track the new keys that will be accumulated starting
+             * from now. */
+            sdsfree(proto);
+        }
+        /* Reset the per-prefix key accumulator for the next cycle. */
+        raxFree(bs->keys);
+        bs->keys = raxNew();
+    }
+    raxStop(&ri);
+}
+
+/* This is just used in order to access the amount of used slots in the
+ * tracking table. */
+uint64_t trackingGetTotalItems(void) {
+    return TrackingTableTotalItems;
+}
+
+/* Number of distinct keys currently in the tracking table (0 when tracking
+ * was never enabled). */
+uint64_t trackingGetTotalKeys(void) {
+    if (TrackingTable == NULL) return 0;
+    return raxSize(TrackingTable);
+}
+
+/* Number of distinct prefixes registered by BCAST-mode clients. */
+uint64_t trackingGetTotalPrefixes(void) {
+    if (PrefixTable == NULL) return 0;
+    return raxSize(PrefixTable);
+}
diff --git a/src/unix.c b/src/unix.c
new file mode 100644
index 0000000..bd146d0
--- /dev/null
+++ b/src/unix.c
@@ -0,0 +1,207 @@
+/* ==========================================================================
+ * unix.c - unix socket connection implementation
+ * --------------------------------------------------------------------------
+ * Copyright (C) 2022 zhenwei pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+ * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * ==========================================================================
+ */
+
+#include "server.h"
+#include "connection.h"
+
/* Forward declaration: the descriptor is fully defined at the bottom of
 * this file. */
static ConnectionType CT_Unix;

/* Return the identifier string of this connection type. */
static const char *connUnixGetType(connection *conn) {
    UNUSED(conn);

    return CONN_TYPE_UNIX;
}

/* Once the socket exists, event handling is the same as for TCP, so
 * delegate to the TCP connection type's handler. */
static void connUnixEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask) {
    connectionTypeTcp()->ae_handler(el, fd, clientData, mask);
}

/* Resolve the local/remote address of the connection, reusing the TCP
 * implementation. */
static int connUnixAddr(connection *conn, char *ip, size_t ip_len, int *port, int remote) {
    return connectionTypeTcp()->addr(conn, ip, ip_len, port, remote);
}

static int connUnixIsLocal(connection *conn) {
    UNUSED(conn);

    return 1; /* Unix socket is always local connection */
}
+
/* Create the listening Unix domain socket(s) described by 'listener'.
 * 'listener->priv' carries a mode_t with the filesystem permissions for
 * the socket file. On socket creation failure the server logs a warning
 * and exits; otherwise C_OK is returned. */
static int connUnixListen(connListener *listener) {
    int fd;
    mode_t *perm = (mode_t *)listener->priv;

    if (listener->bindaddr_count == 0)
        return C_OK;

    /* currently listener->bindaddr_count is always 1, we still use a loop here in case Redis supports multi Unix socket in the future */
    for (int j = 0; j < listener->bindaddr_count; j++) {
        char *addr = listener->bindaddr[j];

        unlink(addr); /* don't care if this fails */
        fd = anetUnixServer(server.neterr, addr, *perm, server.tcp_backlog);
        if (fd == ANET_ERR) {
            serverLog(LL_WARNING, "Failed opening Unix socket: %s", server.neterr);
            exit(1);
        }
        anetNonBlock(NULL, fd); /* the accept loop must never block */
        anetCloexec(fd);        /* don't leak the fd to forked children */
        listener->fd[listener->count++] = fd;
    }

    return C_OK;
}
+
/* Allocate a fresh, not-yet-connected connection object of the Unix
 * type. The caller owns the returned object. */
static connection *connCreateUnix(void) {
    connection *conn = zcalloc(sizeof(connection));
    conn->type = &CT_Unix;
    conn->fd = -1; /* no underlying socket yet */
    conn->iovcnt = IOV_MAX;

    return conn;
}

/* Wrap an already accepted socket 'fd' into a connection object in the
 * ACCEPTING state. 'priv' is unused for Unix sockets. */
static connection *connCreateAcceptedUnix(int fd, void *priv) {
    UNUSED(priv);
    connection *conn = connCreateUnix();
    conn->fd = fd;
    conn->state = CONN_STATE_ACCEPTING;
    return conn;
}
+
/* ae callback fired when the listening Unix socket is readable: accept
 * up to MAX_ACCEPTS_PER_CALL pending connections and hand each one to
 * the common accept logic. */
static void connUnixAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
    int cfd, max = MAX_ACCEPTS_PER_CALL;
    UNUSED(el);
    UNUSED(mask);
    UNUSED(privdata);

    while(max--) {
        cfd = anetUnixAccept(server.neterr, fd);
        if (cfd == ANET_ERR) {
            /* EWOULDBLOCK just means the backlog is drained. */
            if (errno != EWOULDBLOCK)
                serverLog(LL_WARNING,
                    "Accepting client connection: %s", server.neterr);
            return;
        }
        serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
        acceptCommonHandler(connCreateAcceptedUnix(cfd, NULL),CLIENT_UNIX_SOCKET,NULL);
    }
}
+
/* Byte-level operations on a connected Unix socket behave exactly like
 * on a TCP socket, so all of the handlers below simply delegate to the
 * TCP connection type implementation. */
static void connUnixShutdown(connection *conn) {
    connectionTypeTcp()->shutdown(conn);
}

static void connUnixClose(connection *conn) {
    connectionTypeTcp()->close(conn);
}

static int connUnixAccept(connection *conn, ConnectionCallbackFunc accept_handler) {
    return connectionTypeTcp()->accept(conn, accept_handler);
}

static int connUnixWrite(connection *conn, const void *data, size_t data_len) {
    return connectionTypeTcp()->write(conn, data, data_len);
}

static int connUnixWritev(connection *conn, const struct iovec *iov, int iovcnt) {
    return connectionTypeTcp()->writev(conn, iov, iovcnt);
}

static int connUnixRead(connection *conn, void *buf, size_t buf_len) {
    return connectionTypeTcp()->read(conn, buf, buf_len);
}

static int connUnixSetWriteHandler(connection *conn, ConnectionCallbackFunc func, int barrier) {
    return connectionTypeTcp()->set_write_handler(conn, func, barrier);
}

static int connUnixSetReadHandler(connection *conn, ConnectionCallbackFunc func) {
    return connectionTypeTcp()->set_read_handler(conn, func);
}

/* Human readable description of the last error recorded on 'conn'. */
static const char *connUnixGetLastError(connection *conn) {
    return strerror(conn->last_errno);
}

/* Synchronous (blocking, with timeout) I/O helpers operating directly on
 * the file descriptor. */
static ssize_t connUnixSyncWrite(connection *conn, char *ptr, ssize_t size, long long timeout) {
    return syncWrite(conn->fd, ptr, size, timeout);
}

static ssize_t connUnixSyncRead(connection *conn, char *ptr, ssize_t size, long long timeout) {
    return syncRead(conn->fd, ptr, size, timeout);
}

static ssize_t connUnixSyncReadLine(connection *conn, char *ptr, ssize_t size, long long timeout) {
    return syncReadLine(conn->fd, ptr, size, timeout);
}
+
/* Connection type descriptor for Unix domain sockets. Fields left NULL
 * are features this type does not need: no per-type init/cleanup or
 * configuration, no outgoing connect support, and no buffered pending
 * data (unlike e.g. TLS). */
static ConnectionType CT_Unix = {
    /* connection type */
    .get_type = connUnixGetType,

    /* connection type initialize & finalize & configure */
    .init = NULL,
    .cleanup = NULL,
    .configure = NULL,

    /* ae & accept & listen & error & address handler */
    .ae_handler = connUnixEventHandler,
    .accept_handler = connUnixAcceptHandler,
    .addr = connUnixAddr,
    .is_local = connUnixIsLocal,
    .listen = connUnixListen,

    /* create/shutdown/close connection */
    .conn_create = connCreateUnix,
    .conn_create_accepted = connCreateAcceptedUnix,
    .shutdown = connUnixShutdown,
    .close = connUnixClose,

    /* connect & accept */
    .connect = NULL,
    .blocking_connect = NULL,
    .accept = connUnixAccept,

    /* IO */
    .write = connUnixWrite,
    .writev = connUnixWritev,
    .read = connUnixRead,
    .set_write_handler = connUnixSetWriteHandler,
    .set_read_handler = connUnixSetReadHandler,
    .get_last_error = connUnixGetLastError,
    .sync_write = connUnixSyncWrite,
    .sync_read = connUnixSyncRead,
    .sync_readline = connUnixSyncReadLine,

    /* pending data */
    .has_pending_data = NULL,
    .process_pending_data = NULL,
};

/* Register the Unix socket connection type into the global connection
 * type table. */
int RedisRegisterConnectionTypeUnix(void)
{
    return connTypeRegister(&CT_Unix);
}
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..26d92b9
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,1431 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fmacros.h"
+#include "fpconv_dtoa.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <math.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <float.h>
+#include <stdint.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+
+#include "util.h"
+#include "sha256.h"
+#include "config.h"
+
+#define UNUSED(x) ((void)(x))
+
/* Glob-style pattern matching: returns 1 if 'pattern' matches 'string',
 * 0 otherwise. Supported syntax: '*' (any sequence), '?' (any single
 * character), '[...]' classes with leading '^' negation and 'a-b' ranges,
 * and '\' to escape the next character. 'nocase' enables case-insensitive
 * comparison. '*skipLongerMatches' is shared across the whole recursion
 * and is used to prune redundant '*' backtracking. */
static int stringmatchlen_impl(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase, int *skipLongerMatches)
{
    while(patternLen && stringLen) {
        switch(pattern[0]) {
        case '*':
            /* Coalesce consecutive '*' into a single one. */
            while (patternLen && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            while(stringLen) {
                /* Try matching the rest of the pattern against every
                 * suffix of the remaining string. */
                if (stringmatchlen_impl(pattern+1, patternLen-1,
                            string, stringLen, nocase, skipLongerMatches))
                    return 1; /* match */
                if (*skipLongerMatches)
                    return 0; /* no match */
                string++;
                stringLen--;
            }
            /* There was no match for the rest of the pattern starting
             * from anywhere in the rest of the string. If there were
             * any '*' earlier in the pattern, we can terminate the
             * search early without trying to match them to longer
             * substrings. This is because a longer match for the
             * earlier part of the pattern would require the rest of the
             * pattern to match starting later in the string, and we
             * have just determined that there is no match for the rest
             * of the pattern starting from anywhere in the current
             * string. */
            *skipLongerMatches = 1;
            return 0; /* no match */
            break;
        case '?':
            /* '?' consumes exactly one character of the string. */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            pattern++;
            patternLen--;
            not = pattern[0] == '^'; /* leading '^' negates the class */
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while(1) {
                if (pattern[0] == '\\' && patternLen >= 2) {
                    /* Escaped character inside the class: compare it
                     * literally. */
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen == 0) {
                    /* Unterminated class: step back one position so the
                     * outer loop terminates cleanly. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    /* Range like 'a-z'; swap the bounds if reversed. */
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((int)pattern[0]) == tolower((int)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                /* Escape: the next pattern char is matched literally by
                 * the default case below. */
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            /* Plain character comparison. */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((int)pattern[0]) != tolower((int)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* String fully consumed: trailing '*' can still match the
             * empty string, skip them before the final check. */
            while(*pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
+
/* Public entry point for glob-style matching of 'pattern' against
 * 'string' (both length-delimited, binary safe). 'nocase' enables
 * case-insensitive matching. Returns 1 on match, 0 otherwise. */
int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase) {
    /* Fresh per-call pruning flag, set by the implementation when '*'
     * backtracking can be cut short. */
    int prune_flag = 0;
    return stringmatchlen_impl(pattern, patternLen,
                               string, stringLen, nocase, &prune_flag);
}

/* Convenience wrapper for NUL-terminated strings. */
int stringmatch(const char *pattern, const char *string, int nocase) {
    int plen = strlen(pattern);
    int slen = strlen(string);
    return stringmatchlen(pattern, plen, string, slen, nocase);
}
+
/* Fuzz stringmatchlen() trying to crash it with bad input: feed it ten
 * million random pattern/string pairs. Returns the number of matches
 * (only so the work cannot be optimized away). */
int stringmatchlen_fuzz_test(void) {
    char str[32];
    char pat[32];
    int total_matches = 0;

    for (int cycle = 0; cycle < 10000000; cycle++) {
        /* Keep the rand() call order (lengths first, then the buffers)
         * so the generated stream is reproducible. */
        int slen = rand() % sizeof(str);
        int plen = rand() % sizeof(pat);
        for (int j = 0; j < slen; j++) str[j] = rand() % 128;
        for (int j = 0; j < plen; j++) pat[j] = rand() % 128;
        total_matches += stringmatchlen(pat, plen, str, slen, 0);
    }
    return total_matches;
}
+
+
/* Convert a string representing an amount of memory into the number of
 * bytes, so for instance memtoull("1Gb") will return 1073741824 that is
 * (1024*1024*1024). Units with a trailing 'b' are powers of two
 * (1kb = 1024), without it powers of ten (1k = 1000); units are case
 * insensitive.
 *
 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
 * set to 0. On error the function return value is 0, regardless of the
 * fact 'err' is NULL or not. */
unsigned long long memtoull(const char *p, int *err) {
    const char *u;
    char buf[128];
    long mul; /* unit multiplier */
    unsigned long long val;
    unsigned int digits;

    if (err) *err = 0;

    /* Search the first non digit character. Negative amounts of memory
     * make no sense, so reject a leading minus right away. The cast to
     * unsigned char avoids undefined behavior when isdigit() receives a
     * negative char (CERT STR37-C). */
    u = p;
    if (*u == '-') {
        if (err) *err = 1;
        return 0;
    }
    while(*u && isdigit((unsigned char)*u)) u++;
    if (u == p) {
        /* No digits at all (e.g. "kb" or ""): not a valid amount. */
        if (err) *err = 1;
        return 0;
    }
    if (*u == '\0' || !strcasecmp(u,"b")) {
        mul = 1;
    } else if (!strcasecmp(u,"k")) {
        mul = 1000;
    } else if (!strcasecmp(u,"kb")) {
        mul = 1024;
    } else if (!strcasecmp(u,"m")) {
        mul = 1000*1000;
    } else if (!strcasecmp(u,"mb")) {
        mul = 1024*1024;
    } else if (!strcasecmp(u,"g")) {
        mul = 1000L*1000*1000;
    } else if (!strcasecmp(u,"gb")) {
        mul = 1024L*1024*1024;
    } else {
        if (err) *err = 1;
        return 0;
    }

    /* Copy the digits into a buffer, we'll use strtoull() to convert
     * the digit (without the unit) into a number. */
    digits = u-p;
    if (digits >= sizeof(buf)) {
        if (err) *err = 1;
        return 0;
    }
    memcpy(buf,p,digits);
    buf[digits] = '\0';

    char *endptr;
    errno = 0;
    val = strtoull(buf,&endptr,10);
    /* errno == ERANGE means the digits alone already overflow an
     * unsigned long long: fail instead of silently returning garbage. */
    if ((val == 0 && errno == EINVAL) || errno == ERANGE || *endptr != '\0') {
        if (err) *err = 1;
        return 0;
    }
    /* Also fail if applying the unit multiplier would overflow. */
    if (mul != 1 && val > ULLONG_MAX / (unsigned long long)mul) {
        if (err) *err = 1;
        return 0;
    }
    return val*mul;
}
+
/* Search a memory buffer for the first byte that belongs to the set
 * 'chars' (like strpbrk(), but binary safe on both arguments).
 * Returns a pointer to the first such byte, or NULL when none of the
 * set's bytes occur in the first 'len' bytes of 's'. */
const char *mempbrk(const char *s, size_t len, const char *chars, size_t charslen) {
    const char *end = s + len;

    for (const char *p = s; p < end; p++) {
        if (memchr(chars, *p, charslen) != NULL)
            return p;
    }
    return NULL;
}
+
/* Modify the buffer in place, replacing every byte of 's' that appears
 * in the 'from' set with the byte at the same index of the 'to' set.
 * When a byte occurs more than once in 'from', the first (lowest index)
 * mapping wins. Always returns 's'. */
char *memmapchars(char *s, size_t len, const char *from, const char *to, size_t setlen) {
    for (char *p = s; p < s + len; p++) {
        size_t k = 0;
        while (k < setlen && *p != from[k]) k++;
        if (k < setlen) *p = to[k];
    }
    return s;
}
+
/* Return the number of digits of 'v' when converted to string in radix 10.
 * Works four digits per iteration, so a full 64 bit value needs at most
 * five divisions. See ll2string() for more information. */
uint32_t digits10(uint64_t v) {
    uint32_t count = 1;

    for (;;) {
        if (v < 10) return count;
        if (v < 100) return count + 1;
        if (v < 1000) return count + 2;
        if (v < 10000) return count + 3;
        v /= 10000U;
        count += 4;
    }
}
+
/* Like digits10() but for signed values: accounts for one extra
 * character (the minus sign) when 'v' is negative. */
uint32_t sdigits10(int64_t v) {
    if (v >= 0) return digits10((uint64_t)v);

    /* |LLONG_MIN| cannot be represented as a positive long long, so
     * compute the magnitude in unsigned arithmetic. */
    uint64_t magnitude = (v == LLONG_MIN) ?
        ((uint64_t)LLONG_MAX) + 1 : (uint64_t)-v;
    return digits10(magnitude) + 1; /* +1 for the minus sign. */
}
+
/* Convert a long long into a string. Returns the number of characters
 * needed to represent the number (minus sign included), or 0 if 'dst'
 * (of size 'dstlen') is too small, in which case dst is set to the
 * empty string when there is room for at least the terminator. */
int ll2string(char *dst, size_t dstlen, long long svalue) {
    unsigned long long magnitude;
    int sign_chars = 0;

    if (svalue >= 0) {
        magnitude = (unsigned long long)svalue;
    } else {
        /* Emit the sign here and let ull2string() handle the digits.
         * LLONG_MIN has no positive counterpart: build its magnitude
         * in unsigned arithmetic. */
        magnitude = (svalue == LLONG_MIN) ?
            ((unsigned long long)LLONG_MAX) + 1 : (unsigned long long)-svalue;
        if (dstlen < 2) {
            if (dstlen > 0) dst[0] = '\0'; /* force NUL termination */
            return 0;
        }
        sign_chars = 1;
        dst[0] = '-';
        dst++;
        dstlen--;
    }

    /* Convert the magnitude; on failure ull2string() already NUL
     * terminated the (possibly advanced) buffer. */
    int digits = ull2string(dst, dstlen, magnitude);
    return digits == 0 ? 0 : digits + sign_chars;
}
+
/* Convert an unsigned long long into a string. Returns the number of
 * characters needed to represent the number, or 0 if 'dst' (of size
 * 'dstlen') is too small, in which case dst is set to the empty string
 * when there is room for at least the terminator. */
int ull2string(char *dst, size_t dstlen, unsigned long long value) {
    uint32_t length = digits10(value);

    if (length >= dstlen) {
        /* Not enough room: force NUL termination and report failure. */
        if (dstlen > 0) dst[0] = '\0';
        return 0;
    }

    /* Emit the digits right-to-left, least significant first. */
    uint32_t pos = length;
    dst[pos] = '\0';
    do {
        dst[--pos] = (char)('0' + (value % 10));
        value /= 10;
    } while (value != 0);

    return (int)length;
}
+
/* Convert a string into a long long. Returns 1 if the string could be parsed
 * into a (non-overflowing) long long, 0 otherwise. The value will be set to
 * the parsed value when appropriate.
 *
 * Note that this function demands that the string strictly represents
 * a long long: no spaces or other characters before or after the string
 * representing the number are accepted, nor zeroes at the start if not
 * for the string "0" representing the zero number.
 *
 * Because of its strictness, it is safe to use this function to check if
 * you can convert a string into a long long, and obtain back the string
 * from the number without any loss in the string representation. */
int string2ll(const char *s, size_t slen, long long *value) {
    const char *p = s;    /* current parse position */
    size_t plen = 0;      /* number of bytes consumed so far */
    int negative = 0;
    unsigned long long v; /* accumulated magnitude */

    /* A string of zero length or excessive length is not a valid number. */
    if (plen == slen || slen >= LONG_STR_SIZE)
        return 0;

    /* Special case: first and only digit is 0. */
    if (slen == 1 && p[0] == '0') {
        if (value != NULL) *value = 0;
        return 1;
    }

    /* Handle negative numbers: just set a flag and continue like if it
     * was a positive number. Later convert into negative. */
    if (p[0] == '-') {
        negative = 1;
        p++; plen++;

        /* Abort on only a negative sign. */
        if (plen == slen)
            return 0;
    }

    /* First digit should be 1-9, otherwise the string should just be 0
     * (leading zeroes are rejected to keep the representation unique). */
    if (p[0] >= '1' && p[0] <= '9') {
        v = p[0]-'0';
        p++; plen++;
    } else {
        return 0;
    }

    /* Parse all the other digits, checking for overflow at every step. */
    while (plen < slen && p[0] >= '0' && p[0] <= '9') {
        if (v > (ULLONG_MAX / 10)) /* Overflow. */
            return 0;
        v *= 10;

        if (v > (ULLONG_MAX - (p[0]-'0'))) /* Overflow. */
            return 0;
        v += p[0]-'0';

        p++; plen++;
    }

    /* Return if not all bytes were used (trailing garbage). */
    if (plen < slen)
        return 0;

    /* Convert to negative if needed, and do the final overflow check when
     * converting from unsigned long long to long long.
     * Note: -(LLONG_MIN+1)+1 computes |LLONG_MIN| without signed overflow. */
    if (negative) {
        if (v > ((unsigned long long)(-(LLONG_MIN+1))+1)) /* Overflow. */
            return 0;
        if (value != NULL) *value = -v;
    } else {
        if (v > LLONG_MAX) /* Overflow. */
            return 0;
        if (value != NULL) *value = v;
    }
    return 1;
}
+
/* Helper function to convert a string to an unsigned long long value:
 * tries the fast, strict string2ll() first and falls back to strtoull()
 * for values above LLONG_MAX. Returns 1 on success, 0 if the number is
 * invalid or out of range. */
int string2ull(const char *s, unsigned long long *value) {
    long long signed_val;

    /* Fast path: strict parser, covers everything up to LLONG_MAX. */
    if (string2ll(s, strlen(s), &signed_val)) {
        if (signed_val < 0) return 0; /* Negative values are out of range. */
        *value = (unsigned long long)signed_val;
        return 1;
    }

    /* Slow path: strtoull() for the [LLONG_MAX+1, ULLONG_MAX] range. */
    char *endptr = NULL;
    errno = 0;
    *value = strtoull(s, &endptr, 10);
    if (errno == EINVAL || errno == ERANGE) return 0;
    if (*s == '\0' || *endptr != '\0') return 0; /* empty or trailing junk */
    return 1; /* Conversion done! */
}
+
/* Convert a string into a long. Returns 1 if the string could be parsed
 * into a (non-overflowing) long, 0 otherwise. '*lval' is set to the
 * parsed value on success. */
int string2l(const char *s, size_t slen, long *lval) {
    long long parsed;

    /* Reuse the strict long long parser, then range-check for 'long'
     * (relevant on platforms where long is 32 bit). */
    if (string2ll(s, slen, &parsed) == 0)
        return 0;
    if (parsed < LONG_MIN)
        return 0;
    if (parsed > LONG_MAX)
        return 0;

    *lval = (long)parsed;
    return 1;
}
+
+/* Convert a string into a double. Returns 1 if the string could be parsed
+ * into a (non-overflowing) double, 0 otherwise. The value will be set to
+ * the parsed value when appropriate.
+ *
+ * Note that this function demands that the string strictly represents
+ * a double: no spaces or other characters before or after the string
+ * representing the number are accepted. */
+int string2ld(const char *s, size_t slen, long double *dp) {
+ char buf[MAX_LONG_DOUBLE_CHARS];
+ long double value;
+ char *eptr;
+
+ if (slen == 0 || slen >= sizeof(buf)) return 0;
+ memcpy(buf,s,slen);
+ buf[slen] = '\0';
+
+ errno = 0;
+ value = strtold(buf, &eptr);
+ if (isspace(buf[0]) || eptr[0] != '\0' ||
+ (size_t)(eptr-buf) != slen ||
+ (errno == ERANGE &&
+ (value == HUGE_VAL || value == -HUGE_VAL || fpclassify(value) == FP_ZERO)) ||
+ errno == EINVAL ||
+ isnan(value))
+ return 0;
+
+ if (dp) *dp = value;
+ return 1;
+}
+
/* Convert a string into a double. Returns 1 if the string could be parsed
 * into a (non-overflowing) double, 0 otherwise. '*dp' is set to the
 * parsed value when the function succeeds (it may also be clobbered on
 * failure, callers must not rely on it then).
 *
 * Note that this function demands that the string strictly represents
 * a double: no spaces or other characters before or after the string
 * representing the number are accepted. */
int string2d(const char *s, size_t slen, double *dp) {
    errno = 0;
    char *eptr;
    *dp = strtod(s, &eptr);
    /* Reject: empty input, leading whitespace (strtod would skip it),
     * trailing junk or embedded NUL bytes (eptr-s != slen), overflow or
     * underflow (ERANGE), and NaN. The cast to unsigned char avoids
     * undefined behavior when isspace() receives a negative char value
     * (CERT STR37-C). */
    if (slen == 0 ||
        isspace((unsigned char)s[0]) ||
        (size_t)(eptr-(char*)s) != slen ||
        (errno == ERANGE &&
            (*dp == HUGE_VAL || *dp == -HUGE_VAL || fpclassify(*dp) == FP_ZERO)) ||
        isnan(*dp))
        return 0;
    return 1;
}
+
/* Returns 1 if the double value can safely be represented as a long long
 * without precision loss, in which case the converted value is stored in
 * '*out'; returns 0 otherwise (including on platforms where the
 * assumptions below don't hold). */
int double2ll(double d, long long *out) {
#if (DBL_MANT_DIG >= 52) && (DBL_MANT_DIG <= 63) && (LLONG_MAX == 0x7fffffffffffffffLL)
    /* We assume 64 bit long long and a double with at least 52 mantissa
     * bits. Values outside [-2^62, 2^62] are rejected before the cast,
     * since casting an out-of-range double to long long is undefined
     * behavior. Within that range, every double that survives the
     * double -> long long -> double round trip has a zero fractional
     * part and is therefore exactly representable. */
    if (d < (double)(-LLONG_MAX/2) || d > (double)(LLONG_MAX/2))
        return 0;
    long long as_ll = (long long)d;
    if ((double)as_ll == d) {
        *out = as_ll;
        return 1;
    }
#endif
    return 0;
}
+
/* Convert a double to a string representation. Returns the number of bytes
 * required. The representation should always be parsable by strtod(3).
 * This function does not support human-friendly formatting like ld2string
 * does. It is intended mainly to be used inside t_zset.c when writing scores
 * into a listpack representing a sorted set. */
int d2string(char *buf, size_t len, double value) {
    if (isnan(value)) {
        /* Libc in some systems will format nan in a different way,
         * like nan, -nan, NAN, nan(char-sequence).
         * So we normalize it and create a single nan form in an explicit way. */
        len = snprintf(buf,len,"nan");
    } else if (isinf(value)) {
        /* Libc in odd systems (Hi Solaris!) will format infinite in a
         * different way, so better to handle it in an explicit way. */
        if (value < 0)
            len = snprintf(buf,len,"-inf");
        else
            len = snprintf(buf,len,"inf");
    } else if (value == 0) {
        /* See: http://en.wikipedia.org/wiki/Signed_zero, "Comparisons".
         * 1.0/value distinguishes -0.0 (gives -inf) from +0.0 (+inf). */
        if (1.0/value < 0)
            len = snprintf(buf,len,"-0");
        else
            len = snprintf(buf,len,"0");
    } else {
        long long lvalue;
        /* Integer printing function is much faster, check if we can safely use it. */
        if (double2ll(value, &lvalue))
            len = ll2string(buf,len,lvalue);
        else {
            /* fpconv_dtoa() writes the shortest round-tripping form but
             * does not NUL-terminate, so add the terminator here. */
            len = fpconv_dtoa(value, buf);
            buf[len] = '\0';
        }
    }

    return len;
}
+
/* Convert a double into a string with 'fractional_digits' digits after the
 * dot. This is an optimized version of snprintf "%.<fractional_digits>f".
 * We scale the double by 10 ^ <fractional_digits> and round it to a long
 * long to shift the decimal places into the integer.
 * Note that multiplying the input value by 10 ^ <fractional_digits> could
 * overflow, but in the scenarios where Redis currently uses this function
 * that is not possible.
 * After we get the long long representation we use the logic from the
 * ull2string function in this file, which is based on the following article:
 * https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920
 *
 * Input values:
 * dst: the buffer to store the string representation
 * dstlen: the buffer length
 * dvalue: the input double
 * fractional_digits: the number of fractional digits after the dot, between 1 and 17
 *
 * Return values:
 * Returns the number of characters needed to represent the number.
 * If the buffer is not big enough to store the string, 0 is returned.
 */
int fixedpoint_d2string(char *dst, size_t dstlen, double dvalue, int fractional_digits) {
    if (fractional_digits < 1 || fractional_digits > 17)
        goto err;
    /* min size of 2 ( due to 0. ) + n fractional_digitits + \0 */
    if ((int)dstlen < (fractional_digits+3))
        goto err;
    if (dvalue == 0) {
        /* Fast path for zero: "0." followed by n zeroes. */
        dst[0] = '0';
        dst[1] = '.';
        memset(dst + 2, '0', fractional_digits);
        dst[fractional_digits+2] = '\0';
        return fractional_digits + 2;
    }
    /* scale and round */
    static double powers_of_ten[] = {1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0,
        10000000.0, 100000000.0, 1000000000.0, 10000000000.0, 100000000000.0, 1000000000000.0,
        10000000000000.0, 100000000000000.0, 1000000000000000.0, 10000000000000000.0,
        100000000000000000.0 };
    long long svalue = llrint(dvalue * powers_of_ten[fractional_digits]);
    unsigned long long value;
    /* write sign */
    int negative = 0;
    if (svalue < 0) {
        /* |LLONG_MIN| cannot be represented as a positive long long:
         * compute the magnitude with unsigned arithmetic. */
        if (svalue != LLONG_MIN) {
            value = -svalue;
        } else {
            value = ((unsigned long long) LLONG_MAX)+1;
        }
        if (dstlen < 2)
            goto err;
        negative = 1;
        dst[0] = '-';
        dst++;
        dstlen--;
    } else {
        value = svalue;
    }

    /* Lookup table with the textual form of every two-digit number. */
    static const char digitsd[201] =
        "0001020304050607080910111213141516171819"
        "2021222324252627282930313233343536373839"
        "4041424344454647484950515253545556575859"
        "6061626364656667686970717273747576777879"
        "8081828384858687888990919293949596979899";

    /* Check length. */
    uint32_t ndigits = digits10(value);
    if (ndigits >= dstlen) goto err;
    int integer_digits = ndigits - fractional_digits;
    /* Fractional only check to avoid representing 0.7750 as .7750.
     * This means we need to increment the length and store 0 as the first character.
     */
    if (integer_digits < 1) {
        dst[0] = '0';
        integer_digits = 1;
    }
    dst[integer_digits] = '.';
    int size = integer_digits + 1 + fractional_digits;
    /* fill with 0 from fractional digits until size */
    memset(dst + integer_digits + 1, '0', fractional_digits);
    int next = size - 1;
    /* Emit two digits per iteration, right to left, jumping over the
     * cell reserved for the decimal dot. */
    while (value >= 100) {
        int const i = (value % 100) * 2;
        value /= 100;
        dst[next] = digitsd[i + 1];
        dst[next - 1] = digitsd[i];
        next -= 2;
        /* dot position */
        if (next == integer_digits) {
            next--;
        }
    }

    /* Handle last 1-2 digits. */
    if (value < 10) {
        dst[next] = '0' + (uint32_t) value;
    } else {
        int i = (uint32_t) value * 2;
        dst[next] = digitsd[i + 1];
        dst[next - 1] = digitsd[i];
    }
    /* Null term. */
    dst[size] = '\0';
    return size + negative;
err:
    /* force add Null termination */
    if (dstlen > 0)
        dst[0] = '\0';
    return 0;
}
+
/* Trims off trailing zeros (and a then-trailing dot) from a string
 * representing a double, in place. Returns the new length; 'buf' is
 * NUL-terminated at that length. Strings without a '.' are untouched. */
int trimDoubleString(char *buf, size_t len) {
    if (strchr(buf, '.') != NULL) {
        char *end = buf + len;
        while (end > buf && end[-1] == '0') end--;
        if (end > buf && end[-1] == '.') end--; /* drop a bare dot too */
        len = end - buf;
    }
    buf[len] = '\0';
    return len;
}
+
/* Create a string object from a long double.
 * If mode is LD_STR_HUMAN it does not use exponential format and trims
 * trailing zeroes at the end (may result in loss of precision).
 * If mode is LD_STR_AUTO exp format is used and the output of snprintf()
 * is not modified (may result in loss of precision).
 * If mode is LD_STR_HEX hexadecimal format is used (no loss of precision).
 *
 * The function returns the length of the string or zero if there was not
 * enough buffer room to store it (buf is set to the empty string then). */
int ld2string(char *buf, size_t len, long double value, ld2string_mode mode) {
    size_t l = 0;

    if (isinf(value)) {
        /* Libc in odd systems (Hi Solaris!) will format infinite in a
         * different way, so better to handle it in an explicit way. */
        if (len < 5) goto err; /* No room. 5 is "-inf\0" */
        if (value > 0) {
            memcpy(buf,"inf",3);
            l = 3;
        } else {
            memcpy(buf,"-inf",4);
            l = 4;
        }
    } else if (isnan(value)) {
        /* Libc in some systems will format nan in a different way,
         * like nan, -nan, NAN, nan(char-sequence).
         * So we normalize it and create a single nan form in an explicit way. */
        if (len < 4) goto err; /* No room. 4 is "nan\0" */
        memcpy(buf, "nan", 3);
        l = 3;
    } else {
        switch (mode) {
        case LD_STR_AUTO:
            l = snprintf(buf,len,"%.17Lg",value);
            if (l+1 > len) goto err;; /* No room. */
            break;
        case LD_STR_HEX:
            l = snprintf(buf,len,"%La",value);
            if (l+1 > len) goto err; /* No room. */
            break;
        case LD_STR_HUMAN:
            /* We use 17 digits precision since with 128 bit floats that precision
             * after rounding is able to represent most small decimal numbers in a
             * way that is "non surprising" for the user (that is, most small
             * decimal numbers will be represented in a way that when converted
             * back into a string are exactly the same as what the user typed.) */
            l = snprintf(buf,len,"%.17Lf",value);
            if (l+1 > len) goto err; /* No room. */
            /* Now remove trailing zeroes after the '.' */
            if (strchr(buf,'.') != NULL) {
                char *p = buf+l-1;
                while(*p == '0') {
                    p--;
                    l--;
                }
                if (*p == '.') l--;
            }
            /* Normalize "-0" to "0". */
            if (l == 2 && buf[0] == '-' && buf[1] == '0') {
                buf[0] = '0';
                l = 1;
            }
            break;
        default: goto err; /* Invalid mode. */
        }
    }
    buf[l] = '\0';
    return l;
err:
    /* force add Null termination */
    if (len > 0)
        buf[0] = '\0';
    return 0;
}
+
+/* Get random bytes, attempts to get an initial seed from /dev/urandom and
+ * the uses a one way hash function in counter mode to generate a random
+ * stream. However if /dev/urandom is not available, a weaker seed is used.
+ *
+ * This function is not thread safe, since the state is global. */
void getRandomBytes(unsigned char *p, size_t len) {
    /* Global state. */
    static int seed_initialized = 0;
    static unsigned char seed[64]; /* 512 bit internal block size. */
    static uint64_t counter = 0; /* The counter we hash with the seed. */

    if (!seed_initialized) {
        /* Initialize a seed and use SHA1 in counter mode, where we hash
         * the same seed with a progressive counter. For the goals of this
         * function we just need non-colliding strings, there are no
         * cryptographic security needs. */
        FILE *fp = fopen("/dev/urandom","r");
        if (fp == NULL || fread(seed,sizeof(seed),1,fp) != 1) {
            /* Revert to a weaker seed, and in this case reseed again
             * at every call.*/
            /* Mix wall-clock time, the pid, and the fp pointer value
             * (NULL here, or a FILE* whose fread() failed) into every
             * seed byte. seed_initialized stays 0 on purpose, so the
             * next call tries /dev/urandom again. */
            for (unsigned int j = 0; j < sizeof(seed); j++) {
                struct timeval tv;
                gettimeofday(&tv,NULL);
                pid_t pid = getpid();
                seed[j] = tv.tv_sec ^ tv.tv_usec ^ pid ^ (long)fp;
            }
        } else {
            seed_initialized = 1;
        }
        if (fp) fclose(fp);
    }

    /* Produce the output stream as HMAC-SHA256(seed, counter), one digest
     * per iteration, bumping the counter each time.
     * NOTE(review): SHA256_BLOCK_SIZE is used here as the digest length —
     * presumably it is 32 in this codebase; confirm against sha256.h. */
    while(len) {
        /* This implements SHA256-HMAC. */
        unsigned char digest[SHA256_BLOCK_SIZE];
        unsigned char kxor[64];
        unsigned int copylen =
            len > SHA256_BLOCK_SIZE ? SHA256_BLOCK_SIZE : len;

        /* IKEY: key xored with 0x36. */
        memcpy(kxor,seed,sizeof(kxor));
        for (unsigned int i = 0; i < sizeof(kxor); i++) kxor[i] ^= 0x36;

        /* Obtain HASH(IKEY||MESSAGE). */
        SHA256_CTX ctx;
        sha256_init(&ctx);
        sha256_update(&ctx,kxor,sizeof(kxor));
        sha256_update(&ctx,(unsigned char*)&counter,sizeof(counter));
        sha256_final(&ctx,digest);

        /* OKEY: key xored with 0x5c. */
        memcpy(kxor,seed,sizeof(kxor));
        for (unsigned int i = 0; i < sizeof(kxor); i++) kxor[i] ^= 0x5C;

        /* Obtain HASH(OKEY || HASH(IKEY||MESSAGE)). */
        sha256_init(&ctx);
        sha256_update(&ctx,kxor,sizeof(kxor));
        sha256_update(&ctx,digest,SHA256_BLOCK_SIZE);
        sha256_final(&ctx,digest);

        /* Increment the counter for the next iteration. */
        counter++;

        memcpy(p,digest,copylen);
        len -= copylen;
        p += copylen;
    }
}
+
/* Generate the Redis "Run ID": fill "p" with "len" random lowercase
 * hexadecimal characters. A run_id identifies one execution of Redis, so
 * reconnecting to an instance with a different run_id means it is either
 * another instance or it was restarted. */
void getRandomHexChars(char *p, size_t len) {
    static const char hexdigits[] = "0123456789abcdef";

    /* First fill the buffer with raw random bytes, then map each byte's
     * low nibble to its hex digit in place. */
    getRandomBytes((unsigned char*)p,len);
    for (size_t i = 0; i < len; i++)
        p[i] = hexdigits[p[i] & 0x0F];
}
+
+/* Given the filename, return the absolute path as an SDS string, or NULL
+ * if it fails for some reason. Note that "filename" may be an absolute path
+ * already, this will be detected and handled correctly.
+ *
+ * The function does not try to normalize everything, but only the obvious
+ * case of one or more "../" appearing at the start of "filename"
+ * relative path. */
sds getAbsolutePath(char *filename) {
    char cwd[1024];
    sds abspath;
    sds relpath = sdsnew(filename);

    relpath = sdstrim(relpath," \r\n\t");
    if (relpath[0] == '/') return relpath; /* Path is already absolute. */

    /* If path is relative, join cwd and relative path. */
    if (getcwd(cwd,sizeof(cwd)) == NULL) {
        /* getcwd() failed (e.g. ERANGE when the cwd does not fit our 1024
         * byte buffer): report failure to the caller. */
        sdsfree(relpath);
        return NULL;
    }
    abspath = sdsnew(cwd);
    if (sdslen(abspath) && abspath[sdslen(abspath)-1] != '/')
        abspath = sdscat(abspath,"/");

    /* At this point we have the current path always ending with "/", and
     * the trimmed relative path. Try to normalize the obvious case of
     * trailing ../ elements at the start of the path.
     *
     * For every "../" we find in the filename, we remove it and also remove
     * the last element of the cwd, unless the current cwd is "/". */
    while (sdslen(relpath) >= 3 &&
           relpath[0] == '.' && relpath[1] == '.' && relpath[2] == '/')
    {
        sdsrange(relpath,3,-1); /* Drop the leading "../". */
        if (sdslen(abspath) > 1) {
            /* abspath always ends with '/': scan backwards from the char
             * before that trailing '/' to the previous '/', then cut the
             * last path component (trimlen chars) off the end. */
            char *p = abspath + sdslen(abspath)-2;
            int trimlen = 1;

            while(*p != '/') {
                p--;
                trimlen++;
            }
            sdsrange(abspath,0,-(trimlen+1));
        }
    }

    /* Finally glue the two parts together. */
    abspath = sdscatsds(abspath,relpath);
    sdsfree(relpath);
    return abspath;
}
+
+/*
+ * Gets the proper timezone in a more portable fashion
+ * i.e timezone variables are linux specific.
+ */
+long getTimeZone(void) {
+#if defined(__linux__) || defined(__sun)
+ return timezone;
+#else
+ struct timeval tv;
+ struct timezone tz;
+
+ gettimeofday(&tv, &tz);
+
+ return tz.tz_minuteswest * 60L;
+#endif
+}
+
/* Return non-zero if the specified path is a plain file basename, i.e. it
 * contains no directory component at all. We only need to reject '/' and
 * '\' characters, which is sufficient in the environments where Redis
 * runs. */
int pathIsBaseName(char *path) {
    for (const char *p = path; *p; p++) {
        if (*p == '/' || *p == '\\') return 0;
    }
    return 1;
}
+
/* Return non-zero if "filename" exists and is a regular file,
 * zero otherwise (missing, or not a regular file). */
int fileExist(char *filename) {
    struct stat sb;
    if (stat(filename, &sb) != 0) return 0;
    return S_ISREG(sb.st_mode) ? 1 : 0;
}
+
/* Return non-zero if "dname" exists and is a directory, zero otherwise. */
int dirExists(char *dname) {
    struct stat sb;
    if (stat(dname, &sb) != 0) return 0;
    return S_ISDIR(sb.st_mode) != 0;
}
+
/* Create the directory "dname" (mode 0755) unless it already exists.
 * Returns 0 on success (including when the directory was already there),
 * -1 on error with errno set; errno is forced to ENOTDIR when the path
 * exists but is not a directory. */
int dirCreateIfMissing(char *dname) {
    if (mkdir(dname, 0755) == 0) return 0;
    if (errno != EEXIST) return -1;
    /* EEXIST: fine only if what exists is actually a directory. */
    if (dirExists(dname)) return 0;
    errno = ENOTDIR;
    return -1;
}
+
+int dirRemove(char *dname) {
+ DIR *dir;
+ struct stat stat_entry;
+ struct dirent *entry;
+ char full_path[PATH_MAX + 1];
+
+ if ((dir = opendir(dname)) == NULL) {
+ return -1;
+ }
+
+ while ((entry = readdir(dir)) != NULL) {
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue;
+
+ snprintf(full_path, sizeof(full_path), "%s/%s", dname, entry->d_name);
+
+ int fd = open(full_path, O_RDONLY|O_NONBLOCK);
+ if (fd == -1) {
+ closedir(dir);
+ return -1;
+ }
+
+ if (fstat(fd, &stat_entry) == -1) {
+ close(fd);
+ closedir(dir);
+ return -1;
+ }
+ close(fd);
+
+ if (S_ISDIR(stat_entry.st_mode) != 0) {
+ if (dirRemove(full_path) == -1) {
+ return -1;
+ }
+ continue;
+ }
+
+ if (unlink(full_path) != 0) {
+ closedir(dir);
+ return -1;
+ }
+ }
+
+ if (rmdir(dname) != 0) {
+ closedir(dir);
+ return -1;
+ }
+
+ closedir(dir);
+ return 0;
+}
+
/* Build "path/filename" as a newly allocated sds string.
 * Ownership of the returned string passes to the caller (presumably to be
 * released with sdsfree — confirm against sds.h conventions). */
sds makePath(char *path, char *filename) {
    return sdscatfmt(sdsempty(), "%s/%s", path, filename);
}
+
/* Given the filename, sync the corresponding directory.
 *
 * Usually a portable and safe pattern to overwrite existing files would be like:
 * 1. create a new temp file (on the same file system!)
 * 2. write data to the temp file
 * 3. fsync() the temp file
 * 4. rename the temp file to the appropriate name
 * 5. fsync() the containing directory
 *
 * Returns 0 on success (or when the platform cannot sync directories),
 * -1 on error with errno set. */
int fsyncFileDir(const char *filename) {
#ifdef _AIX
    /* AIX is unable to fsync a directory */
    return 0;
#endif
    char temp_filename[PATH_MAX + 1];
    char *dname;
    int dir_fd;

    /* Reject paths that would not fit our dirname() scratch buffer. */
    if (strlen(filename) > PATH_MAX) {
        errno = ENAMETOOLONG;
        return -1;
    }

    /* In the glibc implementation dirname may modify their argument. */
    memcpy(temp_filename, filename, strlen(filename) + 1);
    dname = dirname(temp_filename);

    dir_fd = open(dname, O_RDONLY);
    if (dir_fd == -1) {
        /* Some OSs don't allow us to open directories at all, just
         * ignore the error in that case */
        if (errno == EISDIR) {
            return 0;
        }
        return -1;
    }
    /* Some OSs don't allow us to fsync directories at all, so we can ignore
     * those errors. */
    if (redis_fsync(dir_fd) == -1 && !(errno == EBADF || errno == EINVAL)) {
        /* Preserve the fsync errno across close(), which may clobber it. */
        int save_errno = errno;
        close(dir_fd);
        errno = save_errno;
        return -1;
    }

    close(dir_fd);
    return 0;
}
+
+ /* free OS pages backed by file */
int reclaimFilePageCache(int fd, size_t offset, size_t length) {
#ifdef HAVE_FADVISE
    /* Advise the kernel to drop the cached pages for this byte range
     * (a zero length means "to the end of the file"). Returns -1 if the
     * advice call reports an error, 0 otherwise. */
    int rc = posix_fadvise(fd, offset, length, POSIX_FADV_DONTNEED);
    return rc ? -1 : 0;
#else
    UNUSED(fd);
    UNUSED(offset);
    UNUSED(length);
    return 0;
#endif
}
+
+#ifdef REDIS_TEST
+#include <assert.h>
+#include <sys/mman.h>
+#include "testhelp.h"
+
/* Unit tests for string2ll(): malformed inputs ('+' prefix, surrounding
 * spaces, leading zeroes) must be rejected, valid inputs parsed exactly,
 * including the LLONG_MIN/LLONG_MAX boundaries and one-past overflow. */
static void test_string2ll(void) {
    char buf[32];
    long long v;

    /* May not start with +. */
    redis_strlcpy(buf,"+1",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 0);

    /* Leading space. */
    redis_strlcpy(buf," 1",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 0);

    /* Trailing space. */
    redis_strlcpy(buf,"1 ",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 0);

    /* May not start with 0. */
    redis_strlcpy(buf,"01",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 0);

    redis_strlcpy(buf,"-1",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 1);
    assert(v == -1);

    redis_strlcpy(buf,"0",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 1);
    assert(v == 0);

    redis_strlcpy(buf,"1",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 1);
    assert(v == 1);

    redis_strlcpy(buf,"99",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 1);
    assert(v == 99);

    redis_strlcpy(buf,"-99",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 1);
    assert(v == -99);

    redis_strlcpy(buf,"-9223372036854775808",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 1);
    assert(v == LLONG_MIN);

    redis_strlcpy(buf,"-9223372036854775809",sizeof(buf)); /* overflow */
    assert(string2ll(buf,strlen(buf),&v) == 0);

    redis_strlcpy(buf,"9223372036854775807",sizeof(buf));
    assert(string2ll(buf,strlen(buf),&v) == 1);
    assert(v == LLONG_MAX);

    redis_strlcpy(buf,"9223372036854775808",sizeof(buf)); /* overflow */
    assert(string2ll(buf,strlen(buf),&v) == 0);
}
+
/* Unit tests for string2l(): same rejection rules as string2ll(), plus
 * LONG_MIN/LONG_MAX boundaries — only checked where long is narrower than
 * long long (32 bit platforms), since the overflow strings differ. */
static void test_string2l(void) {
    char buf[32];
    long v;

    /* May not start with +. */
    redis_strlcpy(buf,"+1",sizeof(buf));
    assert(string2l(buf,strlen(buf),&v) == 0);

    /* May not start with 0. */
    redis_strlcpy(buf,"01",sizeof(buf));
    assert(string2l(buf,strlen(buf),&v) == 0);

    redis_strlcpy(buf,"-1",sizeof(buf));
    assert(string2l(buf,strlen(buf),&v) == 1);
    assert(v == -1);

    redis_strlcpy(buf,"0",sizeof(buf));
    assert(string2l(buf,strlen(buf),&v) == 1);
    assert(v == 0);

    redis_strlcpy(buf,"1",sizeof(buf));
    assert(string2l(buf,strlen(buf),&v) == 1);
    assert(v == 1);

    redis_strlcpy(buf,"99",sizeof(buf));
    assert(string2l(buf,strlen(buf),&v) == 1);
    assert(v == 99);

    redis_strlcpy(buf,"-99",sizeof(buf));
    assert(string2l(buf,strlen(buf),&v) == 1);
    assert(v == -99);

#if LONG_MAX != LLONG_MAX
    redis_strlcpy(buf,"-2147483648",sizeof(buf));
    assert(string2l(buf,strlen(buf),&v) == 1);
    assert(v == LONG_MIN);

    redis_strlcpy(buf,"-2147483649",sizeof(buf)); /* overflow */
    assert(string2l(buf,strlen(buf),&v) == 0);

    redis_strlcpy(buf,"2147483647",sizeof(buf));
    assert(string2l(buf,strlen(buf),&v) == 1);
    assert(v == LONG_MAX);

    redis_strlcpy(buf,"2147483648",sizeof(buf)); /* overflow */
    assert(string2l(buf,strlen(buf),&v) == 0);
#endif
}
+
/* Unit tests for ll2string(): exact digits and returned length for zero,
 * small positive/negative values, and the LLONG_MIN/LLONG_MAX extremes. */
static void test_ll2string(void) {
    char buf[32];
    long long v;
    int sz;

    v = 0;
    sz = ll2string(buf, sizeof buf, v);
    assert(sz == 1);
    assert(!strcmp(buf, "0"));

    v = -1;
    sz = ll2string(buf, sizeof buf, v);
    assert(sz == 2);
    assert(!strcmp(buf, "-1"));

    v = 99;
    sz = ll2string(buf, sizeof buf, v);
    assert(sz == 2);
    assert(!strcmp(buf, "99"));

    v = -99;
    sz = ll2string(buf, sizeof buf, v);
    assert(sz == 3);
    assert(!strcmp(buf, "-99"));

    v = -2147483648;
    sz = ll2string(buf, sizeof buf, v);
    assert(sz == 11);
    assert(!strcmp(buf, "-2147483648"));

    v = LLONG_MIN;
    sz = ll2string(buf, sizeof buf, v);
    assert(sz == 20);
    assert(!strcmp(buf, "-9223372036854775808"));

    v = LLONG_MAX;
    sz = ll2string(buf, sizeof buf, v);
    assert(sz == 19);
    assert(!strcmp(buf, "9223372036854775807"));
}
+
/* Minimal ld2string() test: NaN must be normalized to the literal string
 * "nan" regardless of how the local libc would format it. */
static void test_ld2string(void) {
    char buf[32];
    long double v;
    int sz;

    v = 0.0 / 0.0; /* Evaluates to a quiet NaN. */
    sz = ld2string(buf, sizeof(buf), v, LD_STR_AUTO);
    assert(sz == 3);
    assert(!strcmp(buf, "nan"));
}
+
/* Unit tests for fixedpoint_d2string(): fixed fractional-digit formatting,
 * rounding, NUL termination, and the rejection cases (the asserts below
 * show that fractional_digits outside [1,17] and too-small buffers must
 * return 0). */
static void test_fixedpoint_d2string(void) {
    char buf[32];
    double v;
    int sz;
    v = 0.0;
    sz = fixedpoint_d2string(buf, sizeof buf, v, 4);
    assert(sz == 6);
    assert(!strcmp(buf, "0.0000"));
    sz = fixedpoint_d2string(buf, sizeof buf, v, 1);
    assert(sz == 3);
    assert(!strcmp(buf, "0.0"));
    /* set junk in buffer */
    memset(buf,'A',32);
    v = 0.0001;
    sz = fixedpoint_d2string(buf, sizeof buf, v, 4);
    assert(sz == 6);
    assert(buf[sz] == '\0');
    assert(!strcmp(buf, "0.0001"));
    /* set junk in buffer */
    memset(buf,'A',32);
    /* A value just below 0.0001 must round up to it at 4 digits. */
    v = 6.0642951598391699e-05;
    sz = fixedpoint_d2string(buf, sizeof buf, v, 4);
    assert(sz == 6);
    assert(buf[sz] == '\0');
    assert(!strcmp(buf, "0.0001"));
    v = 0.01;
    sz = fixedpoint_d2string(buf, sizeof buf, v, 4);
    assert(sz == 6);
    assert(!strcmp(buf, "0.0100"));
    sz = fixedpoint_d2string(buf, sizeof buf, v, 1);
    assert(sz == 3);
    assert(!strcmp(buf, "0.0"));
    v = -0.01;
    sz = fixedpoint_d2string(buf, sizeof buf, v, 4);
    assert(sz == 7);
    assert(!strcmp(buf, "-0.0100"));
    v = -0.1;
    sz = fixedpoint_d2string(buf, sizeof buf, v, 1);
    assert(sz == 4);
    assert(!strcmp(buf, "-0.1"));
    v = 0.1;
    sz = fixedpoint_d2string(buf, sizeof buf, v, 1);
    assert(sz == 3);
    assert(!strcmp(buf, "0.1"));
    v = 0.01;
    sz = fixedpoint_d2string(buf, sizeof buf, v, 17);
    assert(sz == 19);
    assert(!strcmp(buf, "0.01000000000000000"));
    v = 10.01;
    sz = fixedpoint_d2string(buf, sizeof buf, v, 4);
    assert(sz == 7);
    assert(!strcmp(buf, "10.0100"));
    /* negative tests */
    sz = fixedpoint_d2string(buf, sizeof buf, v, 18);
    assert(sz == 0);
    sz = fixedpoint_d2string(buf, sizeof buf, v, 0);
    assert(sz == 0);
    sz = fixedpoint_d2string(buf, 1, v, 1);
    assert(sz == 0);
}
+
+#if defined(__linux__)
+/* Since fadvise and mincore is only supported in specific platforms like
+ * Linux, we only verify the fadvise mechanism works in Linux */
+static int cache_exist(int fd) {
+ unsigned char flag;
+ void *m = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
+ assert(m);
+ assert(mincore(m, 4096, &flag) == 0);
+ munmap(m, 4096);
+ /* the least significant bit of the byte will be set if the corresponding
+ * page is currently resident in memory */
+ return flag&1;
+}
+
/* Verify that reclaimFilePageCache() actually evicts pages populated by
 * both a write and a read, using mincore() (via cache_exist) to observe
 * residency. Linux only. */
static void test_reclaimFilePageCache(void) {
    char *tmpfile = "/tmp/redis-reclaim-cache-test";
    int fd = open(tmpfile, O_RDWR|O_CREAT, 0644);
    assert(fd >= 0);

    /* test write file */
    char buf[4] = "foo";
    assert(write(fd, buf, sizeof(buf)) > 0);
    assert(cache_exist(fd));
    assert(redis_fsync(fd) == 0);
    assert(reclaimFilePageCache(fd, 0, 0) == 0);
    assert(!cache_exist(fd));

    /* test read file */
    assert(pread(fd, buf, sizeof(buf), 0) > 0);
    assert(cache_exist(fd));
    assert(reclaimFilePageCache(fd, 0, 0) == 0);
    assert(!cache_exist(fd));

    /* BUGFIX: the fd was previously leaked here; also fixed the typo in
     * the success message ("...Cach" -> "...Cache"). */
    close(fd);
    unlink(tmpfile);
    printf("reclaimFilePageCache test is ok\n");
}
+#endif
+
/* Entry point for the util.c unit tests (compiled only under REDIS_TEST).
 * Always returns 0; individual checks abort via assert() on failure. */
int utilTest(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);
    UNUSED(flags);

    test_string2ll();
    test_string2l();
    test_ll2string();
    test_ld2string();
    test_fixedpoint_d2string();
#if defined(__linux__)
    /* NOTE(review): the page-cache reclaim test is skipped when the
     * VALGRIND flag is set — presumably because mincore()-based residency
     * checks are not meaningful under Valgrind; confirm with the test
     * driver. */
    if (!(flags & REDIS_TEST_VALGRIND)) {
        test_reclaimFilePageCache();
    }
#endif
    printf("Done testing util\n");
    return 0;
}
+#endif
+
+
diff --git a/src/util.h b/src/util.h
new file mode 100644
index 0000000..7f64e63
--- /dev/null
+++ b/src/util.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __REDIS_UTIL_H
+#define __REDIS_UTIL_H
+
+#include <stdint.h>
+#include "sds.h"
+
+/* The maximum number of characters needed to represent a long double
+ * as a string (long double has a huge range of some 4952 chars, see LDBL_MAX).
+ * This should be the size of the buffer given to ld2string */
+#define MAX_LONG_DOUBLE_CHARS 5*1024
+
+/* The maximum number of characters needed to represent a double
+ * as a string (double has a huge range of some 328 chars, see DBL_MAX).
+ * This should be the size of the buffer for sprintf with %f */
+#define MAX_DOUBLE_CHARS 400
+
+/* The maximum number of characters needed to for d2string/fpconv_dtoa call.
+ * Since it uses %g and not %f, some 40 chars should be enough. */
+#define MAX_D2STRING_CHARS 128
+
+/* Bytes needed for long -> str + '\0' */
+#define LONG_STR_SIZE 21
+
+/* long double to string conversion options */
+typedef enum {
+ LD_STR_AUTO, /* %.17Lg */
+ LD_STR_HUMAN, /* %.17Lf + Trimming of trailing zeros */
+ LD_STR_HEX /* %La */
+} ld2string_mode;
+
+int stringmatchlen(const char *p, int plen, const char *s, int slen, int nocase);
+int stringmatch(const char *p, const char *s, int nocase);
+int stringmatchlen_fuzz_test(void);
+unsigned long long memtoull(const char *p, int *err);
+const char *mempbrk(const char *s, size_t len, const char *chars, size_t charslen);
+char *memmapchars(char *s, size_t len, const char *from, const char *to, size_t setlen);
+uint32_t digits10(uint64_t v);
+uint32_t sdigits10(int64_t v);
+int ll2string(char *s, size_t len, long long value);
+int ull2string(char *s, size_t len, unsigned long long value);
+int string2ll(const char *s, size_t slen, long long *value);
+int string2ull(const char *s, unsigned long long *value);
+int string2l(const char *s, size_t slen, long *value);
+int string2ld(const char *s, size_t slen, long double *dp);
+int string2d(const char *s, size_t slen, double *dp);
+int trimDoubleString(char *buf, size_t len);
+int d2string(char *buf, size_t len, double value);
+int fixedpoint_d2string(char *dst, size_t dstlen, double dvalue, int fractional_digits);
+int ld2string(char *buf, size_t len, long double value, ld2string_mode mode);
+int double2ll(double d, long long *out);
+int yesnotoi(char *s);
+sds getAbsolutePath(char *filename);
+long getTimeZone(void);
+int pathIsBaseName(char *path);
+int dirCreateIfMissing(char *dname);
+int dirExists(char *dname);
+int dirRemove(char *dname);
+int fileExist(char *filename);
+sds makePath(char *path, char *filename);
+int fsyncFileDir(const char *filename);
+int reclaimFilePageCache(int fd, size_t offset, size_t length);
+
+size_t redis_strlcpy(char *dst, const char *src, size_t dsize);
+size_t redis_strlcat(char *dst, const char *src, size_t dsize);
+
+#ifdef REDIS_TEST
+int utilTest(int argc, char **argv, int flags);
+#endif
+
+#endif
diff --git a/src/valgrind.sup b/src/valgrind.sup
new file mode 100644
index 0000000..5d6367e
--- /dev/null
+++ b/src/valgrind.sup
@@ -0,0 +1,26 @@
+{
+ <lzf_uninitialized_hash_table>
+ Memcheck:Cond
+ fun:lzf_compress
+}
+
+{
+ <lzf_uninitialized_hash_table>
+ Memcheck:Value4
+ fun:lzf_compress
+}
+
+{
+ <lzf_uninitialized_hash_table>
+ Memcheck:Value8
+ fun:lzf_compress
+}
+
+{
 <negative size allocation, see integration/corrupt-dump>
+ Memcheck:FishyValue
+ malloc(size)
+ fun:malloc
+ fun:ztrymalloc_usable
+ fun:ztrymalloc
+}
diff --git a/src/version.h b/src/version.h
new file mode 100644
index 0000000..7c6eea6
--- /dev/null
+++ b/src/version.h
@@ -0,0 +1,2 @@
+#define REDIS_VERSION "7.2.4"
+#define REDIS_VERSION_NUM 0x00070204
diff --git a/src/ziplist.c b/src/ziplist.c
new file mode 100644
index 0000000..c891625
--- /dev/null
+++ b/src/ziplist.c
@@ -0,0 +1,2666 @@
+/* The ziplist is a specially encoded dually linked list that is designed
+ * to be very memory efficient. It stores both strings and integer values,
+ * where integers are encoded as actual integers instead of a series of
+ * characters. It allows push and pop operations on either side of the list
+ * in O(1) time. However, because every operation requires a reallocation of
+ * the memory used by the ziplist, the actual complexity is related to the
+ * amount of memory used by the ziplist.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * ZIPLIST OVERALL LAYOUT
+ * ======================
+ *
+ * The general layout of the ziplist is as follows:
+ *
+ * <zlbytes> <zltail> <zllen> <entry> <entry> ... <entry> <zlend>
+ *
+ * NOTE: all fields are stored in little endian, if not specified otherwise.
+ *
+ * <uint32_t zlbytes> is an unsigned integer to hold the number of bytes that
+ * the ziplist occupies, including the four bytes of the zlbytes field itself.
+ * This value needs to be stored to be able to resize the entire structure
+ * without the need to traverse it first.
+ *
+ * <uint32_t zltail> is the offset to the last entry in the list. This allows
+ * a pop operation on the far side of the list without the need for full
+ * traversal.
+ *
+ * <uint16_t zllen> is the number of entries. When there are more than
+ * 2^16-2 entries, this value is set to 2^16-1 and we need to traverse the
+ * entire list to know how many items it holds.
+ *
+ * <uint8_t zlend> is a special entry representing the end of the ziplist.
+ * Is encoded as a single byte equal to 255. No other normal entry starts
+ * with a byte set to the value of 255.
+ *
+ * ZIPLIST ENTRIES
+ * ===============
+ *
+ * Every entry in the ziplist is prefixed by metadata that contains two pieces
+ * of information. First, the length of the previous entry is stored to be
+ * able to traverse the list from back to front. Second, the entry encoding is
+ * provided. It represents the entry type, integer or string, and in the case
+ * of strings it also represents the length of the string payload.
+ * So a complete entry is stored like this:
+ *
+ * <prevlen> <encoding> <entry-data>
+ *
+ * Sometimes the encoding represents the entry itself, like for small integers
+ * as we'll see later. In such a case the <entry-data> part is missing, and we
+ * could have just:
+ *
+ * <prevlen> <encoding>
+ *
+ * The length of the previous entry, <prevlen>, is encoded in the following way:
+ * If this length is smaller than 254 bytes, it will only consume a single
+ * byte representing the length as an unsigned 8 bit integer. When the length
+ * is greater than or equal to 254, it will consume 5 bytes. The first byte is
+ * set to 254 (FE) to indicate a larger value is following. The remaining 4
+ * bytes take the length of the previous entry as value.
+ *
+ * So practically an entry is encoded in the following way:
+ *
+ * <prevlen from 0 to 253> <encoding> <entry>
+ *
+ * Or alternatively if the previous entry length is greater than 253 bytes
+ * the following encoding is used:
+ *
+ * 0xFE <4 bytes unsigned little endian prevlen> <encoding> <entry>
+ *
+ * The encoding field of the entry depends on the content of the
+ * entry. When the entry is a string, the first 2 bits of the encoding first
+ * byte will hold the type of encoding used to store the length of the string,
+ * followed by the actual length of the string. When the entry is an integer
+ * the first 2 bits are both set to 1. The following 2 bits are used to specify
+ * what kind of integer will be stored after this header. An overview of the
+ * different types and encodings is as follows. The first byte is always enough
+ * to determine the kind of entry.
+ *
+ * |00pppppp| - 1 byte
+ * String value with length less than or equal to 63 bytes (6 bits).
+ * "pppppp" represents the unsigned 6 bit length.
+ * |01pppppp|qqqqqqqq| - 2 bytes
+ * String value with length less than or equal to 16383 bytes (14 bits).
+ * IMPORTANT: The 14 bit number is stored in big endian.
+ * |10000000|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
+ * String value with length greater than or equal to 16384 bytes.
+ * Only the 4 bytes following the first byte represents the length
+ * up to 2^32-1. The 6 lower bits of the first byte are not used and
+ * are set to zero.
+ * IMPORTANT: The 32 bit number is stored in big endian.
+ * |11000000| - 3 bytes
+ * Integer encoded as int16_t (2 bytes).
+ * |11010000| - 5 bytes
+ * Integer encoded as int32_t (4 bytes).
+ * |11100000| - 9 bytes
+ * Integer encoded as int64_t (8 bytes).
+ * |11110000| - 4 bytes
+ * Integer encoded as 24 bit signed (3 bytes).
+ * |11111110| - 2 bytes
+ * Integer encoded as 8 bit signed (1 byte).
+ * |1111xxxx| - (with xxxx between 0001 and 1101) immediate 4 bit integer.
+ * Unsigned integer from 0 to 12. The encoded value is actually from
+ * 1 to 13 because 0000 and 1111 can not be used, so 1 should be
+ * subtracted from the encoded 4 bit value to obtain the right value.
+ * |11111111| - End of ziplist special entry.
+ *
+ * Like for the ziplist header, all the integers are represented in little
+ * endian byte order, even when this code is compiled in big endian systems.
+ *
+ * EXAMPLES OF ACTUAL ZIPLISTS
+ * ===========================
+ *
+ * The following is a ziplist containing the two elements representing
+ * the strings "2" and "5". It is composed of 15 bytes, that we visually
+ * split into sections:
+ *
+ * [0f 00 00 00] [0c 00 00 00] [02 00] [00 f3] [02 f6] [ff]
+ * | | | | | |
+ * zlbytes zltail zllen "2" "5" end
+ *
+ * The first 4 bytes represent the number 15, that is the number of bytes
+ * the whole ziplist is composed of. The second 4 bytes are the offset
+ * at which the last ziplist entry is found, that is 12, in fact the
+ * last entry, that is "5", is at offset 12 inside the ziplist.
+ * The next 16 bit integer represents the number of elements inside the
+ * ziplist, its value is 2 since there are just two elements inside.
+ * Finally "00 f3" is the first entry representing the number 2. It is
+ * composed of the previous entry length, which is zero because this is
+ * our first entry, and the byte F3 which corresponds to the encoding
+ * |1111xxxx| with xxxx between 0001 and 1101. We need to remove the "F"
+ * higher order bits 1111, and subtract 1 from the "3", so the entry value
+ * is "2". The next entry has a prevlen of 02, since the first entry is
+ * composed of exactly two bytes. The entry itself, F6, is encoded exactly
+ * like the first entry, and 6-1 = 5, so the value of the entry is 5.
+ * Finally the special entry FF signals the end of the ziplist.
+ *
+ * Adding another element to the above string with the value "Hello World"
+ * allows us to show how the ziplist encodes small strings. We'll just show
+ * the hex dump of the entry itself. Imagine the bytes as following the
+ * entry that stores "5" in the ziplist above:
+ *
+ * [02] [0b] [48 65 6c 6c 6f 20 57 6f 72 6c 64]
+ *
+ * The first byte, 02, is the length of the previous entry. The next
+ * byte represents the encoding in the pattern |00pppppp| that means
+ * that the entry is a string of length <pppppp>, so 0B means that
+ * an 11 bytes string follows. From the third byte (48) to the last (64)
+ * there are just the ASCII characters for "Hello World".
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2020, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include "zmalloc.h"
+#include "util.h"
+#include "ziplist.h"
+#include "config.h"
+#include "endianconv.h"
+#include "redisassert.h"
+
#define ZIP_END 255 /* Special "end of ziplist" entry. */
#define ZIP_BIG_PREVLEN 254 /* ZIP_BIG_PREVLEN - 1 is the max number of bytes of
                               the previous entry, for the "prevlen" field prefixing
                               each entry, to be represented with just a single byte.
                               Otherwise it is represented as FE AA BB CC DD, where
                               AA BB CC DD are a 4 bytes unsigned integer
                               representing the previous entry len. */

/* Different encoding/length possibilities. String encodings use the two most
 * significant bits of the first byte (00, 01, 10); integer encodings all
 * start with the bits 11. */
#define ZIP_STR_MASK 0xc0
#define ZIP_INT_MASK 0x30
#define ZIP_STR_06B (0 << 6)
#define ZIP_STR_14B (1 << 6)
#define ZIP_STR_32B (2 << 6)
#define ZIP_INT_16B (0xc0 | 0<<4)
#define ZIP_INT_32B (0xc0 | 1<<4)
#define ZIP_INT_64B (0xc0 | 2<<4)
#define ZIP_INT_24B (0xc0 | 3<<4)
#define ZIP_INT_8B 0xfe

/* 4 bit integer immediate encoding |1111xxxx| with xxxx between
 * 0001 and 1101. */
#define ZIP_INT_IMM_MASK 0x0f /* Mask to extract the 4 bits value. To add
                                 one is needed to reconstruct the value. */
#define ZIP_INT_IMM_MIN 0xf1 /* 11110001 */
#define ZIP_INT_IMM_MAX 0xfd /* 11111101 */

#define INT24_MAX 0x7fffff
#define INT24_MIN (-INT24_MAX - 1)

/* Macro to determine if the entry is a string. String entries never start
 * with "11" as most significant bits of the first byte. */
#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK)

/* Utility macros.*/

/* Return total bytes a ziplist is composed of.
 * Note: the header fields below are stored in little endian on disk;
 * intrev*ifbe conversions are applied by the callers of these macros. */
#define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl)))

/* Return the offset of the last item inside the ziplist. */
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))

/* Return the length of a ziplist, or UINT16_MAX if the length cannot be
 * determined without scanning the whole ziplist. */
#define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))

/* The size of a ziplist header: two 32 bit integers for the total
 * bytes count and last item offset. One 16 bit integer for the number
 * of items field. */
#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))

/* Size of the "end of ziplist" entry. Just one byte. */
#define ZIPLIST_END_SIZE (sizeof(uint8_t))

/* Return the pointer to the first entry of a ziplist. */
#define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE)

/* Return the pointer to the last entry of a ziplist, using the
 * last entry offset inside the ziplist header. */
#define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))

/* Return the pointer to the last byte of a ziplist, which is, the
 * end of ziplist FF entry. */
#define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-ZIPLIST_END_SIZE)

/* Increment the number of items field in the ziplist header. Note that this
 * macro should never overflow the unsigned 16 bit integer, since entries are
 * always pushed one at a time. When UINT16_MAX is reached we want the count
 * to stay there to signal that a full scan is needed to get the number of
 * items inside the ziplist. */
#define ZIPLIST_INCR_LENGTH(zl,incr) { \
    if (intrev16ifbe(ZIPLIST_LENGTH(zl)) < UINT16_MAX) \
        ZIPLIST_LENGTH(zl) = intrev16ifbe(intrev16ifbe(ZIPLIST_LENGTH(zl))+incr); \
}

/* Don't let ziplists grow over 1GB in any case, don't wanna risk overflow in
 * zlbytes */
#define ZIPLIST_MAX_SAFETY_SIZE (1<<30)
+int ziplistSafeToAdd(unsigned char* zl, size_t add) {
+ size_t len = zl? ziplistBlobLen(zl): 0;
+ if (len + add > ZIPLIST_MAX_SAFETY_SIZE)
+ return 0;
+ return 1;
+}
+
+
/* We use this structure to receive information about a ziplist entry.
 * Note that this is not how the data is actually encoded, is just what we
 * get filled by a function in order to operate more easily. */
typedef struct zlentry {
    unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
    unsigned int prevrawlen;     /* Previous entry len. */
    unsigned int lensize;        /* Bytes used to encode this entry type/len.
                                    For example strings have a 1, 2 or 5 bytes
                                    header. Integers always use a single byte.*/
    unsigned int len;            /* Bytes used to represent the actual entry.
                                    For strings this is just the string length
                                    while for integers it is 1, 2, 3, 4, 8 or
                                    0 (for 4 bit immediate) depending on the
                                    number range. */
    unsigned int headersize;     /* prevrawlensize + lensize. */
    unsigned char encoding;      /* Set to ZIP_STR_* or ZIP_INT_* depending on
                                    the entry encoding. However for 4 bits
                                    immediate integers this can assume a range
                                    of values and must be range-checked. */
    unsigned char *p;            /* Pointer to the very start of the entry, that
                                    is, this points to prev-entry-len field. */
} zlentry;

/* Reset a zlentry structure to all-zero/NULL fields. */
#define ZIPLIST_ENTRY_ZERO(zle) { \
    (zle)->prevrawlensize = (zle)->prevrawlen = 0; \
    (zle)->lensize = (zle)->len = (zle)->headersize = 0; \
    (zle)->encoding = 0; \
    (zle)->p = NULL; \
}
+
/* Extract the encoding from the byte pointed by 'ptr' and set it into
 * 'encoding' field of the zlentry structure. For string encodings only the
 * two most significant bits are kept; integer encodings (>= 0xc0) are kept
 * as-is since the whole byte identifies them. */
#define ZIP_ENTRY_ENCODING(ptr, encoding) do {  \
    (encoding) = ((ptr)[0]); \
    if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
} while(0)

/* Sentinel returned by zipEncodingLenSize() for an unknown encoding byte. */
#define ZIP_ENCODING_SIZE_INVALID 0xff
+/* Return the number of bytes required to encode the entry type + length.
+ * On error, return ZIP_ENCODING_SIZE_INVALID */
+static inline unsigned int zipEncodingLenSize(unsigned char encoding) {
+ if (encoding == ZIP_INT_16B || encoding == ZIP_INT_32B ||
+ encoding == ZIP_INT_24B || encoding == ZIP_INT_64B ||
+ encoding == ZIP_INT_8B)
+ return 1;
+ if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX)
+ return 1;
+ if (encoding == ZIP_STR_06B)
+ return 1;
+ if (encoding == ZIP_STR_14B)
+ return 2;
+ if (encoding == ZIP_STR_32B)
+ return 5;
+ return ZIP_ENCODING_SIZE_INVALID;
+}
+
+#define ZIP_ASSERT_ENCODING(encoding) do { \
+ assert(zipEncodingLenSize(encoding) != ZIP_ENCODING_SIZE_INVALID); \
+} while (0)
+
+/* Return bytes needed to store integer encoded by 'encoding' */
+static inline unsigned int zipIntSize(unsigned char encoding) {
+ switch(encoding) {
+ case ZIP_INT_8B: return 1;
+ case ZIP_INT_16B: return 2;
+ case ZIP_INT_24B: return 3;
+ case ZIP_INT_32B: return 4;
+ case ZIP_INT_64B: return 8;
+ }
+ if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX)
+ return 0; /* 4 bit immediate */
+ /* bad encoding, covered by a previous call to ZIP_ASSERT_ENCODING */
+ redis_unreachable();
+ return 0;
+}
+
+/* Write the encoding header of the entry in 'p'. If p is NULL it just returns
+ * the amount of bytes required to encode such a length. Arguments:
+ *
+ * 'encoding' is the encoding we are using for the entry. It could be
+ * ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX
+ * for single-byte small immediate integers.
+ *
+ * 'rawlen' is only used for ZIP_STR_* encodings and is the length of the
+ * string that this entry represents.
+ *
+ * The function returns the number of bytes used by the encoding/length
+ * header stored in 'p'. */
+unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
+ unsigned char len = 1, buf[5];
+
+ if (ZIP_IS_STR(encoding)) {
+ /* Although encoding is given it may not be set for strings,
+ * so we determine it here using the raw length. */
+ if (rawlen <= 0x3f) {
+ if (!p) return len;
+ buf[0] = ZIP_STR_06B | rawlen;
+ } else if (rawlen <= 0x3fff) {
+ len += 1;
+ if (!p) return len;
+ buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);
+ buf[1] = rawlen & 0xff;
+ } else {
+ len += 4;
+ if (!p) return len;
+ buf[0] = ZIP_STR_32B;
+ buf[1] = (rawlen >> 24) & 0xff;
+ buf[2] = (rawlen >> 16) & 0xff;
+ buf[3] = (rawlen >> 8) & 0xff;
+ buf[4] = rawlen & 0xff;
+ }
+ } else {
+ /* Implies integer encoding, so length is always 1. */
+ if (!p) return len;
+ buf[0] = encoding;
+ }
+
+ /* Store this length at p. */
+ memcpy(p,buf,len);
+ return len;
+}
+
/* Decode the entry encoding type and data length (string length for strings,
 * number of bytes used for the integer for integer entries) encoded in 'ptr'.
 * The 'encoding' variable is input, extracted by the caller (typically with
 * ZIP_ENTRY_ENCODING), the 'lensize' variable will hold the number of bytes
 * required to encode the entry length, and the 'len' variable will hold the
 * entry length.
 * On invalid encoding error, lensize is set to 0 so callers can detect it. */
#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do { \
    if ((encoding) < ZIP_STR_MASK) { \
        if ((encoding) == ZIP_STR_06B) { \
            (lensize) = 1; \
            (len) = (ptr)[0] & 0x3f; \
        } else if ((encoding) == ZIP_STR_14B) { \
            (lensize) = 2; \
            (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1]; \
        } else if ((encoding) == ZIP_STR_32B) { \
            (lensize) = 5; \
            (len) = ((uint32_t)(ptr)[1] << 24) | \
                    ((uint32_t)(ptr)[2] << 16) | \
                    ((uint32_t)(ptr)[3] << 8) | \
                    ((uint32_t)(ptr)[4]); \
        } else { \
            (lensize) = 0; /* bad encoding, should be covered by a previous */ \
            (len) = 0;     /* ZIP_ASSERT_ENCODING / zipEncodingLenSize, or */ \
                           /* match the lensize after this macro with 0. */ \
        } \
    } else { \
        (lensize) = 1; \
        if ((encoding) == ZIP_INT_8B) (len) = 1; \
        else if ((encoding) == ZIP_INT_16B) (len) = 2; \
        else if ((encoding) == ZIP_INT_24B) (len) = 3; \
        else if ((encoding) == ZIP_INT_32B) (len) = 4; \
        else if ((encoding) == ZIP_INT_64B) (len) = 8; \
        else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) \
            (len) = 0; /* 4 bit immediate */ \
        else \
            (lensize) = (len) = 0; /* bad encoding */ \
    } \
} while(0)
+
+/* Encode the length of the previous entry and write it to "p". This only
+ * uses the larger encoding (required in __ziplistCascadeUpdate). */
+int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {
+ uint32_t u32;
+ if (p != NULL) {
+ p[0] = ZIP_BIG_PREVLEN;
+ u32 = len;
+ memcpy(p+1,&u32,sizeof(u32));
+ memrev32ifbe(p+1);
+ }
+ return 1 + sizeof(uint32_t);
+}
+
+/* Encode the length of the previous entry and write it to "p". Return the
+ * number of bytes needed to encode this length if "p" is NULL. */
+unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
+ if (p == NULL) {
+ return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(uint32_t) + 1;
+ } else {
+ if (len < ZIP_BIG_PREVLEN) {
+ p[0] = len;
+ return 1;
+ } else {
+ return zipStorePrevEntryLengthLarge(p,len);
+ }
+ }
+}
+
/* Return the number of bytes used to encode the length of the previous
 * entry. The length is returned by setting the var 'prevlensize'.
 * A first byte below ZIP_BIG_PREVLEN (254) is the length itself; otherwise
 * the length follows in the next 4 bytes. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do { \
    if ((ptr)[0] < ZIP_BIG_PREVLEN) { \
        (prevlensize) = 1; \
    } else { \
        (prevlensize) = 5; \
    } \
} while(0)

/* Return the length of the previous element, and the number of bytes that
 * are used in order to encode the previous element length.
 * 'ptr' must point to the prevlen prefix of an entry (that encodes the
 * length of the previous entry in order to navigate the elements backward).
 * The length of the previous entry is stored in 'prevlen', the number of
 * bytes needed to encode the previous entry length are stored in
 * 'prevlensize'.
 * NOTE(review): (ptr)[4] is promoted to (signed) int before the << 24 shift;
 * this is only well defined while prevlen stays below INT_MAX, which the 1GB
 * ziplist size cap appears to guarantee — confirm for untrusted payloads. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do { \
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize); \
    if ((prevlensize) == 1) { \
        (prevlen) = (ptr)[0]; \
    } else { /* prevlensize == 5 */ \
        (prevlen) = ((ptr)[4] << 24) | \
                    ((ptr)[3] << 16) | \
                    ((ptr)[2] << 8) | \
                    ((ptr)[1]); \
    } \
} while(0)
+
+/* Given a pointer 'p' to the prevlen info that prefixes an entry, this
+ * function returns the difference in number of bytes needed to encode
+ * the prevlen if the previous entry changes of size.
+ *
+ * So if A is the number of bytes used right now to encode the 'prevlen'
+ * field.
+ *
+ * And B is the number of bytes that are needed in order to encode the
+ * 'prevlen' if the previous element will be updated to one of size 'len'.
+ *
+ * Then the function returns B - A
+ *
+ * So the function returns a positive number if more space is needed,
+ * a negative number if less space is needed, or zero if the same space
+ * is needed. */
+int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
+ unsigned int prevlensize;
+ ZIP_DECODE_PREVLENSIZE(p, prevlensize);
+ return zipStorePrevEntryLength(NULL, len) - prevlensize;
+}
+
+/* Check if string pointed to by 'entry' can be encoded as an integer.
+ * Stores the integer value in 'v' and its encoding in 'encoding'. */
+int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
+ long long value;
+
+ if (entrylen >= 32 || entrylen == 0) return 0;
+ if (string2ll((char*)entry,entrylen,&value)) {
+ /* Great, the string can be encoded. Check what's the smallest
+ * of our encoding types that can hold this value. */
+ if (value >= 0 && value <= 12) {
+ *encoding = ZIP_INT_IMM_MIN+value;
+ } else if (value >= INT8_MIN && value <= INT8_MAX) {
+ *encoding = ZIP_INT_8B;
+ } else if (value >= INT16_MIN && value <= INT16_MAX) {
+ *encoding = ZIP_INT_16B;
+ } else if (value >= INT24_MIN && value <= INT24_MAX) {
+ *encoding = ZIP_INT_24B;
+ } else if (value >= INT32_MIN && value <= INT32_MAX) {
+ *encoding = ZIP_INT_32B;
+ } else {
+ *encoding = ZIP_INT_64B;
+ }
+ *v = value;
+ return 1;
+ }
+ return 0;
+}
+
/* Store integer 'value' at 'p', encoded as 'encoding'. All multi-byte
 * encodings are stored little endian (memrev*ifbe swaps on big endian
 * hosts). The caller must have reserved zipIntSize(encoding) bytes at 'p'. */
void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) {
    int16_t i16;
    int32_t i32;
    int64_t i64;
    if (encoding == ZIP_INT_8B) {
        ((int8_t*)p)[0] = (int8_t)value;
    } else if (encoding == ZIP_INT_16B) {
        i16 = value;
        memcpy(p,&i16,sizeof(i16));
        memrev16ifbe(p);
    } else if (encoding == ZIP_INT_24B) {
        /* Shift the 24 bit value into the upper 3 bytes of a 32 bit word
         * (the uint64_t cast avoids undefined behavior on a signed left
         * shift), then store only those 3 bytes. */
        i32 = ((uint64_t)value)<<8;
        memrev32ifbe(&i32);
        memcpy(p,((uint8_t*)&i32)+1,sizeof(i32)-sizeof(uint8_t));
    } else if (encoding == ZIP_INT_32B) {
        i32 = value;
        memcpy(p,&i32,sizeof(i32));
        memrev32ifbe(p);
    } else if (encoding == ZIP_INT_64B) {
        i64 = value;
        memcpy(p,&i64,sizeof(i64));
        memrev64ifbe(p);
    } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) {
        /* Nothing to do, the value is stored in the encoding itself. */
    } else {
        assert(NULL); /* Unknown encoding: must never happen. */
    }
}
+
/* Read integer encoded as 'encoding' from 'p'. This is the inverse of
 * zipSaveInteger(): values are stored little endian and sign-extended back
 * to int64_t on load. */
int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
    int16_t i16;
    int32_t i32;
    int64_t i64, ret = 0;
    if (encoding == ZIP_INT_8B) {
        ret = ((int8_t*)p)[0];
    } else if (encoding == ZIP_INT_16B) {
        memcpy(&i16,p,sizeof(i16));
        memrev16ifbe(&i16);
        ret = i16;
    } else if (encoding == ZIP_INT_32B) {
        memcpy(&i32,p,sizeof(i32));
        memrev32ifbe(&i32);
        ret = i32;
    } else if (encoding == ZIP_INT_24B) {
        /* Load the 3 stored bytes into the upper part of a 32 bit word and
         * arithmetic-shift right to sign extend, mirroring zipSaveInteger. */
        i32 = 0;
        memcpy(((uint8_t*)&i32)+1,p,sizeof(i32)-sizeof(uint8_t));
        memrev32ifbe(&i32);
        ret = i32>>8;
    } else if (encoding == ZIP_INT_64B) {
        memcpy(&i64,p,sizeof(i64));
        memrev64ifbe(&i64);
        ret = i64;
    } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) {
        /* The immediate stores value+1 in the low 4 bits of the encoding. */
        ret = (encoding & ZIP_INT_IMM_MASK)-1;
    } else {
        assert(NULL); /* Unknown encoding: must never happen. */
    }
    return ret;
}
+
/* Fills a struct with all information about an entry.
 * This function is the "unsafe" alternative to zipEntrySafe() below: it does
 * not bounds-check 'p' against the ziplist allocation.
 * Generally, all functions that return a pointer to an element in the ziplist
 * will assert that this element is valid, so it can be freely used.
 * Generally functions such ziplistGet assume the input pointer is already
 * validated (since it's the return value of another function). */
static inline void zipEntry(unsigned char *p, zlentry *e) {
    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
    ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
    ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
    assert(e->lensize != 0); /* check that encoding was valid. */
    e->headersize = e->prevrawlensize + e->lensize;
    e->p = p;
}
+
/* Fills a struct with all information about an entry.
 * This function is safe to use on untrusted pointers, it'll make sure not to
 * try to access memory outside the ziplist payload.
 * 'zlbytes' is the total allocation size of the ziplist; 'validate_prevlen'
 * additionally checks that walking backward by prevlen stays in range.
 * Returns 1 if the entry is valid, and 0 otherwise. */
static inline int zipEntrySafe(unsigned char* zl, size_t zlbytes, unsigned char *p, zlentry *e, int validate_prevlen) {
    unsigned char *zlfirst = zl + ZIPLIST_HEADER_SIZE;
    unsigned char *zllast = zl + zlbytes - ZIPLIST_END_SIZE;
#define OUT_OF_RANGE(p) (unlikely((p) < zlfirst || (p) > zllast))

    /* If there's no possibility for the header to reach outside the ziplist,
     * take the fast path. (max lensize and prevrawlensize are both 5 bytes) */
    if (p >= zlfirst && p + 10 < zllast) {
        ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
        ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
        ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
        e->headersize = e->prevrawlensize + e->lensize;
        e->p = p;
        /* We didn't call ZIP_ASSERT_ENCODING, so we check lensize was set to 0. */
        if (unlikely(e->lensize == 0))
            return 0;
        /* Make sure the entry doesn't reach outside the edge of the ziplist */
        if (OUT_OF_RANGE(p + e->headersize + e->len))
            return 0;
        /* Make sure prevlen doesn't reach outside the edge of the ziplist */
        if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen))
            return 0;
        return 1;
    }

    /* Slow path: validate each header field before decoding the next one. */

    /* Make sure the pointer doesn't reach outside the edge of the ziplist */
    if (OUT_OF_RANGE(p))
        return 0;

    /* Make sure the encoded prevlen header doesn't reach outside the allocation */
    ZIP_DECODE_PREVLENSIZE(p, e->prevrawlensize);
    if (OUT_OF_RANGE(p + e->prevrawlensize))
        return 0;

    /* Make sure encoded entry header is valid. */
    ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
    e->lensize = zipEncodingLenSize(e->encoding);
    if (unlikely(e->lensize == ZIP_ENCODING_SIZE_INVALID))
        return 0;

    /* Make sure the encoded entry header doesn't reach outside the allocation */
    if (OUT_OF_RANGE(p + e->prevrawlensize + e->lensize))
        return 0;

    /* Decode the prevlen and entry len headers. */
    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
    ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
    e->headersize = e->prevrawlensize + e->lensize;

    /* Make sure the entry doesn't reach outside the edge of the ziplist */
    if (OUT_OF_RANGE(p + e->headersize + e->len))
        return 0;

    /* Make sure prevlen doesn't reach outside the edge of the ziplist */
    if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen))
        return 0;

    e->p = p;
    return 1;
#undef OUT_OF_RANGE
}
+
/* Return the total number of bytes used by the entry pointed to by 'p',
 * validating it against the 'zlbytes' sized allocation first.
 * NOTE(review): the zipEntrySafe() call inside assert() has a required side
 * effect (it fills 'e'); this relies on assert never being compiled out —
 * redisassert.h is expected to guarantee that, confirm build flags. */
static inline unsigned int zipRawEntryLengthSafe(unsigned char* zl, size_t zlbytes, unsigned char *p) {
    zlentry e;
    assert(zipEntrySafe(zl, zlbytes, p, &e, 0));
    return e.headersize + e.len;
}

/* Return the total number of bytes used by the entry pointed to by 'p'.
 * Unsafe variant: 'p' must already be validated. */
static inline unsigned int zipRawEntryLength(unsigned char *p) {
    zlentry e;
    zipEntry(p, &e);
    return e.headersize + e.len;
}

/* Validate that the entry doesn't reach outside the ziplist allocation. */
static inline void zipAssertValidEntry(unsigned char* zl, size_t zlbytes, unsigned char *p) {
    zlentry e;
    assert(zipEntrySafe(zl, zlbytes, p, &e, 1));
}
+
+/* Create a new empty ziplist. */
+unsigned char *ziplistNew(void) {
+ unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE;
+ unsigned char *zl = zmalloc(bytes);
+ ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
+ ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
+ ZIPLIST_LENGTH(zl) = 0;
+ zl[bytes-1] = ZIP_END;
+ return zl;
+}
+
+/* Resize the ziplist. */
+unsigned char *ziplistResize(unsigned char *zl, size_t len) {
+ assert(len < UINT32_MAX);
+ zl = zrealloc(zl,len);
+ ZIPLIST_BYTES(zl) = intrev32ifbe(len);
+ zl[len-1] = ZIP_END;
+ return zl;
+}
+
/* When an entry is inserted, we need to set the prevlen field of the next
 * entry to equal the length of the inserted entry. It can occur that this
 * length cannot be encoded in 1 byte and the next entry needs to grow
 * a bit larger to hold the 5-byte encoded prevlen. This can be done for free,
 * because this only happens when an entry is already being inserted (which
 * causes a realloc and memmove). However, encoding the prevlen may require
 * that this entry is grown as well. This effect may cascade throughout
 * the ziplist when there are consecutive entries with a size close to
 * ZIP_BIG_PREVLEN, so we need to check that the prevlen can be encoded in
 * every consecutive entry.
 *
 * Note that this effect can also happen in reverse, where the bytes required
 * to encode the prevlen field can shrink. This effect is deliberately ignored,
 * because it can cause a "flapping" effect where a chain prevlen fields is
 * first grown and then shrunk again after consecutive inserts. Rather, the
 * field is allowed to stay larger than necessary, because a large prevlen
 * field implies the ziplist is holding large entries anyway.
 *
 * The pointer "p" points to the first entry that does NOT need to be
 * updated, i.e. consecutive fields MAY need an update. */
unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
    zlentry cur;
    size_t prevlen, prevlensize, prevoffset; /* Information about the last changed entry. */
    size_t firstentrylen; /* Used to handle insert at head. */
    size_t rawlen, curlen = intrev32ifbe(ZIPLIST_BYTES(zl));
    size_t extra = 0, cnt = 0, offset;
    size_t delta = 4; /* Extra bytes needed to update an entry's prevlen (5-1). */
    unsigned char *tail = zl + intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl));

    /* Empty ziplist */
    if (p[0] == ZIP_END) return zl;

    zipEntry(p, &cur); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
    firstentrylen = prevlen = cur.headersize + cur.len;
    prevlensize = zipStorePrevEntryLength(NULL, prevlen);
    prevoffset = p - zl;
    p += prevlen;

    /* First pass (head to tail): find out how many extra bytes we need, and
     * fix in place the entries whose prevlen field is already big enough. */
    while (p[0] != ZIP_END) {
        assert(zipEntrySafe(zl, curlen, p, &cur, 0));

        /* Abort when "prevlen" has not changed. */
        if (cur.prevrawlen == prevlen) break;

        /* Abort when entry's "prevlensize" is big enough. */
        if (cur.prevrawlensize >= prevlensize) {
            if (cur.prevrawlensize == prevlensize) {
                zipStorePrevEntryLength(p, prevlen);
            } else {
                /* This would result in shrinking, which we want to avoid.
                 * So, set "prevlen" in the available bytes. */
                zipStorePrevEntryLengthLarge(p, prevlen);
            }
            break;
        }

        /* cur.prevrawlen == 0 means cur was the former head entry;
         * otherwise its recorded prevlen must lag the new one by delta. */
        assert(cur.prevrawlen == 0 || cur.prevrawlen + delta == prevlen);

        /* Update prev entry's info and advance the cursor. */
        rawlen = cur.headersize + cur.len;
        prevlen = rawlen + delta;
        prevlensize = zipStorePrevEntryLength(NULL, prevlen);
        prevoffset = p - zl;
        p += rawlen;
        extra += delta;
        cnt++;
    }

    /* Extra bytes is zero when all updates have been done (or no update was needed). */
    if (extra == 0) return zl;

    /* Update tail offset after loop. */
    if (tail == zl + prevoffset) {
        /* When the last entry we need to update is also the tail, update tail offset
         * unless this is the only entry that was updated (so the tail offset didn't change). */
        if (extra - delta != 0) {
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra-delta);
        }
    } else {
        /* Update the tail offset in cases where the last entry we updated is not the tail. */
        ZIPLIST_TAIL_OFFSET(zl) =
            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
    }

    /* Now "p" points at the first unchanged byte in original ziplist,
     * move data after that to new ziplist. */
    offset = p - zl;
    zl = ziplistResize(zl, curlen + extra);
    p = zl + offset;
    memmove(p + extra, p, curlen - offset - 1);
    p += extra;

    /* Second pass: iterate all entries that need to be updated, tail to head,
     * moving each entry body to its final position and writing the grown
     * prevlen field in front of it. */
    while (cnt) {
        zipEntry(zl + prevoffset, &cur); /* no need for "safe" variant since we already iterated on all these entries above. */
        rawlen = cur.headersize + cur.len;
        /* Move entry to tail and reset prevlen. */
        memmove(p - (rawlen - cur.prevrawlensize),
                zl + prevoffset + cur.prevrawlensize,
                rawlen - cur.prevrawlensize);
        p -= (rawlen + delta);
        if (cur.prevrawlen == 0) {
            /* "cur" is the previous head entry, update its prevlen with firstentrylen. */
            zipStorePrevEntryLength(p, firstentrylen);
        } else {
            /* An entry's prevlen can only increment 4 bytes. */
            zipStorePrevEntryLength(p, cur.prevrawlen+delta);
        }
        /* Forward to previous entry. */
        prevoffset -= cur.prevrawlen;
        cnt--;
    }
    return zl;
}
+
/* Delete "num" entries, starting at "p". Returns pointer to the ziplist,
 * which may have been reallocated. Fewer than "num" entries are deleted if
 * the end of the ziplist is reached first. */
unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
    unsigned int i, totlen, deleted = 0;
    size_t offset;
    int nextdiff = 0;
    zlentry first, tail;
    size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));

    zipEntry(p, &first); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
    /* Advance 'p' over the entries to delete (stopping at ZIP_END). */
    for (i = 0; p[0] != ZIP_END && i < num; i++) {
        p += zipRawEntryLengthSafe(zl, zlbytes, p);
        deleted++;
    }

    assert(p >= first.p);
    totlen = p-first.p; /* Bytes taken by the element(s) to delete. */
    if (totlen > 0) {
        uint32_t set_tail;
        if (p[0] != ZIP_END) {
            /* Storing `prevrawlen` in this entry may increase or decrease the
             * number of bytes required compare to the current `prevrawlen`.
             * There always is room to store this, because it was previously
             * stored by an entry that is now being deleted. */
            nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);

            /* Note that there is always space when p jumps backward: if
             * the new previous entry is large, one of the deleted elements
             * had a 5 bytes prevlen header, so there is for sure at least
             * 5 bytes free and we need just 4. */
            p -= nextdiff;
            assert(p >= first.p && p<zl+zlbytes-1);
            zipStorePrevEntryLength(p,first.prevrawlen);

            /* Update offset for tail */
            set_tail = intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen;

            /* When the tail contains more than one entry, we need to take
             * "nextdiff" in account as well. Otherwise, a change in the
             * size of prevlen doesn't have an effect on the *tail* offset. */
            assert(zipEntrySafe(zl, zlbytes, p, &tail, 1));
            if (p[tail.headersize+tail.len] != ZIP_END) {
                set_tail = set_tail + nextdiff;
            }

            /* Move tail to the front of the ziplist */
            /* since we asserted that p >= first.p. we know totlen >= 0,
             * so we know that p > first.p and this is guaranteed not to reach
             * beyond the allocation, even if the entries lens are corrupted. */
            size_t bytes_to_move = zlbytes-(p-zl)-1;
            memmove(first.p,p,bytes_to_move);
        } else {
            /* The entire tail was deleted. No need to move memory. */
            set_tail = (first.p-zl)-first.prevrawlen;
        }

        /* Resize the ziplist */
        offset = first.p-zl;
        zlbytes -= totlen - nextdiff;
        zl = ziplistResize(zl, zlbytes);
        p = zl+offset;

        /* Update record count */
        ZIPLIST_INCR_LENGTH(zl,-deleted);

        /* Set the tail offset computed above */
        assert(set_tail <= zlbytes - ZIPLIST_END_SIZE);
        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(set_tail);

        /* When nextdiff != 0, the raw length of the next entry has changed, so
         * we need to cascade the update throughout the ziplist */
        if (nextdiff != 0)
            zl = __ziplistCascadeUpdate(zl,p);
    }
    return zl;
}
+
/* Insert an entry of 'slen' bytes pointed to by 's' at position "p".
 * Strings that look like integers are stored with an integer encoding.
 * Returns the ziplist, which may have been reallocated. */
unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, newlen;
    unsigned int prevlensize, prevlen = 0;
    size_t offset;
    int nextdiff = 0;
    unsigned char encoding = 0;
    long long value = 123456789; /* initialized to avoid warning. Using a value
                                    that is easy to see if for some reason
                                    we use it uninitialized. */
    zlentry tail;

    /* Find out prevlen for the entry that is inserted. */
    if (p[0] != ZIP_END) {
        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
    } else {
        /* Inserting at the end: the previous entry is the current tail,
         * unless the ziplist is empty (prevlen stays 0). */
        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
        if (ptail[0] != ZIP_END) {
            prevlen = zipRawEntryLengthSafe(zl, curlen, ptail);
        }
    }

    /* See if the entry can be encoded */
    if (zipTryEncoding(s,slen,&value,&encoding)) {
        /* 'encoding' is set to the appropriate integer encoding */
        reqlen = zipIntSize(encoding);
    } else {
        /* 'encoding' is untouched, however zipStoreEntryEncoding will use the
         * string length to figure out how to encode it. */
        reqlen = slen;
    }
    /* We need space for both the length of the previous entry and
     * the length of the payload. */
    reqlen += zipStorePrevEntryLength(NULL,prevlen);
    reqlen += zipStoreEntryEncoding(NULL,encoding,slen);

    /* When the insert position is not equal to the tail, we need to
     * make sure that the next entry can hold this entry's length in
     * its prevlen field. */
    int forcelarge = 0;
    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
    if (nextdiff == -4 && reqlen < 4) {
        /* The next entry's prevlen would shrink by 4 bytes but the new entry
         * is smaller than that; keep the large prevlen encoding instead of
         * shrinking the ziplist (avoids an underflowing memmove below). */
        nextdiff = 0;
        forcelarge = 1;
    }

    /* Store offset because a realloc may change the address of zl. */
    offset = p-zl;
    newlen = curlen+reqlen+nextdiff;
    zl = ziplistResize(zl,newlen);
    p = zl+offset;

    /* Apply memory move when necessary and update tail offset. */
    if (p[0] != ZIP_END) {
        /* Subtract one because of the ZIP_END bytes */
        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

        /* Encode this entry's raw length in the next entry. */
        if (forcelarge)
            zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
        else
            zipStorePrevEntryLength(p+reqlen,reqlen);

        /* Update offset for tail */
        ZIPLIST_TAIL_OFFSET(zl) =
            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);

        /* When the tail contains more than one entry, we need to take
         * "nextdiff" in account as well. Otherwise, a change in the
         * size of prevlen doesn't have an effect on the *tail* offset. */
        assert(zipEntrySafe(zl, newlen, p+reqlen, &tail, 1));
        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
        }
    } else {
        /* This element will be the new tail. */
        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
    }

    /* When nextdiff != 0, the raw length of the next entry has changed, so
     * we need to cascade the update throughout the ziplist */
    if (nextdiff != 0) {
        offset = p-zl;
        zl = __ziplistCascadeUpdate(zl,p+reqlen);
        p = zl+offset;
    }

    /* Write the entry */
    p += zipStorePrevEntryLength(p,prevlen);
    p += zipStoreEntryEncoding(p,encoding,slen);
    if (ZIP_IS_STR(encoding)) {
        memcpy(p,s,slen);
    } else {
        zipSaveInteger(p,value,encoding);
    }
    ZIPLIST_INCR_LENGTH(zl,1);
    return zl;
}
+
/* Merge ziplists 'first' and 'second' by appending 'second' to 'first'.
 *
 * NOTE: The larger ziplist is reallocated to contain the new merged ziplist.
 * Either 'first' or 'second' can be used for the result. The parameter not
 * used will be free'd and set to NULL.
 *
 * After calling this function, the input parameters are no longer valid since
 * they are changed and free'd in-place.
 *
 * The result ziplist is the contents of 'first' followed by 'second'.
 *
 * On failure: returns NULL if the merge is impossible.
 * On success: returns the merged ziplist (which is expanded version of either
 * 'first' or 'second', also frees the other unused input ziplist, and sets the
 * input ziplist argument equal to newly reallocated ziplist return value. */
unsigned char *ziplistMerge(unsigned char **first, unsigned char **second) {
    /* If any params are null, we can't merge, so NULL. */
    if (first == NULL || *first == NULL || second == NULL || *second == NULL)
        return NULL;

    /* Can't merge same list into itself. */
    if (*first == *second)
        return NULL;

    size_t first_bytes = intrev32ifbe(ZIPLIST_BYTES(*first));
    size_t first_len = intrev16ifbe(ZIPLIST_LENGTH(*first));

    size_t second_bytes = intrev32ifbe(ZIPLIST_BYTES(*second));
    size_t second_len = intrev16ifbe(ZIPLIST_LENGTH(*second));

    int append;
    unsigned char *source, *target;
    size_t target_bytes, source_bytes;
    /* Pick the largest ziplist so we can resize easily in-place.
     * We must also track if we are now appending or prepending to
     * the target ziplist. */
    if (first_len >= second_len) {
        /* retain first, append second to first. */
        target = *first;
        target_bytes = first_bytes;
        source = *second;
        source_bytes = second_bytes;
        append = 1;
    } else {
        /* else, retain second, prepend first to second. */
        target = *second;
        target_bytes = second_bytes;
        source = *first;
        source_bytes = first_bytes;
        append = 0;
    }

    /* Calculate final bytes (subtract one pair of metadata) */
    size_t zlbytes = first_bytes + second_bytes -
                     ZIPLIST_HEADER_SIZE - ZIPLIST_END_SIZE;
    size_t zllength = first_len + second_len;

    /* Combined zl length should be limited within UINT16_MAX */
    zllength = zllength < UINT16_MAX ? zllength : UINT16_MAX;

    /* larger values can't be stored into ZIPLIST_BYTES */
    assert(zlbytes < UINT32_MAX);

    /* Save offset positions before we start ripping memory apart. */
    size_t first_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*first));
    size_t second_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*second));

    /* Extend target to new zlbytes then append or prepend source. */
    target = zrealloc(target, zlbytes);
    if (append) {
        /* append == appending to target */
        /* Copy source after target (copying over original [END]):
         *   [TARGET - END, SOURCE - HEADER] */
        memcpy(target + target_bytes - ZIPLIST_END_SIZE,
               source + ZIPLIST_HEADER_SIZE,
               source_bytes - ZIPLIST_HEADER_SIZE);
    } else {
        /* !append == prepending to target */
        /* Move target *contents* exactly size of (source - [END]),
         * then copy source into vacated space (source - [END]):
         *   [SOURCE - END, TARGET - HEADER] */
        memmove(target + source_bytes - ZIPLIST_END_SIZE,
                target + ZIPLIST_HEADER_SIZE,
                target_bytes - ZIPLIST_HEADER_SIZE);
        memcpy(target, source, source_bytes - ZIPLIST_END_SIZE);
    }

    /* Update header metadata. */
    ZIPLIST_BYTES(target) = intrev32ifbe(zlbytes);
    ZIPLIST_LENGTH(target) = intrev16ifbe(zllength);
    /* New tail offset is:
     *   + N bytes of first ziplist
     *   - 1 byte for [END] of first ziplist
     *   + M bytes for the offset of the original tail of the second ziplist
     *   - J bytes for HEADER because second_offset keeps no header. */
    ZIPLIST_TAIL_OFFSET(target) = intrev32ifbe(
                                   (first_bytes - ZIPLIST_END_SIZE) +
                                   (second_offset - ZIPLIST_HEADER_SIZE));

    /* __ziplistCascadeUpdate just fixes the prev length values until it finds a
     * correct prev length value (then it assumes the rest of the list is okay).
     * We tell CascadeUpdate to start at the first ziplist's tail element to fix
     * the merge seam. */
    target = __ziplistCascadeUpdate(target, target+first_offset);

    /* Now free and NULL out what we didn't realloc */
    if (append) {
        zfree(*second);
        *second = NULL;
        *first = target;
    } else {
        zfree(*first);
        *first = NULL;
        *second = target;
    }
    return target;
}
+
+unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
+ unsigned char *p;
+ p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl);
+ return __ziplistInsert(zl,p,s,slen);
+}
+
+/* Returns an offset to use for iterating with ziplistNext. When the given
+ * index is negative, the list is traversed back to front. When the list
+ * doesn't contain an element at the provided index, NULL is returned. */
unsigned char *ziplistIndex(unsigned char *zl, int index) {
    unsigned char *p;
    unsigned int prevlensize, prevlen = 0;
    size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
    if (index < 0) {
        /* Negative index: walk backwards from the tail entry.
         * -1 means zero steps back, -2 one step back, and so on. */
        index = (-index)-1;
        p = ZIPLIST_ENTRY_TAIL(zl);
        if (p[0] != ZIP_END) {
            /* No need for "safe" check: when going backwards, we know the header
             * we're parsing is in the range, we just need to assert (below) that
             * the size we take doesn't cause p to go outside the allocation. */
            ZIP_DECODE_PREVLENSIZE(p, prevlensize);
            assert(p + prevlensize < zl + zlbytes - ZIPLIST_END_SIZE);
            ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
            /* prevlen == 0 marks the first entry: stop there even if steps
             * remain (the 'index > 0' check below then returns NULL). */
            while (prevlen > 0 && index--) {
                p -= prevlen;
                /* Each step must land inside the entries area. */
                assert(p >= zl + ZIPLIST_HEADER_SIZE && p < zl + zlbytes - ZIPLIST_END_SIZE);
                ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
            }
        }
    } else {
        /* Non-negative index: walk forward from the first entry. */
        p = ZIPLIST_ENTRY_HEAD(zl);
        while (index--) {
            /* Use the "safe" length: When we go forward, we need to be careful
             * not to decode an entry header if it's past the ziplist allocation. */
            p += zipRawEntryLengthSafe(zl, zlbytes, p);
            if (p[0] == ZIP_END)
                break;
        }
    }
    /* Out of range: we reached the terminator, or the backward walk ran out
     * of entries while steps remained. */
    if (p[0] == ZIP_END || index > 0)
        return NULL;
    zipAssertValidEntry(zl, zlbytes, p);
    return p;
}
+
+/* Return pointer to next entry in ziplist.
+ *
+ * zl is the pointer to the ziplist
+ * p is the pointer to the current element
+ *
+ * The element after 'p' is returned, otherwise NULL if we are at the end. */
+unsigned char *ziplistNext(unsigned char *zl, unsigned char *p) {
+ ((void) zl);
+ size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
+
+ /* "p" could be equal to ZIP_END, caused by ziplistDelete,
+ * and we should return NULL. Otherwise, we should return NULL
+ * when the *next* element is ZIP_END (there is no next entry). */
+ if (p[0] == ZIP_END) {
+ return NULL;
+ }
+
+ p += zipRawEntryLength(p);
+ if (p[0] == ZIP_END) {
+ return NULL;
+ }
+
+ zipAssertValidEntry(zl, zlbytes, p);
+ return p;
+}
+
+/* Return pointer to previous entry in ziplist. */
+unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p) {
+ unsigned int prevlensize, prevlen = 0;
+
+ /* Iterating backwards from ZIP_END should return the tail. When "p" is
+ * equal to the first element of the list, we're already at the head,
+ * and should return NULL. */
+ if (p[0] == ZIP_END) {
+ p = ZIPLIST_ENTRY_TAIL(zl);
+ return (p[0] == ZIP_END) ? NULL : p;
+ } else if (p == ZIPLIST_ENTRY_HEAD(zl)) {
+ return NULL;
+ } else {
+ ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
+ assert(prevlen > 0);
+ p-=prevlen;
+ size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
+ zipAssertValidEntry(zl, zlbytes, p);
+ return p;
+ }
+}
+
+/* Get entry pointed to by 'p' and store in either '*sstr' or 'sval' depending
+ * on the encoding of the entry. '*sstr' is always set to NULL to be able
+ * to find out whether the string pointer or the integer value was set.
+ * Return 0 if 'p' points to the end of the ziplist, 1 otherwise. */
+unsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *slen, long long *sval) {
+ zlentry entry;
+ if (p == NULL || p[0] == ZIP_END) return 0;
+ if (sstr) *sstr = NULL;
+
+ zipEntry(p, &entry); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
+ if (ZIP_IS_STR(entry.encoding)) {
+ if (sstr) {
+ *slen = entry.len;
+ *sstr = p+entry.headersize;
+ }
+ } else {
+ if (sval) {
+ *sval = zipLoadInteger(p+entry.headersize,entry.encoding);
+ }
+ }
+ return 1;
+}
+
/* Insert an entry holding 's' (of 'slen' bytes) at position "p": before
 * the entry 'p' points to, or appended when 'p' points at ZIP_END.
 * Returns the (possibly reallocated) ziplist. */
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    return __ziplistInsert(zl,p,s,slen);
}
+
+/* Delete a single entry from the ziplist, pointed to by *p.
+ * Also update *p in place, to be able to iterate over the
+ * ziplist, while deleting entries. */
+unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
+ size_t offset = *p-zl;
+ zl = __ziplistDelete(zl,*p,1);
+
+ /* Store pointer to current element in p, because ziplistDelete will
+ * do a realloc which might result in a different "zl"-pointer.
+ * When the delete direction is back to front, we might delete the last
+ * entry and end up with "p" pointing to ZIP_END, so check this. */
+ *p = zl+offset;
+ return zl;
+}
+
+/* Delete a range of entries from the ziplist. */
+unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num) {
+ unsigned char *p = ziplistIndex(zl,index);
+ return (p == NULL) ? zl : __ziplistDelete(zl,p,num);
+}
+
+/* Replaces the entry at p. This is equivalent to a delete and an insert,
+ * but avoids some overhead when replacing a value of the same size. */
unsigned char *ziplistReplace(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {

    /* get metadata of the current entry */
    zlentry entry;
    zipEntry(p, &entry);

    /* compute length of entry to store, excluding prevlen */
    unsigned int reqlen;
    unsigned char encoding = 0;
    long long value = 123456789; /* initialized to avoid warning. */
    if (zipTryEncoding(s,slen,&value,&encoding)) {
        reqlen = zipIntSize(encoding); /* encoding is set */
    } else {
        reqlen = slen; /* encoding == 0 */
    }
    reqlen += zipStoreEntryEncoding(NULL,encoding,slen);

    if (reqlen == entry.lensize + entry.len) {
        /* Same size as the existing <encoding><data> part: simply overwrite
         * the element in place. The prevlen field is left untouched, so no
         * reallocation or cascade update is needed. */
        p += entry.prevrawlensize;
        p += zipStoreEntryEncoding(p,encoding,slen);
        if (ZIP_IS_STR(encoding)) {
            memcpy(p,s,slen);
        } else {
            zipSaveInteger(p,value,encoding);
        }
    } else {
        /* Different size: fall back to delete + insert at the same
         * position (may realloc and cascade-update prevlen fields). */
        zl = ziplistDelete(zl,&p);
        zl = ziplistInsert(zl,p,s,slen);
    }
    return zl;
}
+
+/* Compare entry pointer to by 'p' with 'sstr' of length 'slen'. */
+/* Return 1 if equal. */
+unsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int slen) {
+ zlentry entry;
+ unsigned char sencoding;
+ long long zval, sval;
+ if (p[0] == ZIP_END) return 0;
+
+ zipEntry(p, &entry); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
+ if (ZIP_IS_STR(entry.encoding)) {
+ /* Raw compare */
+ if (entry.len == slen) {
+ return memcmp(p+entry.headersize,sstr,slen) == 0;
+ } else {
+ return 0;
+ }
+ } else {
+ /* Try to compare encoded values. Don't compare encoding because
+ * different implementations may encoded integers differently. */
+ if (zipTryEncoding(sstr,slen,&sval,&sencoding)) {
+ zval = zipLoadInteger(p+entry.headersize,entry.encoding);
+ return zval == sval;
+ }
+ }
+ return 0;
+}
+
/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries
 * between every comparison. Returns NULL when the field could not be found. */
unsigned char *ziplistFind(unsigned char *zl, unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) {
    int skipcnt = 0;
    unsigned char vencoding = 0; /* lazily computed integer encoding of vstr (0 = not tried yet) */
    long long vll = 0;           /* integer value of vstr, valid once vencoding is a real encoding */
    size_t zlbytes = ziplistBlobLen(zl);

    while (p[0] != ZIP_END) {
        struct zlentry e;
        unsigned char *q;

        /* Decode the entry header; it must lie fully inside the allocation. */
        assert(zipEntrySafe(zl, zlbytes, p, &e, 1));
        q = p + e.prevrawlensize + e.lensize; /* q points at the entry payload */

        if (skipcnt == 0) {
            /* Compare current entry with specified entry */
            if (ZIP_IS_STR(e.encoding)) {
                if (e.len == vlen && memcmp(q, vstr, vlen) == 0) {
                    return p;
                }
            } else {
                /* Find out if the searched field can be encoded. Note that
                 * we do it only the first time, once done vencoding is set
                 * to non-zero and vll is set to the integer value. */
                if (vencoding == 0) {
                    if (!zipTryEncoding(vstr, vlen, &vll, &vencoding)) {
                        /* If the entry can't be encoded we set it to
                         * UCHAR_MAX so that we don't retry again the next
                         * time. */
                        vencoding = UCHAR_MAX;
                    }
                    /* Must be non-zero by now */
                    assert(vencoding);
                }

                /* Compare current entry with specified entry, do it only
                 * if vencoding != UCHAR_MAX because if there is no encoding
                 * possible for the field it can't be a valid integer. */
                if (vencoding != UCHAR_MAX) {
                    long long ll = zipLoadInteger(q, e.encoding);
                    if (ll == vll) {
                        return p;
                    }
                }
            }

            /* Reset skip count */
            skipcnt = skip;
        } else {
            /* Skip entry */
            skipcnt--;
        }

        /* Move to next entry */
        p = q + e.len;
    }

    return NULL;
}
+
+/* Return length of ziplist. */
+unsigned int ziplistLen(unsigned char *zl) {
+ unsigned int len = 0;
+ if (intrev16ifbe(ZIPLIST_LENGTH(zl)) < UINT16_MAX) {
+ len = intrev16ifbe(ZIPLIST_LENGTH(zl));
+ } else {
+ unsigned char *p = zl+ZIPLIST_HEADER_SIZE;
+ size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
+ while (*p != ZIP_END) {
+ p += zipRawEntryLengthSafe(zl, zlbytes, p);
+ len++;
+ }
+
+ /* Re-store length if small enough */
+ if (len < UINT16_MAX) ZIPLIST_LENGTH(zl) = intrev16ifbe(len);
+ }
+ return len;
+}
+
/* Return ziplist blob size in bytes. */
size_t ziplistBlobLen(unsigned char *zl) {
    /* The total size is stored (little endian) in the first header field. */
    size_t bytes = intrev32ifbe(ZIPLIST_BYTES(zl));
    return bytes;
}
+
/* Print a human-readable representation of the ziplist to stdout:
 * the three header fields, then one record per entry (debugging helper). */
void ziplistRepr(unsigned char *zl) {
    unsigned char *p;
    int index = 0;
    zlentry entry;
    size_t zlbytes = ziplistBlobLen(zl);

    printf(
        "{total bytes %u} "
        "{num entries %u}\n"
        "{tail offset %u}\n",
        intrev32ifbe(ZIPLIST_BYTES(zl)),
        intrev16ifbe(ZIPLIST_LENGTH(zl)),
        intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)));
    p = ZIPLIST_ENTRY_HEAD(zl);
    while(*p != ZIP_END) {
        /* Abort on a corrupt entry rather than printing garbage. */
        assert(zipEntrySafe(zl, zlbytes, p, &entry, 1));
        printf(
            "{\n"
                "\taddr 0x%08lx,\n"
                "\tindex %2d,\n"
                "\toffset %5lu,\n"
                "\thdr+entry len: %5u,\n"
                "\thdr len%2u,\n"
                "\tprevrawlen: %5u,\n"
                "\tprevrawlensize: %2u,\n"
                "\tpayload %5u\n",
            (long unsigned)p,
            index,
            (unsigned long) (p-zl),
            entry.headersize+entry.len,
            entry.headersize,
            entry.prevrawlen,
            entry.prevrawlensize,
            entry.len);
        printf("\tbytes: ");
        for (unsigned int i = 0; i < entry.headersize+entry.len; i++) {
            printf("%02x|",p[i]);
        }
        printf("\n");
        p += entry.headersize; /* skip to the payload */
        if (ZIP_IS_STR(entry.encoding)) {
            printf("\t[str]");
            if (entry.len > 40) {
                /* Truncate long strings to the first 40 bytes. */
                if (fwrite(p,40,1,stdout) == 0) perror("fwrite");
                printf("...");
            } else {
                if (entry.len &&
                    fwrite(p,entry.len,1,stdout) == 0) perror("fwrite");
            }
        } else {
            printf("\t[int]%lld", (long long) zipLoadInteger(p,entry.encoding));
        }
        printf("\n}\n");
        p += entry.len; /* advance past the payload to the next entry */
        index++;
    }
    printf("{end}\n\n");
}
+
/* Validate the integrity of the data structure.
 * when `deep` is 0, only the integrity of the header is validated.
 * when `deep` is 1, we scan all the entries one by one.
 * Returns 1 when the ziplist is valid, 0 when corruption is detected.
 * 'entry_cb', when non-NULL, is invoked per entry for extra validation. */
int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
                             ziplistValidateEntryCB entry_cb, void *cb_userdata) {
    /* check that we can actually read the header. (and ZIP_END) */
    if (size < ZIPLIST_HEADER_SIZE + ZIPLIST_END_SIZE)
        return 0;

    /* check that the encoded size in the header must match the allocated size. */
    size_t bytes = intrev32ifbe(ZIPLIST_BYTES(zl));
    if (bytes != size)
        return 0;

    /* the last byte must be the terminator. */
    if (zl[size - ZIPLIST_END_SIZE] != ZIP_END)
        return 0;

    /* make sure the tail offset isn't reaching outside the allocation. */
    if (intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) > size - ZIPLIST_END_SIZE)
        return 0;

    if (!deep)
        return 1;

    unsigned int count = 0;
    unsigned int header_count = intrev16ifbe(ZIPLIST_LENGTH(zl));
    unsigned char *p = ZIPLIST_ENTRY_HEAD(zl);
    unsigned char *prev = NULL;           /* start of the previous entry */
    size_t prev_raw_size = 0;             /* raw size of the previous entry */
    while(*p != ZIP_END) {
        struct zlentry e;
        /* Decode the entry headers and fail if invalid or reaches outside the allocation */
        if (!zipEntrySafe(zl, size, p, &e, 1))
            return 0;

        /* Make sure the record stating the prev entry size is correct. */
        if (e.prevrawlen != prev_raw_size)
            return 0;

        /* Optionally let the caller validate the entry too. */
        if (entry_cb && !entry_cb(p, header_count, cb_userdata))
            return 0;

        /* Move to the next entry */
        prev_raw_size = e.headersize + e.len;
        prev = p;
        p += e.headersize + e.len;
        count++;
    }

    /* Make sure 'p' really does point to the end of the ziplist. */
    if (p != zl + bytes - ZIPLIST_END_SIZE)
        return 0;

    /* Make sure the <zltail> entry really do point to the start of the last entry. */
    if (prev != NULL && prev != ZIPLIST_ENTRY_TAIL(zl))
        return 0;

    /* Check that the count in the header is correct */
    if (header_count != UINT16_MAX && count != header_count)
        return 0;

    return 1;
}
+
+/* Randomly select a pair of key and value.
+ * total_count is a pre-computed length/2 of the ziplist (to avoid calls to ziplistLen)
+ * 'key' and 'val' are used to store the result key value pair.
+ * 'val' can be NULL if the value is not needed. */
+void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val) {
+ int ret;
+ unsigned char *p;
+
+ /* Avoid div by zero on corrupt ziplist */
+ assert(total_count);
+
+ /* Generate even numbers, because ziplist saved K-V pair */
+ int r = (rand() % total_count) * 2;
+ p = ziplistIndex(zl, r);
+ ret = ziplistGet(p, &key->sval, &key->slen, &key->lval);
+ assert(ret != 0);
+
+ if (!val)
+ return;
+ p = ziplistNext(zl, p);
+ ret = ziplistGet(p, &val->sval, &val->slen, &val->lval);
+ assert(ret != 0);
+}
+
/* unsigned int comparator for qsort.
 * Returns a negative, zero, or positive value for a<b, a==b, a>b.
 * Note: plain subtraction of the two unsigned values wraps around when the
 * operands differ by more than INT_MAX, and the unsigned-to-int conversion
 * then yields the wrong sign, so compare explicitly instead. */
int uintCompare(const void *a, const void *b) {
    unsigned int va = *(const unsigned int *) a;
    unsigned int vb = *(const unsigned int *) b;
    return (va > vb) - (va < vb);
}
+
+/* Helper method to store a string into from val or lval into dest */
+static inline void ziplistSaveValue(unsigned char *val, unsigned int len, long long lval, ziplistEntry *dest) {
+ dest->sval = val;
+ dest->slen = len;
+ dest->lval = lval;
+}
+
/* Randomly select count of key value pairs and store into 'keys' and
 * 'vals' args. The order of the picked entries is random, and the selections
 * are non-unique (repetitions are possible).
 * The 'vals' arg can be NULL in which case we skip these.
 * The stored pointers reference memory inside 'zl' itself.
 * NOTE(review): 'picks[0]' is read before any bounds check, so this appears
 * to assume count >= 1 — verify against callers. */
void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
    unsigned char *p, *key, *value;
    unsigned int klen = 0, vlen = 0;
    long long klval = 0, vlval = 0;

    /* Notice: the index member must be first due to the use in uintCompare */
    typedef struct {
        unsigned int index;  /* ziplist index of the picked key */
        unsigned int order;  /* caller-visible slot this pick belongs to */
    } rand_pick;
    rand_pick *picks = zmalloc(sizeof(rand_pick)*count);
    unsigned int total_size = ziplistLen(zl)/2;

    /* Avoid div by zero on corrupt ziplist */
    assert(total_size);

    /* create a pool of random indexes (some may be duplicate). */
    for (unsigned int i = 0; i < count; i++) {
        picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
        /* keep track of the order we picked them */
        picks[i].order = i;
    }

    /* sort by indexes. */
    qsort(picks, count, sizeof(rand_pick), uintCompare);

    /* fetch the elements form the ziplist into a output array respecting the original order. */
    unsigned int zipindex = picks[0].index, pickindex = 0;
    p = ziplistIndex(zl, zipindex);
    while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) {
        p = ziplistNext(zl, p); /* advance from key to its value */
        assert(ziplistGet(p, &value, &vlen, &vlval));
        /* Serve every (possibly duplicated) pick that targets this index. */
        while (pickindex < count && zipindex == picks[pickindex].index) {
            int storeorder = picks[pickindex].order;
            ziplistSaveValue(key, klen, klval, &keys[storeorder]);
            if (vals)
                ziplistSaveValue(value, vlen, vlval, &vals[storeorder]);
            pickindex++;
        }
        zipindex += 2;          /* next key index */
        p = ziplistNext(zl, p); /* advance from value to the next key */
    }

    zfree(picks);
}
+
/* Randomly select count of key value pairs and store into 'keys' and
 * 'vals' args. The selections are unique (no repetitions), and the order of
 * the picked entries is NOT-random.
 * The 'vals' arg can be NULL in which case we skip these.
 * The return value is the number of items picked which can be lower than the
 * requested count if the ziplist doesn't hold enough pairs. */
unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
    unsigned char *p, *key;
    unsigned int klen = 0;
    long long klval = 0;
    unsigned int total_size = ziplistLen(zl)/2;
    unsigned int index = 0; /* pair index of the key currently under 'p' */
    if (count > total_size)
        count = total_size;

    /* To only iterate once, every time we try to pick a member, the probability
     * we pick it is the quotient of the count left we want to pick and the
     * count still we haven't visited in the dict, this way, we could make every
     * member be equally picked.*/
    p = ziplistIndex(zl, 0);
    unsigned int picked = 0, remaining = count;
    while (picked < count && p) {
        double randomDouble = ((double)rand()) / RAND_MAX;
        double threshold = ((double)remaining) / (total_size - index);
        if (randomDouble <= threshold) {
            /* Pair selected: record the key, then step to its value. */
            assert(ziplistGet(p, &key, &klen, &klval));
            ziplistSaveValue(key, klen, klval, &keys[picked]);
            p = ziplistNext(zl, p);
            assert(p);
            if (vals) {
                assert(ziplistGet(p, &key, &klen, &klval));
                ziplistSaveValue(key, klen, klval, &vals[picked]);
            }
            remaining--;
            picked++;
        } else {
            /* Pair skipped: still step past the value entry. */
            p = ziplistNext(zl, p);
            assert(p);
        }
        /* Either way 'p' now sits on a value; advance to the next key. */
        p = ziplistNext(zl, p);
        index++;
    }
    return picked;
}
+
+#ifdef REDIS_TEST
+#include <sys/time.h>
+#include "adlist.h"
+#include "sds.h"
+#include "testhelp.h"
+
+#define debug(f, ...) { if (DEBUG) printf(f, __VA_ARGS__); }
+
+static unsigned char *createList(void) {
+ unsigned char *zl = ziplistNew();
+ zl = ziplistPush(zl, (unsigned char*)"foo", 3, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"quux", 4, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"hello", 5, ZIPLIST_HEAD);
+ zl = ziplistPush(zl, (unsigned char*)"1024", 4, ZIPLIST_TAIL);
+ return zl;
+}
+
+static unsigned char *createIntList(void) {
+ unsigned char *zl = ziplistNew();
+ char buf[32];
+
+ snprintf(buf, sizeof(buf), "100");
+ zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_TAIL);
+ snprintf(buf, sizeof(buf), "128000");
+ zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_TAIL);
+ snprintf(buf, sizeof(buf), "-100");
+ zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_HEAD);
+ snprintf(buf, sizeof(buf), "4294967296");
+ zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_HEAD);
+ snprintf(buf, sizeof(buf), "non integer");
+ zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_TAIL);
+ snprintf(buf,sizeof(buf), "much much longer non integer");
+ zl = ziplistPush(zl, (unsigned char*)buf, strlen(buf), ZIPLIST_TAIL);
+ return zl;
+}
+
+static long long usec(void) {
+ struct timeval tv;
+ gettimeofday(&tv,NULL);
+ return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
+}
+
+static void stress(int pos, int num, int maxsize, int dnum) {
+ int i,j,k;
+ unsigned char *zl;
+ char posstr[2][5] = { "HEAD", "TAIL" };
+ long long start;
+ for (i = 0; i < maxsize; i+=dnum) {
+ zl = ziplistNew();
+ for (j = 0; j < i; j++) {
+ zl = ziplistPush(zl,(unsigned char*)"quux",4,ZIPLIST_TAIL);
+ }
+
+ /* Do num times a push+pop from pos */
+ start = usec();
+ for (k = 0; k < num; k++) {
+ zl = ziplistPush(zl,(unsigned char*)"quux",4,pos);
+ zl = ziplistDeleteRange(zl,0,1);
+ }
+ printf("List size: %8d, bytes: %8d, %dx push+pop (%s): %6lld usec\n",
+ i,intrev32ifbe(ZIPLIST_BYTES(zl)),num,posstr[pos],usec()-start);
+ zfree(zl);
+ }
+}
+
/* Pop (print, then delete) the head or tail entry of 'zl' and return the
 * updated ziplist. Exits the process when the list is empty. */
static unsigned char *pop(unsigned char *zl, int where) {
    unsigned char *p, *vstr;
    unsigned int vlen;
    long long vlong = 0;

    /* Index 0 is the head, -1 the tail. */
    p = ziplistIndex(zl,where == ZIPLIST_HEAD ? 0 : -1);
    if (ziplistGet(p,&vstr,&vlen,&vlong)) {
        if (where == ZIPLIST_HEAD)
            printf("Pop head: ");
        else
            printf("Pop tail: ");

        /* vstr is non-NULL for string entries, NULL for integer entries. */
        if (vstr) {
            if (vlen && fwrite(vstr,vlen,1,stdout) == 0) perror("fwrite");
        }
        else {
            printf("%lld", vlong);
        }

        printf("\n");
        return ziplistDelete(zl,&p);
    } else {
        printf("ERROR: Could not pop\n");
        exit(1);
    }
}
+
/* Fill 'target' with random bytes; the length is picked uniformly in
 * [min, max] and a random mode selects the byte range (arbitrary bytes,
 * '0'..'z', or '0'..'4'). The caller must provide at least 'max' bytes.
 * Returns the generated length. The output is NOT NUL-terminated. */
static int randstring(char *target, unsigned int min, unsigned int max) {
    int p = 0;
    int len = min+rand()%(max-min+1);
    /* Initialize so the values are defined even if asserts are compiled
     * out (NDEBUG) — the default branch is unreachable since rand()%3 is
     * always in 0..2, but uninitialized reads would be UB. */
    int minval = 0, maxval = 0;
    switch(rand() % 3) {
    case 0:
        minval = 0;
        maxval = 255;
        break;
    case 1:
        minval = 48;
        maxval = 122;
        break;
    case 2:
        minval = 48;
        maxval = 52;
        break;
    default:
        assert(0); /* unreachable: rand()%3 is 0, 1 or 2 */
    }

    while(p < len)
        target[p++] = minval+rand()%(maxval-minval+1);
    return len;
}
+
/* Decode every entry of 'zl' into 'e' using positive indexes, and assert
 * that decoding the same entries through the mirror negative indexes
 * yields byte-identical zlentry structs. 'e' must have room for
 * ziplistLen(zl) entries. */
static void verify(unsigned char *zl, zlentry *e) {
    int len = ziplistLen(zl);
    zlentry _e;

    ZIPLIST_ENTRY_ZERO(&_e);

    for (int i = 0; i < len; i++) {
        /* Zero first so memcmp over the whole struct (padding included)
         * is meaningful. */
        memset(&e[i], 0, sizeof(zlentry));
        zipEntry(ziplistIndex(zl, i), &e[i]);

        memset(&_e, 0, sizeof(zlentry));
        zipEntry(ziplistIndex(zl, -len+i), &_e);

        assert(memcmp(&e[i], &_e, sizeof(zlentry)) == 0);
    }
}
+
+static unsigned char *insertHelper(unsigned char *zl, char ch, size_t len, unsigned char *pos) {
+ assert(len <= ZIP_BIG_PREVLEN);
+ unsigned char data[ZIP_BIG_PREVLEN] = {0};
+ memset(data, ch, len);
+ return ziplistInsert(zl, pos, data, len);
+}
+
+static int compareHelper(unsigned char *zl, char ch, size_t len, int index) {
+ assert(len <= ZIP_BIG_PREVLEN);
+ unsigned char data[ZIP_BIG_PREVLEN] = {0};
+ memset(data, ch, len);
+ unsigned char *p = ziplistIndex(zl, index);
+ assert(p != NULL);
+ return ziplistCompare(p, data, len);
+}
+
/* Total on-disk entry size for a string of 'slen' bytes when the previous
 * entry is small (1-byte prevlen field). */
static size_t strEntryBytesSmall(size_t slen) {
    size_t prevlen = zipStorePrevEntryLength(NULL, 0);
    size_t enclen = zipStoreEntryEncoding(NULL, 0, slen);
    return prevlen + enclen + slen;
}
+
+static size_t strEntryBytesLarge(size_t slen) {
+ return slen + zipStorePrevEntryLength(NULL, ZIP_BIG_PREVLEN) + zipStoreEntryEncoding(NULL, 0, slen);
+}
+
+/* ./redis-server test ziplist <randomseed> */
+int ziplistTest(int argc, char **argv, int flags) {
+ int accurate = (flags & REDIS_TEST_ACCURATE);
+ unsigned char *zl, *p;
+ unsigned char *entry;
+ unsigned int elen;
+ long long value;
+ int iteration;
+
+ /* If an argument is given, use it as the random seed. */
+ if (argc >= 4)
+ srand(atoi(argv[3]));
+
+ zl = createIntList();
+ ziplistRepr(zl);
+
+ zfree(zl);
+
+ zl = createList();
+ ziplistRepr(zl);
+
+ zl = pop(zl,ZIPLIST_TAIL);
+ ziplistRepr(zl);
+
+ zl = pop(zl,ZIPLIST_HEAD);
+ ziplistRepr(zl);
+
+ zl = pop(zl,ZIPLIST_TAIL);
+ ziplistRepr(zl);
+
+ zl = pop(zl,ZIPLIST_TAIL);
+ ziplistRepr(zl);
+
+ zfree(zl);
+
+ printf("Get element at index 3:\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, 3);
+ if (!ziplistGet(p, &entry, &elen, &value)) {
+ printf("ERROR: Could not access index 3\n");
+ return 1;
+ }
+ if (entry) {
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
+ printf("\n");
+ } else {
+ printf("%lld\n", value);
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Get element at index 4 (out of range):\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, 4);
+ if (p == NULL) {
+ printf("No entry\n");
+ } else {
+ printf("ERROR: Out of range index should return NULL, returned offset: %ld\n", (long)(p-zl));
+ return 1;
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Get element at index -1 (last element):\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, -1);
+ if (!ziplistGet(p, &entry, &elen, &value)) {
+ printf("ERROR: Could not access index -1\n");
+ return 1;
+ }
+ if (entry) {
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
+ printf("\n");
+ } else {
+ printf("%lld\n", value);
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Get element at index -4 (first element):\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, -4);
+ if (!ziplistGet(p, &entry, &elen, &value)) {
+ printf("ERROR: Could not access index -4\n");
+ return 1;
+ }
+ if (entry) {
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
+ printf("\n");
+ } else {
+ printf("%lld\n", value);
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Get element at index -5 (reverse out of range):\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, -5);
+ if (p == NULL) {
+ printf("No entry\n");
+ } else {
+ printf("ERROR: Out of range index should return NULL, returned offset: %ld\n", (long)(p-zl));
+ return 1;
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Iterate list from 0 to end:\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, 0);
+ while (ziplistGet(p, &entry, &elen, &value)) {
+ printf("Entry: ");
+ if (entry) {
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
+ } else {
+ printf("%lld", value);
+ }
+ p = ziplistNext(zl,p);
+ printf("\n");
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Iterate list from 1 to end:\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, 1);
+ while (ziplistGet(p, &entry, &elen, &value)) {
+ printf("Entry: ");
+ if (entry) {
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
+ } else {
+ printf("%lld", value);
+ }
+ p = ziplistNext(zl,p);
+ printf("\n");
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Iterate list from 2 to end:\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, 2);
+ while (ziplistGet(p, &entry, &elen, &value)) {
+ printf("Entry: ");
+ if (entry) {
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
+ } else {
+ printf("%lld", value);
+ }
+ p = ziplistNext(zl,p);
+ printf("\n");
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Iterate starting out of range:\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, 4);
+ if (!ziplistGet(p, &entry, &elen, &value)) {
+ printf("No entry\n");
+ } else {
+ printf("ERROR\n");
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Iterate from back to front:\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, -1);
+ while (ziplistGet(p, &entry, &elen, &value)) {
+ printf("Entry: ");
+ if (entry) {
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
+ } else {
+ printf("%lld", value);
+ }
+ p = ziplistPrev(zl,p);
+ printf("\n");
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Iterate from back to front, deleting all items:\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl, -1);
+ while (ziplistGet(p, &entry, &elen, &value)) {
+ printf("Entry: ");
+ if (entry) {
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
+ } else {
+ printf("%lld", value);
+ }
+ zl = ziplistDelete(zl,&p);
+ p = ziplistPrev(zl,p);
+ printf("\n");
+ }
+ printf("\n");
+ zfree(zl);
+ }
+
+ printf("Delete inclusive range 0,0:\n");
+ {
+ zl = createList();
+ zl = ziplistDeleteRange(zl, 0, 1);
+ ziplistRepr(zl);
+ zfree(zl);
+ }
+
+ printf("Delete inclusive range 0,1:\n");
+ {
+ zl = createList();
+ zl = ziplistDeleteRange(zl, 0, 2);
+ ziplistRepr(zl);
+ zfree(zl);
+ }
+
+ printf("Delete inclusive range 1,2:\n");
+ {
+ zl = createList();
+ zl = ziplistDeleteRange(zl, 1, 2);
+ ziplistRepr(zl);
+ zfree(zl);
+ }
+
+ printf("Delete with start index out of range:\n");
+ {
+ zl = createList();
+ zl = ziplistDeleteRange(zl, 5, 1);
+ ziplistRepr(zl);
+ zfree(zl);
+ }
+
+ printf("Delete with num overflow:\n");
+ {
+ zl = createList();
+ zl = ziplistDeleteRange(zl, 1, 5);
+ ziplistRepr(zl);
+ zfree(zl);
+ }
+
+ printf("Delete foo while iterating:\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl,0);
+ while (ziplistGet(p,&entry,&elen,&value)) {
+ if (entry && strncmp("foo",(char*)entry,elen) == 0) {
+ printf("Delete foo\n");
+ zl = ziplistDelete(zl,&p);
+ } else {
+ printf("Entry: ");
+ if (entry) {
+ if (elen && fwrite(entry,elen,1,stdout) == 0)
+ perror("fwrite");
+ } else {
+ printf("%lld",value);
+ }
+ p = ziplistNext(zl,p);
+ printf("\n");
+ }
+ }
+ printf("\n");
+ ziplistRepr(zl);
+ zfree(zl);
+ }
+
+ printf("Replace with same size:\n");
+ {
+ zl = createList(); /* "hello", "foo", "quux", "1024" */
+ unsigned char *orig_zl = zl;
+ p = ziplistIndex(zl, 0);
+ zl = ziplistReplace(zl, p, (unsigned char*)"zoink", 5);
+ p = ziplistIndex(zl, 3);
+ zl = ziplistReplace(zl, p, (unsigned char*)"yy", 2);
+ p = ziplistIndex(zl, 1);
+ zl = ziplistReplace(zl, p, (unsigned char*)"65536", 5);
+ p = ziplistIndex(zl, 0);
+ assert(!memcmp((char*)p,
+ "\x00\x05zoink"
+ "\x07\xf0\x00\x00\x01" /* 65536 as int24 */
+ "\x05\x04quux" "\x06\x02yy" "\xff",
+ 23));
+ assert(zl == orig_zl); /* no reallocations have happened */
+ zfree(zl);
+ printf("SUCCESS\n\n");
+ }
+
+ printf("Replace with different size:\n");
+ {
+ zl = createList(); /* "hello", "foo", "quux", "1024" */
+ p = ziplistIndex(zl, 1);
+ zl = ziplistReplace(zl, p, (unsigned char*)"squirrel", 8);
+ p = ziplistIndex(zl, 0);
+ assert(!strncmp((char*)p,
+ "\x00\x05hello" "\x07\x08squirrel" "\x0a\x04quux"
+ "\x06\xc0\x00\x04" "\xff",
+ 28));
+ zfree(zl);
+ printf("SUCCESS\n\n");
+ }
+
+ printf("Regression test for >255 byte strings:\n");
+ {
+ char v1[257] = {0}, v2[257] = {0};
+ memset(v1,'x',256);
+ memset(v2,'y',256);
+ zl = ziplistNew();
+ zl = ziplistPush(zl,(unsigned char*)v1,strlen(v1),ZIPLIST_TAIL);
+ zl = ziplistPush(zl,(unsigned char*)v2,strlen(v2),ZIPLIST_TAIL);
+
+ /* Pop values again and compare their value. */
+ p = ziplistIndex(zl,0);
+ assert(ziplistGet(p,&entry,&elen,&value));
+ assert(strncmp(v1,(char*)entry,elen) == 0);
+ p = ziplistIndex(zl,1);
+ assert(ziplistGet(p,&entry,&elen,&value));
+ assert(strncmp(v2,(char*)entry,elen) == 0);
+ printf("SUCCESS\n\n");
+ zfree(zl);
+ }
+
+ printf("Regression test deleting next to last entries:\n");
+ {
+ char v[3][257] = {{0}};
+ zlentry e[3] = {{.prevrawlensize = 0, .prevrawlen = 0, .lensize = 0,
+ .len = 0, .headersize = 0, .encoding = 0, .p = NULL}};
+ size_t i;
+
+ for (i = 0; i < (sizeof(v)/sizeof(v[0])); i++) {
+ memset(v[i], 'a' + i, sizeof(v[0]));
+ }
+
+ v[0][256] = '\0';
+ v[1][ 1] = '\0';
+ v[2][256] = '\0';
+
+ zl = ziplistNew();
+ for (i = 0; i < (sizeof(v)/sizeof(v[0])); i++) {
+ zl = ziplistPush(zl, (unsigned char *) v[i], strlen(v[i]), ZIPLIST_TAIL);
+ }
+
+ verify(zl, e);
+
+ assert(e[0].prevrawlensize == 1);
+ assert(e[1].prevrawlensize == 5);
+ assert(e[2].prevrawlensize == 1);
+
+ /* Deleting entry 1 will increase `prevrawlensize` for entry 2 */
+ unsigned char *p = e[1].p;
+ zl = ziplistDelete(zl, &p);
+
+ verify(zl, e);
+
+ assert(e[0].prevrawlensize == 1);
+ assert(e[1].prevrawlensize == 5);
+
+ printf("SUCCESS\n\n");
+ zfree(zl);
+ }
+
+ printf("Create long list and check indices:\n");
+ {
+ unsigned long long start = usec();
+ zl = ziplistNew();
+ char buf[32];
+ int i,len;
+ for (i = 0; i < 1000; i++) {
+ len = snprintf(buf,sizeof(buf),"%d",i);
+ zl = ziplistPush(zl,(unsigned char*)buf,len,ZIPLIST_TAIL);
+ }
+ for (i = 0; i < 1000; i++) {
+ p = ziplistIndex(zl,i);
+ assert(ziplistGet(p,NULL,NULL,&value));
+ assert(i == value);
+
+ p = ziplistIndex(zl,-i-1);
+ assert(ziplistGet(p,NULL,NULL,&value));
+ assert(999-i == value);
+ }
+ printf("SUCCESS. usec=%lld\n\n", usec()-start);
+ zfree(zl);
+ }
+
+ printf("Compare strings with ziplist entries:\n");
+ {
+ zl = createList();
+ p = ziplistIndex(zl,0);
+ if (!ziplistCompare(p,(unsigned char*)"hello",5)) {
+ printf("ERROR: not \"hello\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"hella",5)) {
+ printf("ERROR: \"hella\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl,3);
+ if (!ziplistCompare(p,(unsigned char*)"1024",4)) {
+ printf("ERROR: not \"1024\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"1025",4)) {
+ printf("ERROR: \"1025\"\n");
+ return 1;
+ }
+ printf("SUCCESS\n\n");
+ zfree(zl);
+ }
+
+ printf("Merge test:\n");
+ {
+ /* create list gives us: [hello, foo, quux, 1024] */
+ zl = createList();
+ unsigned char *zl2 = createList();
+
+ unsigned char *zl3 = ziplistNew();
+ unsigned char *zl4 = ziplistNew();
+
+ if (ziplistMerge(&zl4, &zl4)) {
+ printf("ERROR: Allowed merging of one ziplist into itself.\n");
+ return 1;
+ }
+
+ /* Merge two empty ziplists, get empty result back. */
+ zl4 = ziplistMerge(&zl3, &zl4);
+ ziplistRepr(zl4);
+ if (ziplistLen(zl4)) {
+ printf("ERROR: Merging two empty ziplists created entries.\n");
+ return 1;
+ }
+ zfree(zl4);
+
+ zl2 = ziplistMerge(&zl, &zl2);
+ /* merge gives us: [hello, foo, quux, 1024, hello, foo, quux, 1024] */
+ ziplistRepr(zl2);
+
+ if (ziplistLen(zl2) != 8) {
+ printf("ERROR: Merged length not 8, but: %u\n", ziplistLen(zl2));
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,0);
+ if (!ziplistCompare(p,(unsigned char*)"hello",5)) {
+ printf("ERROR: not \"hello\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"hella",5)) {
+ printf("ERROR: \"hella\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,3);
+ if (!ziplistCompare(p,(unsigned char*)"1024",4)) {
+ printf("ERROR: not \"1024\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"1025",4)) {
+ printf("ERROR: \"1025\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,4);
+ if (!ziplistCompare(p,(unsigned char*)"hello",5)) {
+ printf("ERROR: not \"hello\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"hella",5)) {
+ printf("ERROR: \"hella\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,7);
+ if (!ziplistCompare(p,(unsigned char*)"1024",4)) {
+ printf("ERROR: not \"1024\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"1025",4)) {
+ printf("ERROR: \"1025\"\n");
+ return 1;
+ }
+ printf("SUCCESS\n\n");
+ zfree(zl);
+ }
+
+ printf("Stress with random payloads of different encoding:\n");
+ {
+ unsigned long long start = usec();
+ int i,j,len,where;
+ unsigned char *p;
+ char buf[1024];
+ int buflen;
+ list *ref;
+ listNode *refnode;
+
+ /* Hold temp vars from ziplist */
+ unsigned char *sstr;
+ unsigned int slen;
+ long long sval;
+
+ iteration = accurate ? 20000 : 20;
+ for (i = 0; i < iteration; i++) {
+ zl = ziplistNew();
+ ref = listCreate();
+ listSetFreeMethod(ref,(void (*)(void*))sdsfree);
+ len = rand() % 256;
+
+ /* Create lists */
+ for (j = 0; j < len; j++) {
+ where = (rand() & 1) ? ZIPLIST_HEAD : ZIPLIST_TAIL;
+ if (rand() % 2) {
+ buflen = randstring(buf,1,sizeof(buf)-1);
+ } else {
+ switch(rand() % 3) {
+ case 0:
+ buflen = snprintf(buf,sizeof(buf),"%lld",(0LL + rand()) >> 20);
+ break;
+ case 1:
+ buflen = snprintf(buf,sizeof(buf),"%lld",(0LL + rand()));
+ break;
+ case 2:
+ buflen = snprintf(buf,sizeof(buf),"%lld",(0LL + rand()) << 20);
+ break;
+ default:
+ assert(NULL);
+ }
+ }
+
+ /* Add to ziplist */
+ zl = ziplistPush(zl, (unsigned char*)buf, buflen, where);
+
+ /* Add to reference list */
+ if (where == ZIPLIST_HEAD) {
+ listAddNodeHead(ref,sdsnewlen(buf, buflen));
+ } else if (where == ZIPLIST_TAIL) {
+ listAddNodeTail(ref,sdsnewlen(buf, buflen));
+ } else {
+ assert(NULL);
+ }
+ }
+
+ assert(listLength(ref) == ziplistLen(zl));
+ for (j = 0; j < len; j++) {
+ /* Naive way to get elements, but similar to the stresser
+ * executed from the Tcl test suite. */
+ p = ziplistIndex(zl,j);
+ refnode = listIndex(ref,j);
+
+ assert(ziplistGet(p,&sstr,&slen,&sval));
+ if (sstr == NULL) {
+ buflen = snprintf(buf,sizeof(buf),"%lld",sval);
+ } else {
+ buflen = slen;
+ memcpy(buf,sstr,buflen);
+ buf[buflen] = '\0';
+ }
+ assert(memcmp(buf,listNodeValue(refnode),buflen) == 0);
+ }
+ zfree(zl);
+ listRelease(ref);
+ }
+ printf("Done. usec=%lld\n\n", usec()-start);
+ }
+
+ printf("Stress with variable ziplist size:\n");
+ {
+ unsigned long long start = usec();
+ int maxsize = accurate ? 16384 : 16;
+ stress(ZIPLIST_HEAD,100000,maxsize,256);
+ stress(ZIPLIST_TAIL,100000,maxsize,256);
+ printf("Done. usec=%lld\n\n", usec()-start);
+ }
+
+ /* Benchmarks */
+ {
+ zl = ziplistNew();
+ iteration = accurate ? 100000 : 100;
+ for (int i=0; i<iteration; i++) {
+ char buf[4096] = "asdf";
+ zl = ziplistPush(zl, (unsigned char*)buf, 4, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)buf, 40, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)buf, 400, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)buf, 4000, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"1", 1, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"10", 2, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"100", 3, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"1000", 4, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"10000", 5, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"100000", 6, ZIPLIST_TAIL);
+ }
+
+ printf("Benchmark ziplistFind:\n");
+ {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ unsigned char *fptr = ziplistIndex(zl, ZIPLIST_HEAD);
+ fptr = ziplistFind(zl, fptr, (unsigned char*)"nothing", 7, 1);
+ }
+ printf("%lld\n", usec()-start);
+ }
+
+ printf("Benchmark ziplistIndex:\n");
+ {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ ziplistIndex(zl, 99999);
+ }
+ printf("%lld\n", usec()-start);
+ }
+
+ printf("Benchmark ziplistValidateIntegrity:\n");
+ {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ ziplistValidateIntegrity(zl, ziplistBlobLen(zl), 1, NULL, NULL);
+ }
+ printf("%lld\n", usec()-start);
+ }
+
+ printf("Benchmark ziplistCompare with string\n");
+ {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ unsigned char *eptr = ziplistIndex(zl,0);
+ while (eptr != NULL) {
+ ziplistCompare(eptr,(unsigned char*)"nothing",7);
+ eptr = ziplistNext(zl,eptr);
+ }
+ }
+ printf("Done. usec=%lld\n", usec()-start);
+ }
+
+ printf("Benchmark ziplistCompare with number\n");
+ {
+ unsigned long long start = usec();
+ for (int i = 0; i < 2000; i++) {
+ unsigned char *eptr = ziplistIndex(zl,0);
+ while (eptr != NULL) {
+ ziplistCompare(eptr,(unsigned char*)"99999",5);
+ eptr = ziplistNext(zl,eptr);
+ }
+ }
+ printf("Done. usec=%lld\n", usec()-start);
+ }
+
+ zfree(zl);
+ }
+
+ printf("Stress __ziplistCascadeUpdate:\n");
+ {
+ char data[ZIP_BIG_PREVLEN];
+ zl = ziplistNew();
+ iteration = accurate ? 100000 : 100;
+ for (int i = 0; i < iteration; i++) {
+ zl = ziplistPush(zl, (unsigned char*)data, ZIP_BIG_PREVLEN-4, ZIPLIST_TAIL);
+ }
+ unsigned long long start = usec();
+ zl = ziplistPush(zl, (unsigned char*)data, ZIP_BIG_PREVLEN-3, ZIPLIST_HEAD);
+ printf("Done. usec=%lld\n\n", usec()-start);
+ zfree(zl);
+ }
+
+ printf("Edge cases of __ziplistCascadeUpdate:\n");
+ {
+ /* Inserting an entry with data length greater than ZIP_BIG_PREVLEN-4
+ * will lead to a cascade update. */
+ size_t s1 = ZIP_BIG_PREVLEN-4, s2 = ZIP_BIG_PREVLEN-3;
+ zl = ziplistNew();
+
+ zlentry e[4] = {{.prevrawlensize = 0, .prevrawlen = 0, .lensize = 0,
+ .len = 0, .headersize = 0, .encoding = 0, .p = NULL}};
+
+ zl = insertHelper(zl, 'a', s1, ZIPLIST_ENTRY_HEAD(zl));
+ verify(zl, e);
+
+ assert(e[0].prevrawlensize == 1 && e[0].prevrawlen == 0);
+ assert(compareHelper(zl, 'a', s1, 0));
+ ziplistRepr(zl);
+
+ /* No expand. */
+ zl = insertHelper(zl, 'b', s1, ZIPLIST_ENTRY_HEAD(zl));
+ verify(zl, e);
+
+ assert(e[0].prevrawlensize == 1 && e[0].prevrawlen == 0);
+ assert(compareHelper(zl, 'b', s1, 0));
+
+ assert(e[1].prevrawlensize == 1 && e[1].prevrawlen == strEntryBytesSmall(s1));
+ assert(compareHelper(zl, 'a', s1, 1));
+
+ ziplistRepr(zl);
+
+ /* Expand(tail included). */
+ zl = insertHelper(zl, 'c', s2, ZIPLIST_ENTRY_HEAD(zl));
+ verify(zl, e);
+
+ assert(e[0].prevrawlensize == 1 && e[0].prevrawlen == 0);
+ assert(compareHelper(zl, 'c', s2, 0));
+
+ assert(e[1].prevrawlensize == 5 && e[1].prevrawlen == strEntryBytesSmall(s2));
+ assert(compareHelper(zl, 'b', s1, 1));
+
+ assert(e[2].prevrawlensize == 5 && e[2].prevrawlen == strEntryBytesLarge(s1));
+ assert(compareHelper(zl, 'a', s1, 2));
+
+ ziplistRepr(zl);
+
+ /* Expand(only previous head entry). */
+ zl = insertHelper(zl, 'd', s2, ZIPLIST_ENTRY_HEAD(zl));
+ verify(zl, e);
+
+ assert(e[0].prevrawlensize == 1 && e[0].prevrawlen == 0);
+ assert(compareHelper(zl, 'd', s2, 0));
+
+ assert(e[1].prevrawlensize == 5 && e[1].prevrawlen == strEntryBytesSmall(s2));
+ assert(compareHelper(zl, 'c', s2, 1));
+
+ assert(e[2].prevrawlensize == 5 && e[2].prevrawlen == strEntryBytesLarge(s2));
+ assert(compareHelper(zl, 'b', s1, 2));
+
+ assert(e[3].prevrawlensize == 5 && e[3].prevrawlen == strEntryBytesLarge(s1));
+ assert(compareHelper(zl, 'a', s1, 3));
+
+ ziplistRepr(zl);
+
+ /* Delete from mid. */
+ unsigned char *p = ziplistIndex(zl, 2);
+ zl = ziplistDelete(zl, &p);
+ verify(zl, e);
+
+ assert(e[0].prevrawlensize == 1 && e[0].prevrawlen == 0);
+ assert(compareHelper(zl, 'd', s2, 0));
+
+ assert(e[1].prevrawlensize == 5 && e[1].prevrawlen == strEntryBytesSmall(s2));
+ assert(compareHelper(zl, 'c', s2, 1));
+
+ assert(e[2].prevrawlensize == 5 && e[2].prevrawlen == strEntryBytesLarge(s2));
+ assert(compareHelper(zl, 'a', s1, 2));
+
+ ziplistRepr(zl);
+
+ zfree(zl);
+ }
+
+ printf("__ziplistInsert nextdiff == -4 && reqlen < 4 (issue #7170):\n");
+ {
+ zl = ziplistNew();
+
+ /* We set some values to almost reach the critical point - 254 */
+ char A_252[253] = {0}, A_250[251] = {0};
+ memset(A_252, 'A', 252);
+ memset(A_250, 'A', 250);
+
+ /* After the rpush, the list looks like: [one two A_252 A_250 three 10] */
+ zl = ziplistPush(zl, (unsigned char*)"one", 3, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"two", 3, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)A_252, strlen(A_252), ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)A_250, strlen(A_250), ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"three", 5, ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)"10", 2, ZIPLIST_TAIL);
+ ziplistRepr(zl);
+
+ p = ziplistIndex(zl, 2);
+ if (!ziplistCompare(p, (unsigned char*)A_252, strlen(A_252))) {
+ printf("ERROR: not \"A_252\"\n");
+ return 1;
+ }
+
+ /* When we remove A_252, the list becomes: [one two A_250 three 10].
+ * A_250's prev node becomes node two, and because node two is quite small,
+ * A_250's prevlenSize shrinks to 1 and A_250's total size becomes 253 (1+2+250).
+ * The prev node of node three is still node A_250.
+ * We will not shrink node three's prevlenSize; we keep it at 5 bytes. */
+ zl = ziplistDelete(zl, &p);
+ ziplistRepr(zl);
+
+ p = ziplistIndex(zl, 3);
+ if (!ziplistCompare(p, (unsigned char*)"three", 5)) {
+ printf("ERROR: not \"three\"\n");
+ return 1;
+ }
+
+ /* We want to insert a node after A_250, so the list becomes: [one two A_250 10 three 10].
+ * Because the new node is quite small, node three's prevlenSize will shrink to 1. */
+ zl = ziplistInsert(zl, p, (unsigned char*)"10", 2);
+ ziplistRepr(zl);
+
+ /* Last element should equal 10 */
+ p = ziplistIndex(zl, -1);
+ if (!ziplistCompare(p, (unsigned char*)"10", 2)) {
+ printf("ERROR: not \"10\"\n");
+ return 1;
+ }
+
+ zfree(zl);
+ }
+
+ printf("ALL TESTS PASSED!\n");
+ return 0;
+}
+#endif
diff --git a/src/ziplist.h b/src/ziplist.h
new file mode 100644
index 0000000..f210ba6
--- /dev/null
+++ b/src/ziplist.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ZIPLIST_H
+#define _ZIPLIST_H
+
+#define ZIPLIST_HEAD 0
+#define ZIPLIST_TAIL 1
+
+/* Each entry in the ziplist is either a string or an integer. */
+typedef struct {
+ /* When string is used, it is provided with the length (slen). */
+ unsigned char *sval;
+ unsigned int slen;
+ /* When integer is used, 'sval' is NULL, and lval holds the value. */
+ long long lval;
+} ziplistEntry;
+
+unsigned char *ziplistNew(void);
+unsigned char *ziplistMerge(unsigned char **first, unsigned char **second);
+unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
+unsigned char *ziplistIndex(unsigned char *zl, int index);
+unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);
+unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);
+unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval);
+unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);
+unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p);
+unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num);
+unsigned char *ziplistReplace(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);
+unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);
+unsigned char *ziplistFind(unsigned char *zl, unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip);
+unsigned int ziplistLen(unsigned char *zl);
+size_t ziplistBlobLen(unsigned char *zl);
+void ziplistRepr(unsigned char *zl);
+typedef int (*ziplistValidateEntryCB)(unsigned char* p, unsigned int head_count, void* userdata);
+int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
+ ziplistValidateEntryCB entry_cb, void *cb_userdata);
+void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val);
+void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
+unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
+int ziplistSafeToAdd(unsigned char* zl, size_t add);
+
+#ifdef REDIS_TEST
+int ziplistTest(int argc, char *argv[], int flags);
+#endif
+
+#endif /* _ZIPLIST_H */
diff --git a/src/zipmap.c b/src/zipmap.c
new file mode 100644
index 0000000..4e984ba
--- /dev/null
+++ b/src/zipmap.c
@@ -0,0 +1,542 @@
+/* String -> String Map data structure optimized for size.
+ * This file implements a data structure mapping strings to other strings
+ * implementing an O(n) lookup data structure designed to be very memory
+ * efficient.
+ *
+ * The Redis Hash type uses this data structure for hashes composed of a small
+ * number of elements, to switch to a hash table once a given number of
+ * elements is reached.
+ *
+ * Given that many times Redis Hashes are used to represent objects composed
+ * of few fields, this is a very big win in terms of used memory.
+ *
+ * --------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Memory layout of a zipmap, for the map "foo" => "bar", "hello" => "world":
+ *
+ * <zmlen><len>"foo"<len><free>"bar"<len>"hello"<len><free>"world"
+ *
+ * <zmlen> is 1 byte length that holds the current size of the zipmap.
+ * When the zipmap length is greater than or equal to 254, this value
+ * is not used and the zipmap needs to be traversed to find out the length.
+ *
+ * <len> is the length of the following string (key or value).
+ * <len> lengths are encoded in a single value or in a 5 bytes value.
+ * If the first byte value (as an unsigned 8 bit value) is between 0 and
+ * 253, it's a single-byte length. If it is 254 then a four bytes unsigned
+ * integer follows (in the host byte ordering). A value of 255 is used to
+ * signal the end of the hash.
+ *
+ * <free> is the number of free unused bytes after the string, resulting
+ * from modification of values associated to a key. For instance if "foo"
+ * is set to "bar", and later "foo" will be set to "hi", it will have a
+ * free byte to use if the value will enlarge again later, or even in
+ * order to add a key/value pair if it fits.
+ *
+ * <free> is always an unsigned 8 bit number, because if after an
+ * update operation there are more than a few free bytes, the zipmap will be
+ * reallocated to make sure it is as small as possible.
+ *
+ * The most compact representation of the above two elements hash is actually:
+ *
+ * "\x02\x03foo\x03\x00bar\x05hello\x05\x00world\xff"
+ *
+ * Note that because keys and values are prefixed length "objects",
+ * the lookup will take O(N) where N is the number of elements
+ * in the zipmap and *not* the number of bytes needed to represent the zipmap.
+ * This lowers the constant times considerably.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "zmalloc.h"
+#include "endianconv.h"
+
+#define ZIPMAP_BIGLEN 254
+#define ZIPMAP_END 255
+
+/* The following defines the max value for the <free> field described in the
+ * comments above, that is, the max number of trailing bytes in a value. */
+#define ZIPMAP_VALUE_MAX_FREE 4
+
+/* The following macro returns the number of bytes needed to encode the length
+ * for the integer value _l, that is, 1 byte for lengths < ZIPMAP_BIGLEN and
+ * 5 bytes for all the other lengths. */
+#define ZIPMAP_LEN_BYTES(_l) (((_l) < ZIPMAP_BIGLEN) ? 1 : sizeof(unsigned int)+1)
+
+/* Create a new empty zipmap. */
+unsigned char *zipmapNew(void) {
+ unsigned char *zm = zmalloc(2);
+
+ zm[0] = 0; /* Length */
+ zm[1] = ZIPMAP_END;
+ return zm;
+}
+
+/* Decode the encoded length pointed by 'p' */
+static unsigned int zipmapDecodeLength(unsigned char *p) {
+ unsigned int len = *p;
+
+ if (len < ZIPMAP_BIGLEN) return len;
+ memcpy(&len,p+1,sizeof(unsigned int));
+ memrev32ifbe(&len);
+ return len;
+}
+
+static unsigned int zipmapGetEncodedLengthSize(unsigned char *p) {
+ return (*p < ZIPMAP_BIGLEN) ? 1: 5;
+}
+
+/* Encode the length 'l' writing it in 'p'. If p is NULL it just returns
+ * the amount of bytes required to encode such a length. */
+static unsigned int zipmapEncodeLength(unsigned char *p, unsigned int len) {
+ if (p == NULL) {
+ return ZIPMAP_LEN_BYTES(len);
+ } else {
+ if (len < ZIPMAP_BIGLEN) {
+ p[0] = len;
+ return 1;
+ } else {
+ p[0] = ZIPMAP_BIGLEN;
+ memcpy(p+1,&len,sizeof(len));
+ memrev32ifbe(p+1);
+ return 1+sizeof(len);
+ }
+ }
+}
+
+/* Search for a matching key, returning a pointer to the entry inside the
+ * zipmap. Returns NULL if the key is not found.
+ *
+ * If NULL is returned, and totlen is not NULL, it is set to the entire
+ * size of the zipmap, so that the calling function will be able to
+ * reallocate the original zipmap to make room for more entries. */
+static unsigned char *zipmapLookupRaw(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned int *totlen) {
+ unsigned char *p = zm+1, *k = NULL;
+ unsigned int l,llen;
+
+ while(*p != ZIPMAP_END) {
+ unsigned char free;
+
+ /* Match or skip the key */
+ l = zipmapDecodeLength(p);
+ llen = zipmapEncodeLength(NULL,l);
+ if (key != NULL && k == NULL && l == klen && !memcmp(p+llen,key,l)) {
+ /* Only return when the user doesn't care
+ * for the total length of the zipmap. */
+ if (totlen != NULL) {
+ k = p;
+ } else {
+ return p;
+ }
+ }
+ p += llen+l;
+ /* Skip the value as well */
+ l = zipmapDecodeLength(p);
+ p += zipmapEncodeLength(NULL,l);
+ free = p[0];
+ p += l+1+free; /* +1 to skip the free byte */
+ }
+ if (totlen != NULL) *totlen = (unsigned int)(p-zm)+1;
+ return k;
+}
+
+static unsigned long zipmapRequiredLength(unsigned int klen, unsigned int vlen) {
+ unsigned int l;
+
+ l = klen+vlen+3;
+ if (klen >= ZIPMAP_BIGLEN) l += 4;
+ if (vlen >= ZIPMAP_BIGLEN) l += 4;
+ return l;
+}
+
+/* Return the total amount used by a key (encoded length + payload) */
+static unsigned int zipmapRawKeyLength(unsigned char *p) {
+ unsigned int l = zipmapDecodeLength(p);
+ return zipmapEncodeLength(NULL,l) + l;
+}
+
+/* Return the total amount used by a value
+ * (encoded length + single byte free count + payload) */
+static unsigned int zipmapRawValueLength(unsigned char *p) {
+ unsigned int l = zipmapDecodeLength(p);
+ unsigned int used;
+
+ used = zipmapEncodeLength(NULL,l);
+ used += p[used] + 1 + l;
+ return used;
+}
+
+/* If 'p' points to a key, this function returns the total amount of
+ * bytes used to store this entry (entry = key + associated value + trailing
+ * free space if any). */
+static unsigned int zipmapRawEntryLength(unsigned char *p) {
+ unsigned int l = zipmapRawKeyLength(p);
+ return l + zipmapRawValueLength(p+l);
+}
+
+static inline unsigned char *zipmapResize(unsigned char *zm, unsigned int len) {
+ zm = zrealloc(zm, len);
+ zm[len-1] = ZIPMAP_END;
+ return zm;
+}
+
+/* Set key to value, creating the key if it does not already exist.
+ * If 'update' is not NULL, *update is set to 1 if the key was
+ * already present, otherwise to 0. */
+unsigned char *zipmapSet(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned char *val, unsigned int vlen, int *update) {
+ unsigned int zmlen, offset;
+ unsigned int freelen, reqlen = zipmapRequiredLength(klen,vlen);
+ unsigned int empty, vempty;
+ unsigned char *p;
+
+ freelen = reqlen;
+ if (update) *update = 0;
+ p = zipmapLookupRaw(zm,key,klen,&zmlen);
+ if (p == NULL) {
+ /* Key not found: enlarge */
+ zm = zipmapResize(zm, zmlen+reqlen);
+ p = zm+zmlen-1;
+ zmlen = zmlen+reqlen;
+
+ /* Increase zipmap length (this is an insert) */
+ if (zm[0] < ZIPMAP_BIGLEN) zm[0]++;
+ } else {
+ /* Key found. Is there enough space for the new value? */
+ /* Compute the total length: */
+ if (update) *update = 1;
+ freelen = zipmapRawEntryLength(p);
+ if (freelen < reqlen) {
+ /* Store the offset of this key within the current zipmap, so
+ * it can be resized. Then, move the tail backwards so this
+ * pair fits at the current position. */
+ offset = p-zm;
+ zm = zipmapResize(zm, zmlen-freelen+reqlen);
+ p = zm+offset;
+
+ /* The +1 in the number of bytes to be moved is caused by the
+ * end-of-zipmap byte. Note: the *original* zmlen is used. */
+ memmove(p+reqlen, p+freelen, zmlen-(offset+freelen+1));
+ zmlen = zmlen-freelen+reqlen;
+ freelen = reqlen;
+ }
+ }
+
+ /* We now have a suitable block where the key/value entry can
+ * be written. If there is too much free space, move the tail
+ * of the zipmap a few bytes to the front and shrink the zipmap,
+ * as we want zipmaps to be very space efficient. */
+ empty = freelen-reqlen;
+ if (empty >= ZIPMAP_VALUE_MAX_FREE) {
+ /* First, move the tail <empty> bytes to the front, then resize
+ * the zipmap to be <empty> bytes smaller. */
+ offset = p-zm;
+ memmove(p+reqlen, p+freelen, zmlen-(offset+freelen+1));
+ zmlen -= empty;
+ zm = zipmapResize(zm, zmlen);
+ p = zm+offset;
+ vempty = 0;
+ } else {
+ vempty = empty;
+ }
+
+ /* Just write the key + value and we are done. */
+ /* Key: */
+ p += zipmapEncodeLength(p,klen);
+ memcpy(p,key,klen);
+ p += klen;
+ /* Value: */
+ p += zipmapEncodeLength(p,vlen);
+ *p++ = vempty;
+ memcpy(p,val,vlen);
+ return zm;
+}
+
+/* Remove the specified key. If 'deleted' is not NULL the pointed integer is
+ * set to 0 if the key was not found, to 1 if it was found and deleted. */
+unsigned char *zipmapDel(unsigned char *zm, unsigned char *key, unsigned int klen, int *deleted) {
+ unsigned int zmlen, freelen;
+ unsigned char *p = zipmapLookupRaw(zm,key,klen,&zmlen);
+ if (p) {
+ freelen = zipmapRawEntryLength(p);
+ memmove(p, p+freelen, zmlen-((p-zm)+freelen+1));
+ zm = zipmapResize(zm, zmlen-freelen);
+
+ /* Decrease zipmap length */
+ if (zm[0] < ZIPMAP_BIGLEN) zm[0]--;
+
+ if (deleted) *deleted = 1;
+ } else {
+ if (deleted) *deleted = 0;
+ }
+ return zm;
+}
+
+/* Call before iterating through elements via zipmapNext() */
+unsigned char *zipmapRewind(unsigned char *zm) {
+ return zm+1;
+}
+
+/* This function is used to iterate through all the zipmap elements.
+ * In the first call the first argument is the pointer to the zipmap + 1.
+ * In the next calls what zipmapNext returns is used as first argument.
+ * Example:
+ *
+ * unsigned char *i = zipmapRewind(my_zipmap);
+ * while((i = zipmapNext(i,&key,&klen,&value,&vlen)) != NULL) {
+ * printf("%d bytes key at %p\n", klen, key);
+ * printf("%d bytes value at %p\n", vlen, value);
+ * }
+ */
+unsigned char *zipmapNext(unsigned char *zm, unsigned char **key, unsigned int *klen, unsigned char **value, unsigned int *vlen) {
+ if (zm[0] == ZIPMAP_END) return NULL;
+ if (key) {
+ *key = zm;
+ *klen = zipmapDecodeLength(zm);
+ *key += ZIPMAP_LEN_BYTES(*klen);
+ }
+ zm += zipmapRawKeyLength(zm);
+ if (value) {
+ *value = zm+1;
+ *vlen = zipmapDecodeLength(zm);
+ *value += ZIPMAP_LEN_BYTES(*vlen);
+ }
+ zm += zipmapRawValueLength(zm);
+ return zm;
+}
+
+/* Search a key and retrieve the pointer and len of the associated value.
+ * If the key is found the function returns 1, otherwise 0. */
+int zipmapGet(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned char **value, unsigned int *vlen) {
+ unsigned char *p;
+
+ if ((p = zipmapLookupRaw(zm,key,klen,NULL)) == NULL) return 0;
+ p += zipmapRawKeyLength(p);
+ *vlen = zipmapDecodeLength(p);
+ *value = p + ZIPMAP_LEN_BYTES(*vlen) + 1;
+ return 1;
+}
+
+/* Return 1 if the key exists, otherwise 0 is returned. */
+int zipmapExists(unsigned char *zm, unsigned char *key, unsigned int klen) {
+ return zipmapLookupRaw(zm,key,klen,NULL) != NULL;
+}
+
+/* Return the number of entries inside a zipmap */
+unsigned int zipmapLen(unsigned char *zm) {
+ unsigned int len = 0;
+ if (zm[0] < ZIPMAP_BIGLEN) {
+ len = zm[0];
+ } else {
+ unsigned char *p = zipmapRewind(zm);
+ while((p = zipmapNext(p,NULL,NULL,NULL,NULL)) != NULL) len++;
+
+ /* Re-store length if small enough */
+ if (len < ZIPMAP_BIGLEN) zm[0] = len;
+ }
+ return len;
+}
+
+/* Return the raw size in bytes of a zipmap, so that we can serialize
+ * the zipmap on disk (or wherever it is needed) by just writing the returned
+ * amount of bytes of the C array starting at the zipmap pointer. */
+size_t zipmapBlobLen(unsigned char *zm) {
+ unsigned int totlen;
+ zipmapLookupRaw(zm,NULL,0,&totlen);
+ return totlen;
+}
+
+/* Validate the integrity of the data structure.
+ * when `deep` is 0, only the integrity of the header is validated.
+ * when `deep` is 1, we scan all the entries one by one. */
+int zipmapValidateIntegrity(unsigned char *zm, size_t size, int deep) {
+#define OUT_OF_RANGE(p) ( \
+ (p) < zm + 2 || \
+ (p) > zm + size - 1)
+ unsigned int l, s, e;
+
+ /* check that we can actually read the header (or ZIPMAP_END). */
+ if (size < 2)
+ return 0;
+
+ /* the last byte must be the terminator. */
+ if (zm[size-1] != ZIPMAP_END)
+ return 0;
+
+ if (!deep)
+ return 1;
+
+ unsigned int count = 0;
+ unsigned char *p = zm + 1; /* skip the count */
+ while(*p != ZIPMAP_END) {
+ /* read the field name length encoding type */
+ s = zipmapGetEncodedLengthSize(p);
+ /* make sure the entry length doesn't reach outside the edge of the zipmap */
+ if (OUT_OF_RANGE(p+s))
+ return 0;
+
+ /* read the field name length */
+ l = zipmapDecodeLength(p);
+ p += s; /* skip the encoded field size */
+ p += l; /* skip the field */
+
+ /* make sure the entry doesn't reach outside the edge of the zipmap */
+ if (OUT_OF_RANGE(p))
+ return 0;
+
+ /* read the value length encoding type */
+ s = zipmapGetEncodedLengthSize(p);
+ /* make sure the entry length doesn't reach outside the edge of the zipmap */
+ if (OUT_OF_RANGE(p+s))
+ return 0;
+
+ /* read the value length */
+ l = zipmapDecodeLength(p);
+ p += s; /* skip the encoded value size*/
+ e = *p++; /* skip the encoded free space (always encoded in one byte) */
+ p += l+e; /* skip the value and free space */
+ count++;
+
+ /* make sure the entry doesn't reach outside the edge of the zipmap */
+ if (OUT_OF_RANGE(p))
+ return 0;
+ }
+
+ /* check that the zipmap is not empty. */
+ if (count == 0) return 0;
+
+ /* check that the count in the header is correct */
+ if (zm[0] != ZIPMAP_BIGLEN && zm[0] != count)
+ return 0;
+
+ return 1;
+#undef OUT_OF_RANGE
+}
+
+#ifdef REDIS_TEST
+static void zipmapRepr(unsigned char *p) {
+ unsigned int l;
+
+ printf("{status %u}",*p++);
+ while(1) {
+ if (p[0] == ZIPMAP_END) {
+ printf("{end}");
+ break;
+ } else {
+ unsigned char e;
+
+ l = zipmapDecodeLength(p);
+ printf("{key %u}",l);
+ p += zipmapEncodeLength(NULL,l);
+ if (l != 0 && fwrite(p,l,1,stdout) == 0) perror("fwrite");
+ p += l;
+
+ l = zipmapDecodeLength(p);
+ printf("{value %u}",l);
+ p += zipmapEncodeLength(NULL,l);
+ e = *p++;
+ if (l != 0 && fwrite(p,l,1,stdout) == 0) perror("fwrite");
+ p += l+e;
+ if (e) {
+ printf("[");
+ while(e--) printf(".");
+ printf("]");
+ }
+ }
+ }
+ printf("\n");
+}
+
+#define UNUSED(x) (void)(x)
+int zipmapTest(int argc, char *argv[], int flags) {
+ unsigned char *zm;
+
+ UNUSED(argc);
+ UNUSED(argv);
+ UNUSED(flags);
+
+ zm = zipmapNew();
+
+ zm = zipmapSet(zm,(unsigned char*) "name",4, (unsigned char*) "foo",3,NULL);
+ zm = zipmapSet(zm,(unsigned char*) "surname",7, (unsigned char*) "foo",3,NULL);
+ zm = zipmapSet(zm,(unsigned char*) "age",3, (unsigned char*) "foo",3,NULL);
+ zipmapRepr(zm);
+
+ zm = zipmapSet(zm,(unsigned char*) "hello",5, (unsigned char*) "world!",6,NULL);
+ zm = zipmapSet(zm,(unsigned char*) "foo",3, (unsigned char*) "bar",3,NULL);
+ zm = zipmapSet(zm,(unsigned char*) "foo",3, (unsigned char*) "!",1,NULL);
+ zipmapRepr(zm);
+ zm = zipmapSet(zm,(unsigned char*) "foo",3, (unsigned char*) "12345",5,NULL);
+ zipmapRepr(zm);
+ zm = zipmapSet(zm,(unsigned char*) "new",3, (unsigned char*) "xx",2,NULL);
+ zm = zipmapSet(zm,(unsigned char*) "noval",5, (unsigned char*) "",0,NULL);
+ zipmapRepr(zm);
+ zm = zipmapDel(zm,(unsigned char*) "new",3,NULL);
+ zipmapRepr(zm);
+
+ printf("\nLook up large key:\n");
+ {
+ unsigned char buf[512];
+ unsigned char *value;
+ unsigned int vlen, i;
+ for (i = 0; i < 512; i++) buf[i] = 'a';
+
+ zm = zipmapSet(zm,buf,512,(unsigned char*) "long",4,NULL);
+ if (zipmapGet(zm,buf,512,&value,&vlen)) {
+ printf(" <long key> is associated to the %d bytes value: %.*s\n",
+ vlen, vlen, value);
+ }
+ }
+
+ printf("\nPerform a direct lookup:\n");
+ {
+ unsigned char *value;
+ unsigned int vlen;
+
+ if (zipmapGet(zm,(unsigned char*) "foo",3,&value,&vlen)) {
+ printf(" foo is associated to the %d bytes value: %.*s\n",
+ vlen, vlen, value);
+ }
+ }
+ printf("\nIterate through elements:\n");
+ {
+ unsigned char *i = zipmapRewind(zm);
+ unsigned char *key, *value;
+ unsigned int klen, vlen;
+
+ while((i = zipmapNext(i,&key,&klen,&value,&vlen)) != NULL) {
+ printf(" %d:%.*s => %d:%.*s\n", klen, klen, key, vlen, vlen, value);
+ }
+ }
+ zfree(zm);
+ return 0;
+}
+#endif
diff --git a/src/zipmap.h b/src/zipmap.h
new file mode 100644
index 0000000..482c96d
--- /dev/null
+++ b/src/zipmap.h
@@ -0,0 +1,54 @@
+/* String -> String Map data structure optimized for size.
+ *
+ * See zipmap.c for more info.
+ *
+ * --------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ZIPMAP_H
+#define _ZIPMAP_H
+
+unsigned char *zipmapNew(void);
+unsigned char *zipmapSet(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned char *val, unsigned int vlen, int *update);
+unsigned char *zipmapDel(unsigned char *zm, unsigned char *key, unsigned int klen, int *deleted);
+unsigned char *zipmapRewind(unsigned char *zm);
+unsigned char *zipmapNext(unsigned char *zm, unsigned char **key, unsigned int *klen, unsigned char **value, unsigned int *vlen);
+int zipmapGet(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned char **value, unsigned int *vlen);
+int zipmapExists(unsigned char *zm, unsigned char *key, unsigned int klen);
+unsigned int zipmapLen(unsigned char *zm);
+size_t zipmapBlobLen(unsigned char *zm);
+void zipmapRepr(unsigned char *p);
+int zipmapValidateIntegrity(unsigned char *zm, size_t size, int deep);
+
+#ifdef REDIS_TEST
+int zipmapTest(int argc, char *argv[], int flags);
+#endif
+
+#endif
diff --git a/src/zmalloc.c b/src/zmalloc.c
new file mode 100644
index 0000000..bbfa386
--- /dev/null
+++ b/src/zmalloc.c
@@ -0,0 +1,851 @@
+/* zmalloc - total amount of allocated memory aware version of malloc()
+ *
+ * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fmacros.h"
+#include "config.h"
+#include "solarisfixes.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <assert.h>
+
+#ifdef __linux__
+#include <sys/mman.h>
+#endif
+
+/* This function provide us access to the original libc free(). This is useful
+ * for instance to free results obtained by backtrace_symbols(). We need
+ * to define this function before including zmalloc.h that may shadow the
+ * free implementation if we use jemalloc or another non standard allocator. */
+void zlibc_free(void *ptr) {
+ free(ptr);
+}
+
+#include <string.h>
+#include <pthread.h>
+#include "zmalloc.h"
+#include "atomicvar.h"
+
+#define UNUSED(x) ((void)(x))
+
+#ifdef HAVE_MALLOC_SIZE
+#define PREFIX_SIZE (0)
+#else
+/* Use at least 8 bytes alignment on all systems. */
+#if SIZE_MAX < 0xffffffffffffffffull
+#define PREFIX_SIZE 8
+#else
+#define PREFIX_SIZE (sizeof(size_t))
+#endif
+#endif
+
+/* When using the libc allocator, use a minimum allocation size to match the
+ * jemalloc behavior that doesn't return NULL in this case.
+ */
+#define MALLOC_MIN_SIZE(x) ((x) > 0 ? (x) : sizeof(long))
+
+/* Explicitly override malloc/free etc when using tcmalloc. */
+#if defined(USE_TCMALLOC)
+#define malloc(size) tc_malloc(size)
+#define calloc(count,size) tc_calloc(count,size)
+#define realloc(ptr,size) tc_realloc(ptr,size)
+#define free(ptr) tc_free(ptr)
+#elif defined(USE_JEMALLOC)
+#define malloc(size) je_malloc(size)
+#define calloc(count,size) je_calloc(count,size)
+#define realloc(ptr,size) je_realloc(ptr,size)
+#define free(ptr) je_free(ptr)
+#define mallocx(size,flags) je_mallocx(size,flags)
+#define dallocx(ptr,flags) je_dallocx(ptr,flags)
+#endif
+
+#define update_zmalloc_stat_alloc(__n) atomicIncr(used_memory,(__n))
+#define update_zmalloc_stat_free(__n) atomicDecr(used_memory,(__n))
+
+static redisAtomic size_t used_memory = 0;
+
+static void zmalloc_default_oom(size_t size) {
+ fprintf(stderr, "zmalloc: Out of memory trying to allocate %zu bytes\n",
+ size);
+ fflush(stderr);
+ abort();
+}
+
+static void (*zmalloc_oom_handler)(size_t) = zmalloc_default_oom;
+
+#ifdef HAVE_MALLOC_SIZE
+void *extend_to_usable(void *ptr, size_t size) {
+ UNUSED(size);
+ return ptr;
+}
+#endif
+
+/* Try allocating memory, and return NULL if failed.
+ * '*usable' is set to the usable size if non NULL. */
+static inline void *ztrymalloc_usable_internal(size_t size, size_t *usable) {
+ /* Possible overflow, return NULL, so that the caller can panic or handle a failed allocation. */
+ if (size >= SIZE_MAX/2) return NULL;
+ void *ptr = malloc(MALLOC_MIN_SIZE(size)+PREFIX_SIZE);
+
+ if (!ptr) return NULL;
+#ifdef HAVE_MALLOC_SIZE
+ size = zmalloc_size(ptr);
+ update_zmalloc_stat_alloc(size);
+ if (usable) *usable = size;
+ return ptr;
+#else
+ *((size_t*)ptr) = size;
+ update_zmalloc_stat_alloc(size+PREFIX_SIZE);
+ if (usable) *usable = size;
+ return (char*)ptr+PREFIX_SIZE;
+#endif
+}
+
+void *ztrymalloc_usable(size_t size, size_t *usable) {
+ size_t usable_size = 0;
+ void *ptr = ztrymalloc_usable_internal(size, &usable_size);
+#ifdef HAVE_MALLOC_SIZE
+ ptr = extend_to_usable(ptr, usable_size);
+#endif
+ if (usable) *usable = usable_size;
+ return ptr;
+}
+
+/* Allocate memory or panic */
+void *zmalloc(size_t size) {
+ void *ptr = ztrymalloc_usable_internal(size, NULL);
+ if (!ptr) zmalloc_oom_handler(size);
+ return ptr;
+}
+
+/* Try allocating memory, and return NULL if failed. */
+void *ztrymalloc(size_t size) {
+ void *ptr = ztrymalloc_usable_internal(size, NULL);
+ return ptr;
+}
+
+/* Allocate memory or panic.
+ * '*usable' is set to the usable size if non NULL. */
+void *zmalloc_usable(size_t size, size_t *usable) {
+ size_t usable_size = 0;
+ void *ptr = ztrymalloc_usable_internal(size, &usable_size);
+ if (!ptr) zmalloc_oom_handler(size);
+#ifdef HAVE_MALLOC_SIZE
+ ptr = extend_to_usable(ptr, usable_size);
+#endif
+ if (usable) *usable = usable_size;
+ return ptr;
+}
+
+/* Allocation and free functions that bypass the thread cache
+ * and go straight to the allocator arena bins.
+ * Currently implemented only for jemalloc. Used for online defragmentation. */
+#ifdef HAVE_DEFRAG
+void *zmalloc_no_tcache(size_t size) {
+ if (size >= SIZE_MAX/2) zmalloc_oom_handler(size);
+ void *ptr = mallocx(size+PREFIX_SIZE, MALLOCX_TCACHE_NONE);
+ if (!ptr) zmalloc_oom_handler(size);
+ update_zmalloc_stat_alloc(zmalloc_size(ptr));
+ return ptr;
+}
+
+void zfree_no_tcache(void *ptr) {
+ if (ptr == NULL) return;
+ update_zmalloc_stat_free(zmalloc_size(ptr));
+ dallocx(ptr, MALLOCX_TCACHE_NONE);
+}
+#endif
+
+/* Try allocating memory and zero it, and return NULL if failed.
+ * '*usable' is set to the usable size if non NULL. */
+static inline void *ztrycalloc_usable_internal(size_t size, size_t *usable) {
+ /* Possible overflow, return NULL, so that the caller can panic or handle a failed allocation. */
+ if (size >= SIZE_MAX/2) return NULL;
+ void *ptr = calloc(1, MALLOC_MIN_SIZE(size)+PREFIX_SIZE);
+ if (ptr == NULL) return NULL;
+
+#ifdef HAVE_MALLOC_SIZE
+ size = zmalloc_size(ptr);
+ update_zmalloc_stat_alloc(size);
+ if (usable) *usable = size;
+ return ptr;
+#else
+ *((size_t*)ptr) = size;
+ update_zmalloc_stat_alloc(size+PREFIX_SIZE);
+ if (usable) *usable = size;
+ return (char*)ptr+PREFIX_SIZE;
+#endif
+}
+
+void *ztrycalloc_usable(size_t size, size_t *usable) {
+ size_t usable_size = 0;
+ void *ptr = ztrycalloc_usable_internal(size, &usable_size);
+#ifdef HAVE_MALLOC_SIZE
+ ptr = extend_to_usable(ptr, usable_size);
+#endif
+ if (usable) *usable = usable_size;
+ return ptr;
+}
+
+/* Allocate memory and zero it or panic.
+ * We need this wrapper to have a calloc compatible signature */
+void *zcalloc_num(size_t num, size_t size) {
+ /* Ensure that the arguments to calloc(), when multiplied, do not wrap.
+ * Division operations are susceptible to divide-by-zero errors so we also check it. */
+ if ((size == 0) || (num > SIZE_MAX/size)) {
+ zmalloc_oom_handler(SIZE_MAX);
+ return NULL;
+ }
+ void *ptr = ztrycalloc_usable_internal(num*size, NULL);
+ if (!ptr) zmalloc_oom_handler(num*size);
+ return ptr;
+}
+
+/* Allocate memory and zero it or panic */
+void *zcalloc(size_t size) {
+ void *ptr = ztrycalloc_usable_internal(size, NULL);
+ if (!ptr) zmalloc_oom_handler(size);
+ return ptr;
+}
+
+/* Try allocating memory and zero it, and return NULL if failed. */
+void *ztrycalloc(size_t size) {
+ void *ptr = ztrycalloc_usable_internal(size, NULL);
+ return ptr;
+}
+
+/* Allocate memory or panic.
+ * '*usable' is set to the usable size if non NULL. */
+void *zcalloc_usable(size_t size, size_t *usable) {
+ size_t usable_size = 0;
+ void *ptr = ztrycalloc_usable_internal(size, &usable_size);
+ if (!ptr) zmalloc_oom_handler(size);
+#ifdef HAVE_MALLOC_SIZE
+ ptr = extend_to_usable(ptr, usable_size);
+#endif
+ if (usable) *usable = usable_size;
+ return ptr;
+}
+
+/* Try reallocating memory, and return NULL if failed.
+ * '*usable' is set to the usable size if non NULL. */
+static inline void *ztryrealloc_usable_internal(void *ptr, size_t size, size_t *usable) {
+#ifndef HAVE_MALLOC_SIZE
+ void *realptr;
+#endif
+ size_t oldsize;
+ void *newptr;
+
+ /* not allocating anything, just redirect to free. */
+ if (size == 0 && ptr != NULL) {
+ zfree(ptr);
+ if (usable) *usable = 0;
+ return NULL;
+ }
+ /* Not freeing anything, just redirect to malloc. */
+ if (ptr == NULL)
+ return ztrymalloc_usable(size, usable);
+
+ /* Possible overflow, return NULL, so that the caller can panic or handle a failed allocation. */
+ if (size >= SIZE_MAX/2) {
+ zfree(ptr);
+ if (usable) *usable = 0;
+ return NULL;
+ }
+
+#ifdef HAVE_MALLOC_SIZE
+ oldsize = zmalloc_size(ptr);
+ newptr = realloc(ptr,size);
+ if (newptr == NULL) {
+ if (usable) *usable = 0;
+ return NULL;
+ }
+
+ update_zmalloc_stat_free(oldsize);
+ size = zmalloc_size(newptr);
+ update_zmalloc_stat_alloc(size);
+ if (usable) *usable = size;
+ return newptr;
+#else
+ realptr = (char*)ptr-PREFIX_SIZE;
+ oldsize = *((size_t*)realptr);
+ newptr = realloc(realptr,size+PREFIX_SIZE);
+ if (newptr == NULL) {
+ if (usable) *usable = 0;
+ return NULL;
+ }
+
+ *((size_t*)newptr) = size;
+ update_zmalloc_stat_free(oldsize);
+ update_zmalloc_stat_alloc(size);
+ if (usable) *usable = size;
+ return (char*)newptr+PREFIX_SIZE;
+#endif
+}
+
+void *ztryrealloc_usable(void *ptr, size_t size, size_t *usable) {
+ size_t usable_size = 0;
+ ptr = ztryrealloc_usable_internal(ptr, size, &usable_size);
+#ifdef HAVE_MALLOC_SIZE
+ ptr = extend_to_usable(ptr, usable_size);
+#endif
+ if (usable) *usable = usable_size;
+ return ptr;
+}
+
+/* Reallocate memory or panic */
+void *zrealloc(void *ptr, size_t size) {
+ ptr = ztryrealloc_usable_internal(ptr, size, NULL);
+ if (!ptr && size != 0) zmalloc_oom_handler(size);
+ return ptr;
+}
+
+/* Try Reallocating memory, and return NULL if failed. */
+void *ztryrealloc(void *ptr, size_t size) {
+ ptr = ztryrealloc_usable_internal(ptr, size, NULL);
+ return ptr;
+}
+
+/* Reallocate memory or panic.
+ * '*usable' is set to the usable size if non NULL. */
+void *zrealloc_usable(void *ptr, size_t size, size_t *usable) {
+ size_t usable_size = 0;
+ ptr = ztryrealloc_usable(ptr, size, &usable_size);
+ if (!ptr && size != 0) zmalloc_oom_handler(size);
+#ifdef HAVE_MALLOC_SIZE
+ ptr = extend_to_usable(ptr, usable_size);
+#endif
+ if (usable) *usable = usable_size;
+ return ptr;
+}
+
+/* Provide zmalloc_size() for systems where this function is not provided by
+ * malloc itself, given that in that case we store a header with this
+ * information as the first bytes of every allocation. */
+#ifndef HAVE_MALLOC_SIZE
+size_t zmalloc_size(void *ptr) {
+ void *realptr = (char*)ptr-PREFIX_SIZE;
+ size_t size = *((size_t*)realptr);
+ return size+PREFIX_SIZE;
+}
+size_t zmalloc_usable_size(void *ptr) {
+ return zmalloc_size(ptr)-PREFIX_SIZE;
+}
+#endif
+
+void zfree(void *ptr) {
+#ifndef HAVE_MALLOC_SIZE
+ void *realptr;
+ size_t oldsize;
+#endif
+
+ if (ptr == NULL) return;
+#ifdef HAVE_MALLOC_SIZE
+ update_zmalloc_stat_free(zmalloc_size(ptr));
+ free(ptr);
+#else
+ realptr = (char*)ptr-PREFIX_SIZE;
+ oldsize = *((size_t*)realptr);
+ update_zmalloc_stat_free(oldsize+PREFIX_SIZE);
+ free(realptr);
+#endif
+}
+
+/* Similar to zfree, '*usable' is set to the usable size being freed. */
+void zfree_usable(void *ptr, size_t *usable) {
+#ifndef HAVE_MALLOC_SIZE
+ void *realptr;
+ size_t oldsize;
+#endif
+
+ if (ptr == NULL) return;
+#ifdef HAVE_MALLOC_SIZE
+ update_zmalloc_stat_free(*usable = zmalloc_size(ptr));
+ free(ptr);
+#else
+ realptr = (char*)ptr-PREFIX_SIZE;
+ *usable = oldsize = *((size_t*)realptr);
+ update_zmalloc_stat_free(oldsize+PREFIX_SIZE);
+ free(realptr);
+#endif
+}
+
+char *zstrdup(const char *s) {
+ size_t l = strlen(s)+1;
+ char *p = zmalloc(l);
+
+ memcpy(p,s,l);
+ return p;
+}
+
+size_t zmalloc_used_memory(void) {
+ size_t um;
+ atomicGet(used_memory,um);
+ return um;
+}
+
+void zmalloc_set_oom_handler(void (*oom_handler)(size_t)) {
+ zmalloc_oom_handler = oom_handler;
+}
+
+/* Use 'MADV_DONTNEED' to release memory to operating system quickly.
+ * We do that in a fork child process to avoid CoW when the parent modifies
+ * these shared pages. */
+void zmadvise_dontneed(void *ptr) {
+#if defined(USE_JEMALLOC) && defined(__linux__)
+ static size_t page_size = 0;
+ if (page_size == 0) page_size = sysconf(_SC_PAGESIZE);
+ size_t page_size_mask = page_size - 1;
+
+ size_t real_size = zmalloc_size(ptr);
+ if (real_size < page_size) return;
+
+ /* We need to align the pointer upwards according to page size, because
+ * the memory address is increased upwards and we only can free memory
+ * based on page. */
+ char *aligned_ptr = (char *)(((size_t)ptr+page_size_mask) & ~page_size_mask);
+ real_size -= (aligned_ptr-(char*)ptr);
+ if (real_size >= page_size) {
+ madvise((void *)aligned_ptr, real_size&~page_size_mask, MADV_DONTNEED);
+ }
+#else
+ (void)(ptr);
+#endif
+}
+
+/* Get the RSS information in an OS-specific way.
+ *
+ * WARNING: the function zmalloc_get_rss() is not designed to be fast
+ * and may not be called in the busy loops where Redis tries to release
+ * memory expiring or swapping out objects.
+ *
+ * For this kind of "fast RSS reporting" usages use instead the
+ * function RedisEstimateRSS() that is a much faster (and less precise)
+ * version of the function. */
+
+#if defined(HAVE_PROC_STAT)
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+
+/* Get the i'th field from "/proc/self/stats" note i is 1 based as appears in the 'proc' man page */
+int get_proc_stat_ll(int i, long long *res) {
+#if defined(HAVE_PROC_STAT)
+ char buf[4096];
+ int fd, l;
+ char *p, *x;
+
+ if ((fd = open("/proc/self/stat",O_RDONLY)) == -1) return 0;
+ if ((l = read(fd,buf,sizeof(buf)-1)) <= 0) {
+ close(fd);
+ return 0;
+ }
+ close(fd);
+ buf[l] = '\0';
+ if (buf[l-1] == '\n') buf[l-1] = '\0';
+
+ /* Skip pid and process name (surrounded with parentheses) */
+ p = strrchr(buf, ')');
+ if (!p) return 0;
+ p++;
+ while (*p == ' ') p++;
+ if (*p == '\0') return 0;
+ i -= 3;
+ if (i < 0) return 0;
+
+ while (p && i--) {
+ p = strchr(p, ' ');
+ if (p) p++;
+ else return 0;
+ }
+ x = strchr(p,' ');
+ if (x) *x = '\0';
+
+ *res = strtoll(p,&x,10);
+ if (*x != '\0') return 0;
+ return 1;
+#else
+ UNUSED(i);
+ UNUSED(res);
+ return 0;
+#endif
+}
+
+#if defined(HAVE_PROC_STAT)
+size_t zmalloc_get_rss(void) {
+ int page = sysconf(_SC_PAGESIZE);
+ long long rss;
+
+ /* RSS is the 24th field in /proc/<pid>/stat */
+ if (!get_proc_stat_ll(24, &rss)) return 0;
+ rss *= page;
+ return rss;
+}
+#elif defined(HAVE_TASKINFO)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <mach/task.h>
+#include <mach/mach_init.h>
+
+size_t zmalloc_get_rss(void) {
+ task_t task = MACH_PORT_NULL;
+ struct task_basic_info t_info;
+ mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
+
+ if (task_for_pid(current_task(), getpid(), &task) != KERN_SUCCESS)
+ return 0;
+ task_info(task, TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count);
+
+ return t_info.resident_size;
+}
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/user.h>
+
+size_t zmalloc_get_rss(void) {
+ struct kinfo_proc info;
+ size_t infolen = sizeof(info);
+ int mib[4];
+ mib[0] = CTL_KERN;
+ mib[1] = KERN_PROC;
+ mib[2] = KERN_PROC_PID;
+ mib[3] = getpid();
+
+ if (sysctl(mib, 4, &info, &infolen, NULL, 0) == 0)
+#if defined(__FreeBSD__)
+ return (size_t)info.ki_rssize * getpagesize();
+#else
+ return (size_t)info.kp_vm_rssize * getpagesize();
+#endif
+
+ return 0L;
+}
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#if defined(__OpenBSD__)
+#define kinfo_proc2 kinfo_proc
+#define KERN_PROC2 KERN_PROC
+#define __arraycount(a) (sizeof(a) / sizeof(a[0]))
+#endif
+
+size_t zmalloc_get_rss(void) {
+ struct kinfo_proc2 info;
+ size_t infolen = sizeof(info);
+ int mib[6];
+ mib[0] = CTL_KERN;
+ mib[1] = KERN_PROC2;
+ mib[2] = KERN_PROC_PID;
+ mib[3] = getpid();
+ mib[4] = sizeof(info);
+ mib[5] = 1;
+ if (sysctl(mib, __arraycount(mib), &info, &infolen, NULL, 0) == 0)
+ return (size_t)info.p_vm_rssize * getpagesize();
+
+ return 0L;
+}
+#elif defined(__HAIKU__)
+#include <OS.h>
+
+size_t zmalloc_get_rss(void) {
+ area_info info;
+ thread_info th;
+ size_t rss = 0;
+ ssize_t cookie = 0;
+
+ if (get_thread_info(find_thread(0), &th) != B_OK)
+ return 0;
+
+ while (get_next_area_info(th.team, &cookie, &info) == B_OK)
+ rss += info.ram_size;
+
+ return rss;
+}
+#elif defined(HAVE_PSINFO)
+#include <unistd.h>
+#include <sys/procfs.h>
+#include <fcntl.h>
+
+size_t zmalloc_get_rss(void) {
+ struct prpsinfo info;
+ char filename[256];
+ int fd;
+
+ snprintf(filename,256,"/proc/%ld/psinfo",(long) getpid());
+
+ if ((fd = open(filename,O_RDONLY)) == -1) return 0;
+ if (ioctl(fd, PIOCPSINFO, &info) == -1) {
+ close(fd);
+ return 0;
+ }
+
+ close(fd);
+ return info.pr_rssize;
+}
+#else
+size_t zmalloc_get_rss(void) {
+ /* If we can't get the RSS in an OS-specific way for this system just
+ * return the memory usage we estimated in zmalloc().
+ *
+ * Fragmentation will appear to be always 1 (no fragmentation)
+ * of course... */
+ return zmalloc_used_memory();
+}
+#endif
+
+#if defined(USE_JEMALLOC)
+
+int zmalloc_get_allocator_info(size_t *allocated,
+ size_t *active,
+ size_t *resident) {
+ uint64_t epoch = 1;
+ size_t sz;
+ *allocated = *resident = *active = 0;
+ /* Update the statistics cached by mallctl. */
+ sz = sizeof(epoch);
+ je_mallctl("epoch", &epoch, &sz, &epoch, sz);
+ sz = sizeof(size_t);
+ /* Unlike RSS, this does not include RSS from shared libraries and other non
+ * heap mappings. */
+ je_mallctl("stats.resident", resident, &sz, NULL, 0);
+ /* Unlike resident, this doesn't include the pages jemalloc reserves
+ * for re-use (purge will clean that). */
+ je_mallctl("stats.active", active, &sz, NULL, 0);
+ /* Unlike zmalloc_used_memory, this matches the stats.resident by taking
+ * into account all allocations done by this process (not only zmalloc). */
+ je_mallctl("stats.allocated", allocated, &sz, NULL, 0);
+ return 1;
+}
+
+void set_jemalloc_bg_thread(int enable) {
+ /* let jemalloc do purging asynchronously, required when there's no traffic
+ * after flushdb */
+ char val = !!enable;
+ je_mallctl("background_thread", NULL, 0, &val, 1);
+}
+
+int jemalloc_purge(void) {
+ /* return all unused (reserved) pages to the OS */
+ char tmp[32];
+ unsigned narenas = 0;
+ size_t sz = sizeof(unsigned);
+ if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) {
+ snprintf(tmp, sizeof(tmp), "arena.%d.purge", narenas);
+ if (!je_mallctl(tmp, NULL, 0, NULL, 0))
+ return 0;
+ }
+ return -1;
+}
+
+#else
+
+int zmalloc_get_allocator_info(size_t *allocated,
+ size_t *active,
+ size_t *resident) {
+ *allocated = *resident = *active = 0;
+ return 1;
+}
+
+void set_jemalloc_bg_thread(int enable) {
+ ((void)(enable));
+}
+
+int jemalloc_purge(void) {
+ return 0;
+}
+
+#endif
+
+#if defined(__APPLE__)
+/* For proc_pidinfo() used later in zmalloc_get_smap_bytes_by_field().
+ * Note that this file cannot be included in zmalloc.h because it includes
+ * a Darwin queue.h file where there is a "LIST_HEAD" macro (!) defined
+ * conflicting with Redis user code. */
+#include <libproc.h>
+#endif
+
+/* Get the sum of the specified field (converted from kb to bytes) in
+ * /proc/self/smaps. The field must be specified with trailing ":" as it
+ * appears in the smaps output.
+ *
+ * If a pid is specified, the information is extracted for such a pid,
+ * otherwise if pid is -1 the information is reported is about the
+ * current process.
+ *
+ * Example: zmalloc_get_smap_bytes_by_field("Rss:",-1);
+ */
+#if defined(HAVE_PROC_SMAPS)
+size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
+ char line[1024];
+ size_t bytes = 0;
+ int flen = strlen(field);
+ FILE *fp;
+
+ if (pid == -1) {
+ fp = fopen("/proc/self/smaps","r");
+ } else {
+ char filename[128];
+ snprintf(filename,sizeof(filename),"/proc/%ld/smaps",pid);
+ fp = fopen(filename,"r");
+ }
+
+ if (!fp) return 0;
+ while(fgets(line,sizeof(line),fp) != NULL) {
+ if (strncmp(line,field,flen) == 0) {
+ char *p = strchr(line,'k');
+ if (p) {
+ *p = '\0';
+ bytes += strtol(line+flen,NULL,10) * 1024;
+ }
+ }
+ }
+ fclose(fp);
+ return bytes;
+}
+#else
+/* Get sum of the specified field from libproc api call.
+ * As there are per page value basis we need to convert
+ * them accordingly.
+ *
+ * Note that AnonHugePages is a no-op as THP feature
+ * is not supported in this platform
+ */
+size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
+#if defined(__APPLE__)
+ struct proc_regioninfo pri;
+ if (pid == -1) pid = getpid();
+ if (proc_pidinfo(pid, PROC_PIDREGIONINFO, 0, &pri,
+ PROC_PIDREGIONINFO_SIZE) == PROC_PIDREGIONINFO_SIZE)
+ {
+ int pagesize = getpagesize();
+ if (!strcmp(field, "Private_Dirty:")) {
+ return (size_t)pri.pri_pages_dirtied * pagesize;
+ } else if (!strcmp(field, "Rss:")) {
+ return (size_t)pri.pri_pages_resident * pagesize;
+ } else if (!strcmp(field, "AnonHugePages:")) {
+ return 0;
+ }
+ }
+ return 0;
+#endif
+ ((void) field);
+ ((void) pid);
+ return 0;
+}
+#endif
+
+/* Return the total number bytes in pages marked as Private Dirty.
+ *
+ * Note: depending on the platform and memory footprint of the process, this
+ * call can be slow, exceeding 1000ms!
+ */
+size_t zmalloc_get_private_dirty(long pid) {
+ return zmalloc_get_smap_bytes_by_field("Private_Dirty:",pid);
+}
+
+/* Returns the size of physical memory (RAM) in bytes.
+ * It looks ugly, but this is the cleanest way to achieve cross platform results.
+ * Cleaned up from:
+ *
+ * http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system
+ *
+ * Note that this function:
+ * 1) Was released under the following CC attribution license:
+ * http://creativecommons.org/licenses/by/3.0/deed.en_US.
+ * 2) Was originally implemented by David Robert Nadeau.
+ * 3) Was modified for Redis by Matt Stancliff.
+ * 4) This note exists in order to comply with the original license.
+ */
+size_t zmalloc_get_memory_size(void) {
+#if defined(__unix__) || defined(__unix) || defined(unix) || \
+ (defined(__APPLE__) && defined(__MACH__))
+#if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64))
+ int mib[2];
+ mib[0] = CTL_HW;
+#if defined(HW_MEMSIZE)
+ mib[1] = HW_MEMSIZE; /* OSX. --------------------- */
+#elif defined(HW_PHYSMEM64)
+ mib[1] = HW_PHYSMEM64; /* NetBSD, OpenBSD. --------- */
+#endif
+ int64_t size = 0; /* 64-bit */
+ size_t len = sizeof(size);
+ if (sysctl( mib, 2, &size, &len, NULL, 0) == 0)
+ return (size_t)size;
+ return 0L; /* Failed? */
+
+#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
+ /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */
+ return (size_t)sysconf(_SC_PHYS_PAGES) * (size_t)sysconf(_SC_PAGESIZE);
+
+#elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM))
+ /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */
+ int mib[2];
+ mib[0] = CTL_HW;
+#if defined(HW_REALMEM)
+ mib[1] = HW_REALMEM; /* FreeBSD. ----------------- */
+#elif defined(HW_PHYSMEM)
+ mib[1] = HW_PHYSMEM; /* Others. ------------------ */
+#endif
+ unsigned int size = 0; /* 32-bit */
+ size_t len = sizeof(size);
+ if (sysctl(mib, 2, &size, &len, NULL, 0) == 0)
+ return (size_t)size;
+ return 0L; /* Failed? */
+#else
+ return 0L; /* Unknown method to get the data. */
+#endif
+#else
+ return 0L; /* Unknown OS. */
+#endif
+}
+
+#ifdef REDIS_TEST
+int zmalloc_test(int argc, char **argv, int flags) {
+ void *ptr;
+
+ UNUSED(argc);
+ UNUSED(argv);
+ UNUSED(flags);
+ printf("Malloc prefix size: %d\n", (int) PREFIX_SIZE);
+ printf("Initial used memory: %zu\n", zmalloc_used_memory());
+ ptr = zmalloc(123);
+ printf("Allocated 123 bytes; used: %zu\n", zmalloc_used_memory());
+ ptr = zrealloc(ptr, 456);
+ printf("Reallocated to 456 bytes; used: %zu\n", zmalloc_used_memory());
+ zfree(ptr);
+ printf("Freed pointer; used: %zu\n", zmalloc_used_memory());
+ return 0;
+}
+#endif
diff --git a/src/zmalloc.h b/src/zmalloc.h
new file mode 100644
index 0000000..491013a
--- /dev/null
+++ b/src/zmalloc.h
@@ -0,0 +1,167 @@
+/* zmalloc - total amount of allocated memory aware version of malloc()
+ *
+ * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ZMALLOC_H
+#define __ZMALLOC_H
+
+/* Double expansion needed for stringification of macro values. */
+#define __xstr(s) __str(s)
+#define __str(s) #s
+
+#if defined(USE_TCMALLOC)
+#define ZMALLOC_LIB ("tcmalloc-" __xstr(TC_VERSION_MAJOR) "." __xstr(TC_VERSION_MINOR))
+#include <google/tcmalloc.h>
+#if (TC_VERSION_MAJOR == 1 && TC_VERSION_MINOR >= 6) || (TC_VERSION_MAJOR > 1)
+#define HAVE_MALLOC_SIZE 1
+#define zmalloc_size(p) tc_malloc_size(p)
+#else
+#error "Newer version of tcmalloc required"
+#endif
+
+#elif defined(USE_JEMALLOC)
+#define ZMALLOC_LIB ("jemalloc-" __xstr(JEMALLOC_VERSION_MAJOR) "." __xstr(JEMALLOC_VERSION_MINOR) "." __xstr(JEMALLOC_VERSION_BUGFIX))
+#include <jemalloc/jemalloc.h>
+#if (JEMALLOC_VERSION_MAJOR == 2 && JEMALLOC_VERSION_MINOR >= 1) || (JEMALLOC_VERSION_MAJOR > 2)
+#define HAVE_MALLOC_SIZE 1
+#define zmalloc_size(p) je_malloc_usable_size(p)
+#else
+#error "Newer version of jemalloc required"
+#endif
+
+#elif defined(__APPLE__)
+#include <malloc/malloc.h>
+#define HAVE_MALLOC_SIZE 1
+#define zmalloc_size(p) malloc_size(p)
+#endif
+
+/* On native libc implementations, we should still do our best to provide a
+ * HAVE_MALLOC_SIZE capability. This can be set explicitly as well:
+ *
+ * NO_MALLOC_USABLE_SIZE disables it on all platforms, even if they are
+ * known to support it.
+ * USE_MALLOC_USABLE_SIZE forces use of malloc_usable_size() regardless
+ * of platform.
+ */
+#ifndef ZMALLOC_LIB
+#define ZMALLOC_LIB "libc"
+
+#if !defined(NO_MALLOC_USABLE_SIZE) && \
+ (defined(__GLIBC__) || defined(__FreeBSD__) || \
+ defined(__DragonFly__) || defined(__HAIKU__) || \
+ defined(USE_MALLOC_USABLE_SIZE))
+
+/* Includes for malloc_usable_size() */
+#ifdef __FreeBSD__
+#include <malloc_np.h>
+#else
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <malloc.h>
+#endif
+
+#define HAVE_MALLOC_SIZE 1
+#define zmalloc_size(p) malloc_usable_size(p)
+
+#endif
+#endif
+
+/* We can enable the Redis defrag capabilities only if we are using Jemalloc
+ * and the version used is our special version modified for Redis having
+ * the ability to return per-allocation fragmentation hints. */
+#if defined(USE_JEMALLOC) && defined(JEMALLOC_FRAG_HINT)
+#define HAVE_DEFRAG
+#endif
+
+/* 'noinline' attribute is intended to prevent the `-Wstringop-overread` warning
+ * when using gcc-12 later with LTO enabled. It may be removed once the
+ * bug[https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96503] is fixed. */
+__attribute__((malloc,alloc_size(1),noinline)) void *zmalloc(size_t size); /* Core allocator; alloc_size lets the compiler track buffer bounds, noinline works around GCC bug 96503 (see comment above). */
+__attribute__((malloc,alloc_size(1),noinline)) void *zcalloc(size_t size); /* Single-argument calloc-style allocation — NOTE(review): presumably zero-filled; confirm in zmalloc.c. */
+__attribute__((malloc,alloc_size(1,2),noinline)) void *zcalloc_num(size_t num, size_t size); /* Two-argument form; alloc_size(1,2) means the object is num*size bytes. */
+__attribute__((alloc_size(2),noinline)) void *zrealloc(void *ptr, size_t size);
+__attribute__((malloc,alloc_size(1),noinline)) void *ztrymalloc(size_t size); /* NOTE(review): ztry* variants presumably return NULL on OOM instead of invoking the OOM handler — confirm in zmalloc.c. */
+__attribute__((malloc,alloc_size(1),noinline)) void *ztrycalloc(size_t size);
+__attribute__((alloc_size(2),noinline)) void *ztryrealloc(void *ptr, size_t size);
+void zfree(void *ptr);
+void *zmalloc_usable(size_t size, size_t *usable); /* *_usable variants additionally report the allocation's usable size through 'usable'. */
+void *zcalloc_usable(size_t size, size_t *usable);
+void *zrealloc_usable(void *ptr, size_t size, size_t *usable);
+void *ztrymalloc_usable(size_t size, size_t *usable);
+void *ztrycalloc_usable(size_t size, size_t *usable);
+void *ztryrealloc_usable(void *ptr, size_t size, size_t *usable);
+void zfree_usable(void *ptr, size_t *usable);
+__attribute__((malloc)) char *zstrdup(const char *s); /* Duplicate a C string into zmalloc-managed memory; caller frees with zfree(). */
+size_t zmalloc_used_memory(void); /* Total bytes currently tracked as allocated by this module (see zmalloc_test). */
+void zmalloc_set_oom_handler(void (*oom_handler)(size_t));
+size_t zmalloc_get_rss(void); /* Process resident set size as reported by the OS. */
+int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident);
+void set_jemalloc_bg_thread(int enable); /* NOTE(review): presumably a no-op unless built with jemalloc — confirm in zmalloc.c. */
+int jemalloc_purge(void);
+size_t zmalloc_get_private_dirty(long pid); /* NOTE(review): presumably Linux /proc smaps based, 0 elsewhere — confirm. */
+size_t zmalloc_get_smap_bytes_by_field(char *field, long pid);
+size_t zmalloc_get_memory_size(void); /* Total physical RAM in bytes, or 0 when it cannot be determined (see implementation in zmalloc.c). */
+void zlibc_free(void *ptr); /* NOTE(review): presumably frees with the raw libc free(), for pointers not obtained via zmalloc — confirm. */
+void zmadvise_dontneed(void *ptr);
+
+#ifdef HAVE_DEFRAG
+void zfree_no_tcache(void *ptr);
+__attribute__((malloc)) void *zmalloc_no_tcache(size_t size);
+#endif
+
+#ifndef HAVE_MALLOC_SIZE
+size_t zmalloc_size(void *ptr);
+size_t zmalloc_usable_size(void *ptr);
+#else
+/* If we use 'zmalloc_usable_size()' to obtain additional available memory size
+ * and manipulate it, we need to call 'extend_to_usable()' afterwards to ensure
+ * the compiler recognizes this extra memory. However, if we use the pointer
+ * obtained from z[*]_usable() family functions, there is no need for this step. */
+#define zmalloc_usable_size(p) zmalloc_size(p)
+
+/* derived from https://github.com/systemd/systemd/pull/25688
+ * We use zmalloc_usable_size() everywhere to use memory blocks, but that is an abuse since the
+ * malloc_usable_size() isn't meant for this kind of use, it is for diagnostics only. That is also why the
+ * behavior is flaky when built with _FORTIFY_SOURCE, the compiler can sense that we reach outside
+ * the allocated block and SIGABRT.
+ * We use a dummy allocator function to tell the compiler that the new size of ptr is newsize.
+ * The implementation returns the pointer as is; the only reason for its existence is as a conduit for the
+ * alloc_size attribute. This cannot be a static inline because gcc then loses the attributes on the function.
+ * See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96503 */
+__attribute__((alloc_size(2),noinline)) void *extend_to_usable(void *ptr, size_t size);
+#endif
+
+int get_proc_stat_ll(int i, long long *res);
+
+#ifdef REDIS_TEST
+int zmalloc_test(int argc, char **argv, int flags);
+#endif
+
+#endif /* __ZMALLOC_H */