1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
|
#!/usr/bin/env python3
import os
import glob
import json
import sys
import jsonschema
import subprocess
import redis
import time
import argparse
import multiprocessing
import collections
import io
import traceback
from datetime import timedelta
from functools import partial
try:
from jsonschema import Draft201909Validator as schema_validator
except ImportError:
from jsonschema import Draft7Validator as schema_validator
"""
The purpose of this file is to validate the reply_schema values of COMMAND DOCS.
Basically, this is what it does:
1. Goes over req-res files, generated by redis-servers, spawned by the testsuite (see logreqres.c)
2. For each request-response pair, it validates the response against the request's reply_schema (obtained from COMMAND DOCS)
This script spins up a redis-server and a redis-cli in order to obtain COMMAND DOCS.
In order to use this file you must run the redis testsuite with the following flags:
./runtest --dont-clean --force-resp3 --log-req-res
And then:
./utils/req-res-log-validator.py
The script will fail only if:
1. One or more of the replies doesn't comply with its schema.
2. One or more of the commands in COMMAND DOCS doesn't have the reply_schema field (with --fail-missing-reply-schemas)
3. The testsuite didn't execute all of the commands (with --fail-commands-not-all-hit)
Future validations:
1. Fail the script if one or more of the branches of the reply schema (e.g. oneOf, anyOf) was not hit.
"""
# Commands that don't work in a req-res manner (see logreqres.c)
_NON_REQ_RES_COMMANDS = (
    "debug",  # because of DEBUG SEGFAULT
    "sync",
    "psync",
    "monitor",
    "subscribe",
    "unsubscribe",
    "ssubscribe",
    "sunsubscribe",
    "psubscribe",
    "punsubscribe",
)

# Commands for which we decided not to write a reply schema
_NO_REPLY_SCHEMA_COMMANDS = (
    "pfdebug",
    "lolwut",
)

# Union of the two groups above. Replies to these commands are never validated,
# and the commands are left out of the "missing reply_schema" and "not hit"
# reports.
IGNORED_COMMANDS = {*_NON_REQ_RES_COMMANDS, *_NO_REPLY_SCHEMA_COMMANDS}
class Request:
    """
    This class represents a Redis request (AKA command, argv)

    Attributes:
        argv: the arguments read from the log, in order. Empty when the input
            was already exhausted (the caller treats that as EOF).
        command: the key under which the command was found in `docs`: either
            the lowercased first argument, or "<first>|<second>" (both
            lowercased) for subcommands. None if neither key has a doc.
        schema: the "reply_schema" entry of the matching doc, or None if the
            doc has no such entry (or no doc was found).
    """

    def __init__(self, f, docs, line_counter):
        """
        Read lines from `f` (generated by logreqres.c) and populates the argv array

        Each argument is stored as a length line followed by exactly that many
        characters and a trailing \\r\\n; the literal argument "__argv_end__"
        terminates the request.

        Args:
            f: text stream positioned at the start of a request.
            docs: mapping of command name to its COMMAND DOCS entry.
            line_counter: single-element list holding the current line number;
                it is advanced in place as lines are consumed.

        Raises:
            ValueError: if a length line is not an integer.
        """
        self.command = None
        self.schema = None
        self.argv = []

        while True:
            line = f.readline()
            line_counter[0] += 1
            if not line:
                break
            length = int(line)
            arg = f.read(length)
            f.read(2)  # read \r\n
            # The argument is read by length, so it may itself contain CRLFs
            # and span several lines of the file; count them all so that the
            # line number reported on errors stays accurate.
            line_counter[0] += 1 + arg.count("\r\n")
            if arg == "__argv_end__":
                break
            self.argv.append(arg)

        if not self.argv:
            return

        # Try the plain command name first, then "container|subcommand".
        self.command = self.argv[0].lower()
        doc = docs.get(self.command, {})
        if not doc and len(self.argv) > 1:
            self.command = f"{self.argv[0].lower()}|{self.argv[1].lower()}"
            doc = docs.get(self.command, {})

        if not doc:
            self.command = None
            return

        self.schema = doc.get("reply_schema")

    def __str__(self):
        return json.dumps(self.argv)
class Response:
    """
    This class represents a Redis response in RESP3

    Attributes:
        json: the reply converted to JSON-compatible Python values (str, int,
            float, bool, None, list, dict).
        error: True when the reply is a simple error ('-') or a bulk error ('!').
        queued: True when the reply is the simple string "QUEUED".
    """

    def __init__(self, f, line_counter):
        """
        Read lines from `f` (generated by logreqres.c) and build the JSON representing the response in RESP3

        Args:
            f: text stream positioned at the first line of a reply.
            line_counter: single-element list holding the current line number;
                it is advanced in place as lines are consumed.

        Raises:
            EOFError: if the stream ends (or yields an empty line) where a
                reply is expected, e.g. a truncated log file.
            ValueError: if a length, count or numeric payload cannot be parsed.
        """
        self.error = False
        self.queued = False
        self.json = None

        line = f.readline()[:-2]  # strip the trailing \r\n
        line_counter[0] += 1
        if not line:
            # Fail with a meaningful message instead of an IndexError on line[0]
            raise EOFError("unexpected end of input while reading a response")

        kind, payload = line[0], line[1:]

        if kind == '+':
            self.json = payload
            if self.json == "QUEUED":
                self.queued = True
        elif kind == '-':
            self.json = payload
            self.error = True
        elif kind == '$':
            self.json = self._read_blob(f, int(payload), line_counter)
        elif kind == ':':
            self.json = int(payload)
        elif kind == ',':
            self.json = float(payload)
        elif kind == '_':
            self.json = None
        elif kind == '#':
            self.json = payload[0] == 't'
        elif kind == '!':
            self.json = self._read_blob(f, int(payload), line_counter)
            self.error = True
        elif kind == '=':
            # skip "txt:" or "mkd:"
            self.json = self._read_blob(f, int(payload), line_counter)[4:]
        elif kind == '(':
            self.json = payload  # big-number is actually a string
        elif kind in ('*', '~', '>'):  # unfortunately JSON doesn't tell the difference between a list and a set
            count = int(payload)
            self.json = [Response(f, line_counter).json for _ in range(count)]
        elif kind in ('%', '|'):
            self.json = {}
            count = int(payload)
            for _ in range(count):
                field = Response(f, line_counter)
                # Redis allows fields to be non-strings but JSON doesn't.
                # Luckily, for any kind of response we can validate, the fields are
                # always strings (example: XINFO STREAM)
                # The reason we can't always convert to string is because of DEBUG PROTOCOL MAP
                # which anyway doesn't have a schema
                if isinstance(field.json, str):
                    field = field.json
                value = Response(f, line_counter)
                self.json[field] = value.json
            if kind == '|':
                # We don't care about the attributes, read the real response
                # and take over all of its fields (json, error, queued).
                real_res = Response(f, line_counter)
                self.__dict__.update(real_res.__dict__)

    @staticmethod
    def _read_blob(f, length, line_counter):
        """
        Read a length-prefixed payload of `length` characters plus its trailing \\r\\n.

        The payload may itself contain CRLFs, in which case it spans several
        lines of the file; `line_counter` is advanced by all of them.
        """
        blob = f.read(length)
        f.read(2)  # read \r\n
        line_counter[0] += 1 + blob.count("\r\n")
        return blob

    def __str__(self):
        return json.dumps(self.json)
def process_file(docs, path):
    """
    This function processes a single file generated by logreqres.c

    The file is parsed as a sequence of request/response pairs. Every reply to
    a known command is validated against that command's reply_schema, except
    for error replies, QUEUED replies, commands in IGNORED_COMMANDS and
    commands whose doc has no reply_schema.

    Args:
        docs: mapping of command name to its COMMAND DOCS entry.
        path: path of the .reqres file to process.

    Returns:
        A dict mapping each known command seen in the file to the number of
        times it was seen (counted whether or not its reply was validated).

    Raises:
        Any parsing error, jsonschema.ValidationError or
        jsonschema.exceptions.SchemaError: re-raised after printing
        diagnostics, so that the first failure aborts the file.
    """
    line_counter = [0]  # A list with one integer: to force python to pass it by reference
    command_counter = {}

    print(f"Processing {path} ...")

    # Convert file to StringIO in order to minimize IO operations.
    # newline="\r\n" keeps lone \n characters inside payloads untouched.
    with open(path, "r", newline="\r\n", encoding="latin-1") as f:
        content = f.read()

    with io.StringIO(content) as fakefile:
        while True:
            try:
                req = Request(fakefile, docs, line_counter)
                if not req.argv:
                    # EOF
                    break
                res = Response(fakefile, line_counter)
            except json.decoder.JSONDecodeError as err:
                print(f"JSON decoder error while processing {path}:{line_counter[0]}: {err}")
                print(traceback.format_exc())
                raise
            except Exception as err:
                print(f"General error while processing {path}:{line_counter[0]}: {err}")
                print(traceback.format_exc())
                raise

            if not req.command:
                # Unknown command
                continue

            command_counter[req.command] = command_counter.get(req.command, 0) + 1

            if res.error or res.queued:
                continue

            if req.command in IGNORED_COMMANDS:
                continue

            if req.schema is None:
                # The command has no reply_schema, so there is nothing to
                # validate against. Passing None to jsonschema would be
                # reported as an invalid schema and abort the whole run, even
                # though a missing schema is only meant to be fatal with
                # --fail-missing-reply-schemas.
                continue

            try:
                jsonschema.validate(instance=res.json, schema=req.schema, cls=schema_validator)
            except (jsonschema.ValidationError, jsonschema.exceptions.SchemaError) as err:
                print(f"JSON schema validation error on {path}: {err}")
                print(f"argv: {req.argv}")
                try:
                    print(f"Response: {res}")
                except (UnicodeError, TypeError):
                    # Either the reply cannot be encoded for the terminal or it
                    # is not JSON-serializable (e.g. a map with non-string
                    # keys). Don't let that hide the validation error.
                    print("Response: (unprintable)")
                print(f"Schema: {json.dumps(req.schema, indent=2)}")
                print(traceback.format_exc())
                raise

    return command_counter
def fetch_schemas(cli, port, args, docs):
    """
    Spawn a server, ask it for COMMAND DOCS and merge the result into `docs`.

    The server is started with `args`, polled with PING until it answers, then
    queried by running `cli -p <port> --json command docs`. The server is
    terminated before returning, also when an error occurs.

    Args:
        cli: path of the redis-cli executable.
        port: TCP port the spawned server listens on.
        args: full argv used to spawn the server.
        docs: dict updated in place. A command that has "subcommands"
            contributes one entry per subcommand (keyed by the subcommand name
            as reported by the server) and no entry of its own; any other
            command contributes a single entry under its own name.

    Raises:
        RuntimeError: if the server process exits before accepting connections.
        json.JSONDecodeError: if the cli output is not valid JSON.
    """
    redis_proc = subprocess.Popen(args, stdout=subprocess.PIPE)
    try:
        while True:
            try:
                print('Connecting to Redis...')
                r = redis.Redis(port=port)
                r.ping()
                break
            except Exception:
                # Retrying is pointless once the server process is gone;
                # bail out instead of spinning forever.
                if redis_proc.poll() is not None:
                    raise RuntimeError(
                        f"Server {args} exited with code {redis_proc.returncode} "
                        f"before accepting connections on port {port}")
                time.sleep(0.1)
        print('Connected')

        cli_proc = subprocess.Popen([cli, '-p', str(port), '--json', 'command', 'docs'], stdout=subprocess.PIPE)
        stdout, _ = cli_proc.communicate()
        docs_response = json.loads(stdout)

        for name, doc in docs_response.items():
            if "subcommands" in doc:
                for subname, subdoc in doc["subcommands"].items():
                    docs[subname] = subdoc
            else:
                docs[name] = doc
    finally:
        # Never leave the spawned server behind, even on failure.
        redis_proc.terminate()
        redis_proc.wait()
# Script entry point:
# 1. Collect COMMAND DOCS from a temporary redis-server and a temporary sentinel.
# 2. Report (and optionally fail on) commands without a reply_schema.
# 3. Validate every *.reqres file left by the test suites, in parallel.
# 4. Report hits per command and (optionally fail on) commands never hit.
if __name__ == '__main__':
    # Figure out where the sources are
    script_dir = os.path.dirname(os.path.abspath(__file__))
    srcdir = os.path.abspath(script_dir + "/../src")
    testdir = os.path.abspath(script_dir + "/../tests")

    parser = argparse.ArgumentParser()
    parser.add_argument('--server', type=str, default='%s/redis-server' % srcdir)
    parser.add_argument('--port', type=int, default=6534)
    parser.add_argument('--cli', type=str, default='%s/redis-cli' % srcdir)
    parser.add_argument('--module', type=str, action='append', default=[])
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--fail-commands-not-all-hit', action='store_true')
    parser.add_argument('--fail-missing-reply-schemas', action='store_true')
    args = parser.parse_args()

    docs = dict()

    # Fetch schemas from a Redis instance
    print('Starting Redis server')
    redis_args = [args.server, '--port', str(args.port)]
    for module in args.module:
        # NOTE: this path is relative to the current working directory
        redis_args += ['--loadmodule', 'tests/modules/%s.so' % module]

    fetch_schemas(args.cli, args.port, redis_args, docs)

    # Fetch schemas from a sentinel
    print('Starting Redis sentinel')

    # Sentinel needs a config file to start
    config_file = "tmpsentinel.conf"
    with open(config_file, 'a'):
        pass
    try:
        sentinel_args = [args.server, config_file, '--port', str(args.port), "--sentinel"]
        fetch_schemas(args.cli, args.port, sentinel_args, docs)
    finally:
        # Remove the temporary config file even if the sentinel fetch failed
        os.unlink(config_file)

    missing_schema = [k for k, v in docs.items()
                      if "reply_schema" not in v and k not in IGNORED_COMMANDS]
    if missing_schema:
        print("WARNING! The following commands are missing a reply_schema:")
        for k in sorted(missing_schema):
            print(f"  {k}")
        if args.fail_missing_reply_schemas:
            print("ERROR! at least one command does not have a reply_schema")
            sys.exit(1)

    start = time.time()

    # Obtain all the files to process
    patterns = (
        '%s/tmp/*/*.reqres',
        '%s/cluster/tmp/*/*.reqres',
        '%s/sentinel/tmp/*/*.reqres',
    )
    paths = []
    for pattern in patterns:
        paths.extend(glob.glob(pattern % testdir))

    counter = collections.Counter()
    # Spin several processes to handle the files in parallel
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        func = partial(process_file, docs)
        # pool.map blocks until all the files have been processed
        for result in pool.map(func, paths):
            counter.update(result)
    command_counter = dict(counter)

    elapsed = time.time() - start
    print(f"Done. ({timedelta(seconds=elapsed)})")
    print("Hits per command:")
    for k, v in sorted(command_counter.items()):
        print(f"  {k}: {v}")

    # Commands known to the servers that never showed up in any log file
    not_hit = docs.keys() - command_counter.keys() - IGNORED_COMMANDS
    if not_hit:
        if args.verbose:
            print("WARNING! The following commands were not hit at all:")
            for k in sorted(not_hit):
                print(f"  {k}")
        if args.fail_commands_not_all_hit:
            print("ERROR! at least one command was not hit by the tests")
            sys.exit(1)
|