utils/req-res-log-validator.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350

#!/usr/bin/env python3
import os
import glob
import json
import sys

import jsonschema
import subprocess
import redis
import time
import argparse
import multiprocessing
import collections
import io
import traceback
from datetime import timedelta
from functools import partial
try:
    from jsonschema import Draft201909Validator as schema_validator
except ImportError:
    from jsonschema import Draft7Validator as schema_validator

"""
The purpose of this file is to validate the reply_schema values of COMMAND DOCS.
Basically, this is what it does:
1. Goes over req-res files, generated by redis-servers, spawned by the testsuite (see logreqres.c)
2. For each request-response pair, it validates the response against the request's reply_schema (obtained from COMMAND DOCS)

This script spins up a redis-server and a redis-cli in order to obtain COMMAND DOCS.

In order to use this file you must run the redis testsuite with the following flags:
./runtest --dont-clean --force-resp3 --log-req-res

And then:
./utils/req-res-log-validator.py

The script will fail only if:
1. One or more of the replies doesn't comply with its schema.
2. One or more of the commands in COMMAND DOCS doesn't have the reply_schema field (with --fail-missing-reply-schemas)
3. The testsuite didn't execute all of the commands (with --fail-commands-not-all-hit)

Future validations:
1. Fail the script if one or more of the branches of the reply schema (e.g. oneOf, anyOf) was not hit.
"""

# Commands excluded from validation: process_file() does not validate their
# replies, and the main script leaves them out of both the "missing
# reply_schema" and the "not hit" reports.
IGNORED_COMMANDS = {
    # Commands that don't work in a req-res manner (see logreqres.c)
    "debug",  # because of DEBUG SEGFAULT
    "sync",
    "psync",
    "monitor",
    "subscribe",
    "unsubscribe",
    "ssubscribe",
    "sunsubscribe",
    "psubscribe",
    "punsubscribe",
    # Commands for which we decided not to write a reply schema
    "pfdebug",
    "lolwut",
}

class Request(object):
    """
    A single Redis request (AKA command, argv) as recorded in a req-res log
    """
    def __init__(self, f, docs, line_counter):
        """
        Parse one request from `f` (generated by logreqres.c).

        Every argument is stored as a length line followed by the payload and
        a two-character terminator; the literal argument "__argv_end__" closes
        the request. `docs` maps command names ("name" or "container|sub") to
        their COMMAND DOCS entry and is used to resolve `command` and `schema`.
        `line_counter[0]` is bumped for every line consumed.

        After construction:
          - `argv` is empty when the end of the file was reached
          - `command` is None when the command is not present in `docs`
          - `schema` is the "reply_schema" of the command, or None
        """
        self.command = None
        self.schema = None
        self.argv = []

        while True:
            header = f.readline()
            line_counter[0] += 1
            if not header:
                # End of file
                break
            payload = str(f.read(int(header)))
            f.read(2)  # skip the \r\n that follows the payload
            line_counter[0] += 1
            if payload == "__argv_end__":
                break
            self.argv.append(payload)

        if not self.argv:
            return

        # Try the plain command name first, then fall back to "container|subcommand"
        name = self.argv[0].lower()
        doc = docs.get(name, {})
        if not doc and len(self.argv) > 1:
            name = f"{name}|{self.argv[1].lower()}"
            doc = docs.get(name, {})

        if not doc:
            # Unknown command: `command` and `schema` stay None
            return

        self.command = name
        self.schema = doc.get("reply_schema")

    def __str__(self):
        return json.dumps(self.argv)


class Response(object):
    """
    A Redis reply in RESP3, decoded into a JSON-compatible Python value
    """
    def __init__(self, f, line_counter):
        """
        Consume one complete reply from `f` (generated by logreqres.c),
        recursing into aggregate types, and store the decoded value in `json`.

        `error` is set for error replies ('-' and '!'), `queued` is set for the
        simple string "QUEUED". `line_counter[0]` is bumped for every line
        consumed.
        """
        self.error = False
        self.queued = False
        self.json = None

        header = f.readline()[:-2]  # strip the two-character line terminator
        line_counter[0] += 1
        marker = header[0]

        if marker in ('*', '~', '>'):
            # Array, set or push: JSON can't tell a list from a set, so all
            # three become a list
            self.json = [Response(f, line_counter).json
                         for _ in range(int(header[1:]))]
        elif marker in ('%', '|'):
            # Map or attribute
            mapping = {}
            self.json = mapping
            for _ in range(int(header[1:])):
                key = Response(f, line_counter)
                # Redis allows non-string fields but JSON doesn't. Every reply
                # we can validate uses string fields (example: XINFO STREAM);
                # the exception is DEBUG PROTOCOL MAP, which has no schema
                # anyway, so non-string fields are left unconverted.
                if isinstance(key.json, str):
                    key = key.json
                mapping[key] = Response(f, line_counter).json
            if marker == '|':
                # Attributes are not interesting: replace ourselves with the
                # real reply that follows them
                self.__dict__.update(Response(f, line_counter).__dict__)
        elif marker in ('$', '!'):
            # Bulk string or bulk error: the payload sits on the next line
            self.json = str(f.read(int(header[1:])))
            f.read(2)  # read \r\n
            line_counter[0] += 1
            if marker == '!':
                self.error = True
        elif marker == '=':
            # Verbatim string
            self.json = str(f.read(int(header[1:])))[4:]   # skip "txt:" or "mkd:"
            f.read(2)  # read \r\n
            line_counter[0] += 1 + self.json.count("\r\n")
        elif marker == '+':
            self.json = header[1:]
            self.queued = self.json == "QUEUED"
        elif marker == '-':
            self.json = header[1:]
            self.error = True
        elif marker == ':':
            self.json = int(header[1:])
        elif marker == ',':
            self.json = float(header[1:])
        elif marker == '#':
            self.json = header[1] == 't'
        elif marker == '(':
            self.json = header[1:]  # big-number is actually a string
        elif marker == '_':
            self.json = None

    def __str__(self):
        return json.dumps(self.json)


def process_file(docs, path):
    """
    Validate every request-response pair of a single file generated by
    logreqres.c and return a dict mapping each known command to the number
    of times it appears in the file.

    Parsing errors and schema violations are reported on stdout and then
    re-raised.
    """
    line_counter = [0]  # A one-element list, so Request/Response can update it in place
    hits = {}

    print(f"Processing {path} ...")

    # Load the whole file up front and parse from memory in order to minimize IO operations
    with open(path, "r", newline="\r\n", encoding="latin-1") as f:
        fakefile = io.StringIO(f.read())

    with fakefile:
        while True:
            try:
                req = Request(fakefile, docs, line_counter)
                if not req.argv:
                    # EOF
                    break
                res = Response(fakefile, line_counter)
            except json.decoder.JSONDecodeError as err:
                print(f"JSON decoder error while processing {path}:{line_counter[0]}: {err}")
                print(traceback.format_exc())
                raise
            except Exception as err:
                print(f"General error while processing {path}:{line_counter[0]}: {err}")
                print(traceback.format_exc())
                raise

            name = req.command
            if not name:
                # Unknown command
                continue

            hits[name] = hits.get(name, 0) + 1

            # Errors, QUEUED replies and ignored commands are counted but not validated
            if res.error or res.queued or name in IGNORED_COMMANDS:
                continue

            try:
                jsonschema.validate(instance=res.json, schema=req.schema, cls=schema_validator)
            except (jsonschema.ValidationError, jsonschema.exceptions.SchemaError) as err:
                print(f"JSON schema validation error on {path}: {err}")
                print(f"argv: {req.argv}")
                try:
                    print(f"Response: {res}")
                except UnicodeDecodeError:
                    print("Response: (unprintable)")
                print(f"Schema: {json.dumps(req.schema, indent=2)}")
                print(traceback.format_exc())
                raise

    return hits


def fetch_schemas(cli, port, args, docs):
    """
    Spawn a server with the command line `args`, wait until it answers PING on
    `port`, obtain COMMAND DOCS from it through the `cli` executable and merge
    the result into `docs` (modified in place).

    Commands that have subcommands contribute one entry per subcommand (keyed
    by the subcommand's name as reported by COMMAND DOCS); all other commands
    contribute a single entry keyed by the command name.

    The server is always terminated before returning, also on failure.
    Raises RuntimeError if the server process exits before it can be reached.
    """
    # The server's output is never read, so discard it instead of piping it:
    # an unread pipe can fill up and block the server.
    redis_proc = subprocess.Popen(args, stdout=subprocess.DEVNULL)

    try:
        while True:
            try:
                print('Connecting to Redis...')
                r = redis.Redis(port=port)
                r.ping()
                break
            except Exception:
                # Don't poll forever for a server that has already died
                if redis_proc.poll() is not None:
                    raise RuntimeError(
                        f"Server exited with code {redis_proc.returncode} before accepting connections: {args}")
                time.sleep(0.1)

        print('Connected')

        cli_proc = subprocess.Popen([cli, '-p', str(port), '--json', 'command', 'docs'], stdout=subprocess.PIPE)
        stdout, _ = cli_proc.communicate()
        docs_response = json.loads(stdout)

        for name, doc in docs_response.items():
            if "subcommands" in doc:
                # Only the subcommands are recorded, not the container command
                docs.update(doc["subcommands"])
            else:
                docs[name] = doc
    finally:
        redis_proc.terminate()
        redis_proc.wait()


if __name__ == '__main__':
    # Figure out where the sources are (relative to the location of this script)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    srcdir = os.path.abspath(script_dir + "/../src")
    testdir = os.path.abspath(script_dir + "/../tests")

    parser = argparse.ArgumentParser()
    parser.add_argument('--server', type=str, default='%s/redis-server' % srcdir)
    parser.add_argument('--port', type=int, default=6534)
    parser.add_argument('--cli', type=str, default='%s/redis-cli' % srcdir)
    parser.add_argument('--module', type=str, action='append', default=[])
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--fail-commands-not-all-hit', action='store_true')
    parser.add_argument('--fail-missing-reply-schemas', action='store_true')
    args = parser.parse_args()

    # Command name -> COMMAND DOCS entry, filled in by fetch_schemas
    docs = dict()

    # Fetch schemas from a Redis instance
    print('Starting Redis server')
    redis_args = [args.server, '--port', str(args.port)]
    for module in args.module:
        redis_args += ['--loadmodule', 'tests/modules/%s.so' % module]

    fetch_schemas(args.cli, args.port, redis_args, docs)

    # Fetch schemas from a sentinel
    print('Starting Redis sentinel')

    # Sentinel needs a config file to start
    config_file = "tmpsentinel.conf"
    open(config_file, 'a').close()
    try:
        sentinel_args = [args.server, config_file, '--port', str(args.port), "--sentinel"]
        fetch_schemas(args.cli, args.port, sentinel_args, docs)
    finally:
        # Remove the temporary config file even if the sentinel could not be queried
        os.unlink(config_file)

    missing_schema = [k for k, v in docs.items()
                      if "reply_schema" not in v and k not in IGNORED_COMMANDS]
    if missing_schema:
        print("WARNING! The following commands are missing a reply_schema:")
        for k in sorted(missing_schema):
            print(f"  {k}")
        if args.fail_missing_reply_schemas:
            print("ERROR! at least one command does not have a reply_schema")
            sys.exit(1)

    start = time.time()

    # Obtain all the files to process: regular, cluster and sentinel test runs
    paths = []
    for pattern in ('%s/tmp/*/*.reqres',
                    '%s/cluster/tmp/*/*.reqres',
                    '%s/sentinel/tmp/*/*.reqres'):
        paths.extend(glob.glob(pattern % testdir))

    counter = collections.Counter()
    # Spin several processes to handle the files in parallel
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        func = partial(process_file, docs)
        # pool.map blocks until all the files have been processed
        for result in pool.map(func, paths):
            counter.update(result)
    command_counter = dict(counter)

    elapsed = time.time() - start
    print(f"Done. ({timedelta(seconds=elapsed)})")
    print("Hits per command:")
    for k, v in sorted(command_counter.items()):
        print(f"  {k}: {v}")

    # Documented commands that never showed up in any req-res file
    not_hit = set(docs) - set(command_counter) - IGNORED_COMMANDS
    if not_hit:
        if args.verbose:
            print("WARNING! The following commands were not hit at all:")
            for k in sorted(not_hit):
                print(f"  {k}")
        if args.fail_commands_not_all_hit:
            print("ERROR! at least one command was not hit by the tests")
            sys.exit(1)