diff options
Diffstat (limited to 'src/is_json.c')
-rw-r--r-- | src/is_json.c | 500 |
1 files changed, 500 insertions, 0 deletions
diff --git a/src/is_json.c b/src/is_json.c new file mode 100644 index 0000000..eca2a49 --- /dev/null +++ b/src/is_json.c @@ -0,0 +1,500 @@ +/*- + * Copyright (c) 2018 Christos Zoulas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Parse JSON object serialization format (RFC-7159) + */ + +#ifndef TEST +#include "file.h" + +#ifndef lint +FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $") +#endif + +#include "magic.h" +#else +#include <stdio.h> +#include <stddef.h> +#endif +#include <string.h> + +#ifdef DEBUG +#include <stdio.h> +#define DPRINTF(a, b, c) \ + printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \ + (int)(b - c), (const char *)(c)) +#define __file_debugused +#else +#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) +#define __file_debugused __attribute__((__unused__)) +#endif + +#define JSON_ARRAY 0 +#define JSON_CONSTANT 1 +#define JSON_NUMBER 2 +#define JSON_OBJECT 3 +#define JSON_STRING 4 +#define JSON_ARRAYN 5 +#define JSON_MAX 6 + +/* + * if JSON_COUNT != 0: + * count all the objects, require that we have the whole data file + * otherwise: + * stop if we find an object or an array + */ +#ifndef JSON_COUNT +#define JSON_COUNT 0 +#endif + +static int json_parse(const unsigned char **, const unsigned char *, size_t *, + size_t); + +static int +json_isspace(const unsigned char uc) +{ + switch (uc) { + case ' ': + case '\n': + case '\r': + case '\t': + return 1; + default: + return 0; + } +} + +static int +json_isdigit(unsigned char uc) +{ + switch (uc) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return 1; + default: + return 0; + } +} + +static int +json_isxdigit(unsigned char uc) +{ + if (json_isdigit(uc)) + return 1; + switch (uc) { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + return 1; + default: + return 0; + } +} + +static const unsigned char * +json_skip_space(const unsigned char *uc, const unsigned char *ue) +{ + while (uc < ue && json_isspace(*uc)) + uc++; + return uc; +} + +/*ARGSUSED*/ +static int +json_parse_string(const unsigned char **ucp, const unsigned char *ue, + size_t lvl __file_debugused) +{ + const unsigned char *uc = *ucp; + size_t i; + + DPRINTF("Parse string: ", uc, *ucp); + while (uc < ue) { + switch (*uc++) { + case '\0': + goto out; + case '\\': + if (uc == ue) + goto out; + switch (*uc++) { + case '\0': + goto out; + case '"': + case '\\': + case '/': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + continue; + case 'u': + if (ue - uc < 4) { + uc = ue; + goto out; + } + for (i = 0; i < 4; i++) + if (!json_isxdigit(*uc++)) + goto out; + continue; + default: + goto out; + } + case '"': + DPRINTF("Good string: ", uc, *ucp); + *ucp = uc; + return 1; + default: + continue; + } + } +out: + DPRINTF("Bad string: ", uc, *ucp); + *ucp = uc; + return 0; +} + +static int +json_parse_array(const unsigned char **ucp, const unsigned char *ue, + size_t *st, size_t lvl) +{ + const unsigned char *uc = *ucp; + + DPRINTF("Parse array: ", uc, *ucp); + while (uc < ue) { + uc = json_skip_space(uc, ue); + if (uc == ue) + goto out; + if (*uc == ']') + goto done; + if (!json_parse(&uc, ue, st, lvl + 1)) + goto out; + if (uc == ue) + goto out; + switch (*uc) { + case ',': + uc++; + continue; + case ']': + done: + st[JSON_ARRAYN]++; + DPRINTF("Good array: ", uc, *ucp); + *ucp = uc + 1; + return 1; + default: + goto out; + } + } +out: + DPRINTF("Bad array: ", uc, *ucp); + *ucp = uc; + return 0; +} + +static int +json_parse_object(const unsigned char **ucp, const unsigned char *ue, + size_t *st, size_t lvl) +{ + const unsigned char *uc = *ucp; + DPRINTF("Parse object: ", uc, *ucp); + while (uc < ue) { + uc = json_skip_space(uc, ue); + if (uc == ue) + goto out; + if (*uc == '}') { + uc++; + goto done; + } + if (*uc++ != '"') { + DPRINTF("not string", uc, *ucp); + goto out; + } + DPRINTF("next field", uc, *ucp); + if (!json_parse_string(&uc, ue, lvl)) { + DPRINTF("not string", uc, *ucp); + goto out; + } + uc = json_skip_space(uc, ue); + if (uc == ue) + goto out; + if (*uc++ != ':') { + DPRINTF("not colon", uc, *ucp); + goto out; + } + if (!json_parse(&uc, ue, st, lvl + 1)) { + DPRINTF("not json", uc, *ucp); + goto out; + } + if (uc == ue) + goto out; + switch (*uc++) { + case ',': + continue; + case '}': /* { */ + done: + DPRINTF("Good object: ", uc, *ucp); + *ucp = uc; + return 1; + default: + DPRINTF("not more", uc, *ucp); + *ucp = uc - 1; + goto out; + } + } +out: + DPRINTF("Bad object: ", uc, *ucp); + *ucp = uc; + return 0; +} + +/*ARGSUSED*/ +static int +json_parse_number(const unsigned char **ucp, const unsigned char *ue, + size_t lvl __file_debugused) +{ + const unsigned char *uc = *ucp; + int got = 0; + + DPRINTF("Parse number: ", uc, *ucp); + if (uc == ue) + return 0; + if (*uc == '-') + uc++; + + for (; uc < ue; uc++) { + if (!json_isdigit(*uc)) + break; + got = 1; + } + if (uc == ue) + goto out; + if (*uc == '.') + uc++; + for (; uc < ue; uc++) { + if (!json_isdigit(*uc)) + break; + got = 1; + } + if (uc == ue) + goto out; + if (got && (*uc == 'e' || *uc == 'E')) { + uc++; + got = 0; + if (uc == ue) + goto out; + if (*uc == '+' || *uc == '-') + uc++; + for (; uc < ue; uc++) { + if (!json_isdigit(*uc)) + break; + got = 1; + } + } +out: + if (!got) + DPRINTF("Bad number: ", uc, *ucp); + else + DPRINTF("Good number: ", uc, *ucp); + *ucp = uc; + return got; +} + +/*ARGSUSED*/ +static int +json_parse_const(const unsigned char **ucp, const unsigned char *ue, + const char *str, size_t len, size_t lvl __file_debugused) +{ + const unsigned char *uc = *ucp; + + DPRINTF("Parse const: ", uc, *ucp); + *ucp += --len - 1; + if (*ucp > ue) + *ucp = ue; + for (; uc < ue && --len;) { + if (*uc++ != *++str) { + DPRINTF("Bad const: ", uc, *ucp); + return 0; + } + } + DPRINTF("Good const: ", uc, *ucp); + return 1; +} + +static int +json_parse(const unsigned char **ucp, const unsigned char *ue, + size_t *st, size_t lvl) +{ + const unsigned char *uc, *ouc; + int rv = 0; + int t; + + ouc = uc = json_skip_space(*ucp, ue); + if (uc == ue) + goto out; + + // Avoid recursion + if (lvl > 500) { + DPRINTF("Too many levels", uc, *ucp); + return 0; + } +#if JSON_COUNT + /* bail quickly if not counting */ + if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) + return 1; +#endif + + DPRINTF("Parse general: ", uc, *ucp); + switch (*uc++) { + case '"': + rv = json_parse_string(&uc, ue, lvl + 1); + t = JSON_STRING; + break; + case '[': + rv = json_parse_array(&uc, ue, st, lvl + 1); + t = JSON_ARRAY; + break; + case '{': /* '}' */ + rv = json_parse_object(&uc, ue, st, lvl + 1); + t = JSON_OBJECT; + break; + case 't': + rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1); + t = JSON_CONSTANT; + break; + case 'f': + rv = json_parse_const(&uc, ue, "false", sizeof("false"), + lvl + 1); + t = JSON_CONSTANT; + break; + case 'n': + rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1); + t = JSON_CONSTANT; + break; + default: + --uc; + rv = json_parse_number(&uc, ue, lvl + 1); + t = JSON_NUMBER; + break; + } + if (rv) + st[t]++; + uc = json_skip_space(uc, ue); +out: + DPRINTF("End general: ", uc, *ucp); + *ucp = uc; + if (lvl == 0) { + if (!rv) + return 0; + if (uc == ue) + return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0; + if (*ouc == *uc && json_parse(&uc, ue, st, 1)) + return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0; + else + return 0; + } + return rv; +} + +#ifndef TEST +int +file_is_json(struct magic_set *ms, const struct buffer *b) +{ + const unsigned char *uc = CAST(const unsigned char *, b->fbuf); + const unsigned char *ue = uc + b->flen; + size_t st[JSON_MAX]; + int mime = ms->flags & MAGIC_MIME; + int jt; + + + if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) + return 0; + + memset(st, 0, sizeof(st)); + + if ((jt = json_parse(&uc, ue, st, 0)) == 0) + return 0; + + if (mime == MAGIC_MIME_ENCODING) + return 1; + if (mime) { + if (file_printf(ms, "application/%s", + jt == 1 ? "json" : "x-ndjson") == -1) + return -1; + return 1; + } + if (file_printf(ms, "%sJSON text data", + jt == 1 ? "" : "New Line Delimited ") == -1) + return -1; +#if JSON_COUNT +#define P(n) st[n], st[n] > 1 ? "s" : "" + if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT + "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT + "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT + "u >1array%s)", + P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), + P(JSON_NUMBER), P(JSON_ARRAYN)) + == -1) + return -1; +#endif + return 1; +} + +#else + +#include <sys/types.h> +#include <sys/stat.h> +#include <stdio.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <err.h> + +int +main(int argc, char *argv[]) +{ + int fd; + struct stat st; + unsigned char *p; + size_t stats[JSON_MAX]; + + if ((fd = open(argv[1], O_RDONLY)) == -1) + err(EXIT_FAILURE, "Can't open `%s'", argv[1]); + + if (fstat(fd, &st) == -1) + err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); + + if ((p = CAST(char *, malloc(st.st_size))) == NULL) + err(EXIT_FAILURE, "Can't allocate %jd bytes", + (intmax_t)st.st_size); + if (read(fd, p, st.st_size) != st.st_size) + err(EXIT_FAILURE, "Can't read %jd bytes", + (intmax_t)st.st_size); + memset(stats, 0, sizeof(stats)); + printf("is json %d\n", json_parse((const unsigned char **)&p, + p + st.st_size, stats, 0)); + return 0; +} +#endif |