summaryrefslogtreecommitdiffstats
path: root/src/is_json.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/is_json.c')
-rw-r--r--src/is_json.c500
1 files changed, 500 insertions, 0 deletions
diff --git a/src/is_json.c b/src/is_json.c
new file mode 100644
index 0000000..eca2a49
--- /dev/null
+++ b/src/is_json.c
@@ -0,0 +1,500 @@
+/*-
+ * Copyright (c) 2018 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Parse JSON object serialization format (RFC-7159)
+ */
+
+#ifndef TEST
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
+#endif
+
+#include "magic.h"
+#else
+#include <stdio.h>
+#include <stddef.h>
+#endif
+#include <string.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#define DPRINTF(a, b, c) \
+ printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
+ (int)(b - c), (const char *)(c))
+#define __file_debugused
+#else
+#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
+#define __file_debugused __attribute__((__unused__))
+#endif
+
+#define JSON_ARRAY 0
+#define JSON_CONSTANT 1
+#define JSON_NUMBER 2
+#define JSON_OBJECT 3
+#define JSON_STRING 4
+#define JSON_ARRAYN 5
+#define JSON_MAX 6
+
+/*
+ * if JSON_COUNT != 0:
+ * count all the objects, require that we have the whole data file
+ * otherwise:
+ * stop if we find an object or an array
+ */
+#ifndef JSON_COUNT
+#define JSON_COUNT 0
+#endif
+
+static int json_parse(const unsigned char **, const unsigned char *, size_t *,
+ size_t);
+
+static int
+json_isspace(const unsigned char uc)
+{
+ switch (uc) {
+ case ' ':
+ case '\n':
+ case '\r':
+ case '\t':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int
+json_isdigit(unsigned char uc)
+{
+ switch (uc) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int
+json_isxdigit(unsigned char uc)
+{
+ if (json_isdigit(uc))
+ return 1;
+ switch (uc) {
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static const unsigned char *
+json_skip_space(const unsigned char *uc, const unsigned char *ue)
+{
+ while (uc < ue && json_isspace(*uc))
+ uc++;
+ return uc;
+}
+
+/*ARGSUSED*/
+static int
+json_parse_string(const unsigned char **ucp, const unsigned char *ue,
+ size_t lvl __file_debugused)
+{
+ const unsigned char *uc = *ucp;
+ size_t i;
+
+ DPRINTF("Parse string: ", uc, *ucp);
+ while (uc < ue) {
+ switch (*uc++) {
+ case '\0':
+ goto out;
+ case '\\':
+ if (uc == ue)
+ goto out;
+ switch (*uc++) {
+ case '\0':
+ goto out;
+ case '"':
+ case '\\':
+ case '/':
+ case 'b':
+ case 'f':
+ case 'n':
+ case 'r':
+ case 't':
+ continue;
+ case 'u':
+ if (ue - uc < 4) {
+ uc = ue;
+ goto out;
+ }
+ for (i = 0; i < 4; i++)
+ if (!json_isxdigit(*uc++))
+ goto out;
+ continue;
+ default:
+ goto out;
+ }
+ case '"':
+ DPRINTF("Good string: ", uc, *ucp);
+ *ucp = uc;
+ return 1;
+ default:
+ continue;
+ }
+ }
+out:
+ DPRINTF("Bad string: ", uc, *ucp);
+ *ucp = uc;
+ return 0;
+}
+
+static int
+json_parse_array(const unsigned char **ucp, const unsigned char *ue,
+ size_t *st, size_t lvl)
+{
+ const unsigned char *uc = *ucp;
+
+ DPRINTF("Parse array: ", uc, *ucp);
+ while (uc < ue) {
+ uc = json_skip_space(uc, ue);
+ if (uc == ue)
+ goto out;
+ if (*uc == ']')
+ goto done;
+ if (!json_parse(&uc, ue, st, lvl + 1))
+ goto out;
+ if (uc == ue)
+ goto out;
+ switch (*uc) {
+ case ',':
+ uc++;
+ continue;
+ case ']':
+ done:
+ st[JSON_ARRAYN]++;
+ DPRINTF("Good array: ", uc, *ucp);
+ *ucp = uc + 1;
+ return 1;
+ default:
+ goto out;
+ }
+ }
+out:
+ DPRINTF("Bad array: ", uc, *ucp);
+ *ucp = uc;
+ return 0;
+}
+
+static int
+json_parse_object(const unsigned char **ucp, const unsigned char *ue,
+ size_t *st, size_t lvl)
+{
+ const unsigned char *uc = *ucp;
+ DPRINTF("Parse object: ", uc, *ucp);
+ while (uc < ue) {
+ uc = json_skip_space(uc, ue);
+ if (uc == ue)
+ goto out;
+ if (*uc == '}') {
+ uc++;
+ goto done;
+ }
+ if (*uc++ != '"') {
+ DPRINTF("not string", uc, *ucp);
+ goto out;
+ }
+ DPRINTF("next field", uc, *ucp);
+ if (!json_parse_string(&uc, ue, lvl)) {
+ DPRINTF("not string", uc, *ucp);
+ goto out;
+ }
+ uc = json_skip_space(uc, ue);
+ if (uc == ue)
+ goto out;
+ if (*uc++ != ':') {
+ DPRINTF("not colon", uc, *ucp);
+ goto out;
+ }
+ if (!json_parse(&uc, ue, st, lvl + 1)) {
+ DPRINTF("not json", uc, *ucp);
+ goto out;
+ }
+ if (uc == ue)
+ goto out;
+ switch (*uc++) {
+ case ',':
+ continue;
+ case '}': /* { */
+ done:
+ DPRINTF("Good object: ", uc, *ucp);
+ *ucp = uc;
+ return 1;
+ default:
+ DPRINTF("not more", uc, *ucp);
+ *ucp = uc - 1;
+ goto out;
+ }
+ }
+out:
+ DPRINTF("Bad object: ", uc, *ucp);
+ *ucp = uc;
+ return 0;
+}
+
+/*ARGSUSED*/
+static int
+json_parse_number(const unsigned char **ucp, const unsigned char *ue,
+ size_t lvl __file_debugused)
+{
+ const unsigned char *uc = *ucp;
+ int got = 0;
+
+ DPRINTF("Parse number: ", uc, *ucp);
+ if (uc == ue)
+ return 0;
+ if (*uc == '-')
+ uc++;
+
+ for (; uc < ue; uc++) {
+ if (!json_isdigit(*uc))
+ break;
+ got = 1;
+ }
+ if (uc == ue)
+ goto out;
+ if (*uc == '.')
+ uc++;
+ for (; uc < ue; uc++) {
+ if (!json_isdigit(*uc))
+ break;
+ got = 1;
+ }
+ if (uc == ue)
+ goto out;
+ if (got && (*uc == 'e' || *uc == 'E')) {
+ uc++;
+ got = 0;
+ if (uc == ue)
+ goto out;
+ if (*uc == '+' || *uc == '-')
+ uc++;
+ for (; uc < ue; uc++) {
+ if (!json_isdigit(*uc))
+ break;
+ got = 1;
+ }
+ }
+out:
+ if (!got)
+ DPRINTF("Bad number: ", uc, *ucp);
+ else
+ DPRINTF("Good number: ", uc, *ucp);
+ *ucp = uc;
+ return got;
+}
+
+/*ARGSUSED*/
+static int
+json_parse_const(const unsigned char **ucp, const unsigned char *ue,
+ const char *str, size_t len, size_t lvl __file_debugused)
+{
+ const unsigned char *uc = *ucp;
+
+ DPRINTF("Parse const: ", uc, *ucp);
+ *ucp += --len - 1;
+ if (*ucp > ue)
+ *ucp = ue;
+ for (; uc < ue && --len;) {
+ if (*uc++ != *++str) {
+ DPRINTF("Bad const: ", uc, *ucp);
+ return 0;
+ }
+ }
+ DPRINTF("Good const: ", uc, *ucp);
+ return 1;
+}
+
+static int
+json_parse(const unsigned char **ucp, const unsigned char *ue,
+ size_t *st, size_t lvl)
+{
+ const unsigned char *uc, *ouc;
+ int rv = 0;
+ int t;
+
+ ouc = uc = json_skip_space(*ucp, ue);
+ if (uc == ue)
+ goto out;
+
+ // Avoid recursion
+ if (lvl > 500) {
+ DPRINTF("Too many levels", uc, *ucp);
+ return 0;
+ }
+#if JSON_COUNT
+ /* bail quickly if not counting */
+ if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
+ return 1;
+#endif
+
+ DPRINTF("Parse general: ", uc, *ucp);
+ switch (*uc++) {
+ case '"':
+ rv = json_parse_string(&uc, ue, lvl + 1);
+ t = JSON_STRING;
+ break;
+ case '[':
+ rv = json_parse_array(&uc, ue, st, lvl + 1);
+ t = JSON_ARRAY;
+ break;
+ case '{': /* '}' */
+ rv = json_parse_object(&uc, ue, st, lvl + 1);
+ t = JSON_OBJECT;
+ break;
+ case 't':
+ rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
+ t = JSON_CONSTANT;
+ break;
+ case 'f':
+ rv = json_parse_const(&uc, ue, "false", sizeof("false"),
+ lvl + 1);
+ t = JSON_CONSTANT;
+ break;
+ case 'n':
+ rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
+ t = JSON_CONSTANT;
+ break;
+ default:
+ --uc;
+ rv = json_parse_number(&uc, ue, lvl + 1);
+ t = JSON_NUMBER;
+ break;
+ }
+ if (rv)
+ st[t]++;
+ uc = json_skip_space(uc, ue);
+out:
+ DPRINTF("End general: ", uc, *ucp);
+ *ucp = uc;
+ if (lvl == 0) {
+ if (!rv)
+ return 0;
+ if (uc == ue)
+ return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
+ if (*ouc == *uc && json_parse(&uc, ue, st, 1))
+ return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
+ else
+ return 0;
+ }
+ return rv;
+}
+
+#ifndef TEST
+int
+file_is_json(struct magic_set *ms, const struct buffer *b)
+{
+ const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
+ const unsigned char *ue = uc + b->flen;
+ size_t st[JSON_MAX];
+ int mime = ms->flags & MAGIC_MIME;
+ int jt;
+
+
+ if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
+ return 0;
+
+ memset(st, 0, sizeof(st));
+
+ if ((jt = json_parse(&uc, ue, st, 0)) == 0)
+ return 0;
+
+ if (mime == MAGIC_MIME_ENCODING)
+ return 1;
+ if (mime) {
+ if (file_printf(ms, "application/%s",
+ jt == 1 ? "json" : "x-ndjson") == -1)
+ return -1;
+ return 1;
+ }
+ if (file_printf(ms, "%sJSON text data",
+ jt == 1 ? "" : "New Line Delimited ") == -1)
+ return -1;
+#if JSON_COUNT
+#define P(n) st[n], st[n] > 1 ? "s" : ""
+ if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
+ "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
+ "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
+ "u >1array%s)",
+ P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
+ P(JSON_NUMBER), P(JSON_ARRAYN))
+ == -1)
+ return -1;
+#endif
+ return 1;
+}
+
+#else
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <err.h>
+
+int
+main(int argc, char *argv[])
+{
+ int fd;
+ struct stat st;
+ unsigned char *p;
+ size_t stats[JSON_MAX];
+
+ if ((fd = open(argv[1], O_RDONLY)) == -1)
+ err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
+
+ if (fstat(fd, &st) == -1)
+ err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
+
+ if ((p = CAST(char *, malloc(st.st_size))) == NULL)
+ err(EXIT_FAILURE, "Can't allocate %jd bytes",
+ (intmax_t)st.st_size);
+ if (read(fd, p, st.st_size) != st.st_size)
+ err(EXIT_FAILURE, "Can't read %jd bytes",
+ (intmax_t)st.st_size);
+ memset(stats, 0, sizeof(stats));
+ printf("is json %d\n", json_parse((const unsigned char **)&p,
+ p + st.st_size, stats, 0));
+ return 0;
+}
+#endif