summaryrefslogtreecommitdiffstats
path: root/src/zstd/tests/seqgen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/zstd/tests/seqgen.c')
-rw-r--r--src/zstd/tests/seqgen.c260
1 files changed, 260 insertions, 0 deletions
diff --git a/src/zstd/tests/seqgen.c b/src/zstd/tests/seqgen.c
new file mode 100644
index 000000000..29c0c4054
--- /dev/null
+++ b/src/zstd/tests/seqgen.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2017-2020, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "seqgen.h"
+#include "mem.h"
+#include <string.h>
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+static const size_t kMatchBytes = 128;
+
+#define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+static BYTE SEQ_randByte(unsigned* src)
+{
+ static const U32 prime1 = 2654435761U;
+ static const U32 prime2 = 2246822519U;
+ U32 rand32 = *src;
+ rand32 *= prime1;
+ rand32 ^= prime2;
+ rand32 = SEQ_rotl32(rand32, 13);
+ *src = rand32;
+ return (BYTE)(rand32 >> 5);
+}
+
+SEQ_stream SEQ_initStream(unsigned seed)
+{
+ SEQ_stream stream;
+ stream.state = 0;
+ XXH64_reset(&stream.xxh, 0);
+ stream.seed = seed;
+ return stream;
+}
+
+/* Generates a single guard byte, then match length + 1 of a different byte,
+ * then another guard byte.
+ */
+static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
+ SEQ_outBuffer* out)
+{
+ typedef enum {
+ ml_first_byte = 0,
+ ml_match_bytes,
+ ml_last_byte,
+ } ml_state;
+ BYTE* const ostart = (BYTE*)out->dst;
+ BYTE* const oend = ostart + out->size;
+ BYTE* op = ostart + out->pos;
+
+ switch ((ml_state)stream->state) {
+ case ml_first_byte:
+ /* Generate a single byte and pick a different byte for the match */
+ if (op >= oend) {
+ stream->bytesLeft = 1;
+ break;
+ }
+ *op = SEQ_randByte(&stream->seed) & 0xFF;
+ do {
+ stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
+ } while (*op == stream->saved);
+ ++op;
+ /* State transition */
+ stream->state = ml_match_bytes;
+ stream->bytesLeft = value + 1;
+ /* fall-through */
+ case ml_match_bytes: {
+ /* Copy matchLength + 1 bytes to the output buffer */
+ size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
+ if (setLength > 0) {
+ memset(op, stream->saved, setLength);
+ op += setLength;
+ stream->bytesLeft -= setLength;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = ml_last_byte;
+ }
+ /* fall-through */
+ case ml_last_byte:
+ /* Generate a single byte and pick a different byte for the match */
+ if (op >= oend) {
+ stream->bytesLeft = 1;
+ break;
+ }
+ do {
+ *op = SEQ_randByte(&stream->seed) & 0xFF;
+ } while (*op == stream->saved);
+ ++op;
+ /* State transition */
+ /* fall-through */
+ default:
+ stream->state = 0;
+ stream->bytesLeft = 0;
+ break;
+ }
+ XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
+ out->pos = op - ostart;
+ return stream->bytesLeft;
+}
+
+/* Saves the current seed then generates kMatchBytes random bytes >= 128.
+ * Generates literal length - kMatchBytes random bytes < 128.
+ * Generates another kMatchBytes using the saved seed to generate a match.
+ * This way the match is easy to find for the compressors.
+ */
+static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
+{
+ typedef enum {
+ ll_start = 0,
+ ll_run_bytes,
+ ll_literals,
+ ll_run_match,
+ } ll_state;
+ BYTE* const ostart = (BYTE*)out->dst;
+ BYTE* const oend = ostart + out->size;
+ BYTE* op = ostart + out->pos;
+
+ switch ((ll_state)stream->state) {
+ case ll_start:
+ stream->state = ll_run_bytes;
+ stream->saved = stream->seed;
+ stream->bytesLeft = MIN(kMatchBytes, value);
+ /* fall-through */
+ case ll_run_bytes:
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->seed) | 0x80;
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = ll_literals;
+ stream->bytesLeft = value - MIN(kMatchBytes, value);
+ /* fall-through */
+ case ll_literals:
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->seed) & 0x7F;
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = ll_run_match;
+ stream->bytesLeft = MIN(kMatchBytes, value);
+ /* fall-through */
+ case ll_run_match: {
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->saved) | 0x80;
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ }
+ /* fall-through */
+ default:
+ stream->state = 0;
+ stream->bytesLeft = 0;
+ break;
+ }
+ XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
+ out->pos = op - ostart;
+ return stream->bytesLeft;
+}
+
+/* Saves the current seed then generates kMatchBytes random bytes >= 128.
+ * Generates offset - kMatchBytes of zeros to get a large offset without
+ * polluting the hash tables.
+ * Generates another kMatchBytes using the saved seed to generate a with the
+ * required offset.
+ */
+static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
+{
+ typedef enum {
+ of_start = 0,
+ of_run_bytes,
+ of_offset,
+ of_run_match,
+ } of_state;
+ BYTE* const ostart = (BYTE*)out->dst;
+ BYTE* const oend = ostart + out->size;
+ BYTE* op = ostart + out->pos;
+
+ switch ((of_state)stream->state) {
+ case of_start:
+ stream->state = of_run_bytes;
+ stream->saved = stream->seed;
+ stream->bytesLeft = MIN(value, kMatchBytes);
+ /* fall-through */
+ case of_run_bytes: {
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->seed) | 0x80;
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = of_offset;
+ stream->bytesLeft = value - MIN(value, kMatchBytes);
+ }
+ /* fall-through */
+ case of_offset: {
+ /* Copy matchLength + 1 bytes to the output buffer */
+ size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
+ if (setLength > 0) {
+ memset(op, 0, setLength);
+ op += setLength;
+ stream->bytesLeft -= setLength;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = of_run_match;
+ stream->bytesLeft = MIN(value, kMatchBytes);
+ }
+ /* fall-through */
+ case of_run_match: {
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->saved) | 0x80;
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ }
+ /* fall-through */
+ default:
+ stream->state = 0;
+ stream->bytesLeft = 0;
+ break;
+ }
+ XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
+ out->pos = op - ostart;
+ return stream->bytesLeft;
+}
+
+/* Returns the number of bytes left to generate.
+ * Must pass the same type/value until it returns 0.
+ */
+size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
+{
+ switch (type) {
+ case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
+ case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
+ case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
+ case SEQ_gen_max: /* fall-through */
+ default: return 0;
+ }
+}
+
+/* Returns the xxhash of the data produced so far */
+XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
+{
+ return XXH64_digest(&stream->xxh);
+}