1 files changed, 204 insertions, 125 deletions
diff --git a/reftable/block.c b/reftable/block.c
index 34d4d07..3e87460 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -51,12 +51,7 @@ static int block_writer_register_restart(struct block_writer *w, int n,
 	if (2 + 3 * rlen + n > w->block_size - w->next)
 		return -1;
 	if (is_restart) {
-		if (w->restart_len == w->restart_cap) {
-			w->restart_cap = w->restart_cap * 2 + 1;
-			w->restarts = reftable_realloc(
-				w->restarts, sizeof(uint32_t) * w->restart_cap);
-		}
-
+		REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap);
 		w->restarts[w->restart_len++] = w->next;
 	}
 
@@ -148,8 +143,10 @@ int block_writer_finish(struct block_writer *w)
 		int block_header_skip = 4 + w->header_off;
 		uLongf src_len = w->next - block_header_skip;
 		uLongf dest_cap = src_len * 1.001 + 12;
+		uint8_t *compressed;
+
+		REFTABLE_ALLOC_ARRAY(compressed, dest_cap);
 
-		uint8_t *compressed = reftable_malloc(dest_cap);
 		while (1) {
 			uLongf out_dest_len = dest_cap;
 			int zresult = compress2(compressed, &out_dest_len,
@@ -178,11 +175,6 @@ int block_writer_finish(struct block_writer *w)
 	return w->next;
 }
 
-uint8_t block_reader_type(struct block_reader *r)
-{
-	return r->block.data[r->header_off];
-}
-
 int block_reader_init(struct block_reader *br, struct reftable_block *block,
 		      uint32_t header_off, uint32_t table_block_size,
 		      int hash_size)
@@ -194,7 +186,8 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
 	uint16_t restart_count = 0;
 	uint32_t restart_start = 0;
 	uint8_t *restart_bytes = NULL;
-	uint8_t *uncompressed = NULL;
+
+	reftable_block_done(&br->block);
 
 	if (!reftable_is_block_type(typ)) {
 		err =  REFTABLE_FORMAT_ERROR;
@@ -202,37 +195,57 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
 	}
 
 	if (typ == BLOCK_TYPE_LOG) {
-		int block_header_skip = 4 + header_off;
-		uLongf dst_len = sz - block_header_skip; /* total size of dest
-							    buffer. */
-		uLongf src_len = block->len - block_header_skip;
-		/* Log blocks specify the *uncompressed* size in their header.
-		 */
-		uncompressed = reftable_malloc(sz);
+		uint32_t block_header_skip = 4 + header_off;
+		uLong dst_len = sz - block_header_skip;
+		uLong src_len = block->len - block_header_skip;
+
+		/* Log blocks specify the *uncompressed* size in their header. */
+		REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
+				    br->uncompressed_cap);
 
 		/* Copy over the block header verbatim. It's not compressed. */
-		memcpy(uncompressed, block->data, block_header_skip);
+		memcpy(br->uncompressed_data, block->data, block_header_skip);
 
-		/* Uncompress */
-		if (Z_OK !=
-		    uncompress2(uncompressed + block_header_skip, &dst_len,
-				block->data + block_header_skip, &src_len)) {
+		if (!br->zstream) {
+			REFTABLE_CALLOC_ARRAY(br->zstream, 1);
+			err = inflateInit(br->zstream);
+		} else {
+			err = inflateReset(br->zstream);
+		}
+		if (err != Z_OK) {
+			err = REFTABLE_ZLIB_ERROR;
+			goto done;
+		}
+
+		br->zstream->next_in = block->data + block_header_skip;
+		br->zstream->avail_in = src_len;
+		br->zstream->next_out = br->uncompressed_data + block_header_skip;
+		br->zstream->avail_out = dst_len;
+
+		/*
+		 * We know both input as well as output size, and we know that
+		 * the sizes should never be bigger than `uInt_MAX` because
+		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
+		 * here to instruct zlib to inflate the data in one go, which
+		 * is more efficient than using `Z_NO_FLUSH`.
+		 */
+		err = inflate(br->zstream, Z_FINISH);
+		if (err != Z_STREAM_END) {
 			err = REFTABLE_ZLIB_ERROR;
 			goto done;
 		}
+		err = 0;
 
-		if (dst_len + block_header_skip != sz) {
+		if (br->zstream->total_out + block_header_skip != sz) {
 			err = REFTABLE_FORMAT_ERROR;
 			goto done;
 		}
 
 		/* We're done with the input data. */
 		reftable_block_done(block);
-		block->data = uncompressed;
-		uncompressed = NULL;
+		block->data = br->uncompressed_data;
 		block->len = sz;
-		block->source = malloc_block_source();
-		full_block_size = src_len + block_header_skip;
+		full_block_size = src_len + block_header_skip - br->zstream->avail_in;
 	} else if (full_block_size == 0) {
 		full_block_size = sz;
 	} else if (sz < full_block_size && sz < block->len &&
@@ -260,176 +273,242 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
 	br->restart_bytes = restart_bytes;
 
 done:
-	reftable_free(uncompressed);
 	return err;
 }
 
-static uint32_t block_reader_restart_offset(struct block_reader *br, int i)
+void block_reader_release(struct block_reader *br)
+{
+	inflateEnd(br->zstream);
+	reftable_free(br->zstream);
+	reftable_free(br->uncompressed_data);
+	reftable_block_done(&br->block);
+}
+
+uint8_t block_reader_type(const struct block_reader *r)
+{
+	return r->block.data[r->header_off];
+}
+
+int block_reader_first_key(const struct block_reader *br, struct strbuf *key)
+{
+	int off = br->header_off + 4, n;
+	struct string_view in = {
+		.buf = br->block.data + off,
+		.len = br->block_len - off,
+	};
+	uint8_t extra = 0;
+
+	strbuf_reset(key);
+
+	n = reftable_decode_key(key, &extra, in);
+	if (n < 0)
+		return n;
+	if (!key->len)
+		return REFTABLE_FORMAT_ERROR;
+
+	return 0;
+}
+
+static uint32_t block_reader_restart_offset(const struct block_reader *br, int i)
 {
 	return get_be24(br->restart_bytes + 3 * i);
 }
 
-void block_reader_start(struct block_reader *br, struct block_iter *it)
+void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
 {
-	it->br = br;
+	it->block = br->block.data;
+	it->block_len = br->block_len;
+	it->hash_size = br->hash_size;
 	strbuf_reset(&it->last_key);
 	it->next_off = br->header_off + 4;
 }
 
-struct restart_find_args {
+struct restart_needle_less_args {
 	int error;
-	struct strbuf key;
-	struct block_reader *r;
+	struct strbuf needle;
+	const struct block_reader *reader;
 };
 
-static int restart_key_less(size_t idx, void *args)
+static int restart_needle_less(size_t idx, void *_args)
 {
-	struct restart_find_args *a = args;
-	uint32_t off = block_reader_restart_offset(a->r, idx);
+	struct restart_needle_less_args *args = _args;
+	uint32_t off = block_reader_restart_offset(args->reader, idx);
 	struct string_view in = {
-		.buf = a->r->block.data + off,
-		.len = a->r->block_len - off,
+		.buf = args->reader->block.data + off,
+		.len = args->reader->block_len - off,
 	};
-
-	/* the restart key is verbatim in the block, so this could avoid the
-	   alloc for decoding the key */
-	struct strbuf rkey = STRBUF_INIT;
-	struct strbuf last_key = STRBUF_INIT;
-	uint8_t unused_extra;
-	int n = reftable_decode_key(&rkey, &unused_extra, last_key, in);
-	int result;
-	if (n < 0) {
-		a->error = 1;
+	uint64_t prefix_len, suffix_len;
+	uint8_t extra;
+	int n;
+
+	/*
+	 * Records at restart points are stored without prefix compression, so
+	 * there is no need to fully decode the record key here. This removes
+	 * the need for allocating memory.
+	 */
+	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
+	if (n < 0 || prefix_len) {
+		args->error = 1;
 		return -1;
 	}
 
-	result = strbuf_cmp(&a->key, &rkey);
-	strbuf_release(&rkey);
-	return result;
-}
+	string_view_consume(&in, n);
+	if (suffix_len > in.len) {
+		args->error = 1;
+		return -1;
+	}
 
-void block_iter_copy_from(struct block_iter *dest, struct block_iter *src)
-{
-	dest->br = src->br;
-	dest->next_off = src->next_off;
-	strbuf_reset(&dest->last_key);
-	strbuf_addbuf(&dest->last_key, &src->last_key);
+	n = memcmp(args->needle.buf, in.buf,
+		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
+	if (n)
+		return n < 0;
+	return args->needle.len < suffix_len;
 }
 
 int block_iter_next(struct block_iter *it, struct reftable_record *rec)
 {
 	struct string_view in = {
-		.buf = it->br->block.data + it->next_off,
-		.len = it->br->block_len - it->next_off,
+		.buf = (unsigned char *) it->block + it->next_off,
+		.len = it->block_len - it->next_off,
 	};
 	struct string_view start = in;
-	struct strbuf key = STRBUF_INIT;
 	uint8_t extra = 0;
 	int n = 0;
 
-	if (it->next_off >= it->br->block_len)
+	if (it->next_off >= it->block_len)
 		return 1;
 
-	n = reftable_decode_key(&key, &extra, it->last_key, in);
+	n = reftable_decode_key(&it->last_key, &extra, in);
 	if (n < 0)
 		return -1;
-
-	if (!key.len)
+	if (!it->last_key.len)
 		return REFTABLE_FORMAT_ERROR;
 
 	string_view_consume(&in, n);
-	n = reftable_record_decode(rec, key, extra, in, it->br->hash_size);
+	n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
+				   &it->scratch);
 	if (n < 0)
 		return -1;
 	string_view_consume(&in, n);
 
-	strbuf_reset(&it->last_key);
-	strbuf_addbuf(&it->last_key, &key);
 	it->next_off += start.len - in.len;
-	strbuf_release(&key);
 	return 0;
 }
 
-int block_reader_first_key(struct block_reader *br, struct strbuf *key)
+void block_iter_reset(struct block_iter *it)
 {
-	struct strbuf empty = STRBUF_INIT;
-	int off = br->header_off + 4;
-	struct string_view in = {
-		.buf = br->block.data + off,
-		.len = br->block_len - off,
-	};
-
-	uint8_t extra = 0;
-	int n = reftable_decode_key(key, &extra, empty, in);
-	if (n < 0)
-		return n;
-	if (!key->len)
-		return REFTABLE_FORMAT_ERROR;
-
-	return 0;
-}
-
-int block_iter_seek(struct block_iter *it, struct strbuf *want)
-{
-	return block_reader_seek(it->br, it, want);
+	strbuf_reset(&it->last_key);
+	it->next_off = 0;
+	it->block = NULL;
+	it->block_len = 0;
+	it->hash_size = 0;
 }
 
 void block_iter_close(struct block_iter *it)
 {
 	strbuf_release(&it->last_key);
+	strbuf_release(&it->scratch);
 }
 
-int block_reader_seek(struct block_reader *br, struct block_iter *it,
-		      struct strbuf *want)
+int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
+			struct strbuf *want)
 {
-	struct restart_find_args args = {
-		.key = *want,
-		.r = br,
+	struct restart_needle_less_args args = {
+		.needle = *want,
+		.reader = br,
 	};
-	struct reftable_record rec = reftable_new_record(block_reader_type(br));
-	struct strbuf key = STRBUF_INIT;
+	struct reftable_record rec;
 	int err = 0;
-	struct block_iter next = {
-		.last_key = STRBUF_INIT,
-	};
-
-	int i = binsearch(br->restart_count, &restart_key_less, &args);
+	size_t i;
+
+	/*
+	 * Perform a binary search over the block's restart points, which
+	 * avoids doing a linear scan over the whole block. Like this, we
+	 * identify the section of the block that should contain our key.
+	 *
+	 * Note that we explicitly search for the first restart point _greater_
+	 * than the sought-after record, not _greater or equal_ to it. In case
+	 * the sought-after record is located directly at the restart point we
+	 * would otherwise start doing the linear search at the preceding
+	 * restart point. While that works alright, we would end up scanning
+	 * too many record.
+	 */
+	i = binsearch(br->restart_count, &restart_needle_less, &args);
 	if (args.error) {
 		err = REFTABLE_FORMAT_ERROR;
 		goto done;
 	}
 
-	it->br = br;
-	if (i > 0) {
-		i--;
-		it->next_off = block_reader_restart_offset(br, i);
-	} else {
+	/*
+	 * Now there are multiple cases:
+	 *
+	 *   - `i == 0`: The wanted record is smaller than the record found at
+	 *     the first restart point. As the first restart point is the first
+	 *     record in the block, our wanted record cannot be located in this
+	 *     block at all. We still need to position the iterator so that the
+	 *     next call to `block_iter_next()` will yield an end-of-iterator
+	 *     signal.
+	 *
+	 *   - `i == restart_count`: The wanted record was not found at any of
+	 *     the restart points. As there is no restart point at the end of
+	 *     the section the record may thus be contained in the last block.
+	 *
+	 *   - `i > 0`: The wanted record must be contained in the section
+	 *     before the found restart point. We thus do a linear search
+	 *     starting from the preceding restart point.
+	 */
+	if (i > 0)
+		it->next_off = block_reader_restart_offset(br, i - 1);
+	else
 		it->next_off = br->header_off + 4;
-	}
-
-	/* We're looking for the last entry less/equal than the wanted key, so
-	   we have to go one entry too far and then back up.
-	*/
+	it->block = br->block.data;
+	it->block_len = br->block_len;
+	it->hash_size = br->hash_size;
+
+	reftable_record_init(&rec, block_reader_type(br));
+
+	/*
+	 * We're looking for the last entry less than the wanted key so that
+	 * the next call to `block_reader_next()` would yield the wanted
+	 * record. We thus don't want to position our reader at the sought
+	 * after record, but one before. To do so, we have to go one entry too
+	 * far and then back up.
+	 */
 	while (1) {
-		block_iter_copy_from(&next, it);
-		err = block_iter_next(&next, &rec);
+		size_t prev_off = it->next_off;
+
+		err = block_iter_next(it, &rec);
 		if (err < 0)
 			goto done;
-
-		reftable_record_key(&rec, &key);
-		if (err > 0 || strbuf_cmp(&key, want) >= 0) {
+		if (err > 0) {
+			it->next_off = prev_off;
 			err = 0;
 			goto done;
 		}
 
-		block_iter_copy_from(it, &next);
+		/*
+		 * Check whether the current key is greater or equal to the
+		 * sought-after key. In case it is greater we know that the
+		 * record does not exist in the block and can thus abort early.
+		 * In case it is equal to the sought-after key we have found
+		 * the desired record.
+		 *
+		 * Note that we store the next record's key record directly in
+		 * `last_key` without restoring the key of the preceding record
+		 * in case we need to go one record back. This is safe to do as
+		 * `block_iter_next()` would return the ref whose key is equal
+		 * to `last_key` now, and naturally all keys share a prefix
+		 * with themselves.
+		 */
+		reftable_record_key(&rec, &it->last_key);
+		if (strbuf_cmp(&it->last_key, want) >= 0) {
+			it->next_off = prev_off;
+			goto done;
+		}
 	}
 
 done:
-	strbuf_release(&key);
-	strbuf_release(&next.last_key);
 	reftable_record_release(&rec);
-
 	return err;
 }