diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-20 05:14:36 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-20 05:14:36 +0000 |
commit | 037de004c68d704abf839eebe075c58c9603f8f3 (patch) | |
tree | 7ac13a7fbb70193e7d04fc193f75de839e914d45 /reftable | |
parent | Adding upstream version 1:2.43.0. (diff) | |
download | git-upstream/1%2.45.1.tar.xz git-upstream/1%2.45.1.zip |
Adding upstream version 1:2.45.1.upstream/1%2.45.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
40 files changed, 1961 insertions, 1452 deletions
diff --git a/reftable/basics.c b/reftable/basics.c index f761e48..fea711d 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -27,7 +27,7 @@ void put_be16(uint8_t *out, uint16_t i) out[1] = (uint8_t)(i & 0xff); } -int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) +size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) { size_t lo = 0; size_t hi = sz; @@ -39,8 +39,11 @@ int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) */ while (hi - lo > 1) { size_t mid = lo + (hi - lo) / 2; + int ret = f(mid, args); + if (ret < 0) + return sz; - if (f(mid, args)) + if (ret > 0) hi = mid; else lo = mid; @@ -64,12 +67,11 @@ void free_names(char **a) reftable_free(a); } -int names_length(char **names) +size_t names_length(char **names) { char **p = names; - for (; *p; p++) { - /* empty */ - } + while (*p) + p++; return p - names; } @@ -89,17 +91,13 @@ void parse_names(char *buf, int size, char ***namesp) next = end; } if (p < next) { - if (names_len == names_cap) { - names_cap = 2 * names_cap + 1; - names = reftable_realloc( - names, names_cap * sizeof(*names)); - } + REFTABLE_ALLOC_GROW(names, names_len + 1, names_cap); names[names_len++] = xstrdup(p); } p = next + 1; } - names = reftable_realloc(names, (names_len + 1) * sizeof(*names)); + REFTABLE_REALLOC_ARRAY(names, names_len + 1); names[names_len] = NULL; *namesp = names; } diff --git a/reftable/basics.h b/reftable/basics.h index 096b368..523ecd5 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -22,13 +22,14 @@ uint32_t get_be24(uint8_t *in); void put_be16(uint8_t *out, uint16_t i); /* - * find smallest index i in [0, sz) at which f(i) is true, assuming - * that f is ascending. Return sz if f(i) is false for all indices. + * find smallest index i in [0, sz) at which `f(i) > 0`, assuming that f is + * ascending. Return sz if `f(i) == 0` for all indices. The search is aborted + * and `sz` is returned in case `f(i) < 0`. * * Contrary to bsearch(3), this returns something useful if the argument is not * found. */ -int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args); +size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args); /* * Frees a NULL terminated array of malloced strings. The array itself is also @@ -44,14 +45,27 @@ void parse_names(char *buf, int size, char ***namesp); int names_equal(char **a, char **b); /* returns the array size of a NULL-terminated array of strings. */ -int names_length(char **names); +size_t names_length(char **names); /* Allocation routines; they invoke the functions set through * reftable_set_alloc() */ void *reftable_malloc(size_t sz); void *reftable_realloc(void *p, size_t sz); void reftable_free(void *p); -void *reftable_calloc(size_t sz); +void *reftable_calloc(size_t nelem, size_t elsize); + +#define REFTABLE_ALLOC_ARRAY(x, alloc) (x) = reftable_malloc(st_mult(sizeof(*(x)), (alloc))) +#define REFTABLE_CALLOC_ARRAY(x, alloc) (x) = reftable_calloc((alloc), sizeof(*(x))) +#define REFTABLE_REALLOC_ARRAY(x, alloc) (x) = reftable_realloc((x), st_mult(sizeof(*(x)), (alloc))) +#define REFTABLE_ALLOC_GROW(x, nr, alloc) \ + do { \ + if ((nr) > alloc) { \ + alloc = 2 * (alloc) + 1; \ + if (alloc < (nr)) \ + alloc = (nr); \ + REFTABLE_REALLOC_ARRAY(x, alloc); \ + } \ + } while (0) /* Find the longest shared prefix size of `a` and `b` */ struct strbuf; diff --git a/reftable/basics_test.c b/reftable/basics_test.c index 1fcd229..997c4d9 100644 --- a/reftable/basics_test.c +++ b/reftable/basics_test.c @@ -12,40 +12,47 @@ https://developers.google.com/open-source/licenses/bsd #include "test_framework.h" #include "reftable-tests.h" -struct binsearch_args { - int key; - int *arr; +struct integer_needle_lesseq_args { + int needle; + int *haystack; }; -static int binsearch_func(size_t i, void *void_args) +static int integer_needle_lesseq(size_t i, void *_args) { - struct binsearch_args *args = void_args; - - return args->key < args->arr[i]; + struct integer_needle_lesseq_args *args = _args; + return args->needle <= args->haystack[i]; } static void test_binsearch(void) { - int arr[] = { 2, 4, 6, 8, 10 }; - size_t sz = ARRAY_SIZE(arr); - struct binsearch_args args = { - .arr = arr, + int haystack[] = { 2, 4, 6, 8, 10 }; + struct { + int needle; + size_t expected_idx; + } testcases[] = { + {-9000, 0}, + {-1, 0}, + {0, 0}, + {2, 0}, + {3, 1}, + {4, 1}, + {7, 3}, + {9, 4}, + {10, 4}, + {11, 5}, + {9000, 5}, }; + size_t i = 0; - int i = 0; - for (i = 1; i < 11; i++) { - int res; - args.key = i; - res = binsearch(sz, &binsearch_func, &args); + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + struct integer_needle_lesseq_args args = { + .haystack = haystack, + .needle = testcases[i].needle, + }; + size_t idx; - if (res < sz) { - EXPECT(args.key < arr[res]); - if (res > 0) { - EXPECT(args.key >= arr[res - 1]); - } - } else { - EXPECT(args.key == 10 || args.key == 11); - } + idx = binsearch(ARRAY_SIZE(haystack), &integer_needle_lesseq, &args); + EXPECT(idx == testcases[i].expected_idx); } } diff --git a/reftable/block.c b/reftable/block.c index 34d4d07..3e87460 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -51,12 +51,7 @@ static int block_writer_register_restart(struct block_writer *w, int n, if (2 + 3 * rlen + n > w->block_size - w->next) return -1; if (is_restart) { - if (w->restart_len == w->restart_cap) { - w->restart_cap = w->restart_cap * 2 + 1; - w->restarts = reftable_realloc( - w->restarts, sizeof(uint32_t) * w->restart_cap); - } - + REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap); w->restarts[w->restart_len++] = w->next; } @@ -148,8 +143,10 @@ int block_writer_finish(struct block_writer *w) int block_header_skip = 4 + w->header_off; uLongf src_len = w->next - block_header_skip; uLongf dest_cap = src_len * 1.001 + 12; + uint8_t *compressed; + + REFTABLE_ALLOC_ARRAY(compressed, dest_cap); - uint8_t *compressed = reftable_malloc(dest_cap); while (1) { uLongf out_dest_len = dest_cap; int zresult = compress2(compressed, &out_dest_len, @@ -178,11 +175,6 @@ int block_writer_finish(struct block_writer *w) return w->next; } -uint8_t block_reader_type(struct block_reader *r) -{ - return r->block.data[r->header_off]; -} - int block_reader_init(struct block_reader *br, struct reftable_block *block, uint32_t header_off, uint32_t table_block_size, int hash_size) @@ -194,7 +186,8 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, uint16_t restart_count = 0; uint32_t restart_start = 0; uint8_t *restart_bytes = NULL; - uint8_t *uncompressed = NULL; + + reftable_block_done(&br->block); if (!reftable_is_block_type(typ)) { err = REFTABLE_FORMAT_ERROR; @@ -202,37 +195,57 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, } if (typ == BLOCK_TYPE_LOG) { - int block_header_skip = 4 + header_off; - uLongf dst_len = sz - block_header_skip; /* total size of dest - buffer. */ - uLongf src_len = block->len - block_header_skip; - /* Log blocks specify the *uncompressed* size in their header. - */ - uncompressed = reftable_malloc(sz); + uint32_t block_header_skip = 4 + header_off; + uLong dst_len = sz - block_header_skip; + uLong src_len = block->len - block_header_skip; + + /* Log blocks specify the *uncompressed* size in their header. */ + REFTABLE_ALLOC_GROW(br->uncompressed_data, sz, + br->uncompressed_cap); /* Copy over the block header verbatim. It's not compressed. */ - memcpy(uncompressed, block->data, block_header_skip); + memcpy(br->uncompressed_data, block->data, block_header_skip); - /* Uncompress */ - if (Z_OK != - uncompress2(uncompressed + block_header_skip, &dst_len, - block->data + block_header_skip, &src_len)) { + if (!br->zstream) { + REFTABLE_CALLOC_ARRAY(br->zstream, 1); + err = inflateInit(br->zstream); + } else { + err = inflateReset(br->zstream); + } + if (err != Z_OK) { + err = REFTABLE_ZLIB_ERROR; + goto done; + } + + br->zstream->next_in = block->data + block_header_skip; + br->zstream->avail_in = src_len; + br->zstream->next_out = br->uncompressed_data + block_header_skip; + br->zstream->avail_out = dst_len; + + /* + * We know both input as well as output size, and we know that + * the sizes should never be bigger than `uInt_MAX` because + * blocks can at most be 16MB large. We can thus use `Z_FINISH` + * here to instruct zlib to inflate the data in one go, which + * is more efficient than using `Z_NO_FLUSH`. + */ + err = inflate(br->zstream, Z_FINISH); + if (err != Z_STREAM_END) { err = REFTABLE_ZLIB_ERROR; goto done; } + err = 0; - if (dst_len + block_header_skip != sz) { + if (br->zstream->total_out + block_header_skip != sz) { err = REFTABLE_FORMAT_ERROR; goto done; } /* We're done with the input data. */ reftable_block_done(block); - block->data = uncompressed; - uncompressed = NULL; + block->data = br->uncompressed_data; block->len = sz; - block->source = malloc_block_source(); - full_block_size = src_len + block_header_skip; + full_block_size = src_len + block_header_skip - br->zstream->avail_in; } else if (full_block_size == 0) { full_block_size = sz; } else if (sz < full_block_size && sz < block->len && @@ -260,176 +273,242 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, br->restart_bytes = restart_bytes; done: - reftable_free(uncompressed); return err; } -static uint32_t block_reader_restart_offset(struct block_reader *br, int i) +void block_reader_release(struct block_reader *br) +{ + inflateEnd(br->zstream); + reftable_free(br->zstream); + reftable_free(br->uncompressed_data); + reftable_block_done(&br->block); +} + +uint8_t block_reader_type(const struct block_reader *r) +{ + return r->block.data[r->header_off]; +} + +int block_reader_first_key(const struct block_reader *br, struct strbuf *key) +{ + int off = br->header_off + 4, n; + struct string_view in = { + .buf = br->block.data + off, + .len = br->block_len - off, + }; + uint8_t extra = 0; + + strbuf_reset(key); + + n = reftable_decode_key(key, &extra, in); + if (n < 0) + return n; + if (!key->len) + return REFTABLE_FORMAT_ERROR; + + return 0; +} + +static uint32_t block_reader_restart_offset(const struct block_reader *br, int i) { return get_be24(br->restart_bytes + 3 * i); } -void block_reader_start(struct block_reader *br, struct block_iter *it) +void block_iter_seek_start(struct block_iter *it, const struct block_reader *br) { - it->br = br; + it->block = br->block.data; + it->block_len = br->block_len; + it->hash_size = br->hash_size; strbuf_reset(&it->last_key); it->next_off = br->header_off + 4; } -struct restart_find_args { +struct restart_needle_less_args { int error; - struct strbuf key; - struct block_reader *r; + struct strbuf needle; + const struct block_reader *reader; }; -static int restart_key_less(size_t idx, void *args) +static int restart_needle_less(size_t idx, void *_args) { - struct restart_find_args *a = args; - uint32_t off = block_reader_restart_offset(a->r, idx); + struct restart_needle_less_args *args = _args; + uint32_t off = block_reader_restart_offset(args->reader, idx); struct string_view in = { - .buf = a->r->block.data + off, - .len = a->r->block_len - off, + .buf = args->reader->block.data + off, + .len = args->reader->block_len - off, }; - - /* the restart key is verbatim in the block, so this could avoid the - alloc for decoding the key */ - struct strbuf rkey = STRBUF_INIT; - struct strbuf last_key = STRBUF_INIT; - uint8_t unused_extra; - int n = reftable_decode_key(&rkey, &unused_extra, last_key, in); - int result; - if (n < 0) { - a->error = 1; + uint64_t prefix_len, suffix_len; + uint8_t extra; + int n; + + /* + * Records at restart points are stored without prefix compression, so + * there is no need to fully decode the record key here. This removes + * the need for allocating memory. + */ + n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra); + if (n < 0 || prefix_len) { + args->error = 1; return -1; } - result = strbuf_cmp(&a->key, &rkey); - strbuf_release(&rkey); - return result; -} + string_view_consume(&in, n); + if (suffix_len > in.len) { + args->error = 1; + return -1; + } -void block_iter_copy_from(struct block_iter *dest, struct block_iter *src) -{ - dest->br = src->br; - dest->next_off = src->next_off; - strbuf_reset(&dest->last_key); - strbuf_addbuf(&dest->last_key, &src->last_key); + n = memcmp(args->needle.buf, in.buf, + args->needle.len < suffix_len ? args->needle.len : suffix_len); + if (n) + return n < 0; + return args->needle.len < suffix_len; } int block_iter_next(struct block_iter *it, struct reftable_record *rec) { struct string_view in = { - .buf = it->br->block.data + it->next_off, - .len = it->br->block_len - it->next_off, + .buf = (unsigned char *) it->block + it->next_off, + .len = it->block_len - it->next_off, }; struct string_view start = in; - struct strbuf key = STRBUF_INIT; uint8_t extra = 0; int n = 0; - if (it->next_off >= it->br->block_len) + if (it->next_off >= it->block_len) return 1; - n = reftable_decode_key(&key, &extra, it->last_key, in); + n = reftable_decode_key(&it->last_key, &extra, in); if (n < 0) return -1; - - if (!key.len) + if (!it->last_key.len) return REFTABLE_FORMAT_ERROR; string_view_consume(&in, n); - n = reftable_record_decode(rec, key, extra, in, it->br->hash_size); + n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size, + &it->scratch); if (n < 0) return -1; string_view_consume(&in, n); - strbuf_reset(&it->last_key); - strbuf_addbuf(&it->last_key, &key); it->next_off += start.len - in.len; - strbuf_release(&key); return 0; } -int block_reader_first_key(struct block_reader *br, struct strbuf *key) +void block_iter_reset(struct block_iter *it) { - struct strbuf empty = STRBUF_INIT; - int off = br->header_off + 4; - struct string_view in = { - .buf = br->block.data + off, - .len = br->block_len - off, - }; - - uint8_t extra = 0; - int n = reftable_decode_key(key, &extra, empty, in); - if (n < 0) - return n; - if (!key->len) - return REFTABLE_FORMAT_ERROR; - - return 0; -} - -int block_iter_seek(struct block_iter *it, struct strbuf *want) -{ - return block_reader_seek(it->br, it, want); + strbuf_reset(&it->last_key); + it->next_off = 0; + it->block = NULL; + it->block_len = 0; + it->hash_size = 0; } void block_iter_close(struct block_iter *it) { strbuf_release(&it->last_key); + strbuf_release(&it->scratch); } -int block_reader_seek(struct block_reader *br, struct block_iter *it, - struct strbuf *want) +int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, + struct strbuf *want) { - struct restart_find_args args = { - .key = *want, - .r = br, + struct restart_needle_less_args args = { + .needle = *want, + .reader = br, }; - struct reftable_record rec = reftable_new_record(block_reader_type(br)); - struct strbuf key = STRBUF_INIT; + struct reftable_record rec; int err = 0; - struct block_iter next = { - .last_key = STRBUF_INIT, - }; - - int i = binsearch(br->restart_count, &restart_key_less, &args); + size_t i; + + /* + * Perform a binary search over the block's restart points, which + * avoids doing a linear scan over the whole block. Like this, we + * identify the section of the block that should contain our key. + * + * Note that we explicitly search for the first restart point _greater_ + * than the sought-after record, not _greater or equal_ to it. In case + * the sought-after record is located directly at the restart point we + * would otherwise start doing the linear search at the preceding + * restart point. While that works alright, we would end up scanning + * too many record. + */ + i = binsearch(br->restart_count, &restart_needle_less, &args); if (args.error) { err = REFTABLE_FORMAT_ERROR; goto done; } - it->br = br; - if (i > 0) { - i--; - it->next_off = block_reader_restart_offset(br, i); - } else { + /* + * Now there are multiple cases: + * + * - `i == 0`: The wanted record is smaller than the record found at + * the first restart point. As the first restart point is the first + * record in the block, our wanted record cannot be located in this + * block at all. We still need to position the iterator so that the + * next call to `block_iter_next()` will yield an end-of-iterator + * signal. + * + * - `i == restart_count`: The wanted record was not found at any of + * the restart points. As there is no restart point at the end of + * the section the record may thus be contained in the last block. + * + * - `i > 0`: The wanted record must be contained in the section + * before the found restart point. We thus do a linear search + * starting from the preceding restart point. + */ + if (i > 0) + it->next_off = block_reader_restart_offset(br, i - 1); + else it->next_off = br->header_off + 4; - } - - /* We're looking for the last entry less/equal than the wanted key, so - we have to go one entry too far and then back up. - */ + it->block = br->block.data; + it->block_len = br->block_len; + it->hash_size = br->hash_size; + + reftable_record_init(&rec, block_reader_type(br)); + + /* + * We're looking for the last entry less than the wanted key so that + * the next call to `block_reader_next()` would yield the wanted + * record. We thus don't want to position our reader at the sought + * after record, but one before. To do so, we have to go one entry too + * far and then back up. + */ while (1) { - block_iter_copy_from(&next, it); - err = block_iter_next(&next, &rec); + size_t prev_off = it->next_off; + + err = block_iter_next(it, &rec); if (err < 0) goto done; - - reftable_record_key(&rec, &key); - if (err > 0 || strbuf_cmp(&key, want) >= 0) { + if (err > 0) { + it->next_off = prev_off; err = 0; goto done; } - block_iter_copy_from(it, &next); + /* + * Check whether the current key is greater or equal to the + * sought-after key. In case it is greater we know that the + * record does not exist in the block and can thus abort early. + * In case it is equal to the sought-after key we have found + * the desired record. + * + * Note that we store the next record's key record directly in + * `last_key` without restoring the key of the preceding record + * in case we need to go one record back. This is safe to do as + * `block_iter_next()` would return the ref whose key is equal + * to `last_key` now, and naturally all keys share a prefix + * with themselves. + */ + reftable_record_key(&rec, &it->last_key); + if (strbuf_cmp(&it->last_key, want) >= 0) { + it->next_off = prev_off; + goto done; + } } done: - strbuf_release(&key); - strbuf_release(&next.last_key); reftable_record_release(&rec); - return err; } diff --git a/reftable/block.h b/reftable/block.h index 87c7753..ea4384a 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -56,6 +56,8 @@ int block_writer_finish(struct block_writer *w); /* clears out internally allocated block_writer members. */ void block_writer_release(struct block_writer *bw); +struct z_stream; + /* Read a block. */ struct block_reader { /* offset of the block header; nonzero for the first block in a @@ -66,6 +68,11 @@ struct block_reader { struct reftable_block block; int hash_size; + /* Uncompressed data for log entries. */ + z_stream *zstream; + unsigned char *uncompressed_data; + size_t uncompressed_cap; + /* size of the data, excluding restart data. */ uint32_t block_len; uint8_t *restart_bytes; @@ -76,41 +83,49 @@ struct block_reader { uint32_t full_block_size; }; +/* initializes a block reader. */ +int block_reader_init(struct block_reader *br, struct reftable_block *bl, + uint32_t header_off, uint32_t table_block_size, + int hash_size); + +void block_reader_release(struct block_reader *br); + +/* Returns the block type (eg. 'r' for refs) */ +uint8_t block_reader_type(const struct block_reader *r); + +/* Decodes the first key in the block */ +int block_reader_first_key(const struct block_reader *br, struct strbuf *key); + /* Iterate over entries in a block */ struct block_iter { /* offset within the block of the next entry to read. */ uint32_t next_off; - struct block_reader *br; + const unsigned char *block; + size_t block_len; + int hash_size; /* key for last entry we read. */ struct strbuf last_key; + struct strbuf scratch; }; -/* initializes a block reader. */ -int block_reader_init(struct block_reader *br, struct reftable_block *bl, - uint32_t header_off, uint32_t table_block_size, - int hash_size); +#define BLOCK_ITER_INIT { \ + .last_key = STRBUF_INIT, \ + .scratch = STRBUF_INIT, \ +} /* Position `it` at start of the block */ -void block_reader_start(struct block_reader *br, struct block_iter *it); +void block_iter_seek_start(struct block_iter *it, const struct block_reader *br); /* Position `it` to the `want` key in the block */ -int block_reader_seek(struct block_reader *br, struct block_iter *it, - struct strbuf *want); - -/* Returns the block type (eg. 'r' for refs) */ -uint8_t block_reader_type(struct block_reader *r); - -/* Decodes the first key in the block */ -int block_reader_first_key(struct block_reader *br, struct strbuf *key); - -void block_iter_copy_from(struct block_iter *dest, struct block_iter *src); +int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, + struct strbuf *want); /* return < 0 for error, 0 for OK, > 0 for EOF. */ int block_iter_next(struct block_iter *it, struct reftable_record *rec); -/* Seek to `want` with in the block pointed to by `it` */ -int block_iter_seek(struct block_iter *it, struct strbuf *want); +/* Reset the block iterator to pristine state without releasing its memory. */ +void block_iter_reset(struct block_iter *it); /* deallocate memory for `it`. The block reader and its block is left intact. */ void block_iter_close(struct block_iter *it); diff --git a/reftable/block_test.c b/reftable/block_test.c index cb88af4..26a9cfb 100644 --- a/reftable/block_test.c +++ b/reftable/block_test.c @@ -32,11 +32,11 @@ static void test_block_read_write(void) int i = 0; int n; struct block_reader br = { 0 }; - struct block_iter it = { .last_key = STRBUF_INIT }; + struct block_iter it = BLOCK_ITER_INIT; int j = 0; struct strbuf want = STRBUF_INIT; - block.data = reftable_calloc(block_size); + REFTABLE_CALLOC_ARRAY(block.data, block_size); block.len = block_size; block.source = malloc_block_source(); block_writer_init(&bw, BLOCK_TYPE_REF, block.data, block_size, @@ -49,13 +49,11 @@ static void test_block_read_write(void) for (i = 0; i < N; i++) { char name[100]; - uint8_t hash[GIT_SHA1_RAWSZ]; snprintf(name, sizeof(name), "branch%02d", i); - memset(hash, i, sizeof(hash)); rec.u.ref.refname = name; rec.u.ref.value_type = REFTABLE_REF_VAL1; - rec.u.ref.value.val1 = hash; + memset(rec.u.ref.value.val1, i, GIT_SHA1_RAWSZ); names[i] = xstrdup(name); n = block_writer_add(&bw, &rec); @@ -71,7 +69,7 @@ static void test_block_read_write(void) block_reader_init(&br, &block, header_off, block_size, GIT_SHA1_RAWSZ); - block_reader_start(&br, &it); + block_iter_seek_start(&it, &br); while (1) { int r = block_iter_next(&it, &rec); @@ -87,11 +85,11 @@ static void test_block_read_write(void) block_iter_close(&it); for (i = 0; i < N; i++) { - struct block_iter it = { .last_key = STRBUF_INIT }; + struct block_iter it = BLOCK_ITER_INIT; strbuf_reset(&want); strbuf_addstr(&want, names[i]); - n = block_reader_seek(&br, &it, &want); + n = block_iter_seek_key(&it, &br, &want); EXPECT(n == 0); n = block_iter_next(&it, &rec); @@ -100,7 +98,7 @@ static void test_block_read_write(void) EXPECT_STREQ(names[i], rec.u.ref.refname); want.len--; - n = block_reader_seek(&br, &it, &want); + n = block_iter_seek_key(&it, &br, &want); EXPECT(n == 0); n = block_iter_next(&it, &rec); diff --git a/reftable/blocksource.c b/reftable/blocksource.c index 8331b34..eeed254 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -29,7 +29,7 @@ static int strbuf_read_block(void *v, struct reftable_block *dest, uint64_t off, { struct strbuf *b = v; assert(off + size <= b->len); - dest->data = reftable_calloc(size); + REFTABLE_CALLOC_ARRAY(dest->data, size); memcpy(dest->data, b->buf + off, size); dest->len = size; return size; @@ -76,8 +76,8 @@ struct reftable_block_source malloc_block_source(void) } struct file_block_source { - int fd; uint64_t size; + unsigned char *data; }; static uint64_t file_size(void *b) @@ -87,19 +87,12 @@ static uint64_t file_size(void *b) static void file_return_block(void *b, struct reftable_block *dest) { - if (dest->len) - memset(dest->data, 0xff, dest->len); - reftable_free(dest->data); } -static void file_close(void *b) +static void file_close(void *v) { - int fd = ((struct file_block_source *)b)->fd; - if (fd > 0) { - close(fd); - ((struct file_block_source *)b)->fd = 0; - } - + struct file_block_source *b = v; + munmap(b->data, b->size); reftable_free(b); } @@ -108,9 +101,7 @@ static int file_read_block(void *v, struct reftable_block *dest, uint64_t off, { struct file_block_source *b = v; assert(off + size <= b->size); - dest->data = reftable_malloc(size); - if (pread(b->fd, dest->data, size, off) != size) - return -1; + dest->data = b->data + off; dest->len = size; return size; } @@ -125,26 +116,26 @@ static struct reftable_block_source_vtable file_vtable = { int reftable_block_source_from_file(struct reftable_block_source *bs, const char *name) { - struct stat st = { 0 }; - int err = 0; - int fd = open(name, O_RDONLY); - struct file_block_source *p = NULL; + struct file_block_source *p; + struct stat st; + int fd; + + fd = open(name, O_RDONLY); if (fd < 0) { - if (errno == ENOENT) { + if (errno == ENOENT) return REFTABLE_NOT_EXIST_ERROR; - } return -1; } - err = fstat(fd, &st); - if (err < 0) { + if (fstat(fd, &st) < 0) { close(fd); return REFTABLE_IO_ERROR; } - p = reftable_calloc(sizeof(struct file_block_source)); + REFTABLE_CALLOC_ARRAY(p, 1); p->size = st.st_size; - p->fd = fd; + p->data = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); assert(!bs->ops); bs->ops = &file_vtable; diff --git a/reftable/dump.c b/reftable/dump.c index ce936b4..26e0393 100644 --- a/reftable/dump.c +++ b/reftable/dump.c @@ -11,14 +11,12 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-blocksource.h" #include "reftable-error.h" -#include "reftable-merged.h" #include "reftable-record.h" #include "reftable-tests.h" #include "reftable-writer.h" #include "reftable-iterator.h" #include "reftable-reader.h" #include "reftable-stack.h" -#include "reftable-generic.h" #include <stddef.h> #include <stdio.h> diff --git a/reftable/error.c b/reftable/error.c index 0d17667..cfb7a0f 100644 --- a/reftable/error.c +++ b/reftable/error.c @@ -22,7 +22,7 @@ const char *reftable_error_str(int err) case REFTABLE_NOT_EXIST_ERROR: return "file does not exist"; case REFTABLE_LOCK_ERROR: - return "data is outdated"; + return "data is locked"; case REFTABLE_API_ERROR: return "misuse of the reftable API"; case REFTABLE_ZLIB_ERROR: @@ -35,6 +35,8 @@ const char *reftable_error_str(int err) return "invalid refname"; case REFTABLE_ENTRY_TOO_BIG_ERROR: return "entry too large"; + case REFTABLE_OUTDATED_ERROR: + return "data concurrently modified"; case -1: return "general error"; default: diff --git a/reftable/generic.c b/reftable/generic.c index 57f8032..b9f1c7c 100644 --- a/reftable/generic.c +++ b/reftable/generic.c @@ -6,7 +6,6 @@ license that can be found in the LICENSE file or at https://developers.google.com/open-source/licenses/bsd */ -#include "basics.h" #include "constants.h" #include "record.h" #include "generic.h" diff --git a/reftable/iter.c b/reftable/iter.c index a8d174c..aa9ac19 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -16,11 +16,6 @@ https://developers.google.com/open-source/licenses/bsd #include "reader.h" #include "reftable-error.h" -int iterator_is_null(struct reftable_iterator *it) -{ - return !it->ops; -} - static void filtering_ref_iterator_close(void *iter_arg) { struct filtering_ref_iterator *fri = iter_arg; @@ -120,7 +115,7 @@ static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it) /* indexed block does not exist. */ return REFTABLE_FORMAT_ERROR; } - block_reader_start(&it->block_reader, &it->cur); + block_iter_seek_start(&it->cur, &it->block_reader); return 0; } @@ -160,8 +155,7 @@ int new_indexed_table_ref_iter(struct indexed_table_ref_iter **dest, int oid_len, uint64_t *offsets, int offset_len) { struct indexed_table_ref_iter empty = INDEXED_TABLE_REF_ITER_INIT; - struct indexed_table_ref_iter *itr = - reftable_calloc(sizeof(struct indexed_table_ref_iter)); + struct indexed_table_ref_iter *itr = reftable_calloc(1, sizeof(*itr)); int err = 0; *itr = empty; diff --git a/reftable/iter.h b/reftable/iter.h index 09eb0cb..537431b 100644 --- a/reftable/iter.h +++ b/reftable/iter.h @@ -16,10 +16,6 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-iterator.h" #include "reftable-generic.h" -/* Returns true for a zeroed out iterator, such as the one returned from - * iterator_destroy. */ -int iterator_is_null(struct reftable_iterator *it); - /* iterator that produces only ref records that point to `oid` */ struct filtering_ref_iterator { int double_check; @@ -53,10 +49,10 @@ struct indexed_table_ref_iter { int is_finished; }; -#define INDEXED_TABLE_REF_ITER_INIT \ - { \ - .cur = { .last_key = STRBUF_INIT }, .oid = STRBUF_INIT, \ - } +#define INDEXED_TABLE_REF_ITER_INIT { \ + .cur = BLOCK_ITER_INIT, \ + .oid = STRBUF_INIT, \ +} void iterator_from_indexed_table_ref_iter(struct reftable_iterator *it, struct indexed_table_ref_iter *itr); diff --git a/reftable/merged.c b/reftable/merged.c index 5ded470..f85a24c 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -11,33 +11,45 @@ https://developers.google.com/open-source/licenses/bsd #include "constants.h" #include "iter.h" #include "pq.h" -#include "reader.h" #include "record.h" #include "generic.h" #include "reftable-merged.h" #include "reftable-error.h" #include "system.h" +struct merged_subiter { + struct reftable_iterator iter; + struct reftable_record rec; +}; + +struct merged_iter { + struct merged_subiter *subiters; + struct merged_iter_pqueue pq; + uint32_t hash_id; + size_t stack_len; + uint8_t typ; + int suppress_deletions; + ssize_t advance_index; +}; + static int merged_iter_init(struct merged_iter *mi) { - int i = 0; - for (i = 0; i < mi->stack_len; i++) { - struct reftable_record rec = reftable_new_record(mi->typ); - int err = iterator_next(&mi->stack[i], &rec); - if (err < 0) { + for (size_t i = 0; i < mi->stack_len; i++) { + struct pq_entry e = { + .index = i, + .rec = &mi->subiters[i].rec, + }; + int err; + + reftable_record_init(&mi->subiters[i].rec, mi->typ); + err = iterator_next(&mi->subiters[i].iter, + &mi->subiters[i].rec); + if (err < 0) return err; - } + if (err > 0) + continue; - if (err > 0) { - reftable_iterator_destroy(&mi->stack[i]); - reftable_record_release(&rec); - } else { - struct pq_entry e = { - .rec = rec, - .index = i, - }; - merged_iter_pqueue_add(&mi->pq, &e); - } + merged_iter_pqueue_add(&mi->pq, &e); } return 0; @@ -46,56 +58,68 @@ static int merged_iter_init(struct merged_iter *mi) static void merged_iter_close(void *p) { struct merged_iter *mi = p; - int i = 0; + merged_iter_pqueue_release(&mi->pq); - for (i = 0; i < mi->stack_len; i++) { - reftable_iterator_destroy(&mi->stack[i]); + for (size_t i = 0; i < mi->stack_len; i++) { + reftable_iterator_destroy(&mi->subiters[i].iter); + reftable_record_release(&mi->subiters[i].rec); } - reftable_free(mi->stack); + reftable_free(mi->subiters); } -static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, - size_t idx) +static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) { struct pq_entry e = { - .rec = reftable_new_record(mi->typ), .index = idx, + .rec = &mi->subiters[idx].rec, }; - int err = iterator_next(&mi->stack[idx], &e.rec); - if (err < 0) - return err; + int err; - if (err > 0) { - reftable_iterator_destroy(&mi->stack[idx]); - reftable_record_release(&e.rec); - return 0; - } + err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec); + if (err) + return err; merged_iter_pqueue_add(&mi->pq, &e); return 0; } -static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) -{ - if (iterator_is_null(&mi->stack[idx])) - return 0; - return merged_iter_advance_nonnull_subiter(mi, idx); -} - static int merged_iter_next_entry(struct merged_iter *mi, struct reftable_record *rec) { - struct strbuf entry_key = STRBUF_INIT; struct pq_entry entry = { 0 }; - int err = 0; + int err = 0, empty; + + empty = merged_iter_pqueue_is_empty(mi->pq); + + if (mi->advance_index >= 0) { + /* + * When there are no pqueue entries then we only have a single + * subiter left. There is no need to use the pqueue in that + * case anymore as we know that the subiter will return entries + * in the correct order already. + * + * While this may sound like a very specific edge case, it may + * happen more frequently than you think. Most repositories + * will end up having a single large base table that contains + * most of the refs. It's thus likely that we exhaust all + * subiters but the one from that base ref. + */ + if (empty) + return iterator_next(&mi->subiters[mi->advance_index].iter, + rec); + + err = merged_iter_advance_subiter(mi, mi->advance_index); + if (err < 0) + return err; + if (!err) + empty = 0; + mi->advance_index = -1; + } - if (merged_iter_pqueue_is_empty(mi->pq)) + if (empty) return 1; entry = merged_iter_pqueue_remove(&mi->pq); - err = merged_iter_advance_subiter(mi, entry.index); - if (err < 0) - return err; /* One can also use reftable as datacenter-local storage, where the ref @@ -105,57 +129,38 @@ static int merged_iter_next_entry(struct merged_iter *mi, such a deployment, the loop below must be changed to collect all entries for the same key, and return new the newest one. */ - reftable_record_key(&entry.rec, &entry_key); while (!merged_iter_pqueue_is_empty(mi->pq)) { struct pq_entry top = merged_iter_pqueue_top(mi->pq); - struct strbuf k = STRBUF_INIT; - int err = 0, cmp = 0; - - reftable_record_key(&top.rec, &k); + int cmp; - cmp = strbuf_cmp(&k, &entry_key); - strbuf_release(&k); - - if (cmp > 0) { + cmp = reftable_record_cmp(top.rec, entry.rec); + if (cmp > 0) break; - } merged_iter_pqueue_remove(&mi->pq); err = merged_iter_advance_subiter(mi, top.index); - if (err < 0) { + if (err < 0) return err; - } - reftable_record_release(&top.rec); } - reftable_record_copy_from(rec, &entry.rec, hash_size(mi->hash_id)); - reftable_record_release(&entry.rec); - strbuf_release(&entry_key); + mi->advance_index = entry.index; + SWAP(*rec, *entry.rec); return 0; } -static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec) +static int merged_iter_next_void(void *p, struct reftable_record *rec) { + struct merged_iter *mi = p; while (1) { int err = merged_iter_next_entry(mi, rec); - if (err == 0 && mi->suppress_deletions && - reftable_record_is_deletion(rec)) { + if (err) + return err; + if (mi->suppress_deletions && reftable_record_is_deletion(rec)) continue; - } - - return err; + return 0; } } -static int merged_iter_next_void(void *p, struct reftable_record *rec) -{ - struct merged_iter *mi = p; - if (merged_iter_pqueue_is_empty(mi->pq)) - return 1; - - return merged_iter_next(mi, rec); -} - static struct reftable_iterator_vtable merged_iter_vtable = { .next = &merged_iter_next_void, .close = &merged_iter_close, @@ -170,14 +175,14 @@ static void iterator_from_merged_iter(struct reftable_iterator *it, } int reftable_new_merged_table(struct reftable_merged_table **dest, - struct reftable_table *stack, int n, + struct reftable_table *stack, size_t n, uint32_t hash_id) { struct reftable_merged_table *m = NULL; uint64_t last_max = 0; uint64_t first_min = 0; - int i = 0; - for (i = 0; i < n; i++) { + + for (size_t i = 0; i < n; i++) { uint64_t min = reftable_table_min_update_index(&stack[i]); uint64_t max = reftable_table_max_update_index(&stack[i]); @@ -192,7 +197,7 @@ int reftable_new_merged_table(struct reftable_merged_table **dest, } } - m = reftable_calloc(sizeof(struct reftable_merged_table)); + REFTABLE_CALLOC_ARRAY(m, 1); m->stack = stack; m->stack_len = n; m->min = first_min; @@ -241,48 +246,37 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, struct reftable_iterator *it, struct reftable_record *rec) { - struct reftable_iterator *iters = reftable_calloc( - sizeof(struct reftable_iterator) * mt->stack_len); struct merged_iter merged = { - .stack = iters, .typ = reftable_record_type(rec), .hash_id = mt->hash_id, .suppress_deletions = mt->suppress_deletions, + .advance_index = -1, }; - int n = 0; - int err = 0; - int i = 0; - for (i = 0; i < mt->stack_len && err == 0; i++) { - int e = reftable_table_seek_record(&mt->stack[i], &iters[n], - rec); - if (e < 0) { - err = e; - } - if (e == 0) { - n++; - } - } - if (err < 0) { - int i = 0; - for (i = 0; i < n; i++) { - reftable_iterator_destroy(&iters[i]); - } - reftable_free(iters); - return err; + struct merged_iter *p; + int err; + + REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len); + for (size_t i = 0; i < mt->stack_len; i++) { + err = reftable_table_seek_record(&mt->stack[i], + &merged.subiters[merged.stack_len].iter, rec); + if (err < 0) + goto out; + if (!err) + merged.stack_len++; } - merged.stack_len = n; err = merged_iter_init(&merged); - if (err < 0) { + if (err < 0) + goto out; + + p = reftable_malloc(sizeof(struct merged_iter)); + *p = merged; + iterator_from_merged_iter(it, p); + +out: + if (err < 0) merged_iter_close(&merged); - return err; - } else { - struct merged_iter *p = - reftable_malloc(sizeof(struct merged_iter)); - *p = merged; - iterator_from_merged_iter(it, p); - } - return 0; + return err; } int reftable_merged_table_seek_ref(struct reftable_merged_table *mt, diff --git a/reftable/merged.h b/reftable/merged.h index 7d9f95d..a2571db 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -9,7 +9,7 @@ https://developers.google.com/open-source/licenses/bsd #ifndef MERGED_H #define MERGED_H -#include "pq.h" +#include "system.h" struct reftable_merged_table { struct reftable_table *stack; @@ -24,15 +24,6 @@ struct reftable_merged_table { uint64_t max; }; -struct merged_iter { - struct reftable_iterator *stack; - uint32_t hash_id; - size_t stack_len; - uint8_t typ; - int suppress_deletions; - struct merged_iter_pqueue pq; -}; - void merged_table_release(struct reftable_merged_table *mt); #endif diff --git a/reftable/merged_test.c b/reftable/merged_test.c index d08c16a..530fc82 100644 --- a/reftable/merged_test.c +++ b/reftable/merged_test.c @@ -12,7 +12,6 @@ https://developers.google.com/open-source/licenses/bsd #include "basics.h" #include "blocksource.h" -#include "constants.h" #include "reader.h" #include "record.h" #include "test_framework.h" @@ -43,7 +42,7 @@ static void write_test_table(struct strbuf *buf, } } - w = reftable_new_writer(&strbuf_add_void, buf, &opts); + w = reftable_new_writer(&strbuf_add_void, &noop_flush, buf, &opts); reftable_writer_set_limits(w, min, max); for (i = 0; i < n; i++) { @@ -71,7 +70,7 @@ static void write_test_log_table(struct strbuf *buf, .exact_log_message = 1, }; struct reftable_writer *w = NULL; - w = reftable_new_writer(&strbuf_add_void, buf, &opts); + w = reftable_new_writer(&strbuf_add_void, &noop_flush, buf, &opts); reftable_writer_set_limits(w, update_index, update_index); for (i = 0; i < n; i++) { @@ -89,16 +88,17 @@ static struct reftable_merged_table * merged_table_from_records(struct reftable_ref_record **refs, struct reftable_block_source **source, struct reftable_reader ***readers, int *sizes, - struct strbuf *buf, int n) + struct strbuf *buf, size_t n) { - int i = 0; struct reftable_merged_table *mt = NULL; + struct reftable_table *tabs; int err; - struct reftable_table *tabs = - reftable_calloc(n * sizeof(struct reftable_table)); - *readers = reftable_calloc(n * sizeof(struct reftable_reader *)); - *source = reftable_calloc(n * sizeof(**source)); - for (i = 0; i < n; i++) { + + REFTABLE_CALLOC_ARRAY(tabs, n); + REFTABLE_CALLOC_ARRAY(*readers, n); + REFTABLE_CALLOC_ARRAY(*source, n); + + for (size_t i = 0; i < n; i++) { write_test_table(&buf[i], refs[i], sizes[i]); block_source_from_strbuf(&(*source)[i], &buf[i]); @@ -123,13 +123,11 @@ static void readers_destroy(struct reftable_reader **readers, size_t n) static void test_merged_between(void) { - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1, 2, 3, 0 }; - struct reftable_ref_record r1[] = { { .refname = "b", .update_index = 1, .value_type = REFTABLE_REF_VAL1, - .value.val1 = hash1, + .value.val1 = { 1, 2, 3, 0 }, } }; struct reftable_ref_record r2[] = { { .refname = "a", @@ -165,26 +163,24 @@ static void test_merged_between(void) static void test_merged(void) { - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 }; - uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 }; struct reftable_ref_record r1[] = { { .refname = "a", .update_index = 1, .value_type = REFTABLE_REF_VAL1, - .value.val1 = hash1, + .value.val1 = { 1 }, }, { .refname = "b", .update_index = 1, .value_type = REFTABLE_REF_VAL1, - .value.val1 = hash1, + .value.val1 = { 1 }, }, { .refname = "c", .update_index = 1, .value_type = REFTABLE_REF_VAL1, - .value.val1 = hash1, + .value.val1 = { 1 }, } }; struct reftable_ref_record r2[] = { { @@ -197,13 +193,13 @@ static void test_merged(void) .refname = "c", .update_index = 3, .value_type = REFTABLE_REF_VAL1, - .value.val1 = hash2, + .value.val1 = { 2 }, }, { .refname = "d", .update_index = 3, .value_type = REFTABLE_REF_VAL1, - .value.val1 = hash1, + .value.val1 = { 1 }, }, }; @@ -236,14 +232,10 @@ static void test_merged(void) while (len < 100) { /* cap loops/recursion. */ struct reftable_ref_record ref = { NULL }; int err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) { + if (err > 0) break; - } - if (len == cap) { - cap = 2 * cap + 1; - out = reftable_realloc( - out, sizeof(struct reftable_ref_record) * cap); - } + + REFTABLE_ALLOC_GROW(out, len + 1, cap); out[len++] = ref; } reftable_iterator_destroy(&it); @@ -270,16 +262,17 @@ static struct reftable_merged_table * merged_table_from_log_records(struct reftable_log_record **logs, struct reftable_block_source **source, struct reftable_reader ***readers, int *sizes, - struct strbuf *buf, int n) + struct strbuf *buf, size_t n) { - int i = 0; struct reftable_merged_table *mt = NULL; + struct reftable_table *tabs; int err; - struct reftable_table *tabs = - reftable_calloc(n * sizeof(struct reftable_table)); - *readers = reftable_calloc(n * sizeof(struct reftable_reader *)); - *source = reftable_calloc(n * sizeof(**source)); - for (i = 0; i < n; i++) { + + REFTABLE_CALLOC_ARRAY(tabs, n); + REFTABLE_CALLOC_ARRAY(*readers, n); + REFTABLE_CALLOC_ARRAY(*source, n); + + for (size_t i = 0; i < n; i++) { write_test_log_table(&buf[i], logs[i], sizes[i], i + 1); block_source_from_strbuf(&(*source)[i], &buf[i]); @@ -296,16 +289,13 @@ merged_table_from_log_records(struct reftable_log_record **logs, static void test_merged_logs(void) { - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 }; - uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 }; - uint8_t hash3[GIT_SHA1_RAWSZ] = { 3 }; struct reftable_log_record r1[] = { { .refname = "a", .update_index = 2, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .old_hash = hash2, + .old_hash = { 2 }, /* deletion */ .name = "jane doe", .email = "jane@invalid", @@ -317,8 +307,8 @@ static void test_merged_logs(void) .update_index = 1, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .old_hash = hash1, - .new_hash = hash2, + .old_hash = { 1 }, + .new_hash = { 2 }, .name = "jane doe", .email = "jane@invalid", .message = "message1", @@ -331,7 +321,7 @@ static void test_merged_logs(void) .update_index = 3, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .new_hash = hash3, + .new_hash = { 3 }, .name = "jane doe", .email = "jane@invalid", .message = "message3", @@ -373,14 +363,10 @@ static void test_merged_logs(void) while (len < 100) { /* cap loops/recursion. */ struct reftable_log_record log = { NULL }; int err = reftable_iterator_next_log(&it, &log); - if (err > 0) { + if (err > 0) break; - } - if (len == cap) { - cap = 2 * cap + 1; - out = reftable_realloc( - out, sizeof(struct reftable_log_record) * cap); - } + + REFTABLE_ALLOC_GROW(out, len + 1, cap); out[len++] = log; } reftable_iterator_destroy(&it); @@ -417,7 +403,7 @@ static void test_default_write_opts(void) struct reftable_write_options opts = { 0 }; struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); struct reftable_ref_record rec = { .refname = "master", @@ -425,7 +411,7 @@ static void test_default_write_opts(void) }; int err; struct reftable_block_source source = { NULL }; - struct reftable_table *tab = reftable_calloc(sizeof(*tab) * 1); + struct reftable_table *tab = reftable_calloc(1, sizeof(*tab)); uint32_t hash_id; struct reftable_reader *rd = NULL; struct reftable_merged_table *merged = NULL; diff --git a/reftable/pq.c b/reftable/pq.c index dcefeb7..7fb45d8 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -14,33 +14,12 @@ https://developers.google.com/open-source/licenses/bsd int pq_less(struct pq_entry *a, struct pq_entry *b) { - struct strbuf ak = STRBUF_INIT; - struct strbuf bk = STRBUF_INIT; - int cmp = 0; - reftable_record_key(&a->rec, &ak); - reftable_record_key(&b->rec, &bk); - - cmp = strbuf_cmp(&ak, &bk); - - strbuf_release(&ak); - strbuf_release(&bk); - + int cmp = reftable_record_cmp(a->rec, b->rec); if (cmp == 0) return a->index > b->index; - return cmp < 0; } -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) -{ - return pq.heap[0]; -} - -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) -{ - return pq.len == 0; -} - struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) { int i = 0; @@ -75,13 +54,9 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry { int i = 0; - if (pq->len == pq->cap) { - pq->cap = 2 * pq->cap + 1; - pq->heap = reftable_realloc(pq->heap, - pq->cap * sizeof(struct pq_entry)); - } - + REFTABLE_ALLOC_GROW(pq->heap, pq->len + 1, pq->cap); pq->heap[pq->len++] = *e; + i = pq->len - 1; while (i > 0) { int j = (i - 1) / 2; @@ -97,10 +72,6 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry void merged_iter_pqueue_release(struct merged_iter_pqueue *pq) { - int i = 0; - for (i = 0; i < pq->len; i++) { - reftable_record_release(&pq->heap[i].rec); - } FREE_AND_NULL(pq->heap); - pq->len = pq->cap = 0; + memset(pq, 0, sizeof(*pq)); } diff --git a/reftable/pq.h b/reftable/pq.h index e85bac9..f796c23 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -12,8 +12,8 @@ https://developers.google.com/open-source/licenses/bsd #include "record.h" struct pq_entry { - int index; - struct reftable_record rec; + size_t index; + struct reftable_record *rec; }; struct merged_iter_pqueue { @@ -22,12 +22,20 @@ struct merged_iter_pqueue { size_t cap; }; -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq); -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq); void merged_iter_pqueue_check(struct merged_iter_pqueue pq); struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq); void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e); void merged_iter_pqueue_release(struct merged_iter_pqueue *pq); int pq_less(struct pq_entry *a, struct pq_entry *b); +static inline struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) +{ + return pq.heap[0]; +} + +static inline int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) +{ + return pq.len == 0; +} + #endif diff --git a/reftable/pq_test.c b/reftable/pq_test.c index 011b5c7..b7d3c80 100644 --- a/reftable/pq_test.c +++ b/reftable/pq_test.c @@ -27,48 +27,43 @@ void merged_iter_pqueue_check(struct merged_iter_pqueue pq) static void test_pq(void) { - char *names[54] = { NULL }; - int N = ARRAY_SIZE(names) - 1; - struct merged_iter_pqueue pq = { NULL }; + struct reftable_record recs[54]; + int N = ARRAY_SIZE(recs) - 1, i; char *last = NULL; - int i = 0; for (i = 0; i < N; i++) { - char name[100]; - snprintf(name, sizeof(name), "%02d", i); - names[i] = xstrdup(name); + struct strbuf refname = STRBUF_INIT; + strbuf_addf(&refname, "%02d", i); + + reftable_record_init(&recs[i], BLOCK_TYPE_REF); + recs[i].u.ref.refname = strbuf_detach(&refname, NULL); } i = 1; do { - struct pq_entry e = { .rec = { .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = names[i], - } } }; + struct pq_entry e = { + .rec = &recs[i], + }; + merged_iter_pqueue_add(&pq, &e); merged_iter_pqueue_check(pq); + i = (i * 7) % N; } while (i != 1); while (!merged_iter_pqueue_is_empty(pq)) { struct pq_entry e = merged_iter_pqueue_remove(&pq); - struct reftable_record *rec = &e.rec; merged_iter_pqueue_check(pq); - EXPECT(reftable_record_type(rec) == BLOCK_TYPE_REF); - if (last) { - EXPECT(strcmp(last, rec->u.ref.refname) < 0); - } - // this is names[i], so don't dealloc. - last = rec->u.ref.refname; - rec->u.ref.refname = NULL; - reftable_record_release(rec); - } - for (i = 0; i < N; i++) { - reftable_free(names[i]); + EXPECT(reftable_record_type(e.rec) == BLOCK_TYPE_REF); + if (last) + EXPECT(strcmp(last, e.rec->u.ref.refname) < 0); + last = e.rec->u.ref.refname; } + for (i = 0; i < N; i++) + reftable_record_release(&recs[i]); merged_iter_pqueue_release(&pq); } diff --git a/reftable/publicbasics.c b/reftable/publicbasics.c index bcb8253..44b84a1 100644 --- a/reftable/publicbasics.c +++ b/reftable/publicbasics.c @@ -37,8 +37,9 @@ void reftable_free(void *p) free(p); } -void *reftable_calloc(size_t sz) +void *reftable_calloc(size_t nelem, size_t elsize) { + size_t sz = st_mult(nelem, elsize); void *p = reftable_malloc(sz); memset(p, 0, sz); return p; diff --git a/reftable/reader.c b/reftable/reader.c index b4db23c..481dff1 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -16,7 +16,6 @@ https://developers.google.com/open-source/licenses/bsd #include "record.h" #include "reftable-error.h" #include "reftable-generic.h" -#include "tree.h" uint64_t block_source_size(struct reftable_block_source *source) { @@ -221,22 +220,12 @@ struct table_iter { struct reftable_reader *r; uint8_t typ; uint64_t block_off; + struct block_reader br; struct block_iter bi; int is_finished; }; -#define TABLE_ITER_INIT \ - { \ - .bi = {.last_key = STRBUF_INIT } \ - } - -static void table_iter_copy_from(struct table_iter *dest, - struct table_iter *src) -{ - dest->r = src->r; - dest->typ = src->typ; - dest->block_off = src->block_off; - dest->is_finished = src->is_finished; - block_iter_copy_from(&dest->bi, &src->bi); +#define TABLE_ITER_INIT { \ + .bi = BLOCK_ITER_INIT \ } static int table_iter_next_in_block(struct table_iter *ti, @@ -252,14 +241,8 @@ static int table_iter_next_in_block(struct table_iter *ti, static void table_iter_block_done(struct table_iter *ti) { - if (!ti->bi.br) { - return; - } - reftable_block_done(&ti->bi.br->block); - FREE_AND_NULL(ti->bi.br); - - ti->bi.last_key.len = 0; - ti->bi.next_off = 0; + block_reader_release(&ti->br); + block_iter_reset(&ti->bi); } static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off, @@ -323,32 +306,27 @@ done: return err; } -static int table_iter_next_block(struct table_iter *dest, - struct table_iter *src) +static void table_iter_close(struct table_iter *ti) { - uint64_t next_block_off = src->block_off + src->bi.br->full_block_size; - struct block_reader br = { 0 }; - int err = 0; + table_iter_block_done(ti); + block_iter_close(&ti->bi); +} - dest->r = src->r; - dest->typ = src->typ; - dest->block_off = next_block_off; +static int table_iter_next_block(struct table_iter *ti) +{ + uint64_t next_block_off = ti->block_off + ti->br.full_block_size; + int err; - err = reader_init_block_reader(src->r, &br, next_block_off, src->typ); - if (err > 0) { - dest->is_finished = 1; - return 1; - } - if (err != 0) + err = reader_init_block_reader(ti->r, &ti->br, next_block_off, ti->typ); + if (err > 0) + ti->is_finished = 1; + if (err) return err; - else { - struct block_reader *brp = - reftable_malloc(sizeof(struct block_reader)); - *brp = br; - dest->is_finished = 0; - block_reader_start(brp, &dest->bi); - } + ti->block_off = next_block_off; + ti->is_finished = 0; + block_iter_seek_start(&ti->bi, &ti->br); + return 0; } @@ -358,27 +336,30 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec) return REFTABLE_API_ERROR; while (1) { - struct table_iter next = TABLE_ITER_INIT; - int err = 0; - if (ti->is_finished) { + int err; + + if (ti->is_finished) return 1; - } + /* + * Check whether the current block still has more records. If + * so, return it. If the iterator returns positive then the + * current block has been exhausted. + */ err = table_iter_next_in_block(ti, rec); - if (err <= 0) { + if (err <= 0) return err; - } - err = table_iter_next_block(&next, ti); - if (err != 0) { + /* + * Otherwise, we need to continue to the next block in the + * table and retry. If there are no more blocks then the + * iterator is drained. + */ + err = table_iter_next_block(ti); + if (err) { ti->is_finished = 1; - } - table_iter_block_done(ti); - if (err != 0) { return err; } - table_iter_copy_from(ti, &next); - block_iter_close(&next.bi); } } @@ -387,16 +368,14 @@ static int table_iter_next_void(void *ti, struct reftable_record *rec) return table_iter_next(ti, rec); } -static void table_iter_close(void *p) +static void table_iter_close_void(void *ti) { - struct table_iter *ti = p; - table_iter_block_done(ti); - block_iter_close(&ti->bi); + table_iter_close(ti); } static struct reftable_iterator_vtable table_iter_vtable = { .next = &table_iter_next_void, - .close = &table_iter_close, + .close = &table_iter_close_void, }; static void iterator_from_table_iter(struct reftable_iterator *it, @@ -411,19 +390,16 @@ static int reader_table_iter_at(struct reftable_reader *r, struct table_iter *ti, uint64_t off, uint8_t typ) { - struct block_reader br = { 0 }; - struct block_reader *brp = NULL; + int err; - int err = reader_init_block_reader(r, &br, off, typ); + err = reader_init_block_reader(r, &ti->br, off, typ); if (err != 0) return err; - brp = reftable_malloc(sizeof(struct block_reader)); - *brp = br; ti->r = r; - ti->typ = block_reader_type(brp); + ti->typ = block_reader_type(&ti->br); ti->block_off = off; - block_reader_start(brp, &ti->bi); + block_iter_seek_start(&ti->bi, &ti->br); return 0; } @@ -446,25 +422,54 @@ static int reader_start(struct reftable_reader *r, struct table_iter *ti, static int reader_seek_linear(struct table_iter *ti, struct reftable_record *want) { - struct reftable_record rec = - reftable_new_record(reftable_record_type(want)); struct strbuf want_key = STRBUF_INIT; struct strbuf got_key = STRBUF_INIT; - struct table_iter next = TABLE_ITER_INIT; + struct reftable_record rec; int err = -1; + reftable_record_init(&rec, reftable_record_type(want)); reftable_record_key(want, &want_key); + /* + * First we need to locate the block that must contain our record. To + * do so we scan through blocks linearly until we find the first block + * whose first key is bigger than our wanted key. Once we have found + * that block we know that the key must be contained in the preceding + * block. + * + * This algorithm is somewhat unfortunate because it means that we + * always have to seek one block too far and then back up. But as we + * can only decode the _first_ key of a block but not its _last_ key we + * have no other way to do this. + */ while (1) { - err = table_iter_next_block(&next, ti); + struct table_iter next = *ti; + + /* + * We must be careful to not modify underlying data of `ti` + * because we may find that `next` does not contain our desired + * block, but that `ti` does. In that case, we would discard + * `next` and continue with `ti`. + * + * This also means that we cannot reuse allocated memory for + * `next` here. While it would be great if we could, it should + * in practice not be too bad given that we should only ever + * end up doing linear seeks with at most three blocks. As soon + * as we have more than three blocks we would have an index, so + * we would not do a linear search there anymore. + */ + memset(&next.br.block, 0, sizeof(next.br.block)); + next.br.zstream = NULL; + next.br.uncompressed_data = NULL; + next.br.uncompressed_cap = 0; + + err = table_iter_next_block(&next); if (err < 0) goto done; - - if (err > 0) { + if (err > 0) break; - } - err = block_reader_first_key(next.bi.br, &got_key); + err = block_reader_first_key(&next.br, &got_key); if (err < 0) goto done; @@ -474,16 +479,20 @@ static int reader_seek_linear(struct table_iter *ti, } table_iter_block_done(ti); - table_iter_copy_from(ti, &next); + *ti = next; } - err = block_iter_seek(&ti->bi, &want_key); + /* + * We have located the block that must contain our record, so we seek + * the wanted key inside of it. If the block does not contain our key + * we know that the corresponding record does not exist. + */ + err = block_iter_seek_key(&ti->bi, &ti->br, &want_key); if (err < 0) goto done; err = 0; done: - block_iter_close(&next.bi); reftable_record_release(&rec); strbuf_release(&want_key); strbuf_release(&got_key); @@ -502,6 +511,7 @@ static int reader_seek_indexed(struct reftable_reader *r, .u.idx = { .last_key = STRBUF_INIT }, }; struct table_iter index_iter = TABLE_ITER_INIT; + struct table_iter empty = TABLE_ITER_INIT; struct table_iter next = TABLE_ITER_INIT; int err = 0; @@ -510,10 +520,39 @@ static int reader_seek_indexed(struct reftable_reader *r, if (err < 0) goto done; + /* + * The index may consist of multiple levels, where each level may have + * multiple index blocks. We start by doing a linear search in the + * highest layer that identifies the relevant index block as well as + * the record inside that block that corresponds to our wanted key. + */ err = reader_seek_linear(&index_iter, &want_index); + if (err < 0) + goto done; + + /* + * Traverse down the levels until we find a non-index entry. + */ while (1) { + /* + * In case we seek a record that does not exist the index iter + * will tell us that the iterator is over. This works because + * the last index entry of the current level will contain the + * last key it knows about. So in case our seeked key is larger + * than the last indexed key we know that it won't exist. + * + * There is one subtlety in the layout of the index section + * that makes this work as expected: the highest-level index is + * at end of the section and will point backwards and thus we + * start reading from the end of the index section, not the + * beginning. + * + * If that wasn't the case and the order was reversed then the + * linear seek would seek into the lower levels and traverse + * all levels of the index only to find out that the key does + * not exist. + */ err = table_iter_next(&index_iter, &index_result); - table_iter_block_done(&index_iter); if (err != 0) goto done; @@ -522,7 +561,7 @@ static int reader_seek_indexed(struct reftable_reader *r, if (err != 0) goto done; - err = block_iter_seek(&next.bi, &want_index.u.idx.last_key); + err = block_iter_seek_key(&next.bi, &next.br, &want_index.u.idx.last_key); if (err < 0) goto done; @@ -536,19 +575,20 @@ static int reader_seek_indexed(struct reftable_reader *r, break; } - table_iter_copy_from(&index_iter, &next); + table_iter_close(&index_iter); + index_iter = next; + next = empty; } if (err == 0) { - struct table_iter empty = TABLE_ITER_INIT; - struct table_iter *malloced = - reftable_calloc(sizeof(struct table_iter)); - *malloced = empty; - table_iter_copy_from(malloced, &next); + struct table_iter *malloced = reftable_calloc(1, sizeof(*malloced)); + *malloced = next; + next = empty; iterator_from_table_iter(it, malloced); } + done: - block_iter_close(&next.bi); + table_iter_close(&next); table_iter_close(&index_iter); reftable_record_release(&want_index); reftable_record_release(&index_result); @@ -562,25 +602,28 @@ static int reader_seek_internal(struct reftable_reader *r, struct reftable_reader_offsets *offs = reader_offsets_for(r, reftable_record_type(rec)); uint64_t idx = offs->index_offset; - struct table_iter ti = TABLE_ITER_INIT; - int err = 0; + struct table_iter ti = TABLE_ITER_INIT, *p; + int err; + if (idx > 0) return reader_seek_indexed(r, it, rec); err = reader_start(r, &ti, reftable_record_type(rec), 0); if (err < 0) - return err; + goto out; + err = reader_seek_linear(&ti, rec); if (err < 0) - return err; - else { - struct table_iter *p = - reftable_malloc(sizeof(struct table_iter)); - *p = ti; - iterator_from_table_iter(it, p); - } + goto out; - return 0; + REFTABLE_ALLOC_ARRAY(p, 1); + *p = ti; + iterator_from_table_iter(it, p); + +out: + if (err) + table_iter_close(&ti); + return err; } static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it, @@ -637,8 +680,7 @@ void reader_close(struct reftable_reader *r) int reftable_new_reader(struct reftable_reader **p, struct reftable_block_source *src, char const *name) { - struct reftable_reader *rd = - reftable_calloc(sizeof(struct reftable_reader)); + struct reftable_reader *rd = reftable_calloc(1, sizeof(*rd)); int err = init_reader(rd, src, name); if (err == 0) { *p = rd; @@ -713,7 +755,7 @@ static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, uint8_t *oid) { struct table_iter ti_empty = TABLE_ITER_INIT; - struct table_iter *ti = reftable_calloc(sizeof(struct table_iter)); + struct table_iter *ti = reftable_calloc(1, sizeof(*ti)); struct filtering_ref_iterator *filter = NULL; struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT; int oid_len = hash_size(r->hash_id); diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index 469ab79..a6dbd21 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -11,7 +11,6 @@ https://developers.google.com/open-source/licenses/bsd #include "basics.h" #include "block.h" #include "blocksource.h" -#include "constants.h" #include "reader.h" #include "record.h" #include "test_framework.h" @@ -52,26 +51,25 @@ static void write_table(char ***names, struct strbuf *buf, int N, .hash_id = hash_id, }; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, buf, &opts); struct reftable_ref_record ref = { NULL }; int i = 0, n; struct reftable_log_record log = { NULL }; const struct reftable_stats *stats = NULL; - *names = reftable_calloc(sizeof(char *) * (N + 1)); + + REFTABLE_CALLOC_ARRAY(*names, N + 1); + reftable_writer_set_limits(w, update_index, update_index); for (i = 0; i < N; i++) { - uint8_t hash[GIT_SHA256_RAWSZ] = { 0 }; char name[100]; int n; - set_test_hash(hash, i); - snprintf(name, sizeof(name), "refs/heads/branch%02d", i); ref.refname = name; ref.update_index = update_index; ref.value_type = REFTABLE_REF_VAL1; - ref.value.val1 = hash; + set_test_hash(ref.value.val1, i); (*names)[i] = xstrdup(name); n = reftable_writer_add_ref(w, &ref); @@ -79,18 +77,15 @@ static void write_table(char ***names, struct strbuf *buf, int N, } for (i = 0; i < N; i++) { - uint8_t hash[GIT_SHA256_RAWSZ] = { 0 }; char name[100]; int n; - set_test_hash(hash, i); - snprintf(name, sizeof(name), "refs/heads/branch%02d", i); log.refname = name; log.update_index = update_index; log.value_type = REFTABLE_LOG_UPDATE; - log.value.update.new_hash = hash; + set_test_hash(log.value.update.new_hash, i); log.value.update.message = "message"; n = reftable_writer_add_log(w, &log); @@ -134,18 +129,15 @@ static void test_log_buffer_size(void) .message = "commit: 9\n", } } }; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); /* This tests buffer extension for log compression. Must use a random hash, to ensure that the compressed part is larger than the original. */ - uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; for (i = 0; i < GIT_SHA1_RAWSZ; i++) { - hash1[i] = (uint8_t)(rand() % 256); - hash2[i] = (uint8_t)(rand() % 256); + log.value.update.old_hash[i] = (uint8_t)(git_rand() % 256); + log.value.update.new_hash[i] = (uint8_t)(git_rand() % 256); } - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; reftable_writer_set_limits(w, update_index, update_index); err = reftable_writer_add_log(w, &log); EXPECT_ERR(err); @@ -163,25 +155,26 @@ static void test_log_overflow(void) .block_size = ARRAY_SIZE(msg), }; int err; - struct reftable_log_record - log = { .refname = "refs/heads/master", - .update_index = 0xa, - .value_type = REFTABLE_LOG_UPDATE, - .value = { .update = { - .name = "Han-Wen Nienhuys", - .email = "hanwen@google.com", - .tz_offset = 100, - .time = 0x5e430672, - .message = msg, - } } }; + struct reftable_log_record log = { + .refname = "refs/heads/master", + .update_index = 0xa, + .value_type = REFTABLE_LOG_UPDATE, + .value = { + .update = { + .old_hash = { 1 }, + .new_hash = { 2 }, + .name = "Han-Wen Nienhuys", + .email = "hanwen@google.com", + .tz_offset = 100, + .time = 0x5e430672, + .message = msg, + }, + }, + }; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); - - uint8_t hash1[GIT_SHA1_RAWSZ] = {1}, hash2[GIT_SHA1_RAWSZ] = { 2 }; + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); memset(msg, 'x', sizeof(msg) - 1); - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; reftable_writer_set_limits(w, update_index, update_index); err = reftable_writer_add_log(w, &log); EXPECT(err == REFTABLE_ENTRY_TOO_BIG_ERROR); @@ -192,7 +185,7 @@ static void test_log_overflow(void) static void test_log_write_read(void) { int N = 2; - char **names = reftable_calloc(sizeof(char *) * (N + 1)); + char **names = reftable_calloc(N + 1, sizeof(*names)); int err; struct reftable_write_options opts = { .block_size = 256, @@ -206,7 +199,7 @@ static void test_log_write_read(void) struct reftable_block_source source = { NULL }; struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); const struct reftable_stats *stats = NULL; reftable_writer_set_limits(w, 0, N); for (i = 0; i < N; i++) { @@ -221,16 +214,13 @@ static void test_log_write_read(void) EXPECT_ERR(err); } for (i = 0; i < N; i++) { - uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; struct reftable_log_record log = { NULL }; - set_test_hash(hash1, i); - set_test_hash(hash2, i + 1); log.refname = names[i]; log.update_index = i; log.value_type = REFTABLE_LOG_UPDATE; - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; + set_test_hash(log.value.update.old_hash, i); + set_test_hash(log.value.update.new_hash, i + 1); err = reftable_writer_add_log(w, &log); EXPECT_ERR(err); @@ -298,20 +288,17 @@ static void test_log_zlib_corruption(void) struct reftable_block_source source = { 0 }; struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); const struct reftable_stats *stats = NULL; - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 }; - uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 }; char message[100] = { 0 }; int err, i, n; - struct reftable_log_record log = { .refname = "refname", .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .new_hash = hash1, - .old_hash = hash2, + .new_hash = { 1 }, + .old_hash = { 2 }, .name = "My Name", .email = "myname@invalid", .message = message, @@ -320,7 +307,7 @@ static void test_log_zlib_corruption(void) }; for (i = 0; i < sizeof(message) - 1; i++) - message[i] = (uint8_t)(rand() % 64 + ' '); + message[i] = (uint8_t)(git_rand() % 64 + ' '); reftable_writer_set_limits(w, 1, 1); @@ -523,7 +510,7 @@ static void test_table_read_write_seek_index(void) static void test_table_refs_for(int indexed) { int N = 50; - char **want_names = reftable_calloc(sizeof(char *) * (N + 1)); + char **want_names = reftable_calloc(N + 1, sizeof(*want_names)); int want_names_len = 0; uint8_t want_hash[GIT_SHA1_RAWSZ]; @@ -539,7 +526,7 @@ static void test_table_refs_for(int indexed) struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); struct reftable_iterator it = { NULL }; int j; @@ -550,8 +537,6 @@ static void test_table_refs_for(int indexed) uint8_t hash[GIT_SHA1_RAWSZ]; char fill[51] = { 0 }; char name[100]; - uint8_t hash1[GIT_SHA1_RAWSZ]; - uint8_t hash2[GIT_SHA1_RAWSZ]; struct reftable_ref_record ref = { NULL }; memset(hash, i, sizeof(hash)); @@ -561,11 +546,9 @@ static void test_table_refs_for(int indexed) name[40] = 0; ref.refname = name; - set_test_hash(hash1, i / 4); - set_test_hash(hash2, 3 + i / 4); ref.value_type = REFTABLE_REF_VAL2; - ref.value.val2.value = hash1; - ref.value.val2.target_value = hash2; + set_test_hash(ref.value.val2.value, i / 4); + set_test_hash(ref.value.val2.target_value, 3 + i / 4); /* 80 bytes / entry, so 3 entries per block. Yields 17 */ @@ -573,8 +556,8 @@ static void test_table_refs_for(int indexed) n = reftable_writer_add_ref(w, &ref); EXPECT(n == 0); - if (!memcmp(hash1, want_hash, GIT_SHA1_RAWSZ) || - !memcmp(hash2, want_hash, GIT_SHA1_RAWSZ)) { + if (!memcmp(ref.value.val2.value, want_hash, GIT_SHA1_RAWSZ) || + !memcmp(ref.value.val2.target_value, want_hash, GIT_SHA1_RAWSZ)) { want_names[want_names_len++] = xstrdup(name); } } @@ -636,7 +619,7 @@ static void test_write_empty_table(void) struct reftable_write_options opts = { 0 }; struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); struct reftable_block_source source = { NULL }; struct reftable_reader *rd = NULL; struct reftable_ref_record rec = { NULL }; @@ -674,12 +657,11 @@ static void test_write_object_id_min_length(void) }; struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); - uint8_t hash[GIT_SHA1_RAWSZ] = {42}; + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); struct reftable_ref_record ref = { .update_index = 1, .value_type = REFTABLE_REF_VAL1, - .value.val1 = hash, + .value.val1 = {42}, }; int err; int i; @@ -710,12 +692,11 @@ static void test_write_object_id_length(void) }; struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); - uint8_t hash[GIT_SHA1_RAWSZ] = {42}; + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); struct reftable_ref_record ref = { .update_index = 1, .value_type = REFTABLE_REF_VAL1, - .value.val1 = hash, + .value.val1 = {42}, }; int err; int i; @@ -745,7 +726,7 @@ static void test_write_empty_key(void) struct reftable_write_options opts = { 0 }; struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); struct reftable_ref_record ref = { .refname = "", .update_index = 1, @@ -768,7 +749,7 @@ static void test_write_key_order(void) struct reftable_write_options opts = { 0 }; struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); struct reftable_ref_record refs[2] = { { .refname = "b", @@ -798,6 +779,138 @@ static void test_write_key_order(void) strbuf_release(&buf); } +static void test_write_multiple_indices(void) +{ + struct reftable_write_options opts = { + .block_size = 100, + }; + struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT; + struct reftable_block_source source = { 0 }; + struct reftable_iterator it = { 0 }; + const struct reftable_stats *stats; + struct reftable_writer *writer; + struct reftable_reader *reader; + int err, i; + + writer = reftable_new_writer(&strbuf_add_void, &noop_flush, &writer_buf, &opts); + reftable_writer_set_limits(writer, 1, 1); + for (i = 0; i < 100; i++) { + struct reftable_ref_record ref = { + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = {i}, + }; + + strbuf_reset(&buf); + strbuf_addf(&buf, "refs/heads/%04d", i); + ref.refname = buf.buf, + + err = reftable_writer_add_ref(writer, &ref); + EXPECT_ERR(err); + } + + for (i = 0; i < 100; i++) { + struct reftable_log_record log = { + .update_index = 1, + .value_type = REFTABLE_LOG_UPDATE, + .value.update = { + .old_hash = { i }, + .new_hash = { i }, + }, + }; + + strbuf_reset(&buf); + strbuf_addf(&buf, "refs/heads/%04d", i); + log.refname = buf.buf, + + err = reftable_writer_add_log(writer, &log); + EXPECT_ERR(err); + } + + reftable_writer_close(writer); + + /* + * The written data should be sufficiently large to result in indices + * for each of the block types. + */ + stats = reftable_writer_stats(writer); + EXPECT(stats->ref_stats.index_offset > 0); + EXPECT(stats->obj_stats.index_offset > 0); + EXPECT(stats->log_stats.index_offset > 0); + + block_source_from_strbuf(&source, &writer_buf); + err = reftable_new_reader(&reader, &source, "filename"); + EXPECT_ERR(err); + + /* + * Seeking the log uses the log index now. In case there is any + * confusion regarding indices we would notice here. + */ + err = reftable_reader_seek_log(reader, &it, ""); + EXPECT_ERR(err); + + reftable_iterator_destroy(&it); + reftable_writer_free(writer); + reftable_reader_free(reader); + strbuf_release(&writer_buf); + strbuf_release(&buf); +} + +static void test_write_multi_level_index(void) +{ + struct reftable_write_options opts = { + .block_size = 100, + }; + struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT; + struct reftable_block_source source = { 0 }; + struct reftable_iterator it = { 0 }; + const struct reftable_stats *stats; + struct reftable_writer *writer; + struct reftable_reader *reader; + int err; + + writer = reftable_new_writer(&strbuf_add_void, &noop_flush, &writer_buf, &opts); + reftable_writer_set_limits(writer, 1, 1); + for (size_t i = 0; i < 200; i++) { + struct reftable_ref_record ref = { + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = {i}, + }; + + strbuf_reset(&buf); + strbuf_addf(&buf, "refs/heads/%03" PRIuMAX, (uintmax_t)i); + ref.refname = buf.buf, + + err = reftable_writer_add_ref(writer, &ref); + EXPECT_ERR(err); + } + reftable_writer_close(writer); + + /* + * The written refs should be sufficiently large to result in a + * multi-level index. + */ + stats = reftable_writer_stats(writer); + EXPECT(stats->ref_stats.max_index_level == 2); + + block_source_from_strbuf(&source, &writer_buf); + err = reftable_new_reader(&reader, &source, "filename"); + EXPECT_ERR(err); + + /* + * Seeking the last ref should work as expected. + */ + err = reftable_reader_seek_ref(reader, &it, "refs/heads/199"); + EXPECT_ERR(err); + + reftable_iterator_destroy(&it); + reftable_writer_free(writer); + reftable_reader_free(reader); + strbuf_release(&writer_buf); + strbuf_release(&buf); +} + static void test_corrupt_table_empty(void) { struct strbuf buf = STRBUF_INIT; @@ -847,5 +960,7 @@ int readwrite_test_main(int argc, const char *argv[]) RUN_TEST(test_log_overflow); RUN_TEST(test_write_object_id_length); RUN_TEST(test_write_object_id_min_length); + RUN_TEST(test_write_multiple_indices); + RUN_TEST(test_write_multi_level_index); return 0; } diff --git a/reftable/record.c b/reftable/record.c index fbaa1fb..5506f3e 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -76,7 +76,7 @@ int reftable_is_block_type(uint8_t typ) return 0; } -uint8_t *reftable_ref_record_val1(const struct reftable_ref_record *rec) +const unsigned char *reftable_ref_record_val1(const struct reftable_ref_record *rec) { switch (rec->value_type) { case REFTABLE_REF_VAL1: @@ -88,7 +88,7 @@ uint8_t *reftable_ref_record_val1(const struct reftable_ref_record *rec) } } -uint8_t *reftable_ref_record_val2(const struct reftable_ref_record *rec) +const unsigned char *reftable_ref_record_val2(const struct reftable_ref_record *rec) { switch (rec->value_type) { case REFTABLE_REF_VAL2: @@ -159,34 +159,49 @@ int reftable_encode_key(int *restart, struct string_view dest, return start.len - dest.len; } -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in) +int reftable_decode_keylen(struct string_view in, + uint64_t *prefix_len, + uint64_t *suffix_len, + uint8_t *extra) { - int start_len = in.len; - uint64_t prefix_len = 0; - uint64_t suffix_len = 0; - int n = get_var_int(&prefix_len, &in); + size_t start_len = in.len; + int n; + + n = get_var_int(prefix_len, &in); if (n < 0) return -1; string_view_consume(&in, n); - if (prefix_len > last_key.len) - return -1; - - n = get_var_int(&suffix_len, &in); + n = get_var_int(suffix_len, &in); if (n <= 0) return -1; string_view_consume(&in, n); - *extra = (uint8_t)(suffix_len & 0x7); - suffix_len >>= 3; + *extra = (uint8_t)(*suffix_len & 0x7); + *suffix_len >>= 3; + + return start_len - in.len; +} + +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in) +{ + int start_len = in.len; + uint64_t prefix_len = 0; + uint64_t suffix_len = 0; + int n; + + n = reftable_decode_keylen(in, &prefix_len, &suffix_len, extra); + if (n < 0) + return -1; + string_view_consume(&in, n); - if (in.len < suffix_len) + if (in.len < suffix_len || + prefix_len > last_key->len) return -1; - strbuf_reset(key); - strbuf_add(key, last_key.buf, prefix_len); - strbuf_add(key, in.buf, suffix_len); + strbuf_setlen(last_key, prefix_len); + strbuf_add(last_key, in.buf, suffix_len); string_view_consume(&in, suffix_len); return start_len - in.len; @@ -205,27 +220,36 @@ static void reftable_ref_record_copy_from(void *rec, const void *src_rec, { struct reftable_ref_record *ref = rec; const struct reftable_ref_record *src = src_rec; + char *refname = NULL; + size_t refname_cap = 0; + assert(hash_size > 0); - /* This is simple and correct, but we could probably reuse the hash - * fields. */ + SWAP(refname, ref->refname); + SWAP(refname_cap, ref->refname_cap); reftable_ref_record_release(ref); + SWAP(ref->refname, refname); + SWAP(ref->refname_cap, refname_cap); + if (src->refname) { - ref->refname = xstrdup(src->refname); + size_t refname_len = strlen(src->refname); + + REFTABLE_ALLOC_GROW(ref->refname, refname_len + 1, + ref->refname_cap); + memcpy(ref->refname, src->refname, refname_len); + ref->refname[refname_len] = 0; } + ref->update_index = src->update_index; ref->value_type = src->value_type; switch (src->value_type) { case REFTABLE_REF_DELETION: break; case REFTABLE_REF_VAL1: - ref->value.val1 = reftable_malloc(hash_size); memcpy(ref->value.val1, src->value.val1, hash_size); break; case REFTABLE_REF_VAL2: - ref->value.val2.value = reftable_malloc(hash_size); memcpy(ref->value.val2.value, src->value.val2.value, hash_size); - ref->value.val2.target_value = reftable_malloc(hash_size); memcpy(ref->value.val2.target_value, src->value.val2.target_value, hash_size); break; @@ -242,7 +266,7 @@ static char hexdigit(int c) return 'a' + (c - 10); } -static void hex_format(char *dest, uint8_t *src, int hash_size) +static void hex_format(char *dest, const unsigned char *src, int hash_size) { assert(hash_size > 0); if (src) { @@ -299,11 +323,8 @@ void reftable_ref_record_release(struct reftable_ref_record *ref) reftable_free(ref->value.symref); break; case REFTABLE_REF_VAL2: - reftable_free(ref->value.val2.target_value); - reftable_free(ref->value.val2.value); break; case REFTABLE_REF_VAL1: - reftable_free(ref->value.val1); break; case REFTABLE_REF_DELETION: break; @@ -369,24 +390,33 @@ static int reftable_ref_record_encode(const void *rec, struct string_view s, static int reftable_ref_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch) { struct reftable_ref_record *r = rec; struct string_view start = in; uint64_t update_index = 0; - int n = get_var_int(&update_index, &in); + const char *refname = NULL; + size_t refname_cap = 0; + int n; + + assert(hash_size > 0); + + n = get_var_int(&update_index, &in); if (n < 0) return n; string_view_consume(&in, n); + SWAP(refname, r->refname); + SWAP(refname_cap, r->refname_cap); reftable_ref_record_release(r); + SWAP(r->refname, refname); + SWAP(r->refname_cap, refname_cap); - assert(hash_size > 0); - - r->refname = reftable_realloc(r->refname, key.len + 1); + REFTABLE_ALLOC_GROW(r->refname, key.len + 1, r->refname_cap); memcpy(r->refname, key.buf, key.len); - r->update_index = update_index; r->refname[key.len] = 0; + + r->update_index = update_index; r->value_type = val_type; switch (val_type) { case REFTABLE_REF_VAL1: @@ -394,7 +424,6 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, return -1; } - r->value.val1 = reftable_malloc(hash_size); memcpy(r->value.val1, in.buf, hash_size); string_view_consume(&in, hash_size); break; @@ -404,23 +433,20 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, return -1; } - r->value.val2.value = reftable_malloc(hash_size); memcpy(r->value.val2.value, in.buf, hash_size); string_view_consume(&in, hash_size); - r->value.val2.target_value = reftable_malloc(hash_size); memcpy(r->value.val2.target_value, in.buf, hash_size); string_view_consume(&in, hash_size); break; case REFTABLE_REF_SYMREF: { - struct strbuf dest = STRBUF_INIT; - int n = decode_string(&dest, in); + int n = decode_string(scratch, in); if (n < 0) { return -1; } string_view_consume(&in, n); - r->value.symref = dest.buf; + r->value.symref = strbuf_detach(scratch, NULL); } break; case REFTABLE_REF_DELETION: @@ -439,7 +465,6 @@ static int reftable_ref_record_is_deletion_void(const void *p) (const struct reftable_ref_record *)p); } - static int reftable_ref_record_equal_void(const void *a, const void *b, int hash_size) { @@ -448,6 +473,13 @@ static int reftable_ref_record_equal_void(const void *a, return reftable_ref_record_equal(ra, rb, hash_size); } +static int reftable_ref_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_ref_record *a = _a; + const struct reftable_ref_record *b = _b; + return strcmp(a->refname, b->refname); +} + static void reftable_ref_record_print_void(const void *rec, int hash_size) { @@ -464,6 +496,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = { .release = &reftable_ref_record_release_void, .is_deletion = &reftable_ref_record_is_deletion_void, .equal = &reftable_ref_record_equal_void, + .cmp = &reftable_ref_record_cmp_void, .print = &reftable_ref_record_print_void, }; @@ -506,12 +539,13 @@ static void reftable_obj_record_copy_from(void *rec, const void *src_rec, (const struct reftable_obj_record *)src_rec; reftable_obj_record_release(obj); - obj->hash_prefix = reftable_malloc(src->hash_prefix_len); + + REFTABLE_ALLOC_ARRAY(obj->hash_prefix, src->hash_prefix_len); obj->hash_prefix_len = src->hash_prefix_len; if (src->hash_prefix_len) memcpy(obj->hash_prefix, src->hash_prefix, obj->hash_prefix_len); - obj->offsets = reftable_malloc(src->offset_len * sizeof(uint64_t)); + REFTABLE_ALLOC_ARRAY(obj->offsets, src->offset_len); obj->offset_len = src->offset_len; COPY_ARRAY(obj->offsets, src->offsets, src->offset_len); } @@ -560,7 +594,7 @@ static int reftable_obj_record_encode(const void *rec, struct string_view s, static int reftable_obj_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch UNUSED) { struct string_view start = in; struct reftable_obj_record *r = rec; @@ -568,7 +602,10 @@ static int reftable_obj_record_decode(void *rec, struct strbuf key, int n = 0; uint64_t last; int j; - r->hash_prefix = reftable_malloc(key.len); + + reftable_obj_record_release(r); + + REFTABLE_ALLOC_ARRAY(r->hash_prefix, key.len); memcpy(r->hash_prefix, key.buf, key.len); r->hash_prefix_len = key.len; @@ -586,7 +623,7 @@ static int reftable_obj_record_decode(void *rec, struct strbuf key, if (count == 0) return start.len - in.len; - r->offsets = reftable_malloc(count * sizeof(uint64_t)); + REFTABLE_ALLOC_ARRAY(r->offsets, count); r->offset_len = count; n = get_var_int(&r->offsets[0], &in); @@ -634,6 +671,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash return 1; } +static int reftable_obj_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_obj_record *a = _a; + const struct reftable_obj_record *b = _b; + int cmp; + + cmp = memcmp(a->hash_prefix, b->hash_prefix, + a->hash_prefix_len > b->hash_prefix_len ? + a->hash_prefix_len : b->hash_prefix_len); + if (cmp) + return cmp; + + /* + * When the prefix is the same then the object record that is longer is + * considered to be bigger. + */ + return a->hash_prefix_len - b->hash_prefix_len; +} + static struct reftable_record_vtable reftable_obj_record_vtable = { .key = &reftable_obj_record_key, .type = BLOCK_TYPE_OBJ, @@ -644,6 +700,7 @@ static struct reftable_record_vtable reftable_obj_record_vtable = { .release = &reftable_obj_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_obj_record_equal_void, + .cmp = &reftable_obj_record_cmp_void, .print = &reftable_obj_record_print, }; @@ -723,16 +780,10 @@ static void reftable_log_record_copy_from(void *rec, const void *src_rec, xstrdup(dst->value.update.message); } - if (dst->value.update.new_hash) { - dst->value.update.new_hash = reftable_malloc(hash_size); - memcpy(dst->value.update.new_hash, - src->value.update.new_hash, hash_size); - } - if (dst->value.update.old_hash) { - dst->value.update.old_hash = reftable_malloc(hash_size); - memcpy(dst->value.update.old_hash, - src->value.update.old_hash, hash_size); - } + memcpy(dst->value.update.new_hash, + src->value.update.new_hash, hash_size); + memcpy(dst->value.update.old_hash, + src->value.update.old_hash, hash_size); break; } } @@ -750,8 +801,6 @@ void reftable_log_record_release(struct reftable_log_record *r) case REFTABLE_LOG_DELETION: break; case REFTABLE_LOG_UPDATE: - reftable_free(r->value.update.new_hash); - reftable_free(r->value.update.old_hash); reftable_free(r->value.update.name); reftable_free(r->value.update.email); reftable_free(r->value.update.message); @@ -768,33 +817,20 @@ static uint8_t reftable_log_record_val_type(const void *rec) return reftable_log_record_is_deletion(log) ? 0 : 1; } -static uint8_t zero[GIT_SHA256_RAWSZ] = { 0 }; - static int reftable_log_record_encode(const void *rec, struct string_view s, int hash_size) { const struct reftable_log_record *r = rec; struct string_view start = s; int n = 0; - uint8_t *oldh = NULL; - uint8_t *newh = NULL; if (reftable_log_record_is_deletion(r)) return 0; - oldh = r->value.update.old_hash; - newh = r->value.update.new_hash; - if (!oldh) { - oldh = zero; - } - if (!newh) { - newh = zero; - } - if (s.len < 2 * hash_size) return -1; - memcpy(s.buf, oldh, hash_size); - memcpy(s.buf + hash_size, newh, hash_size); + memcpy(s.buf, r->value.update.old_hash, hash_size); + memcpy(s.buf + hash_size, r->value.update.new_hash, hash_size); string_view_consume(&s, 2 * hash_size); n = encode_string(r->value.update.name ? r->value.update.name : "", s); @@ -830,19 +866,18 @@ static int reftable_log_record_encode(const void *rec, struct string_view s, static int reftable_log_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch) { struct string_view start = in; struct reftable_log_record *r = rec; uint64_t max = 0; uint64_t ts = 0; - struct strbuf dest = STRBUF_INIT; int n; if (key.len <= 9 || key.buf[key.len - 9] != 0) return REFTABLE_FORMAT_ERROR; - r->refname = reftable_realloc(r->refname, key.len - 8); + REFTABLE_ALLOC_GROW(r->refname, key.len - 8, r->refname_cap); memcpy(r->refname, key.buf, key.len - 8); ts = get_be64(key.buf + key.len - 8); @@ -851,9 +886,8 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, if (val_type != r->value_type) { switch (r->value_type) { case REFTABLE_LOG_UPDATE: - FREE_AND_NULL(r->value.update.old_hash); - FREE_AND_NULL(r->value.update.new_hash); FREE_AND_NULL(r->value.update.message); + r->value.update.message_cap = 0; FREE_AND_NULL(r->value.update.email); FREE_AND_NULL(r->value.update.name); break; @@ -869,36 +903,43 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, if (in.len < 2 * hash_size) return REFTABLE_FORMAT_ERROR; - r->value.update.old_hash = - reftable_realloc(r->value.update.old_hash, hash_size); - r->value.update.new_hash = - reftable_realloc(r->value.update.new_hash, hash_size); - memcpy(r->value.update.old_hash, in.buf, hash_size); memcpy(r->value.update.new_hash, in.buf + hash_size, hash_size); string_view_consume(&in, 2 * hash_size); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - r->value.update.name = - reftable_realloc(r->value.update.name, dest.len + 1); - memcpy(r->value.update.name, dest.buf, dest.len); - r->value.update.name[dest.len] = 0; + /* + * In almost all cases we can expect the reflog name to not change for + * reflog entries as they are tied to the local identity, not to the + * target commits. As an optimization for this common case we can thus + * skip copying over the name in case it's accurate already. + */ + if (!r->value.update.name || + strcmp(r->value.update.name, scratch->buf)) { + r->value.update.name = + reftable_realloc(r->value.update.name, scratch->len + 1); + memcpy(r->value.update.name, scratch->buf, scratch->len); + r->value.update.name[scratch->len] = 0; + } - strbuf_reset(&dest); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - r->value.update.email = - reftable_realloc(r->value.update.email, dest.len + 1); - memcpy(r->value.update.email, dest.buf, dest.len); - r->value.update.email[dest.len] = 0; + /* Same as above, but for the reflog email. */ + if (!r->value.update.email || + strcmp(r->value.update.email, scratch->buf)) { + r->value.update.email = + reftable_realloc(r->value.update.email, scratch->len + 1); + memcpy(r->value.update.email, scratch->buf, scratch->len); + r->value.update.email[scratch->len] = 0; + } ts = 0; n = get_var_int(&ts, &in); @@ -912,22 +953,19 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, r->value.update.tz_offset = get_be16(in.buf); string_view_consume(&in, 2); - strbuf_reset(&dest); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - r->value.update.message = - reftable_realloc(r->value.update.message, dest.len + 1); - memcpy(r->value.update.message, dest.buf, dest.len); - r->value.update.message[dest.len] = 0; + REFTABLE_ALLOC_GROW(r->value.update.message, scratch->len + 1, + r->value.update.message_cap); + memcpy(r->value.update.message, scratch->buf, scratch->len); + r->value.update.message[scratch->len] = 0; - strbuf_release(&dest); return start.len - in.len; done: - strbuf_release(&dest); return REFTABLE_FORMAT_ERROR; } @@ -943,17 +981,6 @@ static int null_streq(char *a, char *b) return 0 == strcmp(a, b); } -static int zero_hash_eq(uint8_t *a, uint8_t *b, int sz) -{ - if (!a) - a = zero; - - if (!b) - b = zero; - - return !memcmp(a, b, sz); -} - static int reftable_log_record_equal_void(const void *a, const void *b, int hash_size) { @@ -962,6 +989,22 @@ static int reftable_log_record_equal_void(const void *a, hash_size); } +static int reftable_log_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_log_record *a = _a; + const struct reftable_log_record *b = _b; + int cmp = strcmp(a->refname, b->refname); + if (cmp) + return cmp; + + /* + * Note that the comparison here is reversed. This is because the + * update index is reversed when comparing keys. For reference, see how + * we handle this in reftable_log_record_key()`. + */ + return b->update_index - a->update_index; +} + int reftable_log_record_equal(const struct reftable_log_record *a, const struct reftable_log_record *b, int hash_size) { @@ -981,10 +1024,10 @@ int reftable_log_record_equal(const struct reftable_log_record *a, b->value.update.email) && null_streq(a->value.update.message, b->value.update.message) && - zero_hash_eq(a->value.update.old_hash, - b->value.update.old_hash, hash_size) && - zero_hash_eq(a->value.update.new_hash, - b->value.update.new_hash, hash_size); + !memcmp(a->value.update.old_hash, + b->value.update.old_hash, hash_size) && + !memcmp(a->value.update.new_hash, + b->value.update.new_hash, hash_size); } abort(); @@ -1011,6 +1054,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = { .release = &reftable_log_record_release_void, .is_deletion = &reftable_log_record_is_deletion_void, .equal = &reftable_log_record_equal_void, + .cmp = &reftable_log_record_cmp_void, .print = &reftable_log_record_print_void, }; @@ -1061,7 +1105,7 @@ static int reftable_index_record_encode(const void *rec, struct string_view out, static int reftable_index_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch UNUSED) { struct string_view start = in; struct reftable_index_record *r = rec; @@ -1086,6 +1130,13 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key); } +static int reftable_index_record_cmp(const void *_a, const void *_b) +{ + const struct reftable_index_record *a = _a; + const struct reftable_index_record *b = _b; + return strbuf_cmp(&a->last_key, &b->last_key); +} + static void reftable_index_record_print(const void *rec, int hash_size) { const struct reftable_index_record *idx = rec; @@ -1103,6 +1154,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = { .release = &reftable_index_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_index_record_equal, + .cmp = &reftable_index_record_cmp, .print = &reftable_index_record_print, }; @@ -1111,11 +1163,6 @@ void reftable_record_key(struct reftable_record *rec, struct strbuf *dest) reftable_record_vtable(rec)->key(reftable_record_data(rec), dest); } -uint8_t reftable_record_type(struct reftable_record *rec) -{ - return rec->type; -} - int reftable_record_encode(struct reftable_record *rec, struct string_view dest, int hash_size) { @@ -1139,10 +1186,12 @@ uint8_t reftable_record_val_type(struct reftable_record *rec) } int reftable_record_decode(struct reftable_record *rec, struct strbuf key, - uint8_t extra, struct string_view src, int hash_size) + uint8_t extra, struct string_view src, int hash_size, + struct strbuf *scratch) { return reftable_record_vtable(rec)->decode(reftable_record_data(rec), - key, extra, src, hash_size); + key, extra, src, hash_size, + scratch); } void reftable_record_release(struct reftable_record *rec) @@ -1156,6 +1205,14 @@ int reftable_record_is_deletion(struct reftable_record *rec) reftable_record_data(rec)); } +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b) +{ + if (a->type != b->type) + BUG("cannot compare reftable records of different type"); + return reftable_record_vtable(a)->cmp( + reftable_record_data(a), reftable_record_data(b)); +} + int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size) { if (a->type != b->type) @@ -1164,7 +1221,7 @@ int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, reftable_record_data(a), reftable_record_data(b), hash_size); } -static int hash_equal(uint8_t *a, uint8_t *b, int hash_size) +static int hash_equal(const unsigned char *a, const unsigned char *b, int hash_size) { if (a && b) return !memcmp(a, b, hash_size); @@ -1229,12 +1286,6 @@ int reftable_log_record_is_deletion(const struct reftable_log_record *log) return (log->value_type == REFTABLE_LOG_DELETION); } -void string_view_consume(struct string_view *s, int n) -{ - s->buf += n; - s->len -= n; -} - static void *reftable_record_data(struct reftable_record *rec) { switch (rec->type) { @@ -1266,45 +1317,22 @@ reftable_record_vtable(struct reftable_record *rec) abort(); } -struct reftable_record reftable_new_record(uint8_t typ) +void reftable_record_init(struct reftable_record *rec, uint8_t typ) { - struct reftable_record clean = { - .type = typ, - }; + memset(rec, 0, sizeof(*rec)); + rec->type = typ; - /* the following is involved, but the naive solution (just return - * `clean` as is, except for BLOCK_TYPE_INDEX), returns a garbage - * clean.u.obj.offsets pointer on Windows VS CI. Go figure. - */ switch (typ) { - case BLOCK_TYPE_OBJ: - { - struct reftable_obj_record obj = { 0 }; - clean.u.obj = obj; - break; - } - case BLOCK_TYPE_INDEX: - { - struct reftable_index_record idx = { - .last_key = STRBUF_INIT, - }; - clean.u.idx = idx; - break; - } case BLOCK_TYPE_REF: - { - struct reftable_ref_record ref = { 0 }; - clean.u.ref = ref; - break; - } case BLOCK_TYPE_LOG: - { - struct reftable_log_record log = { 0 }; - clean.u.log = log; - break; - } + case BLOCK_TYPE_OBJ: + return; + case BLOCK_TYPE_INDEX: + strbuf_init(&rec->u.idx.last_key, 0); + return; + default: + BUG("unhandled record type"); } - return clean; } void reftable_record_print(struct reftable_record *rec, int hash_size) diff --git a/reftable/record.h b/reftable/record.h index fd80cd4..d778133 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -25,7 +25,11 @@ struct string_view { }; /* Advance `s.buf` by `n`, and decrease length. */ -void string_view_consume(struct string_view *s, int n); +static inline void string_view_consume(struct string_view *s, int n) +{ + s->buf += n; + s->len -= n; +} /* utilities for de/encoding varints */ @@ -51,7 +55,8 @@ struct reftable_record_vtable { /* decode data from `src` into the record. */ int (*decode)(void *rec, struct strbuf key, uint8_t extra, - struct string_view src, int hash_size); + struct string_view src, int hash_size, + struct strbuf *scratch); /* deallocate and null the record. */ void (*release)(void *rec); @@ -62,6 +67,12 @@ struct reftable_record_vtable { /* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */ int (*equal)(const void *a, const void *b, int hash_size); + /* + * Compare keys of two records with each other. The records must have + * the same type. + */ + int (*cmp)(const void *a, const void *b); + /* Print on stdout, for debugging. */ void (*print)(const void *rec, int hash_size); }; @@ -69,18 +80,24 @@ struct reftable_record_vtable { /* returns true for recognized block types. Block start with the block type. */ int reftable_is_block_type(uint8_t typ); -/* return an initialized record for the given type */ -struct reftable_record reftable_new_record(uint8_t typ); - /* Encode `key` into `dest`. Sets `is_restart` to indicate a restart. Returns * number of bytes written. */ int reftable_encode_key(int *is_restart, struct string_view dest, struct strbuf prev_key, struct strbuf key, uint8_t extra); -/* Decode into `key` and `extra` from `in` */ -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in); +/* Decode a record's key lengths. */ +int reftable_decode_keylen(struct string_view in, + uint64_t *prefix_len, + uint64_t *suffix_len, + uint8_t *extra); + +/* + * Decode into `last_key` and `extra` from `in`. `last_key` is expected to + * contain the decoded key of the preceding record, if any. + */ +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in); /* reftable_index_record are used internally to speed up lookups. */ struct reftable_index_record { @@ -100,8 +117,8 @@ struct reftable_obj_record { /* record is a generic wrapper for different types of records. It is normally * created on the stack, or embedded within another struct. If the type is * known, a fresh instance can be initialized explicitly. Otherwise, use - * reftable_new_record() to initialize generically (as the index_record is not - * valid as 0-initialized structure) + * `reftable_record_init()` to initialize generically (as the index_record is + * not valid as 0-initialized structure) */ struct reftable_record { uint8_t type; @@ -113,11 +130,14 @@ struct reftable_record { } u; }; +/* Initialize the reftable record for the given type */ +void reftable_record_init(struct reftable_record *rec, uint8_t typ); + /* see struct record_vtable */ +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); void reftable_record_print(struct reftable_record *rec, int hash_size); void reftable_record_key(struct reftable_record *rec, struct strbuf *dest); -uint8_t reftable_record_type(struct reftable_record *rec); void reftable_record_copy_from(struct reftable_record *rec, struct reftable_record *src, int hash_size); uint8_t reftable_record_val_type(struct reftable_record *rec); @@ -125,9 +145,14 @@ int reftable_record_encode(struct reftable_record *rec, struct string_view dest, int hash_size); int reftable_record_decode(struct reftable_record *rec, struct strbuf key, uint8_t extra, struct string_view src, - int hash_size); + int hash_size, struct strbuf *scratch); int reftable_record_is_deletion(struct reftable_record *rec); +static inline uint8_t reftable_record_type(struct reftable_record *rec) +{ + return rec->type; +} + /* frees and zeroes out the embedded record */ void reftable_record_release(struct reftable_record *rec); diff --git a/reftable/record_test.c b/reftable/record_test.c index 70ae78f..c158ee7 100644 --- a/reftable/record_test.c +++ b/reftable/record_test.c @@ -16,11 +16,11 @@ static void test_copy(struct reftable_record *rec) { - struct reftable_record copy = { 0 }; + struct reftable_record copy; uint8_t typ; typ = reftable_record_type(rec); - copy = reftable_new_record(typ); + reftable_record_init(©, typ); reftable_record_copy_from(©, rec, GIT_SHA1_RAWSZ); /* do it twice to catch memory leaks */ reftable_record_copy_from(©, rec, GIT_SHA1_RAWSZ); @@ -99,6 +99,7 @@ static void set_hash(uint8_t *h, int j) static void test_reftable_ref_record_roundtrip(void) { + struct strbuf scratch = STRBUF_INIT; int i = 0; for (i = REFTABLE_REF_DELETION; i < REFTABLE_NR_REF_VALUETYPES; i++) { @@ -119,15 +120,10 @@ static void test_reftable_ref_record_roundtrip(void) case REFTABLE_REF_DELETION: break; case REFTABLE_REF_VAL1: - in.u.ref.value.val1 = reftable_malloc(GIT_SHA1_RAWSZ); set_hash(in.u.ref.value.val1, 1); break; case REFTABLE_REF_VAL2: - in.u.ref.value.val2.value = - reftable_malloc(GIT_SHA1_RAWSZ); set_hash(in.u.ref.value.val2.value, 1); - in.u.ref.value.val2.target_value = - reftable_malloc(GIT_SHA1_RAWSZ); set_hash(in.u.ref.value.val2.target_value, 2); break; case REFTABLE_REF_SYMREF: @@ -145,7 +141,7 @@ static void test_reftable_ref_record_roundtrip(void) EXPECT(n > 0); /* decode into a non-zero reftable_record to test for leaks. */ - m = reftable_record_decode(&out, key, i, dest, GIT_SHA1_RAWSZ); + m = reftable_record_decode(&out, key, i, dest, GIT_SHA1_RAWSZ, &scratch); EXPECT(n == m); EXPECT(reftable_ref_record_equal(&in.u.ref, &out.u.ref, @@ -155,6 +151,8 @@ static void test_reftable_ref_record_roundtrip(void) strbuf_release(&key); reftable_record_release(&out); } + + strbuf_release(&scratch); } static void test_reftable_log_record_equal(void) @@ -180,7 +178,6 @@ static void test_reftable_log_record_equal(void) static void test_reftable_log_record_roundtrip(void) { int i; - struct reftable_log_record in[] = { { .refname = xstrdup("refs/heads/master"), @@ -188,8 +185,6 @@ static void test_reftable_log_record_roundtrip(void) .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .old_hash = reftable_malloc(GIT_SHA1_RAWSZ), - .new_hash = reftable_malloc(GIT_SHA1_RAWSZ), .name = xstrdup("han-wen"), .email = xstrdup("hanwen@google.com"), .message = xstrdup("test"), @@ -207,15 +202,10 @@ static void test_reftable_log_record_roundtrip(void) .refname = xstrdup("branch"), .update_index = 33, .value_type = REFTABLE_LOG_UPDATE, - .value = { - .update = { - .old_hash = reftable_malloc(GIT_SHA1_RAWSZ), - .new_hash = reftable_malloc(GIT_SHA1_RAWSZ), - /* rest of fields left empty. */ - }, - }, } }; + struct strbuf scratch = STRBUF_INIT; + set_test_hash(in[0].value.update.new_hash, 1); set_test_hash(in[0].value.update.old_hash, 2); set_test_hash(in[2].value.update.new_hash, 3); @@ -236,8 +226,6 @@ static void test_reftable_log_record_roundtrip(void) .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .new_hash = reftable_calloc(GIT_SHA1_RAWSZ), - .old_hash = reftable_calloc(GIT_SHA1_RAWSZ), .name = xstrdup("old name"), .email = xstrdup("old@email"), .message = xstrdup("old message"), @@ -257,7 +245,7 @@ static void test_reftable_log_record_roundtrip(void) EXPECT(n >= 0); valtype = reftable_record_val_type(&rec); m = reftable_record_decode(&out, key, valtype, dest, - GIT_SHA1_RAWSZ); + GIT_SHA1_RAWSZ, &scratch); EXPECT(n == m); EXPECT(reftable_log_record_equal(&in[i], &out.u.log, @@ -266,6 +254,8 @@ static void test_reftable_log_record_roundtrip(void) strbuf_release(&key); reftable_record_release(&out); } + + strbuf_release(&scratch); } static void test_u24_roundtrip(void) @@ -300,7 +290,8 @@ static void test_key_roundtrip(void) EXPECT(!restart); EXPECT(n > 0); - m = reftable_decode_key(&roundtrip, &rt_extra, last_key, dest); + strbuf_addstr(&roundtrip, "refs/heads/master"); + m = reftable_decode_key(&roundtrip, &rt_extra, dest); EXPECT(n == m); EXPECT(0 == strbuf_cmp(&key, &roundtrip)); EXPECT(rt_extra == extra); @@ -314,23 +305,27 @@ static void test_reftable_obj_record_roundtrip(void) { uint8_t testHash1[GIT_SHA1_RAWSZ] = { 1, 2, 3, 4, 0 }; uint64_t till9[] = { 1, 2, 3, 4, 500, 600, 700, 800, 9000 }; - struct reftable_obj_record recs[3] = { { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - .offsets = till9, - .offset_len = 3, - }, - { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - .offsets = till9, - .offset_len = 9, - }, - { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - } }; + struct reftable_obj_record recs[3] = { + { + .hash_prefix = testHash1, + .hash_prefix_len = 5, + .offsets = till9, + .offset_len = 3, + }, + { + .hash_prefix = testHash1, + .hash_prefix_len = 5, + .offsets = till9, + .offset_len = 9, + }, + { + .hash_prefix = testHash1, + .hash_prefix_len = 5, + }, + }; + struct strbuf scratch = STRBUF_INIT; int i = 0; + for (i = 0; i < ARRAY_SIZE(recs); i++) { uint8_t buffer[1024] = { 0 }; struct string_view dest = { @@ -354,13 +349,15 @@ static void test_reftable_obj_record_roundtrip(void) EXPECT(n > 0); extra = reftable_record_val_type(&in); m = reftable_record_decode(&out, key, extra, dest, - GIT_SHA1_RAWSZ); + GIT_SHA1_RAWSZ, &scratch); EXPECT(n == m); EXPECT(reftable_record_equal(&in, &out, GIT_SHA1_RAWSZ)); strbuf_release(&key); reftable_record_release(&out); } + + strbuf_release(&scratch); } static void test_reftable_index_record_roundtrip(void) @@ -377,6 +374,7 @@ static void test_reftable_index_record_roundtrip(void) .buf = buffer, .len = sizeof(buffer), }; + struct strbuf scratch = STRBUF_INIT; struct strbuf key = STRBUF_INIT; struct reftable_record out = { .type = BLOCK_TYPE_INDEX, @@ -394,13 +392,15 @@ static void test_reftable_index_record_roundtrip(void) EXPECT(n > 0); extra = reftable_record_val_type(&in); - m = reftable_record_decode(&out, key, extra, dest, GIT_SHA1_RAWSZ); + m = reftable_record_decode(&out, key, extra, dest, GIT_SHA1_RAWSZ, + &scratch); EXPECT(m == n); EXPECT(reftable_record_equal(&in, &out, GIT_SHA1_RAWSZ)); reftable_record_release(&out); strbuf_release(&key); + strbuf_release(&scratch); strbuf_release(&in.u.idx.last_key); } diff --git a/reftable/refname.c b/reftable/refname.c index 9573496..bbfde15 100644 --- a/reftable/refname.c +++ b/reftable/refname.c @@ -12,15 +12,15 @@ #include "refname.h" #include "reftable-iterator.h" -struct find_arg { - char **names; - const char *want; +struct refname_needle_lesseq_args { + char **haystack; + const char *needle; }; -static int find_name(size_t k, void *arg) +static int refname_needle_lesseq(size_t k, void *_args) { - struct find_arg *f_arg = arg; - return strcmp(f_arg->names[k], f_arg->want) >= 0; + struct refname_needle_lesseq_args *args = _args; + return strcmp(args->needle, args->haystack[k]) <= 0; } static int modification_has_ref(struct modification *mod, const char *name) @@ -29,25 +29,23 @@ static int modification_has_ref(struct modification *mod, const char *name) int err = 0; if (mod->add_len > 0) { - struct find_arg arg = { - .names = mod->add, - .want = name, + struct refname_needle_lesseq_args args = { + .haystack = mod->add, + .needle = name, }; - int idx = binsearch(mod->add_len, find_name, &arg); - if (idx < mod->add_len && !strcmp(mod->add[idx], name)) { + size_t idx = binsearch(mod->add_len, refname_needle_lesseq, &args); + if (idx < mod->add_len && !strcmp(mod->add[idx], name)) return 0; - } } if (mod->del_len > 0) { - struct find_arg arg = { - .names = mod->del, - .want = name, + struct refname_needle_lesseq_args args = { + .haystack = mod->del, + .needle = name, }; - int idx = binsearch(mod->del_len, find_name, &arg); - if (idx < mod->del_len && !strcmp(mod->del[idx], name)) { + size_t idx = binsearch(mod->del_len, refname_needle_lesseq, &args); + if (idx < mod->del_len && !strcmp(mod->del[idx], name)) return 1; - } } err = reftable_table_read_ref(&mod->tab, name, &ref); @@ -73,11 +71,11 @@ static int modification_has_ref_with_prefix(struct modification *mod, int err = 0; if (mod->add_len > 0) { - struct find_arg arg = { - .names = mod->add, - .want = prefix, + struct refname_needle_lesseq_args args = { + .haystack = mod->add, + .needle = prefix, }; - int idx = binsearch(mod->add_len, find_name, &arg); + size_t idx = binsearch(mod->add_len, refname_needle_lesseq, &args); if (idx < mod->add_len && !strncmp(prefix, mod->add[idx], strlen(prefix))) goto done; @@ -92,15 +90,14 @@ static int modification_has_ref_with_prefix(struct modification *mod, goto done; if (mod->del_len > 0) { - struct find_arg arg = { - .names = mod->del, - .want = ref.refname, + struct refname_needle_lesseq_args args = { + .haystack = mod->del, + .needle = ref.refname, }; - int idx = binsearch(mod->del_len, find_name, &arg); + size_t idx = binsearch(mod->del_len, refname_needle_lesseq, &args); if (idx < mod->del_len && - !strcmp(ref.refname, mod->del[idx])) { + !strcmp(ref.refname, mod->del[idx])) continue; - } } if (strncmp(ref.refname, prefix, strlen(prefix))) { @@ -140,8 +137,8 @@ int validate_ref_record_addition(struct reftable_table tab, { struct modification mod = { .tab = tab, - .add = reftable_calloc(sizeof(char *) * sz), - .del = reftable_calloc(sizeof(char *) * sz), + .add = reftable_calloc(sz, sizeof(*mod.add)), + .del = reftable_calloc(sz, sizeof(*mod.del)), }; int i = 0; int err = 0; diff --git a/reftable/refname_test.c b/reftable/refname_test.c index 8645cd9..b9cc625 100644 --- a/reftable/refname_test.c +++ b/reftable/refname_test.c @@ -9,7 +9,6 @@ https://developers.google.com/open-source/licenses/bsd #include "basics.h" #include "block.h" #include "blocksource.h" -#include "constants.h" #include "reader.h" #include "record.h" #include "refname.h" @@ -31,7 +30,7 @@ static void test_conflict(void) struct reftable_write_options opts = { 0 }; struct strbuf buf = STRBUF_INIT; struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &buf, &opts); + reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); struct reftable_ref_record rec = { .refname = "a/b", .value_type = REFTABLE_REF_SYMREF, diff --git a/reftable/reftable-error.h b/reftable/reftable-error.h index 4c457aa..e9b07c9 100644 --- a/reftable/reftable-error.h +++ b/reftable/reftable-error.h @@ -25,7 +25,7 @@ enum reftable_error { */ REFTABLE_NOT_EXIST_ERROR = -4, - /* Trying to write out-of-date data. */ + /* Trying to access locked data. */ REFTABLE_LOCK_ERROR = -5, /* Misuse of the API: @@ -57,6 +57,9 @@ enum reftable_error { /* Entry does not fit. This can happen when writing outsize reflog messages. */ REFTABLE_ENTRY_TOO_BIG_ERROR = -11, + + /* Trying to write out-of-date data. */ + REFTABLE_OUTDATED_ERROR = -12, }; /* convert the numeric error code to a string. The string should not be diff --git a/reftable/reftable-merged.h b/reftable/reftable-merged.h index 1a6d169..c91a2d8 100644 --- a/reftable/reftable-merged.h +++ b/reftable/reftable-merged.h @@ -33,7 +33,7 @@ struct reftable_table; the stack array. */ int reftable_new_merged_table(struct reftable_merged_table **dest, - struct reftable_table *stack, int n, + struct reftable_table *stack, size_t n, uint32_t hash_id); /* returns an iterator positioned just before 'name' */ diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index 67104f8..2a2943c 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -9,6 +9,7 @@ https://developers.google.com/open-source/licenses/bsd #ifndef REFTABLE_RECORD_H #define REFTABLE_RECORD_H +#include "hash-ll.h" #include <stdint.h> /* @@ -21,6 +22,7 @@ https://developers.google.com/open-source/licenses/bsd /* reftable_ref_record holds a ref database entry target_value */ struct reftable_ref_record { char *refname; /* Name of the ref, malloced. */ + size_t refname_cap; uint64_t update_index; /* Logical timestamp at which this value is * written */ @@ -38,10 +40,10 @@ struct reftable_ref_record { #define REFTABLE_NR_REF_VALUETYPES 4 } value_type; union { - uint8_t *val1; /* malloced hash. */ + unsigned char val1[GIT_MAX_RAWSZ]; struct { - uint8_t *value; /* first value, malloced hash */ - uint8_t *target_value; /* second value, malloced hash */ + unsigned char value[GIT_MAX_RAWSZ]; /* first hash */ + unsigned char target_value[GIT_MAX_RAWSZ]; /* second hash */ } val2; char *symref; /* referent, malloced 0-terminated string */ } value; @@ -49,11 +51,11 @@ struct reftable_ref_record { /* Returns the first hash, or NULL if `rec` is not of type * REFTABLE_REF_VAL1 or REFTABLE_REF_VAL2. */ -uint8_t *reftable_ref_record_val1(const struct reftable_ref_record *rec); +const unsigned char *reftable_ref_record_val1(const struct reftable_ref_record *rec); /* Returns the second hash, or NULL if `rec` is not of type * REFTABLE_REF_VAL2. */ -uint8_t *reftable_ref_record_val2(const struct reftable_ref_record *rec); +const unsigned char *reftable_ref_record_val2(const struct reftable_ref_record *rec); /* returns whether 'ref' represents a deletion */ int reftable_ref_record_is_deletion(const struct reftable_ref_record *ref); @@ -72,6 +74,7 @@ int reftable_ref_record_equal(const struct reftable_ref_record *a, /* reftable_log_record holds a reflog entry */ struct reftable_log_record { char *refname; + size_t refname_cap; uint64_t update_index; /* logical timestamp of a transactional update. */ @@ -86,13 +89,14 @@ struct reftable_log_record { union { struct { - uint8_t *new_hash; - uint8_t *old_hash; + unsigned char new_hash[GIT_MAX_RAWSZ]; + unsigned char old_hash[GIT_MAX_RAWSZ]; char *name; char *email; uint64_t time; int16_t tz_offset; char *message; + size_t message_cap; } update; } value; }; diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index db8de19..155bf0b 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -46,6 +46,9 @@ struct reftable_write_options { * is a single line, and add '\n' if missing. */ unsigned exact_log_message : 1; + + /* boolean: Prevent auto-compaction of tables. */ + unsigned disable_auto_compact : 1; }; /* reftable_block_stats holds statistics for a single block type */ @@ -88,6 +91,7 @@ struct reftable_stats { /* reftable_new_writer creates a new writer */ struct reftable_writer * reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t), + int (*flush_func)(void *), void *writer_arg, struct reftable_write_options *opts); /* Set the range of update indices for the records we will add. When writing a diff --git a/reftable/stack.c b/reftable/stack.c index ddbdf1b..80266bc 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -8,6 +8,7 @@ https://developers.google.com/open-source/licenses/bsd #include "stack.h" +#include "../write-or-die.h" #include "system.h" #include "merged.h" #include "reader.h" @@ -16,13 +17,15 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-record.h" #include "reftable-merged.h" #include "writer.h" +#include "tempfile.h" static int stack_try_add(struct reftable_stack *st, int (*write_table)(struct reftable_writer *wr, void *arg), void *arg); static int stack_write_compact(struct reftable_stack *st, - struct reftable_writer *wr, int first, int last, + struct reftable_writer *wr, + size_t first, size_t last, struct reftable_log_expiry_config *config); static int stack_check_addition(struct reftable_stack *st, const char *new_tab_name); @@ -42,14 +45,20 @@ static void stack_filename(struct strbuf *dest, struct reftable_stack *st, static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz) { int *fdp = (int *)arg; - return write(*fdp, data, sz); + return write_in_full(*fdp, data, sz); +} + +static int reftable_fd_flush(void *arg) +{ + int *fdp = (int *)arg; + + return fsync_component(FSYNC_COMPONENT_REFERENCE, *fdp); } int reftable_new_stack(struct reftable_stack **dest, const char *dir, struct reftable_write_options config) { - struct reftable_stack *p = - reftable_calloc(sizeof(struct reftable_stack)); + struct reftable_stack *p = reftable_calloc(1, sizeof(*p)); struct strbuf list_file_name = STRBUF_INIT; int err = 0; @@ -64,6 +73,7 @@ int reftable_new_stack(struct reftable_stack **dest, const char *dir, strbuf_addstr(&list_file_name, "/tables.list"); p->list_file = strbuf_detach(&list_file_name, NULL); + p->list_fd = -1; p->reftable_dir = xstrdup(dir); p->config = config; @@ -91,8 +101,8 @@ static int fd_read_lines(int fd, char ***namesp) goto done; } - buf = reftable_malloc(size + 1); - if (read(fd, buf, size) != size) { + REFTABLE_ALLOC_ARRAY(buf, size + 1); + if (read_in_full(fd, buf, size) != size) { err = REFTABLE_IO_ERROR; goto done; } @@ -111,7 +121,7 @@ int read_lines(const char *filename, char ***namesp) int err = 0; if (fd < 0) { if (errno == ENOENT) { - *namesp = reftable_calloc(sizeof(char *)); + REFTABLE_CALLOC_ARRAY(*namesp, 1); return 0; } @@ -173,6 +183,12 @@ void reftable_stack_destroy(struct reftable_stack *st) st->readers_len = 0; FREE_AND_NULL(st->readers); } + + if (st->list_fd >= 0) { + close(st->list_fd); + st->list_fd = -1; + } + FREE_AND_NULL(st->list_file); FREE_AND_NULL(st->reftable_dir); reftable_free(st); @@ -182,8 +198,7 @@ void reftable_stack_destroy(struct reftable_stack *st) static struct reftable_reader **stack_copy_readers(struct reftable_stack *st, int cur_len) { - struct reftable_reader **cur = - reftable_calloc(sizeof(struct reftable_reader *) * cur_len); + struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur)); int i = 0; for (i = 0; i < cur_len; i++) { cur[i] = st->readers[i]; @@ -194,17 +209,18 @@ static struct reftable_reader **stack_copy_readers(struct reftable_stack *st, static int reftable_stack_reload_once(struct reftable_stack *st, char **names, int reuse_open) { - int cur_len = !st->merged ? 0 : st->merged->stack_len; + size_t cur_len = !st->merged ? 0 : st->merged->stack_len; struct reftable_reader **cur = stack_copy_readers(st, cur_len); - int err = 0; - int names_len = names_length(names); + size_t names_len = names_length(names); struct reftable_reader **new_readers = - reftable_calloc(sizeof(struct reftable_reader *) * names_len); + reftable_calloc(names_len, sizeof(*new_readers)); struct reftable_table *new_tables = - reftable_calloc(sizeof(struct reftable_table) * names_len); - int new_readers_len = 0; + reftable_calloc(names_len, sizeof(*new_tables)); + size_t new_readers_len = 0; struct reftable_merged_table *new_merged = NULL; - int i; + struct strbuf table_path = STRBUF_INIT; + int err = 0; + size_t i; while (*names) { struct reftable_reader *rd = NULL; @@ -212,24 +228,20 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, /* this is linear; we assume compaction keeps the number of tables under control so this is not quadratic. */ - int j = 0; - for (j = 0; reuse_open && j < cur_len; j++) { - if (cur[j] && 0 == strcmp(cur[j]->name, name)) { - rd = cur[j]; - cur[j] = NULL; + for (i = 0; reuse_open && i < cur_len; i++) { + if (cur[i] && 0 == strcmp(cur[i]->name, name)) { + rd = cur[i]; + cur[i] = NULL; break; } } if (!rd) { struct reftable_block_source src = { NULL }; - struct strbuf table_path = STRBUF_INIT; stack_filename(&table_path, st, name); err = reftable_block_source_from_file(&src, table_path.buf); - strbuf_release(&table_path); - if (err < 0) goto done; @@ -267,16 +279,13 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, for (i = 0; i < cur_len; i++) { if (cur[i]) { const char *name = reader_name(cur[i]); - struct strbuf filename = STRBUF_INIT; - stack_filename(&filename, st, name); + stack_filename(&table_path, st, name); reader_close(cur[i]); reftable_reader_free(cur[i]); /* On Windows, can only unlink after closing. */ - unlink(filename.buf); - - strbuf_release(&filename); + unlink(table_path.buf); } } @@ -288,6 +297,7 @@ done: reftable_free(new_readers); reftable_free(new_tables); reftable_free(cur); + strbuf_release(&table_path); return err; } @@ -306,69 +316,134 @@ static int tv_cmp(struct timeval *a, struct timeval *b) static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, int reuse_open) { - struct timeval deadline = { 0 }; - int err = gettimeofday(&deadline, NULL); + char **names = NULL, **names_after = NULL; + struct timeval deadline; int64_t delay = 0; - int tries = 0; - if (err < 0) - return err; + int tries = 0, err; + int fd = -1; + err = gettimeofday(&deadline, NULL); + if (err < 0) + goto out; deadline.tv_sec += 3; + while (1) { - char **names = NULL; - char **names_after = NULL; - struct timeval now = { 0 }; - int err = gettimeofday(&now, NULL); - int err2 = 0; - if (err < 0) { - return err; - } + struct timeval now; + + err = gettimeofday(&now, NULL); + if (err < 0) + goto out; - /* Only look at deadlines after the first few times. This - simplifies debugging in GDB */ + /* + * Only look at deadlines after the first few times. This + * simplifies debugging in GDB. + */ tries++; - if (tries > 3 && tv_cmp(&now, &deadline) >= 0) { - break; - } + if (tries > 3 && tv_cmp(&now, &deadline) >= 0) + goto out; - err = read_lines(st->list_file, &names); - if (err < 0) { - free_names(names); - return err; - } - err = reftable_stack_reload_once(st, names, reuse_open); - if (err == 0) { - free_names(names); - break; - } - if (err != REFTABLE_NOT_EXIST_ERROR) { - free_names(names); - return err; - } + fd = open(st->list_file, O_RDONLY); + if (fd < 0) { + if (errno != ENOENT) { + err = REFTABLE_IO_ERROR; + goto out; + } - /* err == REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent - writer. Check if there was one by checking if the name list - changed. - */ - err2 = read_lines(st->list_file, &names_after); - if (err2 < 0) { - free_names(names); - return err2; + REFTABLE_CALLOC_ARRAY(names, 1); + } else { + err = fd_read_lines(fd, &names); + if (err < 0) + goto out; } + err = reftable_stack_reload_once(st, names, reuse_open); + if (!err) + break; + if (err != REFTABLE_NOT_EXIST_ERROR) + goto out; + + /* + * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent + * writer. Check if there was one by checking if the name list + * changed. + */ + err = read_lines(st->list_file, &names_after); + if (err < 0) + goto out; if (names_equal(names_after, names)) { - free_names(names); - free_names(names_after); - return err; + err = REFTABLE_NOT_EXIST_ERROR; + goto out; } + free_names(names); + names = NULL; free_names(names_after); + names_after = NULL; + close(fd); + fd = -1; delay = delay + (delay * rand()) / RAND_MAX + 1; sleep_millisec(delay); } - return 0; +out: + /* + * Invalidate the stat cache. It is sufficient to only close the file + * descriptor and keep the cached stat info because we never use the + * latter when the former is negative. + */ + if (st->list_fd >= 0) { + close(st->list_fd); + st->list_fd = -1; + } + + /* + * Cache stat information in case it provides a useful signal to us. + * According to POSIX, "The st_ino and st_dev fields taken together + * uniquely identify the file within the system." That being said, + * Windows is not POSIX compliant and we do not have these fields + * available. So the information we have there is insufficient to + * determine whether two file descriptors point to the same file. + * + * While we could fall back to using other signals like the file's + * mtime, those are not sufficient to avoid races. We thus refrain from + * using the stat cache on such systems and fall back to the secondary + * caching mechanism, which is to check whether contents of the file + * have changed. + * + * On other systems which are POSIX compliant we must keep the file + * descriptor open. This is to avoid a race condition where two + * processes access the reftable stack at the same point in time: + * + * 1. A reads the reftable stack and caches its stat info. + * + * 2. B updates the stack, appending a new table to "tables.list". + * This will both use a new inode and result in a different file + * size, thus invalidating A's cache in theory. + * + * 3. B decides to auto-compact the stack and merges two tables. The + * file size now matches what A has cached again. Furthermore, the + * filesystem may decide to recycle the inode number of the file + * we have replaced in (2) because it is not in use anymore. + * + * 4. A reloads the reftable stack. Neither the inode number nor the + * file size changed. If the timestamps did not change either then + * we think the cached copy of our stack is up-to-date. + * + * By keeping the file descriptor open the inode number cannot be + * recycled, mitigating the race. + */ + if (!err && fd >= 0 && !fstat(fd, &st->list_st) && + st->list_st.st_dev && st->list_st.st_ino) { + st->list_fd = fd; + fd = -1; + } + + if (fd >= 0) + close(fd); + free_names(names); + free_names(names_after); + return err; } /* -1 = error @@ -377,8 +452,44 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, static int stack_uptodate(struct reftable_stack *st) { char **names = NULL; - int err = read_lines(st->list_file, &names); + int err; int i = 0; + + /* + * When we have cached stat information available then we use it to + * verify whether the file has been rewritten. + * + * Note that we explicitly do not want to use `stat_validity_check()` + * and friends here because they may end up not comparing the `st_dev` + * and `st_ino` fields. These functions thus cannot guarantee that we + * indeed still have the same file. + */ + if (st->list_fd >= 0) { + struct stat list_st; + + if (stat(st->list_file, &list_st) < 0) { + /* + * It's fine for "tables.list" to not exist. In that + * case, we have to refresh when the loaded stack has + * any readers. + */ + if (errno == ENOENT) + return !!st->readers_len; + return REFTABLE_IO_ERROR; + } + + /* + * When "tables.list" refers to the same file we can assume + * that it didn't change. This is because we always use + * rename(3P) to update the file and never write to it + * directly. + */ + if (st->list_st.st_dev == list_st.st_dev && + st->list_st.st_ino == list_st.st_ino) + return 0; + } + + err = read_lines(st->list_file, &names); if (err < 0) return err; @@ -418,25 +529,22 @@ int reftable_stack_add(struct reftable_stack *st, { int err = stack_try_add(st, write, arg); if (err < 0) { - if (err == REFTABLE_LOCK_ERROR) { + if (err == REFTABLE_OUTDATED_ERROR) { /* Ignore error return, we want to propagate - REFTABLE_LOCK_ERROR. + REFTABLE_OUTDATED_ERROR. */ reftable_stack_reload(st); } return err; } - if (!st->disable_auto_compact) - return reftable_stack_auto_compact(st); - return 0; } static void format_name(struct strbuf *dest, uint64_t min, uint64_t max) { char buf[100]; - uint32_t rnd = (uint32_t)rand(); + uint32_t rnd = (uint32_t)git_rand(); snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x", min, max, rnd); strbuf_reset(dest); @@ -444,33 +552,27 @@ static void format_name(struct strbuf *dest, uint64_t min, uint64_t max) } struct reftable_addition { - int lock_file_fd; - struct strbuf lock_file_name; + struct tempfile *lock_file; struct reftable_stack *stack; char **new_tables; - int new_tables_len; + size_t new_tables_len, new_tables_cap; uint64_t next_update_index; }; -#define REFTABLE_ADDITION_INIT \ - { \ - .lock_file_name = STRBUF_INIT \ - } +#define REFTABLE_ADDITION_INIT {0} static int reftable_stack_init_addition(struct reftable_addition *add, struct reftable_stack *st) { + struct strbuf lock_file_name = STRBUF_INIT; int err = 0; add->stack = st; - strbuf_reset(&add->lock_file_name); - strbuf_addstr(&add->lock_file_name, st->list_file); - strbuf_addstr(&add->lock_file_name, ".lock"); + strbuf_addf(&lock_file_name, "%s.lock", st->list_file); - add->lock_file_fd = open(add->lock_file_name.buf, - O_EXCL | O_CREAT | O_WRONLY, 0666); - if (add->lock_file_fd < 0) { + add->lock_file = create_tempfile(lock_file_name.buf); + if (!add->lock_file) { if (errno == EEXIST) { err = REFTABLE_LOCK_ERROR; } else { @@ -479,7 +581,7 @@ static int reftable_stack_init_addition(struct reftable_addition *add, goto done; } if (st->config.default_permissions) { - if (chmod(add->lock_file_name.buf, st->config.default_permissions) < 0) { + if (chmod(add->lock_file->filename.buf, st->config.default_permissions) < 0) { err = REFTABLE_IO_ERROR; goto done; } @@ -488,9 +590,8 @@ static int reftable_stack_init_addition(struct reftable_addition *add, err = stack_uptodate(st); if (err < 0) goto done; - - if (err > 1) { - err = REFTABLE_LOCK_ERROR; + if (err > 0) { + err = REFTABLE_OUTDATED_ERROR; goto done; } @@ -499,13 +600,15 @@ done: if (err) { reftable_addition_close(add); } + strbuf_release(&lock_file_name); return err; } static void reftable_addition_close(struct reftable_addition *add) { - int i = 0; struct strbuf nm = STRBUF_INIT; + size_t i; + for (i = 0; i < add->new_tables_len; i++) { stack_filename(&nm, add->stack, add->new_tables[i]); unlink(nm.buf); @@ -515,16 +618,9 @@ static void reftable_addition_close(struct reftable_addition *add) reftable_free(add->new_tables); add->new_tables = NULL; add->new_tables_len = 0; + add->new_tables_cap = 0; - if (add->lock_file_fd > 0) { - close(add->lock_file_fd); - add->lock_file_fd = 0; - } - if (add->lock_file_name.len > 0) { - unlink(add->lock_file_name.buf); - strbuf_release(&add->lock_file_name); - } - + delete_tempfile(&add->lock_file); strbuf_release(&nm); } @@ -540,8 +636,10 @@ void reftable_addition_destroy(struct reftable_addition *add) int reftable_addition_commit(struct reftable_addition *add) { struct strbuf table_list = STRBUF_INIT; - int i = 0; + int lock_file_fd = get_tempfile_fd(add->lock_file); int err = 0; + size_t i; + if (add->new_tables_len == 0) goto done; @@ -554,36 +652,48 @@ int reftable_addition_commit(struct reftable_addition *add) strbuf_addstr(&table_list, "\n"); } - err = write(add->lock_file_fd, table_list.buf, table_list.len); + err = write_in_full(lock_file_fd, table_list.buf, table_list.len); strbuf_release(&table_list); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } - err = close(add->lock_file_fd); - add->lock_file_fd = 0; - if (err < 0) { - err = REFTABLE_IO_ERROR; - goto done; - } + fsync_component_or_die(FSYNC_COMPONENT_REFERENCE, lock_file_fd, + get_tempfile_path(add->lock_file)); - err = rename(add->lock_file_name.buf, add->stack->list_file); + err = rename_tempfile(&add->lock_file, add->stack->list_file); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } /* success, no more state to clean up. */ - strbuf_release(&add->lock_file_name); - for (i = 0; i < add->new_tables_len; i++) { + for (i = 0; i < add->new_tables_len; i++) reftable_free(add->new_tables[i]); - } reftable_free(add->new_tables); add->new_tables = NULL; add->new_tables_len = 0; + add->new_tables_cap = 0; + + err = reftable_stack_reload_maybe_reuse(add->stack, 1); + if (err) + goto done; + + if (!add->stack->config.disable_auto_compact) { + /* + * Auto-compact the stack to keep the number of tables in + * control. It is possible that a concurrent writer is already + * trying to compact parts of the stack, which would lead to a + * `REFTABLE_LOCK_ERROR` because parts of the stack are locked + * already. This is a benign error though, so we ignore it. + */ + err = reftable_stack_auto_compact(add->stack); + if (err < 0 && err != REFTABLE_LOCK_ERROR) + goto done; + err = 0; + } - err = reftable_stack_reload(add->stack); done: reftable_addition_close(add); return err; @@ -594,7 +704,7 @@ int reftable_stack_new_addition(struct reftable_addition **dest, { int err = 0; struct reftable_addition empty = REFTABLE_ADDITION_INIT; - *dest = reftable_calloc(sizeof(**dest)); + REFTABLE_CALLOC_ARRAY(*dest, 1); **dest = empty; err = reftable_stack_init_addition(*dest, st); if (err) { @@ -613,10 +723,6 @@ static int stack_try_add(struct reftable_stack *st, int err = reftable_stack_init_addition(&add, st); if (err < 0) goto done; - if (err > 0) { - err = REFTABLE_LOCK_ERROR; - goto done; - } err = reftable_addition_add(&add, write_table, arg); if (err < 0) @@ -637,8 +743,9 @@ int reftable_addition_add(struct reftable_addition *add, struct strbuf tab_file_name = STRBUF_INIT; struct strbuf next_name = STRBUF_INIT; struct reftable_writer *wr = NULL; + struct tempfile *tab_file = NULL; int err = 0; - int tab_fd = 0; + int tab_fd; strbuf_reset(&next_name); format_name(&next_name, add->next_update_index, add->next_update_index); @@ -646,18 +753,21 @@ int reftable_addition_add(struct reftable_addition *add, stack_filename(&temp_tab_file_name, add->stack, next_name.buf); strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); - tab_fd = mkstemp(temp_tab_file_name.buf); - if (tab_fd < 0) { + tab_file = mks_tempfile(temp_tab_file_name.buf); + if (!tab_file) { err = REFTABLE_IO_ERROR; goto done; } if (add->stack->config.default_permissions) { - if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) { + if (chmod(get_tempfile_path(tab_file), + add->stack->config.default_permissions)) { err = REFTABLE_IO_ERROR; goto done; } } - wr = reftable_new_writer(reftable_fd_write, &tab_fd, + tab_fd = get_tempfile_fd(tab_file); + + wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd, &add->stack->config); err = write_table(wr, arg); if (err < 0) @@ -671,14 +781,13 @@ int reftable_addition_add(struct reftable_addition *add, if (err < 0) goto done; - err = close(tab_fd); - tab_fd = 0; + err = close_tempfile_gently(tab_file); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } - err = stack_check_addition(add->stack, temp_tab_file_name.buf); + err = stack_check_addition(add->stack, get_tempfile_path(tab_file)); if (err < 0) goto done; @@ -689,33 +798,23 @@ int reftable_addition_add(struct reftable_addition *add, format_name(&next_name, wr->min_update_index, wr->max_update_index); strbuf_addstr(&next_name, ".ref"); - stack_filename(&tab_file_name, add->stack, next_name.buf); /* On windows, this relies on rand() picking a unique destination name. Maybe we should do retry loop as well? */ - err = rename(temp_tab_file_name.buf, tab_file_name.buf); + err = rename_tempfile(&tab_file, tab_file_name.buf); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } - add->new_tables = reftable_realloc(add->new_tables, - sizeof(*add->new_tables) * - (add->new_tables_len + 1)); - add->new_tables[add->new_tables_len] = strbuf_detach(&next_name, NULL); - add->new_tables_len++; + REFTABLE_ALLOC_GROW(add->new_tables, add->new_tables_len + 1, + add->new_tables_cap); + add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL); done: - if (tab_fd > 0) { - close(tab_fd); - tab_fd = 0; - } - if (temp_tab_file_name.len > 0) { - unlink(temp_tab_file_name.buf); - } - + delete_tempfile(&tab_file); strbuf_release(&temp_tab_file_name); strbuf_release(&tab_file_name); strbuf_release(&next_name); @@ -732,66 +831,77 @@ uint64_t reftable_stack_next_update_index(struct reftable_stack *st) return 1; } -static int stack_compact_locked(struct reftable_stack *st, int first, int last, - struct strbuf *temp_tab, - struct reftable_log_expiry_config *config) +static int stack_compact_locked(struct reftable_stack *st, + size_t first, size_t last, + struct reftable_log_expiry_config *config, + struct tempfile **tab_file_out) { struct strbuf next_name = STRBUF_INIT; - int tab_fd = -1; + struct strbuf tab_file_path = STRBUF_INIT; struct reftable_writer *wr = NULL; - int err = 0; + struct tempfile *tab_file; + int tab_fd, err = 0; format_name(&next_name, reftable_reader_min_update_index(st->readers[first]), reftable_reader_max_update_index(st->readers[last])); + stack_filename(&tab_file_path, st, next_name.buf); + strbuf_addstr(&tab_file_path, ".temp.XXXXXX"); - stack_filename(temp_tab, st, next_name.buf); - strbuf_addstr(temp_tab, ".temp.XXXXXX"); + tab_file = mks_tempfile(tab_file_path.buf); + if (!tab_file) { + err = REFTABLE_IO_ERROR; + goto done; + } + tab_fd = get_tempfile_fd(tab_file); - tab_fd = mkstemp(temp_tab->buf); - wr = reftable_new_writer(reftable_fd_write, &tab_fd, &st->config); + if (st->config.default_permissions && + chmod(get_tempfile_path(tab_file), st->config.default_permissions) < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, + &tab_fd, &st->config); err = stack_write_compact(st, wr, first, last, config); if (err < 0) goto done; + err = reftable_writer_close(wr); if (err < 0) goto done; - err = close(tab_fd); - tab_fd = 0; + err = close_tempfile_gently(tab_file); + if (err < 0) + goto done; + + *tab_file_out = tab_file; + tab_file = NULL; done: + delete_tempfile(&tab_file); reftable_writer_free(wr); - if (tab_fd > 0) { - close(tab_fd); - tab_fd = 0; - } - if (err != 0 && temp_tab->len > 0) { - unlink(temp_tab->buf); - strbuf_release(temp_tab); - } strbuf_release(&next_name); + strbuf_release(&tab_file_path); return err; } static int stack_write_compact(struct reftable_stack *st, - struct reftable_writer *wr, int first, int last, + struct reftable_writer *wr, + size_t first, size_t last, struct reftable_log_expiry_config *config) { - int subtabs_len = last - first + 1; + size_t subtabs_len = last - first + 1; struct reftable_table *subtabs = reftable_calloc( - sizeof(struct reftable_table) * (last - first + 1)); + last - first + 1, sizeof(*subtabs)); struct reftable_merged_table *mt = NULL; - int err = 0; struct reftable_iterator it = { NULL }; struct reftable_ref_record ref = { NULL }; struct reftable_log_record log = { NULL }; - uint64_t entries = 0; + int err = 0; - int i = 0, j = 0; - for (i = first, j = 0; i <= last; i++) { + for (size_t i = first, j = 0; i <= last; i++) { struct reftable_reader *t = st->readers[i]; reftable_table_from_reader(&subtabs[j++], t); st->stats.bytes += t->size; @@ -816,18 +926,16 @@ static int stack_write_compact(struct reftable_stack *st, err = 0; break; } - if (err < 0) { - break; - } + if (err < 0) + goto done; if (first == 0 && reftable_ref_record_is_deletion(&ref)) { continue; } err = reftable_writer_add_ref(wr, &ref); - if (err < 0) { - break; - } + if (err < 0) + goto done; entries++; } reftable_iterator_destroy(&it); @@ -842,9 +950,8 @@ static int stack_write_compact(struct reftable_stack *st, err = 0; break; } - if (err < 0) { - break; - } + if (err < 0) + goto done; if (first == 0 && reftable_log_record_is_deletion(&log)) { continue; } @@ -860,9 +967,8 @@ static int stack_write_compact(struct reftable_stack *st, } err = reftable_writer_add_log(wr, &log); - if (err < 0) { - break; - } + if (err < 0) + goto done; entries++; } @@ -878,27 +984,28 @@ done: return err; } -/* < 0: error. 0 == OK, > 0 attempt failed; could retry. */ -static int stack_compact_range(struct reftable_stack *st, int first, int last, +/* + * Compact all tables in the range `[first, last)` into a single new table. + * + * This function returns `0` on success or a code `< 0` on failure. When the + * stack or any of the tables in the specified range are already locked then + * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that + * callers can either ignore, or they may choose to retry compaction after some + * amount of time. + */ +static int stack_compact_range(struct reftable_stack *st, + size_t first, size_t last, struct reftable_log_expiry_config *expiry) { - struct strbuf temp_tab_file_name = STRBUF_INIT; + struct strbuf tables_list_buf = STRBUF_INIT; struct strbuf new_table_name = STRBUF_INIT; - struct strbuf lock_file_name = STRBUF_INIT; - struct strbuf ref_list_contents = STRBUF_INIT; struct strbuf new_table_path = STRBUF_INIT; - int err = 0; - int have_lock = 0; - int lock_file_fd = -1; - int compact_count = last - first + 1; - char **listp = NULL; - char **delete_on_success = - reftable_calloc(sizeof(char *) * (compact_count + 1)); - char **subtable_locks = - reftable_calloc(sizeof(char *) * (compact_count + 1)); - int i = 0; - int j = 0; - int is_empty_table = 0; + struct strbuf table_name = STRBUF_INIT; + struct lock_file tables_list_lock = LOCK_INIT; + struct lock_file *table_locks = NULL; + struct tempfile *new_table = NULL; + int is_empty_table = 0, err = 0; + size_t i; if (first > last || (!expiry && first == last)) { err = 0; @@ -907,196 +1014,200 @@ static int stack_compact_range(struct reftable_stack *st, int first, int last, st->stats.attempts++; - strbuf_reset(&lock_file_name); - strbuf_addstr(&lock_file_name, st->list_file); - strbuf_addstr(&lock_file_name, ".lock"); - - lock_file_fd = - open(lock_file_name.buf, O_EXCL | O_CREAT | O_WRONLY, 0666); - if (lock_file_fd < 0) { - if (errno == EEXIST) { - err = 1; - } else { + /* + * Hold the lock so that we can read "tables.list" and lock all tables + * which are part of the user-specified range. + */ + err = hold_lock_file_for_update(&tables_list_lock, st->list_file, + LOCK_NO_DEREF); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else err = REFTABLE_IO_ERROR; - } goto done; } - /* Don't want to write to the lock for now. */ - close(lock_file_fd); - lock_file_fd = -1; - have_lock = 1; err = stack_uptodate(st); - if (err != 0) + if (err) goto done; - for (i = first, j = 0; i <= last; i++) { - struct strbuf subtab_file_name = STRBUF_INIT; - struct strbuf subtab_lock = STRBUF_INIT; - int sublock_file_fd = -1; - - stack_filename(&subtab_file_name, st, - reader_name(st->readers[i])); - - strbuf_reset(&subtab_lock); - strbuf_addbuf(&subtab_lock, &subtab_file_name); - strbuf_addstr(&subtab_lock, ".lock"); - - sublock_file_fd = open(subtab_lock.buf, - O_EXCL | O_CREAT | O_WRONLY, 0666); - if (sublock_file_fd >= 0) { - close(sublock_file_fd); - } else if (sublock_file_fd < 0) { - if (errno == EEXIST) { - err = 1; - } else { + /* + * Lock all tables in the user-provided range. This is the slice of our + * stack which we'll compact. + */ + REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1); + for (i = first; i <= last; i++) { + stack_filename(&table_name, st, reader_name(st->readers[i])); + + err = hold_lock_file_for_update(&table_locks[i - first], + table_name.buf, LOCK_NO_DEREF); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else err = REFTABLE_IO_ERROR; - } + goto done; } - subtable_locks[j] = subtab_lock.buf; - delete_on_success[j] = subtab_file_name.buf; - j++; - - if (err != 0) + /* + * We need to close the lockfiles as we might otherwise easily + * run into file descriptor exhaustion when we compress a lot + * of tables. + */ + err = close_lock_file_gently(&table_locks[i - first]); + if (err < 0) { + err = REFTABLE_IO_ERROR; goto done; + } } - err = unlink(lock_file_name.buf); - if (err < 0) + /* + * We have locked all tables in our range and can thus release the + * "tables.list" lock while compacting the locked tables. This allows + * concurrent updates to the stack to proceed. + */ + err = rollback_lock_file(&tables_list_lock); + if (err < 0) { + err = REFTABLE_IO_ERROR; goto done; - have_lock = 0; - - err = stack_compact_locked(st, first, last, &temp_tab_file_name, - expiry); - /* Compaction + tombstones can create an empty table out of non-empty - * tables. */ - is_empty_table = (err == REFTABLE_EMPTY_TABLE_ERROR); - if (is_empty_table) { - err = 0; } - if (err < 0) - goto done; - lock_file_fd = - open(lock_file_name.buf, O_EXCL | O_CREAT | O_WRONLY, 0666); - if (lock_file_fd < 0) { - if (errno == EEXIST) { - err = 1; - } else { + /* + * Compact the now-locked tables into a new table. Note that compacting + * these tables may end up with an empty new table in case tombstones + * end up cancelling out all refs in that range. + */ + err = stack_compact_locked(st, first, last, expiry, &new_table); + if (err < 0) { + if (err != REFTABLE_EMPTY_TABLE_ERROR) + goto done; + is_empty_table = 1; + } + + /* + * Now that we have written the new, compacted table we need to re-lock + * "tables.list". We'll then replace the compacted range of tables with + * the new table. + */ + err = hold_lock_file_for_update(&tables_list_lock, st->list_file, + LOCK_NO_DEREF); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else err = REFTABLE_IO_ERROR; - } goto done; } - have_lock = 1; + if (st->config.default_permissions) { - if (chmod(lock_file_name.buf, st->config.default_permissions) < 0) { + if (chmod(get_lock_file_path(&tables_list_lock), + st->config.default_permissions) < 0) { err = REFTABLE_IO_ERROR; goto done; } } - format_name(&new_table_name, st->readers[first]->min_update_index, - st->readers[last]->max_update_index); - strbuf_addstr(&new_table_name, ".ref"); - - stack_filename(&new_table_path, st, new_table_name.buf); - + /* + * If the resulting compacted table is not empty, then we need to move + * it into place now. + */ if (!is_empty_table) { - /* retry? */ - err = rename(temp_tab_file_name.buf, new_table_path.buf); + format_name(&new_table_name, st->readers[first]->min_update_index, + st->readers[last]->max_update_index); + strbuf_addstr(&new_table_name, ".ref"); + stack_filename(&new_table_path, st, new_table_name.buf); + + err = rename_tempfile(&new_table, new_table_path.buf); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } } - for (i = 0; i < first; i++) { - strbuf_addstr(&ref_list_contents, st->readers[i]->name); - strbuf_addstr(&ref_list_contents, "\n"); - } - if (!is_empty_table) { - strbuf_addbuf(&ref_list_contents, &new_table_name); - strbuf_addstr(&ref_list_contents, "\n"); - } - for (i = last + 1; i < st->merged->stack_len; i++) { - strbuf_addstr(&ref_list_contents, st->readers[i]->name); - strbuf_addstr(&ref_list_contents, "\n"); - } - - err = write(lock_file_fd, ref_list_contents.buf, ref_list_contents.len); + /* + * Write the new "tables.list" contents with the compacted table we + * have just written. In case the compacted table became empty we + * simply skip writing it. + */ + for (i = 0; i < first; i++) + strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name); + if (!is_empty_table) + strbuf_addf(&tables_list_buf, "%s\n", new_table_name.buf); + for (i = last + 1; i < st->merged->stack_len; i++) + strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name); + + err = write_in_full(get_lock_file_fd(&tables_list_lock), + tables_list_buf.buf, tables_list_buf.len); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); goto done; } - err = close(lock_file_fd); - lock_file_fd = -1; + + err = fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&tables_list_lock)); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); goto done; } - err = rename(lock_file_name.buf, st->list_file); + err = commit_lock_file(&tables_list_lock); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); goto done; } - have_lock = 0; - /* Reload the stack before deleting. On windows, we can only delete the - files after we closed them. - */ + /* + * Reload the stack before deleting the compacted tables. We can only + * delete the files after we closed them on Windows, so this needs to + * happen first. + */ err = reftable_stack_reload_maybe_reuse(st, first < last); + if (err < 0) + goto done; - listp = delete_on_success; - while (*listp) { - if (strcmp(*listp, new_table_path.buf)) { - unlink(*listp); - } - listp++; + /* + * Delete the old tables. They may still be in use by concurrent + * readers, so it is expected that unlinking tables may fail. + */ + for (i = first; i <= last; i++) { + struct lock_file *table_lock = &table_locks[i - first]; + char *table_path = get_locked_file_path(table_lock); + unlink(table_path); + free(table_path); } done: - free_names(delete_on_success); + rollback_lock_file(&tables_list_lock); + for (i = first; table_locks && i <= last; i++) + rollback_lock_file(&table_locks[i - first]); + reftable_free(table_locks); - listp = subtable_locks; - while (*listp) { - unlink(*listp); - listp++; - } - free_names(subtable_locks); - if (lock_file_fd >= 0) { - close(lock_file_fd); - lock_file_fd = -1; - } - if (have_lock) { - unlink(lock_file_name.buf); - } + delete_tempfile(&new_table); strbuf_release(&new_table_name); strbuf_release(&new_table_path); - strbuf_release(&ref_list_contents); - strbuf_release(&temp_tab_file_name); - strbuf_release(&lock_file_name); + + strbuf_release(&tables_list_buf); + strbuf_release(&table_name); return err; } int reftable_stack_compact_all(struct reftable_stack *st, struct reftable_log_expiry_config *config) { - return stack_compact_range(st, 0, st->merged->stack_len - 1, config); + return stack_compact_range(st, 0, st->merged->stack_len ? + st->merged->stack_len - 1 : 0, config); } -static int stack_compact_range_stats(struct reftable_stack *st, int first, - int last, +static int stack_compact_range_stats(struct reftable_stack *st, + size_t first, size_t last, struct reftable_log_expiry_config *config) { int err = stack_compact_range(st, first, last, config); - if (err > 0) { + if (err == REFTABLE_LOCK_ERROR) st->stats.failures++; - } return err; } @@ -1105,84 +1216,82 @@ static int segment_size(struct segment *s) return s->end - s->start; } -int fastlog2(uint64_t sz) +struct segment suggest_compaction_segment(uint64_t *sizes, size_t n) { - int l = 0; - if (sz == 0) - return 0; - for (; sz; sz /= 2) { - l++; - } - return l - 1; -} - -struct segment *sizes_to_segments(int *seglen, uint64_t *sizes, int n) -{ - struct segment *segs = reftable_calloc(sizeof(struct segment) * n); - int next = 0; - struct segment cur = { 0 }; - int i = 0; + struct segment seg = { 0 }; + uint64_t bytes; + size_t i; - if (n == 0) { - *seglen = 0; - return segs; - } - for (i = 0; i < n; i++) { - int log = fastlog2(sizes[i]); - if (cur.log != log && cur.bytes > 0) { - struct segment fresh = { - .start = i, - }; + /* + * If there are no tables or only a single one then we don't have to + * compact anything. The sequence is geometric by definition already. + */ + if (n <= 1) + return seg; - segs[next++] = cur; - cur = fresh; + /* + * Find the ending table of the compaction segment needed to restore the + * geometric sequence. Note that the segment end is exclusive. + * + * To do so, we iterate backwards starting from the most recent table + * until a valid segment end is found. If the preceding table is smaller + * than the current table multiplied by the geometric factor (2), the + * compaction segment end has been identified. + * + * Tables after the ending point are not added to the byte count because + * they are already valid members of the geometric sequence. Due to the + * properties of a geometric sequence, it is not possible for the sum of + * these tables to exceed the value of the ending point table. + * + * Example table size sequence requiring no compaction: + * 64, 32, 16, 8, 4, 2, 1 + * + * Example table size sequence where compaction segment end is set to + * the last table. Since the segment end is exclusive, the last table is + * excluded during subsequent compaction and the table with size 3 is + * the final table included: + * 64, 32, 16, 8, 4, 3, 1 + */ + for (i = n - 1; i > 0; i--) { + if (sizes[i - 1] < sizes[i] * 2) { + seg.end = i + 1; + bytes = sizes[i]; + break; } - - cur.log = log; - cur.end = i + 1; - cur.bytes += sizes[i]; } - segs[next++] = cur; - *seglen = next; - return segs; -} -struct segment suggest_compaction_segment(uint64_t *sizes, int n) -{ - int seglen = 0; - struct segment *segs = sizes_to_segments(&seglen, sizes, n); - struct segment min_seg = { - .log = 64, - }; - int i = 0; - for (i = 0; i < seglen; i++) { - if (segment_size(&segs[i]) == 1) { - continue; - } - - if (segs[i].log < min_seg.log) { - min_seg = segs[i]; - } - } + /* + * Find the starting table of the compaction segment by iterating + * through the remaining tables and keeping track of the accumulated + * size of all tables seen from the segment end table. The previous + * table is compared to the accumulated size because the tables from the + * segment end are merged backwards recursively. + * + * Note that we keep iterating even after we have found the first + * starting point. This is because there may be tables in the stack + * preceding that first starting point which violate the geometric + * sequence. + * + * Example compaction segment start set to table with size 32: + * 128, 32, 16, 8, 4, 3, 1 + */ + for (; i > 0; i--) { + uint64_t curr = bytes; + bytes += sizes[i - 1]; - while (min_seg.start > 0) { - int prev = min_seg.start - 1; - if (fastlog2(min_seg.bytes) < fastlog2(sizes[prev])) { - break; + if (sizes[i - 1] < curr * 2) { + seg.start = i - 1; + seg.bytes = bytes; } - - min_seg.start = prev; - min_seg.bytes += sizes[prev]; } - reftable_free(segs); - return min_seg; + return seg; } static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st) { uint64_t *sizes = - reftable_calloc(sizeof(uint64_t) * st->merged->stack_len); + reftable_calloc(st->merged->stack_len, sizeof(*sizes)); int version = (st->config.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2; int overhead = header_size(version) - 1; int i = 0; @@ -1281,17 +1390,12 @@ static int stack_check_addition(struct reftable_stack *st, while (1) { struct reftable_ref_record ref = { NULL }; err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) { + if (err > 0) break; - } if (err < 0) goto done; - if (len >= cap) { - cap = 2 * cap + 1; - refs = reftable_realloc(refs, cap * sizeof(refs[0])); - } - + REFTABLE_ALLOC_GROW(refs, len + 1, cap); refs[len++] = ref; } diff --git a/reftable/stack.h b/reftable/stack.h index f570058..d43efa4 100644 --- a/reftable/stack.h +++ b/reftable/stack.h @@ -14,9 +14,11 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-stack.h" struct reftable_stack { + struct stat list_st; char *list_file; + int list_fd; + char *reftable_dir; - int disable_auto_compact; struct reftable_write_options config; @@ -29,13 +31,10 @@ struct reftable_stack { int read_lines(const char *filename, char ***lines); struct segment { - int start, end; - int log; + size_t start, end; uint64_t bytes; }; -int fastlog2(uint64_t sz); -struct segment *sizes_to_segments(int *seglen, uint64_t *sizes, int n); -struct segment suggest_compaction_segment(uint64_t *sizes, int n); +struct segment suggest_compaction_segment(uint64_t *sizes, size_t n); #endif diff --git a/reftable/stack_test.c b/reftable/stack_test.c index d0b7175..1df3ffc 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -13,7 +13,6 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-reader.h" #include "merged.h" #include "basics.h" -#include "constants.h" #include "record.h" #include "test_framework.h" #include "reftable-tests.h" @@ -39,7 +38,17 @@ static int count_dir_entries(const char *dirname) return 0; while ((d = readdir(dir))) { - if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, ".")) + /* + * Besides skipping over "." and "..", we also need to + * skip over other files that have a leading ".". This + * is due to behaviour of NFS, which will rename files + * to ".nfs*" to emulate delete-on-last-close. + * + * In any case this should be fine as the reftable + * library will never write files with leading dots + * anyway. + */ + if (starts_with(d->d_name, ".")) continue; len++; } @@ -78,7 +87,7 @@ static void test_read_file(void) int i = 0; EXPECT(fd > 0); - n = write(fd, out, strlen(out)); + n = write_in_full(fd, out, strlen(out)); EXPECT(n == strlen(out)); err = close(fd); EXPECT(err >= 0); @@ -233,7 +242,7 @@ static void test_reftable_stack_uptodate(void) EXPECT_ERR(err); err = reftable_stack_add(st2, &write_test_ref, &ref2); - EXPECT(err == REFTABLE_LOCK_ERROR); + EXPECT(err == REFTABLE_OUTDATED_ERROR); err = reftable_stack_reload(st2); EXPECT_ERR(err); @@ -289,6 +298,104 @@ static void test_reftable_stack_transaction_api(void) clear_dir(dir); } +static void test_reftable_stack_transaction_api_performs_auto_compaction(void) +{ + char *dir = get_tmp_dir(__LINE__); + struct reftable_write_options cfg = {0}; + struct reftable_addition *add = NULL; + struct reftable_stack *st = NULL; + int i, n = 20, err; + + err = reftable_new_stack(&st, dir, cfg); + EXPECT_ERR(err); + + for (i = 0; i <= n; i++) { + struct reftable_ref_record ref = { + .update_index = reftable_stack_next_update_index(st), + .value_type = REFTABLE_REF_SYMREF, + .value.symref = "master", + }; + char name[100]; + + snprintf(name, sizeof(name), "branch%04d", i); + ref.refname = name; + + /* + * Disable auto-compaction for all but the last runs. Like this + * we can ensure that we indeed honor this setting and have + * better control over when exactly auto compaction runs. + */ + st->config.disable_auto_compact = i != n; + + err = reftable_stack_new_addition(&add, st); + EXPECT_ERR(err); + + err = reftable_addition_add(add, &write_test_ref, &ref); + EXPECT_ERR(err); + + err = reftable_addition_commit(add); + EXPECT_ERR(err); + + reftable_addition_destroy(add); + + /* + * The stack length should grow continuously for all runs where + * auto compaction is disabled. When enabled, we should merge + * all tables in the stack. + */ + if (i != n) + EXPECT(st->merged->stack_len == i + 1); + else + EXPECT(st->merged->stack_len == 1); + } + + reftable_stack_destroy(st); + clear_dir(dir); +} + +static void test_reftable_stack_auto_compaction_fails_gracefully(void) +{ + struct reftable_ref_record ref = { + .refname = "refs/heads/master", + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = {0x01}, + }; + struct reftable_write_options cfg = {0}; + struct reftable_stack *st; + struct strbuf table_path = STRBUF_INIT; + char *dir = get_tmp_dir(__LINE__); + int err; + + err = reftable_new_stack(&st, dir, cfg); + EXPECT_ERR(err); + + err = reftable_stack_add(st, write_test_ref, &ref); + EXPECT_ERR(err); + EXPECT(st->merged->stack_len == 1); + EXPECT(st->stats.attempts == 0); + EXPECT(st->stats.failures == 0); + + /* + * Lock the newly written table such that it cannot be compacted. + * Adding a new table to the stack should not be impacted by this, even + * though auto-compaction will now fail. + */ + strbuf_addf(&table_path, "%s/%s.lock", dir, st->readers[0]->name); + write_file_buf(table_path.buf, "", 0); + + ref.update_index = 2; + err = reftable_stack_add(st, write_test_ref, &ref); + EXPECT_ERR(err); + EXPECT(st->merged->stack_len == 2); + EXPECT(st->stats.attempts == 1); + EXPECT(st->stats.failures == 1); + + reftable_stack_destroy(st); + strbuf_release(&table_path); + clear_dir(dir); +} + static void test_reftable_stack_validate_refname(void) { struct reftable_write_options cfg = { 0 }; @@ -389,18 +496,19 @@ static void test_reftable_stack_add(void) int err = 0; struct reftable_write_options cfg = { .exact_log_message = 1, + .default_permissions = 0660, + .disable_auto_compact = 1, }; struct reftable_stack *st = NULL; char *dir = get_tmp_dir(__LINE__); - struct reftable_ref_record refs[2] = { { NULL } }; struct reftable_log_record logs[2] = { { NULL } }; + struct strbuf path = STRBUF_INIT; + struct stat stat_result; int N = ARRAY_SIZE(refs); - err = reftable_new_stack(&st, dir, cfg); EXPECT_ERR(err); - st->disable_auto_compact = 1; for (i = 0; i < N; i++) { char buf[256]; @@ -408,14 +516,11 @@ static void test_reftable_stack_add(void) refs[i].refname = xstrdup(buf); refs[i].update_index = i + 1; refs[i].value_type = REFTABLE_REF_VAL1; - refs[i].value.val1 = reftable_malloc(GIT_SHA1_RAWSZ); set_test_hash(refs[i].value.val1, i); logs[i].refname = xstrdup(buf); logs[i].update_index = N + i + 1; logs[i].value_type = REFTABLE_LOG_UPDATE; - - logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ); logs[i].value.update.email = xstrdup("identity@invalid"); set_test_hash(logs[i].value.update.new_hash, i); } @@ -456,12 +561,32 @@ static void test_reftable_stack_add(void) reftable_log_record_release(&dest); } +#ifndef GIT_WINDOWS_NATIVE + strbuf_addstr(&path, dir); + strbuf_addstr(&path, "/tables.list"); + err = stat(path.buf, &stat_result); + EXPECT(!err); + EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions); + + strbuf_reset(&path); + strbuf_addstr(&path, dir); + strbuf_addstr(&path, "/"); + /* do not try at home; not an external API for reftable. */ + strbuf_addstr(&path, st->readers[0]->name); + err = stat(path.buf, &stat_result); + EXPECT(!err); + EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions); +#else + (void) stat_result; +#endif + /* cleanup */ reftable_stack_destroy(st); for (i = 0; i < N; i++) { reftable_ref_record_release(&refs[i]); reftable_log_record_release(&logs[i]); } + strbuf_release(&path); clear_dir(dir); } @@ -473,16 +598,17 @@ static void test_reftable_stack_log_normalize(void) }; struct reftable_stack *st = NULL; char *dir = get_tmp_dir(__LINE__); - - uint8_t h1[GIT_SHA1_RAWSZ] = { 0x01 }, h2[GIT_SHA1_RAWSZ] = { 0x02 }; - - struct reftable_log_record input = { .refname = "branch", - .update_index = 1, - .value_type = REFTABLE_LOG_UPDATE, - .value = { .update = { - .new_hash = h1, - .old_hash = h2, - } } }; + struct reftable_log_record input = { + .refname = "branch", + .update_index = 1, + .value_type = REFTABLE_LOG_UPDATE, + .value = { + .update = { + .new_hash = { 1 }, + .old_hash = { 2 }, + }, + }, + }; struct reftable_log_record dest = { .update_index = 0, }; @@ -545,7 +671,6 @@ static void test_reftable_stack_tombstone(void) refs[i].update_index = i + 1; if (i % 2 == 0) { refs[i].value_type = REFTABLE_REF_VAL1; - refs[i].value.val1 = reftable_malloc(GIT_SHA1_RAWSZ); set_test_hash(refs[i].value.val1, i); } @@ -554,8 +679,6 @@ static void test_reftable_stack_tombstone(void) logs[i].update_index = 42; if (i % 2 == 0) { logs[i].value_type = REFTABLE_LOG_UPDATE; - logs[i].value.update.new_hash = - reftable_malloc(GIT_SHA1_RAWSZ); set_test_hash(logs[i].value.update.new_hash, i); logs[i].value.update.email = xstrdup("identity@invalid"); @@ -647,60 +770,13 @@ static void test_reftable_stack_hash_id(void) clear_dir(dir); } -static void test_log2(void) -{ - EXPECT(1 == fastlog2(3)); - EXPECT(2 == fastlog2(4)); - EXPECT(2 == fastlog2(5)); -} - -static void test_sizes_to_segments(void) -{ - uint64_t sizes[] = { 2, 3, 4, 5, 7, 9 }; - /* .................0 1 2 3 4 5 */ - - int seglen = 0; - struct segment *segs = - sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes)); - EXPECT(segs[2].log == 3); - EXPECT(segs[2].start == 5); - EXPECT(segs[2].end == 6); - - EXPECT(segs[1].log == 2); - EXPECT(segs[1].start == 2); - EXPECT(segs[1].end == 5); - reftable_free(segs); -} - -static void test_sizes_to_segments_empty(void) -{ - int seglen = 0; - struct segment *segs = sizes_to_segments(&seglen, NULL, 0); - EXPECT(seglen == 0); - reftable_free(segs); -} - -static void test_sizes_to_segments_all_equal(void) -{ - uint64_t sizes[] = { 5, 5 }; - - int seglen = 0; - struct segment *segs = - sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes)); - EXPECT(seglen == 1); - EXPECT(segs[0].start == 0); - EXPECT(segs[0].end == 2); - reftable_free(segs); -} - static void test_suggest_compaction_segment(void) { - uint64_t sizes[] = { 128, 64, 17, 16, 9, 9, 9, 16, 16 }; - /* .................0 1 2 3 4 5 6 */ + uint64_t sizes[] = { 512, 64, 17, 16, 9, 9, 9, 16, 2, 16 }; struct segment min = suggest_compaction_segment(sizes, ARRAY_SIZE(sizes)); - EXPECT(min.start == 2); - EXPECT(min.end == 7); + EXPECT(min.start == 1); + EXPECT(min.end == 10); } static void test_suggest_compaction_segment_nothing(void) @@ -738,7 +814,6 @@ static void test_reflog_expire(void) logs[i].update_index = i; logs[i].value_type = REFTABLE_LOG_UPDATE; logs[i].value.update.time = i; - logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ); logs[i].value.update.email = xstrdup("identity@invalid"); set_test_hash(logs[i].value.update.new_hash, i); } @@ -812,9 +887,21 @@ static void test_empty_add(void) reftable_stack_destroy(st2); } +static int fastlog2(uint64_t sz) +{ + int l = 0; + if (sz == 0) + return 0; + for (; sz; sz /= 2) + l++; + return l - 1; +} + static void test_reftable_stack_auto_compaction(void) { - struct reftable_write_options cfg = { 0 }; + struct reftable_write_options cfg = { + .disable_auto_compact = 1, + }; struct reftable_stack *st = NULL; char *dir = get_tmp_dir(__LINE__); @@ -824,7 +911,6 @@ static void test_reftable_stack_auto_compaction(void) err = reftable_new_stack(&st, dir, cfg); EXPECT_ERR(err); - st->disable_auto_compact = 1; /* call manually below for coverage. */ for (i = 0; i < N; i++) { char name[100]; struct reftable_ref_record ref = { @@ -850,6 +936,54 @@ static void test_reftable_stack_auto_compaction(void) clear_dir(dir); } +static void test_reftable_stack_add_performs_auto_compaction(void) +{ + struct reftable_write_options cfg = { 0 }; + struct reftable_stack *st = NULL; + struct strbuf refname = STRBUF_INIT; + char *dir = get_tmp_dir(__LINE__); + int err, i, n = 20; + + err = reftable_new_stack(&st, dir, cfg); + EXPECT_ERR(err); + + for (i = 0; i <= n; i++) { + struct reftable_ref_record ref = { + .update_index = reftable_stack_next_update_index(st), + .value_type = REFTABLE_REF_SYMREF, + .value.symref = "master", + }; + + /* + * Disable auto-compaction for all but the last runs. Like this + * we can ensure that we indeed honor this setting and have + * better control over when exactly auto compaction runs. + */ + st->config.disable_auto_compact = i != n; + + strbuf_reset(&refname); + strbuf_addf(&refname, "branch-%04d", i); + ref.refname = refname.buf; + + err = reftable_stack_add(st, &write_test_ref, &ref); + EXPECT_ERR(err); + + /* + * The stack length should grow continuously for all runs where + * auto compaction is disabled. When enabled, we should merge + * all tables in the stack. + */ + if (i != n) + EXPECT(st->merged->stack_len == i + 1); + else + EXPECT(st->merged->stack_len == 1); + } + + reftable_stack_destroy(st); + strbuf_release(&refname); + clear_dir(dir); +} + static void test_reftable_stack_compaction_concurrent(void) { struct reftable_write_options cfg = { 0 }; @@ -952,7 +1086,6 @@ static void test_reftable_stack_compaction_concurrent_clean(void) int stack_test_main(int argc, const char *argv[]) { RUN_TEST(test_empty_add); - RUN_TEST(test_log2); RUN_TEST(test_names_equal); RUN_TEST(test_parse_names); RUN_TEST(test_read_file); @@ -960,6 +1093,7 @@ int stack_test_main(int argc, const char *argv[]) RUN_TEST(test_reftable_stack_add); RUN_TEST(test_reftable_stack_add_one); RUN_TEST(test_reftable_stack_auto_compaction); + RUN_TEST(test_reftable_stack_add_performs_auto_compaction); RUN_TEST(test_reftable_stack_compaction_concurrent); RUN_TEST(test_reftable_stack_compaction_concurrent_clean); RUN_TEST(test_reftable_stack_hash_id); @@ -967,12 +1101,11 @@ int stack_test_main(int argc, const char *argv[]) RUN_TEST(test_reftable_stack_log_normalize); RUN_TEST(test_reftable_stack_tombstone); RUN_TEST(test_reftable_stack_transaction_api); + RUN_TEST(test_reftable_stack_transaction_api_performs_auto_compaction); + RUN_TEST(test_reftable_stack_auto_compaction_fails_gracefully); RUN_TEST(test_reftable_stack_update_index_check); RUN_TEST(test_reftable_stack_uptodate); RUN_TEST(test_reftable_stack_validate_refname); - RUN_TEST(test_sizes_to_segments); - RUN_TEST(test_sizes_to_segments_all_equal); - RUN_TEST(test_sizes_to_segments_empty); RUN_TEST(test_suggest_compaction_segment); RUN_TEST(test_suggest_compaction_segment_nothing); return 0; diff --git a/reftable/system.h b/reftable/system.h index 6b74a81..5d8b6de 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -12,7 +12,9 @@ https://developers.google.com/open-source/licenses/bsd /* This header glues the reftable library to the rest of Git */ #include "git-compat-util.h" +#include "lockfile.h" #include "strbuf.h" +#include "tempfile.h" #include "hash-ll.h" /* hash ID, sizes.*/ #include "dir.h" /* remove_dir_recursively, for tests.*/ diff --git a/reftable/test_framework.c b/reftable/test_framework.c index 84ac972..4066924 100644 --- a/reftable/test_framework.c +++ b/reftable/test_framework.c @@ -9,7 +9,6 @@ https://developers.google.com/open-source/licenses/bsd #include "system.h" #include "test_framework.h" -#include "basics.h" void set_test_hash(uint8_t *p, int i) { @@ -21,3 +20,8 @@ ssize_t strbuf_add_void(void *b, const void *data, size_t sz) strbuf_add(b, data, sz); return sz; } + +int noop_flush(void *arg) +{ + return 0; +} diff --git a/reftable/test_framework.h b/reftable/test_framework.h index 774cb27..687390f 100644 --- a/reftable/test_framework.h +++ b/reftable/test_framework.h @@ -12,32 +12,38 @@ https://developers.google.com/open-source/licenses/bsd #include "system.h" #include "reftable-error.h" -#define EXPECT_ERR(c) \ - if (c != 0) { \ - fflush(stderr); \ - fflush(stdout); \ - fprintf(stderr, "%s: %d: error == %d (%s), want 0\n", \ - __FILE__, __LINE__, c, reftable_error_str(c)); \ - abort(); \ - } - -#define EXPECT_STREQ(a, b) \ - if (strcmp(a, b)) { \ - fflush(stderr); \ - fflush(stdout); \ - fprintf(stderr, "%s:%d: %s (%s) != %s (%s)\n", __FILE__, \ - __LINE__, #a, a, #b, b); \ - abort(); \ - } - -#define EXPECT(c) \ - if (!(c)) { \ - fflush(stderr); \ - fflush(stdout); \ - fprintf(stderr, "%s: %d: failed assertion %s\n", __FILE__, \ - __LINE__, #c); \ - abort(); \ - } +#define EXPECT_ERR(c) \ + do { \ + if (c != 0) { \ + fflush(stderr); \ + fflush(stdout); \ + fprintf(stderr, "%s: %d: error == %d (%s), want 0\n", \ + __FILE__, __LINE__, c, reftable_error_str(c)); \ + abort(); \ + } \ + } while (0) + +#define EXPECT_STREQ(a, b) \ + do { \ + if (strcmp(a, b)) { \ + fflush(stderr); \ + fflush(stdout); \ + fprintf(stderr, "%s:%d: %s (%s) != %s (%s)\n", __FILE__, \ + __LINE__, #a, a, #b, b); \ + abort(); \ + } \ + } while (0) + +#define EXPECT(c) \ + do { \ + if (!(c)) { \ + fflush(stderr); \ + fflush(stdout); \ + fprintf(stderr, "%s: %d: failed assertion %s\n", __FILE__, \ + __LINE__, #c); \ + abort(); \ + } \ + } while (0) #define RUN_TEST(f) \ fprintf(stderr, "running %s\n", #f); \ @@ -50,4 +56,6 @@ void set_test_hash(uint8_t *p, int i); */ ssize_t strbuf_add_void(void *b, const void *data, size_t sz); +int noop_flush(void *); + #endif diff --git a/reftable/tree.c b/reftable/tree.c index a5bf880..528f33a 100644 --- a/reftable/tree.c +++ b/reftable/tree.c @@ -20,8 +20,8 @@ struct tree_node *tree_search(void *key, struct tree_node **rootp, if (!insert) { return NULL; } else { - struct tree_node *n = - reftable_calloc(sizeof(struct tree_node)); + struct tree_node *n; + REFTABLE_CALLOC_ARRAY(n, 1); n->key = key; *rootp = n; return *rootp; diff --git a/reftable/tree_test.c b/reftable/tree_test.c index ac3a045..6961a65 100644 --- a/reftable/tree_test.c +++ b/reftable/tree_test.c @@ -9,8 +9,6 @@ https://developers.google.com/open-source/licenses/bsd #include "system.h" #include "tree.h" -#include "basics.h" -#include "record.h" #include "test_framework.h" #include "reftable-tests.h" diff --git a/reftable/writer.c b/reftable/writer.c index 2e322a5..1d9ff0f 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -49,7 +49,7 @@ static int padded_write(struct reftable_writer *w, uint8_t *data, size_t len, { int n = 0; if (w->pending_padding > 0) { - uint8_t *zeroed = reftable_calloc(w->pending_padding); + uint8_t *zeroed = reftable_calloc(w->pending_padding, sizeof(*zeroed)); int n = w->write(w->write_arg, zeroed, w->pending_padding); if (n < 0) return n; @@ -121,10 +121,10 @@ static struct strbuf reftable_empty_strbuf = STRBUF_INIT; struct reftable_writer * reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t), + int (*flush_func)(void *), void *writer_arg, struct reftable_write_options *opts) { - struct reftable_writer *wp = - reftable_calloc(sizeof(struct reftable_writer)); + struct reftable_writer *wp = reftable_calloc(1, sizeof(*wp)); strbuf_init(&wp->block_writer_data.last_key, 0); options_set_defaults(opts); if (opts->block_size >= (1 << 24)) { @@ -132,10 +132,11 @@ reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t), abort(); } wp->last_key = reftable_empty_strbuf; - wp->block = reftable_calloc(opts->block_size); + REFTABLE_CALLOC_ARRAY(wp->block, opts->block_size); wp->write = writer_func; wp->write_arg = writer_arg; wp->opts = *opts; + wp->flush = flush_func; writer_reinit_block_writer(wp, BLOCK_TYPE_REF); return wp; @@ -200,12 +201,7 @@ static void writer_index_hash(struct reftable_writer *w, struct strbuf *hash) return; } - if (key->offset_len == key->offset_cap) { - key->offset_cap = 2 * key->offset_cap + 1; - key->offsets = reftable_realloc( - key->offsets, sizeof(uint64_t) * key->offset_cap); - } - + REFTABLE_ALLOC_GROW(key->offsets, key->offset_len + 1, key->offset_cap); key->offsets[key->offset_len++] = off; } @@ -377,20 +373,39 @@ int reftable_writer_add_logs(struct reftable_writer *w, static int writer_finish_section(struct reftable_writer *w) { + struct reftable_block_stats *bstats = NULL; uint8_t typ = block_writer_type(w->block_writer); uint64_t index_start = 0; int max_level = 0; - int threshold = w->opts.unpadded ? 1 : 3; + size_t threshold = w->opts.unpadded ? 1 : 3; int before_blocks = w->stats.idx_stats.blocks; - int err = writer_flush_block(w); - int i = 0; - struct reftable_block_stats *bstats = NULL; + int err; + + err = writer_flush_block(w); if (err < 0) return err; + /* + * When the section we are about to index has a lot of blocks then the + * index itself may span across multiple blocks, as well. This would + * require a linear scan over index blocks only to find the desired + * indexed block, which is inefficient. Instead, we write a multi-level + * index where index records of level N+1 will refer to index blocks of + * level N. This isn't constant time, either, but at least logarithmic. + * + * This loop handles writing this multi-level index. Note that we write + * the lowest-level index pointing to the indexed blocks first. We then + * continue writing additional index levels until the current level has + * less blocks than the threshold so that the highest level will be at + * the end of the index section. + * + * Readers are thus required to start reading the index section from + * its end, which is why we set `index_start` to the beginning of the + * last index section. + */ while (w->index_len > threshold) { struct reftable_index_record *idx = NULL; - int idx_len = 0; + size_t i, idx_len; max_level++; index_start = w->next; @@ -409,35 +424,28 @@ static int writer_finish_section(struct reftable_writer *w) .idx = idx[i], }, }; - if (block_writer_add(w->block_writer, &rec) == 0) { - continue; - } - err = writer_flush_block(w); + err = writer_add_record(w, &rec); if (err < 0) return err; + } - writer_reinit_block_writer(w, BLOCK_TYPE_INDEX); + err = writer_flush_block(w); + if (err < 0) + return err; - err = block_writer_add(w->block_writer, &rec); - if (err != 0) { - /* write into fresh block should always succeed - */ - abort(); - } - } - for (i = 0; i < idx_len; i++) { + for (i = 0; i < idx_len; i++) strbuf_release(&idx[i].last_key); - } reftable_free(idx); } + /* + * The index may still contain a number of index blocks lower than the + * threshold. Clear it so that these entries don't leak into the next + * index section. + */ writer_clear_index(w); - err = writer_flush_block(w); - if (err < 0) - return err; - bstats = writer_reftable_block_stats(w, typ); bstats->index_blocks = w->stats.idx_stats.blocks - before_blocks; bstats->index_offset = index_start; @@ -603,6 +611,12 @@ int reftable_writer_close(struct reftable_writer *w) put_be32(p, crc32(0, footer, p - footer)); p += 4; + err = w->flush(w->write_arg); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + err = padded_write(w, footer, footer_size(writer_version(w)), 0); if (err < 0) goto done; @@ -622,11 +636,8 @@ done: static void writer_clear_index(struct reftable_writer *w) { - int i = 0; - for (i = 0; i < w->index_len; i++) { + for (size_t i = 0; i < w->index_len; i++) strbuf_release(&w->index[i].last_key); - } - FREE_AND_NULL(w->index); w->index_len = 0; w->index_cap = 0; @@ -674,12 +685,7 @@ static int writer_flush_nonempty_block(struct reftable_writer *w) if (err < 0) return err; - if (w->index_cap == w->index_len) { - w->index_cap = 2 * w->index_cap + 1; - w->index = reftable_realloc( - w->index, - sizeof(struct reftable_index_record) * w->index_cap); - } + REFTABLE_ALLOC_GROW(w->index, w->index_len + 1, w->index_cap); ir.offset = w->next; strbuf_reset(&ir.last_key); diff --git a/reftable/writer.h b/reftable/writer.h index 09b8867..8d0df9c 100644 --- a/reftable/writer.h +++ b/reftable/writer.h @@ -16,6 +16,7 @@ https://developers.google.com/open-source/licenses/bsd struct reftable_writer { ssize_t (*write)(void *, const void *, size_t); + int (*flush)(void *); void *write_arg; int pending_padding; struct strbuf last_key; |