Diffstat (limited to 'vendor/gix-pack')
69 files changed, 11090 insertions, 0 deletions
diff --git a/vendor/gix-pack/.cargo-checksum.json b/vendor/gix-pack/.cargo-checksum.json new file mode 100644 index 000000000..a674b0cd1 --- /dev/null +++ b/vendor/gix-pack/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"f02c5514c7219fed9f62a7e5a26802a5a89dea94f39b4d09a6f7a7c4422b164c","Cargo.toml":"0fb94c37339c369db2b8ab81d9866669270710ff4a2761bd5834221be9133c73","src/bundle/find.rs":"6d8373ae8863e14bcbbcbf2005e43e151cee8e25336a9f79dddd6bdbb85c7b27","src/bundle/init.rs":"6067e968404f52dcd38ed5c6c0114f880e9ff7bd0f883a07b7546e22678aabdc","src/bundle/mod.rs":"e4ee9d88ba45da113f0d9081535a3218bb3a3ebdb51ca7af52fe92fd33021b00","src/bundle/write/error.rs":"f978b1622ac1354f44a7160d2439d5ac697dd92b67631d8c846ae2621be489ae","src/bundle/write/mod.rs":"29330fa5aa340a90f8073aa965319f7186bc96773762a10cd6b754f0a40f2d41","src/bundle/write/types.rs":"99f603747077a6bb0c4f4233ee6ccac34a77f84ac5e6da03decc436ddbee2d4b","src/cache/delta/from_offsets.rs":"721b2f9dc805f073b5a96a8dcf1f27ebc5c2795fe53476eb45116d464bc309b3","src/cache/delta/mod.rs":"d2d67cc96fb8e0fe48bc6eabdc2846c448f8bb297ad0512e0dfc40d54205b3cb","src/cache/delta/traverse/mod.rs":"d3efb10012c2cb73cb570f66b25f69d241cdd989ec3acdebc7d1dc2708331a15","src/cache/delta/traverse/resolve.rs":"dc67edb43d830c32cabad943cc93890fbfbc9ac498ba54fd584a8fc27d0bb973","src/cache/delta/traverse/util.rs":"da1bed028290df6b2730e097404a5590466f59bc4f7de3b1ab24d90e2c6f6692","src/cache/lru.rs":"b4acd8a8757c4d7b23ed00efe2c26cfdd100a54274f2da5ef2e1ffe21aab2194","src/cache/mod.rs":"1fd545e5944ec378f5a89825840fc869684ca571ea7859213ea38d674e991fb1","src/cache/object.rs":"a643dd8418a08c185a2d0c42e18ddaa3809f233b3846c96377e7c92c85492cf7","src/data/delta.rs":"915c10207f5897d473cc806ae31de98e90f41e5e99361e18cb1e42177137c024","src/data/entry/decode.rs":"629abef1cd8352bb57c34ff1b6282e6387a12ec67372b823e0d0dda4caafd901","src/data/entry/header.rs":"4f85658c8a11e0f6e6bfeee07fd8affd6a05b819a3f84905a7f1c26c7c8de8b6","src/data/entry/mod.rs":"6586af01ffdfe195c4d144f6a7d2baf4762ad7507fa6328fadfc43ececb617e4","src/data/file/decode/entry.rs":"6c35424b70f11f87aa7160ba751a850dbd0e287990f9054b2d47f46906317bec","src/data/file/decode/header.rs":"82b7b9481713825df5d5d68fb7e6b9d561adec18ccfb31acb390042d56197b5f","src/data/file/decode/mod.rs":"bed7398ba3c6a48d2c7af96a2044ee56ba3d9e757995e06493517db27c743712","src/data/file/init.rs":"16a594f4245be493e3ca61fabe730d796e1d2235de7c0b38609e394df9737c86","src/data/file/mod.rs":"4b6a8a99a4864365592733836d654cc30ab717bf8a69569d02fac3ef0b88fac2","src/data/file/verify.rs":"20aea340799f68087aa196be574fe48cff25047cd1dfdaa99b1fb2f60f4319d9","src/data/header.rs":"cc86b9b6d45c7764217bcea615e5640fbbb4af0d17d25cc24073d48d2b9fd206","src/data/input/bytes_to_entries.rs":"a52ebe1de8ea67cacd610d6999d165185b7c2b1c916126ffe7de5df402927a92","src/data/input/entries_to_bytes.rs":"01f23c0cf5822c1f3f5888d287a0f03f1e67dc519d83061ccbca9c5a38adfff0","src/data/input/entry.rs":"9e9d9f2a696e084e71a7a50d85b85003647e4a761f281b3f0623333454d5352a","src/data/input/lookup_ref_delta_objects.rs":"ad122bdfdb07fee85695b2d0909606720917c3ed0ea7f1d1a81de4cab2ce2eac","src/data/input/mod.rs":"842752c2132e82801692327be2172fe14a68c002dbf788fbec3def2c73cb6aa9","src/data/input/types.rs":"25cc12501e0bbff3ea64315bc3421083d8bedb4139be02bc44939983e0f292c8","src/data/mod.rs":"6f50a2abf8b6b691c2e56e799c8ca97139ddf6da205f58a93248ec370cf18957","src/data/output/bytes.rs":"85ae6a3e44c569ba842a770a65ce549dbf772d29fa55368f263c0cae3447443e","src/data/output/count/mod.rs":"9e3df0813735b47112c46ac79ef31aaad8
77b2d44f2924f21fe8a42ac2e9b18c","src/data/output/count/objects/mod.rs":"b04d5360c9c37643b17199b2cb89faa5d576d7cabe9d8ea9ab2c5f0ad217aead","src/data/output/count/objects/reduce.rs":"22371344975483bfd8b3a3dec67cd290a1cb526967b0c52032f817bcdba96014","src/data/output/count/objects/tree.rs":"7d6bfbe55d32c3c13fe1c9004e0671a8fc7379d73985ef69da0d1d2d153315e3","src/data/output/count/objects/types.rs":"325ca2d5faeb4e61c7ef872cb1e1b3976905290bcd7567dc5b09d2c7c45a1c1e","src/data/output/count/objects/util.rs":"877620e8506052976b43e0145eed3b81284a79e93d0efad9f0e7889b186107b3","src/data/output/entry/iter_from_counts.rs":"37dba5d9c1c2a30d705ec22f5dd7560c85719145a29d3cf805eb7779e6fe0303","src/data/output/entry/mod.rs":"0f9e46ddb022063a73eafbe150f05e3d59ad883bfc24f03f81564bec5d12ed0f","src/data/output/mod.rs":"8defcea274f20b50050a4d783f9fe26e6b9bd77831186c7a1c1a71a56061483c","src/find.rs":"b8b1b0420028dd1c08b024151abf7f3435e61ba8b49f6ca7108d926732f35447","src/find_traits.rs":"04cf9445ff46a29cb4f9c91db846cf4288b341671d60f3362bdd0e4f36c87a01","src/index/access.rs":"d12b5bc4fc045be854a8caf3973fbb9b18056af473a4cbfb16c7becb4f8c294f","src/index/init.rs":"d25b0865859f505fba4b02437faad6f02b264585906c9cdc1743b64f7f238148","src/index/mod.rs":"ac552ac4fcac76a21a1c3cc5a0cf82d0f266a44d6fb188b0fde60ea8024d71dd","src/index/traverse/error.rs":"d520a384e3737ac973a9d84cf6dbd7ebda6f459d26560e6e40555eacede8c7f6","src/index/traverse/mod.rs":"48bc2fe54983c0a28ddb9a4ef36fd1fe643b57a139e1b431df26aff32f0d17a9","src/index/traverse/reduce.rs":"0f3614232888f66c5ad13552875ced211c79dad656093e080b16bfc25ff5d7b1","src/index/traverse/types.rs":"c9a0a11ab8ff53fc208c344e13bdb46dfb3e3d5f5f1172a34fc79867364a3080","src/index/traverse/with_index.rs":"5bd064a735bffca6a20708998a78d9e75ea75947d6bc6b8c318d00854eb80ece","src/index/traverse/with_lookup.rs":"013aca8001fa4e019bf071c5b7ce2e1b2c274bd055f7a1705d367e72fa68b39c","src/index/util.rs":"546454f39d469b2b1cca384822e3004a48b9c6a91b899cce83b5759026911419","src/index/verify.rs":"414bd4e738e1c3daa0c38fe0acf9d8add660d3d181f9c6cbcbd8f84cd1f16fb0","src/index/write/encode.rs":"d92c85fd80ff380470f280720ddfe19422671ad24d6444c9f3a4864608efcd91","src/index/write/error.rs":"5294efe0508252d5942183fa5ab5139dc10e33ccfb28698a6101fc360990d688","src/index/write/mod.rs":"b57af225f33f12d1851deefe6f6ad5d1d34fc3ed63239d39e394d41559d081fb","src/lib.rs":"7b72df6596c2501d8bb9c0bde09620e6b497ce4561191c59eae0d4229356d97b","src/multi_index/access.rs":"bd79cba6d2f5ce00091fe77e65f451296309ae2188ecb8cbae9460e20228dc8f","src/multi_index/chunk.rs":"c3e2e08f485db6043c8ae74a1d3daab6f619ba17cdc0af32c5d010ec433f97d2","src/multi_index/init.rs":"290daf86cfe21127a27c5bea49d3b1ef4812bde968ff30b36e4cef278bc513c9","src/multi_index/mod.rs":"d8e12eaa8a27707031d216f4c12f70a081f20bae1697c12dac9c286de9890e01","src/multi_index/verify.rs":"b165566234f53abde696b741843000f55a5699c90d38e03173fa6f58279c4b3f","src/multi_index/write.rs":"caa956a4a5c504226492367facb0acd5f5ba410f9e4f7e86c7b668a5605998f7","src/verify.rs":"5e5d9decdbfb46963b5589dd49d76079e28a8aa6575d20d078492a7f2d50bad9"},"package":"e51db84e1459a8022e518d40a8778028d793dbb28e4d35c9a5eaf92658fb0775"}
\ No newline at end of file
diff --git a/vendor/gix-pack/CHANGELOG.md b/vendor/gix-pack/CHANGELOG.md
new file mode 100644
index 000000000..bfd408dc5
--- /dev/null
+++ b/vendor/gix-pack/CHANGELOG.md
@@ -0,0 +1,1581 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## 0.32.0 (2023-03-04)
+
+### Bug Fixes
+
+ - <csr-id-ce182d6dae7e277d104893f0eec3285709946233/> don't over-estimate the number of objects to be received in the indexing phase.
+   Previously, it would work with a number that was pessimistically doubled, as each object
+   could in theory be a ref-delta, which then causes another base to be added to the stream,
+   duplicating the number of objects we need to account for in the acceleration data structure,
+   which unfortunately cannot actually grow dynamically without violating invariants.
+
+   Now we simply don't try to estimate the number of ref-deltas (which don't exist here anymore)
+   at this stage.
+
+### Commit Statistics
+
+<csr-read-only-do-not-edit/>
+
+ - 2 commits contributed to the release.
+ - 3 days passed between releases.
+ - 1 commit was understood as [conventional](https://www.conventionalcommits.org).
+ - 0 issues like '(#ID)' were seen in commit messages
+
+### Commit Details
+
+<csr-read-only-do-not-edit/>
+
+<details><summary>view details</summary>
+
+ * **Uncategorized**
+    - Prepare changelogs prior to release of `gix-pack` ([`6db30ef`](https://github.com/Byron/gitoxide/commit/6db30ef6b5e931bbf12135507a3d922051de4d4b))
+    - Don't over-estimate the number of objects to be received in the indexing phase. ([`ce182d6`](https://github.com/Byron/gitoxide/commit/ce182d6dae7e277d104893f0eec3285709946233))
+</details>
+
+## 0.31.0 (2023-03-01)
+
+<csr-id-3ba25202240d13fdda998581297616afe06422ca/>
+
+### Chore
+
+ - <csr-id-3ba25202240d13fdda998581297616afe06422ca/> remove `dashmap` in favor of own sharded concurrent hashmap.
+   This speeds up multi-threaded counting greatly, and despite it
+   using shared memory which makes it quite wasteful, it is possible to
+   outperform `git` with it if enough cores are thrown at the problem.
+
+   Single-threaded performance is still lacking though; ultimately it needs
+   caches to accelerate the counting stage to hopefully be competitive.
+
+### New Features
+
+ - <csr-id-f0e40ecddaf1211f76ed60ef30cf03dcfd53a7ab/> add `wasm` feature toggle to allow compilation to wasm32-unknown-unknown
+ - <csr-id-3ba25202240d13fdda998581297616afe06422ca/> remove `dashmap` in favor of own sharded concurrent hashmap.
+   This speeds up multi-threaded counting greatly, and despite it
+   using shared memory which makes it quite wasteful, it is possible to
+   outperform `git` with it if enough cores are thrown at the problem.
+
+   Single-threaded performance is still lacking though; ultimately it needs
+   caches to accelerate the counting stage to hopefully be competitive.
+
+### Bug Fixes
+
+ - <csr-id-e14dc7d475373d2c266e84ff8f1826c68a34ab92/> note that crates have been renamed from `git-*` to `gix-*`.
+   This also means that the `git-*` prefixed crates of the `gitoxide` project
+   are effectively unmaintained.
+   Use the crates with the `gix-*` prefix instead.
+
+   If you were using `git-repository`, then `gix` is its substitute.
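The 0.31.0 notes above mention replacing `dashmap` with a hand-rolled sharded concurrent hashmap to speed up multi-threaded counting. As a rough sketch of the general technique only — the `ShardedMap` type and its methods below are illustrative and not gix-pack's actual API — keys are hashed to one of several independently locked shards, so concurrent writers usually contend on different locks:

```rust
use std::collections::hash_map::RandomState;
use std::collections::HashMap;
use std::hash::{BuildHasher, Hash, Hasher};
use std::sync::Mutex;

/// Illustrative sharded map: each key is routed to one of N independently
/// locked shards, so threads mostly take different locks.
struct ShardedMap<K, V> {
    shards: Vec<Mutex<HashMap<K, V>>>,
    hasher: RandomState,
}

impl<K: Hash + Eq, V> ShardedMap<K, V> {
    fn new(shard_count: usize) -> Self {
        ShardedMap {
            shards: (0..shard_count).map(|_| Mutex::new(HashMap::new())).collect(),
            hasher: RandomState::new(),
        }
    }

    /// Hash the key once to pick a shard.
    fn shard_index(&self, key: &K) -> usize {
        let mut state = self.hasher.build_hasher();
        key.hash(&mut state);
        (state.finish() as usize) % self.shards.len()
    }

    fn insert(&self, key: K, value: V) {
        let shard = &self.shards[self.shard_index(&key)];
        shard.lock().expect("shard lock poisoned").insert(key, value);
    }

    fn get_cloned(&self, key: &K) -> Option<V>
    where
        V: Clone,
    {
        let shard = &self.shards[self.shard_index(key)];
        shard.lock().expect("shard lock poisoned").get(key).cloned()
    }
}

fn main() {
    // The map only needs `&self` for access, so it can be shared across threads.
    let map: ShardedMap<String, u32> = ShardedMap::new(16);
    std::thread::scope(|s| {
        for t in 0..4 {
            let map = &map;
            s.spawn(move || {
                for i in 0..100u32 {
                    map.insert(format!("object-{t}-{i}"), i);
                }
            });
        }
    });
    assert_eq!(map.get_cloned(&"object-3-99".to_string()), Some(99));
}
```

Real implementations typically size the shard count relative to the available cores and may prefer `RwLock` or lock-free buckets over `Mutex`; this sketch only shows why sharding reduces contention compared to a single global lock.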
+ +### New Features (BREAKING) + + - <csr-id-6c4c196c9bc6c2171dc4dc58b69bd5ef53226e29/> add `wasm` feature toggle to let parts of `git-pack` build on wasm32. + It's a breaking change because we also start using the `dep:` syntax for declaring + references to optional dependencies, which will prevent them from being automatically + available as features. + + Besides that, it adds the `wasm` feature toggle to allow compiling to `wasm32` targets. + +### Commit Statistics + +<csr-read-only-do-not-edit/> + + - 11 commits contributed to the release over the course of 2 calendar days. + - 8 days passed between releases. + - 1 commit was understood as [conventional](https://www.conventionalcommits.org). + - 0 issues like '(#ID)' were seen in commit messages + +### Commit Details + +<csr-read-only-do-not-edit/> + +<details><summary>view details</summary> + + * **Uncategorized** + - Release gix-tempfile v4.1.0, gix-lock v4.0.0, gix-ref v0.25.0, gix-config v0.17.0, gix-url v0.14.0, gix-credentials v0.10.0, gix-diff v0.27.0, gix-discover v0.14.0, gix-hashtable v0.1.2, gix-bitmap v0.2.2, gix-traverse v0.23.0, gix-index v0.13.0, gix-mailmap v0.10.0, gix-pack v0.31.0, gix-odb v0.41.0, gix-transport v0.26.0, gix-protocol v0.27.0, gix-revision v0.11.0, gix-refspec v0.8.0, gix-worktree v0.13.0, gix v0.38.0, safety bump 6 crates ([`ea9fd1d`](https://github.com/Byron/gitoxide/commit/ea9fd1d9b60e1e9e17042e9e37c06525823c40a5)) + - Release gix-features v0.27.0, gix-actor v0.18.0, gix-quote v0.4.3, gix-attributes v0.9.0, gix-object v0.27.0, gix-ref v0.25.0, gix-config v0.17.0, gix-url v0.14.0, gix-credentials v0.10.0, gix-diff v0.27.0, gix-discover v0.14.0, gix-hashtable v0.1.2, gix-bitmap v0.2.2, gix-traverse v0.23.0, gix-index v0.13.0, gix-mailmap v0.10.0, gix-pack v0.31.0, gix-odb v0.41.0, gix-transport v0.26.0, gix-protocol v0.27.0, gix-revision v0.11.0, gix-refspec v0.8.0, gix-worktree v0.13.0, gix v0.38.0 ([`e6cc618`](https://github.com/Byron/gitoxide/commit/e6cc6184a7a49dbc2503c1c1bdd3688ca5cec5fe)) + - Remove versions from dev-dependencies to workspace crates. ([`3cfbf89`](https://github.com/Byron/gitoxide/commit/3cfbf89f8630dfc71c9085eee6ca286a5c96ad84)) + - Adjust manifests prior to release ([`addd789`](https://github.com/Byron/gitoxide/commit/addd78958fdd1e54eb702854e96079539d01965a)) + - Prepare changelogs prior to release ([`94c99c7`](https://github.com/Byron/gitoxide/commit/94c99c71520f33269cc8dbc26f82a74747cc7e16)) + - Merge branch 'adjustments-for-cargo' ([`d686d94`](https://github.com/Byron/gitoxide/commit/d686d94e1030a8591ba074757d56927a346c8351)) + - Remove `dashmap` in favor of own sharded concurrent hashmap. ([`3ba2520`](https://github.com/Byron/gitoxide/commit/3ba25202240d13fdda998581297616afe06422ca)) + - Adapt to changes in `gix-tempfile` ([`bfcd1e3`](https://github.com/Byron/gitoxide/commit/bfcd1e3a8f2d05d0d6d1f5cf06d369ac9e6e4cdf)) + - Adapt to changes in `gix-features` ([`a0ed614`](https://github.com/Byron/gitoxide/commit/a0ed6142c9a7ce7ed9fb6576117bb46e5497839c)) + - Adjust to changes in `gix-features`, use `process::count_with_decimals()` ([`3968133`](https://github.com/Byron/gitoxide/commit/3968133f60609d75806783234253fb8b3972f10e)) + - Prepare for git-tempfile release ([`56c005b`](https://github.com/Byron/gitoxide/commit/56c005b13c44376f71e61781e73c0bf93416d0e4)) +</details> + +## 0.30.3 (2023-02-20) + +### Bug Fixes + + - <csr-id-135d317065aae87af302beb6c26bb6ca8e30b6aa/> compatibility with `bstr` v1.3, use `*.as_bytes()` instead of `.as_ref()`. 
+ `as_ref()` relies on a known target type which isn't always present. However, once + there is only one implementation, that's no problem, but when that changes compilation + fails due to ambiguity. + +### Commit Statistics + +<csr-read-only-do-not-edit/> + + - 2 commits contributed to the release. + - 3 days passed between releases. + - 1 commit was understood as [conventional](https://www.conventionalcommits.org). + - 0 issues like '(#ID)' were seen in commit messages + +### Commit Details + +<csr-read-only-do-not-edit/> + +<details><summary>view details</summary> + + * **Uncategorized** + - Release gix-date v0.4.3, gix-hash v0.10.3, gix-features v0.26.5, gix-actor v0.17.2, gix-glob v0.5.5, gix-path v0.7.2, gix-quote v0.4.2, gix-attributes v0.8.3, gix-validate v0.7.3, gix-object v0.26.2, gix-ref v0.24.1, gix-config v0.16.2, gix-command v0.2.4, gix-url v0.13.3, gix-credentials v0.9.2, gix-discover v0.13.1, gix-index v0.12.4, gix-mailmap v0.9.3, gix-pack v0.30.3, gix-packetline v0.14.3, gix-transport v0.25.6, gix-protocol v0.26.4, gix-revision v0.10.4, gix-refspec v0.7.3, gix-worktree v0.12.3, gix v0.36.1 ([`9604783`](https://github.com/Byron/gitoxide/commit/96047839a20a657a559376b0b14c65aeab96acbd)) + - Compatibility with `bstr` v1.3, use `*.as_bytes()` instead of `.as_ref()`. ([`135d317`](https://github.com/Byron/gitoxide/commit/135d317065aae87af302beb6c26bb6ca8e30b6aa)) +</details> + +## 0.30.2 (2023-02-17) + +<csr-id-ebc7f47708a63c3df4415ba0e702660d976dfb3e/> +<csr-id-2290d006705ff47ad780b009fe58ee422b3285af/> +<csr-id-598698b88c194bc0e6ef69539f9fa7246ebfab70/> +<csr-id-b46347fd3d50886eeca500e31e1e12b354711309/> +<csr-id-591afd56d9862a6348ef8b3af61798004b36aa19/> +<csr-id-2f2d856efe733d3cf81110c0e0607d2e7c40d968/> +<csr-id-9b9f10ad862b5e097c836c51df1eb98607df5ae1/> +<csr-id-e0b8636f96e4bfe1bc72b5aa6ad4c4c8538ff92c/> +<csr-id-71c628d46088ab455b54eb2330d24dcff96c911d/> +<csr-id-8fe461281842b58aa11437445637c6e587bedd63/> +<csr-id-e6ff1a885889cf88f6b34b1193aa03d8bce16af5/> +<csr-id-f48630ba8f745c2ec61a1e3c51fa63a1789a088c/> +<csr-id-f7f136dbe4f86e7dee1d54835c420ec07c96cd78/> +<csr-id-25209454d3f7e27e12e8ddca92e43b1ff01d58aa/> +<csr-id-c800fdd331e6d7a0b8d756ba822915259f26e9e8/> + +### Refactor (BREAKING) + + - <csr-id-ebc7f47708a63c3df4415ba0e702660d976dfb3e/> remove pack-cache from `Find::try_find(…)` + With the new architecture this can be an implementation detail without + forcing it to be Sync. + - <csr-id-2290d006705ff47ad780b009fe58ee422b3285af/> move git_pack::data::Object to git_object::Data, massively alter git_odb::Find trait + This will break a lot, but has to happen to prepare these traits for the + next generation of object databases. + - <csr-id-598698b88c194bc0e6ef69539f9fa7246ebfab70/> move loose header manipulation from git-pack to git-object + +### Other (BREAKING) + + - <csr-id-b46347fd3d50886eeca500e31e1e12b354711309/> `index::write::Outcome::index_kind` -> `::index_version`. + - <csr-id-591afd56d9862a6348ef8b3af61798004b36aa19/> `bundle::write::Options::index_kind` -> `::index_version`. + - <csr-id-2f2d856efe733d3cf81110c0e0607d2e7c40d968/> Avoid duplicate module paths in 'tree' and 'commit' + +### Bug Fixes (BREAKING) + + - <csr-id-5a75afe0467e4a84323ea10172eed835cc7fae4c/> Reading and writing empty packs is explicitly allowed. + This can happen when sending packs that don't actually contain changes, + but need to be sent to conform to the protocol. 
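The 0.30.3 entry above switches from `.as_ref()` to `.as_bytes()` for `bstr` v1.3 compatibility because `as_ref()` needs an unambiguous target type. A minimal sketch of that failure mode, using a hypothetical `ByteString` type rather than `bstr` itself: once a second `AsRef` implementation exists, an unconstrained `.as_ref()` call no longer compiles, while a concrete accessor such as `as_bytes()` keeps working:

```rust
/// Toy byte-string type standing in for `bstr`'s types; purely illustrative.
struct ByteString(Vec<u8>);

impl AsRef<[u8]> for ByteString {
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}

/// A second `AsRef` implementation, as a new library release might add.
impl AsRef<str> for ByteString {
    fn as_ref(&self) -> &str {
        std::str::from_utf8(&self.0).expect("demo data is valid UTF-8")
    }
}

impl ByteString {
    /// A concrete accessor can never become ambiguous.
    fn as_bytes(&self) -> &[u8] {
        &self.0
    }
}

fn main() {
    let s = ByteString(b"gix-pack".to_vec());

    // With a single `AsRef` impl the next line compiles; with two impls the
    // compiler cannot infer the target type and asks for type annotations:
    // let n = s.as_ref().len();

    // Workarounds: name the target type explicitly, or use the concrete accessor.
    let n = AsRef::<[u8]>::as_ref(&s).len();
    let m = s.as_bytes().len();
    assert_eq!(n, m);
}
```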
+
+### New Features (BREAKING)
+
+ - <csr-id-6c4c196c9bc6c2171dc4dc58b69bd5ef53226e29/> add `wasm` feature toggle to let parts of `git-pack` build on wasm32.
+   It's a breaking change because we also start using the `dep:` syntax for declaring
+   references to optional dependencies, which will prevent them from being automatically
+   available as features.
+
+   Besides that, it adds the `wasm` feature toggle to allow compiling to `wasm32` targets.
+ - <csr-id-95210cb2ba85f75148b4ef48ccea9d9f8a0a0114/> Provide optional `candidates` for ambiguous entries during `lookup_prefix()`
+   The candidate entries are all entries matching a given prefix.
+
+### Changed (BREAKING)
+
+ - <csr-id-37f3a675d7fa931f5d3f38e91df74ec0b517422b/> move `data::ResolveBase` into `data::decode_entry::`; unify `decode_entry|decode_header::Error` into `decode::Error`.
+ - <csr-id-99905bacace8aed42b16d43f0f04cae996cb971c/> upgrade `bstr` to `1.0.1`
+ - <csr-id-8c5ae77f06a64c57df9a9ad1190266896a223dbe/> Remove deprecated compound and linked object databases
+   The dynamic/general store is the only maintained can-do-it-all
+   DB now.
+ - <csr-id-a79a7fb638b45df88af0d0d5fc9ada6d824bc328/> Improve method signatures of `cache::Tree::*`
+ - <csr-id-91d047658b114f372735116c9d8e6962a3873137/> cleanup and unify `verify_integrity()` method signature
+   Previously they used many different ways of handling their parameters
+   despite all boiling down to calling the same `index::File::traverse()`
+   method.
+
+   This allows for more reuse of `Options` structs and generally makes
+   clearer how these options are used.
+ - <csr-id-2cf7727228e1d8094ffd2eec6746006348c39eab/> `index::File::traverse()` now returns an `Outcome` struct instead of a tuple of 3 fields
+ - <csr-id-bf04644ab75ed1969507f957dc8d4868790d462d/> remove `Option<impl Progress>` in favor of `impl Progress`
+ - <csr-id-6829e5e5d6aed1e6c87647144e2dd76a1e4b9f1f/> multi-index integrity check; use `integrity::Outcome` for various integrity checks
+ - <csr-id-d851bede97801096d188ff6af06c98a79fe276db/> remove unnecessary `Arc` around `should_interrupt` flag
+ - <csr-id-c2679a03358b9c19d63ed1af1cd57324c6381447/> remove Sha1 mentions in `index::verify::Mode::*` variants
+   The hash is repository-defined and not hard-coded
+ - <csr-id-80b120d3278e46429f848df7af3db13413c36649/> introduce `index::File::verify_integrity(…, pack: Option<PackContext>, …)`, replacing tuple
+   This allows for more documentation on what input is required there and
+   generally makes for an easier to use API.
+ - <csr-id-79dc0d5ba6fa31ddd5c075693ffdc6496c1eaded/> rename `oid::try_from()` to `try_from_bytes()`, add `from_bytes_unchecked()`
+   This change was done in the name of consistency, as `from_bytes()` is
+   used in many other git-* crates
+ - <csr-id-2ef9a8424af51310db8c1e6df31dde9953ed3d21/> Change accessors named `hash_kind()` to `object_hash()` for consistency
+ - <csr-id-b76f6be6c5baa6cf613a174241f007e92bf5ba36/> consistently use `object_hash` instead of `hash_kind`
+ - <csr-id-629412b4cb192614b7eff08dbf203e3448c902c1/> data::Entry::from_read() now takes a hash length as parameter
+   That way ref-deltas can be interpreted without hard-coding SHA1
+ - <csr-id-851dc2c52fa8e204ba2d5ced8fb0959a889869d8/> data::Entry::from_bytes(…, hash_len) takes new parameter
+   The hash-len tells it how to interpret ref-delta objects, which
+   store the complete hash of the base object.
+
+   This is now entirely configurable.
+ - <csr-id-db8c8c41b4ced0fc296d3877883d801e77d550ae/> `index::File::at()` with git_hash::Kind parameter
+   It will allow assuming different hashes even in the index file format
+   which isn't yet capable of storing this information.
+ - <csr-id-e6a3c9f72332b524b143bc94ee9df0a6db11e864/> `data::File::at()` and `Bundle::at()` now have `hash_kind` parameter
+   It's used to configure the kind of hash to assume when reading packs and
+   indices.
+ - <csr-id-82b9b33bd5f4c3c1721a5093de2cedc62cb10565/> move `bundle::Location` to `data::entry::Location`
+   The latter place best describes its purpose.
+ - <csr-id-3f05fea55dc8acce1ed62ecbe4e0a1394f2720b7/> remove `make_object_cache` parameter from `git_pack::data::output::count::objects()`
+   It now is an implementation detail of the Find trait.
+ - <csr-id-e7526b2a7b51cbac4018e1ab3b623a85987fadc2/> parallel utilities now use `Send + Clone` instead of `Send + Sync`
+   This helps to assure that thread-local computations always work with the
+   kind of types we provide. The ones that are carrying out actions are
+   notably not `Sync` anymore.
+
+   We cater to that by defining our bounds accordingly, but for those
+   who want to use other utilities that need Sync, using types like
+   `Repository` and `thread_local!()` is the only way to make this
+   work.
+ - <csr-id-e8b091943f0c9a26317da0003f7fcdf5a56ef21a/> Rename gix->ein and gixp->gix
+
+### Refactor
+
+ - <csr-id-9b9f10ad862b5e097c836c51df1eb98607df5ae1/> remove unnecessary unsafe by using `chunks_mut()`
+   This was probably a left-over from times where there was a static
+   requirement on the chunks processing. Maybe…
+ - <csr-id-e0b8636f96e4bfe1bc72b5aa6ad4c4c8538ff92c/> replace bare u32 `data::Id` typedef
+ - <csr-id-71c628d46088ab455b54eb2330d24dcff96c911d/> Use 'cache::Object' trait where it matters
+ - <csr-id-8fe461281842b58aa11437445637c6e587bedd63/> split data::output::count::objects into files
+
+### Performance
+
+ - <csr-id-f9232acf8e52f8cd95520d122469e136eb07b39f/> ObjectID specific hashers, using the fact that object ids are hashes
+
+### Other
+
+ - <csr-id-e6ff1a885889cf88f6b34b1193aa03d8bce16af5/> :File uses its hash_len parameter
+ - <csr-id-f48630ba8f745c2ec61a1e3c51fa63a1789a088c/> :Find implementation for Rc
+
+### Bug Fixes
+
+ - <csr-id-ec58bef84d620360dc52e34e173ace1310f74c85/> reduce memory usage for pack resolution by 18%…
+   …compared to where it started out before.
+
+   This is the result of using u32 instead of usize where possible,
+   leveraging the object limit of u32 in packs.
+
+   This change makes the biggest difference by not keeping the decompressed
+   memory of leaf nodes alive for longer than needed, at the cost of
+   some code duplication which could be extracted into a function if one
+   was inclined to deal with the boilerplate and lots of generics.
+ - <csr-id-49d168a8859c6b5a9e7ef58cd836093212c2c7ad/> Allow verification of empty packs and indices.
+   Empty packs are fine, even though the implementation should probably
+   assure to not write them, or remove them after having been written.
+ - <csr-id-a745512185fb0a46e35daaa6d28829aec05edb55/> increase pack-receive performance using a BufWriter
+   Previously the NamedTempFile would receive every little write request
+   for millions of objects, consuming considerable amounts of time.
+
+   Now a buf writer alleviates this issue entirely.
+ - <csr-id-0b6ed60f842f0a36f61f187651080540a358758e/> `bundle::write::Error` is now publicly available
+ - <csr-id-d9451e8d7fc39c252042f9d2447061262c16ae7a/> downgrade dashmap to 4.0 to avoid unsoundness.
+   See https://github.com/xacrimon/dashmap/issues/167 for tracking
+   progress on resolving the issue.
+ - <csr-id-42e0487286c1f745837c0ce337ed7c9d86b14516/> support Rust 1.52
+ - <csr-id-84ade1d23060f10bf6c8529f8f693d06660b4f4e/> Allow resolution of in-pack ref-deltas
+   This finally allows delta tree caches to be used on typical small packs
+   returned by GitHub.
+ - <csr-id-ba92cc09ba41fe4c9a9097bfeb8d18016408fcdf/> don't try to short-cut detection of large offsets when writing index files
+   The code incorrectly assumed that the input is sorted by offsets, with
+   the largest offset being last, even though by all means that's not the
+   case.
+ - <csr-id-6d3f52dc13d7243a6bce6dab89a985114a75d94b/> Avoid the dashmap being cloned for each thread
+   Instead, share it by reference; it's sync after all.
+
+   This issue was introduced when switching to a `Send + Clone` model,
+   instead of `Send + Sync`, to allow thread-local caches in database
+   handles of all kinds.
+ - <csr-id-b605c1fa0494b10872d3c2e6ecce0e39f1a90a9e/> linked::Store now assures unique IDs across compound stores
+ - <csr-id-20b3994206aa5bc5e35cbbc9c8f8f99187077f79/> Adjust size-hints of resolving entries iterator and use the upper bound in delta tree
+   The delta-tree is a data structure that actually heavily relies on
+   favorable allocation and a known number of objects in order to
+   provide front and back buffers. However, this is an implementation
+   detail and they don't have to stay consistent at all, especially
+   after growing the buffer by pushing to it.
+
+   Interestingly, the VecDeque internally over-allocates as well, which
+   definitely helps the example of `as_mut_slices()`, otherwise
+   it could also suffer from the assertions that trigger here.
+ - <csr-id-d8fe8141e80a9e9a433b5e1a072b850325c806c8/> don't put more objects into the pack cache than needed
+   Previously when accessing a packed object, it would store the base
+   object into the pack cache (if it wasn't retrieved from there),
+   which is great if that operation is free.
+
+   Since it isn't, it's better not to stress the cache with puts
+   and trash more objects than necessary.
+
+   Now only the last decompressed object will be put into the LRU cache.
+ - <csr-id-faf6f813927720c5adf62102f9ce46606ff2617c/> don't include submodules in count…
+   …to avoid dealing with missing objects.
+
+   It's still a good idea to handle these gracefully though, git itself
+   seems to ignore them.
+
+### Chore
+
+ - <csr-id-f7f136dbe4f86e7dee1d54835c420ec07c96cd78/> uniformize deny attributes
+ - <csr-id-25209454d3f7e27e12e8ddca92e43b1ff01d58aa/> upgrade dashmap to 5.1.0 (with security fix)
+ - <csr-id-c800fdd331e6d7a0b8d756ba822915259f26e9e8/> remove unused dependencies
+
+### Documentation
+
+ - <csr-id-39ed9eda62b7718d5109135e5ad406fb1fe2978c/> fix typos
+
+### New Features
+
+ - <csr-id-d792ea543246632bf1ca8d0e1d239bbe7f07e219/> use enumerations to advertise progress ids publicly.
+   Previously these were an implementation detail which also means they
+   couldn't be relied upon.
+
+   Thanks to an intermediate enumeration, they become part of the public API
+   and their actual value is not exposed.
+ - <csr-id-f0e40ecddaf1211f76ed60ef30cf03dcfd53a7ab/> add `wasm` feature toggle to allow compilation to wasm32-unknown-unknown
+ - <csr-id-a85dcddba29a453bbea87ac865b8aedc97f41aed/> add `data::File::resolve_header()` to obtain object information without decompressing it.
+ - <csr-id-c8835c6edae784c9ffcb69a674c0a6545dbb2af3/> upgrade to `prodash 21.1` and add `Ids` to all progress instances.
+   That way callers can identify progress they are interested in, say, for
+   selective visualizations.
+ - <csr-id-450257eb596465352fb363a5c8c514f544e7f9ac/> `Bundle::write…` also produces a `.keep` file
+   These files are placed before moving the corresponding pack and index
+   files into place to prevent them from being picked up for collection
+   while their refs are still being placed.
+
+   The caller is responsible for deleting them once refs integrate the
+   contained objects into the commit graph.
+ - <csr-id-6836cb148cbadf919bdac2e0e77bdec34caa9307/> more efficient distribution of tasks to threads during pack resolution.
+   This clearly is a peak-performance optimization as it will now
+   efficiently distribute tree-root nodes to threads one at a time by
+   means of shared memory, which is accessed mutably yet safely.
+
+   This change can also further reduce peak memory usage as it will not
+   keep buffers of more resolved deltas than necessary, as it only handles
+   one at a time per thread.
+ - <csr-id-b1c40b0364ef092cd52d03b34f491b254816b18d/> use docsrs feature in code to show what is feature-gated automatically on docs.rs
+ - <csr-id-517677147f1c17304c62cf97a1dd09f232ebf5db/> pass --cfg docsrs when compiling for https://docs.rs
+ - <csr-id-503b1a1f8d4f39b44c166209d7a8ba8d74137859/> `index::File::lookup_prefix(…)`
+ - <csr-id-cb83beedd1aa389f6774e2296f79273e8c8f14f4/> git-hash::Prefix::from_id()
+   A way to obtain a prefix of an object id, with all non-prefix
+   bytes set to zero.
+ - <csr-id-16208306ab49ade30d8ffd6b067ebd8eefd84cd4/> in-manifest and in-lib documentation of feature toggles
+ - <csr-id-b80dec2323b81fb2172df76c7d897a4b5e6bdfea/> zero-objects check for index and multi-index integrity validation
+ - <csr-id-56fc99fb9c1cab61abd03c10e1b4af0d6e491bbf/> support for fan-checking in index and multi-index integrity verification
+ - <csr-id-28e3ea8612112f6a04cfaff591565eca5a1ffba2/> introduce type for entry indices within an index or multi-index
+   That way it's a little more descriptive than a bare u32.
+ - <csr-id-58c2edb76755ab71e10eef4cd9a51533825c291f/> git_pack::Find::try_find_cached(…, pack_cache)
+   With this method it's easier to bypass local caches and control
+   the cache oneself entirely.
+ - <csr-id-e25f4eadec679406aad6df10026e27e4832c2482/> A simplified version of the `Find` trait
+   It's meant for the next generation of object db handles which keep a
+   local cache of all the details of the actual object database.
+ - <csr-id-60c9fad8002b4e3f6b9607bba6361871752f4d3d/> control pack and object cache size in megabytes
+ - <csr-id-50cf610e8939812c3d2268c48835e2dac67d0c31/> cache::Object trait for caching and retrieving whole objects
+ - <csr-id-5a8c2da6cb1e2accf7cfdccc16bc3a1d0b2a7dbc/> object cache size is configurable
+ - <csr-id-d6c44e6ab8f436020d4fb235e423b018fd1e7a9f/> dynamically sized full-object speeds up diff-based object counting…
+   …which is what happens when counting objects for fetches where only
+   changed objects should be sent.
+
+### Commit Statistics
+
+<csr-read-only-do-not-edit/>
+
+ - 717 commits contributed to the release over the course of 634 calendar days.
+ - 75 commits were understood as [conventional](https://www.conventionalcommits.org). + - 27 unique issues were worked on: [#164](https://github.com/Byron/gitoxide/issues/164), [#198](https://github.com/Byron/gitoxide/issues/198), [#222](https://github.com/Byron/gitoxide/issues/222), [#247](https://github.com/Byron/gitoxide/issues/247), [#250](https://github.com/Byron/gitoxide/issues/250), [#254](https://github.com/Byron/gitoxide/issues/254), [#259](https://github.com/Byron/gitoxide/issues/259), [#260](https://github.com/Byron/gitoxide/issues/260), [#263](https://github.com/Byron/gitoxide/issues/263), [#266](https://github.com/Byron/gitoxide/issues/266), [#279](https://github.com/Byron/gitoxide/issues/279), [#287](https://github.com/Byron/gitoxide/issues/287), [#293](https://github.com/Byron/gitoxide/issues/293), [#298](https://github.com/Byron/gitoxide/issues/298), [#301](https://github.com/Byron/gitoxide/issues/301), [#329](https://github.com/Byron/gitoxide/issues/329), [#331](https://github.com/Byron/gitoxide/issues/331), [#333](https://github.com/Byron/gitoxide/issues/333), [#364](https://github.com/Byron/gitoxide/issues/364), [#384](https://github.com/Byron/gitoxide/issues/384), [#422](https://github.com/Byron/gitoxide/issues/422), [#427](https://github.com/Byron/gitoxide/issues/427), [#450](https://github.com/Byron/gitoxide/issues/450), [#470](https://github.com/Byron/gitoxide/issues/470), [#67](https://github.com/Byron/gitoxide/issues/67), [#691](https://github.com/Byron/gitoxide/issues/691), [#XXX](https://github.com/Byron/gitoxide/issues/XXX) + +### Thanks Clippy + +<csr-read-only-do-not-edit/> + +[Clippy](https://github.com/rust-lang/rust-clippy) helped 26 times to make code idiomatic. + +### Commit Details + +<csr-read-only-do-not-edit/> + +<details><summary>view details</summary> + + * **[#164](https://github.com/Byron/gitoxide/issues/164)** + - Avoid duplicate module paths in 'tree' and 'commit' ([`2f2d856`](https://github.com/Byron/gitoxide/commit/2f2d856efe733d3cf81110c0e0607d2e7c40d968)) + * **[#198](https://github.com/Byron/gitoxide/issues/198)** + - Fix stop-release-for-changelog logic and fix all affected changelogs ([`52b38bc`](https://github.com/Byron/gitoxide/commit/52b38bc4856be5ba8b5372a3dd20f5d06504e7ed)) + - Deduplicate conventional message ids ([`e695eda`](https://github.com/Byron/gitoxide/commit/e695eda8cd183f703d9a3e59b7c3c7fa496ea1d2)) + - Regenerate all changelogs to get links ([`0c81769`](https://github.com/Byron/gitoxide/commit/0c817690bd444f52bed2936b2b451cafd87dde92)) + - Mention actual issues that where worked on ([`a517e39`](https://github.com/Byron/gitoxide/commit/a517e39a81145b331f6c7a6cc2fc22e25daf42e2)) + - Allow 'refactor' and 'other' in conventional messages if they have breaking changes ([`4eebaac`](https://github.com/Byron/gitoxide/commit/4eebaac669e590beed112b622752997c64772ef1)) + - Rebuild all changelogs to assure properly ordered headlines ([`4a9a05f`](https://github.com/Byron/gitoxide/commit/4a9a05f95930bad5938d4ce9c517ebf0e0b990f1)) + - Sort all commits by time, descending… ([`f536bad`](https://github.com/Byron/gitoxide/commit/f536bad20ffbac4dc353dfeb1a917bb88becbb78)) + - Greatly reduce changelog size now that the traversal fix is applied ([`a0bc98c`](https://github.com/Byron/gitoxide/commit/a0bc98c06c349de2fd6e0d4593606e68b98def72)) + - Don't put more objects into the pack cache than needed ([`d8fe814`](https://github.com/Byron/gitoxide/commit/d8fe8141e80a9e9a433b5e1a072b850325c806c8)) + - Fixup remaining changelogs… 
([`2f75db2`](https://github.com/Byron/gitoxide/commit/2f75db294fcf20c325555822f65629611be52971)) + - Generate changelogs with details ([`e1861ca`](https://github.com/Byron/gitoxide/commit/e1861caa435d312953a9fea7ceff6d2e07b03443)) + - Update all changelogs with details ([`58ab2ae`](https://github.com/Byron/gitoxide/commit/58ab2aee23ba70a536e9487b44fb04c610374d1a)) + - Update changelogs ([`c857d61`](https://github.com/Byron/gitoxide/commit/c857d61ce3ce342012a2c4ba10a8327822aa530e)) + - Avoid adding newlines which make writing unstable ([`6b5c394`](https://github.com/Byron/gitoxide/commit/6b5c394f49282a8d09c2a9ffece840e4683572db)) + - Fix section headline level ([`9d6f263`](https://github.com/Byron/gitoxide/commit/9d6f263beef289d227dec1acc2d4240087cb9be6)) + - Write first version of changlogs thus far… ([`719b6bd`](https://github.com/Byron/gitoxide/commit/719b6bdf543b8269ccafad9ad6b46e0c55efaa38)) + - Parse more user generated section content, adapt existing changelogs to work correctly ([`2f43a54`](https://github.com/Byron/gitoxide/commit/2f43a54298e7ecfff2334627df149fe0882b5d1d)) + * **[#222](https://github.com/Byron/gitoxide/issues/222)** + - Update changelogs prior to release ([`9a493d0`](https://github.com/Byron/gitoxide/commit/9a493d0651b0b6d71cf230dc510a658be7f8cb19)) + - Stabilize changelogs ([`920e832`](https://github.com/Byron/gitoxide/commit/920e83219911df1c440d3fe42fd5ec3a295b0bb8)) + - Update changelogs prior to release ([`b3e2252`](https://github.com/Byron/gitoxide/commit/b3e2252f7461a003d9a4612da60ba931dd8c0bef)) + * **[#247](https://github.com/Byron/gitoxide/issues/247)** + - Rename gix->ein and gixp->gix ([`e8b0919`](https://github.com/Byron/gitoxide/commit/e8b091943f0c9a26317da0003f7fcdf5a56ef21a)) + * **[#250](https://github.com/Byron/gitoxide/issues/250)** + - Address FIXME related to git_pack::data::Object ([`96386fd`](https://github.com/Byron/gitoxide/commit/96386fd1379b32ce2333baf34f81133cb9817364)) + - Move loose header manipulation from git-pack to git-object ([`598698b`](https://github.com/Byron/gitoxide/commit/598698b88c194bc0e6ef69539f9fa7246ebfab70)) + * **[#254](https://github.com/Byron/gitoxide/issues/254)** + - Adjust changelogs prior to git-pack release ([`6776a3f`](https://github.com/Byron/gitoxide/commit/6776a3ff9fa5a283da06c9ec5723d13023a0b267)) + - Minor refactor ([`227c8b1`](https://github.com/Byron/gitoxide/commit/227c8b1859a6cbf96d48fd8564e575ef7e201db1)) + - Adjust size-hints of resolving entries iterator and use the upper bound in delta tree ([`20b3994`](https://github.com/Byron/gitoxide/commit/20b3994206aa5bc5e35cbbc9c8f8f99187077f79)) + * **[#259](https://github.com/Byron/gitoxide/issues/259)** + - Sketch a little more how packs could be accessed ([`3fce8f2`](https://github.com/Byron/gitoxide/commit/3fce8f2b35ec6c2076f66fdde16a5f99a68326ac)) + - Unify trait bounds for parallel code: prefer Clone over Sync ([`c805d0b`](https://github.com/Byron/gitoxide/commit/c805d0b231cf4d2f51dae7705bfbbc6562f86c32)) + - Remove trait bounds to allow single-threaded applications to exist ([`3c790e0`](https://github.com/Byron/gitoxide/commit/3c790e01de0dbd3ffa2683d5cf060723d11d64a5)) + - Turns out the new `PolicyStore` can co-exist with existing one… ([`5e9250f`](https://github.com/Byron/gitoxide/commit/5e9250f5027e4b2c701ceae72a6038ac2a4a2093)) + * **[#260](https://github.com/Byron/gitoxide/issues/260)** + - Linked::Store now assures unique IDs across compound stores ([`b605c1f`](https://github.com/Byron/gitoxide/commit/b605c1fa0494b10872d3c2e6ecce0e39f1a90a9e)) + * 
**[#263](https://github.com/Byron/gitoxide/issues/263)** + - Fmt ([`fbeddeb`](https://github.com/Byron/gitoxide/commit/fbeddebcab999f4898f768a3184906091f8ce0b8)) + - Parallel utilities now use `Send + Clone` insted of `Send + Sync` ([`e7526b2`](https://github.com/Byron/gitoxide/commit/e7526b2a7b51cbac4018e1ab3b623a85987fadc2)) + - A mad attempt to use thread-local everywhere and avoid Sync… ([`0af5077`](https://github.com/Byron/gitoxide/commit/0af5077e1f028c1c69bbdc098bb567e486282c37)) + * **[#266](https://github.com/Byron/gitoxide/issues/266)** + - Remove unused dependencies ([`c800fdd`](https://github.com/Byron/gitoxide/commit/c800fdd331e6d7a0b8d756ba822915259f26e9e8)) + - Upgrade dashmap to latest version ([`52d4fe5`](https://github.com/Byron/gitoxide/commit/52d4fe55b6dd88f72479abd4015cab063ddaaf97)) + - Refactor ([`b88f253`](https://github.com/Byron/gitoxide/commit/b88f253e46e7ad0a50b670b96c1bfa09eaaecaef)) + - Refactor ([`52a4dcd`](https://github.com/Byron/gitoxide/commit/52a4dcd3a6969fa8f423ab39c875f98f9d210e95)) + - Make single-threaded programs possible to use with git-repository ([`dde5c6b`](https://github.com/Byron/gitoxide/commit/dde5c6ba76ff849f69f742c985b4bc65ca830883)) + - Use new odb in place of the old one and it works ([`8ad25c5`](https://github.com/Byron/gitoxide/commit/8ad25c581bc79041545a72baf57b0a469d99cc30)) + - Make find::Entry self-contained ([`ad36fb9`](https://github.com/Byron/gitoxide/commit/ad36fb9b800c17931ce358ac262bef40d43dcfb3)) + - Remove iterator access in favor of fully owned data ([`62d3f10`](https://github.com/Byron/gitoxide/commit/62d3f106437e597a41aae592da28f48e8736b143)) + - Remove CRC32 check entirely as it doesn't seem to be important in the big picture ([`22d35bd`](https://github.com/Byron/gitoxide/commit/22d35bdbc271ccada8d68a1450d9a2533fc739ee)) + - Notes about multi-pack indices in the current data::entry::location ([`7eff6bf`](https://github.com/Byron/gitoxide/commit/7eff6bf525ea48fa913149911ea4c8fe742a25a3)) + - Move `bundle::Location` to `data::entry::Location` ([`82b9b33`](https://github.com/Byron/gitoxide/commit/82b9b33bd5f4c3c1721a5093de2cedc62cb10565)) + - Use existing git_features facilities ([`ed0c266`](https://github.com/Byron/gitoxide/commit/ed0c2662d95b74b4abc09b42fc24cb56219dd511)) + - Adjust pack-create to changes in git-pack ([`12db899`](https://github.com/Byron/gitoxide/commit/12db899a72da6decccd82931637d074059b578f5)) + - Remove `make_object_cache` parameter from `git_pack::data::output::count::objects()` ([`3f05fea`](https://github.com/Byron/gitoxide/commit/3f05fea55dc8acce1ed62ecbe4e0a1394f2720b7)) + - :Find implementation for Rc ([`f48630b`](https://github.com/Byron/gitoxide/commit/f48630ba8f745c2ec61a1e3c51fa63a1789a088c)) + - MultiPackIndex compatible pack::Find trait definition ([`5fa1a9d`](https://github.com/Byron/gitoxide/commit/5fa1a9dce59c2654374a532d024c8de5959d4d0f)) + - Git_pack::Find::try_find_cached(…, pack_cache) ([`58c2edb`](https://github.com/Byron/gitoxide/commit/58c2edb76755ab71e10eef4cd9a51533825c291f)) + - Refactor ([`3310d8f`](https://github.com/Byron/gitoxide/commit/3310d8f271f74fc6084e33dd9bd4c5f01b54e432)) + - Remove pack-cache from `Find::try_find(…)` ([`ebc7f47`](https://github.com/Byron/gitoxide/commit/ebc7f47708a63c3df4415ba0e702660d976dfb3e)) + - Fix docs ([`1bb4253`](https://github.com/Byron/gitoxide/commit/1bb425347e4b502e1c048908cd5f3641d2b16896)) + - Move git_pack::data::Object to git_object::Data, massively alter git_odb::Find trait 
([`2290d00`](https://github.com/Byron/gitoxide/commit/2290d006705ff47ad780b009fe58ee422b3285af)) + - A simplified version of the `Find` trait ([`e25f4ea`](https://github.com/Byron/gitoxide/commit/e25f4eadec679406aad6df10026e27e4832c2482)) + - Add 'contains()' method to Find ([`dfdd6fb`](https://github.com/Byron/gitoxide/commit/dfdd6fb2c83e5d09c3a56936723bc6749ac4b99a)) + * **[#279](https://github.com/Byron/gitoxide/issues/279)** + - Add a less thorough and faster way of verifying multi-indices ([`7517482`](https://github.com/Byron/gitoxide/commit/75174825e1012cfb4c34c18391c681b49c2f0d29)) + - Refactor ([`91e6d38`](https://github.com/Byron/gitoxide/commit/91e6d382bb2e2430d5d3325a390b7d9bdc0034d6)) + - Allow interrupting multi-index creation more often ([`f223ecb`](https://github.com/Byron/gitoxide/commit/f223ecb6c69358ed8e38d796aca9bef21173cc92)) + - Also test pack-creation with multi-index repo ([`235a27a`](https://github.com/Byron/gitoxide/commit/235a27a925e9b5f6729056ac44e8107dcba55cfd)) + - Better multi-pack verification progress ([`2e16f13`](https://github.com/Byron/gitoxide/commit/2e16f1321bdccc2cef688d27efd9cc9be1360c31)) + - Handle large multi-pack indices correctly ([`4f6b030`](https://github.com/Byron/gitoxide/commit/4f6b0308f06b7705163ff624a98694e1d928fee1)) + - Fix progress and handling of large of multi-pack index offsets ([`5dc1f81`](https://github.com/Byron/gitoxide/commit/5dc1f813ead64ad13edb2b5ed9bd660d198c7ddb)) + - Add missing docs ([`4137327`](https://github.com/Byron/gitoxide/commit/41373274fc7f23e3fed17dc52e3e3e94c2e9e41a)) + - Write progress for multi-pack writing ([`1bea1d4`](https://github.com/Byron/gitoxide/commit/1bea1d47908d3ec44c83b2e39a5b67134ad51ee0)) + - Adapt to changes in git-features ([`542c0df`](https://github.com/Byron/gitoxide/commit/542c0df9f7498a53a4561e4286b8fdb888565cd3)) + - Progress for chunk writing ([`50fde01`](https://github.com/Byron/gitoxide/commit/50fde01b44a0a720ccb874bc23a818334238c6e0)) + - Multi-pack index writing complete with large-offset support ([`f7d5c7f`](https://github.com/Byron/gitoxide/commit/f7d5c7f815dbf52c668444b316ae2e1485463bcb)) + - Write pack-ids and offsets ([`bfc8069`](https://github.com/Byron/gitoxide/commit/bfc8069e6da2ec6d87fa40bbaaca247c1e247d5f)) + - Add chunk for oids ([`565a7ae`](https://github.com/Byron/gitoxide/commit/565a7aea9341a0f0005a41bc6687fbaacb0c0b97)) + - Write the fanout table ([`6a68ed7`](https://github.com/Byron/gitoxide/commit/6a68ed7708bdbb29c40bcea0dc7cf681c0aff75b)) + - Refactor ([`93dc660`](https://github.com/Byron/gitoxide/commit/93dc660aa34c18b5186c57c6a3fad547a63d5eec)) + - Write multi-index header along with path-names chunk ([`2fc6751`](https://github.com/Byron/gitoxide/commit/2fc67512f8be2860ab06dc5a282f4f6550c3fddb)) + - Sketch all the chunk-write API and use it from multi-index write ([`5457761`](https://github.com/Byron/gitoxide/commit/545776180f75cba87f7119f9bd862d39f081f1bd)) + - Add frame for writing a multi-pack index ([`9ce1e7f`](https://github.com/Byron/gitoxide/commit/9ce1e7f2d8c7133590f571919850eaa763f789e3)) + - `index::File::traverse()` now returns an `Outcome` struct instead of tuple of 3 fields ([`2cf7727`](https://github.com/Byron/gitoxide/commit/2cf7727228e1d8094ffd2eec6746006348c39eab)) + - Refactor ([`c361ee3`](https://github.com/Byron/gitoxide/commit/c361ee399e9c435b087387c1542b3838c21fad03)) + - Multi-index verification now matches that of git itself ([`3a76a28`](https://github.com/Byron/gitoxide/commit/3a76a28e6af11950e8a808d09c36c2ee8b655944)) + - Zero-objects check 
for index and multi-index integrity validation ([`b80dec2`](https://github.com/Byron/gitoxide/commit/b80dec2323b81fb2172df76c7d897a4b5e6bdfea)) + - Support for fan-checking in index and multi-index integrity verification ([`56fc99f`](https://github.com/Byron/gitoxide/commit/56fc99fb9c1cab61abd03c10e1b4af0d6e491bbf)) + - More detailed multi-index verification ([`8f9a55b`](https://github.com/Byron/gitoxide/commit/8f9a55bb31af32b266d7c53426bc925361a627b2)) + - Even nicer printing ([`d2bea27`](https://github.com/Byron/gitoxide/commit/d2bea270787597d6aef48ffe023ff49969c33bd9)) + - Nicer printing of index verification results ([`e3dfa12`](https://github.com/Byron/gitoxide/commit/e3dfa123b368e66f39567bd2a8f5d7d9c09d4fe6)) + - Very first experimental support for multi-pack index verification ([`bb35c69`](https://github.com/Byron/gitoxide/commit/bb35c6994765ec3bbbcfde247911d1ffe711a23d)) + - Remove `Option<impl Progress>` in favor of `impl Progress` ([`bf04644`](https://github.com/Byron/gitoxide/commit/bf04644ab75ed1969507f957dc8d4868790d462d)) + - Multi-index integrity check; use `integrity::Outcome` for various integrity checks ([`6829e5e`](https://github.com/Byron/gitoxide/commit/6829e5e5d6aed1e6c87647144e2dd76a1e4b9f1f)) + - Remove unnecessary `Arc` around `should_interrupt` flag ([`d851bed`](https://github.com/Byron/gitoxide/commit/d851bede97801096d188ff6af06c98a79fe276db)) + - Remove Sha1 mentions in `index::verify::Mode::*` variants ([`c2679a0`](https://github.com/Byron/gitoxide/commit/c2679a03358b9c19d63ed1af1cd57324c6381447)) + - Introduce `index::File::verify_integrity(…, pack: Option<PackContext>, …)`, replacing tuple ([`80b120d`](https://github.com/Byron/gitoxide/commit/80b120d3278e46429f848df7af3db13413c36649)) + - Multi-index verify checksum ([`853d468`](https://github.com/Byron/gitoxide/commit/853d4683aae5f4dd4667b452932bd57f99f6afab)) + - Fix docs ([`ce044ef`](https://github.com/Byron/gitoxide/commit/ce044ef146e3d67483bed382f5dd5c484699534e)) + - Introduce type for entry indices within an index or multi-index ([`28e3ea8`](https://github.com/Byron/gitoxide/commit/28e3ea8612112f6a04cfaff591565eca5a1ffba2)) + - Replace bare u32 `data::Id` typedef ([`e0b8636`](https://github.com/Byron/gitoxide/commit/e0b8636f96e4bfe1bc72b5aa6ad4c4c8538ff92c)) + - Adjust to changes in git-odb ([`710780c`](https://github.com/Byron/gitoxide/commit/710780cd355793ea638767213f250e026997a530)) + - Add remaining docs for multi-index ([`10a24c1`](https://github.com/Byron/gitoxide/commit/10a24c1860e63935b435e985900797b2d4c707a8)) + - Docs for multi_index::chunk ([`73fbc91`](https://github.com/Byron/gitoxide/commit/73fbc915847b7c458a17bdfbb7fa1de3f31ab437)) + - Refactor ([`eafdff4`](https://github.com/Byron/gitoxide/commit/eafdff405b3f408aa5203f40c7f0a570ce20655d)) + - Multi-index iteration ([`1c99903`](https://github.com/Byron/gitoxide/commit/1c999035cc3649ab9db02bd82644fb54c408f6d2)) + - Access pack-indices and pack-offsets of multi-pack indices ([`c2a6918`](https://github.com/Byron/gitoxide/commit/c2a69189f88c53ab555158245ce647fcd33fca6a)) + - Oid lookup for multi-pack indices ([`254f618`](https://github.com/Byron/gitoxide/commit/254f618ee410be4a2787f599529a6cca1284a0fb)) + - Add basic oid by multi-index file index ([`a54f552`](https://github.com/Byron/gitoxide/commit/a54f552741aed315b21112576d6e5b704a9439d4)) + - Rename `oid::try_from()` to `try_from_bytes()`, add `from_bytes_unchecked()` ([`79dc0d5`](https://github.com/Byron/gitoxide/commit/79dc0d5ba6fa31ddd5c075693ffdc6496c1eaded)) + - Change accessors named 
`hash_kind()` to `object_hash()` for consistency ([`2ef9a84`](https://github.com/Byron/gitoxide/commit/2ef9a8424af51310db8c1e6df31dde9953ed3d21)) + - Adapt to changes in git-hash ([`754a663`](https://github.com/Byron/gitoxide/commit/754a66344ff2cfcfc4a7a3d72f1240e939c48055)) + - Remove unnecessary `Default` implementation for user of Tree::traverse ([`9da20e9`](https://github.com/Byron/gitoxide/commit/9da20e92c96e4ce8dd75e141c24143e4ea1141a7)) + - Remove unnecessary Default bound for data in Tree nodes ([`d548f72`](https://github.com/Byron/gitoxide/commit/d548f726013df409b0e1a5fb0e39c15ff445228d)) + - Adjust to changes in git-hash ([`9bf25cc`](https://github.com/Byron/gitoxide/commit/9bf25cc4f2e44821f93e85997677bc4e86a67bd4)) + - Consistently use `object_hash` instead of `hash_kind` ([`b76f6be`](https://github.com/Byron/gitoxide/commit/b76f6be6c5baa6cf613a174241f007e92bf5ba36)) + - Adjust to changes in git-hash ([`ca35246`](https://github.com/Byron/gitoxide/commit/ca35246a91888ae41805d71082055c98d2ff7f0b)) + - Adjust to changes in git-hash and git-pack ([`0cae25b`](https://github.com/Byron/gitoxide/commit/0cae25b1bb3c902ec323f17a1d9743e42fe213d0)) + - Data::Entry::from_read() now takes a hash lengths as parameter ([`629412b`](https://github.com/Byron/gitoxide/commit/629412b4cb192614b7eff08dbf203e3448c902c1)) + - Data::Entry::from_bytes(…, hash_len) takes new parameter ([`851dc2c`](https://github.com/Byron/gitoxide/commit/851dc2c52fa8e204ba2d5ced8fb0959a889869d8)) + - Refactor ([`7331e99`](https://github.com/Byron/gitoxide/commit/7331e99cb88df19f7b1e04b1468584e9c7c79913)) + - Adjust to changes in git-hash ([`07aa1bc`](https://github.com/Byron/gitoxide/commit/07aa1bca225c30b168a597f920bda392b2cb2713)) + - :File uses its hash_len parameter ([`e6ff1a8`](https://github.com/Byron/gitoxide/commit/e6ff1a885889cf88f6b34b1193aa03d8bce16af5)) + - `index::File::at()` with git_hash::Kind parameter ([`db8c8c4`](https://github.com/Byron/gitoxide/commit/db8c8c41b4ced0fc296d3877883d801e77d550ae)) + - `data::File::at()` and `Bundle::at()` now have `hash_kind` parameter ([`e6a3c9f`](https://github.com/Byron/gitoxide/commit/e6a3c9f72332b524b143bc94ee9df0a6db11e864)) + - Remove unnecessary dev-depednency ([`b71ea6a`](https://github.com/Byron/gitoxide/commit/b71ea6a89d11d6cac01b7d9e9b1101f4d637617c)) + - Adapt to changes in git-hash ([`82fec95`](https://github.com/Byron/gitoxide/commit/82fec95e9ed4b924849bfcc84b5b2691a925a5b3)) + - Calculate trailer offset instead of storing it ([`bf62067`](https://github.com/Byron/gitoxide/commit/bf62067c690e407e2ace66220337359542e1846a)) + - Make pessimistic size-estimation instead of an optimistic one ([`69f1d2a`](https://github.com/Byron/gitoxide/commit/69f1d2a2063cfebae3ea70979d950f8ab7751eac)) + - Refactor ([`8b8b4c5`](https://github.com/Byron/gitoxide/commit/8b8b4c538823fb4d2c37be80340d843080f08d19)) + - Refactor ([`8c9c7fc`](https://github.com/Byron/gitoxide/commit/8c9c7fc3bc46afa9c8567a8bc8079cac12ed8422)) + - Adapt to changes in git-chunk ([`44ea5c3`](https://github.com/Byron/gitoxide/commit/44ea5c3c334399bc03d92fa20171d2c0c3afdf49)) + - Refactor ([`ac46765`](https://github.com/Byron/gitoxide/commit/ac4676534573e3ccfa219765e645526797c6d71b)) + - Adapt to latest changes to git-chunk ([`743d696`](https://github.com/Byron/gitoxide/commit/743d6967d6236a4bb6a9c8817f957e7604bc9264)) + - Provide multi-index checksum ([`a363de9`](https://github.com/Byron/gitoxide/commit/a363de9b8271986385b1d57e61a6c103c20a4055)) + - Update changelog prior to release 
([`6ae49e3`](https://github.com/Byron/gitoxide/commit/6ae49e39b2251ad70b72a8f3b3840ebb9334ffd9)) + - Completely validate and parse multi-index file ([`e7e40c3`](https://github.com/Byron/gitoxide/commit/e7e40c30dea082d004e8781ef7d36bde0afdd8a7)) + - Read and validate index names contained in the multi-pack index ([`24a9790`](https://github.com/Byron/gitoxide/commit/24a979036df515f0616738825e669ec9c8dab1f1)) + - Read and validate fanout chunk ([`3ca04e3`](https://github.com/Byron/gitoxide/commit/3ca04e355a413975e55adf8b204d6962a9341d32)) + - Read all mandatory and optional chunks ([`99023bb`](https://github.com/Byron/gitoxide/commit/99023bbde027be82e9217868df7f73ecd09bf705)) + - Load chunk index of midx file ([`fac8efa`](https://github.com/Byron/gitoxide/commit/fac8efacb31935c2143717ebe82003a0916f233f)) + - Frame for git-chunk crate to share among git-pack and git-commitgraph ([`b2d2ae2`](https://github.com/Byron/gitoxide/commit/b2d2ae221d43cc14aa169ada3c471e2bd2adadf4)) + - Basic midx header parsing ([`edf02ae`](https://github.com/Byron/gitoxide/commit/edf02ae46ce6f3f981acd99310878e1d4a00d23b)) + - First pieces of header parsing; allow to respect multi-index desired hash kind in git-odb ([`1a2a049`](https://github.com/Byron/gitoxide/commit/1a2a04930ab56ba778091e10b15cecf415f5058d)) + - Frame for instantiation of multi-pack-index ([`5e085ec`](https://github.com/Byron/gitoxide/commit/5e085ecbea913e0b0191d8267e548fe859bdd5d9)) + * **[#287](https://github.com/Byron/gitoxide/issues/287)** + - Way nicer progress messages for repo verification ([`4b4f9f8`](https://github.com/Byron/gitoxide/commit/4b4f9f81879ad181744022eb0d7dc02392a5e91e)) + - Upgrade to prodash 17 ([`47860b7`](https://github.com/Byron/gitoxide/commit/47860b7e2769260cfb8522ae455c491605093423)) + - Refactor ([`831397c`](https://github.com/Byron/gitoxide/commit/831397c99fee9f2d6758124d993386cca5534f7b)) + - Allow resolution of in-pack ref-deltas ([`84ade1d`](https://github.com/Byron/gitoxide/commit/84ade1d23060f10bf6c8529f8f693d06660b4f4e)) + - Refactor ([`38426a1`](https://github.com/Byron/gitoxide/commit/38426a171844014201282a441ebfc7d1f4cfff94)) + - Test to reproduce ref-delta forward references and the issue it poses for index traversal ([`7db7195`](https://github.com/Byron/gitoxide/commit/7db7195953954ded32a410e8d11f07f4c5b61687)) + - Very rough version of repository verification ([`80a4a7a`](https://github.com/Byron/gitoxide/commit/80a4a7add688d16376b9bf2ed7f1c7f655b7c912)) + - Refactor ([`6c06659`](https://github.com/Byron/gitoxide/commit/6c066597f310b1bd5eb5611c1147b48846bc0ac0)) + - Improve method signatures of `cache::Tree::*` ([`a79a7fb`](https://github.com/Byron/gitoxide/commit/a79a7fb638b45df88af0d0d5fc9ada6d824bc328)) + - Cleanup and unify `verify_integrity()` method signature ([`91d0476`](https://github.com/Byron/gitoxide/commit/91d047658b114f372735116c9d8e6962a3873137)) + * **[#293](https://github.com/Byron/gitoxide/issues/293)** + - Fix docs, again ([`7b2ab26`](https://github.com/Byron/gitoxide/commit/7b2ab263b9dbb2ad33a4dddfe82f4cd7f3187271)) + - Fix build ([`e3977fe`](https://github.com/Byron/gitoxide/commit/e3977fe033550bfd3297cdd674934e40476aa38b)) + - Use InOrderIter from git-features ([`7721b5f`](https://github.com/Byron/gitoxide/commit/7721b5fc7cba86d785e0936fdfab2ea41163219f)) + - Basic IEOT parsing ([`35bdee4`](https://github.com/Byron/gitoxide/commit/35bdee4bf77787bcbe6c3dd715a677e2e46a8ad1)) + - Assure we are right about the leb64 buffer needed for a 64 bit int 
([`7558844`](https://github.com/Byron/gitoxide/commit/7558844b40b6c9af5038fea6b8a4e81583c46bde)) + - Adapt to changes in git-features: use var-int decoding from there ([`52e3c6f`](https://github.com/Byron/gitoxide/commit/52e3c6f6f4cd1bf677c9189fb59db16173954669)) + - Remove byteorder from git-pack ([`4122306`](https://github.com/Byron/gitoxide/commit/41223061a2b919fd190066315b419ea17cabfde3)) + - Git-pack uses `memmap2` instead of `filebuffer` ([`d9011c7`](https://github.com/Byron/gitoxide/commit/d9011c71048ff34201917b0693586290c23b3ddf)) + * **[#298](https://github.com/Byron/gitoxide/issues/298)** + - Restrict signature changes to 'Ancestores::sorting()` ([`d71bd9d`](https://github.com/Byron/gitoxide/commit/d71bd9ded1e5e5a61a27be3d55f4b85ee4049bcf)) + - Adjust to changes in git-traverse ([`8240622`](https://github.com/Byron/gitoxide/commit/824062215865e6ec12afeb2d51b3c63f15291244)) + - `index::File::lookup_prefix(…)` ([`503b1a1`](https://github.com/Byron/gitoxide/commit/503b1a1f8d4f39b44c166209d7a8ba8d74137859)) + - Support MSRV ([`d09fd9b`](https://github.com/Byron/gitoxide/commit/d09fd9b37557f2dc199e8a4651c56b3b63423136)) + - Add documentation for lookup_prefix along with missing test ([`927b2ac`](https://github.com/Byron/gitoxide/commit/927b2ace875cdda63ce312eb7ad5329f2159608d)) + - Lookup_prefix() seems to work now ([`b558f11`](https://github.com/Byron/gitoxide/commit/b558f111520381e25a9500d3b2401fdd337db6f6)) + - A stab at implementing lookup_prefix - to no avail ([`69cb6d1`](https://github.com/Byron/gitoxide/commit/69cb6d1dd6b8df74fee1ead1ce15bcf0b51d7232)) + - Refactor ([`cff6f9f`](https://github.com/Byron/gitoxide/commit/cff6f9fc90e58c409e367912d0b38860fae9a205)) + - Refactor ([`5bc548e`](https://github.com/Byron/gitoxide/commit/5bc548ed500045491012ab0a93bcbe13e78b0dc8)) + - Prefix now validates all constraints and errors on violation ([`75efa79`](https://github.com/Byron/gitoxide/commit/75efa79f62efc29b343d2d2f53eaf001eef176df)) + - Git-hash::Prefix::from_id() ([`cb83bee`](https://github.com/Byron/gitoxide/commit/cb83beedd1aa389f6774e2296f79273e8c8f14f4)) + - Sketch for abbreviated method lookup ([`467453a`](https://github.com/Byron/gitoxide/commit/467453a7e625a3bc8e3e381ce50f24f1be8ba605)) + - Use hash_hasher based hash state for better keys/less collisions ([`814de07`](https://github.com/Byron/gitoxide/commit/814de079f4226f42efa49ad334a348bce67184e4)) + - Upgrade parking_lot and cargo_toml ([`f95c1a0`](https://github.com/Byron/gitoxide/commit/f95c1a0d9c19bcc6feb9b8739a09d86f9970a0e0)) + * **[#301](https://github.com/Byron/gitoxide/issues/301)** + - Update changelogs prior to release ([`84cb256`](https://github.com/Byron/gitoxide/commit/84cb25614a5fcddff297c1713eba4efbb6ff1596)) + - Adapt to changes in git-path ([`cc2d810`](https://github.com/Byron/gitoxide/commit/cc2d81012d107da7a61bf4de5b28342dea5083b7)) + - Use `git-path` crate instead of `git_features::path` ([`47e607d`](https://github.com/Byron/gitoxide/commit/47e607dc256a43a3411406c645eb7ff04239dd3a)) + - Salvage an alternative parallelization approach which might be good for index-creation ([`7e76796`](https://github.com/Byron/gitoxide/commit/7e76796d5c2956961bd998286bec05fca1ba8fc4)) + - Refactor ([`f86eacc`](https://github.com/Byron/gitoxide/commit/f86eacc5cfaf6d88ead4f8dbd65989d32674c213)) + - Switch index checkout to chunk-based operation ([`e5f6943`](https://github.com/Byron/gitoxide/commit/e5f69433e4a6cc7866b666e0baccfa32efb92a7f)) + * **[#329](https://github.com/Byron/gitoxide/issues/329)** + - In-manifest and 
in-lib documentation of feature toggles ([`1620830`](https://github.com/Byron/gitoxide/commit/16208306ab49ade30d8ffd6b067ebd8eefd84cd4)) + - Document all features related to serde1 ([`72b97f2`](https://github.com/Byron/gitoxide/commit/72b97f2ae4dc7642b160f183c6d5df4502dc186f)) + * **[#331](https://github.com/Byron/gitoxide/issues/331)** + - Adapt to changes in git_features::path to deal with Result ([`bba4c68`](https://github.com/Byron/gitoxide/commit/bba4c680c627a418efbd25f14bd168df19b8dedd)) + * **[#333](https://github.com/Byron/gitoxide/issues/333)** + - Use git_features::path everywhere where there is a path conversion ([`2e1437c`](https://github.com/Byron/gitoxide/commit/2e1437cb0b5dc77f2317881767f71eaf9b009ebf)) + - Gitoxide-core without os-str-bytes ([`909aa14`](https://github.com/Byron/gitoxide/commit/909aa1402c82c3128052023613a297b213716e3d)) + - Remove os_str_bytes from git-pack ([`86f6e50`](https://github.com/Byron/gitoxide/commit/86f6e5054ea11b7aeb9c85321913de090f71e3a1)) + * **[#364](https://github.com/Byron/gitoxide/issues/364)** + - Add some precaution to avoid strange interactions with packs ([`b052a9a`](https://github.com/Byron/gitoxide/commit/b052a9a3e9127fd9a4029594ea9de6e436db03c6)) + - Fix build ([`9c8e449`](https://github.com/Byron/gitoxide/commit/9c8e449e928b3190e5845606f79b12c529dede55)) + * **[#384](https://github.com/Byron/gitoxide/issues/384)** + - Prevent line-ending conversions for shell scripts on windows ([`96bb4d4`](https://github.com/Byron/gitoxide/commit/96bb4d460db420e18dfd0f925109c740e971820d)) + - No need to isolate archives by crate name ([`19d46f3`](https://github.com/Byron/gitoxide/commit/19d46f35440419b9911b6e2bca2cfc975865dce9)) + - Add archive files via git-lfs ([`7202a1c`](https://github.com/Byron/gitoxide/commit/7202a1c4734ad904c026ee3e4e2143c0461d51a2)) + - Auto-set commit.gpgsign=false when executing git ([`c23feb6`](https://github.com/Byron/gitoxide/commit/c23feb64ad157180cfba8a11c882b829733ea8f6)) + * **[#422](https://github.com/Byron/gitoxide/issues/422)** + - Prepare changelog ([`de2d587`](https://github.com/Byron/gitoxide/commit/de2d5874b8d75c53165a9fc3ed35e2b37142bf52)) + * **[#427](https://github.com/Byron/gitoxide/issues/427)** + - Make fmt ([`4b320e7`](https://github.com/Byron/gitoxide/commit/4b320e773368ac5e8c38dd8a779ef3d6d2d024ec)) + - Refactor ([`c28404b`](https://github.com/Byron/gitoxide/commit/c28404b3656ba6714db6898ce8151c0bcf4448e0)) + - Assure index ambiguous object range can represent 'none found' ([`5ffe54f`](https://github.com/Byron/gitoxide/commit/5ffe54ff88f026139474658fb470742751126119)) + - Avoid allocating index entries in case of ambiguity by using a range ([`4db4754`](https://github.com/Byron/gitoxide/commit/4db47547fa405542efd38b475e3e430548b9d160)) + - Fix build ([`8a63076`](https://github.com/Byron/gitoxide/commit/8a6307617580f72457d3f008e4fe40ae7dcfd360)) + - More efficient and clear addition of entries to candidates ([`a76a0e1`](https://github.com/Byron/gitoxide/commit/a76a0e161ec289d289e5683e541cff45d393756d)) + - Properly order ambiguous entries, ascending ([`0340896`](https://github.com/Byron/gitoxide/commit/0340896b49fe4f40a9dba1c947ec9a2050d8f9b8)) + - Provide optional `candidates` for ambigious entries during `lookup_prefix()` ([`95210cb`](https://github.com/Byron/gitoxide/commit/95210cb2ba85f75148b4ef48ccea9d9f8a0a0114)) + - Refactor ([`a2f9fa4`](https://github.com/Byron/gitoxide/commit/a2f9fa431a820093cb34158e71cb30dc21db1151)) + * **[#450](https://github.com/Byron/gitoxide/issues/450)** + - Less noisy 
way of writing trait bounds ([`b593806`](https://github.com/Byron/gitoxide/commit/b593806ca3571d680801130ad528f266d3eab83e)) + - Upgrade to `prodash` v21 ([`a0655dc`](https://github.com/Byron/gitoxide/commit/a0655dc7bc5dff388bc69a648e7f16b44fd1abd9)) + - Prefix created pack files with `pack-` ([`e489b10`](https://github.com/Byron/gitoxide/commit/e489b10878c85289e0a9869804abee2418de6989)) + - Increase pack-receive performance using a BufWriter ([`a745512`](https://github.com/Byron/gitoxide/commit/a745512185fb0a46e35daaa6d28829aec05edb55)) + - `index::write::Outcome::index_kind` -> `::index_version`. ([`b46347f`](https://github.com/Byron/gitoxide/commit/b46347fd3d50886eeca500e31e1e12b354711309)) + - Add test to show that empty packs won't be written as expected behaviour. ([`72ce7fd`](https://github.com/Byron/gitoxide/commit/72ce7fdced10b8359e74daea3bb35ab73b29e7c0)) + - `bundle::write::Error` is now publicly available ([`0b6ed60`](https://github.com/Byron/gitoxide/commit/0b6ed60f842f0a36f61f187651080540a358758e)) + - `bundle::write::Options::index_kind` -> `::index_version`. ([`591afd5`](https://github.com/Byron/gitoxide/commit/591afd56d9862a6348ef8b3af61798004b36aa19)) + - Upgrade `bstr` to `1.0.1` ([`99905ba`](https://github.com/Byron/gitoxide/commit/99905bacace8aed42b16d43f0f04cae996cb971c)) + * **[#470](https://github.com/Byron/gitoxide/issues/470)** + - Update changelogs prior to release ([`caa7a1b`](https://github.com/Byron/gitoxide/commit/caa7a1bdef74d7d3166a7e38127a59f5ab3cfbdd)) + * **[#67](https://github.com/Byron/gitoxide/issues/67)** + - Use an even faster way of counting ([`3877920`](https://github.com/Byron/gitoxide/commit/387792085542ebc8277ac0dcaf9e3dc3b522a69a)) + - Avoid the dashmap being cloned for each thread ([`6d3f52d`](https://github.com/Byron/gitoxide/commit/6d3f52dc13d7243a6bce6dab89a985114a75d94b)) + - Properly count total objects during pack creation ([`bcb3d37`](https://github.com/Byron/gitoxide/commit/bcb3d37a900a40fd70b7be7bad8b2d5db292d2af)) + - ObjectID specific hashers, using the fact that object ids are hashes ([`f9232ac`](https://github.com/Byron/gitoxide/commit/f9232acf8e52f8cd95520d122469e136eb07b39f)) + - Use a custom hasher for 'seen' objects hashset… ([`70179e2`](https://github.com/Byron/gitoxide/commit/70179e2cf8d15ba4e1cf8e94a9915bf5b02cf755)) + - Don't include submodules in count… ([`faf6f81`](https://github.com/Byron/gitoxide/commit/faf6f813927720c5adf62102f9ce46606ff2617c)) + - Control pack and object cache size in megabytes ([`60c9fad`](https://github.com/Byron/gitoxide/commit/60c9fad8002b4e3f6b9607bba6361871752f4d3d)) + - Use 'cache::Object' trait where it matters ([`71c628d`](https://github.com/Byron/gitoxide/commit/71c628d46088ab455b54eb2330d24dcff96c911d)) + - Split data::output::count::objects into files ([`8fe4612`](https://github.com/Byron/gitoxide/commit/8fe461281842b58aa11437445637c6e587bedd63)) + - Cache::Object trait for caching and retrieving whole objects ([`50cf610`](https://github.com/Byron/gitoxide/commit/50cf610e8939812c3d2268c48835e2dac67d0c31)) + - Object cache size is configurable ([`5a8c2da`](https://github.com/Byron/gitoxide/commit/5a8c2da6cb1e2accf7cfdccc16bc3a1d0b2a7dbc)) + - Dynamically sized full-object speeds up diff-based object counting… ([`d6c44e6`](https://github.com/Byron/gitoxide/commit/d6c44e6ab8f436020d4fb235e423b018fd1e7a9f)) + - Count ref-deltas in thin packs as well ([`80c6994`](https://github.com/Byron/gitoxide/commit/80c6994149d19917c25e36e1bdf0dc8c9678365e)) + - Add '--thin' flag to pack-create and pass 
it on ([`2664d73`](https://github.com/Byron/gitoxide/commit/2664d73f531a4b1f4bc784c1fe3a991711c86475)) + * **[#691](https://github.com/Byron/gitoxide/issues/691)** + - Set `rust-version` to 1.64 ([`55066ce`](https://github.com/Byron/gitoxide/commit/55066ce5fd71209abb5d84da2998b903504584bb)) + * **[#XXX](https://github.com/Byron/gitoxide/issues/XXX)** + - Prepare changelogs prior to release ([`8c0bca3`](https://github.com/Byron/gitoxide/commit/8c0bca37ff9fbaadbe55561fb2b0d649980c95b1)) + * **Uncategorized** + - Release gix-pack v0.30.2, gix-odb v0.40.2, gix-packetline v0.14.2, gix-transport v0.25.4, gix-protocol v0.26.3, gix-revision v0.10.3, gix-refspec v0.7.2, gix-worktree v0.12.2, gix v0.36.0 ([`59e9fac`](https://github.com/Byron/gitoxide/commit/59e9fac67d1b353e124300435b55f6b5468d7deb)) + - Release gix-index v0.12.3, gix-mailmap v0.9.2, gix-chunk v0.4.1, gix-pack v0.30.2, gix-odb v0.40.2, gix-packetline v0.14.2, gix-transport v0.25.4, gix-protocol v0.26.3, gix-revision v0.10.3, gix-refspec v0.7.2, gix-worktree v0.12.2, gix v0.36.0 ([`48f5bd2`](https://github.com/Byron/gitoxide/commit/48f5bd2014fa3dda6fbd60d091065c5537f69453)) + - Release gix-credentials v0.9.1, gix-diff v0.26.1, gix-discover v0.13.0, gix-hashtable v0.1.1, gix-bitmap v0.2.1, gix-traverse v0.22.1, gix-index v0.12.3, gix-mailmap v0.9.2, gix-chunk v0.4.1, gix-pack v0.30.2, gix-odb v0.40.2, gix-packetline v0.14.2, gix-transport v0.25.4, gix-protocol v0.26.3, gix-revision v0.10.3, gix-refspec v0.7.2, gix-worktree v0.12.2, gix v0.36.0 ([`a5869e0`](https://github.com/Byron/gitoxide/commit/a5869e0b223406820bca836e3e3a7fae2bfd9b04)) + - Release gix-config v0.16.1, gix-command v0.2.3, gix-prompt v0.3.2, gix-url v0.13.2, gix-credentials v0.9.1, gix-diff v0.26.1, gix-discover v0.13.0, gix-hashtable v0.1.1, gix-bitmap v0.2.1, gix-traverse v0.22.1, gix-index v0.12.3, gix-mailmap v0.9.2, gix-chunk v0.4.1, gix-pack v0.30.2, gix-odb v0.40.2, gix-packetline v0.14.2, gix-transport v0.25.4, gix-protocol v0.26.3, gix-revision v0.10.3, gix-refspec v0.7.2, gix-worktree v0.12.2, gix v0.36.0 ([`41d57b9`](https://github.com/Byron/gitoxide/commit/41d57b98964094fc1528adb09f69ca824229bf25)) + - Release gix-attributes v0.8.2, gix-config-value v0.10.1, gix-tempfile v3.0.2, gix-lock v3.0.2, gix-validate v0.7.2, gix-object v0.26.1, gix-ref v0.24.0, gix-sec v0.6.2, gix-config v0.16.1, gix-command v0.2.3, gix-prompt v0.3.2, gix-url v0.13.2, gix-credentials v0.9.1, gix-diff v0.26.1, gix-discover v0.13.0, gix-hashtable v0.1.1, gix-bitmap v0.2.1, gix-traverse v0.22.1, gix-index v0.12.3, gix-mailmap v0.9.2, gix-chunk v0.4.1, gix-pack v0.30.2, gix-odb v0.40.2, gix-packetline v0.14.2, gix-transport v0.25.4, gix-protocol v0.26.3, gix-revision v0.10.3, gix-refspec v0.7.2, gix-worktree v0.12.2, gix v0.36.0 ([`e313112`](https://github.com/Byron/gitoxide/commit/e31311257bd138b52042dea5fc40c3abab7f269b)) + - Release gix-features v0.26.4, gix-actor v0.17.1, gix-glob v0.5.3, gix-path v0.7.1, gix-quote v0.4.1, gix-attributes v0.8.2, gix-config-value v0.10.1, gix-tempfile v3.0.2, gix-lock v3.0.2, gix-validate v0.7.2, gix-object v0.26.1, gix-ref v0.24.0, gix-sec v0.6.2, gix-config v0.16.1, gix-command v0.2.3, gix-prompt v0.3.2, gix-url v0.13.2, gix-credentials v0.9.1, gix-diff v0.26.1, gix-discover v0.13.0, gix-hashtable v0.1.1, gix-bitmap v0.2.1, gix-traverse v0.22.1, gix-index v0.12.3, gix-mailmap v0.9.2, gix-chunk v0.4.1, gix-pack v0.30.2, gix-odb v0.40.2, gix-packetline v0.14.2, gix-transport v0.25.4, gix-protocol v0.26.3, gix-revision v0.10.3, gix-refspec v0.7.2, 
gix-worktree v0.12.2, gix v0.36.0 ([`6efd0d3`](https://github.com/Byron/gitoxide/commit/6efd0d31fbeca31ab7319aa2ac97bb31dc4ce055)) + - Release gix-date v0.4.2, gix-hash v0.10.2, gix-features v0.26.4, gix-actor v0.17.1, gix-glob v0.5.3, gix-path v0.7.1, gix-quote v0.4.1, gix-attributes v0.8.2, gix-config-value v0.10.1, gix-tempfile v3.0.2, gix-lock v3.0.2, gix-validate v0.7.2, gix-object v0.26.1, gix-ref v0.24.0, gix-sec v0.6.2, gix-config v0.16.1, gix-command v0.2.3, gix-prompt v0.3.2, gix-url v0.13.2, gix-credentials v0.9.1, gix-diff v0.26.1, gix-discover v0.13.0, gix-hashtable v0.1.1, gix-bitmap v0.2.1, gix-traverse v0.22.1, gix-index v0.12.3, gix-mailmap v0.9.2, gix-chunk v0.4.1, gix-pack v0.30.2, gix-odb v0.40.2, gix-packetline v0.14.2, gix-transport v0.25.4, gix-protocol v0.26.3, gix-revision v0.10.3, gix-refspec v0.7.2, gix-worktree v0.12.2, gix v0.36.0 ([`6ccc88a`](https://github.com/Byron/gitoxide/commit/6ccc88a8e4a56973b1a358cf72dc012ee3c75d56)) + - Merge branch 'rename-crates' into inform-about-gix-rename ([`c9275b9`](https://github.com/Byron/gitoxide/commit/c9275b99ea43949306d93775d9d78c98fb86cfb1)) + - Rename `git-testtools` to `gix-testtools` ([`b65c33d`](https://github.com/Byron/gitoxide/commit/b65c33d256cfed65d11adeff41132e3e58754089)) + - Adjust to renaming of `git-pack` to `gix-pack` ([`1ee81ad`](https://github.com/Byron/gitoxide/commit/1ee81ad310285ee4aa118118a2be3810dbace574)) + - Rename `git-pack` to `gix-pack` ([`134d70d`](https://github.com/Byron/gitoxide/commit/134d70dbb7fe9c7fcd4201a7bb028b22bca39a30)) + - Adjust to renaming of `git-odb` to `gix-odb` ([`476e2ad`](https://github.com/Byron/gitoxide/commit/476e2ad1a64e9e3f0d7c8651d5bcbee36cd78241)) + - Adjust to renaming of `git-index` to `gix-index` ([`86db5e0`](https://github.com/Byron/gitoxide/commit/86db5e09fc58ce66b252dc13b8d7e2c48e4d5062)) + - Adjust to renaming of `git-diff` to `gix-diff` ([`49a163e`](https://github.com/Byron/gitoxide/commit/49a163ec8b18f0e5fcd05a315de16d5d8be7650e)) + - Adjust to renaming of `git-commitgraph` to `gix-commitgraph` ([`f1dd0a3`](https://github.com/Byron/gitoxide/commit/f1dd0a3366e31259af029da73228e8af2f414244)) + - Adjust to renaming of `git-mailmap` to `gix-mailmap` ([`2e28c56`](https://github.com/Byron/gitoxide/commit/2e28c56bb9f70de6f97439818118d3a25859698f)) + - Adjust to renaming of `git-discover` to `gix-discover` ([`53adfe1`](https://github.com/Byron/gitoxide/commit/53adfe1c34e9ea3b27067a97b5e7ac80b351c441)) + - Adjust to renaming of `git-lfs` to `gix-lfs` ([`b9225c8`](https://github.com/Byron/gitoxide/commit/b9225c830daf1388484ee7e05f727990fdeff43c)) + - Adjust to renaming of `git-chunk` to `gix-chunk` ([`59194e3`](https://github.com/Byron/gitoxide/commit/59194e3a07853eae0624ebc4907478d1de4f7599)) + - Adjust to renaming of `git-bitmap` to `gix-bitmap` ([`75f2a07`](https://github.com/Byron/gitoxide/commit/75f2a079b17489f62bc43e1f1d932307375c4f9d)) + - Adjust to renaming for `git-protocol` to `gix-protocol` ([`823795a`](https://github.com/Byron/gitoxide/commit/823795addea3810243cab7936cd8ec0137cbc224)) + - Adjust to renaming of `git-refspec` to `gix-refspec` ([`c958802`](https://github.com/Byron/gitoxide/commit/c9588020561577736faa065e7e5b5bb486ca8fe1)) + - Adjust to renaming of `git-revision` to `gix-revision` ([`ee0ee84`](https://github.com/Byron/gitoxide/commit/ee0ee84607c2ffe11ee75f27a31903db68afed02)) + - Adjust to renaming of `git-transport` to `gix-transport` ([`b2ccf71`](https://github.com/Byron/gitoxide/commit/b2ccf716dc4425bb96651d4d58806a3cc2da219e)) + - Adjust to 
renaming of `git-credentials` to `gix-credentials` ([`6b18abc`](https://github.com/Byron/gitoxide/commit/6b18abcf2856f02ab938d535a65e51ac282bf94a)) + - Adjust to renaming of `git-prompt` to `gix-prompt` ([`6a4654e`](https://github.com/Byron/gitoxide/commit/6a4654e0d10ab773dd219cb4b731c0fc1471c36d)) + - Adjust to renaming of `git-command` to `gix-command` ([`d26b8e0`](https://github.com/Byron/gitoxide/commit/d26b8e046496894ae06b0bbfdba77196976cd975)) + - Adjust to renaming of `git-packetline` to `gix-packetline` ([`5cbd22c`](https://github.com/Byron/gitoxide/commit/5cbd22cf42efb760058561c6c3bbcd4dab8c8be1)) + - Adjust to renaming of `git-worktree` to `gix-worktree` ([`73a1282`](https://github.com/Byron/gitoxide/commit/73a12821b3d9b66ec1714d07dd27eb7a73e3a544)) + - Adjust to renamining of `git-hashtable` to `gix-hashtable` ([`26a0c98`](https://github.com/Byron/gitoxide/commit/26a0c98d0a389b03e3dc7bfc758b37155e285244)) + - Adjust to renamining of `git-worktree` to `gix-worktree` ([`108bb1a`](https://github.com/Byron/gitoxide/commit/108bb1a634f4828853fb590e9fc125f79441dd38)) + - Adjust to renaming of `git-url` to `gix-url` ([`b50817a`](https://github.com/Byron/gitoxide/commit/b50817aadb143e19f61f64e19b19ec1107d980c6)) + - Adjust to renaming of `git-date` to `gix-date` ([`9a79ff2`](https://github.com/Byron/gitoxide/commit/9a79ff2d5cc74c1efad9f41e21095ae498cce00b)) + - Adjust to renamining of `git-attributes` to `gix-attributes` ([`4a8b3b8`](https://github.com/Byron/gitoxide/commit/4a8b3b812ac26f2a2aee8ce8ca81591273383c84)) + - Adjust to renaminig of `git-quote` to `gix-quote` ([`648025b`](https://github.com/Byron/gitoxide/commit/648025b7ca94411fdd0d90c53e5faede5fde6c8d)) + - Adjust to renaming of `git-config` to `gix-config` ([`3a861c8`](https://github.com/Byron/gitoxide/commit/3a861c8f049f6502d3bcbdac752659aa1aeda46a)) + - Adjust to renaming of `git-ref` to `gix-ref` ([`1f5f695`](https://github.com/Byron/gitoxide/commit/1f5f695407b034377d94b172465ff573562b3fc3)) + - Adjust to renaming of `git-lock` to `gix-lock` ([`2028e78`](https://github.com/Byron/gitoxide/commit/2028e7884ae1821edeec81612f501e88e4722b17)) + - Adjust to renaming of `git-tempfile` to `gix-tempfile` ([`b6cc3eb`](https://github.com/Byron/gitoxide/commit/b6cc3ebb5137084a6327af16a7d9364d8f092cc9)) + - Adjust to renaming of `git-object` to `gix-object` ([`fc86a1e`](https://github.com/Byron/gitoxide/commit/fc86a1e710ad7bf076c25cc6f028ddcf1a5a4311)) + - Adjust to renaming of `git-actor` to `gix-actor` ([`4dc9b44`](https://github.com/Byron/gitoxide/commit/4dc9b44dc52f2486ffa2040585c6897c1bf55df4)) + - Adjust to renaming of `git-validate` to `gix-validate` ([`5e40ad0`](https://github.com/Byron/gitoxide/commit/5e40ad078af3d08cbc2ca81ce755c0ed8a065b4f)) + - Adjust to renaming of `git-hash` to `gix-hash` ([`4a9d025`](https://github.com/Byron/gitoxide/commit/4a9d0257110c3efa61d08c8457c4545b200226d1)) + - Adjust to renaming of `git-features` to `gix-features` ([`e2dd68a`](https://github.com/Byron/gitoxide/commit/e2dd68a417aad229e194ff20dbbfd77668096ec6)) + - Adjust to renaming of `git-glob` to `gix-glob` ([`35b2a3a`](https://github.com/Byron/gitoxide/commit/35b2a3acbc8f2a03f151bc0a3863163844e0ca86)) + - Adjust to renaming of `git-sec` to `gix-sec` ([`eabbb92`](https://github.com/Byron/gitoxide/commit/eabbb923bd5a32fc80fa80f96cfdc2ab7bb2ed17)) + - Adapt to renaming of `git-path` to `gix-path` ([`d3bbcfc`](https://github.com/Byron/gitoxide/commit/d3bbcfccad80fc44ea8e7bf819f23adaca06ba2d)) + - Adjust to rename of `git-config-value` to 
`gix-config-value` ([`622b3e1`](https://github.com/Byron/gitoxide/commit/622b3e1d0bffa0f8db73697960f9712024fac430)) + - Merge branch 'git-pack-wasm' ([`4bc19d1`](https://github.com/Byron/gitoxide/commit/4bc19d104233a3e3d3d2768c0e9b9ad027cc34c0)) + - Add `wasm` feature toggle to let parts of `git-pack` build on wasm32. ([`6c4c196`](https://github.com/Byron/gitoxide/commit/6c4c196c9bc6c2171dc4dc58b69bd5ef53226e29)) + - Add `wasm` feature toggle to allow compilation to wasm32-unknown-unknown ([`f0e40ec`](https://github.com/Byron/gitoxide/commit/f0e40ecddaf1211f76ed60ef30cf03dcfd53a7ab)) + - Release git-features v0.26.4 ([`109f434`](https://github.com/Byron/gitoxide/commit/109f434e66559a791d541f86876ded8df10766f1)) + - Release git-features v0.26.3 ([`1ecfb7f`](https://github.com/Byron/gitoxide/commit/1ecfb7f8bfb24432690d8f31367488f2e59a642a)) + - Release git-date v0.4.2, git-hash v0.10.2, git-features v0.26.2, git-actor v0.17.1, git-glob v0.5.3, git-path v0.7.1, git-quote v0.4.1, git-attributes v0.8.2, git-config-value v0.10.1, git-tempfile v3.0.2, git-lock v3.0.2, git-validate v0.7.2, git-object v0.26.1, git-ref v0.24.0, git-sec v0.6.2, git-config v0.16.0, git-command v0.2.3, git-prompt v0.3.2, git-url v0.13.2, git-credentials v0.9.1, git-diff v0.26.1, git-discover v0.13.0, git-hashtable v0.1.1, git-bitmap v0.2.1, git-traverse v0.22.1, git-index v0.12.3, git-mailmap v0.9.2, git-chunk v0.4.1, git-pack v0.30.2, git-odb v0.40.2, git-packetline v0.14.2, git-transport v0.25.4, git-protocol v0.26.3, git-revision v0.10.2, git-refspec v0.7.2, git-worktree v0.12.2, git-repository v0.34.0, safety bump 3 crates ([`c196d20`](https://github.com/Byron/gitoxide/commit/c196d206d57a310b1ce974a1cf0e7e6d6db5c4d6)) + - Prepare changelogs prior to release ([`7c846d2`](https://github.com/Byron/gitoxide/commit/7c846d2102dc767366771925212712ef8cc9bf07)) + - Merge branch 'Lioness100/main' ([`1e544e8`](https://github.com/Byron/gitoxide/commit/1e544e82455bf9ecb5e3c2146280eaf7ecd81f16)) + - Fix typos ([`39ed9ed`](https://github.com/Byron/gitoxide/commit/39ed9eda62b7718d5109135e5ad406fb1fe2978c)) + - Thanks clippy ([`bac57dd`](https://github.com/Byron/gitoxide/commit/bac57dd05ea2d5a4ee45ef9350fa3f2e19474bc0)) + - Make fmt ([`e22080e`](https://github.com/Byron/gitoxide/commit/e22080e4a29d0bad15a99d565a5e3e304a8743ec)) + - Merge branch 'adjustments-for-cargo' ([`7bba270`](https://github.com/Byron/gitoxide/commit/7bba2709488b7eb999b8136dbab03af977241678)) + - Break cyclical dev dependencies ([`1fea18f`](https://github.com/Byron/gitoxide/commit/1fea18f5f8b4189a23dc4fa3f041a672f6fbcfb3)) + - Use enumerations to advertise progress ids publicly. 
([`d792ea5`](https://github.com/Byron/gitoxide/commit/d792ea543246632bf1ca8d0e1d239bbe7f07e219)) + - Release git-date v0.4.1, git-features v0.26.1, git-glob v0.5.2, git-attributes v0.8.1, git-tempfile v3.0.1, git-ref v0.23.1, git-sec v0.6.1, git-config v0.15.1, git-prompt v0.3.1, git-url v0.13.1, git-discover v0.12.1, git-index v0.12.2, git-mailmap v0.9.1, git-pack v0.30.1, git-odb v0.40.1, git-transport v0.25.3, git-protocol v0.26.2, git-revision v0.10.1, git-refspec v0.7.1, git-worktree v0.12.1, git-repository v0.33.0 ([`5b5b380`](https://github.com/Byron/gitoxide/commit/5b5b3809faa71c658db38b40dfc410224d08a367)) + - Prepare changelogs prior to release ([`93bef97`](https://github.com/Byron/gitoxide/commit/93bef97b3c0c75d4bf7119fdd787516e1efc77bf)) + - Merge branch 'patch-1' ([`b93f0c4`](https://github.com/Byron/gitoxide/commit/b93f0c49fc677b6c19aea332cbfc1445ce475375)) + - Thanks clippy ([`9e04685`](https://github.com/Byron/gitoxide/commit/9e04685dd3f109bfb27663f9dc7c04102e660bf2)) + - Release git-ref v0.23.0, git-config v0.15.0, git-command v0.2.2, git-diff v0.26.0, git-discover v0.12.0, git-mailmap v0.9.0, git-pack v0.30.0, git-odb v0.40.0, git-transport v0.25.2, git-protocol v0.26.1, git-revision v0.10.0, git-refspec v0.7.0, git-worktree v0.12.0, git-repository v0.32.0 ([`ffb5b6a`](https://github.com/Byron/gitoxide/commit/ffb5b6a21cb415315db6fd5294940c7c6deb4538)) + - Prepare changelogs prior to release ([`4381a03`](https://github.com/Byron/gitoxide/commit/4381a03a34c305f31713cce234c2afbf8ac60f01)) + - Release git-date v0.4.0, git-actor v0.17.0, git-object v0.26.0, git-traverse v0.22.0, git-index v0.12.0, safety bump 15 crates ([`0e3d0a5`](https://github.com/Byron/gitoxide/commit/0e3d0a56d7e6a60c6578138f2690b4fa54a2072d)) + - Release git-features v0.26.0, git-actor v0.16.0, git-attributes v0.8.0, git-object v0.25.0, git-ref v0.22.0, git-config v0.14.0, git-command v0.2.1, git-url v0.13.0, git-credentials v0.9.0, git-diff v0.25.0, git-discover v0.11.0, git-traverse v0.21.0, git-index v0.11.0, git-mailmap v0.8.0, git-pack v0.29.0, git-odb v0.39.0, git-transport v0.25.0, git-protocol v0.26.0, git-revision v0.9.0, git-refspec v0.6.0, git-worktree v0.11.0, git-repository v0.31.0, safety bump 24 crates ([`5ac9fbe`](https://github.com/Byron/gitoxide/commit/5ac9fbe265a5b61c533a2a6b3abfed2bdf7f89ad)) + - Prepare changelogs prior to release ([`30d8ca1`](https://github.com/Byron/gitoxide/commit/30d8ca19284049dcfbb0de2698cafae1d1a16b0c)) + - Make fmt ([`511ed00`](https://github.com/Byron/gitoxide/commit/511ed0000397a5b268530c8f5362e7d25b7c1594)) + - Release git-date v0.3.1, git-features v0.25.0, git-actor v0.15.0, git-glob v0.5.1, git-path v0.7.0, git-attributes v0.7.0, git-config-value v0.10.0, git-lock v3.0.1, git-validate v0.7.1, git-object v0.24.0, git-ref v0.21.0, git-sec v0.6.0, git-config v0.13.0, git-prompt v0.3.0, git-url v0.12.0, git-credentials v0.8.0, git-diff v0.24.0, git-discover v0.10.0, git-traverse v0.20.0, git-index v0.10.0, git-mailmap v0.7.0, git-pack v0.28.0, git-odb v0.38.0, git-packetline v0.14.1, git-transport v0.24.0, git-protocol v0.25.0, git-revision v0.8.0, git-refspec v0.5.0, git-worktree v0.10.0, git-repository v0.30.0, safety bump 26 crates ([`e6b9906`](https://github.com/Byron/gitoxide/commit/e6b9906c486b11057936da16ed6e0ec450a0fb83)) + - Prepare chnagelogs prior to git-repository release ([`7114bbb`](https://github.com/Byron/gitoxide/commit/7114bbb6732aa8571d4ab74f28ed3e26e9fbe4d0)) + - Merge branch 'odb-iteration' 
([`693a469`](https://github.com/Byron/gitoxide/commit/693a46977e2b57b93ee921320e008c8ad1beb81b)) + - Assure deltas are counted correctly, even if the base is out of pack. ([`ddaf47f`](https://github.com/Byron/gitoxide/commit/ddaf47f023970e8acfb98e8874da22f2604a92d9)) + - Merge branch 'read-header' ([`3d01252`](https://github.com/Byron/gitoxide/commit/3d0125271ec7bd606734bd74757a7e31a18c7ce5)) + - Tweaks to immprove header decode performance ([`04df898`](https://github.com/Byron/gitoxide/commit/04df89835395d43e92ce3f0fee922f1799687aa4)) + - Move `data::ResolveBase` into `data::decode_entry::`; unify `decode_entry|decode_header::Error` into `decode::Error`. ([`37f3a67`](https://github.com/Byron/gitoxide/commit/37f3a675d7fa931f5d3f38e91df74ec0b517422b)) + - Add `data::File::resolve_header()` to obtain object information without decompressing it. ([`a85dcdd`](https://github.com/Byron/gitoxide/commit/a85dcddba29a453bbea87ac865b8aedc97f41aed)) + - Merge branch 'main' into read-split-index ([`c57bdde`](https://github.com/Byron/gitoxide/commit/c57bdde6de37eca9672ea715962bbd02aa3eb055)) + - Merge branch 'adjustments-for-cargo' ([`083909b`](https://github.com/Byron/gitoxide/commit/083909bc7eb902eeee2002034fdb6ed88280dc5c)) + - Thanks clippy ([`f1160fb`](https://github.com/Byron/gitoxide/commit/f1160fb42acf59b37cbeda546a7079af3c9bc050)) + - Adjust to changes in `git-testtools` ([`4eb842c`](https://github.com/Byron/gitoxide/commit/4eb842c7150b980e1c2637217e1f9657a671cea7)) + - Merge branch 'adjustments-for-cargo' ([`70ccbb2`](https://github.com/Byron/gitoxide/commit/70ccbb21b1113bdeb20b52d274141a9fdb75f579)) + - Upgrade clru, remove it from git-repository dependencies (unused) ([`7e7547d`](https://github.com/Byron/gitoxide/commit/7e7547d995afc16192a1ee08add5a87560197fc9)) + - Release git-hash v0.10.1, git-hashtable v0.1.0 ([`7717170`](https://github.com/Byron/gitoxide/commit/771717095d9a67b0625021eb0928828ab686e772)) + - Merge branch 'main' into http-config ([`6b9632e`](https://github.com/Byron/gitoxide/commit/6b9632e16c416841ffff1b767ee7a6c89b421220)) + - Merge branch 'optimize_hashtables' ([`95ad56c`](https://github.com/Byron/gitoxide/commit/95ad56c11489bc46d6eb2b2f48cf0bf01e954c58)) + - Use newly added git-hashtable ([`50cb436`](https://github.com/Byron/gitoxide/commit/50cb4362010e1a5799fe782df36ac5fcdb48dd8a)) + - Switch to custom Hasher implementation ([`269d59e`](https://github.com/Byron/gitoxide/commit/269d59e0bee1f072096667b143800a0d85b18403)) + - Release git-features v0.24.1, git-actor v0.14.1, git-index v0.9.1 ([`7893502`](https://github.com/Byron/gitoxide/commit/789350208efc9d5fc6f9bc4f113f77f9cb445156)) + - Upgrade to `prodash 21.1` and add `Ids` to all progress instances. 
([`c8835c6`](https://github.com/Byron/gitoxide/commit/c8835c6edae784c9ffcb69a674c0a6545dbb2af3)) + - Merge branch 'main' into http-config ([`bcd9654`](https://github.com/Byron/gitoxide/commit/bcd9654e56169799eb706646da6ee1f4ef2021a9)) + - Release git-hash v0.10.0, git-features v0.24.0, git-date v0.3.0, git-actor v0.14.0, git-glob v0.5.0, git-path v0.6.0, git-quote v0.4.0, git-attributes v0.6.0, git-config-value v0.9.0, git-tempfile v3.0.0, git-lock v3.0.0, git-validate v0.7.0, git-object v0.23.0, git-ref v0.20.0, git-sec v0.5.0, git-config v0.12.0, git-command v0.2.0, git-prompt v0.2.0, git-url v0.11.0, git-credentials v0.7.0, git-diff v0.23.0, git-discover v0.9.0, git-bitmap v0.2.0, git-traverse v0.19.0, git-index v0.9.0, git-mailmap v0.6.0, git-chunk v0.4.0, git-pack v0.27.0, git-odb v0.37.0, git-packetline v0.14.0, git-transport v0.23.0, git-protocol v0.24.0, git-revision v0.7.0, git-refspec v0.4.0, git-worktree v0.9.0, git-repository v0.29.0, git-commitgraph v0.11.0, gitoxide-core v0.21.0, gitoxide v0.19.0, safety bump 28 crates ([`b2c301e`](https://github.com/Byron/gitoxide/commit/b2c301ef131ffe1871314e19f387cf10a8d2ac16)) + - Prepare changelogs prior to release ([`e4648f8`](https://github.com/Byron/gitoxide/commit/e4648f827c97e9d13636d1bbdc83dd63436e6e5c)) + - Merge branch 'main' into http-config ([`7c5b37d`](https://github.com/Byron/gitoxide/commit/7c5b37d28e98f59a6847368a0d0166d2dbb4acc1)) + - Release git-diff v0.22.0, git-index v0.7.1, git-pack v0.26.0, git-odb v0.36.0, git-transport v0.21.2, git-repository v0.27.0, safety bump 6 crates ([`f0cab31`](https://github.com/Byron/gitoxide/commit/f0cab317bb0c2799fa80d16f3ae1b89d6aee4284)) + - Prepare changelogs prior to release ([`f5f3a9e`](https://github.com/Byron/gitoxide/commit/f5f3a9edd038a89c8c6c4da02054e5439bcc0071)) + - Fix spelling where -> were ([`57355c7`](https://github.com/Byron/gitoxide/commit/57355c7b345d864aaf80fdbd9deec50ca78d94fd)) + - Release git-features v0.23.1, git-glob v0.4.1, git-config-value v0.8.1, git-tempfile v2.0.6, git-object v0.22.1, git-ref v0.18.0, git-sec v0.4.2, git-config v0.10.0, git-prompt v0.1.1, git-url v0.10.1, git-credentials v0.6.1, git-diff v0.21.0, git-discover v0.7.0, git-index v0.7.0, git-pack v0.25.0, git-odb v0.35.0, git-transport v0.21.1, git-protocol v0.22.0, git-refspec v0.3.1, git-worktree v0.7.0, git-repository v0.26.0, git-commitgraph v0.10.0, gitoxide-core v0.19.0, gitoxide v0.17.0, safety bump 9 crates ([`d071583`](https://github.com/Byron/gitoxide/commit/d071583c5576fdf5f7717765ffed5681792aa81f)) + - Prepare changelogs prior to release ([`423af90`](https://github.com/Byron/gitoxide/commit/423af90c8202d62dc1ea4a76a0df6421d1f0aa06)) + - Merge branch 'main' into write-sparse-index ([`c4e6849`](https://github.com/Byron/gitoxide/commit/c4e68496c368611ebe17c6693d06c8147c28c717)) + - Merge branch 'gix-clone' ([`def53b3`](https://github.com/Byron/gitoxide/commit/def53b36c3dec26fa78939ab0584fe4ff930909c)) + - Merge branch 'main' into gix-clone ([`3b48317`](https://github.com/Byron/gitoxide/commit/3b48317d6a9f41765d4f2a9f0a49c31afcdb68b6)) + - Thanks clippy ([`93e7691`](https://github.com/Byron/gitoxide/commit/93e7691be421e40cc72e3e2e0506584a2fbd4857)) + - `Bundle::write…` also produces a `.keep` file ([`450257e`](https://github.com/Byron/gitoxide/commit/450257eb596465352fb363a5c8c514f544e7f9ac)) + - Release git-hash v0.9.11, git-features v0.23.0, git-actor v0.13.0, git-attributes v0.5.0, git-object v0.22.0, git-ref v0.17.0, git-sec v0.4.1, git-config v0.9.0, git-url v0.10.0, git-credentials 
v0.6.0, git-diff v0.20.0, git-discover v0.6.0, git-traverse v0.18.0, git-index v0.6.0, git-mailmap v0.5.0, git-pack v0.24.0, git-odb v0.34.0, git-packetline v0.13.1, git-transport v0.21.0, git-protocol v0.21.0, git-revision v0.6.0, git-refspec v0.3.0, git-worktree v0.6.0, git-repository v0.25.0, safety bump 24 crates ([`104d922`](https://github.com/Byron/gitoxide/commit/104d922add61ab21c534c24ce8ed37cddf3e275a)) + - Prepare changelogs for release ([`d232567`](https://github.com/Byron/gitoxide/commit/d23256701a95284857dc8d1cb37c7c94cada973c)) + - Merge branch 'fix-smart-release' ([`aa80b60`](https://github.com/Byron/gitoxide/commit/aa80b606e5570f327660cca42ea81581a6e9d5e3)) + - Make fmt ([`7b9c065`](https://github.com/Byron/gitoxide/commit/7b9c06547b75929e3e5bf4240f43c7e9bc7d54e0)) + - Merge branch 'main' into new-http-impl ([`702a161`](https://github.com/Byron/gitoxide/commit/702a161ef11fc959611bf44b70e9ffe04561c7ad)) + - Merge branch 'main' into clone ([`acb0738`](https://github.com/Byron/gitoxide/commit/acb07382a9306d6962bea60e8977d83d021743f4)) + - Merge branch 'delta-tree-parallelization' ([`cca2ad5`](https://github.com/Byron/gitoxide/commit/cca2ad5ee9483d7da968658e0a4d610dbc4ad4d6)) + - Don't enforce Send bounds in serial version of `in_parallel_with_slice()` ([`dda661e`](https://github.com/Byron/gitoxide/commit/dda661e1b7cc0ace6cd9504233f20980e1e52387)) + - Thanks clippy ([`583f2ca`](https://github.com/Byron/gitoxide/commit/583f2cacbdaa79149b3741c1f08cabcc73a1a03f)) + - More efficient distribution of tasks to threads during pack resolution. ([`6836cb1`](https://github.com/Byron/gitoxide/commit/6836cb148cbadf919bdac2e0e77bdec34caa9307)) + - Refactor ([`0e2ca47`](https://github.com/Byron/gitoxide/commit/0e2ca47fab99fec72cf8c8a327f5c3627a916add)) + - Try to use counters directly in delta-tree resolution ([`c6ded26`](https://github.com/Byron/gitoxide/commit/c6ded26aa2513fefda0c19784a70ce5b046910f1)) + - Reduce memory usage for pack resolution by 18%… ([`ec58bef`](https://github.com/Byron/gitoxide/commit/ec58bef84d620360dc52e34e173ace1310f74c85)) + - Merge branch 'reduce-pack-resolve-mem-consumption' ([`a8ced57`](https://github.com/Byron/gitoxide/commit/a8ced575ef935b9a2078d6f2b15aff746a862317)) + - Minor refactor after trying more memory foot-print related changes ([`5852334`](https://github.com/Byron/gitoxide/commit/58523341d1203ff55144a981d7dd3b276bd47b31)) + - Simplify size-related tests ([`fb2b2a8`](https://github.com/Byron/gitoxide/commit/fb2b2a88193247d55f74c3e39cd8a12f3862b6d6)) + - Cut the size of bytes needed to store children in half for a lower footprint ([`38c96f2`](https://github.com/Byron/gitoxide/commit/38c96f2ab1fc2c3a91958e2b4ab213574ec9e8f0)) + - Merge branch 'crates-index-diff-fixes' ([`b09bad7`](https://github.com/Byron/gitoxide/commit/b09bad7b5702838c1119d052b322d90c9b5968bb)) + - Thanks clippy ([`851bc79`](https://github.com/Byron/gitoxide/commit/851bc798a76160701387d25119b101aa8014c93f)) + - Allow verification of empty packs and indices. ([`49d168a`](https://github.com/Byron/gitoxide/commit/49d168a8859c6b5a9e7ef58cd836093212c2c7ad)) + - Refactor ([`38a7452`](https://github.com/Byron/gitoxide/commit/38a74522b5c1ca5c255b8b132c25b90fc7fa85f9)) + - Reading and writing empty packs is explicitly allowed. 
([`5a75afe`](https://github.com/Byron/gitoxide/commit/5a75afe0467e4a84323ea10172eed835cc7fae4c)) + - Make fmt ([`53acf25`](https://github.com/Byron/gitoxide/commit/53acf2565743eff7cead7a42011107b2fc8d7e0e)) + - Merge branch 'fetch-pack' ([`f47c891`](https://github.com/Byron/gitoxide/commit/f47c89129732bcb06fe76a4696fe38ab1151fb0c)) + - Merge branch 'fetch-pack' ([`3c49400`](https://github.com/Byron/gitoxide/commit/3c49400809c7c2120f4ce704c19a0421545b5acd)) + - Merge branch 'fix-git-features' ([`82fd251`](https://github.com/Byron/gitoxide/commit/82fd251ac80d07bc9da8a4d36e517aa35580d188)) + - Merge branch 'diff' ([`25a7726`](https://github.com/Byron/gitoxide/commit/25a7726377fbe400ea3c4927d04e9dec99802b7b)) + - Release git-command v0.1.0, git-prompt v0.1.0, git-url v0.9.0, git-credentials v0.5.0, git-diff v0.19.0, git-mailmap v0.4.0, git-chunk v0.3.2, git-pack v0.23.0, git-odb v0.33.0, git-packetline v0.13.0, git-transport v0.20.0, git-protocol v0.20.0, git-revision v0.5.0, git-refspec v0.2.0, git-repository v0.24.0, git-commitgraph v0.9.0, gitoxide-core v0.18.0, gitoxide v0.16.0 ([`f5c36d8`](https://github.com/Byron/gitoxide/commit/f5c36d85755d1f0f503b77d9a565fad6aecf6728)) + - Release git-hash v0.9.10, git-features v0.22.5, git-date v0.2.0, git-actor v0.12.0, git-glob v0.4.0, git-path v0.5.0, git-quote v0.3.0, git-attributes v0.4.0, git-config-value v0.8.0, git-tempfile v2.0.5, git-validate v0.6.0, git-object v0.21.0, git-ref v0.16.0, git-sec v0.4.0, git-config v0.8.0, git-discover v0.5.0, git-traverse v0.17.0, git-index v0.5.0, git-worktree v0.5.0, git-testtools v0.9.0, git-command v0.1.0, git-prompt v0.1.0, git-url v0.9.0, git-credentials v0.5.0, git-diff v0.19.0, git-mailmap v0.4.0, git-chunk v0.3.2, git-pack v0.23.0, git-odb v0.33.0, git-packetline v0.13.0, git-transport v0.20.0, git-protocol v0.20.0, git-revision v0.5.0, git-refspec v0.2.0, git-repository v0.24.0, git-commitgraph v0.9.0, gitoxide-core v0.18.0, gitoxide v0.16.0, safety bump 28 crates ([`29a043b`](https://github.com/Byron/gitoxide/commit/29a043be6808a3e9199a9b26bd076fe843afe4f4)) + - Merge branch 'filter-refs' ([`fd14489`](https://github.com/Byron/gitoxide/commit/fd14489f729172d615d0fa1e8dbd605e9eacf69d)) + - Release git-features v0.22.6 ([`c9eda72`](https://github.com/Byron/gitoxide/commit/c9eda729d8f8bc266c7516c613d38acfb83a4743)) + - Merge branch 'filter-refs-by-spec' ([`5c05198`](https://github.com/Byron/gitoxide/commit/5c051986bd89590a9287d85d84c713d83dfab83a)) + - Merge branch 'main' into filter-refs-by-spec ([`9aa1d3d`](https://github.com/Byron/gitoxide/commit/9aa1d3dc46d4b1c76af257f573aff3aeef2d3fa8)) + - Release git-features v0.22.4, git-url v0.8.0, safety bump 4 crates ([`1d4600a`](https://github.com/Byron/gitoxide/commit/1d4600ae51475c2e225f96c16c41e2c4a2b3f2aa)) + - Merge branch 'main' into filter-refs-by-spec ([`1f6e5ab`](https://github.com/Byron/gitoxide/commit/1f6e5ab15f5fd8d23719b13e6aea59cd231ac0fe)) + - Merge branch 'fix-522' ([`5869e9f`](https://github.com/Byron/gitoxide/commit/5869e9ff2508d5a93c07635277af8764fcb57713)) + - Release git-hash v0.9.9 ([`da0716f`](https://github.com/Byron/gitoxide/commit/da0716f8c27b4f29cfff0e5ce7fcb3d7240f4aeb)) + - Merge branch 'main' into index-from-tree ([`bc64b96`](https://github.com/Byron/gitoxide/commit/bc64b96a2ec781c72d1d4daad38aa7fb8b74f99b)) + - Release git-path v0.4.2, git-config-value v0.7.0 ([`c48fb31`](https://github.com/Byron/gitoxide/commit/c48fb3107d29f9a06868b0c6de40567063a656d1)) + - Merge branch 'main' into filter-refs-by-spec 
([`cef0b51`](https://github.com/Byron/gitoxide/commit/cef0b51ade2a3301fa09ede7a425aa1fe3527e78)) + - Release git-object v0.20.3, git-ref v0.15.4, git-config v0.7.1, git-diff v0.18.0, git-traverse v0.16.3, git-pack v0.22.0, git-odb v0.32.0, git-url v0.7.3, git-transport v0.19.3, git-protocol v0.19.1, git-refspec v0.1.1, git-repository v0.23.0, safety bump 6 crates ([`85a3bed`](https://github.com/Byron/gitoxide/commit/85a3bedd68d2e5f36592a2f691c977dc55298279)) + - Adjust to changes in `git-diff` ([`54954ee`](https://github.com/Byron/gitoxide/commit/54954ee5022a900f9f97baec63e9a073eca514e9)) + - Release git-features v0.22.3, git-revision v0.4.4 ([`c2660e2`](https://github.com/Byron/gitoxide/commit/c2660e2503323531ba02519eaa51124ee22fec51)) + - Merge branch 'main' into filter-refs-by-spec ([`cfa1440`](https://github.com/Byron/gitoxide/commit/cfa144031dbcac2707ab0cec012bc35e78f9c475)) + - Release git-date v0.0.5, git-hash v0.9.8, git-features v0.22.2, git-actor v0.11.3, git-glob v0.3.2, git-quote v0.2.1, git-attributes v0.3.2, git-tempfile v2.0.4, git-lock v2.1.1, git-validate v0.5.5, git-object v0.20.2, git-ref v0.15.2, git-sec v0.3.1, git-config v0.7.0, git-credentials v0.4.0, git-diff v0.17.2, git-discover v0.4.1, git-bitmap v0.1.2, git-index v0.4.2, git-mailmap v0.3.2, git-chunk v0.3.1, git-traverse v0.16.2, git-pack v0.21.2, git-odb v0.31.2, git-packetline v0.12.7, git-url v0.7.2, git-transport v0.19.2, git-protocol v0.19.0, git-revision v0.4.2, git-refspec v0.1.0, git-worktree v0.4.2, git-repository v0.22.0, safety bump 4 crates ([`4974eca`](https://github.com/Byron/gitoxide/commit/4974eca96d525d1ee4f8cad79bb713af7a18bf9d)) + - Release git-path v0.4.1 ([`5e82346`](https://github.com/Byron/gitoxide/commit/5e823462b3deb904f5d6154a7bf114cef1988224)) + - Merge branch 'main' into remote-ls-refs ([`e2ee3de`](https://github.com/Byron/gitoxide/commit/e2ee3ded97e5c449933712883535b30d151c7c78)) + - Merge branch 'docsrs-show-features' ([`31c2351`](https://github.com/Byron/gitoxide/commit/31c235140cad212d16a56195763fbddd971d87ce)) + - Use docsrs feature in code to show what is feature-gated automatically on docs.rs ([`b1c40b0`](https://github.com/Byron/gitoxide/commit/b1c40b0364ef092cd52d03b34f491b254816b18d)) + - Uniformize deny attributes ([`f7f136d`](https://github.com/Byron/gitoxide/commit/f7f136dbe4f86e7dee1d54835c420ec07c96cd78)) + - Pass --cfg docsrs when compiling for https://docs.rs ([`5176771`](https://github.com/Byron/gitoxide/commit/517677147f1c17304c62cf97a1dd09f232ebf5db)) + - Merge branch 'main' into remote-ls-refs ([`bd5f3e8`](https://github.com/Byron/gitoxide/commit/bd5f3e8db7e0bb4abfb7b0f79f585ab82c3a14ab)) + - Release git-date v0.0.3, git-actor v0.11.1, git-attributes v0.3.1, git-tempfile v2.0.3, git-object v0.20.1, git-ref v0.15.1, git-config v0.6.1, git-diff v0.17.1, git-discover v0.4.0, git-bitmap v0.1.1, git-index v0.4.1, git-mailmap v0.3.1, git-traverse v0.16.1, git-pack v0.21.1, git-odb v0.31.1, git-packetline v0.12.6, git-url v0.7.1, git-transport v0.19.1, git-protocol v0.18.1, git-revision v0.4.0, git-worktree v0.4.1, git-repository v0.21.0, safety bump 5 crates ([`c96473d`](https://github.com/Byron/gitoxide/commit/c96473dce21c3464aacbc0a62d520c1a33172611)) + - Prepare changelogs prior to reelase ([`c06ae1c`](https://github.com/Byron/gitoxide/commit/c06ae1c606b6af9c2a12021103d99c2810750d60)) + - Release git-hash v0.9.7, git-features v0.22.1 ([`232784a`](https://github.com/Byron/gitoxide/commit/232784a59ded3e8016e4257c7e146ad385cdd64a)) + - Merge branch 'main' into 
write-index-v2 ([`a938986`](https://github.com/Byron/gitoxide/commit/a938986877302c197d1aed087594c5605416fe5f)) + - Merge branch 'main' into remote-ls-refs ([`de61c4d`](https://github.com/Byron/gitoxide/commit/de61c4db7855d6925d66961f62ae3d12cc4acf78)) + - Thanks clippy ([`4bd747c`](https://github.com/Byron/gitoxide/commit/4bd747cb3e126fe5b1d540270cfbd731cffd42ef)) + - Merge branch 'rev-parse-delegate' ([`2f506c7`](https://github.com/Byron/gitoxide/commit/2f506c7c2988477b0f97d272a9ac9ed47b236457)) + - Merge pull request #2 from SidneyDouw/main ([`ce885ad`](https://github.com/Byron/gitoxide/commit/ce885ad4c3324c09c83751c32e014f246c748766)) + - Merge branch 'Byron:main' into main ([`9b9ea02`](https://github.com/Byron/gitoxide/commit/9b9ea0275f8ff5862f24cf5a4ca53bb1cd610709)) + - Merge branch 'main' into rev-parse-delegate ([`6da8250`](https://github.com/Byron/gitoxide/commit/6da82507588d3bc849217c11d9a1d398b67f2ed6)) + - Merge branch 'main' into pathspec ([`7b61506`](https://github.com/Byron/gitoxide/commit/7b615060712565f515515e35a3e8346278ad770c)) + - Merge branch 'kianmeng-fix-typos' ([`4e7b343`](https://github.com/Byron/gitoxide/commit/4e7b34349c0a01ad8686bbb4eb987e9338259d9c)) + - Fix typos ([`e9fcb70`](https://github.com/Byron/gitoxide/commit/e9fcb70e429edb2974afa3f58d181f3ef14c3da3)) + - Release git-config v0.6.0, git-credentials v0.3.0, git-diff v0.17.0, git-discover v0.3.0, git-index v0.4.0, git-mailmap v0.3.0, git-traverse v0.16.0, git-pack v0.21.0, git-odb v0.31.0, git-url v0.7.0, git-transport v0.19.0, git-protocol v0.18.0, git-revision v0.3.0, git-worktree v0.4.0, git-repository v0.20.0, git-commitgraph v0.8.0, gitoxide-core v0.15.0, gitoxide v0.13.0 ([`aa639d8`](https://github.com/Byron/gitoxide/commit/aa639d8c43f3098cc4a5b50614c5ae94a8156928)) + - Release git-hash v0.9.6, git-features v0.22.0, git-date v0.0.2, git-actor v0.11.0, git-glob v0.3.1, git-path v0.4.0, git-attributes v0.3.0, git-tempfile v2.0.2, git-object v0.20.0, git-ref v0.15.0, git-sec v0.3.0, git-config v0.6.0, git-credentials v0.3.0, git-diff v0.17.0, git-discover v0.3.0, git-index v0.4.0, git-mailmap v0.3.0, git-traverse v0.16.0, git-pack v0.21.0, git-odb v0.31.0, git-url v0.7.0, git-transport v0.19.0, git-protocol v0.18.0, git-revision v0.3.0, git-worktree v0.4.0, git-repository v0.20.0, git-commitgraph v0.8.0, gitoxide-core v0.15.0, gitoxide v0.13.0, safety bump 22 crates ([`4737b1e`](https://github.com/Byron/gitoxide/commit/4737b1eea1d4c9a8d5a69fb63ecac5aa5d378ae5)) + - Prepare changelog prior to release ([`3c50625`](https://github.com/Byron/gitoxide/commit/3c50625fa51350ec885b0f38ec9e92f9444df0f9)) + - Merge pull request #1 from Byron/main ([`085e76b`](https://github.com/Byron/gitoxide/commit/085e76b121291ed9bd324139105d2bd4117bedf8)) + - Assure document-features are available in all 'usable' and 'early' crates ([`238581c`](https://github.com/Byron/gitoxide/commit/238581cc46c7288691eed37dc7de5069e3d86721)) + - Merge branch 'main' into pathspec ([`89ea12b`](https://github.com/Byron/gitoxide/commit/89ea12b558bcc056b892193ee8fb44b8664b5da4)) + - Merge branch 'main' into cont_include_if ([`41ea8ba`](https://github.com/Byron/gitoxide/commit/41ea8ba78e74f5c988148367386a1f4f304cb951)) + - Release git-path v0.3.0, safety bump 14 crates ([`400c9be`](https://github.com/Byron/gitoxide/commit/400c9bec49e4ec5351dc9357b246e7677a63ea35)) + - Release git-date v0.0.1, git-hash v0.9.5, git-features v0.21.1, git-actor v0.10.1, git-path v0.2.0, git-attributes v0.2.0, git-ref v0.14.0, git-sec v0.2.0, git-config v0.5.0, 
git-credentials v0.2.0, git-discover v0.2.0, git-pack v0.20.0, git-odb v0.30.0, git-url v0.6.0, git-transport v0.18.0, git-protocol v0.17.0, git-revision v0.2.1, git-worktree v0.3.0, git-repository v0.19.0, safety bump 13 crates ([`a417177`](https://github.com/Byron/gitoxide/commit/a41717712578f590f04a33d27adaa63171f25267)) + - Update changelogs prior to release ([`bb424f5`](https://github.com/Byron/gitoxide/commit/bb424f51068b8a8e762696890a55ab48900ab980)) + - Merge branch 'main' into SidneyDouw-pathspec ([`a22b1d8`](https://github.com/Byron/gitoxide/commit/a22b1d88a21311d44509018729c3ef1936cf052a)) + - Release git-path v0.1.2, git-sec v0.1.1, git-config v0.4.0, git-discover v0.1.1, git-pack v0.19.1, git-repository v0.18.0, cargo-smart-release v0.10.0, safety bump 2 crates ([`ceb6dff`](https://github.com/Byron/gitoxide/commit/ceb6dff13362a2b4318a551893217c1d11643b9f)) + - Thanks clippy ([`409a95b`](https://github.com/Byron/gitoxide/commit/409a95b6505db8568bfea24bc62c92640a5c3cbf)) + - Merge branch 'davidkna-discover-ceiling' ([`66944ba`](https://github.com/Byron/gitoxide/commit/66944ba986114ece2d3b31440c721d0e84b4f267)) + - Thanks clippy ([`4979d20`](https://github.com/Byron/gitoxide/commit/4979d2071cbd1a98f3d81aacd60dd99f07d3f746)) + - Merge branch 'main' into git_includeif ([`598c853`](https://github.com/Byron/gitoxide/commit/598c853087fcf8f77299aa5b9803bcec705c0cd0)) + - Release git-ref v0.13.0, git-discover v0.1.0, git-index v0.3.0, git-mailmap v0.2.0, git-traverse v0.15.0, git-pack v0.19.0, git-odb v0.29.0, git-packetline v0.12.5, git-url v0.5.0, git-transport v0.17.0, git-protocol v0.16.0, git-revision v0.2.0, git-worktree v0.2.0, git-repository v0.17.0 ([`349c590`](https://github.com/Byron/gitoxide/commit/349c5904b0dac350838a896759d51576b66880a7)) + - Release git-hash v0.9.4, git-features v0.21.0, git-actor v0.10.0, git-glob v0.3.0, git-path v0.1.1, git-attributes v0.1.0, git-sec v0.1.0, git-config v0.3.0, git-credentials v0.1.0, git-validate v0.5.4, git-object v0.19.0, git-diff v0.16.0, git-lock v2.1.0, git-ref v0.13.0, git-discover v0.1.0, git-index v0.3.0, git-mailmap v0.2.0, git-traverse v0.15.0, git-pack v0.19.0, git-odb v0.29.0, git-packetline v0.12.5, git-url v0.5.0, git-transport v0.17.0, git-protocol v0.16.0, git-revision v0.2.0, git-worktree v0.2.0, git-repository v0.17.0, safety bump 20 crates ([`654cf39`](https://github.com/Byron/gitoxide/commit/654cf39c92d5aa4c8d542a6cadf13d4acef6a78e)) + - Merge branch 'refs-and-worktrees' ([`8131227`](https://github.com/Byron/gitoxide/commit/8131227ddff6f36919b6a0f7b33792ebde0f8ae9)) + - Cleanup ([`199583a`](https://github.com/Byron/gitoxide/commit/199583abbec1ff58204c3cf90dd8be88730fc112)) + - Merge branch 'main' into msrv-for-windows ([`7cb1972`](https://github.com/Byron/gitoxide/commit/7cb19729133325bdfacedf44cdc0500cbcf36684)) + - Merge branch 'worktree-stack' ([`98da8ba`](https://github.com/Byron/gitoxide/commit/98da8ba52cef8ec27f705fcbc84773e5bacc4e10)) + - Merge branch 'main' into repo-status ([`0eb2372`](https://github.com/Byron/gitoxide/commit/0eb23721dca78f6e6bf864c5c3a3e44df8b419f0)) + - Merge branch 'test-archive-support' ([`350df01`](https://github.com/Byron/gitoxide/commit/350df01042d6ca8b93f8737fa101e69b50535a0f)) + - Fix clippy - many false positives this time ([`045e6fa`](https://github.com/Byron/gitoxide/commit/045e6fae17077555c3e115992905c8046f2c5d0b)) + - Fix clippy - many false positives this time ([`099bd5b`](https://github.com/Byron/gitoxide/commit/099bd5b86fb80b26a73863b80ad60a0394458b6d)) + - Release 
git-config v0.2.1, git-diff v0.15.0, git-traverse v0.14.0, git-pack v0.18.0, git-odb v0.28.0, git-ref v0.12.1, git-revision v0.1.0, git-repository v0.16.0, gitoxide-core v0.14.0, gitoxide v0.12.0, safety bump 6 crates ([`b612021`](https://github.com/Byron/gitoxide/commit/b612021683ba709b693bd48aef3e2e3c2f5b9ead)) + - Remove deprecated compound and linked object databases ([`8c5ae77`](https://github.com/Byron/gitoxide/commit/8c5ae77f06a64c57df9a9ad1190266896a223dbe)) + - Release git-diff v0.14.0, git-bitmap v0.1.0, git-index v0.2.0, git-tempfile v2.0.1, git-lock v2.0.0, git-mailmap v0.1.0, git-traverse v0.13.0, git-pack v0.17.0, git-quote v0.2.0, git-odb v0.27.0, git-packetline v0.12.4, git-url v0.4.0, git-transport v0.16.0, git-protocol v0.15.0, git-ref v0.12.0, git-worktree v0.1.0, git-repository v0.15.0, cargo-smart-release v0.9.0, safety bump 5 crates ([`e58dc30`](https://github.com/Byron/gitoxide/commit/e58dc3084cf17a9f618ae3a6554a7323e44428bf)) + - Merge branch 'for-onefetch' ([`8e5cb65`](https://github.com/Byron/gitoxide/commit/8e5cb65da75036a13ed469334e7ae6c527d9fff6)) + - Release git-hash v0.9.3, git-features v0.20.0, git-config v0.2.0, safety bump 12 crates ([`f0cbb24`](https://github.com/Byron/gitoxide/commit/f0cbb24b2e3d8f028be0e773f9da530da2656257)) + - Make fmt ([`7cf3545`](https://github.com/Byron/gitoxide/commit/7cf354509b545f7e7c99e159b5989ddfbe86273d)) + - Remove unnecessary unsafe by using `chunks_mut()` ([`9b9f10a`](https://github.com/Byron/gitoxide/commit/9b9f10ad862b5e097c836c51df1eb98607df5ae1)) + - Merge branch 'short-id' ([`5849d5b`](https://github.com/Byron/gitoxide/commit/5849d5b326b83f98a16cf1d956c720c7f0fd4445)) + - Thanks clippy ([`48be1ee`](https://github.com/Byron/gitoxide/commit/48be1ee666a88f1416896c5e8073d4d86dae7b8c)) + - Merge branch 'svetli-n-path_value' ([`e8383ca`](https://github.com/Byron/gitoxide/commit/e8383caf6db211beb57d70019fe4ad13ce9066ee)) + - Release git-tempfile v2.0.0, safety bump 6 crates ([`90b1c42`](https://github.com/Byron/gitoxide/commit/90b1c42d5487904a9f329362d185b035d0ddb975)) + - Upgrade document-features ([`c35e62e`](https://github.com/Byron/gitoxide/commit/c35e62e0da9ac1f7dcb863f5f9c69108c728d32e)) + - Merge branch 'unify-path-encoding' ([`566ff8a`](https://github.com/Byron/gitoxide/commit/566ff8a3597b889899d41ca15e5b9af7e05f1a4b)) + - Upgrade dashmap to 5.1.0 (with security fix) ([`2520945`](https://github.com/Byron/gitoxide/commit/25209454d3f7e27e12e8ddca92e43b1ff01d58aa)) + - Release git-pack v0.16.1 ([`d4a8f9f`](https://github.com/Byron/gitoxide/commit/d4a8f9f73bb829bcc83fa68b6b5a7495fcba6b19)) + - Release git-object v0.17.1, git-pack v0.16.1 ([`e959af8`](https://github.com/Byron/gitoxide/commit/e959af83fa92e8ed87edae6e2d1c6a797964c056)) + - Release git-hash v0.9.2, git-object v0.17.1, git-pack v0.16.1 ([`0db19b8`](https://github.com/Byron/gitoxide/commit/0db19b8deaf11a4d4cbc03fa3ae40eea104bc302)) + - Update changelogs prior to git-pack release ([`b7e3a4a`](https://github.com/Byron/gitoxide/commit/b7e3a4afdd6417a38aadad35c7f584617e7b47fa)) + - Downgrade dashmap to 4.0 to avoid unsoundness. 
([`d9451e8`](https://github.com/Byron/gitoxide/commit/d9451e8d7fc39c252042f9d2447061262c16ae7a)) + - Release git-diff v0.13.0, git-tempfile v1.0.4, git-chunk v0.3.0, git-traverse v0.12.0, git-pack v0.16.0, git-odb v0.26.0, git-packetline v0.12.3, git-url v0.3.5, git-transport v0.15.0, git-protocol v0.14.0, git-ref v0.11.0, git-repository v0.14.0, cargo-smart-release v0.8.0 ([`1b76119`](https://github.com/Byron/gitoxide/commit/1b76119259b8168aeb99cbbec233f7ddaa2d7d2c)) + - Release git-actor v0.8.0, git-config v0.1.10, git-object v0.17.0, git-diff v0.13.0, git-tempfile v1.0.4, git-chunk v0.3.0, git-traverse v0.12.0, git-pack v0.16.0, git-odb v0.26.0, git-packetline v0.12.3, git-url v0.3.5, git-transport v0.15.0, git-protocol v0.14.0, git-ref v0.11.0, git-repository v0.14.0, cargo-smart-release v0.8.0 ([`8f57c29`](https://github.com/Byron/gitoxide/commit/8f57c297d7d6ed68cf51415ea7ede4bf9263326e)) + - Release git-features v0.19.1, git-actor v0.8.0, git-config v0.1.10, git-object v0.17.0, git-diff v0.13.0, git-tempfile v1.0.4, git-chunk v0.3.0, git-traverse v0.12.0, git-pack v0.16.0, git-odb v0.26.0, git-packetline v0.12.3, git-url v0.3.5, git-transport v0.15.0, git-protocol v0.14.0, git-ref v0.11.0, git-repository v0.14.0, cargo-smart-release v0.8.0 ([`d78aab7`](https://github.com/Byron/gitoxide/commit/d78aab7b9c4b431d437ac70a0ef96263acb64e46)) + - Release git-hash v0.9.1, git-features v0.19.1, git-actor v0.8.0, git-config v0.1.10, git-object v0.17.0, git-diff v0.13.0, git-tempfile v1.0.4, git-chunk v0.3.0, git-traverse v0.12.0, git-pack v0.16.0, git-odb v0.26.0, git-packetline v0.12.3, git-url v0.3.5, git-transport v0.15.0, git-protocol v0.14.0, git-ref v0.11.0, git-repository v0.14.0, cargo-smart-release v0.8.0, safety bump 4 crates ([`373cbc8`](https://github.com/Byron/gitoxide/commit/373cbc877f7ad60dac682e57c52a7b90f108ebe3)) + - Fix git-pack changelog to be stable ([`fd5b616`](https://github.com/Byron/gitoxide/commit/fd5b616d6ce8f353bd96b2c4994af9ba9c878b3e)) + - Prepar changelogs for cargo-smart-release release ([`8900d69`](https://github.com/Byron/gitoxide/commit/8900d699226eb0995be70d66249827ce348261df)) + - Support Rust 1.52 ([`42e0487`](https://github.com/Byron/gitoxide/commit/42e0487286c1f745837c0ce337ed7c9d86b14516)) + - Release git-bitmap v0.0.1, git-hash v0.9.0, git-features v0.19.0, git-index v0.1.0, safety bump 9 crates ([`4624725`](https://github.com/Byron/gitoxide/commit/4624725f54a34dd6b35d3632fb3516965922f60a)) + - Thanks clippy ([`d8925f5`](https://github.com/Byron/gitoxide/commit/d8925f5bd7ac8ef2c98f0e57a1373e5ffba8ce23)) + - Thanks clippy ([`5a68d2f`](https://github.com/Byron/gitoxide/commit/5a68d2feffc551ad5f07e90efb2307e966d2636b)) + - Thanks clippy ([`1e051dc`](https://github.com/Byron/gitoxide/commit/1e051dc23fb298b0bfe3e9ffb85a95ecb9c0f93f)) + - Don't try to short-cut detection of large offsets when writing index files ([`ba92cc0`](https://github.com/Byron/gitoxide/commit/ba92cc09ba41fe4c9a9097bfeb8d18016408fcdf)) + - Refactor ([`e7fbd9f`](https://github.com/Byron/gitoxide/commit/e7fbd9f3700496ad7bb7e71226c4d25429f0ccd5)) + - Merge branch 'use-midx-in-store' ([`338521b`](https://github.com/Byron/gitoxide/commit/338521b0443b9dc1007581de42ef6a950f6e0bbf)) + - Thanks clippy ([`533a532`](https://github.com/Byron/gitoxide/commit/533a532c86bcf0dae27558e66b1a5cd2e52983df)) + - Thanks clippy ([`7bd3ad3`](https://github.com/Byron/gitoxide/commit/7bd3ad3ab9f17eaf94490bea04a9b1297fa5fe64)) + - Release git-chunk v0.2.0, safety bump 4 crates 
([`b792fab`](https://github.com/Byron/gitoxide/commit/b792fabf9f5f93ab906ac5a5bb3e4f01c179290a)) + - Thanks clippy ([`35cf46f`](https://github.com/Byron/gitoxide/commit/35cf46f87ecc42cf033ca001acf1b5918b3fea1b)) + - Refactor ([`0032223`](https://github.com/Byron/gitoxide/commit/003222365bb2f8ce7d915240db6ff84ccbca6db4)) + - Merge branch 'sync-db-draft' ([`7d2e20c`](https://github.com/Byron/gitoxide/commit/7d2e20c6fedc2c7e71a307d8d072412fa847a4aa)) + - Thanks clippy ([`4ca9e07`](https://github.com/Byron/gitoxide/commit/4ca9e07c7ac062d48d64ad7b516274e32dbc51c6)) + - Make fmt ([`066f3ff`](https://github.com/Byron/gitoxide/commit/066f3ffb8740f242c1b03e680c3c5c1a0e4c36c3)) + - Thanks clippy ([`7dd2313`](https://github.com/Byron/gitoxide/commit/7dd2313d980fe7c058319ae66d313b3097e3ae5f)) + - Release git-actor v0.7.0, git-config v0.1.9, git-object v0.16.0, git-diff v0.12.0, git-traverse v0.11.0, git-pack v0.15.0, git-odb v0.25.0, git-packetline v0.12.2, git-transport v0.14.0, git-protocol v0.13.0, git-ref v0.10.0, git-repository v0.13.0, cargo-smart-release v0.7.0 ([`d3f9227`](https://github.com/Byron/gitoxide/commit/d3f922781a81e8fbb81aa47afdbe9afeb06d666b)) + - Release git-features v0.18.0, git-actor v0.7.0, git-config v0.1.9, git-object v0.16.0, git-diff v0.12.0, git-traverse v0.11.0, git-pack v0.15.0, git-odb v0.25.0, git-packetline v0.12.2, git-transport v0.14.0, git-protocol v0.13.0, git-ref v0.10.0, git-repository v0.13.0, cargo-smart-release v0.7.0, safety bump 12 crates ([`acd3737`](https://github.com/Byron/gitoxide/commit/acd37371dcd92ebac3d1f039224d02f2b4e9fa0b)) + - Adjust changelogs prior to release ([`ec38950`](https://github.com/Byron/gitoxide/commit/ec3895005d141abe79764eaff7c0f04153e38d73)) + - Merge branch 'pack-consistency' ([`5982406`](https://github.com/Byron/gitoxide/commit/5982406b4e1b26fd383d9ec21a3cf652ec8ab25f)) + - Merge branch 'git-loose-objects' of https://github.com/xmo-odoo/gitoxide into xmo-odoo-git-loose-objects ([`ee737cd`](https://github.com/Byron/gitoxide/commit/ee737cd237ad70bf9f2c5e0d3e4557909e495bca)) + - Release git-config v0.1.8, git-object v0.15.1, git-diff v0.11.1, git-traverse v0.10.1, git-pack v0.14.0, git-odb v0.24.0, git-packetline v0.12.1, git-transport v0.13.1, git-protocol v0.12.1, git-ref v0.9.1, git-repository v0.12.0, cargo-smart-release v0.6.0 ([`f606fa9`](https://github.com/Byron/gitoxide/commit/f606fa9a0ca338534252df8921cd5e9d3875bf94)) + - Adjusting changelogs prior to release of git-config v0.1.8, git-object v0.15.1, git-diff v0.11.1, git-traverse v0.10.1, git-pack v0.14.0, git-odb v0.24.0, git-packetline v0.12.1, git-transport v0.13.1, git-protocol v0.12.1, git-ref v0.9.1, git-repository v0.12.0, cargo-smart-release v0.6.0, safety bump 5 crates ([`39b40c8`](https://github.com/Byron/gitoxide/commit/39b40c8c3691029cc146b893fa0d8d25d56d0819)) + - Adjust changelogs prior to git-pack release ([`ac8015d`](https://github.com/Byron/gitoxide/commit/ac8015de710142c2bedd0e4188e872e0cf1ceccc)) + - Move "loose object header" ser/de to git-object ([`3d1565a`](https://github.com/Byron/gitoxide/commit/3d1565acfc336baf6487edccefd72d0226141a08)) + - Release git-hash v0.8.0, git-features v0.17.0, git-actor v0.6.0, git-object v0.15.0, git-diff v0.11.0, git-traverse v0.10.0, git-pack v0.13.0, git-odb v0.23.0, git-packetline v0.12.0, git-transport v0.13.0, git-protocol v0.12.0, git-ref v0.9.0, git-repository v0.11.0, git-commitgraph v0.6.0, gitoxide-core v0.12.0, gitoxide v0.10.0, cargo-smart-release v0.5.0, safety bump 16 crates 
([`0e02953`](https://github.com/Byron/gitoxide/commit/0e029537a7f6242d02ccf7e63d8d92f5246e6c5e)) + - Release git-hash v0.7.0, git-features v0.16.5, git-actor v0.5.3, git-config v0.1.7, git-validate v0.5.3, git-object v0.14.1, git-diff v0.10.0, git-tempfile v1.0.3, git-lock v1.0.1, git-traverse v0.9.0, git-pack v0.12.0, git-odb v0.22.0, git-packetline v0.11.0, git-url v0.3.4, git-transport v0.12.0, git-protocol v0.11.0, git-ref v0.8.0, git-repository v0.10.0, cargo-smart-release v0.4.0 ([`59ffbd9`](https://github.com/Byron/gitoxide/commit/59ffbd9f15583c8248b7f48b3f55ec6faffe7cfe)) + - Adjusting changelogs prior to release of git-hash v0.7.0, git-features v0.16.5, git-actor v0.5.3, git-validate v0.5.3, git-object v0.14.1, git-diff v0.10.0, git-tempfile v1.0.3, git-lock v1.0.1, git-traverse v0.9.0, git-pack v0.12.0, git-odb v0.22.0, git-packetline v0.11.0, git-url v0.3.4, git-transport v0.12.0, git-protocol v0.11.0, git-ref v0.8.0, git-repository v0.10.0, cargo-smart-release v0.4.0, safety bump 3 crates ([`a474395`](https://github.com/Byron/gitoxide/commit/a47439590e36b1cb8b516b6053fd5cbfc42efed7)) + - Make fmt, but now it picked up some parts that usually don't get altered… ([`01f7b72`](https://github.com/Byron/gitoxide/commit/01f7b729337bd2c99498321c479a9a13b1858e3e)) + - Update changelogs just for fun ([`21541b3`](https://github.com/Byron/gitoxide/commit/21541b3301de1e053fc0e84373be60d2162fbaae)) + - Merge branch 'changelog-generation' ([`bf0106e`](https://github.com/Byron/gitoxide/commit/bf0106ea21734d4e59d190b424c22743c22da966)) + - Bump git-traverse v0.9.0, safety bump 8 crates ([`d39fabb`](https://github.com/Byron/gitoxide/commit/d39fabb8757369aa19452a457f610fe21dc13a14)) + - Bump git-pack v0.11.0 ([`5ae6ff5`](https://github.com/Byron/gitoxide/commit/5ae6ff52cd2cd1ccd1e26bb987c154eb19603696)) + - Bump git-object v0.14.0 ([`d4fc81f`](https://github.com/Byron/gitoxide/commit/d4fc81f6390443f8c8561d91ac27ea4a6318fb62)) + - [repository #164] generic write_object() ([`c569f83`](https://github.com/Byron/gitoxide/commit/c569f83363489dde03c8b9cd01e75d35f5e04dbc)) + - Merge branch 'repository-integration' ([`49f5453`](https://github.com/Byron/gitoxide/commit/49f5453629646ac24d752f53c532e5f67eb09374)) + - Bump git-pack v0.10.0 ([`e5e3c80`](https://github.com/Byron/gitoxide/commit/e5e3c8024e1c2e5e90cee83abbdae41d58eee156)) + - [repository #190] first shot at ancestor iteration… ([`85f1a48`](https://github.com/Byron/gitoxide/commit/85f1a48ea39f3b224e8d0ba3728dd75e03a6edc3)) + - Bump git-hash v0.6.0 ([`6efd90d`](https://github.com/Byron/gitoxide/commit/6efd90db54f7f7441b76159dba3be80c15657a3d)) + - [repository #185] refactor ([`7604935`](https://github.com/Byron/gitoxide/commit/7604935b12eacb26a98bedc5f77636b5583629a5)) + - [repository #174] adjust various changelogs ([`081faf5`](https://github.com/Byron/gitoxide/commit/081faf5c3a21b34b7068b44d8206fb5770c392f5)) + - [pack #179] refactor ([`76e66d1`](https://github.com/Byron/gitoxide/commit/76e66d1b9d24bb25a9f681d9612e52c8ccd60e2c)) + - [pack #179] move Tree traversal cache private ([`34e45d7`](https://github.com/Byron/gitoxide/commit/34e45d745cb8756831c56dc441695a25cd0069a9)) + - [pack #179] refactor ([`5a3677d`](https://github.com/Byron/gitoxide/commit/5a3677dd3f3dcab26a3d9270b6184fd0fe18c54e)) + - [pack #179] refactor bundle ([`420dca2`](https://github.com/Byron/gitoxide/commit/420dca29bccca6e7d759880d8342f23b33eead0d)) + - [pack #179] fix docs ([`7ad7a44`](https://github.com/Byron/gitoxide/commit/7ad7a4428d0e38f2ff776f7efab6996505d2bba2)) + - 
[pack #179] refactor ([`ab6554b`](https://github.com/Byron/gitoxide/commit/ab6554b0cd5838f1ea4e82f6b5019798288076fa)) + - [pack #179] refactor ([`620d8a5`](https://github.com/Byron/gitoxide/commit/620d8a54db5cd8367ec85c8b837cab710c509e3e)) + - [pack #179] add changelog ([`2102569`](https://github.com/Byron/gitoxide/commit/210256932a338038adb55c5475d8f90560aa4c12)) + - Bump git-traverse v0.8.0 ([`54f3541`](https://github.com/Byron/gitoxide/commit/54f3541f1448a8afa044d3958fa1be5b074e4445)) + - Bump git-diff v0.9.0 ([`2e2e798`](https://github.com/Byron/gitoxide/commit/2e2e7983178b3af7e5684995de68ed5d020927ec)) + - [object #177] cleanup CommitRefIter imports and git_object::Error ([`058f68a`](https://github.com/Byron/gitoxide/commit/058f68a9e1cd79fd5a2a1235da42358bc92ed255)) + - [object #177] dissolve 'immutable' module ([`70e11c2`](https://github.com/Byron/gitoxide/commit/70e11c21b0637cd250f54381d5490e9976880ad9)) + - [object #177] commit::RefIter -> CommitRefIter ([`e603306`](https://github.com/Byron/gitoxide/commit/e603306e81f392af97aa5afd232653de56bf3ce9)) + - [object #177] migrate immutable::commit into crate::commit ([`45d3934`](https://github.com/Byron/gitoxide/commit/45d393438eac2c7ecd47670922437dd0de4cd69b)) + - [object #177] tag::RefIter -> TagRefIter ([`28587c6`](https://github.com/Byron/gitoxide/commit/28587c691eb74e5cb097afb2b63f9d9e2561c45d)) + - [object #177] into_mutable() -> into_owned() ([`7e701ce`](https://github.com/Byron/gitoxide/commit/7e701ce49efe5d40327770a988aae88692d88219)) + - [object #177] fix docs ([`25d8e7b`](https://github.com/Byron/gitoxide/commit/25d8e7b1862bd05489359b162a32c6ad45ecdf9a)) + - [object #177] move mutable objects to crate::* ([`c551c02`](https://github.com/Byron/gitoxide/commit/c551c0236c64f3237cb9be7f35159f753d4b871f)) + - [object #177] migrate immutable::tree to crate::tree ([`fa5cd06`](https://github.com/Byron/gitoxide/commit/fa5cd0648d5c855060ab2b75ee933851987c2dcf)) + - [object #177] fix docs ([`07be661`](https://github.com/Byron/gitoxide/commit/07be6611d1742633815566443f71eef8b85ad5c0)) + - [object #177] move immutable::* to crate::*Ref, start `iter` adjustments ([`461dc53`](https://github.com/Byron/gitoxide/commit/461dc53ba3bc07d55fdb4aad7570ba9176a8b360)) + - [object #177] rename immutable::* to immutable::*Ref ([`6deb012`](https://github.com/Byron/gitoxide/commit/6deb01291fb382b7fb9206682e319afa81bacc05)) + - Release git-object v0.13.0 ([`708fc5a`](https://github.com/Byron/gitoxide/commit/708fc5abd8af4dd7459f388c7092bf35915c6662)) + - [pack #172] A note about empty packs in Bundle writer ([`09a777f`](https://github.com/Byron/gitoxide/commit/09a777f1da5e792c5eb4c8ff9e83504ad8d19c5c)) + - Merge pull request #172 from mellowagain/main ([`61aebbf`](https://github.com/Byron/gitoxide/commit/61aebbfff02eb87e0e8c49438a093a21b1134baf)) + - [actor #173] fix docs ([`2d7956a`](https://github.com/Byron/gitoxide/commit/2d7956a22511d73b767e443dac21b60e93f286dd)) + - [actor #173] rename immutable::Signature to SignatureRef! 
([`96461ac`](https://github.com/Byron/gitoxide/commit/96461ace776d6b351b313d4f2697f2d95b9e196e)) + - Release git-tempfile v1.0.0 ([`1238535`](https://github.com/Byron/gitoxide/commit/123853539dc30ddea2d822ab177ee09b191bdf1b)) + - Merge branch 'main' into stability ([`11bae43`](https://github.com/Byron/gitoxide/commit/11bae437e473fef6ed09c178d54ad11eee001b1d)) + - Cleanup imports ([`e669303`](https://github.com/Byron/gitoxide/commit/e6693032f1391416fd704c21617051ddfb862a3a)) + - Allow creation of empty indices ([`d122fc7`](https://github.com/Byron/gitoxide/commit/d122fc79cc9b9a52a2817bdd46d3215c10e61129)) + - [pack #170] there can only be one ([`dce4f97`](https://github.com/Byron/gitoxide/commit/dce4f97a84aa6a73e31e7397501cfce27241c5b8)) + - [pack #170] clru allows for free lists, reducing allocation pressure... ([`4d820d2`](https://github.com/Byron/gitoxide/commit/4d820d2f94dc3afc062bbd25e969c87410212c3a)) + - [pack #170] basic progress for resolution ([`ada0b96`](https://github.com/Byron/gitoxide/commit/ada0b96e3707c06d7d6f7e4002907e12b45f7419)) + - [pack #170] Basic entry resolution without progress ([`7461f31`](https://github.com/Byron/gitoxide/commit/7461f31f03d67ecc9fdf398adf3cb6d4eb365412)) + - [pack #170] first step towards resolving in multi-threaded mode… ([`f3c21f9`](https://github.com/Byron/gitoxide/commit/f3c21f99594ab4080b8aa1ffed9ea8a33e18fabd)) + - [pack #170] Don't double-lookup trees during traversal… ([`7b06829`](https://github.com/Byron/gitoxide/commit/7b068296fe5ca10af212d8fe2662940188b7359c)) + - Revert "[pack #67] Don't pre-fetch packed objects during counting" ([`811bb54`](https://github.com/Byron/gitoxide/commit/811bb54991636f7e517087b62cf0c8c8cc2ad9e6)) + - [pack #67] Don't pre-fetch packed objects during counting ([`d08b673`](https://github.com/Byron/gitoxide/commit/d08b6739d8e9294b795aba75e9c7f9f20645af2b)) + - Release git-pack v0.9.0 ([`7fbc961`](https://github.com/Byron/gitoxide/commit/7fbc9617da97d4ba4bb3784f41d4163c0839c03c)) + - [pack #67] refactor ([`14717f6`](https://github.com/Byron/gitoxide/commit/14717f6132672a5d271832a68de0b323b73abb2a)) + - [pack #67] Add cache debugging capabilities to git-features ([`8776c98`](https://github.com/Byron/gitoxide/commit/8776c9834ac4622b3057f5db464a9817ed9acdb0)) + - [pack #167] Use custom uluru version to avoid a lot of allocations… ([`7bd3671`](https://github.com/Byron/gitoxide/commit/7bd3671ad949d62f84147ef7ff3fde59937fee54)) + - [pack #164] fix docs ([`08ee674`](https://github.com/Byron/gitoxide/commit/08ee674c55cef6ab76520de2f836b246c907888c)) + - Merge branch 'main' into 162-repo-design-sketch ([`e63b634`](https://github.com/Byron/gitoxide/commit/e63b63412c02db469fbdb17da82cd1e9fda1ef0f)) + - Revert "[pack #167] Use custom uluru version to avoid a lot of allocations…" ([`4c2ea21`](https://github.com/Byron/gitoxide/commit/4c2ea212bbffb0ba3c21ba388dfc79cc7a1c4734)) + - [pack #167] Use custom uluru version to avoid a lot of allocations… ([`8d49976`](https://github.com/Byron/gitoxide/commit/8d499762b74c08437d901bb98806e0a1fc6f93bb)) + - [pack #167] a single-threaded special case for counting… ([`65e29de`](https://github.com/Byron/gitoxide/commit/65e29de45a92c82cebd832634ab194db19a1b590)) + - [pack #167] generalize over immutable insertions… ([`169f000`](https://github.com/Byron/gitoxide/commit/169f000087aab18f0257fb0c61dc3b3901e97505)) + - [pack #167] refactor ([`6bf0f7e`](https://github.com/Byron/gitoxide/commit/6bf0f7e86312b2a4d262c80979c61c94519bd4b0)) + - [pack #167] progress is handled by reducer… 
([`a22f8e1`](https://github.com/Byron/gitoxide/commit/a22f8e171e705bc42fcf290789e8e05423bd72d1)) + - [pack #167] Error handling for object input ([`0aac40c`](https://github.com/Byron/gitoxide/commit/0aac40c88a5c26f7c295db8433b510b168f15ca3)) + - Thanks clippy ([`d689599`](https://github.com/Byron/gitoxide/commit/d689599d1b819c18a3be60075170dbe00462e216)) + - [pack #167] remove iterator based count objects impl… ([`7ec2f2b`](https://github.com/Byron/gitoxide/commit/7ec2f2b40e83aaa218360a8b5989792cd67de2ed)) + - [pack] A non-iterator version of parallel object counting… ([`04fe855`](https://github.com/Byron/gitoxide/commit/04fe855a37577d3da5bbd619807b44e449947893)) + - Release git-pack v0.8.2 ([`39a3f71`](https://github.com/Byron/gitoxide/commit/39a3f71ba5997ac26d9994cdc7c2145af3220f64)) + - Apply nightly rustfmt rules. ([`5e0edba`](https://github.com/Byron/gitoxide/commit/5e0edbadb39673d4de640f112fa306349fb11814)) + - Release git-pack v0.8.1 ([`045eb09`](https://github.com/Byron/gitoxide/commit/045eb094691324a398120f6039bbfa34b4fda1af)) + - Remove dev-dependency cycles by removing their version ([`c40faca`](https://github.com/Byron/gitoxide/commit/c40faca41632cd2a226daf4ddf5293b65d1fdc82)) + - Release git-diff v0.8.0, git-odb v0.20.0, git-pack v0.8.0, git-traverse v0.7.0 ([`f123f69`](https://github.com/Byron/gitoxide/commit/f123f69c7a4f9fd1c98bd2f60ebc953a6739fe04)) + - Release git-diff v0.7.0, git-odb v0.19.0, git-pack v0.7.0, git-traverse v0.6.0 ([`c67291f`](https://github.com/Byron/gitoxide/commit/c67291ff9bcdff9a747d87241f6a71015607af05)) + - Release git-object v0.12.0 ([`7006150`](https://github.com/Byron/gitoxide/commit/7006150ac314d19814608723f69f6e70a72f9262)) + - (cargo-release) version 0.18.0 ([`b327590`](https://github.com/Byron/gitoxide/commit/b327590d02fec5536c380b2d39dd7be089ca7c40)) + - (cargo-release) version 0.6.0 ([`d704bca`](https://github.com/Byron/gitoxide/commit/d704bca7de0a6591f35345c842d6418b36ecd206)) + - (cargo-release) version 0.6.0 ([`4b71e15`](https://github.com/Byron/gitoxide/commit/4b71e15c3ba4a17ff2da5a5ef79986a2832fa3f2)) + - (cargo-release) version 0.5.0 ([`e21142b`](https://github.com/Byron/gitoxide/commit/e21142ba1a113b2afc4725d4d4225dff519c513a)) + - (cargo-release) version 0.17.0 ([`c52a491`](https://github.com/Byron/gitoxide/commit/c52a49176bd294bb36db74b4293cdb684a2ab7f6)) + - (cargo-release) version 0.5.0 ([`c2f94a5`](https://github.com/Byron/gitoxide/commit/c2f94a51bce287be301090450cb00cde57e92f76)) + - (cargo-release) version 0.4.0 ([`d69d0ac`](https://github.com/Byron/gitoxide/commit/d69d0ac21989243fdafa514fa41579fd51bc2558)) + - (cargo-release) version 0.6.0 ([`d58f37e`](https://github.com/Byron/gitoxide/commit/d58f37e3b5a000fbe069aa869bd84f66d5c3210b)) + - (cargo-release) version 0.5.0 ([`1687e59`](https://github.com/Byron/gitoxide/commit/1687e599be98d97925fbab594f31cf5558e9d2b1)) + - (cargo-release) version 0.4.0 ([`28e58f6`](https://github.com/Byron/gitoxide/commit/28e58f6b43a44e010da749a5618df02441f0d2e8)) + - (cargo-release) version 0.11.0 ([`a5be31c`](https://github.com/Byron/gitoxide/commit/a5be31c4cf7c0b538a1ed4a52ff5c3a992c6feff)) + - (cargo-release) version 0.4.0 ([`70ef344`](https://github.com/Byron/gitoxide/commit/70ef3442775b54ba9e4ee9ebfffb37af9804cc5b)) + - [utils #154] refactor: bool.then(||this) - neat ([`1dec1c4`](https://github.com/Byron/gitoxide/commit/1dec1c49032c8acb449e463fde41f403cb640e45)) + - Revert "break more dev-depedency cycles up to git-odb" 
([`22337ce`](https://github.com/Byron/gitoxide/commit/22337ce23995eee474e7dfb2e37fb56814522942)) + - (cargo-release) version 0.3.1 ([`8b24197`](https://github.com/Byron/gitoxide/commit/8b241977b31720e7f08809bca0b277267b29102e)) + - Break more dev-depedency cycles up to git-odb ([`7ee278b`](https://github.com/Byron/gitoxide/commit/7ee278bf5b04adc5e4ab82cb83a3519f93587176)) + - (cargo-release) version 0.3.0 ([`0e9c73a`](https://github.com/Byron/gitoxide/commit/0e9c73abd17e0dd21952275077ae53ad7e7aa1af)) + - (cargo-release) version 0.5.0 ([`ae02dab`](https://github.com/Byron/gitoxide/commit/ae02dabae961089a92a21e6a60a7006de4b56dad)) + - (cargo-release) version 0.16.0 ([`1231dbd`](https://github.com/Byron/gitoxide/commit/1231dbd16dacefb39adec8e067c312d313a82e3c)) + - (cargo-release) version 0.5.0 ([`0e11e98`](https://github.com/Byron/gitoxide/commit/0e11e98f0562c7baa9c90e18db6240731d165217)) + - [pack #153] finish transitioning to git-tempfile ([`38173fc`](https://github.com/Byron/gitoxide/commit/38173fcf62c04b485c4b309bdf7e6b7afacfcd58)) + - Thanks clippy ([`e1964e4`](https://github.com/Byron/gitoxide/commit/e1964e43979b3e32a5d4bfbe377a842d2c0b10ea)) + - [ref #139] add missing docs ([`5422ec8`](https://github.com/Byron/gitoxide/commit/5422ec8923a5f3c284f7094894a952a392812e63)) + - [pack] refactor ([`581fb51`](https://github.com/Byron/gitoxide/commit/581fb51a84567e341d315e6bacee8e681718f7a7)) + - [pack] refactor ([`b19f6b9`](https://github.com/Byron/gitoxide/commit/b19f6b9b1fcd5ebbc5b1f2a4bef0543b1c693bd1)) + - [pack] fix docs ([`e7b9d96`](https://github.com/Byron/gitoxide/commit/e7b9d9613874cd1ebaf740dc08db467c461a4751)) + - [pack] fix build ([`98dd557`](https://github.com/Byron/gitoxide/commit/98dd557b963acfe1c4e717451d222c187c46a5da)) + - [pack] update CRC values when changing entries to satisfy all consistency checks ([`990ea48`](https://github.com/Byron/gitoxide/commit/990ea4866be2d22ae2043da2dcd9577b748de255)) + - [pack] fix trailer of last entry to match expected recomputed pack hash… ([`8d0ec7d`](https://github.com/Byron/gitoxide/commit/8d0ec7d7c0afb6112e66518a2987907d2e4d29e3)) + - [pack] refactor ([`1852e3e`](https://github.com/Byron/gitoxide/commit/1852e3ea98a462958862ab05f110649e3b06e2b5)) + - [pack] all tests running for now, but… ([`aec8439`](https://github.com/Byron/gitoxide/commit/aec8439683c639f7b6e344cb76bf1dd9fc769d17)) + - [pack] hacky proof of concept that this actually works… ([`6085a92`](https://github.com/Byron/gitoxide/commit/6085a9201ecbd9285547c1d17c9834f09e22fef9)) + - [pack] on the way to 'quickly' get a proof of concept ([`cdc7582`](https://github.com/Byron/gitoxide/commit/cdc7582ab7e35ec1daac44401bf7cb62e0b592a2)) + - [pack] refactor ([`685cce6`](https://github.com/Byron/gitoxide/commit/685cce612eec99ed9f15d86d5ce2a7e6c270ae0d)) + - [pack] refactor ([`f822ebb`](https://github.com/Byron/gitoxide/commit/f822ebb9e899bd52d5baec8179a843c47d073e44)) + - Thanks clippy ([`96ef0b0`](https://github.com/Byron/gitoxide/commit/96ef0b036c3c94a45f3ab882a8b32bfcc1250653)) + - [pack] a quickly made iterator that writes input::Entries ([`116bdc4`](https://github.com/Byron/gitoxide/commit/116bdc4ba879da9785877ebca56ab3c57b9cfd98)) + - [pack] prepare a custom writing iterator for input::Entries… ([`a4d2764`](https://github.com/Byron/gitoxide/commit/a4d27648b4021bcf65c95dc5bcfa2b3d11f538fd)) + - Thanks clippy ([`bd517d6`](https://github.com/Byron/gitoxide/commit/bd517d6374f20670086eedce2776a8ecf7d0d22b)) + - [pack] prepare bundle writer for yet another iterator wrapper… 
([`33be1a1`](https://github.com/Byron/gitoxide/commit/33be1a1ffba34a64eeb04b4479790fec2f50bcba)) + - [pack] refactor ([`50861e6`](https://github.com/Byron/gitoxide/commit/50861e6266a6e1800607eb19288e040846325c06)) + - [pack] refactor ([`dc07225`](https://github.com/Byron/gitoxide/commit/dc07225d7eea04e0cfe61c87b56009e06491726c)) + - [pack] another todo down, the last one ([`3fc8c8f`](https://github.com/Byron/gitoxide/commit/3fc8c8ff5ab1c49b55e3b9e1af3fa2f0aee68b94)) + - [pack] one more todo down, it should work now, right?… ([`69a9ff1`](https://github.com/Byron/gitoxide/commit/69a9ff17b3fe16de782ffabb76b87510e8a5b74e)) + - [pack] fix thin pack support test… ([`4bdebdd`](https://github.com/Byron/gitoxide/commit/4bdebddd3791ba71f3f6b4182229a1c48c5a4a95)) + - [pack] definitely not working yet ([`690d9b7`](https://github.com/Byron/gitoxide/commit/690d9b7fbc34b7d2393649d39290071f81cb8bb1)) + - [pack] a step closer, new cases show up ([`75eaba3`](https://github.com/Byron/gitoxide/commit/75eaba36072cf29e76a97fbbd425f0861eb657e2)) + - [pack] refactor ([`a8512f8`](https://github.com/Byron/gitoxide/commit/a8512f89a4e0dd7492fa208c1da41eed9d6a208f)) + - [pack] improved test to validate a fix ([`e3eeeb1`](https://github.com/Byron/gitoxide/commit/e3eeeb146a0ba3dbe701b2e4da560309ff181753)) + - [pack] attempt to get a more realistic test, but… ([`2890737`](https://github.com/Byron/gitoxide/commit/2890737c7e074d31f3bb55acb63664a2da93faaa)) + - [pack] refactor ([`cabc1e5`](https://github.com/Byron/gitoxide/commit/cabc1e5858d52806542ee8d9266bac36e5d39c96)) + - [pack] first succeeding test ([`f5da439`](https://github.com/Byron/gitoxide/commit/f5da439dce93cc203dacb4a5e9d0ae68a87b9be4)) + - [pack] first reasonably failing test showing that offset computation is indeed wrong ([`df1bc2f`](https://github.com/Byron/gitoxide/commit/df1bc2f66ff9e7046898b6937c5ad239313a70dc)) + - [pack] the first test for the lookup ref deltas iter ([`b162f9e`](https://github.com/Byron/gitoxide/commit/b162f9eb37f09f49e363376dc3f0c6c126442bbf)) + - [pack] Make use of thin-pack resolver when writing bundles… ([`9f43bf0`](https://github.com/Byron/gitoxide/commit/9f43bf029624f7c94346646465e366609b89e2e1)) + - [pack] handle the same ref-base correctly ([`2f94854`](https://github.com/Byron/gitoxide/commit/2f948545a935d2cb7c5a252ec74764440a9ff595)) + - [pack] thin pack resolver which might actually work ([`54f055a`](https://github.com/Byron/gitoxide/commit/54f055a53e888156459340e8ab160650a198ab13)) + - [pack] first sketch of resolver for thin pack entries ([`ee428e0`](https://github.com/Byron/gitoxide/commit/ee428e07bcc3df9bc795d06068a444beed71f2d0)) + - [pack] refactor ([`a8fd70f`](https://github.com/Byron/gitoxide/commit/a8fd70fdbff871779ad5a9ba491162ae49605c9f)) + - [pack] thanks clippy ([`7c2fc89`](https://github.com/Byron/gitoxide/commit/7c2fc89c70aa6de9cb0707799918e623267326a8)) + - [pack] actually, this is how it works, so this code should be unreachable ([`8f359e1`](https://github.com/Byron/gitoxide/commit/8f359e1fc8cb99fcf0003eaab1d97cdeaac20876)) + - [pack] first step towards fixing bad-objects properly ([`3c96507`](https://github.com/Byron/gitoxide/commit/3c965070a7c799f0507f9e7faae2896346bc9e65)) + - [pack] discard bad-object tracking in favor of delayed handling ([`31ce008`](https://github.com/Byron/gitoxide/commit/31ce008208cdd3bc4f093abab6fabf4c8074c130)) + - Revert "[pack] fix race to finally make pack-gen missing objects…" ([`ad0d2a8`](https://github.com/Byron/gitoxide/commit/ad0d2a8e4e92d11351225db0115de0ed1210f9e3)) + - 
[pack] fix race to finally make pack-gen missing objects… ([`73394db`](https://github.com/Byron/gitoxide/commit/73394db1b048d3dc87b8b4934737f27b6a8a0d3c)) + - [pack] it seems git is just skipping bad objects during pack-gen ([`0f29b82`](https://github.com/Byron/gitoxide/commit/0f29b82b48f45f509016eb16ea92af7f6dbf65a6)) + - Revert "[pack] FAIL: See if not looking up the pack location speeds up counting…" ([`d03fe97`](https://github.com/Byron/gitoxide/commit/d03fe9732b69c6ca3b7a6df96097233661e53a05)) + - [pack] FAIL: See if not looking up the pack location speeds up counting… ([`48c4930`](https://github.com/Byron/gitoxide/commit/48c49300a55e6443d5e4d94632979b6d07f2bc5a)) + - Revert "[pack] FAIL: speedup with Mutex<HashSet>" ([`df98edf`](https://github.com/Byron/gitoxide/commit/df98edf48c49717136a6e8e5d9b1f64aeda17db2)) + - [pack] FAIL: speedup with Mutex<HashSet> ([`f8aca03`](https://github.com/Byron/gitoxide/commit/f8aca03c2d126574541c136019df4e51b52a5b10)) + - [pack] In single-threaded mode, use a huge cache for some speedup ([`aec8a9b`](https://github.com/Byron/gitoxide/commit/aec8a9b4b9deb102b06390a19727eab7660621f9)) + - [pack] fix offset index properly by using chunk-absolute offsets ([`461c1ee`](https://github.com/Byron/gitoxide/commit/461c1eefe9214b07cd80a37292b23744846383d3)) + - [pack] forcefully fix issue with incorrect partition point ([`290bd65`](https://github.com/Byron/gitoxide/commit/290bd65f10f5a64de6735b09119b7bbffc44254b)) + - [pack] test for parital pack without thin pack allowance… ([`1f48d3b`](https://github.com/Byron/gitoxide/commit/1f48d3b58a1151a1fefce9bf4af5649837309a37)) + - [pack] pack-create with immediate counting and traversing… ([`b74a98f`](https://github.com/Byron/gitoxide/commit/b74a98fc87a92a8ccbaec59aeea5284731e2fe49)) + - [pack] entry writer now supports deltas and it seems to work even ([`fcda6f0`](https://github.com/Byron/gitoxide/commit/fcda6f096f95a6322122229ac364a2dd5ea0ce6b)) + - Thanks clippy ([`cc61f82`](https://github.com/Byron/gitoxide/commit/cc61f82f597d9a0ab43efaaccc2cb568b9aa746f)) + - [pack] on-demand cache for pack-offset to id lookup ([`0bfdea8`](https://github.com/Byron/gitoxide/commit/0bfdea843606673005ecab6a482a9fce89a4cb69)) + - [pack] refactor ([`4bb3ce4`](https://github.com/Byron/gitoxide/commit/4bb3ce4f2e89dd817c284ed8ae9e2559ed60f9a2)) + - [pack] thin pack offset to index lookup ([`121aca4`](https://github.com/Byron/gitoxide/commit/121aca45ecb1acce3496b1b2ac003aa95851f247)) + - [pack] refactor ([`372b9ce`](https://github.com/Byron/gitoxide/commit/372b9cee78a6b49eb7ebb5cf452a324e07775d98)) + - [pack] a way to obtain whole bundles for offset-to-index lookup ([`15fcbe2`](https://github.com/Byron/gitoxide/commit/15fcbe254b75e8f74652711cc339ae5ade74d24c)) + - [pack] refactor ([`64b1dcd`](https://github.com/Byron/gitoxide/commit/64b1dcdb0fb53749ce73017d0dc1e053689d17d4)) + - [pack] refactor ([`1d713b4`](https://github.com/Byron/gitoxide/commit/1d713b482264ddb0aba6a98e3918f8236ce12c80)) + - [pack] refactor ([`cdf020a`](https://github.com/Byron/gitoxide/commit/cdf020a3b29bc59062d3ccf56672e9c18201c67c)) + - [pack] refactor ([`2ccefb2`](https://github.com/Byron/gitoxide/commit/2ccefb2832b326966a24d0cbcfd79ca5309f91aa)) + - [pack] refactor; entry-iterator now produces delta-objects ([`5dc370b`](https://github.com/Byron/gitoxide/commit/5dc370ba01d25a6e8b7f4bfa03259c83e6b1d758)) + - [pack] rough version of obtaining object indices for deltas ([`a58e270`](https://github.com/Byron/gitoxide/commit/a58e270ef96011ffd2434539e3099cbe27aed3f3)) + - 
[pack] refactor ([`8cfa414`](https://github.com/Byron/gitoxide/commit/8cfa414482a4318ed385f42582ec885fb73134e3)) + - [pack] pass all data to where it belongs to… ([`af5cb1f`](https://github.com/Byron/gitoxide/commit/af5cb1f4b809ac268ca3d878896854c966dcea97)) + - [pack] add the notion of thin-packs to the pack generator ([`a289bba`](https://github.com/Byron/gitoxide/commit/a289bbaa36546109d3371a8fcd7a6dc3c363861f)) + - [pack] build an index of pack ranges as well ([`4d6ab7b`](https://github.com/Byron/gitoxide/commit/4d6ab7b74c325820a3760361faace380f958572f)) + - [pack] bundle::Location with pack offset; order counts by that… ([`f92f285`](https://github.com/Byron/gitoxide/commit/f92f285167c6b5bc4d86f255e360c4534e38bb29)) + - [pack] better identify the currently implemented pack generation mode. ([`f9e3b3c`](https://github.com/Byron/gitoxide/commit/f9e3b3ca3bbf063e8d71c62fe607b812c745a969)) + - [pack] refactor ([`f3dc3da`](https://github.com/Byron/gitoxide/commit/f3dc3da492e1dda5dd9e43fddc57da6a118081b3)) + - [pack] refactor ([`9ee1e22`](https://github.com/Byron/gitoxide/commit/9ee1e22fa5c5d97ff626f0dfc44706272433bfef)) + - [pack] refactor ([`78d46c1`](https://github.com/Byron/gitoxide/commit/78d46c13d0510ee3e2e2f33cd60d624d63e85900)) + - [pack] refactor ([`69af352`](https://github.com/Byron/gitoxide/commit/69af3526b0fcfa8a270238f3e2cf59d332bd187e)) + - Change wording ([`6c82a16`](https://github.com/Byron/gitoxide/commit/6c82a16d340acb9b11c5cf56c917c9fe6f2cdf0e)) + - Bump uluru from 2.1.1 to 2.2.0 ([`52e274f`](https://github.com/Byron/gitoxide/commit/52e274fe985948b6b742ff7066fcb9831e427ba3)) + - Don't use ASM on windows for Sha1 as it fails to build there. ([`ba1fb7a`](https://github.com/Byron/gitoxide/commit/ba1fb7ab5bc03f5a23ece32ff1e144544e1eaeae)) + - Merge branch 'remove-unnecessary-unsafe' ([`7a3c5c1`](https://github.com/Byron/gitoxide/commit/7a3c5c14dc56d8711548d1b219a969836693cbaa)) + - Remove unnecessary unsafe code ([`83e207a`](https://github.com/Byron/gitoxide/commit/83e207a44aece0ff4870e57990bd5aaf43f38e22)) + - Remove unnecessary pub(crate) exports ([`3d2456e`](https://github.com/Byron/gitoxide/commit/3d2456e11709f0461b37c6df55ecc3861ca4cab5)) + - Bump thiserror from 1.0.25 to 1.0.26 ([`9682590`](https://github.com/Byron/gitoxide/commit/9682590095dc3a502b0c84ccd206ca4797635092)) + - Thanks clippy ([`6200ed9`](https://github.com/Byron/gitoxide/commit/6200ed9ac5609c74de4254ab663c19cfe3591402)) + - Fix build ([`dbfa49a`](https://github.com/Byron/gitoxide/commit/dbfa49acf58b2c0763c5e98e5276860b43dfb27b)) + - Fix everything up so that… ([`5930563`](https://github.com/Byron/gitoxide/commit/5930563601d6c2148cf39e109f69f8b7c7dfcb36)) + - A first attempt to make intrerupt tools work, but… ([`8fb8d37`](https://github.com/Byron/gitoxide/commit/8fb8d374ecfeffa3ae1bd07bf9bc5014351730f5)) + - Fix pack tests ([`7968467`](https://github.com/Byron/gitoxide/commit/7968467cc0d392e3d223811ed36ae777531a5a36)) + - The last occurrence of the global git-features::interrupt usage gone ([`6820724`](https://github.com/Byron/gitoxide/commit/6820724be83ebf48c7ccf6a65a3d6383f766c9de)) + - Another one ([`0a8ed0e`](https://github.com/Byron/gitoxide/commit/0a8ed0ecc078d76dc3a5fe13518cf43bfbb121f0)) + - And another one down ([`abce75e`](https://github.com/Byron/gitoxide/commit/abce75eefff44b9538c112b60ad5e0596482e89c)) + - Refactor ([`7f9be36`](https://github.com/Byron/gitoxide/commit/7f9be36ea909ee67555591287bcb140fdc54c801)) + - And one less usage of the global interrupt handler… 
([`5da57a3`](https://github.com/Byron/gitoxide/commit/5da57a3b0efef75ad82cb4d1cd496fc7fc0f1c23)) + - Thanks clippy ([`3b2e765`](https://github.com/Byron/gitoxide/commit/3b2e7650d8afe2c0e246e005ab1c321a157cbd44)) + - Make most interrupts local to the method or function ([`4588993`](https://github.com/Byron/gitoxide/commit/458899306a3f3c8578f185d7ecbf1ade2a7142dd)) + - [features] sketch of iterator to auto-check for interruptions ([`61d3a15`](https://github.com/Byron/gitoxide/commit/61d3a15c66b4c1be1d98715b8a60705a3a314455)) + - [pack] refactor ([`25f04ba`](https://github.com/Byron/gitoxide/commit/25f04baa100bd1996f48fbeb4c87e40ff1b27d90)) + - [pack] refactor ([`18cabb8`](https://github.com/Byron/gitoxide/commit/18cabb8618ffc324412302bfda208948abffb61f)) + - [pack] also put counts in order for stable packs ([`f299160`](https://github.com/Byron/gitoxide/commit/f299160cafd00f0fea00a2402901570f5ddf27d5)) + - [pack] fix run of 'cargo test --all' ([`e7ecdc1`](https://github.com/Byron/gitoxide/commit/e7ecdc195d03fa9a29ad1e44464b42e3ca6fb6a4)) + - [pack] a working in-order iterator ([`5fea926`](https://github.com/Byron/gitoxide/commit/5fea926803bcc7b2ef7d8f156e3d31a503831091)) + - [pack] tests for error handling of in-order iterator ([`44892cc`](https://github.com/Byron/gitoxide/commit/44892cca9309c4cca0eaa30dbedc65422a2699d1)) + - [pack] ground work for ordering in produced chunks ([`9680649`](https://github.com/Byron/gitoxide/commit/96806494d32243bd1798a89c094e220dbe050d68)) + - [pack] also run multi-threaded tests as part of unit-tests ([`5d3006a`](https://github.com/Byron/gitoxide/commit/5d3006a5d075bce9011b20920a84404952624c45)) + - Bump uluru from 2.0.0 to 2.1.1 ([`b6ac506`](https://github.com/Byron/gitoxide/commit/b6ac506ba2df0f82eaae64eaf023cc0c0376ddff)) + - [pack] hopefully fix tests on CI; verify determinism of pack ([`51dec8b`](https://github.com/Byron/gitoxide/commit/51dec8b3c661ba9071306ab89796aa93d9a25b65)) + - [pack] deterministic single-threaded pack generation ([`ddb6442`](https://github.com/Byron/gitoxide/commit/ddb6442fd6681a2dd3890a8a415003ec770c7d64)) + - [pack] refactor ([`cfdf802`](https://github.com/Byron/gitoxide/commit/cfdf8021ea1448ac4844b1f3bf252fefde2572fa)) + - [pack] basic statistics for entries ([`37229a6`](https://github.com/Byron/gitoxide/commit/37229a650ceb0155aa7ca87b499fe188ac4bb565)) + - Thanks clippy ([`18b2113`](https://github.com/Byron/gitoxide/commit/18b2113b1e3c372145bc9037ee6a9de7efe4e506)) + - [pack] write packs to a directory with the proper name ([`3fbca7d`](https://github.com/Byron/gitoxide/commit/3fbca7dd62752a7dd752b83a39ec8dfd7b2f2ea8)) + - [pack] refactor ([`f10adea`](https://github.com/Byron/gitoxide/commit/f10adea76d92eada3ca204fe69e7b5f81a06d8cc)) + - [pack] fix docs ([`6ba471d`](https://github.com/Byron/gitoxide/commit/6ba471d228c45a3821b4984905a4b4ecaff5b0b0)) + - [pack] fix build ([`81ee633`](https://github.com/Byron/gitoxide/commit/81ee633c7f482746bc28a2a43d74ebbaded7af5f)) + - [pack] statistics for counting objects seemingly work… ([`4e3deb1`](https://github.com/Byron/gitoxide/commit/4e3deb1364dd1bef0af79d6aa97086a95b4983bc)) + - [pack] actual counts statistics ([`3a9f6d8`](https://github.com/Byron/gitoxide/commit/3a9f6d8a53da3235bde4a3f32859381d4843cb7e)) + - [pack] aggregate the count outcome ([`c7ac0e6`](https://github.com/Byron/gitoxide/commit/c7ac0e60a5d69f3a948d47c3acc3060cddbafb98)) + - [pack] use statistics reducer ([`0974ab1`](https://github.com/Byron/gitoxide/commit/0974ab176777bfa02ac0ea32915f6d9c46e3ddeb)) + - [pack] count 
object reducer sketch ([`ea45692`](https://github.com/Byron/gitoxide/commit/ea4569282e2f63042869dd47205874c161bfecfe)) + - [pack] refactor ([`fdf485a`](https://github.com/Byron/gitoxide/commit/fdf485afa66af20abca586b04f588a33c167310f)) + - [pack] refactor ([`0514f1d`](https://github.com/Byron/gitoxide/commit/0514f1df113c5f6bf1c934b15741ca8ea47316ae)) + - [pack] refactor ([`37922d1`](https://github.com/Byron/gitoxide/commit/37922d12765c221e747fad4ca813597490525279)) + - (cargo-release) version 0.3.0 ([`6b33678`](https://github.com/Byron/gitoxide/commit/6b33678f83e6d261ca15c4a7634ff5b4e66d81dd)) + - (cargo-release) version 0.2.0 ([`3286e42`](https://github.com/Byron/gitoxide/commit/3286e42547b59df6365087cbae9ce1c9c959faad)) + - Refactor ([`a25a774`](https://github.com/Byron/gitoxide/commit/a25a774675e2e9db1c891351077d3af2fd5c72ed)) + - [git-transport] Show how to use blocking git-pack code in non-blocking transports ([`de2ba3c`](https://github.com/Byron/gitoxide/commit/de2ba3c4919d454894911c54fd4bb0e0a4665723)) + - (cargo-release) version 0.4.0 ([`866f86f`](https://github.com/Byron/gitoxide/commit/866f86f59e66652968dcafc1a57912f9849cb21d)) + - [git-repository] towards git-repository as one stop shop ([`aea6cc5`](https://github.com/Byron/gitoxide/commit/aea6cc536f438050cc0e02223de7702cd7912e75)) + - [git-ref] the first failing test ([`7e802a0`](https://github.com/Byron/gitoxide/commit/7e802a0576230dfc666c253d484ea255f265f92f)) + - (cargo-release) version 0.2.0 ([`b213628`](https://github.com/Byron/gitoxide/commit/b213628feeb8dfa87dab489c7d3155a60e6a236d)) + - [git-odb] prep release ([`4984ce3`](https://github.com/Byron/gitoxide/commit/4984ce3e19b60b89a4337f90ac4b9c44c42558a0)) + - [git-odb] refactor ([`2958145`](https://github.com/Byron/gitoxide/commit/2958145a0ae1ef582bbf88352f5567d5c2b5eaf0)) + - [git-pack] fix docs ([`efd20d4`](https://github.com/Byron/gitoxide/commit/efd20d4e1afbfbe573d620dea4761c06f948a296)) + - [git-pack] refactor ([`ea2b3de`](https://github.com/Byron/gitoxide/commit/ea2b3deab78882943e11270e4166ca7c340b03e1)) + - [git-pack] refactor ([`bc4b7b1`](https://github.com/Byron/gitoxide/commit/bc4b7b18a04506a3d08d66d1222d706b82a2f6e7)) + - [git-pack] refactor ([`157b6ff`](https://github.com/Byron/gitoxide/commit/157b6ff7b55ba2b7f8f90f66864212906426f8d7)) + - [git-pack] refactor ([`49c1c3e`](https://github.com/Byron/gitoxide/commit/49c1c3ea67379c5a122a8c3921d8ff713e14d371)) + - (cargo-release) version 0.16.0 ([`769c649`](https://github.com/Byron/gitoxide/commit/769c649c00c009bf5a3f7c0611a7b999618f2938)) + - [git-pack] refactor ([`be6ddaa`](https://github.com/Byron/gitoxide/commit/be6ddaa98fc1dcaf77dc0fd9c9d67754e74927e4)) + - [git-pack] used by git-odb ([`5d6ee07`](https://github.com/Byron/gitoxide/commit/5d6ee07a8dec64fe5f68c14c418d922077fad3df)) + - [git-pack] refactor ([`1b2a245`](https://github.com/Byron/gitoxide/commit/1b2a245aa494c0f9cacc2ad6b8ca02e9891fdb4c)) + - [git-pack] move hash-writer to git-features as it's quite general purpose ([`80e5640`](https://github.com/Byron/gitoxide/commit/80e5640169363910b4189fda58bb495c6677eaaa)) + - [git-pack] the very first version… ([`8c06cdb`](https://github.com/Byron/gitoxide/commit/8c06cdb14269e798b7ff771ea3864f85fa673ed7)) +</details> + +## 0.30.1 (2023-01-10) + +A maintenance release without user-facing changes. + +## 0.30.0 (2023-01-09) + +A maintenance release without user-facing changes. + +## 0.29.0 (2022-12-30) + +A maintenance release without user-facing changes. 
+ +## 0.28.0 (2022-12-19) + +### New Features + + - <csr-id-a85dcddba29a453bbea87ac865b8aedc97f41aed/> add `data::File::resolve_header()` to obtain object information without decompressing it. + - <csr-id-c8835c6edae784c9ffcb69a674c0a6545dbb2af3/> upgrade to `prodash 21.1` and add `Ids` to all progress instances. + That way callers can identify progress they are interested in, say, for + selective visualizations. + +### Changed (BREAKING) + + - <csr-id-37f3a675d7fa931f5d3f38e91df74ec0b517422b/> move `data::ResolveBase` into `data::decode_entry::`; unify `decode_entry|decode_header::Error` into `decode::Error`. + +## 0.27.0 (2022-11-21) + +A maintenance release without user facing changes. + +## 0.26.0 (2022-11-08) + +A maintenance release without user-facing changes. + +## 0.25.0 (2022-11-06) + +### New Features + + - <csr-id-450257eb596465352fb363a5c8c514f544e7f9ac/> `Bundle::write…` also produces a `.keep` file + These files are placed before moving the corresponding pack and index + files into place to prevent them from being picked up for collection + while their refs are still being placed. + + The caller is responsible for deleting them once refs integrate the + contained objects into the commit graph. + +## 0.24.0 (2022-10-10) + +<csr-id-b46347fd3d50886eeca500e31e1e12b354711309/> +<csr-id-591afd56d9862a6348ef8b3af61798004b36aa19/> + +### New Features + + - <csr-id-6836cb148cbadf919bdac2e0e77bdec34caa9307/> more efficient distribution of tasks to threads during pack resolution. + This clearly is a peak-performance optimization as it will now + efficiently distribute tree-root nodes to threads one at a time by + means of shared memory, which is accessed mutably yet safely. + + This change can also further reduce peak memory usage as it will not + keep buffers of more resolved deltas then necessary, as it only handles + one at a time per thread. + +### Bug Fixes + + - <csr-id-ec58bef84d620360dc52e34e173ace1310f74c85/> reduce memory usage for pack resolution by 18%… + …compared to where it started out before. + + This is the result of using u32 instead of usize where possible, + leveraging the object limit of u32 in packs. + + This change makes the biggest difference by not keeping the decompressed + memory of leaf nodes alive for longer than needed, at the cost of + some code duplication which could be extracted into a function if one + was inclined to deal with the boilerplate and lots of generics. + - <csr-id-49d168a8859c6b5a9e7ef58cd836093212c2c7ad/> Allow verification of empty packs and indices. + Empty packs are fine, even though the implementation should probably + assure to not write them, or remove them after having been written. + - <csr-id-a745512185fb0a46e35daaa6d28829aec05edb55/> increase pack-receive performance using a BufWriter + Previously the NamedTempFile would receive every little write request + for millions of objects, consuming considerable amounts of time. + + Now a buf writer alleviates this issue entirely. + - <csr-id-0b6ed60f842f0a36f61f187651080540a358758e/> `bundle::write::Error` is now publicly available + +### Bug Fixes (BREAKING) + + - <csr-id-5a75afe0467e4a84323ea10172eed835cc7fae4c/> Reading and writing empty packs is explicitly allowed. + This can happen when sending packs that don't actually contain changes, + but need to be sent to conform to the protocol. + +### Other (BREAKING) + + - <csr-id-b46347fd3d50886eeca500e31e1e12b354711309/> `index::write::Outcome::index_kind` -> `::index_version`. 
+ - <csr-id-591afd56d9862a6348ef8b3af61798004b36aa19/> `bundle::write::Options::index_kind` -> `::index_version`. + +## 0.23.0 (2022-09-20) + +### Changed (BREAKING) + + - <csr-id-99905bacace8aed42b16d43f0f04cae996cb971c/> upgrade `bstr` to `1.0.1` + +## 0.22.0 (2022-08-28) + +Maintenance release without user-facing changes. + +## 0.21.2 (2022-08-24) + +<csr-id-f7f136dbe4f86e7dee1d54835c420ec07c96cd78/> + +### Chore + + - <csr-id-f7f136dbe4f86e7dee1d54835c420ec07c96cd78/> uniformize deny attributes + +### New Features + + - <csr-id-b1c40b0364ef092cd52d03b34f491b254816b18d/> use docsrs feature in code to show what is feature-gated automatically on docs.rs + - <csr-id-517677147f1c17304c62cf97a1dd09f232ebf5db/> pass --cfg docsrs when compiling for https://docs.rs + +## 0.21.1 (2022-08-17) + +A maintenance release without user facing changes. + +## 0.21.0 (2022-07-22) + +This is a maintenance release with no functional changes. + +### New Features (BREAKING) + + - <csr-id-95210cb2ba85f75148b4ef48ccea9d9f8a0a0114/> Provide optional `candidates` for ambiguous entries during `lookup_prefix()` + The candidate entries are all entries matching a given prefix. + +## 0.20.0 (2022-06-13) + +A maintenance release without user-facing changes. + +## 0.19.1 (2022-05-21) + +A maintenance release without user-facing changes. + +## 0.19.0 (2022-05-18) + +A maintenance release without user-facing changes. + +## 0.18.0 (2022-04-05) + +### Changed (BREAKING) + + - <csr-id-8c5ae77f06a64c57df9a9ad1190266896a223dbe/> Remove deprecated compound and linked object databases + The dynamic/general store is the only maintained can-do-it-all + DB now. + +## 0.17.0 (2022-04-03) + +<csr-id-25209454d3f7e27e12e8ddca92e43b1ff01d58aa/> +<csr-id-9b9f10ad862b5e097c836c51df1eb98607df5ae1/> + +### Chore + + - <csr-id-25209454d3f7e27e12e8ddca92e43b1ff01d58aa/> upgrade dashmap to 5.1.0 (with security fix) + +### New Features + + - <csr-id-503b1a1f8d4f39b44c166209d7a8ba8d74137859/> `index::File::lookup_prefix(…)` + - <csr-id-cb83beedd1aa389f6774e2296f79273e8c8f14f4/> gix-hash::Prefix::from_id() + A way to obtain a prefix of an object id, with all non-prefix + bytes set to zero. + - <csr-id-16208306ab49ade30d8ffd6b067ebd8eefd84cd4/> in-manifest and in-lib documentation of feature toggles + +### Bug Fixes + + - <csr-id-42e0487286c1f745837c0ce337ed7c9d86b14516/> support Rust 1.52 + +### Refactor + + - <csr-id-9b9f10ad862b5e097c836c51df1eb98607df5ae1/> remove unnecessary unsafe by using `chunks_mut()` + This was probably a left-over from times where there was a static + requirement on the chunks processing. Maybe… . + +## 0.16.1 (2022-02-01) + +### Bug Fixes + + - <csr-id-d9451e8d7fc39c252042f9d2447061262c16ae7a/> downgrade dashmap to 4.0 to avoid unsoundness. + See https://github.com/xacrimon/dashmap/issues/167 for tracking + progress on resolving the issue. 
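The `chunks_mut()` refactor noted under 0.17.0 above removes hand-rolled `unsafe` slice splitting in favour of the standard library's disjoint-chunk iterator. A minimal sketch of that pattern (illustrative only, not the crate's actual code; the per-chunk work is a placeholder):

```rust
use std::thread;

// Hand each thread its own disjoint mutable chunk of the buffer.
// `chunks_mut()` guarantees the chunks don't overlap, so no `unsafe`
// pointer arithmetic is required.
fn process_in_chunks(buf: &mut [u8], chunk_size: usize) {
    thread::scope(|scope| {
        for chunk in buf.chunks_mut(chunk_size) {
            scope.spawn(move || {
                // Placeholder work; each closure owns its chunk exclusively.
                for byte in chunk.iter_mut() {
                    *byte = byte.wrapping_add(1);
                }
            });
        }
    });
}

fn main() {
    let mut data = vec![0u8; 32];
    process_in_chunks(&mut data, 8);
    assert!(data.iter().all(|&b| b == 1));
}
```

The crate's own parallel code goes through `gix-features` utilities rather than raw scoped threads; the sketch only shows why `chunks_mut()` removes the need for `unsafe` splitting.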
+ +## 0.16.0 (2022-01-23) + +<csr-id-ebc7f47708a63c3df4415ba0e702660d976dfb3e/> +<csr-id-2290d006705ff47ad780b009fe58ee422b3285af/> +<csr-id-e0b8636f96e4bfe1bc72b5aa6ad4c4c8538ff92c/> +<csr-id-c800fdd331e6d7a0b8d756ba822915259f26e9e8/> +<csr-id-e6ff1a885889cf88f6b34b1193aa03d8bce16af5/> +<csr-id-f48630ba8f745c2ec61a1e3c51fa63a1789a088c/> + +### Refactor + + - <csr-id-e0b8636f96e4bfe1bc72b5aa6ad4c4c8538ff92c/> replace bare u32 `data::Id` typedef + +### Other + + - <csr-id-e6ff1a885889cf88f6b34b1193aa03d8bce16af5/> :File uses its hash_len parameter + - <csr-id-f48630ba8f745c2ec61a1e3c51fa63a1789a088c/> :Find implementation for Rc + +### Chore + + - <csr-id-c800fdd331e6d7a0b8d756ba822915259f26e9e8/> remove unused dependencies + +### New Features + + - <csr-id-b80dec2323b81fb2172df76c7d897a4b5e6bdfea/> zero-objects check for index and multi-index integrity validation + - <csr-id-56fc99fb9c1cab61abd03c10e1b4af0d6e491bbf/> support for fan-checking in index and multi-index integrity verification + - <csr-id-28e3ea8612112f6a04cfaff591565eca5a1ffba2/> introduce type for entry indices within an index or multi-index + That way it's a little more descriptive than a bare u32. + - <csr-id-58c2edb76755ab71e10eef4cd9a51533825c291f/> gix_pack::Find::try_find_cached(…, pack_cache) + With this method it's easier to bypass local caches and control + the cache oneself entirely. + - <csr-id-e25f4eadec679406aad6df10026e27e4832c2482/> A simplified version of the `Find` trait + It's meant for the next generation of object db handles which keep a + local cache of all the details of the actual object database. + +### Bug Fixes + + - <csr-id-42e0487286c1f745837c0ce337ed7c9d86b14516/> support Rust 1.52 + - <csr-id-84ade1d23060f10bf6c8529f8f693d06660b4f4e/> Allow resolution of in-pack ref-deltas + This finally allows delta tree caches to be used on typical small packs + returned by GitHub. + - <csr-id-ba92cc09ba41fe4c9a9097bfeb8d18016408fcdf/> don't try to short-cut detection of large offsets when writing index files + The code incorrectly assumed that the input is sorted by offsets, with + the largest offset being last, even though by all means that's not the + case. + - <csr-id-6d3f52dc13d7243a6bce6dab89a985114a75d94b/> Avoid the dashmap being cloned for each thread + Instead, share it by reference, it's sync after all. + + This issue was introduced when switching to a `Send + Clone` model, + instead of `Send + Sync`, to allow thread-local caches in database + handles of all kinds. + - <csr-id-b605c1fa0494b10872d3c2e6ecce0e39f1a90a9e/> linked::Store now assures unique IDs across compound stores + +### Changed (BREAKING) + + - <csr-id-a79a7fb638b45df88af0d0d5fc9ada6d824bc328/> Improve method signatures of `cache::Tree::*` + - <csr-id-91d047658b114f372735116c9d8e6962a3873137/> cleanup and unify `verify_integrity()` method signature + Previously they used many different ways of handling their parameters + despite all boiling down to calling the same 'index::File::traverse()` + method. + + This allows for more reuse of `Options` structs and generally makes + clearer how these options are used. 
+ - <csr-id-2cf7727228e1d8094ffd2eec6746006348c39eab/> `index::File::traverse()` now returns an `Outcome` struct instead of tuple of 3 fields + - <csr-id-bf04644ab75ed1969507f957dc8d4868790d462d/> remove `Option<impl Progress>` in favor of `impl Progress` + - <csr-id-6829e5e5d6aed1e6c87647144e2dd76a1e4b9f1f/> multi-index integrity check; use `integrity::Outcome` for various integrity checks + - <csr-id-d851bede97801096d188ff6af06c98a79fe276db/> remove unnecessary `Arc` around `should_interrupt` flag + - <csr-id-c2679a03358b9c19d63ed1af1cd57324c6381447/> remove Sha1 mentions in `index::verify::Mode::*` variants + The hash is repository defined and not hard-coded + - <csr-id-80b120d3278e46429f848df7af3db13413c36649/> introduce `index::File::verify_integrity(…, pack: Option<PackContext>, …)`, replacing tuple + This allows for more documentation on what input is required there and + generally makes for an easier to use API. + - <csr-id-79dc0d5ba6fa31ddd5c075693ffdc6496c1eaded/> rename `oid::try_from()` to `try_from_bytes()`, add `from_bytes_unchecked()` + This change was done in the name of consistency, as `from_bytes()` is + used in many other git-* crates + - <csr-id-2ef9a8424af51310db8c1e6df31dde9953ed3d21/> Change accessors named `hash_kind()` to `object_hash()` for consistency + - <csr-id-b76f6be6c5baa6cf613a174241f007e92bf5ba36/> consistently use `object_hash` instead of `hash_kind` + - <csr-id-629412b4cb192614b7eff08dbf203e3448c902c1/> data::Entry::from_read() now takes a hash lengths as parameter + That way ref-deltas can be interpreted without hard-coding SHA1 + - <csr-id-851dc2c52fa8e204ba2d5ced8fb0959a889869d8/> data::Entry::from_bytes(…, hash_len) takes new parameter + The hash-len tells it how to interpret ref-delta objects, which + store the complete hash of the base object. + + This is now entirely configurable. + - <csr-id-db8c8c41b4ced0fc296d3877883d801e77d550ae/> `index::File::at()` with gix_hash::Kind parameter + It will allow to assume different hashes even in the index file format + which isn't yet capable of storing this information. + - <csr-id-e6a3c9f72332b524b143bc94ee9df0a6db11e864/> `data::File::at()` and `Bundle::at()` now have `hash_kind` parameter + It's used to configure the kind of hash to assume when reading packs and + indices. + - <csr-id-3f05fea55dc8acce1ed62ecbe4e0a1394f2720b7/> remove `make_object_cache` parameter from `gix_pack::data::output::count::objects()` + It now is an implementation detail of the Find trait. + - <csr-id-82b9b33bd5f4c3c1721a5093de2cedc62cb10565/> move `bundle::Location` to `data::entry::Location` + The latter place best describes its purpose. + - remove pack-cache from `Find::try_find(…)` + With the new architecture this can be an implementation detail without + forcing it to be Sync. + - move gix_pack::data::Object to gix_object::Data, massively alter gix_odb::Find trait + This will break a lot, but has to happen to prepare these traits for the + next generation of object databases. + +## 0.15.0 (2021-11-29) + +<csr-id-598698b88c194bc0e6ef69539f9fa7246ebfab70/> + +### Changed (BREAKING) + + - <csr-id-e7526b2a7b51cbac4018e1ab3b623a85987fadc2/> parallel utilities now use `Send + Clone` instead of `Send + Sync`. + + This helps to assure that thread-local computations always work with the + kind of types we provide. The ones that are carrying out actions are + notably not `Sync` anymore. 
+ + We cater to that by defining our bounds accordingly, but for those + who want to use other utilities that need Sync, using types like + `Repository` and `thread_local!()` is the only way to make this + work. + +## 0.14.0 (2021-11-16) + +<csr-id-e8b091943f0c9a26317da0003f7fcdf5a56ef21a/> + +An important bugfix to prevent assertion failures when writing thin packs. + +### Bug Fixes + + - <csr-id-20b3994206aa5bc5e35cbbc9c8f8f99187077f79/> Adjust size-hints of resolving entries iterator and use the upper bound in delta tree. + + The delta-tree is a data structure that actually heavily relies on + favorable allocation and a known amount of objects in order to + provide front and back buffers. However, this is an implementation + detail and they don't have to stay consistent at all especially + after growing the buffer by pushing to it. + + Interestingly, the VecDeque internally over-allocates as well which + definitely helps the example of `as_mut_slices()`, otherwise + it could also suffer from the assertions that trigger here. + +## v0.13.0 (2021-10-19) + +A maintenance release to properly dealing with previously breaking changes in `gix-hash`. + +## v0.12.0 (2021-10-15) + +<csr-id-d8fe8141e80a9e9a433b5e1a072b850325c806c8/> +<csr-id-faf6f813927720c5adf62102f9ce46606ff2617c/> +<csr-id-2f2d856efe733d3cf81110c0e0607d2e7c40d968/> +<csr-id-71c628d46088ab455b54eb2330d24dcff96c911d/> +<csr-id-8fe461281842b58aa11437445637c6e587bedd63/> + +This release contains bugfixes and features, but is considered breaking as `gix-traverse` +signalled a breaking change which is one of our dependencies. + +### Refactor + + - <csr-id-71c628d46088ab455b54eb2330d24dcff96c911d/> Use 'cache::Object' trait where it matters + - <csr-id-8fe461281842b58aa11437445637c6e587bedd63/> split data::output::count::objects into files + +### New Features + + - <csr-id-d6c44e6ab8f436020d4fb235e423b018fd1e7a9f/> dynamically sized full-object speeds up diff-based object counting + which is what happens when counting objects for fetches where only changed objects should be sent. + - <csr-id-50cf610e8939812c3d2268c48835e2dac67d0c31/> `cache::Object` trait for caching and retrieving whole objects + - <csr-id-60c9fad8002b4e3f6b9607bba6361871752f4d3d/> control pack and object cache size in megabytes + - <csr-id-5a8c2da6cb1e2accf7cfdccc16bc3a1d0b2a7dbc/> object cache size is configurable + +### Bug Fixes + + - <csr-id-d8fe8141e80a9e9a433b5e1a072b850325c806c8/> don't put more objects into the pack cache than needed. + + Previously when accessing a packed object, it would store the base + object into the pack cache (if it wasn't retrieved from there) + which is great if that operation is free. + + Since it isn't, it's better not to stress the cache with puts + and trash more objects than necessary. + + Now only the last decompressed object will be put into the LRU cache. + - <csr-id-faf6f813927720c5adf62102f9ce46606ff2617c/> don't include submodules in count, + which avoids dealing with missing objects entirely. Those ominous missing objects where just git submodules after all. + + It's still a good idea to handle these gracefully though, git itself + seems to ignore them, too, and so do we at least for now. 
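The pack-cache fix above amounts to a put-once discipline: only the final decompressed object goes into a byte-capped cache, never every intermediate delta base. A rough, self-contained illustration of such a capped cache follows (names and the eviction policy are assumptions for the sketch, not the crate's actual `cache::Object` implementation):

```rust
use std::collections::HashMap;

/// Illustrative byte-capped cache keyed by a 20-byte (SHA-1 sized) object id.
struct CappedObjectCache {
    map: HashMap<[u8; 20], Vec<u8>>,
    used_bytes: usize,
    cap_bytes: usize,
}

impl CappedObjectCache {
    fn new(cap_bytes: usize) -> Self {
        Self { map: HashMap::new(), used_bytes: 0, cap_bytes }
    }

    /// Store a fully decoded object. Callers only put the *final* result here,
    /// not every intermediate delta base decompressed along the way.
    fn put(&mut self, id: [u8; 20], data: Vec<u8>) {
        let len = data.len();
        if len > self.cap_bytes {
            return; // would never fit; don't thrash the cache for it
        }
        // Evict arbitrary entries until the new object fits under the cap.
        while self.used_bytes + len > self.cap_bytes {
            let victim = match self.map.keys().next().copied() {
                Some(key) => key,
                None => break,
            };
            if let Some(evicted) = self.map.remove(&victim) {
                self.used_bytes -= evicted.len();
            }
        }
        if let Some(previous) = self.map.insert(id, data) {
            self.used_bytes -= previous.len();
        }
        self.used_bytes += len;
    }

    fn get(&self, id: &[u8; 20]) -> Option<&[u8]> {
        self.map.get(id).map(Vec::as_slice)
    }
}
```

The real caches are more elaborate (LRU ordering, sizes configurable in megabytes as noted above), but the byte accounting and the "skip objects that can never fit" check capture the core idea.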
+ +### Performance + + - <csr-id-f9232acf8e52f8cd95520d122469e136eb07b39f/> ObjectID specific hashers, using the fact that object ids are hashes + +## v0.11.0 (2021-09-08) + +- manual bump for safety as its dependencies have breaking changes + +## v0.10.0 (2021-09-07) + +- **renames** + - `data::Object::into_commit_iter()` -> `data::Object::try_into_commit_iter()` + - `data::Object::into_tree_iter()` -> `data::Object::try_into_tree_iter()` + - `data::Object::into_tag_iter()` -> `data::Object::try_into_tag_iter()` + +## v0.9.0 (2021-08-27) + +- **renames / moves / visibility** + - `find::Find` and `find::FindExt` only in `Find` and `FindExt` (not in `find` anymore) + - `data::output::count::Count` -> `data::output::Count` + - `data::output::entry::Entry` -> `data::output::Entry` + - `Find::find_existing_*` -> `Find::find_*` + - `Find::find()` -> `Find::try_find()` + - `bundle::Bundle` -> `Bundle` + - `bundle::Error` -> `bundle::init::Error` + - `pub tree::` -> `pub(crate) cache::delta::` + - `data::object::Object` -> `data::Object` + - `data::entry::Entry` -> `data::Entry` + +- **new methods** + - `Find::find_tag_iter()` + + +## v0.8.2 (2021-08-17) + +## v0.8.1 (2021-08-13) + +## v0.8.0 (2021-08-12) + +## v0.6.0 (2021-08-11) + +## v0.5.0 (2021-08-11) + +## v0.3.1 (2021-08-10) + +## v0.3.0 (2021-08-10) + +## v0.2.0 (2021-05-25) + +## v0.1.0 (2021-05-24) + diff --git a/vendor/gix-pack/Cargo.toml b/vendor/gix-pack/Cargo.toml new file mode 100644 index 000000000..eff0d9d46 --- /dev/null +++ b/vendor/gix-pack/Cargo.toml @@ -0,0 +1,119 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2018" +rust-version = "1.64" +name = "gix-pack" +version = "0.32.0" +authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>"] +include = [ + "src/**/*", + "CHANGELOG.md", +] +autotests = false +description = "Implements git packs and related data structures" +license = "MIT/Apache-2.0" +repository = "https://github.com/Byron/gitoxide" +resolver = "2" + +[package.metadata.docs.rs] +all-features = true +features = [ + "document-features", + "pack-cache-lru-dynamic", + "object-cache-dynamic", + "serde1", +] +rustdoc-args = [ + "--cfg", + "docsrs", +] + +[lib] +doctest = false + +[dependencies.clru] +version = "0.6.1" +optional = true + +[dependencies.document-features] +version = "0.2.0" +optional = true + +[dependencies.gix-chunk] +version = "^0.4.1" + +[dependencies.gix-diff] +version = "^0.28.0" + +[dependencies.gix-features] +version = "^0.28.0" +features = [ + "crc32", + "rustsha1", + "progress", + "zlib", +] + +[dependencies.gix-hash] +version = "^0.10.3" + +[dependencies.gix-hashtable] +version = "^0.1.2" + +[dependencies.gix-object] +version = "^0.28.0" + +[dependencies.gix-path] +version = "^0.7.2" + +[dependencies.gix-traverse] +version = "^0.24.0" + +[dependencies.memmap2] +version = "0.5.0" + +[dependencies.parking_lot] +version = "0.12.0" +default-features = false + +[dependencies.serde] +version = "1.0.114" +features = ["derive"] +optional = true +default-features = false + +[dependencies.smallvec] +version = "1.3.0" + +[dependencies.thiserror] +version = "1.0.26" + +[dependencies.uluru] +version = "3.0.0" +optional = true + +[dev-dependencies] + +[features] +object-cache-dynamic = ["dep:clru"] +pack-cache-lru-dynamic = ["dep:clru"] +pack-cache-lru-static = ["dep:uluru"] +serde1 = [ + "dep:serde", + "gix-object/serde1", +] +wasm = ["gix-diff/wasm"] + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.gix-tempfile] +version = "^4.0.0" +default-features = false diff --git a/vendor/gix-pack/src/bundle/find.rs b/vendor/gix-pack/src/bundle/find.rs new file mode 100644 index 000000000..d39ed49a9 --- /dev/null +++ b/vendor/gix-pack/src/bundle/find.rs @@ -0,0 +1,63 @@ +impl crate::Bundle { + /// Find an object with the given [`ObjectId`][gix_hash::ObjectId] and place its data into `out`. + /// + /// [`cache`][crate::cache::DecodeEntry] is used to accelerate the lookup. + /// + /// **Note** that ref deltas are automatically resolved within this pack only, which makes this implementation unusable + /// for thin packs, which by now are expected to be resolved already. + pub fn find<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + out: &'a mut Vec<u8>, + cache: &mut impl crate::cache::DecodeEntry, + ) -> Result<Option<(gix_object::Data<'a>, crate::data::entry::Location)>, crate::data::decode::Error> { + let idx = match self.index.lookup(id) { + Some(idx) => idx, + None => return Ok(None), + }; + self.get_object_by_index(idx, out, cache).map(Some) + } + + /// Special-use function to get an object given an index previously returned from + /// internal_find_pack_index. + /// + /// # Panics + /// + /// If `index` is out of bounds. 
+ pub fn get_object_by_index<'a>( + &self, + idx: u32, + out: &'a mut Vec<u8>, + cache: &mut impl crate::cache::DecodeEntry, + ) -> Result<(gix_object::Data<'a>, crate::data::entry::Location), crate::data::decode::Error> { + let ofs = self.index.pack_offset_at_index(idx); + let pack_entry = self.pack.entry(ofs); + let header_size = pack_entry.header_size(); + self.pack + .decode_entry( + pack_entry, + out, + |id, _out| { + self.index.lookup(id).map(|idx| { + crate::data::decode::entry::ResolvedBase::InPack( + self.pack.entry(self.index.pack_offset_at_index(idx)), + ) + }) + }, + cache, + ) + .map(move |r| { + ( + gix_object::Data { + kind: r.kind, + data: out.as_slice(), + }, + crate::data::entry::Location { + pack_id: self.pack.id, + pack_offset: ofs, + entry_size: r.compressed_size + header_size, + }, + ) + }) + } +} diff --git a/vendor/gix-pack/src/bundle/init.rs b/vendor/gix-pack/src/bundle/init.rs new file mode 100644 index 000000000..3ba5257ed --- /dev/null +++ b/vendor/gix-pack/src/bundle/init.rs @@ -0,0 +1,46 @@ +use std::path::{Path, PathBuf}; + +use crate::Bundle; + +/// Returned by [`Bundle::at()`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("An 'idx' extension is expected of an index file: '{0}'")] + InvalidPath(PathBuf), + #[error(transparent)] + Pack(#[from] crate::data::header::decode::Error), + #[error(transparent)] + Index(#[from] crate::index::init::Error), +} + +/// Initialization +impl Bundle { + /// Create a `Bundle` from `path`, which is either a pack file _(*.pack)_ or an index file _(*.idx)_. + /// + /// The corresponding complementary file is expected to be present. + /// + /// The `object_hash` is a way to read (and write) the same file format with different hashes, as the hash kind + /// isn't stored within the file format itself. + pub fn at(path: impl AsRef<Path>, object_hash: gix_hash::Kind) -> Result<Self, Error> { + Self::at_inner(path.as_ref(), object_hash) + } + + fn at_inner(path: &Path, object_hash: gix_hash::Kind) -> Result<Self, Error> { + let ext = path + .extension() + .and_then(|e| e.to_str()) + .ok_or_else(|| Error::InvalidPath(path.to_owned()))?; + Ok(match ext { + "idx" => Self { + index: crate::index::File::at(path, object_hash)?, + pack: crate::data::File::at(path.with_extension("pack"), object_hash)?, + }, + "pack" => Self { + pack: crate::data::File::at(path, object_hash)?, + index: crate::index::File::at(path.with_extension("idx"), object_hash)?, + }, + _ => return Err(Error::InvalidPath(path.to_owned())), + }) + } +} diff --git a/vendor/gix-pack/src/bundle/mod.rs b/vendor/gix-pack/src/bundle/mod.rs new file mode 100644 index 000000000..076b355d9 --- /dev/null +++ b/vendor/gix-pack/src/bundle/mod.rs @@ -0,0 +1,60 @@ +/// +pub mod init; + +mod find; +/// +#[cfg(not(feature = "wasm"))] +pub mod write; + +/// +pub mod verify { + use std::sync::atomic::AtomicBool; + + use gix_features::progress::Progress; + + /// + pub mod integrity { + /// Returned by [`Bundle::verify_integrity()`][crate::Bundle::verify_integrity()]. + pub struct Outcome<P> { + /// The computed checksum of the index which matched the stored one. + pub actual_index_checksum: gix_hash::ObjectId, + /// The packs traversal outcome + pub pack_traverse_outcome: crate::index::traverse::Statistics, + /// The provided progress instance. + pub progress: P, + } + } + + use crate::Bundle; + + impl Bundle { + /// Similar to [`crate::index::File::verify_integrity()`] but more convenient to call as the presence of the + /// pack file is a given. 
+ pub fn verify_integrity<C, P, F>( + &self, + progress: P, + should_interrupt: &AtomicBool, + options: crate::index::verify::integrity::Options<F>, + ) -> Result<integrity::Outcome<P>, crate::index::traverse::Error<crate::index::verify::integrity::Error>> + where + P: Progress, + C: crate::cache::DecodeEntry, + F: Fn() -> C + Send + Clone, + { + self.index + .verify_integrity( + Some(crate::index::verify::PackContext { + data: &self.pack, + options, + }), + progress, + should_interrupt, + ) + .map(|o| integrity::Outcome { + actual_index_checksum: o.actual_index_checksum, + pack_traverse_outcome: o.pack_traverse_statistics.expect("pack is set"), + progress: o.progress, + }) + } + } +} diff --git a/vendor/gix-pack/src/bundle/write/error.rs b/vendor/gix-pack/src/bundle/write/error.rs new file mode 100644 index 000000000..883c34029 --- /dev/null +++ b/vendor/gix-pack/src/bundle/write/error.rs @@ -0,0 +1,17 @@ +use std::io; + +use gix_tempfile::handle::Writable; + +/// The error returned by [`Bundle::write_to_directory()`][crate::Bundle::write_to_directory()] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("An IO error occurred when reading the pack or creating a temporary file")] + Io(#[from] io::Error), + #[error(transparent)] + PackIter(#[from] crate::data::input::Error), + #[error("Could not move a temporary file into its desired place")] + Persist(#[from] gix_tempfile::handle::persist::Error<Writable>), + #[error(transparent)] + IndexWrite(#[from] crate::index::write::Error), +} diff --git a/vendor/gix-pack/src/bundle/write/mod.rs b/vendor/gix-pack/src/bundle/write/mod.rs new file mode 100644 index 000000000..fc0284b53 --- /dev/null +++ b/vendor/gix-pack/src/bundle/write/mod.rs @@ -0,0 +1,378 @@ +use std::{ + io, + io::Write, + marker::PhantomData, + path::{Path, PathBuf}, + sync::{atomic::AtomicBool, Arc}, +}; + +use gix_features::{interrupt, progress, progress::Progress}; +use gix_tempfile::{AutoRemove, ContainingDirectory}; + +use crate::data; + +mod error; +pub use error::Error; + +mod types; +use types::{LockWriter, PassThrough}; +pub use types::{Options, Outcome}; + +use crate::bundle::write::types::SharedTempFile; + +type ThinPackLookupFn = Box<dyn for<'a> FnMut(gix_hash::ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>>; +type ThinPackLookupFnSend = + Box<dyn for<'a> FnMut(gix_hash::ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>> + Send + 'static>; + +/// The progress ids used in [`write_to_directory()`][crate::Bundle::write_to_directory()]. +/// +/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. +#[derive(Debug, Copy, Clone)] +pub enum ProgressId { + /// The amount of bytes read from the input pack data file. + ReadPackBytes, + /// A root progress counting logical steps towards an index file on disk. + /// + /// Underneath will be more progress information related to actually producing the index. + IndexingSteps(PhantomData<crate::index::write::ProgressId>), +} + +impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::ReadPackBytes => *b"BWRB", + ProgressId::IndexingSteps(_) => *b"BWCI", + } + } +} + +impl crate::Bundle { + /// Given a `pack` data stream, write it along with a generated index into the `directory` if `Some` or discard all output if `None`. + /// + /// In the latter case, the functionality provided here is more a kind of pack data stream validation. 
+ /// + /// * `progress` provides detailed progress information which can be discarded with [`gix_features::progress::Discard`]. + /// * `should_interrupt` is checked regularly and when true, the whole operation will stop. + /// * `thin_pack_base_object_lookup_fn` If set, we expect to see a thin-pack with objects that reference their base object by object id which is + /// expected to exist in the object database the bundle is contained within. + /// `options` further configure how the task is performed. + /// + /// # Note + /// + /// * the resulting pack may be empty, that is, contains zero objects in some situations. This is a valid reply by a server and should + /// be accounted for. + /// - Empty packs always have the same name and not handling this case will result in at most one superfluous pack. + pub fn write_to_directory<P>( + pack: impl io::BufRead, + directory: Option<impl AsRef<Path>>, + mut progress: P, + should_interrupt: &AtomicBool, + thin_pack_base_object_lookup_fn: Option<ThinPackLookupFn>, + options: Options, + ) -> Result<Outcome, Error> + where + P: Progress, + { + let mut read_progress = progress.add_child_with_id("read pack", ProgressId::ReadPackBytes.into()); + read_progress.init(None, progress::bytes()); + let pack = progress::Read { + inner: pack, + progress: progress::ThroughputOnDrop::new(read_progress), + }; + + let object_hash = options.object_hash; + let data_file = Arc::new(parking_lot::Mutex::new(io::BufWriter::with_capacity( + 64 * 1024, + match directory.as_ref() { + Some(directory) => gix_tempfile::new(directory, ContainingDirectory::Exists, AutoRemove::Tempfile)?, + None => gix_tempfile::new(std::env::temp_dir(), ContainingDirectory::Exists, AutoRemove::Tempfile)?, + }, + ))); + let (pack_entries_iter, pack_version): ( + Box<dyn Iterator<Item = Result<data::input::Entry, data::input::Error>>>, + _, + ) = match thin_pack_base_object_lookup_fn { + Some(thin_pack_lookup_fn) => { + let pack = interrupt::Read { + inner: pack, + should_interrupt, + }; + let buffered_pack = io::BufReader::new(pack); + let pack_entries_iter = data::input::LookupRefDeltaObjectsIter::new( + data::input::BytesToEntriesIter::new_from_header( + buffered_pack, + options.iteration_mode, + data::input::EntryDataMode::KeepAndCrc32, + object_hash, + )?, + thin_pack_lookup_fn, + ); + let pack_version = pack_entries_iter.inner.version(); + let pack_entries_iter = data::input::EntriesToBytesIter::new( + pack_entries_iter, + LockWriter { + writer: data_file.clone(), + }, + pack_version, + gix_hash::Kind::Sha1, // Thin packs imply a pack being transported, and there we only ever know SHA1 at the moment. + ); + (Box::new(pack_entries_iter), pack_version) + } + None => { + let pack = PassThrough { + reader: interrupt::Read { + inner: pack, + should_interrupt, + }, + writer: Some(data_file.clone()), + }; + // This buf-reader is required to assure we call 'read()' in order to fill the (extra) buffer. Otherwise all the counting + // we do with the wrapped pack reader doesn't work as it does not expect anyone to call BufRead functions directly. + // However, this is exactly what's happening in the ZipReader implementation that is eventually used. + // The performance impact of this is probably negligible, compared to all the other work that is done anyway :D. 
+ let buffered_pack = io::BufReader::new(pack); + let pack_entries_iter = data::input::BytesToEntriesIter::new_from_header( + buffered_pack, + options.iteration_mode, + data::input::EntryDataMode::Crc32, + object_hash, + )?; + let pack_version = pack_entries_iter.version(); + (Box::new(pack_entries_iter), pack_version) + } + }; + let WriteOutcome { + outcome, + data_path, + index_path, + keep_path, + } = crate::Bundle::inner_write( + directory, + progress, + options, + data_file, + pack_entries_iter, + should_interrupt, + pack_version, + )?; + + Ok(Outcome { + index: outcome, + object_hash, + pack_version, + data_path, + index_path, + keep_path, + }) + } + + /// Equivalent to [`write_to_directory()`][crate::Bundle::write_to_directory()] but offloads reading of the pack into its own thread, hence the `Send + 'static'` bounds. + /// + /// # Note + /// + /// As it sends portions of the input to a thread it requires the 'static lifetime for the interrupt flags. This can only + /// be satisfied by a static AtomicBool which is only suitable for programs that only run one of these operations at a time + /// or don't mind that all of them abort when the flag is set. + pub fn write_to_directory_eagerly<P>( + pack: impl io::Read + Send + 'static, + pack_size: Option<u64>, + directory: Option<impl AsRef<Path>>, + mut progress: P, + should_interrupt: &'static AtomicBool, + thin_pack_base_object_lookup_fn: Option<ThinPackLookupFnSend>, + options: Options, + ) -> Result<Outcome, Error> + where + P: Progress, + P::SubProgress: 'static, + { + let mut read_progress = progress.add_child_with_id("read pack", ProgressId::ReadPackBytes.into()); /* Bundle Write Read pack Bytes*/ + read_progress.init(pack_size.map(|s| s as usize), progress::bytes()); + let pack = progress::Read { + inner: pack, + progress: progress::ThroughputOnDrop::new(read_progress), + }; + + let data_file = Arc::new(parking_lot::Mutex::new(io::BufWriter::new(match directory.as_ref() { + Some(directory) => gix_tempfile::new(directory, ContainingDirectory::Exists, AutoRemove::Tempfile)?, + None => gix_tempfile::new(std::env::temp_dir(), ContainingDirectory::Exists, AutoRemove::Tempfile)?, + }))); + let object_hash = options.object_hash; + let eight_pages = 4096 * 8; + let (pack_entries_iter, pack_version): ( + Box<dyn Iterator<Item = Result<data::input::Entry, data::input::Error>> + Send + 'static>, + _, + ) = match thin_pack_base_object_lookup_fn { + Some(thin_pack_lookup_fn) => { + let pack = interrupt::Read { + inner: pack, + should_interrupt, + }; + let buffered_pack = io::BufReader::with_capacity(eight_pages, pack); + let pack_entries_iter = data::input::LookupRefDeltaObjectsIter::new( + data::input::BytesToEntriesIter::new_from_header( + buffered_pack, + options.iteration_mode, + data::input::EntryDataMode::KeepAndCrc32, + object_hash, + )?, + thin_pack_lookup_fn, + ); + let pack_kind = pack_entries_iter.inner.version(); + (Box::new(pack_entries_iter), pack_kind) + } + None => { + let pack = PassThrough { + reader: interrupt::Read { + inner: pack, + should_interrupt, + }, + writer: Some(data_file.clone()), + }; + let buffered_pack = io::BufReader::with_capacity(eight_pages, pack); + let pack_entries_iter = data::input::BytesToEntriesIter::new_from_header( + buffered_pack, + options.iteration_mode, + data::input::EntryDataMode::Crc32, + object_hash, + )?; + let pack_kind = pack_entries_iter.version(); + (Box::new(pack_entries_iter), pack_kind) + } + }; + let num_objects = pack_entries_iter.size_hint().0; + let pack_entries_iter = + 
gix_features::parallel::EagerIterIf::new(move || num_objects > 25_000, pack_entries_iter, 5_000, 5); + + let WriteOutcome { + outcome, + data_path, + index_path, + keep_path, + } = crate::Bundle::inner_write( + directory, + progress, + options, + data_file, + pack_entries_iter, + should_interrupt, + pack_version, + )?; + + Ok(Outcome { + index: outcome, + object_hash, + pack_version, + data_path, + index_path, + keep_path, + }) + } + + fn inner_write( + directory: Option<impl AsRef<Path>>, + mut progress: impl Progress, + Options { + thread_limit, + iteration_mode: _, + index_version: index_kind, + object_hash, + }: Options, + data_file: SharedTempFile, + pack_entries_iter: impl Iterator<Item = Result<data::input::Entry, data::input::Error>>, + should_interrupt: &AtomicBool, + pack_version: data::Version, + ) -> Result<WriteOutcome, Error> { + let indexing_progress = progress.add_child_with_id( + "create index file", + ProgressId::IndexingSteps(Default::default()).into(), + ); + Ok(match directory { + Some(directory) => { + let directory = directory.as_ref(); + let mut index_file = gix_tempfile::new(directory, ContainingDirectory::Exists, AutoRemove::Tempfile)?; + + let outcome = crate::index::File::write_data_iter_to_stream( + index_kind, + { + let data_file = Arc::clone(&data_file); + move || new_pack_file_resolver(data_file) + }, + pack_entries_iter, + thread_limit, + indexing_progress, + &mut index_file, + should_interrupt, + object_hash, + pack_version, + )?; + + let data_path = directory.join(format!("pack-{}.pack", outcome.data_hash.to_hex())); + let index_path = data_path.with_extension("idx"); + let keep_path = data_path.with_extension("keep"); + + std::fs::write(&keep_path, b"")?; + Arc::try_unwrap(data_file) + .expect("only one handle left after pack was consumed") + .into_inner() + .into_inner() + .map_err(|err| Error::from(err.into_error()))? + .persist(&data_path)?; + index_file + .persist(&index_path) + .map_err(|err| { + progress.info(format!( + "pack file at {} is retained despite failing to move the index file into place. 
You can use plumbing to make it usable.", + data_path.display() + )); + err + })?; + WriteOutcome { + outcome, + data_path: Some(data_path), + index_path: Some(index_path), + keep_path: Some(keep_path), + } + } + None => WriteOutcome { + outcome: crate::index::File::write_data_iter_to_stream( + index_kind, + move || new_pack_file_resolver(data_file), + pack_entries_iter, + thread_limit, + indexing_progress, + io::sink(), + should_interrupt, + object_hash, + pack_version, + )?, + data_path: None, + index_path: None, + keep_path: None, + }, + }) + } +} + +fn new_pack_file_resolver( + data_file: SharedTempFile, +) -> io::Result<impl Fn(data::EntryRange, &mut Vec<u8>) -> Option<()> + Send + Clone> { + let mut guard = data_file.lock(); + guard.flush()?; + let mapped_file = Arc::new(crate::mmap::read_only( + &guard.get_mut().with_mut(|f| f.path().to_owned())?, + )?); + let pack_data_lookup = move |range: std::ops::Range<u64>, out: &mut Vec<u8>| -> Option<()> { + mapped_file + .get(range.start as usize..range.end as usize) + .map(|pack_entry| out.copy_from_slice(pack_entry)) + }; + Ok(pack_data_lookup) +} + +struct WriteOutcome { + outcome: crate::index::write::Outcome, + data_path: Option<PathBuf>, + index_path: Option<PathBuf>, + keep_path: Option<PathBuf>, +} diff --git a/vendor/gix-pack/src/bundle/write/types.rs b/vendor/gix-pack/src/bundle/write/types.rs new file mode 100644 index 000000000..56c14ac59 --- /dev/null +++ b/vendor/gix-pack/src/bundle/write/types.rs @@ -0,0 +1,120 @@ +use std::{hash::Hash, io, io::SeekFrom, path::PathBuf, sync::Arc}; + +use gix_tempfile::handle::Writable; + +/// Configuration for [write_to_directory][crate::Bundle::write_to_directory()] or +/// [write_to_directory_eagerly][crate::Bundle::write_to_directory_eagerly()] +#[derive(Debug, Clone)] +pub struct Options { + /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used. + pub thread_limit: Option<usize>, + /// Determine how much processing to spend on protecting against corruption or recovering from errors. + pub iteration_mode: crate::data::input::Mode, + /// The version of pack index to write, should be [`crate::index::Version::default()`] + pub index_version: crate::index::Version, + /// The kind of hash to use when writing the bundle. + pub object_hash: gix_hash::Kind, +} + +impl Default for Options { + /// Options which favor speed and correctness and write the most commonly supported index file. + fn default() -> Self { + Options { + thread_limit: None, + iteration_mode: crate::data::input::Mode::Verify, + index_version: Default::default(), + object_hash: Default::default(), + } + } +} + +/// Returned by [write_to_directory][crate::Bundle::write_to_directory()] or +/// [write_to_directory_eagerly][crate::Bundle::write_to_directory_eagerly()] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Outcome { + /// The successful result of the index write operation + pub index: crate::index::write::Outcome, + /// The version of the pack + pub pack_version: crate::data::Version, + /// The kind of hash stored within the pack and indices + pub object_hash: gix_hash::Kind, + + /// The path to the pack index file + pub index_path: Option<PathBuf>, + /// The path to the pack data file + pub data_path: Option<PathBuf>, + /// The path to the `.keep` file to prevent collection of the newly written pack until refs are pointing to it. 
+ /// + /// The file is created right before moving the pack data and index data into place (i.e. `data_path` and `index_path`) + /// and is expected to be removed by the caller when ready. + pub keep_path: Option<PathBuf>, +} + +impl Outcome { + /// Instantiate a bundle from the newly written index and data file that are represented by this `Outcome` + pub fn to_bundle(&self) -> Option<Result<crate::Bundle, crate::bundle::init::Error>> { + self.index_path + .as_ref() + .map(|path| crate::Bundle::at(path, self.object_hash)) + } +} + +pub(crate) type SharedTempFile = Arc<parking_lot::Mutex<std::io::BufWriter<gix_tempfile::Handle<Writable>>>>; + +pub(crate) struct PassThrough<R> { + pub reader: R, + pub writer: Option<SharedTempFile>, +} + +impl<R> io::Read for PassThrough<R> +where + R: io::Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + let bytes_read = self.reader.read(buf)?; + if let Some(writer) = self.writer.as_mut() { + use std::io::Write; + writer.lock().write_all(&buf[..bytes_read])?; + } + Ok(bytes_read) + } +} +impl<R> io::BufRead for PassThrough<R> +where + R: io::BufRead, +{ + fn fill_buf(&mut self) -> io::Result<&[u8]> { + self.reader.fill_buf() + } + + fn consume(&mut self, amt: usize) { + self.reader.consume(amt) + } +} + +pub(crate) struct LockWriter { + pub writer: SharedTempFile, +} + +impl io::Write for LockWriter { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + self.writer.lock().write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.writer.lock().flush() + } +} + +impl io::Read for LockWriter { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + self.writer.lock().get_mut().read(buf) + } +} + +impl io::Seek for LockWriter { + fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { + self.writer.lock().seek(pos) + } +} diff --git a/vendor/gix-pack/src/cache/delta/from_offsets.rs b/vendor/gix-pack/src/cache/delta/from_offsets.rs new file mode 100644 index 000000000..8acb4a802 --- /dev/null +++ b/vendor/gix-pack/src/cache/delta/from_offsets.rs @@ -0,0 +1,161 @@ +use std::{ + convert::TryFrom, + fs, io, + io::{BufRead, Read, Seek, SeekFrom}, + sync::atomic::{AtomicBool, Ordering}, + time::Instant, +}; + +use gix_features::progress::{self, Progress}; + +use crate::{cache::delta::Tree, data}; + +/// Returned by [`Tree::from_offsets_in_pack()`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("{message}")] + Io { source: io::Error, message: &'static str }, + #[error(transparent)] + Header(#[from] crate::data::header::decode::Error), + #[error("Could find object with id {id} in this pack. Thin packs are not supported")] + UnresolvedRefDelta { id: gix_hash::ObjectId }, + #[error(transparent)] + Tree(#[from] crate::cache::delta::Error), + #[error("Interrupted")] + Interrupted, +} + +const PACK_HEADER_LEN: usize = 12; + +/// Generate tree from certain input +impl<T> Tree<T> { + /// Create a new `Tree` from any data sorted by offset, ascending as returned by the `data_sorted_by_offsets` iterator. + /// * `get_pack_offset(item: &T`) -> data::Offset` is a function returning the pack offset of the given item, which can be used + /// for obtaining the objects entry within the pack. + /// * `pack_path` is the path to the pack file itself and from which to read the entry data, which is a pack file matching the offsets + /// returned by `get_pack_offset(…)`. + /// * `progress` is used to track progress when creating the tree. 
+ /// * `resolve_in_pack_id(gix_hash::oid) -> Option<data::Offset>` takes an object ID and tries to resolve it to an object within this pack if + /// possible. Failing to do so aborts the operation, and this function is not expected to be called in usual packs. It's a theoretical + /// possibility though as old packs might have referred to their objects using the 20 bytes hash, instead of their encoded offset from the base. + /// + /// Note that the sort order is ascending. The given pack file path must match the provided offsets. + pub fn from_offsets_in_pack( + pack_path: impl AsRef<std::path::Path>, + data_sorted_by_offsets: impl Iterator<Item = T>, + get_pack_offset: impl Fn(&T) -> data::Offset, + resolve_in_pack_id: impl Fn(&gix_hash::oid) -> Option<data::Offset>, + mut progress: impl Progress, + should_interrupt: &AtomicBool, + object_hash: gix_hash::Kind, + ) -> Result<Self, Error> { + let mut r = io::BufReader::with_capacity( + 8192 * 8, // this value directly corresponds to performance, 8k (default) is about 4x slower than 64k + fs::File::open(pack_path).map_err(|err| Error::Io { + source: err, + message: "open pack path", + })?, + ); + + let anticipated_num_objects = if let Some(num_objects) = data_sorted_by_offsets.size_hint().1 { + progress.init(Some(num_objects), progress::count("objects")); + num_objects + } else { + 0 + }; + let mut tree = Tree::with_capacity(anticipated_num_objects)?; + + { + // safety check - assure ourselves it's a pack we can handle + let mut buf = [0u8; PACK_HEADER_LEN]; + r.read_exact(&mut buf).map_err(|err| Error::Io { + source: err, + message: "reading header buffer with at least 12 bytes failed - pack file truncated?", + })?; + crate::data::header::decode(&buf)?; + } + + let then = Instant::now(); + + let mut previous_cursor_position = None::<u64>; + + let hash_len = object_hash.len_in_bytes(); + for (idx, data) in data_sorted_by_offsets.enumerate() { + let pack_offset = get_pack_offset(&data); + if let Some(previous_offset) = previous_cursor_position { + Self::advance_cursor_to_pack_offset(&mut r, pack_offset, previous_offset)?; + }; + let entry = crate::data::Entry::from_read(&mut r, pack_offset, hash_len).map_err(|err| Error::Io { + source: err, + message: "EOF while parsing header", + })?; + previous_cursor_position = Some(pack_offset + entry.header_size() as u64); + + use crate::data::entry::Header::*; + match entry.header { + Tree | Blob | Commit | Tag => { + tree.add_root(pack_offset, data)?; + } + RefDelta { base_id } => { + resolve_in_pack_id(base_id.as_ref()) + .ok_or(Error::UnresolvedRefDelta { id: base_id }) + .and_then(|base_pack_offset| { + tree.add_child(base_pack_offset, pack_offset, data).map_err(Into::into) + })?; + } + OfsDelta { base_distance } => { + let base_pack_offset = pack_offset + .checked_sub(base_distance) + .expect("in bound distance for deltas"); + tree.add_child(base_pack_offset, pack_offset, data)?; + } + }; + progress.inc(); + if idx % 10_000 == 0 && should_interrupt.load(Ordering::SeqCst) { + return Err(Error::Interrupted); + } + } + + progress.show_throughput(then); + Ok(tree) + } + + fn advance_cursor_to_pack_offset( + r: &mut io::BufReader<fs::File>, + pack_offset: u64, + previous_offset: u64, + ) -> Result<(), Error> { + let bytes_to_skip: u64 = pack_offset + .checked_sub(previous_offset) + .expect("continuously ascending pack offsets"); + if bytes_to_skip == 0 { + return Ok(()); + } + let buf = r.fill_buf().map_err(|err| Error::Io { + source: err, + message: "skip bytes", + })?; + if buf.is_empty() { + // This 
means we have reached the end of file and can't make progress anymore, before we have satisfied our need + // for more + return Err(Error::Io { + source: io::Error::new( + io::ErrorKind::UnexpectedEof, + "ran out of bytes before reading desired amount of bytes", + ), + message: "index file is damaged or corrupt", + }); + } + if bytes_to_skip <= u64::try_from(buf.len()).expect("sensible buffer size") { + // SAFETY: bytes_to_skip <= buf.len() <= usize::MAX + r.consume(bytes_to_skip as usize); + } else { + r.seek(SeekFrom::Start(pack_offset)).map_err(|err| Error::Io { + source: err, + message: "seek to next entry", + })?; + } + Ok(()) + } +} diff --git a/vendor/gix-pack/src/cache/delta/mod.rs b/vendor/gix-pack/src/cache/delta/mod.rs new file mode 100644 index 000000000..f4c1b6fc6 --- /dev/null +++ b/vendor/gix-pack/src/cache/delta/mod.rs @@ -0,0 +1,216 @@ +/// Returned when using various methods on a [`Tree`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("Pack offsets must only increment. The previous pack offset was {last_pack_offset}, the current one is {pack_offset}")] + InvariantIncreasingPackOffset { + /// The last seen pack offset + last_pack_offset: crate::data::Offset, + /// The invariant violating offset + pack_offset: crate::data::Offset, + }, +} + +/// +pub mod traverse; + +/// +pub mod from_offsets; + +/// An item stored within the [`Tree`] +pub struct Item<T> { + /// The offset into the pack file at which the pack entry's data is located. + pub offset: crate::data::Offset, + /// The offset of the next item in the pack file. + pub next_offset: crate::data::Offset, + /// Data to store with each Item, effectively data associated with each entry in a pack. + pub data: T, + /// Indices into our Tree's `items`, one for each pack entry that depends on us. + /// + /// Limited to u32 as that's the maximum amount of objects in a pack. + children: Vec<u32>, +} + +/// Identify what kind of node we have last seen +enum NodeKind { + Root, + Child, +} + +/// A tree that allows one-time iteration over all nodes and their children, consuming it in the process, +/// while being shareable among threads without a lock. +/// It does this by making the guarantee that iteration only happens once. +pub struct Tree<T> { + /// The root nodes, i.e. base objects + root_items: Vec<Item<T>>, + /// The child nodes, i.e. those that rely a base object, like ref and ofs delta objects + child_items: Vec<Item<T>>, + /// The last encountered node was either a root or a child. + last_seen: Option<NodeKind>, + /// Future child offsets, associating their offset into the pack with their index in the items array. + /// (parent_offset, child_index) + future_child_offsets: Vec<(crate::data::Offset, usize)>, +} + +impl<T> Tree<T> { + /// Instantiate a empty tree capable of storing `num_objects` amounts of items. 
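+    /// # Example
+    ///
+    /// A small sketch (illustrative only) of how a tree is populated; the pack offsets used here
+    /// are made up, and offsets passed to `add_root()` and `add_child()` must be strictly increasing:
+    ///
+    /// ```ignore
+    /// let mut tree = Tree::with_capacity(2)?;
+    /// tree.add_root(12, "base")?;       // a base object at (hypothetical) pack offset 12
+    /// tree.add_child(12, 40, "delta")?; // an ofs-delta at offset 40 whose base sits at offset 12
+    /// ```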
+ pub fn with_capacity(num_objects: usize) -> Result<Self, Error> { + Ok(Tree { + root_items: Vec::with_capacity(num_objects / 2), + child_items: Vec::with_capacity(num_objects / 2), + last_seen: None, + future_child_offsets: Vec::new(), + }) + } + + fn num_items(&self) -> usize { + self.root_items.len() + self.child_items.len() + } + + fn assert_is_incrementing_and_update_next_offset(&mut self, offset: crate::data::Offset) -> Result<(), Error> { + let items = match &self.last_seen { + Some(NodeKind::Root) => &mut self.root_items, + Some(NodeKind::Child) => &mut self.child_items, + None => return Ok(()), + }; + let item = &mut items.last_mut().expect("last seen won't lie"); + if offset <= item.offset { + return Err(Error::InvariantIncreasingPackOffset { + last_pack_offset: item.offset, + pack_offset: offset, + }); + } + item.next_offset = offset; + Ok(()) + } + + fn set_pack_entries_end_and_resolve_ref_offsets( + &mut self, + pack_entries_end: crate::data::Offset, + ) -> Result<(), traverse::Error> { + if !self.future_child_offsets.is_empty() { + for (parent_offset, child_index) in self.future_child_offsets.drain(..) { + if let Ok(i) = self.child_items.binary_search_by_key(&parent_offset, |i| i.offset) { + self.child_items[i].children.push(child_index as u32); + } else if let Ok(i) = self.root_items.binary_search_by_key(&parent_offset, |i| i.offset) { + self.root_items[i].children.push(child_index as u32); + } else { + return Err(traverse::Error::OutOfPackRefDelta { + base_pack_offset: parent_offset, + }); + } + } + } + + self.assert_is_incrementing_and_update_next_offset(pack_entries_end) + .expect("BUG: pack now is smaller than all previously seen entries"); + Ok(()) + } + + /// Add a new root node, one that only has children but is not a child itself, at the given pack `offset` and associate + /// custom `data` with it. + pub fn add_root(&mut self, offset: crate::data::Offset, data: T) -> Result<(), Error> { + self.assert_is_incrementing_and_update_next_offset(offset)?; + self.last_seen = NodeKind::Root.into(); + self.root_items.push(Item { + offset, + next_offset: 0, + data, + children: Default::default(), + }); + Ok(()) + } + + /// Add a child of the item at `base_offset` which itself resides at pack `offset` and associate custom `data` with it. 
+ pub fn add_child( + &mut self, + base_offset: crate::data::Offset, + offset: crate::data::Offset, + data: T, + ) -> Result<(), Error> { + self.assert_is_incrementing_and_update_next_offset(offset)?; + + let next_child_index = self.child_items.len(); + if let Ok(i) = self.child_items.binary_search_by_key(&base_offset, |i| i.offset) { + self.child_items[i].children.push(next_child_index as u32); + } else if let Ok(i) = self.root_items.binary_search_by_key(&base_offset, |i| i.offset) { + self.root_items[i].children.push(next_child_index as u32); + } else { + self.future_child_offsets.push((base_offset, next_child_index)); + } + + self.last_seen = NodeKind::Child.into(); + self.child_items.push(Item { + offset, + next_offset: 0, + data, + children: Default::default(), + }); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + mod tree { + mod from_offsets_in_pack { + use std::sync::atomic::AtomicBool; + + use crate as pack; + + const SMALL_PACK_INDEX: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx"; + const SMALL_PACK: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack"; + + const INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx"; + const PACK_FOR_INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack"; + + use gix_testtools::fixture_path; + + #[test] + fn v1() -> Result<(), Box<dyn std::error::Error>> { + tree(INDEX_V1, PACK_FOR_INDEX_V1) + } + + #[test] + fn v2() -> Result<(), Box<dyn std::error::Error>> { + tree(SMALL_PACK_INDEX, SMALL_PACK) + } + + fn tree(index_path: &str, pack_path: &str) -> Result<(), Box<dyn std::error::Error>> { + let idx = pack::index::File::at(fixture_path(index_path), gix_hash::Kind::Sha1)?; + crate::cache::delta::Tree::from_offsets_in_pack( + fixture_path(pack_path), + idx.sorted_offsets().into_iter(), + |ofs| *ofs, + |id| idx.lookup(id).map(|index| idx.pack_offset_at_index(index)), + gix_features::progress::Discard, + &AtomicBool::new(false), + gix_hash::Kind::Sha1, + )?; + Ok(()) + } + } + } + + #[test] + fn size_of_pack_tree_item() { + use super::Item; + assert_eq!(std::mem::size_of::<[Item<()>; 7_500_000]>(), 300_000_000); + } + + #[test] + fn size_of_pack_verify_data_structure() { + use super::Item; + pub struct EntryWithDefault { + _index_entry: crate::index::Entry, + _kind: gix_object::Kind, + _object_size: u64, + _decompressed_size: u64, + _compressed_size: u64, + _header_size: u16, + _level: u16, + } + + assert_eq!(std::mem::size_of::<[Item<EntryWithDefault>; 7_500_000]>(), 840_000_000); + } +} diff --git a/vendor/gix-pack/src/cache/delta/traverse/mod.rs b/vendor/gix-pack/src/cache/delta/traverse/mod.rs new file mode 100644 index 000000000..bfe2ec687 --- /dev/null +++ b/vendor/gix-pack/src/cache/delta/traverse/mod.rs @@ -0,0 +1,177 @@ +use std::sync::atomic::{AtomicBool, Ordering}; + +use gix_features::{ + parallel::in_parallel_with_slice, + progress::{self, Progress}, + threading::{lock, Mutable, OwnShared}, +}; + +use crate::{ + cache::delta::{traverse::util::ItemSliceSend, Item, Tree}, + data::EntryRange, +}; + +mod resolve; +pub(crate) mod util; + +/// Returned by [`Tree::traverse()`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("{message}")] + ZlibInflate { + source: gix_features::zlib::inflate::Error, + message: &'static str, + }, + #[error("The resolver failed to obtain the pack entry bytes for the entry at {pack_offset}")] + ResolveFailed { pack_offset: u64 }, + #[error("One of the object inspectors 
failed")] + Inspect(#[from] Box<dyn std::error::Error + Send + Sync>), + #[error("Interrupted")] + Interrupted, + #[error( + "The base at {base_pack_offset} was referred to by a ref-delta, but it was never added to the tree as if the pack was still thin." + )] + OutOfPackRefDelta { + /// The base's offset which was from a resolved ref-delta that didn't actually get added to the tree + base_pack_offset: crate::data::Offset, + }, +} + +/// Additional context passed to the `inspect_object(…)` function of the [`Tree::traverse()`] method. +pub struct Context<'a, S> { + /// The pack entry describing the object + pub entry: &'a crate::data::Entry, + /// The offset at which `entry` ends in the pack, useful to learn about the exact range of `entry` within the pack. + pub entry_end: u64, + /// The decompressed object itself, ready to be decoded. + pub decompressed: &'a [u8], + /// Custom state known to the function + pub state: &'a mut S, + /// The depth at which this object resides in the delta-tree. It represents the amount of base objects, with 0 indicating + /// an 'undeltified' object, and higher values indicating delta objects with the given amount of bases. + pub level: u16, +} + +/// Options for [`Tree::traverse()`]. +pub struct Options<'a, P1, P2> { + /// is a progress instance to track progress for each object in the traversal. + pub object_progress: P1, + /// is a progress instance to track the overall progress. + pub size_progress: P2, + /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on + /// the amount of available logical cores. + pub thread_limit: Option<usize>, + /// Abort the operation if the value is `true`. + pub should_interrupt: &'a AtomicBool, + /// specifies what kind of hashes we expect to be stored in oid-delta entries, which is viable to decoding them + /// with the correct size. + pub object_hash: gix_hash::Kind, +} + +/// The outcome of [`Tree::traverse()`] +pub struct Outcome<T> { + /// The items that have no children in the pack, i.e. base objects. + pub roots: Vec<Item<T>>, + /// The items that children to a root object, i.e. delta objects. + pub children: Vec<Item<T>>, +} + +impl<T> Tree<T> +where + T: Send, +{ + /// Traverse this tree of delta objects with a function `inspect_object` to process each object at will. + /// + /// * `should_run_in_parallel() -> bool` returns true if the underlying pack is big enough to warrant parallel traversal at all. + /// * `resolve(EntrySlice, &mut Vec<u8>) -> Option<()>` resolves the bytes in the pack for the given `EntrySlice` and stores them in the + /// output vector. It returns `Some(())` if the object existed in the pack, or `None` to indicate a resolution error, which would abort the + /// operation as well. + /// * `pack_entries_end` marks one-past-the-last byte of the last entry in the pack, as the last entries size would otherwise + /// be unknown as it's not part of the index file. + /// * `new_thread_state() -> State` is a function to create state to be used in each thread, invoked once per thread. + /// * `inspect_object(node_data: &mut T, progress: Progress, context: Context<ThreadLocal State>) -> Result<(), CustomError>` is a function + /// running for each thread receiving fully decoded objects along with contextual information, which either succeeds with `Ok(())` + /// or returns a `CustomError`. + /// Note that `node_data` can be modified to allow storing maintaining computation results on a per-object basis. 
+ /// + /// This method returns a vector of all tree items, along with their potentially modified custom node data. + /// + /// _Note_ that this method consumed the Tree to assure safe parallel traversal with mutation support. + pub fn traverse<F, P1, P2, MBFN, S, E>( + mut self, + resolve: F, + pack_entries_end: u64, + new_thread_state: impl Fn() -> S + Send + Clone, + inspect_object: MBFN, + Options { + thread_limit, + object_progress, + mut size_progress, + should_interrupt, + object_hash, + }: Options<'_, P1, P2>, + ) -> Result<Outcome<T>, Error> + where + F: for<'r> Fn(EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone, + P1: Progress, + P2: Progress, + MBFN: Fn(&mut T, &mut <P1 as Progress>::SubProgress, Context<'_, S>) -> Result<(), E> + Send + Clone, + E: std::error::Error + Send + Sync + 'static, + { + self.set_pack_entries_end_and_resolve_ref_offsets(pack_entries_end)?; + let object_progress = OwnShared::new(Mutable::new(object_progress)); + + let num_objects = self.num_items(); + let object_counter = { + let mut progress = lock(&object_progress); + progress.init(Some(num_objects), progress::count("objects")); + progress.counter() + }; + size_progress.init(None, progress::bytes()); + let size_counter = size_progress.counter(); + let child_items = self.child_items.as_mut_slice(); + + let start = std::time::Instant::now(); + in_parallel_with_slice( + &mut self.root_items, + thread_limit, + { + let object_progress = object_progress.clone(); + let child_items = ItemSliceSend(child_items as *mut [Item<T>]); + move |thread_index| { + ( + Vec::<u8>::with_capacity(4096), + lock(&object_progress) + .add_child_with_id(format!("thread {thread_index}"), gix_features::progress::UNKNOWN), + new_thread_state(), + resolve.clone(), + inspect_object.clone(), + ItemSliceSend(child_items.0), + ) + } + }, + { + move |node, state| { + resolve::deltas( + object_counter.clone(), + size_counter.clone(), + node, + state, + object_hash.len_in_bytes(), + ) + } + }, + || (!should_interrupt.load(Ordering::Relaxed)).then(|| std::time::Duration::from_millis(50)), + |_| (), + )?; + + lock(&object_progress).show_throughput(start); + size_progress.show_throughput(start); + + Ok(Outcome { + roots: self.root_items, + children: self.child_items, + }) + } +} diff --git a/vendor/gix-pack/src/cache/delta/traverse/resolve.rs b/vendor/gix-pack/src/cache/delta/traverse/resolve.rs new file mode 100644 index 000000000..fc94d87ef --- /dev/null +++ b/vendor/gix-pack/src/cache/delta/traverse/resolve.rs @@ -0,0 +1,154 @@ +use std::{cell::RefCell, collections::BTreeMap, sync::atomic::Ordering}; + +use gix_features::{progress::Progress, zlib}; + +use crate::{ + cache::delta::{ + traverse::{ + util::{ItemSliceSend, Node}, + Context, Error, + }, + Item, + }, + data::EntryRange, +}; + +pub(crate) fn deltas<T, F, P, MBFN, S, E>( + object_counter: Option<gix_features::progress::StepShared>, + size_counter: Option<gix_features::progress::StepShared>, + node: &mut crate::cache::delta::Item<T>, + (bytes_buf, ref mut progress, state, resolve, modify_base, child_items): &mut ( + Vec<u8>, + P, + S, + F, + MBFN, + ItemSliceSend<Item<T>>, + ), + hash_len: usize, +) -> Result<(), Error> +where + T: Send, + F: for<'r> Fn(EntryRange, &'r mut Vec<u8>) -> Option<()>, + P: Progress, + MBFN: Fn(&mut T, &mut P, Context<'_, S>) -> Result<(), E>, + E: std::error::Error + Send + Sync + 'static, +{ + let mut decompressed_bytes_by_pack_offset = BTreeMap::new(); + let bytes_buf = RefCell::new(bytes_buf); + let decompress_from_resolver = |slice: 
EntryRange| -> Result<(crate::data::Entry, u64, Vec<u8>), Error> { + let mut bytes_buf = bytes_buf.borrow_mut(); + bytes_buf.resize((slice.end - slice.start) as usize, 0); + resolve(slice.clone(), &mut bytes_buf).ok_or(Error::ResolveFailed { + pack_offset: slice.start, + })?; + let entry = crate::data::Entry::from_bytes(&bytes_buf, slice.start, hash_len); + let compressed = &bytes_buf[entry.header_size()..]; + let decompressed_len = entry.decompressed_size as usize; + Ok((entry, slice.end, decompress_all_at_once(compressed, decompressed_len)?)) + }; + + // Traverse the tree breadth first and loose the data produced for the base as it won't be needed anymore. + progress.init(None, gix_features::progress::count_with_decimals("objects", 2)); + + // each node is a base, and its children always start out as deltas which become a base after applying them. + // These will be pushed onto our stack until all are processed + let root_level = 0; + let mut nodes: Vec<_> = vec![( + root_level, + Node { + item: node, + child_items: child_items.0, + }, + )]; + while let Some((level, mut base)) = nodes.pop() { + let (base_entry, entry_end, base_bytes) = if level == root_level { + decompress_from_resolver(base.entry_slice())? + } else { + decompressed_bytes_by_pack_offset + .remove(&base.offset()) + .expect("we store the resolved delta buffer when done") + }; + + // anything done here must be repeated further down for leaf-nodes. + // This way we avoid retaining their decompressed memory longer than needed (they have no children, + // thus their memory can be released right away, using 18% less peak memory on the linux kernel). + { + modify_base( + base.data(), + progress, + Context { + entry: &base_entry, + entry_end, + decompressed: &base_bytes, + state, + level, + }, + ) + .map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?; + object_counter.as_ref().map(|c| c.fetch_add(1, Ordering::SeqCst)); + size_counter + .as_ref() + .map(|c| c.fetch_add(base_bytes.len(), Ordering::SeqCst)); + } + + for mut child in base.into_child_iter() { + let (mut child_entry, entry_end, delta_bytes) = decompress_from_resolver(child.entry_slice())?; + let (base_size, consumed) = crate::data::delta::decode_header_size(&delta_bytes); + let mut header_ofs = consumed; + assert_eq!( + base_bytes.len(), + base_size as usize, + "recorded base size in delta does not match" + ); + let (result_size, consumed) = crate::data::delta::decode_header_size(&delta_bytes[consumed..]); + header_ofs += consumed; + + let mut fully_resolved_delta_bytes = bytes_buf.borrow_mut(); + fully_resolved_delta_bytes.resize(result_size as usize, 0); + crate::data::delta::apply(&base_bytes, &mut fully_resolved_delta_bytes, &delta_bytes[header_ofs..]); + + // FIXME: this actually invalidates the "pack_offset()" computation, which is not obvious to consumers + // at all + child_entry.header = base_entry.header; // assign the actual object type, instead of 'delta' + if child.has_children() { + decompressed_bytes_by_pack_offset.insert( + child.offset(), + (child_entry, entry_end, fully_resolved_delta_bytes.to_owned()), + ); + nodes.push((level + 1, child)); + } else { + modify_base( + child.data(), + progress, + Context { + entry: &child_entry, + entry_end, + decompressed: &fully_resolved_delta_bytes, + state, + level: level + 1, + }, + ) + .map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?; + object_counter.as_ref().map(|c| c.fetch_add(1, Ordering::SeqCst)); + size_counter + .as_ref() + .map(|c| 
c.fetch_add(base_bytes.len(), Ordering::SeqCst)); + } + } + } + + Ok(()) +} + +fn decompress_all_at_once(b: &[u8], decompressed_len: usize) -> Result<Vec<u8>, Error> { + let mut out = Vec::new(); + out.resize(decompressed_len, 0); + zlib::Inflate::default() + .once(b, &mut out) + .map_err(|err| Error::ZlibInflate { + source: err, + message: "Failed to decompress entry", + })?; + Ok(out) +} diff --git a/vendor/gix-pack/src/cache/delta/traverse/util.rs b/vendor/gix-pack/src/cache/delta/traverse/util.rs new file mode 100644 index 000000000..e7caf2ff5 --- /dev/null +++ b/vendor/gix-pack/src/cache/delta/traverse/util.rs @@ -0,0 +1,63 @@ +use crate::cache::delta::Item; + +pub struct ItemSliceSend<T>(pub *mut [T]) +where + T: Send; + +impl<T> Clone for ItemSliceSend<T> +where + T: Send, +{ + fn clone(&self) -> Self { + ItemSliceSend(self.0) + } +} + +// SAFETY: T is `Send`, and we only ever access one T at a time. And, ptrs need that assurance, I wonder if it's always right. +#[allow(unsafe_code)] +unsafe impl<T> Send for ItemSliceSend<T> where T: Send {} + +/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`]. +pub struct Node<'a, T> { + pub item: &'a mut Item<T>, + pub child_items: *mut [Item<T>], +} + +impl<'a, T> Node<'a, T> { + /// Returns the offset into the pack at which the `Node`s data is located. + pub fn offset(&self) -> u64 { + self.item.offset + } + + /// Returns the slice into the data pack at which the pack entry is located. + pub fn entry_slice(&self) -> crate::data::EntryRange { + self.item.offset..self.item.next_offset + } + + /// Returns the node data associated with this node. + pub fn data(&mut self) -> &mut T { + &mut self.item.data + } + + /// Returns true if this node has children, e.g. is not a leaf in the tree. + pub fn has_children(&self) -> bool { + !self.item.children.is_empty() + } + + /// Transform this `Node` into an iterator over its children. + /// + /// Children are `Node`s referring to pack entries whose base object is this pack entry. + pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a { + let children = self.child_items; + self.item.children.iter().map(move |&index| { + // SAFETY: The children array is alive by the 'a lifetime. + // SAFETY: The index is a valid index into the children array. + // SAFETY: The resulting mutable pointer cannot be yielded by any other node. + #[allow(unsafe_code)] + Node { + item: unsafe { &mut *(children as *mut Item<T>).add(index as usize) }, + child_items: children, + } + }) + } +} diff --git a/vendor/gix-pack/src/cache/lru.rs b/vendor/gix-pack/src/cache/lru.rs new file mode 100644 index 000000000..bba4f5d33 --- /dev/null +++ b/vendor/gix-pack/src/cache/lru.rs @@ -0,0 +1,165 @@ +use super::DecodeEntry; + +#[cfg(feature = "pack-cache-lru-dynamic")] +mod memory { + use std::num::NonZeroUsize; + + use clru::WeightScale; + + use super::DecodeEntry; + + struct Entry { + data: Vec<u8>, + kind: gix_object::Kind, + compressed_size: usize, + } + + type Key = (u32, u64); + struct CustomScale; + + impl WeightScale<Key, Entry> for CustomScale { + fn weight(&self, _key: &Key, value: &Entry) -> usize { + value.data.len() + } + } + + /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes. 
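+    /// # Example
+    ///
+    /// A construction sketch (illustrative only, requires the `pack-cache-lru-dynamic` feature):
+    ///
+    /// ```ignore
+    /// // Cap cached, decompressed pack-entry data at roughly 64 MiB.
+    /// let mut cache = gix_pack::cache::lru::MemoryCappedHashmap::new(64 * 1024 * 1024);
+    /// ```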
+ pub struct MemoryCappedHashmap { + inner: clru::CLruCache<Key, Entry, std::collections::hash_map::RandomState, CustomScale>, + free_list: Vec<Vec<u8>>, + debug: gix_features::cache::Debug, + } + + impl MemoryCappedHashmap { + /// Return a new instance which evicts least recently used items if it uses more than `memory_cap_in_bytes` + /// object data. + pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap { + MemoryCappedHashmap { + inner: clru::CLruCache::with_config( + clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero")) + .with_scale(CustomScale), + ), + free_list: Vec::new(), + debug: gix_features::cache::Debug::new(format!("MemoryCappedHashmap({memory_cap_in_bytes}B)")), + } + } + } + + impl DecodeEntry for MemoryCappedHashmap { + fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) { + self.debug.put(); + if let Ok(Some(previous_entry)) = self.inner.put_with_weight( + (pack_id, offset), + Entry { + data: self + .free_list + .pop() + .map(|mut v| { + v.clear(); + v.resize(data.len(), 0); + v.copy_from_slice(data); + v + }) + .unwrap_or_else(|| Vec::from(data)), + kind, + compressed_size, + }, + ) { + self.free_list.push(previous_entry.data) + } + } + + fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> { + let res = self.inner.get(&(pack_id, offset)).map(|e| { + out.resize(e.data.len(), 0); + out.copy_from_slice(&e.data); + (e.kind, e.compressed_size) + }); + if res.is_some() { + self.debug.hit() + } else { + self.debug.miss() + } + res + } + } +} + +#[cfg(feature = "pack-cache-lru-dynamic")] +pub use memory::MemoryCappedHashmap; + +#[cfg(feature = "pack-cache-lru-static")] +mod _static { + use super::DecodeEntry; + struct Entry { + pack_id: u32, + offset: u64, + data: Vec<u8>, + kind: gix_object::Kind, + compressed_size: usize, + } + + /// A cache using a least-recently-used implementation capable of storing the `SIZE` most recent objects. + /// The cache must be small as the search is 'naive' and the underlying data structure is a linked list. + /// Values of 64 seem to improve performance. 
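+    /// # Example
+    ///
+    /// A construction sketch (illustrative only, requires the `pack-cache-lru-static` feature):
+    ///
+    /// ```ignore
+    /// // Keep the 64 most recently used entries, as suggested above.
+    /// let mut cache = gix_pack::cache::lru::StaticLinkedList::<64>::default();
+    /// ```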
+ pub struct StaticLinkedList<const SIZE: usize> { + inner: uluru::LRUCache<Entry, SIZE>, + free_list: Vec<Vec<u8>>, + debug: gix_features::cache::Debug, + } + + impl<const SIZE: usize> Default for StaticLinkedList<SIZE> { + fn default() -> Self { + StaticLinkedList { + inner: Default::default(), + free_list: Vec::new(), + debug: gix_features::cache::Debug::new(format!("StaticLinkedList<{SIZE}>")), + } + } + } + + impl<const SIZE: usize> DecodeEntry for StaticLinkedList<SIZE> { + fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) { + self.debug.put(); + if let Some(previous) = self.inner.insert(Entry { + offset, + pack_id, + data: self + .free_list + .pop() + .map(|mut v| { + v.clear(); + v.resize(data.len(), 0); + v.copy_from_slice(data); + v + }) + .unwrap_or_else(|| Vec::from(data)), + kind, + compressed_size, + }) { + self.free_list.push(previous.data) + } + } + + fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> { + let res = self.inner.lookup(|e: &mut Entry| { + if e.pack_id == pack_id && e.offset == offset { + out.resize(e.data.len(), 0); + out.copy_from_slice(&e.data); + Some((e.kind, e.compressed_size)) + } else { + None + } + }); + if res.is_some() { + self.debug.hit() + } else { + self.debug.miss() + } + res + } + } +} + +#[cfg(feature = "pack-cache-lru-static")] +pub use _static::StaticLinkedList; diff --git a/vendor/gix-pack/src/cache/mod.rs b/vendor/gix-pack/src/cache/mod.rs new file mode 100644 index 000000000..cf4b94df8 --- /dev/null +++ b/vendor/gix-pack/src/cache/mod.rs @@ -0,0 +1,55 @@ +use std::ops::DerefMut; + +use gix_object::Kind; + +/// A trait to model putting objects at a given pack `offset` into a cache, and fetching them. +/// +/// It is used to speed up [pack traversals][crate::index::File::traverse()]. +pub trait DecodeEntry { + /// Store a fully decoded object at `offset` of `kind` with `compressed_size` and `data` in the cache. + /// + /// It is up to the cache implementation whether that actually happens or not. + fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize); + /// Attempt to fetch the object at `offset` and store its decoded bytes in `out`, as previously stored with [`DecodeEntry::put()`], and return + /// its (object `kind`, `decompressed_size`) + fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)>; +} + +/// A cache that stores nothing and retrieves nothing, thus it _never_ caches. +#[derive(Default)] +pub struct Never; + +impl DecodeEntry for Never { + fn put(&mut self, _pack_id: u32, _offset: u64, _data: &[u8], _kind: gix_object::Kind, _compressed_size: usize) {} + fn get(&mut self, _pack_id: u32, _offset: u64, _out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> { + None + } +} + +impl<T: DecodeEntry + ?Sized> DecodeEntry for Box<T> { + fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: Kind, compressed_size: usize) { + self.deref_mut().put(pack_id, offset, data, kind, compressed_size) + } + + fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(Kind, usize)> { + self.deref_mut().get(pack_id, offset, out) + } +} + +/// A way of storing and retrieving entire objects to and from a cache. +pub trait Object { + /// Put the object going by `id` of `kind` with `data` into the cache. 
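+    /// # Example
+    ///
+    /// A sketch using one of the provided implementations (illustrative only, requires the
+    /// `object-cache-dynamic` feature); `id` is assumed to be a [`gix_hash::ObjectId`] computed
+    /// by the caller:
+    ///
+    /// ```ignore
+    /// use gix_pack::cache::Object;
+    ///
+    /// let mut cache = gix_pack::cache::object::MemoryCappedHashmap::new(10 * 1024 * 1024);
+    /// cache.put(id, gix_object::Kind::Blob, b"hello world"); // `id` is assumed to exist, see above.
+    /// ```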
+ fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]); + + /// Try to retrieve the object named `id` and place its data into `out` if available and return `Some(kind)` if found. + fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind>; +} + +/// Various implementations of [`DecodeEntry`] using least-recently-used algorithms. +#[cfg(any(feature = "pack-cache-lru-dynamic", feature = "pack-cache-lru-static"))] +pub mod lru; + +pub mod object; + +/// +pub(crate) mod delta; diff --git a/vendor/gix-pack/src/cache/object.rs b/vendor/gix-pack/src/cache/object.rs new file mode 100644 index 000000000..e64f47a8c --- /dev/null +++ b/vendor/gix-pack/src/cache/object.rs @@ -0,0 +1,123 @@ +//! # Note +//! +//! This module is a bit 'misplaced' if spelled out like 'gix_pack::cache::object::*' but is best placed here for code re-use and +//! general usefulness. +use crate::cache; + +#[cfg(feature = "object-cache-dynamic")] +mod memory { + use std::num::NonZeroUsize; + + use clru::WeightScale; + + use crate::cache; + + struct Entry { + data: Vec<u8>, + kind: gix_object::Kind, + } + + type Key = gix_hash::ObjectId; + + struct CustomScale; + + impl WeightScale<Key, Entry> for CustomScale { + fn weight(&self, key: &Key, value: &Entry) -> usize { + value.data.len() + std::mem::size_of::<Entry>() + key.as_bytes().len() + } + } + + /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes. + pub struct MemoryCappedHashmap { + inner: clru::CLruCache<Key, Entry, gix_hashtable::hash::Builder, CustomScale>, + free_list: Vec<Vec<u8>>, + debug: gix_features::cache::Debug, + } + + impl MemoryCappedHashmap { + /// The amount of bytes we can hold in total, or the value we saw in `new(…)`. + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + /// Return a new instance which evicts least recently used items if it uses more than `memory_cap_in_bytes` + /// object data. + pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap { + MemoryCappedHashmap { + inner: clru::CLruCache::with_config( + clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero")) + .with_hasher(gix_hashtable::hash::Builder::default()) + .with_scale(CustomScale), + ), + free_list: Vec::new(), + debug: gix_features::cache::Debug::new(format!("MemoryCappedObjectHashmap({memory_cap_in_bytes}B)")), + } + } + } + + impl cache::Object for MemoryCappedHashmap { + /// Put the object going by `id` of `kind` with `data` into the cache. + fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]) { + self.debug.put(); + if let Ok(Some(previous_entry)) = self.inner.put_with_weight( + id, + Entry { + data: self + .free_list + .pop() + .map(|mut v| { + v.clear(); + v.resize(data.len(), 0); + v.copy_from_slice(data); + v + }) + .unwrap_or_else(|| Vec::from(data)), + kind, + }, + ) { + self.free_list.push(previous_entry.data) + } + } + + /// Try to retrieve the object named `id` and place its data into `out` if available and return `Some(kind)` if found. + fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind> { + let res = self.inner.get(id).map(|e| { + out.resize(e.data.len(), 0); + out.copy_from_slice(&e.data); + e.kind + }); + if res.is_some() { + self.debug.hit() + } else { + self.debug.miss() + } + res + } + } +} +#[cfg(feature = "object-cache-dynamic")] +pub use memory::MemoryCappedHashmap; + +/// A cache implementation that doesn't do any caching. 
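+/// # Example
+///
+/// A sketch of the pass-through behaviour (illustrative only); `id` is assumed to be a
+/// [`gix_hash::ObjectId`] computed by the caller:
+///
+/// ```ignore
+/// use gix_pack::cache::Object;
+///
+/// let mut cache = gix_pack::cache::object::Never;
+/// cache.put(id, gix_object::Kind::Blob, b"data"); // no-op, `id` is assumed to exist (see above)
+/// let mut buf = Vec::new();
+/// assert!(cache.get(&id, &mut buf).is_none()); // nothing is ever stored
+/// ```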
+pub struct Never; + +impl cache::Object for Never { + /// Noop + fn put(&mut self, _id: gix_hash::ObjectId, _kind: gix_object::Kind, _data: &[u8]) {} + + /// Noop + fn get(&mut self, _id: &gix_hash::ObjectId, _out: &mut Vec<u8>) -> Option<gix_object::Kind> { + None + } +} + +impl<T: cache::Object + ?Sized> cache::Object for Box<T> { + fn put(&mut self, id: gix_hash::ObjectId, kind: gix_object::Kind, data: &[u8]) { + use std::ops::DerefMut; + self.deref_mut().put(id, kind, data) + } + + fn get(&mut self, id: &gix_hash::ObjectId, out: &mut Vec<u8>) -> Option<gix_object::Kind> { + use std::ops::DerefMut; + self.deref_mut().get(id, out) + } +} diff --git a/vendor/gix-pack/src/data/delta.rs b/vendor/gix-pack/src/data/delta.rs new file mode 100644 index 000000000..a898e4aaf --- /dev/null +++ b/vendor/gix-pack/src/data/delta.rs @@ -0,0 +1,70 @@ +/// Given the decompressed pack delta `d`, decode a size in bytes (either the base object size or the result object size) +/// Equivalent to [this canonical git function](https://github.com/git/git/blob/311531c9de557d25ac087c1637818bd2aad6eb3a/delta.h#L89) +pub fn decode_header_size(d: &[u8]) -> (u64, usize) { + let mut i = 0; + let mut size = 0u64; + let mut consumed = 0; + for cmd in d.iter() { + consumed += 1; + size |= (*cmd as u64 & 0x7f) << i; + i += 7; + if *cmd & 0x80 == 0 { + break; + } + } + (size, consumed) +} + +pub fn apply(base: &[u8], mut target: &mut [u8], data: &[u8]) { + let mut i = 0; + while let Some(cmd) = data.get(i) { + i += 1; + match cmd { + cmd if cmd & 0b1000_0000 != 0 => { + let (mut ofs, mut size): (u32, u32) = (0, 0); + if cmd & 0b0000_0001 != 0 { + ofs = data[i] as u32; + i += 1; + } + if cmd & 0b0000_0010 != 0 { + ofs |= (data[i] as u32) << 8; + i += 1; + } + if cmd & 0b0000_0100 != 0 { + ofs |= (data[i] as u32) << 16; + i += 1; + } + if cmd & 0b0000_1000 != 0 { + ofs |= (data[i] as u32) << 24; + i += 1; + } + if cmd & 0b0001_0000 != 0 { + size = data[i] as u32; + i += 1; + } + if cmd & 0b0010_0000 != 0 { + size |= (data[i] as u32) << 8; + i += 1; + } + if cmd & 0b0100_0000 != 0 { + size |= (data[i] as u32) << 16; + i += 1; + } + if size == 0 { + size = 0x10000; // 65536 + } + let ofs = ofs as usize; + std::io::Write::write(&mut target, &base[ofs..ofs + size as usize]) + .expect("delta copy from base: byte slices must match"); + } + 0 => panic!("encountered unsupported command code: 0"), + size => { + std::io::Write::write(&mut target, &data[i..i + *size as usize]) + .expect("delta copy data: slice sizes to match up"); + i += *size as usize; + } + } + } + assert_eq!(i, data.len()); + assert_eq!(target.len(), 0); +} diff --git a/vendor/gix-pack/src/data/entry/decode.rs b/vendor/gix-pack/src/data/entry/decode.rs new file mode 100644 index 000000000..79d7aecff --- /dev/null +++ b/vendor/gix-pack/src/data/entry/decode.rs @@ -0,0 +1,125 @@ +use std::io; + +use gix_features::decode::{leb64, leb64_from_read}; + +use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE}; +use crate::data; + +/// Decoding +impl data::Entry { + /// Decode an entry from the given entry data `d`, providing the `pack_offset` to allow tracking the start of the entry data section. + /// + /// # Panics + /// + /// If we cannot understand the header, garbage data is likely to trigger this. 
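+    /// # Example
+    ///
+    /// A decoding sketch (illustrative only). The single byte `0x35` encodes type id `3` (blob)
+    /// in its upper bits and a decompressed size of `5` in its lower four bits, with no
+    /// continuation bit set:
+    ///
+    /// ```ignore
+    /// let entry = gix_pack::data::Entry::from_bytes(&[0x35], 0, 20);
+    /// assert_eq!(entry.header, gix_pack::data::entry::Header::Blob);
+    /// assert_eq!(entry.decompressed_size, 5);
+    /// assert_eq!(entry.data_offset, 1); // exactly one header byte was consumed
+    /// ```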
+ pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> data::Entry { + let (type_id, size, mut consumed) = parse_header_info(d); + + use crate::data::entry::Header::*; + let object = match type_id { + OFS_DELTA => { + let (distance, leb_bytes) = leb64(&d[consumed..]); + let delta = OfsDelta { + base_distance: distance, + }; + consumed += leb_bytes; + delta + } + REF_DELTA => { + let delta = RefDelta { + base_id: gix_hash::ObjectId::from(&d[consumed..][..hash_len]), + }; + consumed += hash_len; + delta + } + BLOB => Blob, + TREE => Tree, + COMMIT => Commit, + TAG => Tag, + _ => panic!("We currently don't support any V3 features or extensions"), + }; + data::Entry { + header: object, + decompressed_size: size, + data_offset: pack_offset + consumed as u64, + } + } + + /// Instantiate an `Entry` from the reader `r`, providing the `pack_offset` to allow tracking the start of the entry data section. + pub fn from_read( + mut r: impl io::Read, + pack_offset: data::Offset, + hash_len: usize, + ) -> Result<data::Entry, io::Error> { + let (type_id, size, mut consumed) = streaming_parse_header_info(&mut r)?; + + use crate::data::entry::Header::*; + let object = match type_id { + OFS_DELTA => { + let (distance, leb_bytes) = leb64_from_read(&mut r)?; + let delta = OfsDelta { + base_distance: distance, + }; + consumed += leb_bytes; + delta + } + REF_DELTA => { + let mut buf = gix_hash::Kind::buf(); + let hash = &mut buf[..hash_len]; + r.read_exact(hash)?; + #[allow(clippy::redundant_slicing)] + let delta = RefDelta { + base_id: gix_hash::ObjectId::from(&hash[..]), + }; + consumed += hash_len; + delta + } + BLOB => Blob, + TREE => Tree, + COMMIT => Commit, + TAG => Tag, + _ => panic!("We currently don't support any V3 features or extensions"), + }; + Ok(data::Entry { + header: object, + decompressed_size: size, + data_offset: pack_offset + consumed as u64, + }) + } +} + +#[inline] +fn streaming_parse_header_info(mut read: impl io::Read) -> Result<(u8, u64, usize), io::Error> { + let mut byte = [0u8; 1]; + read.read_exact(&mut byte)?; + let mut c = byte[0]; + let mut i = 1; + let type_id = (c >> 4) & 0b0000_0111; + let mut size = c as u64 & 0b0000_1111; + let mut s = 4; + while c & 0b1000_0000 != 0 { + read.read_exact(&mut byte)?; + c = byte[0]; + i += 1; + size += ((c & 0b0111_1111) as u64) << s; + s += 7 + } + Ok((type_id, size, i)) +} + +/// Parses the header of a pack-entry, yielding object type id, decompressed object size, and consumed bytes +#[inline] +fn parse_header_info(data: &[u8]) -> (u8, u64, usize) { + let mut c = data[0]; + let mut i = 1; + let type_id = (c >> 4) & 0b0000_0111; + let mut size = c as u64 & 0b0000_1111; + let mut s = 4; + while c & 0b1000_0000 != 0 { + c = data[i]; + i += 1; + size += ((c & 0b0111_1111) as u64) << s; + s += 7 + } + (type_id, size, i) +} diff --git a/vendor/gix-pack/src/data/entry/header.rs b/vendor/gix-pack/src/data/entry/header.rs new file mode 100644 index 000000000..83983eab0 --- /dev/null +++ b/vendor/gix-pack/src/data/entry/header.rs @@ -0,0 +1,150 @@ +use std::io; + +use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE}; +use crate::data; + +/// The header portion of a pack data entry, identifying the kind of stored object. 
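The entry header parsed by `parse_header_info` above packs the object type into bits 4..=6 of the first byte and spreads the decompressed size over the low nibble plus further 7-bit groups for as long as the continuation bit is set. A self-contained sketch of that layout (illustrative names, not part of the vendored source):

```rust
// Parse a pack entry header: returns (type id, decompressed size, bytes consumed).
fn parse_entry_header(data: &[u8]) -> (u8, u64, usize) {
    let mut c = data[0];
    let mut consumed = 1;
    let type_id = (c >> 4) & 0b0000_0111; // bits 4..=6 of the first byte
    let mut size = (c & 0b0000_1111) as u64; // low nibble holds the first 4 size bits
    let mut shift = 4;
    while c & 0b1000_0000 != 0 {
        c = data[consumed];
        consumed += 1;
        size |= ((c & 0b0111_1111) as u64) << shift;
        shift += 7;
    }
    (type_id, size, consumed)
}

fn main() {
    // Type id 3 (blob) with size 21: the first byte 0b1011_0101 carries the type, the low
    // nibble 5 and the continuation bit; the second byte adds 1 << 4 = 16, so 5 + 16 = 21.
    assert_eq!(parse_entry_header(&[0b1011_0101, 0b0000_0001]), (3, 21, 2));
}
```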
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[allow(missing_docs)] +pub enum Header { + /// The object is a commit + Commit, + /// The object is a tree + Tree, + /// The object is a blob + Blob, + /// The object is a tag + Tag, + /// Describes a delta-object which needs to be applied to a base. The base object is identified by the `base_id` field + /// which is found within the parent repository. + /// Most commonly used for **thin-packs** when receiving pack files from the server to refer to objects that are not + /// part of the pack but expected to be present in the receivers repository. + /// + /// # Note + /// This could also be an object within this pack if the LSB encoded offset would be larger than 20 bytes, which is unlikely to + /// happen. + /// + /// **The naming** is exactly the same as the canonical implementation uses, namely **REF_DELTA**. + RefDelta { base_id: gix_hash::ObjectId }, + /// Describes a delta-object present in this pack which acts as base for this object. + /// The base object is measured as a distance from this objects + /// pack offset, so that `base_pack_offset = this_objects_pack_offset - base_distance` + /// + /// # Note + /// + /// **The naming** is exactly the same as the canonical implementation uses, namely **OFS_DELTA**. + OfsDelta { base_distance: u64 }, +} + +impl Header { + /// Subtract `distance` from `pack_offset` safely without the chance for overflow or no-ops if `distance` is 0. + pub fn verified_base_pack_offset(pack_offset: data::Offset, distance: u64) -> Option<data::Offset> { + if distance == 0 { + return None; + } + pack_offset.checked_sub(distance) + } + /// Convert the header's object kind into [`gix_object::Kind`] if possible + pub fn as_kind(&self) -> Option<gix_object::Kind> { + use gix_object::Kind::*; + Some(match self { + Header::Tree => Tree, + Header::Blob => Blob, + Header::Commit => Commit, + Header::Tag => Tag, + Header::RefDelta { .. } | Header::OfsDelta { .. } => return None, + }) + } + /// Convert this header's object kind into the packs internal representation + pub fn as_type_id(&self) -> u8 { + use Header::*; + match self { + Blob => BLOB, + Tree => TREE, + Commit => COMMIT, + Tag => TAG, + OfsDelta { .. } => OFS_DELTA, + RefDelta { .. } => REF_DELTA, + } + } + /// Return's true if this is a delta object, i.e. not a full object. + pub fn is_delta(&self) -> bool { + matches!(self, Header::OfsDelta { .. } | Header::RefDelta { .. }) + } + /// Return's true if this is a base object, i.e. not a delta object. + pub fn is_base(&self) -> bool { + !self.is_delta() + } +} + +impl Header { + /// Encode this header along the given `decompressed_size_in_bytes` into the `out` write stream for use within a data pack. + /// + /// Returns the amount of bytes written to `out`. 
+ /// `decompressed_size_in_bytes` is the full size in bytes of the object that this header represents + pub fn write_to(&self, decompressed_size_in_bytes: u64, mut out: impl io::Write) -> io::Result<usize> { + let mut size = decompressed_size_in_bytes; + let mut written = 1; + let mut c: u8 = (self.as_type_id() << 4) | (size as u8 & 0b0000_1111); + size >>= 4; + while size != 0 { + out.write_all(&[c | 0b1000_0000])?; + written += 1; + c = size as u8 & 0b0111_1111; + size >>= 7; + } + out.write_all(&[c])?; + + use Header::*; + match self { + RefDelta { base_id: oid } => { + out.write_all(oid.as_slice())?; + written += oid.as_slice().len(); + } + OfsDelta { base_distance } => { + let mut buf = [0u8; 10]; + let buf = leb64_encode(*base_distance, &mut buf); + out.write_all(buf)?; + written += buf.len(); + } + Blob | Tree | Commit | Tag => {} + } + Ok(written) + } + + /// The size of the header in bytes when serialized + pub fn size(&self, decompressed_size: u64) -> usize { + self.write_to(decompressed_size, io::sink()) + .expect("io::sink() to never fail") + } +} + +#[inline] +fn leb64_encode(mut n: u64, buf: &mut [u8; 10]) -> &[u8] { + let mut bytes_written = 1; + buf[buf.len() - 1] = n as u8 & 0b0111_1111; + for out in buf.iter_mut().rev().skip(1) { + n >>= 7; + if n == 0 { + break; + } + n -= 1; + *out = 0b1000_0000 | (n as u8 & 0b0111_1111); + bytes_written += 1; + } + debug_assert_eq!(n, 0, "BUG: buffer must be large enough to hold a 64 bit integer"); + &buf[buf.len() - bytes_written..] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn leb64_encode_max_int() { + let mut buf = [0u8; 10]; + let buf = leb64_encode(u64::MAX, &mut buf); + assert_eq!(buf.len(), 10, "10 bytes should be used when 64bits are encoded"); + } +} diff --git a/vendor/gix-pack/src/data/entry/mod.rs b/vendor/gix-pack/src/data/entry/mod.rs new file mode 100644 index 000000000..f11c39c5c --- /dev/null +++ b/vendor/gix-pack/src/data/entry/mod.rs @@ -0,0 +1,53 @@ +use crate::data::Entry; + +const _TYPE_EXT1: u8 = 0; +const COMMIT: u8 = 1; +const TREE: u8 = 2; +const BLOB: u8 = 3; +const TAG: u8 = 4; +const _TYPE_EXT2: u8 = 5; +const OFS_DELTA: u8 = 6; +const REF_DELTA: u8 = 7; + +/// A way to uniquely identify the location of an entry within a pack bundle +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Location { + /// The id of the pack containing the object. It's unique within its frame of reference which is the owning object database. + pub pack_id: u32, + /// The size of the entry of disk so that the range of bytes of the entry is `pack_offset..pack_offset + entry_size`. + pub entry_size: usize, + /// The start of the entry in the pack identified by `pack_id`. + pub pack_offset: data::Offset, +} + +impl Location { + /// Compute a range suitable for lookup in pack data using the [`entry_slice()`][crate::data::File::entry_slice()] method. + pub fn entry_range(&self, pack_offset: data::Offset) -> crate::data::EntryRange { + pack_offset..pack_offset + self.entry_size as u64 + } +} + +/// Access +impl Entry { + /// Compute the pack offset to the base entry of the object represented by this entry. 
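The `leb64_encode` helper above emits the offset encoding used for `OfsDelta` base distances: most significant group first, with an implicit `+1` folded into every continuation byte. A standalone round-trip sketch follows; the decoder shown is the usual counterpart and is included here only to demonstrate the invariant, it is not the crate's implementation:

```rust
// Encode an offset the way leb64_encode does: fill the buffer from the back,
// subtracting one before each continuation byte.
fn encode_offset(mut n: u64, buf: &mut [u8; 10]) -> &[u8] {
    let mut bytes_written = 1;
    buf[9] = n as u8 & 0b0111_1111;
    for out in buf.iter_mut().rev().skip(1) {
        n >>= 7;
        if n == 0 {
            break;
        }
        n -= 1;
        *out = 0b1000_0000 | (n as u8 & 0b0111_1111);
        bytes_written += 1;
    }
    &buf[10 - bytes_written..]
}

// The matching decoder: add one back for every continuation byte seen.
fn decode_offset(buf: &[u8]) -> u64 {
    let mut iter = buf.iter();
    let mut c = *iter.next().expect("at least one byte");
    let mut value = (c & 0b0111_1111) as u64;
    while c & 0b1000_0000 != 0 {
        c = *iter.next().expect("continuation byte present");
        value = ((value + 1) << 7) + (c & 0b0111_1111) as u64;
    }
    value
}

fn main() {
    for n in [0u64, 127, 128, 300, 1 << 20] {
        let mut buf = [0u8; 10];
        assert_eq!(decode_offset(encode_offset(n, &mut buf)), n);
    }
}
```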
+ pub fn base_pack_offset(&self, distance: u64) -> data::Offset { + let pack_offset = self.data_offset - self.header_size() as u64; + pack_offset.checked_sub(distance).expect("in-bound distance of deltas") + } + /// The pack offset at which this entry starts + pub fn pack_offset(&self) -> data::Offset { + self.data_offset - self.header_size() as u64 + } + /// The amount of bytes used to describe this entry in the pack. The header starts at [`Self::pack_offset()`] + pub fn header_size(&self) -> usize { + self.header.size(self.decompressed_size) + } +} + +mod decode; + +mod header; +pub use header::Header; + +use crate::data; diff --git a/vendor/gix-pack/src/data/file/decode/entry.rs b/vendor/gix-pack/src/data/file/decode/entry.rs new file mode 100644 index 000000000..60fefec0f --- /dev/null +++ b/vendor/gix-pack/src/data/file/decode/entry.rs @@ -0,0 +1,422 @@ +use std::{convert::TryInto, ops::Range}; + +use gix_features::zlib; +use smallvec::SmallVec; + +use crate::{ + cache, data, + data::{delta, file::decode::Error, File}, +}; + +/// A return value of a resolve function, which given an [`ObjectId`][gix_hash::ObjectId] determines where an object can be found. +#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum ResolvedBase { + /// Indicate an object is within this pack, at the given entry, and thus can be looked up locally. + InPack(data::Entry), + /// Indicates the object of `kind` was found outside of the pack, and its data was written into an output + /// vector which now has a length of `end`. + #[allow(missing_docs)] + OutOfPack { kind: gix_object::Kind, end: usize }, +} + +#[derive(Debug)] +struct Delta { + data: Range<usize>, + base_size: usize, + result_size: usize, + + decompressed_size: usize, + data_offset: data::Offset, +} + +/// Additional information and statistics about a successfully decoded object produced by [`File::decode_entry()`]. +/// +/// Useful to understand the effectiveness of the pack compression or the cost of decompression. +#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Outcome { + /// The kind of resolved object. + pub kind: gix_object::Kind, + /// The amount of deltas in the chain of objects that had to be resolved beforehand. + /// + /// This number is affected by the [`Cache`][cache::DecodeEntry] implementation, with cache hits shortening the + /// delta chain accordingly + pub num_deltas: u32, + /// The total decompressed size of all pack entries in the delta chain + pub decompressed_size: u64, + /// The total compressed size of all pack entries in the delta chain + pub compressed_size: usize, + /// The total size of the decoded object. + pub object_size: u64, +} + +impl Outcome { + pub(crate) fn default_from_kind(kind: gix_object::Kind) -> Self { + Self { + kind, + num_deltas: 0, + decompressed_size: 0, + compressed_size: 0, + object_size: 0, + } + } + fn from_object_entry(kind: gix_object::Kind, entry: &data::Entry, compressed_size: usize) -> Self { + Self { + kind, + num_deltas: 0, + decompressed_size: entry.decompressed_size, + compressed_size, + object_size: entry.decompressed_size, + } + } +} + +/// Decompression of objects +impl File { + /// Decompress the given `entry` into `out` and return the amount of bytes read from the pack data. 
+ /// + /// _Note_ that this method does not resolve deltified objects, but merely decompresses their content + /// `out` is expected to be large enough to hold `entry.size` bytes. + /// + /// # Panics + /// + /// If `out` isn't large enough to hold the decompressed `entry` + pub fn decompress_entry(&self, entry: &data::Entry, out: &mut [u8]) -> Result<usize, Error> { + assert!( + out.len() as u64 >= entry.decompressed_size, + "output buffer isn't large enough to hold decompressed result, want {}, have {}", + entry.decompressed_size, + out.len() + ); + + self.decompress_entry_from_data_offset(entry.data_offset, out) + .map_err(Into::into) + } + + fn assure_v2(&self) { + assert!( + matches!(self.version, crate::data::Version::V2), + "Only V2 is implemented" + ); + } + + /// Obtain the [`Entry`][crate::data::Entry] at the given `offset` into the pack. + /// + /// The `offset` is typically obtained from the pack index file. + pub fn entry(&self, offset: data::Offset) -> data::Entry { + self.assure_v2(); + let pack_offset: usize = offset.try_into().expect("offset representable by machine"); + assert!(pack_offset <= self.data.len(), "offset out of bounds"); + + let object_data = &self.data[pack_offset..]; + data::Entry::from_bytes(object_data, offset, self.hash_len) + } + + /// Decompress the object expected at the given data offset, sans pack header. This information is only + /// known after the pack header was parsed. + /// Note that this method does not resolve deltified objects, but merely decompresses their content + /// `out` is expected to be large enough to hold `entry.size` bytes. + /// Returns the amount of packed bytes there read from the pack data file. + pub(crate) fn decompress_entry_from_data_offset( + &self, + data_offset: data::Offset, + out: &mut [u8], + ) -> Result<usize, zlib::inflate::Error> { + let offset: usize = data_offset.try_into().expect("offset representable by machine"); + assert!(offset < self.data.len(), "entry offset out of bounds"); + + zlib::Inflate::default() + .once(&self.data[offset..], out) + .map(|(_status, consumed_in, _consumed_out)| consumed_in) + } + + /// Like `decompress_entry_from_data_offset`, but returns consumed input and output. + pub(crate) fn decompress_entry_from_data_offset_2( + &self, + data_offset: data::Offset, + out: &mut [u8], + ) -> Result<(usize, usize), zlib::inflate::Error> { + let offset: usize = data_offset.try_into().expect("offset representable by machine"); + assert!(offset < self.data.len(), "entry offset out of bounds"); + + zlib::Inflate::default() + .once(&self.data[offset..], out) + .map(|(_status, consumed_in, consumed_out)| (consumed_in, consumed_out)) + } + + /// Decode an entry, resolving delta's as needed, while growing the `out` vector if there is not enough + /// space to hold the result object. + /// + /// The `entry` determines which object to decode, and is commonly obtained with the help of a pack index file or through pack iteration. + /// + /// `resolve` is a function to lookup objects with the given [`ObjectId`][gix_hash::ObjectId], in case the full object id is used to refer to + /// a base object, instead of an in-pack offset. + /// + /// `delta_cache` is a mechanism to avoid looking up base objects multiple times when decompressing multiple objects in a row. + /// Use a [Noop-Cache][cache::Never] to disable caching all together at the cost of repeating work. 
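A rough usage sketch for the decoding entry points above, assuming a v2 pack on disk whose first entry (at offset 12, right behind the header) needs no out-of-pack base; the path is illustrative and error handling is kept minimal:

```rust
use gix_pack::{cache, data};

fn decode_first_entry() -> Result<(), Box<dyn std::error::Error>> {
    let pack = data::File::at("objects/pack/pack-1234.pack", gix_hash::Kind::Sha1)?;
    let entry = pack.entry(12); // the first entry starts right after the 12-byte header
    let mut out = Vec::new();
    let outcome = pack.decode_entry(
        entry,
        &mut out,
        |_id, _buf| None,  // no out-of-pack lookup in this sketch
        &mut cache::Never, // disable delta-base caching entirely
    )?;
    println!("decoded a {:?} of {} bytes", outcome.kind, outcome.object_size);
    Ok(())
}
```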
+ pub fn decode_entry( + &self, + entry: data::Entry, + out: &mut Vec<u8>, + resolve: impl Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>, + delta_cache: &mut impl cache::DecodeEntry, + ) -> Result<Outcome, Error> { + use crate::data::entry::Header::*; + match entry.header { + Tree | Blob | Commit | Tag => { + out.resize( + entry + .decompressed_size + .try_into() + .expect("size representable by machine"), + 0, + ); + self.decompress_entry(&entry, out.as_mut_slice()).map(|consumed_input| { + Outcome::from_object_entry( + entry.header.as_kind().expect("a non-delta entry"), + &entry, + consumed_input, + ) + }) + } + OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, out, delta_cache), + } + } + + /// resolve: technically, this shouldn't ever be required as stored local packs don't refer to objects by id + /// that are outside of the pack. Unless, of course, the ref refers to an object within this pack, which means + /// it's very, very large as 20bytes are smaller than the corresponding MSB encoded number + fn resolve_deltas( + &self, + last: data::Entry, + resolve: impl Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>, + out: &mut Vec<u8>, + cache: &mut impl cache::DecodeEntry, + ) -> Result<Outcome, Error> { + // all deltas, from the one that produces the desired object (first) to the oldest at the end of the chain + let mut chain = SmallVec::<[Delta; 10]>::default(); + let first_entry = last.clone(); + let mut cursor = last; + let mut base_buffer_size: Option<usize> = None; + let mut object_kind: Option<gix_object::Kind> = None; + let mut consumed_input: Option<usize> = None; + + // Find the first full base, either an undeltified object in the pack or a reference to another object. + let mut total_delta_data_size: u64 = 0; + while cursor.header.is_delta() { + if let Some((kind, packed_size)) = cache.get(self.id, cursor.data_offset, out) { + base_buffer_size = Some(out.len()); + object_kind = Some(kind); + // If the input entry is a cache hit, keep the packed size as it must be returned. + // Otherwise, the packed size will be determined later when decompressing the input delta + if total_delta_data_size == 0 { + consumed_input = Some(packed_size); + } + break; + } + total_delta_data_size += cursor.decompressed_size; + let decompressed_size = cursor + .decompressed_size + .try_into() + .expect("a single delta size small enough to fit a usize"); + chain.push(Delta { + data: Range { + start: 0, + end: decompressed_size, + }, + base_size: 0, + result_size: 0, + decompressed_size, + data_offset: cursor.data_offset, + }); + use crate::data::entry::Header; + cursor = match cursor.header { + Header::OfsDelta { base_distance } => self.entry(cursor.base_pack_offset(base_distance)), + Header::RefDelta { base_id } => match resolve(base_id.as_ref(), out) { + Some(ResolvedBase::InPack(entry)) => entry, + Some(ResolvedBase::OutOfPack { end, kind }) => { + base_buffer_size = Some(end); + object_kind = Some(kind); + break; + } + None => return Err(Error::DeltaBaseUnresolved(base_id)), + }, + _ => unreachable!("cursor.is_delta() only allows deltas here"), + }; + } + + // This can happen if the cache held the first entry itself + // We will just treat it as an object then, even though it's technically incorrect. 
+ if chain.is_empty() { + return Ok(Outcome::from_object_entry( + object_kind.expect("object kind as set by cache"), + &first_entry, + consumed_input.expect("consumed bytes as set by cache"), + )); + }; + + // First pass will decompress all delta data and keep it in our output buffer + // [<possibly resolved base object>]<delta-1..delta-n>... + // so that we can find the biggest result size. + let total_delta_data_size: usize = total_delta_data_size.try_into().expect("delta data to fit in memory"); + + let chain_len = chain.len(); + let (first_buffer_end, second_buffer_end) = { + let delta_start = base_buffer_size.unwrap_or(0); + out.resize(delta_start + total_delta_data_size, 0); + + let delta_range = Range { + start: delta_start, + end: delta_start + total_delta_data_size, + }; + let mut instructions = &mut out[delta_range.clone()]; + let mut relative_delta_start = 0; + let mut biggest_result_size = 0; + for (delta_idx, delta) in chain.iter_mut().rev().enumerate() { + let consumed_from_data_offset = self.decompress_entry_from_data_offset( + delta.data_offset, + &mut instructions[..delta.decompressed_size], + )?; + let is_last_delta_to_be_applied = delta_idx + 1 == chain_len; + if is_last_delta_to_be_applied { + consumed_input = Some(consumed_from_data_offset); + } + + let (base_size, offset) = delta::decode_header_size(instructions); + let mut bytes_consumed_by_header = offset; + biggest_result_size = biggest_result_size.max(base_size); + delta.base_size = base_size.try_into().expect("base size fits into usize"); + + let (result_size, offset) = delta::decode_header_size(&instructions[offset..]); + bytes_consumed_by_header += offset; + biggest_result_size = biggest_result_size.max(result_size); + delta.result_size = result_size.try_into().expect("result size fits into usize"); + + // the absolute location into the instructions buffer, so we keep track of the end point of the last + delta.data.start = relative_delta_start + bytes_consumed_by_header; + relative_delta_start += delta.decompressed_size; + delta.data.end = relative_delta_start; + + instructions = &mut instructions[delta.decompressed_size..]; + } + + // Now we can produce a buffer like this + // [<biggest-result-buffer, possibly filled with resolved base object data>]<biggest-result-buffer><delta-1..delta-n> + // from [<possibly resolved base object>]<delta-1..delta-n>... + let biggest_result_size: usize = biggest_result_size + .try_into() + .expect("biggest result size small enough to fit into usize"); + let first_buffer_size = biggest_result_size; + let second_buffer_size = first_buffer_size; + out.resize(first_buffer_size + second_buffer_size + total_delta_data_size, 0); + + // Now 'rescue' the deltas, because in the next step we possibly overwrite that portion + // of memory with the base object (in the majority of cases) + let second_buffer_end = { + let end = first_buffer_size + second_buffer_size; + if delta_range.start < end { + // …this means that the delta size is even larger than two uncompressed worst-case + // intermediate results combined. It would already be undesirable to have it bigger + // then the target size (as you could just store the object in whole). + // However, this just means that it reuses existing deltas smartly, which as we rightfully + // remember stand for an object each. However, this means a lot of data is read to restore + // a single object sometimes. Fair enough - package size is minimized that way. 
+ out.copy_within(delta_range, end); + } else { + let (buffers, instructions) = out.split_at_mut(end); + instructions.copy_from_slice(&buffers[delta_range]); + } + end + }; + + // If we don't have a out-of-pack object already, fill the base-buffer by decompressing the full object + // at which the cursor is left after the iteration + if base_buffer_size.is_none() { + let base_entry = cursor; + debug_assert!(!base_entry.header.is_delta()); + object_kind = base_entry.header.as_kind(); + self.decompress_entry_from_data_offset(base_entry.data_offset, out)?; + } + + (first_buffer_size, second_buffer_end) + }; + + // From oldest to most recent, apply all deltas, swapping the buffer back and forth + // TODO: once we have more tests, we could optimize this memory-intensive work to + // analyse the delta-chains to only copy data once - after all, with 'copy-from-base' deltas, + // all data originates from one base at some point. + // `out` is: [source-buffer][target-buffer][max-delta-instructions-buffer] + let (buffers, instructions) = out.split_at_mut(second_buffer_end); + let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end); + + let mut last_result_size = None; + for ( + delta_idx, + Delta { + data, + base_size, + result_size, + .. + }, + ) in chain.into_iter().rev().enumerate() + { + let data = &mut instructions[data]; + if delta_idx + 1 == chain_len { + last_result_size = Some(result_size); + } + delta::apply(&source_buf[..base_size], &mut target_buf[..result_size], data); + // use the target as source for the next delta + std::mem::swap(&mut source_buf, &mut target_buf); + } + + let last_result_size = last_result_size.expect("at least one delta chain item"); + // uneven chains leave the target buffer after the source buffer + // FIXME(Performance) If delta-chains are uneven, we know we will have to copy bytes over here + // Instead we could use a different start buffer, to naturally end up with the result in the + // right one. + // However, this is a bit more complicated than just that - you have to deal with the base + // object, which should also be placed in the second buffer right away. You don't have that + // control/knowledge for out-of-pack bases, so this is a special case to deal with, too. + // Maybe these invariants can be represented in the type system though. + if chain_len % 2 == 1 { + // this seems inverted, but remember: we swapped the buffers on the last iteration + target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]); + } + out.resize(last_result_size, 0); + + let object_kind = object_kind.expect("a base object as root of any delta chain that we are here to resolve"); + let consumed_input = consumed_input.expect("at least one decompressed delta object"); + cache.put( + self.id, + first_entry.data_offset, + out.as_slice(), + object_kind, + consumed_input, + ); + Ok(Outcome { + kind: object_kind, + // technically depending on the cache, the chain size is not correct as it might + // have been cut short by a cache hit. 
The caller must deactivate the cache to get + // actual results + num_deltas: chain_len as u32, + decompressed_size: first_entry.decompressed_size, + compressed_size: consumed_input, + object_size: last_result_size as u64, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn size_of_decode_entry_outcome() { + assert_eq!( + std::mem::size_of::<Outcome>(), + 32, + "this shouldn't change without use noticing as it's returned a lot" + ); + } +} diff --git a/vendor/gix-pack/src/data/file/decode/header.rs b/vendor/gix-pack/src/data/file/decode/header.rs new file mode 100644 index 000000000..1f4b1de0a --- /dev/null +++ b/vendor/gix-pack/src/data/file/decode/header.rs @@ -0,0 +1,114 @@ +use crate::{ + data, + data::{delta, file::decode::Error, File}, +}; + +/// A return value of a resolve function, which given an [`ObjectId`][gix_hash::ObjectId] determines where an object can be found. +#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum ResolvedBase { + /// Indicate an object is within this pack, at the given entry, and thus can be looked up locally. + InPack(data::Entry), + /// Indicates the object of `kind` was found outside of the pack. + OutOfPack { + /// The kind of object we found when reading the header of the out-of-pack base. + kind: gix_object::Kind, + /// The amount of deltas encountered if the object was packed as well. + num_deltas: Option<u32>, + }, +} + +/// Additional information and statistics about a successfully decoded object produced by [`File::decode_header()`]. +/// +/// Useful to understand the effectiveness of the pack compression or the cost of decompression. +#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Outcome { + /// The kind of resolved object. + pub kind: gix_object::Kind, + /// The decompressed size of the object. + pub object_size: u64, + /// The amount of deltas in the chain of objects that had to be resolved beforehand. + pub num_deltas: u32, +} + +/// Obtain object information quickly. +impl File { + /// Resolve the object header information starting at `entry`, following the chain of entries as needed. + /// + /// The `entry` determines which object to decode, and is commonly obtained with the help of a pack index file or through pack iteration. + /// + /// `resolve` is a function to lookup objects with the given [`ObjectId`][gix_hash::ObjectId], in case the full object id + /// is used to refer to a base object, instead of an in-pack offset. 
+ pub fn decode_header( + &self, + mut entry: data::Entry, + resolve: impl Fn(&gix_hash::oid) -> Option<ResolvedBase>, + ) -> Result<Outcome, Error> { + use crate::data::entry::Header::*; + let mut num_deltas = 0; + let mut first_delta_decompressed_size = None::<u64>; + loop { + match entry.header { + Tree | Blob | Commit | Tag => { + return Ok(Outcome { + kind: entry.header.as_kind().expect("always valid for non-refs"), + object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size), + num_deltas, + }); + } + OfsDelta { base_distance } => { + num_deltas += 1; + if first_delta_decompressed_size.is_none() { + first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?); + } + entry = self.entry(entry.base_pack_offset(base_distance)) + } + RefDelta { base_id } => { + num_deltas += 1; + if first_delta_decompressed_size.is_none() { + first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?); + } + match resolve(base_id.as_ref()) { + Some(ResolvedBase::InPack(base_entry)) => entry = base_entry, + Some(ResolvedBase::OutOfPack { + kind, + num_deltas: origin_num_deltas, + }) => { + return Ok(Outcome { + kind, + object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size), + num_deltas: origin_num_deltas.unwrap_or_default() + num_deltas, + }) + } + None => return Err(Error::DeltaBaseUnresolved(base_id)), + } + } + }; + } + } + + #[inline] + fn decode_delta_object_size(&self, entry: &data::Entry) -> Result<u64, Error> { + let mut buf = [0_u8; 32]; + let used = self.decompress_entry_from_data_offset_2(entry.data_offset, &mut buf)?.1; + let buf = &buf[..used]; + let (_base_size, offset) = delta::decode_header_size(buf); + let (result_size, _offset) = delta::decode_header_size(&buf[offset..]); + Ok(result_size) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn size_of_decode_entry_outcome() { + assert_eq!( + std::mem::size_of::<Outcome>(), + 16, + "this shouldn't change without use noticing as it's returned a lot" + ); + } +} diff --git a/vendor/gix-pack/src/data/file/decode/mod.rs b/vendor/gix-pack/src/data/file/decode/mod.rs new file mode 100644 index 000000000..10bb7f19b --- /dev/null +++ b/vendor/gix-pack/src/data/file/decode/mod.rs @@ -0,0 +1,16 @@ +/// +pub mod entry; +/// +pub mod header; + +/// Returned by [`File::decode_header()`][crate::data::File::decode_header()], +/// [`File::decode_entry()`][crate::data::File::decode_entry()] and . +/// [`File::decompress_entry()`][crate::data::File::decompress_entry()] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("Failed to decompress pack entry")] + ZlibInflate(#[from] gix_features::zlib::inflate::Error), + #[error("A delta chain could not be followed as the ref base with id {0} could not be found")] + DeltaBaseUnresolved(gix_hash::ObjectId), +} diff --git a/vendor/gix-pack/src/data/file/init.rs b/vendor/gix-pack/src/data/file/init.rs new file mode 100644 index 000000000..b16072417 --- /dev/null +++ b/vendor/gix-pack/src/data/file/init.rs @@ -0,0 +1,41 @@ +use std::{convert::TryInto, path::Path}; + +use crate::data; + +/// Instantiation +impl data::File { + /// Try opening a data file at the given `path`. + /// + /// The `object_hash` is a way to read (and write) the same file format with different hashes, as the hash kind + /// isn't stored within the file format itself. 
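A rough sketch of using `decode_header` above to obtain kind, size and delta-chain length without decompressing the whole object; the pack handle and offset are assumed to come from elsewhere, and ref-delta bases outside the pack are not resolved in this sketch:

```rust
use gix_pack::data;

fn object_info(pack: &data::File, offset: u64) -> Result<(), Box<dyn std::error::Error>> {
    let entry = pack.entry(offset);
    // Returning `None` from the resolve function means out-of-pack bases cause an error.
    let info = pack.decode_header(entry, |_id| None)?;
    println!(
        "kind: {:?}, decompressed size: {}, delta chain length: {}",
        info.kind, info.object_size, info.num_deltas
    );
    Ok(())
}
```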
+ pub fn at(path: impl AsRef<Path>, object_hash: gix_hash::Kind) -> Result<data::File, data::header::decode::Error> { + Self::at_inner(path.as_ref(), object_hash) + } + + fn at_inner(path: &Path, object_hash: gix_hash::Kind) -> Result<data::File, data::header::decode::Error> { + use crate::data::header::N32_SIZE; + let hash_len = object_hash.len_in_bytes(); + + let data = crate::mmap::read_only(path).map_err(|e| data::header::decode::Error::Io { + source: e, + path: path.to_owned(), + })?; + let pack_len = data.len(); + if pack_len < N32_SIZE * 3 + hash_len { + return Err(data::header::decode::Error::Corrupt(format!( + "Pack data of size {pack_len} is too small for even an empty pack with shortest hash" + ))); + } + let (kind, num_objects) = + data::header::decode(&data[..12].try_into().expect("enough data after previous check"))?; + Ok(data::File { + data, + path: path.to_owned(), + id: gix_features::hash::crc32(path.as_os_str().to_string_lossy().as_bytes()), + version: kind, + num_objects, + hash_len, + object_hash, + }) + } +} diff --git a/vendor/gix-pack/src/data/file/mod.rs b/vendor/gix-pack/src/data/file/mod.rs new file mode 100644 index 000000000..6bfe0e272 --- /dev/null +++ b/vendor/gix-pack/src/data/file/mod.rs @@ -0,0 +1,9 @@ +mod init; +/// +pub mod verify; + +/// +pub mod decode; + +/// The bytes used as header in a pack data file. +pub type Header = [u8; 12]; diff --git a/vendor/gix-pack/src/data/file/verify.rs b/vendor/gix-pack/src/data/file/verify.rs new file mode 100644 index 000000000..afec20826 --- /dev/null +++ b/vendor/gix-pack/src/data/file/verify.rs @@ -0,0 +1,42 @@ +use std::sync::atomic::AtomicBool; + +use gix_features::progress::Progress; + +use crate::data::File; + +/// +pub mod checksum { + /// Returned by [`data::File::verify_checksum()`][crate::data::File::verify_checksum()]. + pub type Error = crate::verify::checksum::Error; +} + +/// Checksums and verify checksums +impl File { + /// The checksum in the trailer of this pack data file + pub fn checksum(&self) -> gix_hash::ObjectId { + gix_hash::ObjectId::from(&self.data[self.data.len() - self.hash_len..]) + } + + /// Verifies that the checksum of the packfile over all bytes preceding it indeed matches the actual checksum, + /// returning the actual checksum equivalent to the return value of [`checksum()`][File::checksum()] if there + /// is no mismatch. + /// + /// Note that if no `progress` is desired, one can pass [`gix_features::progress::Discard`]. + /// + /// Have a look at [`index::File::verify_integrity(…)`][crate::index::File::verify_integrity()] for an + /// even more thorough integrity check. + pub fn verify_checksum( + &self, + progress: impl Progress, + should_interrupt: &AtomicBool, + ) -> Result<gix_hash::ObjectId, checksum::Error> { + crate::verify::checksum_on_disk_or_mmap( + self.path(), + &self.data, + self.checksum(), + self.object_hash, + progress, + should_interrupt, + ) + } +} diff --git a/vendor/gix-pack/src/data/header.rs b/vendor/gix-pack/src/data/header.rs new file mode 100644 index 000000000..348a4ca24 --- /dev/null +++ b/vendor/gix-pack/src/data/header.rs @@ -0,0 +1,55 @@ +use crate::data; + +pub(crate) const N32_SIZE: usize = std::mem::size_of::<u32>(); + +/// Parses the first 12 bytes of a pack file, returning the pack version as well as the number of objects contained in the pack. 
+pub fn decode(data: &[u8; 12]) -> Result<(data::Version, u32), decode::Error> { + let mut ofs = 0; + if &data[ofs..ofs + b"PACK".len()] != b"PACK" { + return Err(decode::Error::Corrupt("Pack data type not recognized".into())); + } + ofs += N32_SIZE; + let kind = match crate::read_u32(&data[ofs..ofs + N32_SIZE]) { + 2 => data::Version::V2, + 3 => data::Version::V3, + v => return Err(decode::Error::UnsupportedVersion(v)), + }; + ofs += N32_SIZE; + let num_objects = crate::read_u32(&data[ofs..ofs + N32_SIZE]); + + Ok((kind, num_objects)) +} + +/// Write a pack data header at `version` with `num_objects` and return a buffer. +pub fn encode(version: data::Version, num_objects: u32) -> [u8; 12] { + use crate::data::Version::*; + let mut buf = [0u8; 12]; + buf[..4].copy_from_slice(b"PACK"); + buf[4..8].copy_from_slice( + &match version { + V2 => 2u32, + V3 => 3, + } + .to_be_bytes()[..], + ); + buf[8..].copy_from_slice(&num_objects.to_be_bytes()[..]); + buf +} + +/// +pub mod decode { + /// Returned by [`decode()`][super::decode()]. + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error("Could not open pack file at '{path}'")] + Io { + source: std::io::Error, + path: std::path::PathBuf, + }, + #[error("{0}")] + Corrupt(String), + #[error("Unsupported pack version: {0}")] + UnsupportedVersion(u32), + } +} diff --git a/vendor/gix-pack/src/data/input/bytes_to_entries.rs b/vendor/gix-pack/src/data/input/bytes_to_entries.rs new file mode 100644 index 000000000..cf20d5fbf --- /dev/null +++ b/vendor/gix-pack/src/data/input/bytes_to_entries.rs @@ -0,0 +1,295 @@ +use std::{fs, io}; + +use gix_features::{ + hash, + hash::Sha1, + zlib::{stream::inflate::ReadBoxed, Decompress}, +}; +use gix_hash::ObjectId; + +use crate::data::input; + +/// An iterator over [`Entries`][input::Entry] in a byte stream. +/// +/// The iterator used as part of [Bundle::write_to_directory(…)][crate::Bundle::write_to_directory()]. +pub struct BytesToEntriesIter<BR> { + read: BR, + decompressor: Option<Box<Decompress>>, + offset: u64, + had_error: bool, + version: crate::data::Version, + objects_left: u32, + hash: Option<Sha1>, + mode: input::Mode, + compressed: input::EntryDataMode, + compressed_buf: Option<Vec<u8>>, + hash_len: usize, + object_hash: gix_hash::Kind, +} + +/// Access +impl<BR> BytesToEntriesIter<BR> { + /// The pack version currently being iterated + pub fn version(&self) -> crate::data::Version { + self.version + } + + /// The kind of iteration + pub fn mode(&self) -> input::Mode { + self.mode + } +} + +/// Initialization +impl<BR> BytesToEntriesIter<BR> +where + BR: io::BufRead, +{ + /// Obtain an iterator from a `read` stream to a pack data file and configure it using `mode` and `compressed`. + /// `object_hash` specifies which hash is used for objects in ref-delta entries. + /// + /// Note that `read` is expected at the beginning of a valid pack data file with a header, entries and a trailer. 
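The 12-byte header handled by `decode`/`encode` above is just the `b"PACK"` magic followed by a big-endian version and a big-endian object count. A self-contained round-trip sketch with illustrative names (not part of the vendored source):

```rust
// Build a pack data header: magic, version, object count, all big-endian.
fn encode_pack_header(version: u32, num_objects: u32) -> [u8; 12] {
    let mut buf = [0u8; 12];
    buf[..4].copy_from_slice(b"PACK");
    buf[4..8].copy_from_slice(&version.to_be_bytes());
    buf[8..].copy_from_slice(&num_objects.to_be_bytes());
    buf
}

// Parse it back, rejecting anything that does not start with the magic bytes.
fn decode_pack_header(buf: &[u8; 12]) -> Option<(u32, u32)> {
    if &buf[..4] != b"PACK" {
        return None;
    }
    let version = u32::from_be_bytes(buf[4..8].try_into().ok()?);
    let num_objects = u32::from_be_bytes(buf[8..].try_into().ok()?);
    Some((version, num_objects))
}

fn main() {
    let header = encode_pack_header(2, 42);
    assert_eq!(decode_pack_header(&header), Some((2, 42)));
}
```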
+ pub fn new_from_header( + mut read: BR, + mode: input::Mode, + compressed: input::EntryDataMode, + object_hash: gix_hash::Kind, + ) -> Result<BytesToEntriesIter<BR>, input::Error> { + let mut header_data = [0u8; 12]; + read.read_exact(&mut header_data)?; + + let (version, num_objects) = crate::data::header::decode(&header_data)?; + assert_eq!( + version, + crate::data::Version::V2, + "let's stop here if we see undocumented pack formats" + ); + Ok(BytesToEntriesIter { + read, + decompressor: None, + compressed, + offset: 12, + had_error: false, + version, + objects_left: num_objects, + hash: (mode != input::Mode::AsIs).then(|| { + let mut hash = gix_features::hash::hasher(object_hash); + hash.update(&header_data); + hash + }), + mode, + compressed_buf: None, + hash_len: object_hash.len_in_bytes(), + object_hash, + }) + } + + fn next_inner(&mut self) -> Result<input::Entry, input::Error> { + self.objects_left -= 1; // even an error counts as objects + + // Read header + let entry = match self.hash.take() { + Some(hash) => { + let mut read = read_and_pass_to( + &mut self.read, + hash::Write { + inner: io::sink(), + hash, + }, + ); + let res = crate::data::Entry::from_read(&mut read, self.offset, self.hash_len); + self.hash = Some(read.write.hash); + res + } + None => crate::data::Entry::from_read(&mut self.read, self.offset, self.hash_len), + } + .map_err(input::Error::from)?; + + // Decompress object to learn its compressed bytes + let mut decompressor = self + .decompressor + .take() + .unwrap_or_else(|| Box::new(Decompress::new(true))); + let compressed_buf = self.compressed_buf.take().unwrap_or_else(|| Vec::with_capacity(4096)); + decompressor.reset(true); + let mut decompressed_reader = ReadBoxed { + inner: read_and_pass_to( + &mut self.read, + if self.compressed.keep() { + Vec::with_capacity(entry.decompressed_size as usize) + } else { + compressed_buf + }, + ), + decompressor, + }; + + let bytes_copied = io::copy(&mut decompressed_reader, &mut io::sink())?; + if bytes_copied != entry.decompressed_size { + return Err(input::Error::IncompletePack { + actual: bytes_copied, + expected: entry.decompressed_size, + }); + } + + let pack_offset = self.offset; + let compressed_size = decompressed_reader.decompressor.total_in(); + self.offset += entry.header_size() as u64 + compressed_size; + self.decompressor = Some(decompressed_reader.decompressor); + + let mut compressed = decompressed_reader.inner.write; + debug_assert_eq!( + compressed_size, + compressed.len() as u64, + "we must track exactly the same amount of bytes as read by the decompressor" + ); + if let Some(hash) = self.hash.as_mut() { + hash.update(&compressed); + } + + let crc32 = if self.compressed.crc32() { + let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()]; + let header_len = entry.header.write_to(bytes_copied, header_buf.as_mut())?; + let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]); + Some(gix_features::hash::crc32_update(state, &compressed)) + } else { + None + }; + + let compressed = if self.compressed.keep() { + Some(compressed) + } else { + compressed.clear(); + self.compressed_buf = Some(compressed); + None + }; + + // Last objects gets trailer (which is potentially verified) + let trailer = self.try_read_trailer()?; + Ok(input::Entry { + header: entry.header, + header_size: entry.header_size() as u16, + compressed, + compressed_size, + crc32, + pack_offset, + decompressed_size: bytes_copied, + trailer, + }) + } + + fn try_read_trailer(&mut self) -> 
Result<Option<ObjectId>, input::Error> { + Ok(if self.objects_left == 0 { + let mut id = gix_hash::ObjectId::null(self.object_hash); + if let Err(err) = self.read.read_exact(id.as_mut_slice()) { + if self.mode != input::Mode::Restore { + return Err(err.into()); + } + } + + if let Some(hash) = self.hash.take() { + let actual_id = gix_hash::ObjectId::from(hash.digest()); + if self.mode == input::Mode::Restore { + id = actual_id; + } + if id != actual_id { + return Err(input::Error::ChecksumMismatch { + actual: actual_id, + expected: id, + }); + } + } + Some(id) + } else if self.mode == input::Mode::Restore { + let hash = self.hash.clone().expect("in restore mode a hash is set"); + Some(gix_hash::ObjectId::from(hash.digest())) + } else { + None + }) + } +} + +fn read_and_pass_to<R: io::Read, W: io::Write>(read: &mut R, to: W) -> PassThrough<&mut R, W> { + PassThrough { read, write: to } +} + +impl<R> Iterator for BytesToEntriesIter<R> +where + R: io::BufRead, +{ + type Item = Result<input::Entry, input::Error>; + + fn next(&mut self) -> Option<Self::Item> { + if self.had_error || self.objects_left == 0 { + return None; + } + let result = self.next_inner(); + self.had_error = result.is_err(); + if self.had_error { + self.objects_left = 0; + } + if self.mode == input::Mode::Restore && self.had_error { + None + } else { + Some(result) + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.objects_left as usize, Some(self.objects_left as usize)) + } +} + +impl<R> std::iter::ExactSizeIterator for BytesToEntriesIter<R> where R: io::BufRead {} + +struct PassThrough<R, W> { + read: R, + write: W, +} + +impl<R, W> io::BufRead for PassThrough<R, W> +where + Self: io::Read, + R: io::BufRead, + W: io::Write, +{ + fn fill_buf(&mut self) -> io::Result<&[u8]> { + self.read.fill_buf() + } + + fn consume(&mut self, amt: usize) { + let buf = self + .read + .fill_buf() + .expect("never fail as we called fill-buf before and this does nothing"); + self.write + .write_all(&buf[..amt]) + .expect("a write to never fail - should be a memory buffer"); + self.read.consume(amt) + } +} + +impl<R, W> io::Read for PassThrough<R, W> +where + W: io::Write, + R: io::Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + let bytes_read = self.read.read(buf)?; + self.write.write_all(&buf[..bytes_read])?; + Ok(bytes_read) + } +} + +impl crate::data::File { + /// Returns an iterator over [`Entries`][crate::data::input::Entry], without making use of the memory mapping. + pub fn streaming_iter(&self) -> Result<BytesToEntriesIter<impl io::BufRead>, input::Error> { + let reader = io::BufReader::with_capacity(4096 * 8, fs::File::open(&self.path)?); + BytesToEntriesIter::new_from_header( + reader, + input::Mode::Verify, + input::EntryDataMode::KeepAndCrc32, + self.object_hash, + ) + } +} diff --git a/vendor/gix-pack/src/data/input/entries_to_bytes.rs b/vendor/gix-pack/src/data/input/entries_to_bytes.rs new file mode 100644 index 000000000..a8c21e653 --- /dev/null +++ b/vendor/gix-pack/src/data/input/entries_to_bytes.rs @@ -0,0 +1,155 @@ +use std::iter::Peekable; + +use gix_features::hash; + +use crate::data::input; + +/// An implementation of [`Iterator`] to write [encoded entries][input::Entry] to an inner implementation each time +/// `next()` is called. +/// +/// It is able to deal with an unknown amount of objects as it will rewrite the pack header once the entries iterator +/// is depleted and compute the hash in one go by re-reading the whole file. 
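A rough sketch of driving the streaming iterator above over a pack on disk, counting how many entries are deltas; the path is illustrative and errors are simply propagated:

```rust
use gix_pack::data;

fn count_entries() -> Result<(), Box<dyn std::error::Error>> {
    let pack = data::File::at("objects/pack/pack-1234.pack", gix_hash::Kind::Sha1)?;
    let mut total = 0u32;
    let mut deltas = 0u32;
    // Each item is a Result<input::Entry, input::Error>; the trailer of the last entry
    // carries the pack checksum when iterating in Verify mode.
    for entry in pack.streaming_iter()? {
        let entry = entry?;
        total += 1;
        if entry.header.is_delta() {
            deltas += 1;
        }
    }
    println!("{total} entries, {deltas} of which are deltas");
    Ok(())
}
```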
+pub struct EntriesToBytesIter<I: Iterator, W> { + /// An iterator for input [`input::Entry`] instances + pub input: Peekable<I>, + /// A way of writing encoded bytes. + output: W, + /// Our trailing hash when done writing all input entries + trailer: Option<gix_hash::ObjectId>, + /// The amount of objects in the iteration and the version of the packfile to be written. + /// Will be `None` to signal the header was written already. + data_version: crate::data::Version, + /// The amount of entries seen so far + num_entries: u32, + /// If we are done, no additional writes will occur + is_done: bool, + /// The kind of hash to use for the digest + object_hash: gix_hash::Kind, +} + +impl<I, W> EntriesToBytesIter<I, W> +where + I: Iterator<Item = Result<input::Entry, input::Error>>, + W: std::io::Read + std::io::Write + std::io::Seek, +{ + /// Create a new instance reading [entries][input::Entry] from an `input` iterator and write pack data bytes to + /// `output` writer, resembling a pack of `version`. The amount of entries will be dynamically determined and + /// the pack is completed once the last entry was written. + /// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version. + /// + /// # Panics + /// + /// Not all combinations of `object_hash` and `version` are supported currently triggering assertion errors. + pub fn new(input: I, output: W, version: crate::data::Version, object_hash: gix_hash::Kind) -> Self { + assert!( + matches!(version, crate::data::Version::V2), + "currently only pack version 2 can be written", + ); + assert!( + matches!(object_hash, gix_hash::Kind::Sha1), + "currently only Sha1 is supported, right now we don't know how other hashes are encoded", + ); + EntriesToBytesIter { + input: input.peekable(), + output, + object_hash, + num_entries: 0, + trailer: None, + data_version: version, + is_done: false, + } + } + + /// Returns the trailing hash over all ~ entries once done. + /// It's `None` if we are not yet done writing. + pub fn digest(&self) -> Option<gix_hash::ObjectId> { + self.trailer + } + + fn next_inner(&mut self, entry: input::Entry) -> Result<input::Entry, input::Error> { + if self.num_entries == 0 { + let header_bytes = crate::data::header::encode(self.data_version, 0); + self.output.write_all(&header_bytes[..])?; + } + self.num_entries += 1; + entry.header.write_to(entry.decompressed_size, &mut self.output)?; + std::io::copy( + &mut entry + .compressed + .as_deref() + .expect("caller must configure generator to keep compressed bytes"), + &mut self.output, + )?; + Ok(entry) + } + + fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), input::Error> { + let header_bytes = crate::data::header::encode(self.data_version, self.num_entries); + let num_bytes_written = if last_entry.is_some() { + self.output.stream_position()? 
+ } else { + header_bytes.len() as u64 + }; + self.output.rewind()?; + self.output.write_all(&header_bytes[..])?; + self.output.flush()?; + + self.output.rewind()?; + let interrupt_never = std::sync::atomic::AtomicBool::new(false); + let digest = hash::bytes( + &mut self.output, + num_bytes_written as usize, + self.object_hash, + &mut gix_features::progress::Discard, + &interrupt_never, + )?; + self.output.write_all(digest.as_slice())?; + self.output.flush()?; + + self.is_done = true; + if let Some(last_entry) = last_entry { + last_entry.trailer = Some(digest); + } + self.trailer = Some(digest); + Ok(()) + } +} + +impl<I, W> Iterator for EntriesToBytesIter<I, W> +where + I: Iterator<Item = Result<input::Entry, input::Error>>, + W: std::io::Read + std::io::Write + std::io::Seek, +{ + /// The amount of bytes written to `out` if `Ok` or the error `E` received from the input. + type Item = Result<input::Entry, input::Error>; + + fn next(&mut self) -> Option<Self::Item> { + if self.is_done { + return None; + } + + match self.input.next() { + Some(res) => Some(match res { + Ok(entry) => self.next_inner(entry).and_then(|mut entry| { + if self.input.peek().is_none() { + self.write_header_and_digest(Some(&mut entry)).map(|_| entry) + } else { + Ok(entry) + } + }), + Err(err) => { + self.is_done = true; + Err(err) + } + }), + None => match self.write_header_and_digest(None) { + Ok(_) => None, + Err(err) => Some(Err(err)), + }, + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.input.size_hint() + } +} diff --git a/vendor/gix-pack/src/data/input/entry.rs b/vendor/gix-pack/src/data/input/entry.rs new file mode 100644 index 000000000..74d4800a0 --- /dev/null +++ b/vendor/gix-pack/src/data/input/entry.rs @@ -0,0 +1,65 @@ +use std::io::Write; + +use crate::data::{entry::Header, input}; + +impl input::Entry { + /// Create a new input entry from a given data `obj` set to be placed at the given `pack_offset`. + /// + /// This method is useful when arbitrary base entries are created + pub fn from_data_obj(obj: &gix_object::Data<'_>, pack_offset: u64) -> Result<Self, input::Error> { + let header = to_header(obj.kind); + let compressed = compress_data(obj)?; + let compressed_size = compressed.len() as u64; + let mut entry = input::Entry { + header, + header_size: header.size(obj.data.len() as u64) as u16, + pack_offset, + compressed: Some(compressed), + compressed_size, + crc32: None, + decompressed_size: obj.data.len() as u64, + trailer: None, + }; + entry.crc32 = Some(entry.compute_crc32()); + Ok(entry) + } + /// The amount of bytes this entry may consume in a pack data file + pub fn bytes_in_pack(&self) -> u64 { + self.header_size as u64 + self.compressed_size + } + + /// Update our CRC value by recalculating it from our header and compressed data. 
+ pub fn compute_crc32(&self) -> u32 { + let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()]; + let header_len = self + .header + .write_to(self.decompressed_size, header_buf.as_mut()) + .expect("write to memory will not fail"); + let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]); + gix_features::hash::crc32_update(state, self.compressed.as_ref().expect("we always set it")) + } +} + +fn to_header(kind: gix_object::Kind) -> Header { + use gix_object::Kind::*; + match kind { + Tree => Header::Tree, + Blob => Header::Blob, + Commit => Header::Commit, + Tag => Header::Tag, + } +} + +fn compress_data(obj: &gix_object::Data<'_>) -> Result<Vec<u8>, input::Error> { + let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new()); + if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) { + match err.kind() { + std::io::ErrorKind::Other => return Err(input::Error::Io(err)), + err => { + unreachable!("Should never see other errors than zlib, but got {:?}", err,) + } + } + }; + out.flush().expect("zlib flush should never fail"); + Ok(out.into_inner()) +} diff --git a/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs new file mode 100644 index 000000000..f52c645f8 --- /dev/null +++ b/vendor/gix-pack/src/data/input/lookup_ref_delta_objects.rs @@ -0,0 +1,211 @@ +use std::convert::TryInto; + +use gix_hash::ObjectId; + +use crate::data::{entry::Header, input}; + +/// An iterator to resolve thin packs on the fly. +pub struct LookupRefDeltaObjectsIter<I, LFn> { + /// The inner iterator whose entries we will resolve. + pub inner: I, + lookup: LFn, + /// The cached delta to provide next time we are called, it's the delta to go with the base we just resolved in its place. + next_delta: Option<input::Entry>, + /// Fuse to stop iteration after first missing object. + error: bool, + /// The overall pack-offset we accumulated thus far. Each inserted entry offsets all following + /// objects by its length. We need to determine exactly where the object was inserted to see if its affected at all. + inserted_entry_length_at_offset: Vec<Change>, + /// The sum of all entries added so far, as a cache to avoid recomputation + inserted_entries_length_in_bytes: i64, + buf: Vec<u8>, +} + +impl<I, LFn> LookupRefDeltaObjectsIter<I, LFn> +where + I: Iterator<Item = Result<input::Entry, input::Error>>, + LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>, +{ + /// Create a new instance wrapping `iter` and using `lookup` as function to retrieve objects that will serve as bases + /// for ref deltas seen while traversing `iter`. + pub fn new(iter: I, lookup: LFn) -> Self { + LookupRefDeltaObjectsIter { + inner: iter, + lookup, + error: false, + inserted_entry_length_at_offset: Vec::new(), + inserted_entries_length_in_bytes: 0, + next_delta: None, + buf: Vec::new(), + } + } + + fn shifted_pack_offset(&self, pack_offset: u64) -> u64 { + let new_ofs = pack_offset as i64 + self.inserted_entries_length_in_bytes; + new_ofs.try_into().expect("offset value is never becomes negative") + } + + /// positive `size_change` values mean an object grew or was more commonly, was inserted. Negative values + /// mean the object shrunk, usually because there header changed from ref-deltas to ofs deltas. 
+ fn track_change( + &mut self, + shifted_pack_offset: u64, + pack_offset: u64, + size_change: i64, + oid: impl Into<Option<ObjectId>>, + ) { + if size_change == 0 { + return; + } + self.inserted_entry_length_at_offset.push(Change { + shifted_pack_offset, + pack_offset, + size_change_in_bytes: size_change, + oid: oid.into().unwrap_or_else(|| + // NOTE: this value acts as sentinel and the actual hash kind doesn't matter. + gix_hash::Kind::Sha1.null()), + }); + self.inserted_entries_length_in_bytes += size_change; + } + + fn shift_entry_and_point_to_base_by_offset(&mut self, entry: &mut input::Entry, base_distance: u64) { + let pack_offset = entry.pack_offset; + entry.pack_offset = self.shifted_pack_offset(pack_offset); + entry.header = Header::OfsDelta { base_distance }; + let previous_header_size = entry.header_size; + entry.header_size = entry.header.size(entry.decompressed_size) as u16; + + let change = entry.header_size as i64 - previous_header_size as i64; + entry.crc32 = Some(entry.compute_crc32()); + self.track_change(entry.pack_offset, pack_offset, change, None); + } +} + +impl<I, LFn> Iterator for LookupRefDeltaObjectsIter<I, LFn> +where + I: Iterator<Item = Result<input::Entry, input::Error>>, + LFn: for<'a> FnMut(ObjectId, &'a mut Vec<u8>) -> Option<gix_object::Data<'a>>, +{ + type Item = Result<input::Entry, input::Error>; + + fn next(&mut self) -> Option<Self::Item> { + if self.error { + return None; + } + if let Some(delta) = self.next_delta.take() { + return Some(Ok(delta)); + } + match self.inner.next() { + Some(Ok(mut entry)) => match entry.header { + Header::RefDelta { base_id } => { + match self.inserted_entry_length_at_offset.iter().rfind(|e| e.oid == base_id) { + None => { + let base_entry = match (self.lookup)(base_id, &mut self.buf) { + Some(obj) => { + let current_pack_offset = entry.pack_offset; + let mut entry = match input::Entry::from_data_obj(&obj, 0) { + Ok(e) => e, + Err(err) => return Some(Err(err)), + }; + entry.pack_offset = self.shifted_pack_offset(current_pack_offset); + self.track_change( + entry.pack_offset, + current_pack_offset, + entry.bytes_in_pack() as i64, + base_id, + ); + entry + } + None => { + self.error = true; + return Some(Err(input::Error::NotFound { object_id: base_id })); + } + }; + + { + self.shift_entry_and_point_to_base_by_offset(&mut entry, base_entry.bytes_in_pack()); + self.next_delta = Some(entry); + } + Some(Ok(base_entry)) + } + Some(base_entry) => { + let base_distance = + self.shifted_pack_offset(entry.pack_offset) - base_entry.shifted_pack_offset; + self.shift_entry_and_point_to_base_by_offset(&mut entry, base_distance); + Some(Ok(entry)) + } + } + } + _ => { + if self.inserted_entries_length_in_bytes != 0 { + if let Header::OfsDelta { base_distance } = entry.header { + // We have to find the new distance based on the previous distance to the base, using the absolute + // pack offset computed from it as stored in `base_pack_offset`. 
+ let base_pack_offset = entry + .pack_offset + .checked_sub(base_distance) + .expect("distance to be in range of pack"); + match self + .inserted_entry_length_at_offset + .binary_search_by_key(&base_pack_offset, |c| c.pack_offset) + { + Ok(index) => { + let index = { + let maybe_index_of_actual_entry = index + 1; + self.inserted_entry_length_at_offset + .get(maybe_index_of_actual_entry) + .and_then(|c| { + (c.pack_offset == base_pack_offset) + .then_some(maybe_index_of_actual_entry) + }) + .unwrap_or(index) + }; + let new_distance = self + .shifted_pack_offset(entry.pack_offset) + .checked_sub(self.inserted_entry_length_at_offset[index].shifted_pack_offset) + .expect("a base that is behind us in the pack"); + self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance); + } + Err(index) => { + let change_since_offset = self.inserted_entry_length_at_offset[index..] + .iter() + .map(|c| c.size_change_in_bytes) + .sum::<i64>(); + let new_distance: u64 = { + (base_distance as i64 + change_since_offset) + .try_into() + .expect("it still points behind us") + }; + self.shift_entry_and_point_to_base_by_offset(&mut entry, new_distance); + } + } + } else { + // Offset this entry by all changes (positive or negative) that we saw thus far. + entry.pack_offset = self.shifted_pack_offset(entry.pack_offset); + } + } + Some(Ok(entry)) + } + }, + other => other, + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let (min, max) = self.inner.size_hint(); + max.map(|max| (min, Some(max * 2))).unwrap_or_else(|| (min * 2, None)) + } +} + +#[derive(Debug)] +struct Change { + /// The original pack offset as mentioned in the entry we saw. This is used to find this as base object if deltas refer to it by + /// old offset. + pack_offset: u64, + /// The new pack offset that is the shifted location of the pack entry in the pack. + shifted_pack_offset: u64, + /// The size change of the entry header, negative values denote shrinking, positive denote growing. + size_change_in_bytes: i64, + /// The object id of the entry responsible for the change, or null if it's an entry just for tracking an insertion. + oid: ObjectId, +} diff --git a/vendor/gix-pack/src/data/input/mod.rs b/vendor/gix-pack/src/data/input/mod.rs new file mode 100644 index 000000000..df191de67 --- /dev/null +++ b/vendor/gix-pack/src/data/input/mod.rs @@ -0,0 +1,41 @@ +/// An item of the iteration produced by [`BytesToEntriesIter`] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Entry { + /// The header of a pack entry + pub header: crate::data::entry::Header, + /// The amount of bytes used to encode the `header`. `pack_offset + header_size` is the beginning of + /// the compressed data in the pack. + pub header_size: u16, + /// The first byte of the entry at which the `header` can be read. + pub pack_offset: u64, + /// The bytes consumed while producing `decompressed` + /// These do not contain the header, which makes it possible to easily replace a RefDelta with offset deltas + /// when resolving thin packs. + /// Depends on `CompressionMode` when the iterator is initialized. + pub compressed: Option<Vec<u8>>, + /// The amount of bytes the compressed portion of the entry takes, i.e. the portion behind behind the header. + pub compressed_size: u64, + /// The CRC32 over the complete entry, that is encoded header and compressed object data. 
+ /// Depends on `CompressionMode` when the iterator is initialized + pub crc32: Option<u32>, + /// The amount of decompressed bytes of the entry. + pub decompressed_size: u64, + /// Set for the last object in the iteration, providing the hash over all bytes of the iteration + /// for use as trailer in a pack or to verify it matches the trailer. + pub trailer: Option<gix_hash::ObjectId>, +} + +mod entry; + +mod types; +pub use types::{EntryDataMode, Error, Mode}; + +mod bytes_to_entries; +pub use bytes_to_entries::BytesToEntriesIter; + +mod lookup_ref_delta_objects; +pub use lookup_ref_delta_objects::LookupRefDeltaObjectsIter; + +mod entries_to_bytes; +pub use entries_to_bytes::EntriesToBytesIter; diff --git a/vendor/gix-pack/src/data/input/types.rs b/vendor/gix-pack/src/data/input/types.rs new file mode 100644 index 000000000..6fcd459e2 --- /dev/null +++ b/vendor/gix-pack/src/data/input/types.rs @@ -0,0 +1,73 @@ +use std::io; + +/// Returned by [`BytesToEntriesIter::new_from_header()`][crate::data::input::BytesToEntriesIter::new_from_header()] and as part +/// of `Item` of [`BytesToEntriesIter`][crate::data::input::BytesToEntriesIter]. +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("An IO operation failed while streaming an entry")] + Io(#[from] io::Error), + #[error(transparent)] + PackParse(#[from] crate::data::header::decode::Error), + #[error("pack checksum in trailer was {expected}, but actual checksum was {actual}")] + ChecksumMismatch { + expected: gix_hash::ObjectId, + actual: gix_hash::ObjectId, + }, + #[error("pack is incomplete: it was decompressed into {actual} bytes but {expected} bytes where expected.")] + IncompletePack { actual: u64, expected: u64 }, + #[error("The object {object_id} could not be decoded or wasn't found")] + NotFound { object_id: gix_hash::ObjectId }, +} + +/// Iteration Mode +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum Mode { + /// Provide the trailer as read from the pack + AsIs, + /// Generate an own hash and trigger an error on the last iterated object + /// if it does not match the hash provided with the pack. + /// + /// This way the one iterating the data cannot miss corruption as long as + /// the iteration is continued through to the end. + Verify, + /// Generate an own hash and if there was an error or the objects are depleted early + /// due to partial packs, return the last valid entry and with our own hash thus far. + /// Note that the existing pack hash, if present, will be ignored. + /// As we won't know which objects fails, every object will have the hash obtained thus far. + /// This also means that algorithms must know about this possibility, or else might wrongfully + /// assume the pack is finished. 
+ Restore, +} + +/// Define what to do with the compressed bytes portion of a pack [`Entry`][super::Entry] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum EntryDataMode { + /// Do nothing with the compressed bytes we read + Ignore, + /// Only create a CRC32 of the entry, otherwise similar to `Ignore` + Crc32, + /// Keep them and pass them along in a newly allocated buffer + Keep, + /// As above, but also compute a CRC32 + KeepAndCrc32, +} + +impl EntryDataMode { + /// Returns true if a crc32 should be computed + pub fn crc32(&self) -> bool { + match self { + EntryDataMode::KeepAndCrc32 | EntryDataMode::Crc32 => true, + EntryDataMode::Keep | EntryDataMode::Ignore => false, + } + } + /// Returns true if compressed bytes should be kept + pub fn keep(&self) -> bool { + match self { + EntryDataMode::Keep | EntryDataMode::KeepAndCrc32 => true, + EntryDataMode::Ignore | EntryDataMode::Crc32 => false, + } + } +} diff --git a/vendor/gix-pack/src/data/mod.rs b/vendor/gix-pack/src/data/mod.rs new file mode 100644 index 000000000..da717fc1a --- /dev/null +++ b/vendor/gix-pack/src/data/mod.rs @@ -0,0 +1,134 @@ +//! a pack data file +use std::{convert::TryInto, path::Path}; + +/// The offset to an entry into the pack data file, relative to its beginning. +pub type Offset = u64; + +/// An identifier to uniquely identify all packs loaded within a known context or namespace. +pub type Id = u32; + +use memmap2::Mmap; + +/// An representing an full- or delta-object within a pack +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Entry { + /// The entry's header + pub header: entry::Header, + /// The decompressed size of the entry in bytes. + /// + /// Note that for non-delta entries this will be the size of the object itself. + pub decompressed_size: u64, + /// absolute offset to compressed object data in the pack, just behind the entry's header + pub data_offset: Offset, +} + +mod file; +pub use file::{decode, verify, Header}; +/// +pub mod header; + +/// +pub mod init { + pub use super::header::decode::Error; +} + +/// +pub mod entry; + +/// +pub mod input; + +/// Utilities to encode pack data entries and write them to a `Write` implementation to resemble a pack data file. +pub mod output; + +/// A slice into a pack file denoting a pack entry. +/// +/// An entry can be decoded into an object. +pub type EntryRange = std::ops::Range<Offset>; + +/// Supported versions of a pack data file +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[allow(missing_docs)] +pub enum Version { + V2, + V3, +} + +impl Default for Version { + fn default() -> Self { + Version::V2 + } +} + +/// A pack data file +pub struct File { + data: Mmap, + path: std::path::PathBuf, + /// A value to represent this pack uniquely when used with cache lookup, or a way to identify this pack by its location on disk. + /// The same location on disk should yield the same id. + /// + /// These must be unique per pack and must be stable, that is they don't change if the pack doesn't change. + /// If the same id is assigned (or reassigned) to different packs, pack creation or cache access will fail in hard-to-debug ways. 
+ /// + /// This value is controlled by the owning object store, which can use it in whichever way it wants as long as the above constraints are met. + pub id: Id, + version: Version, + num_objects: u32, + /// The size of the hash contained within. This is entirely determined by the caller, and repositories have to know which hash to use + /// based on their configuration. + hash_len: usize, + object_hash: gix_hash::Kind, +} + +/// Information about the pack data file itself +impl File { + /// The pack data version of this file + pub fn version(&self) -> Version { + self.version + } + /// The number of objects stored in this pack data file + pub fn num_objects(&self) -> u32 { + self.num_objects + } + /// The length of all mapped data, including the pack header and the pack trailer + pub fn data_len(&self) -> usize { + self.data.len() + } + /// The kind of hash we use internally. + pub fn object_hash(&self) -> gix_hash::Kind { + self.object_hash + } + /// The position of the byte one past the last pack entry, or in other terms, the first byte of the trailing hash. + pub fn pack_end(&self) -> usize { + self.data.len() - self.hash_len + } + + /// The path to the pack data file on disk + pub fn path(&self) -> &Path { + &self.path + } + + /// Returns the pack data at the given slice if its range is contained in the mapped pack data + pub fn entry_slice(&self, slice: EntryRange) -> Option<&[u8]> { + let entry_end: usize = slice.end.try_into().expect("end of pack fits into usize"); + let entry_start = slice.start as usize; + self.data.get(entry_start..entry_end) + } + + /// Returns the CRC32 of the pack data indicated by `pack_offset` and the `size` of the mapped data. + /// + /// _Note:_ finding the right size is only possible by decompressing + /// the pack entry beforehand, or by using the (to be sorted) offsets stored in an index file. + /// + /// # Panics + /// + /// If `pack_offset` or `size` are pointing to a range outside of the mapped pack data. + pub fn entry_crc32(&self, pack_offset: Offset, size: usize) -> u32 { + let pack_offset: usize = pack_offset.try_into().expect("pack_size fits into usize"); + gix_features::hash::crc32(&self.data[pack_offset..pack_offset + size]) + } +} + +pub(crate) mod delta; diff --git a/vendor/gix-pack/src/data/output/bytes.rs b/vendor/gix-pack/src/data/output/bytes.rs new file mode 100644 index 000000000..ec219db9d --- /dev/null +++ b/vendor/gix-pack/src/data/output/bytes.rs @@ -0,0 +1,156 @@ +use std::io::Write; + +use gix_features::hash; + +use crate::data::output; + +/// The error returned by `next()` in the [`FromEntriesIter`] iterator. +#[allow(missing_docs)] +#[derive(Debug, thiserror::Error)] +pub enum Error<E> +where + E: std::error::Error + 'static, +{ + #[error(transparent)] + Io(#[from] std::io::Error), + #[error(transparent)] + Input(E), +} + +/// An implementation of [`Iterator`] to write [encoded entries][output::Entry] to an inner implementation each time +/// `next()` is called. +pub struct FromEntriesIter<I, W> { + /// An iterator for input [`output::Entry`] instances + pub input: I, + /// A way of writing encoded bytes. + output: hash::Write<W>, + /// Our trailing hash when done writing all input entries + trailer: Option<gix_hash::ObjectId>, + /// The amount of objects in the iteration and the version of the packfile to be written. + /// Will be `None` to signal the header was written already. + header_info: Option<(crate::data::Version, u32)>, + /// The pack data version with which pack entries should be written. 
+ entry_version: crate::data::Version, + /// The amount of written bytes thus far + written: u64, + /// Required to quickly find offsets by object IDs, as future objects may refer to those in the past to become a delta offset base. + /// It stores the pack offsets at which objects begin. + /// Additionally we store if an object was invalid, and if so we will not write it nor will we allow delta objects to it. + pack_offsets_and_validity: Vec<(u64, bool)>, + /// If we are done, no additional writes will occur + is_done: bool, +} + +impl<I, W, E> FromEntriesIter<I, W> +where + I: Iterator<Item = Result<Vec<output::Entry>, E>>, + W: std::io::Write, + E: std::error::Error + 'static, +{ + /// Create a new instance reading [entries][output::Entry] from an `input` iterator and write pack data bytes to + /// `output` writer, resembling a pack of `version` with exactly `num_entries` amount of objects contained in it. + /// `object_hash` is the kind of hash to use for the pack checksum and maybe other places, depending on the version. + /// + /// The input chunks are expected to be sorted already. You can use the [InOrderIter][gix_features::parallel::InOrderIter] to assure + /// this happens on the fly holding entire chunks in memory as long as needed for them to be dispensed in order. + /// + /// # Panics + /// + /// Not all combinations of `object_hash` and `version` are supported currently triggering assertion errors. + pub fn new( + input: I, + output: W, + num_entries: u32, + version: crate::data::Version, + object_hash: gix_hash::Kind, + ) -> Self { + assert!( + matches!(version, crate::data::Version::V2), + "currently only pack version 2 can be written", + ); + FromEntriesIter { + input, + output: hash::Write::new(output, object_hash), + trailer: None, + entry_version: version, + pack_offsets_and_validity: Vec::with_capacity(num_entries as usize), + written: 0, + header_info: Some((version, num_entries)), + is_done: false, + } + } + + /// Consume this instance and return the `output` implementation. + /// + /// _Note_ that the `input` iterator can be moved out of this instance beforehand. + pub fn into_write(self) -> W { + self.output.inner + } + + /// Returns the trailing hash over all written entries once done. + /// It's `None` if we are not yet done writing. + pub fn digest(&self) -> Option<gix_hash::ObjectId> { + self.trailer + } + + fn next_inner(&mut self) -> Result<u64, Error<E>> { + let previous_written = self.written; + if let Some((version, num_entries)) = self.header_info.take() { + let header_bytes = crate::data::header::encode(version, num_entries); + self.output.write_all(&header_bytes[..])?; + self.written += header_bytes.len() as u64; + } + match self.input.next() { + Some(entries) => { + for entry in entries.map_err(Error::Input)? { + if entry.is_invalid() { + self.pack_offsets_and_validity.push((0, false)); + continue; + }; + self.pack_offsets_and_validity.push((self.written, true)); + let header = entry.to_entry_header(self.entry_version, |index| { + let (base_offset, is_valid_object) = self.pack_offsets_and_validity[index]; + if !is_valid_object { + unreachable!("if you see this the object database is correct as a delta refers to a non-existing object") + } + self.written - base_offset + }); + self.written += header.write_to(entry.decompressed_size as u64, &mut self.output)? 
as u64; + self.written += std::io::copy(&mut &*entry.compressed_data, &mut self.output)?; + } + } + None => { + let digest = self.output.hash.clone().digest(); + self.output.inner.write_all(&digest[..])?; + self.written += digest.len() as u64; + self.output.inner.flush()?; + self.is_done = true; + self.trailer = Some(gix_hash::ObjectId::from(digest)); + } + }; + Ok(self.written - previous_written) + } +} + +impl<I, W, E> Iterator for FromEntriesIter<I, W> +where + I: Iterator<Item = Result<Vec<output::Entry>, E>>, + W: std::io::Write, + E: std::error::Error + 'static, +{ + /// The amount of bytes written to `out` if `Ok` or the error `E` received from the input. + type Item = Result<u64, Error<E>>; + + fn next(&mut self) -> Option<Self::Item> { + if self.is_done { + return None; + } + Some(match self.next_inner() { + Err(err) => { + self.is_done = true; + Err(err) + } + Ok(written) => Ok(written), + }) + } +} diff --git a/vendor/gix-pack/src/data/output/count/mod.rs b/vendor/gix-pack/src/data/output/count/mod.rs new file mode 100644 index 000000000..e7ee767de --- /dev/null +++ b/vendor/gix-pack/src/data/output/count/mod.rs @@ -0,0 +1,49 @@ +use gix_hash::ObjectId; + +use crate::data::output::Count; + +/// Specifies how the pack location was handled during counting +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum PackLocation { + /// We did not lookup this object + NotLookedUp, + /// The object was looked up and there may be a location in a pack, along with entry information + LookedUp(Option<crate::data::entry::Location>), +} + +impl PackLocation { + /// Directly go through to LookedUp variant, panic otherwise + pub fn is_none(&self) -> bool { + match self { + PackLocation::LookedUp(opt) => opt.is_none(), + PackLocation::NotLookedUp => unreachable!("must have been resolved"), + } + } + /// Directly go through to LookedUp variant, panic otherwise + pub fn as_ref(&self) -> Option<&crate::data::entry::Location> { + match self { + PackLocation::LookedUp(opt) => opt.as_ref(), + PackLocation::NotLookedUp => unreachable!("must have been resolved"), + } + } +} + +impl Count { + /// Create a new instance from the given `oid` and its corresponding git `obj`ect data. + pub fn from_data(oid: impl Into<ObjectId>, location: Option<crate::data::entry::Location>) -> Self { + Count { + id: oid.into(), + entry_pack_location: PackLocation::LookedUp(location), + } + } +} + +#[path = "objects/mod.rs"] +mod objects_impl; +pub use objects_impl::{objects, objects_unthreaded}; + +/// +pub mod objects { + pub use super::objects_impl::{Error, ObjectExpansion, Options, Outcome, Result}; +} diff --git a/vendor/gix-pack/src/data/output/count/objects/mod.rs b/vendor/gix-pack/src/data/output/count/objects/mod.rs new file mode 100644 index 000000000..d56bc9a5f --- /dev/null +++ b/vendor/gix-pack/src/data/output/count/objects/mod.rs @@ -0,0 +1,405 @@ +use std::{ + cell::RefCell, + sync::{atomic::AtomicBool, Arc}, +}; + +use gix_features::{parallel, progress::Progress}; +use gix_hash::ObjectId; + +use crate::{data::output, find}; + +pub(in crate::data::output::count::objects_impl) mod reduce; +mod util; + +mod types; +pub use types::{Error, ObjectExpansion, Options, Outcome}; + +mod tree; + +/// The return type used by [`objects()`]. 
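+/// On success it yields the produced [`Count`][output::Count]s together with an [`Outcome`] holding statistics about the run.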
+pub type Result<E1, E2> = std::result::Result<(Vec<output::Count>, Outcome), Error<E1, E2>>; + +/// Generate [`Count`][output::Count]s from input `objects` with object expansion based on [`options`][Options] +/// to learn which objects would would constitute a pack. This step is required to know exactly how many objects would +/// be in a pack while keeping data around to avoid minimize object database access. +/// +/// A [`Count`][output::Count] object maintains enough state to greatly accelerate future access of packed objects. +/// +/// * `db` - the object store to use for accessing objects. +/// * `objects_ids` +/// * A list of objects ids to add to the pack. Duplication checks are performed so no object is ever added to a pack twice. +/// * Objects may be expanded based on the provided [`options`][Options] +/// * `progress` +/// * a way to obtain progress information +/// * `should_interrupt` +/// * A flag that is set to true if the operation should stop +/// * `options` +/// * more configuration +pub fn objects<Find, Iter, IterErr, Oid>( + db: Find, + objects_ids: Iter, + progress: impl Progress, + should_interrupt: &AtomicBool, + Options { + thread_limit, + input_object_expansion, + chunk_size, + }: Options, +) -> Result<find::existing::Error<Find::Error>, IterErr> +where + Find: crate::Find + Send + Clone, + <Find as crate::Find>::Error: Send, + Iter: Iterator<Item = std::result::Result<Oid, IterErr>> + Send, + Oid: Into<ObjectId> + Send, + IterErr: std::error::Error + Send, +{ + let lower_bound = objects_ids.size_hint().0; + let (chunk_size, thread_limit, _) = parallel::optimize_chunk_size_and_thread_limit( + chunk_size, + if lower_bound == 0 { None } else { Some(lower_bound) }, + thread_limit, + None, + ); + let chunks = gix_features::iter::Chunks { + inner: objects_ids, + size: chunk_size, + }; + let seen_objs = gix_hashtable::sync::ObjectIdMap::default(); + let progress = Arc::new(parking_lot::Mutex::new(progress)); + + parallel::in_parallel( + chunks, + thread_limit, + { + let progress = Arc::clone(&progress); + move |n| { + ( + Vec::new(), // object data buffer + Vec::new(), // object data buffer 2 to hold two objects at a time + { + let mut p = progress + .lock() + .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN); + p.init(None, gix_features::progress::count("objects")); + p + }, + ) + } + }, + { + let seen_objs = &seen_objs; + move |oids: Vec<std::result::Result<Oid, IterErr>>, (buf1, buf2, progress)| { + expand::this( + &db, + input_object_expansion, + seen_objs, + oids, + buf1, + buf2, + progress, + should_interrupt, + true, /*allow pack lookups*/ + ) + } + }, + reduce::Statistics::new(progress), + ) +} + +/// Like [`objects()`] but using a single thread only to mostly save on the otherwise required overhead. 
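+///
+/// # Example
+///
+/// A rough sketch only, not compiled here: `db` is assumed to be any `crate::Find` implementation, `progress` any
+/// `Progress` value, and `tip_commits` a hypothetical list of commit ids to expand:
+///
+/// ```ignore
+/// use std::sync::atomic::AtomicBool;
+///
+/// let should_interrupt = AtomicBool::new(false);
+/// let (counts, outcome) = objects_unthreaded(
+///     db,
+///     tip_commits.into_iter().map(Ok::<_, std::convert::Infallible>),
+///     progress,
+///     &should_interrupt,
+///     ObjectExpansion::TreeContents,
+/// )?;
+/// assert_eq!(counts.len(), outcome.total_objects);
+/// ```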
+pub fn objects_unthreaded<Find, IterErr, Oid>( + db: Find, + object_ids: impl Iterator<Item = std::result::Result<Oid, IterErr>>, + mut progress: impl Progress, + should_interrupt: &AtomicBool, + input_object_expansion: ObjectExpansion, +) -> Result<find::existing::Error<Find::Error>, IterErr> +where + Find: crate::Find, + Oid: Into<ObjectId>, + IterErr: std::error::Error, +{ + let seen_objs = RefCell::new(gix_hashtable::HashSet::default()); + + let (mut buf1, mut buf2) = (Vec::new(), Vec::new()); + expand::this( + &db, + input_object_expansion, + &seen_objs, + object_ids, + &mut buf1, + &mut buf2, + &mut progress, + should_interrupt, + false, /*allow pack lookups*/ + ) +} + +mod expand { + use std::sync::atomic::{AtomicBool, Ordering}; + + use gix_features::progress::Progress; + use gix_hash::{oid, ObjectId}; + use gix_object::{CommitRefIter, TagRefIter}; + + use super::{ + tree, + types::{Error, ObjectExpansion, Outcome}, + util, + }; + use crate::{ + data::{output, output::count::PackLocation}, + find, FindExt, + }; + + #[allow(clippy::too_many_arguments)] + pub fn this<Find, IterErr, Oid>( + db: &Find, + input_object_expansion: ObjectExpansion, + seen_objs: &impl util::InsertImmutable, + oids: impl IntoIterator<Item = std::result::Result<Oid, IterErr>>, + buf1: &mut Vec<u8>, + #[allow(clippy::ptr_arg)] buf2: &mut Vec<u8>, + progress: &mut impl Progress, + should_interrupt: &AtomicBool, + allow_pack_lookups: bool, + ) -> super::Result<find::existing::Error<Find::Error>, IterErr> + where + Find: crate::Find, + Oid: Into<ObjectId>, + IterErr: std::error::Error, + { + use ObjectExpansion::*; + + let mut out = Vec::new(); + let mut tree_traversal_state = gix_traverse::tree::breadthfirst::State::default(); + let mut tree_diff_state = gix_diff::tree::State::default(); + let mut parent_commit_ids = Vec::new(); + let mut traverse_delegate = tree::traverse::AllUnseen::new(seen_objs); + let mut changes_delegate = tree::changes::AllNew::new(seen_objs); + let mut outcome = Outcome::default(); + + let stats = &mut outcome; + for id in oids.into_iter() { + if should_interrupt.load(Ordering::Relaxed) { + return Err(Error::Interrupted); + } + + let id = id.map(|oid| oid.into()).map_err(Error::InputIteration)?; + let (obj, location) = db.find(id, buf1)?; + stats.input_objects += 1; + match input_object_expansion { + TreeAdditionsComparedToAncestor => { + use gix_object::Kind::*; + let mut obj = obj; + let mut location = location; + let mut id = id.to_owned(); + + loop { + push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false); + match obj.kind { + Tree | Blob => break, + Tag => { + id = TagRefIter::from_bytes(obj.data) + .target_id() + .expect("every tag has a target"); + let tmp = db.find(id, buf1)?; + + obj = tmp.0; + location = tmp.1; + + stats.expanded_objects += 1; + continue; + } + Commit => { + let current_tree_iter = { + let mut commit_iter = CommitRefIter::from_bytes(obj.data); + let tree_id = commit_iter.tree_id().expect("every commit has a tree"); + parent_commit_ids.clear(); + for token in commit_iter { + match token { + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + parent_commit_ids.push(id) + } + Ok(_) => break, + Err(err) => return Err(Error::CommitDecode(err)), + } + } + let (obj, location) = db.find(tree_id, buf1)?; + push_obj_count_unique( + &mut out, seen_objs, &tree_id, location, progress, stats, true, + ); + gix_object::TreeRefIter::from_bytes(obj.data) + }; + + let objects = if parent_commit_ids.is_empty() { + traverse_delegate.clear(); + 
gix_traverse::tree::breadthfirst( + current_tree_iter, + &mut tree_traversal_state, + |oid, buf| { + stats.decoded_objects += 1; + match db.find(oid, buf).ok() { + Some((obj, location)) => { + progress.inc(); + stats.expanded_objects += 1; + out.push(output::Count::from_data(oid, location)); + obj.try_into_tree_iter() + } + None => None, + } + }, + &mut traverse_delegate, + ) + .map_err(Error::TreeTraverse)?; + &traverse_delegate.non_trees + } else { + for commit_id in &parent_commit_ids { + let parent_tree_id = { + let (parent_commit_obj, location) = db.find(commit_id, buf2)?; + + push_obj_count_unique( + &mut out, seen_objs, commit_id, location, progress, stats, true, + ); + CommitRefIter::from_bytes(parent_commit_obj.data) + .tree_id() + .expect("every commit has a tree") + }; + let parent_tree = { + let (parent_tree_obj, location) = db.find(parent_tree_id, buf2)?; + push_obj_count_unique( + &mut out, + seen_objs, + &parent_tree_id, + location, + progress, + stats, + true, + ); + gix_object::TreeRefIter::from_bytes(parent_tree_obj.data) + }; + + changes_delegate.clear(); + gix_diff::tree::Changes::from(Some(parent_tree)) + .needed_to_obtain( + current_tree_iter.clone(), + &mut tree_diff_state, + |oid, buf| { + stats.decoded_objects += 1; + db.find_tree_iter(oid, buf).map(|t| t.0) + }, + &mut changes_delegate, + ) + .map_err(Error::TreeChanges)?; + } + &changes_delegate.objects + }; + for id in objects.iter() { + out.push(id_to_count(db, buf2, id, progress, stats, allow_pack_lookups)); + } + break; + } + } + } + } + TreeContents => { + use gix_object::Kind::*; + let mut id = id; + let mut obj = (obj, location); + loop { + push_obj_count_unique(&mut out, seen_objs, &id, obj.1.clone(), progress, stats, false); + match obj.0.kind { + Tree => { + traverse_delegate.clear(); + gix_traverse::tree::breadthfirst( + gix_object::TreeRefIter::from_bytes(obj.0.data), + &mut tree_traversal_state, + |oid, buf| { + stats.decoded_objects += 1; + match db.find(oid, buf).ok() { + Some((obj, location)) => { + progress.inc(); + stats.expanded_objects += 1; + out.push(output::Count::from_data(oid, location)); + obj.try_into_tree_iter() + } + None => None, + } + }, + &mut traverse_delegate, + ) + .map_err(Error::TreeTraverse)?; + for id in traverse_delegate.non_trees.iter() { + out.push(id_to_count(db, buf1, id, progress, stats, allow_pack_lookups)); + } + break; + } + Commit => { + id = CommitRefIter::from_bytes(obj.0.data) + .tree_id() + .expect("every commit has a tree"); + stats.expanded_objects += 1; + obj = db.find(id, buf1)?; + continue; + } + Blob => break, + Tag => { + id = TagRefIter::from_bytes(obj.0.data) + .target_id() + .expect("every tag has a target"); + stats.expanded_objects += 1; + obj = db.find(id, buf1)?; + continue; + } + } + } + } + AsIs => push_obj_count_unique(&mut out, seen_objs, &id, location, progress, stats, false), + } + } + outcome.total_objects = out.len(); + Ok((out, outcome)) + } + + #[inline] + fn push_obj_count_unique( + out: &mut Vec<output::Count>, + all_seen: &impl util::InsertImmutable, + id: &oid, + location: Option<crate::data::entry::Location>, + progress: &mut impl Progress, + statistics: &mut Outcome, + count_expanded: bool, + ) { + let inserted = all_seen.insert(id.to_owned()); + if inserted { + progress.inc(); + statistics.decoded_objects += 1; + if count_expanded { + statistics.expanded_objects += 1; + } + out.push(output::Count::from_data(id, location)); + } + } + + #[inline] + fn id_to_count<Find: crate::Find>( + db: &Find, + buf: &mut Vec<u8>, + id: &oid, + 
progress: &mut impl Progress, + statistics: &mut Outcome, + allow_pack_lookups: bool, + ) -> output::Count { + progress.inc(); + statistics.expanded_objects += 1; + output::Count { + id: id.to_owned(), + entry_pack_location: if allow_pack_lookups { + PackLocation::LookedUp(db.location_by_oid(id, buf)) + } else { + PackLocation::NotLookedUp + }, + } + } +} diff --git a/vendor/gix-pack/src/data/output/count/objects/reduce.rs b/vendor/gix-pack/src/data/output/count/objects/reduce.rs new file mode 100644 index 000000000..c6a61d467 --- /dev/null +++ b/vendor/gix-pack/src/data/output/count/objects/reduce.rs @@ -0,0 +1,49 @@ +use std::{marker::PhantomData, sync::Arc}; + +use gix_features::{parallel, progress::Progress}; + +use super::Outcome; +use crate::data::output; + +pub struct Statistics<E, P> { + total: Outcome, + counts: Vec<output::Count>, + progress: Arc<parking_lot::Mutex<P>>, + _err: PhantomData<E>, +} + +impl<E, P> Statistics<E, P> +where + P: Progress, +{ + pub fn new(progress: Arc<parking_lot::Mutex<P>>) -> Self { + Statistics { + total: Default::default(), + counts: Default::default(), + progress, + _err: PhantomData::default(), + } + } +} + +impl<E, P> parallel::Reduce for Statistics<E, P> +where + P: Progress, +{ + type Input = Result<(Vec<output::Count>, Outcome), E>; + type FeedProduce = (); + type Output = (Vec<output::Count>, Outcome); + type Error = E; + + fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> { + let (counts, stats) = item?; + self.total.aggregate(stats); + self.progress.lock().inc_by(counts.len()); + self.counts.extend(counts); + Ok(()) + } + + fn finalize(self) -> Result<Self::Output, Self::Error> { + Ok((self.counts, self.total)) + } +} diff --git a/vendor/gix-pack/src/data/output/count/objects/tree.rs b/vendor/gix-pack/src/data/output/count/objects/tree.rs new file mode 100644 index 000000000..d3f4f6b9a --- /dev/null +++ b/vendor/gix-pack/src/data/output/count/objects/tree.rs @@ -0,0 +1,124 @@ +pub mod changes { + use gix_diff::tree::{ + visit::{Action, Change}, + Visit, + }; + use gix_hash::ObjectId; + use gix_object::{bstr::BStr, tree::EntryMode}; + + use crate::data::output::count::objects_impl::util::InsertImmutable; + + pub struct AllNew<'a, H> { + pub objects: Vec<ObjectId>, + all_seen: &'a H, + } + + impl<'a, H> AllNew<'a, H> + where + H: InsertImmutable, + { + pub fn new(all_seen: &'a H) -> Self { + AllNew { + objects: Default::default(), + all_seen, + } + } + pub fn clear(&mut self) { + self.objects.clear(); + } + } + + impl<'a, H> Visit for AllNew<'a, H> + where + H: InsertImmutable, + { + fn pop_front_tracked_path_and_set_current(&mut self) {} + + fn push_back_tracked_path_component(&mut self, _component: &BStr) {} + + fn push_path_component(&mut self, _component: &BStr) {} + + fn pop_path_component(&mut self) {} + + fn visit(&mut self, change: Change) -> Action { + match change { + Change::Addition { oid, entry_mode } | Change::Modification { oid, entry_mode, .. } => { + if entry_mode == EntryMode::Commit { + return Action::Continue; + } + let inserted = self.all_seen.insert(oid); + if inserted { + self.objects.push(oid); + } + } + Change::Deletion { .. 
} => {} + }; + Action::Continue + } + } +} + +pub mod traverse { + use gix_hash::ObjectId; + use gix_object::{ + bstr::BStr, + tree::{EntryMode, EntryRef}, + }; + use gix_traverse::tree::{visit::Action, Visit}; + + use crate::data::output::count::objects_impl::util::InsertImmutable; + + pub struct AllUnseen<'a, H> { + pub non_trees: Vec<ObjectId>, + all_seen: &'a H, + } + + impl<'a, H> AllUnseen<'a, H> + where + H: InsertImmutable, + { + pub fn new(all_seen: &'a H) -> Self { + AllUnseen { + non_trees: Default::default(), + all_seen, + } + } + pub fn clear(&mut self) { + self.non_trees.clear(); + } + } + + impl<'a, H> Visit for AllUnseen<'a, H> + where + H: InsertImmutable, + { + fn pop_front_tracked_path_and_set_current(&mut self) {} + + fn push_back_tracked_path_component(&mut self, _component: &BStr) {} + + fn push_path_component(&mut self, _component: &BStr) {} + + fn pop_path_component(&mut self) {} + + fn visit_tree(&mut self, entry: &EntryRef<'_>) -> Action { + let inserted = self.all_seen.insert(entry.oid.to_owned()); + if inserted { + Action::Continue + } else { + Action::Skip + } + } + + fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action { + if entry.mode == EntryMode::Commit { + // links don't have a representation + return Action::Continue; + } + let inserted = self.all_seen.insert(entry.oid.to_owned()); + if inserted { + self.non_trees.push(entry.oid.to_owned()); + } + Action::Continue + } + } +} diff --git a/vendor/gix-pack/src/data/output/count/objects/types.rs b/vendor/gix-pack/src/data/output/count/objects/types.rs new file mode 100644 index 000000000..8c8c939df --- /dev/null +++ b/vendor/gix-pack/src/data/output/count/objects/types.rs @@ -0,0 +1,105 @@ +/// Information gathered during the run of [`iter_from_objects()`][super::objects()]. +#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Outcome { + /// The amount of objects provided to start the iteration. + pub input_objects: usize, + /// The amount of objects that have been expanded from the input source. + /// It's desirable to do that as expansion happens on multiple threads, allowing the amount of input objects to be small. + /// `expanded_objects - decoded_objects` is the 'cheap' object we found without decoding the object itself. + pub expanded_objects: usize, + /// The amount of fully decoded objects. These are the most expensive as they are fully decoded + pub decoded_objects: usize, + /// The total amount of encountered objects. Should be `expanded_objects + input_objects`. + pub total_objects: usize, +} + +impl Outcome { + pub(in crate::data::output::count) fn aggregate( + &mut self, + Outcome { + input_objects, + decoded_objects, + expanded_objects, + total_objects, + }: Self, + ) { + self.input_objects += input_objects; + self.decoded_objects += decoded_objects; + self.expanded_objects += expanded_objects; + self.total_objects += total_objects; + } +} + +/// The way input objects are handled +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum ObjectExpansion { + /// Don't do anything with the input objects except for transforming them into pack entries + AsIs, + /// If the input object is a Commit then turn it into a pack entry. 
Additionally obtain its tree, turn it into a pack entry + /// along with all of its contents, that is nested trees, and any other objects reachable from it. + /// Otherwise, the same as [`AsIs`][ObjectExpansion::AsIs]. + /// + /// This mode is useful if all reachable objects should be added, as in cloning a repository. + TreeContents, + /// If the input is a commit, obtain its ancestors and turn them into pack entries. Obtain the ancestor trees along with the commits + /// tree and turn them into pack entries. Finally obtain the added/changed objects when comparing the ancestor trees with the + /// current tree and turn them into entries as well. + /// Otherwise, the same as [`AsIs`][ObjectExpansion::AsIs]. + /// + /// This mode is useful to build a pack containing only new objects compared to a previous state. + TreeAdditionsComparedToAncestor, +} + +impl Default for ObjectExpansion { + fn default() -> Self { + ObjectExpansion::AsIs + } +} + +/// Configuration options for the pack generation functions provided in [this module][crate::data::output]. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Options { + /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used. + /// If more than one thread is used, the order of returned [counts][crate::data::output::Count] is not deterministic anymore + /// especially when tree traversal is involved. Thus deterministic ordering requires `Some(1)` to be set. + pub thread_limit: Option<usize>, + /// The amount of objects per chunk or unit of work to be sent to threads for processing + pub chunk_size: usize, + /// The way input objects are handled + pub input_object_expansion: ObjectExpansion, +} + +impl Default for Options { + fn default() -> Self { + Options { + thread_limit: None, + chunk_size: 10, + input_object_expansion: Default::default(), + } + } +} + +/// The error returned by the pack generation iterator [bytes::FromEntriesIter][crate::data::output::bytes::FromEntriesIter]. 
+#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error<FindErr, IterErr> +where + FindErr: std::error::Error + 'static, + IterErr: std::error::Error + 'static, +{ + #[error(transparent)] + CommitDecode(gix_object::decode::Error), + #[error(transparent)] + FindExisting(#[from] FindErr), + #[error(transparent)] + InputIteration(IterErr), + #[error(transparent)] + TreeTraverse(gix_traverse::tree::breadthfirst::Error), + #[error(transparent)] + TreeChanges(gix_diff::tree::changes::Error), + #[error("Operation interrupted")] + Interrupted, +} diff --git a/vendor/gix-pack/src/data/output/count/objects/util.rs b/vendor/gix-pack/src/data/output/count/objects/util.rs new file mode 100644 index 000000000..a80841313 --- /dev/null +++ b/vendor/gix-pack/src/data/output/count/objects/util.rs @@ -0,0 +1,24 @@ +pub trait InsertImmutable { + fn insert(&self, id: gix_hash::ObjectId) -> bool; +} + +mod trait_impls { + use gix_hash::ObjectId; + use std::cell::RefCell; + + use gix_hashtable::HashSet; + + use super::InsertImmutable; + + impl InsertImmutable for gix_hashtable::sync::ObjectIdMap<()> { + fn insert(&self, id: ObjectId) -> bool { + self.insert(id, ()).is_none() + } + } + + impl InsertImmutable for RefCell<HashSet<ObjectId>> { + fn insert(&self, item: ObjectId) -> bool { + self.borrow_mut().insert(item) + } + } +} diff --git a/vendor/gix-pack/src/data/output/entry/iter_from_counts.rs b/vendor/gix-pack/src/data/output/entry/iter_from_counts.rs new file mode 100644 index 000000000..25e256d5c --- /dev/null +++ b/vendor/gix-pack/src/data/output/entry/iter_from_counts.rs @@ -0,0 +1,428 @@ +pub(crate) mod function { + use std::{cmp::Ordering, sync::Arc}; + + use gix_features::{parallel, parallel::SequenceId, progress::Progress}; + + use super::{reduce, util, Error, Mode, Options, Outcome, ProgressId}; + use crate::data::output; + + /// Given a known list of object `counts`, calculate entries ready to be put into a data pack. + /// + /// This allows objects to be written quite soon without having to wait for the entire pack to be built in memory. + /// A chunk of objects is held in memory and compressed using DEFLATE, and serve the output of this iterator. + /// That way slow writers will naturally apply back pressure, and communicate to the implementation that more time can be + /// spent compressing objects. + /// + /// * `counts` + /// * A list of previously counted objects to add to the pack. Duplication checks are not performed, no object is expected to be duplicated. + /// * `progress` + /// * a way to obtain progress information + /// * `options` + /// * more configuration + /// + /// _Returns_ the checksum of the pack + /// + /// ## Discussion + /// + /// ### Advantages + /// + /// * Begins writing immediately and supports back-pressure. + /// * Abstract over object databases and how input is provided. + /// + /// ### Disadvantages + /// + /// * ~~currently there is no way to easily write the pack index, even though the state here is uniquely positioned to do + /// so with minimal overhead (especially compared to `gix index-from-pack`)~~ Probably works now by chaining Iterators + /// or keeping enough state to write a pack and then generate an index with recorded data. 
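+    ///
+    /// ## Example
+    ///
+    /// A rough sketch of driving the returned iterator, not compiled here: `counts`, `db` and `progress` are
+    /// assumed to exist, with `counts` typically produced by `output::count::objects()`:
+    ///
+    /// ```ignore
+    /// let entries = iter_from_counts(counts, db, progress, Options::default());
+    /// for chunk in entries {
+    ///     let (_chunk_id, entries) = chunk?;
+    ///     // pass `entries` on to `output::bytes::FromEntriesIter` to encode them into an actual pack
+    /// }
+    /// ```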
+ /// + pub fn iter_from_counts<Find>( + mut counts: Vec<output::Count>, + db: Find, + mut progress: impl Progress + 'static, + Options { + version, + mode, + allow_thin_pack, + thread_limit, + chunk_size, + }: Options, + ) -> impl Iterator<Item = Result<(SequenceId, Vec<output::Entry>), Error<Find::Error>>> + + parallel::reduce::Finalize<Reduce = reduce::Statistics<Error<Find::Error>>> + where + Find: crate::Find + Send + Clone + 'static, + <Find as crate::Find>::Error: Send, + { + assert!( + matches!(version, crate::data::Version::V2), + "currently we can only write version 2" + ); + let (chunk_size, thread_limit, _) = + parallel::optimize_chunk_size_and_thread_limit(chunk_size, Some(counts.len()), thread_limit, None); + { + let progress = Arc::new(parking_lot::Mutex::new( + progress.add_child_with_id("resolving", ProgressId::ResolveCounts.into()), + )); + progress.lock().init(None, gix_features::progress::count("counts")); + let enough_counts_present = counts.len() > 4_000; + let start = std::time::Instant::now(); + parallel::in_parallel_if( + || enough_counts_present, + counts.chunks_mut(chunk_size), + thread_limit, + |_n| Vec::<u8>::new(), + { + let progress = Arc::clone(&progress); + let db = db.clone(); + move |chunk, buf| { + let chunk_size = chunk.len(); + for count in chunk { + use crate::data::output::count::PackLocation::*; + match count.entry_pack_location { + LookedUp(_) => continue, + NotLookedUp => count.entry_pack_location = LookedUp(db.location_by_oid(count.id, buf)), + } + } + progress.lock().inc_by(chunk_size); + Ok::<_, ()>(()) + } + }, + parallel::reduce::IdentityWithResult::<(), ()>::default(), + ) + .expect("infallible - we ignore none-existing objects"); + progress.lock().show_throughput(start); + } + let counts_range_by_pack_id = match mode { + Mode::PackCopyAndBaseObjects => { + let mut progress = progress.add_child_with_id("sorting", ProgressId::SortEntries.into()); + progress.init(Some(counts.len()), gix_features::progress::count("counts")); + let start = std::time::Instant::now(); + + use crate::data::output::count::PackLocation::*; + counts.sort_by(|lhs, rhs| match (&lhs.entry_pack_location, &rhs.entry_pack_location) { + (LookedUp(None), LookedUp(None)) => Ordering::Equal, + (LookedUp(Some(_)), LookedUp(None)) => Ordering::Greater, + (LookedUp(None), LookedUp(Some(_))) => Ordering::Less, + (LookedUp(Some(lhs)), LookedUp(Some(rhs))) => lhs + .pack_id + .cmp(&rhs.pack_id) + .then(lhs.pack_offset.cmp(&rhs.pack_offset)), + (_, _) => unreachable!("counts were resolved beforehand"), + }); + + let mut index: Vec<(u32, std::ops::Range<usize>)> = Vec::new(); + let mut chunks_pack_start = counts.partition_point(|e| e.entry_pack_location.is_none()); + let mut slice = &counts[chunks_pack_start..]; + while !slice.is_empty() { + let current_pack_id = slice[0].entry_pack_location.as_ref().expect("packed object").pack_id; + let pack_end = slice.partition_point(|e| { + e.entry_pack_location.as_ref().expect("packed object").pack_id == current_pack_id + }); + index.push((current_pack_id, chunks_pack_start..chunks_pack_start + pack_end)); + slice = &slice[pack_end..]; + chunks_pack_start += pack_end; + } + + progress.set(counts.len()); + progress.show_throughput(start); + + index + } + }; + + let counts = Arc::new(counts); + let progress = Arc::new(parking_lot::Mutex::new(progress)); + let chunks = util::ChunkRanges::new(chunk_size, counts.len()); + + parallel::reduce::Stepwise::new( + chunks.enumerate(), + thread_limit, + { + let progress = Arc::clone(&progress); + move |n| { + 
( + Vec::new(), // object data buffer + progress + .lock() + .add_child_with_id(format!("thread {n}"), gix_features::progress::UNKNOWN), + ) + } + }, + { + let counts = Arc::clone(&counts); + move |(chunk_id, chunk_range): (SequenceId, std::ops::Range<usize>), (buf, progress)| { + let mut out = Vec::new(); + let chunk = &counts[chunk_range]; + let mut stats = Outcome::default(); + let mut pack_offsets_to_id = None; + progress.init(Some(chunk.len()), gix_features::progress::count("objects")); + + for count in chunk.iter() { + out.push(match count + .entry_pack_location + .as_ref() + .and_then(|l| db.entry_by_location(l).map(|pe| (l, pe))) + { + Some((location, pack_entry)) => { + if let Some((cached_pack_id, _)) = &pack_offsets_to_id { + if *cached_pack_id != location.pack_id { + pack_offsets_to_id = None; + } + } + let pack_range = counts_range_by_pack_id[counts_range_by_pack_id + .binary_search_by_key(&location.pack_id, |e| e.0) + .expect("pack-id always present")] + .1 + .clone(); + let base_index_offset = pack_range.start; + let counts_in_pack = &counts[pack_range]; + match output::Entry::from_pack_entry( + pack_entry, + count, + counts_in_pack, + base_index_offset, + allow_thin_pack.then_some({ + |pack_id, base_offset| { + let (cached_pack_id, cache) = pack_offsets_to_id.get_or_insert_with(|| { + db.pack_offsets_and_oid(pack_id) + .map(|mut v| { + v.sort_by_key(|e| e.0); + (pack_id, v) + }) + .expect("pack used for counts is still available") + }); + debug_assert_eq!(*cached_pack_id, pack_id); + stats.ref_delta_objects += 1; + cache + .binary_search_by_key(&base_offset, |e| e.0) + .ok() + .map(|idx| cache[idx].1) + } + }), + version, + ) { + Some(entry) => { + stats.objects_copied_from_pack += 1; + entry + } + None => match db.try_find(count.id, buf).map_err(Error::FindExisting)? { + Some((obj, _location)) => { + stats.decoded_and_recompressed_objects += 1; + output::Entry::from_data(count, &obj) + } + None => { + stats.missing_objects += 1; + Ok(output::Entry::invalid()) + } + }, + } + } + None => match db.try_find(count.id, buf).map_err(Error::FindExisting)? 
{ + Some((obj, _location)) => { + stats.decoded_and_recompressed_objects += 1; + output::Entry::from_data(count, &obj) + } + None => { + stats.missing_objects += 1; + Ok(output::Entry::invalid()) + } + }, + }?); + progress.inc(); + } + Ok((chunk_id, out, stats)) + } + }, + reduce::Statistics::default(), + ) + } +} + +mod util { + #[derive(Clone)] + pub struct ChunkRanges { + cursor: usize, + size: usize, + len: usize, + } + + impl ChunkRanges { + pub fn new(size: usize, total: usize) -> Self { + ChunkRanges { + cursor: 0, + size, + len: total, + } + } + } + + impl Iterator for ChunkRanges { + type Item = std::ops::Range<usize>; + + fn next(&mut self) -> Option<Self::Item> { + if self.cursor >= self.len { + None + } else { + let upper = (self.cursor + self.size).min(self.len); + let range = self.cursor..upper; + self.cursor = upper; + Some(range) + } + } + } +} + +mod reduce { + use std::marker::PhantomData; + + use gix_features::{parallel, parallel::SequenceId}; + + use super::Outcome; + use crate::data::output; + + pub struct Statistics<E> { + total: Outcome, + _err: PhantomData<E>, + } + + impl<E> Default for Statistics<E> { + fn default() -> Self { + Statistics { + total: Default::default(), + _err: PhantomData::default(), + } + } + } + + impl<Error> parallel::Reduce for Statistics<Error> { + type Input = Result<(SequenceId, Vec<output::Entry>, Outcome), Error>; + type FeedProduce = (SequenceId, Vec<output::Entry>); + type Output = Outcome; + type Error = Error; + + fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> { + item.map(|(cid, entries, stats)| { + self.total.aggregate(stats); + (cid, entries) + }) + } + + fn finalize(self) -> Result<Self::Output, Self::Error> { + Ok(self.total) + } + } +} + +mod types { + use crate::data::output::entry; + + /// Information gathered during the run of [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. + #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct Outcome { + /// The amount of fully decoded objects. These are the most expensive as they are fully decoded. + pub decoded_and_recompressed_objects: usize, + /// The amount of objects that could not be located despite them being mentioned during iteration + pub missing_objects: usize, + /// The amount of base or delta objects that could be copied directly from the pack. These are cheapest as they + /// only cost a memory copy for the most part. + pub objects_copied_from_pack: usize, + /// The amount of objects that ref to their base as ref-delta, an indication for a thin back being created. + pub ref_delta_objects: usize, + } + + impl Outcome { + pub(in crate::data::output::entry) fn aggregate( + &mut self, + Outcome { + decoded_and_recompressed_objects: decoded_objects, + missing_objects, + objects_copied_from_pack, + ref_delta_objects, + }: Self, + ) { + self.decoded_and_recompressed_objects += decoded_objects; + self.missing_objects += missing_objects; + self.objects_copied_from_pack += objects_copied_from_pack; + self.ref_delta_objects += ref_delta_objects; + } + } + + /// The way the iterator operates. + #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub enum Mode { + /// Copy base objects and deltas from packs, while non-packed objects will be treated as base objects + /// (i.e. without trying to delta compress them). 
This is a fast way of obtaining a back while benefiting + /// from existing pack compression and spending the smallest possible time on compressing unpacked objects at + /// the cost of bandwidth. + PackCopyAndBaseObjects, + } + + /// Configuration options for the pack generation functions provided in [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. + #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct Options { + /// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used. + pub thread_limit: Option<usize>, + /// The algorithm to produce a pack + pub mode: Mode, + /// If set, the resulting back can have deltas that refer to an object which is not in the pack. This can happen + /// if the initial counted objects do not contain an object that an existing packed delta refers to, for example, because + /// it wasn't part of the iteration, for instance when the iteration was performed on tree deltas or only a part of the + /// commit graph. Please note that thin packs are not valid packs at rest, thus they are only valid for packs in transit. + /// + /// If set to false, delta objects will be decompressed and recompressed as base objects. + pub allow_thin_pack: bool, + /// The amount of objects per chunk or unit of work to be sent to threads for processing + /// TODO: could this become the window size? + pub chunk_size: usize, + /// The pack data version to produce for each entry + pub version: crate::data::Version, + } + + impl Default for Options { + fn default() -> Self { + Options { + thread_limit: None, + mode: Mode::PackCopyAndBaseObjects, + allow_thin_pack: false, + chunk_size: 10, + version: Default::default(), + } + } + } + + /// The error returned by the pack generation function [`iter_from_counts()`][crate::data::output::entry::iter_from_counts()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error<FindErr> + where + FindErr: std::error::Error + 'static, + { + #[error(transparent)] + FindExisting(FindErr), + #[error(transparent)] + NewEntry(#[from] entry::Error), + } + + /// The progress ids used in [`write_to_directory()`][crate::Bundle::write_to_directory()]. + /// + /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. + #[derive(Debug, Copy, Clone)] + pub enum ProgressId { + /// The amount of [`Count`][crate::data::output::Count] objects which are resolved to their pack location. + ResolveCounts, + /// Layout pack entries for placement into a pack (by pack-id and by offset). 
+ SortEntries, + } + + impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::ResolveCounts => *b"ECRC", + ProgressId::SortEntries => *b"ECSE", + } + } + } +} +pub use types::{Error, Mode, Options, Outcome, ProgressId}; diff --git a/vendor/gix-pack/src/data/output/entry/mod.rs b/vendor/gix-pack/src/data/output/entry/mod.rs new file mode 100644 index 000000000..401d2f24c --- /dev/null +++ b/vendor/gix-pack/src/data/output/entry/mod.rs @@ -0,0 +1,181 @@ +use std::{convert::TryFrom, io::Write}; + +use gix_hash::ObjectId; + +use crate::{data, data::output, find}; + +/// +pub mod iter_from_counts; +pub use iter_from_counts::function::iter_from_counts; + +/// The kind of pack entry to be written +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum Kind { + /// A complete base object, including its kind + Base(gix_object::Kind), + /// A delta against the object with the given index. It's always an index that was already encountered to refer only + /// to object we have written already. + DeltaRef { + /// The absolute index to the object to serve as base. It's up to the writer to maintain enough state to allow producing + /// a packed delta object from it. + object_index: usize, + }, + /// A delta against the given object as identified by its `ObjectId`. + /// This is the case for thin packs only, i.e. those that are sent over the wire. + /// Note that there is the option of the `ObjectId` being used to refer to an object within + /// the same pack, but it's a discontinued practice which won't be encountered here. + DeltaOid { + /// The object serving as base for this delta + id: ObjectId, + }, +} + +/// The error returned by [`output::Entry::from_data()`]. +#[allow(missing_docs)] +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("{0}")] + ZlibDeflate(#[from] std::io::Error), +} + +impl output::Entry { + /// An object which can be identified as invalid easily which happens if objects didn't exist even if they were referred to. + pub fn invalid() -> output::Entry { + output::Entry { + id: gix_hash::Kind::Sha1.null(), // NOTE: the actual object hash used in the repo doesn't matter here, this is a sentinel value. + kind: Kind::Base(gix_object::Kind::Blob), + decompressed_size: 0, + compressed_data: vec![], + } + } + + /// Returns true if this object doesn't really exist but still has to be handled responsibly + /// + /// Note that this is true for tree entries that are commits/git submodules, or for objects which aren't present in our local clone + /// due to shallow clones. + pub fn is_invalid(&self) -> bool { + self.id.is_null() + } + + /// Create an Entry from a previously counted object which is located in a pack. It's `entry` is provided here. + /// The `version` specifies what kind of target `Entry` version the caller desires. 
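+    ///
+    /// Returns `None` if the pack entry uses a version other than `target_version` or if it is a ref-delta,
+    /// as such entries are not copied verbatim; the caller is expected to recompress them as base objects instead.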
+ pub fn from_pack_entry( + mut entry: find::Entry, + count: &output::Count, + potential_bases: &[output::Count], + bases_index_offset: usize, + pack_offset_to_oid: Option<impl FnMut(u32, u64) -> Option<ObjectId>>, + target_version: crate::data::Version, + ) -> Option<Result<Self, Error>> { + if entry.version != target_version { + return None; + }; + + let pack_offset_must_be_zero = 0; + let pack_entry = + crate::data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.as_slice().len()); + + use crate::data::entry::Header::*; + match pack_entry.header { + Commit => Some(output::entry::Kind::Base(gix_object::Kind::Commit)), + Tree => Some(output::entry::Kind::Base(gix_object::Kind::Tree)), + Blob => Some(output::entry::Kind::Base(gix_object::Kind::Blob)), + Tag => Some(output::entry::Kind::Base(gix_object::Kind::Tag)), + OfsDelta { base_distance } => { + let pack_location = count.entry_pack_location.as_ref().expect("packed"); + let base_offset = pack_location + .pack_offset + .checked_sub(base_distance) + .expect("pack-offset - distance is firmly within the pack"); + potential_bases + .binary_search_by(|e| { + e.entry_pack_location + .as_ref() + .expect("packed") + .pack_offset + .cmp(&base_offset) + }) + .ok() + .map(|idx| output::entry::Kind::DeltaRef { + object_index: idx + bases_index_offset, + }) + .or_else(|| { + pack_offset_to_oid + .and_then(|mut f| f(pack_location.pack_id, base_offset)) + .map(|id| output::entry::Kind::DeltaOid { id }) + }) + } + RefDelta { base_id: _ } => None, // ref deltas are for thin packs or legacy, repack them as base objects + } + .map(|kind| { + Ok(output::Entry { + id: count.id.to_owned(), + kind, + decompressed_size: pack_entry.decompressed_size as usize, + compressed_data: { + entry.data.copy_within(pack_entry.data_offset as usize.., 0); + entry.data.resize( + entry.data.len() + - usize::try_from(pack_entry.data_offset).expect("offset representable as usize"), + 0, + ); + entry.data + }, + }) + }) + } + + /// Create a new instance from the given `oid` and its corresponding git `obj`ect data. + pub fn from_data(count: &output::Count, obj: &gix_object::Data<'_>) -> Result<Self, Error> { + Ok(output::Entry { + id: count.id.to_owned(), + kind: Kind::Base(obj.kind), + decompressed_size: obj.data.len(), + compressed_data: { + let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new()); + if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) { + match err.kind() { + std::io::ErrorKind::Other => return Err(Error::ZlibDeflate(err)), + err => unreachable!("Should never see other errors than zlib, but got {:?}", err,), + } + }; + out.flush()?; + out.into_inner() + }, + }) + } + + /// Transform ourselves into pack entry header of `version` which can be written into a pack. + /// + /// `index_to_pack(object_index) -> pack_offset` is a function to convert the base object's index into + /// the input object array (if each object is numbered) to an offset into the pack. + /// This information is known to the one calling the method. 
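+    ///
+    /// A sketch of a possible `index_to_base_distance` closure, mirroring its use in `output::bytes::FromEntriesIter`;
+    /// here `pack_offsets` is a hypothetical list of offsets at which previously written objects start, and
+    /// `current_offset` is the offset at which the current entry will be written:
+    ///
+    /// ```ignore
+    /// let header = entry.to_entry_header(data::Version::V2, |object_index| {
+    ///     // distance from the start of this entry back to the start of its base object
+    ///     current_offset - pack_offsets[object_index]
+    /// });
+    /// ```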
+ pub fn to_entry_header( + &self, + version: crate::data::Version, + index_to_base_distance: impl FnOnce(usize) -> u64, + ) -> crate::data::entry::Header { + assert!( + matches!(version, data::Version::V2), + "we can only write V2 pack entries for now" + ); + + use Kind::*; + match self.kind { + Base(kind) => { + use gix_object::Kind::*; + match kind { + Tree => data::entry::Header::Tree, + Blob => data::entry::Header::Blob, + Commit => data::entry::Header::Commit, + Tag => data::entry::Header::Tag, + } + } + DeltaOid { id } => data::entry::Header::RefDelta { base_id: id.to_owned() }, + DeltaRef { object_index } => data::entry::Header::OfsDelta { + base_distance: index_to_base_distance(object_index), + }, + } + } +} diff --git a/vendor/gix-pack/src/data/output/mod.rs b/vendor/gix-pack/src/data/output/mod.rs new file mode 100644 index 000000000..f94d32e8e --- /dev/null +++ b/vendor/gix-pack/src/data/output/mod.rs @@ -0,0 +1,41 @@ +use gix_hash::ObjectId; + +/// +pub mod count; + +/// An item representing a future Entry in the leanest way possible. +/// +/// One can expect to have one of these in memory when building big objects, so smaller is better here. +/// They should contain everything of importance to generate a pack as fast as possible. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Count { + /// The hash of the object to write + pub id: ObjectId, + /// A way to locate a pack entry in the object database, only available if the object is in a pack. + pub entry_pack_location: count::PackLocation, +} + +/// An entry to be written to a file. +/// +/// Some of these will be in-flight and in memory while waiting to be written. Memory requirements depend on the amount of compressed +/// data they hold. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Entry { + /// The hash of the object to write + pub id: ObjectId, + /// The kind of entry represented by `data`. It's used alongside with it to complete the pack entry + /// at rest or in transit. + pub kind: entry::Kind, + /// The size in bytes needed once `data` gets decompressed + pub decompressed_size: usize, + /// The compressed data right behind the header + pub compressed_data: Vec<u8>, +} + +/// +pub mod entry; + +/// +pub mod bytes; diff --git a/vendor/gix-pack/src/find.rs b/vendor/gix-pack/src/find.rs new file mode 100644 index 000000000..8143692e7 --- /dev/null +++ b/vendor/gix-pack/src/find.rs @@ -0,0 +1,63 @@ +/// +pub mod existing { + use gix_hash::ObjectId; + + /// The error returned by the [`find(…)`][crate::FindExt::find()] trait methods. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error<T: std::error::Error + 'static> { + #[error(transparent)] + Find(T), + #[error("An object with id {} could not be found", .oid)] + NotFound { oid: ObjectId }, + } +} + +/// +pub mod existing_object { + use gix_hash::ObjectId; + + /// The error returned by the various [`find_*`][crate::FindExt::find_commit()] trait methods. 
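The error modules in this file all support the same flattening pattern used by the `FindExt` trait later in this diff: a `try_find()`-style call yields `Result<Option<T>, E>`, and the `existing*` variants turn the `None` case into a dedicated `NotFound` error. A std-only sketch of that pattern (names are illustrative, not this crate's API):

    #[derive(Debug)]
    enum ExistingError<E> {
        // the backend lookup itself failed
        Find(E),
        // the lookup succeeded, but no object with this id exists
        NotFound { oid: [u8; 20] },
    }

    fn find_existing<T, E>(
        oid: [u8; 20],
        try_find: impl FnOnce([u8; 20]) -> Result<Option<T>, E>,
    ) -> Result<T, ExistingError<E>> {
        try_find(oid)
            .map_err(ExistingError::Find)?
            .ok_or(ExistingError::NotFound { oid })
    }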
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error<T: std::error::Error + 'static> { + #[error(transparent)] + Find(T), + #[error(transparent)] + Decode(gix_object::decode::Error), + #[error("An object with id {} could not be found", .oid)] + NotFound { oid: ObjectId }, + #[error("Expected object of kind {} something else", .expected)] + ObjectKind { expected: gix_object::Kind }, + } +} + +/// +pub mod existing_iter { + use gix_hash::ObjectId; + + /// The error returned by the various [`find_*`][crate::FindExt::find_commit()] trait methods. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error<T: std::error::Error + 'static> { + #[error(transparent)] + Find(T), + #[error("An object with id {} could not be found", .oid)] + NotFound { oid: ObjectId }, + #[error("Expected object of kind {} something else", .expected)] + ObjectKind { expected: gix_object::Kind }, + } +} + +/// An Entry in a pack providing access to its data. +/// +/// Its commonly retrieved by reading from a pack index file followed by a read from a pack data file. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[allow(missing_docs)] +pub struct Entry { + /// The pack-data encoded bytes of the pack data entry as present in the pack file, including the header followed by compressed data. + pub data: Vec<u8>, + /// The version of the pack file containing `data` + pub version: crate::data::Version, +} diff --git a/vendor/gix-pack/src/find_traits.rs b/vendor/gix-pack/src/find_traits.rs new file mode 100644 index 000000000..6f828afbf --- /dev/null +++ b/vendor/gix-pack/src/find_traits.rs @@ -0,0 +1,295 @@ +use crate::{data, find}; + +/// Describe how object can be located in an object store with built-in facilities to supports packs specifically. +/// +/// ## Notes +/// +/// Find effectively needs [generic associated types][issue] to allow a trait for the returned object type. +/// Until then, we will have to make due with explicit types and give them the potentially added features we want. +/// +/// Furthermore, despite this trait being in `gix-pack`, it leaks knowledge about objects potentially not being packed. +/// This is a necessary trade-off to allow this trait to live in `gix-pack` where it is used in functions to create a pack. +/// +/// [issue]: https://github.com/rust-lang/rust/issues/44265 +pub trait Find { + /// The error returned by [`try_find()`][Find::try_find()] + type Error: std::error::Error + Send + Sync + 'static; + + /// Returns true if the object exists in the database. + fn contains(&self, id: impl AsRef<gix_hash::oid>) -> bool; + + /// Find an object matching `id` in the database while placing its raw, decoded data into `buffer`. + /// A `pack_cache` can be used to speed up subsequent lookups, set it to [`crate::cache::Never`] if the + /// workload isn't suitable for caching. + /// + /// Returns `Some((<object data>, <pack location if packed>))` if it was present in the database, + /// or the error that occurred during lookup or object retrieval. + fn try_find<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> { + self.try_find_cached(id, buffer, &mut crate::cache::Never) + } + + /// Like [`Find::try_find()`], but with support for controlling the pack cache. 
+ /// A `pack_cache` can be used to speed up subsequent lookups, set it to [`crate::cache::Never`] if the + /// workload isn't suitable for caching. + /// + /// Returns `Some((<object data>, <pack location if packed>))` if it was present in the database, + /// or the error that occurred during lookup or object retrieval. + fn try_find_cached<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + pack_cache: &mut impl crate::cache::DecodeEntry, + ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error>; + + /// Find the packs location where an object with `id` can be found in the database, or `None` if there is no pack + /// holding the object. + /// + /// _Note_ that this is always None if the object isn't packed even though it exists as loose object. + fn location_by_oid(&self, id: impl AsRef<gix_hash::oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location>; + + /// Obtain a vector of all offsets, in index order, along with their object id. + fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>>; + + /// Return the [`find::Entry`] for `location` if it is backed by a pack. + /// + /// Note that this is only in the interest of avoiding duplicate work during pack generation. + /// Pack locations can be obtained from [`Find::try_find()`]. + /// + /// # Notes + /// + /// Custom implementations might be interested in providing their own meta-data with `object`, + /// which currently isn't possible as the `Locate` trait requires GATs to work like that. + fn entry_by_location(&self, location: &data::entry::Location) -> Option<find::Entry>; +} + +mod ext { + use gix_object::{BlobRef, CommitRef, CommitRefIter, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter}; + + use crate::find; + + macro_rules! make_obj_lookup { + ($method:ident, $object_variant:path, $object_kind:path, $object_type:ty) => { + /// Like [`find(…)`][Self::find()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error + /// while returning the desired object type. + fn $method<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<($object_type, Option<crate::data::entry::Location>), find::existing_object::Error<Self::Error>> + { + let id = id.as_ref(); + self.try_find(id, buffer) + .map_err(find::existing_object::Error::Find)? + .ok_or_else(|| find::existing_object::Error::NotFound { + oid: id.as_ref().to_owned(), + }) + .and_then(|(o, l)| { + o.decode() + .map_err(find::existing_object::Error::Decode) + .map(|o| (o, l)) + }) + .and_then(|(o, l)| match o { + $object_variant(o) => return Ok((o, l)), + _other => Err(find::existing_object::Error::ObjectKind { + expected: $object_kind, + }), + }) + } + }; + } + + macro_rules! make_iter_lookup { + ($method:ident, $object_kind:path, $object_type:ty, $into_iter:tt) => { + /// Like [`find(…)`][Self::find()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error + /// while returning the desired iterator type. + fn $method<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<($object_type, Option<crate::data::entry::Location>), find::existing_iter::Error<Self::Error>> { + let id = id.as_ref(); + self.try_find(id, buffer) + .map_err(find::existing_iter::Error::Find)? 
+ .ok_or_else(|| find::existing_iter::Error::NotFound { + oid: id.as_ref().to_owned(), + }) + .and_then(|(o, l)| { + o.$into_iter() + .ok_or_else(|| find::existing_iter::Error::ObjectKind { + expected: $object_kind, + }) + .map(|i| (i, l)) + }) + } + }; + } + + /// An extension trait with convenience functions. + pub trait FindExt: super::Find { + /// Like [`try_find(…)`][super::Find::try_find()], but flattens the `Result<Option<_>>` into a single `Result` making a non-existing object an error. + fn find<'a>( + &self, + id: impl AsRef<gix_hash::oid>, + buffer: &'a mut Vec<u8>, + ) -> Result<(gix_object::Data<'a>, Option<crate::data::entry::Location>), find::existing::Error<Self::Error>> + { + let id = id.as_ref(); + self.try_find(id, buffer) + .map_err(find::existing::Error::Find)? + .ok_or_else(|| find::existing::Error::NotFound { + oid: id.as_ref().to_owned(), + }) + } + + make_obj_lookup!(find_commit, ObjectRef::Commit, Kind::Commit, CommitRef<'a>); + make_obj_lookup!(find_tree, ObjectRef::Tree, Kind::Tree, TreeRef<'a>); + make_obj_lookup!(find_tag, ObjectRef::Tag, Kind::Tag, TagRef<'a>); + make_obj_lookup!(find_blob, ObjectRef::Blob, Kind::Blob, BlobRef<'a>); + make_iter_lookup!(find_commit_iter, Kind::Blob, CommitRefIter<'a>, try_into_commit_iter); + make_iter_lookup!(find_tree_iter, Kind::Tree, TreeRefIter<'a>, try_into_tree_iter); + make_iter_lookup!(find_tag_iter, Kind::Tag, TagRefIter<'a>, try_into_tag_iter); + } + + impl<T: super::Find> FindExt for T {} +} +pub use ext::FindExt; + +mod find_impls { + use std::{ops::Deref, rc::Rc}; + + use gix_hash::oid; + + use crate::{data, find}; + + impl<T> crate::Find for &T + where + T: crate::Find, + { + type Error = T::Error; + + fn contains(&self, id: impl AsRef<oid>) -> bool { + (*self).contains(id) + } + + fn try_find_cached<'a>( + &self, + id: impl AsRef<oid>, + buffer: &'a mut Vec<u8>, + pack_cache: &mut impl crate::cache::DecodeEntry, + ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> { + (*self).try_find_cached(id, buffer, pack_cache) + } + + fn location_by_oid(&self, id: impl AsRef<oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location> { + (*self).location_by_oid(id, buf) + } + + fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>> { + (*self).pack_offsets_and_oid(pack_id) + } + + fn entry_by_location(&self, location: &data::entry::Location) -> Option<find::Entry> { + (*self).entry_by_location(location) + } + } + + impl<T> super::Find for std::sync::Arc<T> + where + T: super::Find, + { + type Error = T::Error; + + fn contains(&self, id: impl AsRef<oid>) -> bool { + self.deref().contains(id) + } + + fn try_find_cached<'a>( + &self, + id: impl AsRef<oid>, + buffer: &'a mut Vec<u8>, + pack_cache: &mut impl crate::cache::DecodeEntry, + ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> { + self.deref().try_find_cached(id, buffer, pack_cache) + } + + fn location_by_oid(&self, id: impl AsRef<oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location> { + self.deref().location_by_oid(id, buf) + } + + fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>> { + self.deref().pack_offsets_and_oid(pack_id) + } + + fn entry_by_location(&self, object: &data::entry::Location) -> Option<find::Entry> { + self.deref().entry_by_location(object) + } + } + + impl<T> super::Find for Rc<T> + where + T: super::Find, + { + type Error = T::Error; + + fn contains(&self, id: impl AsRef<oid>) -> bool { 
+ self.deref().contains(id) + } + + fn try_find_cached<'a>( + &self, + id: impl AsRef<oid>, + buffer: &'a mut Vec<u8>, + pack_cache: &mut impl crate::cache::DecodeEntry, + ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> { + self.deref().try_find_cached(id, buffer, pack_cache) + } + + fn location_by_oid(&self, id: impl AsRef<oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location> { + self.deref().location_by_oid(id, buf) + } + + fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>> { + self.deref().pack_offsets_and_oid(pack_id) + } + + fn entry_by_location(&self, location: &data::entry::Location) -> Option<find::Entry> { + self.deref().entry_by_location(location) + } + } + + impl<T> super::Find for Box<T> + where + T: super::Find, + { + type Error = T::Error; + + fn contains(&self, id: impl AsRef<oid>) -> bool { + self.deref().contains(id) + } + + fn try_find_cached<'a>( + &self, + id: impl AsRef<oid>, + buffer: &'a mut Vec<u8>, + pack_cache: &mut impl crate::cache::DecodeEntry, + ) -> Result<Option<(gix_object::Data<'a>, Option<data::entry::Location>)>, Self::Error> { + self.deref().try_find_cached(id, buffer, pack_cache) + } + + fn location_by_oid(&self, id: impl AsRef<oid>, buf: &mut Vec<u8>) -> Option<data::entry::Location> { + self.deref().location_by_oid(id, buf) + } + + fn pack_offsets_and_oid(&self, pack_id: u32) -> Option<Vec<(data::Offset, gix_hash::ObjectId)>> { + self.deref().pack_offsets_and_oid(pack_id) + } + + fn entry_by_location(&self, location: &data::entry::Location) -> Option<find::Entry> { + self.deref().entry_by_location(location) + } + } +} diff --git a/vendor/gix-pack/src/index/access.rs b/vendor/gix-pack/src/index/access.rs new file mode 100644 index 000000000..0ac85dff7 --- /dev/null +++ b/vendor/gix-pack/src/index/access.rs @@ -0,0 +1,290 @@ +use std::{mem::size_of, ops::Range}; + +use crate::{ + data, + index::{self, EntryIndex, PrefixLookupResult, FAN_LEN}, +}; + +const N32_SIZE: usize = size_of::<u32>(); +const N64_SIZE: usize = size_of::<u64>(); +const V1_HEADER_SIZE: usize = FAN_LEN * N32_SIZE; +const V2_HEADER_SIZE: usize = N32_SIZE * 2 + FAN_LEN * N32_SIZE; +const N32_HIGH_BIT: u32 = 1 << 31; + +/// Represents an entry within a pack index file, effectively mapping object [`IDs`][gix_hash::ObjectId] to pack data file locations. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Entry { + /// The ID of the object + pub oid: gix_hash::ObjectId, + /// The offset to the object's header in the pack data file + pub pack_offset: data::Offset, + /// The CRC32 hash over all bytes of the pack data entry. + /// + /// This can be useful for direct copies of pack data entries from one pack to another with insurance there was no bit rot. + /// _Note_: Only available in index version 2 or newer + pub crc32: Option<u32>, +} + +/// Iteration and access +impl index::File { + fn iter_v1(&self) -> impl Iterator<Item = Entry> + '_ { + match self.version { + index::Version::V1 => self.data[V1_HEADER_SIZE..] 
+ .chunks(N32_SIZE + self.hash_len) + .take(self.num_objects as usize) + .map(|c| { + let (ofs, oid) = c.split_at(N32_SIZE); + Entry { + oid: gix_hash::ObjectId::from(oid), + pack_offset: crate::read_u32(ofs) as u64, + crc32: None, + } + }), + _ => panic!("Cannot use iter_v1() on index of type {:?}", self.version), + } + } + + fn iter_v2(&self) -> impl Iterator<Item = Entry> + '_ { + let pack64_offset = self.offset_pack_offset64_v2(); + match self.version { + index::Version::V2 => izip!( + self.data[V2_HEADER_SIZE..].chunks(self.hash_len), + self.data[self.offset_crc32_v2()..].chunks(N32_SIZE), + self.data[self.offset_pack_offset_v2()..].chunks(N32_SIZE) + ) + .take(self.num_objects as usize) + .map(move |(oid, crc32, ofs32)| Entry { + oid: gix_hash::ObjectId::from(oid), + pack_offset: self.pack_offset_from_offset_v2(ofs32, pack64_offset), + crc32: Some(crate::read_u32(crc32)), + }), + _ => panic!("Cannot use iter_v2() on index of type {:?}", self.version), + } + } + + /// Returns the object hash at the given index in our list of (sorted) sha1 hashes. + /// The index ranges from 0 to self.num_objects() + /// + /// # Panics + /// + /// If `index` is out of bounds. + pub fn oid_at_index(&self, index: EntryIndex) -> &gix_hash::oid { + let index = index as usize; + let start = match self.version { + index::Version::V2 => V2_HEADER_SIZE + index * self.hash_len, + index::Version::V1 => V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len) + N32_SIZE, + }; + gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len]) + } + + /// Returns the offset into our pack data file at which to start reading the object at `index`. + /// + /// # Panics + /// + /// If `index` is out of bounds. + pub fn pack_offset_at_index(&self, index: EntryIndex) -> data::Offset { + let index = index as usize; + match self.version { + index::Version::V2 => { + let start = self.offset_pack_offset_v2() + index * N32_SIZE; + self.pack_offset_from_offset_v2(&self.data[start..][..N32_SIZE], self.offset_pack_offset64_v2()) + } + index::Version::V1 => { + let start = V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len); + crate::read_u32(&self.data[start..][..N32_SIZE]) as u64 + } + } + } + + /// Returns the CRC32 of the object at the given `index`. + /// + /// _Note_: These are always present for index version 2 or higher. + /// # Panics + /// + /// If `index` is out of bounds. + pub fn crc32_at_index(&self, index: EntryIndex) -> Option<u32> { + let index = index as usize; + match self.version { + index::Version::V2 => { + let start = self.offset_crc32_v2() + index * N32_SIZE; + Some(crate::read_u32(&self.data[start..start + N32_SIZE])) + } + index::Version::V1 => None, + } + } + + /// Returns the `index` of the given hash for use with the [`oid_at_index()`][index::File::oid_at_index()], + /// [`pack_offset_at_index()`][index::File::pack_offset_at_index()] or [`crc32_at_index()`][index::File::crc32_at_index()]. + // NOTE: pretty much the same things as in `multi_index::File::lookup`, change things there + // as well. + pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<EntryIndex> { + lookup(id, &self.fan, |idx| self.oid_at_index(idx)) + } + + /// Given a `prefix`, find an object that matches it uniquely within this index and return `Some(Ok(entry_index))`. + /// If there is more than one object matching the object `Some(Err(())` is returned. + /// + /// Finally, if no object matches the index, the return value is `None`. 
+ /// + /// Pass `candidates` to obtain the set of entry-indices matching `prefix`, with the same return value as + /// one would have received if it remained `None`. It will be empty if no object matched the `prefix`. + /// + // NOTE: pretty much the same things as in `index::File::lookup`, change things there + // as well. + pub fn lookup_prefix( + &self, + prefix: gix_hash::Prefix, + candidates: Option<&mut Range<EntryIndex>>, + ) -> Option<PrefixLookupResult> { + lookup_prefix( + prefix, + candidates, + &self.fan, + |idx| self.oid_at_index(idx), + self.num_objects, + ) + } + + /// An iterator over all [`Entries`][Entry] of this index file. + pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a> { + match self.version { + index::Version::V2 => Box::new(self.iter_v2()), + index::Version::V1 => Box::new(self.iter_v1()), + } + } + + /// Return a vector of ascending offsets into our respective pack data file. + /// + /// Useful to control an iteration over all pack entries in a cache-friendly way. + pub fn sorted_offsets(&self) -> Vec<data::Offset> { + let mut ofs: Vec<_> = match self.version { + index::Version::V1 => self.iter().map(|e| e.pack_offset).collect(), + index::Version::V2 => { + let offset32_start = &self.data[self.offset_pack_offset_v2()..]; + let pack_offset_64_start = self.offset_pack_offset64_v2(); + offset32_start + .chunks(N32_SIZE) + .take(self.num_objects as usize) + .map(|offset| self.pack_offset_from_offset_v2(offset, pack_offset_64_start)) + .collect() + } + }; + ofs.sort_unstable(); + ofs + } + + #[inline] + fn offset_crc32_v2(&self) -> usize { + V2_HEADER_SIZE + self.num_objects as usize * self.hash_len + } + + #[inline] + fn offset_pack_offset_v2(&self) -> usize { + self.offset_crc32_v2() + self.num_objects as usize * N32_SIZE + } + + #[inline] + fn offset_pack_offset64_v2(&self) -> usize { + self.offset_pack_offset_v2() + self.num_objects as usize * N32_SIZE + } + + #[inline] + fn pack_offset_from_offset_v2(&self, offset: &[u8], pack64_offset: usize) -> data::Offset { + debug_assert_eq!(self.version, index::Version::V2); + let ofs32 = crate::read_u32(offset); + if (ofs32 & N32_HIGH_BIT) == N32_HIGH_BIT { + let from = pack64_offset + (ofs32 ^ N32_HIGH_BIT) as usize * N64_SIZE; + crate::read_u64(&self.data[from..][..N64_SIZE]) + } else { + ofs32 as u64 + } + } +} + +pub(crate) fn lookup_prefix<'a>( + prefix: gix_hash::Prefix, + candidates: Option<&mut Range<EntryIndex>>, + fan: &[u32; FAN_LEN], + oid_at_index: impl Fn(EntryIndex) -> &'a gix_hash::oid, + num_objects: u32, +) -> Option<PrefixLookupResult> { + let first_byte = prefix.as_oid().first_byte() as usize; + let mut upper_bound = fan[first_byte]; + let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 }; + + // Bisect using indices + while lower_bound < upper_bound { + let mid = (lower_bound + upper_bound) / 2; + let mid_sha = oid_at_index(mid); + + use std::cmp::Ordering::*; + match prefix.cmp_oid(mid_sha) { + Less => upper_bound = mid, + Equal => match candidates { + Some(candidates) => { + let first_past_entry = ((0..mid).rev()) + .take_while(|prev| prefix.cmp_oid(oid_at_index(*prev)) == Equal) + .last(); + + let last_future_entry = ((mid + 1)..num_objects) + .take_while(|next| prefix.cmp_oid(oid_at_index(*next)) == Equal) + .last(); + + *candidates = match (first_past_entry, last_future_entry) { + (Some(first), Some(last)) => first..last + 1, + (Some(first), None) => first..mid + 1, + (None, Some(last)) => mid..last + 1, + (None, None) => mid..mid + 1, + }; + + return if 
candidates.len() > 1 { + Some(Err(())) + } else { + Some(Ok(mid)) + }; + } + None => { + let next = mid + 1; + if next < num_objects && prefix.cmp_oid(oid_at_index(next)) == Equal { + return Some(Err(())); + } + if mid != 0 && prefix.cmp_oid(oid_at_index(mid - 1)) == Equal { + return Some(Err(())); + } + return Some(Ok(mid)); + } + }, + Greater => lower_bound = mid + 1, + } + } + + if let Some(candidates) = candidates { + *candidates = 0..0; + } + None +} + +pub(crate) fn lookup<'a>( + id: impl AsRef<gix_hash::oid>, + fan: &[u32; FAN_LEN], + oid_at_index: impl Fn(EntryIndex) -> &'a gix_hash::oid, +) -> Option<EntryIndex> { + let id = id.as_ref(); + let first_byte = id.first_byte() as usize; + let mut upper_bound = fan[first_byte]; + let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 }; + + while lower_bound < upper_bound { + let mid = (lower_bound + upper_bound) / 2; + let mid_sha = oid_at_index(mid); + + use std::cmp::Ordering::*; + match id.cmp(mid_sha) { + Less => upper_bound = mid, + Equal => return Some(mid), + Greater => lower_bound = mid + 1, + } + } + None +} diff --git a/vendor/gix-pack/src/index/init.rs b/vendor/gix-pack/src/index/init.rs new file mode 100644 index 000000000..13eecdbda --- /dev/null +++ b/vendor/gix-pack/src/index/init.rs @@ -0,0 +1,91 @@ +use std::{mem::size_of, path::Path}; + +use crate::index::{self, Version, FAN_LEN, V2_SIGNATURE}; + +/// Returned by [`index::File::at()`]. +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("Could not open pack index file at '{path}'")] + Io { + source: std::io::Error, + path: std::path::PathBuf, + }, + #[error("{message}")] + Corrupt { message: String }, + #[error("Unsupported index version: {version})")] + UnsupportedVersion { version: u32 }, +} + +const N32_SIZE: usize = size_of::<u32>(); + +/// Instantiation +impl index::File { + /// Open the pack index file at the given `path`. + /// + /// The `object_hash` is a way to read (and write) the same file format with different hashes, as the hash kind + /// isn't stored within the file format itself. 
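A short usage sketch for the constructor that follows, assuming the `gix_pack` and `gix_hash` crates from this vendor tree; the path is purely illustrative.

    fn open_index_example() -> Result<(), Box<dyn std::error::Error>> {
        // SHA-1 matches what `Version::hash()` reports for V1/V2 indices (see index/mod.rs below).
        let idx = gix_pack::index::File::at(".git/objects/pack/pack-example.idx", gix_hash::Kind::Sha1)?;
        println!("index {:?} contains {} objects", idx.version(), idx.num_objects());
        Ok(())
    }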
+ pub fn at(path: impl AsRef<Path>, object_hash: gix_hash::Kind) -> Result<index::File, Error> { + Self::at_inner(path.as_ref(), object_hash) + } + + fn at_inner(path: &Path, object_hash: gix_hash::Kind) -> Result<index::File, Error> { + let data = crate::mmap::read_only(path).map_err(|source| Error::Io { + source, + path: path.to_owned(), + })?; + let idx_len = data.len(); + let hash_len = object_hash.len_in_bytes(); + + let footer_size = hash_len * 2; + if idx_len < FAN_LEN * N32_SIZE + footer_size { + return Err(Error::Corrupt { + message: format!("Pack index of size {idx_len} is too small for even an empty index"), + }); + } + let (kind, fan, num_objects) = { + let (kind, d) = { + let (sig, d) = data.split_at(V2_SIGNATURE.len()); + if sig == V2_SIGNATURE { + (Version::V2, d) + } else { + (Version::V1, &data[..]) + } + }; + let d = { + if let Version::V2 = kind { + let (vd, dr) = d.split_at(N32_SIZE); + let version = crate::read_u32(vd); + if version != Version::V2 as u32 { + return Err(Error::UnsupportedVersion { version }); + } + dr + } else { + d + } + }; + let (fan, bytes_read) = read_fan(d); + let (_, _d) = d.split_at(bytes_read); + let num_objects = fan[FAN_LEN - 1]; + + (kind, fan, num_objects) + }; + Ok(index::File { + data, + path: path.to_owned(), + version: kind, + num_objects, + fan, + hash_len, + object_hash, + }) + } +} + +fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) { + let mut fan = [0; FAN_LEN]; + for (c, f) in d.chunks(N32_SIZE).zip(fan.iter_mut()) { + *f = crate::read_u32(c); + } + (fan, FAN_LEN * N32_SIZE) +} diff --git a/vendor/gix-pack/src/index/mod.rs b/vendor/gix-pack/src/index/mod.rs new file mode 100644 index 000000000..341322f7d --- /dev/null +++ b/vendor/gix-pack/src/index/mod.rs @@ -0,0 +1,155 @@ +//! an index into the pack file +//! +/// From itertools +/// Create an iterator running multiple iterators in lockstep. +/// +/// The `izip!` iterator yields elements until any subiterator +/// returns `None`. +/// +/// This is a version of the standard ``.zip()`` that's supporting more than +/// two iterators. The iterator element type is a tuple with one element +/// from each of the input iterators. Just like ``.zip()``, the iteration stops +/// when the shortest of the inputs reaches its end. +/// +/// **Note:** The result of this macro is in the general case an iterator +/// composed of repeated `.zip()` and a `.map()`; it has an anonymous type. +/// The special cases of one and two arguments produce the equivalent of +/// `$a.into_iter()` and `$a.into_iter().zip($b)` respectively. +/// +/// Prefer this macro `izip!()` over [`multizip`] for the performance benefits +/// of using the standard library `.zip()`. +/// +/// [`multizip`]: fn.multizip.html +/// +/// ``` +/// # use itertools::izip; +/// # +/// # fn main() { +/// +/// // iterate over three sequences side-by-side +/// let mut results = [0, 0, 0, 0]; +/// let inputs = [3, 7, 9, 6]; +/// +/// for (r, index, input) in izip!(&mut results, 0..10, &inputs) { +/// *r = index * 10 + input; +/// } +/// +/// assert_eq!(results, [0 + 3, 10 + 7, 29, 36]); +/// # } +/// ``` +macro_rules! izip { + // @closure creates a tuple-flattening closure for .map() call. usage: + // @closure partial_pattern => partial_tuple , rest , of , iterators + // eg. izip!( @closure ((a, b), c) => (a, b, c) , dd , ee ) + ( @closure $p:pat => $tup:expr ) => { + |$p| $tup + }; + + // The "b" identifier is a different identifier on each recursion level thanks to hygiene. 
+ ( @closure $p:pat => ( $($tup:tt)* ) , $_iter:expr $( , $tail:expr )* ) => { + izip!(@closure ($p, b) => ( $($tup)*, b ) $( , $tail )*) + }; + + // unary + ($first:expr $(,)*) => { + std::iter::IntoIterator::into_iter($first) + }; + + // binary + ($first:expr, $second:expr $(,)*) => { + izip!($first) + .zip($second) + }; + + // n-ary where n > 2 + ( $first:expr $( , $rest:expr )* $(,)* ) => { + izip!($first) + $( + .zip($rest) + )* + .map( + izip!(@closure a => (a) $( , $rest )*) + ) + }; +} + +use memmap2::Mmap; + +/// The version of an index file +#[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Hash, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[allow(missing_docs)] +pub enum Version { + V1 = 1, + V2 = 2, +} + +impl Default for Version { + fn default() -> Self { + Version::V2 + } +} + +impl Version { + /// The kind of hash to produce to be compatible to this kind of index + pub fn hash(&self) -> gix_hash::Kind { + match self { + Version::V1 | Version::V2 => gix_hash::Kind::Sha1, + } + } +} + +/// A way to indicate if a lookup, despite successful, was ambiguous or yielded exactly +/// one result in the particular index. +pub type PrefixLookupResult = Result<EntryIndex, ()>; + +/// The type for referring to indices of an entry within the index file. +pub type EntryIndex = u32; + +const FAN_LEN: usize = 256; + +/// A representation of a pack index file +pub struct File { + data: Mmap, + path: std::path::PathBuf, + version: Version, + num_objects: u32, + fan: [u32; FAN_LEN], + hash_len: usize, + object_hash: gix_hash::Kind, +} + +/// Basic file information +impl File { + /// The version of the pack index + pub fn version(&self) -> Version { + self.version + } + /// The path of the opened index file + pub fn path(&self) -> &std::path::Path { + &self.path + } + /// The amount of objects stored in the pack and index, as one past the highest entry index. 
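`num_objects` is read straight out of the last fan-out entry (see `read_fan()` and `fan[FAN_LEN - 1]` in init.rs above). A std-only sketch of that cumulative table:

    // Entry `b` of the fan-out holds how many objects have a first hash byte <= b,
    // so the final entry equals the total object count.
    fn build_fan(sorted_first_bytes: &[u8]) -> [u32; 256] {
        let mut fan = [0u32; 256];
        for &b in sorted_first_bytes {
            fan[b as usize] += 1;
        }
        for i in 1..256 {
            fan[i] += fan[i - 1]; // turn per-byte counts into cumulative counts
        }
        fan
    }

    fn main() {
        let fan = build_fan(&[0x01, 0x01, 0x7f, 0xff]);
        assert_eq!(fan[255], 4); // num_objects == fan[FAN_LEN - 1]
        assert_eq!(fan[0x01], 2); // two objects start with byte 0x01
    }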
+ pub fn num_objects(&self) -> EntryIndex { + self.num_objects + } + /// The kind of hash we assume + pub fn object_hash(&self) -> gix_hash::Kind { + self.object_hash + } +} + +const V2_SIGNATURE: &[u8] = b"\xfftOc"; +/// +pub mod init; + +pub(crate) mod access; +pub use access::Entry; + +/// +pub mod traverse; +mod util; +/// +pub mod verify; +/// +pub mod write; diff --git a/vendor/gix-pack/src/index/traverse/error.rs b/vendor/gix-pack/src/index/traverse/error.rs new file mode 100644 index 000000000..2310c3bab --- /dev/null +++ b/vendor/gix-pack/src/index/traverse/error.rs @@ -0,0 +1,44 @@ +use crate::index; + +/// Returned by [`index::File::traverse_with_index()`] and [`index::File::traverse_with_lookup`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error<E: std::error::Error + Send + Sync + 'static> { + #[error("One of the traversal processors failed")] + Processor(#[source] E), + #[error("Index file, pack file or object verification failed")] + VerifyChecksum(#[from] index::verify::checksum::Error), + #[error("The pack delta tree index could not be built")] + Tree(#[from] crate::cache::delta::from_offsets::Error), + #[error("The tree traversal failed")] + TreeTraversal(#[from] crate::cache::delta::traverse::Error), + #[error("Object {id} at offset {offset} could not be decoded")] + PackDecode { + id: gix_hash::ObjectId, + offset: u64, + source: crate::data::decode::Error, + }, + #[error("The packfiles checksum didn't match the index file checksum: expected {expected}, got {actual}")] + PackMismatch { + expected: gix_hash::ObjectId, + actual: gix_hash::ObjectId, + }, + #[error("The hash of {kind} object at offset {offset} didn't match the checksum in the index file: expected {expected}, got {actual}")] + PackObjectMismatch { + expected: gix_hash::ObjectId, + actual: gix_hash::ObjectId, + offset: u64, + kind: gix_object::Kind, + }, + #[error( + "The CRC32 of {kind} object at offset {offset} didn't match the checksum in the index file: expected {expected}, got {actual}" + )] + Crc32Mismatch { + expected: u32, + actual: u32, + offset: u64, + kind: gix_object::Kind, + }, + #[error("Interrupted")] + Interrupted, +} diff --git a/vendor/gix-pack/src/index/traverse/mod.rs b/vendor/gix-pack/src/index/traverse/mod.rs new file mode 100644 index 000000000..42c820b0e --- /dev/null +++ b/vendor/gix-pack/src/index/traverse/mod.rs @@ -0,0 +1,245 @@ +use std::sync::atomic::AtomicBool; + +use gix_features::{parallel, progress::Progress}; + +use crate::index; + +mod reduce; +/// +pub mod with_index; +/// +pub mod with_lookup; +use reduce::Reducer; + +mod error; +pub use error::Error; + +mod types; +pub use types::{Algorithm, ProgressId, SafetyCheck, Statistics}; + +/// Traversal options for [`index::File::traverse()`]. +#[derive(Debug, Clone)] +pub struct Options<F> { + /// The algorithm to employ. + pub traversal: Algorithm, + /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on + /// the amount of available logical cores. + pub thread_limit: Option<usize>, + /// The kinds of safety checks to perform. + pub check: SafetyCheck, + /// A function to create a pack cache + pub make_pack_lookup_cache: F, +} + +impl Default for Options<fn() -> crate::cache::Never> { + fn default() -> Self { + Options { + check: Default::default(), + traversal: Default::default(), + thread_limit: None, + make_pack_lookup_cache: || crate::cache::Never, + } + } +} + +/// The outcome of the [`traverse()`][index::File::traverse()] method. 
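Just above, `Options` and its `Default` impl configure `traverse()`. A sketch of filling the options in explicitly, assuming the `gix_pack` crate from this vendor tree; the concrete values are illustrative only.

    use gix_pack::index::traverse::{Algorithm, Options, SafetyCheck};

    fn traverse_options_example() -> Options<fn() -> gix_pack::cache::Never> {
        Options {
            traversal: Algorithm::Lookup,
            thread_limit: Some(4),
            check: SafetyCheck::All,
            // no caching; delta bases are re-decoded on demand
            make_pack_lookup_cache: || gix_pack::cache::Never,
        }
    }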
+pub struct Outcome<P> { + /// The checksum obtained when hashing the file, which matched the checksum contained within the file. + pub actual_index_checksum: gix_hash::ObjectId, + /// The statistics obtained during traversal. + pub statistics: Statistics, + /// The input progress to allow reuse. + pub progress: P, +} + +/// Traversal of pack data files using an index file +impl index::File { + /// Iterate through all _decoded objects_ in the given `pack` and handle them with a `Processor`. + /// The return value is (pack-checksum, [`Outcome`], `progress`), thus the pack traversal will always verify + /// the whole packs checksum to assure it was correct. In case of bit-rod, the operation will abort early without + /// verifying all objects using the [interrupt mechanism][gix_features::interrupt] mechanism. + /// + /// # Algorithms + /// + /// Using the [`Options::traversal`] field one can chose between two algorithms providing different tradeoffs. Both invoke + /// `new_processor()` to create functions receiving decoded objects, their object kind, index entry and a progress instance to provide + /// progress information. + /// + /// * [`Algorithm::DeltaTreeLookup`] builds an index to avoid any unnecessary computation while resolving objects, avoiding + /// the need for a cache entirely, rendering `new_cache()` unused. + /// One could also call [`traverse_with_index()`][index::File::traverse_with_index()] directly. + /// * [`Algorithm::Lookup`] uses a cache created by `new_cache()` to avoid having to re-compute all bases of a delta-chain while + /// decoding objects. + /// One could also call [`traverse_with_lookup()`][index::File::traverse_with_lookup()] directly. + /// + /// Use [`thread_limit`][Options::thread_limit] to further control parallelism and [`check`][SafetyCheck] to define how much the passed + /// objects shall be verified beforehand. + pub fn traverse<P, C, Processor, E, F>( + &self, + pack: &crate::data::File, + progress: P, + should_interrupt: &AtomicBool, + new_processor: impl Fn() -> Processor + Send + Clone, + Options { + traversal, + thread_limit, + check, + make_pack_lookup_cache, + }: Options<F>, + ) -> Result<Outcome<P>, Error<E>> + where + P: Progress, + C: crate::cache::DecodeEntry, + E: std::error::Error + Send + Sync + 'static, + Processor: FnMut( + gix_object::Kind, + &[u8], + &index::Entry, + &mut <P::SubProgress as Progress>::SubProgress, + ) -> Result<(), E>, + F: Fn() -> C + Send + Clone, + { + match traversal { + Algorithm::Lookup => self.traverse_with_lookup( + new_processor, + pack, + progress, + should_interrupt, + with_lookup::Options { + thread_limit, + check, + make_pack_lookup_cache, + }, + ), + Algorithm::DeltaTreeLookup => self.traverse_with_index( + pack, + new_processor, + progress, + should_interrupt, + crate::index::traverse::with_index::Options { check, thread_limit }, + ), + } + } + + fn possibly_verify<E>( + &self, + pack: &crate::data::File, + check: SafetyCheck, + pack_progress: impl Progress, + index_progress: impl Progress, + should_interrupt: &AtomicBool, + ) -> Result<gix_hash::ObjectId, Error<E>> + where + E: std::error::Error + Send + Sync + 'static, + { + Ok(if check.file_checksum() { + if self.pack_checksum() != pack.checksum() { + return Err(Error::PackMismatch { + actual: pack.checksum(), + expected: self.pack_checksum(), + }); + } + let (pack_res, id) = parallel::join( + move || pack.verify_checksum(pack_progress, should_interrupt), + move || self.verify_checksum(index_progress, should_interrupt), + ); + pack_res?; + id? 
+ } else { + self.index_checksum() + }) + } + + #[allow(clippy::too_many_arguments)] + fn decode_and_process_entry<C, P, E>( + &self, + check: SafetyCheck, + pack: &crate::data::File, + cache: &mut C, + buf: &mut Vec<u8>, + progress: &mut P, + index_entry: &crate::index::Entry, + processor: &mut impl FnMut(gix_object::Kind, &[u8], &index::Entry, &mut P) -> Result<(), E>, + ) -> Result<crate::data::decode::entry::Outcome, Error<E>> + where + C: crate::cache::DecodeEntry, + P: Progress, + E: std::error::Error + Send + Sync + 'static, + { + let pack_entry = pack.entry(index_entry.pack_offset); + let pack_entry_data_offset = pack_entry.data_offset; + let entry_stats = pack + .decode_entry( + pack_entry, + buf, + |id, _| { + self.lookup(id).map(|index| { + crate::data::decode::entry::ResolvedBase::InPack(pack.entry(self.pack_offset_at_index(index))) + }) + }, + cache, + ) + .map_err(|e| Error::PackDecode { + source: e, + id: index_entry.oid, + offset: index_entry.pack_offset, + })?; + let object_kind = entry_stats.kind; + let header_size = (pack_entry_data_offset - index_entry.pack_offset) as usize; + let entry_len = header_size + entry_stats.compressed_size; + + process_entry( + check, + object_kind, + buf, + progress, + index_entry, + || pack.entry_crc32(index_entry.pack_offset, entry_len), + processor, + )?; + Ok(entry_stats) + } +} + +#[allow(clippy::too_many_arguments)] +fn process_entry<P, E>( + check: SafetyCheck, + object_kind: gix_object::Kind, + decompressed: &[u8], + progress: &mut P, + index_entry: &crate::index::Entry, + pack_entry_crc32: impl FnOnce() -> u32, + processor: &mut impl FnMut(gix_object::Kind, &[u8], &index::Entry, &mut P) -> Result<(), E>, +) -> Result<(), Error<E>> +where + P: Progress, + E: std::error::Error + Send + Sync + 'static, +{ + if check.object_checksum() { + let mut hasher = gix_features::hash::hasher(index_entry.oid.kind()); + hasher.update(&gix_object::encode::loose_header(object_kind, decompressed.len())); + hasher.update(decompressed); + + let actual_oid = gix_hash::ObjectId::from(hasher.digest()); + if actual_oid != index_entry.oid { + return Err(Error::PackObjectMismatch { + actual: actual_oid, + expected: index_entry.oid, + offset: index_entry.pack_offset, + kind: object_kind, + }); + } + if let Some(desired_crc32) = index_entry.crc32 { + let actual_crc32 = pack_entry_crc32(); + if actual_crc32 != desired_crc32 { + return Err(Error::Crc32Mismatch { + actual: actual_crc32, + expected: desired_crc32, + offset: index_entry.pack_offset, + kind: object_kind, + }); + } + } + } + processor(object_kind, decompressed, index_entry, progress).map_err(Error::Processor) +} diff --git a/vendor/gix-pack/src/index/traverse/reduce.rs b/vendor/gix-pack/src/index/traverse/reduce.rs new file mode 100644 index 000000000..e05341242 --- /dev/null +++ b/vendor/gix-pack/src/index/traverse/reduce.rs @@ -0,0 +1,129 @@ +use std::{ + sync::atomic::{AtomicBool, Ordering}, + time::Instant, +}; + +use gix_features::{ + parallel, + progress::Progress, + threading::{lock, Mutable, OwnShared}, +}; + +use crate::{data, index::traverse}; + +fn add_decode_result(lhs: &mut data::decode::entry::Outcome, rhs: data::decode::entry::Outcome) { + lhs.num_deltas += rhs.num_deltas; + lhs.decompressed_size += rhs.decompressed_size; + lhs.compressed_size += rhs.compressed_size; + lhs.object_size += rhs.object_size; +} + +fn div_decode_result(lhs: &mut data::decode::entry::Outcome, div: usize) { + if div != 0 { + lhs.num_deltas = (lhs.num_deltas as f32 / div as f32) as u32; + lhs.decompressed_size 
/= div as u64; + lhs.compressed_size /= div; + lhs.object_size /= div as u64; + } +} + +pub struct Reducer<'a, P, E> { + progress: OwnShared<Mutable<P>>, + check: traverse::SafetyCheck, + then: Instant, + entries_seen: usize, + stats: traverse::Statistics, + should_interrupt: &'a AtomicBool, + _error: std::marker::PhantomData<E>, +} + +impl<'a, P, E> Reducer<'a, P, E> +where + P: Progress, +{ + pub fn from_progress( + progress: OwnShared<Mutable<P>>, + pack_data_len_in_bytes: usize, + check: traverse::SafetyCheck, + should_interrupt: &'a AtomicBool, + ) -> Self { + let stats = traverse::Statistics { + pack_size: pack_data_len_in_bytes as u64, + ..Default::default() + }; + Reducer { + progress, + check, + then: Instant::now(), + entries_seen: 0, + should_interrupt, + stats, + _error: Default::default(), + } + } +} + +impl<'a, P, E> parallel::Reduce for Reducer<'a, P, E> +where + P: Progress, + E: std::error::Error + Send + Sync + 'static, +{ + type Input = Result<Vec<data::decode::entry::Outcome>, traverse::Error<E>>; + type FeedProduce = (); + type Output = traverse::Statistics; + type Error = traverse::Error<E>; + + fn feed(&mut self, input: Self::Input) -> Result<(), Self::Error> { + let chunk_stats: Vec<_> = match input { + Err(err @ traverse::Error::PackDecode { .. }) if !self.check.fatal_decode_error() => { + lock(&self.progress).info(format!("Ignoring decode error: {err}")); + return Ok(()); + } + res => res, + }?; + self.entries_seen += chunk_stats.len(); + + let chunk_total = chunk_stats.into_iter().fold( + data::decode::entry::Outcome::default_from_kind(gix_object::Kind::Tree), + |mut total, stats| { + *self.stats.objects_per_chain_length.entry(stats.num_deltas).or_insert(0) += 1; + self.stats.total_decompressed_entries_size += stats.decompressed_size; + self.stats.total_compressed_entries_size += stats.compressed_size as u64; + self.stats.total_object_size += stats.object_size; + use gix_object::Kind::*; + match stats.kind { + Commit => self.stats.num_commits += 1, + Tree => self.stats.num_trees += 1, + Blob => self.stats.num_blobs += 1, + Tag => self.stats.num_tags += 1, + } + add_decode_result(&mut total, stats); + total + }, + ); + + add_decode_result(&mut self.stats.average, chunk_total); + lock(&self.progress).set(self.entries_seen); + + if self.should_interrupt.load(Ordering::SeqCst) { + return Err(Self::Error::Interrupted); + } + Ok(()) + } + + fn finalize(mut self) -> Result<Self::Output, Self::Error> { + div_decode_result(&mut self.stats.average, self.entries_seen); + + let elapsed_s = self.then.elapsed().as_secs_f32(); + let objects_per_second = (self.entries_seen as f32 / elapsed_s) as u32; + + lock(&self.progress).info(format!( + "of {} objects done in {:.2}s ({} objects/s, ~{}/s)", + self.entries_seen, + elapsed_s, + objects_per_second, + gix_features::progress::bytesize::ByteSize(self.stats.average.object_size * objects_per_second as u64) + )); + Ok(self.stats) + } +} diff --git a/vendor/gix-pack/src/index/traverse/types.rs b/vendor/gix-pack/src/index/traverse/types.rs new file mode 100644 index 000000000..84ebc8932 --- /dev/null +++ b/vendor/gix-pack/src/index/traverse/types.rs @@ -0,0 +1,123 @@ +use std::{collections::BTreeMap, marker::PhantomData}; + +/// Statistics regarding object encountered during execution of the [`traverse()`][crate::index::File::traverse()] method. 
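`objects_per_chain_length` below is the histogram that `Reducer::feed()` above fills one entry at a time. A std-only sketch of the same bookkeeping:

    use std::collections::BTreeMap;

    // Key 0 counts full (undeltified) objects, key N counts objects whose delta chain is N long.
    fn chain_length_histogram(delta_chain_lengths: &[u32]) -> BTreeMap<u32, u32> {
        let mut histogram = BTreeMap::new();
        for &len in delta_chain_lengths {
            *histogram.entry(len).or_insert(0u32) += 1;
        }
        histogram
    }

    fn main() {
        let h = chain_length_histogram(&[0, 0, 1, 3, 3, 3]);
        assert_eq!(h[&0], 2); // two base objects
        assert_eq!(h[&3], 3); // three objects at chain length 3
    }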
+#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Statistics { + /// The average over all decoded objects + pub average: crate::data::decode::entry::Outcome, + /// A mapping of the length of the chain to the amount of objects at that length. + /// + /// A length of 0 indicates full objects, and everything above that involves the given amount + /// of delta objects. + pub objects_per_chain_length: BTreeMap<u32, u32>, + /// The amount of bytes in all compressed streams, one per entry + pub total_compressed_entries_size: u64, + /// The amount of bytes in all decompressed streams, one per entry + pub total_decompressed_entries_size: u64, + /// The amount of bytes occupied by all undeltified, decompressed objects + pub total_object_size: u64, + /// The amount of bytes occupied by the pack itself, in bytes + pub pack_size: u64, + /// The amount of objects encountered that where commits + pub num_commits: u32, + /// The amount of objects encountered that where trees + pub num_trees: u32, + /// The amount of objects encountered that where tags + pub num_tags: u32, + /// The amount of objects encountered that where blobs + pub num_blobs: u32, +} + +impl Default for Statistics { + fn default() -> Self { + Statistics { + average: crate::data::decode::entry::Outcome::default_from_kind(gix_object::Kind::Tree), + objects_per_chain_length: Default::default(), + total_compressed_entries_size: 0, + total_decompressed_entries_size: 0, + total_object_size: 0, + pack_size: 0, + num_blobs: 0, + num_commits: 0, + num_trees: 0, + num_tags: 0, + } + } +} + +/// The ways to validate decoded objects before passing them to the processor. +#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum SafetyCheck { + /// Don't verify the validity of the checksums stored in the index and pack file + SkipFileChecksumVerification, + + /// All of the above, and also don't perform any object checksum verification + SkipFileAndObjectChecksumVerification, + + /// All of the above, and only log object decode errors. + /// + /// Useful if there is a damaged pack and you would like to traverse as many objects as possible. 
+ SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError, + + /// Perform all available safety checks before operating on the pack and + /// abort if any of them fails + All, +} + +impl SafetyCheck { + pub(crate) fn file_checksum(&self) -> bool { + matches!(self, SafetyCheck::All) + } + pub(crate) fn object_checksum(&self) -> bool { + matches!(self, SafetyCheck::All | SafetyCheck::SkipFileChecksumVerification) + } + pub(crate) fn fatal_decode_error(&self) -> bool { + match self { + SafetyCheck::All + | SafetyCheck::SkipFileChecksumVerification + | SafetyCheck::SkipFileAndObjectChecksumVerification => true, + SafetyCheck::SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError => false, + } + } +} + +impl Default for SafetyCheck { + fn default() -> Self { + SafetyCheck::All + } +} + +/// The way we verify the pack +#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum Algorithm { + /// Build an index to allow decoding each delta and base exactly once, saving a lot of computational + /// resource at the expense of resident memory, as we will use an additional `DeltaTree` to accelerate + /// delta chain resolution. + DeltaTreeLookup, + /// We lookup each object similarly to what would happen during normal repository use. + /// Uses more compute resources as it will resolve delta chains from back to front, but start right away + /// without indexing or investing any memory in indices. + /// + /// This option may be well suited for big packs in memory-starved system that support memory mapping. + Lookup, +} + +impl Default for Algorithm { + fn default() -> Self { + Algorithm::DeltaTreeLookup + } +} + +/// The progress ids used in [`traverse()`][crate::index::File::traverse()] . +/// +/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. +#[derive(Debug, Copy, Clone)] +pub enum ProgressId { + /// A root progress which isn't actually used, but links to the `ProgressId` of the lookup version of the algorithm. + WithLookup(PhantomData<super::with_lookup::ProgressId>), + /// A root progress which isn't actually used, but links to the `ProgressId` of the indexed version of the algorithm. + WithIndex(PhantomData<super::with_index::ProgressId>), +} diff --git a/vendor/gix-pack/src/index/traverse/with_index.rs b/vendor/gix-pack/src/index/traverse/with_index.rs new file mode 100644 index 000000000..769bbd07f --- /dev/null +++ b/vendor/gix-pack/src/index/traverse/with_index.rs @@ -0,0 +1,230 @@ +use std::sync::atomic::{AtomicBool, Ordering}; + +use gix_features::{parallel, progress::Progress}; + +use super::Error; +use crate::{ + cache::delta::traverse, + index::{self, traverse::Outcome, util::index_entries_sorted_by_offset_ascending}, +}; + +/// Traversal options for [`traverse_with_index()`][index::File::traverse_with_index()] +#[derive(Default)] +pub struct Options { + /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on + /// the amount of available logical cores. + pub thread_limit: Option<usize>, + /// The kinds of safety checks to perform. + pub check: crate::index::traverse::SafetyCheck, +} + +/// The progress ids used in [`index::File::traverse_with_index()`]. +/// +/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. 
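Each variant of the enum that follows maps to a fixed four-byte tag (`*b"PTHP"` and friends) which callers can match on to pick out one sub-progress. A std-only sketch of that convention, using a stand-in enum rather than the crate's own type:

    #[derive(Debug, Copy, Clone)]
    enum DemoProgressId {
        HashPackDataBytes,
        DecodedObjects,
    }

    impl From<DemoProgressId> for [u8; 4] {
        fn from(v: DemoProgressId) -> Self {
            match v {
                DemoProgressId::HashPackDataBytes => *b"PTHP",
                DemoProgressId::DecodedObjects => *b"PTRO",
            }
        }
    }

    fn main() {
        let id: [u8; 4] = DemoProgressId::DecodedObjects.into();
        assert_eq!(&id, b"PTRO"); // select the "decoded objects" sub-progress by tag
    }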
+#[derive(Debug, Copy, Clone)] +pub enum ProgressId { + /// The amount of bytes currently processed to generate a checksum of the *pack data file*. + HashPackDataBytes, + /// The amount of bytes currently processed to generate a checksum of the *pack index file*. + HashPackIndexBytes, + /// Collect all object hashes into a vector and sort it by their pack offset. + CollectSortedIndexEntries, + /// Count the objects processed when building a cache tree from all objects in a pack index. + TreeFromOffsetsObjects, + /// The amount of objects which were decoded. + DecodedObjects, + /// The amount of bytes that were decoded in total, as the sum of all bytes to represent all decoded objects. + DecodedBytes, +} + +impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::HashPackDataBytes => *b"PTHP", + ProgressId::HashPackIndexBytes => *b"PTHI", + ProgressId::CollectSortedIndexEntries => *b"PTCE", + ProgressId::TreeFromOffsetsObjects => *b"PTDI", + ProgressId::DecodedObjects => *b"PTRO", + ProgressId::DecodedBytes => *b"PTDB", + } + } +} + +/// Traversal with index +impl index::File { + /// Iterate through all _decoded objects_ in the given `pack` and handle them with a `Processor`, using an index to reduce waste + /// at the cost of memory. + /// + /// For more details, see the documentation on the [`traverse()`][index::File::traverse()] method. + pub fn traverse_with_index<P, Processor, E>( + &self, + pack: &crate::data::File, + new_processor: impl Fn() -> Processor + Send + Clone, + mut progress: P, + should_interrupt: &AtomicBool, + Options { check, thread_limit }: Options, + ) -> Result<Outcome<P>, Error<E>> + where + P: Progress, + Processor: FnMut( + gix_object::Kind, + &[u8], + &index::Entry, + &mut <P::SubProgress as Progress>::SubProgress, + ) -> Result<(), E>, + E: std::error::Error + Send + Sync + 'static, + { + let (verify_result, traversal_result) = parallel::join( + { + let pack_progress = progress.add_child_with_id( + format!( + "Hash of pack '{}'", + pack.path().file_name().expect("pack has filename").to_string_lossy() + ), + ProgressId::HashPackDataBytes.into(), + ); + let index_progress = progress.add_child_with_id( + format!( + "Hash of index '{}'", + self.path.file_name().expect("index has filename").to_string_lossy() + ), + ProgressId::HashPackIndexBytes.into(), + ); + move || { + let res = self.possibly_verify(pack, check, pack_progress, index_progress, should_interrupt); + if res.is_err() { + should_interrupt.store(true, Ordering::SeqCst); + } + res + } + }, + || -> Result<_, Error<_>> { + let sorted_entries = index_entries_sorted_by_offset_ascending( + self, + progress.add_child_with_id("collecting sorted index", ProgressId::CollectSortedIndexEntries.into()), + ); /* Pack Traverse Collect sorted Entries */ + let tree = crate::cache::delta::Tree::from_offsets_in_pack( + pack.path(), + sorted_entries.into_iter().map(Entry::from), + |e| e.index_entry.pack_offset, + |id| self.lookup(id).map(|idx| self.pack_offset_at_index(idx)), + progress.add_child_with_id("indexing", ProgressId::TreeFromOffsetsObjects.into()), + should_interrupt, + self.object_hash, + )?; + let mut outcome = digest_statistics(tree.traverse( + |slice, out| pack.entry_slice(slice).map(|entry| out.copy_from_slice(entry)), + pack.pack_end() as u64, + new_processor, + |data, + progress, + traverse::Context { + entry: pack_entry, + entry_end, + decompressed: bytes, + state: ref mut processor, + level, + }| { + let object_kind = 
pack_entry.header.as_kind().expect("non-delta object"); + data.level = level; + data.decompressed_size = pack_entry.decompressed_size; + data.object_kind = object_kind; + data.compressed_size = entry_end - pack_entry.data_offset; + data.object_size = bytes.len() as u64; + let result = crate::index::traverse::process_entry( + check, + object_kind, + bytes, + progress, + &data.index_entry, + || { + // TODO: Fix this - we overwrite the header of 'data' which also changes the computed entry size, + // causing index and pack to seemingly mismatch. This is surprising, and should be done differently. + // debug_assert_eq!(&data.index_entry.pack_offset, &pack_entry.pack_offset()); + gix_features::hash::crc32( + pack.entry_slice(data.index_entry.pack_offset..entry_end) + .expect("slice pointing into the pack (by now data is verified)"), + ) + }, + processor, + ); + match result { + Err(err @ Error::PackDecode { .. }) if !check.fatal_decode_error() => { + progress.info(format!("Ignoring decode error: {err}")); + Ok(()) + } + res => res, + } + }, + crate::cache::delta::traverse::Options { + object_progress: progress.add_child_with_id("Resolving", ProgressId::DecodedObjects.into()), + size_progress: progress.add_child_with_id("Decoding", ProgressId::DecodedBytes.into()), + thread_limit, + should_interrupt, + object_hash: self.object_hash, + }, + )?); + outcome.pack_size = pack.data_len() as u64; + Ok(outcome) + }, + ); + Ok(Outcome { + actual_index_checksum: verify_result?, + statistics: traversal_result?, + progress, + }) + } +} + +struct Entry { + index_entry: crate::index::Entry, + object_kind: gix_object::Kind, + object_size: u64, + decompressed_size: u64, + compressed_size: u64, + level: u16, +} + +impl From<crate::index::Entry> for Entry { + fn from(index_entry: crate::index::Entry) -> Self { + Entry { + index_entry, + level: 0, + object_kind: gix_object::Kind::Tree, + object_size: 0, + decompressed_size: 0, + compressed_size: 0, + } + } +} + +fn digest_statistics(traverse::Outcome { roots, children }: traverse::Outcome<Entry>) -> index::traverse::Statistics { + let mut res = index::traverse::Statistics::default(); + let average = &mut res.average; + for item in roots.iter().chain(children.iter()) { + res.total_compressed_entries_size += item.data.compressed_size; + res.total_decompressed_entries_size += item.data.decompressed_size; + res.total_object_size += item.data.object_size; + *res.objects_per_chain_length.entry(item.data.level as u32).or_insert(0) += 1; + + average.decompressed_size += item.data.decompressed_size; + average.compressed_size += item.data.compressed_size as usize; + average.object_size += item.data.object_size; + average.num_deltas += item.data.level as u32; + use gix_object::Kind::*; + match item.data.object_kind { + Blob => res.num_blobs += 1, + Tree => res.num_trees += 1, + Tag => res.num_tags += 1, + Commit => res.num_commits += 1, + }; + } + + let num_nodes = roots.len() + children.len(); + average.decompressed_size /= num_nodes as u64; + average.compressed_size /= num_nodes; + average.object_size /= num_nodes as u64; + average.num_deltas /= num_nodes as u32; + + res +} diff --git a/vendor/gix-pack/src/index/traverse/with_lookup.rs b/vendor/gix-pack/src/index/traverse/with_lookup.rs new file mode 100644 index 000000000..509ae4e4f --- /dev/null +++ b/vendor/gix-pack/src/index/traverse/with_lookup.rs @@ -0,0 +1,190 @@ +use std::sync::atomic::{AtomicBool, Ordering}; + +use gix_features::{ + parallel::{self, in_parallel_if}, + progress::{self, Progress}, + 
threading::{lock, Mutable, OwnShared}, +}; + +use super::{Error, Reducer}; +use crate::{ + data, index, + index::{traverse::Outcome, util}, +}; + +/// Traversal options for [`index::File::traverse_with_lookup()`] +pub struct Options<F> { + /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on + /// the amount of available logical cores. + pub thread_limit: Option<usize>, + /// The kinds of safety checks to perform. + pub check: index::traverse::SafetyCheck, + /// A function to create a pack cache + pub make_pack_lookup_cache: F, +} + +impl Default for Options<fn() -> crate::cache::Never> { + fn default() -> Self { + Options { + check: Default::default(), + thread_limit: None, + make_pack_lookup_cache: || crate::cache::Never, + } + } +} + +/// The progress ids used in [`index::File::traverse_with_lookup()`]. +/// +/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. +#[derive(Debug, Copy, Clone)] +pub enum ProgressId { + /// The amount of bytes currently processed to generate a checksum of the *pack data file*. + HashPackDataBytes, + /// The amount of bytes currently processed to generate a checksum of the *pack index file*. + HashPackIndexBytes, + /// Collect all object hashes into a vector and sort it by their pack offset. + CollectSortedIndexEntries, + /// The amount of objects which were decoded by brute-force. + DecodedObjects, +} + +impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::HashPackDataBytes => *b"PTHP", + ProgressId::HashPackIndexBytes => *b"PTHI", + ProgressId::CollectSortedIndexEntries => *b"PTCE", + ProgressId::DecodedObjects => *b"PTRO", + } + } +} + +/// Verify and validate the content of the index file +impl index::File { + /// Iterate through all _decoded objects_ in the given `pack` and handle them with a `Processor` using a cache to reduce the amount of + /// waste while decoding objects. + /// + /// For more details, see the documentation on the [`traverse()`][index::File::traverse()] method. 
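For orientation, a minimal calling sketch for this method follows. It is an editor's illustration rather than part of the vendored sources: `Bundle::at()`, the `Never` cache and the `SafetyCheck` default come from elsewhere in this crate, while the `Discard` progress sink from `gix-features` and the exact re-export path of `Options` are assumptions.

use std::sync::atomic::AtomicBool;
use gix_pack::index::traverse::with_lookup::Options; // path assumed from the file layout

fn checksum_via_lookup_traversal(
    index_path: &std::path::Path,
) -> Result<gix_hash::ObjectId, Box<dyn std::error::Error>> {
    // Opening the index via a bundle also opens the matching pack data file.
    let bundle = gix_pack::Bundle::at(index_path, gix_hash::Kind::Sha1)?;
    let should_interrupt = AtomicBool::new(false);
    let outcome = bundle.index.traverse_with_lookup(
        // One processor per thread; it sees (kind, decoded bytes, index entry, progress).
        || |_kind, _decoded, _entry, _progress| Ok::<_, std::io::Error>(()),
        &bundle.pack,
        gix_features::progress::Discard,
        &should_interrupt,
        Options {
            thread_limit: None,
            check: Default::default(),
            make_pack_lookup_cache: || gix_pack::cache::Never,
        },
    )?;
    Ok(outcome.actual_index_checksum)
}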
+ pub fn traverse_with_lookup<P, C, Processor, E, F>( + &self, + new_processor: impl Fn() -> Processor + Send + Clone, + pack: &crate::data::File, + mut progress: P, + should_interrupt: &AtomicBool, + Options { + thread_limit, + check, + make_pack_lookup_cache, + }: Options<F>, + ) -> Result<Outcome<P>, Error<E>> + where + P: Progress, + C: crate::cache::DecodeEntry, + E: std::error::Error + Send + Sync + 'static, + Processor: FnMut( + gix_object::Kind, + &[u8], + &index::Entry, + &mut <P::SubProgress as Progress>::SubProgress, + ) -> Result<(), E>, + F: Fn() -> C + Send + Clone, + { + let (verify_result, traversal_result) = parallel::join( + { + let pack_progress = progress.add_child_with_id( + format!( + "Hash of pack '{}'", + pack.path().file_name().expect("pack has filename").to_string_lossy() + ), + ProgressId::HashPackDataBytes.into(), + ); + let index_progress = progress.add_child_with_id( + format!( + "Hash of index '{}'", + self.path.file_name().expect("index has filename").to_string_lossy() + ), + ProgressId::HashPackIndexBytes.into(), + ); + move || { + let res = self.possibly_verify(pack, check, pack_progress, index_progress, should_interrupt); + if res.is_err() { + should_interrupt.store(true, Ordering::SeqCst); + } + res + } + }, + || { + let index_entries = util::index_entries_sorted_by_offset_ascending( + self, + progress.add_child_with_id("collecting sorted index", ProgressId::CollectSortedIndexEntries.into()), + ); + + let (chunk_size, thread_limit, available_cores) = + parallel::optimize_chunk_size_and_thread_limit(1000, Some(index_entries.len()), thread_limit, None); + let there_are_enough_entries_to_process = || index_entries.len() > chunk_size * available_cores; + let input_chunks = index_entries.chunks(chunk_size.max(chunk_size)); + let reduce_progress = OwnShared::new(Mutable::new({ + let mut p = progress.add_child_with_id("Traversing", ProgressId::DecodedObjects.into()); + p.init(Some(self.num_objects() as usize), progress::count("objects")); + p + })); + let state_per_thread = { + let reduce_progress = reduce_progress.clone(); + move |index| { + ( + make_pack_lookup_cache(), + new_processor(), + Vec::with_capacity(2048), // decode buffer + lock(&reduce_progress) + .add_child_with_id(format!("thread {index}"), gix_features::progress::UNKNOWN), // per thread progress + ) + } + }; + + in_parallel_if( + there_are_enough_entries_to_process, + input_chunks, + thread_limit, + state_per_thread, + |entries: &[index::Entry], + (cache, ref mut processor, buf, progress)| + -> Result<Vec<data::decode::entry::Outcome>, Error<_>> { + progress.init( + Some(entries.len()), + gix_features::progress::count_with_decimals("objects", 2), + ); + let mut stats = Vec::with_capacity(entries.len()); + progress.set(0); + for index_entry in entries.iter() { + let result = self.decode_and_process_entry( + check, + pack, + cache, + buf, + progress, + index_entry, + processor, + ); + progress.inc(); + let stat = match result { + Err(err @ Error::PackDecode { .. 
}) if !check.fatal_decode_error() => { + progress.info(format!("Ignoring decode error: {err}")); + continue; + } + res => res, + }?; + stats.push(stat); + } + Ok(stats) + }, + Reducer::from_progress(reduce_progress, pack.data_len(), check, should_interrupt), + ) + }, + ); + Ok(Outcome { + actual_index_checksum: verify_result?, + statistics: traversal_result?, + progress, + }) + } +} diff --git a/vendor/gix-pack/src/index/util.rs b/vendor/gix-pack/src/index/util.rs new file mode 100644 index 000000000..284ee6158 --- /dev/null +++ b/vendor/gix-pack/src/index/util.rs @@ -0,0 +1,47 @@ +use std::{io, time::Instant}; + +use gix_features::progress::{self, Progress}; + +pub(crate) fn index_entries_sorted_by_offset_ascending( + idx: &crate::index::File, + mut progress: impl Progress, +) -> Vec<crate::index::Entry> { + progress.init(Some(idx.num_objects as usize), progress::count("entries")); + let start = Instant::now(); + + let mut v = Vec::with_capacity(idx.num_objects as usize); + for entry in idx.iter() { + v.push(entry); + progress.inc(); + } + v.sort_by_key(|e| e.pack_offset); + + progress.show_throughput(start); + v +} + +pub(crate) struct Count<W> { + pub bytes: u64, + pub inner: W, +} + +impl<W> Count<W> { + pub fn new(inner: W) -> Self { + Count { bytes: 0, inner } + } +} + +impl<W> io::Write for Count<W> +where + W: io::Write, +{ + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + let written = self.inner.write(buf)?; + self.bytes += written as u64; + Ok(written) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} diff --git a/vendor/gix-pack/src/index/verify.rs b/vendor/gix-pack/src/index/verify.rs new file mode 100644 index 000000000..4a4852fb6 --- /dev/null +++ b/vendor/gix-pack/src/index/verify.rs @@ -0,0 +1,290 @@ +use std::sync::atomic::AtomicBool; + +use gix_features::progress::Progress; +use gix_object::{bstr::ByteSlice, WriteTo}; + +use crate::index; + +/// +pub mod integrity { + use std::marker::PhantomData; + + use gix_object::bstr::BString; + + /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()]. + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error("The fan at index {index} is out of order as it's larger then the following value.")] + Fan { index: usize }, + #[error("{kind} object {id} could not be decoded")] + ObjectDecode { + source: gix_object::decode::Error, + kind: gix_object::Kind, + id: gix_hash::ObjectId, + }, + #[error("{kind} object {id} wasn't re-encoded without change, wanted\n{expected}\n\nGOT\n\n{actual}")] + ObjectEncodeMismatch { + kind: gix_object::Kind, + id: gix_hash::ObjectId, + expected: BString, + actual: BString, + }, + } + + /// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()]. + pub struct Outcome<P> { + /// The computed checksum of the index which matched the stored one. + pub actual_index_checksum: gix_hash::ObjectId, + /// The packs traversal outcome, if one was provided + pub pack_traverse_statistics: Option<crate::index::traverse::Statistics>, + /// The provided progress instance. + pub progress: P, + } + + /// Additional options to define how the integrity should be verified. + #[derive(Clone)] + pub struct Options<F> { + /// The thoroughness of the verification + pub verify_mode: crate::index::verify::Mode, + /// The way to traverse packs + pub traversal: crate::index::traverse::Algorithm, + /// The amount of threads to use of `Some(N)`, with `None|Some(0)` using all available cores are used. 
+ pub thread_limit: Option<usize>, + /// A function to create a pack cache + pub make_pack_lookup_cache: F, + } + + impl Default for Options<fn() -> crate::cache::Never> { + fn default() -> Self { + Options { + verify_mode: Default::default(), + traversal: Default::default(), + thread_limit: None, + make_pack_lookup_cache: || crate::cache::Never, + } + } + } + + /// The progress ids used in [`index::File::verify_integrity()`][crate::index::File::verify_integrity()]. + /// + /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. + #[derive(Debug, Copy, Clone)] + pub enum ProgressId { + /// The amount of bytes read to verify the index checksum. + ChecksumBytes, + /// A root progress for traversal which isn't actually used directly, but here to link to the respective `ProgressId` types. + Traverse(PhantomData<crate::index::verify::index::traverse::ProgressId>), + } + + impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::ChecksumBytes => *b"PTHI", + ProgressId::Traverse(_) => gix_features::progress::UNKNOWN, + } + } + } +} + +/// +pub mod checksum { + /// Returned by [`index::File::verify_checksum()`][crate::index::File::verify_checksum()]. + pub type Error = crate::verify::checksum::Error; +} + +/// Various ways in which a pack and index can be verified +#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +pub enum Mode { + /// Validate the object hash and CRC32 + HashCrc32, + /// Validate hash and CRC32, and decode each non-Blob object. + /// Each object should be valid, i.e. be decodable. + HashCrc32Decode, + /// Validate hash and CRC32, and decode and encode each non-Blob object. + /// Each object should yield exactly the same hash when re-encoded. + HashCrc32DecodeEncode, +} + +impl Default for Mode { + fn default() -> Self { + Mode::HashCrc32DecodeEncode + } +} + +/// Information to allow verifying the integrity of an index with the help of its corresponding pack. +pub struct PackContext<'a, F> { + /// The pack data file itself. + pub data: &'a crate::data::File, + /// The options further configuring the pack traversal and verification + pub options: integrity::Options<F>, +} + +/// Verify and validate the content of the index file +impl index::File { + /// Returns the trailing hash stored at the end of this index file. + /// + /// It's a hash over all bytes of the index. + pub fn index_checksum(&self) -> gix_hash::ObjectId { + gix_hash::ObjectId::from(&self.data[self.data.len() - self.hash_len..]) + } + + /// Returns the hash of the pack data file that this index file corresponds to. + /// + /// It should [`crate::data::File::checksum()`] of the corresponding pack data file. + pub fn pack_checksum(&self) -> gix_hash::ObjectId { + let from = self.data.len() - self.hash_len * 2; + gix_hash::ObjectId::from(&self.data[from..][..self.hash_len]) + } + + /// Validate that our [`index_checksum()`][index::File::index_checksum()] matches the actual contents + /// of this index file, and return it if it does. + pub fn verify_checksum( + &self, + progress: impl Progress, + should_interrupt: &AtomicBool, + ) -> Result<gix_hash::ObjectId, checksum::Error> { + crate::verify::checksum_on_disk_or_mmap( + self.path(), + &self.data, + self.index_checksum(), + self.object_hash, + progress, + should_interrupt, + ) + } + + /// The most thorough validation of integrity of both index file and the corresponding pack data file, if provided. 
+ /// Returns the checksum of the index file, the traversal outcome and the given progress if the integrity check is successful. + /// + /// If `pack` is provided, it is expected (and validated to be) the pack belonging to this index. + /// It will be used to validate internal integrity of the pack before checking each objects integrity + /// is indeed as advertised via its SHA1 as stored in this index, as well as the CRC32 hash. + /// The last member of the Option is a function returning an implementation of [`crate::cache::DecodeEntry`] to be used if + /// the [`index::traverse::Algorithm`] is `Lookup`. + /// To set this to `None`, use `None::<(_, _, _, fn() -> crate::cache::Never)>`. + /// + /// The `thread_limit` optionally specifies the amount of threads to be used for the [pack traversal][index::File::traverse()]. + /// `make_cache` is only used in case a `pack` is specified, use existing implementations in the [`crate::cache`] module. + /// + /// # Tradeoffs + /// + /// The given `progress` is inevitably consumed if there is an error, which is a tradeoff chosen to easily allow using `?` in the + /// error case. + pub fn verify_integrity<P, C, F>( + &self, + pack: Option<PackContext<'_, F>>, + mut progress: P, + should_interrupt: &AtomicBool, + ) -> Result<integrity::Outcome<P>, index::traverse::Error<index::verify::integrity::Error>> + where + P: Progress, + C: crate::cache::DecodeEntry, + F: Fn() -> C + Send + Clone, + { + if let Some(first_invalid) = crate::verify::fan(&self.fan) { + return Err(index::traverse::Error::Processor(integrity::Error::Fan { + index: first_invalid, + })); + } + + match pack { + Some(PackContext { + data: pack, + options: + integrity::Options { + verify_mode, + traversal, + thread_limit, + make_pack_lookup_cache, + }, + }) => self + .traverse( + pack, + progress, + should_interrupt, + || { + let mut encode_buf = Vec::with_capacity(2048); + move |kind, data, index_entry, progress| { + Self::verify_entry(verify_mode, &mut encode_buf, kind, data, index_entry, progress) + } + }, + index::traverse::Options { + traversal, + thread_limit, + check: index::traverse::SafetyCheck::All, + make_pack_lookup_cache, + }, + ) + .map(|o| integrity::Outcome { + actual_index_checksum: o.actual_index_checksum, + pack_traverse_statistics: Some(o.statistics), + progress: o.progress, + }), + None => self + .verify_checksum( + progress.add_child_with_id("Sha1 of index", integrity::ProgressId::ChecksumBytes.into()), + should_interrupt, + ) + .map_err(Into::into) + .map(|id| integrity::Outcome { + actual_index_checksum: id, + pack_traverse_statistics: None, + progress, + }), + } + } + + #[allow(clippy::too_many_arguments)] + fn verify_entry<P>( + verify_mode: Mode, + encode_buf: &mut Vec<u8>, + object_kind: gix_object::Kind, + buf: &[u8], + index_entry: &index::Entry, + progress: &mut P, + ) -> Result<(), integrity::Error> + where + P: Progress, + { + if let Mode::HashCrc32Decode | Mode::HashCrc32DecodeEncode = verify_mode { + use gix_object::Kind::*; + match object_kind { + Tree | Commit | Tag => { + let object = gix_object::ObjectRef::from_bytes(object_kind, buf).map_err(|err| { + integrity::Error::ObjectDecode { + source: err, + kind: object_kind, + id: index_entry.oid, + } + })?; + if let Mode::HashCrc32DecodeEncode = verify_mode { + encode_buf.clear(); + object + .write_to(&mut *encode_buf) + .expect("writing to a memory buffer never fails"); + if encode_buf.as_slice() != buf { + let mut should_return_error = true; + if let gix_object::Kind::Tree = object_kind { + if 
buf.as_bstr().find(b"100664").is_some() || buf.as_bstr().find(b"100640").is_some() { + progress.info(format!("Tree object {} would be cleaned up during re-serialization, replacing mode '100664|100640' with '100644'", index_entry.oid)); + should_return_error = false + } + } + if should_return_error { + return Err(integrity::Error::ObjectEncodeMismatch { + kind: object_kind, + id: index_entry.oid, + expected: buf.into(), + actual: encode_buf.clone().into(), + }); + } + } + } + } + Blob => {} + }; + } + Ok(()) + } +} diff --git a/vendor/gix-pack/src/index/write/encode.rs b/vendor/gix-pack/src/index/write/encode.rs new file mode 100644 index 000000000..80f0cac61 --- /dev/null +++ b/vendor/gix-pack/src/index/write/encode.rs @@ -0,0 +1,127 @@ +use std::{cmp::Ordering, io}; + +pub(crate) const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff; +pub(crate) const HIGH_BIT: u32 = 0x8000_0000; + +use gix_features::{ + hash, + progress::{self, Progress}, +}; + +use crate::index::{util::Count, V2_SIGNATURE}; + +pub(crate) fn write_to( + out: impl io::Write, + entries_sorted_by_oid: Vec<crate::cache::delta::Item<crate::index::write::TreeEntry>>, + pack_hash: &gix_hash::ObjectId, + kind: crate::index::Version, + mut progress: impl Progress, +) -> io::Result<gix_hash::ObjectId> { + use io::Write; + assert_eq!(kind, crate::index::Version::V2, "Can only write V2 packs right now"); + assert!( + entries_sorted_by_oid.len() <= u32::MAX as usize, + "a pack cannot have more than u32::MAX objects" + ); + + // Write header + let mut out = Count::new(std::io::BufWriter::with_capacity( + 8 * 4096, + hash::Write::new(out, kind.hash()), + )); + out.write_all(V2_SIGNATURE)?; + out.write_all(&(kind as u32).to_be_bytes())?; + + progress.init(Some(4), progress::steps()); + let start = std::time::Instant::now(); + let _info = progress.add_child_with_id("writing fan-out table", gix_features::progress::UNKNOWN); + let fan_out = fanout(entries_sorted_by_oid.iter().map(|e| e.data.id.first_byte())); + + for value in fan_out.iter() { + out.write_all(&value.to_be_bytes())?; + } + + progress.inc(); + let _info = progress.add_child_with_id("writing ids", gix_features::progress::UNKNOWN); + for entry in &entries_sorted_by_oid { + out.write_all(entry.data.id.as_slice())?; + } + + progress.inc(); + let _info = progress.add_child_with_id("writing crc32", gix_features::progress::UNKNOWN); + for entry in &entries_sorted_by_oid { + out.write_all(&entry.data.crc32.to_be_bytes())?; + } + + progress.inc(); + let _info = progress.add_child_with_id("writing offsets", gix_features::progress::UNKNOWN); + { + let mut offsets64 = Vec::<u64>::new(); + for entry in &entries_sorted_by_oid { + let offset: u32 = if entry.offset > LARGE_OFFSET_THRESHOLD { + assert!( + offsets64.len() < LARGE_OFFSET_THRESHOLD as usize, + "Encoding breakdown - way too many 64bit offsets" + ); + offsets64.push(entry.offset); + ((offsets64.len() - 1) as u32) | HIGH_BIT + } else { + entry.offset as u32 + }; + out.write_all(&offset.to_be_bytes())?; + } + for value in offsets64 { + out.write_all(&value.to_be_bytes())?; + } + } + + out.write_all(pack_hash.as_slice())?; + + let bytes_written_without_trailer = out.bytes; + let mut out = out.inner.into_inner()?; + let index_hash: gix_hash::ObjectId = out.hash.digest().into(); + out.inner.write_all(index_hash.as_slice())?; + out.inner.flush()?; + + progress.inc(); + progress.show_throughput_with( + start, + (bytes_written_without_trailer + 20) as usize, + progress::bytes().expect("unit always set"), + progress::MessageLevel::Success, + ); 
+ + Ok(index_hash) +} + +pub(crate) fn fanout(iter: impl ExactSizeIterator<Item = u8>) -> [u32; 256] { + let mut fan_out = [0u32; 256]; + let entries_len = iter.len() as u32; + let mut iter = iter.enumerate(); + let mut idx_and_entry = iter.next(); + let mut upper_bound = 0; + + for (offset_be, byte) in fan_out.iter_mut().zip(0u8..=255) { + *offset_be = match idx_and_entry.as_ref() { + Some((_idx, first_byte)) => match first_byte.cmp(&byte) { + Ordering::Less => unreachable!("ids should be ordered, and we make sure to keep ahead with them"), + Ordering::Greater => upper_bound, + Ordering::Equal => { + if byte == 255 { + entries_len + } else { + idx_and_entry = iter.find(|(_, first_byte)| *first_byte != byte); + upper_bound = idx_and_entry + .as_ref() + .map(|(idx, _)| *idx as u32) + .unwrap_or(entries_len); + upper_bound + } + } + }, + None => entries_len, + }; + } + + fan_out +} diff --git a/vendor/gix-pack/src/index/write/error.rs b/vendor/gix-pack/src/index/write/error.rs new file mode 100644 index 000000000..a5ef6ad67 --- /dev/null +++ b/vendor/gix-pack/src/index/write/error.rs @@ -0,0 +1,25 @@ +use std::io; + +/// Returned by [`crate::index::File::write_data_iter_to_stream()`] +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("An IO error occurred when reading the pack or creating a temporary file")] + Io(#[from] io::Error), + #[error("A pack entry could not be extracted")] + PackEntryDecode(#[from] crate::data::input::Error), + #[error("Indices of type {} cannot be written, only {} are supported", *.0 as usize, crate::index::Version::default() as usize)] + Unsupported(crate::index::Version), + #[error("Ref delta objects are not supported as there is no way to look them up. Resolve them beforehand.")] + IteratorInvariantNoRefDelta, + #[error("The iterator failed to set a trailing hash over all prior pack entries in the last provided entry")] + IteratorInvariantTrailer, + #[error("Only u32::MAX objects can be stored in a pack, found {0}")] + IteratorInvariantTooManyObjects(usize), + #[error("{pack_offset} is not a valid offset for pack offset {distance}")] + IteratorInvariantBaseOffset { pack_offset: u64, distance: u64 }, + #[error(transparent)] + Tree(#[from] crate::cache::delta::Error), + #[error(transparent)] + TreeTraversal(#[from] crate::cache::delta::traverse::Error), +} diff --git a/vendor/gix-pack/src/index/write/mod.rs b/vendor/gix-pack/src/index/write/mod.rs new file mode 100644 index 000000000..c8fdaa271 --- /dev/null +++ b/vendor/gix-pack/src/index/write/mod.rs @@ -0,0 +1,263 @@ +use std::{convert::TryInto, io, sync::atomic::AtomicBool}; + +pub use error::Error; +use gix_features::progress::{self, Progress}; + +use crate::cache::delta::{traverse, Tree}; + +pub(crate) mod encode; +mod error; + +pub(crate) struct TreeEntry { + pub id: gix_hash::ObjectId, + pub crc32: u32, +} + +/// Information gathered while executing [`write_data_iter_to_stream()`][crate::index::File::write_data_iter_to_stream] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Outcome { + /// The version of the verified index + pub index_version: crate::index::Version, + /// The verified checksum of the verified index + pub index_hash: gix_hash::ObjectId, + + /// The hash of the '.pack' file, also found in its trailing bytes + pub data_hash: gix_hash::ObjectId, + /// The amount of objects that were verified, always the amount of objects in the pack. 
+ pub num_objects: u32, +} + +/// The progress ids used in [`write_data_iter_from_stream()`][crate::index::File::write_data_iter_to_stream()]. +/// +/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. +#[derive(Debug, Copy, Clone)] +pub enum ProgressId { + /// Counts the amount of objects that were index thus far. + IndexObjects, + /// The amount of bytes that were decompressed while decoding pack entries. + /// + /// This is done to determine entry boundaries. + DecompressedBytes, + /// The amount of objects whose hashes were computed. + /// + /// This is done by decoding them, which typically involves decoding delta objects. + ResolveObjects, + /// The amount of bytes that were decoded in total, as the sum of all bytes to represent all resolved objects. + DecodedBytes, + /// The amount of bytes written to the index file. + IndexBytesWritten, +} + +impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::IndexObjects => *b"IWIO", + ProgressId::DecompressedBytes => *b"IWDB", + ProgressId::ResolveObjects => *b"IWRO", + ProgressId::DecodedBytes => *b"IWDB", + ProgressId::IndexBytesWritten => *b"IWBW", + } + } +} + +/// Various ways of writing an index file from pack entries +impl crate::index::File { + /// Write information about `entries` as obtained from a pack data file into a pack index file via the `out` stream. + /// The resolver produced by `make_resolver` must resolve pack entries from the same pack data file that produced the + /// `entries` iterator. + /// + /// * `kind` is the version of pack index to produce, use [`crate::index::Version::default()`] if in doubt. + /// * `tread_limit` is used for a parallel tree traversal for obtaining object hashes with optimal performance. + /// * `root_progress` is the top-level progress to stay informed about the progress of this potentially long-running + /// computation. + /// * `object_hash` defines what kind of object hash we write into the index file. + /// * `pack_version` is the version of the underlying pack for which `entries` are read. It's used in case none of these objects are provided + /// to compute a pack-hash. + /// + /// # Remarks + /// + /// * neither in-pack nor out-of-pack Ref Deltas are supported here, these must have been resolved beforehand. + /// * `make_resolver()` will only be called after the iterator stopped returning elements and produces a function that + /// provides all bytes belonging to a pack entry writing them to the given mutable output `Vec`. + /// It should return `None` if the entry cannot be resolved from the pack that produced the `entries` iterator, causing + /// the write operation to fail. 
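The `make_resolver` contract described above can be sketched as follows; this is an editor's illustration, not vendored code. It assumes `data::EntryRange` is a plain `Range<u64>` of byte positions into the pack data file, and it simply keeps the whole pack in memory, answering `None` for ranges that fall outside of it.

use std::sync::Arc;

// Hypothetical helpers for illustration only.
fn load_pack(pack_path: &std::path::Path) -> std::io::Result<Arc<Vec<u8>>> {
    Ok(Arc::new(std::fs::read(pack_path)?))
}

fn resolver_from(
    pack: Arc<Vec<u8>>,
) -> impl for<'r> Fn(gix_pack::data::EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone {
    move |range: gix_pack::data::EntryRange, out: &mut Vec<u8>| {
        // Copy the requested entry bytes, or signal an unresolvable entry with `None`.
        let slice = pack.get(range.start as usize..range.end as usize)?;
        out.clear();
        out.extend_from_slice(slice);
        Some(())
    }
}

// At the call site, `make_resolver` could then be `|| load_pack(&pack_path).map(resolver_from)`.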
+ #[allow(clippy::too_many_arguments)] + pub fn write_data_iter_to_stream<F, F2>( + version: crate::index::Version, + make_resolver: F, + entries: impl Iterator<Item = Result<crate::data::input::Entry, crate::data::input::Error>>, + thread_limit: Option<usize>, + mut root_progress: impl Progress, + out: impl io::Write, + should_interrupt: &AtomicBool, + object_hash: gix_hash::Kind, + pack_version: crate::data::Version, + ) -> Result<Outcome, Error> + where + F: FnOnce() -> io::Result<F2>, + F2: for<'r> Fn(crate::data::EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone, + { + if version != crate::index::Version::default() { + return Err(Error::Unsupported(version)); + } + let mut num_objects: usize = 0; + let mut last_seen_trailer = None; + let (anticipated_num_objects, upper_bound) = entries.size_hint(); + let worst_case_num_objects_after_thin_pack_resolution = upper_bound.unwrap_or(anticipated_num_objects); + let mut tree = Tree::with_capacity(worst_case_num_objects_after_thin_pack_resolution)?; + let indexing_start = std::time::Instant::now(); + + root_progress.init(Some(4), progress::steps()); + let mut objects_progress = root_progress.add_child_with_id("indexing", ProgressId::IndexObjects.into()); + objects_progress.init(Some(anticipated_num_objects), progress::count("objects")); + let mut decompressed_progress = + root_progress.add_child_with_id("decompressing", ProgressId::DecompressedBytes.into()); + decompressed_progress.init(None, progress::bytes()); + let mut pack_entries_end: u64 = 0; + + for entry in entries { + let crate::data::input::Entry { + header, + pack_offset, + crc32, + header_size, + compressed: _, + compressed_size, + decompressed_size, + trailer, + } = entry?; + + decompressed_progress.inc_by(decompressed_size as usize); + + let entry_len = header_size as u64 + compressed_size; + pack_entries_end = pack_offset + entry_len; + + let crc32 = crc32.expect("crc32 to be computed by the iterator. Caller assures correct configuration."); + + use crate::data::entry::Header::*; + match header { + Tree | Blob | Commit | Tag => { + tree.add_root( + pack_offset, + TreeEntry { + id: object_hash.null(), + crc32, + }, + )?; + } + RefDelta { .. } => return Err(Error::IteratorInvariantNoRefDelta), + OfsDelta { base_distance } => { + let base_pack_offset = + crate::data::entry::Header::verified_base_pack_offset(pack_offset, base_distance).ok_or( + Error::IteratorInvariantBaseOffset { + pack_offset, + distance: base_distance, + }, + )?; + tree.add_child( + base_pack_offset, + pack_offset, + TreeEntry { + id: object_hash.null(), + crc32, + }, + )?; + } + }; + last_seen_trailer = trailer; + num_objects += 1; + objects_progress.inc(); + } + let num_objects: u32 = num_objects + .try_into() + .map_err(|_| Error::IteratorInvariantTooManyObjects(num_objects))?; + + objects_progress.show_throughput(indexing_start); + decompressed_progress.show_throughput(indexing_start); + drop(objects_progress); + drop(decompressed_progress); + + root_progress.inc(); + + let resolver = make_resolver()?; + let sorted_pack_offsets_by_oid = { + let traverse::Outcome { roots, children } = tree.traverse( + resolver, + pack_entries_end, + || (), + |data, + _progress, + traverse::Context { + entry, + decompressed: bytes, + .. 
+ }| { + modify_base(data, entry, bytes, version.hash()); + Ok::<_, Error>(()) + }, + traverse::Options { + object_progress: root_progress.add_child_with_id("Resolving", ProgressId::ResolveObjects.into()), + size_progress: root_progress.add_child_with_id("Decoding", ProgressId::DecodedBytes.into()), + thread_limit, + should_interrupt, + object_hash, + }, + )?; + root_progress.inc(); + + let mut items = roots; + items.extend(children); + { + let _progress = root_progress.add_child_with_id("sorting by id", gix_features::progress::UNKNOWN); + items.sort_by_key(|e| e.data.id); + } + + root_progress.inc(); + items + }; + + let pack_hash = match last_seen_trailer { + Some(ph) => ph, + None if num_objects == 0 => { + let header = crate::data::header::encode(pack_version, 0); + let mut hasher = gix_features::hash::hasher(object_hash); + hasher.update(&header); + gix_hash::ObjectId::from(hasher.digest()) + } + None => return Err(Error::IteratorInvariantTrailer), + }; + let index_hash = encode::write_to( + out, + sorted_pack_offsets_by_oid, + &pack_hash, + version, + root_progress.add_child_with_id("writing index file", ProgressId::IndexBytesWritten.into()), + )?; + root_progress.show_throughput_with( + indexing_start, + num_objects as usize, + progress::count("objects").expect("unit always set"), + progress::MessageLevel::Success, + ); + Ok(Outcome { + index_version: version, + index_hash, + data_hash: pack_hash, + num_objects, + }) + } +} + +fn modify_base(entry: &mut TreeEntry, pack_entry: &crate::data::Entry, decompressed: &[u8], hash: gix_hash::Kind) { + fn compute_hash(kind: gix_object::Kind, bytes: &[u8], object_hash: gix_hash::Kind) -> gix_hash::ObjectId { + let mut hasher = gix_features::hash::hasher(object_hash); + hasher.update(&gix_object::encode::loose_header(kind, bytes.len())); + hasher.update(bytes); + gix_hash::ObjectId::from(hasher.digest()) + } + + let object_kind = pack_entry.header.as_kind().expect("base object as source of iteration"); + let id = compute_hash(object_kind, decompressed, hash); + entry.id = id; +} diff --git a/vendor/gix-pack/src/lib.rs b/vendor/gix-pack/src/lib.rs new file mode 100755 index 000000000..200b87019 --- /dev/null +++ b/vendor/gix-pack/src/lib.rs @@ -0,0 +1,73 @@ +//! Git stores all of its data as _Objects_, which are data along with a hash over all data. Storing objects efficiently +//! is what git packs are concerned about. +//! +//! Packs consist of [data files][data::File] and [index files][index::File]. The latter can be generated from a data file +//! and make accessing objects within a pack feasible. +//! +//! A [Bundle] conveniently combines a data pack alongside its index to allow [finding][Find] objects or verifying the pack. +//! Objects returned by `.find(…)` are [objects][gix_object::Data] which know their pack location in order to speed up +//! various common operations like creating new packs from existing ones. +//! +//! When traversing all objects in a pack, a _delta tree acceleration structure_ can be built from pack data or an index +//! in order to decompress packs in parallel and without any waste. +//! 
## Feature Flags +#![cfg_attr( + feature = "document-features", + cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![deny(missing_docs, rust_2018_idioms, unsafe_code)] + +/// +pub mod bundle; +/// A bundle of pack data and the corresponding pack index +pub struct Bundle { + /// The pack file corresponding to `index` + pub pack: data::File, + /// The index file corresponding to `pack` + pub index: index::File, +} + +/// +pub mod find; + +/// +pub mod cache; +/// +pub mod data; + +mod find_traits; +pub use find_traits::{Find, FindExt}; + +/// +pub mod index; +/// +pub mod multi_index; + +/// +pub mod verify; + +mod mmap { + use std::path::Path; + + pub fn read_only(path: &Path) -> std::io::Result<memmap2::Mmap> { + let file = std::fs::File::open(path)?; + // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. + #[allow(unsafe_code)] + unsafe { + memmap2::Mmap::map(&file) + } + } +} + +use std::convert::TryInto; + +#[inline] +fn read_u32(b: &[u8]) -> u32 { + u32::from_be_bytes(b.try_into().unwrap()) +} + +#[inline] +fn read_u64(b: &[u8]) -> u64 { + u64::from_be_bytes(b.try_into().unwrap()) +} diff --git a/vendor/gix-pack/src/multi_index/access.rs b/vendor/gix-pack/src/multi_index/access.rs new file mode 100644 index 000000000..d209cd0b9 --- /dev/null +++ b/vendor/gix-pack/src/multi_index/access.rs @@ -0,0 +1,143 @@ +use std::{ + ops::Range, + path::{Path, PathBuf}, +}; + +use crate::{ + data, + index::PrefixLookupResult, + multi_index::{EntryIndex, File, PackIndex, Version}, +}; + +/// Represents an entry within a multi index file, effectively mapping object [`IDs`][gix_hash::ObjectId] to pack data +/// files and the offset within. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Entry { + /// The ID of the object. + pub oid: gix_hash::ObjectId, + /// The offset to the object's header in the pack data file. + pub pack_offset: data::Offset, + /// The index of the pack matching our [`File::index_names()`] slice. + pub pack_index: PackIndex, +} + +/// Access methods +impl File { + /// Returns the version of the multi-index file. + pub fn version(&self) -> Version { + self.version + } + /// Returns the path from which the multi-index file was loaded. + /// + /// Note that it might have changed in the mean time, or might have been removed as well. + pub fn path(&self) -> &Path { + &self.path + } + /// Returns the amount of indices stored in this multi-index file. It's the same as [File::index_names().len()][File::index_names()], + /// and returned as one past the highest known index. + pub fn num_indices(&self) -> PackIndex { + self.num_indices + } + /// Returns the total amount of objects available for lookup, and returned as one past the highest known entry index + pub fn num_objects(&self) -> EntryIndex { + self.num_objects + } + /// Returns the kind of hash function used for object ids available in this index. + pub fn object_hash(&self) -> gix_hash::Kind { + self.object_hash + } + /// Returns the checksum over the entire content of the file (excluding the checksum itself). + /// + /// It can be used to validate it didn't change after creation. + pub fn checksum(&self) -> gix_hash::ObjectId { + gix_hash::ObjectId::from(&self.data[self.data.len() - self.hash_len..]) + } + /// Return all names of index files (`*.idx`) whose objects we contain. 
+ /// + /// The corresponding pack can be found by replacing the `.idx` extension with `.pack`. + pub fn index_names(&self) -> &[PathBuf] { + &self.index_names + } +} + +impl File { + /// Return the object id at the given `index`, which ranges from 0 to [File::num_objects()]. + pub fn oid_at_index(&self, index: EntryIndex) -> &gix_hash::oid { + debug_assert!(index < self.num_objects, "index out of bounds"); + let index: usize = index as usize; + let start = self.lookup_ofs + index * self.hash_len; + gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len]) + } + + /// Given a `prefix`, find an object that matches it uniquely within this index and return `Some(Ok(entry_index))`. + /// If there is more than one object matching the object `Some(Err(())` is returned. + /// + /// Finally, if no object matches the index, the return value is `None`. + /// + /// Pass `candidates` to obtain the set of entry-indices matching `prefix`, with the same return value as + /// one would have received if it remained `None`. It will be empty if no object matched the `prefix`. + /// + // NOTE: pretty much the same things as in `index::File::lookup`, change things there + // as well. + pub fn lookup_prefix( + &self, + prefix: gix_hash::Prefix, + candidates: Option<&mut Range<EntryIndex>>, + ) -> Option<PrefixLookupResult> { + crate::index::access::lookup_prefix( + prefix, + candidates, + &self.fan, + |idx| self.oid_at_index(idx), + self.num_objects, + ) + } + + /// Find the index ranging from 0 to [File::num_objects()] that belongs to data associated with `id`, or `None` if it wasn't found. + /// + /// Use this index for finding additional information via [`File::pack_id_and_pack_offset_at_index()`]. + pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<EntryIndex> { + crate::index::access::lookup(id, &self.fan, |idx| self.oid_at_index(idx)) + } + + /// Given the `index` ranging from 0 to [File::num_objects()], return the pack index and its absolute offset into the pack. + /// + /// The pack-index refers to an entry in the [`index_names`][File::index_names()] list, from which the pack can be derived. + pub fn pack_id_and_pack_offset_at_index(&self, index: EntryIndex) -> (PackIndex, data::Offset) { + const OFFSET_ENTRY_SIZE: usize = 4 + 4; + let index = index as usize; + let start = self.offsets_ofs + index * OFFSET_ENTRY_SIZE; + + const HIGH_BIT: u32 = 1 << 31; + + let pack_index = crate::read_u32(&self.data[start..][..4]); + let offset = &self.data[start + 4..][..4]; + let ofs32 = crate::read_u32(offset); + let pack_offset = if (ofs32 & HIGH_BIT) == HIGH_BIT { + // We determine if large offsets are actually larger than 4GB and if not, we don't use the high-bit to signal anything + // but allow the presence of the large-offset chunk to signal what's happening. + if let Some(offsets_64) = self.large_offsets_ofs { + let from = offsets_64 + (ofs32 ^ HIGH_BIT) as usize * 8; + crate::read_u64(&self.data[from..][..8]) + } else { + ofs32 as u64 + } + } else { + ofs32 as u64 + }; + (pack_index, pack_offset) + } + + /// Return an iterator over all entries within this file. 
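The offset decoding performed by `pack_id_and_pack_offset_at_index()` above can be restated as a small standalone sketch (illustrative only): when the high bit of the stored 32-bit value is set and a large-offset chunk exists, the remaining 31 bits index into that chunk's 64-bit table; otherwise the stored value is the pack offset itself.

const HIGH_BIT: u32 = 1 << 31;

// `large_offsets` is the decoded 64-bit table of the optional large-offsets chunk.
fn decode_pack_offset(stored: u32, large_offsets: Option<&[u64]>) -> u64 {
    match (stored & HIGH_BIT == HIGH_BIT, large_offsets) {
        (true, Some(table)) => table[(stored ^ HIGH_BIT) as usize],
        // Without that chunk the high bit carries no special meaning.
        _ => stored as u64,
    }
}

// Example: stored = 0x8000_0002 with a large-offset chunk present selects table[2].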
+ pub fn iter(&self) -> impl Iterator<Item = Entry> + '_ { + (0..self.num_objects).map(move |idx| { + let (pack_index, pack_offset) = self.pack_id_and_pack_offset_at_index(idx); + Entry { + oid: self.oid_at_index(idx).to_owned(), + pack_offset, + pack_index, + } + }) + } +} diff --git a/vendor/gix-pack/src/multi_index/chunk.rs b/vendor/gix-pack/src/multi_index/chunk.rs new file mode 100644 index 000000000..7ed8eebcb --- /dev/null +++ b/vendor/gix-pack/src/multi_index/chunk.rs @@ -0,0 +1,276 @@ +/// Information for the chunk about index names +pub mod index_names { + use std::path::{Path, PathBuf}; + + use gix_object::bstr::{BString, ByteSlice}; + + /// The ID used for the index-names chunk. + pub const ID: gix_chunk::Id = *b"PNAM"; + + /// + pub mod decode { + use gix_object::bstr::BString; + + /// The error returned by [from_bytes()][super::from_bytes()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The pack names were not ordered alphabetically.")] + NotOrderedAlphabetically, + #[error("Each pack path name must be terminated with a null byte")] + MissingNullByte, + #[error("Couldn't turn path '{path}' into OS path due to encoding issues")] + PathEncoding { path: BString }, + #[error("non-padding bytes found after all paths were read.")] + UnknownTrailerBytes, + } + } + + /// Parse null-separated index names from the given `chunk` of bytes and the expected number of packs and indices. + /// Ignore padding bytes which are typically \0. + pub fn from_bytes(mut chunk: &[u8], num_packs: u32) -> Result<Vec<PathBuf>, decode::Error> { + let mut out = Vec::new(); + for _ in 0..num_packs { + let null_byte_pos = chunk.find_byte(b'\0').ok_or(decode::Error::MissingNullByte)?; + + let path = &chunk[..null_byte_pos]; + let path = gix_path::try_from_byte_slice(path) + .map_err(|_| decode::Error::PathEncoding { + path: BString::from(path), + })? + .to_owned(); + + if let Some(previous) = out.last() { + if previous >= &path { + return Err(decode::Error::NotOrderedAlphabetically); + } + } + out.push(path); + + chunk = &chunk[null_byte_pos + 1..]; + } + + if !chunk.is_empty() && !chunk.iter().all(|b| *b == 0) { + return Err(decode::Error::UnknownTrailerBytes); + } + // NOTE: git writes garbage into this chunk, usually extra \0 bytes, which we simply ignore. If we were strict + // about it we couldn't read this chunk data at all. + Ok(out) + } + + /// Calculate the size on disk for our chunk with the given index paths. Note that these are expected to have been processed already + /// to actually be file names. + pub fn storage_size(paths: impl IntoIterator<Item = impl AsRef<Path>>) -> u64 { + let mut count = 0u64; + for path in paths { + let path = path.as_ref(); + let ascii_path = path.to_str().expect("UTF-8 compatible paths"); + assert!( + ascii_path.is_ascii(), + "must use ascii bytes for correct size computation" + ); + count += (ascii_path.as_bytes().len() + 1/* null byte */) as u64 + } + + let needed_alignment = CHUNK_ALIGNMENT - (count % CHUNK_ALIGNMENT); + if needed_alignment < CHUNK_ALIGNMENT { + count += needed_alignment; + } + count + } + + /// Write all `paths` in order to `out`, including padding. 
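To make the size rule above concrete, here is a standalone restatement with a worked example (illustrative only): each path contributes its byte length plus one NUL terminator, and the total is padded up to the 4-byte chunk alignment.

fn padded_names_size<'a>(paths: impl IntoIterator<Item = &'a str>) -> u64 {
    const CHUNK_ALIGNMENT: u64 = 4;
    let mut count: u64 = paths.into_iter().map(|p| p.len() as u64 + 1 /* NUL */).sum();
    let needed_alignment = CHUNK_ALIGNMENT - (count % CHUNK_ALIGNMENT);
    if needed_alignment < CHUNK_ALIGNMENT {
        count += needed_alignment;
    }
    count
}

// Example: ["pack-a.idx", "pack-b.idx"] yields 2 * (10 + 1) = 22 payload bytes,
// padded with 2 trailing zero bytes to a total of 24.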
+ pub fn write( + paths: impl IntoIterator<Item = impl AsRef<Path>>, + mut out: impl std::io::Write, + ) -> std::io::Result<()> { + let mut written_bytes = 0; + for path in paths { + let path = path.as_ref().to_str().expect("UTF-8 path"); + out.write_all(path.as_bytes())?; + out.write_all(&[0])?; + written_bytes += path.as_bytes().len() as u64 + 1; + } + + let needed_alignment = CHUNK_ALIGNMENT - (written_bytes % CHUNK_ALIGNMENT); + if needed_alignment < CHUNK_ALIGNMENT { + let padding = [0u8; CHUNK_ALIGNMENT as usize]; + out.write_all(&padding[..needed_alignment as usize])?; + } + Ok(()) + } + + const CHUNK_ALIGNMENT: u64 = 4; +} + +/// Information for the chunk with the fanout table +pub mod fanout { + use std::convert::TryInto; + + use crate::multi_index; + + /// The size of the fanout table + pub const SIZE: usize = 4 * 256; + + /// The id uniquely identifying the fanout table. + pub const ID: gix_chunk::Id = *b"OIDF"; + + /// Decode the fanout table contained in `chunk`, or return `None` if it didn't have the expected size. + pub fn from_bytes(chunk: &[u8]) -> Option<[u32; 256]> { + if chunk.len() != SIZE { + return None; + } + let mut out = [0; 256]; + for (c, f) in chunk.chunks(4).zip(out.iter_mut()) { + *f = u32::from_be_bytes(c.try_into().unwrap()); + } + out.into() + } + + /// Write the fanout for the given entries, which must be sorted by oid + pub(crate) fn write( + sorted_entries: &[multi_index::write::Entry], + mut out: impl std::io::Write, + ) -> std::io::Result<()> { + let fanout = crate::index::write::encode::fanout(sorted_entries.iter().map(|e| e.id.first_byte())); + + for value in fanout.iter() { + out.write_all(&value.to_be_bytes())?; + } + Ok(()) + } +} + +/// Information about the oid lookup table. +pub mod lookup { + use std::ops::Range; + + use crate::multi_index; + + /// The id uniquely identifying the oid lookup table. + pub const ID: gix_chunk::Id = *b"OIDL"; + + /// Return the amount of bytes needed to store the data on disk for the given amount of `entries` + pub fn storage_size(entries: usize, object_hash: gix_hash::Kind) -> u64 { + (entries * object_hash.len_in_bytes()) as u64 + } + + pub(crate) fn write( + sorted_entries: &[multi_index::write::Entry], + mut out: impl std::io::Write, + ) -> std::io::Result<()> { + for entry in sorted_entries { + out.write_all(entry.id.as_slice())?; + } + Ok(()) + } + + /// Return true if the size of the `offset` range seems to match for a `hash` of the given kind and the amount of objects. + pub fn is_valid(offset: &Range<usize>, hash: gix_hash::Kind, num_objects: u32) -> bool { + (offset.end - offset.start) / hash.len_in_bytes() == num_objects as usize + } +} + +/// Information about the offsets table. +pub mod offsets { + use std::{convert::TryInto, ops::Range}; + + use crate::multi_index; + + /// The id uniquely identifying the offsets table. + pub const ID: gix_chunk::Id = *b"OOFF"; + + /// Return the amount of bytes needed to offset data for `entries`. 
+ pub fn storage_size(entries: usize) -> u64 { + (entries * (4 /*pack-id*/ + 4/* pack offset */)) as u64 + } + + pub(crate) fn write( + sorted_entries: &[multi_index::write::Entry], + large_offsets_needed: bool, + mut out: impl std::io::Write, + ) -> std::io::Result<()> { + use crate::index::write::encode::{HIGH_BIT, LARGE_OFFSET_THRESHOLD}; + let mut num_large_offsets = 0u32; + + for entry in sorted_entries { + out.write_all(&entry.pack_index.to_be_bytes())?; + + let offset: u32 = if large_offsets_needed { + if entry.pack_offset > LARGE_OFFSET_THRESHOLD { + let res = num_large_offsets | HIGH_BIT; + num_large_offsets += 1; + res + } else { + entry.pack_offset as u32 + } + } else { + entry + .pack_offset + .try_into() + .expect("without large offsets, pack-offset fits u32") + }; + out.write_all(&offset.to_be_bytes())?; + } + Ok(()) + } + + /// Returns true if the `offset` range seems to match the size required for `num_objects`. + pub fn is_valid(offset: &Range<usize>, num_objects: u32) -> bool { + let entry_size = 4 /* pack-id */ + 4 /* pack-offset */; + ((offset.end - offset.start) / num_objects as usize) == entry_size + } +} + +/// Information about the large offsets table. +pub mod large_offsets { + use std::ops::Range; + + use crate::{index::write::encode::LARGE_OFFSET_THRESHOLD, multi_index}; + + /// The id uniquely identifying the large offsets table (with 64 bit offsets) + pub const ID: gix_chunk::Id = *b"LOFF"; + + /// Returns Some(num-large-offset) if there are offsets larger than u32. + pub(crate) fn num_large_offsets(entries: &[multi_index::write::Entry]) -> Option<usize> { + let mut num_large_offsets = 0; + let mut needs_large_offsets = false; + for entry in entries { + if entry.pack_offset > LARGE_OFFSET_THRESHOLD { + num_large_offsets += 1; + } + if entry.pack_offset > u32::MAX as crate::data::Offset { + needs_large_offsets = true; + } + } + + needs_large_offsets.then_some(num_large_offsets) + } + /// Returns true if the `offsets` range seems to be properly aligned for the data we expect. + pub fn is_valid(offset: &Range<usize>) -> bool { + (offset.end - offset.start) % 8 == 0 + } + + pub(crate) fn write( + sorted_entries: &[multi_index::write::Entry], + mut num_large_offsets: usize, + mut out: impl std::io::Write, + ) -> std::io::Result<()> { + for offset in sorted_entries + .iter() + .filter_map(|e| (e.pack_offset > LARGE_OFFSET_THRESHOLD).then_some(e.pack_offset)) + { + out.write_all(&offset.to_be_bytes())?; + num_large_offsets = num_large_offsets + .checked_sub(1) + .expect("BUG: wrote more offsets the previously found"); + } + assert_eq!(num_large_offsets, 0, "BUG: wrote less offsets than initially counted"); + Ok(()) + } + + /// Return the amount of bytes needed to store the given amount of `large_offsets` + pub(crate) fn storage_size(large_offsets: usize) -> u64 { + 8 * large_offsets as u64 + } +} diff --git a/vendor/gix-pack/src/multi_index/init.rs b/vendor/gix-pack/src/multi_index/init.rs new file mode 100644 index 000000000..190b40a7b --- /dev/null +++ b/vendor/gix-pack/src/multi_index/init.rs @@ -0,0 +1,157 @@ +use std::{convert::TryFrom, path::Path}; + +use crate::multi_index::{chunk, File, Version}; + +mod error { + use crate::multi_index::chunk; + + /// The error returned by [File::at()][super::File::at()]. 
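Editor's note on `num_large_offsets()` above (illustrative values): offsets are counted as "large" once they exceed `LARGE_OFFSET_THRESHOLD` (`0x7fff_ffff`), but the chunk is only emitted if at least one offset does not fit into a `u32` at all.

// pack offsets: [0x10, 0x8000_0000]
//   0x8000_0000 > LARGE_OFFSET_THRESHOLD  -> counted as one large offset
//   0x8000_0000 <= u32::MAX               -> no offset actually needs 64 bits
// => num_large_offsets() returns None, no large-offset chunk is written, and the
//    offsets chunk stores 0x8000_0000 verbatim; readers only interpret the high bit
//    as an index into the large-offset table when that chunk is present.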
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Could not open multi-index file at '{path}'")] + Io { + source: std::io::Error, + path: std::path::PathBuf, + }, + #[error("{message}")] + Corrupt { message: &'static str }, + #[error("Unsupported multi-index version: {version})")] + UnsupportedVersion { version: u8 }, + #[error("Unsupported hash kind: {kind})")] + UnsupportedObjectHash { kind: u8 }, + #[error(transparent)] + ChunkFileDecode(#[from] gix_chunk::file::decode::Error), + #[error(transparent)] + MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error), + #[error(transparent)] + FileTooLarge(#[from] gix_chunk::file::index::data_by_kind::Error), + #[error("The multi-pack fan doesn't have the correct size of 256 * 4 bytes")] + MultiPackFanSize, + #[error(transparent)] + PackNames(#[from] chunk::index_names::decode::Error), + #[error("multi-index chunk {:?} has invalid size: {message}", String::from_utf8_lossy(.id))] + InvalidChunkSize { id: gix_chunk::Id, message: &'static str }, + } +} + +pub use error::Error; + +/// Initialization +impl File { + /// Open the multi-index file at the given `path`. + pub fn at(path: impl AsRef<Path>) -> Result<Self, Error> { + Self::try_from(path.as_ref()) + } +} + +impl TryFrom<&Path> for File { + type Error = Error; + + fn try_from(path: &Path) -> Result<Self, Self::Error> { + let data = crate::mmap::read_only(path).map_err(|source| Error::Io { + source, + path: path.to_owned(), + })?; + + const TRAILER_LEN: usize = gix_hash::Kind::shortest().len_in_bytes(); /* trailing hash */ + if data.len() + < Self::HEADER_LEN + + gix_chunk::file::Index::size_for_entries(4 /*index names, fan, offsets, oids*/) + + chunk::fanout::SIZE + + TRAILER_LEN + { + return Err(Error::Corrupt { + message: "multi-index file is truncated and too short", + }); + } + + let (version, object_hash, num_chunks, num_indices) = { + let (signature, data) = data.split_at(4); + if signature != Self::SIGNATURE { + return Err(Error::Corrupt { + message: "Invalid signature", + }); + } + let (version, data) = data.split_at(1); + let version = match version[0] { + 1 => Version::V1, + version => return Err(Error::UnsupportedVersion { version }), + }; + + let (object_hash, data) = data.split_at(1); + let object_hash = gix_hash::Kind::try_from(object_hash[0]) + .map_err(|unknown| Error::UnsupportedObjectHash { kind: unknown })?; + let (num_chunks, data) = data.split_at(1); + let num_chunks = num_chunks[0]; + + let (_num_base_files, data) = data.split_at(1); // TODO: handle base files once it's clear what this does + + let (num_indices, _) = data.split_at(4); + let num_indices = crate::read_u32(num_indices); + + (version, object_hash, num_chunks, num_indices) + }; + + let chunks = gix_chunk::file::Index::from_bytes(&data, Self::HEADER_LEN, num_chunks as u32)?; + + let index_names = chunks.data_by_id(&data, chunk::index_names::ID)?; + let index_names = chunk::index_names::from_bytes(index_names, num_indices)?; + + let fan = chunks.data_by_id(&data, chunk::fanout::ID)?; + let fan = chunk::fanout::from_bytes(fan).ok_or(Error::MultiPackFanSize)?; + let num_objects = fan[255]; + + let lookup = chunks.validated_usize_offset_by_id(chunk::lookup::ID, |offset| { + chunk::lookup::is_valid(&offset, object_hash, num_objects) + .then_some(offset) + .ok_or(Error::InvalidChunkSize { + id: chunk::lookup::ID, + message: "The chunk with alphabetically ordered object ids doesn't have the correct size", + }) + })??; + let offsets = 
chunks.validated_usize_offset_by_id(chunk::offsets::ID, |offset| { + chunk::offsets::is_valid(&offset, num_objects) + .then_some(offset) + .ok_or(Error::InvalidChunkSize { + id: chunk::offsets::ID, + message: "The chunk with offsets into the pack doesn't have the correct size", + }) + })??; + let large_offsets = chunks + .validated_usize_offset_by_id(chunk::large_offsets::ID, |offset| { + chunk::large_offsets::is_valid(&offset) + .then_some(offset) + .ok_or(Error::InvalidChunkSize { + id: chunk::large_offsets::ID, + message: "The chunk with large offsets into the pack doesn't have the correct size", + }) + }) + .ok() + .transpose()?; + + let checksum_offset = chunks.highest_offset() as usize; + let trailer = &data[checksum_offset..]; + if trailer.len() != object_hash.len_in_bytes() { + return Err(Error::Corrupt { + message: + "Trailing checksum didn't have the expected size or there were unknown bytes after the checksum.", + }); + } + + Ok(File { + data, + path: path.to_owned(), + version, + hash_len: object_hash.len_in_bytes(), + object_hash, + fan, + index_names, + lookup_ofs: lookup.start, + offsets_ofs: offsets.start, + large_offsets_ofs: large_offsets.map(|r| r.start), + num_objects, + num_indices, + }) + } +} diff --git a/vendor/gix-pack/src/multi_index/mod.rs b/vendor/gix-pack/src/multi_index/mod.rs new file mode 100644 index 000000000..3f7ed0ff5 --- /dev/null +++ b/vendor/gix-pack/src/multi_index/mod.rs @@ -0,0 +1,57 @@ +use std::path::PathBuf; + +use memmap2::Mmap; + +/// Known multi-index file versions +#[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Hash, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +#[allow(missing_docs)] +pub enum Version { + V1 = 1, +} + +impl Default for Version { + fn default() -> Self { + Version::V1 + } +} + +/// An index into our [`File::index_names()`] array yielding the name of the index and by implication, its pack file. +pub type PackIndex = u32; + +/// The type for referring to indices of an entry within the index file. +pub type EntryIndex = u32; + +/// A representation of an index file for multiple packs at the same time, typically stored in a file +/// named 'multi-pack-index'. +pub struct File { + data: Mmap, + path: std::path::PathBuf, + version: Version, + hash_len: usize, + object_hash: gix_hash::Kind, + /// The amount of pack files contained within + num_indices: u32, + num_objects: u32, + + fan: [u32; 256], + index_names: Vec<PathBuf>, + lookup_ofs: usize, + offsets_ofs: usize, + large_offsets_ofs: Option<usize>, +} + +/// +pub mod write; + +/// +mod access; + +/// +pub mod verify; + +/// +pub mod chunk; + +/// +pub mod init; diff --git a/vendor/gix-pack/src/multi_index/verify.rs b/vendor/gix-pack/src/multi_index/verify.rs new file mode 100644 index 000000000..856a48501 --- /dev/null +++ b/vendor/gix-pack/src/multi_index/verify.rs @@ -0,0 +1,337 @@ +use std::{cmp::Ordering, sync::atomic::AtomicBool, time::Instant}; + +use gix_features::progress::Progress; + +use crate::{index, multi_index::File}; + +/// +pub mod integrity { + use crate::multi_index::EntryIndex; + + /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()]. 
+ #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error("Object {id} should be at pack-offset {expected_pack_offset} but was found at {actual_pack_offset}")] + PackOffsetMismatch { + id: gix_hash::ObjectId, + expected_pack_offset: u64, + actual_pack_offset: u64, + }, + #[error(transparent)] + MultiIndexChecksum(#[from] crate::multi_index::verify::checksum::Error), + #[error(transparent)] + IndexIntegrity(#[from] crate::index::verify::integrity::Error), + #[error(transparent)] + BundleInit(#[from] crate::bundle::init::Error), + #[error("Counted {actual} objects, but expected {expected} as per multi-index")] + UnexpectedObjectCount { actual: usize, expected: usize }, + #[error("{id} wasn't found in the index referenced in the multi-pack index")] + OidNotFound { id: gix_hash::ObjectId }, + #[error("The object id at multi-index entry {index} wasn't in order")] + OutOfOrder { index: EntryIndex }, + #[error("The fan at index {index} is out of order as it's larger then the following value.")] + Fan { index: usize }, + #[error("The multi-index claims to have no objects")] + Empty, + #[error("Interrupted")] + Interrupted, + } + + /// Returned by [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()]. + pub struct Outcome<P> { + /// The computed checksum of the multi-index which matched the stored one. + pub actual_index_checksum: gix_hash::ObjectId, + /// The for each entry in [`index_names()`][super::File::index_names()] provide the corresponding pack traversal outcome. + pub pack_traverse_statistics: Vec<crate::index::traverse::Statistics>, + /// The provided progress instance. + pub progress: P, + } + + /// The progress ids used in [`multi_index::File::verify_integrity()`][crate::multi_index::File::verify_integrity()]. + /// + /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. + #[derive(Debug, Copy, Clone)] + pub enum ProgressId { + /// The amount of bytes read to verify the multi-index checksum. + ChecksumBytes, + /// The amount of objects whose offset has been checked. + ObjectOffsets, + } + + impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::ChecksumBytes => *b"MVCK", + ProgressId::ObjectOffsets => *b"MVOF", + } + } + } +} + +/// +pub mod checksum { + /// Returned by [`multi_index::File::verify_checksum()`][crate::multi_index::File::verify_checksum()]. + pub type Error = crate::verify::checksum::Error; +} + +impl File { + /// Validate that our [`checksum()`][File::checksum()] matches the actual contents + /// of this index file, and return it if it does. + pub fn verify_checksum( + &self, + progress: impl Progress, + should_interrupt: &AtomicBool, + ) -> Result<gix_hash::ObjectId, checksum::Error> { + crate::verify::checksum_on_disk_or_mmap( + self.path(), + &self.data, + self.checksum(), + self.object_hash, + progress, + should_interrupt, + ) + } + + /// Similar to [`verify_integrity()`][File::verify_integrity()] but without any deep inspection of objects. + /// + /// Instead we only validate the contents of the multi-index itself. 
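A small usage sketch for the fast path follows; it is an editor's illustration, and the `Discard` progress sink from `gix-features` is an assumption.

fn check_multi_index(path: &std::path::Path) -> Result<(), Box<dyn std::error::Error>> {
    let multi_index = gix_pack::multi_index::File::at(path)?;
    let should_interrupt = std::sync::atomic::AtomicBool::new(false);
    // Validates the checksum, fan-out, object-id order and per-entry pack offsets
    // against each referenced `.idx` file, but never decodes objects from pack data.
    let (checksum, _progress) =
        multi_index.verify_integrity_fast(gix_features::progress::Discard, &should_interrupt)?;
    println!("multi-pack-index OK, checksum {checksum}");
    Ok(())
}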
+ pub fn verify_integrity_fast<P>( + &self, + progress: P, + should_interrupt: &AtomicBool, + ) -> Result<(gix_hash::ObjectId, P), integrity::Error> + where + P: Progress, + { + self.verify_integrity_inner( + progress, + should_interrupt, + false, + index::verify::integrity::Options::default(), + ) + .map_err(|err| match err { + index::traverse::Error::Processor(err) => err, + _ => unreachable!("BUG: no other error type is possible"), + }) + .map(|o| (o.actual_index_checksum, o.progress)) + } + + /// Similar to [`crate::Bundle::verify_integrity()`] but checks all contained indices and their packs. + /// + /// Note that it's considered a failure if an index doesn't have a corresponding pack. + pub fn verify_integrity<C, P, F>( + &self, + progress: P, + should_interrupt: &AtomicBool, + options: index::verify::integrity::Options<F>, + ) -> Result<integrity::Outcome<P>, index::traverse::Error<integrity::Error>> + where + P: Progress, + C: crate::cache::DecodeEntry, + F: Fn() -> C + Send + Clone, + { + self.verify_integrity_inner(progress, should_interrupt, true, options) + } + + fn verify_integrity_inner<C, P, F>( + &self, + mut progress: P, + should_interrupt: &AtomicBool, + deep_check: bool, + options: index::verify::integrity::Options<F>, + ) -> Result<integrity::Outcome<P>, index::traverse::Error<integrity::Error>> + where + P: Progress, + C: crate::cache::DecodeEntry, + F: Fn() -> C + Send + Clone, + { + let parent = self.path.parent().expect("must be in a directory"); + + let actual_index_checksum = self + .verify_checksum( + progress.add_child_with_id( + format!("{}: checksum", self.path.display()), + integrity::ProgressId::ChecksumBytes.into(), + ), + should_interrupt, + ) + .map_err(integrity::Error::from) + .map_err(index::traverse::Error::Processor)?; + + if let Some(first_invalid) = crate::verify::fan(&self.fan) { + return Err(index::traverse::Error::Processor(integrity::Error::Fan { + index: first_invalid, + })); + } + + if self.num_objects == 0 { + return Err(index::traverse::Error::Processor(integrity::Error::Empty)); + } + + let mut pack_traverse_statistics = Vec::new(); + + let operation_start = Instant::now(); + let mut total_objects_checked = 0; + let mut pack_ids_and_offsets = Vec::with_capacity(self.num_objects as usize); + { + let order_start = Instant::now(); + let mut progress = progress.add_child_with_id("checking oid order", gix_features::progress::UNKNOWN); + progress.init( + Some(self.num_objects as usize), + gix_features::progress::count("objects"), + ); + + for entry_index in 0..(self.num_objects - 1) { + let lhs = self.oid_at_index(entry_index); + let rhs = self.oid_at_index(entry_index + 1); + + if rhs.cmp(lhs) != Ordering::Greater { + return Err(index::traverse::Error::Processor(integrity::Error::OutOfOrder { + index: entry_index, + })); + } + let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index); + pack_ids_and_offsets.push((pack_id, entry_index)); + progress.inc(); + } + { + let entry_index = self.num_objects - 1; + let (pack_id, _) = self.pack_id_and_pack_offset_at_index(entry_index); + pack_ids_and_offsets.push((pack_id, entry_index)); + } + // sort by pack-id to allow handling all indices matching a pack while its open. 
+ pack_ids_and_offsets.sort_by(|l, r| l.0.cmp(&r.0)); + progress.show_throughput(order_start); + }; + + progress.init( + Some(self.num_indices as usize), + gix_features::progress::count("indices"), + ); + + let mut pack_ids_slice = pack_ids_and_offsets.as_slice(); + + for (pack_id, index_file_name) in self.index_names.iter().enumerate() { + progress.set_name(index_file_name.display().to_string()); + progress.inc(); + + let mut bundle = None; + let index; + let index_path = parent.join(index_file_name); + let index = if deep_check { + bundle = crate::Bundle::at(index_path, self.object_hash) + .map_err(integrity::Error::from) + .map_err(index::traverse::Error::Processor)? + .into(); + bundle.as_ref().map(|b| &b.index).expect("just set") + } else { + index = Some( + index::File::at(index_path, self.object_hash) + .map_err(|err| integrity::Error::BundleInit(crate::bundle::init::Error::Index(err))) + .map_err(index::traverse::Error::Processor)?, + ); + index.as_ref().expect("just set") + }; + + let slice_end = pack_ids_slice.partition_point(|e| e.0 == pack_id as crate::data::Id); + let multi_index_entries_to_check = &pack_ids_slice[..slice_end]; + { + let offset_start = Instant::now(); + let mut offsets_progress = + progress.add_child_with_id("verify object offsets", integrity::ProgressId::ObjectOffsets.into()); + offsets_progress.init( + Some(pack_ids_and_offsets.len()), + gix_features::progress::count("objects"), + ); + pack_ids_slice = &pack_ids_slice[slice_end..]; + + for entry_id in multi_index_entries_to_check.iter().map(|e| e.1) { + let oid = self.oid_at_index(entry_id); + let (_, expected_pack_offset) = self.pack_id_and_pack_offset_at_index(entry_id); + let entry_in_bundle_index = index.lookup(oid).ok_or_else(|| { + index::traverse::Error::Processor(integrity::Error::OidNotFound { id: oid.to_owned() }) + })?; + let actual_pack_offset = index.pack_offset_at_index(entry_in_bundle_index); + if actual_pack_offset != expected_pack_offset { + return Err(index::traverse::Error::Processor( + integrity::Error::PackOffsetMismatch { + id: oid.to_owned(), + expected_pack_offset, + actual_pack_offset, + }, + )); + } + offsets_progress.inc(); + } + + if should_interrupt.load(std::sync::atomic::Ordering::Relaxed) { + return Err(index::traverse::Error::Processor(integrity::Error::Interrupted)); + } + offsets_progress.show_throughput(offset_start); + } + + total_objects_checked += multi_index_entries_to_check.len(); + + if let Some(bundle) = bundle { + progress.set_name(format!("Validating {}", index_file_name.display())); + let crate::bundle::verify::integrity::Outcome { + actual_index_checksum: _, + pack_traverse_outcome, + progress: returned_progress, + } = bundle + .verify_integrity(progress, should_interrupt, options.clone()) + .map_err(|err| { + use index::traverse::Error::*; + match err { + Processor(err) => Processor(integrity::Error::IndexIntegrity(err)), + VerifyChecksum(err) => VerifyChecksum(err), + Tree(err) => Tree(err), + TreeTraversal(err) => TreeTraversal(err), + PackDecode { id, offset, source } => PackDecode { id, offset, source }, + PackMismatch { expected, actual } => PackMismatch { expected, actual }, + PackObjectMismatch { + expected, + actual, + offset, + kind, + } => PackObjectMismatch { + expected, + actual, + offset, + kind, + }, + Crc32Mismatch { + expected, + actual, + offset, + kind, + } => Crc32Mismatch { + expected, + actual, + offset, + kind, + }, + Interrupted => Interrupted, + } + })?; + progress = returned_progress; + 
pack_traverse_statistics.push(pack_traverse_outcome); + } + } + + assert_eq!( + self.num_objects as usize, total_objects_checked, + "BUG: our slicing should allow to visit all objects" + ); + + progress.set_name("Validating multi-pack"); + progress.show_throughput(operation_start); + + Ok(integrity::Outcome { + actual_index_checksum, + pack_traverse_statistics, + progress, + }) + } +} diff --git a/vendor/gix-pack/src/multi_index/write.rs b/vendor/gix-pack/src/multi_index/write.rs new file mode 100644 index 000000000..314506401 --- /dev/null +++ b/vendor/gix-pack/src/multi_index/write.rs @@ -0,0 +1,244 @@ +use std::{ + convert::TryInto, + path::PathBuf, + sync::atomic::{AtomicBool, Ordering}, + time::{Instant, SystemTime}, +}; + +use gix_features::progress::Progress; + +use crate::multi_index; + +mod error { + /// The error returned by [multi_index::File::write_from_index_paths()][super::multi_index::File::write_from_index_paths()].. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Io(#[from] std::io::Error), + #[error("Interrupted")] + Interrupted, + #[error(transparent)] + OpenIndex(#[from] crate::index::init::Error), + } +} +pub use error::Error; + +/// An entry suitable for sorting and writing +pub(crate) struct Entry { + pub(crate) id: gix_hash::ObjectId, + pub(crate) pack_index: u32, + pub(crate) pack_offset: crate::data::Offset, + /// Used for sorting in case of duplicates + index_mtime: SystemTime, +} + +/// Options for use in [`multi_index::File::write_from_index_paths()`]. +pub struct Options { + /// The kind of hash to use for objects and to expect in the input files. + pub object_hash: gix_hash::Kind, +} + +/// The result of [`multi_index::File::write_from_index_paths()`]. +pub struct Outcome<P> { + /// The calculated multi-index checksum of the file at `multi_index_path`. + pub multi_index_checksum: gix_hash::ObjectId, + /// The input progress + pub progress: P, +} + +/// The progress ids used in [`write_from_index_paths()`][multi_index::File::write_from_index_paths()]. +/// +/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. +#[derive(Debug, Copy, Clone)] +pub enum ProgressId { + /// Counts each path in the input set whose entries we enumerate and write into the multi-index + FromPathsCollectingEntries, + /// The amount of bytes written as part of the multi-index. + BytesWritten, +} + +impl From<ProgressId> for gix_features::progress::Id { + fn from(v: ProgressId) -> Self { + match v { + ProgressId::FromPathsCollectingEntries => *b"MPCE", + ProgressId::BytesWritten => *b"MPBW", + } + } +} + +impl multi_index::File { + pub(crate) const SIGNATURE: &'static [u8] = b"MIDX"; + pub(crate) const HEADER_LEN: usize = 4 /*signature*/ + + 1 /*version*/ + + 1 /*object id version*/ + + 1 /*num chunks */ + + 1 /*num base files */ + + 4 /*num pack files*/; + + /// Create a new multi-index file for writing to `out` from the pack index files at `index_paths`. + /// + /// Progress is sent to `progress` and interruptions checked via `should_interrupt`. 
+ pub fn write_from_index_paths<P>( + mut index_paths: Vec<PathBuf>, + out: impl std::io::Write, + mut progress: P, + should_interrupt: &AtomicBool, + Options { object_hash }: Options, + ) -> Result<Outcome<P>, Error> + where + P: Progress, + { + let out = gix_features::hash::Write::new(out, object_hash); + let (index_paths_sorted, index_filenames_sorted) = { + index_paths.sort(); + let file_names = index_paths + .iter() + .map(|p| PathBuf::from(p.file_name().expect("file name present"))) + .collect::<Vec<_>>(); + (index_paths, file_names) + }; + + let entries = { + let mut entries = Vec::new(); + let start = Instant::now(); + let mut progress = + progress.add_child_with_id("Collecting entries", ProgressId::FromPathsCollectingEntries.into()); + progress.init(Some(index_paths_sorted.len()), gix_features::progress::count("indices")); + + // This could be parallelized… but it's probably not worth it unless you have 500mio objects. + for (index_id, index) in index_paths_sorted.iter().enumerate() { + let mtime = index + .metadata() + .and_then(|m| m.modified()) + .unwrap_or(SystemTime::UNIX_EPOCH); + let index = crate::index::File::at(index, object_hash)?; + + entries.reserve(index.num_objects() as usize); + entries.extend(index.iter().map(|e| Entry { + id: e.oid, + pack_index: index_id as u32, + pack_offset: e.pack_offset, + index_mtime: mtime, + })); + progress.inc(); + if should_interrupt.load(Ordering::Relaxed) { + return Err(Error::Interrupted); + } + } + progress.show_throughput(start); + + let start = Instant::now(); + progress.set_name("Deduplicate"); + progress.init(Some(entries.len()), gix_features::progress::count("entries")); + entries.sort_by(|l, r| { + l.id.cmp(&r.id) + .then_with(|| l.index_mtime.cmp(&r.index_mtime).reverse()) + .then_with(|| l.pack_index.cmp(&r.pack_index)) + }); + entries.dedup_by_key(|e| e.id); + progress.inc_by(entries.len()); + progress.show_throughput(start); + if should_interrupt.load(Ordering::Relaxed) { + return Err(Error::Interrupted); + } + entries + }; + + let mut cf = gix_chunk::file::Index::for_writing(); + cf.plan_chunk( + multi_index::chunk::index_names::ID, + multi_index::chunk::index_names::storage_size(&index_filenames_sorted), + ); + cf.plan_chunk(multi_index::chunk::fanout::ID, multi_index::chunk::fanout::SIZE as u64); + cf.plan_chunk( + multi_index::chunk::lookup::ID, + multi_index::chunk::lookup::storage_size(entries.len(), object_hash), + ); + cf.plan_chunk( + multi_index::chunk::offsets::ID, + multi_index::chunk::offsets::storage_size(entries.len()), + ); + + let num_large_offsets = multi_index::chunk::large_offsets::num_large_offsets(&entries); + if let Some(num_large_offsets) = num_large_offsets { + cf.plan_chunk( + multi_index::chunk::large_offsets::ID, + multi_index::chunk::large_offsets::storage_size(num_large_offsets), + ); + } + + let mut write_progress = progress.add_child_with_id("Writing multi-index", ProgressId::BytesWritten.into()); + let write_start = Instant::now(); + write_progress.init( + Some(cf.planned_storage_size() as usize + Self::HEADER_LEN), + gix_features::progress::bytes(), + ); + let mut out = gix_features::progress::Write { + inner: out, + progress: write_progress, + }; + + let bytes_written = Self::write_header( + &mut out, + cf.num_chunks().try_into().expect("BUG: wrote more than 256 chunks"), + index_paths_sorted.len() as u32, + object_hash, + )?; + + { + progress.set_name("Writing chunks"); + progress.init(Some(cf.num_chunks()), gix_features::progress::count("chunks")); + + let mut chunk_write = 
cf.into_write(&mut out, bytes_written)?;
+            while let Some(chunk_to_write) = chunk_write.next_chunk() {
+                match chunk_to_write {
+                    multi_index::chunk::index_names::ID => {
+                        multi_index::chunk::index_names::write(&index_filenames_sorted, &mut chunk_write)?
+                    }
+                    multi_index::chunk::fanout::ID => multi_index::chunk::fanout::write(&entries, &mut chunk_write)?,
+                    multi_index::chunk::lookup::ID => multi_index::chunk::lookup::write(&entries, &mut chunk_write)?,
+                    multi_index::chunk::offsets::ID => {
+                        multi_index::chunk::offsets::write(&entries, num_large_offsets.is_some(), &mut chunk_write)?
+                    }
+                    multi_index::chunk::large_offsets::ID => multi_index::chunk::large_offsets::write(
+                        &entries,
+                        num_large_offsets.expect("available if planned"),
+                        &mut chunk_write,
+                    )?,
+                    unknown => unreachable!("BUG: forgot to implement chunk {:?}", std::str::from_utf8(&unknown)),
+                }
+                progress.inc();
+                if should_interrupt.load(Ordering::Relaxed) {
+                    return Err(Error::Interrupted);
+                }
+            }
+        }
+
+        // write trailing checksum
+        let multi_index_checksum: gix_hash::ObjectId = out.inner.hash.digest().into();
+        out.inner.inner.write_all(multi_index_checksum.as_slice())?;
+        out.progress.show_throughput(write_start);
+
+        Ok(Outcome {
+            multi_index_checksum,
+            progress,
+        })
+    }
+
+    fn write_header(
+        mut out: impl std::io::Write,
+        num_chunks: u8,
+        num_indices: u32,
+        object_hash: gix_hash::Kind,
+    ) -> std::io::Result<usize> {
+        out.write_all(Self::SIGNATURE)?;
+        out.write_all(&[crate::multi_index::Version::V1 as u8])?;
+        out.write_all(&[object_hash as u8])?;
+        out.write_all(&[num_chunks])?;
+        out.write_all(&[0])?; /* unused number of base files */
+        out.write_all(&num_indices.to_be_bytes())?;
+
+        Ok(Self::HEADER_LEN)
+    }
+}
diff --git a/vendor/gix-pack/src/verify.rs b/vendor/gix-pack/src/verify.rs
new file mode 100644
index 000000000..f985c8657
--- /dev/null
+++ b/vendor/gix-pack/src/verify.rs
@@ -0,0 +1,64 @@
+use std::{path::Path, sync::atomic::AtomicBool};
+
+use gix_features::progress::Progress;
+
+///
+pub mod checksum {
+    /// Returned by various methods to verify the checksum of a memory mapped file that might also exist on disk.
+    #[derive(thiserror::Error, Debug)]
+    #[allow(missing_docs)]
+    pub enum Error {
+        #[error("Interrupted by user")]
+        Interrupted,
+        #[error("index checksum mismatch: expected {expected}, got {actual}")]
+        Mismatch {
+            expected: gix_hash::ObjectId,
+            actual: gix_hash::ObjectId,
+        },
+    }
+}
+
+/// Returns the first `index` whose value is greater than the value at `index + 1`, i.e. the first place
+/// at which a fan-out table stops being monotonically non-decreasing, or `None` if it is well-formed.
+pub fn fan(data: &[u32]) -> Option<usize> {
+    data.windows(2)
+        .enumerate()
+        .find_map(|(win_index, v)| (v[0] > v[1]).then_some(win_index))
+}
+
+/// Calculate the hash of the given kind by trying to read the file from disk at `data_path` or falling back on the mapped content in `data`.
+/// Returns the actual hash if it matches `expected`, [`checksum::Error::Mismatch`] if it doesn't,
+/// and [`checksum::Error::Interrupted`] if the operation was interrupted.
+pub fn checksum_on_disk_or_mmap(
+    data_path: &Path,
+    data: &[u8],
+    expected: gix_hash::ObjectId,
+    object_hash: gix_hash::Kind,
+    mut progress: impl Progress,
+    should_interrupt: &AtomicBool,
+) -> Result<gix_hash::ObjectId, checksum::Error> {
+    let data_len_without_trailer = data.len() - object_hash.len_in_bytes();
+    let actual = match gix_features::hash::bytes_of_file(
+        data_path,
+        data_len_without_trailer,
+        object_hash,
+        &mut progress,
+        should_interrupt,
+    ) {
+        Ok(id) => id,
+        Err(err) if err.kind() == std::io::ErrorKind::Interrupted => return Err(checksum::Error::Interrupted),
+        Err(_io_err) => {
+            let start = std::time::Instant::now();
+            let mut hasher = gix_features::hash::hasher(object_hash);
+            hasher.update(&data[..data_len_without_trailer]);
+            progress.inc_by(data_len_without_trailer);
+            progress.show_throughput(start);
+            gix_hash::ObjectId::from(hasher.digest())
+        }
+    };
+
+    if actual == expected {
+        Ok(actual)
+    } else {
+        Err(checksum::Error::Mismatch { actual, expected })
+    }
+}
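For orientation, a minimal sketch of how the verification entry points above might be driven from application code. It assumes a constructor like `gix_pack::multi_index::File::at()` (provided by the `init` module above but not shown here) and the `gix_features::progress::Discard` no-op progress type; treat it as an illustration under those assumptions, not as part of the vendored sources.

// Sketch only: open a multi-pack index and run the cheap verification paths.
// `multi_index::File::at()` and `progress::Discard` are assumed to exist
// elsewhere in gix-pack / gix-features and are not part of this diff.
use std::sync::atomic::AtomicBool;

fn check_multi_pack_index(path: &std::path::Path) -> Result<(), Box<dyn std::error::Error>> {
    let should_interrupt = AtomicBool::new(false);
    let midx = gix_pack::multi_index::File::at(path)?;

    // verify_checksum() hashes everything up to the trailing checksum via
    // checksum_on_disk_or_mmap() and compares it against the stored value.
    let checksum = midx.verify_checksum(gix_features::progress::Discard, &should_interrupt)?;
    println!("multi-pack index checksum OK: {checksum}");

    // The fast variant additionally validates the fan-out table and object
    // ordering without touching the referenced indices or packs.
    let (_checksum, _progress) =
        midx.verify_integrity_fast(gix_features::progress::Discard, &should_interrupt)?;
    Ok(())
}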