summaryrefslogtreecommitdiffstats
path: root/vendor/gix/src/repository/object.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 02:49:50 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 02:49:50 +0000
commit9835e2ae736235810b4ea1c162ca5e65c547e770 (patch)
tree3fcebf40ed70e581d776a8a4c65923e8ec20e026 /vendor/gix/src/repository/object.rs
parentReleasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff)
downloadrustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz
rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix/src/repository/object.rs')
-rw-r--r--vendor/gix/src/repository/object.rs54
1 files changed, 47 insertions, 7 deletions
diff --git a/vendor/gix/src/repository/object.rs b/vendor/gix/src/repository/object.rs
index bda1a54c3..f4592475f 100644
--- a/vendor/gix/src/repository/object.rs
+++ b/vendor/gix/src/repository/object.rs
@@ -1,5 +1,6 @@
#![allow(clippy::result_large_err)]
use std::convert::TryInto;
+use std::ops::DerefMut;
use gix_hash::ObjectId;
use gix_odb::{Find, FindExt, Write};
@@ -36,7 +37,7 @@ impl crate::Repository {
Ok(Object::from_data(id, kind, buf, self))
}
- /// Try to find the object with `id` or return `None` it it wasn't found.
+ /// Try to find the object with `id` or return `None` if it wasn't found.
pub fn try_find_object(&self, id: impl Into<ObjectId>) -> Result<Option<Object<'_>>, object::find::Error> {
let id = id.into();
if id == gix_hash::ObjectId::empty_tree(self.object_hash()) {
@@ -58,32 +59,71 @@ impl crate::Repository {
}
}
+ fn shared_empty_buf(&self) -> std::cell::RefMut<'_, Vec<u8>> {
+ let mut bufs = self.bufs.borrow_mut();
+ if bufs.last().is_none() {
+ bufs.push(Vec::with_capacity(512));
+ }
+ std::cell::RefMut::map(bufs, |bufs| {
+ let buf = bufs.last_mut().expect("we assure one is present");
+ buf.clear();
+ buf
+ })
+ }
+
/// Write the given object into the object database and return its object id.
+ ///
+ /// Note that we hash the object in memory to avoid storing objects that are already present. That way,
+ /// we avoid writing duplicate objects to slow disks, which would eventually have to be garbage collected.
pub fn write_object(&self, object: impl gix_object::WriteTo) -> Result<Id<'_>, object::write::Error> {
+ let mut buf = self.shared_empty_buf();
+ object.write_to(buf.deref_mut())?;
+
+ let oid = gix_object::compute_hash(self.object_hash(), object.kind(), &buf);
+ if self.objects.contains(oid) {
+ return Ok(oid.attach(self));
+ }
+
self.objects
- .write(object)
+ .write_buf(object.kind(), &buf)
.map(|oid| oid.attach(self))
.map_err(Into::into)
}
/// Write a blob from the given `bytes`.
+ ///
+ /// By pre-hashing the data and checking if the object is already present, we avoid writing duplicate
+ /// objects to slow disks, which would eventually have to be garbage collected.
pub fn write_blob(&self, bytes: impl AsRef<[u8]>) -> Result<Id<'_>, object::write::Error> {
+ let bytes = bytes.as_ref();
+ let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, bytes);
+ if self.objects.contains(oid) {
+ return Ok(oid.attach(self));
+ }
self.objects
- .write_buf(gix_object::Kind::Blob, bytes.as_ref())
+ .write_buf(gix_object::Kind::Blob, bytes)
.map(|oid| oid.attach(self))
}
/// Write a blob from the given `Read` implementation.
+ ///
+ /// Note that we read the whole stream into memory and hash it there to avoid storing objects that are
+ /// already present. That way, we avoid writing duplicate objects to slow disks, which would eventually have to be garbage collected.
+ ///
+ /// If holding the entire stream in memory is prohibitive, use the object database directly.
pub fn write_blob_stream(
&self,
mut bytes: impl std::io::Read + std::io::Seek,
) -> Result<Id<'_>, object::write::Error> {
- let current = bytes.stream_position()?;
- let len = bytes.seek(std::io::SeekFrom::End(0))? - current;
- bytes.seek(std::io::SeekFrom::Start(current))?;
+ let mut buf = self.shared_empty_buf();
+ std::io::copy(&mut bytes, buf.deref_mut())?;
+ let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, &buf);
+ if self.objects.contains(oid) {
+ return Ok(oid.attach(self));
+ }
self.objects
- .write_stream(gix_object::Kind::Blob, len, bytes)
+ .write_buf(gix_object::Kind::Blob, &buf)
.map(|oid| oid.attach(self))
}