use std::cmp::Ordering;

pub(crate) const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
pub(crate) const HIGH_BIT: u32 = 0x8000_0000;

/// Compute the fan-out table: entry `N` holds the cumulative count of object ids
/// whose first byte is less than or equal to `N`.
pub(crate) fn fanout(iter: &mut dyn ExactSizeIterator<Item = u8>) -> [u32; 256] {
    let mut fan_out = [0u32; 256];
    let entries_len = iter.len() as u32;
    let mut iter = iter.enumerate();
    let mut idx_and_entry = iter.next();
    let mut upper_bound = 0;

    for (offset_be, byte) in fan_out.iter_mut().zip(0u8..=255) {
        *offset_be = match idx_and_entry.as_ref() {
            Some((_idx, first_byte)) => match first_byte.cmp(&byte) {
                Ordering::Less => unreachable!("ids should be ordered, and we make sure to keep ahead with them"),
                Ordering::Greater => upper_bound,
                Ordering::Equal => {
                    if byte == 255 {
                        entries_len
                    } else {
                        idx_and_entry = iter.find(|(_, first_byte)| *first_byte != byte);
                        upper_bound = idx_and_entry.as_ref().map_or(entries_len, |(idx, _)| *idx as u32);
                        upper_bound
                    }
                }
            },
            None => entries_len,
        };
    }
    fan_out
}

#[cfg(feature = "streaming-input")]
mod function {
    use std::io;

    use gix_features::{
        hash,
        progress::{self, DynNestedProgress},
    };

    use super::{fanout, HIGH_BIT, LARGE_OFFSET_THRESHOLD};
    use crate::index::V2_SIGNATURE;

    /// A writer that counts the bytes passing through it.
    struct Count<W> {
        bytes: u64,
        inner: W,
    }

    impl<W> Count<W> {
        fn new(inner: W) -> Self {
            Count { bytes: 0, inner }
        }
    }

    impl<W> io::Write for Count<W>
    where
        W: io::Write,
    {
        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
            let written = self.inner.write(buf)?;
            self.bytes += written as u64;
            Ok(written)
        }

        fn flush(&mut self) -> io::Result<()> {
            self.inner.flush()
        }
    }

    pub(crate) fn write_to(
        out: &mut dyn io::Write,
        entries_sorted_by_oid: Vec<crate::cache::delta::Item<crate::index::write::TreeEntry>>,
        pack_hash: &gix_hash::ObjectId,
        kind: crate::index::Version,
        progress: &mut dyn DynNestedProgress,
    ) -> io::Result<gix_hash::ObjectId> {
        use io::Write;
        assert_eq!(kind, crate::index::Version::V2, "Can only write V2 packs right now");
        assert!(
            entries_sorted_by_oid.len() <= u32::MAX as usize,
            "a pack cannot have more than u32::MAX objects"
        );
        // Write header
        let mut out = Count::new(std::io::BufWriter::with_capacity(
            8 * 4096,
            hash::Write::new(out, kind.hash()),
        ));
        out.write_all(V2_SIGNATURE)?;
        out.write_all(&(kind as u32).to_be_bytes())?;

        progress.init(Some(4), progress::steps());
        let start = std::time::Instant::now();
        let _info = progress.add_child_with_id("writing fan-out table".into(), gix_features::progress::UNKNOWN);
        let fan_out = fanout(&mut entries_sorted_by_oid.iter().map(|e| e.data.id.first_byte()));

        for value in fan_out.iter() {
            out.write_all(&value.to_be_bytes())?;
        }
        progress.inc();
        let _info = progress.add_child_with_id("writing ids".into(), gix_features::progress::UNKNOWN);
        for entry in &entries_sorted_by_oid {
            out.write_all(entry.data.id.as_slice())?;
        }
        progress.inc();
        let _info = progress.add_child_with_id("writing crc32".into(), gix_features::progress::UNKNOWN);
        for entry in &entries_sorted_by_oid {
            out.write_all(&entry.data.crc32.to_be_bytes())?;
        }
        progress.inc();
        let _info = progress.add_child_with_id("writing offsets".into(), gix_features::progress::UNKNOWN);
        {
            // Offsets above the threshold go into a trailing 64-bit table; the 32-bit
            // slot then stores the index into that table with the high bit set.
            let mut offsets64 = Vec::<u64>::new();
            for entry in &entries_sorted_by_oid {
                let offset: u32 = if entry.offset > LARGE_OFFSET_THRESHOLD {
                    assert!(
                        offsets64.len() < LARGE_OFFSET_THRESHOLD as usize,
                        "Encoding breakdown - way too many 64bit offsets"
                    );
                    offsets64.push(entry.offset);
                    ((offsets64.len() - 1) as u32) | HIGH_BIT
                } else {
                    entry.offset as u32
                };
                out.write_all(&offset.to_be_bytes())?;
            }
            for value in offsets64 {
                out.write_all(&value.to_be_bytes())?;
            }
        }

        out.write_all(pack_hash.as_slice())?;

        let bytes_written_without_trailer = out.bytes;
        let out = out.inner.into_inner()?;
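        // The index checksum covers everything written so far, including the pack
        // checksum, and is written to the underlying stream directly so it is not
        // fed back into the hasher.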
        let index_hash: gix_hash::ObjectId = out.hash.digest().into();
        out.inner.write_all(index_hash.as_slice())?;
        out.inner.flush()?;

        progress.inc();
        progress.show_throughput_with(
            start,
            (bytes_written_without_trailer + 20) as usize,
            progress::bytes().expect("unit always set"),
            progress::MessageLevel::Success,
        );

        Ok(index_hash)
    }
}

#[cfg(feature = "streaming-input")]
pub(crate) use function::write_to;
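
// Illustrative sketch (not part of the upstream file): a minimal check of `fanout`,
// assuming the standard cumulative-count semantics of git's fan-out table. The module
// and test names are hypothetical.
#[cfg(test)]
mod fanout_examples {
    use super::fanout;

    #[test]
    fn counts_are_cumulative_per_first_byte() {
        // Three object ids whose first bytes are 0x00, 0x00 and 0x01, already sorted.
        let first_bytes = [0u8, 0, 1];
        let table = fanout(&mut first_bytes.iter().copied());
        assert_eq!(table[0], 2, "two ids start with a byte <= 0x00");
        assert_eq!(table[1], 3, "all three ids start with a byte <= 0x01");
        assert_eq!(table[255], 3, "the last entry always equals the total object count");
    }
}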