summaryrefslogtreecommitdiffstats
path: root/vendor/gix-pack/src/index/write/encode.rs
blob: f1195875c41352ecb8b2221b7906b78665e961f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
use std::{cmp::Ordering, io};

/// The largest pack offset the index writer stores directly in a 32 bit offset slot;
/// anything greater is placed into the separate table of 64 bit offsets.
pub(crate) const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
/// The bit the index writer sets on a 32 bit offset slot to mark its remaining bits
/// as an index into the table of 64 bit offsets rather than as an offset.
pub(crate) const HIGH_BIT: u32 = 0x8000_0000;

use gix_features::{
    hash,
    progress::{self, Progress},
};

use crate::index::{util::Count, V2_SIGNATURE};

/// Write a version 2 pack index for `entries_sorted_by_oid` to `out` and return the hash of the written index.
///
/// The sections are emitted in this order:
///
/// 1. the `V2_SIGNATURE` followed by `kind` as big-endian `u32`
/// 2. the fan-out table with 256 big-endian `u32` values, see [`fanout()`]
/// 3. the object ids, in the order of `entries_sorted_by_oid`
/// 4. one big-endian CRC32 per entry
/// 5. one big-endian `u32` offset slot per entry; offsets above [`LARGE_OFFSET_THRESHOLD`]
///    are replaced by their index into the 64 bit table, with [`HIGH_BIT`] set
/// 6. the table of big-endian 64 bit offsets, in order of first use
/// 7. `pack_hash`
/// 8. the digest of everything written before, which is also the return value
///
/// `progress` is initialized with four steps and incremented once after each of the fan-out, id,
/// CRC32 and offset sections; the achieved throughput is reported at the end.
///
/// # Panics
///
/// * if `kind` is not `Version::V2`
/// * if there are more than `u32::MAX` entries
/// * if the number of offsets needing 64 bits reaches [`LARGE_OFFSET_THRESHOLD`]
///
/// # Errors
///
/// Any error of the underlying writer is returned as is.
pub(crate) fn write_to(
    out: impl io::Write,
    entries_sorted_by_oid: Vec<crate::cache::delta::Item<crate::index::write::TreeEntry>>,
    pack_hash: &gix_hash::ObjectId,
    kind: crate::index::Version,
    mut progress: impl Progress,
) -> io::Result<gix_hash::ObjectId> {
    use io::Write;
    assert_eq!(kind, crate::index::Version::V2, "Can only write V2 packs right now");
    assert!(
        entries_sorted_by_oid.len() <= u32::MAX as usize,
        "a pack cannot have more than u32::MAX objects"
    );

    // Write header
    // Layering from the outside in: byte counter -> buffer -> hasher -> actual output.
    let mut out = Count::new(std::io::BufWriter::with_capacity(
        8 * 4096,
        hash::Write::new(out, kind.hash()),
    ));
    out.write_all(V2_SIGNATURE)?;
    out.write_all(&(kind as u32).to_be_bytes())?;

    progress.init(Some(4), progress::steps());
    let start = std::time::Instant::now();
    let _info = progress.add_child_with_id("writing fan-out table", gix_features::progress::UNKNOWN);
    let fan_out = fanout(entries_sorted_by_oid.iter().map(|e| e.data.id.first_byte()));

    for value in fan_out.iter() {
        out.write_all(&value.to_be_bytes())?;
    }

    progress.inc();
    let _info = progress.add_child_with_id("writing ids", gix_features::progress::UNKNOWN);
    for entry in &entries_sorted_by_oid {
        out.write_all(entry.data.id.as_slice())?;
    }

    progress.inc();
    let _info = progress.add_child_with_id("writing crc32", gix_features::progress::UNKNOWN);
    for entry in &entries_sorted_by_oid {
        out.write_all(&entry.data.crc32.to_be_bytes())?;
    }

    progress.inc();
    let _info = progress.add_child_with_id("writing offsets", gix_features::progress::UNKNOWN);
    {
        // Offsets too large for 31 bits are collected here and written after all 32 bit slots.
        let mut offsets64 = Vec::<u64>::new();
        for entry in &entries_sorted_by_oid {
            let offset: u32 = if entry.offset > LARGE_OFFSET_THRESHOLD {
                assert!(
                    offsets64.len() < LARGE_OFFSET_THRESHOLD as usize,
                    "Encoding breakdown - way too many 64bit offsets"
                );
                offsets64.push(entry.offset);
                // The slot holds the position of the offset in the 64 bit table, flagged by the high bit.
                ((offsets64.len() - 1) as u32) | HIGH_BIT
            } else {
                // Cannot truncate: the value is at most `LARGE_OFFSET_THRESHOLD`, which fits into 31 bits.
                entry.offset as u32
            };
            out.write_all(&offset.to_be_bytes())?;
        }
        for value in offsets64 {
            out.write_all(&value.to_be_bytes())?;
        }
    }

    out.write_all(pack_hash.as_slice())?;

    let bytes_written_without_trailer = out.bytes;
    // Unwrap the buffer (flushing it) to get at the hasher and the writer it wraps.
    let mut out = out.inner.into_inner()?;
    let index_hash: gix_hash::ObjectId = out.hash.digest().into();
    // The trailer goes to the innermost writer directly, bypassing the byte counter.
    out.inner.write_all(index_hash.as_slice())?;
    out.inner.flush()?;

    // The trailer is exactly as long as the hash that was just written, so measure it
    // instead of assuming a fixed amount of bytes.
    let trailer_len = index_hash.as_slice().len();

    progress.inc();
    progress.show_throughput_with(
        start,
        bytes_written_without_trailer as usize + trailer_len,
        progress::bytes().expect("unit always set"),
        progress::MessageLevel::Success,
    );

    Ok(index_hash)
}

/// Compute the fan-out table for the first bytes of object ids, which must be yielded in ascending order.
///
/// Slot `b` of the returned table holds the number of ids whose first byte is less than or equal to `b`,
/// so the last slot always equals the total number of ids. The total is taken from `iter.len()` and cast to `u32`.
///
/// # Panics
///
/// If the next unconsumed first byte is smaller than the slot currently being filled, which can
/// only happen if the input is not sorted.
pub(crate) fn fanout(iter: impl ExactSizeIterator<Item = u8>) -> [u32; 256] {
    let total = iter.len() as u32;
    let mut indexed = iter.enumerate();
    // The first entry that was not yet accounted for, along with its position in the input.
    let mut pending = indexed.next();
    // Amount of entries known to sort before `pending`.
    let mut cumulative = 0u32;
    let mut table = [0u32; 256];

    for bucket in 0u8..=255 {
        let count = match pending.map(|(_, head)| head.cmp(&bucket)) {
            // Input exhausted: every remaining slot covers all entries.
            None => total,
            Some(Ordering::Less) => {
                unreachable!("ids should be ordered, and we make sure to keep ahead with them")
            }
            // No entry starts with `bucket`, so the count carries over from the previous slot.
            Some(Ordering::Greater) => cumulative,
            // Nothing can sort after the last bucket, no need to look at the remaining entries.
            Some(Ordering::Equal) if bucket == u8::MAX => total,
            Some(Ordering::Equal) => {
                // Skip the whole run of entries starting with `bucket`; the position of the
                // first entry after it is the amount of entries up to and including this bucket.
                pending = indexed.find(|&(_, first_byte)| first_byte != bucket);
                cumulative = match pending {
                    Some((position, _)) => position as u32,
                    None => total,
                };
                cumulative
            }
        };
        table[usize::from(bucket)] = count;
    }

    table
}