diff options
Diffstat (limited to 'vendor/gix-commitgraph/src/file/init.rs')
-rw-r--r-- | vendor/gix-commitgraph/src/file/init.rs | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/vendor/gix-commitgraph/src/file/init.rs b/vendor/gix-commitgraph/src/file/init.rs new file mode 100644 index 000000000..1d8f52e3f --- /dev/null +++ b/vendor/gix-commitgraph/src/file/init.rs @@ -0,0 +1,257 @@ +use std::{ + convert::{TryFrom, TryInto}, + path::Path, +}; + +use bstr::ByteSlice; +use memmap2::Mmap; + +use crate::{ + file::{ + ChunkId, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH, + EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE, + }, + File, +}; + +/// The error used in [`File::at()`]. +#[derive(thiserror::Error, Debug)] +#[allow(missing_docs)] +pub enum Error { + #[error("Commit-graph {:?} chunk contains {from_chunk} base graphs, but commit-graph file header claims {from_header} base graphs", BASE_GRAPHS_LIST_CHUNK_ID.as_bstr())] + BaseGraphMismatch { from_header: u8, from_chunk: u32 }, + #[error("Commit-graph {:?} chunk contains {chunk1_commits} commits, but {:?} chunk contains {chunk2_commits} commits", .chunk1_id.as_bstr(), .chunk2_id.as_bstr())] + CommitCountMismatch { + chunk1_id: ChunkId, + chunk1_commits: u32, + chunk2_id: ChunkId, + chunk2_commits: u32, + }, + #[error("{0}")] + Corrupt(String), + // This error case is disabled, as git allows extra garbage in the extra edges list? + // #[error("The last entry in commit-graph's extended edges list does is not marked as being terminal")] + // ExtraEdgesOverflow, + #[error("Could not open commit-graph file at '{}'", .path.display())] + Io { + #[source] + err: std::io::Error, + path: std::path::PathBuf, + }, + #[error("{0}")] + Trailer(String), + #[error("Commit-graph file uses unsupported hash version: {0}")] + UnsupportedHashVersion(u8), + #[error("Unsupported commit-graph file version: {0}")] + UnsupportedVersion(u8), + #[error(transparent)] + ChunkFileDecode(#[from] gix_chunk::file::decode::Error), + #[error(transparent)] + MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error), + #[error("Commit-graph chunk {:?} has invalid size: {msg}", .id.as_bstr())] + InvalidChunkSize { id: ChunkId, msg: String }, +} + +const MIN_FILE_SIZE: usize = HEADER_LEN + + gix_chunk::file::Index::size_for_entries(3 /*OIDF, OIDL, CDAT*/) + + FAN_LEN * 4 /* FANOUT TABLE CHUNK OIDF */ + + gix_hash::Kind::shortest().len_in_bytes(); + +impl File { + /// Try to parse the commit graph file at `path`. + pub fn at(path: impl AsRef<Path>) -> Result<File, Error> { + Self::try_from(path.as_ref()) + } +} + +impl TryFrom<&Path> for File { + type Error = Error; + + fn try_from(path: &Path) -> Result<Self, Self::Error> { + let data = std::fs::File::open(path) + .and_then(|file| { + // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. + #[allow(unsafe_code)] + unsafe { + Mmap::map(&file) + } + }) + .map_err(|e| Error::Io { + err: e, + path: path.to_owned(), + })?; + let data_size = data.len(); + if data_size < MIN_FILE_SIZE { + return Err(Error::Corrupt( + "Commit-graph file too small even for an empty graph".to_owned(), + )); + } + + let mut ofs = 0; + if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE { + return Err(Error::Corrupt( + "Commit-graph file does not start with expected signature".to_owned(), + )); + } + ofs += SIGNATURE.len(); + + match data[ofs] { + 1 => (), + x => { + return Err(Error::UnsupportedVersion(x)); + } + }; + ofs += 1; + + let object_hash = gix_hash::Kind::try_from(data[ofs]).map_err(Error::UnsupportedHashVersion)?; + ofs += 1; + + let chunk_count = data[ofs]; + // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make + // it redundant. + ofs += 1; + + let base_graph_count = data[ofs]; + ofs += 1; + + let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, chunk_count as u32)?; + + let base_graphs_list_offset = chunks + .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| { + let chunk_size = chunk_range.len(); + if chunk_size % object_hash.len_in_bytes() != 0 { + return Err(Error::InvalidChunkSize { + id: BASE_GRAPHS_LIST_CHUNK_ID, + msg: format!( + "chunk size {} is not a multiple of {}", + chunk_size, + object_hash.len_in_bytes() + ), + }); + } + let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes()) + .try_into() + .expect("base graph count to fit in 32-bits"); + if chunk_base_graph_count != u32::from(base_graph_count) { + return Err(Error::BaseGraphMismatch { + from_chunk: chunk_base_graph_count, + from_header: base_graph_count, + }); + } + Ok(chunk_range.start) + }) + .ok() + .transpose()?; + + let (commit_data_offset, commit_data_count) = + chunks.validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| { + let chunk_size = chunk_range.len(); + + let entry_size = object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH; + if chunk_size % entry_size != 0 { + return Err(Error::InvalidChunkSize { + id: COMMIT_DATA_CHUNK_ID, + msg: format!("chunk size {chunk_size} is not a multiple of {entry_size}"), + }); + } + Ok(( + chunk_range.start, + (chunk_size / entry_size) + .try_into() + .expect("number of commits in CDAT chunk to fit in 32 bits"), + )) + })??; + + let fan_offset = chunks.validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| { + let chunk_size = chunk_range.len(); + + let expected_size = 4 * FAN_LEN; + if chunk_size != expected_size { + return Err(Error::InvalidChunkSize { + id: OID_FAN_CHUNK_ID, + msg: format!("expected chunk length {expected_size}, got {chunk_size}"), + }); + } + Ok(chunk_range.start) + })??; + + let (oid_lookup_offset, oid_lookup_count) = + chunks.validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| { + let chunk_size = chunk_range.len(); + + if chunk_size % object_hash.len_in_bytes() != 0 { + return Err(Error::InvalidChunkSize { + id: OID_LOOKUP_CHUNK_ID, + msg: format!( + "chunk size {} is not a multiple of {}", + chunk_size, + object_hash.len_in_bytes() + ), + }); + } + Ok(( + chunk_range.start, + (chunk_size / object_hash.len_in_bytes()) + .try_into() + .expect("number of commits in OIDL chunk to fit in 32 bits"), + )) + })??; + + let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok(); + + let trailer = &data[chunks.highest_offset() as usize..]; + if trailer.len() != object_hash.len_in_bytes() { + return Err(Error::Trailer(format!( + "Expected commit-graph trailer to contain {} bytes, got {}", + object_hash.len_in_bytes(), + trailer.len() + ))); + } + + if base_graph_count > 0 && base_graphs_list_offset.is_none() { + return Err(gix_chunk::file::index::offset_by_kind::Error { + kind: BASE_GRAPHS_LIST_CHUNK_ID, + } + .into()); + } + + let (fan, _) = read_fan(&data[fan_offset..]); + if oid_lookup_count != fan[255] { + return Err(Error::CommitCountMismatch { + chunk1_id: OID_FAN_CHUNK_ID, + chunk1_commits: fan[255], + chunk2_id: OID_LOOKUP_CHUNK_ID, + chunk2_commits: oid_lookup_count, + }); + } + if commit_data_count != fan[255] { + return Err(Error::CommitCountMismatch { + chunk1_id: OID_FAN_CHUNK_ID, + chunk1_commits: fan[255], + chunk2_id: COMMIT_DATA_CHUNK_ID, + chunk2_commits: commit_data_count, + }); + } + Ok(File { + base_graph_count, + base_graphs_list_offset, + commit_data_offset, + data, + extra_edges_list_range, + fan, + oid_lookup_offset, + path: path.to_owned(), + hash_len: object_hash.len_in_bytes(), + object_hash, + }) + } +} + +// Copied from gix-odb/pack/index/init.rs +fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) { + let mut fan = [0; FAN_LEN]; + for (c, f) in d.chunks(4).zip(fan.iter_mut()) { + *f = u32::from_be_bytes(c.try_into().unwrap()); + } + (fan, FAN_LEN * 4) +} |