summaryrefslogtreecommitdiffstats
path: root/vendor/gix-commitgraph/src/file/init.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gix-commitgraph/src/file/init.rs')
-rw-r--r--vendor/gix-commitgraph/src/file/init.rs257
1 files changed, 257 insertions, 0 deletions
diff --git a/vendor/gix-commitgraph/src/file/init.rs b/vendor/gix-commitgraph/src/file/init.rs
new file mode 100644
index 000000000..1d8f52e3f
--- /dev/null
+++ b/vendor/gix-commitgraph/src/file/init.rs
@@ -0,0 +1,257 @@
+use std::{
+ convert::{TryFrom, TryInto},
+ path::Path,
+};
+
+use bstr::ByteSlice;
+use memmap2::Mmap;
+
+use crate::{
+ file::{
+ ChunkId, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH,
+ EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE,
+ },
+ File,
+};
+
+/// The error used in [`File::at()`].
+///
+/// Each variant describes one way in which opening or validating a commit-graph file can fail.
+#[derive(thiserror::Error, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+ /// The number of base graphs computed from the size of the base-graphs-list chunk (`from_chunk`)
+ /// differs from the count stored in the file header (`from_header`).
+ #[error("Commit-graph {:?} chunk contains {from_chunk} base graphs, but commit-graph file header claims {from_header} base graphs", BASE_GRAPHS_LIST_CHUNK_ID.as_bstr())]
+ BaseGraphMismatch { from_header: u8, from_chunk: u32 },
+ /// Two chunks disagree about the number of commits stored in the file.
+ #[error("Commit-graph {:?} chunk contains {chunk1_commits} commits, but {:?} chunk contains {chunk2_commits} commits", .chunk1_id.as_bstr(), .chunk2_id.as_bstr())]
+ CommitCountMismatch {
+ chunk1_id: ChunkId,
+ chunk1_commits: u32,
+ chunk2_id: ChunkId,
+ chunk2_commits: u32,
+ },
+ /// The file is structurally broken, e.g. too small or lacking the expected signature;
+ /// the message carries the details.
+ #[error("{0}")]
+ Corrupt(String),
+ // This error case is disabled, as git allows extra garbage in the extra edges list?
+ // #[error("The last entry in commit-graph's extended edges list is not marked as being terminal")]
+ // ExtraEdgesOverflow,
+ /// The file at `path` could not be opened or memory-mapped.
+ #[error("Could not open commit-graph file at '{}'", .path.display())]
+ Io {
+ #[source]
+ err: std::io::Error,
+ path: std::path::PathBuf,
+ },
+ /// The bytes following the last chunk do not have the expected trailer length;
+ /// the message carries the details.
+ #[error("{0}")]
+ Trailer(String),
+ /// The hash-version byte of the header was rejected by `gix_hash::Kind::try_from`.
+ #[error("Commit-graph file uses unsupported hash version: {0}")]
+ UnsupportedHashVersion(u8),
+ /// The version byte of the header is not `1`.
+ #[error("Unsupported commit-graph file version: {0}")]
+ UnsupportedVersion(u8),
+ /// The chunk index following the header could not be decoded.
+ #[error(transparent)]
+ ChunkFileDecode(#[from] gix_chunk::file::decode::Error),
+ /// A chunk that has to be present was not found in the chunk index.
+ #[error(transparent)]
+ MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error),
+ /// The chunk identified by `id` has a size that is inconsistent with its content;
+ /// `msg` explains what was expected.
+ #[error("Commit-graph chunk {:?} has invalid size: {msg}", .id.as_bstr())]
+ InvalidChunkSize { id: ChunkId, msg: String },
+}
+
+/// The smallest amount of bytes a commit-graph file can consist of, which is the size of a file
+/// describing an empty graph: header, chunk index for the three mandatory chunks, the fan-out
+/// table and a trailer as long as the shortest supported hash.
+const MIN_FILE_SIZE: usize = {
+    // Index entries for the OIDF, OIDL and CDAT chunks.
+    let chunk_index = gix_chunk::file::Index::size_for_entries(3);
+    // The content of the OIDF chunk: one 4-byte counter per fan-out slot.
+    let fan_table = 4 * FAN_LEN;
+    let trailer = gix_hash::Kind::shortest().len_in_bytes();
+    HEADER_LEN + chunk_index + fan_table + trailer
+};
+
+impl File {
+    /// Open the commit-graph file at `path` and validate its structure.
+    ///
+    /// This is a convenience wrapper around the `TryFrom<&Path>` implementation of [`File`]
+    /// and fails with the same [`Error`].
+    pub fn at(path: impl AsRef<Path>) -> Result<File, Error> {
+        let path: &Path = path.as_ref();
+        <File as TryFrom<&Path>>::try_from(path)
+    }
+}
+
+impl TryFrom<&Path> for File {
+    type Error = Error;
+
+    /// Memory-map the commit-graph file at `path` and validate its header, chunks and trailer.
+    ///
+    /// The following is checked, in this order:
+    ///
+    /// * the file is at least `MIN_FILE_SIZE` bytes long and starts with `SIGNATURE`
+    /// * the version byte is `1` and the hash-version byte is known to `gix_hash`
+    /// * the chunk index following the header can be decoded
+    /// * the optional base-graphs-list chunk agrees with the base graph count of the header
+    /// * the commit-data, fan-out and oid-lookup chunks exist and have plausible sizes
+    /// * the bytes after the last chunk are exactly one hash long
+    /// * the base-graphs-list chunk exists if the header announces base graphs
+    /// * the last fan-out entry matches the entry counts of the oid-lookup and commit-data chunks
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`Error`] variant matching the first check that failed. Entry counts that do not
+    /// fit into 32 bits are reported as [`Error::InvalidChunkSize`] instead of causing a panic.
+    fn try_from(path: &Path) -> Result<Self, Self::Error> {
+        /// Return the number of `entry_size`d entries in the chunk `id` of `chunk_size` bytes, or
+        /// fail if the chunk isn't a whole number of entries or the count doesn't fit into `u32`.
+        fn entry_count(id: ChunkId, chunk_size: usize, entry_size: usize) -> Result<u32, Error> {
+            if chunk_size % entry_size != 0 {
+                return Err(Error::InvalidChunkSize {
+                    id,
+                    msg: format!("chunk size {chunk_size} is not a multiple of {entry_size}"),
+                });
+            }
+            let count = chunk_size / entry_size;
+            // The count is derived from file content, so an oversized value is an error in the
+            // input rather than a bug in this program.
+            u32::try_from(count).map_err(|_| Error::InvalidChunkSize {
+                id,
+                msg: format!("entry count {count} does not fit into 32 bits"),
+            })
+        }
+
+        let data = std::fs::File::open(path)
+            .and_then(|file| {
+                // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
+                #[allow(unsafe_code)]
+                unsafe {
+                    Mmap::map(&file)
+                }
+            })
+            .map_err(|err| Error::Io {
+                err,
+                path: path.to_owned(),
+            })?;
+        if data.len() < MIN_FILE_SIZE {
+            return Err(Error::Corrupt(
+                "Commit-graph file too small even for an empty graph".to_owned(),
+            ));
+        }
+
+        // The size check above is what makes the direct indexing into the header below safe.
+        let mut ofs = 0;
+        if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE {
+            return Err(Error::Corrupt(
+                "Commit-graph file does not start with expected signature".to_owned(),
+            ));
+        }
+        ofs += SIGNATURE.len();
+
+        let version = data[ofs];
+        if version != 1 {
+            return Err(Error::UnsupportedVersion(version));
+        }
+        ofs += 1;
+
+        let object_hash = gix_hash::Kind::try_from(data[ofs]).map_err(Error::UnsupportedHashVersion)?;
+        let hash_len = object_hash.len_in_bytes();
+        ofs += 1;
+
+        // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make
+        // it redundant.
+        let chunk_count = data[ofs];
+        ofs += 1;
+
+        let base_graph_count = data[ofs];
+        ofs += 1;
+
+        // `ofs` now points at the first byte after the header, where the chunk index starts.
+        let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, u32::from(chunk_count))?;
+
+        // The base graphs list is optional, hence a failed lookup turns into `None`, while a failed
+        // validation of an existing chunk is still an error.
+        let base_graphs_list_offset = chunks
+            .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| {
+                let start = chunk_range.start;
+                entry_count(BASE_GRAPHS_LIST_CHUNK_ID, chunk_range.len(), hash_len).and_then(|from_chunk| {
+                    if from_chunk == u32::from(base_graph_count) {
+                        Ok(start)
+                    } else {
+                        Err(Error::BaseGraphMismatch {
+                            from_chunk,
+                            from_header: base_graph_count,
+                        })
+                    }
+                })
+            })
+            .ok()
+            .transpose()?;
+
+        let commit_data_entry_size = hash_len + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
+        let (commit_data_offset, commit_data_count) =
+            chunks.validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| {
+                entry_count(COMMIT_DATA_CHUNK_ID, chunk_range.len(), commit_data_entry_size)
+                    .map(|count| (chunk_range.start, count))
+            })??;
+
+        let fan_offset = chunks.validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| {
+            let chunk_size = chunk_range.len();
+            let expected_size = 4 * FAN_LEN;
+            if chunk_size != expected_size {
+                return Err(Error::InvalidChunkSize {
+                    id: OID_FAN_CHUNK_ID,
+                    msg: format!("expected chunk length {expected_size}, got {chunk_size}"),
+                });
+            }
+            Ok(chunk_range.start)
+        })??;
+
+        let (oid_lookup_offset, oid_lookup_count) =
+            chunks.validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| {
+                entry_count(OID_LOOKUP_CHUNK_ID, chunk_range.len(), hash_len)
+                    .map(|count| (chunk_range.start, count))
+            })??;
+
+        // The extended edges list is optional.
+        let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok();
+
+        // NOTE(review): slicing assumes the chunk index decoder rejected offsets beyond the end of
+        // the file - confirm against `gix_chunk::file::Index::from_bytes`.
+        let trailer = &data[chunks.highest_offset() as usize..];
+        if trailer.len() != hash_len {
+            return Err(Error::Trailer(format!(
+                "Expected commit-graph trailer to contain {} bytes, got {}",
+                hash_len,
+                trailer.len()
+            )));
+        }
+
+        if base_graph_count > 0 && base_graphs_list_offset.is_none() {
+            return Err(gix_chunk::file::index::offset_by_kind::Error {
+                kind: BASE_GRAPHS_LIST_CHUNK_ID,
+            }
+            .into());
+        }
+
+        // The last fan-out entry is compared against the entry counts of the chunks that store
+        // per-commit data, all three have to agree.
+        let (fan, _) = read_fan(&data[fan_offset..]);
+        if oid_lookup_count != fan[255] {
+            return Err(Error::CommitCountMismatch {
+                chunk1_id: OID_FAN_CHUNK_ID,
+                chunk1_commits: fan[255],
+                chunk2_id: OID_LOOKUP_CHUNK_ID,
+                chunk2_commits: oid_lookup_count,
+            });
+        }
+        if commit_data_count != fan[255] {
+            return Err(Error::CommitCountMismatch {
+                chunk1_id: OID_FAN_CHUNK_ID,
+                chunk1_commits: fan[255],
+                chunk2_id: COMMIT_DATA_CHUNK_ID,
+                chunk2_commits: commit_data_count,
+            });
+        }
+        Ok(File {
+            base_graph_count,
+            base_graphs_list_offset,
+            commit_data_offset,
+            data,
+            extra_edges_list_range,
+            fan,
+            oid_lookup_offset,
+            path: path.to_owned(),
+            hash_len,
+            object_hash,
+        })
+    }
+}
+
+// Copied from gix-odb/pack/index/init.rs
+/// Decode the fan-out table at the beginning of `d`, which consists of up to `FAN_LEN` big-endian
+/// 32 bit integers, and return it along with the amount of bytes a complete table occupies.
+///
+/// Slots for which `d` holds no data remain zero.
+///
+/// # Panics
+///
+/// If a group of bytes that is used to fill a slot is shorter than 4 bytes.
+fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
+    let mut fan = [0u32; FAN_LEN];
+    for (slot, be_bytes) in fan.iter_mut().zip(d.chunks(4)) {
+        let quad: [u8; 4] = be_bytes.try_into().unwrap();
+        *slot = u32::from_be_bytes(quad);
+    }
+    (fan, 4 * FAN_LEN)
+}