diff --git a/src/main.rs b/src/main.rs
index 64bb2ea..6a1b0f7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -11,6 +11,7 @@ mod index;
 mod kind;
 mod log;
 mod object;
+mod pack;
 mod repository;
 mod tree;
 
@@ -64,6 +65,8 @@ enum Command {
     LsIndex,
     /// Write the index file
     WriteIndex,
+    /// Dump Pack Files
+    DumpPackFiles,
 }
 
 fn main() -> Result<(), Error> {
@@ -112,6 +115,10 @@ fn main() -> Result<(), Error> {
             Ok(_) => (),
             Err(e) => eprintln!("Failed to write index: {}", e),
         },
+        Command::DumpPackFiles => match repo.dump_pack_files() {
+            Ok(_) => (),
+            Err(e) => eprintln!("Failed to dump pack files: {}", e),
+        },
     }
 
     Ok(())
diff --git a/src/pack.rs b/src/pack.rs
new file mode 100644
index 0000000..0d8ad6d
--- /dev/null
+++ b/src/pack.rs
@@ -0,0 +1,420 @@
+//! Reader for git pack files (`.git/objects/pack/pack-*.pack`).
+//!
+//! Only version 2 packs are accepted, and `ref-delta` entries are reported as
+//! an error because they are not implemented yet.
+
+use std::{
+    fs::File,
+    io::{BufReader, Cursor, Read, Seek, SeekFrom},
+};
+
+use anyhow::Error;
+use flate2::read::ZlibDecoder;
+
+use crate::repository::Repository;
+
+/// Error text shared by the variable-length integer readers.
+const VLI_OVERFLOW: &str = "Variable-length integer does not fit in 32 bits";
+
+/// Fixed 12-byte header at the start of a pack file.
+#[derive(Debug)]
+#[allow(dead_code)] // some fields are only surfaced through `Debug`
+struct PackHeader {
+    signature: [u8; 4],
+    version: u32,
+    num_objects: u32,
+}
+
+/// A fully inflated pack entry; delta entries are stored already patched.
+#[derive(Debug)]
+#[allow(dead_code)] // some fields are only surfaced through `Debug`
+struct PackObject {
+    object_type: PackObjectType,
+    object_size: u32,
+    object_data: Vec<u8>,
+}
+
+/// Entry type encoded in bits 4-6 of the first byte of a pack entry.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum PackObjectType {
+    Commit,
+    Tree,
+    Blob,
+    Tag,
+    OfsDelta,
+    RefDelta,
+}
+
+impl PackObjectType {
+    /// Maps the 3-bit on-disk type code to a [`PackObjectType`].
+    ///
+    /// # Errors
+    /// Fails for any code other than 1, 2, 3, 4, 6 or 7.
+    fn from_u8(value: u8) -> Result<Self, Error> {
+        match value {
+            1 => Ok(PackObjectType::Commit),
+            2 => Ok(PackObjectType::Tree),
+            3 => Ok(PackObjectType::Blob),
+            4 => Ok(PackObjectType::Tag),
+            6 => Ok(PackObjectType::OfsDelta),
+            7 => Ok(PackObjectType::RefDelta),
+            _ => Err(Error::msg("Unknown object type")),
+        }
+    }
+}
+
+impl std::fmt::Display for PackObjectType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let s = match self {
+            PackObjectType::Commit => "commit",
+            PackObjectType::Tree => "tree",
+            PackObjectType::Blob => "blob",
+            PackObjectType::Tag => "tag",
+            PackObjectType::OfsDelta => "ofs-delta",
+            PackObjectType::RefDelta => "ref-delta",
+        };
+
+        write!(f, "{}", s)
+    }
+}
+
+/// Reads and validates the 12-byte pack header at the current file position.
+///
+/// # Errors
+/// Fails on I/O errors, when the signature is not `PACK`, or when the version
+/// is not 2.
+fn parse_pack_header(file: &mut File) -> Result<PackHeader, Error> {
+    let mut header = [0; 12];
+    file.read_exact(&mut header)?;
+
+    let signature = [header[0], header[1], header[2], header[3]];
+    if &signature != b"PACK" {
+        return Err(Error::msg("Invalid pack file"));
+    }
+
+    let version = u32::from_be_bytes([header[4], header[5], header[6], header[7]]);
+    if version != 2 {
+        return Err(Error::msg("Invalid pack file version"));
+    }
+
+    let num_objects = u32::from_be_bytes([header[8], header[9], header[10], header[11]]);
+
+    Ok(PackHeader {
+        signature,
+        version,
+        num_objects,
+    })
+}
+
+/// Shifts a 7-bit group into place, failing instead of silently dropping bits.
+fn shift_group(group: u32, shift: u32) -> Result<u32, Error> {
+    if group == 0 {
+        return Ok(0);
+    }
+
+    group
+        .checked_shl(shift)
+        .filter(|&shifted| shifted >> shift == group)
+        .ok_or_else(|| Error::msg(VLI_OVERFLOW))
+}
+
+/// Reads a little-endian base-128 integer: 7 payload bits per byte, with the
+/// high bit set on every byte except the last.
+///
+/// # Errors
+/// Fails on I/O errors or when the value does not fit in a `u32`.
+fn read_vli_le<R>(file: &mut BufReader<R>) -> Result<u32, Error>
+where
+    R: Read,
+{
+    let mut val: u32 = 0;
+    let mut shift: u32 = 0;
+    loop {
+        let mut byte = [0; 1];
+        file.read_exact(&mut byte)?;
+        let byt = u32::from(byte[0]);
+
+        val |= shift_group(byt & 0x7f, shift)?;
+        shift = shift.saturating_add(7);
+
+        if byt & 0x80 == 0 {
+            break;
+        }
+    }
+
+    Ok(val)
+}
+
+/// Reads a big-endian base-128 integer: 7 payload bits per byte, with the
+/// high bit set on every byte except the last.
+///
+/// When `offset` is true, 1 is added after every byte that has the
+/// continuation bit set, as required for ofs-delta base offsets.
+///
+/// # Errors
+/// Fails on I/O errors or when the value does not fit in a `u32`.
+fn read_vli_be<R>(file: &mut BufReader<R>, offset: bool) -> Result<u32, Error>
+where
+    R: Read,
+{
+    let mut val: u32 = 0;
+    loop {
+        let mut byte = [0; 1];
+        file.read_exact(&mut byte)?;
+        let byt = u32::from(byte[0]);
+
+        // `val << 7` would drop bits once the top 7 bits are in use.
+        if val > (u32::MAX >> 7) {
+            return Err(Error::msg(VLI_OVERFLOW));
+        }
+        val = (val << 7) | (byt & 0x7f);
+        if byt & 0x80 == 0 {
+            break;
+        }
+
+        if offset {
+            val = val
+                .checked_add(1)
+                .ok_or_else(|| Error::msg(VLI_OVERFLOW))?;
+        }
+    }
+
+    Ok(val)
+}
+
+/// Inflates the zlib stream starting at the current file position and leaves
+/// the file positioned on the first byte after that stream.
+///
+/// # Errors
+/// Fails on I/O or zlib errors, or when the inflated length differs from
+/// `expected_size`.
+fn inflate_entry(file: &mut File, expected_size: u32) -> Result<Vec<u8>, Error> {
+    let start = file.stream_position()?;
+
+    let mut data = Vec::new();
+    let mut zlib_decoder = ZlibDecoder::new(&mut *file);
+    zlib_decoder.read_to_end(&mut data)?;
+    let read_bytes = zlib_decoder.total_in();
+
+    // The decoder may have read past the end of the stream, so reposition the
+    // file using the number of compressed bytes it actually consumed.
+    file.seek(SeekFrom::Start(start + read_bytes))?;
+
+    if data.len() != expected_size as usize {
+        return Err(Error::msg(format!(
+            "Inflated size mismatch: expected {} bytes, got {}",
+            expected_size,
+            data.len()
+        )));
+    }
+
+    Ok(data)
+}
+
+/// Applies the delta stored at the current file position to `base_obj`.
+///
+/// The inflated delta starts with the base size and the result size (both
+/// little-endian base-128), followed by copy and insert instructions.
+///
+/// # Errors
+/// Fails on I/O or zlib errors and on malformed deltas: size mismatches, the
+/// reserved opcode 0, or a copy that reaches outside the base object.
+fn make_delta_obj(
+    file: &mut File,
+    base_obj: PackObject,
+    object_size: u32,
+) -> Result<PackObject, Error> {
+    let delta_data = inflate_entry(file, object_size)?;
+    let delta_len = delta_data.len() as u64;
+    let mut delta = BufReader::new(Cursor::new(delta_data.as_slice()));
+
+    let base_obj_size = read_vli_le(&mut delta)?;
+    let patched_obj_size = read_vli_le(&mut delta)?;
+    if base_obj_size as usize != base_obj.object_data.len() {
+        return Err(Error::msg("Invalid delta: base object size mismatch"));
+    }
+
+    let mut obj_data = Vec::new();
+    while delta.stream_position()? < delta_len {
+        let mut byte = [0; 1];
+        delta.read_exact(&mut byte)?;
+        let opcode = byte[0];
+
+        if opcode == 0x00 {
+            // Reserved by the format; skipping it would hide corruption.
+            return Err(Error::msg("Invalid delta: reserved opcode 0"));
+        }
+
+        if opcode & 0x80 != 0 {
+            // Copy from the base object. Bits 0-3 select which of the four
+            // offset bytes are present and bits 4-6 which of the three size
+            // bytes; absent bytes are zero.
+            let mut vals = [0u8; 7];
+            for (i, val) in vals.iter_mut().enumerate() {
+                if opcode & (1u8 << i) != 0 {
+                    delta.read_exact(&mut byte)?;
+                    *val = byte[0];
+                }
+            }
+
+            let start = u32::from_le_bytes([vals[0], vals[1], vals[2], vals[3]]) as usize;
+            let nbytes = u32::from_le_bytes([vals[4], vals[5], vals[6], 0]) as usize;
+            // A size of zero encodes 0x10000 bytes.
+            let nbytes = if nbytes == 0 { 0x10000 } else { nbytes };
+
+            let chunk = start
+                .checked_add(nbytes)
+                .and_then(|end| base_obj.object_data.get(start..end))
+                .ok_or_else(|| Error::msg("Invalid delta: copy outside of base object"))?;
+            obj_data.extend_from_slice(chunk);
+        } else {
+            // Insert: the opcode itself is the number of literal bytes that follow.
+            let old_len = obj_data.len();
+            obj_data.resize(old_len + usize::from(opcode), 0);
+            delta.read_exact(&mut obj_data[old_len..])?;
+        }
+    }
+
+    if obj_data.len() != patched_obj_size as usize {
+        return Err(Error::msg("Invalid delta: result size mismatch"));
+    }
+
+    Ok(PackObject {
+        object_type: base_obj.object_type,
+        object_size: patched_obj_size,
+        object_data: obj_data,
+    })
+}
+
+/// Resolves an ofs-delta entry whose header started at `fpos`.
+///
+/// The file must be positioned right after the entry header, where the
+/// encoded distance back to the base entry is stored. The base is parsed
+/// (recursively, if it is itself a delta) and the delta is applied to it.
+///
+/// # Errors
+/// Fails on I/O errors, on a base offset that is zero or points before the
+/// start of the file, or when the delta itself is malformed.
+fn parse_pack_ofs_delta_object(
+    file: &mut File,
+    object_size: u32,
+    fpos: u64,
+) -> Result<PackObject, Error> {
+    // The BufReader reads ahead, so put the file back on the first byte after
+    // the encoded offset once it has been decoded.
+    let mut reader = BufReader::new(&mut *file);
+    let offset = read_vli_be(&mut reader, true)?;
+    let delta_pos = reader.stream_position()?;
+    file.seek(SeekFrom::Start(delta_pos))?;
+
+    // A zero offset would make the entry its own base and recurse forever.
+    let base_obj_offset = fpos
+        .checked_sub(u64::from(offset))
+        .filter(|_| offset != 0)
+        .ok_or_else(|| Error::msg("Invalid ofs-delta base offset"))?;
+
+    file.seek(SeekFrom::Start(base_obj_offset))?;
+    let base_obj = parse_pack_entry(file)?;
+    if !matches!(
+        base_obj.object_type,
+        PackObjectType::Commit
+            | PackObjectType::Tree
+            | PackObjectType::Blob
+            | PackObjectType::Tag
+    ) {
+        return Err(Error::msg("Delta base resolved to a delta type"));
+    }
+
+    file.seek(SeekFrom::Start(delta_pos))?;
+
+    make_delta_obj(file, base_obj, object_size)
+}
+
+/// Parses the pack entry at the current file position and returns it fully
+/// inflated; ofs-delta entries are resolved against their base.
+///
+/// # Errors
+/// Fails on I/O or zlib errors, unknown entry types, sizes that do not fit
+/// in a `u32`, and `ref-delta` entries, which are not supported yet.
+fn parse_pack_entry(file: &mut File) -> Result<PackObject, Error> {
+    let object_pos = file.stream_position()?;
+
+    // First byte: bit 7 = continuation, bits 4-6 = type, bits 0-3 = low size bits.
+    let mut byte = [0; 1];
+    file.read_exact(&mut byte)?;
+    let object_type = PackObjectType::from_u8((byte[0] & 0x70) >> 4)?;
+
+    let mut object_size = u32::from(byte[0] & 0x0f);
+    let mut bshift: u32 = 4;
+    while byte[0] & 0x80 != 0 {
+        file.read_exact(&mut byte)?;
+        object_size |= shift_group(u32::from(byte[0] & 0x7f), bshift)?;
+        bshift = bshift.saturating_add(7);
+    }
+
+    println!(
+        "Reading object: fpos=0x{:x}, type:{} size:{}",
+        object_pos, object_type, object_size
+    );
+
+    match object_type {
+        PackObjectType::Commit
+        | PackObjectType::Tree
+        | PackObjectType::Blob
+        | PackObjectType::Tag => Ok(PackObject {
+            object_type,
+            object_size,
+            object_data: inflate_entry(file, object_size)?,
+        }),
+        PackObjectType::OfsDelta => parse_pack_ofs_delta_object(file, object_size, object_pos),
+        PackObjectType::RefDelta => Err(Error::msg("ref-delta objects are not supported yet")),
+    }
+}
+
+impl Repository {
+    /// Dumps every `pack-*.pack` file found in `.git/objects/pack`.
+    ///
+    /// # Errors
+    /// Fails when the directory cannot be read or when any pack fails to parse.
+    pub fn dump_pack_files(&self) -> Result<(), Error> {
+        let pack_dir = self.path.join(".git/objects/pack");
+
+        for entry in pack_dir.read_dir()? {
+            let file_name = entry?.file_name();
+            // Names that are not valid UTF-8 cannot match the pattern; skip them.
+            let pack_id = file_name
+                .to_str()
+                .and_then(|name| name.strip_prefix("pack-"))
+                .and_then(|name| name.strip_suffix(".pack"));
+            if let Some(pack_id) = pack_id {
+                self.dump_pack_file(pack_id)?;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Prints the header of `pack-<pack_id>.pack` and parses every entry in it.
+    ///
+    /// # Errors
+    /// Fails when the file cannot be opened or its contents are malformed.
+    pub fn dump_pack_file(&self, pack_id: &str) -> Result<(), Error> {
+        let file_path = self
+            .path
+            .join(format!(".git/objects/pack/pack-{}.pack", pack_id));
+
+        let mut file = File::open(file_path)?;
+
+        let header = parse_pack_header(&mut file)?;
+        println!("{:?}", header);
+
+        for _ in 0..header.num_objects {
+            parse_pack_entry(&mut file)?;
+        }
+
+        // At the end of the file, there should be a 20-byte SHA-1 checksum
+        // TBD
+
+        Ok(())
+    }
+}