Compare commits

..

11 Commits

Author SHA1 Message Date
a0cbb8dcda feat: basic packfile retrieval
All checks were successful
CI checks / Clippy (push) Successful in 50s
CI checks / Format (push) Successful in 26s
2025-02-11 22:18:32 +01:00
ae3c1b23af feat: parsing single pack-file
All checks were successful
CI checks / Clippy (push) Successful in 30s
CI checks / Format (push) Successful in 26s
2025-02-11 19:23:08 +01:00
de4c366ebb feat: adding hash-object
All checks were successful
CI checks / Clippy (push) Successful in 27s
CI checks / Format (push) Successful in 25s
2025-02-08 23:16:24 +01:00
daf5fa3272 feat: parse idx files
Some checks failed
CI checks / Clippy (push) Failing after 26s
CI checks / Format (push) Successful in 25s
2025-02-08 19:07:58 +01:00
6cd2907f0d refactor: bufreaders and zlib
All checks were successful
CI checks / Clippy (push) Successful in 27s
CI checks / Format (push) Successful in 39s
2025-02-08 16:49:00 +01:00
2a4a411341 feat: adding pack files support
Some checks failed
CI checks / Clippy (push) Failing after 27s
CI checks / Format (push) Successful in 25s
2025-02-08 14:55:49 +01:00
1d8009cad1 feat(log): show first log line
All checks were successful
CI checks / Clippy (push) Successful in 27s
CI checks / Format (push) Successful in 25s
2025-02-06 23:43:25 +01:00
8c6994986c feat: adding write-index
All checks were successful
CI checks / Clippy (push) Successful in 28s
CI checks / Format (push) Successful in 41s
2025-02-06 23:34:48 +01:00
bed1464bb9 feat: adding ls-index
All checks were successful
CI checks / Clippy (push) Successful in 29s
CI checks / Format (push) Successful in 24s
2025-02-06 08:18:57 +01:00
f047bb5181 feat: adding log
All checks were successful
CI checks / Clippy (push) Successful in 26s
CI checks / Format (push) Successful in 25s
2025-02-05 23:00:17 +01:00
62af81b28f feat: adding commit, show commands
All checks were successful
CI checks / Clippy (push) Successful in 26s
CI checks / Format (push) Successful in 25s
2025-02-05 21:55:05 +01:00
11 changed files with 2622 additions and 15 deletions

1476
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -8,5 +8,9 @@ anyhow = "1.0.95"
clap = { version = "4.5.27", features = ["derive", "string"] }
flate2 = "1.0.35"
hex = "0.4.3"
nom = "8.0.0"
reqwest = "0.12.12"
sha1 = "0.10.6"
thiserror = "2.0.11"
tokio = { version = "1.43.0", features = ["full"] }
walkdir = "2.5.0"

108
src/commit.rs Normal file
View File

@ -0,0 +1,108 @@
use std::fs::{read_to_string, File};
use anyhow::{Context, Result};
use hex::FromHex;
use crate::{kind::Kind, repository::Repository};
impl Repository {
    /// Return the raw contents of `.git/HEAD` (e.g. "ref: refs/heads/main\n").
    pub fn read_head(&self) -> Result<String> {
        let head_path = self.path.join(".git").join("HEAD");
        read_to_string(head_path).context("reading head")
    }

    /// Name of the branch HEAD points at.
    /// NOTE(review): assumes HEAD is a symbolic ref; a detached HEAD would
    /// yield the raw commit hash here — confirm callers tolerate that.
    pub fn current_branch(&self) -> Result<String> {
        let head = self.read_head()?;
        let branch = head.trim_start_matches("ref: refs/heads/").trim_end();
        Ok(branch.to_string())
    }

    /// Commit id the current branch points at, decoded from its ref file.
    pub fn current_commit(&self) -> Result<[u8; 20]> {
        let branch = self.current_branch()?;
        let branch_path = self
            .path
            .join(".git")
            .join("refs")
            .join("heads")
            .join(&branch);
        let contents = read_to_string(branch_path).context("could not read current branch")?;
        Ok(<[u8; 20]>::from_hex(contents.trim())?)
    }

    /// True once the current branch has at least one commit.
    pub fn has_current_commit(&self) -> bool {
        self.current_commit().is_ok()
    }

    /// Point the current branch at `hash`, creating `refs/heads/` and the
    /// ref file on first use.
    pub fn set_current_commit(&self, hash: &[u8; 20]) -> Result<()> {
        let branch = self
            .current_branch()
            .context("could not find current branch")?;
        let heads_dir = self.path.join(".git").join("refs").join("heads");
        if !heads_dir.exists() {
            std::fs::create_dir_all(&heads_dir)?;
        }
        let branch_path = heads_dir.join(&branch);
        if !branch_path.exists() {
            // First commit on this branch: create the ref file.
            File::create(&branch_path)?;
        }
        std::fs::write(branch_path, hex::encode(hash))?;
        Ok(())
    }

    /// Create a commit object from the working tree and advance the branch.
    ///
    /// Object layout written: "tree <hex>\n" [+ "parent <hex>\n"] + "\n<message>\n".
    pub fn commit(&self, message: &str) -> Result<[u8; 20]> {
        let has_parent = self.has_current_commit();
        let mut out: Vec<u8> = Vec::new();
        let tree_hash = self
            .write_tree(&self.path)
            .context("could not write_tree")?;
        out.extend_from_slice(b"tree ");
        out.extend_from_slice(hex::encode(tree_hash).as_bytes());
        out.push(b'\n');
        if has_parent {
            // Link to the previous branch tip as the sole parent.
            let parent = self.current_commit()?;
            out.extend_from_slice(b"parent ");
            out.extend_from_slice(hex::encode(parent).as_bytes());
            out.push(b'\n');
        }
        out.push(b'\n');
        out.extend_from_slice(message.as_bytes());
        out.push(b'\n');
        let hash = self.write_object(Kind::Commit, &out).context("Write")?;
        // update current branch's commit id
        self.set_current_commit(&hash)?;
        self.write_index()?;
        Ok(hash)
    }

    /// Pretty-print a commit: the given hash, or the branch tip when absent.
    pub fn show(&self, hash: Option<String>) -> Result<()> {
        let target = match hash {
            Some(h) => h,
            None => hex::encode(self.current_commit()?),
        };
        let mut object = self.read_object(&target)?;
        println!("{}", object.string()?);
        Ok(())
    }
}

167
src/http.rs Normal file
View File

@ -0,0 +1,167 @@
use std::io::Write;
use anyhow::{Error, Result};
use nom::AsBytes;
use reqwest::Client;
/// Clone entry point: fetch the advertised refs for `repo`, print them,
/// and report the size of the downloaded packfile response.
pub async fn clone(repo: &str) -> Result<(), Error> {
    let (size, refs) = get_refs(repo).await?;
    println!("Refs:");
    // parse_refs stores tuples as (name, sha1); the previous code
    // destructured them as (sha1, name), printing the columns mislabeled
    // and in the wrong order. Print "<sha1> <name>" like git ls-remote.
    for (name, sha1) in refs.iter() {
        println!("{} {}", sha1, name);
    }
    println!("Downloaded file size: {}", size);
    Ok(())
}
pub fn parse_refs(input: &[u8]) -> Result<Vec<(String, String)>> {
let mut refs = Vec::new();
let mut index: usize = 0;
loop {
if index >= input.len() {
break;
}
// pick the next 4 bytes and convert to u32 from hex
let mut bytes = [0; 4];
bytes.copy_from_slice(&input[index..index + 4]);
let hex_str = std::str::from_utf8(&bytes)?;
let res = usize::from_str_radix(hex_str, 16)?;
if res == 0 {
index += 4;
continue;
}
if input[index + 4] == b'#' {
index += res;
continue;
}
let mut sha1_bytes = [0; 40];
sha1_bytes.copy_from_slice(&input[index + 4..index + 44]);
let idx_0 = input[index + 45..index + res - 1]
.iter()
.position(|&x| x == 0);
let sha1 = std::str::from_utf8(&sha1_bytes)?;
let name = if let Some(idx_0) = idx_0 {
std::str::from_utf8(&input[index + 45..index + 45 + idx_0])?
} else {
std::str::from_utf8(&input[index + 45..index + res - 1])?
};
refs.push((name.to_string(), sha1.to_string()));
index += res;
}
Ok(refs)
}
/// Fetch and parse the ref advertisement for `repo_url`, then hand the
/// refs straight to `get_packfile` to negotiate and download a pack.
pub async fn get_refs(repo_url: &str) -> Result<(usize, Vec<(String, String)>), Error> {
    let url = format!("{}/info/refs?service=git-upload-pack", repo_url);
    let response = Client::new()
        .get(&url)
        .header("User-Agent", "git/2.30.0")
        .send()
        .await?;
    response.error_for_status_ref()?;
    let body = response.bytes().await?;
    let refs = parse_refs(&body)?;
    get_packfile(repo_url, refs).await
}
/// Encode `data` as a git pkt-line: a 4-digit lowercase hex length
/// (payload length plus the 4 header bytes) followed by the payload.
pub fn packet_line(data: &str) -> Vec<u8> {
    let total = data.len() + 4;
    let mut line = format!("{:04x}", total).into_bytes();
    line.extend_from_slice(data.as_bytes());
    line
}
/// POST a protocol-v2 `fetch` request asking for every advertised ref and
/// hand the response body to `decode_git_response`.
///
/// Returns the raw response size together with the refs that were requested.
pub async fn get_packfile(
    repo_url: &str,
    refs: Vec<(String, String)>,
) -> Result<(usize, Vec<(String, String)>), Error> {
    let url = format!("{}/git-upload-pack", repo_url);

    // Request body: capability section, "0001" delimiter, arguments,
    // "0000" flush, then "done".
    let mut payload: Vec<u8> = Vec::new();
    for cap in ["command=fetch", "agent=git/2.30.0", "object-format=sha1"] {
        payload.extend(packet_line(cap));
    }
    payload.extend(b"0001");
    payload.extend(packet_line("ofs-delta"));
    payload.extend(packet_line("no-progress"));
    for (_, sha1) in refs.iter() {
        payload.extend(packet_line(&format!("want {}\n", sha1)));
    }
    payload.extend(b"0000");
    payload.extend(packet_line("done"));

    let response = Client::new()
        .post(&url)
        .header("User-Agent", "git/2.30.0")
        .header("Content-Type", "application/x-git-upload-pack-request")
        .header("Accept-Encoding", "deflate")
        .header("Accept", "application/x-git-upload-pack-result")
        .header("Git-Protocol", "version=2")
        .body(payload)
        .send()
        .await?;
    response.error_for_status_ref()?;
    let content = response.bytes().await?;
    decode_git_response(content.as_bytes())?;
    Ok((content.len(), refs))
}
fn decode_git_response(content: &[u8]) -> Result<(), Error> {
let mut cursor = 0;
let mut pack_data = Vec::new();
while cursor < content.len() {
let length_str = std::str::from_utf8(&content[cursor..cursor + 4])?;
cursor += 4;
let length = usize::from_str_radix(length_str, 16)?;
if length == 0 {
break;
}
let payload = &content[cursor..cursor + length - 4];
cursor += length - 4;
let side_band = payload[0];
let data = &payload[1..];
if side_band == 1 {
pack_data.extend(data);
} else if side_band == 2 {
println!("Progress: {}", std::str::from_utf8(data)?);
} else if side_band == 3 {
println!("Error: {}", std::str::from_utf8(data)?);
}
}
if !pack_data.is_empty() {
let mut packfile = std::fs::File::create("downloaded.pack")?;
packfile.write_all(&pack_data)?;
println!("Packfile saved as 'downloaded.pack'");
}
Ok(())
}

256
src/index.rs Normal file
View File

@ -0,0 +1,256 @@
use std::{os::linux::fs::MetadataExt, path::Path};
use nom::{
bytes::complete::take,
number::complete::{be_u16, be_u32},
IResult, Parser,
};
use anyhow::{anyhow, Error, Result};
use sha1::{Digest, Sha1};
use walkdir::WalkDir;
use crate::repository::Repository;
/// 12-byte header at the start of `.git/index` ("DIRC" + version + count).
#[derive(Debug)]
#[allow(dead_code)]
struct IndexHeader {
    signature: [u8; 4], // "DIRC"
    version: u32, // 2, 3, or 4
    entries_count: u32, // number of entries that follow the header
}
/// One index entry: cached stat() data plus blob sha1 and path.
#[derive(Debug)]
#[allow(dead_code)]
struct IndexEntry {
    ctime_s: u32, // last status-change time, seconds
    ctime_n: u32, // ...nanosecond fraction
    mtime_s: u32, // last modification time, seconds
    mtime_n: u32, // ...nanosecond fraction
    dev: u32,
    ino: u32,
    mode: u32,
    uid: u32,
    gid: u32,
    size: u32,         // file size truncated to 32 bits (see write_index)
    sha1: [u8; 20],    // blob object id
    flags: u16,        // low 12 bits hold the path length
    file_path: String, // path relative to the repository root
}
/// A parsed index file. Extensions and the trailing checksum are not modelled.
#[derive(Debug)]
#[allow(dead_code)]
struct Index {
    header: IndexHeader,
    entries: Vec<IndexEntry>,
}
/// Parse a complete index file: header followed by `entries_count` entries.
fn parse_index(input: &[u8]) -> IResult<&[u8], Index> {
    let (rest, header) = parse_header(input)?;
    let count = header.entries_count as usize;
    let mut entries = Vec::with_capacity(count);
    let mut rest = rest;
    for _ in 0..count {
        let (next, entry) = parse_entry(rest)?;
        entries.push(entry);
        rest = next;
    }
    Ok((rest, Index { header, entries }))
}
/// Parse the 12-byte index header: 4-byte signature plus two big-endian u32s.
fn parse_header(input: &[u8]) -> IResult<&[u8], IndexHeader> {
    let (input, (raw_sig, version, entries_count)) =
        (take(4usize), be_u32, be_u32).parse(input)?;
    let mut signature = [0u8; 4];
    signature.copy_from_slice(raw_sig);
    let header = IndexHeader {
        signature,
        version,
        entries_count,
    };
    Ok((input, header))
}
/// Parse one on-disk index entry.
///
/// Layout: ten big-endian u32 stat fields, a 20-byte sha1, a 16-bit flags
/// word (low 12 bits = path length), the path bytes, then NUL padding so
/// the entry's total length is a multiple of 8.
fn parse_entry(input: &[u8]) -> IResult<&[u8], IndexEntry> {
    // Remember the starting length so the padding can be computed from the
    // number of bytes consumed by the fixed portion.
    let start_input_len = input.len();
    let (
        input,
        (ctime_s, ctime_n, mtime_s, mtime_n, dev, ino, mode, uid, gid, size, sha1_bytes, flags),
    ) = (
        be_u32,
        be_u32,
        be_u32,
        be_u32,
        be_u32,
        be_u32,
        be_u32,
        be_u32,
        be_u32,
        be_u32,
        take(20usize),
        be_u16,
    )
    .parse(input)?;
    let current_input_len = input.len();
    // The path length lives in the low 12 bits of the flags word.
    let path_len = flags & 0xFFF;
    let (input, path_bytes) = take(path_len as usize)(input)?;
    let file_path = String::from_utf8_lossy(path_bytes).into_owned();
    // between 1 and 8 NUL bytes to pad the entry.
    // (fixed portion consumed + path length) mod 8 determines the count;
    // a perfectly aligned entry still gets a full 8 bytes of padding.
    let padding_len = 8 - ((start_input_len - current_input_len) + path_len as usize) % 8;
    let (input, _) = take(padding_len)(input)?;
    let mut sha1 = [0u8; 20];
    sha1.copy_from_slice(sha1_bytes);
    Ok((
        input,
        IndexEntry {
            ctime_s,
            ctime_n,
            mtime_s,
            mtime_n,
            dev,
            ino,
            mode,
            uid,
            gid,
            size,
            sha1,
            flags,
            file_path,
        },
    ))
}
impl Index {
    /// Load and parse an index file from disk.
    pub fn read_from_file(path: &Path) -> Result<Self, Error> {
        let bytes = std::fs::read(path)?;
        match parse_index(&bytes) {
            Ok((_rest, index)) => Ok(index),
            Err(e) => Err(anyhow!("Failed to parse index: {}", e)),
        }
    }
}
impl Repository {
pub fn read_index(&self) -> Result<()> {
let index_path = self.path.join(".git").join("index");
let index = Index::read_from_file(&index_path)?;
for entry in index.entries {
println!("{} {}", hex::encode(entry.sha1), entry.file_path);
}
Ok(())
}
pub fn write_index(&self) -> Result<()> {
let index_path = self.path.join(".git").join("index");
// list all files in the repository
let files = list_all_files(&self.path, &self.ignore)?;
let index = Index {
header: IndexHeader {
signature: *b"DIRC",
version: 2,
entries_count: files.len() as u32,
},
entries: Vec::new(),
};
let mut content = Vec::new();
content.extend_from_slice(&index.header.signature);
content.extend_from_slice(&index.header.version.to_be_bytes());
content.extend_from_slice(&index.header.entries_count.to_be_bytes());
for file in files {
let metadata = std::fs::metadata(self.path.join(file.clone()))?;
let entry = IndexEntry {
ctime_s: metadata.st_ctime() as u32,
ctime_n: metadata.st_ctime_nsec() as u32,
mtime_s: metadata.st_mtime() as u32,
mtime_n: metadata.st_mtime_nsec() as u32,
dev: metadata.st_dev() as u32,
ino: metadata.st_ino() as u32,
mode: metadata.st_mode(),
uid: metadata.st_uid(),
gid: metadata.st_gid(),
size: metadata.st_size() as u32,
sha1: hash_file(&self.path.join(file.clone()))?,
flags: 0,
file_path: file,
};
let mut entry_content = Vec::new();
entry_content.extend_from_slice(&entry.ctime_s.to_be_bytes());
entry_content.extend_from_slice(&entry.ctime_n.to_be_bytes());
entry_content.extend_from_slice(&entry.mtime_s.to_be_bytes());
entry_content.extend_from_slice(&entry.mtime_n.to_be_bytes());
entry_content.extend_from_slice(&entry.dev.to_be_bytes());
entry_content.extend_from_slice(&entry.ino.to_be_bytes());
entry_content.extend_from_slice(&entry.mode.to_be_bytes());
entry_content.extend_from_slice(&entry.uid.to_be_bytes());
entry_content.extend_from_slice(&entry.gid.to_be_bytes());
entry_content.extend_from_slice(&entry.size.to_be_bytes());
entry_content.extend_from_slice(&entry.sha1);
//entry_content.extend_from_slice(&entry.flags.to_be_bytes());
let path_bytes = entry.file_path.as_bytes();
entry_content.extend_from_slice(&(path_bytes.len() as u16).to_be_bytes());
entry_content.extend_from_slice(path_bytes);
// between 1 and 8 NUL bytes to pad the entry.
let padding_len = 8 - entry_content.len() % 8;
entry_content.extend(vec![0u8; padding_len]);
content.extend(entry_content);
}
std::fs::write(index_path, content)?;
Ok(())
}
}
pub fn list_all_files(path: &Path, ignore_list: &[String]) -> Result<Vec<String>> {
let mut files = Vec::new();
for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
if entry.file_type().is_file() {
if ignore_list.iter().any(|i| entry.path().ends_with(i)) {
continue;
}
let s = entry.path().to_path_buf().to_str().unwrap().to_string();
let s = s.strip_prefix(path.to_str().unwrap()).unwrap().to_string();
if ignore_list.iter().any(|i| s.starts_with(i)) {
continue;
}
files.push(s.strip_prefix("/").unwrap().to_string());
}
}
files.sort();
Ok(files)
}
/// Hash a file the way git hashes a blob: sha1("blob <len>\0" + contents).
fn hash_file(path: &Path) -> Result<[u8; 20]> {
    let contents = std::fs::read(path)?;
    let header = format!("blob {}\0", contents.len());
    let mut hasher = Sha1::new();
    hasher.update(header.as_bytes());
    hasher.update(&contents);
    Ok(hasher.finalize().into())
}

View File

@ -1,3 +1,5 @@
use std::fmt;
use anyhow::{anyhow, Result};
#[derive(Debug)]
@ -30,13 +32,16 @@ impl Kind {
Kind::Symlink => "120000",
}
}
}
pub fn string(&self) -> &str {
match self {
impl fmt::Display for Kind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let kind = match self {
Kind::Blob(_) => "blob",
Kind::Commit => "commit",
Kind::Tree => "tree",
Kind::Symlink => "symlink",
}
};
write!(f, "{}", kind)
}
}

36
src/log.rs Normal file
View File

@ -0,0 +1,36 @@
use crate::repository::Repository;
use anyhow::Result;
use hex::FromHex;
impl Repository {
    /// Walk the commit chain from the current branch tip, printing
    /// "<sha1> <first message line>" per commit (a minimal `log --oneline`).
    ///
    /// Tolerates commits without a blank header/message separator or
    /// without a message body instead of panicking on `unwrap()`.
    pub fn log(&self) -> Result<()> {
        let mut current_commit = self.current_commit()?;
        loop {
            let mut commit = self.read_object(&hex::encode(current_commit))?;
            let commit_desc = commit.string()?;
            let lines = commit_desc.lines().collect::<Vec<&str>>();
            // The message subject is the line after the first blank line.
            let subject = lines
                .iter()
                .position(|line| line.is_empty())
                .and_then(|i| lines.get(i + 1))
                .copied()
                .unwrap_or("");
            println!("{} {}", hex::encode(current_commit), subject);
            // Follow the (first) parent; a root commit ends the walk.
            let Some(parent_line) = lines.iter().find(|line| line.starts_with("parent ")) else {
                break;
            };
            let parent_hex = parent_line.split_once(' ').map(|(_, h)| h).unwrap_or_default();
            current_commit = <[u8; 20]>::from_hex(parent_hex)?;
        }
        Ok(())
    }
}

View File

@ -1,16 +1,23 @@
use anyhow::{Error, Result};
use object::hash_object;
use repository::default_init_path;
use std::path::PathBuf;
use clap::Parser;
use clap::Subcommand;
mod commit;
mod error;
mod http;
mod index;
mod kind;
mod log;
mod object;
mod pack;
mod repository;
mod tree;
use crate::http::clone;
use crate::repository::Repository;
#[derive(Parser)]
@ -43,9 +50,49 @@ enum Command {
/// The path to write
path: PathBuf,
},
/// Commit current changes
Commit {
/// The commit message
message: String,
},
/// Get the current branch
Branch,
/// Get the latest commit
Show {
/// The commit to show
hash: Option<String>,
},
/// Show the commit log
Log,
/// List the index entries
LsIndex,
/// Write the index file
WriteIndex,
/// Dump a Pack File
DumpPack {
/// The pack file to dump
file: PathBuf,
},
/// Dump Pack Files
DumpPackFiles,
/// Dump Pack Index file
DumpPackIndexFile {
/// The pack index file to dump
pack_id: String,
},
/// Hash an object
HashObject {
/// The object to hash
file: PathBuf,
},
Clone {
/// The repository to clone
repo: String,
},
}
fn main() -> Result<(), Error> {
#[tokio::main]
async fn main() -> Result<(), Error> {
let cli = Cli::parse();
let mut repo = Repository::new()?;
@ -67,6 +114,50 @@ fn main() -> Result<(), Error> {
Ok(hash) => println!("{}", hex::encode(hash)),
Err(e) => eprintln!("Failed to write tree: {}", e),
},
Command::Commit { message } => match repo.commit(&message) {
Ok(hash) => println!("{}", hex::encode(hash)),
Err(e) => eprintln!("Failed to commit: {}", e),
},
Command::Branch => match repo.current_branch() {
Ok(branch) => println!("{}", branch),
Err(e) => eprintln!("Failed to get branch: {}", e),
},
Command::Show { hash } => match repo.show(hash) {
Ok(_) => (),
Err(e) => eprintln!("Failed to show: {}", e),
},
Command::Log => match repo.log() {
Ok(_) => (),
Err(e) => eprintln!("Failed to show log: {}", e),
},
Command::LsIndex => match repo.read_index() {
Ok(_) => (),
Err(e) => eprintln!("Failed to list index: {}", e),
},
Command::WriteIndex => match repo.write_index() {
Ok(_) => (),
Err(e) => eprintln!("Failed to write index: {}", e),
},
Command::DumpPackFiles => match repo.dump_pack_files() {
Ok(_) => (),
Err(e) => eprintln!("Failed to dump pack files: {}", e),
},
Command::DumpPack { file } => match repo.dump_pack(&file) {
Ok(_) => (),
Err(e) => eprintln!("Failed to dump pack: {}", e),
},
Command::DumpPackIndexFile { pack_id } => match repo.dump_pack_index_file(&pack_id) {
Ok(_) => (),
Err(e) => eprintln!("Failed to dump pack index file: {}", e),
},
Command::HashObject { file } => match hash_object(&file) {
Ok(hash) => println!("{}", hex::encode(hash)),
Err(e) => eprintln!("Failed to hash object: {}", e),
},
Command::Clone { repo } => match clone(&repo).await {
Ok(_) => (),
Err(e) => eprintln!("Failed to clone: {}", e),
},
}
Ok(())

View File

@ -2,6 +2,7 @@ use crate::repository::Repository;
use crate::{error::RuntimeError, kind::Kind};
use anyhow::{anyhow, Context, Result};
use flate2::{write::ZlibEncoder, Compression};
use sha1::{Digest, Sha1};
use std::io::Write;
use std::{
@ -77,12 +78,12 @@ impl Repository {
let content = std::fs::read(file)?;
Ok(self.write_object(Kind::Blob(false), &content)?)
self.write_object(Kind::Blob(false), &content)
}
pub fn write_object(&self, kind: Kind, content: &[u8]) -> Result<[u8; 20]> {
let mut hasher = Sha1::new();
hasher.update(format!("{} {}\0", kind.string(), content.len()).as_bytes());
hasher.update(format!("{} {}\0", kind, content.len()).as_bytes());
hasher.update(content);
let hash = hasher.finalize().into();
let hash_str = hex::encode(hash);
@ -100,9 +101,8 @@ impl Repository {
let file_out_fd = File::create(target_file).context("could not open target file")?;
let mut zlib_out = ZlibEncoder::new(file_out_fd, Compression::default());
write!(zlib_out, "{} {}\0", kind.string(), content.len())
.context("could not write header")?;
zlib_out.write(content)?;
write!(zlib_out, "{} {}\0", kind, content.len()).context("could not write header")?;
zlib_out.write_all(content)?;
zlib_out
.finish()
.context("could not compress or write file")?;
@ -111,6 +111,18 @@ impl Repository {
}
}
/// Hash a file as a git blob without writing it to the object store:
/// sha1("blob <len>\0" + contents).
pub fn hash_object(file: &Path) -> Result<[u8; 20]> {
    let content = std::fs::read(file)?;
    let header = format!("{} {}\0", Kind::Blob(false), content.len());
    let mut hasher = Sha1::new();
    hasher.update(header.as_bytes());
    hasher.update(&content);
    Ok(hasher.finalize().into())
}
fn is_path_in_repo(repo_path: &Path, file_path: &Path) -> Result<bool> {
// Convert both paths to absolute paths
let repo_canonical = repo_path.canonicalize()?;
@ -178,7 +190,7 @@ impl<R: BufRead> Object<R> {
format!(
"{:0>6} {} {} {:name_len$}",
entry.mode,
entry.kind.string(),
entry.kind,
hash,
entry.name,
name_len = max_name_len
@ -193,3 +205,31 @@ impl<R: BufRead> Object<R> {
Ok(res)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use hex::FromHex;
    use std::io::Cursor;
    /// Checks that `hash_object` produces the expected git blob id for a
    /// small fixture file. (The constructed `Object` itself is unused —
    /// it only demonstrates building one over in-memory data.)
    #[test]
    fn test_object_string() {
        let data = b"hello";
        let _obj = Object {
            kind: Kind::Blob(true),
            _size: 5,
            data: Cursor::new(data),
        };
        // Write the fixture to a temp path so hash_object can read it.
        let temp_file = std::env::temp_dir().join("temp_file");
        let mut file = File::create(&temp_file).unwrap();
        file.write_all(data).unwrap();
        let res = hash_object(&temp_file);
        // Expected value is sha1("blob 5\0hello").
        assert_eq!(
            res.unwrap(),
            <[u8; 20]>::from_hex("b6fc4c620b67d95f953a5c1c1230aaab5db5a1b0").unwrap(),
        );
    }
}

427
src/pack.rs Normal file
View File

@ -0,0 +1,427 @@
use std::{
fs::File,
io::{BufReader, Cursor, Read, Seek, SeekFrom},
path::Path,
};
use anyhow::Error;
use flate2::read::ZlibDecoder;
use sha1::{Digest, Sha1};
use crate::repository::Repository;
/// 12-byte header at the start of every pack file.
#[derive(Debug)]
#[allow(dead_code)]
struct PackHeader {
    signature: [u8; 4], // always "PACK"
    version: u32,       // this parser only accepts version 2
    num_objects: u32,   // number of entries following the header
}
/// One resolved object from a pack (deltas already applied).
#[derive(Debug)]
#[allow(dead_code)]
struct PackObject {
    object_type: PackObjectType,
    object_size: u32,     // size of the decompressed (patched) payload
    object_data: Vec<u8>, // decompressed payload bytes
    pos: u64,             // file offset where this entry starts
    end_pos: u64,         // file offset just past the entry
}
/// Pack entry type codes; code 5 is reserved by the format and unused.
#[derive(Debug, PartialEq, Eq)]
enum PackObjectType {
    Commit,
    Tree,
    Blob,
    Tag,
    OfsDelta,
    RefDelta,
}
impl PackObjectType {
fn from_u8(value: u8) -> Result<PackObjectType, Error> {
match value {
1 => Ok(PackObjectType::Commit),
2 => Ok(PackObjectType::Tree),
3 => Ok(PackObjectType::Blob),
4 => Ok(PackObjectType::Tag),
6 => Ok(PackObjectType::OfsDelta),
7 => Ok(PackObjectType::RefDelta),
_ => Err(Error::msg("Unknown object type")),
}
}
}
impl std::fmt::Display for PackObjectType {
    /// Render the type the way git names it on the command line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            PackObjectType::Commit => "commit",
            PackObjectType::Tree => "tree",
            PackObjectType::Blob => "blob",
            PackObjectType::Tag => "tag",
            PackObjectType::OfsDelta => "ofs-delta",
            PackObjectType::RefDelta => "ref-delta",
        })
    }
}
/// Read and validate the 12-byte pack header: "PACK" magic, version 2,
/// then the object count.
fn parse_pack_header(file: &mut File) -> Result<PackHeader, Error> {
    let mut header = [0u8; 12];
    file.read_exact(&mut header)?;
    let signature: [u8; 4] = header[0..4].try_into().unwrap();
    if &signature != b"PACK" {
        return Err(Error::msg("Invalid pack file"));
    }
    let version = u32::from_be_bytes(header[4..8].try_into().unwrap());
    if version != 2 {
        return Err(Error::msg("Invalid pack file version"));
    }
    let num_objects = u32::from_be_bytes(header[8..12].try_into().unwrap());
    Ok(PackHeader {
        signature,
        version,
        num_objects,
    })
}
/// Read a little-endian base-128 varint: each byte contributes its low
/// 7 bits, and a set MSB means more bytes follow. Used for the size
/// fields inside delta payloads.
fn read_vli_le<R>(file: &mut BufReader<R>) -> Result<u32, Error>
where
    R: Read,
{
    let mut value: u32 = 0;
    let mut shift = 0;
    let mut byte = [0u8; 1];
    loop {
        file.read_exact(&mut byte)?;
        value |= ((byte[0] & 0x7f) as u32) << shift;
        if byte[0] & 0x80 == 0 {
            return Ok(value);
        }
        shift += 7;
    }
}
/// Read the big-endian variable-length integer used in pack entry
/// headers; with `offset` set, decode git's ofs-delta offset encoding.
///
/// Each byte contributes its low 7 bits (most significant first); a set
/// MSB means another byte follows. For offsets, 1 is added per
/// continuation byte *after* shifting it in, so the next iteration
/// computes ((val + 1) << 7) | next7 — git's scheme for making every
/// multi-byte encoding unambiguous.
fn read_vli_be(file: &mut File, offset: bool) -> Result<u32, Error> {
    let mut val: u32 = 0;
    loop {
        let mut byte = [0; 1];
        file.read_exact(&mut byte)?;
        let byt = byte[0] as u32;
        val = (val << 7) | (byt & 0x7f);
        if byt & 0x80 == 0 {
            break;
        }
        // Only reached when another byte follows; the final byte never
        // gets the +1 because we break above first.
        if offset {
            val += 1;
        }
    }
    Ok(val)
}
fn decompress_file(file: &mut File) -> Result<Vec<u8>, Error> {
let mut object_data = Vec::new();
let pos = file.stream_position()?;
let mut zlib_decoder = ZlibDecoder::new(&mut *file);
zlib_decoder.read_to_end(&mut object_data)?;
let read_bytes = zlib_decoder.total_in();
file.seek(std::io::SeekFrom::Start(pos + read_bytes))?;
Ok(object_data)
}
/// Apply a delta (compressed at the current file position) against
/// `base_obj`, producing the patched object.
///
/// Delta payload: two varints (base size, result size) followed by a
/// stream of instructions. MSB set = copy from the base (the low 7 mask
/// bits select which of 4 offset bytes and 3 size bytes follow); MSB
/// clear = insert the next `n` literal bytes.
fn make_delta_obj(
    file: &mut File,
    base_obj: PackObject,
    object_size: u32,
) -> Result<PackObject, Error> {
    let current_pos = file.stream_position()?;
    let object_data = decompress_file(file)?;
    assert_eq!(object_data.len(), object_size as usize);
    let mut fp2 = BufReader::new(Cursor::new(object_data.as_slice()));
    let _base_obj_size = read_vli_le(&mut fp2)?;
    let patched_obj_size = read_vli_le(&mut fp2)?;
    let mut obj_data = Vec::new();
    while fp2.stream_position()? < object_data.len() as u64 {
        let mut byte = [0; 1];
        fp2.read_exact(&mut byte)?;
        let byt = byte[0];
        if byt == 0x00 {
            // Instruction 0 is reserved; skip it.
            continue;
        }
        if byt & 0x80 != 0 {
            // Copy from the base object. The format defines 4 offset bytes
            // and 3 size bytes (mask bits 0-6); the previous version only
            // read 2 size bytes and ignored mask bit 6, corrupting any
            // delta that used the third size byte.
            let mut vals = [0u8; 7];
            for (i, val) in vals.iter_mut().enumerate() {
                if byt & (1 << i) != 0 {
                    fp2.read_exact(&mut byte)?;
                    *val = byte[0];
                }
            }
            let start = u32::from_le_bytes(vals[0..4].try_into().expect("4 bytes"));
            let nbytes = u32::from_le_bytes([vals[4], vals[5], vals[6], 0]);
            // A size of 0 encodes 0x10000 bytes.
            let nbytes = if nbytes == 0 { 0x10000 } else { nbytes };
            obj_data.extend_from_slice(
                &base_obj.object_data[start as usize..(start + nbytes) as usize],
            );
        } else {
            // Insert: the next `nbytes` literal bytes from the delta stream.
            let nbytes = byt & 0x7f;
            let mut data = vec![0; nbytes as usize];
            fp2.read_exact(&mut data)?;
            obj_data.extend_from_slice(&data);
        }
    }
    assert_eq!(obj_data.len(), patched_obj_size as usize);
    Ok(PackObject {
        object_type: base_obj.object_type,
        object_size: patched_obj_size,
        object_data: obj_data,
        pos: current_pos,
        end_pos: file.stream_position()?,
    })
}
/// Resolve an ofs-delta entry: read the backwards distance to the base
/// object, parse the base (recursively, if it is itself a delta), then
/// apply this entry's delta from the saved position.
fn parse_pack_ofs_delta_object(
    file: &mut File,
    object_size: u32,
    fpos: u64,
) -> Result<PackObject, Error> {
    // Distance (in bytes, backwards) from this entry's header to the base.
    let offset = read_vli_be(file, true)?;
    let base_obj_offset = fpos - offset as u64;
    let prev_pos = file.stream_position()?;
    file.seek(SeekFrom::Start(base_obj_offset))?;
    let base_obj = parse_pack_entry(file)?;
    // After resolution the base must be one of the concrete object types
    // (parse_pack_entry resolves nested deltas before returning).
    assert!([
        PackObjectType::Commit,
        PackObjectType::Tree,
        PackObjectType::Blob,
        PackObjectType::Tag
    ]
    .contains(&base_obj.object_type));
    // Return to the delta payload and apply it to the base.
    file.seek(SeekFrom::Start(prev_pos))?;
    make_delta_obj(file, base_obj, object_size)
}
/// Read one object entry at the current file position.
///
/// The first header byte packs the 3-bit type code (bits 4-6) and the low
/// 4 bits of the size; further size bits arrive 7 at a time (little-endian
/// order) while the continuation bit (0x80) is set.
fn parse_pack_entry(file: &mut File) -> Result<PackObject, Error> {
    let object_pos = file.stream_position()?;
    let mut byte = [0; 1];
    file.read_exact(&mut byte)?;
    let object_type: u8 = (byte[0] & 0x70) >> 4;
    let object_data;
    let mut object_size: u32 = (byte[0] & 0x0f) as u32;
    let mut bshift = 4;
    while (byte[0] & 0x80) == 0x80 {
        file.read_exact(&mut byte)?;
        object_size += (byte[0] as u32 & 0x7f) << bshift;
        bshift += 7;
    }
    match PackObjectType::from_u8(object_type)? {
        PackObjectType::Commit
        | PackObjectType::Tree
        | PackObjectType::Blob
        | PackObjectType::Tag => {
            // Concrete object: payload is a single zlib stream.
            object_data = decompress_file(file)?;
            assert_eq!(object_data.len(), object_size as usize);
        }
        PackObjectType::OfsDelta => {
            // Delta against an earlier entry in the same pack; resolved
            // recursively and returned as a concrete object.
            return parse_pack_ofs_delta_object(file, object_size, object_pos);
        }
        // Deltas that name their base by sha1 are not supported yet.
        PackObjectType::RefDelta => unimplemented!(),
    }
    Ok(PackObject {
        object_type: PackObjectType::from_u8(object_type)?,
        object_size,
        object_data,
        pos: object_pos,
        end_pos: file.stream_position()?,
    })
}
impl Repository {
    /// Dump every "pack-<id>.pack" file under `.git/objects/pack`.
    pub fn dump_pack_files(&self) -> Result<(), Error> {
        let pack_dir = self.path.join(".git/objects/pack");
        for entry in pack_dir.read_dir()? {
            let entry = entry?;
            let path = entry.file_name();
            let path_str = path.to_str().unwrap();
            if path_str.starts_with("pack-") && path_str.ends_with(".pack") {
                // "pack-<id>.pack" -> "<id>"
                let pack_id = &path_str[5..path_str.len() - 5];
                self.dump_pack_file(pack_id)?;
            }
        }
        Ok(())
    }

    /// Print one line per object in the pack at `path`:
    /// "<sha1> <type> <size> <entry-bytes> <offset>".
    pub fn dump_pack(&self, path: &Path) -> Result<(), Error> {
        let mut file = File::open(path)?;
        let header = parse_pack_header(&mut file)?;
        println!("{:?}", header);
        for _ in 0..header.num_objects {
            let obj = parse_pack_entry(&mut file)?;
            // Object id = sha1 over "<type> <size>\0" + payload, the same
            // scheme as loose objects.
            let mut hasher = Sha1::new();
            hasher.update(format!("{} {}\0", obj.object_type, obj.object_size).as_bytes());
            hasher.update(obj.object_data);
            println!(
                "{} {} {} {} {}",
                hex::encode(hasher.finalize()),
                obj.object_type,
                obj.object_size,
                obj.end_pos - obj.pos,
                obj.pos,
            );
        }
        // Trailing pack checksum (sha1 of everything before it).
        let mut checksum_pack = [0; 20];
        file.read_exact(&mut checksum_pack)?;
        Ok(())
    }

    /// Dump the pack with the given id from this repository's pack dir.
    pub fn dump_pack_file(&self, pack_id: &str) -> Result<(), Error> {
        let file_path = self
            .path
            .join(format!(".git/objects/pack/pack-{}.pack", pack_id));
        self.dump_pack(&file_path)
    }

    /// Dump a version-2 pack index (.idx): fanout table, object names,
    /// crc32s, and 32-bit offsets.
    pub fn dump_pack_index_file(&self, pack_id: &str) -> Result<(), Error> {
        let file_path = self
            .path
            .join(format!(".git/objects/pack/pack-{}.idx", pack_id));
        let mut file = File::open(file_path)?;
        let mut buf = [0; 4];
        file.read_exact(&mut buf)?;
        // The v2 index magic is "\xfftOc". The previous check compared
        // against "t0c" (digit zero) with an inverted test, so corrupt
        // files passed validation.
        if buf != [0xff, b't', b'O', b'c'] {
            return Err(Error::msg("Invalid pack index magic"));
        }
        file.read_exact(&mut buf)?;
        let version = u32::from_be_bytes(buf);
        println!("{}", version);
        if version != 2 {
            return Err(Error::msg("Invalid pack index version"));
        }
        // Fanout table: 256 big-endian u32s; entry N counts objects whose
        // first name byte is <= N, so the final entry is the total count.
        let mut num_objects: u32 = 0;
        let mut fanout_table = [0u32; 256];
        let mut buf = [0u8; 256 * 4];
        file.read_exact(&mut buf)?;
        for (idx, fanout_record) in fanout_table.iter_mut().enumerate() {
            num_objects = u32::from_be_bytes(buf[idx * 4..idx * 4 + 4].try_into().unwrap());
            *fanout_record = num_objects;
        }
        // Sorted 20-byte object names.
        let mut names = vec![0u8; 20 * num_objects as usize];
        file.read_exact(&mut names)?;
        // crc32 of each object's packed representation.
        let mut crc32_buf = vec![0u8; 4 * num_objects as usize];
        file.read_exact(&mut crc32_buf)?;
        let crc32: Vec<u32> = crc32_buf
            .chunks_exact(4)
            .map(|chunk| u32::from_be_bytes(chunk.try_into().unwrap()))
            .collect();
        // 32-bit offsets. NOTE(review): entries with the MSB set index a
        // 64-bit offset table (packs > 2 GiB) which is not parsed here —
        // confirm none of the repository's packs need it.
        let mut offsets_buf = vec![0u8; 4 * num_objects as usize];
        file.read_exact(&mut offsets_buf)?;
        let offsets: Vec<u32> = offsets_buf
            .chunks_exact(4)
            .map(|chunk| u32::from_be_bytes(chunk.try_into().unwrap()))
            .collect();
        for i in 0..num_objects {
            let offset = offsets[i as usize];
            let crc32 = crc32[i as usize];
            let name = &names[(i * 20) as usize..(i * 20 + 20) as usize];
            println!(
                "{} offset: 0x{:x} crc32: {}",
                hex::encode(name),
                offset,
                crc32
            );
        }
        // Trailing checksums: a copy of the pack's checksum, then the
        // checksum of the idx file itself.
        let mut checksum_pack = [0; 20];
        file.read_exact(&mut checksum_pack)?;
        let mut checksum_idx = [0; 20];
        file.read_exact(&mut checksum_idx)?;
        Ok(())
    }
}

View File

@ -2,7 +2,7 @@ use anyhow::Result;
use std::{
env,
fs::{create_dir, read_to_string},
path::PathBuf,
path::{Path, PathBuf},
};
pub struct Repository {
@ -38,12 +38,13 @@ impl Repository {
let ignore_content = read_to_string(ignore_path)?;
self.ignore = ignore_content.lines().map(String::from).collect();
self.ignore.push("/.git/".to_string());
Ok(true)
}
pub fn init_repository(&mut self, path: &PathBuf) -> Result<PathBuf> {
self.path = path.clone();
pub fn init_repository(&mut self, path: &Path) -> Result<PathBuf> {
self.path = path.to_path_buf();
let git_dir = self.path.join(".git");
create_dir(&git_dir)?;