2024-10-03 09:35:37 +02:00

110 lines
2.3 KiB
Bash

#!/bin/sh
# Incrementally back up the commits of a git branch.
#
# The repository is cloned into a temporary directory and, for every commit of
# the selected branch (oldest first), the files touched by that commit are
# written under DATA_DIR/<commit sha>/. Commits whose directory already exists
# are skipped.
#
# Options:
#   -r URL   repository to clone
#   -b NAME  branch to walk (default: main)
#   -d DIR   directory receiving the backups (default: ./data)
#   -n NUM   stop after NUM newly backed-up commits; 0 (default) = no limit
#   -v       print every file written
#   -x       trace commands (set -x)
set -e

REPOSITORY=https://github.com/descartes-underwriting/devops-technical-test-data.git
BRANCH=main
DATA_DIR=$(pwd)/data
MAX_NUM=0
VERBOSE=0

# The leading ':' in the optstring selects silent error reporting: getopts
# then returns ':' for a missing argument and '?' for an unknown option, with
# the offending option letter in OPTARG.
while getopts ":r:b:d:vxn:" opt
do
  case "${opt}" in
    r)
      REPOSITORY=${OPTARG}
      ;;
    b)
      BRANCH=${OPTARG}
      ;;
    d)
      DATA_DIR=${OPTARG}
      ;;
    n)
      # MAX_NUM is later compared with `test -eq`; reject anything that is
      # not a plain non-negative integer up front.
      case "${OPTARG}" in
        '' | *[!0-9]*)
          echo "Option -n requires a non-negative integer, got '${OPTARG}'" >&2
          exit 1
          ;;
      esac
      MAX_NUM=${OPTARG}
      ;;
    v)
      VERBOSE=1
      ;;
    x)
      set -x
      ;;
    :)
      echo "Option -${OPTARG} requires an argument" >&2
      exit 1
      ;;
    \?)
      # Escaped so that it matches a literal '?' only, not any character.
      echo "Invalid option -${OPTARG}" >&2
      exit 1
      ;;
  esac
done
# Split unquoted expansions on newlines only: the commit list and the per-commit
# file list below hold one entry per line, and paths may contain spaces.
IFS="
"
GIT=git

# Create the backup root, then resolve it to an absolute path. The script
# changes directory below, which would otherwise re-anchor a relative -d
# argument inside the temporary clone.
mkdir -p -- "${DATA_DIR}"
DATA_DIR=$(CDPATH= cd -- "${DATA_DIR}" && pwd)

# Work in a throw-away directory. A dedicated variable is used instead of
# TMPDIR, which mktemp and git themselves consult.
WORK_DIR=$(mktemp -d)
# Remove the clone on every exit path; the signal trap turns HUP/INT/TERM into
# a regular exit so the EXIT trap also runs in shells that do not fire it on
# signals.
trap 'rm -rf -- "${WORK_DIR}"' EXIT
trap 'exit 1' HUP INT TERM

cd -- "${WORK_DIR}"
# Clone into a fixed directory name so no glob is needed to enter it, and run
# the two steps separately so a failed clone stops the script under `set -e`.
"${GIT}" clone -- "${REPOSITORY}" repo
cd repo

# From here on nothing needs pathname expansion; disabling it keeps file names
# containing *, ? or [ from being globbed when the lists are iterated.
set -f

# List all commit sha, from older to newer.
COMMIT_SHAS=$("${GIT}" -c core.quotepath=false log --reverse --pretty=format:"%H" "remotes/origin/${BRANCH}" --)

NUM_ADDED=0
for COMMIT_SHA in ${COMMIT_SHAS}
do
  TARGET_BACKUP_SHA=${DATA_DIR}/${COMMIT_SHA}
  if test -d "${TARGET_BACKUP_SHA}"
  then
    # Commit already backed up; skipping.
    continue
  fi
  echo "new commit: ${COMMIT_SHA}"

  # Some file names in the history contain characters that git would normally
  # print quoted/escaped. `-c core.quotepath=false` together with `-z` makes
  # git emit the paths verbatim, NUL-terminated; the NULs are converted to
  # newlines so the list splits on IFS. `--diff-filter=d` leaves out deleted
  # files, which have no content at this commit for `git show SHA:path`.
  FILES=$("${GIT}" -c core.quotepath=false show --pretty= --name-only --diff-filter=d -z "${COMMIT_SHA}" | tr '\0' '\n')
  if test -z "${FILES}"
  then
    # Merge commit, deletion-only commit, etc. There is no file to export.
    echo "No file was found in commit ${COMMIT_SHA}; skipping"
    continue
  fi

  mkdir -p -- "${TARGET_BACKUP_SHA}"
  for FILE in ${FILES}
  do
    # ${FILE} is a repository-relative path (path/to/file); recreate its
    # directory below the commit's backup directory.
    TARGET_DIR=$(dirname -- "${FILE}")
    mkdir -p -- "${TARGET_BACKUP_SHA}/${TARGET_DIR}"
    if test "${VERBOSE}" -eq 1
    then
      echo "Writing ${TARGET_BACKUP_SHA}/${FILE}"
    fi
    "${GIT}" -c core.quotepath=false show "${COMMIT_SHA}:${FILE}" > "${TARGET_BACKUP_SHA}/${FILE}"
  done

  NUM_ADDED=$((NUM_ADDED + 1))
  # NUM_ADDED is at least 1 here, so a MAX_NUM of 0 never matches (no limit).
  if test "${NUM_ADDED}" -eq "${MAX_NUM}"
  then
    echo "Max commit to backup reached; stopping."
    break
  fi
done
echo "done: ${NUM_ADDED}"