#!/usr/bin/env bash
#
# Back up a git branch commit by commit.
#
# The repository is cloned into a temporary directory, then the files touched
# by each commit of the branch (oldest first) are written to
# ${DATA_DIR}/<commit sha>/.
#
# This script uses bash-only syntax ([[ ]], arrays), so it must be run by
# bash; a POSIX /bin/sh such as dash cannot parse it.

# Abort on the first command that fails.
set -e

# Repository to back up (overridden with -r).
REPOSITORY=https://github.com/descartes-underwriting/devops-technical-test-data.git

# Branch whose history is walked (overridden with -b).
BRANCH=main

# Destination directory, always absolute (overridden with -d).
DATA_DIR="$(pwd)/data"

# Maximum number of commits to back up in one run (overridden with -n).
# The counter is incremented before it is compared, so 0 never matches and
# therefore means "no limit".
MAX_NUM=0

# 1 = log every file that is written (enabled with -v).
VERBOSE=0

# 1 = ignore the ${DATA_DIR}/.track file and walk the whole history again,
# which is useful to fill in missing commits (enabled with -i).
# 0 = continue after the commit recorded in ${DATA_DIR}/.track, if any.
IGNORE_TRACK=0
#######################################
# Print the command-line synopsis and an example invocation.
# Globals:   REPOSITORY, BRANCH, DATA_DIR (read, for the example line)
# Outputs:   two lines on stdout
#######################################
usage() {
  echo "Usage: $0 [-r <repository url>] [-b <branch>] [-d <absolute path dest>] [-n <num>] [-i] [-v] [-x] [-h]"
  echo "Ex: $0 -r $REPOSITORY -b $BRANCH -d $DATA_DIR"
}

#######################################
# Parse the command-line options into the global settings.
# Globals:   REPOSITORY, BRANCH, DATA_DIR, MAX_NUM, VERBOSE, IGNORE_TRACK
#            (written)
# Arguments: the script's command-line arguments
# Outputs:   usage on stdout for -h; error messages on stderr
# Returns:   0 on success; exits 0 after -h, exits 1 on an unknown option,
#            a missing option argument or a non-numeric -n value
#######################################
parse_args() {
  # Local OPTIND so the function always starts parsing at the first argument.
  local opt OPTARG
  local OPTIND=1

  # The leading ':' selects silent error reporting: getopts then sets opt to
  # ':' for a missing argument and to '?' for an unknown option.
  while getopts ":r:b:d:vxn:hi" opt; do
    case "${opt}" in
      r)
        REPOSITORY=${OPTARG}
        ;;
      b)
        BRANCH=${OPTARG}
        ;;
      d)
        DATA_DIR=${OPTARG}
        if [[ "${DATA_DIR}" != /* ]]; then
          # The rest of the script changes directory, so the destination
          # must be absolute.
          DATA_DIR="$(pwd)/${DATA_DIR}"
        fi
        ;;
      i)
        IGNORE_TRACK=1
        ;;
      n)
        # MAX_NUM is later compared numerically; reject anything else here.
        case "${OPTARG}" in
          '' | *[!0-9]*)
            echo "Option -n requires a non-negative integer, got '${OPTARG}'" >&2
            exit 1
            ;;
        esac
        MAX_NUM=${OPTARG}
        ;;
      v)
        VERBOSE=1
        ;;
      x)
        # Trace every command from here on.
        set -x
        ;;
      h)
        usage
        exit 0
        ;;
      :)
        echo "Option -${OPTARG} requires an argument" >&2
        exit 1
        ;;
      \?)
        echo "Invalid option -${OPTARG}" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Split unquoted expansions on newlines only: the list of commit shas is
# newline separated.
IFS=$'\n'

GIT=git
# core.quotepath=false makes git print unusual file names as-is instead of
# double-quoting and escaping them.
GIT_OPTS=(-c core.quotepath=false)

# Create the backup destination if needed (mkdir -p is a no-op when it exists).
mkdir -p -- "${DATA_DIR}" || exit 1

# Throw-away directory holding the clone; removed when the script exits,
# whatever the exit path.
TMPDIR=$(mktemp -d) || exit 1
trap 'rm -rf -- "${TMPDIR:?}"' EXIT

# Clone into an explicit directory and check each step on its own: in
# "clone && cd", a failed clone would not stop the script under "set -e".
"${GIT}" clone -- "${REPOSITORY}" "${TMPDIR}/repo" || exit 1
cd "${TMPDIR}/repo" || exit 1
# Prefix of the revision range handed to "git log".
#  - empty:   walk the whole history of the branch;
#  - "<sha>..": walk only the commits that come after the tracked commit.
CURRENT_COMMIT=""
if [[ -s "${DATA_DIR}/.track" && "${IGNORE_TRACK}" -eq 0 ]]; then
  CURRENT_COMMIT="$(cat -- "${DATA_DIR}/.track").."
fi

# List all commit shas of the branch, from older to newer.
# The range is spelled out as "<sha>..remotes/origin/<branch>": a bare
# "<sha>.." means "<sha>..HEAD", which would pull in commits of the clone's
# default branch when another branch is being backed up.
# The trailing "--" keeps git from reading the revision as a path.
COMMIT_SHAS=$("${GIT}" "${GIT_OPTS[@]}" log --reverse --pretty=format:"%H" \
  "${CURRENT_COMMIT}remotes/origin/${BRANCH}" --) || {
  echo "Unable to list the commits of branch ${BRANCH} (use -i to ignore ${DATA_DIR}/.track)" >&2
  exit 1
}
#######################################
# Back up the files touched by one commit into ${DATA_DIR}/<sha>/.
#
# File names are read NUL-delimited from git, so names containing spaces,
# quotes, glob or regex characters are handled verbatim. A file that does
# not exist in the commit's tree was deleted by that commit and is skipped.
#
# Globals:   GIT, GIT_OPTS, DATA_DIR, VERBOSE (read)
# Arguments: $1 - sha of the commit to back up
# Outputs:   progress messages on stdout
# Returns:   0 on success, 1 if a directory or file could not be written
#######################################
backup_commit() {
  local sha=$1
  local target="${DATA_DIR}/${sha}"
  local file
  local found=0

  mkdir -p -- "${target}" || return 1

  # The list is read on fd 3 so that stdin stays untouched for the commands
  # inside the loop.
  while IFS= read -r -d '' -u 3 file; do
    [[ -n "${file}" ]] || continue
    found=1

    # "<sha>:<path>" names the blob as it exists in that commit; when it does
    # not resolve, the commit deleted the file.
    if ! "${GIT}" "${GIT_OPTS[@]}" cat-file -e "${sha}:${file}" 2>/dev/null; then
      echo "Skipping ${file} as file was deleted in this commit"
      continue
    fi

    if [[ "${VERBOSE}" -eq 1 ]]; then
      echo "Writing ${target}/${file}"
    fi

    # ${file} contains path/to/file: create the parent directory only for
    # files that are really written, so that a commit made of deletions
    # leaves an empty folder and gets its .gitkeep marker below.
    mkdir -p -- "$(dirname -- "${target}/${file}")" || return 1
    "${GIT}" "${GIT_OPTS[@]}" show "${sha}:${file}" > "${target}/${file}" || return 1
  done 3< <("${GIT}" "${GIT_OPTS[@]}" show --pretty= --name-only -z "${sha}")

  if [[ "${found}" -eq 0 ]]; then
    # merge commit, etc. There is no file here.
    echo "No file was found in commit ${sha}"
  fi

  # If the folder is empty, keep track that the commit was backed up.
  if [[ -z "$(ls -A -- "${target}")" ]]; then
    echo "Folder ${target} is empty. Marking it to keep as it in the backup"
    touch -- "${target}/.gitkeep" || return 1
  fi
}

NUM_ADDED=0

# Unquoted on purpose: ${COMMIT_SHAS} holds one sha per line and is split
# into words here (shas are hexadecimal, so no globbing can occur).
for COMMIT_SHA in ${COMMIT_SHAS}; do
  if [[ -d "${DATA_DIR}/${COMMIT_SHA}" ]]; then
    # Commit already backed up; skipping.
    continue
  fi

  echo "new commit: ${COMMIT_SHA}"

  if ! backup_commit "${COMMIT_SHA}"; then
    echo "Backup of commit ${COMMIT_SHA} failed; ${DATA_DIR}/${COMMIT_SHA} may be incomplete" >&2
    exit 1
  fi

  NUM_ADDED=$((NUM_ADDED + 1))

  # Remember the last commit backed up so that the next run can resume
  # after it (printf instead of the non-portable "echo -n").
  printf '%s' "${COMMIT_SHA}" > "${DATA_DIR}/.track"

  # MAX_NUM=0 never matches because NUM_ADDED is at least 1 here.
  if [[ "${NUM_ADDED}" -eq "${MAX_NUM}" ]]; then
    echo "Max commit to backup reached; stopping."
    break
  fi
done

echo "done: ${NUM_ADDED} commits"