descartes-technical-test/scripts/backup.sh

166 lines
4.3 KiB
Bash
Raw Permalink Normal View History

2024-10-02 23:14:50 +02:00
#!/usr/bin/env bash
# Back up every commit of a git branch into one directory per commit SHA.
#
# This script needs bash: it uses arrays, [[ ]] and `set -o pipefail`,
# none of which are guaranteed by a plain POSIX /bin/sh.

# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail
# Default configuration; each value can be overridden by a command-line flag.

# Remote repository to back up (-r) and the branch to walk (-b).
REPOSITORY="https://github.com/descartes-underwriting/devops-technical-test-data.git"
BRANCH="main"

# Destination of the backup (-d); always kept as an absolute path.
DATA_DIR="$(pwd)/data"

# Maximum number of commits to back up in one run (-n); 0 means unlimited.
MAX_NUM=0

# Set to 1 to print each file as it is written (-v).
VERBOSE=0

# Tracking: when 0 (and -i is not used) the run continues where the previous
# one stopped. Set to 1 to ignore the tracking file, which is useful to fill
# in missing commits.
IGNORE_TRACK=0
#######################################
# Parse the command-line flags into the script's configuration variables.
# Globals:   REPOSITORY, BRANCH, DATA_DIR, MAX_NUM, VERBOSE, IGNORE_TRACK
#            (written); REPOSITORY, BRANCH, DATA_DIR (read by -h)
# Arguments: the script's command-line arguments
# Outputs:   usage text on stdout for -h; error messages on stderr
# Returns:   exits 0 after -h; exits 1 on an unknown flag, a missing
#            argument or a non-numeric -n value
#######################################
parse_args() {
  # A local OPTIND makes the function safe to call more than once.
  local opt OPTIND=1

  # The leading ':' selects silent error reporting: getopts then yields ':'
  # for a missing argument and '?' for an unknown flag, with the offending
  # flag letter in OPTARG.
  while getopts ":r:b:d:vxn:hi" opt; do
    case "${opt}" in
      r)
        REPOSITORY=${OPTARG}
        ;;
      b)
        BRANCH=${OPTARG}
        ;;
      d)
        DATA_DIR=${OPTARG}
        if [[ "${DATA_DIR}" != /* ]]; then
          # The script changes directory later on, so keep an absolute path.
          DATA_DIR="$(pwd)/${DATA_DIR}"
        fi
        ;;
      i)
        IGNORE_TRACK=1
        ;;
      n)
        # Reject anything that is not a non-negative integer; otherwise the
        # later numeric comparison fails and the limit is silently ignored.
        case "${OPTARG}" in
          '' | *[!0-9]*)
            echo "Invalid value for -n: '${OPTARG}' (expected a non-negative integer)" >&2
            exit 1
            ;;
        esac
        MAX_NUM=${OPTARG}
        ;;
      v)
        VERBOSE=1
        ;;
      x)
        set -x
        ;;
      h)
        echo "Usage: $0 [-r <repository url>] [-b <branch>] [-d <path dest>] [-n <num>] [-v] [-x] [-i] [-h]"
        echo
        echo "Ex: $0 \\"
        echo " -r $REPOSITORY \\"
        echo " -b $BRANCH \\"
        echo " -d $DATA_DIR"
        echo
        echo "Available flags:"
        echo " -r <repository url> - set remote repository url (suffixed by .git) to backup"
        echo " -b <branch> - branch to backup"
        echo " -d </path/to/path> - where to backup; relative or absolute"
        echo " -n <num> - number of commit to backup (default: unlimited)"
        echo " -i - ignore tracking information - to restart from scratch"
        echo " -v - verbose mode"
        echo " -x - debug mode"
        echo " -h - this help"
        exit 0
        ;;
      :)
        echo "Option -${OPTARG} requires an argument" >&2
        exit 1
        ;;
      \?)
        # Escaped on purpose: a bare ? is a glob that matches any character.
        echo "Invalid option -${OPTARG}" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Split unquoted expansions on newlines only, so that the newline-separated
# lists built below are not broken on spaces.
IFS="
"
GIT=git
# core.quotepath=false makes git print unusual bytes in path names as they
# are instead of octal-escaping and quoting them.
GIT_OPTS=(-c core.quotepath=false)

# mkdir -p is a no-op when the destination already exists.
mkdir -p -- "${DATA_DIR}"

# Clone into a throw-away directory that is removed on every exit path.
TMPDIR=$(mktemp -d)
trap 'rm -rf -- "${TMPDIR:?}"' EXIT
cd "${TMPDIR}"

# Clone into an explicit directory name and run the two steps as separate
# commands: in `git clone ... && cd ./*` a failed clone does not trigger
# errexit, and the glob depends on what happens to be in the directory.
"${GIT}" clone -- "${REPOSITORY}" repository
cd repository
# Resume point: when a tracking file exists (and -i was not given), only the
# commits that come after the last backed-up commit are listed.
CURRENT_COMMIT=""
if [[ -s "${DATA_DIR}/.track" && "${IGNORE_TRACK}" -eq 0 ]]; then
  CURRENT_COMMIT="$(cat -- "${DATA_DIR}/.track").."
fi

# List all commit SHAs of the branch, from older to newer.
# The range prefix is glued to the branch ref ("<sha>..remotes/origin/<branch>").
# A bare "<sha>.." means "<sha>..HEAD", and HEAD is the clone's default branch,
# so passing it as a separate argument would also list commits that are not on
# ${BRANCH}. The trailing -- prevents the revision from being read as a path.
COMMIT_SHAS=$("${GIT}" "${GIT_OPTS[@]}" log --reverse --pretty=format:"%H" "${CURRENT_COMMIT}remotes/origin/${BRANCH}" --)
# Back up each listed commit into ${DATA_DIR}/<sha>/, oldest first, and record
# the last completed commit in ${DATA_DIR}/.track so a later run can resume.
NUM_ADDED=0
# COMMIT_SHAS is a whitespace-separated list of hex SHAs, so splitting it
# unquoted is safe (no glob characters can appear).
# shellcheck disable=SC2086
for COMMIT_SHA in ${COMMIT_SHAS}; do
  TARGET_BACKUP_SHA="${DATA_DIR}/${COMMIT_SHA}"
  if [[ -d "${TARGET_BACKUP_SHA}" ]]; then
    # commit already backed up; skipping
    continue
  fi
  echo "new commit: ${COMMIT_SHA}"

  # Some file names in the repository are malformed and hard to parse.
  # -c core.quotepath=false and -z make git print them raw and NUL-terminated;
  # tr then turns each NUL into a newline so the list fits in a variable.
  FILES=$("${GIT}" "${GIT_OPTS[@]}" show --pretty= --name-only -z "${COMMIT_SHA}" | tr '\0' '\n')

  if [[ -z "${FILES}" ]]; then
    # merge commit, etc. There is no file here.
    echo "No file was found in commit ${COMMIT_SHA}"
  fi

  # Build the backup in a staging directory and rename it once complete.
  # An interrupted run would otherwise leave a half-written <sha> directory
  # that the "already backed up" check above skips forever.
  STAGING_DIR="${TARGET_BACKUP_SHA}.partial"
  rm -rf -- "${STAGING_DIR:?}"
  mkdir -p -- "${STAGING_DIR}"

  # Read the list line by line on fd 3: names are never word-split or
  # glob-expanded, and commands in the loop body keep their own stdin.
  while IFS= read -r FILE <&3; do
    # An empty list still yields one empty line from the here-string.
    [[ -n "${FILE}" ]] || continue

    TARGET_DIR=$(dirname -- "${FILE}")
    mkdir -p -- "${STAGING_DIR}/${TARGET_DIR}"
    if [[ "${VERBOSE}" -eq 1 ]]; then
      echo "Writing ${TARGET_BACKUP_SHA}/${FILE}"
    fi

    # Retrieve file state (Added, Modified, Deleted). The :(literal) pathspec
    # magic stops git from treating wildcards in the name as a pattern.
    STATE=$("${GIT}" "${GIT_OPTS[@]}" show --name-status --pretty= "${COMMIT_SHA}" -- ":(literal)${FILE}" | cut -f1)
    if [[ "${STATE}" != "D" ]]; then
      # ${FILE} contains path/to/file
      "${GIT}" "${GIT_OPTS[@]}" show "${COMMIT_SHA}:${FILE}" > "${STAGING_DIR}/${FILE}"
    else
      echo "Skipping ${FILE} as file was deleted in this commit"
    fi
  done 3<<< "${FILES}"

  # If nothing was written, add a marker file so the commit still counts as
  # backed up.
  if [[ -z "$(ls -A -- "${STAGING_DIR}")" ]]; then
    echo "Folder ${TARGET_BACKUP_SHA} is empty. Marking it to keep as it in the backup"
    touch -- "${STAGING_DIR}/.gitkeep"
  fi

  # Publish the finished backup, then record progress.
  mv -- "${STAGING_DIR}" "${TARGET_BACKUP_SHA}"
  NUM_ADDED=$((NUM_ADDED + 1))
  printf '%s' "${COMMIT_SHA}" > "${DATA_DIR}/.track"

  # NUM_ADDED is at least 1 here, so MAX_NUM=0 never matches (unlimited).
  if test "${NUM_ADDED}" -eq "${MAX_NUM}"; then
    echo "Max commit to backup reached; stopping."
    break
  fi
done
echo "done: ${NUM_ADDED} commits"