#!/usr/bin/env bash
#
# Back up every commit of a git branch into its own directory.
#
# The repository is cloned into a scratch directory. Then, for each commit of
# the branch (oldest first), every file touched by the commit is written, with
# the content it has in that commit, to DATA_DIR/COMMIT_SHA/PATH. Files deleted
# by the commit are skipped. A commit directory that ends up empty receives a
# .gitkeep marker so that it is still recorded as backed up.
#
# The sha of the last commit backed up is stored in DATA_DIR/.track; the next
# run resumes after that commit unless -i is given.
#
# Options:
#   -r REPOSITORY   repository to clone
#   -b BRANCH       branch to back up (default: main)
#   -d DATA_DIR     backup destination (default: ./data); made absolute
#   -n MAX_COMMITS  stop after this many new commits (0 = no limit)
#   -i              ignore DATA_DIR/.track and walk the whole branch again
#   -v              print every file written
#   -x              trace commands (set -x)
#   -h              show usage and exit
#
# Requires bash (arrays-free, but uses [[ ]], read -d and local) and git.

set -euo pipefail

REPOSITORY=https://github.com/descartes-underwriting/devops-technical-test-data.git
BRANCH=main
DATA_DIR=${PWD}/data
# Max number of commits to back up in one run; 0 means no limit.
MAX_NUM=0
# 1 = log every file written.
VERBOSE=0
# 1 = ignore DATA_DIR/.track. Useful to fill in missing commits; when 0 the
# run continues where the previous one stopped.
IGNORE_TRACK=0
# Scratch directory (clone + temporary lists); removed on exit.
WORK_DIR=
# Location of the clone inside WORK_DIR.
CLONE_DIR=

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage text on stdout.
usage() {
  echo "Usage: $0 [-r REPOSITORY] [-b BRANCH] [-d DATA_DIR] [-n MAX_COMMITS] [-i] [-v] [-x] [-h]"
  echo "Ex: $0 -r $REPOSITORY -b $BRANCH -d $DATA_DIR"
}

# Remove the scratch directory; installed as the EXIT trap.
cleanup() {
  if [[ -n "${WORK_DIR}" ]]; then
    rm -rf -- "${WORK_DIR}"
  fi
}

# Run git against the clone without changing the working directory.
# core.quotepath=false keeps non-ASCII path names unquoted in git's output.
repo_git() {
  git -C "${CLONE_DIR}" -c core.quotepath=false "$@"
}

#######################################
# Write the files touched by one commit into DATA_DIR/SHA.
# Globals:   DATA_DIR, WORK_DIR, VERBOSE (read)
# Arguments: $1 - commit sha
# Outputs:   progress messages on stdout
#######################################
backup_commit() {
  local sha=$1
  local target=${DATA_DIR}/${sha}
  local names_file=${WORK_DIR}/changed-paths
  local path

  # -z emits the raw path names separated by NUL bytes, so odd names (quotes,
  # spaces, newlines) need no parsing. Going through a file instead of a pipe
  # keeps git's exit status visible to 'set -e'.
  repo_git show --pretty= --name-only -z "${sha}" > "${names_file}"
  if [[ ! -s "${names_file}" ]]; then
    # Merge commit, etc. There is no file here.
    echo "No file was found in commit ${sha}"
  fi

  mkdir -p -- "${target}"

  # The list is read on fd 3 so that commands in the loop body cannot consume it.
  while IFS= read -r -d '' -u 3 path; do
    [[ -n "${path}" ]] || continue

    # Create the parent directory of the path (nothing to do for top-level
    # files). This also happens for deleted files, so that their directory
    # still shows up in the backup.
    case "${path}" in
      */*) mkdir -p -- "${target}/${path%/*}" ;;
    esac

    if (( VERBOSE == 1 )); then
      echo "Writing ${target}/${path}"
    fi

    # A path listed by the commit but absent from the commit's tree was
    # deleted by it. cat-file -e only reports through its exit status; its
    # error message for a missing object is expected, hence 2>/dev/null.
    if repo_git cat-file -e "${sha}:${path}" 2>/dev/null; then
      repo_git show "${sha}:${path}" > "${target}/${path}"
    else
      echo "Skipping ${path} as file was deleted in this commit"
    fi
  done 3< "${names_file}"

  # If the commit directory is empty, keep track that it was backed up.
  if [[ -z "$(ls -A -- "${target}")" ]]; then
    echo "Folder ${target} is empty. Marking it to keep as it in the backup"
    touch -- "${target}/.gitkeep"
  fi
}

main() {
  local opt
  while getopts ':r:b:d:vxn:hi' opt; do
    case "${opt}" in
      r) REPOSITORY=${OPTARG} ;;
      b) BRANCH=${OPTARG} ;;
      d)
        DATA_DIR=${OPTARG}
        if [[ "${DATA_DIR}" != /* ]]; then
          # The backup location must not depend on the working directory.
          DATA_DIR=${PWD}/${DATA_DIR}
        fi
        ;;
      i) IGNORE_TRACK=1 ;;
      n) MAX_NUM=${OPTARG} ;;
      v) VERBOSE=1 ;;
      x) set -x ;;
      h)
        usage
        exit 0
        ;;
      :) die "Option -${OPTARG} requires an argument" ;;
      \?) die "Invalid option -${OPTARG}" ;;
    esac
  done

  [[ "${MAX_NUM}" =~ ^[0-9]+$ ]] \
    || die "Option -n expects a non-negative integer, got '${MAX_NUM}'"
  # Force base 10 so that a value such as 08 is not rejected as bad octal.
  MAX_NUM=$((10#${MAX_NUM}))

  mkdir -p -- "${DATA_DIR}"

  trap cleanup EXIT
  WORK_DIR=$(mktemp -d)
  CLONE_DIR=${WORK_DIR}/repository
  git clone -- "${REPOSITORY}" "${CLONE_DIR}"

  # Resume after the last tracked commit: '^SHA' excludes everything reachable
  # from it, and the walk is limited to the requested branch.
  local exclude=
  if [[ -s "${DATA_DIR}/.track" ]] && (( IGNORE_TRACK == 0 )); then
    exclude="^$(<"${DATA_DIR}/.track")"
  fi

  # List all commit shas, from older to newer.
  local commit_shas
  if [[ -n "${exclude}" ]]; then
    commit_shas=$(repo_git log --reverse --pretty=tformat:%H \
      "remotes/origin/${BRANCH}" "${exclude}" --)
  else
    commit_shas=$(repo_git log --reverse --pretty=tformat:%H \
      "remotes/origin/${BRANCH}" --)
  fi

  local num_added=0
  local sha
  # Word splitting is intended here: shas are plain hexadecimal words.
  for sha in ${commit_shas}; do
    if [[ -d "${DATA_DIR}/${sha}" ]]; then
      # Commit already backed up; skipping.
      continue
    fi
    echo "new commit: ${sha}"

    backup_commit "${sha}"

    num_added=$((num_added + 1))
    printf '%s' "${sha}" > "${DATA_DIR}/.track"
    # MAX_NUM=0 never matches because num_added is at least 1 here.
    if (( num_added == MAX_NUM )); then
      echo "Max commit to backup reached; stopping."
      break
    fi
  done

  echo "done: ${num_added} commits"
}

main "$@"