#!/usr/bin/env bash
#
# Back up every commit of a git branch into its own directory.
#
# The repository is cloned into a temporary directory (removed on exit). For
# each commit reachable from origin/<branch>, oldest first, the files touched
# by that commit are written to <data_dir>/<commit_sha>/<path>. Commits whose
# directory already exists are skipped, so re-running the script only backs up
# commits that are not there yet.
#
# Usage: [-r repository] [-b branch] [-d data_dir] [-n max] [-v] [-x]
#   -r  repository URL or path to clone
#   -b  branch to back up (default: main)
#   -d  backup destination directory (default: ./data)
#   -n  stop after backing up this many new commits; 0 means no limit
#   -v  print the destination of every file as it is processed
#   -x  trace executed commands (set -x)
#
# NOTE(review): a run that aborts in the middle of a commit leaves a partial
# <data_dir>/<commit_sha> directory behind, and later runs will skip it.

set -eo pipefail

REPOSITORY=https://github.com/descartes-underwriting/devops-technical-test-data.git
BRANCH=main
DATA_DIR=$(pwd)/data
MAX_NUM=0
VERBOSE=0

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

while getopts ":r:b:d:vxn:" opt; do
  case "${opt}" in
    r) REPOSITORY=${OPTARG} ;;
    b) BRANCH=${OPTARG} ;;
    d) DATA_DIR=${OPTARG} ;;
    n) MAX_NUM=${OPTARG} ;;
    v) VERBOSE=1 ;;
    x) set -x ;;
    :) die "Option -${OPTARG} requires an argument" ;;
    *) die "Invalid option -${OPTARG}" ;;
  esac
done

# MAX_NUM is compared numerically below; reject anything that is not a
# non-negative integer up front instead of failing on every iteration.
case "${MAX_NUM}" in
  '' | *[!0-9]*) die "Invalid value for -n: '${MAX_NUM}' (expected a non-negative integer)" ;;
esac

mkdir -p -- "${DATA_DIR}"

# Scratch area holding the clone and the per-commit file list.
work_dir=$(mktemp -d)
cleanup() {
  rm -rf -- "${work_dir}"
}
trap cleanup EXIT

repo_dir="${work_dir}/repo"
file_list="${work_dir}/files.list"

# Clone into an explicit directory and address it with `git -C` rather than
# changing directory, so relative -r / -d arguments keep resolving against the
# directory the script was started from.
git clone -- "${REPOSITORY}" "${repo_dir}"

# core.quotepath=false keeps git from quoting/escaping unusual path names.
git_cmd=(git -C "${repo_dir}" -c core.quotepath=false)

# List all commit SHAs, from oldest to newest.
commit_shas=$("${git_cmd[@]}" log --reverse --pretty=format:%H "remotes/origin/${BRANCH}" --)

num_added=0
# Word splitting is intentional here: SHAs are plain hexadecimal strings.
for commit_sha in ${commit_shas}; do
  target_backup_sha="${DATA_DIR}/${commit_sha}"

  if [[ -d "${target_backup_sha}" ]]; then
    # Commit already backed up; skipping.
    continue
  fi

  echo "new commit: ${commit_sha}"

  # Some file names in the history are awkward to parse (quotes, spaces, ...).
  # -z makes git emit the raw names terminated by NUL bytes; they are stored in
  # a file first so that a git failure still aborts the script (set -e), then
  # read back one NUL-terminated name at a time.
  "${git_cmd[@]}" show --pretty= --name-only -z "${commit_sha}" > "${file_list}"

  files=()
  while IFS= read -r -d '' file; do
    if [[ -n "${file}" ]]; then
      files+=("${file}")
    fi
  done < "${file_list}"

  if [[ ${#files[@]} -eq 0 ]]; then
    # Merge commit, etc. There is no file here.
    echo "No file was found in commit ${commit_sha}"
  fi

  mkdir -p -- "${target_backup_sha}"

  for file in "${files[@]}"; do
    # ${file} contains path/to/file; recreate its directory in the backup.
    mkdir -p -- "${target_backup_sha}/$(dirname -- "${file}")"

    if [[ "${VERBOSE}" -eq 1 ]]; then
      echo "Writing ${target_backup_sha}/${file}"
    fi

    # A path listed by the commit but absent from its tree was deleted by that
    # commit. stderr is silenced on purpose: git reports the missing path there.
    if "${git_cmd[@]}" cat-file -e "${commit_sha}:${file}" 2>/dev/null; then
      "${git_cmd[@]}" show "${commit_sha}:${file}" > "${target_backup_sha}/${file}"
    else
      echo "Skipping ${file} as file was deleted in this commit"
    fi
  done

  # If the commit directory ended up empty, drop a marker file in it so the
  # commit is still recorded as backed up.
  if [[ -z "$(ls -A -- "${target_backup_sha}")" ]]; then
    echo "Folder ${target_backup_sha} is empty. Marking it to keep as it in the backup"
    touch -- "${target_backup_sha}/.gitkeep"
  fi

  num_added=$((num_added + 1))

  # With MAX_NUM=0 this never matches (num_added is already >= 1): no limit.
  if [ "${num_added}" -eq "${MAX_NUM}" ]; then
    echo "Max commit to backup reached; stopping."
    break
  fi
done

echo "done: ${num_added}"