#!/bin/bash
################################################################################
################################################################################
# DISCLAIMER:
# It is recommended that you clone your project somewhere safe as a backup; e.g.
# 'git clone https://git_url/group/project.git ~/projects/project-backup'
# This script _WILL_ perform destructive changes on a repository
################################################################################
################################################################################
# USAGE:
# <script> [N]
#   N - number of largest blobs to review (default: 5). For every path that
#       belongs to one of those blobs you are asked whether it should be
#       removed from history.
################################################################################
################################################################################
# INFOS:
# If 'git remote' displays any remotes, the 'true' size (after running this
# script) of the repository will not be 'seen' with 'du -sh'; remove remotes
# or force push your updated project to see the new reduced size.
# To push updated / cleaned history to the remote, you will need to execute
# a 'force' push; e.g. 'git push -f <remote> <branch>'; some services
# like GitLab protect branches and will prevent a force push, without removing
# protection from the branch in question.
# To remove protection from a branch (and re-add it) browse to:
# https://<git_url>/<group>/<project>/protected_branches or
# Click the gear in the upper right and click "Protected Branches"
################################################################################
################################################################################
# Additional info
# https://git-scm.com/book/en/v2/Git-Internals-Maintenance-and-Data-Recovery
# http://stackoverflow.com/questions/3765234/listing-and-deleting-git-commits-that-are-under-no-branch-dangling

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" 1>&2
  exit 1
}

#######################################
# Offer to purge every path stored as the given blob from history.
# Arguments: $1 - full object id of the blob
# Inputs:    reads the y/N answer from stdin (the file list uses FD 4 so that
#            stdin stays free for the prompt)
# Outputs:   the path and the prompt on stdout/stderr; diagnostics on stderr
#######################################
process_blob() {
  local blob="$1"
  local files file confirm first range

  # An empty id would match every object below and offer to delete everything.
  [[ -n "$blob" ]] || return 0

  # 'git rev-list --objects' prints "<id> <path>". Match the id exactly and
  # keep the whole remainder of the line so paths with spaces survive.
  files=$(git rev-list --objects --all \
    | awk -v id="$blob" '$1 == id { sub(/^[^ ]+ ?/, ""); if (length($0)) print }')
  [[ -n "$files" ]] || return 0

  while IFS= read -r -u 4 file; do
    printf "%s\n" "$file"
    read -r -p "Remove? [y/N] " confirm
    if [[ "$confirm" == 'Y' || "$confirm" == 'y' ]]; then
      # Find the first (oldest listed) commit the file appears in
      first=$(git log --format=%H --branches -- "$file" | tail -n1)
      if [[ -z "$first" ]]; then
        printf "Can't find commit for %s; has it been removed already?\n" \
          "$file" 1>&2
      else
        # A root commit has no parent, so "<first>^.." would be invalid;
        # in that case rewrite everything reachable from HEAD.
        if git rev-parse --verify --quiet "${first}^" > /dev/null; then
          range="${first}^.."
        else
          range="HEAD"
        fi
        # Remove the file from the commits and rewrite history. The path is
        # handed to the filter through the environment instead of being
        # spliced into the eval'd filter string, so spaces and quotes in the
        # path cannot break (or inject into) the command.
        PRUNE_TARGET_PATH="$file" git filter-branch -f --index-filter \
          'git rm --ignore-unmatch --cached -- "$PRUNE_TARGET_PATH"' \
          -- "$range"
      fi
    fi
  done 4<<< "$files"
}

#######################################
# Entry point: list the N largest blobs, prompt for removal, then prune.
# Arguments: $1 - optional number of blobs to review (default 5)
#######################################
main() {
  local requested="${1:-}"
  local top_level git_dir objects object
  # default to the 5 largest blobs
  local number=5

  # Only accept a purely numeric argument; anything else would break 'head'.
  if [[ "$requested" =~ ^[0-9]+$ ]]; then
    number=$requested
  elif [[ -n "$requested" ]]; then
    printf 'Ignoring non-numeric blob count "%s"; using %s\n' \
      "$requested" "$number" 1>&2
  fi

  # find the git directory for the repository we are in
  top_level=$(git rev-parse --show-toplevel 2> /dev/null)
  [[ -n "$top_level" ]] || die "Not inside a git work tree; aborting."
  git_dir="$top_level/.git"
  # Refuse to continue (and later 'rm -rf') on anything but a real .git dir.
  [[ -d "$git_dir" ]] || die "$git_dir is not a directory; aborting."

  # Run git's garbage collection
  git gc

  # Get all packed blobs, largest first. 'verify-pack -v' prints
  # "<id> <type> <size> <size-in-pack> <offset>"; column 3 is the sort key.
  objects=$(git verify-pack -v "$git_dir"/objects/pack/pack-*.idx \
    | awk '$2 == "blob"' \
    | sort -rk3 -n \
    | head -n "$number")

  # Feed "$objects" through FD 3
  # (Necessary to use 'read' / STDIN elsewhere)
  while IFS= read -r -u 3 object; do
    # A here-string of an empty list still yields one empty line; skip it.
    [[ -n "$object" ]] || continue
    awk '{printf "%s %0.2fM\n", $1, $3/1024/1024}' <<< "$object"
    process_blob "${object%% *}"
  done 3<<< "$objects"

  # ':?' aborts if "$git_dir" is ever empty so we can never run
  # 'rm -rf /refs/original' or something similar.
  rm -rf "${git_dir:?}"/refs/original
  rm -rf "${git_dir:?}"/logs/
  git fsck --full
  git reflog expire --expire-unreachable=now --all
  git gc --prune=now
}

main "$@"