git-spacehogs 3.34 KB
Newer Older
Chris Bills's avatar
Chris Bills committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
#!/bin/bash

################################################################################
################################################################################
# DISCLAIMER:

# It is recommended that you clone your project somewhere safe as a backup; e.g.
# 'git clone https://git_url/group/project.git ~/projects/project-backup'

# This script _WILL_ perform destructive changes on a repository
################################################################################
################################################################################
# INFO:

# If 'git remote' displays any remotes, the 'true' size (after running this
# script) of the repository will not be 'seen' with 'du -sh'; remove remotes
# or force push your updated project to see the new reduced size.

# To push updated / cleaned history to the remote, you will need to execute
# a 'force' push; e.g. 'git push -f <remote_name> <branch>'. Some services,
# like GitLab, protect branches and will reject a force push until protection
# is removed from the branch in question.

# To remove protection from a branch (and re-add it) browse to:
# https://<gitlab_url>/<namespace>/<project>/protected_branches or
# Click the gear in the upper right and click "Protected Branches"
################################################################################
################################################################################

# Additional info
# https://git-scm.com/book/en/v2/Git-Internals-Maintenance-and-Data-Recovery
# http://stackoverflow.com/questions/3765234/listing-and-deleting-git-commits-that-are-under-no-branch-dangling

# Find the git directory for the repository we are in. Ask git for it rather
# than assuming '<toplevel>/.git', which is a plain file inside submodules.
# Abort when the lookup fails: carrying on would leave "$git_dir" holding a
# bogus path that the cleanup at the end of this script would 'rm -rf' under.
if ! git_dir="$(git rev-parse --git-dir)"; then
  printf '%s: not inside a git repository\n' "${0##*/}" >&2
  exit 1
fi

# 'git rev-parse --git-dir' may answer with a relative path; make it absolute.
# CDPATH is cleared so 'cd' cannot resolve the name against another directory.
git_dir="$(CDPATH= cd -- "$git_dir" && pwd)" || exit 1

#######################################
# Decide how many of the largest blobs to review.
# Arguments: $1 - user-supplied count (may be empty)
# Outputs:   the count to use on stdout; a warning on stderr when a
#            non-empty argument is rejected
#######################################
parse_count() {
  local requested="$1"
  local -r fallback=5

  # The pattern is anchored: an unanchored [0-9] would accept e.g. "abc1"
  # and hand it to 'head -n'. Zero is rejected too, as it selects nothing.
  if [[ "$requested" =~ ^0*[1-9][0-9]*$ ]]; then
    # Force base 10 so leading zeros are dropped instead of read as octal
    printf '%s\n' "$((10#$requested))"
    return 0
  fi

  if [[ -n "$requested" ]]; then
    printf 'Ignoring invalid count "%s"; using %s\n' \
      "$requested" "$fallback" >&2
  fi
  printf '%s\n' "$fallback"
}

# default to the 5 largest blobs; a positive integer as the first argument
# overrides the default
number="$(parse_count "${1:-}")"

#######################################
# Interactively offer to remove the file(s) stored in a blob from history.
# Arguments: $1 - blob object id (full id, or a leading prefix of it)
# Inputs:    one y/N answer per file, read from stdin
# Outputs:   each candidate path on stdout; diagnostics on stderr
# Returns:   1 when no blob id is given
#######################################
process_blob() {
  local blob="$1"
  local -a files=()
  local file confirm first range escaped

  # An empty id would match every object in the repository
  if [[ -z "$blob" ]]; then
    printf 'process_blob: missing blob id\n' >&2
    return 1
  fi

  # Get files included in the blob. Lines look like "<id> <path>"; keep
  # everything after the id so that paths containing spaces stay intact.
  while IFS= read -r file; do
    files+=("$file")
  done < <(git rev-list --objects --all \
    | awk -v id="$blob" \
        'NF > 1 && index($1, id) == 1 { print substr($0, length($1) + 2) }')

  for file in "${files[@]}"; do
    printf "%s\n" "$file"
    read -r -p "Remove? [y/N] " confirm
    [[ "$confirm" == [Yy] ]] || continue

    # Last entry of the branch log for this path, i.e. the earliest commit
    # git lists as touching it
    first=$(git log --format=%H --branches -- "$file" | tail -n 1)
    if [[ -z "$first" ]]; then
      printf "Can't find commit for %s; has it been removed already?\n" \
        "$file" 1>&2
      continue
    fi

    # Rewrite from the parent of that commit onwards. A root commit has no
    # parent, so "<commit>^.." would be rejected; use all of HEAD instead.
    if git rev-parse --quiet --verify "${first}^" > /dev/null; then
      range="${first}^.."
    else
      range="HEAD"
    fi

    # filter-branch evaluates the filter with a shell, so embed the path
    # single-quoted, with embedded single quotes escaped as '\''
    escaped=${file//\'/\'\\\'\'}

    # Remove the file from the index of every commit in range and rewrite
    # history
    git filter-branch -f --index-filter \
      "git rm --ignore-unmatch --cached -- '${escaped}'" -- "$range"
  done
}

#######################################
# List the largest blobs stored in the repository's pack files.
# Arguments: $1 - path to the git directory
#            $2 - how many blobs to list
# Outputs:   the matching 'git verify-pack -v' lines, largest first
# Returns:   1 when no pack index is found
#######################################
largest_blobs() {
  local dir="$1" count="$2"
  local -a indexes=()
  local idx

  # Expand the glob here so a directory name containing spaces survives and
  # an unmatched pattern is reported rather than handed to git literally
  for idx in "$dir"/objects/pack/pack-*.idx; do
    [[ -e "$idx" ]] && indexes+=("$idx")
  done
  if (( ${#indexes[@]} == 0 )); then
    printf 'No pack files found in %s\n' "$dir/objects/pack" >&2
    return 1
  fi

  # verify-pack -v columns: id, type, size, size-in-packfile, offset, ...
  # Keep blob lines only (this also filters out the summary lines) and sort
  # on the size column, biggest first.
  git verify-pack -v "${indexes[@]}" \
    | awk '$2 == "blob"' \
    | sort -k3 -rn \
    | head -n "$count"
}

# Run git's garbage collection
git gc

# Get the largest blob objects, including their size
objects="$(largest_blobs "$git_dir" "$number")"

#######################################
# Show each listed blob with its size in MiB and hand it to process_blob.
# Arguments: $1 - newline-separated 'git verify-pack -v' blob lines
# Outputs:   "<id> <size>M" per blob, followed by process_blob's output
#######################################
review_blobs() {
  local listing="$1"
  local id size

  # Read the list through FD 3 (necessary to use 'read' / STDIN elsewhere).
  # Attaching it to the loop closes the descriptor again when the loop ends.
  while read -r -u 3 id _ size _; do
    # A here-string always yields at least one line, even for an empty
    # list; skip it so process_blob is never called without a blob id
    [[ -n "$id" ]] || continue

    awk -v id="$id" -v size="$size" \
      'BEGIN { printf "%s %0.2fM\n", id, size / 1024 / 1024 }'
    process_blob "$id"
  done 3<<< "$listing"
}

review_blobs "$objects"

#######################################
# Delete the refs/original backup refs and the reflogs, then prune every
# object that is no longer reachable.
# Arguments: $1 - path to the git directory
# Returns:   1 (without deleting anything) when $1 is not a git directory
#######################################
purge_unreachable() {
  local dir="$1"

  # A non-empty check alone is not enough: a failed lookup can still leave
  # a bogus value such as "/.git" behind. Require the HEAD file of a real
  # git directory before running 'rm -rf' underneath it.
  if [[ -z "$dir" || ! -f "$dir/HEAD" ]]; then
    printf 'Skipping cleanup: "%s" is not a git directory\n' "$dir" >&2
    return 1
  fi

  # ${dir:?} aborts rather than expanding to "/refs/original" if empty
  rm -rf -- "${dir:?}/refs/original"
  rm -rf -- "${dir:?}/logs/"
  git fsck --full
  git reflog expire --expire-unreachable=now --all
  git gc --prune=now
}

purge_unreachable "$git_dir"