GitLab maintenance scheduled for Today, 2019-12-05, from 17:00 to 18:00 CT - Services will be unavailable during this time.

git-spacehogs 3.31 KB
Newer Older
Chris Bills's avatar
Chris Bills committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
#!/bin/bash

################################################################################
################################################################################
# DISCLAIMER:

# It is recommended that you clone your project somewhere safe as a backup; e.g.
# 'git clone https://git_url/group/project.git ~/projects/project-backup'

# This script _WILL_ perform destructive changes on a repository
################################################################################
################################################################################
# INFO:

# If 'git remote' displays any remotes, the 'true' size (after running this
# script) of the repository will not be 'seen' with 'du -sh'; remove remotes
# or force push your updated project to see the new reduced size.

# To push the updated / cleaned history to the remote, you will need to execute
# a 'force' push; e.g. 'git push -f <remote_name> <branch>'. Some services,
# like GitLab, protect branches and will reject a force push until protection
# has been removed from the branch in question.

# To remove protection from a branch (and re-add it) browse to:
# https://<gitlab_url>/<namespace>/<project>/protected_branches or
# Click the gear in the upper right and click "Protected Branches"
################################################################################
################################################################################

# Additional info
# https://git-scm.com/book/en/v2/Git-Internals-Maintenance-and-Data-Recovery
# http://stackoverflow.com/questions/3765234/listing-and-deleting-git-commits-that-are-under-no-branch-dangling

# Find the git directory for the repository we are in. Ask git for it directly
# instead of appending '/.git' to the work tree: when we are not inside a
# repository the old form silently produced the bogus path '/.git', which the
# destructive cleanup at the end of this script would then operate on.
# Note: the path may be relative to the current working directory, so this
# script must not 'cd' after this point.
git_dir="$(git rev-parse --git-dir)" || {
  printf 'Not inside a git repository; aborting.\n' 1>&2
  exit 1
}

Chris Bills's avatar
Chris Bills committed
37
# default to the 5 largest blobs
Chris Bills's avatar
Chris Bills committed
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
# Print how many blobs to list: "$1" when it consists solely of digits,
# otherwise the default of 5. The match is anchored so that values which merely
# contain a digit (e.g. 'abc1', '-7', '3 -c 1') are rejected instead of being
# handed to 'head -n' later on.
blob_count() {
  if [[ "$1" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$1"
  else
    printf '5\n'
  fi
}

number=$(blob_count "$1")

################################################################################
# Offer to purge from history every path recorded for a given blob.
#
# Arguments: $1 - blob hash (a unique prefix is accepted)
# Input:     reads a y/N answer from STDIN for each path
# Outputs:   prints each candidate path to STDOUT; diagnostics go to STDERR
# Effects:   runs 'git filter-branch' (history rewrite) for confirmed paths
################################################################################
process_blob() {
  local blob="$1"
  local -a paths=()
  local entry path confirm first range quoted
  local sq="'"
  local esc="'\\''"

  # An empty hash would match every object in the repository
  [[ -n "$blob" ]] || return 0

  # Collect the paths listed for the blob. Lines look like '<hash> <path>';
  # everything after the first space is kept so paths with spaces stay whole.
  # The list is gathered up front so STDIN stays free for the prompt below.
  while IFS= read -r entry; do
    paths+=("$entry")
  done < <(git rev-list --objects --all \
             | awk -v sha="$blob" \
                 'NF > 1 && index($1, sha) == 1 { sub(/^[^ ]+ /, ""); print }')

  for path in "${paths[@]}"; do
    printf '%s\n' "$path"
    read -r -p "Remove? [y/N] " confirm
    [[ "$confirm" == [yY] ]] || continue

    # Find the first commit the file appears in (git log lists newest first)
    first=$(git log --format=%H --branches -- "$path" | tail -n 1)
    if [[ -z "$first" ]]; then
      printf "Can't find commit for %s; has it been removed already?\n" \
        "$path" 1>&2
      continue
    fi

    # Rewrite from the parent of that commit onwards; a root commit has no
    # parent, so in that case rewrite everything reachable from HEAD instead
    if git rev-parse --verify --quiet "${first}^" > /dev/null; then
      range="${first}^.."
    else
      range="HEAD"
    fi

    # The index filter is evaluated by a shell, so embed the path as a
    # single-quoted word (inner single quotes become '\'')
    quoted=${path//$sq/$esc}
    quoted="'${quoted}'"

    # Remove the file from the index of each commit and rewrite history
    git filter-branch -f --index-filter \
      "git rm --ignore-unmatch --cached -- ${quoted}" -- "$range"
  done
}

# Run git's garbage collection first so the pack indexes listed below are
# up to date
git gc

# List the objects in every pack index, keep only blobs, and take the
# "$number" largest. Column 3 of 'git verify-pack -v' is the object size
# (column 4 would be the size inside the pack file).
objects=$(git verify-pack -v "$git_dir"/objects/pack/pack-*.idx \
  | awk '$2 == "blob"' \
  | sort -k3,3nr \
  | head -n "$number")

# Open FD 3 to read the contents of "$objects"
# (Necessary because process_blob uses 'read' / STDIN for its prompt)
exec 3<<<"$objects"

while IFS= read -r -u 3 object; do
  # The here-string yields one empty line when no blobs were found; skip it,
  # otherwise process_blob would be called without a hash
  [[ -n "$object" ]] || continue

  # Show the hash and its size in MiB, then offer to remove its paths
  awk '{printf "%s %0.2fM\n", $1, $3/1024/1024}' <<< "$object"
  process_blob "${object%% *}"
done

# Close FD 3
exec 3<&-

# Only clean up when "$git_dir" names a real directory so we never try
# 'rm -rf /refs/original' or something similar
if [[ -n "$git_dir" && -d "$git_dir" ]]; then
  # Drop the backup refs left by filter-branch and all reflogs so the old
  # objects become unreachable and can be pruned
  rm -rf -- "${git_dir:?}"/refs/original
  rm -rf -- "${git_dir:?}"/logs/
  git fsck --full
  git reflog expire --expire-unreachable=now --all
  git gc --prune=now
fi