parallel-fsstats.pl 3.17 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
#!/usr/bin/perl -w
#
#  (C) 2010 by Argonne National Laboratory.
#      See COPYRIGHT in top-level directory.
#

use Cwd;
use Getopt::Long;
use English;

my $hostfile;
my $dirfile;
my @hosts;
my @dirs;
my %pidmapping = ();
my %pidmapping_dir = ();

# fills in $hostfile and $dirfile from the command line (exits on bad usage)
process_args();

# each list file holds one entry per line
@hosts = read_list_file($hostfile, "hostfile");
@dirs = read_list_file($dirfile, "dirfile");

# without at least one host the directories could never be scheduled
if(@dirs && !@hosts)
{
    die("Error: no hosts listed in hostfile '$hostfile'.\n");
}

# Reads a list file and returns its non-blank lines with the trailing
# newline removed.
#
#   $path  - file to read
#   $label - short name of the file's role, used only in the error message
#
# Dies if the file cannot be opened.
sub read_list_file
{
    my ($path, $label) = @_;

    # three-argument open so that characters in the file name are never
    # interpreted as an open mode or a pipe
    open(my $fh, '<', $path) or die("Error opening $label '$path': $!\n");
    my @entries = <$fh>;
    close($fh);
    chomp(@entries);

    # skip blank lines (e.g. an empty line at the end of the file); they
    # would otherwise be scheduled as an empty host or directory name
    return grep { /\S/ } @entries;
}

# Flush STDOUT after every print.  Perl tries to flush open handles before
# fork(), but perlfunc notes that this is not supported on every platform and
# recommends autoflush to avoid buffered text being written again by a child.
# It also makes the progress lines show up promptly when output is redirected.
$| = 1;

# loop as long as we still have directories to process or outstanding jobs
while (@dirs || scalar(keys(%pidmapping)))
{
    if(@hosts && @dirs)
    {
        # we have work to do and a free host to do it on

        # grab a directory and host out of the lists
        my $dir = pop(@dirs);
        my $host = pop(@hosts);

        my $pid = fork();
        if (not defined $pid)
        {
            die("Error: could not fork: $!\n");
        }

        if($pid == 0)
        {
            # child runs remote command; the list form of system() hands the
            # host and directory over as separate arguments without going
            # through a shell, so spaces or shell metacharacters in a
            # directory name reach the runner script unchanged
            system("./fsstats-runner.bash", $host, $dir);

            # look for exec problem, signal, or command error code
            my $failed = ($? == -1) || ($? & 127) || (($? >> 8) != 0);

            # propagate an error code to parent
            exit($failed ? 1 : 0);
        }

        print("fsstats of $dir on $host started...\n");
        # parent keeps up with what child is running where
        $pidmapping{$pid} = $host;
        $pidmapping_dir{$pid} = $dir;
    }
    else
    {
        # we have launched as much as we can; wait for something to finish

        # Hosts only leave the queue while a child is using them, so having
        # neither a free host nor a running child means the host list was
        # empty to begin with; say so instead of failing inside waitpid.
        if(!keys(%pidmapping))
        {
            die("Error: no hosts available to run the remaining directories.\n");
        }

        my $child = waitpid(-1, 0);
        # save the exit status before anything else can overwrite $?
        my $child_error_code = $?;
        if($child < 1)
        {
            die("Error: lost track of a child process.\n");
        }

        my $outcome = ($child_error_code == 0)
            ? " SUCCESS.\n"
            : " FAILURE (continuing).\n";
        print("fsstats of $pidmapping_dir{$child} on $pidmapping{$child} " . $outcome);

        # put the host back on the queue
        push(@hosts, $pidmapping{$child});
        delete($pidmapping{$child});
        delete($pidmapping_dir{$child});
    }
}

# Parses the command line and stores the two positional arguments in the
# file-level $hostfile and $dirfile variables.
#
# --help prints the usage text and exits with status 0.  An unrecognized
# option, or anything other than exactly two remaining arguments, prints the
# usage text and exits with status 1.
sub process_args
{
    my $show_help = 0;

    Getopt::Long::Configure("no_ignore_case", "bundling");

    # GetOptions returns false when it meets an option it does not know
    # (after warning on STDERR); treat that as a usage error rather than
    # carrying on with whatever arguments happen to be left
    if(!GetOptions("help" => \$show_help))
    {
        print_help();
        exit(1);
    }

    if($show_help)
    {
        print_help();
        exit(0);
    }

    # there should be two remaining arguments (hostfile and dirfile)
    if(scalar(@ARGV) != 2)
    {
        print "Error: invalid arguments.\n";
        print_help();
        exit(1);
    }

    ($hostfile, $dirfile) = @ARGV;

    return;
}

# Writes the usage summary for this script to standard output.
sub print_help
{
    # one list entry per output line; only the usage line interpolates
    my @usage_lines = (
        '',
        "Usage: $PROGRAM_NAME <hostfile> <dirfile>",
        '',
        '    --help          Prints this help message',
        '',
        'Purpose:',
        '',
        '    This script runs parallel copies of fsstats on each directory listed in',
        '    the <dirfile>.  The <hostfile> specifies a list of hosts to run fsstats',
        '    on via ssh.',
        '',
    );

    # terminate every line with a newline and emit the message in one write
    print(join('', map { "$_\n" } @usage_lines));
    return;
}