#!/usr/bin/perl -w
#
#  (C) 2009 by Argonne National Laboratory.
#      See COPYRIGHT in top-level directory.
#

use FindBin;
use lib "$FindBin::Bin/../lib/";
use TeX::Encode;
use Encode;
use File::Temp qw/ tempdir /;
use Cwd;
use List::Util qw/ min max /;

# Command used to run gnuplot; left empty here and chosen further down,
# after the script has looked for an executable next to itself.
my $gnuplot = "";

# Scratch directory that receives every intermediate file.  File::Temp
# removes it when the program exits because of CLEANUP => 1.
my $tmp_dir = tempdir( CLEANUP => 1 );

# Directory the script was started from; the finished summary.pdf is moved
# back here at the end.
my $orig_dir = getcwd;

# Read the trace text named by the first command-line argument and fill the
# globals the rest of the script reports on:
#
#   $cmdline, $nprocs, $runtime, $starttime, $uid
#       text after the first ':' of the "# exe:", "# nprocs:",
#       "# run time:", "# start_time:" and "# uid:" header lines (the blank
#       that follows the ':' is kept); $cmdline is additionally escaped for
#       LaTeX
#   %summary
#       counter name => sum of that counter over all records
#   @procreads, @procwrites
#       CP_POSIX_READS / CP_POSIX_WRITES summed per process index; records
#       whose first field is -1 are collected in the extra slot [$nprocs]
#   %common
#       file hash => summed CP_STRIDE* / CP_ACCESS* counters, plus 'name'
#       and 'totalaccess'
#   $max_access, $max_access_hash
#       largest 'totalaccess' seen and the file hash it belongs to
#
# Record lines are split on runs of blanks/tabs into
#   process index, file hash, counter name, value, name
# (presumably the output of darshan-parser; confirm against the producer).
unless (defined $ARGV[0]) {
    die("usage: $0 <trace file>\n");
}
open(my $trace_fh, '<', $ARGV[0])
    || die("can't open $ARGV[0] for processing: $!\n");

$max_access = -1;
$max_access_hash = 0;

while (my $line = <$trace_fh>) {
    # chomp, not chop: a final line without a newline must keep its last
    # character
    chomp($line);

    # ignore blank lines
    next if $line =~ /^\s*$/;

    if ($line =~ /^#/) {
        # only the header comments listed above carry data
        next unless $line =~ /^# (exe|nprocs|run time|start_time|uid):( .*)$/;
        my ($header, $text) = ($1, $2);

        if ($header eq 'exe') {
            # add escape characters if needed for special characters in
            # command line
            $cmdline = encode('latex', $text);
        }
        elsif ($header eq 'nprocs') {
            $nprocs = $text;
            # make sure the slot used for "-1" records always exists
            $procreads[$nprocs] = 0;
        }
        elsif ($header eq 'run time') {
            $runtime = $text;
        }
        elsif ($header eq 'start_time') {
            $starttime = $text;
        }
        else {
            $uid = $text;
        }
        next;
    }

    my @fields = split(/[\t ]+/, $line);
    # a record needs at least index, hash, counter and value
    next if @fields < 4;
    my ($proc, $file_hash, $counter, $value, $name) = @fields;

    $summary{$counter} += $value;

    # record per-process POSIX read and write counts
    if ($counter eq "CP_POSIX_READS" || $counter eq "CP_POSIX_WRITES") {
        my $slot = ($proc == -1) ? $nprocs : $proc;
        if ($counter eq "CP_POSIX_READS") {
            $procreads[$slot] += $value;
        }
        else {
            $procwrites[$slot] += $value;
        }
    }

    # record access and stride counters ("+=" starts from undef without a
    # warning, so no separate first-time assignment is needed)
    if ($counter =~ /^CP_STRIDE/) {
        $common{$file_hash}{$counter} += $value;
        $common{$file_hash}{'name'} = $name;
    }
    if ($counter =~ /^CP_ACCESS/) {
        $common{$file_hash}{$counter} += $value;

        # NOTE(review): every CP_ACCESS* value is added here, i.e. both the
        # *_ACCESS and the *_COUNT counters reported later in the access
        # table -- confirm that mixing them is intended.
        $common{$file_hash}{'totalaccess'} += $value;

        if ($common{$file_hash}{'totalaccess'} > $max_access) {
            $max_access = $common{$file_hash}{'totalaccess'};
            $max_access_hash = $file_hash;
        }
    }
}
close($trace_fh);

# print "max_access: $max_access.\n";
# print "max_access_hash: $max_access_hash.\n";

# counts of operations
#
# Writes counts.dat into the scratch directory: two '#' header lines, then
# one "label, POSIX, MPI-IO independent, MPI-IO collective" row per
# operation.  Stat, Seek and Mmap only have a POSIX counter, so their other
# two columns are written as 0.  A counter that never appeared in the trace
# is written as 0 as well instead of an empty field.
open(my $counts_fh, '>', "$tmp_dir/counts.dat")
    || die("error opening output file: $!\n");
print {$counts_fh} "# P=POSIX, MI=MPI-IO indep., MC=MPI-IO coll., R=read, W=write\n";
print {$counts_fh} "# PR, MIR, MCR, PW, MIW, MCW, Popen, Pseek, Pstat\n";

# row label followed by the %summary keys that fill its columns
my @count_rows = (
    [ "Read",  "CP_POSIX_READS",  "CP_INDEP_READS",  "CP_COLL_READS"  ],
    [ "Write", "CP_POSIX_WRITES", "CP_INDEP_WRITES", "CP_COLL_WRITES" ],
    [ "Open",  "CP_POSIX_OPENS",  "CP_INDEP_OPENS",  "CP_COLL_OPENS"  ],
    [ "Stat",  "CP_POSIX_STATS" ],
    [ "Seek",  "CP_POSIX_SEEKS" ],
    [ "Mmap",  "CP_POSIX_MMAPS" ],
);
for my $count_row (@count_rows) {
    my ($label, @counters) = @{$count_row};
    my @cells = map { defined $summary{$_} ? $summary{$_} : 0 } @counters;
    # pad rows that have no MPI-IO counters
    push(@cells, 0) while @cells < 3;
    print {$counts_fh} join(", ", $label, @cells), "\n";
}
close($counts_fh) || die("error closing output file: $!\n");

# histograms of reads and writes
#
# Writes hist.dat into the scratch directory: a '#' header line, then one
# "size range, read count, write count" row per size range, taken from the
# CP_SIZE_READ_* and CP_SIZE_WRITE_* entries of %summary.  Counters that
# never appeared in the trace are written as 0.
open(my $hist_fh, '>', "$tmp_dir/hist.dat")
    || die("error opening output file: $!\n");
print {$hist_fh} "# size_range read write\n";

# printed label => suffix shared by the read and write counter names
my @size_bins = (
    [ "0-100",    "0_100"    ],
    [ "101-1K",   "100_1K"   ],
    [ "1K-10K",   "1K_10K"   ],
    [ "10K-100K", "10K_100K" ],
    [ "100K-1M",  "100K_1M"  ],
    [ "1M-4M",    "1M_4M"    ],
    [ "4M-10M",   "4M_10M"   ],
    [ "10M-100M", "10M_100M" ],
    [ "100M-1G",  "100M_1G"  ],
    [ "1G+",      "1G_PLUS"  ],
);
for my $size_bin (@size_bins) {
    my ($label, $suffix) = @{$size_bin};
    my ($reads, $writes) =
        map { defined $summary{$_} ? $summary{$_} : 0 }
        ("CP_SIZE_READ_$suffix", "CP_SIZE_WRITE_$suffix");
    print {$hist_fh} "$label, $reads, $writes\n";
}
close($hist_fh) || die("error closing output file: $!\n");

# sequential and consecutive accesses
#
# Writes pattern.dat into the scratch directory: a '#' header line, then a
# "Read" and a "Write" row holding the total POSIX operation count followed
# by the sequential and consecutive counts.  Counters that never appeared in
# the trace are written as 0.
open(my $pattern_fh, '>', "$tmp_dir/pattern.dat")
    || die("error opening output file: $!\n");
print {$pattern_fh} "# op total sequential consecutive\n";

for my $pattern_row ( [ "Read", "READS" ], [ "Write", "WRITES" ] ) {
    my ($label, $suffix) = @{$pattern_row};
    my @cells =
        map { defined $summary{$_} ? $summary{$_} : 0 }
        ("CP_POSIX_$suffix", "CP_SEQ_$suffix", "CP_CONSEC_$suffix");
    print {$pattern_fh} join(", ", $label, @cells), "\n";
}
close($pattern_fh) || die("error closing output file: $!\n");

# aligned I/O
#
# Writes align.dat into the scratch directory: a '#' header line and a
# single row "total POSIX reads+writes, CP_MEM_NOT_ALIGNED,
# CP_FILE_NOT_ALIGNED".  The header text names five columns but only these
# three values are written.  Counters that never appeared in the trace count
# as 0.
open(my $align_fh, '>', "$tmp_dir/align.dat")
    || die("error opening output file: $!\n");
print {$align_fh} "# total unaligned_mem unaligned_file align_mem align_file\n";

my ($align_reads, $align_writes, $align_mem, $align_file) =
    map { defined $summary{$_} ? $summary{$_} : 0 }
    qw(CP_POSIX_READS CP_POSIX_WRITES CP_MEM_NOT_ALIGNED CP_FILE_NOT_ALIGNED);
print {$align_fh} $align_reads + $align_writes, ", ",
    $align_mem, ", ", $align_file, "\n";
close($align_fh) || die("error closing output file: $!\n");

# MPI types
#
# Writes types.dat into the scratch directory: a '#' header line, then one
# "label, use count" row per CP_COMBINER_* counter in %summary.  Counters
# that never appeared in the trace are written as 0.
open(my $types_fh, '>', "$tmp_dir/types.dat")
    || die("error opening output file: $!\n");
print {$types_fh} "# type use_count\n";

# printed label => suffix of the CP_COMBINER_ counter, in output order
my @combiner_rows = (
    [ "Named",      "NAMED"            ],
    [ "Dup",        "DUP"              ],
    [ "Contig",     "CONTIGUOUS"       ],
    [ "Vector",     "VECTOR"           ],
    [ "HvecInt",    "HVECTOR_INTEGER"  ],
    [ "Hvector",    "HVECTOR"          ],
    [ "Indexed",    "INDEXED"          ],
    [ "HindInt",    "HINDEXED_INTEGER" ],
    [ "Hindexed",   "HINDEXED"         ],
    [ "IndBlk",     "INDEXED_BLOCK"    ],
    [ "StructInt",  "STRUCT_INTEGER"   ],
    [ "Struct",     "STRUCT"           ],
    [ "Subarray",   "SUBARRAY"         ],
    [ "Darray",     "DARRAY"           ],
    [ "F90Real",    "F90_REAL"         ],
    [ "F90Complex", "F90_COMPLEX"      ],
    [ "F90Int",     "F90_INTEGER"      ],
    [ "Resized",    "RESIZED"          ],
);
for my $combiner_row (@combiner_rows) {
    my ($label, $suffix) = @{$combiner_row};
    my $uses = $summary{"CP_COMBINER_$suffix"};
    $uses = 0 unless defined $uses;
    print {$types_fh} "$label, $uses\n";
}
close($types_fh) || die("error closing output file: $!\n");

# generate histogram of process I/O counts
#
# NOTE: NEED TO FILL IN ACTUAL WRITE DATA!!!
#
# Every process is charged its own POSIX read count plus the count stored
# in the extra slot $procreads[$nprocs] (records whose process index was
# -1).  The span between the smallest and the largest per-process total is
# divided into ten equal buckets and iodist.dat receives, per bucket, its
# value range, the number of processes falling into it, and a literal 0 for
# the write column.
my $shared_reads = defined $procreads[$nprocs] ? $procreads[$nprocs] : 0;
my @reads_per_proc =
    map { (defined $procreads[$_] ? $procreads[$_] : 0) + $shared_reads }
    0 .. $nprocs - 1;

$minprocread = @reads_per_proc ? min(@reads_per_proc) : 0;
$maxprocread = @reads_per_proc ? max(@reads_per_proc) : 0;
# print "$minprocread $maxprocread\n";

my $read_span = $maxprocread - $minprocread;
@bucket = (0) x 10;

for my $proc_reads (@reads_per_proc) {
    my $slot = ($read_span > 0)
        ? int((($proc_reads - $minprocread) * 10) / $read_span)
        : 0;
    # the busiest process computes index 10; it belongs in the last bucket
    $slot = 9 if $slot > 9;
    $bucket[$slot]++;
}

open(my $iodist_fh, '>', "$tmp_dir/iodist.dat")
    || die("error opening output file: $!\n");
print {$iodist_fh} "# bucket n_procs_rd n_procs_wr\n";
print {$iodist_fh} "# NOTE: WRITES ARE A COPY OF READS FOR NOW!!!\n";

# width of one bucket: a tenth of the span (the subtraction has to happen
# before the division)
$bucketsize = $read_span / 10;
# TODO: do writes also, is dropping a 0 in for now
for my $slot (0 .. 9) {
    print {$iodist_fh} $bucketsize * $slot + $minprocread, "-",
        $bucketsize * ($slot + 1) + $minprocread, ", ", $bucket[$slot], ", 0\n";
}
close($iodist_fh) || die("error closing output file: $!\n");

# generate title for summary
#
# Writes title.tex into the scratch directory.  The centre header shows the
# last path component of the first word of the command line together with
# the start date as month/day/year; the centre footer shows the complete
# command line.  $cmdline was already escaped for LaTeX while the trace was
# parsed.
my ($executable) = split(' ', $cmdline, 2);
my $cmd = (split('/', $executable))[-1];

# localtime() counts months from 0 and years from 1900
my ($mday, $mon, $year) = (localtime($starttime))[3, 4, 5];
$mon  += 1;
$year += 1900;

open(my $title_fh, '>', "$tmp_dir/title.tex")
    || die("error opening output file:$!\n");
# the here-document starts with an empty line on purpose: the file has
# always begun with a newline
print {$title_fh} <<"END_TITLE";

\\rhead{\\thepage\\ of \\pageref{LastPage}}
\\chead[
\\large $cmd ($mon/$mday/$year)
]
{
\\large $cmd ($mon/$mday/$year)
}
\\cfoot[
\\scriptsize{$cmdline}
]
{
\\scriptsize{$cmdline}
}
END_TITLE
close($title_fh) || die("error closing output file: $!\n");

# Writes job-table.tex into the scratch directory: a one-row, three-column
# LaTeX table showing the uid, the process count and the run time taken
# from the trace header.
open(my $job_fh, '>', "$tmp_dir/job-table.tex")
    || die("error opening output file:$!\n");
# the here-document starts with an empty line on purpose: the file has
# always begun with a newline
print {$job_fh} <<"END_JOB_TABLE";

\\begin{tabular}{|p{.63\\columnwidth}|p{.63\\columnwidth}|p{.63\\columnwidth}|}
\\hline
uid: $uid & nprocs: $nprocs & runtime: $runtime seconds\\\\
\\hline
\\end{tabular}
END_JOB_TABLE
close($job_fh) || die("error closing output file: $!\n");

# Writes access-table.tex into the scratch directory: a LaTeX table with the
# CP_ACCESS1..4 _ACCESS/_COUNT values recorded for the file selected by
# $max_access_hash (the file with the largest 'totalaccess').  An entry that
# was never recorded is rendered as an empty cell.
open(my $access_fh, '>', "$tmp_dir/access-table.tex")
    || die("error opening output file:$!\n");

my $access_rows = "";
for my $access_rank (1 .. 4) {
    my $size  = $common{$max_access_hash}{"CP_ACCESS${access_rank}_ACCESS"};
    my $count = $common{$max_access_hash}{"CP_ACCESS${access_rank}_COUNT"};
    $size  = "" unless defined $size;
    $count = "" unless defined $count;
    $access_rows .= "  $size & $count \\\\\n";
}

# the here-document starts with an empty line on purpose: the file has
# always begun with a newline
print {$access_fh} <<"END_ACCESS_HEAD";

\\begin{tabular}{|r|r|}
\\multicolumn{2}{c}{Top 4 Access Sizes} \\\\
\\hline
access size & count \\\\
\\hline
END_ACCESS_HEAD
print {$access_fh} $access_rows;
print {$access_fh} <<"END_ACCESS_FOOT";
\\hline
\\end{tabular}
END_ACCESS_FOOT
close($access_fh) || die("error closing output file: $!\n");

# Writes stride-table.tex into the scratch directory: a LaTeX table titled
# with the name recorded for the file selected by $max_access_hash and
# listing its CP_STRIDE1..4 _STRIDE/_COUNT values.  An entry that was never
# recorded is rendered as an empty cell.
open(my $stride_fh, '>', "$tmp_dir/stride-table.tex")
    || die("error opening output file:$!\n");

# The name comes straight from the trace, so escape characters that are
# special to LaTeX (an underscore would otherwise stop pdflatex), the same
# way the command line is escaped.
my $stride_file = $common{$max_access_hash}{'name'};
$stride_file = defined $stride_file ? encode('latex', $stride_file) : "";

my $stride_rows = "";
for my $stride_rank (1 .. 4) {
    my $stride = $common{$max_access_hash}{"CP_STRIDE${stride_rank}_STRIDE"};
    my $count  = $common{$max_access_hash}{"CP_STRIDE${stride_rank}_COUNT"};
    $stride = "" unless defined $stride;
    $count  = "" unless defined $count;
    $stride_rows .= "  $stride & $count \\\\\n";
}

# the here-document starts with an empty line on purpose: the file has
# always begun with a newline
print {$stride_fh} <<"END_STRIDE_HEAD";

\\begin{tabular}{|r|r|}
\\multicolumn{2}{c}{file: $stride_file} \\\\
\\hline
stride size & count \\\\
\\hline
END_STRIDE_HEAD
print {$stride_fh} $stride_rows;
print {$stride_fh} <<"END_STRIDE_FOOT";
\\hline
\\end{tabular}
END_STRIDE_FOOT
close($stride_fh) || die("error closing output file: $!\n");


# Writes time-summary.dat into the scratch directory: a '#' header line,
# then one row for POSIX and one for MPI-IO.  Each row gives, as a
# percentage of run time multiplied by process count, the time left over
# after I/O followed by the summed read, write and metadata times.  Time
# counters that never appeared in the trace count as 0.
open(my $time_fh, '>', "$tmp_dir/time-summary.dat")
    || die("error opening output file:$!\n");
print {$time_fh} "# <type>, <app time>, <read>, <write>, <meta>\n";

# denominator of every percentage below
my $total_time = $runtime * $nprocs;
unless ($total_time > 0) {
    die("cannot compute time summary: run time multiplied by process count is not positive\n");
}

for my $time_row ( [ "POSIX", "CP_F_POSIX" ], [ "MPI-IO", "CP_F_MPI" ] ) {
    my ($label, $prefix) = @{$time_row};
    my ($read, $write, $meta) =
        map { defined $summary{$_} ? $summary{$_} : 0 }
        ("${prefix}_READ_TIME", "${prefix}_WRITE_TIME", "${prefix}_META_TIME");

    print {$time_fh} "$label, ",
        ((($total_time - $read - $write - $meta) / $total_time) * 100),
        ", ", (($read  / $total_time) * 100),
        ", ", (($write / $total_time) * 100),
        ", ", (($meta  / $total_time) * 100), "\n";
}
close($time_fh) || die("error closing output file: $!\n");

# copy template files to tmp_dir
# (deliberately run through the shell so that the '*' is expanded)
system("cp $FindBin::Bin/../share/* $tmp_dir/") == 0
    or die("error copying template files to $tmp_dir (status $?)\n");

# prefer a gnuplot executable sitting next to this script; otherwise use
# whichever "gnuplot" the shell finds
$gnuplot = (-x "$FindBin::Bin/gnuplot") ? "$FindBin::Bin/gnuplot" : "gnuplot";

# move to tmp_dir; everything below uses relative file names, so carrying
# on in the wrong directory would be worse than stopping
chdir($tmp_dir) or die("can't change directory to $tmp_dir: $!\n");

# execute gnuplot scripts
#
# For each plot, run its <name>-eps.gplt script and then convert
# <name>.eps with epstopdf.  A failing command is reported on STDERR but
# does not stop the run.
for my $plot (qw(counts hist pattern time-summary)) {
    system("$gnuplot ${plot}-eps.gplt") == 0
        or warn("$gnuplot ${plot}-eps.gplt failed (status $?)\n");
    system("epstopdf ${plot}.eps") == 0
        or warn("epstopdf ${plot}.eps failed (status $?)\n");
}

#system "gnuplot align-pdf.gplt";
#system "gnuplot iodist-pdf.gplt";
#system "gnuplot types-pdf.gplt";
# generate summary PDF
#
# pdflatex is run twice, presumably so that the \pageref{LastPage}
# reference written to title.tex resolves on the second pass.  Each pass
# sends its output to its own log file.
for my $latex_log ("latex.output", "latex.output2") {
    my $latex_status = system("pdflatex -halt-on-error summary.tex > $latex_log");
    next if $latex_status == 0;

    # The log lives in the scratch directory, which is removed when the
    # script exits, so show it now.
    if (open(my $log_fh, '<', $latex_log)) {
        print STDERR <$log_fh>;
        close($log_fh);
    }
    die("pdflatex failed (status $latex_status)\n");
}

# move the summary out to final location
# (list form: no shell involved, so a directory name containing blanks works)
system("mv", "summary.pdf", "$orig_dir/") == 0
    or die("error moving summary.pdf to $orig_dir (status $?)\n");