darshan / Commits / e4a20730

Commit e4a20730, authored Feb 15, 2016 by Shane Snyder
forgot to add 3 files to darshan-test/2.x
parent 919fe4a9
Changes: 3

darshan-test/2.x/fsstats-merge.pl  (new file, mode 100755)
#!/usr/bin/perl -w
#
# (C) 2010 by Argonne National Laboratory.
#
# Portions of this code including histogram package and routines for
# printing human readable sizes and percentages taken from fsstats 1.4.5
# Copyright (c) 2005 Panasas, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
use Cwd;
use Getopt::Long;
use English;

# files to process
my @csv_files = ();

process_args();

my $special_files = 0;
my $skipped_hlink = 0;
my $skipped_snapshot = 0;
my $total_cap_used = 0;
my $total_size = 0;
my $slink_relative = 0;
my $slink_absolute = 0;
my $slink_relative_pct = 0;
my $slink_absolute_pct = 0;

my ($size_histo, $cap_histo, $pos_ovhd_histo, $neg_ovhd_histo, $dir_histo,
    $dirkb_histo, $fname_histo, $slink_histo, $hlink_histo,
    $mtime_files_histo, $mtime_bytes_histo, $ctime_files_histo,
    $ctime_bytes_histo, $atime_files_histo, $atime_bytes_histo);

# initialize new histogram data structures (copied from fsstats program)
$size_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
$cap_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
$pos_ovhd_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
$neg_ovhd_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
$dir_histo = Histo->new(min => 0, incr => 1, log_incr => 1);
$dirkb_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
# Be careful in choosing the value of 'max' for histos.
# If log_incr is set for them, the largest value+1 in the last bucket will be a power of 2.
# If it is not set, the largest value+1 in the last bucket will be some multiple of the 'max' value.
# To be able to toggle log_incr off and on without having to change anything else, 'max' should be
# chosen carefully; otherwise results may look wrong.
# Hence if the max value chosen is 'n', then n+1 should be a power of 2, and n+1 should also be a
# multiple of the 'incr' value.
# Don't play with min values.
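# Worked illustration of the constraint above (not part of the original fsstats code):
# with max => 127 and incr => 8 as used below, max+1 = 128 is both a power of 2 and a
# multiple of the increment, so the linear bucketing gives 16 buckets of width 8
# covering 0..127 and the top of the last bucket stays valid if log_incr is toggled.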
# $fname_histo = Histo->new(min => 0, max => 120, incr => 8);
$fname_histo = Histo->new(min => 0, max => 127, incr => 8);
# $slink_histo = Histo->new(min => 0, max => 120, incr => 8);
$slink_histo = Histo->new(min => 0, max => 127, incr => 8);
$hlink_histo = Histo->new(min => 0, incr => 1, log_incr => 1);
$mtime_files_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
$mtime_bytes_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
$ctime_files_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
$ctime_bytes_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
$atime_files_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
$atime_bytes_histo = Histo->new(min => 0, incr => 1, log_incr => 1, integer_vals => 0);
# loop through specified csv files
foreach my $file (@csv_files)
{
    open(CSV, "$file") || die("Error opening file $file: $!\n");

    add_summary(CSV);

    add_histogram(CSV, $size_histo, "file size");
    add_histogram(CSV, $cap_histo, "capacity used");
    add_histogram(CSV, $pos_ovhd_histo, "positive overhead");
    add_histogram(CSV, $neg_ovhd_histo, "negative overhead");
    add_histogram(CSV, $dir_histo, "directory size (entries)");
    add_histogram(CSV, $dirkb_histo, "directory size");
    add_histogram(CSV, $fname_histo, "filename length");
    add_histogram(CSV, $hlink_histo, "link count");
    add_histogram(CSV, $slink_histo, "symlink target length");
    add_histogram(CSV, $mtime_files_histo, "mtime (files)");
    add_histogram(CSV, $mtime_bytes_histo, "mtime (KB)");
    add_histogram(CSV, $ctime_files_histo, "ctime (files)");
    add_histogram(CSV, $ctime_bytes_histo, "ctime (KB)");
    add_histogram(CSV, $atime_files_histo, "atime (files)");
    add_histogram(CSV, $atime_bytes_histo, "atime (KB)");

    close(CSV);
}

print_output();

exit 0;
sub print_output
{
    open($fh, ">-") || die("Error opening output.\n");

    printf($fh "#Generated by fsstats-merge.pl (fsstats v1.4.5)\n");
    printf($fh "#Comment: This is a comment line that can be modified or repeated before\n");
    printf($fh "#uploading to record voluntarily added information.\n\n");

    printf($fh "skipped special files,%d\n", $special_files);
    printf($fh "skipped duplicate hardlinks,%d\n", $skipped_hlink);
    printf($fh "skipped snapshot dirs,%d\n", $skipped_snapshot);
    printf($fh "total capacity used,%s\n", kb_to_print($total_cap_used));
    printf($fh "total user data,%s\n", kb_to_print($total_size));
    printf($fh "percent overhead,%f\n", ovhd_pct($total_size, $total_cap_used) / 100);
    printf($fh "\n");

    $size_histo->print_csv($fh, "file size", "KB");
    $cap_histo->print_csv($fh, "capacity used", "KB");
    $pos_ovhd_histo->print_csv($fh, "positive overhead", "KB");
    $neg_ovhd_histo->print_csv($fh, "negative overhead", "KB");
    $dir_histo->print_csv($fh, "directory size (entries)", "ents");
    $dirkb_histo->print_csv($fh, "directory size", "KB");
    $fname_histo->print_csv($fh, "filename length", "chars");
    $hlink_histo->print_csv($fh, "link count", "links");
    $slink_histo->print_csv($fh, "symlink target length", "chars");

    printf($fh "relative symlink target pct,%f\n" .
           "absolute symlink target pct,%f\n",
           $slink_relative ? $slink_relative / $slink_histo->{count} : 0,
           $slink_absolute ? $slink_absolute / $slink_histo->{count} : 0);

    $mtime_files_histo->print_csv($fh, "mtime (files)", "days");
    $mtime_bytes_histo->print_csv($fh, "mtime (KB)", "days");
    $ctime_files_histo->print_csv($fh, "ctime (files)", "days");
    $ctime_bytes_histo->print_csv($fh, "ctime (KB)", "days");
    $atime_files_histo->print_csv($fh, "atime (files)", "days");
    $atime_bytes_histo->print_csv($fh, "atime (KB)", "days");
}
sub process_args
{
    use vars qw( $opt_help );

    Getopt::Long::Configure("no_ignore_case", "bundling");
    GetOptions("help");

    if ($opt_help)
    {
        print_help();
        exit(0);
    }

    # there should be at least two remaining arguments (file names)
    if ($#ARGV < 1)
    {
        print "Error: invalid arguments.\n";
        print_help();
        exit(1);
    }

    @csv_files = @ARGV;

    return;
}
sub print_help
{
    print <<EOF;
Usage: $PROGRAM_NAME file file [file ...]
    --help    Prints this help message

Purpose:
    This script reads multiple csv files generated by fsstats 1.4.5 and
    merges the statistics into a single csv file which is printed to stdout.
EOF
    return;
}
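# Example invocation (hypothetical file names; the inputs must be csv reports
# produced by fsstats v1.4.5):
#   ./fsstats-merge.pl scratch1.csv scratch2.csv > merged.csv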
# add_summary()
# adds summary fields from the specified file handle
#
sub add_summary
{
    my ($file) = @_;
    my $line = "";
    my $version = "";

    seek($file, 0, 0);

    while ($line = <$file>)
    {
        if ($line =~ /Generated by.*fsstats v([\d\.]+)/)
        {
            $version = $1;
        }

        if ($line =~ /^skipped special files,(.*)/)
        {$special_files += $1;}
        if ($line =~ /^skipped duplicate hardlinks,(.*)/)
        {$skipped_hlink += $1;}
        if ($line =~ /^skipped snapshot dirs,(.*)/)
        {$skipped_snapshot += $1;}
        if ($line =~ /^total capacity used,(.*)/)
        {$total_cap_used += print_to_kb($1);}
        if ($line =~ /^total user data,(.*)/)
        {$total_size += print_to_kb($1);}

        # NOTE: deliberately don't accumulate slink percentage; we have to do
        # a calculation later so that they are weighted properly.  See
        # special case in add_histogram() subroutine.
        if ($line =~ /^relative symlink target pct,(.*)/)
        {$slink_relative_pct = $1;}
        if ($line =~ /^absolute symlink target pct,(.*)/)
        {$slink_absolute_pct = $1;}
    }

    if ($version ne "1.4.5")
    {
        die("Error: csv file not generated by fsstats v1.4.5.\n");
    }

    seek($file, 0, 0);
}
# add_histogram()
#
# finds the specified histogram in a csv file and sums it to the existing
# histogram data structure
sub add_histogram
{
    my $file = $_[0];   # open file handle
    # $_[1] is the data structure we are adding to
    my $name = quotemeta($_[2]);   # name of the histogram

    # save some global values; we need to replace/adjust those manually
    # after adding in the histogram because we no longer know the actual
    # value of each data point
    my $old_count = $_[1]->{count};
    my $old_total_val = $_[1]->{total_val};
    my $old_min_val = $_[1]->{min_val};
    my $old_max_val = $_[1]->{max_val};

    my $count = 0;
    my $average = 0;
    my $min = 0;
    my $max = 0;
    my $total_count = 0;

    seek($file, 0, 0);

    while ($line = <$file>)
    {
        # look for specified histogram
        if ($line =~ /^histogram,$name$/)
        {
            while ($line = <$file>)
            {
                if ($line =~ /^count,(.*),/)
                {$count = $1;}
                elsif ($line =~ /^average,(.*),/)
                {$average = $1;}
                elsif ($line =~ /^min,(.*),/)
                {$min = $1;}
                elsif ($line =~ /^max,(.*),/)
                {$max = $1;}
                elsif ($line =~ /^bucket min,bucket max,/)
                {}   # key
                elsif ($line =~ /^([0-9]*\.?[0-9]*),([0-9]*\.?\-?[0-9]*),([0-9]*\.?[0-9]*),([0-9]*\.?[0-9]*),([0-9]*\.?[0-9]*),([0-9]*\.?[0-9]*),([0-9]*\.?[0-9]*),([0-9]*\.?[0-9]*)/)
                {
                    if ($3 > 0)
                    {
                        $_[1]->add(($6 / $3), $3);
                        $total_count += $3;
                    }
                }
                elsif ($line =~ /^\s*$/)
                {
                    # stop when we hit a blank line
                    last;
                }
                else
                {
                    print $line;
                    die("Error: poorly formatted csv file.\n");
                }
            }
            last;
        }
    }

    # work backwards to relative and absolute slink counters
    if ($name eq quotemeta("symlink target length"))
    {
        $slink_relative += $slink_relative_pct * $total_count;
        $slink_absolute += $slink_absolute_pct * $total_count;
    }

    # fix min and max value (keep the smallest min and largest max seen so far)
    if (defined $old_min_val && $old_min_val < $min)
    {$_[1]->{min_val} = $old_min_val;}
    else
    {$_[1]->{min_val} = $min;}
    if (defined $old_max_val && $old_max_val > $max)
    {$_[1]->{max_val} = $old_max_val;}
    else
    {$_[1]->{max_val} = $max;}

    seek($file, 0, 0);
}
# convert a printable value to KB
sub print_to_kb
{
    my ($arg) = @_;
    my ($value, $unit) = split(/ /, $arg);
    my $num = 0;

    if ($unit eq "TB")
    {
        $num = $value * (1024*1024*1024);
    }
    elsif ($unit eq "GB")
    {
        $num = $value * (1024*1024);
    }
    elsif ($unit eq "MB")
    {
        $num = $value * (1024);
    }
    elsif ($unit eq "KB")
    {
        $num = $value;
    }

    return $num;
}
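# For example (illustration only): print_to_kb("1.50 GB") splits the string into the
# value 1.50 and the unit "GB", and returns 1.50 * 1024 * 1024 = 1572864 KB.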
# The routines in this section were taken directly from fsstats 1.4.5 by
# Marc Unangst <munangst@panasas.com> and Shobhit Dayal
# <sdayal@andrew.cmu.edu>
#######################################################################
# Compute the percent overhead for a given capacity-used and size.
# This method of computing overhead computes "percentage of the
# capacity used that is overhead" and ranges from 0% (no overhead) to
# 100% (size==0 and cap>0, space is all overhead).
sub ovhd_pct
{
    my ($size, $cap) = @_;

    if ($cap == 0)
    {
        return 0;
    }

    return (($cap - $size) / $cap) * 100;
}
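# For example (illustration only): with 80 KB of user data stored in 100 KB of used
# capacity, ovhd_pct(80, 100) returns ((100 - 80) / 100) * 100 = 20 percent overhead.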
# convert a KB value to a "printable" value (GB, MB, or KB) depending
# on its magnitude. returns a string suitable for printing.
sub kb_to_print
{
    my ($kb) = @_;
    my $num;
    my $unit;

    if ($kb > 1024*1024*1024)
    {
        $num = $kb / (1024*1024*1024);
        $unit = "TB";
    }
    elsif ($kb > 1024*1024)
    {
        $num = $kb / (1024*1024);
        $unit = "GB";
    }
    elsif ($kb > 1024)
    {
        $num = $kb / 1024;
        $unit = "MB";
    }
    else
    {
        $num = $kb;
        $unit = "KB";
    }

    return sprintf("%.2f %s", $num, $unit);
}
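# For example (illustration only): kb_to_print(1572864) falls into the GB branch and
# returns the string "1.50 GB", the inverse of the print_to_kb() example above.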
##### Histo.pm #####
#
# Histo.pm
#
# Histogram module for Perl.
#
# Author: Marc Unangst <munangst@panasas.com>
#
# Copyright (c) 2005 Panasas, Inc. All rights reserved.
#
use strict;

package Histo;
#
# Constructor for a new Histo object. The arguments are a hash
# of parameter/value pairs. The "min" and "incr" parameters
# must be supplied. "max" and "log_incr" are optional.
#
sub new
{
    my $type = shift;
    my %params = @_;
    my $self = {};

    die "Histo->new: required parameters not set\n"
        unless (defined $params{min} && defined $params{incr});

    $self->{min} = $params{min};
    # $self->{max} = $params{max}-1 if defined $params{max};
    $self->{max} = $params{max} if defined $params{max};
    $self->{incr} = $params{incr};
    if (defined $params{integer_vals})
    {
        $self->{integer_vals} = $params{integer_vals};
    }
    else
    {
        $self->{integer_vals} = 1;
    }
    $self->{count} = 0;
    $self->{total_val} = 0;

    if ($params{log_incr})
    {
        $self->{log_incr} = $params{log_incr};
        $self->{bucket_max} = [ $self->{min} + $self->{log_incr} ];
    }
    else
    {
        $self->{log_incr} = 0;
    }

    $self->{buckets} = [];
    $self->{buckets_val} = [];

    bless $self, $type;
}
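# For example (illustration only): Histo->new(min => 0, incr => 1, log_incr => 1)
# creates a histogram whose bucket upper bounds grow as 1, 3, 7, 15, ... (one less
# than a power of 2), matching the log-scale histograms produced by fsstats itself.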
#
# Add a new data point to the histogram.
#
# @arg $val
# Value to add to the histogram
# @arg $count
# Optional; if specified, the weight of the item being added.
# Calling add($x, 3) is the same as calling add($x) three times.
#
sub add ($$;$) {
    my $self = shift;
    my ($val, $count) = @_;

    if (!defined $count)
    {
        $count = 1;
    }

    if (!defined $self->{min_val} || $val < $self->{min_val})
    {
        $self->{min_val} = $val;
    }
    if (!defined $self->{max_val} || $val > $self->{max_val})
    {
        $self->{max_val} = $val;
    }

    # if(int($val) != $val) {
    #     $self->{integer_vals} = 0;
    # }

    $self->{count} += $count;
    $self->{total_val} += ($val*$count);
    #$self->{total_val} += $val;

    if (defined $self->{max} && $val > $self->{max})
    {
        $self->{over_max} += $count;
        $self->{over_max_buckets_val} += $val*$count;
    }
    elsif ($val < $self->{min})
    {
        $self->{under_min} += $count;
        $self->{under_min_buckets_val} += $val*$count;
    }
    else
    {
        my $b;
        my $val_to_use = $val;

        # NOTE: not applicable in the fsstats-merge.pl script
        # if($self == $pos_ovhd_histo || $self == $neg_ovhd_histo) {
        #     $val_to_use = $size;
        # }

        if ($self->{log_incr})
        {
            $b = 0;
            my $x = $self->{bucket_max}[0];
            while ($val_to_use >= $x+1)
            {
                $x = $x * 2 + 1;
                $b++;
                if ($b > $#{$self->{bucket_max}})
                {
                    $self->{bucket_max}[$b] = $x;
                }
            }
        }
        else
        {
            $b = int(($val_to_use - $self->{min}) / $self->{incr});
        }
        #print STDERR "sample $val into bucket $b\n";
        $self->{buckets}[$b] += $count;
        $self->{buckets_val}[$b] += $val*$count;
        if (!defined $self->{largest_bucket} ||
            $self->{buckets}[$b] > $self->{largest_bucket})
        {
            $self->{largest_bucket} = $self->{buckets}[$b];
        }
    }
}
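# For example (illustration only): with min => 0 and log_incr => 1, the bucket upper
# bounds are 1, 3, 7, 15, ...; add(5, 3) therefore lands 3 samples in bucket 2
# (covering values 4 through 7) and adds 5*3 = 15 to that bucket's accumulated value.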
#
# Get maximum value of the specified bucket.
#
# @arg $b
# bucket number
#
# @internal
#
sub _get_bucket_max ($$)
{
    my $self = shift;
    my ($b) = @_;

    # my $epsilon; dont need this
    # if($self->{integer_vals}) {
    #     $epsilon = 1;
    #     $epsilon = 0;
    # }
    # else {
    #     $epsilon = 0.1;
    # }

    if ($self->{log_incr})
    {
        if ($b <= $#{$self->{bucket_max}})
        {
            # return ($self->{bucket_max}[$b]-$epsilon);
            return ($self->{bucket_max}[$b]);
        }
        else
        {
            return undef;
        }
    }
    else
    {
        #return ($self->{incr}*($b+1))-$epsilon;
        return (($self->{incr}*($b+1)) - 1);
    }
}
#
# Get minimum value of the specified bucket.
#
# @arg $b
# bucket number
#
# @internal
#
sub _get_bucket_min ($$)
{
    my $self = shift;
    my ($b) = @_;

    if ($self->{log_incr})
    {
        if ($b == 0)
        {
            return $self->{min};
        }
        elsif ($b <= $#{$self->{bucket_max}})
        {
            # return $self->{bucket_max}[$b-1]
            return $self->{bucket_max}[$b-1] + 1;
        }
        else
        {
            return undef;
        }
    }
    else
    {
        return ($self->{min} + $self->{incr}*($b));
    }
}
#
# Print the histogram contents to STDOUT.
#
# @arg $prefix
# String to prefix each output line with.
# @arg $unit_str
# String that describes the units of the histogram items.
#
sub print ($$$)
{
    my $self = shift;
    my ($prefix, $unit_str) = @_;
    my $c = 0;
    my $d = 0;
    # my $prev_pct = 0;
    my $width;
    my $fmt;

    # if ($self->{integer_vals}) {
        $width = length sprintf("%d", $self->_get_bucket_max($#{$self->{buckets}}));
        $fmt = "d";
    # }
    # else {
    #     $width = length sprintf("%.1f", $self->_get_bucket_max($#{$self->{buckets}}));
    #     $fmt = ".1f"
    # }

    my $bwidth = 0;
    if (defined $self->{largest_bucket})
    {
        $bwidth = length sprintf("%d", $self->{largest_bucket});
    }
    if ($bwidth < 5)
    {
        $bwidth = 5;
    }
    my $bwidth_val = length sprintf("%.2f", $self->{total_val});

    printf("%scount=%d avg=%.2f %s\n",
           $prefix, $self->{count},