Commit 6e004935 authored by Philip Carns

Add wtime_offset field to job header to detect when MPI_Wtime() is not relative to MPI_Init() or gettimeofday(); see #129.


git-svn-id: https://svn.mcs.anl.gov/repos/darshan/trunk@831 3b7491f3-a168-0410-bf4b-c445ed680a29
parent cc2cc4c1
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#endif #endif
/* update this on file format changes */ /* update this on file format changes */
#define CP_VERSION "2.02" #define CP_VERSION "2.03"
/* magic number for validating output files and checking byte order */ /* magic number for validating output files and checking byte order */
#define CP_MAGIC_NR 6567223 #define CP_MAGIC_NR 6567223
...@@ -243,6 +243,7 @@ struct darshan_job ...@@ -243,6 +243,7 @@ struct darshan_job
int64_t end_time; int64_t end_time;
int64_t nprocs; int64_t nprocs;
int64_t jobid; int64_t jobid;
double wtime_offset; /* offset of MPI_Wtime() from job start */
char metadata[DARSHAN_JOB_METADATA_LEN]; char metadata[DARSHAN_JOB_METADATA_LEN];
}; };
......
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for darshan-runtime 2.2.8. # Generated by GNU Autoconf 2.69 for darshan-runtime 2.2.9-pre1.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
...@@ -577,8 +577,8 @@ MAKEFLAGS= ...@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='darshan-runtime' PACKAGE_NAME='darshan-runtime'
PACKAGE_TARNAME='darshan-runtime' PACKAGE_TARNAME='darshan-runtime'
PACKAGE_VERSION='2.2.8' PACKAGE_VERSION='2.2.9-pre1'
PACKAGE_STRING='darshan-runtime 2.2.8' PACKAGE_STRING='darshan-runtime 2.2.9-pre1'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
...@@ -1243,7 +1243,7 @@ if test "$ac_init_help" = "long"; then ...@@ -1243,7 +1243,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures darshan-runtime 2.2.8 to adapt to many kinds of systems. \`configure' configures darshan-runtime 2.2.9-pre1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
...@@ -1304,7 +1304,7 @@ fi ...@@ -1304,7 +1304,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of darshan-runtime 2.2.8:";; short | recursive ) echo "Configuration of darshan-runtime 2.2.9-pre1:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
...@@ -1408,7 +1408,7 @@ fi ...@@ -1408,7 +1408,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
darshan-runtime configure 2.2.8 darshan-runtime configure 2.2.9-pre1
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
...@@ -1760,7 +1760,7 @@ cat >config.log <<_ACEOF ...@@ -1760,7 +1760,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by darshan-runtime $as_me 2.2.8, which was It was created by darshan-runtime $as_me 2.2.9-pre1, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
...@@ -4336,7 +4336,7 @@ $as_echo "#define __D_MPI_REQUEST MPI_Request" >>confdefs.h ...@@ -4336,7 +4336,7 @@ $as_echo "#define __D_MPI_REQUEST MPI_Request" >>confdefs.h
fi fi
DARSHAN_VERSION="2.2.8" DARSHAN_VERSION="2.2.9-pre1"
...@@ -4854,7 +4854,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ...@@ -4854,7 +4854,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by darshan-runtime $as_me 2.2.8, which was This file was extended by darshan-runtime $as_me 2.2.9-pre1, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
...@@ -4916,7 +4916,7 @@ _ACEOF ...@@ -4916,7 +4916,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
darshan-runtime config.status 2.2.8 darshan-runtime config.status 2.2.9-pre1
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"
......
...@@ -5,7 +5,7 @@ dnl Process this file with autoconf to produce a configure script. ...@@ -5,7 +5,7 @@ dnl Process this file with autoconf to produce a configure script.
dnl You may need to use autoheader as well if changing any DEFINEs dnl You may need to use autoheader as well if changing any DEFINEs
dnl sanity checks, output header, location of scripts used here dnl sanity checks, output header, location of scripts used here
AC_INIT([darshan-runtime], [2.2.8]) AC_INIT([darshan-runtime], [2.2.9-pre1])
AC_CONFIG_SRCDIR([darshan.h]) AC_CONFIG_SRCDIR([darshan.h])
AC_CONFIG_AUX_DIR(../maint/config) AC_CONFIG_AUX_DIR(../maint/config)
AC_CONFIG_HEADER(darshan-runtime-config.h) AC_CONFIG_HEADER(darshan-runtime-config.h)
......
...@@ -1306,6 +1306,7 @@ void darshan_initialize(int argc, char** argv, int nprocs, int rank) ...@@ -1306,6 +1306,7 @@ void darshan_initialize(int argc, char** argv, int nprocs, int rank)
darshan_global_job->log_job.uid = getuid(); darshan_global_job->log_job.uid = getuid();
darshan_global_job->log_job.start_time = time(NULL); darshan_global_job->log_job.start_time = time(NULL);
darshan_global_job->log_job.nprocs = nprocs; darshan_global_job->log_job.nprocs = nprocs;
darshan_global_job->log_job.wtime_offset = posix_wtime();
my_rank = rank; my_rank = rank;
/* record exe and arguments */ /* record exe and arguments */
......
...@@ -51,6 +51,9 @@ my %hash_unique_file_time = (); ...@@ -51,6 +51,9 @@ my %hash_unique_file_time = ();
my $shared_file_time = 0; my $shared_file_time = 0;
my $total_job_bytes = 0; my $total_job_bytes = 0;
my $warn_read_time_overflow = 0;
my $warn_write_time_overflow = 0;
process_args(); process_args();
check_prereqs(); check_prereqs();
...@@ -123,6 +126,9 @@ while ($line = <TRACE>) { ...@@ -123,6 +126,9 @@ while ($line = <TRACE>) {
if ($line =~ /^# jobid: /) { if ($line =~ /^# jobid: /) {
($junk, $jobid) = split(':', $line, 2); ($junk, $jobid) = split(':', $line, 2);
} }
if ($line =~ /^# wtime_offset: /) {
($junk, $wtime_offset) = split(':', $line, 2);
}
if ($line =~ /^# darshan log version: /) { if ($line =~ /^# darshan log version: /) {
($junk, $version) = split(':', $line, 2); ($junk, $version) = split(':', $line, 2);
$version =~ s/^\s+//; $version =~ s/^\s+//;
...@@ -236,12 +242,15 @@ while ($line = <TRACE>) { ...@@ -236,12 +242,15 @@ while ($line = <TRACE>) {
if ($fields[2] eq "CP_F_READ_END_TIMESTAMP" && $fields[3] != 0) { if ($fields[2] eq "CP_F_READ_END_TIMESTAMP" && $fields[3] != 0) {
# assume we got the read start already # assume we got the read start already
my $xdelta = $fields[3] - $last_read_start; my $xdelta = $fields[3] - $last_read_start;
# adjust based on wtime_offset if available
$last_read_start -= $wtime_offset;
# adjust for systems that have absolute time stamps # adjust for systems that have absolute time stamps
if($last_read_start > $starttime) { if($last_read_start > $starttime) {
$last_read_start -= $starttime; $last_read_start -= $starttime;
} }
if($fields[3] > $runtime) if($fields[3] > $runtime && !$warn_read_time_overflow)
{ {
$warn_read_time_overflow = 1;
print "Warning: detected read access at time $fields[3] but runtime is only $runtime seconds.\n"; print "Warning: detected read access at time $fields[3] but runtime is only $runtime seconds.\n";
} }
if($fields[0] == -1){ if($fields[0] == -1){
...@@ -258,12 +267,15 @@ while ($line = <TRACE>) { ...@@ -258,12 +267,15 @@ while ($line = <TRACE>) {
if ($fields[2] eq "CP_F_WRITE_END_TIMESTAMP" && $fields[3] != 0) { if ($fields[2] eq "CP_F_WRITE_END_TIMESTAMP" && $fields[3] != 0) {
# assume we got the write start already # assume we got the write start already
my $xdelta = $fields[3] - $last_write_start; my $xdelta = $fields[3] - $last_write_start;
# adjust based on wtime_offset if available
$last_write_start -= $wtime_offset;
# adjust for systems that have absolute time stamps # adjust for systems that have absolute time stamps
if($last_write_start > $starttime) { if($last_write_start > $starttime) {
$last_write_start -= $starttime; $last_write_start -= $starttime;
} }
if($fields[3] > $runtime) if($fields[3] > $runtime && !$warn_write_time_overflow)
{ {
$warn_write_time_overflow = 1;
print "Warning: detected write access at time $fields[3] but runtime is only $runtime seconds.\n"; print "Warning: detected write access at time $fields[3] but runtime is only $runtime seconds.\n";
} }
if($fields[0] == -1){ if($fields[0] == -1){
......
...@@ -221,6 +221,7 @@ int (*getfile_internal)(darshan_fd fd, ...@@ -221,6 +221,7 @@ int (*getfile_internal)(darshan_fd fd,
#define JOB_SIZE_200 56 #define JOB_SIZE_200 56
/* internal routines for parsing different file versions */ /* internal routines for parsing different file versions */
static int getjob_internal_203(darshan_fd file, struct darshan_job *job);
static int getjob_internal_201(darshan_fd file, struct darshan_job *job); static int getjob_internal_201(darshan_fd file, struct darshan_job *job);
static int getjob_internal_200(darshan_fd file, struct darshan_job *job); static int getjob_internal_200(darshan_fd file, struct darshan_job *job);
static int getfile_internal_200(darshan_fd fd, struct darshan_job *job, static int getfile_internal_200(darshan_fd fd, struct darshan_job *job,
...@@ -369,7 +370,13 @@ int darshan_log_getjob(darshan_fd file, struct darshan_job *job) ...@@ -369,7 +370,13 @@ int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
return(-1); return(-1);
} }
if(strcmp(file->version, "2.02") == 0) if(strcmp(file->version, "2.03") == 0)
{
getjob_internal = getjob_internal_203;
getfile_internal = getfile_internal_200;
file->job_struct_size = sizeof(*job);
}
else if(strcmp(file->version, "2.02") == 0)
{ {
getjob_internal = getjob_internal_201; getjob_internal = getjob_internal_201;
getfile_internal = getfile_internal_200; getfile_internal = getfile_internal_200;
...@@ -729,14 +736,23 @@ void darshan_log_close(darshan_fd file) ...@@ -729,14 +736,23 @@ void darshan_log_close(darshan_fd file)
*/ */
void darshan_log_print_version_warnings(struct darshan_job *job) void darshan_log_print_version_warnings(struct darshan_job *job)
{ {
if(strcmp(job->version_string, "2.02") == 0) if(strcmp(job->version_string, "2.03") == 0)
{ {
/* current version */ /* current version */
return; return;
} }
if(strcmp(job->version_string, "2.02") == 0)
{
printf("# WARNING: version 2.02 log format does not support the following parameters:\n");
printf("# wtime_offset (in job header).\n");
return;
}
if(strcmp(job->version_string, "2.01") == 0) if(strcmp(job->version_string, "2.01") == 0)
{ {
printf("# WARNING: version 2.01 log format does not support the following parameters:\n");
printf("# wtime_offset (in job header).\n");
printf("# WARNING: version 2.01 log format has the following limitations:\n"); printf("# WARNING: version 2.01 log format has the following limitations:\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n"); printf("# - inaccurate statistics in some multi-threaded cases.\n");
return; return;
...@@ -744,6 +760,8 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -744,6 +760,8 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
if(strcmp(job->version_string, "2.00") == 0) if(strcmp(job->version_string, "2.00") == 0)
{ {
printf("# WARNING: version 2.00 log format does not support the following parameters:\n");
printf("# wtime_offset (in job header).\n");
printf("# WARNING: version 2.00 log format has the following limitations:\n"); printf("# WARNING: version 2.00 log format has the following limitations:\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n"); printf("# - inaccurate statistics in some multi-threaded cases.\n");
return; return;
...@@ -760,6 +778,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -760,6 +778,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# CP_F_SLOWEST_RANK_TIME\n"); printf("# CP_F_SLOWEST_RANK_TIME\n");
printf("# CP_F_VARIANCE_RANK_TIME\n"); printf("# CP_F_VARIANCE_RANK_TIME\n");
printf("# CP_F_VARIANCE_RANK_BYTES\n"); printf("# CP_F_VARIANCE_RANK_BYTES\n");
printf("# wtime_offset (in job header).\n");
printf("# WARNING: version 1.24 log format has the following limitations:\n"); printf("# WARNING: version 1.24 log format has the following limitations:\n");
printf("# - does not store the job id in the file.\n"); printf("# - does not store the job id in the file.\n");
printf("# - inaccurate statistics in some multi-threaded cases.\n"); printf("# - inaccurate statistics in some multi-threaded cases.\n");
...@@ -777,6 +796,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -777,6 +796,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# CP_F_SLOWEST_RANK_TIME\n"); printf("# CP_F_SLOWEST_RANK_TIME\n");
printf("# CP_F_VARIANCE_RANK_TIME\n"); printf("# CP_F_VARIANCE_RANK_TIME\n");
printf("# CP_F_VARIANCE_RANK_BYTES\n"); printf("# CP_F_VARIANCE_RANK_BYTES\n");
printf("# wtime_offset (in job header).\n");
printf("# WARNING: version 1.23 log format has the following limitations:\n"); printf("# WARNING: version 1.23 log format has the following limitations:\n");
printf("# - may have incorrect mount point mappings for files with rank > 0.\n"); printf("# - may have incorrect mount point mappings for files with rank > 0.\n");
printf("# - does not store the job id in the file.\n"); printf("# - does not store the job id in the file.\n");
...@@ -797,6 +817,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -797,6 +817,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# CP_F_SLOWEST_RANK_TIME\n"); printf("# CP_F_SLOWEST_RANK_TIME\n");
printf("# CP_F_VARIANCE_RANK_TIME\n"); printf("# CP_F_VARIANCE_RANK_TIME\n");
printf("# CP_F_VARIANCE_RANK_BYTES\n"); printf("# CP_F_VARIANCE_RANK_BYTES\n");
printf("# wtime_offset (in job header).\n");
printf("# WARNING: version 1.22 log format has the following limitations:\n"); printf("# WARNING: version 1.22 log format has the following limitations:\n");
printf("# - does not record mounted file systems, mount points, or fs types.\n"); printf("# - does not record mounted file systems, mount points, or fs types.\n");
printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n"); printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n");
...@@ -825,6 +846,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job) ...@@ -825,6 +846,7 @@ void darshan_log_print_version_warnings(struct darshan_job *job)
printf("# CP_F_SLOWEST_RANK_TIME\n"); printf("# CP_F_SLOWEST_RANK_TIME\n");
printf("# CP_F_VARIANCE_RANK_TIME\n"); printf("# CP_F_VARIANCE_RANK_TIME\n");
printf("# CP_F_VARIANCE_RANK_BYTES\n"); printf("# CP_F_VARIANCE_RANK_BYTES\n");
printf("# wtime_offset (in job header).\n");
printf("# WARNING: version 1.21 log format has the following limitations:\n"); printf("# WARNING: version 1.21 log format has the following limitations:\n");
printf("# - does not record mounted file systems, mount points, or fs types.\n"); printf("# - does not record mounted file systems, mount points, or fs types.\n");
printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n"); printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n");
...@@ -1078,7 +1100,7 @@ static void shift_missing_1_24(struct darshan_file* file) ...@@ -1078,7 +1100,7 @@ static void shift_missing_1_24(struct darshan_file* file)
return; return;
} }
static int getjob_internal_201(darshan_fd file, struct darshan_job *job) static int getjob_internal_203(darshan_fd file, struct darshan_job *job)
{ {
int ret; int ret;
...@@ -1100,6 +1122,70 @@ static int getjob_internal_201(darshan_fd file, struct darshan_job *job) ...@@ -1100,6 +1122,70 @@ static int getjob_internal_201(darshan_fd file, struct darshan_job *job)
return(0); return(0);
} }
/* try byte swapping */
DARSHAN_BSWAP64(&job->magic_nr);
if(job->magic_nr == CP_MAGIC_NR)
{
file->swap_flag = 1;
DARSHAN_BSWAP64(&job->uid);
DARSHAN_BSWAP64(&job->start_time);
DARSHAN_BSWAP64(&job->end_time);
DARSHAN_BSWAP64(&job->nprocs);
DARSHAN_BSWAP64(&job->jobid);
DARSHAN_BSWAP64(&job->wtime_offset);
return(0);
}
/* otherwise this file is just broken */
fprintf(stderr, "Error: bad magic number in darshan file.\n");
return(-1);
}
static int getjob_internal_201(darshan_fd file, struct darshan_job *job)
{
int ret;
struct darshan_job_201
{
char version_string[8];
int64_t magic_nr;
int64_t uid;
int64_t start_time;
int64_t end_time;
int64_t nprocs;
int64_t jobid;
char metadata[DARSHAN_JOB_METADATA_LEN];
} job_201;
memset(&job_201, 0, sizeof(job_201));
memset(job, 0, sizeof(*job));
ret = darshan_log_seek(file, 0);
if(ret < 0)
return(ret);
ret = darshan_log_read(file, &job_201, sizeof(job_201));
if (ret < sizeof(job_201))
{
fprintf(stderr, "Error: invalid log file (too short).\n");
return(-1);
}
memcpy(job->version_string, job_201.version_string, 8);
job->magic_nr = job_201.magic_nr;
job->uid = job_201.uid;
job->start_time = job_201.start_time;
job->end_time = job_201.end_time;
job->nprocs = job_201.nprocs;
job->jobid = job_201.jobid;
memcpy(job->metadata, job_201.metadata, DARSHAN_JOB_METADATA_LEN);
if(job->magic_nr == CP_MAGIC_NR)
{
/* no byte swapping needed, this file is in host format already */
file->swap_flag = 0;
return(0);
}
/* try byte swapping */ /* try byte swapping */
DARSHAN_BSWAP64(&job->magic_nr); DARSHAN_BSWAP64(&job->magic_nr);
if(job->magic_nr == CP_MAGIC_NR) if(job->magic_nr == CP_MAGIC_NR)
...@@ -1133,7 +1219,7 @@ static int getjob_internal_200(darshan_fd file, struct darshan_job *job) ...@@ -1133,7 +1219,7 @@ static int getjob_internal_200(darshan_fd file, struct darshan_job *job)
int64_t jobid; int64_t jobid;
} job_200; } job_200;
memset(job, 0, sizeof(job_200)); memset(&job_200, 0, sizeof(job_200));
memset(job, 0, sizeof(*job)); memset(job, 0, sizeof(*job));
ret = darshan_log_seek(file, 0); ret = darshan_log_seek(file, 0);
......
...@@ -261,6 +261,7 @@ int main(int argc, char **argv) ...@@ -261,6 +261,7 @@ int main(int argc, char **argv)
printf("# end_time_asci: %s", ctime(&tmp_time)); printf("# end_time_asci: %s", ctime(&tmp_time));
printf("# nprocs: %" PRId64 "\n", job.nprocs); printf("# nprocs: %" PRId64 "\n", job.nprocs);
printf("# run time: %" PRId64 "\n", job.end_time - job.start_time + 1); printf("# run time: %" PRId64 "\n", job.end_time - job.start_time + 1);
printf("# wtime_offset: %f\n", job.wtime_offset);
for(token=strtok_r(job.metadata, "\n", &save); for(token=strtok_r(job.metadata, "\n", &save);
token != NULL; token != NULL;
token=strtok_r(NULL, "\n", &save)) token=strtok_r(NULL, "\n", &save))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment