Commit 7fdf26f0 authored by Shane Snyder's avatar Shane Snyder
Browse files

First cut at logutils and posix parser

parent 3d963df9
......@@ -14,6 +14,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <zlib.h>
#ifdef HAVE_LIBBZ2
......@@ -30,15 +31,24 @@ struct darshan_fd_s
int swap_flag;
char version[8];
char* name;
/* TODO: ultimately store indices here */
int mod_count;
int64_t job_off;
int64_t rec_off;
int64_t mod_off;
int64_t end_off;
};
static int darshan_log_seek(darshan_fd fd, off_t offset);
static int darshan_log_read(darshan_fd fd, void *buf, int len);
static int darshan_log_write(darshan_fd fd, void *buf, int len);
//static const char* darshan_log_error(darshan_fd fd, int* errnum);
/* a rather crude API for accessing raw binary darshan files */
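/* darshan_log_open()
*
* open a darshan log file for reading/writing
*
* returns a darshan_fd handle for the opened log on success
* (NOTE: the failure return value is not visible here -- presumably NULL; verify)
*/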
darshan_fd darshan_log_open(const char *name, const char *mode)
{
int o_flags;
......@@ -78,11 +88,20 @@ darshan_fd darshan_log_open(const char *name, const char *mode)
return(tmp_fd);
}
int darshan_log_getheader(darshan_fd file, struct darshan_header *header)
/* darshan_log_getheader()
*
* read the header of the darshan log and set internal data structures
* NOTE: this function must be called before reading other portions of the log
*
* returns 0 on success, -1 on failure
*/
int darshan_log_getheader(darshan_fd fd, struct darshan_header *header)
{
struct stat sbuf;
int64_t ndx_buf[2];
int ret;
ret = darshan_log_seek(file, 0);
ret = darshan_log_seek(fd, 0);
if(ret < 0)
{
fprintf(stderr, "Error: unable to seek in darshan log file.\n");
......@@ -90,46 +109,68 @@ int darshan_log_getheader(darshan_fd file, struct darshan_header *header)
}
/* read header from log file */
ret = darshan_log_read(file, header, sizeof(*header));
ret = darshan_log_read(fd, header, sizeof(*header));
if(ret < sizeof(*header))
{
fprintf(stderr, "Error: invalid darshan log file (failed to read header).\n");
return(-1);
}
/* save the version string -- this can be used to support multiple log versions */
strncpy(file->version, header->version_string, 8);
/* save the version string */
strncpy(fd->version, header->version_string, 8);
if(header->magic_nr == CP_MAGIC_NR)
{
/* no byte swapping needed, this file is in host format already */
file->swap_flag = 0;
return(0);
fd->swap_flag = 0;
}
else
{
/* try byte swapping */
DARSHAN_BSWAP64(&header->magic_nr);
if(header->magic_nr == CP_MAGIC_NR)
{
fd->swap_flag = 1;
}
else
{
/* otherwise this file is just broken */
fprintf(stderr, "Error: bad magic number in darshan log file.\n");
return(-1);
}
}
/* try byte swapping */
DARSHAN_BSWAP64(&header->magic_nr);
if(header->magic_nr == CP_MAGIC_NR)
/* read index map from log file */
ret = darshan_log_read(fd, ndx_buf, (2*sizeof(int64_t)));
if(ret < (2 * sizeof(int64_t)))
{
file->swap_flag = 1;
return(0);
fprintf(stderr, "Error: invalid darshan log file (failed to read header indexes).\n");
return(-1);
}
/* otherwise this file is just broken */
fprintf(stderr, "Error: bad magic number in darshan log file.\n");
return(-1);
/* fill index info into darshan file descriptor */
fd->job_off = sizeof(struct darshan_header) + (2 * sizeof(int64_t)); /* TODO: */
fd->rec_off = ndx_buf[0];
fd->mod_off = ndx_buf[1];
/* use stat to get log file size -- used to help index the log */
fstat(fd->pf, &sbuf);
fd->end_off = sbuf.st_size;
return(0);
}
/* darshan_log_getjob()
*
* read job level metadata from the darshan log file
*
* returns 0 on success, -1 on failure
*/
int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
int darshan_log_getjob(darshan_fd fd, struct darshan_job *job)
{
int ret;
char buffer[DARSHAN_JOB_METADATA_LEN];
ret = darshan_log_seek(file, sizeof(struct darshan_header));
ret = darshan_log_seek(fd, fd->job_off);
if(ret < 0)
{
fprintf(stderr, "Error: unable to seek in darshan log file.\n");
......@@ -137,14 +178,14 @@ int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
}
/* read the job data from the log file */
ret = darshan_log_read(file, job, sizeof(*job));
ret = darshan_log_read(fd, job, sizeof(*job));
if(ret < sizeof(*job))
{
fprintf(stderr, "Error: invalid darshan log file (failed to read job data).\n");
return(-1);
}
if(file->swap_flag)
if(fd->swap_flag)
{
/* swap bytes if necessary */
DARSHAN_BSWAP64(&job->uid);
......@@ -157,540 +198,117 @@ int darshan_log_getjob(darshan_fd file, struct darshan_job *job)
return(0);
}
int darshan_log_getmap(darshan_fd file, unsigned char **map_buf)
/* darshan_log_getmap()
*
* read the darshan record map (record id to path mappings) from the darshan log file
*
* returns 0 on success, -1 on failure
*/
int darshan_log_getmap(darshan_fd fd, struct darshan_record_ref **map)
{
int map_size;
unsigned char *map_buf;
unsigned char *buf_ptr;
darshan_record_id *rec_id_ptr;
uint32_t *path_len_ptr;
char *path_ptr;
struct darshan_record_ref *ref;
int ret;
struct stat sbuf;
int map_buf_size;
ret = darshan_log_seek(file, sizeof(struct darshan_header) + CP_JOB_RECORD_SIZE);
ret = darshan_log_seek(fd, fd->rec_off);
if(ret < 0)
{
fprintf(stderr, "Error: unable to seek in darshan log file.\n");
return(ret);
}
/* TODO: use indices map rather than stat to determine offsets */
/* allocate a buffer to store the (serialized) darshan record map */
/* NOTE: caller's responsibility to free this allocated map buffer */
fstat(file->pf, &sbuf);
map_buf_size = sbuf.st_size - (sizeof(struct darshan_header) + CP_JOB_RECORD_SIZE);
*map_buf = malloc(map_buf_size);
if(!(*map_buf))
map_size = fd->mod_off - fd->rec_off;
map_buf = malloc(map_size);
if(!map_buf)
return(-1);
/* read the record map from the log file */
ret = darshan_log_read(file, *map_buf, map_buf_size);
if(ret < map_buf_size)
ret = darshan_log_read(fd, map_buf, map_size);
if(ret < map_size)
{
fprintf(stderr, "Error: invalid darshan log file (failed to read record map).\n");
free(map_buf);
return(-1);
}
if(file->swap_flag)
buf_ptr = map_buf;
while(buf_ptr < (map_buf + map_size))
{
/* we need to sort out endianness issues before passing back the serialized buffer */
/* get pointers for each field of this darshan record */
/* NOTE: darshan record map serialization method:
* ... darshan_record_id | (uint32_t) path_len | path ...
*/
unsigned char *buf_ptr = *map_buf;
darshan_record_id *rec_id_ptr;
uint32_t *path_len_ptr;
while(buf_ptr < (*map_buf + map_buf_size))
rec_id_ptr = (darshan_record_id *)buf_ptr;
buf_ptr += sizeof(darshan_record_id);
path_len_ptr = (uint32_t *)buf_ptr;
buf_ptr += sizeof(uint32_t);
path_ptr = (char *)buf_ptr;
buf_ptr += *path_len_ptr;
ref = malloc(sizeof(*ref));
if(!ref)
{
rec_id_ptr = (darshan_record_id *)buf_ptr;
buf_ptr += sizeof(darshan_record_id);
path_len_ptr = (uint32_t *)buf_ptr;
buf_ptr += sizeof(uint32_t);
buf_ptr += *path_len_ptr;
DARSHAN_BSWAP64(rec_id_ptr);
DARSHAN_BSWAP32(path_len_ptr);
free(map_buf);
return(-1);
}
}
return(0);
}
/* TODO: implement */
/* TODO: could this be used in darshan-runtime? do we refactor so we aren't maintaining in 2 spots? */
/* darshan_log_build_map()
 *
 * placeholder: currently ignores all of its arguments and does no work
 *
 * returns 0 unconditionally
 */
int darshan_log_build_map(unsigned char *map_buf, int map_buf_size, some_struct *rec_hash)
{
    return(0);
}
/* TODO: implement */
/* TODO: could this be used in darshan-runtime? do we refactor so we aren't maintaining in 2 spots? */
/* darshan_log_destroy_map()
 *
 * placeholder: performs no work yet
 *
 * returns 0 unconditionally
 */
int darshan_log_destroy_map()
{
    int status = 0;

    return status;
}
#if 0
/* darshan_log_getfile()
 *
 * thin wrapper: forwards its arguments to getfile_internal() and hands the
 * result straight back to the caller
 *
 * return 1 if file record found, 0 on eof, and -1 on error
 */
int darshan_log_getfile(darshan_fd fd, struct darshan_job *job, struct darshan_file *file)
{
    return(getfile_internal(fd, job, file));
}
/* darshan_log_getexe()
 *
 * seek to offset fd->job_struct_size in the log and read
 * (fd->COMPAT_CP_EXE_LEN + 1) bytes into buf, then cut the string at the
 * first newline (if any)
 *
 * returns 0 on success, the (negative) seek result if the seek fails, and
 * -1 if fewer bytes than requested could be read
 */
int darshan_log_getexe(darshan_fd fd, char *buf)
{
    char *eol;
    int rc;

    rc = darshan_log_seek(fd, fd->job_struct_size);
    if(rc < 0)
    {
        return(rc);
    }

    rc = darshan_log_read(fd, buf, (fd->COMPAT_CP_EXE_LEN + 1));
    if(rc < (fd->COMPAT_CP_EXE_LEN + 1))
    {
        perror("darshan_log_read");
        return(-1);
    }

    /* only the exe string should be returned; from log format 1.23 onward a
     * table of mount entry information may follow it, separated by a
     * newline, so terminate the string there
     */
    eol = strchr(buf, '\n');
    if(eol)
    {
        eol[0] = '\0';
    }

    return(0);
}
#endif
/* darshan_log_close()
 *
 * close the underlying log descriptor (when it is non-zero) and release the
 * memory held by the handle: first its name string, then the handle itself
 *
 * the handle must not be used after this call
 */
void darshan_log_close(darshan_fd file)
{
    /* a zero descriptor is left alone, exactly as before */
    if(file->pf != 0)
    {
        close(file->pf);
    }

    free(file->name);
    free(file);

    return;
}
#if 0
/* darshan_log_print_version_warnings()
 *
 * Print summary of any problems with the detected log format
 *
 * job: job record whose version_string selects which warnings are printed
 *
 * The version string is compared against each known log format version in
 * turn.  Version "2.05" is the current format and prints nothing.  Older
 * known versions print their unsupported parameters and/or limitations to
 * stdout.  Any other version string produces an error message on stderr.
 */
void darshan_log_print_version_warnings(struct darshan_job *job)
{
    if(strcmp(job->version_string, "2.05") == 0)
    {
        /* current version */
        return;
    }

    if(strcmp(job->version_string, "2.04") == 0)
    {
        printf("# WARNING: version 2.04 log format has the following limitations:\n");
        printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
        return;
    }

    if(strcmp(job->version_string, "2.03") == 0)
    {
        /* no meaningful change to interpretation of log file, 2.03 just
         * increased the header space available for annotations.
         */
        printf("# WARNING: version 2.03 log format has the following limitations:\n");
        printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
        return;
    }

    if(strcmp(job->version_string, "2.02") == 0)
    {
        /* the header must name the version that was actually matched (2.02);
         * it previously reported 2.01
         */
        printf("# WARNING: version 2.02 log format has the following limitations:\n");
        printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
        printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
        return;
    }

    if(strcmp(job->version_string, "2.01") == 0)
    {
        printf("# WARNING: version 2.01 log format has the following limitations:\n");
        printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
        printf("# - inaccurate statistics in some multi-threaded cases.\n");
        printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
        return;
    }

    if(strcmp(job->version_string, "2.00") == 0)
    {
        printf("# WARNING: version 2.00 log format has the following limitations:\n");
        printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
        printf("# - inaccurate statistics in some multi-threaded cases.\n");
        printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
        return;
    }

    if(strcmp(job->version_string, "1.24") == 0)
    {
        printf("# WARNING: version 1.24 log format does not support the following parameters:\n");
        printf("# CP_FASTEST_RANK\n");
        printf("# CP_FASTEST_RANK_BYTES\n");
        printf("# CP_SLOWEST_RANK\n");
        printf("# CP_SLOWEST_RANK_BYTES\n");
        printf("# CP_F_FASTEST_RANK_TIME\n");
        printf("# CP_F_SLOWEST_RANK_TIME\n");
        printf("# CP_F_VARIANCE_RANK_TIME\n");
        printf("# CP_F_VARIANCE_RANK_BYTES\n");
        printf("# WARNING: version 1.24 log format has the following limitations:\n");
        printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
        printf("# - does not store the job id in the file.\n");
        printf("# - inaccurate statistics in some multi-threaded cases.\n");
        printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
        return;
    }

    if(strcmp(job->version_string, "1.23") == 0)
    {
        printf("# WARNING: version 1.23 log format does not support the following parameters:\n");
        printf("# CP_FASTEST_RANK\n");
        printf("# CP_FASTEST_RANK_BYTES\n");
        printf("# CP_SLOWEST_RANK\n");
        printf("# CP_SLOWEST_RANK_BYTES\n");
        printf("# CP_F_FASTEST_RANK_TIME\n");
        printf("# CP_F_SLOWEST_RANK_TIME\n");
        printf("# CP_F_VARIANCE_RANK_TIME\n");
        printf("# CP_F_VARIANCE_RANK_BYTES\n");
        printf("# WARNING: version 1.23 log format has the following limitations:\n");
        printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
        printf("# - may have incorrect mount point mappings for files with rank > 0.\n");
        printf("# - does not store the job id in the file.\n");
        printf("# - inaccurate statistics in some multi-threaded cases.\n");
        printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
        return;
    }

    if(strcmp(job->version_string, "1.22") == 0)
    {
        printf("# WARNING: version 1.22 log format does not support the following parameters:\n");
        printf("# CP_DEVICE\n");
        printf("# CP_SIZE_AT_OPEN\n");
        printf("# CP_FASTEST_RANK\n");
        printf("# CP_FASTEST_RANK_BYTES\n");
        printf("# CP_SLOWEST_RANK\n");
        printf("# CP_SLOWEST_RANK_BYTES\n");
        printf("# CP_F_FASTEST_RANK_TIME\n");
        printf("# CP_F_SLOWEST_RANK_TIME\n");
        printf("# CP_F_VARIANCE_RANK_TIME\n");
        printf("# CP_F_VARIANCE_RANK_BYTES\n");
        printf("# WARNING: version 1.22 log format has the following limitations:\n");
        printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
        printf("# - does not record mounted file systems, mount points, or fs types.\n");
        printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n");
        printf("# - does not store the job id in the file.\n");
        printf("# - inaccurate statistics in some multi-threaded cases.\n");
        printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
        return;
    }

    if(strcmp(job->version_string, "1.21") == 0)
    {
        printf("# WARNING: version 1.21 log format does not support the following parameters:\n");
        printf("# CP_INDEP_NC_OPENS\n");
        printf("# CP_COLL_NC_OPENS\n");
        printf("# CP_HDF5_OPENS\n");
        printf("# CP_MAX_READ_TIME_SIZE\n");
        printf("# CP_MAX_WRITE_TIME_SIZE\n");
        printf("# CP_DEVICE\n");
        printf("# CP_SIZE_AT_OPEN\n");
        printf("# CP_F_MAX_READ_TIME\n");
        printf("# CP_F_MAX_WRITE_TIME\n");
        printf("# CP_FASTEST_RANK\n");
        printf("# CP_FASTEST_RANK_BYTES\n");
        printf("# CP_SLOWEST_RANK\n");
        printf("# CP_SLOWEST_RANK_BYTES\n");
        printf("# CP_F_FASTEST_RANK_TIME\n");
        printf("# CP_F_SLOWEST_RANK_TIME\n");
        printf("# CP_F_VARIANCE_RANK_TIME\n");
        printf("# CP_F_VARIANCE_RANK_BYTES\n");
        printf("# WARNING: version 1.21 log format has the following limitations:\n");
        printf("# - *_TIMESTAMP fields are not normalized relative to MPI_Init() time.\n");
        printf("# - does not record mounted file systems, mount points, or fs types.\n");
        printf("# - attributes syncs to cumulative metadata time, rather than cumulative write time.\n");
        printf("# - does not store the job id in the file.\n");
        printf("# - inaccurate statistics in some multi-threaded cases.\n");
        printf("# - CP_F_SLOWEST_RANK_TIME and CP_F_FASTEST_RANK_TIME only report elapsed time at the POSIX level.\n");
        return;
    }

    /* none of the known versions matched */
    fprintf(stderr, "Error: version %s not supported by parser.\n",
        job->version_string);
    return;
}
/* shift_missing_1_21()
*
* translates indices to account for counters that weren't present in log
* format 1.21
*/
/*******************************
* version 1.21 to 2.00 differences
* - added:
* - CP_INDEP_NC_OPENS
* - CP_COLL_NC_OPENS
* - CP_HDF5_OPENS
* - CP_MAX_READ_TIME_SIZE
* - CP_MAX_WRITE_TIME_SIZE
* - CP_DEVICE
* - CP_SIZE_AT_OPEN
* - CP_FASTEST_RANK
* - CP_FASTEST_RANK_BYTES
* - CP_SLOWEST_RANK
* - CP_SLOWEST_RANK_BYTES
* - CP_F_MAX_READ_TIME
* - CP_F_MAX_WRITE_TIME
* - CP_F_FASTEST_RANK_TIME
* - CP_F_SLOWEST_RANK_TIME
* - CP_F_VARIANCE_RANK_TIME
* - CP_F_VARIANCE_RANK_BYTES
* - changed params:
* - CP_FILE_RECORD_SIZE: 1184 to 1328
* - CP_NUM_INDICES: 133 to 144
* - CP_F_NUM_INDICES: 12 to 18
*/
static void shift_missing_1_21(struct darshan_file* file)
{
int c_index = 0;
int missing_counters[] = {
CP_INDEP_NC_OPENS,
CP_COLL_NC_OPENS,
CP_HDF5_OPENS,
CP_MAX_READ_TIME_SIZE,
CP_MAX_WRITE_TIME_SIZE,
CP_DEVICE,
CP_SIZE_AT_OPEN,
CP_FASTEST_RANK,
CP_FASTEST_RANK_BYTES,
CP_SLOWEST_RANK,
CP_SLOWEST_RANK_BYTES,
-1};
int missing_f_counters[] = {
CP_F_MAX_READ_TIME,
CP_F_MAX_WRITE_TIME,
CP_F_FASTEST_RANK_TIME,
CP_F_SLOWEST_RANK_TIME,
CP_F_VARIANCE_RANK_TIME,
CP_F_VARIANCE_RANK_BYTES,
-1};
c_index = 0;
while(missing_counters[c_index] != -1)
{
int missing_counter = missing_counters[c_index];
c_index++;
if(missing_counter < (CP_NUM_INDICES - 1))
ref->rec.name = malloc(*path_len_ptr + 1);
if(!ref->rec.name)
{
/* shift down */
memmove(&file->counters[missing_counter+1],
&file->counters[missing_counter],
(CP_NUM_INDICES-missing_counter-1)*sizeof(int64_t));
free(map_buf);
free(ref);
return(-1);
}
/* zero out missing counter */
file->counters[missing_counter] = 0;
}
c_index = 0;
while(missing_f_counters[c_index] != -1)
{
int missing_counter = missing_f_counters[c_index];
c_index++;
if(missing_counter < (CP_F_NUM_INDICES - 1))
if(fd->swap_flag)
{
/* shift down */
memmove(&file->fcounters[missing_counter+1],
&file->fcounters[missing_counter],
(CP_F_NUM_INDICES-missing_counter-1)*sizeof(double));
/* we need to sort out endianness issues before deserializing */
DARSHAN_BSWAP64(rec_id_ptr);
DARSHAN_BSWAP32(path_len_ptr);
}
/* zero out missing counter */
file->fcounters[missing_counter] = 0;
}
return;
}
/* set the fields for this record */
ref->rec.id = *rec_id_ptr;
memcpy(ref->rec.name, path_ptr, *path_len_ptr);
ref->rec.name[*path_len_ptr] = '\0';
/* shift_missing_1_22()
*
* translates indices to account for counters that weren't present in log
* format 1.22
*/
/*******************************
* version 1.22 to 2.00 differences
*
* - added:
* - CP_DEVICE
* - CP_SIZE_AT_OPEN
* - CP_FASTEST_RANK
* - CP_FASTEST_RANK_BYTES
* - CP_SLOWEST_RANK
* - CP_SLOWEST_RANK_BYTES
* - CP_F_FASTEST_RANK_TIME
* - CP_F_SLOWEST_RANK_TIME
* - CP_F_VARIANCE_RANK_TIME
* - CP_F_VARIANCE_RANK_BYTES
* - changed params:
* - CP_FILE_RECORD_SIZE: 1240 to 1328
* - CP_NUM_INDICES: 138 to 144
* - CP_F_NUM_INDICES: 14 to 18
*/
static void shift_missing_1_22(struct darshan_file* file)
{
int c_index = 0;
int missing_counters[] = {
CP_DEVICE,
CP_SIZE_AT_OPEN,
CP_FASTEST_RANK,
CP_FASTEST_RANK_BYTES,
CP_SLOWEST_RANK,
CP_SLOWEST_RANK_BYTES,
-1};
int missing_f_counters[] = {
CP_F_FASTEST_RANK_TIME,
CP_F_SLOWEST_RANK_TIME,
CP_F_VARIANCE_RANK_TIME,
CP_F_VARIANCE_RANK_BYTES,
-1};
c_index = 0;
while(missing_counters[c_index] != -1)
{
int missing_counter = missing_counters[c_index];
c_index++;
if(missing_counter < (CP_NUM_INDICES - 1))
{
/* shift down */
memmove(&file->counters[missing_counter+1],
&file->counters[missing_counter],
(CP_NUM_INDICES-missing_counter-1)*sizeof(int64_t));
}