From 4264247fd91b3d9a6350f12565cb78751c37d053 Mon Sep 17 00:00:00 2001
From: "Glenn K. Lockwood"
Date: Wed, 15 Jun 2016 13:42:28 -0700
Subject: [PATCH] fixed a bug preventing shared file reduction from working correctly

---
 darshan-lustre-log-format.h          | 13 +++++++++++--
 darshan-runtime/lib/darshan-lustre.c | 11 +++++++++--
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/darshan-lustre-log-format.h b/darshan-lustre-log-format.h
index 866b65a..3c3f2cf 100644
--- a/darshan-lustre-log-format.h
+++ b/darshan-lustre-log-format.h
@@ -7,10 +7,14 @@
 #ifndef __DARSHAN_LUSTRE_LOG_FORMAT_H
 #define __DARSHAN_LUSTRE_LOG_FORMAT_H
 
+/* NOTE -- redefining the size of OST_ID will require changing the DARSHAN_BSWAP
+ * macro used in darshan-util/darshan-lustre-logutils.c as well
+ */
+typedef int64_t OST_ID;
+
 /* current Lustre log format version */
 #define DARSHAN_LUSTRE_VER 1
 
-/* TODO: add integer counters here (e.g., counter for stripe width, stripe size, etc etc) */
 #define LUSTRE_COUNTERS \
     /* number of OSTs for file system */\
     X(LUSTRE_OSTS) \
@@ -44,7 +48,12 @@ struct darshan_lustre_record
     darshan_record_id rec_id;
     int64_t rank;
     int64_t counters[LUSTRE_NUM_INDICES];
-    int64_t ost_ids[1];
+    OST_ID ost_ids[1];
 };
 
+/*
+ * helper macro: size in bytes of a record holding `osts` OST IDs (ost_ids[1] already covers one)
+ */
+#define LUSTRE_RECORD_SIZE( osts ) ( sizeof(struct darshan_lustre_record) + sizeof(OST_ID) * ((osts) - 1) )
+
 #endif /* __DARSHAN_LUSTRE_LOG_FORMAT_H */
diff --git a/darshan-runtime/lib/darshan-lustre.c b/darshan-runtime/lib/darshan-lustre.c
index 59c3d51..aed5e6a 100644
--- a/darshan-runtime/lib/darshan-lustre.c
+++ b/darshan-runtime/lib/darshan-lustre.c
@@ -44,7 +44,6 @@ static void lustre_record_reduction_op(void* infile_v, void* inoutfile_v,
 
 #define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
 #define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex)
-#define LUSTRE_RECORD_SIZE( osts ) ( sizeof(struct darshan_lustre_record) + sizeof(int64_t) * (osts - 1) )
 void
 darshan_instrument_lustre_file(const char* filepath, int fd)
 {
@@ -263,9 +262,17 @@ static void lustre_get_output_data(
          */
         sort_lustre_records();
 
-        /* allocate memory for the reduction output on rank 0 */
+        /* simply drop all shared records from non-root ranks by truncating
+         * the record array and recalculating the size of the used buffer
+         */
         if (my_rank != 0)
+        {
             lustre_runtime->record_count -= shared_rec_count;
+            lustre_runtime->record_buffer_used = 0;
+            for ( i = 0; i < lustre_runtime->record_count; i++ )
+                lustre_runtime->record_buffer_used +=
+                    LUSTRE_RECORD_SIZE( (lustre_runtime->record_runtime_array[i]).record->counters[LUSTRE_STRIPE_WIDTH] );
+        }
     }
 
     *lustre_buf = (void *)(lustre_runtime->record_buffer);
-- 
2.26.2