Commit 4264247f authored by Glenn K. Lockwood

fixed a bug preventing shared file reduction from working correctly

parent fad7e816
...@@ -7,10 +7,14 @@ ...@@ -7,10 +7,14 @@
#ifndef __DARSHAN_LUSTRE_LOG_FORMAT_H #ifndef __DARSHAN_LUSTRE_LOG_FORMAT_H
#define __DARSHAN_LUSTRE_LOG_FORMAT_H #define __DARSHAN_LUSTRE_LOG_FORMAT_H
/* Integer type used for each entry of the ost_ids array in
 * struct darshan_lustre_record.
 *
 * NOTE -- redefining the size of OST_ID will require changing the DARSHAN_BSWAP
 * macro used in darshan-util/darshan-lustre-logutils.c as well
 */
typedef int64_t OST_ID;
/* current Lustre log format version */ /* current Lustre log format version */
#define DARSHAN_LUSTRE_VER 1 #define DARSHAN_LUSTRE_VER 1
/* TODO: add integer counters here (e.g., counters for stripe width, stripe size, etc.) */
#define LUSTRE_COUNTERS \ #define LUSTRE_COUNTERS \
/* number of OSTs for file system */\ /* number of OSTs for file system */\
X(LUSTRE_OSTS) \ X(LUSTRE_OSTS) \
...@@ -44,7 +48,12 @@ struct darshan_lustre_record ...@@ -44,7 +48,12 @@ struct darshan_lustre_record
darshan_record_id rec_id; darshan_record_id rec_id;
int64_t rank; int64_t rank;
int64_t counters[LUSTRE_NUM_INDICES]; int64_t counters[LUSTRE_NUM_INDICES];
int64_t ost_ids[1]; OST_ID ost_ids[1];
}; };
/*
 * helper macro to calculate the size in bytes of a record that holds `osts`
 * OST IDs.  struct darshan_lustre_record already contains one ost_ids slot,
 * so only (osts - 1) additional OST_ID entries are added on top of the
 * struct size.  The argument is parenthesized so that expressions such as
 * `a << b` or `c ? x : y` are evaluated before the subtraction.
 */
#define LUSTRE_RECORD_SIZE( osts ) ( sizeof(struct darshan_lustre_record) + sizeof(OST_ID) * ((osts) - 1) )
#endif /* __DARSHAN_LUSTRE_LOG_FORMAT_H */ #endif /* __DARSHAN_LUSTRE_LOG_FORMAT_H */
...@@ -44,7 +44,6 @@ static void lustre_record_reduction_op(void* infile_v, void* inoutfile_v, ...@@ -44,7 +44,6 @@ static void lustre_record_reduction_op(void* infile_v, void* inoutfile_v,
#define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex) #define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
#define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex) #define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex)
#define LUSTRE_RECORD_SIZE( osts ) ( sizeof(struct darshan_lustre_record) + sizeof(int64_t) * (osts - 1) )
void darshan_instrument_lustre_file(const char* filepath, int fd) void darshan_instrument_lustre_file(const char* filepath, int fd)
{ {
...@@ -263,9 +262,17 @@ static void lustre_get_output_data( ...@@ -263,9 +262,17 @@ static void lustre_get_output_data(
*/ */
sort_lustre_records(); sort_lustre_records();
/* allocate memory for the reduction output on rank 0 */ /* simply drop all shared records from non-root ranks by truncating
* the record array and recalculating the size of the used buffer
*/
if (my_rank != 0) if (my_rank != 0)
{
lustre_runtime->record_count -= shared_rec_count; lustre_runtime->record_count -= shared_rec_count;
lustre_runtime->record_buffer_used = 0;
for ( i = 0; i < lustre_runtime->record_count; i++ )
lustre_runtime->record_buffer_used +=
LUSTRE_RECORD_SIZE( (lustre_runtime->record_runtime_array[i]).record->counters[LUSTRE_STRIPE_WIDTH] );
}
} }
*lustre_buf = (void *)(lustre_runtime->record_buffer); *lustre_buf = (void *)(lustre_runtime->record_buffer);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment