Commit c180b5b3 authored by Rob Latham

Record the distribution of keys to servers.

This requires a hacked-up version of MDHIM until there is a real API for
getting this information.
parent 46a4fa29
......@@ -19,6 +19,8 @@
X(MDHIM_GETS) \
/* largest get */ \
X(MDHIM_GET_MAX_SIZE) \
/* how many servers? */ \
X(MDHIM_SERVERS) \
/* end of counters */ \
X(MDHIM_NUM_INDICES)
......@@ -59,6 +61,11 @@ struct darshan_mdhim_record
struct darshan_base_record base_rec;
int64_t counters[MDHIM_NUM_INDICES];
double fcounters[MDHIM_F_NUM_INDICES];
/* when we allocate this struct, we'll do so with enough extra memory to
* hold N servers. Compare to approach taken with darshan_lustre_record */
int32_t server_histogram[1];
};
/* Size in bytes of a darshan_mdhim_record that carries a histogram for
 * 'servers' servers.  The struct already contains one histogram slot, hence
 * the '- 1'.  For servers <= 1 the result is clamped to the size of the base
 * struct, so a zero or negative count (e.g. from a damaged log) can never
 * produce an allocation smaller than the fixed-size part that is copied into
 * it.  Note: 'servers' is evaluated twice; do not pass an expression with
 * side effects. */
#define MDHIM_RECORD_SIZE(servers) \
    (sizeof(struct darshan_mdhim_record) + \
     ((servers) > 1 ? sizeof(int32_t) * (size_t)((servers) - 1) : (size_t)0))
#endif /* __DARSHAN_MDHIM_LOG_FORMAT_H */
......@@ -34,6 +34,8 @@ DARSHAN_FORWARD_DECL(mdhimPut, struct mdhim_brm_t *, (mdhim_t *md,
DARSHAN_FORWARD_DECL(mdhimGet, struct mdhim_bgetrm_t *, (mdhim_t *md,
struct index_t *index, void *key, int key_len, int op));
DARSHAN_FORWARD_DECL(mdhimInit, int, (mdhim_t *md, mdhim_options_t *opts));
/* The mdhim_record_ref structure maintains necessary runtime metadata
* for the MDHIM module record (darshan_mdhim_record structure, defined in
* darshan-mdhim-log-format.h) pointed to by 'record_p'. This metadata
......@@ -81,7 +83,7 @@ struct mdhim_runtime
static void mdhim_runtime_initialize(
void);
static struct mdhim_record_ref *mdhim_track_new_record(
darshan_record_id rec_id, const char *name);
darshan_record_id rec_id, int nr_servers, const char *name);
static void mdhim_cleanup_runtime(
void);
......@@ -123,7 +125,6 @@ static int my_rank = -1;
if(mdhim_runtime) break; \
} \
MDHIM_UNLOCK(); \
return(ret); \
} while(0)
/* the MDHIM_POST_RECORD macro is executed after performing MDHIM
......@@ -134,7 +135,7 @@ static int my_rank = -1;
} while(0)
/* macro for instrumenting the "MDHIM" module's put function */
#define MDHIM_RECORD_PUT(__ret, __md, __vallen, __tm1, __tm2) do{ \
#define MDHIM_RECORD_PUT(__ret, __md, __id, __vallen, __tm1, __tm2) do{ \
darshan_record_id rec_id; \
struct mdhim_record_ref *rec_ref; \
double __elapsed = __tm2 - __tm1; \
......@@ -145,9 +146,7 @@ static int my_rank = -1;
rec_id = darshan_core_gen_record_id(RECORD_STRING); \
/* look up a record reference for this record id using darshan rec_ref interface */ \
rec_ref = darshan_lookup_record_ref(mdhim_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
/* if no reference was found, track a new one for this record */ \
if(!rec_ref) rec_ref = mdhim_track_new_record(rec_id, RECORD_STRING); \
/* if we still don't have a valid reference, back out */ \
/* if no reference was found, that's odd: was init not called? */ \
if(!rec_ref) break; \
/* increment counter indicating number of calls to 'put' */ \
rec_ref->record_p->counters[MDHIM_PUTS] += 1; \
......@@ -160,10 +159,12 @@ static int my_rank = -1;
if(rec_ref->record_p->fcounters[MDHIM_F_PUT_TIMESTAMP] == 0 || \
rec_ref->record_p->fcounters[MDHIM_F_PUT_TIMESTAMP] > __tm1) \
rec_ref->record_p->fcounters[MDHIM_F_PUT_TIMESTAMP] = __tm1; \
/* record which server gets this request */ \
rec_ref->record_p->server_histogram[(__id)]++; \
} while(0)
/* macro for instrumenting the "MDHIM" module's get function */
#define MDHIM_RECORD_GET(__ret, __md, __keylen, __tm1, __tm2) do{ \
#define MDHIM_RECORD_GET(__ret, __md, __id, __keylen, __tm1, __tm2) do{ \
darshan_record_id rec_id; \
struct mdhim_record_ref *rec_ref; \
double __elapsed = __tm2 - __tm1; \
......@@ -174,9 +175,7 @@ static int my_rank = -1;
rec_id = darshan_core_gen_record_id(RECORD_STRING); \
/* look up a record reference for this record id using darshan rec_ref interface */ \
rec_ref = darshan_lookup_record_ref(mdhim_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
/* if no reference was found, track a new one for this record */ \
if(!rec_ref) rec_ref = mdhim_track_new_record(rec_id, RECORD_STRING); \
/* if we still don't have a valid reference, back out */ \
/* if no reference was found, we're in trouble */ \
if(!rec_ref) break; \
/* increment counter indicating number of calls to 'get' */ \
rec_ref->record_p->counters[MDHIM_GETS] += 1; \
......@@ -189,6 +188,8 @@ static int my_rank = -1;
if(rec_ref->record_p->fcounters[MDHIM_F_GET_TIMESTAMP] == 0 || \
rec_ref->record_p->fcounters[MDHIM_F_GET_TIMESTAMP] > __tm1) \
rec_ref->record_p->fcounters[MDHIM_F_GET_TIMESTAMP] = __tm1; \
/* server distribution */ \
rec_ref->record_p->server_histogram[(__id)]++; \
} while(0)
/**********************************************************
......@@ -199,6 +200,40 @@ static int my_rank = -1;
* names, depending on whether the Darshan library is statically or
* dynamically linked.
*/
/*
 * Darshan wrapper for mdhimInit().
 *
 * Nothing is counted or timed here; the wrapper only captures the size of
 * the communicator in 'opts' as the number of MDHIM servers, creates the
 * (single) MDHIM record with room for a per-server histogram, and stores the
 * server count in the MDHIM_SERVERS counter.
 *
 * Returns whatever the real mdhimInit() returns.
 */
int DARSHAN_DECL(mdhimInit)(mdhim_t *md, mdhim_options_t *opts)
{
    int ret;
    int nr_servers = 0;
    darshan_record_id rec_id;
    struct mdhim_record_ref *rec_ref;

    /* Call the real routine first, like the mdhimPut/mdhimGet wrappers do,
     * so that 'ret' is valid on every exit path below and the application's
     * call is never skipped when instrumentation is unavailable. */
    MAP_OR_FAIL(mdhimInit);
    ret = __real_mdhimInit(md, opts);

    /* without options there is no communicator to size */
    if (!opts)
        return ret;

    MPI_Comm_size(opts->comm, &nr_servers);
    /* a record needs at least one histogram slot */
    if (nr_servers < 1)
        return ret;

    MDHIM_PRE_RECORD();
    /* NOTE(review): MDHIM_PRE_RECORD appears to release the lock before
     * falling through when the runtime could not be set up -- confirm
     * against the macro.  Either way, never dereference a NULL runtime. */
    if (!mdhim_runtime)
        return ret;

    /* posix uses '__name' to generate a unique Darshan record id
       but mdhim doesn't use string names for its keyval store. Assumes
       one MDHIM instance */
    rec_id = darshan_core_gen_record_id(RECORD_STRING);
    /* look up a record reference for this record id using darshan
     * rec_ref interface */
    rec_ref = darshan_lookup_record_ref(mdhim_runtime->rec_id_hash,
            &rec_id, sizeof(darshan_record_id));
    /* if no reference was found, track a new one for this record */
    if (!rec_ref)
        rec_ref = mdhim_track_new_record(rec_id, nr_servers, RECORD_STRING);
    /* if we still don't have a valid reference, there is nothing to record */
    if (rec_ref)
        rec_ref->record_p->counters[MDHIM_SERVERS] = nr_servers;
    MDHIM_POST_RECORD();

    return ret;
}
struct mdhim_brm_t *DARSHAN_DECL(mdhimPut)(mdhim_t *md,
void *key, int key_len,
void *value, int value_len,
......@@ -223,11 +258,14 @@ struct mdhim_brm_t *DARSHAN_DECL(mdhimPut)(mdhim_t *md,
secondary_global_info, secondary_global_info);
tm2 = darshan_core_wtime();
int server_id = mdhimWhichServer(md, key, key_len);
MDHIM_PRE_RECORD();
/* Call macro for instrumenting data for mdhimPut function calls. */
/* TODO: call the mdhim hash routines and instrument which servers
* get this request */
MDHIM_RECORD_PUT(ret, md, value_len, tm1, tm2);
MDHIM_RECORD_PUT(ret, md, server_id, value_len, tm1, tm2);
MDHIM_POST_RECORD();
return(ret);
......@@ -248,9 +286,11 @@ struct mdhim_bgetrm_t * DARSHAN_DECL(mdhimGet)(mdhim_t *md,
ret = __real_mdhimGet(md, index, key, key_len, op);
tm2 = darshan_core_wtime();
int server_id = mdhimWhichServer(md, key, key_len);
MDHIM_PRE_RECORD();
/* Call macro for instrumenting data for get function calls. */
MDHIM_RECORD_GET(ret, md, key_len, tm1, tm2);
MDHIM_RECORD_GET(ret, md, server_id, key_len, tm1, tm2);
MDHIM_POST_RECORD();
return ret;
}
......@@ -298,11 +338,12 @@ static void mdhim_runtime_initialize()
/* allocate and track a new MDHIM module record */
static struct mdhim_record_ref *mdhim_track_new_record(
darshan_record_id rec_id, const char *name)
darshan_record_id rec_id, int nr_servers, const char *name)
{
struct darshan_mdhim_record *record_p = NULL;
struct mdhim_record_ref *rec_ref = NULL;
int ret;
size_t rec_size;
rec_ref = calloc(1, sizeof(*rec_ref));
if(!rec_ref)
......@@ -319,6 +360,7 @@ static struct mdhim_record_ref *mdhim_track_new_record(
return(NULL);
}
rec_size = MDHIM_RECORD_SIZE(nr_servers);
/* register the actual file record with darshan-core so it is persisted
* in the log file
*/
......@@ -326,7 +368,7 @@ static struct mdhim_record_ref *mdhim_track_new_record(
rec_id,
name,
DARSHAN_MDHIM_MOD,
sizeof(struct darshan_mdhim_record),
rec_size,
NULL);
if(!record_p)
......
......@@ -63,6 +63,7 @@ static int darshan_log_get_mdhim_record(darshan_fd fd, void** mdhim_buf_p)
{
struct darshan_mdhim_record *rec =
*((struct darshan_mdhim_record **)mdhim_buf_p);
struct darshan_mdhim_record tmp_rec;
int i;
int ret;
......@@ -76,37 +77,63 @@ static int darshan_log_get_mdhim_record(darshan_fd fd, void** mdhim_buf_p)
return(-1);
}
/* read a MDHIM module record from the darshan log file */
ret = darshan_log_get_mod(fd, DARSHAN_MDHIM_MOD, rec,
/* read the fixed-size portion of the MDHIM module record from the
 * darshan log file */
ret = darshan_log_get_mod(fd, DARSHAN_MDHIM_MOD, &tmp_rec,
sizeof(struct darshan_mdhim_record));
if (ret < 0)
return (-1);
else if (ret < sizeof(struct darshan_mdhim_record))
return (0);
/* swap bytes of the fixed-size portion if necessary */
if (fd->swap_flag)
{
    /* reader-makes-right: don't look at a field until it has
     * been swapped */
    DARSHAN_BSWAP64(&tmp_rec.base_rec.id);
    DARSHAN_BSWAP64(&tmp_rec.base_rec.rank);
    for (i = 0; i < MDHIM_NUM_INDICES; i++)
        DARSHAN_BSWAP64(&tmp_rec.counters[i]);
    for (i = 0; i < MDHIM_F_NUM_INDICES; i++)
        DARSHAN_BSWAP64(&tmp_rec.fcounters[i]);
    /* tmp_rec is a plain struct and therefore holds exactly one histogram
     * slot; looping up to counters[MDHIM_SERVERS] here would run past the
     * end of the stack object.  The remaining slots are swapped after they
     * have been read into the full-size record. */
    DARSHAN_BSWAP32(&tmp_rec.server_histogram[0]);
}
if(*mdhim_buf_p == NULL)
{
if(ret == sizeof(struct darshan_mdhim_record))
*mdhim_buf_p = rec;
rec = malloc(MDHIM_RECORD_SIZE(tmp_rec.counters[MDHIM_SERVERS]));
if (!rec)
return (-1);
}
memcpy(rec, &tmp_rec, sizeof(struct darshan_mdhim_record));
if (rec->counters[MDHIM_SERVERS] > 1) {
ret = darshan_log_get_mod(fd, DARSHAN_MDHIM_MOD,
&(rec->server_histogram[1]),
(rec->counters[MDHIM_SERVERS] - 1)*sizeof(int32_t));
if (ret < (rec->counters[MDHIM_SERVERS] -1)*sizeof(int32_t))
ret = -1;
else
free(rec);
{
ret = 1;
if (fd->swap_flag)
for(i=1; i< rec->counters[MDHIM_SERVERS]; i++)
DARSHAN_BSWAP32(&(rec->server_histogram[i]));
}
}
if(ret < 0)
return(-1);
else if(ret < sizeof(struct darshan_mdhim_record))
return(0);
else
{
/* if the read was successful, do any necessary byte-swapping */
if(fd->swap_flag)
{
DARSHAN_BSWAP64(&(rec->base_rec.id));
DARSHAN_BSWAP64(&(rec->base_rec.rank));
for(i=0; i<MDHIM_NUM_INDICES; i++)
DARSHAN_BSWAP64(&rec->counters[i]);
for(i=0; i<MDHIM_F_NUM_INDICES; i++)
DARSHAN_BSWAP64(&rec->fcounters[i]);
}
return(1);
ret = 1;
}
if (*mdhim_buf_p == NULL)
{
if (ret == 1)
*mdhim_buf_p = rec;
else
free(rec);
}
return (ret);
}
/* write the MDHIM record stored in 'mdhim_buf' to log file descriptor 'fd'.
......@@ -152,7 +179,17 @@ static void darshan_log_print_mdhim_record(void *file_rec, char *file_name,
mdhim_f_counter_names[i], mdhim_rec->fcounters[i],
file_name, mnt_pt, fs_type);
}
for (i=0; i< mdhim_rec->counters[MDHIM_SERVERS]; i++)
{
char strbuf[25];
snprintf(strbuf, 25, "MDHIM_SERVER_%d", i);
DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_MDHIM_MOD],
mdhim_rec->base_rec.rank,
mdhim_rec->base_rec.id,
strbuf,
(int64_t)mdhim_rec->server_histogram[i],
file_name, mnt_pt, fs_type);
}
return;
}
......@@ -162,8 +199,10 @@ static void darshan_log_print_mdhim_description(int ver)
printf("\n# description of MDHIM counters:\n");
printf("# MDHIM_PUTS: number of 'mdhim_put' function calls.\n");
printf("# MDHIM_GETS: number of 'mdhim_get' function calls.\n");
printf("# MDHIM_SERVERS: how many mdhim servers \n");
printf("# MDHIM_F_PUT_TIMESTAMP: timestamp of the first call to function 'mdhim_put'.\n");
printf("# MDHIM_F_GET_TIMESTAMP: timestamp of the first call to function 'mdhim_get'.\n");
printf("# MDHIM_SERVER_N: how many operations sent to this server\n");
return;
}
......@@ -237,7 +276,53 @@ static void darshan_log_print_mdhim_record_diff(void *file_rec1, char *file_name
file2->fcounters[i], file_name2, "", "");
}
}
i=0;
while (1)
{
char strbuf[25];
snprintf(strbuf, 25, "MDHIM_SERVER_%d", i);
if (!file2 || (i >= file2->counters[MDHIM_SERVERS]))
{
printf("- ");
DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_MDHIM_MOD],
file1->base_rec.rank,
file1->base_rec.id,
strbuf,
(int64_t)file1->server_histogram[i],
file_name1, "", "");
}
else if (!file1 || (i >= file1->counters[MDHIM_SERVERS]))
{
printf("+ ");
DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_MDHIM_MOD],
file2->base_rec.rank,
file2->base_rec.id,
strbuf,
(int64_t)file2->server_histogram[i],
file_name2, "", "");
}
else if (file1->server_histogram[i] != file2->server_histogram[i])
{
printf("- ");
DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_MDHIM_MOD],
file1->base_rec.rank,
file1->base_rec.id,
strbuf,
(int64_t)file1->server_histogram[i],
file_name1, "", "");
printf("+ ");
DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_MDHIM_MOD],
file2->base_rec.rank,
file2->base_rec.id,
strbuf,
(int64_t)file2->server_histogram[i],
file_name2, "", "");
}
i++;
if ( (!file1 || i >= file1->counters[MDHIM_SERVERS]) &&
(!file2 || i >= file2->counters[MDHIM_SERVERS] ) )
break;
}
return;
}
......@@ -260,6 +345,10 @@ static void darshan_log_agg_mdhim_records(void *rec, void *agg_rec, int init_fla
/* sum */
agg_mdhim_rec->counters[i] += mdhim_rec->counters[i];
break;
case MDHIM_SERVERS:
/* all clients should have the same value for this, hence
* assignment instead of aggregating */
agg_mdhim_rec->counters[i] = mdhim_rec->counters[i];
default:
/* if we don't know how to aggregate this counter, just set to -1 */
agg_mdhim_rec->counters[i] = -1;
......@@ -296,6 +385,10 @@ static void darshan_log_agg_mdhim_records(void *rec, void *agg_rec, int init_fla
break;
}
}
for (i=0; i< mdhim_rec->counters[MDHIM_SERVERS]; i++)
{
agg_mdhim_rec->server_histogram[i] += mdhim_rec->server_histogram[i];
}
return;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment