Commit 2aa1f3f9 authored by fisaila's avatar fisaila

Added partial tracing functionality.

- activated through the DARSHAN_TRACING environment variable
- darshan_trace_log_record writes to a process-local log
- logs are written collectively to a trace file at the end of the run (no reduction)



git-svn-id: https://svn.mcs.anl.gov/repos/darshan/branches/darshan-florin-extensions-addcounters-datatypes@870 3b7491f3-a168-0410-bf4b-c445ed680a29
parent 364be0ac
......@@ -338,4 +338,19 @@ struct darshan_job
*/
#define CP_FILE_PARTIAL(__file)((((__file)->counters[CP_POSIX_OPENS] || (__file)->counters[CP_POSIX_FOPENS] || (__file)->counters[CP_POSIX_STATS]) ? 0 : 1))
/* Florin
Structure for Darshan tracing functionality
*/
/* One trace event. Records are appended to the process-local trace buffer,
   written to the trace file as raw bytes and read back by the trace parser
   with sizeof(struct darshan_trace_record), so the member order and types
   define the trace file layout. */
struct darshan_trace_record {
/* MPI_COMM_WORLD rank of the process that recorded the event */
int rank;
/* epoch counter value supplied by the call site */
int epoch;
/* counter id of the traced operation (e.g. CP_MPI_ALLTOALLS, CP_POSIX_WRITES);
   the parser uses it as an index into darshan_names */
int op;
/* timestamps taken around the operation; the parser prints them as
   start_time and end_time */
double tm1;
double tm2;
/* call sites pass datatype size * count (or the POSIX write count) */
int send_count;
/* call sites pass receive-side size * count; 0 for the write paths */
int recv_count;
/* file offset for I/O events; the collective call sites pass -1 */
long long int offset;
};
#endif /* __DARSHAN_LOG_FORMAT_H */
......@@ -226,6 +226,22 @@ enum cp_counter_type
extern struct darshan_job_runtime* darshan_global_job;
extern int epoch_counter;
/* Florin
Trace log structures
*/
/* Capacity, in bytes, of the process-local trace buffer. The expansion is
   parenthesized so the macro is safe inside larger expressions
   (e.g. x / DARSHAN_TRACER_LOG_SIZE). */
#define DARSHAN_TRACER_LOG_SIZE (1024*1024)
/* Process-local trace buffer and the number of bytes currently used in it. */
extern char darshan_log[DARSHAN_TRACER_LOG_SIZE];
extern int darshan_log_ptr;
/* Appends one record to darshan_log; does nothing unless the DARSHAN_TRACING
   environment variable is set. */
void darshan_trace_log_record(int rank, int epoch, int op, double tm1, double tm2, int send_count, int recv_count, long long int offset);
/* Collectively writes every process's buffer to the trace file; does nothing
   unless the DARSHAN_TRACING environment variable is set. */
void darshan_trace_log_write(void);
void darshan_initialize(int argc, char** argv, int nprocs, int rank);
void darshan_finalize(struct darshan_job_runtime* job);
void darshan_condense(void);
......
......@@ -140,6 +140,74 @@ extern char* __progname;
} \
} while(0)
int epoch_counter = 0;
char darshan_log[DARSHAN_TRACER_LOG_SIZE];
int darshan_log_ptr = 0;
/*
 * Append one trace record to the process-local buffer darshan_log.
 *
 * Does nothing unless the DARSHAN_TRACING environment variable is set.
 * When the buffer cannot hold another record the event is dropped; a
 * warning is printed once (on stderr, so the application's stdout is not
 * flooded with one line per dropped record).
 *
 * rank        ignored: the stored rank is always queried from
 *             MPI_COMM_WORLD (at least one call site passes -1).
 *             NOTE(review): if every caller can supply a valid rank, the
 *             per-record MPI call could be dropped.
 * epoch, op, tm1, tm2, send_count, recv_count, offset
 *             copied verbatim into the record.
 */
void darshan_trace_log_record(int rank, int epoch, int op, double tm1, double tm2, int send_count, int recv_count, long long int offset) {
    static int overflow_reported = 0;
    struct darshan_trace_record *rec;

    (void)rank;

    if (!getenv("DARSHAN_TRACING"))
        return;

    /* Compare in size_t so the check does not mix signed and unsigned operands. */
    if ((size_t)darshan_log_ptr + sizeof(*rec) > (size_t)(DARSHAN_TRACER_LOG_SIZE)) {
        if (!overflow_reported) {
            overflow_reported = 1;
            fprintf(stderr, "Out of memory for log recording\n");
        }
        return;
    }

    rec = (struct darshan_trace_record *)(darshan_log + darshan_log_ptr);
    PMPI_Comm_rank(MPI_COMM_WORLD, &rec->rank);
    rec->epoch = epoch;
    rec->op = op;
    rec->tm1 = tm1;
    rec->tm2 = tm2;
    rec->send_count = send_count;
    rec->recv_count = recv_count;
    rec->offset = offset;
    darshan_log_ptr += (int)sizeof(*rec);
}
void darshan_trace_log_write() {
char *filename;
//if ((filename=getenv("DARSHAN_TRACING"))!=NULL) {
if (getenv("DARSHAN_TRACING")){
MPI_Offset offset;
int rank;
MPI_File fh;
MPI_Status status;
struct tm* my_tm;
time_t tm;
filename = (char*) malloc(PATH_MAX);
tm = time(NULL);
my_tm = localtime(&tm);
snprintf(filename, PATH_MAX,
"%s_%d-%d-%d.darshan_trace",
__progname,
(my_tm->tm_mon+1),
my_tm->tm_mday,
(my_tm->tm_hour*60*60 + my_tm->tm_min*60 + my_tm->tm_sec));
DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &rank);
if (rank == 0)
fprintf(stdout, "DARSHAN_TRACEFILE:%s\n", filename);
MPI_Scan(&darshan_log_ptr, &offset, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
DARSHAN_MPI_CALL(PMPI_File_open)(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_EXCL,
MPI_INFO_NULL, &fh);
DARSHAN_MPI_CALL(PMPI_File_write_at_all)(fh, offset - darshan_log_ptr, darshan_log, darshan_log_ptr, MPI_BYTE, &status);
DARSHAN_MPI_CALL(PMPI_File_close)(&fh);
free(filename);
}
}
int count_contiguous_blocks_memory(MPI_Datatype datatype, int count);
int count_contiguous_blocks_file(MPI_File fh, MPI_Offset foff1, MPI_Offset foff2);
MPI_Offset func_1_inf(MPI_File fh, MPI_Offset x, int memtype_size);
......@@ -179,7 +247,6 @@ MPI_Offset func_1(MPI_File fh, MPI_Offset x);
static struct darshan_file_runtime* darshan_file_by_fh(MPI_File fh);
static int epoch_counter = 0;
void printHints(MPI_File fh)
{
......@@ -250,6 +317,7 @@ void CP_RECORD_MPI_WRITE(int __ret, MPI_File __fh, int __count, MPI_Datatype __d
CP_SET(file, CP_MAX_FILE_DTYPE_BLOCKS, file_blocks); //
CP_SET(file, CP_MIN_FILE_DTYPE_EXTENT, foff2 - foff1 + 1); //
CP_SET(file, CP_MIN_FILE_DTYPE_BLOCKS, file_blocks); //
darshan_trace_log_record(-1, epoch_counter,__counter,__tm1,__tm2,__count*size, 0,__voff);
}
......@@ -2903,6 +2971,10 @@ void darshan_shutdown(int timing_flag)
}
}
// Florin: Write the trace log if any
darshan_trace_log_write();
// Moved here from previous darshan_shutdown
CP_LOCK();
if (final_job->trailing_data)
......@@ -3361,13 +3433,17 @@ int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void
if (crt_filename)
file = darshan_file_by_name(crt_filename);
if (file) {
int size = 0;
DARSHAN_MPI_CALL(PMPI_Type_size)(sendtype, &size);
int size_snd, size_rcv;
int rank;
DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &rank);
DARSHAN_MPI_CALL(PMPI_Type_size)(sendtype, &size_snd);
DARSHAN_MPI_CALL(PMPI_Type_size)(sendtype, &size_rcv);
CP_INC(file, CP_MPI_ALLTOALLS, 1);
CP_INC(file, CP_BYTES_MPI_ALLTOALL, size*sendcount);
CP_INC(file, CP_BYTES_MPI_ALLTOALL, size_snd*sendcount);
CP_F_INC(file, CP_F_MPI_ALLTOALL_TIME, (tm2 - tm1));
// if (rank == 0)
// printf("%s %d\n",s,size*sendcount);
darshan_trace_log_record(rank, epoch_counter,CP_MPI_ALLTOALLS,tm1,tm2,size_snd*sendcount, size_rcv*recvcount,-1);
}
CP_UNLOCK();
}
......@@ -3398,19 +3474,26 @@ int MPI_Alltoallv(const void *sendbuf, const int *sendcounts, const int *sdispls
if (crt_filename)
file = darshan_file_by_name(crt_filename);
if (file) {
int size, comm_size, i, counts=0;
DARSHAN_MPI_CALL(PMPI_Type_size)(sendtype, &size);
int size_snd, comm_size, i, count_snd=0, size_rcv, count_rcv=0;
int rank;
DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &rank);
DARSHAN_MPI_CALL(PMPI_Type_size)(sendtype, &size_snd);
DARSHAN_MPI_CALL(PMPI_Type_size)(recvtype, &size_rcv);
DARSHAN_MPI_CALL(PMPI_Comm_size)(comm, &comm_size);
for (i=0; i<comm_size; i++) {
counts+=sendcounts[i];
count_snd+=sendcounts[i];
count_rcv+=recvcounts[i];
// if (sendcounts[i] > 0)
// sprintf(s,"%s %d:%d",s, i,sendcounts[i]*size);
}
// printf("%s\n",s);
CP_INC(file, CP_MPI_ALLTOALLVS, 1);
CP_INC(file, CP_BYTES_MPI_ALLTOALLV, size*counts);
CP_INC(file, CP_BYTES_MPI_ALLTOALLV, size_snd*count_snd);
CP_F_INC(file, CP_F_MPI_ALLTOALLV_TIME, (tm2 - tm1));
darshan_trace_log_record(rank, epoch_counter,CP_MPI_ALLTOALLVS,tm1,tm2,size_snd*count_snd, size_rcv*count_rcv,-1);
}
CP_UNLOCK();
}
......@@ -3475,6 +3558,8 @@ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype da
file = darshan_file_by_name(crt_filename);
if (file) {
int size;
int rank;
DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &rank);
DARSHAN_MPI_CALL(PMPI_Type_size)(datatype, &size);
CP_INC(file, CP_MPI_ALLREDUCES, 1);
CP_INC(file, CP_BYTES_MPI_ALLREDUCE, size*count);
......@@ -3482,6 +3567,7 @@ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype da
/* if (rank == 0)
printf("%s %d\n",s,size*count);
*/
darshan_trace_log_record(rank, epoch_counter,CP_MPI_ALLREDUCES,tm1,tm2,size*count,size*count,-1);
}
CP_UNLOCK();
}
......
......@@ -955,10 +955,10 @@ ssize_t DARSHAN_DECL(write)(int fd, const void *buf, size_t count)
MAP_OR_FAIL(write);
/*
int rank;
DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &rank);
printf("POSIX WRITE rank:%d count:%d\n", rank, count);
//int rank;
//DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &rank);
/* printf("POSIX WRITE rank:%d count:%d\n", rank, count);
*/
if (value = getenv("DARSHAN_POSIX_EPOCH"))
darshan_start_epoch();
......@@ -971,6 +971,14 @@ ssize_t DARSHAN_DECL(write)(int fd, const void *buf, size_t count)
tm2 = darshan_wtime();
CP_LOCK();
CP_RECORD_WRITE(ret, fd, count, 0, 0, aligned_flag, 0, tm1, tm2);
if (darshan_global_job) {
int rank;
long long int off=0;
PMPI_Comm_rank(MPI_COMM_WORLD, &rank);
off = lseek64(fd,0,1);
darshan_trace_log_record(rank, epoch_counter, CP_POSIX_WRITES, tm1, tm2, count, 0, off);
}
CP_UNLOCK();
if (value = getenv("DARSHAN_POSIX_EPOCH"))
......
all: darshan-parser darshan-convert darshan-diff darshan-analyzer darshan-log-params darshan-util-lib
all: darshan-parser-trace darshan-convert darshan-diff darshan-analyzer darshan-log-params darshan-util-lib
DESTDIR =
srcdir = @srcdir@
......@@ -38,6 +38,9 @@ uthash-1.9.2:
darshan-parser: darshan-parser.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o | uthash-1.9.2
gcc $(CFLAGS) $< darshan-logutils.o -o $@ $(LIBS)
darshan-parser-trace: darshan-parser-trace.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o | uthash-1.9.2
gcc $(CFLAGS) $< darshan-logutils.o -o $@ $(LIBS)
darshan-convert: darshan-convert.c $(DARSHAN_LOG_FORMAT) darshan-logutils.h darshan-logutils.o lookup3.o
gcc $(CFLAGS) $< darshan-logutils.o lookup3.o -o $@ $(LIBS)
......@@ -74,6 +77,7 @@ install:: all
install -d $(libdir)
install -d $(includedir)
install -m 755 darshan-parser $(bindir)
install -m 755 darshan-parser-trace $(bindir)
install -m 755 darshan-convert $(bindir)
install -m 755 darshan-diff $(bindir)
install -m 755 darshan-analyzer $(bindir)
......
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <darshan-logutils.h>
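/* Reference copy of the trace record layout. The definition actually used is
   expected to come in through the darshan-logutils.h include above.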
struct darshan_trace_record {
int rank;
int epoch;
int op;
double tm1;
double tm2;
int send_count;
int recv_count;
long long int offset;
};
*/
/*
 * Dump a binary Darshan trace file as CSV.
 *
 * filename  path of the trace file, a sequence of raw
 *           struct darshan_trace_record entries
 * csvfile   path of the CSV file to create, or NULL to print to stdout
 *
 * Prints a header line followed by one line per complete record. Errors are
 * reported on stderr and end the dump; nothing is returned to the caller.
 */
void read_log(char *filename, char* csvfile) {
    int fd;
    ssize_t bytes_read;
    FILE *fout = stdout;
    struct darshan_trace_record d;

    if ((fd = open(filename, O_RDONLY)) < 0) {
        perror("Open Failed");
        return;
    }
    if (csvfile) {
        fout = fopen(csvfile, "w");
        if (fout == NULL) {
            perror("Open Failed");
            close(fd);   /* do not leak the trace file descriptor */
            return;
        }
    }

    fprintf(fout, "rank,epoch,counter,start_time,end_time,write_count,read_count,offset\n");

    /* Only a full-size read yields a valid record; a shorter one would leave
       part of d holding stale bytes. */
    while ((bytes_read = read(fd, &d, sizeof d)) == (ssize_t)sizeof d) {
        /* A negative op would index before the start of darshan_names.
           NOTE(review): the upper bound should be checked too, but the size
           of darshan_names is not visible from this file. */
        const char *op_name = (d.op >= 0) ? darshan_names[d.op] : "UNKNOWN";

        fprintf(fout, "%d,%d,%s,%.6f,%.6f,%d,%d,%lld\n",
                d.rank, d.epoch, op_name, d.tm1, d.tm2,
                d.send_count, d.recv_count, d.offset);
    }
    if (bytes_read < 0)
        perror("Read Failed");
    else if (bytes_read > 0)
        fprintf(stderr, "Ignoring %zd trailing bytes: incomplete trace record\n", bytes_read);

    close(fd);
    if (csvfile)
        fclose(fout);
}
/*
 * Entry point: expects exactly one argument, the trace file to dump, and
 * prints it as CSV on stdout. Exits with status 1 after printing a usage
 * line on stderr when the argument count is wrong.
 */
int main(int argc, char **argv)
{
    if (argc != 2) {
        /* argv[0] may be NULL when argc is 0; fall back to the tool name. */
        const char *prog = (argc > 0 && argv[0]) ? argv[0] : "darshan-parser-trace";

        fprintf(stderr, "Usage: %s darshan_trace_file_name\n", prog);
        return EXIT_FAILURE;
    }
    read_log(argv[1], NULL);
    return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment