diff --git a/darshan-util/Makefile.in b/darshan-util/Makefile.in index 6db3e22ce9178a57e0e9610c32e16271c8246607..a8b4d52f1e200f9f0fa551f41aedd8d9476b9cd7 100644 --- a/darshan-util/Makefile.in +++ b/darshan-util/Makefile.in @@ -144,6 +144,7 @@ install:: all install -m 755 darshan-dxt-parser $(bindir) install -m 755 darshan-merge $(bindir) install -m 755 $(srcdir)/darshan-summary-per-file.sh $(bindir) + install -m 755 dxt_analyzer.py $(bindir) install -m 755 libdarshan-util.a $(libdir) ifeq ($(DARSHAN_ENABLE_SHARED),1) install -m 755 libdarshan-util.so $(libdir) diff --git a/darshan-util/doc/dxt_analyzer.txt b/darshan-util/doc/dxt_analyzer.txt new file mode 100644 index 0000000000000000000000000000000000000000..b0608333347205e765c385ad51029b4fdb3c3a38 --- /dev/null +++ b/darshan-util/doc/dxt_analyzer.txt @@ -0,0 +1,47 @@ +# Email questions to SDMSUPPORT@LBL.GOV +# Scientific Data Management Research Group +# Lawrence Berkeley National Laboratory +# +# last update on Mon Aug 7 08:47:28 PDT 2017 + + +To plot the read or write activity from Darshan Extended Trace (DXT) logs. + +% ./dxt_analyzer.py --help +usage: dxt_analyzer.py [-h] -i DXT_LOGNAME [-o SAVEFIG] [--show] [--read] + [--filemode] [-f FNAME] + +io activity plot from dxt log + +optional arguments: + -h, --help show this help message and exit + -i DXT_LOGNAME, --input DXT_LOGNAME + dxt log path + -o SAVEFIG, --save SAVEFIG + output file name for the plot + --show Show the plot rather than saving to a PDF + --read READ I/O action to be plotted. + Default is False for WRITE mode. + --filemode Single file mode (must be used with --fname). + Default is False for all files + -f FNAME, --fname FNAME + name of file to be plotted (must use with --filemode) + +Example runs: +% python dxt_analyzer.py -i darshan_dxt-a.txt + +% python dxt_analyzer.py -i darshan_dxt-a.txt \ + --filemode -f /global/cscratch1/sd/asim/amrex/a24/plt00000.hdf5 + +% python dxt_analyzer.py -i darshan_dxt-d.txt \ + -o dxt-d.pdf + +% python dxt_analyzer.py -i darshan_dxt-df.txt + +% python dxt_analyzer.py -i darshan_dxt-c.txt + +% python dxt_analyzer.py -i darshan_dxt-v.txt + + +For more information on creating DXT logs, see: +http://www.mcs.anl.gov/research/projects/darshan/docs/darshan3-util.html#_darshan_dxt_parser diff --git a/darshan-util/dxt_analyzer.py b/darshan-util/dxt_analyzer.py new file mode 100755 index 0000000000000000000000000000000000000000..e520ab7c10649fa145be76cd262d55ec46dcef2d --- /dev/null +++ b/darshan-util/dxt_analyzer.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python2 + +### +# +# *** Copyright Notice *** +# +# dxt_analyzer.py +# Copyright (c) 2017, The Regents of the University of California, +# through Lawrence Berkeley National Laboratory (subject to receipt +# of any required approvals from the U.S. Dept. of Energy). +# All rights reserved. +# +# If you have questions about your rights to use or distribute this software, +# please contact Berkeley Lab's Innovation & Partnerships Office +# at IPO@lbl.gov. +# +# NOTICE. This software was developed under funding from the +# U.S. Department of Energy. As such, the U.S. Government has been granted +# for itself and others acting on its behalf a paid-up, nonexclusive, +# irrevocable, worldwide license in the Software to reproduce, prepare +# derivative works, and perform publicly and display publicly. Beginning +# five (5) years after the date permission to assert copyright is obtained +# from the U.S. Department of Energy, and subject to any subsequent five (5) +# year renewals, the U.S. Government is granted for itself and others acting +# on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in +# the Software to reproduce, prepare derivative works, distribute copies to +# the public, perform publicly and display publicly, and to permit others to +# do so. +# +# Email questions to SDMSUPPORT@LBL.GOV +# Scientific Data Management Research Group +# Lawrence Berkeley National Laboratory +# +# last update on Tue Aug 8 08:16:49 PDT 2017 +# + + +''' +dxt_analyzer.py +To plot the read or write activity from Darshan Extended Trace (DXT) logs. + +For more information on creating DXT logs, see: +http://www.mcs.anl.gov/research/projects/darshan/docs/darshan3-util.html#_darshan_dxt_parser + +% ./io_activitity_dxt.py --help +usage: dxt_analyzer.py [-h] -i DXT_LOGNAME [-o SAVEFIG] [--show] [--read] + [--filemode] [-f FNAME] + +io activity plot from dxt log + +optional arguments: + -h, --help show this help message and exit + -i DXT_LOGNAME, --input DXT_LOGNAME + dxt log path + -o SAVEFIG, --save SAVEFIG + output file name for the plot + --show Show the plot rather than saving to a PDF + --read READ I/O action to be plotted. + Default is False for WRITE mode. + --filemode Single file mode (must be used with --fname) + Default is False for all files + -f FNAME, --fname FNAME + name of file to be plotted (must use with --filemode) + +e.g. +python dxt_analyzer.py -i darshan_dxt-a.txt +python dxt_analyzer.py -i darshan_dxt-a.txt \ + --filemode -f /global/cscratch1/sd/asim/amrex/a24/plt00000.hdf5 +python dxt_analyzer.py -i darshan_dxt-c.txt +python dxt_analyzer.py -i darshan_dxt-d.txt -o dxt-d.pdf +python dxt_analyzer.py -i darshan_dxt-df.txt +python dxt_analyzer.py -i darshan_dxt-v.txt + +''' + +import numpy as np +import matplotlib +matplotlib.use('PDF') +import matplotlib.pyplot as plt +import re +import argparse + +#------------------------------------------------------------------------------ +# Regular expression and helper funtion definitions +#------------------------------------------------------------------------------ +global_finfo = {} +#'filename':{'rw':[], 'mount':'', 'fs':'', 'stripe_size':-1, 'stripe_width':-1, 'OSTlist':[]} +# Module rank write/read segment offset length start end OST +POSIX_LOG_PATTERN = ' (X_POSIX)\s+([+-]?\d+(?:\.\d+)?)\s+(\S+)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+\[\s*([+-]?\d+(?:\.\d+)?)\]\.*' +POSIX_LOG_NO_OSTS = ' (X_POSIX)\s+([+-]?\d+(?:\.\d+)?)\s+(\S+)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)' + +# Module rank write/read segment length start end +MPIIO_LOG_PATTERN = ' (X_MPIIO)\s+([+-]?\d+(?:\.\d+)?)\s+(\S+)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)' + +HEADER_PATTERN = '(#\s+)(\S+):(\s+)(\d+)' +COMMENT_PATTERN_DXT = '# \.*' +USEFUL_COMMENT_PATTERN = '# DXT,\.*' +LISTNUM_PATTERN = '(\d+.*)' + +header = re.compile(HEADER_PATTERN) +validposix = re.compile(POSIX_LOG_PATTERN) +posix_no_ost = re.compile(POSIX_LOG_NO_OSTS) +validmpiio = re.compile(MPIIO_LOG_PATTERN) +comment_dxt = re.compile(COMMENT_PATTERN_DXT) +use_comment = re.compile(USEFUL_COMMENT_PATTERN) +listnumpattern = re.compile(LISTNUM_PATTERN) + + +def store_useful_comment(pieces, finfo_dict, curr_fname): + '''updates a file's dictionary entry using the pieces of a comment/header line from a DXT log''' + attribute = pieces[0].replace('# DXT, ', '') + if attribute=='file_id': + curr_fname = pieces[2].replace(' ','').rstrip('\n') + if curr_fname not in finfo_dict.keys(): + finfo_dict[curr_fname] = {'mount':'', 'fs':'', 'stripe_size':-1, 'stripe_width':-1, 'OST_list':[]} + return curr_fname + elif attribute=='mnt_pt': + finfo_dict[curr_fname]['mount'] = pieces[1].replace(', fs_type', '').replace(' ', '').rstrip('\n') + finfo_dict[curr_fname]['fs'] = pieces[2].replace(' ', '').rstrip('\n') + elif attribute=='Lustre stripe_size': + finfo_dict[curr_fname]['stripe_size'] = int(pieces[1].replace(', Lustre stripe_count', '').replace(' ', '')) + finfo_dict[curr_fname]['stripe_width'] = int(pieces[2].replace(' ','')) + elif attribute=='Lustre OST obdidx': + liststr = pieces[1][1:] + listnums = listnumpattern.match(liststr).groups()[0] + OSTlist = [int(x) for x in re.findall(r'\d+', listnums)] + finfo_dict[curr_fname]['OST_list'] = OSTlist + return curr_fname + + +def parse_dxt_log_line(line, curr_fname, finfo_dict): + '''takes a DXT log line and stores pertinent file metadata into a dictionary, and parses read/write trace data into a tuple''' + if not line: return (curr_fname, (), -1) + pieces = line.split(":") + if comment_dxt.match(pieces[0]): + if use_comment.match(pieces[0]): + curr_fname = store_useful_comment(pieces, finfo_dict, curr_fname) + return (curr_fname, (), -1) + header_match = header.match(line) + if header_match: + return (curr_fname, (header_match.group(2),int(header_match.group(4))), -11) + else: + return (curr_fname, (), -1) + data = validposix.match(line) + if data: + return (curr_fname, + ( + data.group(1).replace('X_', '')+'_'+(data.group(3).upper()), + ( + int(data.group(2)), #rank + int(data.group(6)), #length + float(data.group(7)), #start + float(data.group(8)), #end + int(data.group(9)) #OST + ) + ), 1) + data = posix_no_ost.match(line) + if data: + return (curr_fname, + ( + data.group(1).replace('X_', '')+'_'+(data.group(3).upper()), + ( + int(data.group(2)), #rank + int(data.group(6)), #length + float(data.group(7)), #start + float(data.group(8)), #end + -1 # No OST + ) + ), 1) + data = validmpiio.match(line) + if data: + return (curr_fname, + ( + data.group(1).replace('X_', '')+'_'+(data.group(3).upper()), + ( + int(data.group(2)), #rank + int(data.group(5)), #length + float(data.group(6)), #start + float(data.group(7)), #end + ) + ), 1) + return (curr_fname, (), -1) + + +def get_verts_file(data, module, fname, action): + '''make a list of rectangle vertices to plot the posix/mpi read/write activity for each rank for a specific file''' + keyword = module+'_'+action + filedata = map(lambda x: x[1], filter(lambda x: x[0]==fname, data)) + filtered = map(lambda x: x[1], filter(lambda x: x[0]==keyword, filedata)) + activities = map(lambda x: (x[0], (x[2], x[3])), filtered) + verts = np.zeros((1,4,2)) + for entry in activities: + lx,rx,by,ty = entry[1][0], entry[1][1], entry[0] - 0.5 , entry[0] + 0.5 + newverts = np.array([((lx,by), (lx,ty), (rx, ty), (rx, by))]) + verts = np.concatenate((verts, newverts)) + return verts[1:] + + +def get_verts_all(data, module, action): + '''make a list of rectangle vertices to plot the posix/mpi read/write activity for each rank''' + keyword = module+'_'+action + IOdata = map(lambda x: x[1], data) + filtered = map(lambda x: x[1], filter(lambda x: x[0]==keyword, IOdata)) + activities = map(lambda x: (x[0], (x[2], x[3])), filtered) + verts = np.zeros((1,4,2)) + for entry in activities: + lx,rx,by,ty = entry[1][0], entry[1][1], entry[0] - 0.5 , entry[0] + 0.5 + newverts = np.array([((lx,by), (lx,ty), (rx, ty), (rx, by))]) + verts = np.concatenate((verts, newverts)) + return verts[1:] + + +#------------------------------------------------------------------------------ +# Set variables and create plots +#------------------------------------------------------------------------------ + +# Variables to be set by user +# name of DXT log file +dxt_logname = './darshan_dxt-5967365.txt' +# create plot for a specific file (mode='file') or for all files (mode='all') +singlefile_mode = False # for 'all' +mode = 'all' +# name of file to be plotted (must use with mode='file') +fname = '/global/cscratch1/sd/junmin/heatTxf.n64.s32/ph5.4096.n128s64.t1.h5' +# FLAG for READ I/O action to be plotted. +read_flag = False # for READ action +action = 'WRITE' +# filename to save the plot (must end with .pdf). +# showflag takes priority than savefig. +showflag = False # If you do not want to save, set this to True. +savefig = 'dxt_plot.pdf' + +parser = argparse.ArgumentParser(description='io activity plot from dxt log') +parser.add_argument("-i", "--input", action="store", dest="dxt_logname", required=True, help="dxt log path") +parser.add_argument("-o", "--save", action="store", dest="savefig", required=False, help="output file name for the plot") +parser.add_argument("--show", action="store_true", dest="showflag", required=False, help="Show the plot rather than saving to a PDF") +parser.add_argument("--read", action="store_true", dest="read_flag", required=False, help="READ I/O action to be plotted. Default is False for WRITE mode.") +parser.add_argument("--filemode", action="store_true", dest="singlefile_mode", required=False, help="Single file mode (must be used with --fname). Default is False for all files") +parser.add_argument("-f", "--fname", action="store", dest="fname", required=False, help="name of file to be plotted (must use with --filemode)") + +#args = parser.parse_args(['-l', '/Users/asim/Desktop/simcodes/vpic/runs/ttest/junmin-darshan_dxt-5967365.txt', +#'--save', 'dxt_plot.pdf']) +args = parser.parse_args() # uncomment this line for general use + +dxt_logname = args.dxt_logname +if (args.savefig): + savefig = args.savefig +if (args.showflag): + showflag = True +if (args.read_flag): + read_flag = True + action = 'READ' +if (args.singlefile_mode): + singlefile_mode = True + mode = 'file' + fname = args.fname + + +with open(dxt_logname) as infile: + finfo_dict = {} + curr_fname = '' + logdata = [] + for line in infile: + curr_fname, data, flag = parse_dxt_log_line(line, curr_fname, finfo_dict) + if flag == -11: + (k,v) = data + if k == 'jobid': jobid = v + elif k == 'start_time': start_time = v + elif k == 'end_time': end_time = v + elif k == 'nprocs': nprocs = v + elif flag == 1: + logdata.append((curr_fname, data)) + +fig, ax = plt.subplots(dpi=150) + +if mode=='file': + mpiio_verts = get_verts_file(logdata, 'MPIIO', fname, action) + mpiio_collec = matplotlib.collections.PolyCollection(mpiio_verts, facecolor='blue', edgecolor='blue') + posix_verts = get_verts_file(logdata, 'POSIX', fname, action) + posix_collec = matplotlib.collections.PolyCollection(posix_verts, facecolor='red', edgecolor='red') + title = str(jobid)+'_'+fname.split('/').pop()+'_'+action+'_activity' +else : # mode=='all' + mpiio_verts = get_verts_all(logdata, 'MPIIO', action) + mpiio_collec = matplotlib.collections.PolyCollection(mpiio_verts, facecolor='blue', edgecolor='blue') + posix_verts = get_verts_all(logdata, 'POSIX', action) + posix_collec = matplotlib.collections.PolyCollection(posix_verts, facecolor='red', edgecolor='red') + title = str(jobid)+'_'+action+'_activity' + + +ax.add_collection(mpiio_collec) +ax.add_collection(posix_collec) +ax.autoscale() +plt.ylabel("MPI rank") +plt.xlabel("Time (s)") +plt.title(title) +if (showflag): + plt.show() +else: + plt.savefig(savefig, format='pdf') +