Commit 09531f17 authored by Shane Snyder's avatar Shane Snyder
Browse files

Merge branch 'master' into 'master'

dxt_analyzer

See merge request !16
parents 2d14002d 774b5d94
......@@ -144,6 +144,7 @@ install:: all
install -m 755 darshan-dxt-parser $(bindir)
install -m 755 darshan-merge $(bindir)
install -m 755 $(srcdir)/darshan-summary-per-file.sh $(bindir)
install -m 755 dxt_analyzer.py $(bindir)
install -m 755 libdarshan-util.a $(libdir)
ifeq ($(DARSHAN_ENABLE_SHARED),1)
install -m 755 libdarshan-util.so $(libdir)
......
# Email questions to SDMSUPPORT@LBL.GOV
# Scientific Data Management Research Group
# Lawrence Berkeley National Laboratory
#
# last update on Mon Aug 7 08:47:28 PDT 2017
To plot the read or write activity from Darshan Extended Trace (DXT) logs.
% ./dxt_analyzer.py --help
usage: dxt_analyzer.py [-h] -i DXT_LOGNAME [-o SAVEFIG] [--show] [--read]
[--filemode] [-f FNAME]
io activity plot from dxt log
optional arguments:
-h, --help show this help message and exit
-i DXT_LOGNAME, --input DXT_LOGNAME
dxt log path
-o SAVEFIG, --save SAVEFIG
output file name for the plot
--show Show the plot rather than saving to a PDF
--read READ I/O action to be plotted.
Default is False for WRITE mode.
--filemode Single file mode (must be used with --fname).
Default is False for all files
-f FNAME, --fname FNAME
name of file to be plotted (must use with --filemode)
Example runs:
% python dxt_analyzer.py -i darshan_dxt-a.txt
% python dxt_analyzer.py -i darshan_dxt-a.txt \
--filemode -f /global/cscratch1/sd/asim/amrex/a24/plt00000.hdf5
% python dxt_analyzer.py -i darshan_dxt-d.txt \
-o dxt-d.pdf
% python dxt_analyzer.py -i darshan_dxt-df.txt
% python dxt_analyzer.py -i darshan_dxt-c.txt
% python dxt_analyzer.py -i darshan_dxt-v.txt
For more information on creating DXT logs, see:
http://www.mcs.anl.gov/research/projects/darshan/docs/darshan3-util.html#_darshan_dxt_parser
#!/usr/bin/env python2
###
#
# *** Copyright Notice ***
#
# dxt_analyzer.py
# Copyright (c) 2017, The Regents of the University of California,
# through Lawrence Berkeley National Laboratory (subject to receipt
# of any required approvals from the U.S. Dept. of Energy).
# All rights reserved.
#
# If you have questions about your rights to use or distribute this software,
# please contact Berkeley Lab's Innovation & Partnerships Office
# at IPO@lbl.gov.
#
# NOTICE. This software was developed under funding from the
# U.S. Department of Energy. As such, the U.S. Government has been granted
# for itself and others acting on its behalf a paid-up, nonexclusive,
# irrevocable, worldwide license in the Software to reproduce, prepare
# derivative works, and perform publicly and display publicly. Beginning
# five (5) years after the date permission to assert copyright is obtained
# from the U.S. Department of Energy, and subject to any subsequent five (5)
# year renewals, the U.S. Government is granted for itself and others acting
# on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in
# the Software to reproduce, prepare derivative works, distribute copies to
# the public, perform publicly and display publicly, and to permit others to
# do so.
#
# Email questions to SDMSUPPORT@LBL.GOV
# Scientific Data Management Research Group
# Lawrence Berkeley National Laboratory
#
# last update on Tue Aug 8 08:16:49 PDT 2017
#
'''
dxt_analyzer.py
To plot the read or write activity from Darshan Extended Trace (DXT) logs.
For more information on creating DXT logs, see:
http://www.mcs.anl.gov/research/projects/darshan/docs/darshan3-util.html#_darshan_dxt_parser
% ./io_activitity_dxt.py --help
usage: dxt_analyzer.py [-h] -i DXT_LOGNAME [-o SAVEFIG] [--show] [--read]
[--filemode] [-f FNAME]
io activity plot from dxt log
optional arguments:
-h, --help show this help message and exit
-i DXT_LOGNAME, --input DXT_LOGNAME
dxt log path
-o SAVEFIG, --save SAVEFIG
output file name for the plot
--show Show the plot rather than saving to a PDF
--read READ I/O action to be plotted.
Default is False for WRITE mode.
--filemode Single file mode (must be used with --fname)
Default is False for all files
-f FNAME, --fname FNAME
name of file to be plotted (must use with --filemode)
e.g.
python dxt_analyzer.py -i darshan_dxt-a.txt
python dxt_analyzer.py -i darshan_dxt-a.txt \
--filemode -f /global/cscratch1/sd/asim/amrex/a24/plt00000.hdf5
python dxt_analyzer.py -i darshan_dxt-c.txt
python dxt_analyzer.py -i darshan_dxt-d.txt -o dxt-d.pdf
python dxt_analyzer.py -i darshan_dxt-df.txt
python dxt_analyzer.py -i darshan_dxt-v.txt
'''
import numpy as np
import matplotlib
matplotlib.use('PDF')
import matplotlib.pyplot as plt
import re
import argparse
#------------------------------------------------------------------------------
# Regular expression and helper funtion definitions
#------------------------------------------------------------------------------
global_finfo = {}
#'filename':{'rw':[], 'mount':'', 'fs':'', 'stripe_size':-1, 'stripe_width':-1, 'OSTlist':[]}
# Module rank write/read segment offset length start end OST
POSIX_LOG_PATTERN = ' (X_POSIX)\s+([+-]?\d+(?:\.\d+)?)\s+(\S+)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+\[\s*([+-]?\d+(?:\.\d+)?)\]\.*'
POSIX_LOG_NO_OSTS = ' (X_POSIX)\s+([+-]?\d+(?:\.\d+)?)\s+(\S+)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)'
# Module rank write/read segment length start end
MPIIO_LOG_PATTERN = ' (X_MPIIO)\s+([+-]?\d+(?:\.\d+)?)\s+(\S+)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)\s+([+-]?\d+(?:\.\d+)?)'
HEADER_PATTERN = '(#\s+)(\S+):(\s+)(\d+)'
COMMENT_PATTERN_DXT = '# \.*'
USEFUL_COMMENT_PATTERN = '# DXT,\.*'
LISTNUM_PATTERN = '(\d+.*)'
header = re.compile(HEADER_PATTERN)
validposix = re.compile(POSIX_LOG_PATTERN)
posix_no_ost = re.compile(POSIX_LOG_NO_OSTS)
validmpiio = re.compile(MPIIO_LOG_PATTERN)
comment_dxt = re.compile(COMMENT_PATTERN_DXT)
use_comment = re.compile(USEFUL_COMMENT_PATTERN)
listnumpattern = re.compile(LISTNUM_PATTERN)
def store_useful_comment(pieces, finfo_dict, curr_fname):
'''updates a file's dictionary entry using the pieces of a comment/header line from a DXT log'''
attribute = pieces[0].replace('# DXT, ', '')
if attribute=='file_id':
curr_fname = pieces[2].replace(' ','').rstrip('\n')
if curr_fname not in finfo_dict.keys():
finfo_dict[curr_fname] = {'mount':'', 'fs':'', 'stripe_size':-1, 'stripe_width':-1, 'OST_list':[]}
return curr_fname
elif attribute=='mnt_pt':
finfo_dict[curr_fname]['mount'] = pieces[1].replace(', fs_type', '').replace(' ', '').rstrip('\n')
finfo_dict[curr_fname]['fs'] = pieces[2].replace(' ', '').rstrip('\n')
elif attribute=='Lustre stripe_size':
finfo_dict[curr_fname]['stripe_size'] = int(pieces[1].replace(', Lustre stripe_count', '').replace(' ', ''))
finfo_dict[curr_fname]['stripe_width'] = int(pieces[2].replace(' ',''))
elif attribute=='Lustre OST obdidx':
liststr = pieces[1][1:]
listnums = listnumpattern.match(liststr).groups()[0]
OSTlist = [int(x) for x in re.findall(r'\d+', listnums)]
finfo_dict[curr_fname]['OST_list'] = OSTlist
return curr_fname
def parse_dxt_log_line(line, curr_fname, finfo_dict):
'''takes a DXT log line and stores pertinent file metadata into a dictionary, and parses read/write trace data into a tuple'''
if not line: return (curr_fname, (), -1)
pieces = line.split(":")
if comment_dxt.match(pieces[0]):
if use_comment.match(pieces[0]):
curr_fname = store_useful_comment(pieces, finfo_dict, curr_fname)
return (curr_fname, (), -1)
header_match = header.match(line)
if header_match:
return (curr_fname, (header_match.group(2),int(header_match.group(4))), -11)
else:
return (curr_fname, (), -1)
data = validposix.match(line)
if data:
return (curr_fname,
(
data.group(1).replace('X_', '')+'_'+(data.group(3).upper()),
(
int(data.group(2)), #rank
int(data.group(6)), #length
float(data.group(7)), #start
float(data.group(8)), #end
int(data.group(9)) #OST
)
), 1)
data = posix_no_ost.match(line)
if data:
return (curr_fname,
(
data.group(1).replace('X_', '')+'_'+(data.group(3).upper()),
(
int(data.group(2)), #rank
int(data.group(6)), #length
float(data.group(7)), #start
float(data.group(8)), #end
-1 # No OST
)
), 1)
data = validmpiio.match(line)
if data:
return (curr_fname,
(
data.group(1).replace('X_', '')+'_'+(data.group(3).upper()),
(
int(data.group(2)), #rank
int(data.group(5)), #length
float(data.group(6)), #start
float(data.group(7)), #end
)
), 1)
return (curr_fname, (), -1)
def get_verts_file(data, module, fname, action):
'''make a list of rectangle vertices to plot the posix/mpi read/write activity for each rank for a specific file'''
keyword = module+'_'+action
filedata = map(lambda x: x[1], filter(lambda x: x[0]==fname, data))
filtered = map(lambda x: x[1], filter(lambda x: x[0]==keyword, filedata))
activities = map(lambda x: (x[0], (x[2], x[3])), filtered)
verts = np.zeros((1,4,2))
for entry in activities:
lx,rx,by,ty = entry[1][0], entry[1][1], entry[0] - 0.5 , entry[0] + 0.5
newverts = np.array([((lx,by), (lx,ty), (rx, ty), (rx, by))])
verts = np.concatenate((verts, newverts))
return verts[1:]
def get_verts_all(data, module, action):
'''make a list of rectangle vertices to plot the posix/mpi read/write activity for each rank'''
keyword = module+'_'+action
IOdata = map(lambda x: x[1], data)
filtered = map(lambda x: x[1], filter(lambda x: x[0]==keyword, IOdata))
activities = map(lambda x: (x[0], (x[2], x[3])), filtered)
verts = np.zeros((1,4,2))
for entry in activities:
lx,rx,by,ty = entry[1][0], entry[1][1], entry[0] - 0.5 , entry[0] + 0.5
newverts = np.array([((lx,by), (lx,ty), (rx, ty), (rx, by))])
verts = np.concatenate((verts, newverts))
return verts[1:]
#------------------------------------------------------------------------------
# Set variables and create plots
#------------------------------------------------------------------------------
# Variables to be set by user
# name of DXT log file
dxt_logname = './darshan_dxt-5967365.txt'
# create plot for a specific file (mode='file') or for all files (mode='all')
singlefile_mode = False # for 'all'
mode = 'all'
# name of file to be plotted (must use with mode='file')
fname = '/global/cscratch1/sd/junmin/heatTxf.n64.s32/ph5.4096.n128s64.t1.h5'
# FLAG for READ I/O action to be plotted.
read_flag = False # for READ action
action = 'WRITE'
# filename to save the plot (must end with .pdf).
# showflag takes priority than savefig.
showflag = False # If you do not want to save, set this to True.
savefig = 'dxt_plot.pdf'
parser = argparse.ArgumentParser(description='io activity plot from dxt log')
parser.add_argument("-i", "--input", action="store", dest="dxt_logname", required=True, help="dxt log path")
parser.add_argument("-o", "--save", action="store", dest="savefig", required=False, help="output file name for the plot")
parser.add_argument("--show", action="store_true", dest="showflag", required=False, help="Show the plot rather than saving to a PDF")
parser.add_argument("--read", action="store_true", dest="read_flag", required=False, help="READ I/O action to be plotted. Default is False for WRITE mode.")
parser.add_argument("--filemode", action="store_true", dest="singlefile_mode", required=False, help="Single file mode (must be used with --fname). Default is False for all files")
parser.add_argument("-f", "--fname", action="store", dest="fname", required=False, help="name of file to be plotted (must use with --filemode)")
#args = parser.parse_args(['-l', '/Users/asim/Desktop/simcodes/vpic/runs/ttest/junmin-darshan_dxt-5967365.txt',
#'--save', 'dxt_plot.pdf'])
args = parser.parse_args() # uncomment this line for general use
dxt_logname = args.dxt_logname
if (args.savefig):
savefig = args.savefig
if (args.showflag):
showflag = True
if (args.read_flag):
read_flag = True
action = 'READ'
if (args.singlefile_mode):
singlefile_mode = True
mode = 'file'
fname = args.fname
with open(dxt_logname) as infile:
finfo_dict = {}
curr_fname = ''
logdata = []
for line in infile:
curr_fname, data, flag = parse_dxt_log_line(line, curr_fname, finfo_dict)
if flag == -11:
(k,v) = data
if k == 'jobid': jobid = v
elif k == 'start_time': start_time = v
elif k == 'end_time': end_time = v
elif k == 'nprocs': nprocs = v
elif flag == 1:
logdata.append((curr_fname, data))
fig, ax = plt.subplots(dpi=150)
if mode=='file':
mpiio_verts = get_verts_file(logdata, 'MPIIO', fname, action)
mpiio_collec = matplotlib.collections.PolyCollection(mpiio_verts, facecolor='blue', edgecolor='blue')
posix_verts = get_verts_file(logdata, 'POSIX', fname, action)
posix_collec = matplotlib.collections.PolyCollection(posix_verts, facecolor='red', edgecolor='red')
title = str(jobid)+'_'+fname.split('/').pop()+'_'+action+'_activity'
else : # mode=='all'
mpiio_verts = get_verts_all(logdata, 'MPIIO', action)
mpiio_collec = matplotlib.collections.PolyCollection(mpiio_verts, facecolor='blue', edgecolor='blue')
posix_verts = get_verts_all(logdata, 'POSIX', action)
posix_collec = matplotlib.collections.PolyCollection(posix_verts, facecolor='red', edgecolor='red')
title = str(jobid)+'_'+action+'_activity'
ax.add_collection(mpiio_collec)
ax.add_collection(posix_collec)
ax.autoscale()
plt.ylabel("MPI rank")
plt.xlabel("Time (s)")
plt.title(title)
if (showflag):
plt.show()
else:
plt.savefig(savefig, format='pdf')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment