Commit cd732a8f authored by Jakob Luettgau's avatar Jakob Luettgau
Browse files

Add isolated transformation example from dxt-timeline. (dxt2png).

parent c7b0910d
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import math
from operator import itemgetter
from PIL import Image, ImageDraw
def sanitize_size(x):
    """Return *x* truncated to an int, but never less than one pixel."""
    # Anything smaller than a pixel would disappear from the image, so it is
    # bumped up to exactly one; everything else is truncated towards zero.
    return 1 if x < 1 else int(x)
def calc_duration(trace):
    """
    Determine the time span covered by the segments in *trace*.

    Each segment is a mapping with 'start_time' and 'end_time' entries.

    Returns a tuple (start, end, end - start) holding the smallest start
    time, the largest end time and their difference.  For an empty trace
    the seeds are returned unchanged: (inf, -inf, -inf).
    """
    # The infinities act as seeds so that any real timestamp replaces them.
    starts = [float('inf')]
    ends = [float('-inf')]
    for seg in trace:
        starts.append(seg['start_time'])
        ends.append(seg['end_time'])

    earliest = min(starts)
    latest = max(ends)
    return earliest, latest, latest - earliest
def calc_minsize(trace):
    """
    Return the largest 'offset' + 'length' over all segments in *trace*.

    This is the minimum size needed to cover every segment.  The result is
    0 for an empty trace or when no segment reaches beyond 0.
    """
    # Seeding with 0 keeps the result non-negative, as a manual scan
    # starting from 0 would.
    extents = [0]
    extents.extend(seg['offset'] + seg['length'] for seg in trace)
    return max(extents)
def segment(rec):
    """
    Render the segments of a record as an image, one column per segment.

    Segments are ordered by 'start_time' and drawn left to right in columns
    of equal width, so the x axis is "n-th access" rather than time.  The
    vertical position and height of each bar follow the segment's 'offset'
    and 'length' relative to the largest offset + length in the record.
    Read segments are drawn reddish, write segments cyan.

    Parameters:
        rec: mapping with 'read_segments' and 'write_segments', each a list
            of dicts providing 'offset', 'length', 'start_time' and
            'end_time'.  The input is not modified.

    Returns:
        An RGBA PIL image.  A record without any segments yields a 1x1
        image that shows only the background colour.
    """
    background = (33, 33, 33, 255)
    colors = {
        'r': (222, 66, 111, 200),
        'w': (66, 222, 222, 200),
    }

    # Tag copies so the caller's segment dicts stay untouched.  Writes must
    # be tagged 'w' to match the colour table; otherwise they are never drawn.
    trace = [dict(item, type='r') for item in rec['read_segments']]
    trace += [dict(item, type='w') for item in rec['write_segments']]

    minsize = calc_minsize(trace)
    _, _, duration = calc_duration(trace)
    print("len(trace):", len(trace), "minsize:", minsize, "duration:", duration)

    count = len(trace)
    if count == 0:
        # Nothing to draw, and no sensible column width can be derived.
        return Image.new('RGBA', (1, 1), color=background)

    # The image width follows the covered time span (720 px per time unit);
    # that width is then divided evenly among the segments.
    pixels_per_unit = 720
    width = sanitize_size(duration * pixels_per_unit)
    column = width / count

    # If every segment is empty, minsize is 0; scale against 1 instead so
    # neither log() nor the divisions below fail.
    span = minsize if minsize > 0 else 1
    log_span = math.log(span)
    height = sanitize_size(log_span * log_span)

    img = Image.new('RGBA', (width, height), color=background)
    draw = ImageDraw.Draw(img)

    for i, event in enumerate(sorted(trace, key=itemgetter('start_time'))):
        x0 = i * column
        y0 = height * (event['offset'] / span)
        bar_height = sanitize_size(height * (event['length'] / span))

        # With more segments than pixels a column is narrower than one pixel;
        # clamp so the box never has x1 < x0, which recent Pillow rejects.
        x1 = max(x0, x0 + column - 1)
        y1 = y0 + bar_height - 1

        # xy is given as [x0, y0, x1, y1]
        draw.rectangle([x0, y0, x1, y1], fill=colors[event['type']], outline=None)

    del draw
    return img
def wallclock(rec):
    """
    Render the segments of a record as an image with time on the x axis.

    Each segment becomes a bar whose horizontal position and width follow
    its 'start_time' and 'end_time' relative to the earliest start in the
    record, and whose vertical position and height follow its 'offset' and
    'length' relative to the largest offset + length in the record.  Read
    segments are drawn reddish, write segments cyan.

    Parameters:
        rec: mapping with 'read_segments' and 'write_segments', each a list
            of dicts providing 'offset', 'length', 'start_time' and
            'end_time'.  The input is not modified.

    Returns:
        An RGBA PIL image.  A record without any segments yields a 1x1
        image that shows only the background colour.
    """
    background = (33, 33, 33, 255)
    colors = {
        'r': (222, 66, 111, 200),
        'w': (66, 222, 222, 200),
    }

    # Tag copies so the caller's segment dicts stay untouched.  Writes must
    # be tagged 'w' to match the colour table; otherwise they are never drawn.
    trace = [dict(item, type='r') for item in rec['read_segments']]
    trace += [dict(item, type='w') for item in rec['write_segments']]

    if not trace:
        # No segments means no time span and no size to scale against.
        return Image.new('RGBA', (1, 1), color=background)

    minsize = calc_minsize(trace)
    start, _, duration = calc_duration(trace)

    pixels_per_unit = 720
    if duration == 0:
        # All segments are instantaneous; avoid dividing by zero below.
        duration = 1

    # If every segment is empty, minsize is 0; scale against 1 instead so
    # neither log() nor the divisions below fail.
    span = minsize if minsize > 0 else 1
    log_span = math.log(span)

    # image properties
    width = sanitize_size(duration * pixels_per_unit)
    height = sanitize_size(log_span * log_span)

    img = Image.new('RGBA', (width, height), color=background)
    draw = ImageDraw.Draw(img)

    # Drawing in start order lets later segments paint over earlier ones.
    for event in sorted(trace, key=itemgetter('start_time')):
        begin = event['start_time'] - start
        finish = event['end_time'] - start

        x0 = begin / duration * width
        y0 = height * (event['offset'] / span)
        bar_width = sanitize_size((finish - begin) * pixels_per_unit)
        bar_height = sanitize_size(height * (event['length'] / span))

        # xy is given as [x0, y0, x1, y1]
        draw.rectangle(
            [x0, y0, x0 + bar_width - 1, y0 + bar_height - 1],
            fill=colors[event['type']],
            outline=None,
        )

    del draw
    return img
def visualize(data, modes=('wallclock', 'segment'), path="./"):
    """
    Render the trace of one rank and save one PNG per requested mode.

    Parameters:
        data: nested mapping.  The entries read here are data['rankid'],
            data['minsize'], data['cur']['fileid'] and, below
            data['cur']['ranks'][rankid], the keys 'trace', 'start' and
            'end'.
        modes: collection of mode names; 'wallclock' and/or 'segment'.
        path: directory prefix for the output files.

    Output files are named <path>/<fileid>_wallclock.png and
    <path>/<fileid>_segment.png (after path normalisation).
    """
    fileid = data['cur']['fileid']
    rank = data['cur']['ranks'][data['rankid']]
    trace = rank['trace']
    minsize = data['minsize']
    start = rank['start']
    end = rank['end']
    duration = end - start

    filename = os.path.normpath("%s/%s" % (path, fileid))

    # NOTE(review): wallclock() and segment() as defined in this file accept a
    # single record argument, so the multi-argument calls below raise
    # TypeError.  They look like leftovers from an older signature; adapt them
    # once the record layout expected by this function is confirmed.
    if 'wallclock' in modes:
        img = wallclock(trace, minsize, duration, start, end)
        target = '%s_wallclock.png' % (filename)
        print('Writing %s' % (target))
        img.save(target, 'PNG')

    if 'segment' in modes:
        img = segment(trace, minsize, duration)
        target = '%s_segment.png' % (filename)
        print('Writing %s' % (target))
        img.save(target, 'PNG')
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# DarshanUtils for Python # DarshanUtils for Python
This notebook gives an overview of features provided by the Python bindings for DarshanUtils. This notebook gives an overview of features provided by the Python bindings for DarshanUtils.
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
By default all records, metadata, available modules and the name records are loaded: By default all records, metadata, available modules and the name records are loaded:
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import darshan import darshan
report = darshan.DarshanReport("example-logs/example.darshan", read_all=True) # Default behavior report = darshan.DarshanReport("example-logs/example.darshan", read_all=True) # Default behavior
report.info() report.info()
``` ```
%%%% Output: stream %%%% Output: stream
Filename: example-logs/example.darshan Filename: example-logs/example.darshan
Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56) Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56)
Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32 Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32
Processes: 2048 Processes: 2048
JobID: 4478544 JobID: 4478544
UID: 69615 UID: 69615
Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO'] Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO']
Loaded Records: {'POSIX': 1, 'MPI-IO': 1, 'STDIO': 129} Loaded Records: {'POSIX': 1, 'MPI-IO': 1, 'STDIO': 129}
Name Records: 4 Name Records: 4
Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'} Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}
DarshanReport: id(140346659969064) (tmp) DarshanReport: id(140346659969064) (tmp)
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
report.modules report.modules
``` ```
%%%% Output: execute_result %%%% Output: execute_result
{'POSIX': {'len': 186, 'ver': 3, 'idx': 1, 'num_records': 1}, {'POSIX': {'len': 186, 'ver': 3, 'idx': 1, 'num_records': 1},
'MPI-IO': {'len': 154, 'ver': 2, 'idx': 2, 'num_records': 1}, 'MPI-IO': {'len': 154, 'ver': 2, 'idx': 2, 'num_records': 1},
'LUSTRE': {'len': 87, 'ver': 1, 'idx': 7}, 'LUSTRE': {'len': 87, 'ver': 1, 'idx': 7},
'STDIO': {'len': 3234, 'ver': 1, 'idx': 8, 'num_records': 129}} 'STDIO': {'len': 3234, 'ver': 1, 'idx': 8, 'num_records': 129}}
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
A few of the internal data structures explained: A few of the internal data structures explained:
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# report.metadata # dictionary with raw metadata from darshan log # report.metadata # dictionary with raw metadata from darshan log
# report.modules # dictionary with raw module info from darshan log (need: technical, module idx) # report.modules # dictionary with raw module info from darshan log (need: technical, module idx)
# report.name_records # dictionary for resolving name records: id -> path/name # report.name_records # dictionary for resolving name records: id -> path/name
# report.records # per module "dataframes"/dictionaries holding loaded records # report.records # per module "dataframes"/dictionaries holding loaded records
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
The darshan report holds a variety of namespaces for report related data. All of them are also referenced in `report.data` at the moment, but reliance on this internal organization of the report object is discouraged once the API has stabilized. Currently, `report.data` references the following information: The darshan report holds a variety of namespaces for report related data. All of them are also referenced in `report.data` at the moment, but reliance on this internal organization of the report object is discouraged once the API has stabilized. Currently, `report.data` references the following information:
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
report.data.keys() report.data.keys()
``` ```
%%%% Output: execute_result %%%% Output: execute_result
dict_keys(['version', 'metadata', 'records', 'summary', 'modules', 'counters', 'name_records', 'mounts']) dict_keys(['version', 'metadata', 'records', 'summary', 'modules', 'counters', 'name_records', 'mounts'])
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
report.mod_read_all_records('POSIX') report.mod_read_all_records('POSIX')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
report.mod_read_all_records('STDIO') report.mod_read_all_records('STDIO')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
report.update_name_records() report.update_name_records()
report.info() report.info()
``` ```
%%%% Output: stream %%%% Output: stream
POSIX POSIX
MPI-IO MPI-IO
STDIO STDIO
Filename: example-logs/example.darshan Filename: example-logs/example.darshan
Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56) Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56)
Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32 Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32
Processes: 2048 Processes: 2048
JobID: 4478544 JobID: 4478544
UID: 69615 UID: 69615
Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO'] Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO']
Loaded Records: {'POSIX': 1, 'MPI-IO': 1, 'STDIO': 129} Loaded Records: {'POSIX': 1, 'MPI-IO': 1, 'STDIO': 129}
Name Records: 3 Name Records: 3
Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'} Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}
DarshanReport: id(140346659969064) (tmp) DarshanReport: id(140346659969064) (tmp)
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# visualization helper used by different examples in the remainder of this notebook # visualization helper used by different examples in the remainder of this notebook
from IPython.display import display, HTML from IPython.display import display, HTML
# usage: display(obj) # usage: display(obj)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Record Formats and Selectively Loading Records ### Record Formats and Selectively Loading Records
For memory-efficient analysis, it is possible to suppress records from being loaded automatically. This is useful, for example, when analysis considers only records of a particular layer/module. For memory-efficient analysis, it is possible to suppress records from being loaded automatically. This is useful, for example, when analysis considers only records of a particular layer/module.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import darshan import darshan
report = darshan.DarshanReport("example-logs/example.darshan", read_all=False, lookup_name_records=True) # Loads no records! report = darshan.DarshanReport("example-logs/example.darshan", read_all=False, lookup_name_records=True) # Loads no records!
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# expected to fail, as no records were loaded # expected to fail, as no records were loaded
try: try:
print(len(report.records['STDIO']), "records loaded for STDIO.") print(len(report.records['STDIO']), "records loaded for STDIO.")
except: except:
print("No STDIO records loaded for this report yet.") print("No STDIO records loaded for this report yet.")
``` ```
%%%% Output: stream %%%% Output: stream
No STDIO records loaded for this report yet. No STDIO records loaded for this report yet.
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Additional records then can be loaded selectively, for example, on a per module basis: Additional records then can be loaded selectively, for example, on a per module basis:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
#### dtype: pandas #### dtype: pandas
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
report.mod_read_all_records("STDIO", dtype="pandas") report.mod_read_all_records("STDIO", dtype="pandas")
``` ```
%%%% Output: stream %%%% Output: stream
STDIO STDIO
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
print('id', report.records['STDIO'][0]['id']) print('id', report.records['STDIO'][0]['id'])
print('rank', report.records['STDIO'][0]['rank']) print('rank', report.records['STDIO'][0]['rank'])
display(report.records['STDIO'][0]['counters']) display(report.records['STDIO'][0]['counters'])
display(report.records['STDIO'][0]['fcounters']) display(report.records['STDIO'][0]['fcounters'])
``` ```
%%%% Output: stream %%%% Output: stream
id -1 id -1
rank -1 rank -1
%%%% Output: display_data %%%% Output: display_data
%%%% Output: display_data %%%% Output: display_data
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
#### dtype: dict #### dtype: dict
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
report.mod_read_all_records("STDIO", dtype='dict') report.mod_read_all_records("STDIO", dtype='dict')
report.records['STDIO'][0] report.records['STDIO'][0]
``` ```
%%%% Output: stream %%%% Output: stream
STDIO STDIO
%%%% Output: execute_result %%%% Output: execute_result
{'id': 15920181672442173319, {'id': 15920181672442173319,
'rank': 0, 'rank': 0,
'counters': {'STDIO_OPENS': 1, 'counters': {'STDIO_OPENS': 1,
'STDIO_FDOPENS': -1, 'STDIO_FDOPENS': -1,
'STDIO_READS': 0, 'STDIO_READS': 0,