Commit 0f44520b authored by Jakob Luettgau's avatar Jakob Luettgau
Browse files

Change format mode flag from 'mode=..' to 'dtype=..'.

parent 4912d57e
......@@ -231,7 +231,7 @@ def log_lookup_name_records(log, ids=[]):
def log_get_dxt_record(log, mod_name, mod_type, reads=True, writes=True, mode='dict'):
def log_get_dxt_record(log, mod_name, mod_type, reads=True, writes=True, dtype='dict'):
"""
Returns a dictionary holding a dxt darshan log record.
......@@ -305,7 +305,7 @@ def log_get_dxt_record(log, mod_name, mod_type, reads=True, writes=True, mode='d
rec['read_segments'].append(seg)
if mode == "pandas":
if dtype == "pandas":
rec['read_segments'] = pd.DataFrame(rec['read_segments'])
rec['write_segments'] = pd.DataFrame(rec['write_segments'])
......@@ -313,7 +313,7 @@ def log_get_dxt_record(log, mod_name, mod_type, reads=True, writes=True, mode='d
def log_get_generic_record(log, mod_name, mod_type, mode='numpy'):
def log_get_generic_record(log, mod_name, mod_type, dtype='numpy'):
"""
Returns a dictionary holding a generic darshan log record.
......@@ -358,13 +358,13 @@ def log_get_generic_record(log, mod_name, mod_type, mode='numpy'):
rec['fcounters'] = np.array(flst, dtype=np.float64)
fcdict = dict(zip(fcounter_names(mod_name), rec['fcounters']))
if mode == "dict":
if dtype == "dict":
rec.update({
'counters': cdict,
'fcounters': fcdict
})
if mode == "pandas":
if dtype == "pandas":
rec.update({
'counters': pd.DataFrame(cdict, index=[0]),
'fcounters': pd.DataFrame(fcdict, index=[0])
......@@ -491,16 +491,24 @@ def log_get_lustre_record(log):
rec['ost_ids'] = np.array(ostlst, dtype=np.int64)
print(rec['ost_ids'])
return rec
sys.exit()
if mode == "dict":
rec = {'counters': cdict, 'fcounter': fcdict}
if dtype == "dict":
rec.update({
'counters': cdict,
'fcounters': fcdict
})
if mode == "pandas":
rec = {
'counters': pd.DataFrame(cdict, index=[0]),
'fcounters': pd.DataFrame(fcdict, index=[0])
}
if dtype == "pandas":
rec.update({
'counters': pd.DataFrame(cdict, index=[0]),
'fcounters': pd.DataFrame(fcdict, index=[0])
})
return rec
......
......@@ -314,7 +314,7 @@ class DarshanReport(object):
self.counters[mod]['fcounters'] = fcn
rec = backend.log_get_generic_record(self.log, mod, structdefs[mod], mode=dtype)
rec = backend.log_get_generic_record(self.log, mod, structdefs[mod], dtype=dtype)
while rec != None:
if dtype == 'pandas':
self.records[mod].append(rec)
......@@ -431,7 +431,7 @@ class DarshanReport(object):
self.counters[mod]['counters'] = cn
self.counters[mod]['fcounters'] = fcn
rec = backend.log_get_generic_record(self.log, mod, structdefs[mod], mode=dtype)
rec = backend.log_get_generic_record(self.log, mod, structdefs[mod], dtype=dtype)
while rec != None:
yield rec
......
%% Cell type:markdown id: tags:
# DarshanUtils for Python
This notebook gives an overview of the features provided by the Python bindings for DarshanUtils.
%% Cell type:markdown id: tags:
By default all records, metadata, available modules and the name records are loaded:
%% Cell type:code id: tags:
``` python
import darshan
report = darshan.DarshanReport("example-logs/example.darshan", read_all=True) # Default behavior
report.info()
```
%%%% Output: stream
Filename: example-logs/example.darshan
Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56)
Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32
Processes: 2048
JobID: 4478544
UID: 69615
Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO']
Loaded Records: {'POSIX': 1, 'MPI-IO': 1, 'STDIO': 129}
Name Records: 4
Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}
DarshanReport: id(140346659969064) (tmp)
%% Cell type:code id: tags:
``` python
report.modules
```
%%%% Output: execute_result
{'POSIX': {'len': 186, 'ver': 3, 'idx': 1, 'num_records': 1},
'MPI-IO': {'len': 154, 'ver': 2, 'idx': 2, 'num_records': 1},
'LUSTRE': {'len': 87, 'ver': 1, 'idx': 7},
'STDIO': {'len': 3234, 'ver': 1, 'idx': 8, 'num_records': 129}}
%% Cell type:markdown id: tags:
A few of the internal data structures explained:
%% Cell type:code id: tags:
``` python
# report.metadata # dictionary with raw metadata from darshan log
# report.modules # dictionary with raw module info from darshan log (need: technical, module idx)
# report.name_records # dictionary for resolving name records: id -> path/name
# report.records # per module "dataframes"/dictionaries holding loaded records
```
%% Cell type:markdown id: tags:
The darshan report holds a variety of namespaces for report-related data. All of them are also referenced in `report.data` at the moment, but reliance on this internal organization of the report object will be discouraged once the API has stabilized. Currently, `report.data` references the following information:
%% Cell type:code id: tags:
``` python
report.data.keys()
```
%%%% Output: execute_result
dict_keys(['version', 'metadata', 'records', 'summary', 'modules', 'counters', 'name_records', 'mounts'])
%% Cell type:code id: tags:
``` python
report.mod_read_all_records('POSIX')
```
%% Cell type:code id: tags:
``` python
report.mod_read_all_records('STDIO')
```
%% Cell type:code id: tags:
``` python
report.update_name_records()
report.info()
```
%%%% Output: stream
POSIX
MPI-IO
STDIO
Filename: example-logs/example.darshan
Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56)
Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32
Processes: 2048
JobID: 4478544
UID: 69615
Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO']
Loaded Records: {'POSIX': 1, 'MPI-IO': 1, 'STDIO': 129}
Name Records: 3
Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}
DarshanReport: id(140346659969064) (tmp)
%% Cell type:code id: tags:
``` python
# visualization helper used by different examples in the remainder of this notebook
from IPython.display import display, HTML
# usage: display(obj)
```
%% Cell type:markdown id: tags:
### Selectively Loading Records
### Record Formats and Selectively Loading Records
For memory-efficient analysis, it is possible to suppress records from being loaded automatically. This is useful, for example, when an analysis considers only records of a particular layer/module.
%% Cell type:code id: tags:
``` python
import darshan
report = darshan.DarshanReport("example-logs/example.darshan", read_all=False) # Loads no records!
```
%% Cell type:code id: tags:
``` python
# expected to fail, as no records were loaded
# `report` was created with read_all=False, so no module records are loaded
# yet and looking up 'STDIO' is expected to fail.
# NOTE(review): assumes report.records is a plain dict, so the failed lookup
# raises KeyError — confirm against DarshanReport.
try:
    print(len(report.records['STDIO']), "records loaded for STDIO.")
except KeyError:
    # Catch only the missing-module case instead of a bare `except:`, which
    # would also hide unrelated errors and KeyboardInterrupt/SystemExit.
    print("No STDIO records loaded for this report yet.")
```
%%%% Output: stream
No STDIO records loaded for this report yet.
%% Cell type:markdown id: tags:
Additional records can then be loaded selectively, for example, on a per-module basis:
%% Cell type:markdown id: tags:
#### dtype: dict
%% Cell type:code id: tags:
``` python
report.mod_read_all_records("STDIO", dtype='dict')
report.records['STDIO'][0]
```
%%%% Output: execute_result
{'id': 15920181672442173319,
'rank': 0,
'counters': {'STDIO_OPENS': 1,
'STDIO_FDOPENS': -1,
'STDIO_READS': 0,
'STDIO_WRITES': 6,
'STDIO_SEEKS': 0,
'STDIO_FLUSHES': 0,
'STDIO_BYTES_WRITTEN': 280,
'STDIO_BYTES_READ': 0,
'STDIO_MAX_BYTE_READ': 0,
'STDIO_MAX_BYTE_WRITTEN': 279,
'STDIO_FASTEST_RANK': 0,
'STDIO_FASTEST_RANK_BYTES': 0,
'STDIO_SLOWEST_RANK': 0,
'STDIO_SLOWEST_RANK_BYTES': 0},
'fcounters': {'STDIO_F_META_TIME': 0.0,
'STDIO_F_WRITE_TIME': 6.794929504394531e-05,
'STDIO_F_READ_TIME': 0.0,
'STDIO_F_OPEN_START_TIMESTAMP': 0.0,
'STDIO_F_CLOSE_START_TIMESTAMP': 0.0,
'STDIO_F_WRITE_START_TIMESTAMP': 0.07752799987792969,
'STDIO_F_READ_START_TIMESTAMP': 0.0,
'STDIO_F_OPEN_END_TIMESTAMP': 0.0,
'STDIO_F_CLOSE_END_TIMESTAMP': 0.0,
'STDIO_F_WRITE_END_TIMESTAMP': 116.28358292579651,
'STDIO_F_READ_END_TIMESTAMP': 0.0,
'STDIO_F_FASTEST_RANK_TIME': 0.0,
'STDIO_F_SLOWEST_RANK_TIME': 0.0,
'STDIO_F_VARIANCE_RANK_TIME': 0.0,
'STDIO_F_VARIANCE_RANK_BYTES': 0.0}}
%% Cell type:markdown id: tags:
#### dtype: pandas
%% Cell type:code id: tags:
``` python
report.mod_read_all_records("STDIO", dtype="pandas")
```
%%%% Output: stream
STDIO
%% Cell type:code id: tags:
``` python
report.update_name_records()
print('id', report.records['STDIO'][0]['id'], '=>', report.name_records[report.records['STDIO'][0]['id']])
print('rank', report.records['STDIO'][0]['rank'])
display(report.records['STDIO'][0]['counters'])
display(report.records['STDIO'][0]['fcounters'])
```
%%%% Output: stream
STDIO
id 15920181672442173319 => <STDOUT>
rank 0
%%%% Output: display_data
%%%% Output: display_data
%% Cell type:markdown id: tags:
#### dtype: numpy
%% Cell type:code id: tags:
``` python
report.mod_read_all_records("STDIO")
report.records['STDIO'][0]
```
%%%% Output: execute_result
{'id': 15920181672442173319,
'rank': 0,
'counters': array([ 1, -1, 0, 6, 0, 0, 280, 0, 0, 279, 0, 0, 0,
0]),
'fcounters': array([0.00000000e+00, 6.79492950e-05, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 7.75279999e-02, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 1.16283583e+02, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00])}
%% Cell type:markdown id: tags:
#### The Log in Memory
Let's have a look at how calling `report.mod_read_all_records("STDIO")` changed the state of the log in memory.
%% Cell type:code id: tags:
``` python
# Compare to info line: "Loaded Records: {...}"
report.info()
```
%%%% Output: stream
Filename: example-logs/example.darshan
Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56)
Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32
Processes: 2048
JobID: 4478544
UID: 69615
Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO']
Loaded Records: {'STDIO': 129}
Name Records: 2
Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}
DarshanReport: id(140178278176472) (tmp)
%% Cell type:markdown id: tags:
When working with individual log records, for example in a for loop, you would most likely care about the following instead:
%% Cell type:code id: tags:
``` python
print("Num records:", len(report.records['STDIO']))
# show first 10 records
for rec in report.records['STDIO'][0:10]:
print(rec)
```
%%%% Output: stream
Num records: 129
{'id': 15920181672442173319, 'rank': 0, 'counters': array([ 1, -1, 0, 6, 0, 0, 280, 0, 0, 279, 0, 0, 0,
0]), 'fcounters': array([0.00000000e+00, 6.79492950e-05, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 7.75279999e-02, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 1.16283583e+02, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00])}
{'id': 7238257241479193519, 'rank': 0, 'counters': array([ 1, -1, 0, 68, 0, 0, 3029, 0, 0, 3028, 0,
0, 0, 0]), 'fcounters': array([ 0. , -2662.74663377, 0. , 0. ,
0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. ,
0. , 0. , 0. ])}
{'id': 7238257241479193519, 'rank': 16, 'counters': array([ 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'fcounters': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}
{'id': 7238257241479193519, 'rank': 32, 'counters': array([ 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'fcounters': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}
{'id': 7238257241479193519, 'rank': 48, 'counters': array([ 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'fcounters': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}
{'id': 7238257241479193519, 'rank': 64, 'counters': array([ 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'fcounters': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}
{'id': 7238257241479193519, 'rank': 80, 'counters': array([ 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'fcounters': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}
{'id': 7238257241479193519, 'rank': 96, 'counters': array([ 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'fcounters': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}
{'id': 7238257241479193519, 'rank': 112, 'counters': array([ 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'fcounters': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}
{'id': 7238257241479193519, 'rank': 128, 'counters': array([ 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'fcounters': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}
%% Cell type:markdown id: tags:
### Aggregation and Filtering (Experimental)
Darshan log data is routinely aggregated for a quick overview. The report object offers a few methods to perform common aggregations:
%% Cell type:markdown id: tags:
Report aggregations and summarization remain **experimental** for now, mostly to allow interfaces to stabilize. But experimental features can be switched on easily by invoking `darshan.enable_experimental()`:
%% Cell type:code id: tags:
``` python
import darshan
darshan.enable_experimental(verbose=True) # Enable verbosity, listing new functionality
```
%%%% Output: stream
Added method create_time_summary to DarshanReport.
Added method print_module_records to DarshanReport.
Added method summarize to DarshanReport.
Added method merge to DarshanReport.
Added method create_timeline to DarshanReport.
Added method records_as_dict to DarshanReport.
Added method reduce to DarshanReport.
Added method agg_ioops to DarshanReport.
Added method create_sankey to DarshanReport.
Added method filter to DarshanReport.
Added method mod_agg_iohist to DarshanReport.
Added method name_records_summary to DarshanReport.
%% Cell type:code id: tags:
``` python
# Example report, which counts records in log across modules
report.name_records_summary()
```
%%%% Output: execute_result
{15920181672442173319: {'name': '<STDOUT>', 'counts': {'STDIO': 1}},
7238257241479193519: {'name': '<STDERR>', 'counts': {'STDIO': 128}}}
%% Cell type:markdown id: tags:
### Chain operations like filtering and reductions
The filter and reduce operations return DarshanReports themselves, which makes it convenient to chain operations.
%% Cell type:code id: tags:
``` python
import pprint
import darshan
darshan.enable_experimental()
report = darshan.DarshanReport("example-logs/example.darshan", read_all=True)
report.name_records
```
%%%% Output: execute_result
{14734109647742566553: '<STDIN>',
15920181672442173319: '<STDOUT>',
7238257241479193519: '<STDERR>',
6301063301082038805: '/scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5'}
%% Cell type:code id: tags:
``` python
report.filter(name_records=[6301063301082038805, 15920181672442173319]).records
```
%%%% Output: execute_result
{'POSIX': [{'id': 6301063301082038805,
'rank': -1,
'counters': array([ 2049, -1, -1, 0,
16402, 16404, 0, 0,
0, 0, -1, -1,
0, 0, 0, 2199023259968,
0, 2199023261831, 0, 0,
0, 16384, 0, 0,
8, 16401, 1048576, 0,
134217728, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 4,
14, 0, 0, 0,
0, 0, 0, 16384,
0, 274743689216, 274743691264, 0,
0, 10240, 4096, 0,
0, 134217728, 272, 544,
328, 16384, 8, 2,
2, 597, 1073741824, 1312,
1073741824]),
'fcounters': array([3.91914105e+00, 0.00000000e+00, 3.94006395e+00, 3.92709398e+00,
3.93657994e+00, 0.00000000e+00, 1.15078166e+02, 1.15770358e+02,
0.00000000e+00, 1.00397600e+05, 1.13008418e+01, 0.00000000e+00,
1.79409459e+01, 2.04360995e+01, 8.54749503e+01, 0.00000000e+00,
0.00000000e+00])}],
'MPI-IO': [{'id': 6301063301082038805,
'rank': -1,
'counters': array([ 0, 2048, 0, 18,
0, 16384, 0, 0,
0, 0, 0, 0,
32768, 9, 0, 2199023259968,
0, 0, 134217728, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 4, 14, 0,
0, 0, 0, 0,
0, 16384, 0, 134217728,
272, 544, 328, 16384,
8, 2, 2, 597,
1073741824, 1312, 1073741824]),
'fcounters': array([ 3.91278315e+00, 0.00000000e+00, 3.94003701e+00, -1.00000000e+00,
-1.00000000e+00, 0.00000000e+00, 1.15078169e+02, 1.15773214e+02,
0.00000000e+00, 1.00397897e+05, 4.90211463e+01, 0.00000000e+00,
1.79409699e+01, 2.04544911e+01, 8.54922221e+01, 9.54322809e+01,
7.01663086e+02])}],
'STDIO': [{'id': 15920181672442173319,
'rank': 0,
'counters': array([ 1, -1, 0, 6, 0, 0, 280, 0, 0, 279, 0, 0, 0,
0]),
'fcounters': array([0.00000000e+00, 6.79492950e-05, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 7.75279999e-02, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 1.16283583e+02, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00])}]}
%% Cell type:code id: tags:
``` python
# reduce all after filtering
report.filter(pattern="*.hdf5").reduce().records
```
%%%% Output: execute_result
{'POSIX': [{'id': '*',
'rank': -1,
'counters': array([ 2049, -1, -1, 0,
16402, 16404, 0, 0,
0, 0, -1, -1,
0, 0, 0, 2199023259968,
0, 2199023261831, 0, 0,
0, 16384, 0, 0,
8, 16401, 1048576, 0,
134217728, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 4,
14, 0, 0, 0,
0, 0, 0, 16384,
0, 274743689216, 274743691264, 0,
0, 10240, 4096, 0,
0, 134217728, 272, 544,
328, 16384, 8, 2,
2, 597, 1073741824, 1312,
1073741824]),
'fcounters': array([3.91914105e+00, 0.00000000e+00, 3.94006395e+00, 3.92709398e+00,
3.93657994e+00, 0.00000000e+00, 1.15078166e+02, 1.15770358e+02,
0.00000000e+00, 1.00397600e+05, 1.13008418e+01, 0.00000000e+00,
1.79409459e+01, 2.04360995e+01, 8.54749503e+01, 0.00000000e+00,
0.00000000e+00])}],
'MPI-IO': [{'id': '*',
'rank': -1,
'counters': array([ 0, 2048, 0, 18,
0, 16384, 0, 0,
0, 0, 0, 0,
32768, 9, 0, 2199023259968,
0, 0, 134217728, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 4, 14, 0,
0, 0, 0, 0,
0, 16384, 0, 134217728,
272, 544, 328, 16384,
8, 2, 2, 597,
1073741824, 1312, 1073741824]),
'fcounters': array([ 3.91278315e+00, 0.00000000e+00, 3.94003701e+00, -1.00000000e+00,
-1.00000000e+00, 0.00000000e+00, 1.15078169e+02, 1.15773214e+02,
0.00000000e+00, 1.00397897e+05, 4.90211463e+01, 0.00000000e+00,
1.79409699e+01, 2.04544911e+01, 8.54922221e+01, 9.54322809e+01,
7.01663086e+02])}]}
%% Cell type:code id: tags:
``` python
# only preserve some
report.filter(name_records=[6301063301082038805]).reduce(mods=['POSIX', 'STDIO']).records
```
%%%% Output: execute_result
{'POSIX': [{'id': '*',
'rank': -1,
'counters': array([ 2049, -1, -1, 0,
16402, 16404, 0, 0,
0, 0, -1, -1,
0, 0, 0, 2199023259968,
0, 2199023261831, 0, 0,
0, 16384, 0, 0,
8, 16401, 1048576, 0,
134217728, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 4,
14, 0, 0, 0,
0, 0, 0, 16384,
0, 274743689216, 274743691264, 0,
0, 10240, 4096, 0,