Commit ac6e8ca0 authored by Jakob Luettgau's avatar Jakob Luettgau
Browse files

Add additional documentation, simplify documentation.

parent 6ba187a4
=======
History
=======
0.1.0 (2019-07-30)
------------------
* Initial python package.
include HISTORY.rst
include README.rst
recursive-include tests *
......
......@@ -3,11 +3,12 @@ pydarshan
=========
Python utilities to interact with darshan log records of HPC applications.
pydarshan requires that you have darshan-util
pydarshan requires darshan-utils (3.2.0+) to be installed.
Features
--------
* Darshan Report Object Wrapper
* CFFI bindings to access darshan log files
* Plots typically found in the darshan reports (matplotlib)
* Auto-discover darshan-util.so (via darshan-parser in $PATH)
......@@ -17,41 +18,29 @@ Usage
-----
For examples and a jupyter notebook to get started with pydarshan make sure
to check out the `examples` subdirectory.
to check out the `notebooks` subdirectory.
A brief example showing some of the basic functionality is the following::
import darshan
log = darshan.log_open("example.darshan")
# Access Job Information
darshan.log_get_job(log)
# Example Return:
# {'jobid': 4478544,
# 'uid': 69615,
# 'start_time': 1490000867,
# 'end_time': 1490000983,
# 'metadata': {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}}
# Access available modules and modules
darshan.log_get_modules(log)
# Returns:
# {'POSIX': {'len': 186, 'ver': 3, 'idx': 1},
# 'MPI-IO': {'len': 154, 'ver': 2, 'idx': 2},
# 'LUSTRE': {'len': 87, 'ver': 1, 'idx': 6},
# 'STDIO': {'len': 3234, 'ver': 1, 'idx': 7}}
# Access different record types as numpy arrays, with integer and float counters separated
# Example Return: {'counters': array([...], dtype=uint64), 'fcounters': array([...])}
posix_record = darshan.log_get_posix_record(log)
mpiio_record = darshan.log_get_mpiio_record(log)
stdio_record = darshan.log_get_stdio_record(log)
# ...
# open darshan log
report = darshan.DarshanReport('example.darshan')
# load some report data
report.mod_read_all_records('POSIX')
report.mod_read_all_records('MPI-IO')
# or fetch all
report.read_all_generic_records()
darshan.log_close(log)
# ...
# generate summaries for currently loaded data
report.summarize()
# ...
# generate a timeline from dxt records
report.read_all_dxt_records()
report.create_timeline()
Installation
......
......@@ -5,5 +5,8 @@
__version__ = '0.1.0'
from darshan.backend.cffi_backend import *
from darshan.report import *
options = {}
#from darshan.backend.cffi_backend import *
from darshan.report import DarshanReport
# -*- coding: utf-8 -*-
"""
The api_def_c carries a copy of CFFI compatible headers for libdarshan-util.so.
"""
header = """/* from darshan-logutils.h */
struct darshan_mnt_info
{
......@@ -135,21 +140,21 @@ struct darshan_name_record
/* counter names */
char *apxc_counter_names[];
char *bgq_counter_names[];
char *bgq_f_counter_names[];
char *hdf5_counter_names[];
char *hdf5_f_counter_names[];
char *mpiio_counter_names[];
char *mpiio_f_counter_names[];
char *pnetcdf_counter_names[];
char *pnetcdf_f_counter_names[];
char *posix_counter_names[];
char *posix_f_counter_names[];
char *stdio_counter_names[];
char *stdio_f_counter_names[];
char *decaf_counter_names[];
char *decaf_f_counter_names[];
extern char *apxc_counter_names[];
extern char *bgq_counter_names[];
extern char *bgq_f_counter_names[];
extern char *hdf5_counter_names[];
extern char *hdf5_f_counter_names[];
extern char *mpiio_counter_names[];
extern char *mpiio_f_counter_names[];
extern char *pnetcdf_counter_names[];
extern char *pnetcdf_f_counter_names[];
extern char *posix_counter_names[];
extern char *posix_f_counter_names[];
extern char *stdio_counter_names[];
extern char *stdio_f_counter_names[];
extern char *decaf_counter_names[];
extern char *decaf_f_counter_names[];
/* Supported Functions */
void* darshan_log_open(char *);
......
......@@ -14,19 +14,18 @@ from darshan.api_def_c import load_darshan_header
DARSHAN_PATH = discover_darshan()
API_def_c = load_darshan_header()
# print(API_def_c)
ffi = cffi.FFI()
ffi.cdef(API_def_c)
libdutil = ffi.dlopen(DARSHAN_PATH + "/lib/libdarshan-util.so")
#libdutil = ffi.dlopen(DARSHAN_PATH + "/lib/libdarshan-util.so")
libdutil = ffi.dlopen("libdarshan-util.so")
#libdutil = ffi.dlopen("/home/pq/p/software/darshan-cffi/darshan-util/libdarshan-util.so")
#print(DARSHAN_PATH + "/lib/libdarshan-util.so")
#modules = {}
def log_open(filename):
......
# -*- coding: utf-8 -*-
"""
The darshan.common module provides common functionality required by multiple modules.
"""
......@@ -5,7 +5,8 @@ import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import darshan
import darshan.backend.cffi_backend as backend
def plot_access_histogram(log, filter=None, data=None):
......@@ -28,12 +29,12 @@ def plot_access_histogram(log, filter=None, data=None):
write_vals = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
mods = darshan.log_get_modules(log)
mods = backend.log_get_modules(log)
if str(filter).upper() == "POSIX":
posix_record = darshan.log_get_posix_record(log)
posix = dict(zip(darshan.counter_names("POSIX"), posix_record['counters']))
posix_record = backend.log_get_posix_record(log)
posix = dict(zip(backend.counter_names("POSIX"), posix_record['counters']))
read_vals = [
posix['POSIX_SIZE_READ_0_100'],
......@@ -62,8 +63,8 @@ def plot_access_histogram(log, filter=None, data=None):
]
elif str(filter).upper() == "MPIIO":
mpiio_record = darshan.log_get_mpiio_record(log)
mpiio = dict(zip(darshan.counter_names("mpiio"), mpiio_record['counters']))
mpiio_record = backend.log_get_mpiio_record(log)
mpiio = dict(zip(backend.counter_names("mpiio"), mpiio_record['counters']))
read_vals = [
mpiio['MPIIO_SIZE_READ_AGG_0_100'],
......@@ -158,12 +159,12 @@ def plot_opcounts(log=None, filter=None, data=None):
stdio_vals = [0, 0, 0, 0, 0, 0, 0]
mods = darshan.log_get_modules(log)
mods = backend.log_get_modules(log)
# Gather POSIX
if 'POSIX' in mods:
posix_record = darshan.log_get_posix_record(log)
posix = dict(zip(darshan.counter_names("POSIX"), posix_record['counters']))
posix_record = backend.log_get_posix_record(log)
posix = dict(zip(backend.counter_names("POSIX"), posix_record['counters']))
posix_vals = [
posix['POSIX_READS'],
......@@ -177,8 +178,8 @@ def plot_opcounts(log=None, filter=None, data=None):
# Gather MPIIO
if 'MPI-IO' in mods:
mpiio_record = darshan.log_get_mpiio_record(log)
mpiio = dict(zip(darshan.counter_names("mpiio"), mpiio_record['counters']))
mpiio_record = backend.log_get_mpiio_record(log)
mpiio = dict(zip(backend.counter_names("mpiio"), mpiio_record['counters']))
mpiind_vals = [
mpiio['MPIIO_INDEP_READS'],
......@@ -202,8 +203,8 @@ def plot_opcounts(log=None, filter=None, data=None):
# Gather Stdio
# if 'STDIO' in mods:
# stdio_record = darshan.log_get_stdio_record(log)
# stdio = dict(zip(darshan.counter_names("STDIO"), stdio_record['counters']))
# stdio_record = backend.log_get_stdio_record(log)
# stdio = dict(zip(backend.counter_names("STDIO"), stdio_record['counters']))
#
# stdio_vals = [
# stdio['STDIO_READS'],
......
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
The darshan.report module provides the DarshanReport class for convenient
interaction and aggregation of Darshan logs using Python.
"""
import darshan.backend.cffi_backend as backend
import json
import numpy as np
......@@ -10,7 +16,13 @@ import datetime
class NumpyEncoder(json.JSONEncoder):
"""
Helper class for JSON serialization if the report contains numpy
log records, which are not handled by the default json encoder.
"""
def default(self, obj):
if isinstance(obj, np.ndarray):
return obj.tolist()
......@@ -18,6 +30,8 @@ class NumpyEncoder(json.JSONEncoder):
class DarshanReport(object):
"""
The DarshanReport class provides a convenient wrapper to access darshan
......@@ -38,7 +52,7 @@ class DarshanReport(object):
self.log = backend.log_open(self.filename)
# state dependent book-keeping
self.converted_records = False # true if convert_records() was called (unnumpyfiy)
self.converted_records = False # true if convert_records() was called (unnumpyfy)
# when using report algebra this log allows to untangle potentially
# unfair aggregations (e.g., double accounting)
......@@ -52,19 +66,22 @@ class DarshanReport(object):
def __add__(self, other):
"""
Return a copy of this report with the addition noted in its provenance log.

The tuple ("add", self, other) is appended to the copy's provenance_log.
In the code shown here no records from `other` are merged into the copy.

Args:
other: Right-hand operand of `+`; only stored in the provenance entry.

Return:
The copied report.
"""
# NOTE(review): copy() is not visible in this excerpt -- confirm it is defined.
new_report = self.copy()
#new_report = copy.deepcopy(self)
new_report.provenance_log.append(("add", self, other))
return new_report
def read_all(self):
"""
Load both the generic records and the DXT records.

Calls read_all_generic_records() followed by read_all_dxt_records().

Args:
None

Return:
None
"""
self.read_all_generic_records()
self.read_all_dxt_records()
return
def read_all_generic_records(self):
"""
Read all available information from darshan log and return das dictionary.
Read all available information from darshan log and return as dictionary.
Args:
None
......@@ -79,6 +96,25 @@ class DarshanReport(object):
pass
def read_all_dxt_records(self):
"""
Read DXT records for every module listed in self.report['modules'].

Each module name is handed to mod_read_all_dxt_records(); nothing is
returned from this method.
Args:
None
Return:
None
"""
for mod in self.report['modules']:
self.mod_read_all_dxt_records(mod)
pass
def read_metadata(self):
"""
Read metadata such as the job, the executables and available modules.
......@@ -97,9 +133,10 @@ class DarshanReport(object):
def mod_read_all_dxt_records(self, mod, mode='numpy'):
def mod_read_all_records(self, mod, mode='numpy'):
"""
Reads all dxt records for provided module.
Reads all generic records for module
Args:
mod (str): Identifier of module to fetch all records
......@@ -109,27 +146,34 @@ class DarshanReport(object):
None
"""
unsupported = ['DXT_POSIX', 'DXT_MPIIO', 'LUSTRE']
unsupported.append('STDIO') # TODO: reenable when segfault resolved
if mod not in self.report['modules']:
print("Skipping. Log does not contain data for mod:", mod)
return
supported = ['DXT_POSIX', 'DXT_MPIIO']
if mod not in supported:
if mod in unsupported:
print("Skipping. Currently unsupported:", mod)
# skip mod
return
structdefs = {
"BG/Q": "struct darshan_bgq_record **",
"HDF5": "struct darshan_hdf5_file **",
"MPI-IO": "struct darshan_mpiio_file **",
"PNETCDF": "struct darshan_pnetcdf_file **",
"POSIX": "struct darshan_posix_file **",
"STDIO": "struct darshan_stdio_file **",
"DECAF": "struct darshan_decaf_record **",
"DXT_POSIX": "struct dxt_file_record **",
"DXT_MPIIO": "struct dxt_file_record **",
}
self.report['records'][mod] = []
cn = backend.counter_names(mod)
fcn = backend.fcounter_names(mod)
self.report['modules'][mod]['counters'] = cn
self.report['modules'][mod]['fcounters'] = fcn
self.report['modules'][mod]['num_records'] = 0
......@@ -137,7 +181,7 @@ class DarshanReport(object):
rec = backend.log_get_dxt_record(self.log, mod, structdefs[mod])
rec = backend.log_get_generic_record(self.log, mod, structdefs[mod])
while rec != None:
# TODO: performance hog and hacky ;)
#recs = json.dumps(rec, cls=NumpyEncoder)
......@@ -146,19 +190,15 @@ class DarshanReport(object):
if mode == 'numpy':
self.report['records'][mod].append(rec)
else:
print("Not implemented.")
exit(1)
#c = dict(zip(cn, rec['counters']))
#fc = dict(zip(fcn, rec['fcounters']))
#self.report['records'][mod].append([c, fc])
pass
c = dict(zip(cn, rec['counters']))
fc = dict(zip(fcn, rec['fcounters']))
self.report['records'][mod].append([c, fc])
self.report['modules'][mod]['num_records'] += 1
# fetch next
rec = backend.log_get_dxt_record(self.log, mod, structdefs[mod])
rec = backend.log_get_generic_record(self.log, mod, structdefs[mod])
pass
......@@ -166,10 +206,9 @@ class DarshanReport(object):
def mod_read_all_records(self, mod, mode='numpy'):
def mod_read_all_dxt_records(self, mod, mode='numpy'):
"""
Reads all records for module
Reads all dxt records for provided module.
Args:
mod (str): Identifier of module to fetch all records
......@@ -179,33 +218,27 @@ class DarshanReport(object):
None
"""
unsupported = ['DXT_POSIX', 'DXT_MPIIO', 'LUSTRE', 'STDIO']
if mod in unsupported:
if mod not in self.report['modules']:
print("Skipping. Log does not contain data for mod:", mod)
return
supported = ['DXT_POSIX', 'DXT_MPIIO']
if mod not in supported:
print("Skipping. Currently unsupported:", mod)
# skip mod
return
structdefs = {
"BG/Q": "struct darshan_bgq_record **",
"HDF5": "struct darshan_hdf5_file **",
"MPI-IO": "struct darshan_mpiio_file **",
"PNETCDF": "struct darshan_pnetcdf_file **",
"POSIX": "struct darshan_posix_file **",
"STDIO": "struct darshan_stdio_file **",
"DECAF": "struct darshan_decaf_record **",
"DXT_POSIX": "struct dxt_file_record **",
"DXT_MPIIO": "struct dxt_file_record **",
}
self.report['records'][mod] = []
cn = backend.counter_names(mod)
fcn = backend.fcounter_names(mod)
self.report['modules'][mod]['counters'] = cn
self.report['modules'][mod]['fcounters'] = fcn
self.report['modules'][mod]['num_records'] = 0
......@@ -213,7 +246,7 @@ class DarshanReport(object):
rec = backend.log_get_generic_record(self.log, mod, structdefs[mod])
rec = backend.log_get_dxt_record(self.log, mod, structdefs[mod])
while rec != None:
# TODO: performance hog and hacky ;)
#recs = json.dumps(rec, cls=NumpyEncoder)
......@@ -222,20 +255,26 @@ class DarshanReport(object):
if mode == 'numpy':
self.report['records'][mod].append(rec)
else:
c = dict(zip(cn, rec['counters']))
fc = dict(zip(fcn, rec['fcounters']))
self.report['records'][mod].append([c, fc])
print("Not implemented.")
exit(1)
#c = dict(zip(cn, rec['counters']))
#fc = dict(zip(fcn, rec['fcounters']))
#self.report['records'][mod].append([c, fc])
pass
self.report['modules'][mod]['num_records'] += 1
# fetch next
rec = backend.log_get_generic_record(self.log, mod, structdefs[mod])
rec = backend.log_get_dxt_record(self.log, mod, structdefs[mod])
pass
def mod_agg(self, mod, ranks=None, files=None, preserve_rank=False, preserve_file=False):
"""
Aggregate counters for a given module name and return updated dictionary.
......@@ -429,7 +468,7 @@ class DarshanReport(object):
ctx[mod] = agg
ctx[mod + '_final'] = tmp
ctx[mod + '_simple'] = tmp
......@@ -623,12 +662,6 @@ class DarshanReport(object):
pass
def create_sankey(self):
"""
Generate a summary that shows the dataflow between ranks, files and
......
......@@ -3,7 +3,7 @@
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = python -msphinx
SPHINXBUILD = python -m sphinx
SPHINXPROJ = pydarshan
SOURCEDIR = .
BUILDDIR = _build
......
......@@ -121,6 +121,20 @@ html_static_path = ['static']
htmlhelp_basename = 'darshandoc'
# -- Options for manual page output ------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'pydarshan',
u'pydarshan Documentation',
[author], 1)
]
# -- Options for LaTeX output ------------------------------------------
latex_elements = {
......@@ -151,17 +165,6 @@ latex_documents = [
]
# -- Options for manual page output ------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'pydarshan',
u'pydarshan Documentation',
[author], 1)
]
# -- Options for Texinfo output ----------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
......@@ -177,12 +180,6 @@ texinfo_documents = [
]
intersphinx_mapping = {
'https://docs.python.org/': None,
'http://matplotlib.org': None,
......
......@@ -8,23 +8,23 @@ darshan.backend.ascii\_backend module
-------------------------------------
.. automodule:: darshan.backend.ascii_backend
:members:
:undoc-members:
:show-inheritance:
:members:
:undoc-members:
:show-inheritance:
darshan.backend.cffi\_backend module