Commit e69f1f9a authored by Jakob Luettgau


Add extension.c to ease including additional shared libraries. Various small changes and bugfixes: libdarshan-util discovery/precedence, timeline aggregation, setup.py.
parent a279d721
......@@ -109,6 +109,20 @@ def discover_darshan_wheel():
raise RuntimeError('Could not discover darshan! Is darshan-util installed and set in your PATH?')
def discover_darshan_pyinstaller():
"""
Discovers darshan-util if installed as part of a PyInstaller bundle.
:return: Path to a darshan-util installation.
"""
path = os.path.dirname(__file__)
if path:
return os.path.realpath(path + '/../')
else:
raise RuntimeError('Could not discover darshan! Is darshan-util installed and set in your PATH?')
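For orientation, a short sketch of what this resolves to inside a PyInstaller bundle (the paths are hypothetical, chosen only to illustrate the layout this function assumes):

import os

# stand-in for __file__ inside an unpacked bundle (hypothetical path)
bundled_file = '/tmp/_MEI12345/darshan/discover_darshan.py'

path = os.path.dirname(bundled_file)    # /tmp/_MEI12345/darshan
print(os.path.realpath(path + '/../'))  # /tmp/_MEI12345, where find_utils
                                        # then globs for libdarshan-util*.so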
def find_utils(ffi, libdutil):
......@@ -131,38 +145,43 @@ def find_utils(ffi, libdutil):
libdutil = ffi.dlopen("libdarshan-util.so")
except:
libdutil = None
print("ffi.dlopen failed")
if libdutil is None:
try:
DARSHAN_PATH = discover_darshan_shutil()
libdutil = ffi.dlopen(DARSHAN_PATH + "/lib/libdarshan-util.so")
library_path = discover_darshan_shutil()
libdutil = ffi.dlopen(library_path + "/lib/libdarshan-util.so")
except:
libdutil = None
print("shutil failed")
if libdutil is None:
try:
DARSHAN_PATH = discover_darshan_pkgconfig()
libdutil = ffi.dlopen(DARSHAN_PATH + "/lib/libdarshan-util.so")
library_path = discover_darshan_pkgconfig()
libdutil = ffi.dlopen(library_path + "/lib/libdarshan-util.so")
except:
libdutil = None
print("pkgconfig failed")
if libdutil is None:
try:
DARSHAN_PATH = discover_darshan_wheel()
import glob
DARSHAN_SO = glob.glob(f'{DARSHAN_PATH}/libdarshan-util*.so')[0]
libdutil = ffi.dlopen(DARSHAN_SO)
library_path = glob.glob(f'{DARSHAN_PATH}/libdarshan-util*.so')[0]
libdutil = ffi.dlopen(library_path)
except:
libdutil = None
if libdutil is None:
try:
DARSHAN_PATH = discover_darshan_pyinstaller()
import glob
library_path = glob.glob(f'{DARSHAN_PATH}/libdarshan-util*.so')[0]
libdutil = ffi.dlopen(library_path)
except:
libdutil = None
print("")
if libdutil is None:
raise RuntimeError('Could not find libdarshan-util.so! Is darshan-util installed? Please ensure one of the following: 1) export LD_LIBRARY_PATH=<path-to-libdarshan-util.so>, or 2) darshan-parser can be found using the PATH variable, or 3) pkg-config can resolve pkg-config --path darshan-util')
raise RuntimeError('Could not find libdarshan-util.so! Is darshan-util installed? Please ensure one of the following: 1) export LD_LIBRARY_PATH=<path-to-libdarshan-util.so>, or 2) darshan-parser can be found using the PATH variable, or 3) pkg-config can resolve pkg-config --path darshan-util, or 4) install a wheel that includes darshan-util via pip.')
return libdutil
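Taken together, find_utils walks the discovery strategies in precedence order: a plain dlopen (which honors LD_LIBRARY_PATH), then the shutil/PATH probe, pkg-config, the wheel layout, and finally the PyInstaller bundle. A minimal driver sketch, assuming only that the caller owns a cffi.FFI instance as the signature suggests:

import cffi

ffi = cffi.FFI()
# the real binding declares the darshan-util C API here via ffi.cdef(...)

libdutil = find_utils(ffi, None)  # raises RuntimeError only if every strategy fails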
......
from darshan.report import *
def create_timeline(self, group_by='rank', mode="append"):
"""
Generate/update a timeline from dxt tracing records of the current report.
Args:
group_by (str): By which factor to group entries (default: rank)
Allowed Parameters: rank, filename
def configure_groups():
"""
Prepare a dictionary to lookup high level group ordering.
"""
from collections import OrderedDict
order = 0.0
group_config = OrderedDict()
group_config['H5F'] = {}
group_config['H5D'] = {}
group_config['MPIIO'] = {}
group_config['DXT_MPIIO'] = {}
group_config['STDIO'] = {}
group_config['POSIX'] = {}
group_config['DXT_POSIX'] = {}
group_config['LUSTRE'] = {}
# apply order
for k,v in group_config.items():
v['order'] = order
order += 1.0
return group_config
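The insertion order of the OrderedDict is what fixes the vertical ordering of the top-level timeline groups; a quick check of the values produced above:

cfg = configure_groups()
assert cfg['H5F']['order'] == 0.0     # HDF5 file level sorts first
assert cfg['MPIIO']['order'] == 2.0
assert cfg['LUSTRE']['order'] == 7.0  # Lustre records sort last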
def purge_empty_nestedGroups(groups):
for k,v in groups.items():
if 'nestedGroups' in v:
if len(v['nestedGroups']) == 0:
v.pop('nestedGroups', None)
v.pop('showNested', None)
def normalize_mod(mod, inverse=False):
mapping = {
'MPI-IO': 'MPIIO'
}
inverse_mapping = {
'MPIIO': 'MPI-IO'
}
if inverse and mod in inverse_mapping:
return inverse_mapping[mod]
if mod in mapping:
return mapping[mod]
return mod
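normalize_mod reconciles the module names darshan reports (e.g. 'MPI-IO') with the prefix used by that module's counters (e.g. MPIIO_F_OPEN_START_TIMESTAMP); names without a mapping pass through unchanged:

assert normalize_mod('MPI-IO') == 'MPIIO'                # counter-prefix form
assert normalize_mod('POSIX') == 'POSIX'                 # no mapping, unchanged
assert normalize_mod('MPIIO', inverse=True) == 'MPI-IO'  # back to the reported name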
def update_parent_item(self, groups, items, parent_id, start=None, end=None):
item = None
if parent_id in items:
item = items[parent_id]
else:
items[parent_id] = {
'id': parent_id,
'group': parent_id,
'content': '',
'start': start,
'end': end
}
item = items[parent_id]
    if start < item['start']:
        item['start'] = start
    if end > item['end']:
        item['end'] = end

self.mod_read_all_dxt_records("DXT_POSIX")
self.mod_read_all_dxt_records("DXT_MPIIO")

ctx = {'groups': [], 'items': []}
groups = ctx['groups']
items = ctx['items']
def summarized_items(self, groups, items, mod, nmod, rec, rec_id, group_id, parent_group):
if mod in ['LUSTRE']:
# skip, nothing to summarize
return
item_id = f'{group_id}'
start = float('inf')
end = float('-inf')
start_time = datetime.datetime.fromtimestamp( self.data['metadata']['job']['start_time'] )
drec = {}
drec['fcounters'] = dict(zip(self.counters[mod]['fcounters'], rec['fcounters']))
# find min/max for starttime and endtime
ops = ['OPEN', 'CLOSE']
if mod not in ['H5F']:
# add the following ops for all but H5F, ...
ops += ['READ', 'WRITE']
for op in ops:
item_id = f'{group_id}:{op}'
cur_start = drec['fcounters'][f'{nmod}_F_{op}_START_TIMESTAMP']
cur_end = drec['fcounters'][f'{nmod}_F_{op}_END_TIMESTAMP']
if cur_start < start:
start = cur_start
if cur_end > end:
end = cur_end
# add item
if item_id not in items:
items[item_id] = {
'id': item_id,
'group': group_id,
'content': '',
'start': start,
'end': end
}
    # order by first access
    if start < groups[group_id]['order']:
        groups[group_id]['order'] = start

    update_parent_item(self, groups, items, parent_group, start=start, end=end)

def groupify(rec, mod):
    for seg in rec['write_segments']:
        seg.update( {'type': 'w'} )
    for seg in rec['read_segments']:
        seg.update( {'type': 'r'} )

    segments = rec['write_segments'] + rec['read_segments']
    segments = sorted(segments, key=lambda k: k['start_time'])

    start = float('inf')
    end = float('-inf')

    trace = []
    minsize = 0
    for seg in segments:
        trace += [ seg['type'], seg['offset'], seg['length'], seg['start_time'], seg['end_time'] ]
        seg_minsize = seg['offset'] + seg['length']
        if minsize < seg_minsize:
            minsize = seg_minsize
        if start > seg['start_time']:
            start = seg['start_time']
        if end < seg['end_time']:
            end = seg['end_time']

    # reconstruct timestamps
    start = start_time + datetime.timedelta(seconds=start)
    end = start_time + datetime.timedelta(seconds=end)

    rid = "%s:%d:%d" % (mod, rec['id'], rec['rank'])

    item = {
        "id": rid,
        "rank": rec['rank'],
        "hostname": rec['hostname'],
        #"filename": rec['filename'],
        "filename": "FIXME: NEED FILENAME",
        "group": rid,
        "start": start.isoformat(),
        "end": end.isoformat(),
        "limitSize": False,  # required to prevent rendering glitches
        "data": {
            "duration": (end-start).total_seconds(),
            "start": segments[0]['start_time'],
            "size": minsize,  # minimal estimated filesize
            "trace": trace,
        }
    }
    items.append(item)

    group = {
        "id": rid,
        #"content": "[%s] " % (mod) + rec['filename'][-84:],
        "content": "[%s] " % (mod) + "NEED FILENAME",
        "order": seg['start_time']
    }
    groups.append(group)

def compress_pathname(pathname):
    max_len = 42
    if len(pathname) < max_len:
        return pathname
    elems = pathname.split('/')
    #if len(elems[-1]) < max_len:
    #    return elems[-1]
    snip = '...'
    return pathname[0:int((max_len-len(snip))/2)] + snip + elems[-1][-int((max_len-len(snip))/2):]
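compress_pathname keeps the head of the full path and the tail of the basename so that group labels stay near the 42-character budget; for example (the path itself is made up):

p = '/projects/climate/run42/output/checkpoints/step-000123/pressure-field.h5'
print(compress_pathname(p))             # '/projects/climate/r...pressure-field.h5'
print(compress_pathname('/tmp/a.out'))  # short paths are returned unchanged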
def create_timeline(self,
group_by='mod,file,rank',
action='attach,overwrite', summary_name='timeline'
):
"""
Generate/update a timeline from records of the current report state.
Args:
group_by (str): By which factor to group entries (default: 'mod,file,rank')
    Allowed Parameters: mod, file, rank
"""
report = self
supported = ['DXT_POSIX', 'DXT_MPIIO']
for mod in supported:
if mod in self.data['records']:
for rec in self.data['records'][mod]:
groupify(rec, mod)
groups = {}
items = {}
group_config = configure_groups()
#start_time = datetime.datetime.fromtimestamp( self.data['metadata']['job']['start_time'] )
for mod in report.modules:
nmod = normalize_mod(mod)
group_id = nmod
groups[group_id] = {
'id': group_id,
'content': f'{group_id}',
'order': group_config[nmod]['order'],
'nestedGroups': [], # to be filled later
'showNested': False
}
for mod in report.modules:
if mod in ['DXT_POSIX', 'DXT_MPIIO']:
continue
nmod = normalize_mod(mod)
parent_group = nmod
for rec in report.records[mod]:
rec_id = rec['id']
group_id = f'{nmod}:{rec_id}'
# add group
if group_id not in groups:
groups[group_id] = {
'id': group_id,
'content':
'<b>' +
compress_pathname(report.name_records[rec['id']]) +
'</b><br>' +
f'{group_id}' +
'',
'order': float('inf'),
'title': report.name_records[rec['id']],
'nestedGroups': [], # to be filled later
'showNested': False
}
groups[parent_group]['nestedGroups'].append(group_id)
# add items
#detailed_items(groups, items, mod, nmod, rec, rec_id, group_id, parent_group)
summarized_items(self, groups, items, mod, nmod, rec, rec_id, group_id, parent_group)
purge_empty_nestedGroups(groups)
# flatten dictionaries to list
timeline = {
'groups': [v for k,v in groups.items()],
'items': [v for k,v in items.items()]
}
# overwrite existing summary entry
if mode == "append":
self.summary['timeline'] = ctx
if action == "attach,overwrite":
self.summary[summary_name] = timeline
return ctx
return timeline
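A usage sketch for the new entry point (the log file name is hypothetical, and this assumes create_timeline is bound as a DarshanReport method, as its self parameter suggests):

import darshan

report = darshan.DarshanReport('example.darshan')  # hypothetical log
report.mod_read_all_records('POSIX')
report.create_timeline()                     # default action='attach,overwrite'
timeline = report.summary['timeline']        # summary_name defaults to 'timeline'
print(len(timeline['groups']), len(timeline['items']))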
#include "Python.h"
/* The wrapper to the underlying C function */
static PyObject *
py_dot(PyObject *self, PyObject *args)
{
PyObject *x_obj, *y_obj;
double *x, *y;
Py_ssize_t i, length_x, length_y;
if (!PyArg_ParseTuple(args, "OO", &x_obj, &y_obj))
return NULL;
if ((!PySequence_Check(x_obj)) || (!PySequence_Check(y_obj))) {
PyErr_SetString(PyExc_TypeError, "A sequence is required.");
return NULL;
}
length_x = PySequence_Length(x_obj);
length_y = PySequence_Length(y_obj);
if (length_x < 0 || length_y < 0) {
PyErr_SetString(PyExc_TypeError, "A sequence is required.");
return NULL;
}
    if (length_x != length_y) {
        PyErr_SetString(PyExc_ValueError, "Lengths are not aligned.");
        return NULL;
    }
    x = (double*) calloc(length_x, sizeof(double));
    y = (double*) calloc(length_y, sizeof(double));
    if (x == NULL || y == NULL) {
        free(x);
        free(y);
        return PyErr_NoMemory();
    }
    for (i = 0; i < length_x; i++) {
        PyObject* x_item = PySequence_GetItem(x_obj, i);
        PyObject* y_item = PySequence_GetItem(y_obj, i);
        x[i] = PyFloat_AsDouble(x_item);
        if (PyErr_Occurred()) {
            /* release references and buffers on the error path */
            Py_XDECREF(x_item);
            Py_XDECREF(y_item);
            free(x);
            free(y);
            PyErr_SetString(PyExc_TypeError, "Cannot convert object to double.");
            return NULL;
        }
        y[i] = PyFloat_AsDouble(y_item);
        if (PyErr_Occurred()) {
            Py_DECREF(x_item);
            Py_DECREF(y_item);
            free(x);
            free(y);
            PyErr_SetString(PyExc_TypeError, "Cannot convert object to double.");
            return NULL;
        }
        Py_DECREF(x_item);
        Py_DECREF(y_item);
    }
    /* compute the dot product */
    double result = 0.0;
    for (i = 0; i < length_x; i++) {
        result += x[i] * y[i];
    }
    free(x);
    free(y);
    return PyFloat_FromDouble(result);
}
static PyMethodDef module_functions[] = {
{"dot", py_dot, METH_VARARGS, NULL},
{NULL, NULL} /* sentinel */
};
#if PY_MAJOR_VERSION >= 3
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"extension", /* m_name */
"This is a module", /* m_doc */
-1, /* m_size */
module_functions, /* m_methods */
NULL, /* m_reload */
NULL, /* m_traverse */
NULL, /* m_clear */
NULL, /* m_free */
};
#endif
static PyObject *
moduleinit(void)
{
PyObject *m;
#if PY_MAJOR_VERSION >= 3
m = PyModule_Create(&moduledef);
#else
m = Py_InitModule3("extension",
module_functions, "This is a module");
#endif
return m;
}
#if PY_MAJOR_VERSION < 3
PyMODINIT_FUNC
initextension(void)
{
moduleinit();
}
#else
PyMODINIT_FUNC
PyInit_extension(void)
{
return moduleinit();
}
#endif
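Once compiled (via the ext_modules hook in setup.py below), the module can be exercised from Python; a minimal sketch, assuming it is importable under the name extension:

import extension

print(extension.dot([1.0, 2.0, 3.0], [4.0, 5.0, 6.0]))  # 32.0

# error handling mirrors the C checks above:
#   extension.dot([1.0], [1.0, 2.0])  -> ValueError: Lengths are not aligned.
#   extension.dot([1.0], [object()])  -> TypeError: Cannot convert object to double.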
jupyter
notebook
# altair (svg export is well exposed)
altair
# for bokeh + svg export
# but bokeh is almost as verbose to use as matplotlib
bokeh
selenium
......@@ -45,26 +45,20 @@ setup(
author='',
author_email='',
classifiers=[
'Development Status :: 2 - Pre-Alpha',
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'Natural Language :: English',
'Programming Language :: Python :: 3',
# 'Programming Language :: Python :: 3.4',
# 'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.9'
],
description="Python tools to interact with darshan log records of HPC applications.",
#long_description=readme,
long_description="PyDarshan",
#options={"bdist_wheel": {"universal": False}},
ext_modules = ext_modules,
ext_modules = ext_modules,
install_requires=requirements,
include_package_data=True,
keywords='darshan',
......@@ -74,6 +68,6 @@ setup(
test_suite='tests',
tests_require=test_requirements,
url='https://www.mcs.anl.gov/research/projects/darshan/',
version='0.0.2',
version='0.0.3',
zip_safe=False,
)
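For reference, the ext_modules list wired in above is defined earlier in setup.py (outside this hunk); a sketch of what such a definition could look like for the new extension.c (module name and source path are assumptions, not the commit's exact code):

from setuptools import Extension

ext_modules = [
    Extension(
        'darshan.extension',              # import name is an assumption
        sources=['darshan/extension.c'],
    ),
]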