Commit 91bdad64 authored by Jakob Luettgau
Browse files

Make sure dtype parameter is used consistently.

parent c16aa8a2
......@@ -325,7 +325,6 @@ def log_get_generic_record(log, mod_name, mod_type, dtype='numpy'):
})
if dtype == "pandas":
df_c = pd.DataFrame(cdict, index=[0])
df_fc = pd.DataFrame(fcdict, index=[0])
......
......@@ -67,7 +67,8 @@ def create_timeline(self, group_by='rank', mode="append"):
"id": rid,
"rank": rec['rank'],
"hostname": rec['hostname'],
"filename": rec['filename'],
#"filename": rec['filename'],
"filename": "FIXME: NEED FILENAME",
"group": rid,
"start": start.isoformat(),
......@@ -86,7 +87,8 @@ def create_timeline(self, group_by='rank', mode="append"):
group = {
"id": rid,
"content": "[%s] " % (mod) + rec['filename'][-84:],
#"content": "[%s] " % (mod) + rec['filename'][-84:],
"content": "[%s] " % (mod) + "NEED FILENAME",
"order": seg['start_time']
}
groups.append(group)
......
......@@ -61,28 +61,46 @@ class DarshanReport(object):
a number of common aggregations can be performed.
"""
# a way to conserve memory?
#__slots__ = ['attr1', 'attr2']
def __init__(self,
filename=None, data_format='pandas',
filename=None, dtype='pandas',
start_time=None, end_time=None,
automatic_summary=False,
read_all=True, lookup_name_records=True):
"""
Args:
filename (str): filename to open (optional)
dtype (str): default dtype for internal structures
automatic_summary (bool): automatically generate summary after loading
read_all (bool): whether to read all records for log
lookup_name_records (bool): lookup and update name_records as records are loaded
Return:
None
"""
self.filename = filename
# options
self.data_format = data_format # Experimental: preferred internal representation: pandas/numpy useful for aggregations, dict good for export/REST
# Behavioral Options
self.dtype = dtype # Experimental: preferred internal representation: pandas/numpy useful for aggregations, dict good for export/REST
# might require alternative granularity: e.g., records, vs summaries?
# vs dict/pandas? dict/native?
self.automatic_summary = automatic_summary
self.lookup_name_records = lookup_name_records
# state dependent book-keeping
# State dependent book-keeping
self.converted_records = False # true if convert_records() was called (unnumpyfy)
#
self.start_time = float('inf')
self.end_time = float('-inf')
# initialize data namespaces
# Report Metadata
self.start_time = start_time if start_time else float('inf')
self.end_time = end_time if end_time else float('-inf')
self.timebase = self.start_time
# Initialize data namespaces
self.metadata = {}
self.modules = {}
self.counters = {}
......@@ -91,7 +109,7 @@ class DarshanReport(object):
self.name_records = {}
# initialize report/summary namespace
self.summary_revision = 0 # counter to check if summary needs update
self.summary_revision = 0 # counter to check if summary needs update (see data_revision)
self.summary = {}
......@@ -110,7 +128,7 @@ class DarshanReport(object):
# when using report algebra this log allows to untangle potentially
# unfair aggregations (e.g., double accounting)
self.provenance_enabled = True
self.provenance_log = []
self.provenance_graph = []
self.provenance_reports = {}
......@@ -236,10 +254,10 @@ class DarshanReport(object):
ids.add(rec['id'])
self.name_records = backend.log_lookup_name_records(self.log, ids)
self.name_records.update(backend.log_lookup_name_records(self.log, ids))
def read_all(self):
def read_all(self, dtype=None):
"""
Read all available records from darshan log and return as dictionary.
......@@ -249,15 +267,15 @@ class DarshanReport(object):
Return:
None
"""
self.read_all_generic_records()
self.read_all_dxt_records()
self.mod_read_all_lustre_records()
self.read_all_generic_records(dtype=dtype)
self.read_all_dxt_records(dtype=dtype)
self.mod_read_all_lustre_records(dtype=dtype)
return
def read_all_generic_records(self, counters=True, fcounters=True):
def read_all_generic_records(self, counters=True, fcounters=True, dtype=None):
"""
Read all generic records from darshan log and return as dictionary.
......@@ -267,8 +285,11 @@ class DarshanReport(object):
Return:
None
"""
dtype = dtype if dtype else self.dtype
for mod in self.data['modules']:
self.mod_read_all_records(mod, warnings=False)
self.mod_read_all_records(mod, dtype=dtype, warnings=False)
pass
......@@ -283,16 +304,16 @@ class DarshanReport(object):
Return:
None
"""
dtype = dtype if dtype else self.dtype
for mod in self.data['modules']:
self.mod_read_all_dxt_records(mod, warnings=False, reads=reads, writes=writes, dtype=dtype)
pass
def mod_read_all_records(self, mod, dtype='numpy', warnings=True):
def mod_read_all_records(self, mod, dtype=None, warnings=True):
"""
Reads all generic records for module
......@@ -313,21 +334,23 @@ class DarshanReport(object):
return
self.data['records'][mod] = []
# handling options
dtype = dtype if dtype else self.dtype
self.data['records'][mod] = []
cn = backend.counter_names(mod)
fcn = backend.fcounter_names(mod)
# update module metadata
self.modules[mod]['num_records'] = 0
if mod not in self.counters:
self.counters[mod] = {}
self.counters[mod]['counters'] = cn
self.counters[mod]['fcounters'] = fcn
self.counters[mod]['counters'] = cn
self.counters[mod]['fcounters'] = fcn
# fetch records
rec = backend.log_get_generic_record(self.log, mod, _structdefs[mod], dtype=dtype)
while rec != None:
if dtype == 'pandas':
......@@ -346,7 +369,6 @@ class DarshanReport(object):
if self.lookup_name_records:
self.update_name_records()
# process/combine records if the format dtype allows for this
if dtype == 'pandas':
combined_c = None
......@@ -377,7 +399,7 @@ class DarshanReport(object):
pass
def mod_read_all_dxt_records(self, mod, dtype='numpy', warnings=True, reads=True, writes=True):
def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, writes=True):
"""
Reads all dxt records for provided module.
......@@ -404,14 +426,19 @@ class DarshanReport(object):
return
self.records[mod] = []
self.modules[mod]['num_records'] = 0
# handling options
dtype = dtype if dtype else self.dtype
self.records[mod] = []
# update module metadata
self.modules[mod]['num_records'] = 0
if mod not in self.counters:
self.counters[mod] = {}
# fetch records
rec = backend.log_get_dxt_record(self.log, mod, _structdefs[mod], dtype=dtype)
while rec != None:
if dtype == 'numpy':
......@@ -419,20 +446,20 @@ class DarshanReport(object):
else:
self.records[mod].append(rec)
pass
self.data['modules'][mod]['num_records'] += 1
# fetch next
rec = backend.log_get_dxt_record(self.log, mod, _structdefs[mod], reads=reads, writes=writes, dtype=dtype)
if self.lookup_name_records:
self.update_name_records()
pass
def mod_read_all_lustre_records(self, mod="LUSTRE", dtype='numpy', warnings=True):
def mod_read_all_lustre_records(self, mod="LUSTRE", dtype=None, warnings=True):
"""
Reads all dxt records for provided module.
......@@ -459,14 +486,21 @@ class DarshanReport(object):
return
self.records[mod] = []
self.modules[mod]['num_records'] = 0
# handling options
dtype = dtype if dtype else self.dtype
self.records[mod] = []
cn = backend.counter_names(mod)
# update module metadata
self.modules[mod]['num_records'] = 0
if mod not in self.counters:
self.counters[mod] = {}
self.counters[mod]['counters'] = cn
# fetch records
rec = backend.log_get_record(self.log, mod, dtype=dtype)
while rec != None:
self.records[mod].append(rec)
......@@ -475,12 +509,39 @@ class DarshanReport(object):
# fetch next
rec = backend.log_get_record(self.log, mod, dtype=dtype)
if self.lookup_name_records:
self.update_name_records()
# process/combine records if the format dtype allows for this
if dtype == 'pandas':
combined_c = None
for rec in self.records[mod]:
obj = rec['counters']
#print(type(obj))
#display(obj)
if combined_c is None:
combined_c = rec['counters']
else:
combined_c = pd.concat([combined_c, rec['counters']])
self.records[mod] = [{
'rank': -1,
'id': -1,
'counters': combined_c,
}]
pass
def mod_records(self, mod, dtype='numpy', warnings=True):
def mod_records(self, mod,
dtype='numpy', warnings=True):
"""
Return generator for lazy record loading and traversal.
......@@ -583,6 +644,32 @@ class DarshanReport(object):
#print("Memory:", get_size(self), 'bytes')
###########################################################################
# Internal Organisation
###########################################################################
def rebase_timestamps(records=None, inplace=False, timebase=False):
    """
    Prepare records for rebasing onto a common timebase.

    The timestamp rewrite itself is not implemented yet (see the TODOs
    below), so the records are currently handed back with unchanged
    content: either as a deep copy or, with inplace=True, as the very same
    object that was passed in.

    Args:
        records (dict, list): records to rebase
        inplace (bool): whether to update the passed-in records directly
            (True) or to merely return a rebased copy (False, default)
        timebase (datetime.datetime): new timebase to use (currently
            unused; intended default is the report's start_time)

    Return:
        rebased_records (same type as provided to records)
    """
    # NOTE(review): the signature has no `self`. If this is meant to be an
    # instance method, the bound instance would arrive as `records` --
    # confirm whether it should take `self` or be marked @staticmethod.

    # Honour `inplace`: only pay for a deep copy when the caller wants the
    # original records left untouched.
    rebased_records = records if inplace else copy.deepcopy(records)

    # TODO: apply timestamp rebase
    # TODO: settle on format

    return rebased_records
###########################################################################
# Conversion
###########################################################################
def to_dict(self):
"""
Return dictionary representation of report data.
......@@ -641,3 +728,10 @@ class DarshanReport(object):
pass
return json.dumps(data, cls=DarshanReportJSONEncoder)
@staticmethod
def from_string(string):
    """
    Placeholder constructor: returns a new, empty DarshanReport.

    Args:
        string (str): serialized report (currently ignored)

    Return:
        DarshanReport created with default arguments
    """
    # The input is not parsed yet; a default-constructed report is returned.
    report = DarshanReport()
    return report
......@@ -13,7 +13,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"By default all records, metadata, available modules and the name records are loaded:"
"By default all records, metadata, available modules and the name records are loaded when opening a Darshan log:"
]
},
{
......@@ -35,7 +35,7 @@
"Loaded Records: {'POSIX': 1, 'MPI-IO': 1, 'STDIO': 129, 'LUSTRE': 1}\n",
"Name Records: 3\n",
"Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}\n",
"DarshanReport: id(140072175381248) (tmp)\n"
"DarshanReport: id(140464297220080) (tmp)\n"
]
}
],
......@@ -1159,7 +1159,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Report aggregations and summarization remains **experimental** for now, mostly to allow interfaces to stabilize. But experimental features can be switched on easily by invoking `darshan.enable_experimental()`:"
"Report aggregations and summarization remain **experimental** for now, mostly to allow interfaces to stabilize. But experimental features can be switched on easily by invoking `darshan.enable_experimental()`:"
]
},
{
......
......@@ -26,6 +26,8 @@ setup(
'Programming Language :: Python :: 3.7',
],
description="Python tools to interact with darshan log records of HPC applications.",
#options={"bdist_wheel": {"universal": False}},
#libraries=["darshanutils"],
install_requires=requirements,
long_description=readme,
include_package_data=True,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment