Commit 2da3a81e authored by Jakob Luettgau

Enable function chaining for filter and reduce. Remove obsolete code.

parent 88bd7814
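This commit makes filter and reduce return new report objects instead of mutating the caller, so the calls can be chained. A minimal sketch of the intended usage, assuming enable_experimental() registers these methods on DarshanReport and using a made-up log file name:

import darshan

darshan.enable_experimental()                        # assumed to attach filter/reduce to DarshanReport
report = darshan.DarshanReport("example.darshan")    # hypothetical log file

# each call returns a new DarshanReport, so the operations compose
reduced = report.filter(mods=["POSIX"], pattern="*.dat").reduce(operation="sum")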
.PHONY: clean clean-test clean-pyc clean-build docs help
.DEFAULT_GOAL := help
help:
@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
clean: clean-build clean-pyc clean-test # remove all build, test, coverage and Python artifacts
clean-build: ## remove build artifacts
clean-build: # remove build artifacts
rm -rf build/
rm -rf dist/
rm -rf .eggs/
@@ -14,34 +11,36 @@ clean-build: ## remove build artifacts
find . -name '*.egg-info' -exec rm -fr {} +
find . -name '*.egg' -exec rm -f {} +
clean-pyc: ## remove Python file artifacts
clean-pyc: # remove Python file artifacts
find . -name '*.pyc' -exec rm -f {} +
find . -name '*.pyo' -exec rm -f {} +
find . -name '*~' -exec rm -f {} +
find . -name '__pycache__' -exec rm -fr {} +
clean-test: ## remove test and coverage artifacts
clean-test: # remove test and coverage artifacts
rm -rf .tox/
rm -f .coverage
rm -rf htmlcov/
rm -rf .pytest_cache
lint: ## check style with flake8
lint: # check style with flake8
flake8 darshan tests
test: ## run tests quickly with the default Python
test: # run tests quickly with the default Python
py.test --verbose
test-all: ## run tests on every Python version with tox
test-all: # run tests on every Python version with tox
tox
coverage: ## check code coverage quickly with the default Python
coverage: # check code coverage quickly with the default Python
coverage run --source darshan -m pytest
coverage report -m
coverage html
$(BROWSER) htmlcov/index.html
xdg-open htmlcov/index.html
docs: ## generate Sphinx HTML documentation, including API docs
docs: # generate Sphinx HTML documentation, including API docs
rm -f docs/darshan.rst
rm -f docs/darshan.backend.rst
rm -f docs/darshan.plots.rst
@@ -53,16 +52,19 @@ docs: ## generate Sphinx HTML documentation, including API docs
docs-show: docs
xdg-open docs/_build/html/index.html
servedocs: docs ## compile the docs watching for changes
servedocs: docs # compile the docs watching for changes
watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
release: dist ## package and upload a release
twine upload dist/*
dist: clean ## builds source and wheel package
release: dist # package and upload a release
twine upload --repository testpypi dist/*
#twine upload dist/*
dist: clean # builds source and wheel package
python setup.py sdist
python setup.py bdist_wheel
ls -l dist
install: clean ## install the package to the active Python's site-packages
install: clean # install the package to the active Python's site-packages
python setup.py install
@@ -5,12 +5,18 @@
__version__ = '0.1.0'
options = {
}
#from darshan.backend.cffi_backend import *
from darshan.report import DarshanReport
def enable_experimental(verbose=True):
def enable_experimental(verbose=False):
"""
Enable experimental features such as aggregation methods for reports.
......
@@ -103,7 +103,7 @@ def agg_ioops(self, mode='append'):
# cleanup and prepare for json serialization?
tmp = json.dumps(ctx, cls=NumpyEncoder)
tmp = json.dumps(ctx, cls=DarshanReportJSONEncoder)
ctx = json.loads(tmp)
......
@@ -43,12 +43,8 @@ def create_time_summary(self, mode="append"):
# overwrite existing summary entry
if mode == "append":
self.summary['time_summary'] = ctx
return ctx
from darshan.report import *
import sys
import copy
import re
def filter(self, mods=None, name_records=None, data_format='numpy', mode='append'):
def filter(self, mods=None, name_records=None, pattern=None, regex=None):
"""
Return filtered list of records.
@@ -15,11 +17,12 @@ def filter(self, mods=None, name_records=None, data_format='numpy', mode='append
None
"""
r = copy.deepcopy(self)
# convenience
recs = self.records
recs = r.records
ctx = {}
@@ -36,38 +39,51 @@ def filter(self, mods=None, name_records=None, data_format='numpy', mode='append
name_records = None
# change inputs to whitelists
# whitelist all mods
if mods == None:
mods = self.records.keys()
mods = r.records.keys()
if name_records == None:
name_records = list(self.name_records.keys())
if pattern != None:
pattern = pattern.replace("*", "(.*?)")
elif regex:
pattern = regex
# whitelist name_records
if name_records == None and pattern == None and regex == None:
# allow all name records if no criterion is provided
name_records = list(r.name_records.keys())
else:
resolve_table = {}
for key, value in self.name_records.items():
ids = []
for key, value in r.name_records.items():
resolve_table[key] = key
resolve_table[value] = key
ids = []
for nrec in name_records:
if nrec in resolve_table:
ids.append(resolve_table[nrec])
# TODO: decide if overwriting kwargs is really a good idea; currently considering it a sanitization step
name_records = ids
# whitelist names that match pattern
if pattern != None or regex != None:
if re.match(pattern, value):
print("YES", pattern, value)
ids.append(key)
else:
print("NO", pattern, value)
# convert filenames/name_records mix into list of ids only
if name_records != None:
for nrec in name_records:
if nrec in resolve_table:
ids.append(resolve_table[nrec])
print(mods)
print(name_records)
# TODO: decide if overwriting kwargs is really a good idea; currently considering it a sanitization step
name_records = ids
if name_records != None:
# aggregate
for mod, recs in self.records.items():
for mod, recs in r.records.items():
if mod not in mods:
continue
@@ -82,13 +98,7 @@ def filter(self, mods=None, name_records=None, data_format='numpy', mode='append
ctx[mod].append(rec)
r.records = ctx
if mode == 'append':
name = 'filter'
if name not in self.summary:
self.summary[name] = {}
self.data[name] = ctx
return ctx
return r
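The pattern and regex arguments added above whitelist name records by matching their resolved file names. A hedged usage sketch (log file and patterns are made up; enable_experimental() is assumed to register filter on DarshanReport):

import darshan

darshan.enable_experimental()
report = darshan.DarshanReport("example.darshan")   # hypothetical log file

# glob-style pattern, internally rewritten to a regular expression
subset = report.filter(pattern="*.h5")

# or pass a raw regular expression directly
subset = report.filter(regex=r".*\.h5$")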
from darshan.report import *
import sys
import copy
def merge(self, other, reduce_first=False):
"""
Merge two darshan reports and return a new combined report.
Args:
other (DarshanReport): report to merge with this one
reduce_first (bool): whether to reduce both reports before merging
Return:
DarshanReport: new report containing the combined records
"""
# new report
nr = DarshanReport()
# keep provenance?
if self.provenance_enabled or other.provenance_enabled:
# Currently, assume logs remain in memory to create the provenance tree on demand
# Alternative: maintain a tree with simpler refs? (modified reports would not work then)
#nr.provenance_reports[self.filename] = copy.copy(self)
#nr.provenance_reports[other.filename] = copy.copy(other)
nr.provenance_reports[self.filename] = None
nr.provenance_reports[other.filename] = None
nr.provenance_log.append(("add", self, other, datetime.datetime.now()))
# update metadata helper
def update_metadata(report, force=False):
if force:
nr.start_time = report.start_time
nr.end_time = report.end_time
return
if report.start_time < nr.start_time:
nr.start_time = report.start_time
if report.end_time > nr.end_time:
nr.end_time = report.end_time
update_metadata(self, force=True)
update_metadata(other)
# copy over records (by reference, under the assumption that individual records are not altered)
for report in [self, other]:
for key, records in report.data['records'].items():
#print(report, key)
if key not in nr.records:
nr.records[key] = copy.copy(records)
else:
nr.records[key] += copy.copy(records)
for key, mod in report.modules.items():
if key not in nr.modules:
nr.modules[key] = copy.copy(mod)
# TODO: invalidate len/counters
for key, counter in report.counters.items():
if key not in nr.counters:
nr.counters[key] = copy.copy(counter)
# TODO: invalidate len/counters
for key, nrec in report.name_records.items():
if key not in nr.name_records:
nr.name_records[key] = copy.copy(nrec)
# TODO: verify colliding name_records?
return nr
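A short usage sketch for merge, assuming it is registered on DarshanReport the same way as the other experimental methods and using made-up log file names:

import darshan

darshan.enable_experimental()                      # assumed to attach merge as well
r1 = darshan.DarshanReport("phase1.darshan")       # hypothetical log files
r2 = darshan.DarshanReport("phase2.darshan")

# merge copies records, modules, counters, and name records into a new report
combined = r1.merge(r2)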
@@ -48,7 +48,7 @@ def mod_agg_iohist(self, mod, mode='append'):
name = "%s_%s_%s" % (typ, name[-2], name[-1])
return name
tmp = json.dumps(ctx[mod], cls=NumpyEncoder)
tmp = json.dumps(ctx[mod], cls=DarshanReportJSONEncoder)
tmp = json.loads(tmp)
cn = backend.counter_names(mod)
c = dict(zip(cn, tmp))
......
from darshan.report import *
def print_module_records(self, mode='append'):
"""
Print module record descriptions for the current report.
Args:
mode (str): Whether to 'append' (default) or to 'return' aggregation.
Return:
None or dict: Depending on mode
"""
descriptions = {
'POSIX': """
# *******************************************************
# POSIX module data
# *******************************************************
# description of POSIX counters:
# POSIX_*: posix operation counts.
# READS,WRITES,OPENS,SEEKS,STATS,MMAPS,SYNCS,FILENOS,DUPS are types of operations.
# POSIX_RENAME_SOURCES/TARGETS: total count file was source or target of a rename operation
# POSIX_RENAMED_FROM: Darshan record ID of the first rename source, if file was a rename target
# POSIX_MODE: mode that file was opened in.
# POSIX_BYTES_*: total bytes read and written.
# POSIX_MAX_BYTE_*: highest offset byte read and written.
# POSIX_CONSEC_*: number of exactly adjacent reads and writes.
# POSIX_SEQ_*: number of reads and writes from increasing offsets.
# POSIX_RW_SWITCHES: number of times access alternated between read and write.
# POSIX_*_ALIGNMENT: memory and file alignment.
# POSIX_*_NOT_ALIGNED: number of reads and writes that were not aligned.
# POSIX_MAX_*_TIME_SIZE: size of the slowest read and write operations.
# POSIX_SIZE_*_*: histogram of read and write access sizes.
# POSIX_STRIDE*_STRIDE: the four most common strides detected.
# POSIX_STRIDE*_COUNT: count of the four most common strides.
# POSIX_ACCESS*_ACCESS: the four most common access sizes.
# POSIX_ACCESS*_COUNT: count of the four most common access sizes.
# POSIX_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).
# POSIX_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).
# POSIX_F_*_START_TIMESTAMP: timestamp of first open/read/write/close.
# POSIX_F_*_END_TIMESTAMP: timestamp of last open/read/write/close.
# POSIX_F_READ/WRITE/META_TIME: cumulative time spent in read, write, or metadata operations.
# POSIX_F_MAX_*_TIME: duration of the slowest read and write operations.
# POSIX_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).
# POSIX_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).
# WARNING: POSIX module log format version <=3 has the following limitations:
# - No support for the following counters to properly instrument dup, fileno, and rename operations:
# - POSIX_FILENOS
# - POSIX_DUPS
# - POSIX_RENAME_SOURCES
# - POSIX_RENAME_TARGETS
# - POSIX_RENAMED_FROM
#<module> <rank> <record id> <counter> <value> <file name> <mount pt> <fs type>
""",
"MPI-IO": """
# *******************************************************
# MPI-IO module data
# *******************************************************
# description of MPIIO counters:
# MPIIO_INDEP_*: MPI independent operation counts.
# MPIIO_COLL_*: MPI collective operation counts.
# MPIIO_SPLIT_*: MPI split collective operation counts.
# MPIIO_NB_*: MPI non blocking operation counts.
# READS,WRITES,and OPENS are types of operations.
# MPIIO_SYNCS: MPI file sync operation counts.
# MPIIO_HINTS: number of times MPI hints were used.
# MPIIO_VIEWS: number of times MPI file views were used.
# MPIIO_MODE: MPI-IO access mode that file was opened with.
# MPIIO_BYTES_*: total bytes read and written at MPI-IO layer.
# MPIIO_RW_SWITCHES: number of times access alternated between read and write.
# MPIIO_MAX_*_TIME_SIZE: size of the slowest read and write operations.
# MPIIO_SIZE_*_AGG_*: histogram of MPI datatype total sizes for read and write operations.
# MPIIO_ACCESS*_ACCESS: the four most common total access sizes.
# MPIIO_ACCESS*_COUNT: count of the four most common total access sizes.
# MPIIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).
# MPIIO_*_RANK_BYTES: total bytes transferred at MPI-IO layer by the fastest and slowest ranks (for shared files).
# MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO open/read/write/close.
# MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO open/read/write/close.
# MPIIO_F_READ/WRITE/META_TIME: cumulative time spent in MPI-IO read, write, or metadata operations.
# MPIIO_F_MAX_*_TIME: duration of the slowest MPI-IO read and write operations.
# MPIIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).
# MPIIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).
# WARNING: MPIIO module log format version <=2 does not support the following counters:
# - MPIIO_F_CLOSE_START_TIMESTAMP
# - MPIIO_F_OPEN_END_TIMESTAMP
#<module> <rank> <record id> <counter> <value> <file name> <mount pt> <fs type>
""",
"LUSTRE": """
# *******************************************************
# LUSTRE module data
# *******************************************************
# description of LUSTRE counters:
# LUSTRE_OSTS: number of OSTs across the entire file system.
# LUSTRE_MDTS: number of MDTs across the entire file system.
# LUSTRE_STRIPE_OFFSET: OST ID offset specified when the file was created.
# LUSTRE_STRIPE_SIZE: stripe size for file in bytes.
# LUSTRE_STRIPE_WIDTH: number of OSTs over which the file is striped.
# LUSTRE_OST_ID_*: indices of OSTs over which the file is striped.
#<module> <rank> <record id> <counter> <value> <file name> <mount pt> <fs type>
""",
"STDIO": """
# *******************************************************
# STDIO module data
# *******************************************************
# description of STDIO counters:
# STDIO_{OPENS|FDOPENS|WRITES|READS|SEEKS|FLUSHES} are types of operations.
# STDIO_BYTES_*: total bytes read and written.
# STDIO_MAX_BYTE_*: highest offset byte read and written.
# STDIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).
# STDIO_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).
# STDIO_F_*_START_TIMESTAMP: timestamp of the first call to that type of function.
# STDIO_F_*_END_TIMESTAMP: timestamp of the completion of the last call to that type of function.
# STDIO_F_*_TIME: cumulative time spent in different types of functions.
# STDIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).
# STDIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).
# WARNING: STDIO module log format version 1 has the following limitations:
# - No support for properly instrumenting fdopen operations (STDIO_FDOPENS)
#<module> <rank> <record id> <counter> <value> <file name> <mount pt> <fs type>
""",
}
pass
from darshan.report import *
def records_as_dict(self, mode='append', recs=None):
"""
Convert all counters to dictionaries with human-readable names.
Args:
mode (str): Whether to 'append' (default) or to 'return' aggregation.
Return:
None or dict: Depending on mode
"""
# convenience
if recs is None:
recs = self.records
ctx = {}
# aggregate
for mod in recs:
if mod in ['DXT_POSIX']:
continue
# check records for module are present
if mod not in ctx:
ctx[mod] = []
for rec in recs[mod]:
c = None
fc = None
if 'counters' in rec:
c = dict(zip(self.counters[mod]['counters'], rec['counters'].tolist()))
if 'fcounters' in rec:
fc = dict(zip(self.counters[mod]['fcounters'], rec['fcounters'].tolist()))
if rec['id'] in self.name_records:
nrec = self.name_records[rec['id']]
else:
nrec = None
ctx[mod].append({'id': rec['id'], 'rank': rec['rank'], 'counters': c, 'fcounters': fc, 'name_record': nrec})
return ctx
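For reference, a minimal sketch of calling records_as_dict so counters are keyed by their names rather than returned as raw numpy arrays (log file name is hypothetical):

import darshan

darshan.enable_experimental()
report = darshan.DarshanReport("example.darshan")   # hypothetical log file

recs = report.records_as_dict()
posix_records = recs.get("POSIX", [])               # list of dicts with 'counters' and 'fcounters'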
@@ -17,8 +17,11 @@ def reduce(self, operation="sum", mods=None, name_records=None, mode='append', d
"""
r = copy.deepcopy(self)
# convenience
recs = self.records
recs = r.records
ctx = {}
@@ -43,15 +46,15 @@ def reduce(self, operation="sum", mods=None, name_records=None, mode='append', d
# change inputs to whitelists
if mods == None:
mods = self.records.keys()
mods = r.records.keys()
if name_records == None:
name_records = list(self.name_records.keys())
name_records = list(r.name_records.keys())
else:
resolve_table = {}
for key, value in self.name_records.items():
for key, value in r.name_records.items():
resolve_table[key] = key
resolve_table[value] = key
@@ -72,7 +75,7 @@ def reduce(self, operation="sum", mods=None, name_records=None, mode='append', d
if name_records != None:
# aggregate
for mod, recs in self.records.items():
for mod, recs in r.records.items():
if mod not in mods:
continue
@@ -121,12 +124,7 @@ def reduce(self, operation="sum", mods=None, name_records=None, mode='append', d
result[mod].append(rec)
r.records = result
if mode == 'append':
name = 'reduction'
if name not in self.summary:
self.summary[name] = {}
self.data[name] = ctx
return result
return r
# -*- coding: utf-8 -*-
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import darshan.backend.cffi_backend as backend
def plot_access_histogram(log, filter=None, data=None):