report.py 32.1 KB
Newer Older
1 2 3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

4 5 6 7 8 9
"""
The darshan.repport module provides the DarshanReport class for convienient
interaction and aggregation of Darshan logs using Python.
"""


10
import darshan.backend.cffi_backend as backend
11

12
import json
13
import re
14
import copy
15
import datetime
16
import sys
17

18 19 20
import numpy as np
import pandas as pd

Jakob Luettgau's avatar
Jakob Luettgau committed
21 22
import collections.abc

23
import logging
24
logger = logging.getLogger(__name__)
25 26 27



28
class DarshanReportJSONEncoder(json.JSONEncoder):
    """
    Helper class for JSON serialization if the report contains, for example,
    numpy or dates records, which are not handled by the default JSON encoder.
    """
    def default(self, obj):
        # numpy arrays serialize as plain (nested) Python lists
        if isinstance(obj, np.ndarray):
            return obj.tolist()

        # datetimes serialize as ISO-8601 strings
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()

        # anything else: defer to the base class (raises TypeError)
        return super().default(obj)


Jakob Luettgau's avatar
Jakob Luettgau committed
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
class DarshanRecordCollection(collections.abc.MutableSequence):
    """
    Darshan log records may nest various properties (e.g., DXT, Lustre).
    As such they can not faithfully represented using only a single
    Numpy array or a Pandas dataframe.

    The DarshanRecordCollection is used as a wrapper to offer
    users a stable API to DarshanReports and contained records
    in various popular formats while allowing to optimize
    memory and internal representations as necessary.
    """

    def __init__(self, mod=None, report=None):
        super(DarshanRecordCollection, self).__init__()
        self.mod = mod             # collections should be homogenous in module type
        self.report = report       # reference the report offering lookup for, e.g., counter names

        self.rank = None           # if all records in collection share rank, save memory
        self.id = None             # if all records in collection share id/nrec, save memory

        self.timebase = None       # allow fast time rebase without touching every record
        self.start_time = None
        self.end_time = None

        self._type = "collection"  # collection => list(), single => [record], nested => [[], ... ,[]]
        self._records = list()     # internal format before user conversion

    def __len__(self):
        return len(self._records)

    def __setitem__(self, key, val):
        self._records[key] = val

    def __getitem__(self, key):
        if self._type == "record":
            if isinstance(key, collections.abc.Hashable):
                #TODO: might extend this style access to collection/nested type as well
                #      but do not want to offer an access which might not be feasible to maintain
                return self._records[0][key]
            else:
                return self._records[0]

        # Wrap single record in RecordCollection to attach conversions: to_json, to_dict, to_df, ...
        # This way conversion logic can be shared.
        record = DarshanRecordCollection(mod=self.mod, report=self.report)

        if isinstance(key, slice):
            record._type = "collection"
            record._records = self._records[key]
        else:
            record._type = "record"
            record.append(self._records[key])
        return record

    def __delitem__(self, key):
        # BUGFIX: previously `del self._list[ii]`, which raised NameError
        # (neither `self._list` nor `ii` exist); delete from the backing list.
        del self._records[key]

    def insert(self, key, val):
        self._records.insert(key, val)

    def append(self, val):
        self.insert(len(self._records), val)

    def __repr__(self):
        # single-record collections display as the record itself
        if self._type == "record":
            return self._records[0].__repr__()

        return object.__repr__(self)

    def info(self, describe=False, plot=False):
        """
        Print information about the record for inspection.

        Args:
            describe (bool): show detailed summary and statistics (default: False)
            plot (bool): show plots for quick value overview for counters and fcounters (default: False)

        Return:
            None
        """
        mod = self.mod
        records = self._records

        print("Module:       ", mod, sep="")
        print("Records:      ", len(self), sep="")
        print("Coll. Type:   ", self._type, sep="")

        if mod in ['LUSTRE']:
            # no per-record summary implemented for LUSTRE yet
            for i, rec in enumerate(records):
                pass
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            # aggregate identifying info and event counts across all DXT records
            ids = set()
            ranks = set()
            hostnames = set()
            reads = 0
            writes = 0
            for i, rec in enumerate(records):
                ids.add(rec['id'])
                ranks.add(rec['rank'])
                hostnames.add(rec['hostname'])
                reads += rec['read_count']
                writes += rec['write_count']
            print("Ranks:        ", str(ranks), sep="")
            print("Name Records: ", str(ids), sep="")
            print("Hostnames:    ", str(hostnames), sep="")
            print("Read Events:  ", str(reads), sep="")
            print("Write Events: ", str(writes), sep="")

            if describe or plot:
                # logger.warn is a deprecated alias of logger.warning
                logger.warning("No plots/descriptions defined for DXT records info.")

        else:
            ids = set()
            ranks = set()
            for i, rec in enumerate(records):
                ids.add(rec['id'])
                ranks.add(rec['rank'])
            print("Ranks:        ", str(ranks), sep="")
            print("Name Records: ", str(ids), sep="")

            if describe or plot:
                df = self.to_df(attach=None)
                # temporarily lift the row display limit; restored below
                pd_max_rows = pd.get_option('display.max_rows')
                pd.set_option('display.max_rows', None)

                if plot:
                    figw = 7
                    lh = 0.3    # lineheight
                    # get number of counters for plot height adjustment
                    nc = self[0]['counters'].size
                    nfc = self[0]['fcounters'].size

                    # NOTE(review): display() is an IPython builtin — this path
                    # assumes a notebook environment; verify before using elsewhere
                    display(df['counters'].plot.box(vert=False, figsize=(figw, nc*lh)))
                    display(df['fcounters'].plot.box(vert=False, figsize=(figw, nfc*lh)))

                if describe:
                    display(df['counters'].describe().transpose())
                    display(df['fcounters'].describe().transpose())

                pd.set_option('display.max_rows', pd_max_rows)

    ###########################################################################
    # Export Conversions (following the pandas naming conventions)
    ###########################################################################
    def to_numpy(self):
        """Return a deep copy of the raw records (counters as numpy arrays)."""
        records = copy.deepcopy(self._records)
        return records

    def to_list(self):
        """Return records as list of dicts with counters as plain lists."""
        mod = self.mod
        records = copy.deepcopy(self._records)

        if mod in ['LUSTRE']:
            raise NotImplementedError
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            raise NotImplementedError
        else:
            for i, rec in enumerate(records):
                rec['counters'] = rec['counters'].tolist()
                rec['fcounters'] = rec['fcounters'].tolist()
        return records

    def to_dict(self):
        """Return records with counters mapped to their counter names."""
        mod = self.mod
        records = copy.deepcopy(self._records)
        counters = self.report.counters[self.mod]
        if mod in ['LUSTRE']:
            raise NotImplementedError
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            # format already in a dict format, but may offer switches for expansion
            logger.warning("WARNING: The output of DarshanRecordCollection.to_dict() may change in the future.")
        else:
            for i, rec in enumerate(records):
                rec['counters'] = dict(zip(counters['counters'], rec['counters']))
                rec['fcounters'] = dict(zip(counters['fcounters'], rec['fcounters']))
        return records

    def to_json(self):
        """Return records serialized as a JSON string."""
        records = self.to_list()
        return json.dumps(records, cls=DarshanReportJSONEncoder)

    def to_df(self, attach="default"):
        """
        Return records as pandas dataframes.

        Args:
            attach: list of metadata columns ('id', 'rank') to attach to the
                counter dataframes, or None; "default" attaches both.
        """
        if attach == "default":
            attach = ['id', 'rank']

        mod = self.mod
        records = copy.deepcopy(self._records)

        if mod in ['LUSTRE']:
            # passthrough: no dataframe conversion implemented yet
            for i, rec in enumerate(records):
                rec = rec
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            # segment lists become per-record dataframes
            for i, rec in enumerate(records):
                rec['read_segments'] = pd.DataFrame(rec['read_segments'])
                rec['write_segments'] = pd.DataFrame(rec['write_segments'])
        else:
            counters = []
            fcounters = []
            ids = []
            ranks = []

            for i, rec in enumerate(records):
                counters.append(rec['counters'])
                fcounters.append(rec['fcounters'])
                ids.append(rec['id'])
                ranks.append(rec['rank'])

            records = {"counters": None, "fcounters": None}
            records['counters'] = pd.DataFrame(counters, columns=self.report.counters[mod]['counters'])
            records['fcounters'] = pd.DataFrame(fcounters, columns=self.report.counters[mod]['fcounters'])

            def flip_column_order(df):
                return df[df.columns[::-1]]

            # attach ids and ranks as the leading columns (hence the double flip)
            if attach is not None:
                for counter_type in ['counters', 'fcounters']:
                    records[counter_type] = flip_column_order(records[counter_type])
                    if 'id' in attach:
                        records[counter_type]['id'] = ids
                    if 'rank' in attach:
                        records[counter_type]['rank'] = ranks
                    records[counter_type] = flip_column_order(records[counter_type])

        return records


282
class DarshanReport(object):
283 284 285 286 287
    """
    The DarshanReport class provides a convienient wrapper to access darshan
    logs, which also caches already fetched information. In addition to that
    a number of common aggregations can be performed.
    """
288

Jakob Luettgau's avatar
Jakob Luettgau committed
289
    # a way to conserve memory?
290 291 292
    #__slots__ = ['attr1', 'attr2']


293
    def __init__(self,
            filename=None, dtype='numpy',
            start_time=None, end_time=None,
            automatic_summary=False,
            read_all=True, lookup_name_records=True):
        """
        Args:
            filename (str): filename to open (optional)
            dtype (str): default dtype for internal structures
            automatic_summary (bool): automatically generate summary after loading
            read_all (bool): whether to read all records for log
            lookup_name_records (bool): lookup and update name_records as records are loaded

        Return:
            None

        """
        self.filename = filename

        # behavioral options
        self.dtype = dtype                                  # default dtype to return when viewing records
        self.automatic_summary = automatic_summary
        self.lookup_name_records = lookup_name_records

        # state-dependent book-keeping
        self.converted_records = False  # true if convert_records() was called (unnumpyfy)

        # report metadata: inf/-inf sentinels so merged reports can widen the range
        self.start_time = start_time if start_time else float('inf')
        self.end_time = end_time if end_time else float('-inf')
        self.timebase = self.start_time

        # initialize data namespaces
        self._metadata = {}
        self._modules = {}
        self._counters = {}
        self.records = {}
        self._mounts = {}
        self.name_records = {}

        # initialize report/summary namespace
        self.summary_revision = 0       # counter to check if summary needs update (see data_revision)
        self.summary = {}

        # legacy references (deprecate before 1.0?)
        self.data_revision = 0          # counter for consistency checks
        self.data = {'version': 1}
        self.data['metadata'] = self._metadata
        self.data['records'] = self.records
        self.data['summary'] = self.summary
        self.data['modules'] = self._modules
        self.data['counters'] = self.counters
        self.data['name_records'] = self.name_records

        # when using report algebra this log allows to untangle potentially
        # unfair aggregations (e.g., double accounting)
        self.provenance_enabled = True
        self.provenance_graph = []
        self.provenance_reports = {}

        if filename:
            self.open(filename, read_all=read_all)


Jakob Luettgau's avatar
Jakob Luettgau committed
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
    @property
    def metadata(self):
        # read-only view; populated by read_metadata()
        return self._metadata

    @property
    def modules(self):
        # read-only view; populated from backend.log_get_modules()
        return self._modules

    @property
    def counters(self):
        # read-only view; per-module counter name listings
        return self._counters

391 392 393
    def open(self, filename, read_all=False):
        """
        Open log file via CFFI backend.

        Args:
            filename (str): filename to open (optional)
            read_all (bool): whether to read all records for log

        Return:
            None

        """
        self.filename = filename

        if filename:
            self.log = backend.log_open(self.filename)

            # a NULL handle means the backend could not open the log
            if not bool(self.log['handle']):
                raise RuntimeError("Failed to open file.")

            self.read_metadata(read_all=read_all)

            if read_all:
                self.read_all()
415 416 417


    def __add__(self, other):
        """
        Allow reports to be merged using the addition operator.
        """
        # delegates to merge(); see merge() for aggregation semantics
        return self.merge(other)
423

424

425 426 427
    def __deepcopy__(self, memo):
        """
        Creates a deepcopy of report.

        .. note::
            Needed to purge reference to self.log as Cdata can not be pickled:
            TypeError: can't pickle _cffi_backend.CData objects
        """
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            # blacklist of members not to copy (cffi handle is unpicklable)
            if k in ["log"]:
                continue
            setattr(result, k, copy.deepcopy(v, memo))

        # TODO: might consider treating self.log as list of open logs to not deactivate load functions?
        # BUGFIX: removed unreachable duplicate `return result` that followed the
        # original return statement (dead code).
        return result
447

448

449
    def read_metadata(self, read_all=False):
        """
        Read metadata such as the job, the executables and available modules.

        Args:
            read_all (bool): also resolve all name records up front

        Return:
            None

        """
        self.metadata['job'] = backend.log_get_job(self.log)
        self.metadata['exe'] = backend.log_get_exe(self.log)

        # replace the inf/-inf sentinels set in __init__ with real timestamps
        self.start_time = datetime.datetime.fromtimestamp(self.metadata['job']['start_time'])
        self.end_time = datetime.datetime.fromtimestamp(self.metadata['job']['end_time'])

        self.data['mounts'] = backend.log_get_mounts(self.log)
        self.mounts = self.data['mounts']

        self.data['modules'] = backend.log_get_modules(self.log)
        self._modules = self.data['modules']

        # idiom fix: was `if read_all == True:`
        if read_all:
            self.data["name_records"] = backend.log_get_name_records(self.log)
            self.name_records = self.data['name_records']
475 476


477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502
    def update_name_records(self, mod=None):
        """
        Update (and prune unused) name records from resolve table.

        First reindexes all used name record identifiers and then queries 
        darshan-utils library to compile filtered list of name records.

        Args:
            mod (str): restrict refresh to a single module (default: all)

        Return:
            None

        """
        # sanitize inputs: default to every module with loaded records
        mods = self.records if mod is None else [mod]

        # collect every record id still referenced by the selected modules
        ids = set()
        for mod in mods:
            logger.debug(f" Refreshing name_records for mod={mod}")
            for rec in self.records[mod]:
                ids.add(rec['id'])

        self.name_records.update(backend.log_lookup_name_records(self.log, ids))
509 510
        

511
    def read_all(self, dtype=None):
        """
        Read all available records from darshan log and return as dictionary.

        Args:
            dtype (str): dtype to use for record loading (default: report dtype)

        Return:
            None
        """
        self.read_all_generic_records(dtype=dtype)
        self.read_all_dxt_records(dtype=dtype)

        # special-format modules each have a dedicated loader
        for special_mod, loader in (
                ("LUSTRE", self.mod_read_all_lustre_records),
                ("APMPI", self.mod_read_all_apmpi_records),
                ("APXC", self.mod_read_all_apxc_records)):
            if special_mod in self.data['modules']:
                loader(dtype=dtype)

        return


534
    def read_all_generic_records(self, counters=True, fcounters=True, dtype=None):
        """
        Read all generic records from darshan log and return as dictionary.

        Args:
            dtype (str): dtype to use for record loading (default: report dtype)

        Return:
            None
        """
        # fall back to the report-wide default dtype
        dtype = dtype if dtype else self.dtype

        for mod in self.data['modules']:
            # mod_read_all_records skips unsupported modules itself
            self.mod_read_all_records(mod, dtype=dtype, warnings=False)
549

550 551


552
    def read_all_dxt_records(self, reads=True, writes=True, dtype=None):
        """
        Read all dxt records from darshan log and return as dictionary.

        Args:
            reads (bool): load read segments
            writes (bool): load write segments
            dtype (str): dtype to use for record loading (default: report dtype)

        Return:
            None
        """
        # fall back to the report-wide default dtype
        dtype = dtype if dtype else self.dtype

        for mod in self.data['modules']:
            # mod_read_all_dxt_records skips non-DXT modules itself
            self.mod_read_all_dxt_records(mod, warnings=False, reads=reads, writes=writes, dtype=dtype)
567 568 569



570
    def mod_read_all_records(self, mod, dtype=None, warnings=True):
        """
        Reads all generic records for module

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary, 'pandas'

        Return:
            None

        """
        # special-format modules are handled by their dedicated loaders
        unsupported = ['DXT_POSIX', 'DXT_MPIIO', 'LUSTRE', 'APMPI', 'APXC']
        if mod in unsupported:
            if warnings:
                logger.warning(f" Skipping. Currently unsupported: {mod} in mod_read_all_records().")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
        cn = backend.counter_names(mod)
        fcn = backend.fcounter_names(mod)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}
            self.counters[mod]['counters'] = cn
            self.counters[mod]['fcounters'] = fcn

        # fetch records until the backend signals exhaustion with None
        rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1
            rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

        # process/combine records if the format dtype allows for this
        if dtype == 'pandas':
            combined_c = None
            combined_fc = None

            for rec in self.records[mod]:
                if combined_c is None:
                    combined_c = rec['counters']
                else:
                    combined_c = pd.concat([combined_c, rec['counters']])

                if combined_fc is None:
                    combined_fc = rec['fcounters']
                else:
                    combined_fc = pd.concat([combined_fc, rec['fcounters']])

            # collapse into a single synthetic record covering all ranks/ids
            self.records[mod] = [{
                'rank': -1,
                'id': -1,
                'counters': combined_c,
                'fcounters': combined_fc
                }]
646

647

648
    def mod_read_all_apmpi_records(self, mod="APMPI", dtype=None, warnings=True):
        """
        Reads all APMPI records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f" Skipping. Log does not contain data for mod: {mod}")
            return

        supported =  ['APMPI'] 
        if mod not in supported:
            if warnings:
                logger.warning(f" Skipping. Unsupported module: {mod} in in mod_read_all_apmpi_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch the header record first, then PERF records until None
        rec = backend.log_get_apmpi_record(self.log, mod, "HEADER", dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self.data['modules'][mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_apmpi_record(self.log, mod, "PERF", dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()
696 697


698
    def mod_read_all_apxc_records(self, mod="APXC", dtype=None, warnings=True):
        """
        Reads all APXC records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f" Skipping. Log does not contain data for mod: {mod}")
            return

        supported =  ['APXC'] 
        if mod not in supported:
            if warnings:
                logger.warning(f" Skipping. Unsupported module: {mod} in in mod_read_all_apxc_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
        cn = backend.counter_names(mod)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch the header record first, then PERF records until None
        rec = backend.log_get_apxc_record(self.log, mod, "HEADER", dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self.data['modules'][mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_apxc_record(self.log, mod, "PERF", dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()


747
    def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, writes=True):
        """
        Reads all dxt records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f" Skipping. Log does not contain data for mod: {mod}")
            return

        supported =  ['DXT_POSIX', 'DXT_MPIIO']
        if mod not in supported:
            if warnings:
                logger.warning(f" Skipping. Unsupported module: {mod} in in mod_read_all_dxt_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch records until the backend returns None
        # NOTE(review): the first fetch omits reads/writes kwargs while the
        # loop fetch passes them — presumably the backend defaults match; verify
        rec = backend.log_get_dxt_record(self.log, mod, dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self.data['modules'][mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_dxt_record(self.log, mod, reads=reads, writes=writes, dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

799 800


801

802
    def mod_read_all_lustre_records(self, mod="LUSTRE", dtype=None, warnings=True):
        """
        Reads all LUSTRE records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f" Skipping. Log does not contain data for mod: {mod}")
            return

        supported =  ['LUSTRE']

        if mod not in supported:
            if warnings:
                # BUGFIX: message previously named mod_read_all_dxt_records() ("in in ... dxt")
                logger.warning(f" Skipping. Unsupported module: {mod} in mod_read_all_lustre_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
        cn = backend.counter_names(mod)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}
            self.counters[mod]['counters'] = cn

        # fetch records until the backend returns None
        rec = backend.log_get_record(self.log, mod, dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self.data['modules'][mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_record(self.log, mod, dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

        # process/combine records if the format dtype allows for this
        if dtype == 'pandas':
            combined_c = None

            for rec in self.records[mod]:
                if combined_c is None:
                    combined_c = rec['counters']
                else:
                    combined_c = pd.concat([combined_c, rec['counters']])

            # collapse into a single synthetic record covering all ranks/ids
            self.records[mod] = [{
                'rank': -1,
                'id': -1,
                'counters': combined_c,
                }]

877 878 879 880




881 882
    def mod_records(self, mod, 
                    dtype='numpy', warnings=True):
        """
        Return generator for lazy record loading and traversal.

        .. warning::
            Can't be used for now when alternating between different modules.
            A temporary workaround can be to open the same log multiple times,
            as this way buffers are not shared between get_record invocations
            in the lower level library.

        Args:
            mod (str): Identifier of module to fetch records for
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        # refresh the counter name listings for this module
        cn = backend.counter_names(mod)
        fcn = backend.fcounter_names(mod)

        if mod not in self.counters:
            self.counters[mod] = {}
        self.counters[mod]['counters'] = cn
        self.counters[mod]['fcounters'] = fcn

        # yield records lazily until the backend returns None
        rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
        while rec is not None:
            yield rec
            rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
915 916


917
    def info(self, metadata=False):
        """
        Print summary information about the report (filename, runtime span,
        job details, loaded modules and records) for quick inspection.

        Args:
            metadata (bool): show detailed metadata (default: False)

        Return:
            None
        """
        print("Filename:       ", self.filename, sep="")

        tdelta = self.end_time - self.start_time
        print("Times:          ", self.start_time, " to ", self.end_time, " (Duration ", tdelta, ")", sep="")

        if 'exe' in self.metadata:
            # fixed typo in label ("Executeable"); padding keeps columns aligned
            print("Executable:     ", self.metadata['exe'], sep="")

        if 'job' in self.metadata:
            print("Processes:      ", self.metadata['job']['nprocs'], sep="")
            print("JobID:          ", self.metadata['job']['jobid'], sep="")
            print("UID:            ", self.metadata['job']['uid'], sep="")
            print("Modules in Log: ", list(self._modules.keys()), sep="")

        # number of records currently held in memory, per module
        loaded = {mod: len(recs) for mod, recs in self.records.items()}
        print("Loaded Records: ", loaded, sep="")

        print("Name Records:   ", len(self.name_records), sep="")

        if 'job' in self.metadata:
            print("Darshan/Hints:  ", self.metadata['job']['metadata'], sep="")
        print("DarshanReport:  id(", id(self), ") (tmp)", sep="")

        if metadata:
            # dump the full metadata dict, expanding the nested 'job' entry
            for key, val in self.metadata.items():
                if key == "job":
                    for key2, val2 in self.metadata[key].items():
                        print("metadata['", key, "']['", key2, "'] = ", val2, sep="")
                else:
                    print("metadata['", key, "'] = ", val, sep="")
983

984

985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
    ###########################################################################
    # Internal Organisation
    ###########################################################################
    def rebase_timestamps(records=None, inplace=False, timebase=False):
        """
        Updates all records in the report to use timebase (defaults: start_time).
        This might allow to conserve memory as reports are merged.

        NOTE(review): still a stub — timestamps are not rebased yet; the
        records are only (deep)copied. Also, the signature lacks `self`,
        so calling this as an instance method binds the report object to
        `records`; kept as-is for interface compatibility.

        Args:
            records (dict, list): records to rebase
            inplace (bool): whether to merely return a copy or to update records
            timebase (datetime.datetime): new timebase to use

        Return:
            rebased_records (same type as provided to records)
        """
        if inplace:
            # honor the documented `inplace` flag: operate on the originals
            rebased_records = records
        else:
            # fixed NameError: original deep-copied undefined name `record`
            # and returned the likewise-undefined `rebased_records`
            rebased_records = copy.deepcopy(records)

        # TODO: apply timestamp rebase
        # TODO: settle on format

        return rebased_records

    ###########################################################################
Jakob Luettgau's avatar
Jakob Luettgau committed
1009
    # Export Conversions
1010
    ###########################################################################
1011
    def to_dict(self):
        """
        Return dictionary representation of report data.

        Record collections that expose `to_list()` are converted; any
        other record container is replaced by the placeholder string
        "Not implemented.".

        Args:
            None

        Return:
            dict
        """
        # deep-copy so converting records does not mutate the report itself
        data = copy.deepcopy(self.data)

        recs = data['records']
        for mod in recs:
            try:
                recs[mod] = recs[mod].to_list()
            except Exception:
                # narrowed from a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit
                recs[mod] = "Not implemented."

        return data
1034 1035


1036
    def to_json(self):
        """
        Return JSON representation of report data as string.

        Record collections that expose `to_list()` are converted; any
        other record container is replaced by the placeholder string
        "Not implemented.".

        Args:
            None

        Return:
            JSON String
        """
        # deep-copy so converting records does not mutate the report itself
        data = copy.deepcopy(self.data)

        recs = data['records']
        for mod in recs:
            try:
                recs[mod] = recs[mod].to_list()
            except Exception:
                # narrowed from a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit; consistent with to_dict()
                recs[mod] = "Not implemented."

        # custom encoder handles numpy arrays and datetime objects
        return json.dumps(data, cls=DarshanReportJSONEncoder)
1056 1057 1058 1059




Jakob Luettgau's avatar
Jakob Luettgau committed
1060