report.py 31.8 KB
Newer Older
1 2 3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

4 5 6 7 8 9
"""
The darshan.repport module provides the DarshanReport class for convienient
interaction and aggregation of Darshan logs using Python.
"""


10
import darshan.backend.cffi_backend as backend
11

12
import json
13
import re
14
import copy
15
import datetime
16
import sys
17

18 19 20
import numpy as np
import pandas as pd

Jakob Luettgau's avatar
Jakob Luettgau committed
21 22
import collections.abc

23
import logging
24
logger = logging.getLogger(__name__)
25 26 27



28
class DarshanReportJSONEncoder(json.JSONEncoder):
    """
    JSON encoder extension for Darshan reports.

    Reports may contain numpy arrays and datetime objects, neither of which
    the default JSON encoder can serialize; this encoder converts them to
    plain lists and ISO-8601 strings respectively.
    """
    def default(self, obj):
        # numpy arrays become (possibly nested) plain Python lists
        if isinstance(obj, np.ndarray):
            return obj.tolist()

        # datetimes become ISO-8601 formatted strings
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()

        # anything else: defer to the base encoder (raises TypeError)
        return super().default(obj)


Jakob Luettgau's avatar
Jakob Luettgau committed
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
class DarshanRecordCollection(collections.abc.MutableSequence):
    """
    Darshan log records may nest various properties (e.g., DXT, Lustre).
    As such they can not faithfully be represented using only a single
    Numpy array or a Pandas dataframe.

    The DarshanRecordCollection is used as a wrapper to offer
    users a stable API to DarshanReports and contained records
    in various popular formats while allowing to optimize
    memory and internal representations as necessary.
    """

    def __init__(self, mod=None, report=None):
        super(DarshanRecordCollection, self).__init__()
        self.mod = mod             # collections should be homogenous in module type
        self.report = report       # reference the report offering lookup for, e.g., counter names

        self.rank = None           # if all records in collection share rank, save memory
        self.id = None             # if all records in collection share id/nrec, save memory

        self.timebase = None       # allow fast time rebase without touching every record
        self.start_time = None
        self.end_time = None

        self._type = "collection"  # collection => list(), single => [record], nested => [[], ... ,[]]
        self._records = list()     # internal format before user conversion

    def __len__(self):
        return len(self._records)

    def __setitem__(self, key, val):
        self._records[key] = val

    def __getitem__(self, key):
        if self._type == "record":
            if isinstance(key, collections.abc.Hashable):
                #TODO: might extend this style access to collection/nested type as well
                #      but do not want to offer an access which might not be feasible to maintain
                return self._records[0][key]
            else:
                return self._records[0]

        # Wrap single record in RecordCollection to attach conversions: to_json, to_dict, to_df, ...
        # This way conversion logic can be shared.
        record = DarshanRecordCollection(mod=self.mod, report=self.report)

        if isinstance(key, slice):
            record._type = "collection"
            record._records = self._records[key]
        else:
            record._type = "record"
            record.append(self._records[key])
        return record

    def __delitem__(self, key):
        # BUGFIX: previously `del self._list[ii]` referenced a nonexistent
        # attribute and an undefined index variable, so deletion always
        # raised NameError/AttributeError.
        del self._records[key]

    def insert(self, key, val):
        self._records.insert(key, val)

    def append(self, val):
        self.insert(len(self._records), val)

    def __repr__(self):
        # a single wrapped record prints as the record itself
        if self._type == "record":
            return self._records[0].__repr__()

        return object.__repr__(self)

    def info(self, describe=False, plot=False):
        """
        Print information about the record for inspection.

        Args:
            describe (bool): show detailed summary and statistics (default: False)
            plot (bool): show plots for quick value overview for counters and fcounters (default: False)

        Return:
            None
        """
        mod = self.mod
        records = self._records

        print("Module:       ", mod, sep="")
        print("Records:      ", len(self), sep="")
        print("Coll. Type:   ", self._type, sep="")

        if mod in ['LUSTRE']:
            # no per-record summary defined yet for Lustre
            for i, rec in enumerate(records):
                pass
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            # aggregate identifying information and event counts across records
            ids = set()
            ranks = set()
            hostnames = set()
            reads = 0
            writes = 0
            for i, rec in enumerate(records):
                ids.add(rec['id'])
                ranks.add(rec['rank'])
                hostnames.add(rec['hostname'])
                reads += rec['read_count']
                writes += rec['write_count']
            print("Ranks:        ", str(ranks), sep="")
            print("Name Records: ", str(ids), sep="")
            print("Hostnames:    ", str(hostnames), sep="")
            print("Read Events:  ", str(reads), sep="")
            print("Write Events: ", str(writes), sep="")

            if describe or plot:
                # logger.warn is a deprecated alias; use logger.warning
                logger.warning("No plots/descriptions defined for DXT records info.")

        else:
            ids = set()
            ranks = set()
            for i, rec in enumerate(records):
                ids.add(rec['id'])
                ranks.add(rec['rank'])
            print("Ranks:        ", str(ranks), sep="")
            print("Name Records: ", str(ids), sep="")

            if describe or plot:
                df = self.to_df(attach=None)
                pd_max_rows = pd.get_option('display.max_rows')
                pd_max_columns = pd.get_option('display.max_columns')
                pd.set_option('display.max_rows', None)

                if plot:
                    figw = 7
                    lh = 0.3    # lineheight
                    # get number of counters for plot height adjustment
                    nc = self[0]['counters'].size
                    nfc = self[0]['fcounters'].size

                    # NOTE(review): display() is only defined in IPython/notebook
                    # contexts — confirm intended usage outside notebooks
                    display(df['counters'].plot.box(vert=False, figsize=(figw, nc*lh)))
                    display(df['fcounters'].plot.box(vert=False, figsize=(figw, nfc*lh)))

                if describe:
                    display(df['counters'].describe().transpose())
                    display(df['fcounters'].describe().transpose())

                pd.set_option('display.max_rows', pd_max_rows)


    ###########################################################################
    # Export Conversions (following the pandas naming conventions)
    ###########################################################################
    def to_numpy(self):
        """Return a deep copy of the raw (numpy-backed) records."""
        records = copy.deepcopy(self._records)
        return records

    def to_list(self):
        """Return records as plain Python lists (counters un-numpyfied)."""
        mod = self.mod
        records = copy.deepcopy(self._records)

        if mod in ['LUSTRE']:
            raise NotImplementedError
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            raise NotImplementedError
        else:
            for i, rec in enumerate(records):
                rec['counters'] = rec['counters'].tolist()
                rec['fcounters'] = rec['fcounters'].tolist()
        return records

    def to_dict(self):
        """Return records with counters expanded to name->value dicts."""
        mod = self.mod
        records = copy.deepcopy(self._records)
        # counter-name lookup comes from the owning report
        counters = self.report.counters[self.mod]
        if mod in ['LUSTRE']:
            raise NotImplementedError
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            # format already in a dict format, but may offer switches for expansion
            # logger.warn is a deprecated alias; use logger.warning
            logger.warning("WARNING: The output of DarshanRecordCollection.to_dict() may change in the future.")
        else:
            for i, rec in enumerate(records):
                rec['counters'] = dict(zip(counters['counters'], rec['counters']))
                rec['fcounters'] = dict(zip(counters['fcounters'], rec['fcounters']))
        return records

    def to_json(self):
        """Return records serialized as a JSON string."""
        records = self.to_list()
        return json.dumps(records, cls=DarshanReportJSONEncoder)

    def to_df(self, attach="default"):
        """
        Return records as pandas DataFrames.

        Args:
            attach: list of extra columns to attach ('id', 'rank'), or None

        Return:
            dict of DataFrames for generic modules; list of records with
            segment DataFrames for DXT modules
        """
        if attach == "default":
            attach = ['id', 'rank']

        mod = self.mod
        records = copy.deepcopy(self._records)

        if mod in ['LUSTRE']:
            # no dataframe conversion defined yet for Lustre records
            for i, rec in enumerate(records):
                pass
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            for i, rec in enumerate(records):
                rec['read_segments'] = pd.DataFrame(rec['read_segments'])
                rec['write_segments'] = pd.DataFrame(rec['write_segments'])
        else:
            counters = []
            fcounters = []
            ids = []
            ranks = []

            for i, rec in enumerate(records):
                counters.append(rec['counters'])
                fcounters.append(rec['fcounters'])
                ids.append(rec['id'])
                ranks.append(rec['rank'])

            records = {"counters": None, "fcounters": None}
            records['counters'] = pd.DataFrame(counters, columns=self.report.counters[mod]['counters'])
            records['fcounters'] = pd.DataFrame(fcounters, columns=self.report.counters[mod]['fcounters'])

            def flip_column_order(df):
                return df[df.columns[::-1]]

            # attach ids and ranks as the leading columns (flip, append, flip)
            if attach is not None:
                for counter_type in ['counters', 'fcounters']:
                    records[counter_type] = flip_column_order(records[counter_type])
                    if 'id' in attach:
                        records[counter_type]['id'] = ids
                    if 'rank' in attach:
                        records[counter_type]['rank'] = ranks
                    records[counter_type] = flip_column_order(records[counter_type])

        return records


282
class DarshanReport(object):
283 284 285 286 287
    """
    The DarshanReport class provides a convienient wrapper to access darshan
    logs, which also caches already fetched information. In addition to that
    a number of common aggregations can be performed.
    """
288

Jakob Luettgau's avatar
Jakob Luettgau committed
289
    # a way to conserve memory?
290 291 292
    #__slots__ = ['attr1', 'attr2']


293
    def __init__(self, 
            filename=None, dtype='numpy', 
            start_time=None, end_time=None,
            automatic_summary=False,
            read_all=True, lookup_name_records=True):
        """
        Args:
            filename (str): filename to open (optional)
            dtype (str): default dtype for internal structures
            automatic_summary (bool): automatically generate summary after loading
            read_all (bool): whether to read all records for log
            lookup_name_records (bool): lookup and update name_records as records are loaded

        Return:
            None

        """
        self.filename = filename

        # behavioral options
        self.dtype = dtype                              # default dtype to return when viewing records
        self.automatic_summary = automatic_summary
        self.lookup_name_records = lookup_name_records

        # state-dependent book-keeping
        self.converted_records = False  # true if convert_records() was called (unnumpyfy)

        # report metadata: sentinel start/end times shrink/grow as logs merge;
        # the timebase anchors relative timestamps
        self.start_time = start_time if start_time else float('inf')
        self.end_time = end_time if end_time else float('-inf')
        self.timebase = self.start_time

        # data namespaces
        self._metadata = {}
        self._modules = {}
        self._counters = {}
        self.records = {}
        self._mounts = {}
        self.name_records = {}

        # report/summary namespace
        self.summary_revision = 0       # counter to check if summary needs update (see data_revision)
        self.summary = {}

        # legacy references (deprecate before 1.0?); the nested entries alias
        # the namespaces above, so mutation through either view is visible
        self.data_revision = 0          # counter for consistency checks
        self.data = {
            'version': 1,
            'metadata': self._metadata,
            'records': self.records,
            'summary': self.summary,
            'modules': self._modules,
            'counters': self.counters,
            'name_records': self.name_records,
        }

        # when using report algebra this log allows to untangle potentially
        # unfair aggregations (e.g., double accounting)
        self.provenance_enabled = True
        self.provenance_graph = []
        self.provenance_reports = {}

        if filename:
            self.open(filename, read_all=read_all)


Jakob Luettgau's avatar
Jakob Luettgau committed
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
    @property
    def metadata(self):
        """Report metadata namespace (job, exe, ...) backing self._metadata."""
        return self._metadata

    @property
    def modules(self):
        """Modules discovered in the log, backing self._modules."""
        return self._modules

    @property
    def counters(self):
        """Per-module counter name listings, backing self._counters."""
        return self._counters

#    @property
#    def counters(self):
#        return self._counters
#
#    @property
#    def name_records(self):
#        return self._name_records
#
#
#    @property
#    def summary(self):
#        return self._summary
#   
      

391 392 393
    def open(self, filename, read_all=False):
        """
        Open log file via CFFI backend.

        Args:
            filename (str): filename to open (optional)
            read_all (bool): whether to read all records for log

        Return:
            None

        """
        self.filename = filename

        # nothing to do without a filename
        if not filename:
            return

        self.log = backend.log_open(self.filename)
        # a null handle signals the backend failed to open the file
        if not bool(self.log['handle']):
            raise RuntimeError("Failed to open file.")

        self.read_metadata(read_all=read_all)

        if read_all:
            self.read_all()
415 416 417


    def __add__(self, other):
418
        """
419
        Allow reports to be merged using the addition operator.
420
        """
421

422
        return self.merge(other)
423

424

425 426 427
    def __deepcopy__(self, memo):
        """
        Creates a deepcopy of report.

        .. note::
            Needed to purge reference to self.log as Cdata can not be pickled:
            TypeError: can't pickle _cffi_backend.CData objects
        """
        # TODO: might consider treating self.log as list of open logs to not
        # deactivate load functions?
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            # blacklist of members not to copy (the CFFI log handle)
            if k in ["log"]:
                continue
            setattr(result, k, copy.deepcopy(v, memo))
        # fixed: removed unreachable duplicate `return result` and dead
        # statements that followed the first return
        return result
447

448

449
    def read_metadata(self, read_all=False):
        """
        Read metadata such as the job, the executables and available modules.

        Args:
            read_all (bool): whether to also fetch all name records

        Return:
            None

        """
        self.metadata['job'] = backend.log_get_job(self.log)
        self.metadata['exe'] = backend.log_get_exe(self.log)

        # NOTE(review): fromtimestamp() yields naive local-time datetimes —
        # confirm whether UTC-awareness is expected downstream
        self.start_time = datetime.datetime.fromtimestamp(self.metadata['job']['start_time'])
        self.end_time = datetime.datetime.fromtimestamp(self.metadata['job']['end_time'])

        self.data['mounts'] = backend.log_get_mounts(self.log)
        self.mounts = self.data['mounts']

        self.data['modules'] = backend.log_get_modules(self.log)
        self._modules = self.data['modules']

        # fixed: `read_all == True` -> truthiness test
        if read_all:
            self.data["name_records"] = backend.log_get_name_records(self.log)
            self.name_records = self.data['name_records']
475 476


477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502
    def update_name_records(self, mod=None):
        """
        Update (and prune unused) name records from resolve table.

        First reindexes all used name record identifiers and then queries 
        darshan-utils library to compile filtered list of name records.

        Args:
            mod (str): optional module to restrict the refresh to

        Return:
            None

        """
        # sanitize inputs: default to every module with loaded records
        mods = self.records if mod is None else [mod]

        # collect every record identifier currently referenced
        ids = set()
        for mod in mods:
            logger.debug(f" Refreshing name_records for mod={mod}")
            for rec in self.records[mod]:
                ids.add(rec['id'])

        # resolve the filtered id set through the darshan-utils library
        self.name_records.update(backend.log_lookup_name_records(self.log, ids))
509 510
        

511
    def read_all(self, dtype=None):
        """
        Read all available records from darshan log and return as dictionary.

        Args:
            dtype (str): dtype override for loaded records (default: report dtype)

        Return:
            None
        """
        # generic counter records, DXT traces and Lustre records each have a
        # dedicated reader
        self.read_all_generic_records(dtype=dtype)
        self.read_all_dxt_records(dtype=dtype)
        self.mod_read_all_lustre_records(dtype=dtype)
        return


529
    def read_all_generic_records(self, counters=True, fcounters=True, dtype=None):
        """
        Read all generic records from darshan log and return as dictionary.

        Args:
            counters (bool): currently unused; kept for API compatibility
            fcounters (bool): currently unused; kept for API compatibility
            dtype (str): dtype override for loaded records (default: report dtype)

        Return:
            None
        """
        effective_dtype = dtype if dtype else self.dtype

        # unsupported modules are silently skipped by mod_read_all_records
        for mod in self.data['modules']:
            self.mod_read_all_records(mod, dtype=effective_dtype, warnings=False)
544

545 546


547
    def read_all_dxt_records(self, reads=True, writes=True, dtype=None):
        """
        Read all dxt records from darshan log and return as dictionary.

        Args:
            reads (bool): whether to fetch read segments
            writes (bool): whether to fetch write segments
            dtype (str): dtype override for loaded records (default: report dtype)

        Return:
            None
        """
        effective_dtype = dtype if dtype else self.dtype

        # non-DXT modules are silently skipped by mod_read_all_dxt_records
        for mod in self.data['modules']:
            self.mod_read_all_dxt_records(mod, warnings=False, reads=reads, writes=writes, dtype=effective_dtype)
562 563 564



565
    def mod_read_all_records(self, mod, dtype=None, warnings=True):
        """
        Reads all generic records for module

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary, 'pandas'

        Return:
            None

        """
        unsupported =  ['DXT_POSIX', 'DXT_MPIIO', 'LUSTRE']

        if mod in unsupported:
            if warnings:
                logger.warning(f" Skipping. Currently unsupported: {mod} in mod_read_all_records().")
            # skip mod
            return 

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
        cn = backend.counter_names(mod)
        fcn = backend.fcounter_names(mod)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}
            self.counters[mod]['counters'] = cn 
            self.counters[mod]['fcounters'] = fcn

        # fetch records until the backend signals exhaustion with None
        # (fixed: identity comparison `is not None` instead of `!= None`)
        rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

        # process/combine records if the format dtype allows for this
        if dtype == 'pandas':
            # fixed: concatenate once instead of repeated pairwise pd.concat
            # (which copies the accumulated frame on every iteration);
            # also removed the dead `obj` local and commented-out debugging
            frames_c = [rec['counters'] for rec in self.records[mod]]
            frames_fc = [rec['fcounters'] for rec in self.records[mod]]
            combined_c = pd.concat(frames_c) if frames_c else None
            combined_fc = pd.concat(frames_fc) if frames_fc else None

            self.records[mod] = [{
                'rank': -1,
                'id': -1,
                'counters': combined_c,
                'fcounters': combined_fc
                }]
641

642

643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
    def mod_read_all_apmpi_records(self, mod, dtype=None, warnings=True):
        """ 
        Reads all APMPI records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f"Skipping. Log does not contain data for mod: {mod}")
            return

        supported =  ['APMPI'] 
        if mod not in supported:
            if warnings:
                # fixed message typo: "in in" -> "in"
                logger.warning(f" Skipping. Unsupported module: {mod} in mod_read_all_apmpi_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch records: first a header record, then PERF records until None
        # (fixed: `is not None` instead of `!= None`; bookkeeping now uses
        # self._modules for consistency — it aliases self.data['modules'])
        rec = backend.log_get_apmpi_record(self.log, mod, "HEADER", dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_apmpi_record(self.log, mod, "PERF", dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()
691 692


693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
    def mod_read_all_apxc_records(self, mod, dtype=None, warnings=True):
        """ 
        Reads all APXC records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f"Skipping. Log does not contain data for mod: {mod}")
            return

        supported =  ['APXC'] 
        if mod not in supported:
            if warnings:
                # fixed message typo: "in in" -> "in"
                logger.warning(f" Skipping. Unsupported module: {mod} in mod_read_all_apxc_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
        # removed unused `cn = backend.counter_names(mod)` — the name list was
        # never stored for APXC (presumably a pure lookup; verify no side effects)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch records: first a header record, then PERF records until None
        # (fixed: `is not None` instead of `!= None`)
        rec = backend.log_get_apxc_record(self.log, mod, "HEADER", dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_apxc_record(self.log, mod, "PERF", dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()


742
    def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, writes=True):
        """
        Reads all dxt records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary
            reads (bool): whether to fetch read segments
            writes (bool): whether to fetch write segments

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f"Skipping. Log does not contain data for mod: {mod}")
            return

        supported =  ['DXT_POSIX', 'DXT_MPIIO']

        if mod not in supported:
            if warnings:
                # fixed message typo: "in in" -> "in"
                logger.warning(f" Skipping. Unsupported module: {mod} in mod_read_all_dxt_records(). Supported: {supported}")
            # skip mod
            return 

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch records
        # fixed: the initial fetch now forwards reads/writes too — previously
        # only the in-loop fetch passed them, so the first record ignored the
        # requested read/write filtering
        rec = backend.log_get_dxt_record(self.log, mod, reads=reads, writes=writes, dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_dxt_record(self.log, mod, reads=reads, writes=writes, dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

794 795


796

797
    def mod_read_all_lustre_records(self, mod="LUSTRE", dtype=None, warnings=True):
798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823
        """
        Reads all dxt records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f" Skipping. Log does not contain data for mod: {mod}")
            return


        supported =  ['LUSTRE']

        if mod not in supported:
            if warnings:
                logger.warning(f" Skipping. Unsupported module: {mod} in in mod_read_all_dxt_records(). Supported: {supported}")
            # skip mod
            return 


824 825 826
        # handling options
        dtype = dtype if dtype else self.dtype

827

Jakob Luettgau's avatar
Jakob Luettgau committed
828
        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
829
        cn = backend.counter_names(mod)
830

831
        # update module metadata
Jakob Luettgau's avatar
Jakob Luettgau committed
832
        self._modules[mod]['num_records'] = 0
833 834
        if mod not in self.counters:
            self.counters[mod] = {}
835
            self.counters[mod]['counters'] = cn 
836 837


838
        # fetch records
839 840 841 842 843 844 845 846
        rec = backend.log_get_record(self.log, mod, dtype=dtype)
        while rec != None:
            self.records[mod].append(rec)
            self.data['modules'][mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_record(self.log, mod, dtype=dtype)

847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871

        if self.lookup_name_records:
            self.update_name_records()

        # process/combine records if the format dtype allows for this
        if dtype == 'pandas':
            combined_c = None

            for rec in self.records[mod]:
                obj = rec['counters']
                #print(type(obj))
                #display(obj)
                
                if combined_c is None:
                    combined_c = rec['counters']
                else:
                    combined_c = pd.concat([combined_c, rec['counters']])
                    

            self.records[mod] = [{
                'rank': -1,
                'id': -1,
                'counters': combined_c,
                }]

872 873 874 875




876 877
    def mod_records(self, mod, 
                    dtype='numpy', warnings=True):
        """
        Return generator for lazy record loading and traversal.

        .. warning::
            Can't be used for now when alternating between different modules.
            A temporary workaround can be to open the same log multiple times,
            as this way buffers are not shared between get_record invocations
            in the lower level library.

        Args:
            mod (str): Identifier of module to fetch records for
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        # make counter names available before the first record is yielded
        name_lists = (backend.counter_names(mod), backend.fcounter_names(mod))

        if mod not in self.counters:
            self.counters[mod] = {}
        self.counters[mod]['counters'] = name_lists[0]
        self.counters[mod]['fcounters'] = name_lists[1]

        # pull one record at a time from the backend; None signals exhaustion
        rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
        while rec is not None:
            yield rec

            # fetch next
            rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
910 911


912
    def info(self, metadata=False):
        """
        Print information about the record for inspection.

        Args:
            metadata (bool): show detailed metadata (default: False)

        Return:
            None
        """
        print(f"Filename:       {self.filename}")

        duration = self.end_time - self.start_time
        print(f"Times:          {self.start_time} to {self.end_time} (Duration {duration})")

        if 'exe' in self.metadata:
            print(f"Executeable:    {self.metadata['exe']}")

        if 'job' in self.metadata:
            job = self.metadata['job']
            print(f"Processes:      {job['nprocs']}")
            print(f"JobID:          {job['jobid']}")
            print(f"UID:            {job['uid']}")
            print(f"Modules in Log: {list(self._modules.keys())}")

        # Summarize how many records were actually loaded per module.
        loaded = {mod: len(recs) for mod, recs in self.records.items()}
        print(f"Loaded Records: {loaded}")

        print(f"Name Records:   {len(self.name_records)}")

        if 'job' in self.metadata:
            print(f"Darshan/Hints:  {self.metadata['job']['metadata']}")

        print(f"DarshanReport:  id({id(self)}) (tmp)")

        # Optionally dump the full metadata, flattening the nested 'job' dict.
        if metadata:
            for key, val in self.metadata.items():
                if key == "job":
                    for key2, val2 in self.metadata[key].items():
                        print(f"metadata['{key}']['{key2}'] = {val2}")
                else:
                    print(f"metadata['{key}'] = {val}")


980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003
    ###########################################################################
    # Internal Organisation
    ###########################################################################
    def rebase_timestamps(records=None, inplace=False, timebase=False):
        """
        Updates all records in the report to use timebase (defaults: start_time).
        This might allow to conserve memory as reports are merged.

        Args:
            records (dict, list):  records to rebase
            inplace (bool): whether to merely return a copy or to update records
            timebase (datetime.datetime): new timebase to use

        Return:
            rebased_records (same type as provided to records)
        """
        # NOTE(review): signature lacks `self` although defined at method
        # level; kept unchanged for backward compatibility — TODO confirm
        # intended usage. `inplace` and `timebase` are not applied yet.
        # Fixed: previously deepcopied undefined `record` and returned the
        # never-assigned `rebased_records`, raising NameError on any call.
        rebased_records = copy.deepcopy(records)

        # TODO: apply timestamp rebase
        # TODO: settle on format

        return rebased_records

    ###########################################################################
    # Export Conversions
    ###########################################################################
    def to_dict(self):
        """
        Return dictionary representation of report data.

        Args:
            None

        Return:
            dict
        """
        # Deep-copy so converting the records below does not mutate the
        # report's own data structures.
        data = copy.deepcopy(self.data)

        recs = data['records']
        for mod in recs:
            try:
                recs[mod] = recs[mod].to_list()
            except Exception:
                # Record container exposes no usable to_list(); leave a
                # marker instead of failing the whole conversion.
                # (Was a bare `except:`, which also swallowed SystemExit
                # and KeyboardInterrupt.)
                recs[mod] = "Not implemented."

        return data
1029 1030


1031
    def to_json(self):
        """
        Return JSON representation of report data as string.

        Args:
            None

        Return:
            JSON String
        """
        # Deep-copy so converting the records below does not mutate the
        # report's own data structures.
        data = copy.deepcopy(self.data)

        recs = data['records']
        for mod in recs:
            try:
                recs[mod] = recs[mod].to_list()
            except Exception:
                # Record container exposes no usable to_list(); leave a
                # marker instead of failing the whole conversion.
                # (Was a bare `except:`, which also swallowed SystemExit
                # and KeyboardInterrupt.)
                recs[mod] = "Not implemented."

        # DarshanReportJSONEncoder serializes numpy arrays and datetimes
        # that the default encoder cannot handle.
        return json.dumps(data, cls=DarshanReportJSONEncoder)