sensor.py 2.89 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
#
# This file is part of the NRM project.
# For more info, see https://xgitlab.cels.anl.gov/argo/nrm
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################

11 12 13 14 15 16 17
"""Sensor Module:
    provide the core functionalities related to measuring power, energy,
    temperature and other information about the local node, using our internal
    version of the coolr code.

    This module should be the only one interfacing with coolr.
"""
18
from __future__ import print_function
19
import time
20 21 22 23 24 25
import coolr
import coolr.clr_rapl
import coolr.clr_hwmon
import coolr.clr_nodeinfo
import coolr.clr_cpufreq
import coolr.clr_misc
26 27 28 29 30


class SensorManager:
    """Performs sensor reading and basic data aggregation.

    Thin wrapper around the coolr readers: RAPL for energy and power
    limits, hwmon coretemp for temperatures, and the MSR interface as an
    alternative path for setting power limits.
    """

    def __init__(self, use_msr_safe):
        """Create the coolr readers used by the other methods.

        use_msr_safe: when true, set_powerlimit() goes through the MSR
        interface instead of the RAPL reader.
        """
        self.use_msr_safe = use_msr_safe
        self.nodeconfig = coolr.clr_nodeinfo.nodeconfig()
        self.nodename = self.nodeconfig.nodename
        self.cputopology = coolr.clr_nodeinfo.cputopology()
        self.coretemp = coolr.clr_hwmon.coretemp_reader()
        self.rapl = coolr.clr_rapl.rapl_reader()
        # NOTE(review): coolr.msr is not among this module's explicit
        # imports; presumably the coolr package exposes it -- confirm.
        self.msr = coolr.msr.Msr()

    def start(self):
        """Start the RAPL energy counter."""
        self.rapl.start_energy_counter()

    def stop(self):
        """Stop the RAPL energy counter."""
        self.rapl.stop_energy_counter()

    def do_update(self):
        """Sample the sensors once.

        Returns a dict with three entries: 'energy' (the RAPL sample),
        'temperature' (the coretemp sample) and 'time' (time.time() taken
        after both samples).
        """
        return {
            'energy': self.rapl.sample(accflag=True),
            'temperature': self.coretemp.sample(),
            'time': time.time(),
        }

    def get_powerlimits(self):
        """Return the RAPL power limits, restricted to enabled domains."""
        limits = self.rapl.get_powerlimits()
        # only return enabled domains
        return {name: info for name, info in limits.items()
                if info['enabled']}

    def set_powerlimit(self, id, value):
        """Set a power limit.

        id is either a domain or a cpuid. The request goes to the MSR
        interface when use_msr_safe is set, and to the RAPL reader
        otherwise.
        """
        if self.use_msr_safe:
            self.msr.set_powerlimit(id, value)
        else:
            # The RAPL reader is called with (value, id), the reverse of
            # the MSR call above.
            self.rapl.set_powerlimit(value, id)

    def calc_difference(self, start, end):
        """Compute elapsed time, energy and power between two samples.

        start, end: dicts sharing the same keys. Every key except 'time'
        is rewritten with k.replace('p', 'package-') (e.g. 'p0' becomes
        'package-0') before being handed to the RAPL reader.

        The caller's dicts are left untouched: the renaming is done on
        shallow copies. Renaming the originals while iterating over
        start.keys() raises RuntimeError on Python 3, and would also
        corrupt the samples for any later use.

        Returns a dict with:
          'time'   -- the 'time' entry reported by rapl.diffenergy()
          'energy' -- per-key energy difference, converted from uJ to J
          'power'  -- result of rapl.calcpower() without its 'delta' entry

        Raises KeyError if a non-'time' key of start is missing from end.
        """
        # Snapshot the keys first so the dicts can be modified safely.
        short_keys = [k for k in start if k != 'time']
        start = dict(start)
        end = dict(end)
        for k in short_keys:
            long_key = k.replace('p', 'package-')
            # pop-then-assign keeps a key whose name does not change
            start[long_key] = start.pop(k)
            end[long_key] = end.pop(k)

        diff = dict()

        # Calculate energy difference
        energy = self.rapl.diffenergy(start, end)
        # 'time' is reported alongside the energy values: move it out
        diff['time'] = energy.pop('time')
        # Convert uJ to J
        diff['energy'] = {k: v / 1000000.0 for k, v in energy.items()}

        # Calculate power difference
        power = self.rapl.calcpower(start, end)
        # Remove 'delta' field returned by function
        power.pop('delta')
        diff['power'] = power

        return diff