# daemon.py
from __future__ import print_function

3 4
from containers import ContainerManager
from resources import ResourceManager
5
import json
6
import logging
7
import os
8
import re
9
import sensor
10 11 12
import signal
import zmq
from zmq.eventloop import ioloop, zmqstream
13

14

15 16 17 18 19 20 21 22
# Per-application state machine: maps a state name to the messages it reacts
# to, and each message to the state that follows it.  'i' and 'd' are the
# requests the daemon sends to an application from do_control; 'done', 'min'
# and 'max' are parsed from the application's stream.  A message with no
# entry for the current state leaves the state unchanged (see
# Application.do_transition), and 'nop' has no outgoing transitions at all.
application_fsm_table = {
    'stable': {'i': 's_ask_i', 'd': 's_ask_d'},
    's_ask_i': {'done': 'stable', 'max': 'max'},
    's_ask_d': {'done': 'stable', 'min': 'min'},
    'max': {'d': 'max_ask_d'},
    'min': {'i': 'min_ask_i'},
    'max_ask_d': {'done': 'stable', 'min': 'nop'},
    'min_ask_i': {'done': 'stable', 'max': 'nop'},
    'nop': {},
}
23

24 25
# Logger named 'nrm', used by the classes and functions in this module.
logger = logging.getLogger('nrm')

26

27
class Application(object):
    """Protocol state for one downstream application connection.

    An instance accumulates the raw data received from a client in ``buf``,
    splits it into protocol messages (``get_messages``) and tracks the
    client's position in ``application_fsm_table`` (``state``).
    """

    # Both patterns are applied with ``match(buf, pos)`` so they only ever
    # match at the current parse position, never further into the buffer.
    _DONE_RE = re.compile(r"done \((\d+)\)")
    _NUMBER_RE = re.compile(r"\d+")

    def __init__(self, identity):
        """Create the state for the client known by ``identity``.

        A new application starts in the 'stable' state with an empty buffer.
        """
        self.identity = identity
        self.buf = ''
        self.state = 'stable'

    def append_buffer(self, msg):
        """Append newly received data to the not-yet-parsed buffer."""
        self.buf = self.buf + msg

    def do_transition(self, msg):
        """Advance the state machine on ``msg``.

        Messages that have no transition from the current state are ignored
        and leave ``state`` unchanged.
        """
        transitions = application_fsm_table[self.state]
        if msg in transitions:
            self.state = transitions[msg]

    def get_allowed_requests(self):
        """Return the messages that have a transition from the current state."""
        return application_fsm_table[self.state].keys()

    def get_messages(self):
        """Yield the messages found at the front of the buffer.

        The stream is a concatenation of ``min``, ``max``, ``done (<digits>)``
        and bare ``<digits>``; they are yielded as 'min', 'max', 'done' and
        'ok' respectively.  Parsing stops at the first position that does not
        hold a complete message -- for instance a ``done (12`` fragment whose
        closing parenthesis has not arrived yet -- and everything from there
        on stays in ``buf`` for a later call.

        ``buf`` is only trimmed once the generator has run to completion, so
        callers are expected to exhaust it.
        """
        buf = self.buf
        begin = 0
        while begin < len(buf):
            if buf.startswith('min', begin):
                ret = 'min'
                off = len(ret)
            elif buf.startswith('max', begin):
                ret = 'max'
                off = len(ret)
            elif buf.startswith('done (', begin):
                m = self._DONE_RE.match(buf, begin)
                if m is None:
                    # incomplete (or malformed) 'done' message: keep it
                    # buffered instead of failing on the missing group
                    break
                ret = 'done'
                off = m.end() - begin
            else:
                m = self._NUMBER_RE.match(buf, begin)
                if m is None:
                    break
                ret = 'ok'
                off = m.end() - begin
            begin = begin + off
            yield ret
        self.buf = buf[begin:]


class Daemon(object):
    """Event-loop driven daemon.

    It tracks the applications connected on the downstream stream socket,
    executes the commands received on the upstream SUB socket ('setpower',
    'run', 'kill', 'list'), publishes sensor readings and container events on
    the upstream PUB socket, and periodically asks applications for 'i' or
    'd' depending on how the measured total power compares to ``target``.

    The sockets, managers and sensor are created in ``main``; the other
    ``do_*`` methods are callbacks and rely on ``main`` having run first.
    """

    def __init__(self):
        # identity frame of a downstream client -> Application
        self.applications = {}
        # pid of a started container -> uuid it was requested under
        self.containerpids = {}
        self.buf = ''
        # value the measured total power is compared against in do_control;
        # replaced by the 'setpower' upstream command
        self.target = 1.0

    def do_application_receive(self, parts):
        """Handle one multipart message from the downstream socket.

        ``parts[0]`` is the client identity and ``parts[1]`` the payload.
        An empty payload toggles the client between connected and
        disconnected; anything else is appended to the client's buffer and
        every complete message found is fed to its state machine.  Data from
        an identity that is not registered is dropped.
        """
        logger.info("receiving application stream: %r", parts)
        identity = parts[0]

        if len(parts[1]) == 0:
            # empty frame, indicate connect/disconnect
            if identity in self.applications:
                logger.info("known client disconnected")
                del self.applications[identity]
            else:
                logger.info("new client: " + repr(identity))
                self.applications[identity] = Application(identity)
        else:
            if identity in self.applications:
                application = self.applications[identity]
                # we need to unpack the stream into application messages
                # messages can be: min, max, done (%d), %d
                application.append_buffer(parts[1])
                for m in application.get_messages():
                    application.do_transition(m)
                    logger.info("application now in state: %s",
                                application.state)

    def do_upstream_receive(self, parts):
        """Handle one message from the upstream SUB socket.

        The message must be a single frame holding a JSON object with a
        'command' key.  Anything else is logged and dropped.  Supported
        commands:

        * 'setpower': store ``float(msg['limit'])`` as the new target.
        * 'run': create a container and publish a 'start' event carrying
          either the pid or, when creation returned a non-positive value,
          that value as 'errno'.
        * 'kill': ask the container manager to kill ``msg['uuid']``.
        * 'list': publish the container manager's list.
        """
        logger.info("receiving upstream message: %r", parts)
        if len(parts) != 1:
            logger.error("unexpected msg length, dropping it: %r", parts)
            return
        try:
            msg = json.loads(parts[0])
        except ValueError:
            # malformed JSON must not propagate out of the receive callback
            logger.error("invalid json, dropping it: %r", parts)
            return
        if not isinstance(msg, dict):
            logger.error("unexpected msg format, dropping it: %r", msg)
            return

        command = msg.get('command')
        # TODO: switch to a dispatch dictionary
        if command is None:
            logger.error("missing command in message: %r", msg)
            return
        if command == 'setpower':
            self.target = float(msg['limit'])
            logger.info("new target measure: %g", self.target)
        elif command == 'run':
            logger.info("new container required: %r", msg)
            pid = self.container_manager.create(msg)
            if pid > 0:
                self.containerpids[pid] = msg['uuid']
                # TODO: obviously we need to send more info than that
                update = {'type': 'container',
                          'event': 'start',
                          'uuid': msg['uuid'],
                          'errno': 0,
                          'pid': pid,
                          }
                self.upstream_pub.send_json(update)
            else:
                update = {'type': 'container',
                          'event': 'start',
                          'uuid': msg['uuid'],
                          'errno': pid,
                          }
                self.upstream_pub.send_json(update)
        elif command == 'kill':
            logger.info("asked to kill container: %r", msg)
            self.container_manager.kill(msg['uuid'])
            # no update here, as it will trigger child exit
        elif command == 'list':
            logger.info("asked for container list: %r", msg)
            response = self.container_manager.list()
            update = {'type': 'container',
                      'event': 'list',
                      'payload': response,
                      }
            self.upstream_pub.send_json(update)
        else:
            logger.error("invalid command: %r", command)

    def do_sensor(self):
        """Refresh ``machine_info`` from the sensor and publish the power."""
        self.machine_info = self.sensor.do_update()
        logger.info("current state: %r", self.machine_info)
        total_power = self.machine_info['energy']['power']['total']
        msg = {'type': 'power',
               'total': total_power,
               'limit': self.target
               }
        self.upstream_pub.send_json(msg)
        logger.info("sending sensor message: %r", msg)

    def do_control(self):
        """Send 'i' or 'd' to every application that currently accepts it.

        'i' is sent while the last measured total power is below the target,
        'd' while it is above; nothing is sent when they are equal or when
        the application's state does not allow the request.
        """
        total_power = self.machine_info['energy']['power']['total']

        # items() rather than iteritems(): works on Python 2 and 3 alike
        for identity, application in self.applications.items():
            if total_power < self.target:
                if 'i' in application.get_allowed_requests():
                    self.downstream.send_multipart([identity, 'i'])
                    application.do_transition('i')
            elif total_power > self.target:
                if 'd' in application.get_allowed_requests():
                    self.downstream.send_multipart([identity, 'd'])
                    application.do_transition('d')
            logger.info("application now in state: %s", application.state)

    def do_signal(self, signum, frame):
        """Signal handler: defer the real work to the IO loop.

        SIGINT schedules ``do_shutdown`` and SIGCHLD schedules
        ``do_children``; any other signal is logged as an error.
        """
        if signum == signal.SIGINT:
            ioloop.IOLoop.current().add_callback_from_signal(self.do_shutdown)
        elif signum == signal.SIGCHLD:
            ioloop.IOLoop.current().add_callback_from_signal(self.do_children)
        else:
            logger.error("wrong signal: %d", signum)

    def do_children(self):
        """Reap terminated children and report exits of known containers."""
        # find out if children have terminated
        while True:
            try:
                pid, status, rusage = os.wait3(os.WNOHANG)
                if pid == 0 and status == 0:
                    break
            except OSError:
                break

            logger.info("child update %d: %r", pid, status)
            # check if its a pid we care about
            if pid in self.containerpids:
                # check if this is an exit
                if os.WIFEXITED(status) or os.WIFSIGNALED(status):
                    # the child is gone: drop the pid so the mapping does
                    # not keep stale entries around
                    uuid = self.containerpids.pop(pid)
                    self.container_manager.delete(uuid)
                    msg = {'type': 'container',
                           'event': 'exit',
                           'status': status,
                           'uuid': uuid,
                           }
                    self.upstream_pub.send_json(msg)
            else:
                logger.debug("child update ignored")

    def do_shutdown(self):
        """Stop the sensor and the IO loop."""
        self.sensor.stop()
        ioloop.IOLoop.current().stop()

    def main(self):
        """Create sockets, managers, sensor and timers, then run the IO loop.

        This call blocks in ``IOLoop.start()`` until the loop is stopped
        (see ``do_shutdown``).
        """
        # Bind port for downstream clients
        bind_port = 1234
        # Bind address for downstream clients
        bind_address = '*'
        # PUB port for upstream clients
        upstream_pub_port = 2345
        # SUB port for upstream clients
        upstream_sub_port = 3456

        # setup application listening socket
        context = zmq.Context()
        downstream_socket = context.socket(zmq.STREAM)
        upstream_pub_socket = context.socket(zmq.PUB)
        upstream_sub_socket = context.socket(zmq.SUB)

        downstream_bind_param = "tcp://%s:%d" % (bind_address, bind_port)
        upstream_pub_param = "tcp://%s:%d" % (bind_address, upstream_pub_port)
        upstream_sub_param = "tcp://localhost:%d" % (upstream_sub_port)

        downstream_socket.bind(downstream_bind_param)
        upstream_pub_socket.bind(upstream_pub_param)
        upstream_sub_socket.connect(upstream_sub_param)
        # an empty filter subscribes to every message
        upstream_sub_filter = ""
        upstream_sub_socket.setsockopt(zmq.SUBSCRIBE, upstream_sub_filter)

        logger.info("downstream socket bound to: %s", downstream_bind_param)
        logger.info("upstream pub socket bound to: %s", upstream_pub_param)
        logger.info("upstream sub socket connected to: %s", upstream_sub_param)

        # register socket triggers
        self.downstream = zmqstream.ZMQStream(downstream_socket)
        self.downstream.on_recv(self.do_application_receive)
        self.upstream_sub = zmqstream.ZMQStream(upstream_sub_socket)
        self.upstream_sub.on_recv(self.do_upstream_receive)
        # create a stream to let ioloop deal with blocking calls on HWM
        self.upstream_pub = zmqstream.ZMQStream(upstream_pub_socket)

        # create resource and container manager
        self.resource_manager = ResourceManager()
        self.container_manager = ContainerManager(self.resource_manager)

        # create sensor manager and make first measurement
        self.sensor = sensor.SensorManager()
        self.sensor.start()
        self.machine_info = self.sensor.do_update()

        # setup periodic sensor updates
        self.sensor_cb = ioloop.PeriodicCallback(self.do_sensor, 1000)
        self.sensor_cb.start()

        self.control = ioloop.PeriodicCallback(self.do_control, 1000)
        self.control.start()

        # take care of signals
        signal.signal(signal.SIGINT, self.do_signal)
        signal.signal(signal.SIGCHLD, self.do_signal)

        ioloop.IOLoop.current().start()


def runner():
    """Entry point: set up the IO loop and logging, then run a Daemon.

    Calls ``ioloop.install()``, configures root logging at DEBUG level and
    hands control to ``Daemon.main()``.
    """
    ioloop.install()
    logging.basicConfig(level=logging.DEBUG)
    Daemon().main()