daemon.py 10.2 KB
Newer Older
1 2
from __future__ import print_function

3 4
from containers import ContainerManager
from resources import ResourceManager
5
import json
6
import logging
7
import os
8
import re
9
import sensor
10 11 12
import signal
import zmq
from zmq.eventloop import ioloop, zmqstream
13

14

15 16 17 18 19 20 21 22
# Finite-state machine driving each Application: maps
# current state -> {message: next state}.
# 'i' and 'd' are the requests the daemon sends to an application in
# Daemon.do_control (presumably "increase"/"decrease" -- confirm with the
# application-side protocol); 'done', 'min' and 'max' are messages parsed
# from the application stream by Application.get_messages.
# A message that is not listed for the current state leaves the state as is.
application_fsm_table = {
    'stable': {'i': 's_ask_i', 'd': 's_ask_d'},
    # a request is pending; wait for the application's answer
    's_ask_i': {'done': 'stable', 'max': 'max'},
    's_ask_d': {'done': 'stable', 'min': 'min'},
    # at a bound: only the opposite request is allowed
    'max': {'d': 'max_ask_d'},
    'min': {'i': 'min_ask_i'},
    'max_ask_d': {'done': 'stable', 'min': 'nop'},
    'min_ask_i': {'done': 'stable', 'max': 'nop'},
    # terminal state: no transition leaves it
    'nop': {},
}
23

24 25
# Module-level logger; every log call in this file goes through the
# 'nrm' logger.
logger = logging.getLogger('nrm')

26

27
class Application(object):
    """State kept by the daemon for one connected downstream application.

    Attributes:
        identity: the value the application was registered under.
        buf: data received from the application that has not been parsed
            into messages yet.
        state: current state name, a key of ``application_fsm_table``.
    """

    # Patterns are matched at an explicit offset with Pattern.match, so they
    # only ever recognise a message that starts exactly at that offset.
    _DONE_RE = re.compile(r"done \((\d+)\)")
    _NUMBER_RE = re.compile(r"\d+")

    def __init__(self, identity):
        """Create an application in the 'stable' state with an empty buffer."""
        self.identity = identity
        self.buf = ''
        self.state = 'stable'

    def append_buffer(self, msg):
        """Append newly received stream data to the unparsed buffer."""
        self.buf = self.buf + msg

    def do_transition(self, msg):
        """Advance the state machine on *msg*.

        A message that is not allowed in the current state is ignored and
        the state is left unchanged.
        """
        transitions = application_fsm_table[self.state]
        if msg in transitions:
            self.state = transitions[msg]

    def get_allowed_requests(self):
        """Return the messages that trigger a transition in the current state."""
        return application_fsm_table[self.state].keys()

    def get_messages(self):
        """Yield the messages contained in the buffer, in order.

        Recognised wire formats and the value yielded for each:
        ``min`` -> 'min', ``max`` -> 'max', ``done (<digits>)`` -> 'done',
        ``<digits>`` -> 'ok'.

        Parsing stops at the first position that does not hold a complete
        recognised message; everything from there on stays in ``self.buf``
        so it can be completed by a later ``append_buffer`` call.  In
        particular a partially received ``done (12`` is kept instead of
        raising IndexError.  Data that can never become a valid message
        stays at the head of the buffer, exactly like any other unknown
        token.

        ``self.buf`` is only trimmed once the generator is exhausted, so
        callers must iterate to the end.
        """
        buf = self.buf
        begin = 0
        while begin < len(buf):
            if buf.startswith('min', begin):
                ret = 'min'
                end = begin + len(ret)
            elif buf.startswith('max', begin):
                ret = 'max'
                end = begin + len(ret)
            elif buf.startswith('done (', begin):
                m = self._DONE_RE.match(buf, begin)
                if m is None:
                    # incomplete (or malformed) 'done' message: wait for
                    # more data instead of failing
                    break
                ret = 'done'
                end = m.end()
            else:
                m = self._NUMBER_RE.match(buf, begin)
                if m is None:
                    break
                ret = 'ok'
                end = m.end()
            begin = end
            yield ret
        self.buf = buf[begin:]


class Daemon(object):
    """Event-loop driven daemon tying together applications, containers,
    the sensor manager and the upstream channel.

    Sockets, managers and periodic callbacks are created in ``main``; the
    ``do_*`` methods are the callbacks registered there.
    """

    def __init__(self):
        """Initialise the in-memory state; no socket is opened here."""
        # identity frame of a downstream connection -> Application
        self.applications = {}
        # pid of a container process -> uuid taken from the 'run' request
        self.containerpids = {}
        self.buf = ''
        # value the measured total power is compared against in do_control
        self.target = 1.0

    def do_application_receive(self, parts):
        """Handle one multipart message from the downstream STREAM socket.

        ``parts[0]`` is the connection identity and ``parts[1]`` the data.
        An empty data frame signals a connect (unknown identity) or a
        disconnect (known identity).  Any other data is appended to the
        application's buffer and every complete message in it is fed to
        the application's state machine.  Data from an unknown identity is
        ignored.
        """
        logger.info("receiving application stream: %r", parts)
        identity = parts[0]
        payload = parts[1]

        if len(payload) == 0:
            # empty frame, indicate connect/disconnect
            if identity in self.applications:
                logger.info("known client disconnected")
                del self.applications[identity]
            else:
                logger.info("new client: " + repr(identity))
                self.applications[identity] = Application(identity)
            return

        application = self.applications.get(identity)
        if application is None:
            return
        # we need to unpack the stream into application messages
        # messages can be: min, max, done (%d), %d
        application.append_buffer(payload)
        for m in application.get_messages():
            application.do_transition(m)
            logger.info("application now in state: %s",
                        application.state)

    def do_upstream_receive(self, parts):
        """Handle one message received on the upstream SUB socket.

        The message must be a single frame holding a JSON object with a
        'command' key.  Supported commands:

        * 'setpower': set ``self.target`` to ``float(msg['limit'])``.
        * 'run': create a container from the message, remember its pid and
          publish a 'start' event upstream.
        * 'kill': ask the container manager to kill ``msg['uuid']``.
        * 'list': publish the container list upstream.

        Malformed messages (wrong frame count, invalid JSON, not a JSON
        object, missing or unknown command, unusable power limit) are
        logged and dropped rather than raised into the event loop.
        """
        logger.info("receiving upstream message: %r", parts)
        if len(parts) != 1:
            logger.error("unexpected msg length, dropping it: %r", parts)
            return
        try:
            msg = json.loads(parts[0])
        except ValueError:
            # json raises ValueError (or a subclass of it) on bad input
            logger.error("invalid json, dropping it: %r", parts[0])
            return
        if not isinstance(msg, dict):
            logger.error("unexpected msg type, dropping it: %r", msg)
            return

        command = msg.get('command')
        # TODO: switch to a dispatch dictionary
        if command is None:
            logger.error("missing command in message: %r", msg)
            return
        if command == 'setpower':
            try:
                limit = float(msg['limit'])
            except (KeyError, TypeError, ValueError):
                logger.error("invalid limit in message: %r", msg)
                return
            self.target = limit
            logger.info("new target measure: %g", self.target)
        elif command == 'run':
            logger.info("new container required: %r", msg)
            process = self.container_manager.create(msg)
            self.containerpids[process.pid] = msg['uuid']
            # TODO: obviously we need to send more info than that
            update = {'type': 'container',
                      'event': 'start',
                      'uuid': msg['uuid'],
                      'errno': 0,
                      'pid': process.pid,
                      }
            self.upstream_pub.send_json(update)
        elif command == 'kill':
            logger.info("asked to kill container: %r", msg)
            self.container_manager.kill(msg['uuid'])
            # no update here, as it will trigger child exit
        elif command == 'list':
            logger.info("asked for container list: %r", msg)
            response = self.container_manager.list()
            update = {'type': 'container',
                      'event': 'list',
                      'payload': response,
                      }
            self.upstream_pub.send_json(update)
        else:
            logger.error("invalid command: %r", command)

    def do_sensor(self):
        """Refresh ``self.machine_info`` and publish a 'power' message
        (measured total and current target) upstream."""
        self.machine_info = self.sensor.do_update()
        logger.info("current state: %r", self.machine_info)
        total_power = self.machine_info['energy']['power']['total']
        msg = {'type': 'power',
               'total': total_power,
               'limit': self.target
               }
        self.upstream_pub.send_json(msg)
        logger.info("sending sensor message: %r", msg)

    def do_control(self):
        """Send an 'i' or 'd' request to every application that allows it.

        'i' is chosen when the last measured total power is below the
        target, 'd' when it is above, nothing when they are equal.  The
        request is only sent to applications whose state machine accepts
        it, and the matching transition is applied right after sending.
        """
        total_power = self.machine_info['energy']['power']['total']
        # neither value changes inside the loop, so pick the request once
        if total_power < self.target:
            request = 'i'
        elif total_power > self.target:
            request = 'd'
        else:
            request = None

        # items() works on both Python 2 and 3; the loop never adds or
        # removes applications
        for identity, application in self.applications.items():
            if (request is not None
                    and request in application.get_allowed_requests()):
                self.downstream.send_multipart([identity, request])
                application.do_transition(request)
            logger.info("application now in state: %s", application.state)

    def do_signal(self, signum, frame):
        """Signal handler: defer the real work to the IOLoop.

        SIGINT schedules ``do_shutdown`` and SIGCHLD schedules
        ``do_children``; any other signal is logged as an error.
        """
        if signum == signal.SIGINT:
            ioloop.IOLoop.current().add_callback_from_signal(self.do_shutdown)
        elif signum == signal.SIGCHLD:
            ioloop.IOLoop.current().add_callback_from_signal(self.do_children)
        else:
            logger.error("wrong signal: %d", signum)

    def do_children(self):
        """Reap every child with a pending status change.

        For a container process that exited or was killed by a signal, the
        container is deleted, its pid is forgotten and an 'exit' event is
        published upstream.  Status changes of other children are ignored.
        """
        # find out if children have terminated
        while True:
            try:
                pid, status, rusage = os.wait3(os.WNOHANG)
            except OSError:
                # raised when there is no child left to wait for
                break
            if pid == 0 and status == 0:
                # children exist but none has a status change to report
                break

            logger.info("child update %d: %r", pid, status)
            # check if its a pid we care about
            if pid not in self.containerpids:
                logger.debug("child update ignored")
                continue
            # check if this is an exit
            if os.WIFEXITED(status) or os.WIFSIGNALED(status):
                # drop the mapping: the process is gone, and a stale entry
                # would match an unrelated child if the pid is reused
                uuid = self.containerpids.pop(pid)
                self.container_manager.delete(uuid)
                msg = {'type': 'container',
                       'event': 'exit',
                       'status': status,
                       'uuid': uuid,
                       }
                self.upstream_pub.send_json(msg)

    def do_shutdown(self):
        """Stop the sensor manager, then stop the IOLoop."""
        self.sensor.stop()
        ioloop.IOLoop.current().stop()

    def main(self):
        """Create sockets, managers, periodic callbacks and signal
        handlers, then run the IOLoop until it is stopped."""
        # Bind port for downstream clients
        bind_port = 1234
        # Bind address for downstream clients
        bind_address = '*'
        # PUB port for upstream clients
        upstream_pub_port = 2345
        # SUB port for upstream clients
        upstream_sub_port = 3456

        # setup application listening socket
        context = zmq.Context()
        downstream_socket = context.socket(zmq.STREAM)
        upstream_pub_socket = context.socket(zmq.PUB)
        upstream_sub_socket = context.socket(zmq.SUB)

        downstream_bind_param = "tcp://%s:%d" % (bind_address, bind_port)
        upstream_pub_param = "tcp://%s:%d" % (bind_address, upstream_pub_port)
        upstream_sub_param = "tcp://localhost:%d" % (upstream_sub_port)

        downstream_socket.bind(downstream_bind_param)
        upstream_pub_socket.bind(upstream_pub_param)
        upstream_sub_socket.connect(upstream_sub_param)
        # empty filter: subscribe to every upstream message
        upstream_sub_filter = ""
        upstream_sub_socket.setsockopt(zmq.SUBSCRIBE, upstream_sub_filter)

        logger.info("downstream socket bound to: %s", downstream_bind_param)
        logger.info("upstream pub socket bound to: %s", upstream_pub_param)
        logger.info("upstream sub socket connected to: %s", upstream_sub_param)

        # register socket triggers
        self.downstream = zmqstream.ZMQStream(downstream_socket)
        self.downstream.on_recv(self.do_application_receive)
        self.upstream_sub = zmqstream.ZMQStream(upstream_sub_socket)
        self.upstream_sub.on_recv(self.do_upstream_receive)
        # create a stream to let ioloop deal with blocking calls on HWM
        self.upstream_pub = zmqstream.ZMQStream(upstream_pub_socket)

        # create resource and container manager
        self.resource_manager = ResourceManager()
        self.container_manager = ContainerManager(self.resource_manager)

        # create sensor manager and make first measurement
        self.sensor = sensor.SensorManager()
        self.sensor.start()
        self.machine_info = self.sensor.do_update()

        # setup periodic sensor updates
        self.sensor_cb = ioloop.PeriodicCallback(self.do_sensor, 1000)
        self.sensor_cb.start()

        self.control = ioloop.PeriodicCallback(self.do_control, 1000)
        self.control.start()

        # take care of signals
        signal.signal(signal.SIGINT, self.do_signal)
        signal.signal(signal.SIGCHLD, self.do_signal)

        ioloop.IOLoop.current().start()


def runner():
    """Entry point: install the zmq IOLoop, turn on DEBUG logging and run
    a Daemon until its event loop stops."""
    ioloop.install()
    logging.basicConfig(level=logging.DEBUG)
    Daemon().main()