messaging.py 10.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
#
# This file is part of the NRM project.
# For more info, see https://xgitlab.cels.anl.gov/argo/nrm
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from collections import namedtuple
import json
import logging
import uuid
import zmq
import zmq.utils
import zmq.utils.monitor
from zmq.eventloop import zmqstream

# basestring support: Python 3 has no ``basestring`` builtin, so evaluating
# the bare name raises NameError there; in that case alias it to ``str`` so
# the message format tables below can use ``basestring`` on either version.
try:
    basestring
except NameError:
    basestring = str

# Logger named 'nrm', used for all debug/info output of this module.
logger = logging.getLogger('nrm')

# list of APIs supported by this messaging layer. Each message is
# indexed by its intended api user and the type of the message, along with
# basic field type information.
31
APIS = [
    'up_rpc_req',
    'up_rpc_rep',
    'up_pub',
    'down_event',
]
# One independent, initially empty, format table per API name.
MSGFORMATS = dict((api_name, {}) for api_name in APIS)
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

# Formats of the 'up_rpc_req' API: message type -> {field name: expected
# Python type}. A message type mapped to an empty dict carries no fields
# besides 'api' and 'type'.
MSGFORMATS['up_rpc_req'] = {
    'list': {},
    'run': {
        'manifest': basestring,
        'path': basestring,
        'args': list,
        'container_uuid': basestring,
        'environ': dict,
    },
    'kill': {'container_uuid': basestring},
    'setpower': {'limit': basestring},
}

# Formats of the 'up_rpc_rep' API: message type -> {field name: expected
# Python type}.
MSGFORMATS['up_rpc_rep'] = {
    'list': {'payload': list},
    'stdout': {
        'container_uuid': basestring,
        'payload': basestring,
    },
    'stderr': {
        'container_uuid': basestring,
        'payload': basestring,
    },
    'process_start': {
        'container_uuid': basestring,
        'pid': int,
    },
    'process_exit': {
        'container_uuid': basestring,
        'status': basestring,
    },
    'getpower': {'limit': basestring},
}

# Formats of the 'up_pub' API: message type -> {field name: expected Python
# type}.
MSGFORMATS['up_pub'] = {
    'power': {
        'total': float,
        'limit': float,
    },
    'container_start': {
        'container_uuid': basestring,
        'errno': int,
        'power': basestring,
    },
    'container_exit': {
        'container_uuid': basestring,
        'profile_data': dict,
    },
    'performance': {
        'container_uuid': basestring,
        'payload': int,
    },
    'progress': {
        'application_uuid': basestring,
        'payload': int,
    },
}

# Formats of the 'down_event' API: message type -> {field name: expected
# Python type}.
MSGFORMATS['down_event'] = {
    'application_start': {
        'container_uuid': basestring,
        'application_uuid': basestring,
    },
    'application_exit': {'application_uuid': basestring},
    'performance': {
        'container_uuid': basestring,
        'application_uuid': basestring,
        'payload': int,
    },
    'progress': {
        'application_uuid': basestring,
        'payload': int,
    },
    'phase_context': {
        'cpu': int,
        'startcompute': int,
        'endcompute': int,
        'startbarrier': int,
        'endbarrier': int,
    },
}
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174

# Mirror of the message formats, using namedtuples as the actual transport
# for users of this messaging layer. Each tuple type is named
# "msg_<api>_<type>" and has the 'api' and 'type' fields in addition to the
# fields declared for that message in MSGFORMATS, in sorted order.
MSGTYPES = {k: {} for k in APIS}
for api, types in MSGFORMATS.items():
    tname = "msg_{}_".format(api)
    # list(v) instead of v.keys(): on Python 3 keys() returns a view object,
    # which cannot be concatenated to a list (TypeError).
    MSGTYPES[api] = {
        k: namedtuple(tname + k, sorted(['api', 'type'] + list(v)))
        for k, v in types.items()
    }


def wire2msg(wire_msg):
    """Convert the wire format into a msg from the available MSGTYPES.

    ``wire_msg`` is a JSON document. Its 'api' and 'type' members select a
    format in MSGFORMATS; every other member must be declared by that format,
    every declared field must be present, and each value must be an instance
    of the declared type.

    Returns the namedtuple type MSGTYPES[api][type] instantiated with the
    decoded members.

    Raises AssertionError when the document does not match a known format
    (raised explicitly, so the checks are still performed under
    ``python -O``). Errors from ``json.loads`` propagate unchanged.
    """
    fields = json.loads(wire_msg)
    if not isinstance(fields, dict):
        raise AssertionError("message is not a JSON object: %r" % (fields,))

    # Select the format from the two mandatory routing members.
    if 'api' not in fields:
        raise AssertionError("'api' missing from %r" % (fields,))
    api = fields['api']
    if api not in MSGFORMATS:
        raise AssertionError("unknown api %r" % (api,))
    valid_types = MSGFORMATS[api]
    if 'type' not in fields:
        raise AssertionError("'type' missing from %r" % (fields,))
    mtype = fields['type']
    if mtype not in valid_types:
        raise AssertionError("unknown type %r for api %r" % (mtype, api))

    # format check
    fmt = valid_types[mtype]
    for key, value in fields.items():
        if key in ('api', 'type'):
            continue
        if key not in fmt:
            raise AssertionError("unexpected field %r, not in %r" % (key, fmt))
        if not isinstance(value, fmt[key]):
            raise AssertionError(
                "type mismatch for %r: %r != %r" % (key, value, fmt[key]))
    # Types of the fields that are present were checked above; only
    # presence of every declared field remains to be verified.
    for key in fmt:
        if key not in fields:
            raise AssertionError("%r missing from %r" % (key, fields))

    mtuple = MSGTYPES[api][mtype]
    return mtuple(**fields)


def msg2wire(msg):
    """Serialize a message namedtuple to its wire format.

    The tuple is turned into a field-name -> value mapping with
    ``_asdict()`` and that mapping is returned as a JSON string.
    """
    return json.dumps(msg._asdict())


class UpstreamRPCClient(object):

    """Implements the message layer client to the upstream RPC API.

    Wraps a ZMQ DEALER socket whose identity is a freshly generated UUID
    (available as ``self.uuid``). Messages are namedtuples converted with
    msg2wire/wire2msg.
    """

    def __init__(self, address):
        """Create the socket for ``address``; connect() does the connection."""
        self.address = address
        self.uuid = str(uuid.uuid4())
        self.zmq_context = zmq.Context.instance()
        self.socket = self.zmq_context.socket(zmq.DEALER)
        # Socket options carrying data must be bytes on Python 3; the UUID
        # text is pure ASCII so the encoding is lossless.
        self.socket.setsockopt(zmq.IDENTITY, self.uuid.encode('ascii'))

    def connect(self, wait=True):
        """Connect, and wait for the socket to be connected.

        When ``wait`` is true, blocks reading events from the socket monitor
        until an EVENT_CONNECTED event is seen. The monitor is disabled
        before returning.
        """
        monitor = self.socket.get_monitor_socket()
        self.socket.connect(self.address)
        while wait:
            msg = zmq.utils.monitor.recv_monitor_message(monitor)
            logger.debug("monitor message: %r", msg)
            if int(msg['event']) == zmq.EVENT_CONNECTED:
                logger.debug("socket connected")
                break
        self.socket.disable_monitor()

    def sendmsg(self, msg):
        """Sends a message, including the client uuid as the identity."""
        # msg2wire returns text; the socket needs bytes on Python 3.
        self.socket.send(msg2wire(msg).encode('utf-8'))

    def recvmsg(self):
        """Receives a message and returns it converted by wire2msg."""
        wire = self.socket.recv()
        logger.debug("received message: %r", wire)
        return wire2msg(wire)


class UpstreamRPCServer(object):

    """Implements the message layer server to the upstream RPC API.

    Wraps a ZMQ ROUTER socket bound to ``address``. Received messages are
    handed out together with the identity of the sending client, and that
    identity is what sendmsg() expects back to address a reply.
    """

    def __init__(self, address):
        """Create the ROUTER socket and bind it to ``address``."""
        self.address = address
        self.zmq_context = zmq.Context.instance()
        self.socket = self.zmq_context.socket(zmq.ROUTER)
        self.socket.bind(address)

    @staticmethod
    def _identity_to_str(frame):
        """Return the identity frame as a native ``str``.

        On Python 2 the frame already is a ``str``. On Python 3 it is
        ``bytes`` and ``str(frame)`` would produce its repr ("b'...'"), so it
        is decoded instead. latin-1 maps every byte to one character, so any
        identity survives the round trip through _identity_to_bytes.
        """
        if isinstance(frame, str):
            return frame
        return frame.decode('latin-1')

    @staticmethod
    def _identity_to_bytes(client_uuid):
        """Return the client identity as the bytes frame ZMQ expects."""
        if isinstance(client_uuid, bytes):
            return client_uuid
        return client_uuid.encode('latin-1')

    def recvmsg(self):
        """Receives a message and returns it along with the client identity."""
        frames = self.socket.recv_multipart()
        logger.debug("received message: %r", frames)
        # Expected frames: [client identity, JSON payload].
        assert len(frames) == 2
        msg = wire2msg(frames[1])
        return msg, self._identity_to_str(frames[0])

    def do_recv_callback(self, frames):
        """Receives a message from zmqstream.on_recv, passing it to a user
        callback as ``callback(msg, client_identity)``."""
        logger.info("receiving message: %r", frames)
        assert len(frames) == 2
        msg = wire2msg(frames[1])
        assert self.callback
        self.callback(msg, self._identity_to_str(frames[0]))

    def sendmsg(self, msg, client_uuid):
        """Sends a message to the identified client."""
        logger.debug("sending message: %r to client: %r", msg, client_uuid)
        # Both frames must be bytes on Python 3.
        self.socket.send_multipart([self._identity_to_bytes(client_uuid),
                                    msg2wire(msg).encode('utf-8')])

    def setup_recv_callback(self, callback):
        """Setup a ioloop-backed callback for receiving messages."""
        self.stream = zmqstream.ZMQStream(self.socket)
        self.callback = callback
        self.stream.on_recv(self.do_recv_callback)


class UpstreamPubServer(object):

    """Implements the message layer server for the upstream PUB/SUB API.

    Wraps a ZMQ PUB socket bound to ``address``, with LINGER set to 0.
    """

    def __init__(self, address):
        """Create the PUB socket and bind it to ``address``."""
        self.address = address
        self.zmq_context = zmq.Context.instance()
        self.socket = self.zmq_context.socket(zmq.PUB)
        self.socket.setsockopt(zmq.LINGER, 0)
        self.socket.bind(address)

    def sendmsg(self, msg):
        """Sends a message."""
        logger.debug("sending message: %r", msg)
        # msg2wire returns text; the socket needs bytes on Python 3.
        self.socket.send(msg2wire(msg).encode('utf-8'))
256
257
258
259
260
261
262
263


class UpstreamPubClient(object):

    """Implements the message layer client to the upstream Pub API.

    Wraps a ZMQ SUB socket subscribed with an empty prefix. Messages can be
    read synchronously with recvmsg() or delivered to a callback through
    setup_recv_callback().
    """

    def __init__(self, address):
        """Create the SUB socket for ``address``; connect() connects it."""
        self.address = address
        self.zmq_context = zmq.Context.instance()
        self.socket = self.zmq_context.socket(zmq.SUB)
        # The subscription prefix must be bytes on Python 3.
        self.socket.setsockopt(zmq.SUBSCRIBE, b'')

    def connect(self, wait=True):
        """Creates a monitor socket and wait for the connect event.

        When ``wait`` is true, blocks reading monitor events until an
        EVENT_CONNECTED event is seen. The monitor is disabled before
        returning.
        """
        monitor = self.socket.get_monitor_socket()
        self.socket.connect(self.address)
        while wait:
            msg = zmq.utils.monitor.recv_monitor_message(monitor)
            logger.debug("monitor message: %r", msg)
            if int(msg['event']) == zmq.EVENT_CONNECTED:
                logger.debug("socket connected")
                break
        self.socket.disable_monitor()

    def recvmsg(self):
        """Receives a message and returns it."""
        frames = self.socket.recv_multipart()
        logger.debug("received message: %r", frames)
        # A published message is a single frame holding the JSON payload.
        assert len(frames) == 1
        return wire2msg(frames[0])

    def do_recv_callback(self, frames):
        """Receives a message from zmqstream.on_recv, passing it to a user
        callback as ``callback(msg)``."""
        logger.info("receiving message: %r", frames)
        assert len(frames) == 1
        msg = wire2msg(frames[0])
        assert self.callback
        self.callback(msg)

    def setup_recv_callback(self, callback):
        """Setup a ioloop-backed callback for receiving messages."""
        self.stream = zmqstream.ZMQStream(self.socket)
        self.callback = callback
        self.stream.on_recv(self.do_recv_callback)
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317


class DownstreamEventServer(UpstreamRPCServer):

    """Implements the message layer server for the downstream event API.

    Same receiving behaviour as UpstreamRPCServer; sending is disabled.
    """

    def sendmsg(self, msg, client_uuid):
        """Always raises AssertionError: this side only receives events."""
        # Raised explicitly (not ``assert False``) so the call is still
        # rejected when Python runs with -O.
        raise AssertionError(
            "Cannot send message from this side of the event stream.")


class DownstreamEventClient(UpstreamRPCClient):

    """Implements the message layer client for the downstream event API.

    Same sending behaviour as UpstreamRPCClient; receiving is disabled.
    """

    def recvmsg(self):
        """Always raises AssertionError: this side only sends events."""
        # Raised explicitly (not ``assert False``) so the call is still
        # rejected when Python runs with -O.
        raise AssertionError(
            "Cannot receive messages from this side of the event stream.")