newmad_init.c 9.81 KB
Newer Older
1
2
3
4
5
6
7
8
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *  (C) 2006 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "newmad_impl.h"

9
10
/* Keys under which this module stores (MPID_nem_newmad_get_business_card)
 * and looks up (MPID_nem_newmad_get_from_bc) its two business-card entries:
 * the local host name and the local nmad session URL. */
#define MPIDI_CH3I_HOSTNAME_KEY "hostname_id"
#define MPIDI_CH3I_URL_KEY      "url_id"
11
12
13
14
15
16
17
18
19

MPID_nem_netmod_funcs_t MPIDI_nem_newmad_funcs = {
    MPID_nem_newmad_init,
    MPID_nem_newmad_finalize,
    MPID_nem_newmad_poll,
    MPID_nem_newmad_get_business_card,
    MPID_nem_newmad_connect_to_root,
    MPID_nem_newmad_vc_init,
    MPID_nem_newmad_vc_destroy,
20
21
    MPID_nem_newmad_vc_terminate,
    MPID_nem_newmad_anysource_iprobe
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
};

static MPIDI_Comm_ops_t comm_ops = {
    MPID_nem_newmad_directRecv, /* recv_posted */

    MPID_nem_newmad_directSend, /* send */
    MPID_nem_newmad_directSend, /* rsend */
    MPID_nem_newmad_directSsend, /* ssend */
    MPID_nem_newmad_directSend, /* isend */
    MPID_nem_newmad_directSend, /* irsend */
    MPID_nem_newmad_directSsend, /* issend */

    NULL,                   /* send_init */
    NULL,                   /* bsend_init */
    NULL,                   /* rsend_init */
    NULL,                   /* ssend_init */
    NULL,                   /* startall */

    MPID_nem_newmad_cancel_send,/* cancel_send */
41
42
43
44
    MPID_nem_newmad_cancel_recv, /* cancel_recv */

    MPID_nem_newmad_probe, /* probe */
    MPID_nem_newmad_iprobe /* iprobe */
45
46
47
48
};


/* Rank of this process, recorded by MPID_nem_newmad_init. */
static int mpid_nem_newmad_myrank;

/* Label handed to nm_session_create() by init_mad. */
static const char *label = "mpich2";

/* URL of the local session; filled in by nm_session_init() in init_mad and
 * published through the business card. */
static const char *local_session_url = NULL;

/* nmad session shared by the whole module (non-static: visible to other
 * translation units). */
nm_session_t mpid_nem_newmad_session;

/* Starts at 0; not modified anywhere in this file. */
int mpid_nem_newmad_pending_send_req = 0;

#undef FUNCNAME
#define FUNCNAME init_mad
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * init_mad - bring up the nmad side of the module.
 *
 * Initializes the internal request queue, creates the module-wide session
 * (mpid_nem_newmad_session) under the name `label`, initializes it with a
 * dummy argc/argv (which also yields local_session_url), and finally calls
 * nm_sr_init() on the session.
 *
 * pg_p is currently unused.
 *
 * Returns MPI_SUCCESS, or an MPI error code if any nmad call does not return
 * NM_ESUCCESS.  Failures are reported through the return value rather than
 * through MPIU_Assert alone, so they are not silently ignored in builds where
 * assertions are compiled out.
 */
static int init_mad( MPIDI_PG_t *pg_p )
{
    int   mpi_errno = MPI_SUCCESS;
    char *dummy_argv[2] = {"mpich2",NULL};
    int   dummy_argc    = 1;
    int   ret;

    MPID_nem_newmad_internal_req_queue_init();

    ret = nm_session_create(&mpid_nem_newmad_session, label);
    MPIU_ERR_CHKANDJUMP(ret != NM_ESUCCESS, mpi_errno, MPI_ERR_OTHER, "**fail");

    ret = nm_session_init(mpid_nem_newmad_session, &dummy_argc, dummy_argv, &local_session_url);
    MPIU_ERR_CHKANDJUMP(ret != NM_ESUCCESS, mpi_errno, MPI_ERR_OTHER, "**fail");

    ret = nm_sr_init(mpid_nem_newmad_session);
    if (ret != NM_ESUCCESS) {
        /* keep the historical diagnostic, but also report the failure */
        fprintf(stdout,"nm_sr_init return err = %d\n", ret);
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

Guillaume Mercier's avatar
Guillaume Mercier committed
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#undef FUNCNAME
#define FUNCNAME MPID_nem_newmad_init_completed
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int
MPID_nem_newmad_init_completed(void)
{
   
   int mpi_errno = MPI_SUCCESS ;
   int ret;
   
   ret = nm_sr_monitor(mpid_nem_newmad_session, NM_SR_EVENT_RECV_UNEXPECTED,
		       &MPID_nem_newmad_get_adi_msg);
   MPIU_Assert( ret == NM_ESUCCESS);
   
fn_exit:
       return mpi_errno;
fn_fail:
       goto fn_exit;
}
104
105
106
107
108
109

#undef FUNCNAME
#define FUNCNAME MPID_nem_newmad_init
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int
Darius Buntinas's avatar
Darius Buntinas committed
110
MPID_nem_newmad_init (MPIDI_PG_t *pg_p, int pg_rank,
111
		      char **bc_val_p, int *val_max_sz_p)
112
{
113
114
115
   int mpi_errno = MPI_SUCCESS ;
   int index;

116
   /*
117
   fprintf(stdout,"Size of MPID_nem_mad_module_vc_area_internal_t : %i | size of nm_sr_request_t :%i | Size of req_area : %i\n",
118
         sizeof(MPID_nem_newmad_vc_area_internal_t),sizeof(nm_sr_request_t), sizeof(MPID_nem_newmad_req_area));
119
120
    */
   
121
122
   /*
   MPIU_Assert( sizeof(MPID_nem_newmad_vc_area_internal_t) <= MPID_NEM_VC_NETMOD_AREA_LEN);
123
   MPIU_Assert( sizeof(MPID_nem_newmad_req_area) <= MPID_NEM_REQ_NETMOD_AREA_LEN);
124
   */
125
126
   
   /*
127
128
129
130
131
132
133
134
135
   if (sizeof(MPID_nem_newmad_vc_area_internal_t) > MPID_NEM_VC_NETMOD_AREA_LEN)
   {
       fprintf(stdout,"===========================================================\n");
       fprintf(stdout,"===  Error : Newmad data structure size is too long     ===\n");
       fprintf(stdout,"===  VC netmod area is %4i | Nmad struct size is %4i    ===\n", 
	       MPID_NEM_VC_NETMOD_AREA_LEN, sizeof(MPID_nem_newmad_vc_area_internal_t));
       fprintf(stdout,"===========================================================\n");
       MPIU_Abort();    
   }
136
137
   */
   
138
139
140
141
142
143
144
145
146
   if (sizeof(MPID_nem_newmad_req_area) > MPID_NEM_REQ_NETMOD_AREA_LEN)
   {
       fprintf(stdout,"===========================================================\n");
       fprintf(stdout,"===  Error : Newmad data structure size is too long     ===\n");
       fprintf(stdout,"===  Req netmod area is %4i | Nmad struct size is %4i   ===\n", 
	       MPID_NEM_REQ_NETMOD_AREA_LEN, sizeof(MPID_nem_newmad_req_area));
       fprintf(stdout,"===========================================================\n");
       MPIU_Abort();    
   }
147
148
149
150

   mpid_nem_newmad_myrank = pg_rank;
   init_mad(pg_p);

151
   mpi_errno = MPID_nem_newmad_get_business_card(pg_rank,bc_val_p, val_max_sz_p);
152
153
   if (mpi_errno) MPIU_ERR_POP (mpi_errno);

154
   mpi_errno = MPIDI_CH3I_Register_anysource_notification(MPID_nem_newmad_anysource_posted, MPID_nem_newmad_anysource_matched);
155
156
   if (mpi_errno) MPIU_ERR_POP(mpi_errno);

Guillaume Mercier's avatar
Guillaume Mercier committed
157
158
159
   mpi_errno = MPID_nem_register_initcomp_cb(MPID_nem_newmad_init_completed);
   if (mpi_errno) MPIU_ERR_POP(mpi_errno);
   
160
161
   fn_exit:
       return mpi_errno;
162
   fn_fail: 
163
164
165
166
167
168
169
170
171
172
       goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_newmad_get_business_card
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int
MPID_nem_newmad_get_business_card (int my_rank, char **bc_val_p, int *val_max_sz_p)
{
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
    int mpi_errno = MPI_SUCCESS;
    int str_errno = MPIU_STR_SUCCESS;
    char name[MPID_NEM_NMAD_MAX_SIZE];
    
    gethostname(name,MPID_NEM_NMAD_MAX_SIZE);
    
    str_errno = MPIU_Str_add_binary_arg (bc_val_p, val_max_sz_p, MPIDI_CH3I_HOSTNAME_KEY, name, strlen(name));
    if (str_errno) {
        MPIU_ERR_CHKANDJUMP(str_errno == MPIU_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
    }
    
    str_errno = MPIU_Str_add_binary_arg (bc_val_p, val_max_sz_p, MPIDI_CH3I_URL_KEY, local_session_url, strlen(local_session_url));
    if (str_errno) {
        MPIU_ERR_CHKANDJUMP(str_errno == MPIU_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
    }
    
 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
195
196
197
198
199
200
201
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_newmad_get_from_bc
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int
202
MPID_nem_newmad_get_from_bc (const char *business_card, char *hostname, char *url)
203
204
{
   int mpi_errno = MPI_SUCCESS;
205
   int str_errno = MPIU_STR_SUCCESS;
206
207
   int len;
   
208
   str_errno = MPIU_Str_get_binary_arg (business_card, MPIDI_CH3I_HOSTNAME_KEY, hostname,
209
					MPID_NEM_NMAD_MAX_SIZE, &len);
210
211
212
213
214
215
216
   if (str_errno != MPIU_STR_SUCCESS)
   {	
      /* FIXME: create a real error string for this */
      MPIU_ERR_CHKANDJUMP(str_errno, mpi_errno, MPI_ERR_OTHER, "**argstr_hostd");
   }
   
   str_errno = MPIU_Str_get_binary_arg (business_card, MPIDI_CH3I_URL_KEY, url,
217
					MPID_NEM_NMAD_MAX_SIZE, &len);
218
219
220
221
222
   if (str_errno != MPIU_STR_SUCCESS)
   {      
      /* FIXME: create a real error string for this */
      MPIU_ERR_CHKANDJUMP(str_errno, mpi_errno, MPI_ERR_OTHER, "**argstr_hostd");
   }
223
224
225
   
   fn_exit:
     return mpi_errno;
226
   fn_fail:
227
228
229
230
231
232
233
234
235
236
237
     goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_newmad_connect_to_root
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPID_nem_newmad_connect_to_root - placeholder entry of the module function
 * table.  Performs no work: both arguments are ignored and MPI_SUCCESS is
 * always returned.
 */
int
MPID_nem_newmad_connect_to_root (const char *business_card, MPIDI_VC_t *new_vc)
{
    (void)business_card;
    (void)new_vc;

    return MPI_SUCCESS;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_newmad_vc_init
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPID_nem_newmad_vc_init - set up a virtual connection for this module.
 *
 * Fetches the peer's business card through vc->pg->getConnInfo, allocates
 * the module's per-VC area (released by MPID_nem_newmad_vc_destroy), extracts
 * the peer host name and session URL into it, connects the session to that
 * URL and attaches `vc` to the resulting gate.  On success the VC is marked
 * ACTIVE and its send/communication hooks are pointed at the newmad
 * implementations.
 *
 * The temporary business-card buffer is released on every exit path.
 *
 * Returns MPI_SUCCESS, or an MPI error code if an allocation, the PMI query,
 * the business-card lookup/parsing or nm_session_connect() fails.  On failure
 * the VC state and hooks are left untouched.
 */
int
MPID_nem_newmad_vc_init (MPIDI_VC_t *vc)
{
   MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
   char          *business_card = NULL;
   int            mpi_errno = MPI_SUCCESS;
   int            val_max_sz;
   int            ret;

#ifdef USE_PMI2_API
   val_max_sz = PMI2_MAX_VALLEN;
#else
   /* val_max_sz is only meaningful if the query succeeded */
   ret = PMI_KVS_Get_value_length_max(&val_max_sz);
   MPIU_ERR_CHKANDJUMP(ret != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**fail");
#endif

   business_card = (char *)MPIU_Malloc(val_max_sz);
   MPIU_ERR_CHKANDJUMP(business_card == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem");

   mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card, val_max_sz, vc->pg);
   if (mpi_errno) MPIU_ERR_POP(mpi_errno);

   (((MPID_nem_newmad_vc_area *)((MPIDI_CH3I_VC *)(vc)->channel_private)->netmod_area.padding)->area) =
     (MPID_nem_newmad_vc_area_internal_t *)MPIU_Malloc(sizeof(MPID_nem_newmad_vc_area_internal_t));
   MPIU_ERR_CHKANDJUMP((((MPID_nem_newmad_vc_area *)((MPIDI_CH3I_VC *)(vc)->channel_private)->netmod_area.padding)->area) == NULL,
                       mpi_errno, MPI_ERR_OTHER, "**nomem");

   /* Very important: the buffers are zeroed before the business-card values
      are copied into them */
   memset(VC_FIELD(vc, hostname),0,MPID_NEM_NMAD_MAX_SIZE);
   memset(VC_FIELD(vc, url),0,MPID_NEM_NMAD_MAX_SIZE);

   mpi_errno = MPID_nem_newmad_get_from_bc (business_card, VC_FIELD(vc, hostname), VC_FIELD(vc, url));
   if (mpi_errno) MPIU_ERR_POP (mpi_errno);

   ret = nm_session_connect(mpid_nem_newmad_session, &(VC_FIELD(vc,p_gate)), VC_FIELD(vc, url));
   if (ret != NM_ESUCCESS) {
       /* without a connection the gate must not be used below */
       fprintf(stdout,"nm_session_connect returned ret = %d\n", ret);
       MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
   }

   /* let the gate refer back to its VC */
   nm_gate_ref_set(VC_FIELD(vc, p_gate),(void*)vc);

   MPIDI_CHANGE_VC_STATE(vc, ACTIVE);

   vc->eager_max_msg_sz = 32768;
   vc->rndvSend_fn      = NULL;
   vc->sendNoncontig_fn = MPID_nem_newmad_SendNoncontig;
   vc->comm_ops         = &comm_ops;

   vc_ch->iStartContigMsg = MPID_nem_newmad_iStartContigMsg;
   vc_ch->iSendContig     = MPID_nem_newmad_iSendContig;

 fn_exit:
   /* the business card is only needed while parsing; never leak it */
   if (business_card != NULL)
       MPIU_Free(business_card);
   return mpi_errno;
 fn_fail:
   goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_newmad_vc_destroy
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPID_nem_newmad_vc_destroy - release the per-VC area that
 * MPID_nem_newmad_vc_init allocated for `vc`.  Always returns MPI_SUCCESS.
 */
int MPID_nem_newmad_vc_destroy(MPIDI_VC_t *vc)
{
    MPIDI_CH3I_VC           *channel = (MPIDI_CH3I_VC *)vc->channel_private;
    MPID_nem_newmad_vc_area *netmod  = (MPID_nem_newmad_vc_area *)channel->netmod_area.padding;

    MPIU_Free(netmod->area);

    return MPI_SUCCESS;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_newmad_vc_terminate
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPID_nem_newmad_vc_terminate - termination hook of the module function
 * table.  Does nothing with `vc` and always reports success.
 */
int MPID_nem_newmad_vc_terminate (MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;

    (void)vc;

    return mpi_errno;
}