Commit daf29e33 authored by Kenneth Raffenetti
Browse files

portals4: tuning



Changes the value of various static limits in the Portals4 netmod, based
on experimentation results and suggestions from collaborators.

1. Bump most ni_limits from 32K to 64K. These limits relate closely to
   queue depth. We can reasonably expect to support a queue depth
   of 64K.

2. Limit issued origin events to 500. This translates to sending ~250
   operations to Portals at a time, which over IB is roughly the
   saturation point. TODO: turn this into a CVAR.

3. Limit per-target issued operations to 50. This will give the target a
   better chance to process events without being overwhelmed by a single
   process. TODO: turn this into a CVAR as well.

4. Allocate more buffer space for incoming control messages. Observed
   results, especially with larger messages, showed that more buffer space
   cuts down on flow-control events.
Signed-off-by: Antonio J. Pena <apenya@mcs.anl.gov>
parent 2f97f429
...@@ -12,10 +12,13 @@ ...@@ -12,10 +12,13 @@
#error Checkpointing not implemented #error Checkpointing not implemented
#endif #endif
#define UNEXPECTED_HDR_COUNT 32768 #define UNEXPECTED_HDR_COUNT (1024*64)
#define EVENT_COUNT 32768 #define EVENT_COUNT (1024*64)
#define LIST_SIZE 32768 #define LIST_SIZE (1024*64)
#define ENTRY_COUNT 32768 #define MAX_ENTRIES (1024*64)
#define ENTRY_COUNT (1024*64)
/* FIXME: turn ORIGIN_EVENTS into a CVAR */
#define ORIGIN_EVENTS (500)
#define NID_KEY "NID" #define NID_KEY "NID"
#define PID_KEY "PID" #define PID_KEY "PID"
#define PTI_KEY "PTI" #define PTI_KEY "PTI"
...@@ -245,7 +248,7 @@ static int ptl_init(MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max ...@@ -245,7 +248,7 @@ static int ptl_init(MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max
MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));
/* currently, rportals only works with a single NI and EQ */ /* currently, rportals only works with a single NI and EQ */
ret = MPID_nem_ptl_rptl_init(MPIDI_Process.my_pg->size, EVENT_COUNT, get_target_info); ret = MPID_nem_ptl_rptl_init(MPIDI_Process.my_pg->size, ORIGIN_EVENTS, get_target_info);
MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret)); MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret));
/* allow rportal to manage the primary portal and retransmit if needed */ /* allow rportal to manage the primary portal and retransmit if needed */
......
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
#include <mpl_utlist.h> #include <mpl_utlist.h>
#include "rptl.h" #include "rptl.h"
#define NUM_RECV_BUFS 8 #define NUM_RECV_BUFS 2
#define BUFSIZE (1024*1024) #define BUFSIZE (1024*1024*50)
#define CTL_TAG 0 #define CTL_TAG 0
#define GET_TAG 1 #define GET_TAG 1
#define PAYLOAD_SIZE (PTL_MAX_EAGER - sizeof(MPIDI_CH3_Pkt_t)) #define PAYLOAD_SIZE (PTL_MAX_EAGER - sizeof(MPIDI_CH3_Pkt_t))
......
...@@ -103,7 +103,7 @@ static int append_overflow(int i) ...@@ -103,7 +103,7 @@ static int append_overflow(int i)
me.match_id = id_any; me.match_id = id_any;
me.match_bits = 0; me.match_bits = 0;
me.ignore_bits = ~((ptl_match_bits_t)0); me.ignore_bits = ~((ptl_match_bits_t)0);
me.min_free = PTL_MAX_EAGER; me.min_free = PTL_LARGE_THRESHOLD;
/* if there is no space to append the entry, process outstanding events and try again */ /* if there is no space to append the entry, process outstanding events and try again */
ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i, ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i,
......
...@@ -7,6 +7,10 @@ ...@@ -7,6 +7,10 @@
#include "ptl_impl.h" #include "ptl_impl.h"
#include "rptl.h" #include "rptl.h"
/* FIXME: turn this into a CVAR, or fraction of the event limit from
rptl_init */
#define PER_TARGET_THRESHOLD 50
/* /*
* Prereqs: * Prereqs:
* *
...@@ -93,6 +97,7 @@ static int find_target(ptl_process_t id, struct rptl_target **target) ...@@ -93,6 +97,7 @@ static int find_target(ptl_process_t id, struct rptl_target **target)
t->op_pool = NULL; t->op_pool = NULL;
t->data_op_list = NULL; t->data_op_list = NULL;
t->control_op_list = NULL; t->control_op_list = NULL;
t->issued_data_ops = 0;
} }
*target = t; *target = t;
...@@ -220,7 +225,7 @@ static int poke_progress(void) ...@@ -220,7 +225,7 @@ static int poke_progress(void)
/* we should not get any NACKs on the control portal */ /* we should not get any NACKs on the control portal */
assert(op->state != RPTL_OP_STATE_NACKED); assert(op->state != RPTL_OP_STATE_NACKED);
if (rptl_info.origin_events_left < 2) { if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
/* too few origin events left. we can't issue this op /* too few origin events left. we can't issue this op
* or any following op to this target in order to * or any following op to this target in order to
* maintain ordering */ * maintain ordering */
...@@ -228,6 +233,7 @@ static int poke_progress(void) ...@@ -228,6 +233,7 @@ static int poke_progress(void)
} }
rptl_info.origin_events_left -= 2; rptl_info.origin_events_left -= 2;
target->issued_data_ops++;
/* force request for an ACK even if the user didn't ask /* force request for an ACK even if the user didn't ask
* for it. replace the user pointer with the OP id. */ * for it. replace the user pointer with the OP id. */
...@@ -255,7 +261,7 @@ static int poke_progress(void) ...@@ -255,7 +261,7 @@ static int poke_progress(void)
if (op->state == RPTL_OP_STATE_NACKED) if (op->state == RPTL_OP_STATE_NACKED)
break; break;
if (rptl_info.origin_events_left < 2) { if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
/* too few origin events left. we can't issue /* too few origin events left. we can't issue
* this op or any following op to this target in * this op or any following op to this target in
* order to maintain ordering */ * order to maintain ordering */
...@@ -263,6 +269,7 @@ static int poke_progress(void) ...@@ -263,6 +269,7 @@ static int poke_progress(void)
} }
rptl_info.origin_events_left -= 2; rptl_info.origin_events_left -= 2;
target->issued_data_ops++;
/* force request for an ACK even if the user didn't /* force request for an ACK even if the user didn't
* ask for it. replace the user pointer with the OP * ask for it. replace the user pointer with the OP
...@@ -283,7 +290,7 @@ static int poke_progress(void) ...@@ -283,7 +290,7 @@ static int poke_progress(void)
if (op->state == RPTL_OP_STATE_NACKED) if (op->state == RPTL_OP_STATE_NACKED)
break; break;
if (rptl_info.origin_events_left < 1) { if (rptl_info.origin_events_left < 1 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
/* too few origin events left. we can't issue /* too few origin events left. we can't issue
* this op or any following op to this target in * this op or any following op to this target in
* order to maintain ordering */ * order to maintain ordering */
...@@ -291,6 +298,7 @@ static int poke_progress(void) ...@@ -291,6 +298,7 @@ static int poke_progress(void)
} }
rptl_info.origin_events_left--; rptl_info.origin_events_left--;
target->issued_data_ops++;
ret = PtlGet(op->u.get.md_handle, op->u.get.local_offset, op->u.get.length, ret = PtlGet(op->u.get.md_handle, op->u.get.local_offset, op->u.get.length,
op->u.get.target_id, op->u.get.pt_index, op->u.get.match_bits, op->u.get.target_id, op->u.get.pt_index, op->u.get.match_bits,
...@@ -539,6 +547,7 @@ static int get_event_info(ptl_event_t * event, struct rptl **ret_rptl, struct rp ...@@ -539,6 +547,7 @@ static int get_event_info(ptl_event_t * event, struct rptl **ret_rptl, struct rp
op = (struct rptl_op *) event->user_ptr; op = (struct rptl_op *) event->user_ptr;
rptl_info.origin_events_left++; rptl_info.origin_events_left++;
op->target->issued_data_ops--;
/* see if there are any pending ops to be issued */ /* see if there are any pending ops to be issued */
ret = poke_progress(); ret = poke_progress();
......
...@@ -158,6 +158,8 @@ struct rptl_target { ...@@ -158,6 +158,8 @@ struct rptl_target {
struct rptl_target *next; struct rptl_target *next;
struct rptl_target *prev; struct rptl_target *prev;
int issued_data_ops;
}; };
struct rptl_info { struct rptl_info {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment