// tp_network.hpp
#ifndef CRAY_CX40_THETA_LUSTRE_H
#define CRAY_CX40_THETA_LUSTRE_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../topology.hpp"

#include <pmi.h>

extern "C" {
#include <lustre/lustreapi.h>
#include <lustre/lustre_user.h>
}

#define LNETS_PER_OST   7
#define MAX_IONODES     392
class Topology: public iTopology {
public:
20 21 22 23
  /**********************/
  /*  |-- Nodes         */
  /**********************/
  int IONodeId () {
Francois Tessier's avatar
Francois Tessier committed
24 25 26
    return 0;
  }

27 28

  int BridgeNodeId () {
Francois Tessier's avatar
Francois Tessier committed
29 30 31
    return 0;
  }

32 33

  int ComputeNodeId () {
Francois Tessier's avatar
Francois Tessier committed
34 35
    return 0;
  }
36 37 38


  int ProcessPerNode () {
39 40 41 42 43 44 45 46 47
    int ppn, err;
    err = PMI_Get_numpes_on_smp ( &ppn );

    if ( err != PMI_SUCCESS ) {
      fprintf (stderr, "[ERROR] PMI_Get_numpes_on_smp failed!\n");
      MPI_Abort (MPI_COMM_WORLD, -1);
    }
    
    return ppn;
48
  }
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91

  /*
   *  7 LNET nodes per OST
   */
  int IONodesPerFile ( char* filename, int *nodesList ) {
    int err, stripeCount, nLnets, i, idx, oid, l;
    char fgrId [20];
    int *ssuId, *ostId, *lnets;
    struct find_param param = { 0 };
    int ssu2fgr [] = { 0, 0, 0, 0,
		       2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
		       4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5,
		       6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7,
		       8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9};
        
    err = llapi_getstripe ( filename, &param );
    if ( err )
      fprintf ( stdout, "[ERROR] llapi_getstripe\n");
    
    stripeCount  = (&param)->fp_lmd->lmd_lmm.lmm_stripe_count;
    nLnets       = stripeCount * LNETS_PER_OST;

    ssuId     = (int *) malloc ( stripeCount * sizeof ( int ) );
    ostId     = (int *) malloc ( stripeCount * sizeof ( int ) );
    
    // /!\ Hypothesis : OSS id == SNX - 4
    for ( i = 0; i < stripeCount; i++ ) {
      idx       = (&param)->fp_lmd->lmd_lmm.lmm_objects[i].l_ost_idx;
      ssuId[i] = idx + 4;
      lnets     = (int *) malloc ( LNETS_PER_OST * sizeof ( int ) );

      snprintf ( fgrId, 20, "o2ib100%d", ssu2fgr[ ssuId[i] ] );
      
      FgrToLnets ( fgrId, lnets );
      
      for ( l = 0; l < LNETS_PER_OST; l++ )
	nodesList [ i * LNETS_PER_OST + l ] = lnets [ l ];
      
      free ( lnets );
    }

    return nLnets;
  }
92 93 94 95 96
  

  /**********************/
  /*  |-- Network       */
  /**********************/
97 98 99 100 101 102 103 104 105
  /*   
   *  X   : Rank 3 (Optics cables between groups : 12.5 Gbps)
   *  Y   : Rank 2 (Copper cables between 6 backplanes : 14 Gbps)
   *  Z   : Rank 1 (Copper cables between 16 aries routers in a backplane :14 Gbps)
   *  nid : Node identifier
   *
   *  http://www.nersc.gov/users/computational-systems/edison/configuration/interconnect/
   *
   */
106
  int NetworkDimensions () {
107
    return 4;
Francois Tessier's avatar
Francois Tessier committed
108 109
  }

110 111 112

  /*  |---- Coordinates */
  void RankToCoordinates ( int rank, int* coord ) {
113 114 115 116 117 118
    pmi_mesh_coord_t xyz;
    int nid;
    
    /* Hypothesis : PMI_rank == MPI_rank */
    PMI_Get_nid(rank, &nid);
    PMI_Get_meshcoord((pmi_nid_t) nid, &xyz);
119
    
120 121 122 123
    coord[0] = xyz.mesh_x;
    coord[1] = xyz.mesh_y;
    coord[2] = xyz.mesh_z;
    coord[3] = nid;
124
    coord[4] = sched_getcpu();
125 126 127 128
  }
  
  
  void IONodeCoordinates ( int* coord ) {
129
    
130 131 132 133
  }

  
  /*  |---- Distance    */
134
  int DistanceToIONode ( int srcRank ) {
Francois Tessier's avatar
Francois Tessier committed
135 136
    return 0;
  }
137

138
  /* Minimal distance between two ranks considering a dragonfly network */
139
  int DistanceBetweenRanks ( int srcRank, int destRank ) {
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
    int dim = NetworkDimensions(), d;
    int srcCoord[dim], destCoord[dim];
    int distance;

    RankToCoordinates ( srcRank, srcCoord );
    RankToCoordinates ( destRank, destCoord );

    distance = 0;

    for ( d = 0; d < dim; d++ ) {
      if ( srcCoord[d] != destCoord[d] )
	distance++;
    }

    return distance;
Francois Tessier's avatar
Francois Tessier committed
155 156
  }

157 158 159

  /*  |---- Routes      */
  int RouteToIONode ( int srcRank, int* path ) {
Francois Tessier's avatar
Francois Tessier committed
160 161
    return 0;
  }
162 163 164 165 166 167 168 169 170 171 172


  int RouteBetweenRanks ( int srcRank, int destRank, int* path ) {
    return 0;
  }


  /*  |---- Links       */
  void LinksList ( int* linksList ) {

  }
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211

private:
  void FgrToLnets ( char *fgr_id, int *lnet ) {
    int count = 0;
    FILE *fp;
    char fline[100];
    char *lnet_list, *item;

    fp = fopen("/etc/lnet/routes.conf", "r");

    if ( fp == NULL ) {
      fprintf ( stdout, "[ERROR] Error while opening routes.conf file!\n" );
      return;
    }

    while ( fgets ( fline, 100, fp ) != NULL ) {

      const char *c = strstr ( fline, fgr_id );

      if ( c != NULL )  {
	const char *b1 = strstr ( fline, "[" ) + 1;
	const char *b2 = strstr ( fline, "]" );
	lnet_list = ( char * ) malloc ( sizeof ( char ) * ( b2 - b1 + 1 ) );
	strncpy ( lnet_list, b1, b2 - b1 );
	item = strtok ( lnet_list, "," );

	while ( item ) {
	  lnet [ count ] = atoi ( item );
	  item = strtok ( 0, "," );
	  count++;
	}
      }
      count = 0;
    }

    fclose ( fp );
    return;
  }

Francois Tessier's avatar
Francois Tessier committed
212 213
};

#endif // CRAY_CX40_THETA_LUSTRE_H