#ifndef CRAY_CX40_THETA_LUSTRE_H #define CRAY_CX40_THETA_LUSTRE_H #include #include #include "../topology.hpp" #include extern "C" { #include #include } #define LNETS_PER_OST 7 #define MAX_IONODES 392 class Topology: public iTopology { public: /**********************/ /* |-- Nodes */ /**********************/ int IONodeId () { return 0; } int BridgeNodeId () { return 0; } int ComputeNodeId () { return 0; } int ProcessPerNode () { int ppn, err; err = PMI_Get_numpes_on_smp ( &ppn ); if ( err != PMI_SUCCESS ) { fprintf (stderr, "[ERROR] PMI_Get_numpes_on_smp failed!\n"); MPI_Abort (MPI_COMM_WORLD, -1); } return ppn; } /* * 7 LNET nodes per OST */ int IONodesPerFile ( char* filename, int *nodesList ) { int err, stripeCount, nLnets, i, idx, oid, l; char fgrId [20]; int *ssuId, *ostId, *lnets; struct find_param param = { 0 }; int ssu2fgr [] = { 0, 0, 0, 0, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9}; err = llapi_getstripe ( filename, ¶m ); if ( err ) fprintf ( stdout, "[ERROR] llapi_getstripe\n"); stripeCount = (¶m)->fp_lmd->lmd_lmm.lmm_stripe_count; nLnets = stripeCount * LNETS_PER_OST; ssuId = (int *) malloc ( stripeCount * sizeof ( int ) ); ostId = (int *) malloc ( stripeCount * sizeof ( int ) ); // /!\ Hypothesis : OSS id == SNX - 4 for ( i = 0; i < stripeCount; i++ ) { idx = (¶m)->fp_lmd->lmd_lmm.lmm_objects[i].l_ost_idx; ssuId[i] = idx + 4; lnets = (int *) malloc ( LNETS_PER_OST * sizeof ( int ) ); snprintf ( fgrId, 20, "o2ib100%d", ssu2fgr[ ssuId[i] ] ); FgrToLnets ( fgrId, lnets ); for ( l = 0; l < LNETS_PER_OST; l++ ) nodesList [ i * LNETS_PER_OST + l ] = lnets [ l ]; free ( lnets ); } return nLnets; } /**********************/ /* |-- Network */ /**********************/ /* * X : Rank 3 (Optics cables between groups : 12.5 Gbps) * Y : Rank 2 (Copper cables between 6 backplanes : 14 Gbps) * Z : Rank 1 (Copper cables between 16 aries routers in a backplane :14 Gbps) * nid : Node identifier * * http://www.nersc.gov/users/computational-systems/edison/configuration/interconnect/ * */ int NetworkDimensions () { return 4; } /* |---- Coordinates */ void RankToCoordinates ( int rank, int* coord ) { pmi_mesh_coord_t xyz; int nid; /* Hypothesis : PMI_rank == MPI_rank */ PMI_Get_nid(rank, &nid); PMI_Get_meshcoord((pmi_nid_t) nid, &xyz); coord[0] = xyz.mesh_x; coord[1] = xyz.mesh_y; coord[2] = xyz.mesh_z; coord[3] = nid; coord[4] = sched_getcpu(); } void IONodeCoordinates ( int* coord ) { } /* |---- Distance */ int DistanceToIONode ( int srcRank ) { return 0; } /* Minimal distance between two ranks considering a dragonfly network */ int DistanceBetweenRanks ( int srcRank, int destRank ) { int dim = NetworkDimensions(), d; int srcCoord[dim], destCoord[dim]; int distance; RankToCoordinates ( srcRank, srcCoord ); RankToCoordinates ( destRank, destCoord ); distance = 0; for ( d = 0; d < dim; d++ ) { if ( srcCoord[d] != destCoord[d] ) distance++; } return distance; } /* |---- Routes */ int RouteToIONode ( int srcRank, int* path ) { return 0; } int RouteBetweenRanks ( int srcRank, int destRank, int* path ) { return 0; } /* |---- Links */ void LinksList ( int* linksList ) { } private: void FgrToLnets ( char *fgr_id, int *lnet ) { int count = 0; FILE *fp; char fline[100]; char *lnet_list, *item; fp = fopen("/etc/lnet/routes.conf", "r"); if ( fp == NULL ) { fprintf ( stdout, "[ERROR] Error while opening routes.conf file!\n" ); return; } while ( fgets ( fline, 100, fp ) != NULL ) { const char *c = strstr ( fline, fgr_id ); if ( c != NULL ) { const char *b1 = strstr ( fline, "[" ) + 1; const char *b2 = strstr ( fline, "]" ); lnet_list = ( char * ) malloc ( sizeof ( char ) * ( b2 - b1 + 1 ) ); strncpy ( lnet_list, b1, b2 - b1 ); item = strtok ( lnet_list, "," ); while ( item ) { lnet [ count ] = atoi ( item ); item = strtok ( 0, "," ); count++; } } count = 0; } fclose ( fp ); return; } }; #endif // CRAY_CX40_THETA_LUSTRE_H