3466/doxygen/balance_8cpp_source.html

 /*

 # =============================================================================

 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL

 #

 # See top-level LICENSE file for details.

 # =============================================================================

 */


 // use LPT algorithm to balance cells so all warps have similar number

 // of compartments.

 // NB: Ideally we'd balance so that warps have similar ncycle. But we do not

 // know how to predict warp quality without an a priori set of cells to

 // fill the warp. For large numbers of cells in a warp,

 // it is a justifiable speculation to presume that there will be very

 // few holes in warp filling. I.e., ncycle = ncompart/warpsize


 #include <algorithm>

 #include <cstdio>


 #if CORENRN_BUILD

 #include "coreneuron/nrnconf.h"

 #endif


 #include "coreneuron/network/tnode.hpp"

 #include "coreneuron/utils/lpt.hpp"


 #if CORENRN_BUILD

 namespace coreneuron {

 #else

 namespace neuron {

 #endif


 // Requested number of warps for warp_balance(). warp_balance() caps it at the
 // number of cells, and returns 0 immediately (nodevec left untouched) when
 // this is 0.
 int cellorder_nwarp = 0;  // 0 means do not balance


 // Comparator for std::sort: order by warp (groupindex) first; nodes in the
 // same warp keep their previous relative order (nodevec_index).
 bool warpcmp(const TNode* a, const TNode* b) {
     if (a->groupindex != b->groupindex) {
         return a->groupindex < b->groupindex;
     }
     return a->nodevec_index < b->nodevec_index;
 }


 // order the ncell nodevec roots for balance and return a displacement

 // vector specifying the contiguous roots for a warp.

 // The return vector should be freed by the caller.

 // On entry, nodevec is ordered so that each cell type is together and

 // largest cells first. On exit, nodevec is ordered so that warp i

 // should contain roots nodevec[displ[i]:displ[i+1]]


 size_t warp_balance(size_t ncell, VecTNode& nodevec) {

     if (ncell == 0) {

         return 0;

     }


     if (cellorder_nwarp == 0) {

         return 0;

     }

     size_t nwarp = size_t(cellorder_nwarp);

     // cannot be more warps than cells

     nwarp = std::min(ncell, nwarp);


     // cellsize vector and location of types.

     std::vector<size_t> cellsize(ncell);

     std::vector<size_t> typedispl;

     size_t total_compart = 0;

     typedispl.push_back(0);  // types are already in order

     for (size_t i = 0; i < ncell; ++i) {

         cellsize[i] = nodevec[i]->treesize;

         total_compart += cellsize[i];

         if (i == 0 || nodevec[i]->hash != nodevec[i - 1]->hash) {

             typedispl.push_back(typedispl.back() + 1);

         } else {

             typedispl.back() += 1;

         }

     }


     size_t ideal_compart_per_warp = total_compart / nwarp;


     size_t min_cells_per_warp = 0;

     for (size_t i = 0, sz = 0; sz < ideal_compart_per_warp; ++i) {

         ++min_cells_per_warp;

         sz += cellsize[i];

     }


     // balance when order is unrestricted (identical cells not together)

     // i.e. pieces are cellsize

     double best_balance = 0.0;

     auto inwarp = lpt(nwarp, cellsize, &best_balance);

     printf("best_balance=%g ncell=%ld ntype=%ld nwarp=%ld\n",

            best_balance,

            ncell,

            typedispl.size() - 1,

            nwarp);


     // order the roots for balance

     for (size_t i = 0; i < ncell; ++i) {

         TNode* nd = nodevec[i];

         nd->groupindex = inwarp[i];

     }

     std::sort(nodevec.begin(), nodevec.begin() + ncell, warpcmp);

     for (size_t i = 0; i < nodevec.size(); ++i) {

         TNode* nd = nodevec[i];

         for (size_t j = 0; j < nd->children.size(); ++j) {

             nd->children[j]->groupindex = nd->groupindex;

         }

         nd->nodevec_index = i;

     }


     return nwarp;

 }

 }  // namespace coreneuron

neuron::TNode
TNode is the tree node that represents the tree of the compartments.
Definition: tnode.hpp:27

neuron::TNode::groupindex
size_t groupindex
Cell ID that this compartment belongs to.
Definition: tnode.hpp:58

neuron::TNode::nodevec_index
size_t nodevec_index
Index of this node within nodevec.
Definition: tnode.hpp:37

neuron::TNode::children
VecTNode children
Definition: tnode.hpp:32

i
#define i
Definition: md1redef.h:19

lpt
std::vector< std::size_t > lpt(std::size_t nbag, std::vector< std::size_t > &pieces, double *bal)
Definition: lpt.cpp:30

lpt.hpp

printf
printf
Definition: extdef.h:5

coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12

coreneuron::cellorder_nwarp
int cellorder_nwarp

neuron
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
Definition: tnode.hpp:17

neuron::ncell
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
Definition: cellorder.cpp:784

neuron::warp_balance
size_t warp_balance(size_t ncell, VecTNode &nodevec)
Use of the LPT (Longest Processing Time) algorithm to create balanced groups of cells.
Definition: balance.cpp:52

neuron::warpcmp
bool warpcmp(const TNode *a, const TNode *b)
Definition: balance.cpp:36

neuron::VecTNode
std::vector< TNode * > VecTNode
Definition: tnode.hpp:21

neuron::cellsize
int * cellsize
Definition: cellorder.cpp:793

nrnconf.h

j
size_t j
Definition: nrngsl_real_radix2.cpp:50

tnode.hpp