NEURON
balance.cpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
9 // use LPT algorithm to balance cells so all warps have similar number
10 // of compartments.
11 // NB: Ideally we'd balance so that warps have similar ncycle. But we do not
12 // know how to predict warp quality without an apriori set of cells to
13 // fill the warp. For large numbers of cells in a warp,
14 // it is a justifiable speculation to presume that there will be very
15 // few holes in warp filling. I.e., ncycle = ncompart/warpsize
16 
17 #include <algorithm>
18 #include <cstdio>
19 
20 #if CORENRN_BUILD
21 #include "coreneuron/nrnconf.h"
22 #endif
23 
25 #include "coreneuron/utils/lpt.hpp"
26 
27 #if CORENRN_BUILD
28 namespace coreneuron {
29 #else
30 namespace neuron {
31 #endif
32 
33 int cellorder_nwarp = 0; // 0 means do not balance
34 
35 // ordering by warp, then old order
36 bool warpcmp(const TNode* a, const TNode* b) {
37  if (a->groupindex < b->groupindex) {
38  return true;
39  } else if (a->groupindex == b->groupindex && a->nodevec_index < b->nodevec_index) {
40  return true;
41  }
42  return false;
43 }
44 
45 // order the ncell nodevec roots for balance and return a displacement
46 // vector specifying the contiguous roots for a warp.
47 // The return vector should be freed by the caller.
48 // On entry, nodevec is ordered so that each cell type is together and
49 // largest cells first. On exit, nodevec is ordered so that warp i
50 // should contain roots nodevec[displ[i]:displ[i+1]]
51 
52 size_t warp_balance(size_t ncell, VecTNode& nodevec) {
53  if (ncell == 0) {
54  return 0;
55  }
56 
57  if (cellorder_nwarp == 0) {
58  return 0;
59  }
60  size_t nwarp = size_t(cellorder_nwarp);
61  // cannot be more warps than cells
62  nwarp = std::min(ncell, nwarp);
63 
64  // cellsize vector and location of types.
65  std::vector<size_t> cellsize(ncell);
66  std::vector<size_t> typedispl;
67  size_t total_compart = 0;
68  typedispl.push_back(0); // types are already in order
69  for (size_t i = 0; i < ncell; ++i) {
70  cellsize[i] = nodevec[i]->treesize;
71  total_compart += cellsize[i];
72  if (i == 0 || nodevec[i]->hash != nodevec[i - 1]->hash) {
73  typedispl.push_back(typedispl.back() + 1);
74  } else {
75  typedispl.back() += 1;
76  }
77  }
78 
79  size_t ideal_compart_per_warp = total_compart / nwarp;
80 
81  size_t min_cells_per_warp = 0;
82  for (size_t i = 0, sz = 0; sz < ideal_compart_per_warp; ++i) {
83  ++min_cells_per_warp;
84  sz += cellsize[i];
85  }
86 
87  // balance when order is unrestricted (identical cells not together)
88  // i.e. pieces are cellsize
89  double best_balance = 0.0;
90  auto inwarp = lpt(nwarp, cellsize, &best_balance);
91  printf("best_balance=%g ncell=%ld ntype=%ld nwarp=%ld\n",
92  best_balance,
93  ncell,
94  typedispl.size() - 1,
95  nwarp);
96 
97  // order the roots for balance
98  for (size_t i = 0; i < ncell; ++i) {
99  TNode* nd = nodevec[i];
100  nd->groupindex = inwarp[i];
101  }
102  std::sort(nodevec.begin(), nodevec.begin() + ncell, warpcmp);
103  for (size_t i = 0; i < nodevec.size(); ++i) {
104  TNode* nd = nodevec[i];
105  for (size_t j = 0; j < nd->children.size(); ++j) {
106  nd->children[j]->groupindex = nd->groupindex;
107  }
108  nd->nodevec_index = i;
109  }
110 
111  return nwarp;
112 }
113 } // namespace coreneuron
TNode is the tree node that represents the tree of the compartments.
Definition: tnode.hpp:27
size_t groupindex
Cell ID that this compartment belongs to.
Definition: tnode.hpp:58
size_t nodevec_index
Total number of compartments from the current node and below.
Definition: tnode.hpp:37
VecTNode children
Definition: tnode.hpp:32
#define i
Definition: md1redef.h:19
std::vector< std::size_t > lpt(std::size_t nbag, std::vector< std::size_t > &pieces, double *bal)
Definition: lpt.cpp:30
printf
Definition: extdef.h:5
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
int cellorder_nwarp
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
Definition: tnode.hpp:17
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
Definition: cellorder.cpp:784
size_t warp_balance(size_t ncell, VecTNode &nodevec)
Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells.
Definition: balance.cpp:52
bool warpcmp(const TNode *a, const TNode *b)
Definition: balance.cpp:36
std::vector< TNode * > VecTNode
Definition: tnode.hpp:21
int * cellsize
Definition: cellorder.cpp:793
size_t j