NEURON
cellorder1.cpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
9 #include <cstdio>
10 #include <map>
11 #include <set>
12 #include <algorithm>
13 #include <cstring>
14 
15 #if CORENRN_BUILD
17 #else
18 #include "oc/nrnassrt.h"
19 #endif
20 
23 
24 // just for interleave_permute_type
25 #if CORENRN_BUILD
28 #else
30 #endif
31 
32 #if !CORENRN_BUILD && NRN_DEBUG
33 #undef CORENRN_DEBUG
34 #define CORENRN_DEBUG NRN_DEBUG
35 #endif
36 
37 #if CORENRN_BUILD
38 namespace coreneuron {
39 #else
40 namespace neuron {
41 #endif
42 
43 static size_t groupsize = 32;
44 
45 /**
46  * \brief Function to order trees by size, hash and nodeindex
47  */
48 static bool tnode_earlier(TNode* a, TNode* b) {
49  bool result = false;
50  if (a->treesize < b->treesize) { // treesize dominates
51  result = true;
52  } else if (a->treesize == b->treesize) {
53  if (a->hash < b->hash) { // if treesize same, keep identical trees together
54  result = true;
55  } else if (a->hash == b->hash) {
56  result = a->nodeindex < b->nodeindex; // identical trees ordered by nodeindex
57  }
58  }
59  return result;
60 }
61 
62 static bool ptr_tnode_earlier(TNode* a, TNode* b) {
63  return tnode_earlier(a, b);
64 }
65 
66 TNode::TNode(int ix) {
67  nodeindex = ix;
68  cellindex = 0;
69  groupindex = 0;
70  level = 0;
71  hash = 0;
72  treesize = 1;
73  nodevec_index = 0;
74  treenode_order = 0;
75  parent = nullptr;
76  children.reserve(2);
77 }
78 
79 TNode::~TNode() {}
80 
81 size_t TNode::mkhash() { // call on all nodes in leaf to root order
82  // concept from http://stackoverflow.com/questions/20511347/a-good-hash-function-for-a-vector
83  std::sort(children.begin(), children.end(), ptr_tnode_earlier);
84  hash = children.size();
85  treesize = 1;
86  for (size_t i = 0; i < children.size(); ++i) { // need sorted by child hash
87  hash ^= children[i]->hash + 0x9e3779b9 + (hash << 6) + (hash >> 2);
88  treesize += children[i]->treesize;
89  }
90  return hash; // hash of leaf nodes is 0
91 }
92 
93 static void tree_analysis(int* parent, int nnode, int ncell, VecTNode&);
94 static void node_interleave_order(int ncell, VecTNode&);
95 static void admin1(int ncell,
96  VecTNode& nodevec,
97  int& nwarp,
98  int& nstride,
99  int*& stride,
100  int*& firstnode,
101  int*& lastnode,
102  int*& cellsize);
103 static void admin2(int ncell,
104  VecTNode& nodevec,
105  int& nwarp,
106  int& nstride,
107  int*& stridedispl,
108  int*& strides,
109  int*& rootbegin,
110  int*& nodebegin,
111  int*& ncycles);
112 static void check(VecTNode&);
113 #if CORENRN_DEBUG
114 static void prtree(VecTNode&);
115 #endif
116 
117 using TNI = std::pair<TNode*, int>;
118 using HashCnt = std::map<size_t, std::pair<TNode*, int>>;
119 using TNIVec = std::vector<TNI>;
120 
121 /*
122 assess the quality of the ordering. The measure is the size of a contiguous
123 list of nodes whose parents have the same order. How many contiguous lists
124 have that same size. How many nodes participate in that size list.
125 Modify the quality measure from experience with performance. Start with
126 list of (nnode, size_participation)
127 */
128 static void quality(VecTNode& nodevec, size_t max = 32) {
129  size_t qcnt = 0; // how many contiguous nodes have contiguous parents
130 
131  // first ncell nodes are by definition in contiguous order
132  for (const auto& n: nodevec) {
133  if (n->parent != nullptr) {
134  break;
135  }
136  qcnt += 1;
137  }
138  size_t ncell = qcnt;
139 
140  // key is how many parents in contiguous order
141  // value is number of nodes that participate in that
142  std::map<size_t, size_t> qual;
143  size_t ip_last = 10000000000;
144  for (size_t i = ncell; i < nodevec.size(); ++i) {
145  size_t ip = nodevec[i]->parent->nodevec_index;
146  // i%max == 0 means that if we start a warp with 8 and then have 32
147  // the 32 is broken into 24 and 8. (modify if the arrangement during
148  // gaussian elimination becomes more sophisticated.)
149  if (ip == ip_last + 1 && i % max != 0) { // contiguous
150  qcnt += 1;
151  } else {
152  if (qcnt == 1) {
153  // printf("unique %ld p=%ld ix=%d\n", i, ip, nodevec[i]->nodeindex);
154  }
155  qual[max] += (qcnt / max) * max;
156  size_t x = qcnt % max;
157  if (x) {
158  qual[x] += x;
159  }
160  qcnt = 1;
161  }
162  ip_last = ip;
163  }
164  qual[max] += (qcnt / max) * max;
165  size_t x = qcnt % max;
166  if (x) {
167  qual[x] += x;
168  }
169 
170  // print result
171  qcnt = 0;
172 #if CORENRN_DEBUG
173  for (const auto& q: qual) {
174  qcnt += q.second;
175  printf("%6ld %6ld\n", q.first, q.second);
176  }
177 #endif
178 #if CORENRN_DEBUG
179  printf("qual.size=%ld qual total nodes=%ld nodevec.size=%ld\n",
180  qual.size(),
181  qcnt,
182  nodevec.size());
183 #endif
184 
185  // how many race conditions. ie refer to same parent on different core
186  // of warp (max cores) or parent in same group of max.
187  size_t maxip = ncell;
188  size_t nrace1 = 0;
189  size_t nrace2 = 0;
190  std::set<size_t> ipused;
191  for (size_t i = ncell; i < nodevec.size(); ++i) {
192  TNode* nd = nodevec[i];
193  size_t ip = nd->parent->nodevec_index;
194  if (i % max == 0) {
195  maxip = i;
196  ipused.clear();
197  }
198  if (ip >= maxip) {
199  nrace1 += 1;
200  } /*else*/
201  {
202  if (ipused.find(ip) != ipused.end()) {
203  nrace2 += 1;
204  if (ip >= maxip) {
205  // printf("race for parent %ld (parent in same group as multiple users))\n",
206  // ip);
207  }
208  } else {
209  ipused.insert(ip);
210  }
211  }
212  }
213  static_cast<void>(nrace1);
214  static_cast<void>(nrace2);
215 #if CORENRN_DEBUG
216  printf("nrace = %ld (parent in same group of %ld nodes)\n", nrace1, max);
217  printf("nrace = %ld (parent used more than once by same group of %ld nodes)\n", nrace2, max);
218 #endif
219 }
220 
221 size_t level_from_root(VecTNode& nodevec) {
222  size_t maxlevel = 0;
223  for (auto& nd: nodevec) {
224  if (nd->parent) {
225  nd->level = nd->parent->level + 1;
226  if (maxlevel < nd->level) {
227  maxlevel = nd->level;
228  }
229  } else {
230  nd->level = 0;
231  }
232  }
233  return maxlevel;
234 }
235 
236 size_t level_from_leaf(VecTNode& nodevec) {
237  size_t maxlevel = 0;
238  for (size_t i = nodevec.size() - 1; true; --i) {
239  TNode* nd = nodevec[i];
240  size_t lmax = 0;
241  for (auto& child: nd->children) {
242  if (lmax <= child->level) {
243  lmax = child->level + 1;
244  }
245  }
246  nd->level = lmax;
247  if (maxlevel < lmax) {
248  maxlevel = lmax;
249  }
250  if (i == 0) {
251  break;
252  }
253  }
254  return maxlevel;
255 }
256 
257 /**
258  * \brief Set the cellindex to distinguish the different cells.
259  */
260 static void set_cellindex(int ncell, VecTNode& nodevec) {
261  for (int i = 0; i < ncell; ++i) {
262  nodevec[i]->cellindex = i;
263  }
264  for (size_t i = 0; i < nodevec.size(); ++i) {
265  TNode& nd = *nodevec[i];
266  for (size_t j = 0; j < nd.children.size(); ++j) {
267  TNode* cnode = nd.children[j];
268  cnode->cellindex = nd.cellindex;
269  }
270  }
271 }
272 
273 /**
274  * \brief Initialization of the groupindex (groups)
275  *
276  * The cells are groupped at a later stage based on a load balancing algorithm.
277  * This is just an initialization function.
278  */
279 static void set_groupindex(VecTNode& nodevec) {
280  for (size_t i = 0; i < nodevec.size(); ++i) {
281  TNode* nd = nodevec[i];
282  if (nd->parent) {
283  nd->groupindex = nd->parent->groupindex;
284  } else {
285  nd->groupindex = i / groupsize;
286  }
287  }
288 }
289 
290 // how many identical trees and their levels
291 // print when more than one instance of a type
292 // reverse the sense of levels (all leaves are level 0) to get a good
293 // idea of the depth of identical subtrees.
294 static void ident_statistic(VecTNode& nodevec, size_t ncell) {
295  // reverse sense of levels
296  // size_t maxlevel = level_from_leaf(nodevec);
297  size_t maxlevel = level_from_root(nodevec);
298 
299  // # in each level
300  std::vector<std::vector<size_t>> n_in_level(maxlevel + 1);
301  for (auto& n: n_in_level) {
302  n.resize(ncell / groupsize);
303  }
304  for (const auto& n: nodevec) {
305  n_in_level[n->level][n->groupindex]++;
306  }
307  printf("n_in_level.size = %ld\n", n_in_level.size());
308  for (size_t i = 0; i < n_in_level.size(); ++i) {
309  printf("%5ld\n", i);
310  for (const auto& n: n_in_level[i]) {
311  printf(" %5ld", n);
312  }
313  printf("\n");
314  }
315 }
316 #undef MSS
317 
318 #if CORENRN_BUILD
319 int* node_order(int ncell,
320 #else
321 std::vector<int> node_order(int ncell,
322 #endif
323  int nnode,
324  int* parent,
325  int& nwarp,
326  int& nstride,
327  int*& stride,
328  int*& firstnode,
329  int*& lastnode,
330  int*& cellsize,
331  int*& stridedispl) {
332  VecTNode nodevec;
333 
334  // nodevec[0:ncell] in increasing size, with identical trees together,
335  // and otherwise nodeindex order
336  // nodevec.size = nnode
337  tree_analysis(parent, nnode, ncell, nodevec);
338  check(nodevec);
339 
340  set_cellindex(ncell, nodevec);
341  set_groupindex(nodevec);
342  level_from_root(nodevec);
343 
344  // nodevec[ncell:nnode] cells are interleaved in nodevec[0:ncell] cell order
345  if (interleave_permute_type == 1) {
346  node_interleave_order(ncell, nodevec);
347  } else {
348  group_order2(nodevec, groupsize, ncell);
349  }
350  check(nodevec);
351 
352 #if CORENRN_DEBUG
353  for (int i = 0; i < ncell; ++i) {
354  TNode& nd = *nodevec[i];
355  printf("%d size=%ld hash=%ld ix=%d\n", i, nd.treesize, nd.hash, nd.nodeindex);
356  }
357 #endif
358 
359  if (0)
360  ident_statistic(nodevec, ncell);
361  quality(nodevec);
362 
363  // the permutation
364 #if CORENRN_BUILD
365  int* nodeorder = new int[nnode];
366 #else
367  std::vector<int> nodeorder(nnode);
368 #endif
369  for (int i = 0; i < nnode; ++i) {
370  TNode& nd = *nodevec[i];
371  nodeorder[nd.nodeindex] = i;
372  }
373 
374  // administrative statistics for gauss elimination
375  if (interleave_permute_type == 1) {
376  admin1(ncell, nodevec, nwarp, nstride, stride, firstnode, lastnode, cellsize);
377  } else {
378  // admin2(ncell, nodevec, nwarp, nstride, stridedispl, stride, rootbegin, nodebegin,
379  // ncycles);
381  }
382 
383  int ntopol = 1;
384  for (int i = 1; i < ncell; ++i) {
385  if (nodevec[i - 1]->hash != nodevec[i]->hash) {
386  ntopol += 1;
387  }
388  }
389  static_cast<void>(ntopol);
390 #ifdef DEBUG
391  printf("%d distinct tree topologies\n", ntopol);
392 #endif
393 
394  for (size_t i = 0; i < nodevec.size(); ++i) {
395  delete nodevec[i];
396  }
397 
398  return nodeorder;
399 }
400 
401 void check(VecTNode& nodevec) {
402  // printf("check\n");
403  size_t nnode = nodevec.size();
404  size_t ncell = 0;
405  for (size_t i = 0; i < nnode; ++i) {
406  nodevec[i]->nodevec_index = i;
407  if (nodevec[i]->parent == nullptr) {
408  ncell++;
409  }
410  }
411  /// Check that the first compartments of nodevec are the root nodes (cells)
412  for (size_t i = 0; i < ncell; ++i) {
413  nrn_assert(nodevec[i]->parent == nullptr);
414  }
415  for (size_t i = ncell; i < nnode; ++i) {
416  TNode& nd = *nodevec[i];
417  if (nd.parent->nodevec_index >= nd.nodevec_index) {
418  printf("error i=%ld nodevec_index=%ld parent=%ld\n",
419  i,
420  nd.nodevec_index,
421  nd.parent->nodevec_index);
422  }
424  }
425 }
426 
427 #if CORENRN_DEBUG
428 void prtree(VecTNode& nodevec) {
429  size_t nnode = nodevec.size();
430  for (size_t i = 0; i < nnode; ++i) {
431  nodevec[i]->nodevec_index = i;
432  }
433  for (size_t i = 0; i < nnode; ++i) {
434  TNode& nd = *nodevec[i];
435  printf("%ld p=%d c=%ld l=%ld o=%ld ix=%d pix=%d\n",
436  i,
437  nd.parent ? int(nd.parent->nodevec_index) : -1,
438  nd.cellindex,
439  nd.level,
440  nd.treenode_order,
441  nd.nodeindex,
442  nd.parent ? int(nd.parent->nodeindex) : -1);
443  }
444 }
445 #endif
446 
447 /**
448  * \brief Perform tree preparation for interleaving strategies
449  *
450  * \param parent vector of parent indices
451  * \param nnode number of compartments in the cells
452  * \param ncell number of cells
453  */
454 void tree_analysis(int* parent, int nnode, int ncell, VecTNode& nodevec) {
455  // create empty TNodes (knowing only their index)
456  nodevec.reserve(nnode);
457  for (int i = 0; i < nnode; ++i) {
458  nodevec.push_back(new TNode(i));
459  }
460 
461  // determine the (sorted by hash) children of each node
462  for (int i = nnode - 1; i >= ncell; --i) {
463  nodevec[i]->parent = nodevec[parent[i]];
464  nodevec[i]->mkhash();
465  nodevec[parent[i]]->children.push_back(nodevec[i]);
466  }
467 
468  // determine hash of the cells
469  for (int i = 0; i < ncell; ++i) {
470  nodevec[i]->mkhash();
471  }
472 
473  // sort it by tree size (from smaller to larger)
474  std::sort(nodevec.begin(), nodevec.begin() + ncell, tnode_earlier);
475 }
476 
477 static bool interleave_comp(TNode* a, TNode* b) {
478  bool result = false;
479  if (a->treenode_order < b->treenode_order) {
480  result = true;
481  } else if (a->treenode_order == b->treenode_order) {
482  if (a->cellindex < b->cellindex) {
483  result = true;
484  }
485  }
486  return result;
487 }
488 
489 /**
490  * \brief Naive interleaving strategy (interleave_permute_type == 1)
491  *
492  * Sort so nodevec[ncell:nnode] cell instances are interleaved. Keep the
493  * secondary ordering with respect to treenode_order so each cell is still a tree.
494  *
495  * \param ncell number of cells (trees)
496  * \param nodevec vector that contains compartments (nodes of the trees)
497  */
498 void node_interleave_order(int ncell, VecTNode& nodevec) {
499  int* order = new int[ncell];
500  for (int i = 0; i < ncell; ++i) {
501  order[i] = 0;
502  nodevec[i]->treenode_order = order[i]++;
503  }
504  for (size_t i = 0; i < nodevec.size(); ++i) {
505  TNode& nd = *nodevec[i];
506  for (size_t j = 0; j < nd.children.size(); ++j) {
507  TNode* cnode = nd.children[j];
508  cnode->treenode_order = order[nd.cellindex]++;
509  }
510  }
511  delete[] order;
512 
513  // std::sort(nodevec.begin() + ncell, nodevec.end(), contig_comp);
514  // Traversal of nodevec: From root to leaves (this is why we compute the tree node order)
515  std::sort(nodevec.begin() + ncell, nodevec.end(), interleave_comp);
516 
517 #if CORENRN_DEBUG
518  for (size_t i = 0; i < nodevec.size(); ++i) {
519  TNode& nd = *nodevec[i];
520  printf("%ld cell=%ld ix=%d\n", i, nd.cellindex, nd.nodeindex);
521  }
522 #endif
523 }
524 
525 static void admin1(int ncell,
526  VecTNode& nodevec,
527  int& nwarp,
528  int& nstride,
529  int*& stride,
530  int*& firstnode,
531  int*& lastnode,
532  int*& cellsize) {
533  firstnode = (int*) ecalloc_align(ncell, sizeof(int));
534  lastnode = (int*) ecalloc_align(ncell, sizeof(int));
535  cellsize = (int*) ecalloc_align(ncell, sizeof(int));
536 
537  nwarp = (ncell % warpsize == 0) ? (ncell / warpsize) : (ncell / warpsize + 1);
538 
539  for (int i = 0; i < ncell; ++i) {
540  firstnode[i] = -1;
541  lastnode[i] = -1;
542  cellsize[i] = 0;
543  }
544 
545  nstride = 0;
546  for (size_t i = ncell; i < nodevec.size(); ++i) {
547  TNode& nd = *nodevec[i];
548  size_t ci = nd.cellindex;
549  if (firstnode[ci] == -1) {
550  firstnode[ci] = i;
551  }
552  lastnode[ci] = i;
553  cellsize[ci] += 1;
554  if (nstride < cellsize[ci]) {
555  nstride = cellsize[ci];
556  }
557  }
558 
559  // this vector is used to move from one compartment to the other (per cell)
560  // its length is equal to the cell with the highest number of compartments
561  stride = static_cast<int*>(ecalloc_align(nstride + 1, sizeof(int)));
562  for (size_t i = ncell; i < nodevec.size(); ++i) {
563  TNode& nd = *nodevec[i];
564  // compute how many compartments with the same order
565  // treenode_order : defined in breadth first fashion (for each cell separately)
566  stride[nd.treenode_order - 1] += 1; // -1 because treenode order includes root
567  }
568 }
569 
570 // for admin2 we allow the node organisation in warps of (say 4 cores per warp)
571 // ............... ideal warp but unbalanced relative to warp with max cycles
572 // ............... ncycle = 15, icore [0:4), all strides are 4.
573 // ...............
574 // ...............
575 //
576 // .......... unbalanced relative to warp with max cycles
577 // .......... ncycle = 10, not all strides the same because
578 // .......... of need to avoid occasional race conditions.
579 // . . .. icore [4:8) only 4 strides of 4
580 //
581 // .................... ncycle = 20, uses only one core in the warp (cable)
582 // icore 8, all ncycle strides are 1
583 
584 // One thing to be unhappy about is the large stride vector of size about
585 // number of compartments/warpsize. There are a lot of models where the
586 // stride for a warp is constant except for one cycle in the warp and that
587 // is easy to obtain when there are more than warpsize cells per warp.
588 
589 static size_t stride_length(size_t begin, size_t end, VecTNode& nodevec) {
590  // return stride length starting at i. Do not go past j.
591  // max stride is warpsize.
592  // At this time, only assume vicious parent race conditions matter.
593  if (end - begin > warpsize) {
594  end = begin + warpsize;
595  }
596  for (size_t i = begin; i < end; ++i) {
597  TNode* nd = nodevec[i];
598  nrn_assert(nd->nodevec_index == i);
599  size_t diff = dist2child(nd);
600  if (i + diff < end) {
601  end = i + diff;
602  }
603  }
604  return end - begin;
605 }
606 
607 /**
608  * \brief Prepare for solve_interleaved2
609  *
610  * One group of cells per warp.
611  *
612  * warp[i] has a number of compute cycles (ncycle[i])
613  * the index of its first root (rootbegin[i], last rootbegin[nwarp] = ncell)
614  * the index of its first node (nodebegin[i], last nodebegin[nwarp] = nnode)
615  *
616  * Each compute cycle has a stride
617  * A stride is how many nodes are processed by a warp in one compute cycle
618  * There are nstride strides. nstride is the sum of ncycles of all warps.
619  * warp[i] has ncycle[i] strides
620  * same as sum of ncycle
621  * warp[i] has a stridedispl[i] which is stridedispl[i-1] + ncycle[i].
622  * ie. The zeroth cycle of warp[j] works on stride[stridedispl[j]]
623  * The value of a stride beginning at node i (node i is computed by core 0 of
624  * some warp for some cycle) is determined by stride_length(i, j, nodevec)
625  *
626  */
627 static void admin2(int ncell,
628  VecTNode& nodevec,
629  int& nwarp,
630  int& nstride,
631  int*& stridedispl,
632  int*& strides,
633  int*& rootbegin,
634  int*& nodebegin,
635  int*& ncycles) {
636  // the number of groups is the number of warps needed
637  // ncore is the number of warps * warpsize
638  nwarp = nodevec[ncell - 1]->groupindex + 1;
639 
640  ncycles = (int*) ecalloc_align(nwarp, sizeof(int));
641  stridedispl = (int*) ecalloc_align(nwarp + 1,
642  sizeof(int)); // running sum of ncycles (start at 0)
643  rootbegin = (int*) ecalloc_align(nwarp + 1, sizeof(int)); // index (+1) of first root in warp.
644  nodebegin = (int*) ecalloc_align(nwarp + 1, sizeof(int)); // index (+1) of first node in warp.
645 
646  // rootbegin and nodebegin are the root index values + 1 of the last of
647  // the sequence of constant groupindex
648  rootbegin[0] = 0;
649  for (size_t i = 0; i < size_t(ncell); ++i) {
650  rootbegin[nodevec[i]->groupindex + 1] = i + 1;
651  }
652  nodebegin[0] = ncell;
653  // We start from the leaves and go backwards towards the root
654  for (size_t i = size_t(ncell); i < nodevec.size(); ++i) {
655  nodebegin[nodevec[i]->groupindex + 1] = i + 1;
656  }
657 
658  // ncycles, stridedispl, and nstride
659  nstride = 0;
660  stridedispl[0] = 0;
661  for (size_t iwarp = 0; iwarp < (size_t) nwarp; ++iwarp) {
662  size_t j = size_t(nodebegin[iwarp + 1]);
663  int nc = 0;
664  size_t i = nodebegin[iwarp];
665  // in this loop we traverse all the children of all the cells in the current warp (iwarp)
666  while (i < j) {
667  i += stride_length(i, j, nodevec);
668  ++nc; // how many times the warp should loop in order to finish with all the tree
669  // depths (for all the trees of the warp/group)
670  }
671  ncycles[iwarp] = nc;
672  stridedispl[iwarp + 1] = stridedispl[iwarp] + nc;
673  nstride += nc;
674  }
675 
676  // strides
677  strides = (int*) ecalloc_align(nstride, sizeof(int));
678  nstride = 0;
679  for (size_t iwarp = 0; iwarp < (size_t) nwarp; ++iwarp) {
680  size_t j = size_t(nodebegin[iwarp + 1]);
681  size_t i = nodebegin[iwarp];
682  while (i < j) {
683  int k = stride_length(i, j, nodevec);
684  i += k;
685  strides[nstride++] = k;
686  }
687  }
688 
689 #if CORENRN_DEBUG
690  printf("warp rootbegin nodebegin stridedispl\n");
691  for (int i = 0; i <= nwarp; ++i) {
692  printf("%4d %4d %4d %4d\n", i, rootbegin[i], nodebegin[i], stridedispl[i]);
693  }
694 #endif
695 }
696 } // namespace coreneuron
static int maxlevel
Definition: clamp.cpp:36
TNode is the tree node that represents the tree of the compartments.
Definition: tnode.hpp:27
size_t groupindex
Cell ID that this compartment belongs to.
Definition: tnode.hpp:58
size_t nodevec_index
Total number of compartments from the current node and below.
Definition: tnode.hpp:37
VecTNode children
Definition: tnode.hpp:32
size_t level
For cell permute 1 (Interleaved):
Definition: tnode.hpp:56
size_t treesize
Hash value generated by mkhash.
Definition: tnode.hpp:36
size_t hash
Hash algorith that generates a hash based on the hash of the children and the number of compartments ...
Definition: tnode.hpp:35
size_t cellindex
level of of this compartment in the tree
Definition: tnode.hpp:57
size_t treenode_order
index in nodevec that is set in check() In cell permute 2 this is set as Breadth First traversal
Definition: tnode.hpp:39
TNode * parent
Definition: tnode.hpp:31
int nodeindex
Initialized index / groupsize.
Definition: tnode.hpp:59
#define i
Definition: md1redef.h:19
static double order(void *v)
Definition: cvodeobj.cpp:218
static RNG::key_type k
Definition: nrnran123.cpp:9
printf
Definition: extdef.h:5
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
void * ecalloc_align(size_t n, size_t size, size_t alignment)
int interleave_permute_type
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
Definition: tnode.hpp:17
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
Definition: cellorder.cpp:784
static size_t stride_length(size_t begin, size_t end, VecTNode &nodevec)
Definition: cellorder1.cpp:589
void group_order2(VecTNode &, size_t groupsize, size_t ncell)
Implementation of the advanced interleaving strategy (interleave_permute_type == 2)
Definition: cellorder2.cpp:471
size_t level_from_root(VecTNode &)
Definition: cellorder1.cpp:221
static void admin1(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize)
Definition: cellorder1.cpp:525
size_t level_from_leaf(VecTNode &)
Definition: cellorder1.cpp:236
std::pair< TNode *, int > TNI
Definition: cellorder1.cpp:117
int firstnode
Definition: cellorder.cpp:624
static void set_cellindex(int ncell, VecTNode &nodevec)
Set the cellindex to distinguish the different cells.
Definition: cellorder1.cpp:260
static void tree_analysis(int *parent, int nnode, int ncell, VecTNode &)
Perform tree preparation for interleaving strategies.
Definition: cellorder1.cpp:454
int nstride
Definition: cellorder.cpp:789
int int int * strides
Definition: cellorder.cpp:608
static size_t groupsize
Definition: cellorder1.cpp:43
static void set_groupindex(VecTNode &nodevec)
Initialization of the groupindex (groups)
Definition: cellorder1.cpp:279
int int * ncycles
Definition: cellorder.cpp:607
std::vector< int > node_order(int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
Function that returns a permutation of length nnode.
Definition: cellorder1.cpp:321
static void ident_statistic(VecTNode &nodevec, size_t ncell)
Definition: cellorder1.cpp:294
int * stride
Definition: cellorder.cpp:621
std::vector< TNI > TNIVec
Definition: cellorder1.cpp:119
static bool ptr_tnode_earlier(TNode *a, TNode *b)
Definition: cellorder1.cpp:62
int iwarp
Definition: cellorder.cpp:618
static void admin2(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stridedispl, int *&strides, int *&rootbegin, int *&nodebegin, int *&ncycles)
Prepare for solve_interleaved2.
Definition: cellorder1.cpp:627
size_t dist2child(TNode *nd)
Definition: cellorder2.cpp:164
int lastnode
Definition: cellorder.cpp:625
std::map< size_t, std::pair< TNode *, int > > HashCnt
Definition: cellorder1.cpp:118
int int int int * stridedispl
Definition: cellorder.cpp:609
static void quality(VecTNode &nodevec, size_t max=32)
Definition: cellorder1.cpp:128
int int int int int int * nodebegin
Definition: cellorder.cpp:611
int int int int int * rootbegin
Definition: cellorder.cpp:610
std::vector< TNode * > VecTNode
Definition: tnode.hpp:21
static bool interleave_comp(TNode *a, TNode *b)
Definition: cellorder1.cpp:477
static bool tnode_earlier(TNode *a, TNode *b)
Function to order trees by size, hash and nodeindex.
Definition: cellorder1.cpp:48
int * cellsize
Definition: cellorder.cpp:793
static void node_interleave_order(int ncell, VecTNode &)
Naive interleaving strategy (interleave_permute_type == 1)
Definition: cellorder1.cpp:498
static void check(VecTNode &)
Definition: cellorder1.cpp:401
#define nrn_assert(x)
assert()-like macro, independent of NDEBUG status
Definition: nrn_assert.h:33
int const size_t const size_t n
Definition: nrngsl.h:10
size_t q
size_t j
static double children(void *v)
Definition: seclist.cpp:96
#define warpsize
Definition: tnode.hpp:89