3466/doxygen/cellorder1_8cpp_source.html

 /*

 # =============================================================================

 # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL

 #

 # See top-level LICENSE file for details.

 # =============================================================================

 */


 #include <cstdio>

 #include <map>

 #include <set>

 #include <algorithm>

 #include <cstring>


 #if CORENRN_BUILD

 #include "coreneuron/utils/nrn_assert.h"

 #else

 #include "oc/nrnassrt.h"

 #endif


 #include "coreneuron/permute/cellorder.hpp"

 #include "coreneuron/network/tnode.hpp"


 // just for interleave_permute_type

 #if CORENRN_BUILD

 #include "coreneuron/nrniv/nrniv_decl.h"

 #include "coreneuron/utils/memory.h"

 #else

 #include "node_order_optim/node_order_optim.h"

 #endif


 #if !CORENRN_BUILD && NRN_DEBUG

 #undef CORENRN_DEBUG

 #define CORENRN_DEBUG NRN_DEBUG

 #endif


 #if CORENRN_BUILD

 namespace coreneuron {

 #else

 namespace neuron {

 #endif


 // Number of consecutive root nodes (cells) that set_groupindex() places in the
 // same group; the same value is handed to group_order2() as its group size.
 static size_t groupsize = 32;


 /**
  * \brief Strict ordering of trees by treesize, then hash, then nodeindex.
  *
  * \param a root of the first (sub)tree
  * \param b root of the second (sub)tree
  * \return true when \p a has to be placed before \p b
  */
 static bool tnode_earlier(TNode* a, TNode* b) {
     // treesize dominates: the smaller tree always goes first
     if (a->treesize != b->treesize) {
         return a->treesize < b->treesize;
     }
     // equal size: order by hash so that identical trees end up next to each other
     if (a->hash != b->hash) {
         return a->hash < b->hash;
     }
     // identical trees are ordered by nodeindex
     return a->nodeindex < b->nodeindex;
 }


 static bool ptr_tnode_earlier(TNode* a, TNode* b) {

     return tnode_earlier(a, b);

 }


 /**
  * \brief Create an isolated node that only knows its original index.
  *
  * All bookkeeping fields start at 0, except treesize which starts at 1
  * (the node itself). The node has no parent and no children yet.
  *
  * \param ix value stored in nodeindex
  */
 TNode::TNode(int ix)
     : parent(nullptr)
     , hash(0)
     , treesize(1)
     , nodevec_index(0)
     , treenode_order(0)
     , level(0)
     , cellindex(0)
     , groupindex(0)
     , nodeindex(ix) {
     // room for two children up front
     children.reserve(2);
 }


 /// Nothing to release here: the children are not deleted by their parent
 /// (node_order() deletes every node of its nodevec individually).
 TNode::~TNode() = default;


 size_t TNode::mkhash() {  // call on all nodes in leaf to root order

     // concept from http://stackoverflow.com/questions/20511347/a-good-hash-function-for-a-vector

     std::sort(children.begin(), children.end(), ptr_tnode_earlier);

     hash = children.size();

     treesize = 1;

     for (size_t i = 0; i < children.size(); ++i) {  // need sorted by child hash

         hash ^= children[i]->hash + 0x9e3779b9 + (hash << 6) + (hash >> 2);

         treesize += children[i]->treesize;

     }

     return hash;  // hash of leaf nodes is 0

 }


 // Forward declarations of the file-local helpers that are defined further below.

 // Creates the TNode objects from the parent array and sorts the root nodes.
 static void tree_analysis(int* parent, int nnode, int ncell, VecTNode&);

 // Node ordering used when interleave_permute_type == 1.
 static void node_interleave_order(int ncell, VecTNode&);

 // Bookkeeping arrays filled by node_order() when interleave_permute_type == 1.
 static void admin1(int ncell,

                    VecTNode& nodevec,

                    int& nwarp,

                    int& nstride,

                    int*& stride,

                    int*& firstnode,

                    int*& lastnode,

                    int*& cellsize);

 // Bookkeeping arrays filled by node_order() when interleave_permute_type != 1.
 static void admin2(int ncell,

                    VecTNode& nodevec,

                    int& nwarp,

                    int& nstride,

                    int*& stridedispl,

                    int*& strides,

                    int*& rootbegin,

                    int*& nodebegin,

                    int*& ncycles);

 // Consistency check of the node order; also refreshes nodevec_index.
 static void check(VecTNode&);

 #if CORENRN_DEBUG

 // Debug print of every node (only compiled when CORENRN_DEBUG is set).
 static void prtree(VecTNode&);

 #endif


 // Convenience aliases; none of them is referenced in the functions visible in this file.
 using TNI = std::pair<TNode*, int>;

 using HashCnt = std::map<size_t, std::pair<TNode*, int>>;

 using TNIVec = std::vector<TNI>;


 /*
 Assess the quality of the ordering. The measure is the size of a contiguous
 list of nodes whose parents have the same order. How many contiguous lists
 have that same size. How many nodes participate in that size list.
 Modify the quality measure from experience with performance. Start with
 list of (nnode, size_participation)

 nodevec : nodes in their final order; the root nodes (parent == nullptr) are
           expected at the front and nodevec_index must be up to date.
 max     : number of nodes handled together (warp size); used as a divisor,
           so it must not be 0.

 Nothing is returned. The statistics are only printed when CORENRN_DEBUG is set.
 */
 static void quality(VecTNode& nodevec, size_t max = 32) {
     // first ncell nodes (the roots) are by definition in contiguous order
     size_t qcnt = 0;  // how many contiguous nodes have contiguous parents
     for (const auto& n: nodevec) {
         if (n->parent != nullptr) {
             break;
         }
         qcnt += 1;
     }
     const size_t ncell = qcnt;

     // key is how many parents in contiguous order
     // value is number of nodes that participate in that
     std::map<size_t, size_t> qual;

     // account for a finished run: full groups of max nodes plus the remainder
     auto record_run = [&qual, max](size_t run) {
         qual[max] += (run / max) * max;
         const size_t rest = run % max;
         if (rest) {
             qual[rest] += rest;
         }
     };

     bool have_last = false;  // false until the first non-root node has been seen
     size_t ip_last = 0;
     for (size_t i = ncell; i < nodevec.size(); ++i) {
         const size_t ip = nodevec[i]->parent->nodevec_index;
         // i%max == 0 means that if we start a warp with 8 and then have 32
         // the 32 is broken into 24 and 8. (modify if the arrangement during
         // gaussian elimination becomes more sophisticated.)
         const bool contiguous = have_last && ip == ip_last + 1 && i % max != 0;
         if (contiguous) {
             qcnt += 1;
         } else {
             record_run(qcnt);
             qcnt = 1;
         }
         ip_last = ip;
         have_last = true;
     }
     record_run(qcnt);

 #if CORENRN_DEBUG
     // print result
     size_t total = 0;
     for (const auto& q: qual) {
         total += q.second;
         printf("%6zu %6zu\n", q.first, q.second);
     }
     printf("qual.size=%zu  qual total nodes=%zu  nodevec.size=%zu\n",
            qual.size(),
            total,
            nodevec.size());
 #endif

     // how many race conditions. ie refer to same parent on different core
     // of warp (max cores) or parent in same group of max.
     size_t maxip = ncell;
     size_t nrace1 = 0;
     size_t nrace2 = 0;
     std::set<size_t> ipused;
     for (size_t i = ncell; i < nodevec.size(); ++i) {
         const size_t ip = nodevec[i]->parent->nodevec_index;
         if (i % max == 0) {
             // a new group of max nodes starts here
             maxip = i;
             ipused.clear();
         }
         if (ip >= maxip) {
             nrace1 += 1;
         }
         // insert() reports through .second whether ip was new for this group
         if (!ipused.insert(ip).second) {
             nrace2 += 1;
         }
     }
     // the counters are only printed in debug builds
     static_cast<void>(nrace1);
     static_cast<void>(nrace2);
 #if CORENRN_DEBUG
     printf("nrace = %zu (parent in same group of %zu nodes)\n", nrace1, max);
     printf("nrace = %zu (parent used more than once by same group of %zu nodes)\n", nrace2, max);
 #endif
 }


 /**
  * \brief Set the level of every node as its distance from the root.
  *
  * Nodes without parent get level 0, every other node gets the level of its
  * parent plus one. The parent's level is read while walking nodevec front to
  * back, so it has to be set already when the child is visited.
  *
  * \param nodevec nodes to update
  * \return the largest level that was assigned (0 if there is none)
  */
 size_t level_from_root(VecTNode& nodevec) {
     size_t deepest = 0;
     for (TNode* node: nodevec) {
         if (node->parent == nullptr) {
             node->level = 0;
             continue;
         }
         node->level = node->parent->level + 1;
         if (deepest < node->level) {
             deepest = node->level;
         }
     }
     return deepest;
 }


 /**
  * \brief Set the level of every node as its height above the leaves.
  *
  * Nodes without children get level 0, every other node gets one more than
  * the largest level among its children. nodevec is walked back to front, so
  * the level of a child has to be set before its parent is visited.
  *
  * \param nodevec nodes to update; may be empty
  * \return the largest level that was assigned (0 for an empty nodevec)
  */
 size_t level_from_leaf(VecTNode& nodevec) {
     size_t maxlevel = 0;
     // count down with "i-- > 0": an empty nodevec is skipped instead of
     // wrapping size() - 1 around to a huge index
     for (size_t i = nodevec.size(); i-- > 0;) {
         TNode* nd = nodevec[i];
         size_t lmax = 0;
         for (const auto& child: nd->children) {
             if (lmax <= child->level) {
                 lmax = child->level + 1;
             }
         }
         nd->level = lmax;
         if (maxlevel < lmax) {
             maxlevel = lmax;
         }
     }
     return maxlevel;
 }


 /**

  * \brief Set the cellindex to distinguish the different cells.

  */

 static void set_cellindex(int ncell, VecTNode& nodevec) {

     for (int i = 0; i < ncell; ++i) {

         nodevec[i]->cellindex = i;

     }

     for (size_t i = 0; i < nodevec.size(); ++i) {

         TNode& nd = *nodevec[i];

         for (size_t j = 0; j < nd.children.size(); ++j) {

             TNode* cnode = nd.children[j];

             cnode->cellindex = nd.cellindex;

         }

     }

 }


 /**
  * \brief Initialization of the groupindex (groups)
  *
  * The cells are grouped at a later stage based on a load balancing algorithm.
  * This is just an initialization function: a node without parent gets its
  * position in nodevec divided by groupsize, every other node copies the
  * groupindex of its parent.
  */
 static void set_groupindex(VecTNode& nodevec) {
     size_t position = 0;
     for (TNode* node: nodevec) {
         node->groupindex = node->parent ? node->parent->groupindex : position / groupsize;
         ++position;
     }
 }


 // Print how many nodes every group has on every level.
 // The levels are computed with level_from_root(), i.e. roots are level 0.
 // (level_from_leaf() would reverse the sense of levels, all leaves level 0,
 // to get a good idea of the depth of identical subtrees.)
 //
 // nodevec : all nodes; their level is overwritten by level_from_root()
 // ncell   : number of cells, used for the minimum number of group columns
 static void ident_statistic(VecTNode& nodevec, size_t ncell) {
     //  size_t maxlevel = level_from_leaf(nodevec);
     const size_t maxlevel = level_from_root(nodevec);

     // One column per group. ncell / groupsize alone rounds down and is too
     // small whenever ncell is not a multiple of groupsize, so round up and
     // additionally make room for every groupindex that is actually present.
     size_t ngroup = (ncell + groupsize - 1) / groupsize;
     for (const auto& n: nodevec) {
         ngroup = std::max<size_t>(ngroup, n->groupindex + 1);
     }

     // # in each level
     std::vector<std::vector<size_t>> n_in_level(maxlevel + 1, std::vector<size_t>(ngroup, 0));
     for (const auto& n: nodevec) {
         n_in_level[n->level][n->groupindex]++;
     }

     printf("n_in_level.size = %zu\n", n_in_level.size());
     for (size_t i = 0; i < n_in_level.size(); ++i) {
         printf("%5zu\n", i);
         for (const auto& n: n_in_level[i]) {
             printf(" %5zu", n);
         }
         printf("\n");
     }
 }

 #undef MSS


 #if CORENRN_BUILD

 int* node_order(int ncell,

 #else

 std::vector<int> node_order(int ncell,

 #endif

                 int nnode,

                 int* parent,

                 int& nwarp,

                 int& nstride,

                 int*& stride,

                 int*& firstnode,

                 int*& lastnode,

                 int*& cellsize,

                 int*& stridedispl) {

     VecTNode nodevec;


     // nodevec[0:ncell] in increasing size, with identical trees together,

     // and otherwise nodeindex order

     // nodevec.size = nnode

     tree_analysis(parent, nnode, ncell, nodevec);

     check(nodevec);


     set_cellindex(ncell, nodevec);

     set_groupindex(nodevec);

     level_from_root(nodevec);


     // nodevec[ncell:nnode] cells are interleaved in nodevec[0:ncell] cell order

     if (interleave_permute_type == 1) {

         node_interleave_order(ncell, nodevec);

     } else {

         group_order2(nodevec, groupsize, ncell);

     }

     check(nodevec);


 #if CORENRN_DEBUG

     for (int i = 0; i < ncell; ++i) {

         TNode& nd = *nodevec[i];

         printf("%d size=%ld hash=%ld ix=%d\n", i, nd.treesize, nd.hash, nd.nodeindex);

     }

 #endif


     if (0)

         ident_statistic(nodevec, ncell);

     quality(nodevec);


     // the permutation

 #if CORENRN_BUILD

     int* nodeorder = new int[nnode];

 #else

     std::vector<int> nodeorder(nnode);

 #endif

     for (int i = 0; i < nnode; ++i) {

         TNode& nd = *nodevec[i];

         nodeorder[nd.nodeindex] = i;

     }


     // administrative statistics for gauss elimination

     if (interleave_permute_type == 1) {

         admin1(ncell, nodevec, nwarp, nstride, stride, firstnode, lastnode, cellsize);

     } else {

         //  admin2(ncell, nodevec, nwarp, nstride, stridedispl, stride, rootbegin, nodebegin,

         //  ncycles);

         admin2(ncell, nodevec, nwarp, nstride, stridedispl, stride, firstnode, lastnode, cellsize);

     }


     int ntopol = 1;

     for (int i = 1; i < ncell; ++i) {

         if (nodevec[i - 1]->hash != nodevec[i]->hash) {

             ntopol += 1;

         }

     }

     static_cast<void>(ntopol);

 #ifdef DEBUG

     printf("%d distinct tree topologies\n", ntopol);

 #endif


     for (size_t i = 0; i < nodevec.size(); ++i) {

         delete nodevec[i];

     }


     return nodeorder;

 }


 /**
  * \brief Refresh nodevec_index and verify the ordering invariants.
  *
  * Every node gets its current position stored in nodevec_index. Then it is
  * asserted (nrn_assert) that all nodes without parent are at the front of
  * nodevec and that every other node comes after its parent. An offending
  * node is printed before the assertion fires.
  */
 void check(VecTNode& nodevec) {
     // printf("check\n");
     const size_t nnode = nodevec.size();
     size_t ncell = 0;  // number of nodes without parent
     for (size_t i = 0; i < nnode; ++i) {
         TNode* node = nodevec[i];
         node->nodevec_index = i;
         if (node->parent == nullptr) {
             ncell++;
         }
     }
     ///  Check that the first compartments of nodevec are the root nodes (cells)
     for (size_t i = 0; i < ncell; ++i) {
         nrn_assert(nodevec[i]->parent == nullptr);
     }
     for (size_t i = ncell; i < nnode; ++i) {
         const TNode& nd = *nodevec[i];
         if (nd.parent->nodevec_index >= nd.nodevec_index) {
             printf("error i=%zu nodevec_index=%zu parent=%zu\n",
                    i,
                    nd.nodevec_index,
                    nd.parent->nodevec_index);
         }
         nrn_assert(nd.nodevec_index > nd.parent->nodevec_index);
     }
 }


 #if CORENRN_DEBUG

 /**
  * \brief Debug print of all nodes, one line per node.
  *
  * Refreshes nodevec_index first. Each line shows the position, the position
  * of the parent (-1 for none), cellindex, level, treenode_order, nodeindex
  * and the nodeindex of the parent (-1 for none).
  */
 void prtree(VecTNode& nodevec) {
     const size_t nnode = nodevec.size();
     for (size_t i = 0; i < nnode; ++i) {
         nodevec[i]->nodevec_index = i;
     }
     for (size_t i = 0; i < nnode; ++i) {
         const TNode& nd = *nodevec[i];
         const int parent_pos = nd.parent ? int(nd.parent->nodevec_index) : -1;
         const int parent_ix = nd.parent ? int(nd.parent->nodeindex) : -1;
         printf("%zu p=%d   c=%zu l=%zu o=%zu   ix=%d pix=%d\n",
                i,
                parent_pos,
                nd.cellindex,
                nd.level,
                nd.treenode_order,
                nd.nodeindex,
                parent_ix);
     }
 }

 #endif


 /**

  * \brief Perform tree preparation for interleaving strategies

  *

  * \param parent vector of parent indices

  * \param nnode number of compartments in the cells

  * \param ncell number of cells

  */

 void tree_analysis(int* parent, int nnode, int ncell, VecTNode& nodevec) {

     // create empty TNodes (knowing only their index)

     nodevec.reserve(nnode);

     for (int i = 0; i < nnode; ++i) {

         nodevec.push_back(new TNode(i));

     }


     // determine the (sorted by hash) children of each node

     for (int i = nnode - 1; i >= ncell; --i) {

         nodevec[i]->parent = nodevec[parent[i]];

         nodevec[i]->mkhash();

         nodevec[parent[i]]->children.push_back(nodevec[i]);

     }


     // determine hash of the cells

     for (int i = 0; i < ncell; ++i) {

         nodevec[i]->mkhash();

     }


     // sort it by tree size (from smaller to larger)

     std::sort(nodevec.begin(), nodevec.begin() + ncell, tnode_earlier);

 }


 /// Comparator for node_interleave_order(): order by treenode_order first and
 /// by cellindex among nodes with the same treenode_order.
 static bool interleave_comp(TNode* a, TNode* b) {
     if (a->treenode_order != b->treenode_order) {
         return a->treenode_order < b->treenode_order;
     }
     return a->cellindex < b->cellindex;
 }


 /**

  * \brief Naive interleaving strategy (interleave_permute_type == 1)

  *

  * Sort so nodevec[ncell:nnode] cell instances are interleaved. Keep the

  * secondary ordering with respect to treenode_order so each cell is still a tree.

  *

  * \param ncell number of cells (trees)

  * \param nodevec vector that contains compartments (nodes of the trees)

  */

 void node_interleave_order(int ncell, VecTNode& nodevec) {

     int* order = new int[ncell];

     for (int i = 0; i < ncell; ++i) {

         order[i] = 0;

         nodevec[i]->treenode_order = order[i]++;

     }

     for (size_t i = 0; i < nodevec.size(); ++i) {

         TNode& nd = *nodevec[i];

         for (size_t j = 0; j < nd.children.size(); ++j) {

             TNode* cnode = nd.children[j];

             cnode->treenode_order = order[nd.cellindex]++;

         }

     }

     delete[] order;


     //  std::sort(nodevec.begin() + ncell, nodevec.end(), contig_comp);

     // Traversal of nodevec: From root to leaves (this is why we compute the tree node order)

     std::sort(nodevec.begin() + ncell, nodevec.end(), interleave_comp);


 #if CORENRN_DEBUG

     for (size_t i = 0; i < nodevec.size(); ++i) {

         TNode& nd = *nodevec[i];

         printf("%ld cell=%ld ix=%d\n", i, nd.cellindex, nd.nodeindex);

     }

 #endif

 }


 /**
  * \brief Bookkeeping for the naive interleaving (interleave_permute_type == 1)
  *
  * \param ncell number of cells; the root nodes are nodevec[0:ncell]
  * \param nodevec nodes in their final order
  * \param nwarp set to ncell / warpsize, rounded up
  * \param nstride set to the largest number of non-root nodes of any cell
  * \param stride newly allocated, nstride + 1 entries: how many nodes have
  *        treenode_order k + 1, stored at index k
  * \param firstnode newly allocated, per cell: position of its first non-root node (-1 if none)
  * \param lastnode newly allocated, per cell: position of its last non-root node (-1 if none)
  * \param cellsize newly allocated, per cell: number of non-root nodes
  */
 static void admin1(int ncell,
                    VecTNode& nodevec,
                    int& nwarp,
                    int& nstride,
                    int*& stride,
                    int*& firstnode,
                    int*& lastnode,
                    int*& cellsize) {
     firstnode = static_cast<int*>(ecalloc_align(ncell, sizeof(int)));
     lastnode = static_cast<int*>(ecalloc_align(ncell, sizeof(int)));
     cellsize = static_cast<int*>(ecalloc_align(ncell, sizeof(int)));

     // one more warp when the cells do not fill the last one completely
     nwarp = ncell / warpsize + ((ncell % warpsize == 0) ? 0 : 1);

     for (int cell = 0; cell < ncell; ++cell) {
         cellsize[cell] = 0;
         lastnode[cell] = -1;
         firstnode[cell] = -1;
     }

     nstride = 0;
     for (size_t pos = ncell; pos < nodevec.size(); ++pos) {
         const size_t cell = nodevec[pos]->cellindex;
         if (firstnode[cell] == -1) {
             firstnode[cell] = pos;
         }
         lastnode[cell] = pos;
         cellsize[cell] += 1;
         if (nstride < cellsize[cell]) {
             nstride = cellsize[cell];
         }
     }

     // this vector is used to move from one compartment to the other (per cell)
     // its length is equal to the cell with the highest number of compartments
     // NOTE(review): the counting below presumes ecalloc_align returns zeroed memory - confirm
     stride = static_cast<int*>(ecalloc_align(nstride + 1, sizeof(int)));
     for (size_t pos = ncell; pos < nodevec.size(); ++pos) {
         // compute how many compartments with the same order
         // treenode_order : defined in breadth first fashion (for each cell separately)
         const size_t slot = nodevec[pos]->treenode_order - 1;  // -1 because treenode order includes root
         stride[slot] += 1;
     }
 }


 // for admin2 we allow the node organisation in warps of (say 4 cores per warp)

 // ...............  ideal warp but unbalanced relative to warp with max cycles

 // ...............  ncycle = 15, icore [0:4), all strides are 4.

 // ...............

 // ...............

 //

 // ..........       unbalanced relative to warp with max cycles

 // ..........       ncycle = 10, not all strides the same because

 // ..........       of need to avoid occasional race conditions.

 //  .  . ..         icore [4:8) only 4 strides of 4

 //

 // ....................  ncycle = 20, uses only one core in the warp (cable)

 //                       icore 8, all ncycle strides are 1


 // One thing to be unhappy about is the large stride vector of size about

 // number of compartments/warpsize. There are a lot of models where the

 // stride for a warp is constant except for one cycle in the warp and that

 // is easy to obtain when there are more than warpsize cells per warp.


 static size_t stride_length(size_t begin, size_t end, VecTNode& nodevec) {

     // return stride length starting at i. Do not go past j.

     // max stride is warpsize.

     // At this time, only assume vicious parent race conditions matter.

     if (end - begin > warpsize) {

         end = begin + warpsize;

     }

     for (size_t i = begin; i < end; ++i) {

         TNode* nd = nodevec[i];

         nrn_assert(nd->nodevec_index == i);

         size_t diff = dist2child(nd);

         if (i + diff < end) {

             end = i + diff;

         }

     }

     return end - begin;

 }


 /**
  * \brief Prepare for solve_interleaved2
  *
  * One group of cells per warp.
  *
  * warp[i] has a number of compute cycles (ncycles[i]),
  * the index of its first root (rootbegin[i], last rootbegin[nwarp] = ncell) and
  * the index of its first node (nodebegin[i], last nodebegin[nwarp] = nnode).
  *
  * Each compute cycle has a stride. A stride is how many nodes are processed
  * by a warp in one compute cycle. There are nstride strides, nstride is the
  * sum of ncycles of all warps, and warp[i] has ncycles[i] strides.
  * stridedispl is the running sum of ncycles starting at 0, ie. the zeroth
  * cycle of warp[j] works on strides[stridedispl[j]].
  * The value of a stride beginning at node i (node i is computed by core 0 of
  * some warp for some cycle) is determined by stride_length(i, j, nodevec)
  * where j is the beginning of the next warp.
  *
  * \param ncell number of cells; nodevec[ncell - 1] is read, so it must be > 0
  * \param nodevec nodes in their final order with groupindex set
  * \param nwarp set to the groupindex of the last root plus one
  * \param nstride set to the total number of strides
  * \param stridedispl newly allocated, nwarp + 1 entries
  * \param strides newly allocated, nstride entries
  * \param rootbegin newly allocated, nwarp + 1 entries
  * \param nodebegin newly allocated, nwarp + 1 entries
  * \param ncycles newly allocated, nwarp entries
  */
 static void admin2(int ncell,
                    VecTNode& nodevec,
                    int& nwarp,
                    int& nstride,
                    int*& stridedispl,
                    int*& strides,
                    int*& rootbegin,
                    int*& nodebegin,
                    int*& ncycles) {
     // the number of groups is the number of warps needed
     // ncore is the number of warps * warpsize
     nwarp = nodevec[ncell - 1]->groupindex + 1;

     ncycles = static_cast<int*>(ecalloc_align(nwarp, sizeof(int)));
     // running sum of ncycles (start at 0)
     stridedispl = static_cast<int*>(ecalloc_align(nwarp + 1, sizeof(int)));
     // index (+1) of first root in warp.
     rootbegin = static_cast<int*>(ecalloc_align(nwarp + 1, sizeof(int)));
     // index (+1) of first node in warp.
     nodebegin = static_cast<int*>(ecalloc_align(nwarp + 1, sizeof(int)));

     // rootbegin and nodebegin are the root index values + 1 of the last of
     // the sequence of constant groupindex
     rootbegin[0] = 0;
     for (size_t pos = 0; pos < size_t(ncell); ++pos) {
         rootbegin[nodevec[pos]->groupindex + 1] = pos + 1;
     }
     nodebegin[0] = ncell;
     // the non-root nodes follow the roots; a later node of the same group overwrites the entry
     for (size_t pos = size_t(ncell); pos < nodevec.size(); ++pos) {
         nodebegin[nodevec[pos]->groupindex + 1] = pos + 1;
     }

     // ncycles, stridedispl, and nstride
     nstride = 0;
     stridedispl[0] = 0;
     for (size_t warp = 0; warp < (size_t) nwarp; ++warp) {
         const size_t warp_end = size_t(nodebegin[warp + 1]);
         // how many times the warp should loop in order to finish with all the tree
         // depths (for all the trees of the warp/group)
         int cycles = 0;
         // in this loop we traverse all the children of all the cells in the current warp
         for (size_t pos = nodebegin[warp]; pos < warp_end;
              pos += stride_length(pos, warp_end, nodevec)) {
             ++cycles;
         }
         ncycles[warp] = cycles;
         stridedispl[warp + 1] = stridedispl[warp] + cycles;
         nstride += cycles;
     }

     // strides: second pass over the same stride boundaries, now storing the lengths
     strides = static_cast<int*>(ecalloc_align(nstride, sizeof(int)));
     nstride = 0;
     for (size_t warp = 0; warp < (size_t) nwarp; ++warp) {
         const size_t warp_end = size_t(nodebegin[warp + 1]);
         size_t pos = nodebegin[warp];
         while (pos < warp_end) {
             const int len = stride_length(pos, warp_end, nodevec);
             strides[nstride++] = len;
             pos += len;
         }
     }

 #if CORENRN_DEBUG
     printf("warp rootbegin nodebegin stridedispl\n");
     for (int warp = 0; warp <= nwarp; ++warp) {
         printf("%4d %4d %4d %4d\n", warp, rootbegin[warp], nodebegin[warp], stridedispl[warp]);
     }
 #endif
 }

 }  // namespace coreneuron (CORENRN_BUILD) or namespace neuron

cellorder.hpp

maxlevel
static int maxlevel
Definition: clamp.cpp:36

neuron::TNode
TNode is the tree node that represents the tree of the compartments.
Definition: tnode.hpp:27

neuron::TNode::groupindex
size_t groupindex
Cell ID that this compartment belongs to.
Definition: tnode.hpp:58

neuron::TNode::nodevec_index
size_t nodevec_index
Total number of compartments from the current node and below.
Definition: tnode.hpp:37

neuron::TNode::children
VecTNode children
Definition: tnode.hpp:32

neuron::TNode::level
size_t level
For cell permute 1 (Interleaved):
Definition: tnode.hpp:56

neuron::TNode::treesize
size_t treesize
Hash value generated by mkhash.
Definition: tnode.hpp:36

neuron::TNode::hash
size_t hash
Hash algorithm that generates a hash based on the hash of the children and the number of compartments ...
Definition: tnode.hpp:35

neuron::TNode::cellindex
size_t cellindex
level of this compartment in the tree
Definition: tnode.hpp:57

neuron::TNode::treenode_order
size_t treenode_order
index in nodevec that is set in check() In cell permute 2 this is set as Breadth First traversal
Definition: tnode.hpp:39

neuron::TNode::parent
TNode * parent
Definition: tnode.hpp:31

neuron::TNode::nodeindex
int nodeindex
Initialized index / groupsize.
Definition: tnode.hpp:59

i
#define i
Definition: md1redef.h:19

order
static double order(void *v)
Definition: cvodeobj.cpp:218

k
static RNG::key_type k
Definition: nrnran123.cpp:9

memory.h

printf
printf
Definition: extdef.h:5

coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12

coreneuron::ecalloc_align
void * ecalloc_align(size_t n, size_t size, size_t alignment)

coreneuron::interleave_permute_type
int interleave_permute_type

neuron
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
Definition: tnode.hpp:17

neuron::ncell
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
Definition: cellorder.cpp:784

neuron::stride_length
static size_t stride_length(size_t begin, size_t end, VecTNode &nodevec)
Definition: cellorder1.cpp:589

neuron::group_order2
void group_order2(VecTNode &, size_t groupsize, size_t ncell)
Implementation of the advanced interleaving strategy (interleave_permute_type == 2)
Definition: cellorder2.cpp:471

neuron::level_from_root
size_t level_from_root(VecTNode &)
Definition: cellorder1.cpp:221

neuron::admin1
static void admin1(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize)
Definition: cellorder1.cpp:525

neuron::level_from_leaf
size_t level_from_leaf(VecTNode &)
Definition: cellorder1.cpp:236

neuron::TNI
std::pair< TNode *, int > TNI
Definition: cellorder1.cpp:117

neuron::firstnode
int firstnode
Definition: cellorder.cpp:624

neuron::set_cellindex
static void set_cellindex(int ncell, VecTNode &nodevec)
Set the cellindex to distinguish the different cells.
Definition: cellorder1.cpp:260

neuron::tree_analysis
static void tree_analysis(int *parent, int nnode, int ncell, VecTNode &)
Perform tree preparation for interleaving strategies.
Definition: cellorder1.cpp:454

neuron::nstride
int nstride
Definition: cellorder.cpp:789

neuron::strides
int int int * strides
Definition: cellorder.cpp:608

neuron::groupsize
static size_t groupsize
Definition: cellorder1.cpp:43

neuron::set_groupindex
static void set_groupindex(VecTNode &nodevec)
Initialization of the groupindex (groups)
Definition: cellorder1.cpp:279

neuron::ncycles
int int * ncycles
Definition: cellorder.cpp:607

neuron::node_order
std::vector< int > node_order(int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
Function that returns a permutation of length nnode.
Definition: cellorder1.cpp:321

neuron::ident_statistic
static void ident_statistic(VecTNode &nodevec, size_t ncell)
Definition: cellorder1.cpp:294

neuron::stride
int * stride
Definition: cellorder.cpp:621

neuron::TNIVec
std::vector< TNI > TNIVec
Definition: cellorder1.cpp:119

neuron::ptr_tnode_earlier
static bool ptr_tnode_earlier(TNode *a, TNode *b)
Definition: cellorder1.cpp:62

neuron::iwarp
int iwarp
Definition: cellorder.cpp:618

neuron::admin2
static void admin2(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stridedispl, int *&strides, int *&rootbegin, int *&nodebegin, int *&ncycles)
Prepare for solve_interleaved2.
Definition: cellorder1.cpp:627

neuron::dist2child
size_t dist2child(TNode *nd)
Definition: cellorder2.cpp:164

neuron::lastnode
int lastnode
Definition: cellorder.cpp:625

neuron::HashCnt
std::map< size_t, std::pair< TNode *, int > > HashCnt
Definition: cellorder1.cpp:118

neuron::stridedispl
int int int int * stridedispl
Definition: cellorder.cpp:609

neuron::quality
static void quality(VecTNode &nodevec, size_t max=32)
Definition: cellorder1.cpp:128

neuron::nodebegin
int int int int int int * nodebegin
Definition: cellorder.cpp:611

neuron::rootbegin
int int int int int * rootbegin
Definition: cellorder.cpp:610

neuron::VecTNode
std::vector< TNode * > VecTNode
Definition: tnode.hpp:21

neuron::interleave_comp
static bool interleave_comp(TNode *a, TNode *b)
Definition: cellorder1.cpp:477

neuron::tnode_earlier
static bool tnode_earlier(TNode *a, TNode *b)
Function to order trees by size, hash and nodeindex.
Definition: cellorder1.cpp:48

neuron::cellsize
int * cellsize
Definition: cellorder.cpp:793

neuron::node_interleave_order
static void node_interleave_order(int ncell, VecTNode &)
Naive interleaving strategy (interleave_permute_type == 1)
Definition: cellorder1.cpp:498

neuron::check
static void check(VecTNode &)
Definition: cellorder1.cpp:401

node_order_optim.h

nrn_assert.h

nrn_assert
#define nrn_assert(x)
assert()-like macro, independent of NDEBUG status
Definition: nrn_assert.h:33

nrnassrt.h

n
int const size_t const size_t n
Definition: nrngsl.h:10

q
size_t q
Definition: nrngsl_hc_radix2.cpp:49

result
result
Definition: nrngsl_hc_radix2.cpp:61

j
size_t j
Definition: nrngsl_real_radix2.cpp:50

nrniv_decl.h

children
static double children(void *v)
Definition: seclist.cpp:96

tnode.hpp

warpsize
#define warpsize
Definition: tnode.hpp:89