32 #if !CORENRN_BUILD && NRN_DEBUG
34 #define CORENRN_DEBUG NRN_DEBUG
66 TNode::TNode(
int ix) {
81 size_t TNode::mkhash() {
87 hash ^=
children[
i]->hash + 0x9e3779b9 + (hash << 6) + (hash >> 2);
117 using TNI = std::pair<TNode*, int>;
118 using HashCnt = std::map<size_t, std::pair<TNode*, int>>;
132 for (
const auto&
n: nodevec) {
133 if (
n->parent !=
nullptr) {
142 std::map<size_t, size_t> qual;
143 size_t ip_last = 10000000000;
144 for (
size_t i =
ncell;
i < nodevec.size(); ++
i) {
145 size_t ip = nodevec[
i]->parent->nodevec_index;
149 if (ip == ip_last + 1 &&
i % max != 0) {
155 qual[max] += (qcnt / max) * max;
156 size_t x = qcnt % max;
164 qual[max] += (qcnt / max) * max;
165 size_t x = qcnt % max;
173 for (
const auto&
q: qual) {
175 printf(
"%6ld %6ld\n",
q.first,
q.second);
179 printf(
"qual.size=%ld qual total nodes=%ld nodevec.size=%ld\n",
187 size_t maxip =
ncell;
190 std::set<size_t> ipused;
191 for (
size_t i =
ncell;
i < nodevec.size(); ++
i) {
202 if (ipused.find(ip) != ipused.end()) {
213 static_cast<void>(nrace1);
214 static_cast<void>(nrace2);
216 printf(
"nrace = %ld (parent in same group of %ld nodes)\n", nrace1, max);
217 printf(
"nrace = %ld (parent used more than once by same group of %ld nodes)\n", nrace2, max);
223 for (
auto& nd: nodevec) {
225 nd->level = nd->parent->level + 1;
226 if (maxlevel < nd->level) {
238 for (
size_t i = nodevec.size() - 1;
true; --
i) {
242 if (lmax <= child->level) {
243 lmax = child->
level + 1;
262 nodevec[
i]->cellindex =
i;
264 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
280 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
300 std::vector<std::vector<size_t>> n_in_level(
maxlevel + 1);
301 for (
auto&
n: n_in_level) {
304 for (
const auto&
n: nodevec) {
305 n_in_level[
n->level][
n->groupindex]++;
307 printf(
"n_in_level.size = %ld\n", n_in_level.size());
308 for (
size_t i = 0;
i < n_in_level.size(); ++
i) {
310 for (
const auto&
n: n_in_level[
i]) {
365 int* nodeorder =
new int[nnode];
367 std::vector<int> nodeorder(nnode);
369 for (
int i = 0;
i < nnode; ++
i) {
385 if (nodevec[
i - 1]->hash != nodevec[
i]->hash) {
389 static_cast<void>(ntopol);
391 printf(
"%d distinct tree topologies\n", ntopol);
394 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
403 size_t nnode = nodevec.size();
405 for (
size_t i = 0;
i < nnode; ++
i) {
406 nodevec[
i]->nodevec_index =
i;
407 if (nodevec[
i]->parent ==
nullptr) {
412 for (
size_t i = 0;
i <
ncell; ++
i) {
415 for (
size_t i =
ncell;
i < nnode; ++
i) {
418 printf(
"error i=%ld nodevec_index=%ld parent=%ld\n",
429 size_t nnode = nodevec.size();
430 for (
size_t i = 0;
i < nnode; ++
i) {
431 nodevec[
i]->nodevec_index =
i;
433 for (
size_t i = 0;
i < nnode; ++
i) {
434 TNode& nd = *nodevec[
i];
435 printf(
"%ld p=%d c=%ld l=%ld o=%ld ix=%d pix=%d\n",
437 nd.parent ?
int(nd.parent->nodevec_index) : -1,
442 nd.parent ?
int(nd.parent->nodeindex) : -1);
456 nodevec.reserve(nnode);
457 for (
int i = 0;
i < nnode; ++
i) {
458 nodevec.push_back(
new TNode(
i));
462 for (
int i = nnode - 1;
i >=
ncell; --
i) {
463 nodevec[
i]->parent = nodevec[parent[
i]];
464 nodevec[
i]->mkhash();
465 nodevec[parent[
i]]->children.push_back(nodevec[
i]);
470 nodevec[
i]->mkhash();
502 nodevec[
i]->treenode_order =
order[
i]++;
504 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
518 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
546 for (
size_t i =
ncell;
i < nodevec.size(); ++
i) {
562 for (
size_t i =
ncell;
i < nodevec.size(); ++
i) {
596 for (
size_t i = begin;
i < end; ++
i) {
600 if (
i + diff < end) {
638 nwarp = nodevec[
ncell - 1]->groupindex + 1;
649 for (
size_t i = 0;
i < size_t(
ncell); ++
i) {
654 for (
size_t i =
size_t(
ncell);
i < nodevec.size(); ++
i) {
690 printf(
"warp rootbegin nodebegin stridedispl\n");
691 for (
int i = 0;
i <= nwarp; ++
i) {
TNode is the tree node that represents the tree of the compartments.
size_t groupindex
Cell ID that this compartment belongs to.
size_t nodevec_index
Total number of compartments from the current node and below.
size_t level
For cell permute 1 (Interleaved):
size_t treesize
Hash value generated by mkhash.
size_t hash
Hash algorithm that generates a hash based on the hash of the children and the number of compartments ...
size_t cellindex
level of this compartment in the tree
size_t treenode_order
index in nodevec that is set in check(). In cell permute 2 this is set as Breadth First traversal
int nodeindex
Initialized index / groupsize.
static double order(void *v)
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
void * ecalloc_align(size_t n, size_t size, size_t alignment)
int interleave_permute_type
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
static size_t stride_length(size_t begin, size_t end, VecTNode &nodevec)
void group_order2(VecTNode &, size_t groupsize, size_t ncell)
Implementation of the advanced interleaving strategy (interleave_permute_type == 2)
size_t level_from_root(VecTNode &)
static void admin1(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize)
size_t level_from_leaf(VecTNode &)
std::pair< TNode *, int > TNI
static void set_cellindex(int ncell, VecTNode &nodevec)
Set the cellindex to distinguish the different cells.
static void tree_analysis(int *parent, int nnode, int ncell, VecTNode &)
Perform tree preparation for interleaving strategies.
static void set_groupindex(VecTNode &nodevec)
Initialization of the groupindex (groups)
std::vector< int > node_order(int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
Function that returns a permutation of length nnode.
static void ident_statistic(VecTNode &nodevec, size_t ncell)
std::vector< TNI > TNIVec
static bool ptr_tnode_earlier(TNode *a, TNode *b)
static void admin2(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stridedispl, int *&strides, int *&rootbegin, int *&nodebegin, int *&ncycles)
Prepare for solve_interleaved2.
size_t dist2child(TNode *nd)
std::map< size_t, std::pair< TNode *, int > > HashCnt
int int int int * stridedispl
static void quality(VecTNode &nodevec, size_t max=32)
int int int int int int * nodebegin
int int int int int * rootbegin
std::vector< TNode * > VecTNode
static bool interleave_comp(TNode *a, TNode *b)
static bool tnode_earlier(TNode *a, TNode *b)
Function to order trees by size, hash and nodeindex.
static void node_interleave_order(int ncell, VecTNode &)
Naive interleaving strategy (interleave_permute_type == 1)
static void check(VecTNode &)
#define nrn_assert(x)
assert()-like macro, independent of NDEBUG status
int const size_t const size_t n
static double children(void *v)