NEURON
cellorder.hpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
9 #pragma once
10 
12 #include <algorithm>
13 #include <vector>
14 
15 #if CORENRN_BUILD
16 namespace coreneuron {
17 #else
18 namespace neuron {
19 #endif
20 
21 /**
22  * \brief Function that performs the permutation of the cells such that the
23  * execution threads access coalesced memory.
24  *
25  * \param ith index of the NrnThread to access
26  * \param ncell number of cells in NrnThread
27  * \param nnode number of compartments in the ncells
28  * \param parent parent indices of cells
29  *
30  * \return interleaved order of the cells (int* when CORENRN_BUILD is set, std::vector<int> otherwise)
31  */
32 #if CORENRN_BUILD
33 int* interleave_order(int ith, int ncell, int nnode, int* parent);
34 #else
35 std::vector<int> interleave_order(int ith, int ncell, int nnode, int* parent);
36 #endif
37 
40 
41 #if CORENRN_BUILD
42 /**
43  *
44  * \brief Solve the Hines matrices based on the interleave_permute_type (1 or 2).
45  *
46  * - interleave_permute_type == 1: naive interleaving; each execution thread deals with one Hines matrix (cell).
47  * - interleave_permute_type == 2: advanced interleaving; each Hines matrix is solved by multiple
48  *   execution threads (with coalesced memory access as well).
49  */
50 extern void solve_interleaved(int ith);
51 #endif
52 
53 class InterleaveInfo; // forward declaration
54 #if CORENRN_BUILD
55 /**
56  *
57  * \brief CUDA branch of the solve_interleaved with interleave_permute_type == 2.
58  *
59  * This branch is activated at runtime with the --cuda-interface CLI flag
60  */
61 void solve_interleaved2_launcher(NrnThread* nt, InterleaveInfo* info, int ncore, void* stream);
62 #endif
63 
65  public:
66  InterleaveInfo() = default;
68  InterleaveInfo& operator=(const InterleaveInfo&);
69  ~InterleaveInfo();
70  int nwarp = 0; // used only by interleave2
71  int nstride = 0;
72  int* stridedispl = nullptr; // interleave2: nwarp+1
73  int* stride = nullptr; // interleave2: stride length is stridedispl[nwarp]
74  int* firstnode = nullptr; // interleave2: rootbegin nwarp+1 displacements
75  int* lastnode = nullptr; // interleave2: nodebegin nwarp+1 displacements
76  int* cellsize = nullptr; // interleave2: ncycles nwarp
77 
78  // statistics (nwarp of each)
79  size_t* nnode = nullptr;
80  size_t* ncycle = nullptr;
81  size_t* idle = nullptr;
82  size_t* cache_access = nullptr;
83  size_t* child_race = nullptr;
84 
85  private:
86  void swap(InterleaveInfo& info);
87 };
88 
89 /**
90  * \brief Function that returns a permutation of length nnode.
91  *
92  * There are two permutation strategies:
93  * - interleave_permute_type == 1: naive interleaving; each execution thread deals with one Hines matrix (cell).
94  * - interleave_permute_type == 2: advanced interleaving; each Hines matrix is solved by multiple
95  *   execution threads (with coalesced memory access as well).
96  *
97  * \param ncell number of cells
98  * \param nnode number of compartments in the ncells
99  * \param parents parent indices of the cells
100  * \param nwarp number of warps
101  * \param nstride nstride is the maximum cell size (not counting root)
102  * \param stride stride[i] is the number of cells with an ith node:
103  * using stride[i] we know how many positions to move in order to
104  * access the next element of the same cell (given that the cells are
105  * ordered with the treenode_order).
106  * \param firstnode firstnode[i] is the index of the first nonroot node of the cell
107  * \param lastnode lastnode[i] is the index of the last node of the cell
108  * \param cellsize cellsize is the number of nodes in the cell not counting root.
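109  * \param stridedispl for interleave2, nwarp+1 entries; stridedispl[nwarp] is the length of stride
110  * \return a permutation of length nnode (int* when CORENRN_BUILD is set, std::vector<int> otherwise)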
111  */
112 #if CORENRN_BUILD
113 int* node_order(int ncell,
114 #else
115 std::vector<int> node_order(int ncell,
116 #endif
117  int nnode,
118  int* parents,
119  int& nwarp,
120  int& nstride,
121  int*& stride,
122  int*& firstnode,
123  int*& lastnode,
124  int*& cellsize,
125  int*& stridedispl);
126 
/**
 * \brief Copy an array into freshly allocated storage.
 *
 * Allocates n elements with new[] and copies src[0..n) into them. The previous
 * value of dest is overwritten without being freed, so the caller must not pass
 * a pointer it still needs to release. The caller owns the new buffer and must
 * release it with delete[].
 *
 * \param dest receives the pointer to the newly allocated array
 * \param src first element of the array to copy; only read, never modified
 * \param n number of elements to allocate and copy (0 yields an empty allocation)
 */
template <typename T>
void copy_array(T*& dest, const T* src, size_t n) {
    dest = new T[n];
    // src is const-qualified so read-only buffers can be copied as well;
    // callers passing a plain T* still deduce the same T.
    std::copy_n(src, n, dest);
}
133 
134 // copy src array to dest with NRN_SOA_BYTE_ALIGN ecalloc_align allocation
135 template <typename T>
136 void copy_align_array(T*& dest, T* src, size_t n) {
137  dest = static_cast<T*>(ecalloc_align(n, sizeof(T)));
138  std::copy(src, src + n, dest);
139 }
140 
141 #ifndef INTERLEAVE_DEBUG
142 #define INTERLEAVE_DEBUG 0
143 #endif
144 
145 #if INTERLEAVE_DEBUG
146 void mk_cell_indices();
147 #endif
148 } // namespace coreneuron
for gpu builds with unified memory support
Definition: memory.h:181
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
void * ecalloc_align(size_t n, size_t size, size_t alignment)
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
Definition: tnode.hpp:17
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
Definition: cellorder.cpp:784
int ncycle
Definition: cellorder.cpp:620
int firstnode
Definition: cellorder.cpp:624
std::vector< int > interleave_order(int ith, int ncell, int nnode, int *parent)
Function that performs the permutation of the cells such that the execution threads access coalesced ...
Definition: cellorder.cpp:348
int nstride
Definition: cellorder.cpp:789
void copy_array(T *&dest, T *src, size_t n)
Definition: cellorder.hpp:129
std::vector< int > node_order(int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
Function that returns a permutation of length nnode.
Definition: cellorder1.cpp:321
void copy_align_array(T *&dest, T *src, size_t n)
Definition: cellorder.hpp:136
int * stride
Definition: cellorder.cpp:621
void create_interleave_info()
Definition: cellorder.cpp:110
int lastnode
Definition: cellorder.cpp:625
void solve_interleaved(int ith)
Solve the Hines matrices based on the interleave_permute_type (1 or 2).
int int int int * stridedispl
Definition: cellorder.cpp:609
int * cellsize
Definition: cellorder.cpp:793
void destroy_interleave_info()
Definition: cellorder.cpp:115
static List * info
int const size_t const size_t n
Definition: nrngsl.h:10
Represent main neuron object computed by single thread.
Definition: multicore.h:58