61 void solve_interleaved2_launcher(
NrnThread* nt, InterleaveInfo*
info,
int ncore,
void* stream);
79 size_t* nnode =
nullptr;
81 size_t* idle =
nullptr;
82 size_t* cache_access =
nullptr;
83 size_t* child_race =
nullptr;
128 template <
typename T>
131 std::copy(src, src +
n, dest);
135 template <
typename T>
138 std::copy(src, src +
n, dest);
141 #ifndef INTERLEAVE_DEBUG
142 #define INTERLEAVE_DEBUG 0
146 void mk_cell_indices();
for gpu builds with unified memory support
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
void * ecalloc_align(size_t n, size_t size, size_t alignment)
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
std::vector< int > interleave_order(int ith, int ncell, int nnode, int *parent)
Function that performs the permutation of the cells such that the execution threads access coalesced ...
void copy_array(T *&dest, T *src, size_t n)
std::vector< int > node_order(int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
Function that returns a permutation of length nnode.
void copy_align_array(T *&dest, T *src, size_t n)
void create_interleave_info()
void solve_interleaved(int ith)
Solve the Hines matrices based on the interleave_permute_type (1 or 2).
int int int int * stridedispl
void destroy_interleave_info()
int const size_t const size_t n
Represent main neuron object computed by single thread.