![]() |
NEURON
|
#include <set>
#include <vector>
#include "nrnoc/multicore.h"
#include "nrnoc/nrn_ansi.h"
#include "nrnoc/nrniv_mf.h"
#include "nrnoc/section.h"
#include "oc/nrnassrt.h"
#include "node_order_optim/permute_utils.hpp"
#include "coreneuron/permute/cellorder.hpp"
#include "coreneuron/network/tnode.hpp"
#include "coreneuron/utils/lpt.hpp"
#include "coreneuron/utils/memory.h"
#include "coreneuron/utils/offload.hpp"
#include "coreneuron/permute/node_permute.h"
Go to the source code of this file.
Namespaces | |
neuron | |
In mechanism libraries, `auto const token = nrn_ensure_model_data_are_sorted();` cannot be used, because the return type is incomplete there (it is only forward-declared in include/neuron/model_data.hpp). | |
Macros | |
#define | GPU_V(i) vec_v[i] |
#define | GPU_A(i) vec_a[i] |
#define | GPU_B(i) vec_b[i] |
#define | GPU_D(i) vec_d[i] |
#define | GPU_RHS(i) vec_rhs[i] |
#define | GPU_PARENT(i) nt->_v_parent_index[i] |
Functions | |
void | neuron::create_interleave_info () |
void | neuron::destroy_interleave_info () |
static void | neuron::print_quality2 (int iwarp, InterleaveInfo &ii, int *p) |
static void | neuron::print_quality1 (int iwarp, InterleaveInfo &ii, int ncell, int *p) |
static void | neuron::warp_balance (int ith, InterleaveInfo &ii) |
static void | neuron::prnode (const char *mes, NrnThread &nt) |
int | neuron::nrn_optimize_node_order (int type) |
Select node ordering for optimum Gaussian elimination. More... | |
std::vector< int > | neuron::interleave_order (int ith, int ncell, int nnode, int *parent) |
Function that performs the permutation of the cells such that the execution threads access coalesced memory. More... | |
void | neuron::nrn_permute_node_order () |
Compute and carry out the permutation for interleave_permute_type. More... | |
static void | neuron::triang_interleaved (NrnThread *nt, int icell, int icellsize, int nstride, int *stride, int *lastnode) |
static void | neuron::bksub_interleaved (NrnThread *nt, int icell, int icellsize, int, int *stride, int *firstnode) |
neuron::nrn_pragma_acc (routine vector) static void solve_interleaved2_loop_body(NrnThread *nt | |
neuron::nrn_pragma_acc (loop seq) for( | |
neuron::if (ncell==0) | |
neuron::nrn_pragma_acc (parallel loop present(nt[0:1], stride[0:nstride], firstnode[0:ncell], lastnode[0:ncell], cellsize[0:ncell]) if(nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(nt -> compute_gpu)) for(int icell=0 | |
Variables | |
int | neuron::interleave_permute_type |
InterleaveInfo * | neuron::interleave_info |
int | neuron::icore |
int * | neuron::ncycles |
int * | neuron::strides |
int * | neuron::stridedispl |
int * | neuron::rootbegin |
int * | neuron::nodebegin |
auto *const | neuron::vec_b = nt->node_b_storage() |
auto *const | neuron::vec_d = nt->node_d_storage() |
auto *const | neuron::vec_rhs = nt->node_rhs_storage() |
int | neuron::iwarp = icore / warpsize |
int | neuron::ic = icore & (warpsize - 1) |
int | neuron::ncycle = ncycles[iwarp] |
int * | neuron::stride = strides + stridedispl[iwarp] |
int | neuron::root = rootbegin[iwarp] |
int | neuron::lastroot = rootbegin[iwarp + 1] |
int | neuron::firstnode = nodebegin[iwarp] |
int | neuron::lastnode = nodebegin[iwarp + 1] |
int | neuron::istride = stride[icycle] |
int | neuron::ii = lastnode - istride + ic |
bool | neuron::has_subtrees_to_compute = true |
auto const | neuron::bksub_root = root + ic |
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> | neuron::ncell |
int | neuron::nstride = ii.nstride |
int * | neuron::cellsize = ii.cellsize |
Definition at line 539 of file cellorder.cpp.
Definition at line 540 of file cellorder.cpp.
Definition at line 541 of file cellorder.cpp.
Definition at line 544 of file cellorder.cpp.
Definition at line 542 of file cellorder.cpp.
Definition at line 538 of file cellorder.cpp.