![]() |
NEURON
|
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the return type is incomplete (from include/neuron/model_data.hpp). More...
Namespaces | |
cache | |
container | |
detail | |
extracellular | |
legacy | |
mechanism | |
oc | |
python | |
test | |
Classes | |
class | TNode |
TNode is the tree node that represents the tree of the compartments. More... | |
class | InterleaveInfo |
struct | unified_allocator |
C++ allocator that uses [de]allocate_unified. More... | |
struct | alloc_deleter |
Allocator-aware deleter for use with std::unique_ptr. More... | |
struct | Model |
Top-level structure. More... | |
struct | model_sorted_token |
class | unique_cstr |
A RAII wrapper for C-style strings. More... | |
struct | opaque_model_sorted_token |
Typedefs | |
using | VecTNode = std::vector< TNode * > |
using | TNI = std::pair< TNode *, int > |
using | HashCnt = std::map< size_t, std::pair< TNode *, int > > |
using | TNIVec = std::vector< TNI > |
using | VTN = VecTNode |
using | VVTN = std::vector< VTN > |
using | VVVTN = std::vector< VVTN > |
Functions | |
size_t | level_from_leaf (VecTNode &) |
size_t | level_from_root (VecTNode &) |
void | group_order2 (VecTNode &, size_t groupsize, size_t ncell) |
Implementation of the advanced interleaving strategy (interleave_permute_type == 2) More... | |
size_t | dist2child (TNode *nd) |
size_t | warp_balance (size_t ncell, VecTNode &nodevec) |
Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells. More... | |
bool | warpcmp (const TNode *a, const TNode *b) |
void | create_interleave_info () |
void | destroy_interleave_info () |
static void | print_quality2 (int iwarp, InterleaveInfo &ii, int *p) |
static void | print_quality1 (int iwarp, InterleaveInfo &ii, int ncell, int *p) |
static void | warp_balance (int ith, InterleaveInfo &ii) |
static void | prnode (const char *mes, NrnThread &nt) |
int | nrn_optimize_node_order (int type) |
Select node ordering for optimum gaussian elimination. More... | |
std::vector< int > | interleave_order (int ith, int ncell, int nnode, int *parent) |
Function that performs the permutation of the cells such that the execution threads access coalesced memory. More... | |
void | nrn_permute_node_order () |
Compute and carry out the permutation for interleave_permute_type. More... | |
static void | triang_interleaved (NrnThread *nt, int icell, int icellsize, int nstride, int *stride, int *lastnode) |
static void | bksub_interleaved (NrnThread *nt, int icell, int icellsize, int, int *stride, int *firstnode) |
nrn_pragma_acc (routine vector) static void solve_interleaved2_loop_body(NrnThread *nt | |
nrn_pragma_acc (loop seq) for( | |
if (ncell==0) | |
nrn_pragma_acc (parallel loop present(nt[0:1], stride[0:nstride], firstnode[0:ncell], lastnode[0:ncell], cellsize[0:ncell]) if(nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(nt -> compute_gpu)) for(int icell=0 | |
std::vector< int > | node_order (int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl) |
Function that returns a permutation of length nnode. More... | |
template<typename T > | |
void | copy_array (T *&dest, T *src, size_t n) |
template<typename T > | |
void | copy_align_array (T *&dest, T *src, size_t n) |
static bool | tnode_earlier (TNode *a, TNode *b) |
Function to order trees by size, hash and nodeindex. More... | |
static bool | ptr_tnode_earlier (TNode *a, TNode *b) |
static void | tree_analysis (int *parent, int nnode, int ncell, VecTNode &nodevec) |
Perform tree preparation for interleaving strategies. More... | |
static void | node_interleave_order (int ncell, VecTNode &nodevec) |
Naive interleaving strategy (interleave_permute_type == 1) More... | |
static void | admin1 (int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize) |
static void | admin2 (int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stridedispl, int *&strides, int *&rootbegin, int *&nodebegin, int *&ncycles) |
Prepare for solve_interleaved2. More... | |
static void | check (VecTNode &) |
static void | quality (VecTNode &nodevec, size_t max=32) |
static void | set_cellindex (int ncell, VecTNode &nodevec) |
Set the cellindex to distinguish the different cells. More... | |
static void | set_groupindex (VecTNode &nodevec) |
Initialization of the groupindex (groups) More... | |
static void | ident_statistic (VecTNode &nodevec, size_t ncell) |
static bool | interleave_comp (TNode *a, TNode *b) |
static size_t | stride_length (size_t begin, size_t end, VecTNode &nodevec) |
void | chklevel (VTN &level, size_t nident=8) |
static bool | sortlevel_cmp (TNode *a, TNode *b) |
static void | sortlevel (VTN &level) |
static void | set_treenode_order (VVTN &levels) |
static bool | is_parent_race2 (TNode *nd) |
static bool | is_child_race2 (TNode *nd) |
template<typename T > | |
static void | move_range (size_t start, size_t length, size_t dst, std::vector< T > &v) |
static void | move_nodes (size_t start, size_t length, size_t dst, VTN &nodes) |
static size_t | next_leaf (TNode *nd, VTN &nodes) |
static void | checkrace (TNode *nd, VTN &nodes) |
static bool | eliminate_race (TNode *nd, size_t d, VTN &nodes, TNode *look) |
static void | eliminate_prace (TNode *nd, VTN &nodes) |
static void | eliminate_crace (TNode *nd, VTN &nodes) |
static void | question2 (VVTN &levels) |
static void | analyze (VVTN &levels) |
void | prgroupsize (VVVTN &groups) |
static bool | final_nodevec_cmp (TNode *a, TNode *b) |
static void | set_nodeindex (VecTNode &nodevec) |
static int | nrn_soa_padded_size (int cnt, int layout) |
static int | nrn_i_layout (int icnt, int cnt, int isz, int sz, int layout) |
template<typename T > | |
void | permute (T *data, int cnt, int sz, int layout, int *p) |
int * | inverse_permute (int *p, int n) |
static void | invert_permute (int *p, int n) |
void | update_parent_index (int *vec, int vec_size, const std::vector< int > &permute) |
void | permute_ptr (int *vec, int n, int *p) |
void | permute_data (double *vec, int n, int *p) |
static bool | nrn_index_sort_cmp (const std::pair< int, int > &a, const std::pair< int, int > &b) |
std::vector< int > | nrn_index_sort (int *values, int n) |
void | sort_ml (Memb_list *ml) |
void | permute_nodeindices (Memb_list *ml, int *permute) |
void | permute_ml (Memb_list *ml, int type, NrnThread &nt) |
int | type_of_ntdata (NrnThread &, int index, bool reset) |
bool | gpu_enabled () |
Check if GPU support is enabled. More... | |
void * | allocate_unified (std::size_t num_bytes) |
Allocate unified memory in GPU builds iff GPU enabled, otherwise new. More... | |
void | deallocate_unified (void *ptr, std::size_t num_bytes) |
Deallocate memory allocated by allocate_unified . More... | |
template<typename T , typename U > | |
bool | operator== (unified_allocator< T > const &, unified_allocator< U > const &) noexcept |
template<typename T , typename U > | |
bool | operator!= (unified_allocator< T > const &x, unified_allocator< U > const &y) noexcept |
template<typename T , typename Alloc , typename... Args> | |
auto | allocate_unique (const Alloc &alloc, Args &&... args) |
Model & | model () |
Access the global Model instance. More... | |
void | solve_interleaved (int ith) |
Solve the Hines matrices based on the interleave_permute_type (1 or 2). More... | |
template<std::size_t N, typename... Args> | |
int | Sprintf (char(&buf)[N], const char *fmt, Args &&... args) |
Redirect sprintf to snprintf if the buffer size can be deduced. More... | |
template<std::size_t N, typename... Args> | |
void | SprintfAsrt (char(&buf)[N], const char *fmt, Args &&... args) |
assert if the Sprintf format data does not fit into buf More... | |
Variables | |
int | cellorder_nwarp = 0 |
int | interleave_permute_type |
InterleaveInfo * | interleave_info |
int | icore |
int int * | ncycles |
int int int * | strides |
int int int int * | stridedispl |
int int int int int * | rootbegin |
int int int int int int * | nodebegin |
auto *const | vec_b = nt->node_b_storage() |
auto *const | vec_d = nt->node_d_storage() |
auto *const | vec_rhs = nt->node_rhs_storage() |
int | iwarp = icore / warpsize |
int | ic = icore & (warpsize - 1) |
int | ncycle = ncycles[iwarp] |
int * | stride = strides + stridedispl[iwarp] |
int | root = rootbegin[iwarp] |
int | lastroot = rootbegin[iwarp + 1] |
int | firstnode = nodebegin[iwarp] |
int | lastnode = nodebegin[iwarp + 1] |
int | istride = stride[icycle] |
int | ii = lastnode - istride + ic |
bool | has_subtrees_to_compute = true |
auto const | bksub_root = root + ic |
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> | ncell |
int | nstride = ii.nstride |
int * | cellsize = ii.cellsize |
static size_t | groupsize = 32 |
In mechanism libraries, one cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the return type is incomplete (from include/neuron/model_data.hpp).
We do not want to fix this by installing more *.hpp files in the include/neuron directory because of potential ABI incompatibility (anything with std::string anywhere in it). The workaround is to provide an extra layer of indirection via unique_ptr so that the opaque token has a definite size (one pointer) and declaration.
The "trick" is just that you have to make sure the parts of the opaque token that need the definition of the non-opaque token are defined in the right place. That is why the constructor and destructor are defined in fadvance.cpp.
Instead, use auto const token = nrn_ensure_model_data_are_sorted_opaque(); This file is already included in all translated mod files.
using neuron::HashCnt = typedef std::map<size_t, std::pair<TNode*, int> > |
Definition at line 118 of file cellorder1.cpp.
using neuron::TNI = typedef std::pair<TNode*, int> |
Definition at line 117 of file cellorder1.cpp.
using neuron::TNIVec = typedef std::vector<TNI> |
Definition at line 119 of file cellorder1.cpp.
using neuron::VecTNode = typedef std::vector<TNode*> |
using neuron::VTN = typedef VecTNode |
Definition at line 36 of file cellorder2.cpp.
using neuron::VVTN = typedef std::vector<VTN> |
Definition at line 37 of file cellorder2.cpp.
using neuron::VVVTN = typedef std::vector<VVTN> |
Definition at line 38 of file cellorder2.cpp.
|
static |
Definition at line 525 of file cellorder1.cpp.
|
static |
Prepare for solve_interleaved2.
One group of cells per warp.
warp[i] has a number of compute cycles (ncycle[i]), the index of its first root (rootbegin[i], last rootbegin[nwarp] = ncell), and the index of its first node (nodebegin[i], last nodebegin[nwarp] = nnode).
Each compute cycle has a stride. A stride is how many nodes are processed by a warp in one compute cycle. There are nstride strides; nstride is the sum of ncycles of all warps. warp[i] has ncycle[i] strides (same as sum of ncycle). warp[i] has a stridedispl[i], which is stridedispl[i-1] + ncycle[i]; i.e., the zeroth cycle of warp[j] works on stride[stridedispl[j]]. The value of a stride beginning at node i (node i is computed by core 0 of some warp for some cycle) is determined by stride_length(i, j, nodevec).
Definition at line 627 of file cellorder1.cpp.
void* neuron::allocate_unified | ( | std::size_t | num_bytes | ) |
Allocate unified memory in GPU builds iff GPU enabled, otherwise new.
auto neuron::allocate_unique | ( | const Alloc & | alloc, |
Args &&... | args | ||
) |
|
static |
Definition at line 416 of file cellorder2.cpp.
|
static |
Definition at line 579 of file cellorder.cpp.
|
static |
Check that the first compartments of nodevec are the root nodes (cells)
Check that the first compartments of nodevec are the root nodes (cells)
Definition at line 401 of file cellorder1.cpp.
Definition at line 276 of file cellorder2.cpp.
void neuron::chklevel | ( | VTN & | level, |
size_t | nident = 8 |
||
) |
Definition at line 41 of file cellorder2.cpp.
void neuron::copy_align_array | ( | T *& | dest, |
T * | src, | ||
size_t | n | ||
) |
Definition at line 136 of file cellorder.hpp.
void neuron::copy_array | ( | T *& | dest, |
T * | src, | ||
size_t | n | ||
) |
Definition at line 129 of file cellorder.hpp.
void neuron::create_interleave_info | ( | ) |
Definition at line 110 of file cellorder.cpp.
void neuron::deallocate_unified | ( | void * | ptr, |
std::size_t | num_bytes | ||
) |
Deallocate memory allocated by allocate_unified.
void neuron::destroy_interleave_info | ( | ) |
Definition at line 115 of file cellorder.cpp.
size_t neuron::dist2child | ( | TNode * | nd | ) |
Definition at line 164 of file cellorder2.cpp.
Definition at line 320 of file cellorder2.cpp.
Definition at line 307 of file cellorder2.cpp.
Definition at line 284 of file cellorder2.cpp.
Definition at line 453 of file cellorder2.cpp.
bool neuron::gpu_enabled | ( | ) |
Check if GPU support is enabled.
This returns true if GPU support was enabled at compile time and at runtime via coreneuron.gpu = True and/or --gpu; otherwise it returns false.
void neuron::group_order2 | ( | VecTNode & | nodevec, |
size_t | groupsize, | ||
size_t | ncell | ||
) |
Implementation of the advanced interleaving strategy (interleave_permute_type == 2)
The main steps are the following:
Definition at line 471 of file cellorder2.cpp.
|
static |
Definition at line 294 of file cellorder1.cpp.
neuron::if | ( | ncell | = = 0 | ) |
Definition at line 785 of file cellorder.cpp.
Definition at line 477 of file cellorder1.cpp.
std::vector< int > neuron::interleave_order | ( | int | ith, |
int | ncell, | ||
int | nnode, | ||
int * | parent | ||
) |
Function that performs the permutation of the cells such that the execution threads access coalesced memory.
ith | NrnThread to access |
ncell | number of cells in NrnThread |
nnode | number of compartments in the ncells |
parent | parent indices of cells |
Definition at line 348 of file cellorder.cpp.
int * neuron::inverse_permute | ( | int * | p, |
int | n | ||
) |
Definition at line 159 of file node_permute.cpp.
|
static |
Definition at line 167 of file node_permute.cpp.
|
static |
Definition at line 143 of file cellorder2.cpp.
|
static |
Definition at line 113 of file cellorder2.cpp.
size_t neuron::level_from_leaf | ( | VecTNode & | nodevec | ) |
Definition at line 236 of file cellorder1.cpp.
size_t neuron::level_from_root | ( | VecTNode & | nodevec | ) |
Definition at line 221 of file cellorder1.cpp.
|
inline |
Access the global Model instance.
Just to be going on with. Needs more thought about who actually holds/owns the structures that own the SOA data. Could use a static local if we need to control/defer when this is constructed.
Definition at line 206 of file model_data.hpp.
|
static |
Definition at line 192 of file cellorder2.cpp.
|
static |
Definition at line 178 of file cellorder2.cpp.
Definition at line 265 of file cellorder2.cpp.
|
static |
Naive interleaving strategy (interleave_permute_type == 1)
Sort so nodevec[ncell:nnode] cell instances are interleaved. Keep the secondary ordering with respect to treenode_order so each cell is still a tree.
ncell | number of cells (trees) |
nodevec | vector that contains compartments (nodes of the trees) |
Definition at line 498 of file cellorder1.cpp.
std::vector< int > neuron::node_order | ( | int | ncell, |
int | nnode, | ||
int * | parents, | ||
int & | nwarp, | ||
int & | nstride, | ||
int *& | stride, | ||
int *& | firstnode, | ||
int *& | lastnode, | ||
int *& | cellsize, | ||
int *& | stridedispl | ||
) |
Function that returns a permutation of length nnode.
There are two permutation strategies. For interleave_permute_type == 1 (naive interleaving), each execution thread deals with one Hines matrix (cell). For interleave_permute_type == 2 (advanced interleaving), each Hines matrix is solved by multiple execution threads (with coalesced memory access as well).
ncell | number of cells |
nnode | number of compartments in the ncells |
parents | parent indices of the cells |
nwarp | number of warps |
nstride | nstride is the maximum cell size (not counting root) |
stride | stride[i] is the number of cells with an ith node: using stride[i] we know how many positions to move in order to access the next element of the same cell (given that the cells are ordered with the treenode_order). |
firstnode | firstnode[i] is the index of the first nonroot node of the cell |
lastnode | lastnode[i] is the index of the last node of the cell |
cellsize | cellsize is the number of nodes in the cell not counting root. |
stridedispl |
Definition at line 321 of file cellorder1.cpp.
|
static |
Definition at line 115 of file node_permute.cpp.
std::vector<int> neuron::nrn_index_sort | ( | int * | values, |
int | n | ||
) |
Definition at line 445 of file node_permute.cpp.
|
static |
Definition at line 430 of file node_permute.cpp.
void neuron::nrn_optimize_node_order | ( | int | type | ) |
Select node ordering for optimum gaussian elimination.
type | 0: cell together (Section construction order); 1: Interleave, identical cells warp adjacent; 2: Depth order, optimize adjacent nodes to have adjacent parents. |
Definition at line 336 of file cellorder.cpp.
void neuron::nrn_permute_node_order | ( | ) |
Compute and carry out the permutation for interleave_permute_type.
Definition at line 425 of file cellorder.cpp.
neuron::nrn_pragma_acc | ( | loop | seq | ) |
|
pure virtual |
neuron::nrn_pragma_acc | ( | routine | vector | ) |
|
static |
Definition at line 111 of file node_permute.cpp.
|
noexcept |
|
noexcept |
void neuron::permute | ( | T * | data, |
int | cnt, | ||
int | sz, | ||
int | layout, | ||
int * | p | ||
) |
Definition at line 124 of file node_permute.cpp.
void neuron::permute_data | ( | double * | vec, |
int | n, | ||
int * | p | ||
) |
Definition at line 392 of file node_permute.cpp.
void neuron::permute_nodeindices | ( | Memb_list * | ml, |
int * | permute | ||
) |
void neuron::permute_ptr | ( | int * | vec, |
int | n, | ||
int * | p | ||
) |
Definition at line 388 of file node_permute.cpp.
void neuron::prgroupsize | ( | VVVTN & | groups | ) |
Definition at line 440 of file cellorder2.cpp.
|
static |
Definition at line 193 of file cellorder.cpp.
|
static |
Definition at line 124 of file cellorder.cpp.
|
static |
Definition at line 307 of file cellorder.cpp.
Definition at line 62 of file cellorder1.cpp.
|
static |
Definition at line 128 of file cellorder1.cpp.
|
static |
Definition at line 336 of file cellorder2.cpp.
|
static |
Set the cellindex to distinguish the different cells.
Definition at line 260 of file cellorder1.cpp.
|
static |
Initialization of the groupindex (groups)
The cells are grouped at a later stage based on a load balancing algorithm. This is just an initialization function.
Definition at line 279 of file cellorder1.cpp.
|
static |
Definition at line 465 of file cellorder2.cpp.
|
static |
Definition at line 84 of file cellorder2.cpp.
void neuron::solve_interleaved | ( | int | ith | ) |
Solve the Hines matrices based on the interleave_permute_type (1 or 2).
For interleave_permute_type == 1 (naive interleaving), each execution thread deals with one Hines matrix (cell). For interleave_permute_type == 2 (advanced interleaving), each Hines matrix is solved by multiple execution threads (with coalesced memory access as well).
void neuron::sort_ml | ( | Memb_list * | ml | ) |
Definition at line 465 of file node_permute.cpp.
|
static |
Definition at line 75 of file cellorder2.cpp.
Definition at line 48 of file cellorder2.cpp.
int neuron::Sprintf | ( | char(&) | buf[N], |
const char * | fmt, | ||
Args &&... | args | ||
) |
Redirect sprintf to snprintf if the buffer size can be deduced.
This is useful to avoid deprecation warnings for sprintf. In general it works if the buffer is something like char buf[512] in the calling scope, but not if it is char* or char buf[].
Definition at line 14 of file wrap_sprintf.h.
void neuron::SprintfAsrt | ( | char(&) | buf[N], |
const char * | fmt, | ||
Args &&... | args | ||
) |
assert if the Sprintf format data does not fit into buf
Definition at line 27 of file wrap_sprintf.h.
|
static |
Definition at line 589 of file cellorder1.cpp.
Function to order trees by size, hash and nodeindex.
Definition at line 48 of file cellorder1.cpp.
|
static |
Perform tree preparation for interleaving strategies.
parent | vector of parent indices |
nnode | number of compartments in the cells |
ncell | number of cells |
Definition at line 454 of file cellorder1.cpp.
|
static |
Definition at line 550 of file cellorder.cpp.
int neuron::type_of_ntdata | ( | NrnThread & | , |
int | index, | ||
bool | reset | ||
) |
void neuron::update_parent_index | ( | int * | vec, |
int | vec_size, | ||
const std::vector< int > & | permute | ||
) |
Definition at line 378 of file node_permute.cpp.
|
static |
Definition at line 259 of file cellorder.cpp.
size_t neuron::warp_balance | ( | size_t | ncell, |
VecTNode & | nodevec | ||
) |
Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells.
Competing objectives are to keep identical cells together and also balance warps.
ncell | number of cells |
nodevec | vector of compartments from all cells |
Definition at line 52 of file balance.cpp.
Definition at line 36 of file balance.cpp.
Definition at line 667 of file cellorder.cpp.
int neuron::cellorder_nwarp = 0 |
Definition at line 33 of file balance.cpp.
int* neuron::cellsize = ii.cellsize |
Definition at line 793 of file cellorder.cpp.
Definition at line 624 of file cellorder.cpp.
|
static |
Definition at line 43 of file cellorder1.cpp.
neuron::has_subtrees_to_compute = true |
Definition at line 634 of file cellorder.cpp.
Definition at line 619 of file cellorder.cpp.
int neuron::icore |
Definition at line 606 of file cellorder.cpp.
InterleaveInfo & neuron::ii = lastnode - istride + ic |
Definition at line 631 of file cellorder.cpp.
InterleaveInfo* neuron::interleave_info |
Definition at line 43 of file cellorder.cpp.
int neuron::interleave_permute_type |
Definition at line 42 of file cellorder.cpp.
neuron::istride = stride[icycle] |
Definition at line 630 of file cellorder.cpp.
Definition at line 618 of file cellorder.cpp.
Definition at line 625 of file cellorder.cpp.
Definition at line 623 of file cellorder.cpp.
icycle< ncycle; ++icycle) { int istride = stride[icycle]; nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for (int icore = 0; icore < warpsize; ++icore) { int i = ii + icore; if (icore < istride) { int ip = GPU_PARENT(i); GPU_RHS(i) -= GPU_B(i) * GPU_RHS(ip); GPU_RHS(i) /= GPU_D(i); } i += istride; } ii += istride; } }}void solve_interleaved2(int ith) { NrnThread* nt = nrn_threads + ith; InterleaveInfo& ii = interleave_info[ith]; int nwarp = ii.nwarp; if (nwarp == 0) return; int ncore = nwarp * warpsize; int* ncycles = ii.cellsize; int* stridedispl = ii.stridedispl; int* strides = ii.stride; int* rootbegin = ii.firstnode; int* nodebegin = ii.lastnode; if (0) { nrn_pragma_acc(parallel loop gang present(nt [0:1], strides [0:nstride], ncycles [0:nwarp], stridedispl [0:nwarp + 1], rootbegin [0:nwarp + 1], nodebegin [0:nwarp + 1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc: nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp + 1], rootbegin[:nwarp + 1], nodebegin[:nwarp + 1])) for (int icore = 0; icore < ncore; icore += warpsize) { solve_interleaved2_loop_body( nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin); } nrn_pragma_acc(wait(nt->stream_id)) } else { for (int icore = 0; icore < ncore; icore += warpsize) { solve_interleaved2_loop_body( nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin); } }}void solve_interleaved1(int ith) { NrnThread* nt = nrn_threads + ith; int ncell = nt-> neuron::ncell |
Definition at line 784 of file cellorder.cpp.
Definition at line 620 of file cellorder.cpp.
int int* neuron::ncycles |
Definition at line 607 of file cellorder.cpp.
int int int int int int* neuron::nodebegin |
Definition at line 611 of file cellorder.cpp.
int neuron::nstride = ii.nstride |
Definition at line 789 of file cellorder.cpp.
Definition at line 622 of file cellorder.cpp.
int int int int int* neuron::rootbegin |
Definition at line 610 of file cellorder.cpp.
int * neuron::stride = strides + stridedispl[iwarp] |
Definition at line 621 of file cellorder.cpp.
int int int int* neuron::stridedispl |
Definition at line 609 of file cellorder.cpp.
int int int* neuron::strides |
Definition at line 608 of file cellorder.cpp.
auto* const neuron::vec_b = nt->node_b_storage() |
Definition at line 614 of file cellorder.cpp.
auto* const neuron::vec_d = nt->node_d_storage() |
Definition at line 615 of file cellorder.cpp.
auto* const neuron::vec_rhs = nt->node_rhs_storage() |
Definition at line 616 of file cellorder.cpp.