NEURON
neuron Namespace Reference

In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the return type is incomplete (from include/neuron/model_data.hpp). More...

Namespaces

 cache
 
 container
 
 detail
 
 extracellular
 
 legacy
 
 mechanism
 
 oc
 
 python
 
 test
 

Classes

class  TNode
 TNode is the tree node that represents the tree of the compartments. More...
 
class  InterleaveInfo
 
struct  unified_allocator
 C++ allocator that uses [de]allocate_unified. More...
 
struct  alloc_deleter
 Allocator-aware deleter for use with std::unique_ptr. More...
 
struct  Model
 Top-level structure. More...
 
struct  model_sorted_token
 
class  unique_cstr
 A RAII wrapper for C-style strings. More...
 
struct  opaque_model_sorted_token
 

Typedefs

using VecTNode = std::vector< TNode * >
 
using TNI = std::pair< TNode *, int >
 
using HashCnt = std::map< size_t, std::pair< TNode *, int > >
 
using TNIVec = std::vector< TNI >
 
using VTN = VecTNode
 
using VVTN = std::vector< VTN >
 
using VVVTN = std::vector< VVTN >
 

Functions

size_t level_from_leaf (VecTNode &)
 
size_t level_from_root (VecTNode &)
 
void group_order2 (VecTNode &, size_t groupsize, size_t ncell)
 Implementation of the advanced interleaving strategy (interleave_permute_type == 2) More...
 
size_t dist2child (TNode *nd)
 
size_t warp_balance (size_t ncell, VecTNode &nodevec)
 Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells. More...
 
bool warpcmp (const TNode *a, const TNode *b)
 
void create_interleave_info ()
 
void destroy_interleave_info ()
 
static void print_quality2 (int iwarp, InterleaveInfo &ii, int *p)
 
static void print_quality1 (int iwarp, InterleaveInfo &ii, int ncell, int *p)
 
static void warp_balance (int ith, InterleaveInfo &ii)
 
static void prnode (const char *mes, NrnThread &nt)
 
int nrn_optimize_node_order (int type)
 Select node ordering for optimum Gaussian elimination. More...
 
std::vector< int > interleave_order (int ith, int ncell, int nnode, int *parent)
 Function that performs the permutation of the cells such that the execution threads access coalesced memory. More...
 
void nrn_permute_node_order ()
 Compute and carry out the permutation for interleave_permute_type. More...
 
static void triang_interleaved (NrnThread *nt, int icell, int icellsize, int nstride, int *stride, int *lastnode)
 
static void bksub_interleaved (NrnThread *nt, int icell, int icellsize, int, int *stride, int *firstnode)
 
 nrn_pragma_acc (routine vector) static void solve_interleaved2_loop_body(NrnThread *nt
 
 nrn_pragma_acc (loop seq) for(
 
 if (ncell==0)
 
 nrn_pragma_acc (parallel loop present(nt[0:1], stride[0:nstride], firstnode[0:ncell], lastnode[0:ncell], cellsize[0:ncell]) if(nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(nt -> compute_gpu)) for(int icell=0
 
std::vector< int > node_order (int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
 Function that returns a permutation of length nnode. More...
 
template<typename T >
void copy_array (T *&dest, T *src, size_t n)
 
template<typename T >
void copy_align_array (T *&dest, T *src, size_t n)
 
static bool tnode_earlier (TNode *a, TNode *b)
 Function to order trees by size, hash and nodeindex. More...
 
static bool ptr_tnode_earlier (TNode *a, TNode *b)
 
static void tree_analysis (int *parent, int nnode, int ncell, VecTNode &nodevec)
 Perform tree preparation for interleaving strategies. More...
 
static void node_interleave_order (int ncell, VecTNode &nodevec)
 Naive interleaving strategy (interleave_permute_type == 1) More...
 
static void admin1 (int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize)
 
static void admin2 (int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stridedispl, int *&strides, int *&rootbegin, int *&nodebegin, int *&ncycles)
 Prepare for solve_interleaved2. More...
 
static void check (VecTNode &)
 
static void quality (VecTNode &nodevec, size_t max=32)
 
static void set_cellindex (int ncell, VecTNode &nodevec)
 Set the cellindex to distinguish the different cells. More...
 
static void set_groupindex (VecTNode &nodevec)
 Initialization of the groupindex (groups) More...
 
static void ident_statistic (VecTNode &nodevec, size_t ncell)
 
static bool interleave_comp (TNode *a, TNode *b)
 
static size_t stride_length (size_t begin, size_t end, VecTNode &nodevec)
 
void chklevel (VTN &level, size_t nident=8)
 
static bool sortlevel_cmp (TNode *a, TNode *b)
 
static void sortlevel (VTN &level)
 
static void set_treenode_order (VVTN &levels)
 
static bool is_parent_race2 (TNode *nd)
 
static bool is_child_race2 (TNode *nd)
 
template<typename T >
static void move_range (size_t start, size_t length, size_t dst, std::vector< T > &v)
 
static void move_nodes (size_t start, size_t length, size_t dst, VTN &nodes)
 
static size_t next_leaf (TNode *nd, VTN &nodes)
 
static void checkrace (TNode *nd, VTN &nodes)
 
static bool eliminate_race (TNode *nd, size_t d, VTN &nodes, TNode *look)
 
static void eliminate_prace (TNode *nd, VTN &nodes)
 
static void eliminate_crace (TNode *nd, VTN &nodes)
 
static void question2 (VVTN &levels)
 
static void analyze (VVTN &levels)
 
void prgroupsize (VVVTN &groups)
 
static bool final_nodevec_cmp (TNode *a, TNode *b)
 
static void set_nodeindex (VecTNode &nodevec)
 
static int nrn_soa_padded_size (int cnt, int layout)
 
static int nrn_i_layout (int icnt, int cnt, int isz, int sz, int layout)
 
template<typename T >
void permute (T *data, int cnt, int sz, int layout, int *p)
 
int * inverse_permute (int *p, int n)
 
static void invert_permute (int *p, int n)
 
void update_parent_index (int *vec, int vec_size, const std::vector< int > &permute)
 
void permute_ptr (int *vec, int n, int *p)
 
void permute_data (double *vec, int n, int *p)
 
static bool nrn_index_sort_cmp (const std::pair< int, int > &a, const std::pair< int, int > &b)
 
std::vector< int > nrn_index_sort (int *values, int n)
 
void sort_ml (Memb_list *ml)
 
void permute_nodeindices (Memb_list *ml, int *permute)
 
void permute_ml (Memb_list *ml, int type, NrnThread &nt)
 
int type_of_ntdata (NrnThread &, int index, bool reset)
 
bool gpu_enabled ()
 Check if GPU support is enabled. More...
 
void * allocate_unified (std::size_t num_bytes)
 Allocate unified memory in GPU builds iff GPU enabled, otherwise new. More...
 
void deallocate_unified (void *ptr, std::size_t num_bytes)
 Deallocate memory allocated by allocate_unified. More...
 
template<typename T , typename U >
bool operator== (unified_allocator< T > const &, unified_allocator< U > const &) noexcept
 
template<typename T , typename U >
bool operator!= (unified_allocator< T > const &x, unified_allocator< U > const &y) noexcept
 
template<typename T , typename Alloc , typename... Args>
auto allocate_unique (const Alloc &alloc, Args &&... args)
 
Model & model ()
 Access the global Model instance. More...
 
void solve_interleaved (int ith)
 Solve the Hines matrices based on the interleave_permute_type (1 or 2). More...
 
template<std::size_t N, typename... Args>
int Sprintf (char(&buf)[N], const char *fmt, Args &&... args)
 Redirect sprintf to snprintf if the buffer size can be deduced. More...
 
template<std::size_t N, typename... Args>
void SprintfAsrt (char(&buf)[N], const char *fmt, Args &&... args)
 assert if the Sprintf format data does not fit into buf More...
 

Variables

int cellorder_nwarp = 0
 
int interleave_permute_type
 
InterleaveInfo * interleave_info
 
int icore
 
int int * ncycles
 
int int int * strides
 
int int int int * stridedispl
 
int int int int int * rootbegin
 
int int int int int int * nodebegin
 
auto *const vec_b = nt->node_b_storage()
 
auto *const vec_d = nt->node_d_storage()
 
auto *const vec_rhs = nt->node_rhs_storage()
 
int iwarp = icore / warpsize
 
int ic = icore & (warpsize - 1)
 
int ncycle = ncycles[iwarp]
 
int * stride = strides + stridedispl[iwarp]
 
int root = rootbegin[iwarp]
 
int lastroot = rootbegin[iwarp + 1]
 
int firstnode = nodebegin[iwarp]
 
int lastnode = nodebegin[iwarp + 1]
 
int istride = stride[icycle]
 
int ii = lastnode - istride + ic
 
bool has_subtrees_to_compute = true
 
auto const bksub_root = root + ic
 
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
 
int nstride = ii.nstride
 
int * cellsize = ii.cellsize
 
static size_t groupsize = 32
 

Detailed Description

In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the return type is incomplete (from include/neuron/model_data.hpp).

We do not want to fix this by installing more *.hpp files in the include/neuron directory because of potential ABI incompatibility (anything with std::string anywhere in it). The workaround is to provide an extra layer of indirection via unique_ptr so that the opaque token has a definite size (one pointer) and declaration.

The "trick" is just that you have to make sure the parts of the opaque token that need the definition of the non-opaque token are defined in the right place. That's why the constructor and destructor are defined in fadvance.cpp.

Instead, use auto const token = nrn_ensure_model_data_are_sorted_opaque(); This file is already included in all translated mod files.

Typedef Documentation

◆ HashCnt

using neuron::HashCnt = typedef std::map<size_t, std::pair<TNode*, int> >

Definition at line 118 of file cellorder1.cpp.

◆ TNI

using neuron::TNI = typedef std::pair<TNode*, int>

Definition at line 117 of file cellorder1.cpp.

◆ TNIVec

using neuron::TNIVec = typedef std::vector<TNI>

Definition at line 119 of file cellorder1.cpp.

◆ VecTNode

using neuron::VecTNode = typedef std::vector<TNode*>

Definition at line 21 of file tnode.hpp.

◆ VTN

using neuron::VTN = typedef VecTNode

Definition at line 36 of file cellorder2.cpp.

◆ VVTN

using neuron::VVTN = typedef std::vector<VTN>

Definition at line 37 of file cellorder2.cpp.

◆ VVVTN

using neuron::VVVTN = typedef std::vector<VVTN>

Definition at line 38 of file cellorder2.cpp.

Function Documentation

◆ admin1()

static void neuron::admin1 ( int  ncell,
VecTNode nodevec,
int &  nwarp,
int &  nstride,
int *&  stride,
int *&  firstnode,
int *&  lastnode,
int *&  cellsize 
)
static

Definition at line 525 of file cellorder1.cpp.

◆ admin2()

static void neuron::admin2 ( int  ncell,
VecTNode nodevec,
int &  nwarp,
int &  nstride,
int *&  stridedispl,
int *&  strides,
int *&  rootbegin,
int *&  nodebegin,
int *&  ncycles 
)
static

Prepare for solve_interleaved2.

One group of cells per warp.

warp[i] has a number of compute cycles (ncycle[i]), the index of its first root (rootbegin[i], last rootbegin[nwarp] = ncell), and the index of its first node (nodebegin[i], last nodebegin[nwarp] = nnode).

Each compute cycle has a stride. A stride is how many nodes are processed by a warp in one compute cycle. There are nstride strides; nstride is the sum of ncycles of all warps. warp[i] has ncycle[i] strides (same as sum of ncycle). warp[i] has a stridedispl[i] which is stridedispl[i-1] + ncycle[i], i.e. the zeroth cycle of warp[j] works on stride[stridedispl[j]]. The value of a stride beginning at node i (node i is computed by core 0 of some warp for some cycle) is determined by stride_length(i, j, nodevec).

Definition at line 627 of file cellorder1.cpp.

◆ allocate_unified()

void* neuron::allocate_unified ( std::size_t  num_bytes)

Allocate unified memory in GPU builds iff GPU enabled, otherwise new.

◆ allocate_unique()

template<typename T , typename Alloc , typename... Args>
auto neuron::allocate_unique ( const Alloc &  alloc,
Args &&...  args 
)

Definition at line 107 of file memory.h.

◆ analyze()

static void neuron::analyze ( VVTN levels)
static

Definition at line 416 of file cellorder2.cpp.

◆ bksub_interleaved()

static void neuron::bksub_interleaved ( NrnThread nt,
int  icell,
int  icellsize,
int  ,
int *  stride,
int *  firstnode 
)
static

Definition at line 579 of file cellorder.cpp.

◆ check()

void neuron::check ( VecTNode nodevec)
static

Check that the first compartments of nodevec are the root nodes (cells).

Definition at line 401 of file cellorder1.cpp.

◆ checkrace()

static void neuron::checkrace ( TNode nd,
VTN nodes 
)
static

Definition at line 276 of file cellorder2.cpp.

◆ chklevel()

void neuron::chklevel ( VTN level,
size_t  nident = 8 
)

Definition at line 41 of file cellorder2.cpp.

◆ copy_align_array()

template<typename T >
void neuron::copy_align_array ( T *&  dest,
T *  src,
size_t  n 
)

Definition at line 136 of file cellorder.hpp.

◆ copy_array()

template<typename T >
void neuron::copy_array ( T *&  dest,
T *  src,
size_t  n 
)

Definition at line 129 of file cellorder.hpp.

◆ create_interleave_info()

void neuron::create_interleave_info ( )

Definition at line 110 of file cellorder.cpp.

◆ deallocate_unified()

void neuron::deallocate_unified ( void *  ptr,
std::size_t  num_bytes 
)

Deallocate memory allocated by allocate_unified.

◆ destroy_interleave_info()

void neuron::destroy_interleave_info ( )

Definition at line 115 of file cellorder.cpp.

◆ dist2child()

size_t neuron::dist2child ( TNode nd)

Definition at line 164 of file cellorder2.cpp.

◆ eliminate_crace()

static void neuron::eliminate_crace ( TNode nd,
VTN nodes 
)
static

Definition at line 320 of file cellorder2.cpp.

◆ eliminate_prace()

static void neuron::eliminate_prace ( TNode nd,
VTN nodes 
)
static

Definition at line 307 of file cellorder2.cpp.

◆ eliminate_race()

static bool neuron::eliminate_race ( TNode nd,
size_t  d,
VTN nodes,
TNode look 
)
static

Definition at line 284 of file cellorder2.cpp.

◆ final_nodevec_cmp()

static bool neuron::final_nodevec_cmp ( TNode a,
TNode b 
)
static

Definition at line 453 of file cellorder2.cpp.

◆ gpu_enabled()

bool neuron::gpu_enabled ( )

Check if GPU support is enabled.

This returns true if GPU support was enabled at compile time and at runtime via coreneuron.gpu = True and/or --gpu, otherwise it returns false.

◆ group_order2()

void neuron::group_order2 ( VecTNode nodevec,
size_t  groupsize,
size_t  ncell 
)

Implementation of the advanced interleaving strategy (interleave_permute_type == 2)

The main steps are the following:

  1. warp_balance function creates balanced groups of cells.
  2. The compartments/tree nodes populate the groups vector (VVVTN) based on their groupindex and their level (see level_from_root).
  3. The analyze() & question2() functions (operating per group) make sure that each cell is still a tree (treenode_order) and that the nodes with same parents belong to separate warps.

Definition at line 471 of file cellorder2.cpp.

◆ ident_statistic()

static void neuron::ident_statistic ( VecTNode nodevec,
size_t  ncell 
)
static

Definition at line 294 of file cellorder1.cpp.

◆ if()

neuron::if ( ncell  = = 0)

Definition at line 785 of file cellorder.cpp.

◆ interleave_comp()

static bool neuron::interleave_comp ( TNode a,
TNode b 
)
static

Definition at line 477 of file cellorder1.cpp.

◆ interleave_order()

std::vector< int > neuron::interleave_order ( int  ith,
int  ncell,
int  nnode,
int *  parent 
)

Function that performs the permutation of the cells such that the execution threads access coalesced memory.

Parameters
ith: NrnThread to access
ncell: number of cells in NrnThread
nnode: number of compartments in the ncells
parent: parent indices of cells
Returns
int* order, interleaved order of the cells

Definition at line 348 of file cellorder.cpp.

◆ inverse_permute()

int * neuron::inverse_permute ( int *  p,
int  n 
)

Definition at line 159 of file node_permute.cpp.

◆ invert_permute()

static void neuron::invert_permute ( int *  p,
int  n 
)
static

Definition at line 167 of file node_permute.cpp.

◆ is_child_race2()

static bool neuron::is_child_race2 ( TNode nd)
static

Definition at line 143 of file cellorder2.cpp.

◆ is_parent_race2()

static bool neuron::is_parent_race2 ( TNode nd)
static

Definition at line 113 of file cellorder2.cpp.

◆ level_from_leaf()

size_t neuron::level_from_leaf ( VecTNode nodevec)

Definition at line 236 of file cellorder1.cpp.

◆ level_from_root()

size_t neuron::level_from_root ( VecTNode nodevec)

Definition at line 221 of file cellorder1.cpp.

◆ model()

Model & neuron::model ( )
inline

Access the global Model instance.

Just to be going on with. Needs more thought about who actually holds/owns the structures that own the SOA data. Could use a static local if we need to control/defer when this is constructed.

Definition at line 206 of file model_data.hpp.

◆ move_nodes()

static void neuron::move_nodes ( size_t  start,
size_t  length,
size_t  dst,
VTN nodes 
)
static

Definition at line 192 of file cellorder2.cpp.

◆ move_range()

template<typename T >
static void neuron::move_range ( size_t  start,
size_t  length,
size_t  dst,
std::vector< T > &  v 
)
static

Definition at line 178 of file cellorder2.cpp.

◆ next_leaf()

static size_t neuron::next_leaf ( TNode nd,
VTN nodes 
)
static

Definition at line 265 of file cellorder2.cpp.

◆ node_interleave_order()

void neuron::node_interleave_order ( int  ncell,
VecTNode nodevec 
)
static

Naive interleaving strategy (interleave_permute_type == 1)

Sort so nodevec[ncell:nnode] cell instances are interleaved. Keep the secondary ordering with respect to treenode_order so each cell is still a tree.

Parameters
ncell: number of cells (trees)
nodevec: vector that contains compartments (nodes of the trees)

Definition at line 498 of file cellorder1.cpp.

◆ node_order()

std::vector< int > neuron::node_order ( int  ncell,
int  nnode,
int *  parents,
int &  nwarp,
int &  nstride,
int *&  stride,
int *&  firstnode,
int *&  lastnode,
int *&  cellsize,
int *&  stridedispl 
)

Function that returns a permutation of length nnode.

There are two permutation strategies. For interleave_permute_type == 1 (naive interleaving), each execution thread deals with one Hines matrix (cell). For interleave_permute_type == 2 (advanced interleaving), each Hines matrix is solved by multiple execution threads (with coalesced memory access as well).

Parameters
ncell: number of cells
nnode: number of compartments in the ncells
parents: parent indices of the cells
nwarp: number of warps
nstride: nstride is the maximum cell size (not counting root)
stride: stride[i] is the number of cells with an ith node; using stride[i] we know how many positions to move in order to access the next element of the same cell (given that the cells are ordered with the treenode_order).
firstnode: firstnode[i] is the index of the first nonroot node of the cell
lastnode: lastnode[i] is the index of the last node of the cell
cellsize: cellsize is the number of nodes in the cell not counting root.
stridedispl
Returns
int* : a permutation of length nnode

Definition at line 321 of file cellorder1.cpp.

◆ nrn_i_layout()

static int neuron::nrn_i_layout ( int  icnt,
int  cnt,
int  isz,
int  sz,
int  layout 
)
static

Definition at line 115 of file node_permute.cpp.

◆ nrn_index_sort()

std::vector<int> neuron::nrn_index_sort ( int *  values,
int  n 
)

Definition at line 445 of file node_permute.cpp.

◆ nrn_index_sort_cmp()

static bool neuron::nrn_index_sort_cmp ( const std::pair< int, int > &  a,
const std::pair< int, int > &  b 
)
static

Definition at line 430 of file node_permute.cpp.

◆ nrn_optimize_node_order()

void neuron::nrn_optimize_node_order ( int  type)

Select node ordering for optimum Gaussian elimination.

Parameters
type: 0 = cell together (Section construction order); 1 = Interleave, identical cells warp adjacent; 2 = Depth order, optimize adjacent nodes to have adjacent parents.

Definition at line 336 of file cellorder.cpp.

◆ nrn_permute_node_order()

void neuron::nrn_permute_node_order ( )

Compute and carry out the permutation for interleave_permute_type.

Definition at line 425 of file cellorder.cpp.

◆ nrn_pragma_acc() [1/3]

neuron::nrn_pragma_acc ( loop  seq)

◆ nrn_pragma_acc() [2/3]

neuron::nrn_pragma_acc ( parallel loop   presentnt[0:1], stride[0:nstride], firstnode[0:ncell], lastnode[0:ncell], cellsize[0:ncell]) if(nt->compute_gpu) async(nt->stream_id) -> compute_gpu)) for(int icell
pure virtual

◆ nrn_pragma_acc() [3/3]

neuron::nrn_pragma_acc ( routine  vector)

◆ nrn_soa_padded_size()

static int neuron::nrn_soa_padded_size ( int  cnt,
int  layout 
)
static

Definition at line 111 of file node_permute.cpp.

◆ operator!=()

template<typename T , typename U >
bool neuron::operator!= ( unified_allocator< T > const &  x,
unified_allocator< U > const &  y 
)
noexcept

Definition at line 76 of file memory.h.

◆ operator==()

template<typename T , typename U >
bool neuron::operator== ( unified_allocator< T > const &  ,
unified_allocator< U > const &   
)
noexcept

Definition at line 71 of file memory.h.

◆ permute()

template<typename T >
void neuron::permute ( T *  data,
int  cnt,
int  sz,
int  layout,
int *  p 
)

Definition at line 124 of file node_permute.cpp.

◆ permute_data()

void neuron::permute_data ( double *  vec,
int  n,
int *  p 
)

Definition at line 392 of file node_permute.cpp.

◆ permute_ml()

void neuron::permute_ml ( Memb_list ml,
int  type,
NrnThread nt 
)

◆ permute_nodeindices()

void neuron::permute_nodeindices ( Memb_list ml,
int *  permute 
)

◆ permute_ptr()

void neuron::permute_ptr ( int *  vec,
int  n,
int *  p 
)

Definition at line 388 of file node_permute.cpp.

◆ prgroupsize()

void neuron::prgroupsize ( VVVTN groups)

Definition at line 440 of file cellorder2.cpp.

◆ print_quality1()

static void neuron::print_quality1 ( int  iwarp,
InterleaveInfo ii,
int  ncell,
int *  p 
)
static

Definition at line 193 of file cellorder.cpp.

◆ print_quality2()

static void neuron::print_quality2 ( int  iwarp,
InterleaveInfo ii,
int *  p 
)
static

Definition at line 124 of file cellorder.cpp.

◆ prnode()

static void neuron::prnode ( const char *  mes,
NrnThread nt 
)
static

Definition at line 307 of file cellorder.cpp.

◆ ptr_tnode_earlier()

static bool neuron::ptr_tnode_earlier ( TNode a,
TNode b 
)
static

Definition at line 62 of file cellorder1.cpp.

◆ quality()

static void neuron::quality ( VecTNode nodevec,
size_t  max = 32 
)
static

Definition at line 128 of file cellorder1.cpp.

◆ question2()

static void neuron::question2 ( VVTN levels)
static

Definition at line 336 of file cellorder2.cpp.

◆ set_cellindex()

static void neuron::set_cellindex ( int  ncell,
VecTNode nodevec 
)
static

Set the cellindex to distinguish the different cells.

Definition at line 260 of file cellorder1.cpp.

◆ set_groupindex()

static void neuron::set_groupindex ( VecTNode nodevec)
static

Initialization of the groupindex (groups)

The cells are grouped at a later stage based on a load balancing algorithm. This is just an initialization function.

Definition at line 279 of file cellorder1.cpp.

◆ set_nodeindex()

static void neuron::set_nodeindex ( VecTNode nodevec)
static

Definition at line 465 of file cellorder2.cpp.

◆ set_treenode_order()

static void neuron::set_treenode_order ( VVTN levels)
static

Definition at line 84 of file cellorder2.cpp.

◆ solve_interleaved()

void neuron::solve_interleaved ( int  ith)

Solve the Hines matrices based on the interleave_permute_type (1 or 2).

For interleave_permute_type == 1 (naive interleaving), each execution thread deals with one Hines matrix (cell). For interleave_permute_type == 2 (advanced interleaving), each Hines matrix is solved by multiple execution threads (with coalesced memory access as well).

◆ sort_ml()

void neuron::sort_ml ( Memb_list ml)

Definition at line 465 of file node_permute.cpp.

◆ sortlevel()

static void neuron::sortlevel ( VTN level)
static

Definition at line 75 of file cellorder2.cpp.

◆ sortlevel_cmp()

static bool neuron::sortlevel_cmp ( TNode a,
TNode b 
)
static

Definition at line 48 of file cellorder2.cpp.

◆ Sprintf()

template<std::size_t N, typename... Args>
int neuron::Sprintf ( char(&)  buf[N],
const char *  fmt,
Args &&...  args 
)

Redirect sprintf to snprintf if the buffer size can be deduced.

This is useful to avoid deprecation warnings for sprintf. In general it works if the buffer is something like char buf[512] in the calling scope, but not if it is char* or char buf[].

Definition at line 14 of file wrap_sprintf.h.

◆ SprintfAsrt()

template<std::size_t N, typename... Args>
void neuron::SprintfAsrt ( char(&)  buf[N],
const char *  fmt,
Args &&...  args 
)

assert if the Sprintf format data does not fit into buf

Definition at line 27 of file wrap_sprintf.h.

◆ stride_length()

static size_t neuron::stride_length ( size_t  begin,
size_t  end,
VecTNode nodevec 
)
static

Definition at line 589 of file cellorder1.cpp.

◆ tnode_earlier()

static bool neuron::tnode_earlier ( TNode a,
TNode b 
)
static

Function to order trees by size, hash and nodeindex.

Definition at line 48 of file cellorder1.cpp.

◆ tree_analysis()

void neuron::tree_analysis ( int *  parent,
int  nnode,
int  ncell,
VecTNode nodevec 
)
static

Perform tree preparation for interleaving strategies.

Parameters
parent: vector of parent indices
nnode: number of compartments in the cells
ncell: number of cells

Definition at line 454 of file cellorder1.cpp.

◆ triang_interleaved()

static void neuron::triang_interleaved ( NrnThread nt,
int  icell,
int  icellsize,
int  nstride,
int *  stride,
int *  lastnode 
)
static

Definition at line 550 of file cellorder.cpp.

◆ type_of_ntdata()

int neuron::type_of_ntdata ( NrnThread ,
int  index,
bool  reset 
)

◆ update_parent_index()

void neuron::update_parent_index ( int *  vec,
int  vec_size,
const std::vector< int > &  permute 
)

Definition at line 378 of file node_permute.cpp.

◆ warp_balance() [1/2]

static void neuron::warp_balance ( int  ith,
InterleaveInfo ii 
)
static

Definition at line 259 of file cellorder.cpp.

◆ warp_balance() [2/2]

size_t neuron::warp_balance ( size_t  ncell,
VecTNode nodevec 
)

Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells.

Competing objectives are to keep identical cells together and also balance warps.

Parameters
ncell: number of cells
nodevec: vector of compartments from all cells
Returns
number of warps

Definition at line 52 of file balance.cpp.

◆ warpcmp()

bool neuron::warpcmp ( const TNode a,
const TNode b 
)

Definition at line 36 of file balance.cpp.

Variable Documentation

◆ bksub_root

auto const neuron::bksub_root = root + ic

Definition at line 667 of file cellorder.cpp.

◆ cellorder_nwarp

int neuron::cellorder_nwarp = 0

Definition at line 33 of file balance.cpp.

◆ cellsize

int* neuron::cellsize = ii.cellsize

Definition at line 793 of file cellorder.cpp.

◆ firstnode

int * neuron::firstnode = nodebegin[iwarp]

Definition at line 624 of file cellorder.cpp.

◆ groupsize

size_t neuron::groupsize = 32
static

Definition at line 43 of file cellorder1.cpp.

◆ has_subtrees_to_compute

neuron::has_subtrees_to_compute = true

Definition at line 634 of file cellorder.cpp.

◆ ic

int neuron::ic = icore & (warpsize - 1)

Definition at line 619 of file cellorder.cpp.

◆ icore

int neuron::icore

Definition at line 606 of file cellorder.cpp.

◆ ii

InterleaveInfo & neuron::ii = lastnode - istride + ic

Definition at line 631 of file cellorder.cpp.

◆ interleave_info

InterleaveInfo* neuron::interleave_info

Definition at line 43 of file cellorder.cpp.

◆ interleave_permute_type

int neuron::interleave_permute_type

Definition at line 42 of file cellorder.cpp.

◆ istride

neuron::istride = stride[icycle]

Definition at line 630 of file cellorder.cpp.

◆ iwarp

int neuron::iwarp = icore / warpsize

Definition at line 618 of file cellorder.cpp.

◆ lastnode

int * neuron::lastnode = nodebegin[iwarp + 1]

Definition at line 625 of file cellorder.cpp.

◆ lastroot

int neuron::lastroot = rootbegin[iwarp + 1]

Definition at line 623 of file cellorder.cpp.

◆ ncell

icycle< ncycle; ++icycle) { int istride = stride[icycle]; nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for (int icore = 0; icore < warpsize; ++icore) { int i = ii + icore; if (icore < istride) { int ip = GPU_PARENT(i); GPU_RHS(i) -= GPU_B(i) * GPU_RHS(ip); GPU_RHS(i) /= GPU_D(i); } i += istride; } ii += istride; } }}void solve_interleaved2(int ith) { NrnThread* nt = nrn_threads + ith; InterleaveInfo& ii = interleave_info[ith]; int nwarp = ii.nwarp; if (nwarp == 0) return; int ncore = nwarp * warpsize; int* ncycles = ii.cellsize; int* stridedispl = ii.stridedispl; int* strides = ii.stride; int* rootbegin = ii.firstnode; int* nodebegin = ii.lastnode; if (0) { nrn_pragma_acc(parallel loop gang present(nt [0:1], strides [0:nstride], ncycles [0:nwarp], stridedispl [0:nwarp + 1], rootbegin [0:nwarp + 1], nodebegin [0:nwarp + 1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc: nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp + 1], rootbegin[:nwarp + 1], nodebegin[:nwarp + 1])) for (int icore = 0; icore < ncore; icore += warpsize) { solve_interleaved2_loop_body( nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin); } nrn_pragma_acc(wait(nt->stream_id)) } else { for (int icore = 0; icore < ncore; icore += warpsize) { solve_interleaved2_loop_body( nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin); } }}void solve_interleaved1(int ith) { NrnThread* nt = nrn_threads + ith; int ncell = nt-> neuron::ncell

Definition at line 784 of file cellorder.cpp.

◆ ncycle

int neuron::ncycle = ncycles[iwarp]

Definition at line 620 of file cellorder.cpp.

◆ ncycles

int int* neuron::ncycles

Definition at line 607 of file cellorder.cpp.

◆ nodebegin

int int int int int int* neuron::nodebegin
Initial value:
{
auto* const vec_a = nt->node_a_storage()

Definition at line 611 of file cellorder.cpp.

◆ nstride

int neuron::nstride = ii.nstride

Definition at line 789 of file cellorder.cpp.

◆ root

int neuron::root = rootbegin[iwarp]

Definition at line 622 of file cellorder.cpp.

◆ rootbegin

int int int int int* neuron::rootbegin

Definition at line 610 of file cellorder.cpp.

◆ stride

int * neuron::stride = strides + stridedispl[iwarp]

Definition at line 621 of file cellorder.cpp.

◆ stridedispl

int int int int* neuron::stridedispl

Definition at line 609 of file cellorder.cpp.

◆ strides

int int int* neuron::strides

Definition at line 608 of file cellorder.cpp.

◆ vec_b

auto* const neuron::vec_b = nt->node_b_storage()

Definition at line 614 of file cellorder.cpp.

◆ vec_d

auto* const neuron::vec_d = nt->node_d_storage()

Definition at line 615 of file cellorder.cpp.

◆ vec_rhs

auto* const neuron::vec_rhs = nt->node_rhs_storage()

Definition at line 616 of file cellorder.cpp.