In mechanism libraries, you cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the return type is incomplete (from include/neuron/model_data.hpp). More...

Namespaces
	cache

	container

	detail

	extracellular

	legacy

	mechanism

	oc

	python

	test

Classes
class	TNode
	TNode is the tree node that represents the tree of the compartments. More...

class	InterleaveInfo

struct	unified_allocator
	C++ allocator that uses [de]allocate_unified. More...

struct	alloc_deleter
	Allocator-aware deleter for use with std::unique_ptr. More...

struct	Model
	Top-level structure. More...

struct	model_sorted_token

class	unique_cstr
	A RAII wrapper for C-style strings. More...

struct	opaque_model_sorted_token

Typedefs
using	VecTNode = std::vector< TNode * >

using	TNI = std::pair< TNode *, int >

using	HashCnt = std::map< size_t, std::pair< TNode *, int > >

using	TNIVec = std::vector< TNI >

using	VTN = VecTNode

using	VVTN = std::vector< VTN >

using	VVVTN = std::vector< VVTN >

Functions
size_t	level_from_leaf (VecTNode &)

size_t	level_from_root (VecTNode &)

void	group_order2 (VecTNode &, size_t groupsize, size_t ncell)
	Implementation of the advanced interleaving strategy (interleave_permute_type == 2) More...

size_t	dist2child (TNode *nd)

size_t	warp_balance (size_t ncell, VecTNode &nodevec)
	Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells. More...

bool	warpcmp (const TNode *a, const TNode *b)

void	create_interleave_info ()

void	destroy_interleave_info ()

static void	print_quality2 (int iwarp, InterleaveInfo &ii, int *p)

static void	print_quality1 (int iwarp, InterleaveInfo &ii, int ncell, int *p)

static void	warp_balance (int ith, InterleaveInfo &ii)

static void	prnode (const char *mes, NrnThread &nt)

int	nrn_optimize_node_order (int type)
	Select node ordering for optimum gaussian elimination. More...

std::vector< int >	interleave_order (int ith, int ncell, int nnode, int *parent)
	Function that performs the permutation of the cells such that the execution threads access coalesced memory. More...

void	nrn_permute_node_order ()
	Compute and carry out the permutation for interleave_permute_type. More...

static void	triang_interleaved (NrnThread *nt, int icell, int icellsize, int nstride, int *stride, int *lastnode)

static void	bksub_interleaved (NrnThread *nt, int icell, int icellsize, int, int *stride, int *firstnode)

	nrn_pragma_acc (routine vector) static void solve_interleaved2_loop_body(NrnThread *nt

	nrn_pragma_acc (loop seq) for(

	if (ncell==0)

	nrn_pragma_acc (parallel loop present(nt[0:1], stride[0:nstride], firstnode[0:ncell], lastnode[0:ncell], cellsize[0:ncell]) if(nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(nt -> compute_gpu)) for(int icell=0

std::vector< int >	node_order (int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
	Function that returns a permutation of length nnode. More...

template<typename T >
void	copy_array (T *&dest, T *src, size_t n)

template<typename T >
void	copy_align_array (T *&dest, T *src, size_t n)

static bool	tnode_earlier (TNode *a, TNode *b)
	Function to order trees by size, hash and nodeindex. More...

static bool	ptr_tnode_earlier (TNode *a, TNode *b)

static void	tree_analysis (int *parent, int nnode, int ncell, VecTNode &nodevec)
	Perform tree preparation for interleaving strategies. More...

static void	node_interleave_order (int ncell, VecTNode &nodevec)
	Naive interleaving strategy (interleave_permute_type == 1) More...

static void	admin1 (int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize)

static void	admin2 (int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stridedispl, int *&strides, int *&rootbegin, int *&nodebegin, int *&ncycles)
	Prepare for solve_interleaved2. More...

static void	check (VecTNode &)

static void	quality (VecTNode &nodevec, size_t max=32)

static void	set_cellindex (int ncell, VecTNode &nodevec)
	Set the cellindex to distinguish the different cells. More...

static void	set_groupindex (VecTNode &nodevec)
	Initialization of the groupindex (groups) More...

static void	ident_statistic (VecTNode &nodevec, size_t ncell)

static bool	interleave_comp (TNode *a, TNode *b)

static size_t	stride_length (size_t begin, size_t end, VecTNode &nodevec)

void	chklevel (VTN &level, size_t nident=8)

static bool	sortlevel_cmp (TNode *a, TNode *b)

static void	sortlevel (VTN &level)

static void	set_treenode_order (VVTN &levels)

static bool	is_parent_race2 (TNode *nd)

static bool	is_child_race2 (TNode *nd)

template<typename T >
static void	move_range (size_t start, size_t length, size_t dst, std::vector< T > &v)

static void	move_nodes (size_t start, size_t length, size_t dst, VTN &nodes)

static size_t	next_leaf (TNode *nd, VTN &nodes)

static void	checkrace (TNode *nd, VTN &nodes)

static bool	eliminate_race (TNode *nd, size_t d, VTN &nodes, TNode *look)

static void	eliminate_prace (TNode *nd, VTN &nodes)

static void	eliminate_crace (TNode *nd, VTN &nodes)

static void	question2 (VVTN &levels)

static void	analyze (VVTN &levels)

void	prgroupsize (VVVTN &groups)

static bool	final_nodevec_cmp (TNode *a, TNode *b)

static void	set_nodeindex (VecTNode &nodevec)

static int	nrn_soa_padded_size (int cnt, int layout)

static int	nrn_i_layout (int icnt, int cnt, int isz, int sz, int layout)

template<typename T >
void	permute (T *data, int cnt, int sz, int layout, int *p)

int *	inverse_permute (int *p, int n)

static void	invert_permute (int *p, int n)

void	update_parent_index (int *vec, int vec_size, const std::vector< int > &permute)

void	permute_ptr (int *vec, int n, int *p)

void	permute_data (double *vec, int n, int *p)

static bool	nrn_index_sort_cmp (const std::pair< int, int > &a, const std::pair< int, int > &b)

std::vector< int >	nrn_index_sort (int *values, int n)

void	sort_ml (Memb_list *ml)

void	permute_nodeindices (Memb_list *ml, int *permute)

void	permute_ml (Memb_list *ml, int type, NrnThread &nt)

int	type_of_ntdata (NrnThread &, int index, bool reset)

bool	gpu_enabled ()
	Check if GPU support is enabled. More...

void *	allocate_unified (std::size_t num_bytes)
	Allocate unified memory in GPU builds iff GPU enabled, otherwise new. More...

void	deallocate_unified (void *ptr, std::size_t num_bytes)
	Deallocate memory allocated by `allocate_unified`. More...

template<typename T , typename U >
bool	operator== (unified_allocator< T > const &, unified_allocator< U > const &) noexcept

template<typename T , typename U >
bool	operator!= (unified_allocator< T > const &x, unified_allocator< U > const &y) noexcept

template<typename T , typename Alloc , typename... Args>
auto	allocate_unique (const Alloc &alloc, Args &&... args)

Model &	model ()
	Access the global Model instance. More...

void	solve_interleaved (int ith)
	Solve the Hines matrices based on the interleave_permute_type (1 or 2). More...

template<std::size_t N, typename... Args>
int	Sprintf (char(&buf)[N], const char *fmt, Args &&... args)
	Redirect sprintf to snprintf if the buffer size can be deduced. More...

template<std::size_t N, typename... Args>
void	SprintfAsrt (char(&buf)[N], const char *fmt, Args &&... args)
	assert if the Sprintf format data does not fit into buf More...

Variables
int	cellorder_nwarp = 0

int	interleave_permute_type

InterleaveInfo *	interleave_info

int	icore

int int *	ncycles

int int int *	strides

int int int int *	stridedispl

int int int int int *	rootbegin

int int int int int int *	nodebegin

auto *const	vec_b = nt->node_b_storage()

auto *const	vec_d = nt->node_d_storage()

auto *const	vec_rhs = nt->node_rhs_storage()

int	iwarp = icore / warpsize

int	ic = icore & (warpsize - 1)

int	ncycle = ncycles[iwarp]

int *	stride = strides + stridedispl[iwarp]

int	root = rootbegin[iwarp]

int	lastroot = rootbegin[iwarp + 1]

int	firstnode = nodebegin[iwarp]

int	lastnode = nodebegin[iwarp + 1]

int	istride = stride[icycle]

int	ii = lastnode - istride + ic

bool	has_subtrees_to_compute = true

auto const	bksub_root = root + ic

icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt->	ncell

int	nstride = ii.nstride

int *	cellsize = ii.cellsize

static size_t	groupsize = 32

Detailed Description

In mechanism libraries, you cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the return type is incomplete (from include/neuron/model_data.hpp).

We also do not want to fix this by installing more *.hpp files in the include/neuron directory, because of potential ABI incompatibility (anything with std::string anywhere in it). The workaround is to provide an extra layer of indirection via unique_ptr, so that the opaque token has a definite size (one pointer) and declaration.

The "trick" is just that you have to make sure the parts of the opaque token that need the definition of the non-opaque token are defined in the right place. That is why the constructor and destructor are defined in fadvance.cpp.

Instead, use auto const token = nrn_ensure_model_data_are_sorted_opaque(); in mechanism libraries. This file is already included in all translated mod files.

Typedef Documentation

◆ HashCnt

using neuron::HashCnt = typedef std::map<size_t, std::pair<TNode*, int> >

Definition at line 118 of file cellorder1.cpp.

◆ TNI

using neuron::TNI = typedef std::pair<TNode*, int>

Definition at line 117 of file cellorder1.cpp.

◆ TNIVec

using neuron::TNIVec = typedef std::vector<TNI>

Definition at line 119 of file cellorder1.cpp.

◆ VecTNode

using neuron::VecTNode = typedef std::vector<TNode*>

Definition at line 21 of file tnode.hpp.

◆ VTN

using neuron::VTN = typedef VecTNode

Definition at line 36 of file cellorder2.cpp.

◆ VVTN

using neuron::VVTN = typedef std::vector<VTN>

Definition at line 37 of file cellorder2.cpp.

◆ VVVTN

using neuron::VVVTN = typedef std::vector<VVTN>

Definition at line 38 of file cellorder2.cpp.

Function Documentation

◆ admin1()

static void neuron::admin1	(	int	ncell,
		VecTNode &	nodevec,
		int &	nwarp,
		int &	nstride,
		int *&	stride,
		int *&	firstnode,
		int *&	lastnode,
		int *&	cellsize
	)

static

Definition at line 525 of file cellorder1.cpp.

◆ admin2()

static void neuron::admin2	(	int	ncell,
		VecTNode &	nodevec,
		int &	nwarp,
		int &	nstride,
		int *&	stridedispl,
		int *&	strides,
		int *&	rootbegin,
		int *&	nodebegin,
		int *&	ncycles
	)

static

Prepare for solve_interleaved2.

One group of cells per warp.

warp[i] has a number of compute cycles (ncycle[i]), the index of its first root (rootbegin[i], last rootbegin[nwarp] = ncell), and the index of its first node (nodebegin[i], last nodebegin[nwarp] = nnode).

Each compute cycle has a stride. A stride is how many nodes are processed by a warp in one compute cycle. There are nstride strides; nstride is the sum of ncycles of all warps. warp[i] has ncycle[i] strides; same as sum of ncycle. warp[i] has a stridedispl[i], which is stridedispl[i-1] + ncycle[i], i.e. the zeroth cycle of warp[j] works on stride[stridedispl[j]]. The value of a stride beginning at node i (node i is computed by core 0 of some warp for some cycle) is determined by stride_length(i, j, nodevec).

Definition at line 627 of file cellorder1.cpp.

◆ allocate_unified()

void* neuron::allocate_unified ( std::size_t num_bytes )

Allocate unified memory in GPU builds iff GPU enabled, otherwise new.

◆ allocate_unique()

template<typename T , typename Alloc , typename... Args>

auto neuron::allocate_unique	(	const Alloc &	alloc,
		Args &&...	args
	)

Definition at line 107 of file memory.h.

◆ analyze()

static void neuron::analyze ( VVTN & levels )

static

Definition at line 416 of file cellorder2.cpp.

◆ bksub_interleaved()

static void neuron::bksub_interleaved	(	NrnThread *	nt,
		int	icell,
		int	icellsize,
		int	,
		int *	stride,
		int *	firstnode
	)

static

Definition at line 579 of file cellorder.cpp.

◆ check()

void neuron::check ( VecTNode & nodevec )

static

Check that the first compartments of nodevec are the root nodes (cells)

Definition at line 401 of file cellorder1.cpp.

◆ checkrace()

static void neuron::checkrace	(	TNode *	nd,
		VTN &	nodes
	)

static

Definition at line 276 of file cellorder2.cpp.

◆ chklevel()

void neuron::chklevel	(	VTN &	level,
		size_t	nident = 8
	)

Definition at line 41 of file cellorder2.cpp.

◆ copy_align_array()

template<typename T >

void neuron::copy_align_array	(	T *&	dest,
		T *	src,
		size_t	n
	)

Definition at line 136 of file cellorder.hpp.

◆ copy_array()

template<typename T >

void neuron::copy_array	(	T *&	dest,
		T *	src,
		size_t	n
	)

Definition at line 129 of file cellorder.hpp.

◆ create_interleave_info()

void neuron::create_interleave_info ( )

Definition at line 110 of file cellorder.cpp.

◆ deallocate_unified()

void neuron::deallocate_unified	(	void *	ptr,
		std::size_t	num_bytes
	)

Deallocate memory allocated by allocate_unified.

◆ destroy_interleave_info()

void neuron::destroy_interleave_info ( )

Definition at line 115 of file cellorder.cpp.

◆ dist2child()

size_t neuron::dist2child ( TNode * nd )

Definition at line 164 of file cellorder2.cpp.

◆ eliminate_crace()

static void neuron::eliminate_crace	(	TNode *	nd,
		VTN &	nodes
	)

static

Definition at line 320 of file cellorder2.cpp.

◆ eliminate_prace()

static void neuron::eliminate_prace	(	TNode *	nd,
		VTN &	nodes
	)

static

Definition at line 307 of file cellorder2.cpp.

◆ eliminate_race()

static bool neuron::eliminate_race	(	TNode *	nd,
		size_t	d,
		VTN &	nodes,
		TNode *	look
	)

static

Definition at line 284 of file cellorder2.cpp.

◆ final_nodevec_cmp()

static bool neuron::final_nodevec_cmp	(	TNode *	a,
		TNode *	b
	)

static

Definition at line 453 of file cellorder2.cpp.

◆ gpu_enabled()

bool neuron::gpu_enabled ( )

Check if GPU support is enabled.

This returns true if GPU support was enabled at compile time and at runtime via coreneuron.gpu = True and/or --gpu; otherwise it returns false.

◆ group_order2()

void neuron::group_order2	(	VecTNode &	nodevec,
		size_t	groupsize,
		size_t	ncell
	)

Implementation of the advanced interleaving strategy (interleave_permute_type == 2)

The main steps are the following:

warp_balance function creates balanced groups of cells.
The compartments/tree nodes populate the groups vector (VVVTN) based on their groupindex and their level (see level_from_root).
The analyze() & question2() functions (operating per group) make sure that each cell is still a tree (treenode_order) and that the nodes with same parents belong to separate warps.

Definition at line 471 of file cellorder2.cpp.

◆ ident_statistic()

static void neuron::ident_statistic	(	VecTNode &	nodevec,
		size_t	ncell
	)

static

Definition at line 294 of file cellorder1.cpp.

◆ if()

neuron::if ( ncell == 0 )

Definition at line 785 of file cellorder.cpp.

◆ interleave_comp()

static bool neuron::interleave_comp	(	TNode *	a,
		TNode *	b
	)

static

Definition at line 477 of file cellorder1.cpp.

◆ interleave_order()

std::vector< int > neuron::interleave_order	(	int	ith,
		int	ncell,
		int	nnode,
		int *	parent
	)

Function that performs the permutation of the cells such that the execution threads access coalesced memory.

Parameters

ith	NrnThread to access
ncell	number of cells in NrnThread
nnode	number of compartments in the ncells
parent	parent indices of cells

Returns: order, the interleaved order of the cells, as a std::vector<int>

Definition at line 348 of file cellorder.cpp.

◆ inverse_permute()

int * neuron::inverse_permute	(	int *	p,
		int	n
	)

Definition at line 159 of file node_permute.cpp.

◆ invert_permute()

static void neuron::invert_permute	(	int *	p,
		int	n
	)

static

Definition at line 167 of file node_permute.cpp.

◆ is_child_race2()

static bool neuron::is_child_race2 ( TNode * nd )

static

Definition at line 143 of file cellorder2.cpp.

◆ is_parent_race2()

static bool neuron::is_parent_race2 ( TNode * nd )

static

Definition at line 113 of file cellorder2.cpp.

◆ level_from_leaf()

size_t neuron::level_from_leaf ( VecTNode & nodevec )

Definition at line 236 of file cellorder1.cpp.

◆ level_from_root()

size_t neuron::level_from_root ( VecTNode & nodevec )

Definition at line 221 of file cellorder1.cpp.

◆ model()

Model & neuron::model ( )

inline

Access the global Model instance.

Just to be going on with. Needs more thought about who actually holds/owns the structures that own the SOA data. Could use a static local if we need to control/defer when this is constructed.

Definition at line 206 of file model_data.hpp.

◆ move_nodes()

static void neuron::move_nodes	(	size_t	start,
		size_t	length,
		size_t	dst,
		VTN &	nodes
	)

static

Definition at line 192 of file cellorder2.cpp.

◆ move_range()

template<typename T >

static void neuron::move_range	(	size_t	start,
		size_t	length,
		size_t	dst,
		std::vector< T > &	v
	)

static

Definition at line 178 of file cellorder2.cpp.

◆ next_leaf()

static size_t neuron::next_leaf	(	TNode *	nd,
		VTN &	nodes
	)

static

Definition at line 265 of file cellorder2.cpp.

◆ node_interleave_order()

void neuron::node_interleave_order	(	int	ncell,
		VecTNode &	nodevec
	)

static

Naive interleaving strategy (interleave_permute_type == 1)

Sort so nodevec[ncell:nnode] cell instances are interleaved. Keep the secondary ordering with respect to treenode_order so each cell is still a tree.

Parameters

ncell	number of cells (trees)
nodevec	vector that contains compartments (nodes of the trees)

Definition at line 498 of file cellorder1.cpp.

◆ node_order()

std::vector< int > neuron::node_order	(	int	ncell,
		int	nnode,
		int *	parents,
		int &	nwarp,
		int &	nstride,
		int *&	stride,
		int *&	firstnode,
		int *&	lastnode,
		int *&	cellsize,
		int *&	stridedispl
	)

Function that returns a permutation of length nnode.

There are two permutation strategies. For interleave_permute_type == 1 (naive interleaving), each execution thread deals with one Hines matrix (cell). For interleave_permute_type == 2 (advanced interleaving), each Hines matrix is solved by multiple execution threads (with coalesced memory access as well).

Parameters

ncell	number of cells
nnode	number of compartments in the ncells
parents	parent indices of the cells
nwarp	number of warps
nstride	nstride is the maximum cell size (not counting root)
stride	stride[i] is the number of cells with an ith node: using stride[i] we know how many positions to move in order to access the next element of the same cell (given that the cells are ordered with the treenode_order).
firstnode	firstnode[i] is the index of the first nonroot node of the cell
lastnode	lastnode[i] is the index of the last node of the cell
cellsize	cellsize is the number of nodes in the cell not counting root.
stridedispl

Returns: a permutation of length nnode, as a std::vector<int>

Definition at line 321 of file cellorder1.cpp.

◆ nrn_i_layout()

static int neuron::nrn_i_layout	(	int	icnt,
		int	cnt,
		int	isz,
		int	sz,
		int	layout
	)

static

Definition at line 115 of file node_permute.cpp.

◆ nrn_index_sort()

std::vector<int> neuron::nrn_index_sort	(	int *	values,
		int	n
	)

Definition at line 445 of file node_permute.cpp.

◆ nrn_index_sort_cmp()

static bool neuron::nrn_index_sort_cmp	(	const std::pair< int, int > &	a,
		const std::pair< int, int > &	b
	)

static

Definition at line 430 of file node_permute.cpp.

◆ nrn_optimize_node_order()

void neuron::nrn_optimize_node_order ( int type )

Select node ordering for optimum gaussian elimination.

Parameters

type	0: cell together (Section construction order); 1: Interleave, identical cells warp adjacent; 2: Depth order, optimize adjacent nodes to have adjacent parents.

Definition at line 336 of file cellorder.cpp.

◆ nrn_permute_node_order()

void neuron::nrn_permute_node_order ( )

Compute and carry out the permutation for interleave_permute_type.

Definition at line 425 of file cellorder.cpp.

◆ nrn_pragma_acc() [1/3]

neuron::nrn_pragma_acc ( loop seq )

◆ nrn_pragma_acc() [2/3]

neuron::nrn_pragma_acc ( parallel loop presentnt[0:1], stride[0:nstride], firstnode[0:ncell], lastnode[0:ncell], cellsize[0:ncell]) if(nt->compute_gpu) async(nt->stream_id ) -> compute_gpu)) for(int icell

pure virtual

◆ nrn_pragma_acc() [3/3]

neuron::nrn_pragma_acc ( routine vector )

◆ nrn_soa_padded_size()

static int neuron::nrn_soa_padded_size	(	int	cnt,
		int	layout
	)

static

Definition at line 111 of file node_permute.cpp.

◆ operator!=()

template<typename T , typename U >

bool neuron::operator!=	(	unified_allocator< T > const &	x,
		unified_allocator< U > const &	y
	)

noexcept

Definition at line 76 of file memory.h.

◆ operator==()

template<typename T , typename U >

bool neuron::operator==	(	unified_allocator< T > const &	,
		unified_allocator< U > const &
	)

noexcept

Definition at line 71 of file memory.h.

◆ permute()

template<typename T >

void neuron::permute	(	T *	data,
		int	cnt,
		int	sz,
		int	layout,
		int *	p
	)

Definition at line 124 of file node_permute.cpp.

◆ permute_data()

void neuron::permute_data	(	double *	vec,
		int	n,
		int *	p
	)

Definition at line 392 of file node_permute.cpp.

◆ permute_ml()

void neuron::permute_ml	(	Memb_list *	ml,
		int	type,
		NrnThread &	nt
	)

◆ permute_nodeindices()

void neuron::permute_nodeindices	(	Memb_list *	ml,
		int *	permute
	)

◆ permute_ptr()

void neuron::permute_ptr	(	int *	vec,
		int	n,
		int *	p
	)

Definition at line 388 of file node_permute.cpp.

◆ prgroupsize()

void neuron::prgroupsize ( VVVTN & groups )

Definition at line 440 of file cellorder2.cpp.

◆ print_quality1()

static void neuron::print_quality1	(	int	iwarp,
		InterleaveInfo &	ii,
		int	ncell,
		int *	p
	)

static

Definition at line 193 of file cellorder.cpp.

◆ print_quality2()

static void neuron::print_quality2	(	int	iwarp,
		InterleaveInfo &	ii,
		int *	p
	)

static

Definition at line 124 of file cellorder.cpp.

◆ prnode()

static void neuron::prnode	(	const char *	mes,
		NrnThread &	nt
	)

static

Definition at line 307 of file cellorder.cpp.

◆ ptr_tnode_earlier()

static bool neuron::ptr_tnode_earlier	(	TNode *	a,
		TNode *	b
	)

static

Definition at line 62 of file cellorder1.cpp.

◆ quality()

static void neuron::quality	(	VecTNode &	nodevec,
		size_t	max = 32
	)

static

Definition at line 128 of file cellorder1.cpp.

◆ question2()

static void neuron::question2 ( VVTN & levels )

static

Definition at line 336 of file cellorder2.cpp.

◆ set_cellindex()

static void neuron::set_cellindex	(	int	ncell,
		VecTNode &	nodevec
	)

static

Set the cellindex to distinguish the different cells.

Definition at line 260 of file cellorder1.cpp.

◆ set_groupindex()

static void neuron::set_groupindex ( VecTNode & nodevec )

static

Initialization of the groupindex (groups)

The cells are grouped at a later stage based on a load balancing algorithm. This is just an initialization function.

Definition at line 279 of file cellorder1.cpp.

◆ set_nodeindex()

static void neuron::set_nodeindex ( VecTNode & nodevec )

static

Definition at line 465 of file cellorder2.cpp.

◆ set_treenode_order()

static void neuron::set_treenode_order ( VVTN & levels )

static

Definition at line 84 of file cellorder2.cpp.

◆ solve_interleaved()

void neuron::solve_interleaved ( int ith )

Solve the Hines matrices based on the interleave_permute_type (1 or 2).

For interleave_permute_type == 1 (naive interleaving), each execution thread deals with one Hines matrix (cell). For interleave_permute_type == 2 (advanced interleaving), each Hines matrix is solved by multiple execution threads (with coalesced memory access as well).

◆ sort_ml()

void neuron::sort_ml ( Memb_list * ml )

Definition at line 465 of file node_permute.cpp.

◆ sortlevel()

static void neuron::sortlevel ( VTN & level )

static

Definition at line 75 of file cellorder2.cpp.

◆ sortlevel_cmp()

static bool neuron::sortlevel_cmp	(	TNode *	a,
		TNode *	b
	)

static

Definition at line 48 of file cellorder2.cpp.

◆ Sprintf()

template<std::size_t N, typename... Args>

int neuron::Sprintf	(	char(&)	buf[N],
		const char *	fmt,
		Args &&...	args
	)

Redirect sprintf to snprintf if the buffer size can be deduced.

This is useful to avoid deprecation warnings for sprintf. In general it works if the buffer is something like char buf[512] in the calling scope, but not if it is char* or char buf[].

Definition at line 14 of file wrap_sprintf.h.

◆ SprintfAsrt()

template<std::size_t N, typename... Args>

void neuron::SprintfAsrt	(	char(&)	buf[N],
		const char *	fmt,
		Args &&...	args
	)

assert if the Sprintf format data does not fit into buf

Definition at line 27 of file wrap_sprintf.h.

◆ stride_length()

static size_t neuron::stride_length	(	size_t	begin,
		size_t	end,
		VecTNode &	nodevec
	)

static

Definition at line 589 of file cellorder1.cpp.

◆ tnode_earlier()

static bool neuron::tnode_earlier	(	TNode *	a,
		TNode *	b
	)

static

Function to order trees by size, hash and nodeindex.

Definition at line 48 of file cellorder1.cpp.

◆ tree_analysis()

void neuron::tree_analysis	(	int *	parent,
		int	nnode,
		int	ncell,
		VecTNode &	nodevec
	)

static

Perform tree preparation for interleaving strategies.

Parameters

parent	vector of parent indices
nnode	number of compartments in the cells
ncell	number of cells

Definition at line 454 of file cellorder1.cpp.

◆ triang_interleaved()

static void neuron::triang_interleaved	(	NrnThread *	nt,
		int	icell,
		int	icellsize,
		int	nstride,
		int *	stride,
		int *	lastnode
	)

static

Definition at line 550 of file cellorder.cpp.

◆ type_of_ntdata()

int neuron::type_of_ntdata	(	NrnThread &	,
		int	index,
		bool	reset
	)

◆ update_parent_index()

void neuron::update_parent_index	(	int *	vec,
		int	vec_size,
		const std::vector< int > &	permute
	)

Definition at line 378 of file node_permute.cpp.

◆ warp_balance() [1/2]

static void neuron::warp_balance	(	int	ith,
		InterleaveInfo &	ii
	)

static

Definition at line 259 of file cellorder.cpp.

◆ warp_balance() [2/2]

size_t neuron::warp_balance	(	size_t	ncell,
		VecTNode &	nodevec
	)

Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells.

Competing objectives are to keep identical cells together and also balance warps.

Parameters

ncell	number of cells
nodevec	vector of compartments from all cells

Returns: number of warps

Definition at line 52 of file balance.cpp.

◆ warpcmp()

bool neuron::warpcmp	(	const TNode *	a,
		const TNode *	b
	)

Definition at line 36 of file balance.cpp.

Variable Documentation

◆ bksub_root

auto const neuron::bksub_root = root + ic

Definition at line 667 of file cellorder.cpp.

◆ cellorder_nwarp

int neuron::cellorder_nwarp = 0

Definition at line 33 of file balance.cpp.

◆ cellsize

int* neuron::cellsize = ii.cellsize

Definition at line 793 of file cellorder.cpp.

◆ firstnode

int * neuron::firstnode = nodebegin[iwarp]

Definition at line 624 of file cellorder.cpp.

◆ groupsize

size_t neuron::groupsize = 32

static

Definition at line 43 of file cellorder1.cpp.

◆ has_subtrees_to_compute

neuron::has_subtrees_to_compute = true

Definition at line 634 of file cellorder.cpp.

◆ ic

int neuron::ic = icore & (warpsize - 1)

Definition at line 619 of file cellorder.cpp.

◆ icore

int neuron::icore

Definition at line 606 of file cellorder.cpp.

◆ ii

InterleaveInfo & neuron::ii = lastnode - istride + ic

Definition at line 631 of file cellorder.cpp.

◆ interleave_info

InterleaveInfo* neuron::interleave_info

Definition at line 43 of file cellorder.cpp.

◆ interleave_permute_type

int neuron::interleave_permute_type

Definition at line 42 of file cellorder.cpp.

◆ istride

neuron::istride = stride[icycle]

Definition at line 630 of file cellorder.cpp.

◆ iwarp

int neuron::iwarp = icore / warpsize

Definition at line 618 of file cellorder.cpp.

◆ lastnode

int * neuron::lastnode = nodebegin[iwarp + 1]

Definition at line 625 of file cellorder.cpp.

◆ lastroot

int neuron::lastroot = rootbegin[iwarp + 1]

Definition at line 623 of file cellorder.cpp.

◆ ncell

icycle< ncycle; ++icycle) { int istride = stride[icycle]; nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for (int icore = 0; icore < warpsize; ++icore) { int i = ii + icore; if (icore < istride) { int ip = GPU_PARENT(i); GPU_RHS(i) -= GPU_B(i) * GPU_RHS(ip); GPU_RHS(i) /= GPU_D(i); } i += istride; } ii += istride; } }}void solve_interleaved2(int ith) { NrnThread* nt = nrn_threads + ith; InterleaveInfo& ii = interleave_info[ith]; int nwarp = ii.nwarp; if (nwarp == 0) return; int ncore = nwarp * warpsize; int* ncycles = ii.cellsize; int* stridedispl = ii.stridedispl; int* strides = ii.stride; int* rootbegin = ii.firstnode; int* nodebegin = ii.lastnode; if (0) { nrn_pragma_acc(parallel loop gang present(nt [0:1], strides [0:nstride], ncycles [0:nwarp], stridedispl [0:nwarp + 1], rootbegin [0:nwarp + 1], nodebegin [0:nwarp + 1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc: nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp + 1], rootbegin[:nwarp + 1], nodebegin[:nwarp + 1])) for (int icore = 0; icore < ncore; icore += warpsize) { solve_interleaved2_loop_body( nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin); } nrn_pragma_acc(wait(nt->stream_id)) } else { for (int icore = 0; icore < ncore; icore += warpsize) { solve_interleaved2_loop_body( nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin); } }}void solve_interleaved1(int ith) { NrnThread* nt = nrn_threads + ith; int ncell = nt-> neuron::ncell

Definition at line 784 of file cellorder.cpp.

◆ ncycle

int neuron::ncycle = ncycles[iwarp]

Definition at line 620 of file cellorder.cpp.

◆ ncycles

int int* neuron::ncycles

Definition at line 607 of file cellorder.cpp.

◆ nodebegin

int int int int int int* neuron::nodebegin

Initial value:

{
 
    auto* const vec_a = nt->node_a_storage()

Definition at line 611 of file cellorder.cpp.

◆ nstride

int neuron::nstride = ii.nstride

Definition at line 789 of file cellorder.cpp.

◆ root

int neuron::root = rootbegin[iwarp]

Definition at line 622 of file cellorder.cpp.

◆ rootbegin

int int int int int* neuron::rootbegin

Definition at line 610 of file cellorder.cpp.

◆ stride

int * neuron::stride = strides + stridedispl[iwarp]

Definition at line 621 of file cellorder.cpp.

◆ stridedispl

int int int int* neuron::stridedispl

Definition at line 609 of file cellorder.cpp.

◆ strides

int int int* neuron::strides

Definition at line 608 of file cellorder.cpp.

◆ vec_b

auto* const neuron::vec_b = nt->node_b_storage()

Definition at line 614 of file cellorder.cpp.

◆ vec_d

auto* const neuron::vec_d = nt->node_d_storage()

Definition at line 615 of file cellorder.cpp.

◆ vec_rhs

auto* const neuron::vec_rhs = nt->node_rhs_storage()

Definition at line 616 of file cellorder.cpp.

Namespaces

Classes

Typedefs

Functions

Variables

Detailed Description

Typedef Documentation

◆ HashCnt

◆ TNI

◆ TNIVec

◆ VecTNode

◆ VTN

◆ VVTN

◆ VVVTN

Function Documentation

◆ admin1()

◆ admin2()

◆ allocate_unified()

◆ allocate_unique()

◆ analyze()

◆ bksub_interleaved()

◆ check()

◆ checkrace()

◆ chklevel()

◆ copy_align_array()

◆ copy_array()

◆ create_interleave_info()

◆ deallocate_unified()

◆ destroy_interleave_info()

◆ dist2child()

◆ eliminate_crace()

◆ eliminate_prace()

◆ eliminate_race()

◆ final_nodevec_cmp()

◆ gpu_enabled()

◆ group_order2()

◆ ident_statistic()

◆ if()

◆ interleave_comp()

◆ interleave_order()

◆ inverse_permute()

◆ invert_permute()

◆ is_child_race2()

◆ is_parent_race2()

◆ level_from_leaf()

◆ level_from_root()

◆ model()

◆ move_nodes()

◆ move_range()

◆ next_leaf()

◆ node_interleave_order()

◆ node_order()

◆ nrn_i_layout()

◆ nrn_index_sort()

◆ nrn_index_sort_cmp()

◆ nrn_optimize_node_order()

◆ nrn_permute_node_order()

◆ nrn_pragma_acc() [1/3]

◆ nrn_pragma_acc() [2/3]

◆ nrn_pragma_acc() [3/3]

◆ nrn_soa_padded_size()

◆ operator!=()

◆ operator==()

◆ permute()

◆ permute_data()

◆ permute_ml()

◆ permute_nodeindices()

◆ permute_ptr()

◆ prgroupsize()

◆ print_quality1()

◆ print_quality2()

◆ prnode()

◆ ptr_tnode_earlier()

◆ quality()

◆ question2()

◆ set_cellindex()

◆ set_groupindex()

◆ set_nodeindex()

◆ set_treenode_order()

◆ solve_interleaved()