3466/doxygen/cellorder_8hpp_source.html

 /*

 # =============================================================================

 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL

 #

 # See top-level LICENSE file for details.

 # =============================================================================

 */


 #pragma once


 #include "coreneuron/utils/memory.h"

 #include <algorithm>

 #include <vector>


 #if CORENRN_BUILD

 namespace coreneuron {

 #else

 namespace neuron {

 #endif


 /**

  * \brief Function that performs the permutation of the cells such that the

  *        execution threads access coalesced memory.

  *

  * \param ith NrnThread to access

  * \param ncell number of cells in NrnThread

  * \param nnode number of compartments in the ncells

  * \param parent parent indices of cells

  *

  * \return the interleaved order of the cells (int* when CORENRN_BUILD is set, std::vector<int> otherwise)

  */

 #if CORENRN_BUILD

 int* interleave_order(int ith, int ncell, int nnode, int* parent);

 #else

 std::vector<int> interleave_order(int ith, int ncell, int nnode, int* parent);

 #endif


 void create_interleave_info();

 void destroy_interleave_info();


 #if CORENRN_BUILD

 /**

  *

  * \brief Solve the Hines matrices based on the interleave_permute_type (1 or 2).

  *

  * For interleave_permute_type == 1 : Naive interleaving -> Each execution thread deals with one
  * Hines matrix (cell).
  * For interleave_permute_type == 2 : Advanced interleaving -> Each Hines matrix is solved by
  * multiple execution threads (with coalesced memory access as well).

  */

 extern void solve_interleaved(int ith);

 #endif


 class InterleaveInfo;  // forward declaration

 #if CORENRN_BUILD

 /**

  *

  * \brief CUDA branch of the solve_interleaved with interleave_permute_type == 2.

  *

  * This branch is activated at runtime with the --cuda-interface CLI flag

  */

 void solve_interleaved2_launcher(NrnThread* nt, InterleaveInfo* info, int ncore, void* stream);

 #endif


 /**
  * \brief Per-thread bookkeeping for the interleaved Hines solver: the layout arrays produced
  *        for the interleave2 permutation plus per-warp statistics.
  *
  * Derives from MemoryManaged (GPU builds with unified memory support).
  *
  * The copy constructor, copy assignment and destructor are user-declared; their definitions
  * are not in this header. NOTE(review): the raw pointer members are presumably owned by the
  * object and deep-copied (see copy_array / copy_align_array) -- confirm in the implementation.
  * No move operations are declared, so moving an InterleaveInfo falls back to copying.
  */
 class InterleaveInfo: public MemoryManaged {

   public:

     InterleaveInfo() = default;

     InterleaveInfo(const InterleaveInfo&);

     InterleaveInfo& operator=(const InterleaveInfo&);

     ~InterleaveInfo();

     int nwarp = 0;  // used only by interleave2

     int nstride = 0;

     // The solver reads the arrays below under the alias names quoted in each comment
     // (strides, rootbegin, nodebegin, ncycles).

     int* stridedispl = nullptr;  // interleave2: nwarp+1

     int* stride = nullptr;       // interleave2: stride  length is stridedispl[nwarp]

     int* firstnode = nullptr;    // interleave2: rootbegin nwarp+1 displacements

     int* lastnode = nullptr;     // interleave2: nodebegin nwarp+1 displacements

     int* cellsize = nullptr;     // interleave2: ncycles nwarp


     // statistics (nwarp of each)

     size_t* nnode = nullptr;

     size_t* ncycle = nullptr;

     size_t* idle = nullptr;

     size_t* cache_access = nullptr;

     size_t* child_race = nullptr;


   private:

     // NOTE(review): presumably exchanges all members with info in support of copy assignment
     // (copy-and-swap) -- the definition is not visible here; confirm.
     void swap(InterleaveInfo& info);

 };


 /**

  * \brief Function that returns a permutation of length nnode.

  *

  * There are two permutation strategies:

  * For interleave_permute_type == 1 : Naive interleaving -> Each execution thread deals with one
  * Hines matrix (cell).
  * For interleave_permute_type == 2 : Advanced interleaving -> Each Hines matrix is solved by
  * multiple execution threads (with coalesced memory access as well).

  *

  * \param ncell number of cells

  * \param nnode number of compartments in the ncells

  * \param parents parent indices of the cells

  * \param nwarp number of warps

  * \param nstride nstride is the maximum cell size (not counting root)

  * \param stride stride[i] is the number of cells with an ith node:

  *               using stride[i] we know how many positions to move in order to

  *               access the next element of the same cell (given that the cells are

  *               ordered with the treenode_order).

  * \param firstnode firstnode[i] is the index of the first nonroot node of the cell

  * \param lastnode lastnode[i] is the index of the last node of the cell

  * \param cellsize cellsize is the number of nodes in the cell not counting root.

  * \param stridedispl displacements into stride (nwarp + 1 entries); stridedispl[nwarp] is the
  *                    length of stride

  * \return a permutation of length nnode (int* when CORENRN_BUILD is set, std::vector<int> otherwise)

  */

 #if CORENRN_BUILD

 int* node_order(int ncell,

 #else

 std::vector<int> node_order(int ncell,

 #endif

                 int nnode,

                 int* parents,

                 int& nwarp,

                 int& nstride,

                 int*& stride,

                 int*& firstnode,

                 int*& lastnode,

                 int*& cellsize,

                 int*& stridedispl);


 /**
  * \brief Duplicate the first n elements of src into a newly new[]-allocated array.
  *
  * \param dest overwritten with the pointer to the new array; the previous value of dest
  *             is neither read nor released here
  * \param src  array providing the n elements to copy
  * \param n    number of elements to allocate and copy
  */
 template <typename T>
 void copy_array(T*& dest, T* src, size_t n) {
     // Publish the allocation first, then fill it element by element.
     dest = new T[n];
     std::copy_n(src, n, dest);
 }


 /**
  * \brief Duplicate the first n elements of src into storage obtained from ecalloc_align
  *        (called with its default alignment, NRN_SOA_BYTE_ALIGN).
  *
  * \param dest overwritten with the pointer to the new storage; the previous value of dest
  *             is neither read nor released here
  * \param src  array providing the n elements to copy
  * \param n    number of elements to allocate and copy
  */
 template <typename T>
 void copy_align_array(T*& dest, T* src, size_t n) {
     void* const raw = ecalloc_align(n, sizeof(T));
     dest = static_cast<T*>(raw);
     std::copy_n(src, n, dest);
 }


 #ifndef INTERLEAVE_DEBUG

 #define INTERLEAVE_DEBUG 0

 #endif


 #if INTERLEAVE_DEBUG

 void mk_cell_indices();

 #endif

 }  // namespace coreneuron (CORENRN_BUILD) or neuron

MemoryManaged
for gpu builds with unified memory support
Definition: memory.h:181

neuron::InterleaveInfo
Definition: cellorder.hpp:64

neuron::InterleaveInfo::InterleaveInfo
InterleaveInfo()=default

memory.h

coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12

coreneuron::ecalloc_align
void * ecalloc_align(size_t n, size_t size, size_t alignment)

neuron
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
Definition: tnode.hpp:17

neuron::ncell
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;} }}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;if(0) { nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) async(nt->stream_id)) nrn_pragma_omp(target teams loop map(present, alloc:nt[:1], strides[:nstride], ncycles[:nwarp], stridedispl[:nwarp+1], rootbegin[:nwarp+1], nodebegin[:nwarp+1])) for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} nrn_pragma_acc(wait(nt->stream_id)) } else { for(int icore=0;icore< ncore;icore+=warpsize) { solve_interleaved2_loop_body(nt, icore, ncycles, strides, stridedispl, rootbegin, nodebegin);} }}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
Definition: cellorder.cpp:784

neuron::ncycle
int ncycle
Definition: cellorder.cpp:620

neuron::firstnode
int firstnode
Definition: cellorder.cpp:624

neuron::interleave_order
std::vector< int > interleave_order(int ith, int ncell, int nnode, int *parent)
Function that performs the permutation of the cells such that the execution threads access coalesced ...
Definition: cellorder.cpp:348

neuron::nstride
int nstride
Definition: cellorder.cpp:789

neuron::copy_array
void copy_array(T *&dest, T *src, size_t n)
Definition: cellorder.hpp:129

neuron::node_order
std::vector< int > node_order(int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
Function that returns a permutation of length nnode.
Definition: cellorder1.cpp:321

neuron::copy_align_array
void copy_align_array(T *&dest, T *src, size_t n)
Definition: cellorder.hpp:136

neuron::stride
int * stride
Definition: cellorder.cpp:621

neuron::create_interleave_info
void create_interleave_info()
Definition: cellorder.cpp:110

neuron::lastnode
int lastnode
Definition: cellorder.cpp:625

neuron::solve_interleaved
void solve_interleaved(int ith)
Solve the Hines matrices based on the interleave_permute_type (1 or 2).

neuron::stridedispl
int int int int * stridedispl
Definition: cellorder.cpp:609

neuron::cellsize
int * cellsize
Definition: cellorder.cpp:793

neuron::destroy_interleave_info
void destroy_interleave_info()
Definition: cellorder.cpp:115

info
static List * info
Definition: netrec_discon.cpp:64

n
int const size_t const size_t n
Definition: nrngsl.h:10

NrnThread
Represent main neuron object computed by single thread.
Definition: multicore.h:58