NEURON
node_permute.cpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
9 /*
10 Below, the sense of permutation, is reversed. Though consistent, forward
11 permutation should be defined as (and the code should eventually be transformed)
12 so that
13  v: original vector
14  p: forward permutation
15  pv: permuted vector
16  pv[i] = v[p[i]]
17 and
18  pinv: inverse permutation
19  pv[pinv[i]] = v[i]
20 Note: pinv[p[i]] = i = p[pinv[i]]
21 */
22 
23 /*
24 Permute nodes.
25 
26 To make gaussian elimination on gpu more efficient.
27 
28 Permutation vector p[i] applied to a data vector, moves the data_original[i]
29 to data[p[i]].
30 That suffices for node properties such as area[i], a[i], b[i]. e.g.
31  area[p[i]] <- area_original[i]
32 
33 Notice that p on the left side is a forward permutation. On the right side
34 it serves as the inverse permutation.
35 area_original[i] <- area_permuted[p[i]]
36 
37 but things
38 get a bit more complicated when the data is an integer index into the
39 original data.
40 
41 For example:
42 
43 parent[i] needs to be transformed so that
44 parent[p[i]] <- p[parent_original[i]] except that if parent_original[j] = -1
45  then parent[p[j]] = -1
46 
47 membrane mechanism nodelist ( a subset of nodes) needs to be at least
48 minimally transformed so that
49 nodelist_new[k] <- p[nodelist_original[k]]
50 This does not affect the order of the membrane mechanism property data.
51 
52 However, computation is more efficient to permute (sort) nodelist_new so that
53 it follows as much as possible the permuted node ordering, ie in increasing
54 node order. Consider this further mechanism specific nodelist permutation,
55 which is to be applied to the above nodelist_new, to be p_m, which has the same
56 size as nodelist. ie.
57 nodelist[p_m[k]] <- nodelist_new[k].
58 
59 Notice the similarity to the parent case...
60 nodelist[p_m[k]] = p[nodelist_original[k]]
61 
62 and now the membrane mechanism node data, does need to be permuted to have an
63 order consistent with the new nodelist. Since there are nm instances of the
64 mechanism each with sz data values (consider AoS layout).
65 The data permutation is
66 for k=[0:nm] for isz=[0:sz]
67  data_m[p_m[k]*sz + isz] = data_m_original[k*sz + isz]
68 
69 For an SoA layout the indexing is k + isz*nm (where nm may include padding).
70 
71 A more complicated case is a mechanisms dparam array (nm instances each with
72 dsz values) Some of those values are indices into another mechanism (eg
73 pointers to ion properties) or voltage or area depending on the semantics of
74 the value. We can use the above data_m permutation but then need to update
75 the values according to the permutation of the object the value indexes into.
76 Consider the permutation of the target object to be p_t . Then a value
77 iold = pdata_m(k, isz) - data_t in AoS format
78 refers to k_t = iold / sz_t and isz_t = iold % sz_t
79 and for a target in SoA format k_t = iold % nm_t and isz_t = iold / nm_t
80 ie k_t_new = p_m_t[k_t] so, for AoS, inew = k_t_new*sz_t + isz_t
81 or , for SoA, inew = k_t_new + isz_t*nm_t
82 so pdata_m(k, isz) = inew + data_t
83 
84 
85 */
86 
87 #include <vector>
88 #include <utility>
89 #include <algorithm>
90 
91 #if CORENRN_BUILD
97 #include "nrnoc/ion_semantics.h"
98 #else
99 #include "nrnoc/multicore.h"
100 #include "oc/nrnassrt.h"
102 #endif
103 
104 #if CORENRN_BUILD
105 namespace coreneuron {
106 #else
107 namespace neuron {
108 #endif
109 
110 #if !CORENRN_BUILD
// NEURON-build stand-in for CoreNEURON's padded-size computation.
// Only layout 1 is supported on this side and no padding is applied,
// so the "padded" size is simply cnt.
static int nrn_soa_padded_size(int cnt, int layout) {
    assert(layout == 1);
    return cnt;
}
// NEURON-build stand-in for CoreNEURON's flat-index computation for the
// matrix coordinate (icnt, isz). Only the degenerate case sz == 1,
// isz == 0, layout 1 is used here, where the flat index is just icnt.
static int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout) {
    assert(isz == 0);
    assert(sz == 1);
    assert(layout == 1);
    return icnt;
}
121 #endif // !CORENRN_BUILD
122 
123 template <typename T>
124 void permute(T* data, int cnt, int sz, int layout, int* p) {
125  // data(p[icnt], isz) <- data(icnt, isz)
126  // this does not change data, merely permutes it.
127  // assert len(p) == cnt
128  if (!p) {
129  return;
130  }
131  int n = cnt * sz;
132  if (n < 1) {
133  return;
134  }
135 
136 #if CORENRN_BUILD
137  if (layout == Layout::SoA) { // for SoA, n might be larger due to cnt padding
138  n = nrn_soa_padded_size(cnt, layout) * sz;
139  }
140 #endif
141 
142  T* data_orig = new T[n];
143  for (int i = 0; i < n; ++i) {
144  data_orig[i] = data[i];
145  }
146 
147  for (int icnt = 0; icnt < cnt; ++icnt) {
148  for (int isz = 0; isz < sz; ++isz) {
149  // note that when layout==0, nrn_i_layout takes into account SoA padding.
150  int i = nrn_i_layout(icnt, cnt, isz, sz, layout);
151  int ip = nrn_i_layout(p[icnt], cnt, isz, sz, layout);
152  data[ip] = data_orig[i];
153  }
154  }
155 
156  delete[] data_orig;
157 }
158 
// Build the inverse of permutation p (length n): result[p[i]] == i.
// The caller owns the returned array and must delete[] it.
int* inverse_permute(int* p, int n) {
    int* result = new int[n];
    int i = 0;
    while (i < n) {
        result[p[i]] = i;
        ++i;
    }
    return result;
}
166 
167 static void invert_permute(int* p, int n) {
168  int* pinv = inverse_permute(p, n);
169  for (int i = 0; i < n; ++i) {
170  p[i] = pinv[i];
171  }
172  delete[] pinv;
173 }
174 
175 // type_of_ntdata: Return the mechanism type (or voltage) for nt._data[i].
176 // Used for updating POINTER. Analogous to nrn_dblpntr2nrncore in NEURON.
177 // To reduce search time, consider voltage first, then a few of the previous
178 // search results.
179 // type_hint first and store a few
180 // of the previous search result types to try next.
181 // Most usage is for voltage. Most of the rest is likely for a specific type.
82 Occasionally, eg. axial current, there are two types oscillating between
183 // a SUFFIX (for non-zero area node) and POINT_PROCESS (for zero area nodes)
184 // version
185 // full_search: helper for type_of_ntdata. Return mech type for nt._data[i].
186 // Update type_hints.
187 
188 #if CORENRN_BUILD
189 static std::vector<int> type_hints;
190 
191 static int full_search(NrnThread& nt, double* pd) {
192  int type = -1;
193  for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) {
194  Memb_list* ml = tml->ml;
195  int n = corenrn.get_prop_param_size()[tml->index] * ml->_nodecount_padded;
196  if (pd >= ml->data && pd < ml->data + n) {
197  type = tml->index;
198  // insert into type_hints
199  int i = 0;
200  for (int type_hint: type_hints) {
201  if (type < type_hint) {
202  break;
203  }
204  i++;
205  }
206  type_hints.insert(type_hints.begin() + i, type);
207  break;
208  }
209  }
210  assert(type > 0);
211  return type;
212 }
213 
214 // no longer static because also used by POINTER in nrn_checkpoint.cpp
// Return the mechanism type whose data range in nt._data contains index i,
// or `voltage` when nt._data + i falls in the voltage region.
// reset=true clears the cached type_hints before probing (use at the start
// of a fresh pass). Not static: also used by POINTER in nrn_checkpoint.cpp.
// NOTE(review): relies on nt._data placing voltage first with area directly
// after it -- confirm against the nt._data layout builder.
int type_of_ntdata(NrnThread& nt, int i, bool reset) {
    double* pd = nt._data + i;
    assert(pd >= nt._actual_v);
    if (pd < nt._actual_area) {  // voltage first (area just after voltage)
        return voltage;
    }
    assert(size_t(i) < nt._ndata);
    // then check the type hints. When inserting a hint, keep in type order
    if (reset) {
        type_hints.clear();
    }
    for (int type: type_hints) {
        Memb_list* ml = nt._ml_list[type];
        if (pd >= ml->data) {  // this or later
            int n = corenrn.get_prop_param_size()[type] * ml->_nodecount_padded;
            if (pd < ml->data + n) {  // this is the one
                return type;
            }
        } else {  // earlier
            // pd lies before this hinted mechanism's data; no hint matched,
            // so fall back to scanning every mechanism
            return full_search(nt, pd);
        }
    }
    // after the last type_hints
    return full_search(nt, pd);
}
240 #endif // CORENRN_BUILD
241 
242 #if CORENRN_BUILD
// After ml->data/ml->pdata have been permuted, the integer values stored in
// ml->pdata that index into other permuted arrays (area, diam, voltage, ion
// mechanism data) are stale. Rewrite each such value so it refers to the
// permuted location of the same quantity.
static void update_pdata_values(Memb_list* ml, int type, NrnThread& nt) {
    // assumes AoS to SoA transformation already made since we are using
    // nrn_i_layout to determine indices into both ml->pdata and into target data
    int psz = corenrn.get_prop_dparam_size()[type];
    if (psz == 0) {
        return;  // no dparam values at all
    }
    if (corenrn.get_is_artificial()[type]) {
        return;  // artificial cells are not attached to nodes; nothing to remap
    }
    int* semantics = corenrn.get_memb_func(type).dparam_semantics;
    if (!semantics) {
        return;  // no semantic info, so no values can be interpreted as indices
    }
    int* pdata = ml->pdata;
    int layout = corenrn.get_mech_data_layout()[type];
    int cnt = ml->nodecount;
    // ml padding does not matter (but target padding does matter)

    // interesting semantics are -1 (area), -5 (pointer), -9 (diam), or 0-999 (ion variables)
    for (int i = 0; i < psz; ++i) {
        int s = semantics[i];
        if (s == -1) {  // area
            int area0 = nt._actual_area - nt._data;  // includes padding if relevant
            int* p_target = nt._permute;  // area is in node order, so node permutation applies
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                // *pd is the original integer into nt._data . Needs to be replaced
                // by the permuted value

                // This is ok whether or not area changed by padding?
                // since old *pd updated appropriately by earlier AoS to SoA
                // transformation
                int ix = *pd - area0;  // original integer into area array.
                nrn_assert((ix >= 0) && (ix < nt.end));
                int ixnew = p_target[ix];
                *pd = ixnew + area0;
            }
        } else if (s == -9) {  // diam
            int diam0 = nt._actual_diam - nt._data;  // includes padding if relevant
            int* p_target = nt._permute;  // diam is also in node order
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                // *pd is the original integer into nt._data . Needs to be replaced
                // by the permuted value

                // This is ok whether or not diam changed by padding?
                // since old *pd updated appropriately by earlier AoS to SoA
                // transformation
                int ix = *pd - diam0;  // original integer into actual_diam array.
                nrn_assert((ix >= 0) && (ix < nt.end));
                int ixnew = p_target[ix];
                *pd = ixnew + diam0;
            }
        } else if (s == -5) {  // POINTER
            // assume pointer into nt._data. Most likely voltage.
            // If not voltage, most likely same mechanism for all indices.
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                // reset the hint cache on the first instance only
                int etype = type_of_ntdata(nt, *pd, iml == 0);
                if (etype == voltage) {
                    int v0 = nt._actual_v - nt._data;
                    int* e_target = nt._permute;  // voltage is in node order
                    int ix = *pd - v0;  // original integer into voltage array.
                    nrn_assert((ix >= 0) && (ix < nt.end));
                    int ixnew = e_target[ix];
                    *pd = ixnew + v0;
                } else if (etype > 0) {
                    // about same as for ion below but check each instance
                    Memb_list* eml = nt._ml_list[etype];
                    int edata0 = eml->data - nt._data;
                    int ecnt = eml->nodecount;
                    int esz = corenrn.get_prop_param_size()[etype];
                    int elayout = corenrn.get_mech_data_layout()[etype];
                    int* e_permute = eml->_permute;
                    int i_ecnt, i_esz, padded_ecnt;
                    int ix = *pd - edata0;
                    // recover (instance, variable) coordinates from the flat index
                    if (elayout == Layout::AoS) {
                        padded_ecnt = ecnt;
                        i_ecnt = ix / esz;
                        i_esz = ix % esz;
                    } else {  // SoA
                        assert(elayout == Layout::SoA);
                        padded_ecnt = nrn_soa_padded_size(ecnt, elayout);
                        i_ecnt = ix % padded_ecnt;
                        i_esz = ix / padded_ecnt;
                    }
                    // permute the instance coordinate, then re-flatten
                    int i_ecnt_new = e_permute ? e_permute[i_ecnt] : i_ecnt;
                    int ix_new = nrn_i_layout(i_ecnt_new, ecnt, i_esz, esz, elayout);
                    *pd = ix_new + edata0;
                } else {
                    nrn_assert(0);  // POINTER target not found in nt._data
                }
            }
        } else if (nrn_semantics_is_ion(s)) {  // ion
            // all instances point into the same ion mechanism's data,
            // so the target bookkeeping can be hoisted out of the loop
            int etype = nrn_semantics_ion_type(s);
            int elayout = corenrn.get_mech_data_layout()[etype];
            Memb_list* eml = nt._ml_list[etype];
            int edata0 = eml->data - nt._data;
            int ecnt = eml->nodecount;
            int esz = corenrn.get_prop_param_size()[etype];
            int* e_permute = eml->_permute;
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                int ix = *pd - edata0;
                // from ix determine i_ecnt and i_esz (need to permute i_ecnt)
                int i_ecnt, i_esz, padded_ecnt;
                if (elayout == Layout::AoS) {
                    padded_ecnt = ecnt;
                    i_ecnt = ix / esz;
                    i_esz = ix % esz;
                } else {  // SoA
                    assert(elayout == Layout::SoA);
                    padded_ecnt = nrn_soa_padded_size(ecnt, elayout);
                    i_ecnt = ix % padded_ecnt;
                    i_esz = ix / padded_ecnt;
                }
                int i_ecnt_new = e_permute[i_ecnt];
                int ix_new = nrn_i_layout(i_ecnt_new, ecnt, i_esz, esz, elayout);
                *pd = ix_new + edata0;
            }
        }
    }
}
367 
// Map each non-negative entry of vec through permute. Negative entries
// (e.g. a root's parent index of -1) are left untouched.
void node_permute(int* vec, int n, int* permute) {
    for (int i = 0; i != n; ++i) {
        const int v = vec[i];
        if (v >= 0) {
            vec[i] = permute[v];
        }
    }
}
375 
376 #else // not CORENRN_BUILD
377 
// Relabel parent indices through `permute`. Negative entries (root
// markers) are preserved unchanged.
void update_parent_index(int* vec, int vec_size, const std::vector<int>& permute) {
    for (int i = 0; i < vec_size; ++i) {
        const int old_index = vec[i];
        vec[i] = (old_index >= 0) ? permute[old_index] : old_index;
    }
}
385 
386 #endif // not CORENRN_BUILD
387 
// Forward-permute an int vector in place: vec[p[i]] <- vec[i].
// Treated as n instances of one value each in layout 1.
void permute_ptr(int* vec, int n, int* p) {
    permute(vec, n, 1, 1, p);
}
391 
// Forward-permute a double vector in place: vec[p[i]] <- vec[i].
// Treated as n instances of one value each in layout 1.
void permute_data(double* vec, int n, int* p) {
    permute(vec, n, 1, 1, p);
}
395 
396 #if CORENRN_BUILD
// Apply ml->_permute to a mechanism's data and pdata arrays, then fix up the
// integer indices stored in pdata so they refer to the permuted locations of
// their targets (area, diam, voltage, ion data).
void permute_ml(Memb_list* ml, int type, NrnThread& nt) {
    int sz = corenrn.get_prop_param_size()[type];
    int psz = corenrn.get_prop_dparam_size()[type];
    int layout = corenrn.get_mech_data_layout()[type];
    permute(ml->data, ml->nodecount, sz, layout, ml->_permute);
    permute(ml->pdata, ml->nodecount, psz, layout, ml->_permute);

    update_pdata_values(ml, type, nt);
}
406 #endif // CORENRN_BUILD
407 
408 #if CORENRN_DEBUG
// Debug helper: print an int array as "label: idx value idx value ...".
static void pr(const char* s, int* x, int n) {
    printf("%s:", s);
    int k = 0;
    while (k < n) {
        printf(" %d %d", k, x[k]);
        ++k;
    }
    printf("\n");
}
416 
// Debug helper: print a double array as "label: idx value idx value ...".
static void pr(const char* s, double* x, int n) {
    printf("%s:", s);
    int k = 0;
    while (k < n) {
        printf(" %d %g", k, x[k]);
        ++k;
    }
    printf("\n");
}
424 #endif
425 
426 // note that sort_indices has the sense of an inverse permutation in that
427 // the value of sort_indices[0] is the index with the smallest value in the
428 // indices array
429 
// Strict weak ordering for (value, original-index) pairs: sort primarily by
// value, breaking ties by the original index so equal values keep their
// original relative order after sorting.
static bool nrn_index_sort_cmp(const std::pair<int, int>& a, const std::pair<int, int>& b) {
    // std::pair's operator< is exactly this lexicographic comparison;
    // no need to hand-roll it.
    return a < b;
}
441 
// Return sort_indices such that values[sort_indices[0]] is the smallest
// entry, values[sort_indices[1]] the next, and so on; ties keep their
// original relative order (original index is the tie-breaker). The result
// therefore has the sense of an inverse permutation.
#if CORENRN_BUILD
static int* nrn_index_sort(int* values, int n) {
#else
std::vector<int> nrn_index_sort(int* values, int n) {
#endif
    // Pair each value with its original position, then sort lexicographically.
    std::vector<std::pair<int, int>> order;
    order.reserve(n);
    for (int i = 0; i < n; ++i) {
        order.emplace_back(values[i], i);
    }
    std::sort(order.begin(),
              order.end(),
              [](const std::pair<int, int>& a, const std::pair<int, int>& b) {
                  if (a.first != b.first) {
                      return a.first < b.first;
                  }
                  return a.second < b.second;
              });
#if CORENRN_BUILD
    int* sort_indices = new int[n];
#else
    std::vector<int> sort_indices(n);
#endif
    for (int i = 0; i < n; ++i) {
        sort_indices[i] = order[i].second;
    }
    return sort_indices;
}
463 
464 #if !CORENRN_BUILD
// Sort a mechanism's instances by increasing node index so memory access
// follows node ordering. The same ordering is applied to every parallel
// per-instance array so instance k keeps a consistent row across all of them.
void sort_ml(Memb_list* ml) {
    // isrt[k] is the original position of the k-th smallest node index
    // (inverse-permutation sense, see nrn_index_sort).
    auto isrt = nrn_index_sort(ml->nodeindices, ml->nodecount);
    forward_permute(ml->nodeindices, ml->nodecount, isrt);
    forward_permute(ml->nodelist, ml->nodecount, isrt);
    forward_permute(ml->prop, ml->nodecount, isrt);
    forward_permute(ml->pdata, ml->nodecount, isrt);
}
472 #endif // !CORENRN_BUILD
473 
474 #if CORENRN_BUILD
// Relabel a mechanism's node indices through the node permutation p, then
// build ml->_permute so the instances can be reordered by increasing
// (permuted) node index.
void permute_nodeindices(Memb_list* ml, int* p) {
    // nodeindices values are permuted according to p (that per se does
    // not affect vec).

    node_permute(ml->nodeindices, ml->nodecount, p);

    // Then the new node indices are sorted by
    // increasing index. Instances using the same node stay in same
    // original relative order so that their contributions to rhs, d (if any)
    // remain in same order (except for gpu parallelism).
    // That becomes ml->_permute

    // nrn_index_sort yields an inverse-sense permutation; invert it in place
    // so ml->_permute has the forward sense expected by permute_ptr.
    ml->_permute = nrn_index_sort(ml->nodeindices, ml->nodecount);
    invert_permute(ml->_permute, ml->nodecount);
    permute_ptr(ml->nodeindices, ml->nodecount, ml->_permute);
}
491 #endif // CORENRN_BUILD
492 
493 } // namespace coreneuron
auto & get_memb_func(size_t idx)
Definition: coreneuron.hpp:135
#define cnt
Definition: tqueue.hpp:44
#define i
Definition: md1redef.h:19
#define pdata
Definition: md1redef.h:37
#define assert(ex)
Definition: hocassrt.h:24
bool nrn_semantics_is_ion(int i)
Definition: ion_semantics.h:6
int nrn_semantics_ion_type(int i)
Definition: ion_semantics.h:12
printf
Definition: extdef.h:5
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout)
This function return the index in a flat array of a matrix coordinate (icnt, isz).
int * inverse_permute(int *p, int n)
CoreNeuron corenrn
Definition: multicore.cpp:53
int nrn_soa_padded_size(int cnt, int layout)
calculate size after padding for specific memory layout
static int permute(int i, NrnThread &nt)
Definition: prcellstate.cpp:28
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
Definition: tnode.hpp:17
void permute_nodeindices(Memb_list *ml, int *permute)
void permute_ml(Memb_list *ml, int type, NrnThread &nt)
void permute_data(double *vec, int n, int *p)
static void invert_permute(int *p, int n)
std::vector< int > nrn_index_sort(int *values, int n)
static bool nrn_index_sort_cmp(const std::pair< int, int > &a, const std::pair< int, int > &b)
void update_parent_index(int *vec, int vec_size, const std::vector< int > &permute)
void sort_ml(Memb_list *ml)
void permute_ptr(int *vec, int n, int *p)
int type_of_ntdata(NrnThread &, int index, bool reset)
#define nrn_assert(x)
assert()-like macro, independent of NDEBUG status
Definition: nrn_assert.h:33
int const size_t const size_t n
Definition: nrngsl.h:10
size_t p
s
Definition: multisend.cpp:521
short type
Definition: cabvars.h:10
static void pr(N_Vector x)
void forward_permute(std::vector< T > &data, const std::vector< int > &perm)
void vi(double *p1, double *p2, double v1, double v2, double *out)
A view into a set of mechanism instances.
Definition: nrnoc_ml.h:34
int nodecount
Definition: nrnoc_ml.h:78
int * nodeindices
Definition: nrnoc_ml.h:74
Node ** nodelist
Definition: nrnoc_ml.h:68
Datum ** pdata
Definition: nrnoc_ml.h:75
std::vector< double * > data()
Get a vector of double* representing the model data.
Definition: memblist.cpp:64
Prop ** prop
Definition: nrnoc_ml.h:76
Represent main neuron object computed by single thread.
Definition: multicore.h:58
NrnThreadMembList * tml
Definition: multicore.h:62
int end
Definition: multicore.h:65
Memb_list ** _ml_list
Definition: multicore.h:63
struct NrnThreadMembList * next
Definition: multicore.h:34