NEURON
node_permute.cpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
9 /*
10 Below, the sense of permutation, is reversed. Though consistent, forward
11 permutation should be defined as (and the code should eventually be transformed)
12 so that
13  v: original vector
14  p: forward permutation
15  pv: permuted vector
16  pv[i] = v[p[i]]
17 and
18  pinv: inverse permutation
19  pv[pinv[i]] = v[i]
20 Note: pinv[p[i]] = i = p[pinv[i]]
21 */
22 
23 /*
24 Permute nodes.
25 
26 To make gaussian elimination on gpu more efficient.
27 
28 Permutation vector p[i] applied to a data vector, moves the data_original[i]
29 to data[p[i]].
30 That suffices for node properties such as area[i], a[i], b[i]. e.g.
31  area[p[i]] <- area_original[i]
32 
33 Notice that p on the left side is a forward permutation. On the right side
34 it serves as the inverse permutation.
35 area_original[i] <- area_permuted[p[i]]
36 
37 but things
38 get a bit more complicated when the data is an integer index into the
39 original data.
40 
41 For example:
42 
43 parent[i] needs to be transformed so that
44 parent[p[i]] <- p[parent_original[i]] except that if parent_original[j] = -1
45  then parent[p[j]] = -1
46 
47 membrane mechanism nodelist ( a subset of nodes) needs to be at least
48 minimally transformed so that
49 nodelist_new[k] <- p[nodelist_original[k]]
50 This does not affect the order of the membrane mechanism property data.
51 
52 However, computation is more efficient to permute (sort) nodelist_new so that
53 it follows as much as possible the permuted node ordering, ie in increasing
54 node order. Consider this further mechanism specific nodelist permutation,
55 which is to be applied to the above nodelist_new, to be p_m, which has the same
56 size as nodelist. ie.
57 nodelist[p_m[k]] <- nodelist_new[k].
58 
59 Notice the similarity to the parent case...
60 nodelist[p_m[k]] = p[nodelist_original[k]]
61 
62 and now the membrane mechanism node data, does need to be permuted to have an
63 order consistent with the new nodelist. Since there are nm instances of the
64 mechanism each with sz data values (consider AoS layout).
65 The data permutation is
66 for k=[0:nm] for isz=[0:sz]
67  data_m[p_m[k]*sz + isz] = data_m_original[k*sz + isz]
68 
69 For an SoA layout the indexing is k + isz*nm (where nm may include padding).
70 
71 A more complicated case is a mechanisms dparam array (nm instances each with
72 dsz values) Some of those values are indices into another mechanism (eg
73 pointers to ion properties) or voltage or area depending on the semantics of
74 the value. We can use the above data_m permutation but then need to update
75 the values according to the permutation of the object the value indexes into.
76 Consider the permutation of the target object to be p_t . Then a value
77 iold = pdata_m(k, isz) - data_t in AoS format
78 refers to k_t = iold / sz_t and isz_t = iold % sz_t
79 and for a target in SoA format k_t = iold % nm_t and isz_t = iold / nm_t
80 ie k_t_new = p_m_t[k_t] so, for AoS, inew = k_t_new*sz_t + isz_t
81 or , for SoA, inew = k_t_new + isz_t*nm_t
82 so pdata_m(k, isz) = inew + data_t
83 
84 
85 */
86 
87 #include <vector>
88 #include <utility>
89 #include <algorithm>
90 
91 #if CORENRN_BUILD
97 #include "nrnoc/ion_semantics.h"
98 #else
99 #include "nrnoc/multicore.h"
100 #include "oc/nrnassrt.h"
102 #endif
103 
104 #if CORENRN_BUILD
105 namespace coreneuron {
106 #else
107 namespace neuron {
108 #endif
109 
110 #if !CORENRN_BUILD
// NEURON-build stand-in for CoreNEURON's padded-size computation.
// Only layout 1 is supported on this side and no padding is applied,
// so the "padded" size is simply cnt.
static int nrn_soa_padded_size(int cnt, int layout) {
    assert(layout == 1);
    return cnt;
}
// NEURON-build stand-in for CoreNEURON's flat-index computation for the
// matrix coordinate (icnt, isz). Only the degenerate case sz == 1,
// isz == 0, layout 1 is used here, where the flat index is just icnt.
static int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout) {
    assert(isz == 0);
    assert(sz == 1);
    assert(layout == 1);
    return icnt;
}
121 #endif // !CORENRN_BUILD
122 
123 template <typename T>
124 void permute(T* data, int cnt, int sz, int layout, int* p) {
125  // data(p[icnt], isz) <- data(icnt, isz)
126  // this does not change data, merely permutes it.
127  // assert len(p) == cnt
128  if (!p) {
129  return;
130  }
131  int n = cnt * sz;
132  if (n < 1) {
133  return;
134  }
135 
136 #if CORENRN_BUILD
137  if (layout == Layout::SoA) { // for SoA, n might be larger due to cnt padding
138  n = nrn_soa_padded_size(cnt, layout) * sz;
139  }
140 #endif
141 
142  T* data_orig = new T[n];
143  for (int i = 0; i < n; ++i) {
144  data_orig[i] = data[i];
145  }
146 
147  for (int icnt = 0; icnt < cnt; ++icnt) {
148  for (int isz = 0; isz < sz; ++isz) {
149  // note that when layout==0, nrn_i_layout takes into account SoA padding.
150  int i = nrn_i_layout(icnt, cnt, isz, sz, layout);
151  int ip = nrn_i_layout(p[icnt], cnt, isz, sz, layout);
152  data[ip] = data_orig[i];
153  }
154  }
155 
156  delete[] data_orig;
157 }
158 
// Build the inverse of permutation p (length n): result[p[i]] == i.
// The caller owns the returned array and must delete[] it.
int* inverse_permute(int* p, int n) {
    int* result = new int[n];
    int i = 0;
    while (i < n) {
        result[p[i]] = i;
        ++i;
    }
    return result;
}
166 
167 static void invert_permute(int* p, int n) {
168  int* pinv = inverse_permute(p, n);
169  for (int i = 0; i < n; ++i) {
170  p[i] = pinv[i];
171  }
172  delete[] pinv;
173 }
174 
175 // type_of_ntdata: Return the mechanism type (or voltage) for nt._data[i].
176 // Used for updating POINTER. Analogous to nrn_dblpntr2nrncore in NEURON.
177 // To reduce search time, consider voltage first, then a few of the previous
178 // search results.
179 // type_hint first and store a few
180 // of the previous search result types to try next.
181 // Most usage is for voltage. Most of the rest is likely for a specific type.
82 Occasionally, eg. axial current, there are two types oscillating between
183 // a SUFFIX (for non-zero area node) and POINT_PROCESS (for zero area nodes)
184 // version
185 // full_search: helper for type_of_ntdata. Return mech type for nt._data[i].
186 // Update type_hints.
187 
188 #if CORENRN_BUILD
189 static std::vector<int> type_hints;
190 
191 static int full_search(NrnThread& nt, double* pd) {
192  int type = -1;
193  for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) {
194  Memb_list* ml = tml->ml;
195  int n = corenrn.get_prop_param_size()[tml->index] * ml->_nodecount_padded;
196  if (pd >= ml->data && pd < ml->data + n) {
197  type = tml->index;
198  // insert into type_hints
199  int i = 0;
200  for (int type_hint: type_hints) {
201  if (type < type_hint) {
202  break;
203  }
204  i++;
205  }
206  type_hints.insert(type_hints.begin() + i, type);
207  break;
208  }
209  }
210  assert(type > 0);
211  return type;
212 }
213 
214 // no longer static because also used by POINTER in nrn_checkpoint.cpp
// Return the mechanism type whose data range in nt._data contains index i,
// or `voltage` when nt._data + i falls in the voltage region.
// reset=true clears the cached type_hints before probing (use at the start
// of a fresh pass). Not static: also used by POINTER in nrn_checkpoint.cpp.
// NOTE(review): relies on nt._data placing voltage first with area directly
// after it -- confirm against the nt._data layout builder.
int type_of_ntdata(NrnThread& nt, int i, bool reset) {
    double* pd = nt._data + i;
    assert(pd >= nt._actual_v);
    if (pd < nt._actual_area) {  // voltage first (area just after voltage)
        return voltage;
    }
    assert(size_t(i) < nt._ndata);
    // then check the type hints. When inserting a hint, keep in type order
    if (reset) {
        type_hints.clear();
    }
    for (int type: type_hints) {
        Memb_list* ml = nt._ml_list[type];
        if (pd >= ml->data) {  // this or later
            int n = corenrn.get_prop_param_size()[type] * ml->_nodecount_padded;
            if (pd < ml->data + n) {  // this is the one
                return type;
            }
        } else {  // earlier
            // pd lies before this hinted mechanism's data; no hint matched,
            // so fall back to scanning every mechanism
            return full_search(nt, pd);
        }
    }
    // after the last type_hints
    return full_search(nt, pd);
}
240 #endif // CORENRN_BUILD
241 
242 #if CORENRN_BUILD
// After ml->data/ml->pdata have been permuted, the integer values stored in
// ml->pdata that index into other permuted arrays (area, diam, voltage, ion
// mechanism data) are stale. Rewrite each such value so it refers to the
// permuted location of the same quantity.
static void update_pdata_values(Memb_list* ml, int type, NrnThread& nt) {
    // assumes AoS to SoA transformation already made since we are using
    // nrn_i_layout to determine indices into both ml->pdata and into target data
    int psz = corenrn.get_prop_dparam_size()[type];
    if (psz == 0) {
        return;  // no dparam values at all
    }
    if (corenrn.get_is_artificial()[type]) {
        return;  // artificial cells are not attached to nodes; nothing to remap
    }
    int* semantics = corenrn.get_memb_func(type).dparam_semantics;
    if (!semantics) {
        return;  // no semantic info, so no values can be interpreted as indices
    }
    int* pdata = ml->pdata;
    int layout = corenrn.get_mech_data_layout()[type];
    int cnt = ml->nodecount;
    // ml padding does not matter (but target padding does matter)

    // interesting semantics are -1 (area), -5 (pointer), -9 (diam), or 0-999 (ion variables)
    for (int i = 0; i < psz; ++i) {
        int s = semantics[i];
        if (s == -1) {  // area
            int area0 = nt._actual_area - nt._data;  // includes padding if relevant
            int* p_target = nt._permute;  // area is in node order, so node permutation applies
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                // *pd is the original integer into nt._data . Needs to be replaced
                // by the permuted value

                // This is ok whether or not area changed by padding?
                // since old *pd updated appropriately by earlier AoS to SoA
                // transformation
                int ix = *pd - area0;  // original integer into area array.
                nrn_assert((ix >= 0) && (ix < nt.end));
                int ixnew = p_target[ix];
                *pd = ixnew + area0;
            }
        } else if (s == -9) {  // diam
            int diam0 = nt._actual_diam - nt._data;  // includes padding if relevant
            int* p_target = nt._permute;  // diam is also in node order
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                // *pd is the original integer into nt._data . Needs to be replaced
                // by the permuted value

                // This is ok whether or not diam changed by padding?
                // since old *pd updated appropriately by earlier AoS to SoA
                // transformation
                int ix = *pd - diam0;  // original integer into actual_diam array.
                nrn_assert((ix >= 0) && (ix < nt.end));
                int ixnew = p_target[ix];
                *pd = ixnew + diam0;
            }
        } else if (s == -5) {  // POINTER
            // assume pointer into nt._data. Most likely voltage.
            // If not voltage, most likely same mechanism for all indices.
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                // reset the hint cache on the first instance only
                int etype = type_of_ntdata(nt, *pd, iml == 0);
                if (etype == voltage) {
                    int v0 = nt._actual_v - nt._data;
                    int* e_target = nt._permute;  // voltage is in node order
                    int ix = *pd - v0;  // original integer into voltage array.
                    nrn_assert((ix >= 0) && (ix < nt.end));
                    int ixnew = e_target[ix];
                    *pd = ixnew + v0;
                } else if (etype > 0) {
                    // about same as for ion below but check each instance
                    Memb_list* eml = nt._ml_list[etype];
                    int edata0 = eml->data - nt._data;
                    int ecnt = eml->nodecount;
                    int esz = corenrn.get_prop_param_size()[etype];
                    int elayout = corenrn.get_mech_data_layout()[etype];
                    int* e_permute = eml->_permute;
                    int i_ecnt, i_esz, padded_ecnt;
                    int ix = *pd - edata0;
                    // recover (instance, variable) coordinates from the flat index
                    if (elayout == Layout::AoS) {
                        padded_ecnt = ecnt;
                        i_ecnt = ix / esz;
                        i_esz = ix % esz;
                    } else {  // SoA
                        assert(elayout == Layout::SoA);
                        padded_ecnt = nrn_soa_padded_size(ecnt, elayout);
                        i_ecnt = ix % padded_ecnt;
                        i_esz = ix / padded_ecnt;
                    }
                    // permute the instance coordinate, then re-flatten
                    int i_ecnt_new = e_permute ? e_permute[i_ecnt] : i_ecnt;
                    int ix_new = nrn_i_layout(i_ecnt_new, ecnt, i_esz, esz, elayout);
                    *pd = ix_new + edata0;
                } else {
                    nrn_assert(0);  // POINTER target not found in nt._data
                }
            }
        } else if (nrn_semantics_is_ion(s)) {  // ion
            // all instances point into the same ion mechanism's data,
            // so the target bookkeeping can be hoisted out of the loop
            int etype = nrn_semantics_ion_type(s);
            int elayout = corenrn.get_mech_data_layout()[etype];
            Memb_list* eml = nt._ml_list[etype];
            int edata0 = eml->data - nt._data;
            int ecnt = eml->nodecount;
            int esz = corenrn.get_prop_param_size()[etype];
            int* e_permute = eml->_permute;
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                int ix = *pd - edata0;
                // from ix determine i_ecnt and i_esz (need to permute i_ecnt)
                int i_ecnt, i_esz, padded_ecnt;
                if (elayout == Layout::AoS) {
                    padded_ecnt = ecnt;
                    i_ecnt = ix / esz;
                    i_esz = ix % esz;
                } else {  // SoA
                    assert(elayout == Layout::SoA);
                    padded_ecnt = nrn_soa_padded_size(ecnt, elayout);
                    i_ecnt = ix % padded_ecnt;
                    i_esz = ix / padded_ecnt;
                }
                int i_ecnt_new = e_permute[i_ecnt];
                int ix_new = nrn_i_layout(i_ecnt_new, ecnt, i_esz, esz, elayout);
                *pd = ix_new + edata0;
            }
        }
    }
}
367 
// Map each non-negative entry of vec through permute. Negative entries
// (e.g. a root's parent index of -1) are left untouched.
void node_permute(int* vec, int n, int* permute) {
    for (int i = 0; i != n; ++i) {
        const int v = vec[i];
        if (v >= 0) {
            vec[i] = permute[v];
        }
    }
}
375 
376 #else // not CORENRN_BUILD
377 
// Relabel parent indices through `permute`. Negative entries (root
// markers) are preserved unchanged.
void update_parent_index(int* vec, int vec_size, const std::vector<int>& permute) {
    for (int i = 0; i < vec_size; ++i) {
        const int old_index = vec[i];
        vec[i] = (old_index >= 0) ? permute[old_index] : old_index;
    }
}
385 
386 #endif // not CORENRN_BUILD
387 
// Forward-permute an int vector in place: vec[p[i]] <- vec[i].
// Treated as n instances of one value each in layout 1.
void permute_ptr(int* vec, int n, int* p) {
    permute(vec, n, 1, 1, p);
}
391 
// Forward-permute a double vector in place: vec[p[i]] <- vec[i].
// Treated as n instances of one value each in layout 1.
void permute_data(double* vec, int n, int* p) {
    permute(vec, n, 1, 1, p);
}
395 
396 #if CORENRN_BUILD
// Apply ml->_permute to a mechanism's data and pdata arrays, then fix up the
// integer indices stored in pdata so they refer to the permuted locations of
// their targets (area, diam, voltage, ion data).
void permute_ml(Memb_list* ml, int type, NrnThread& nt) {
    int sz = corenrn.get_prop_param_size()[type];
    int psz = corenrn.get_prop_dparam_size()[type];
    int layout = corenrn.get_mech_data_layout()[type];
    permute(ml->data, ml->nodecount, sz, layout, ml->_permute);
    permute(ml->pdata, ml->nodecount, psz, layout, ml->_permute);

    update_pdata_values(ml, type, nt);
}
406 #endif // CORENRN_BUILD
407 
408 #if CORENRN_DEBUG
// Debug helper: print an int array as "label: idx value idx value ...".
static void pr(const char* s, int* x, int n) {
    printf("%s:", s);
    int k = 0;
    while (k < n) {
        printf(" %d %d", k, x[k]);
        ++k;
    }
    printf("\n");
}
416 
// Debug helper: print a double array as "label: idx value idx value ...".
static void pr(const char* s, double* x, int n) {
    printf("%s:", s);
    int k = 0;
    while (k < n) {
        printf(" %d %g", k, x[k]);
        ++k;
    }
    printf("\n");
}
424 #endif
425 
426 // note that sort_indices has the sense of an inverse permutation in that
427 // the value of sort_indices[0] is the index with the smallest value in the
428 // indices array
429 
// Strict weak ordering for (value, original-index) pairs: sort primarily by
// value, breaking ties by the original index so equal values keep their
// original relative order after sorting.
static bool nrn_index_sort_cmp(const std::pair<int, int>& a, const std::pair<int, int>& b) {
    // std::pair's operator< is exactly this lexicographic comparison;
    // no need to hand-roll it.
    return a < b;
}
441 
// Return sort_indices such that values[sort_indices[0]] is the smallest
// entry, values[sort_indices[1]] the next, and so on; ties keep their
// original relative order (original index is the tie-breaker). The result
// therefore has the sense of an inverse permutation.
#if CORENRN_BUILD
static int* nrn_index_sort(int* values, int n) {
#else
std::vector<int> nrn_index_sort(int* values, int n) {
#endif
    // Pair each value with its original position, then sort lexicographically.
    std::vector<std::pair<int, int>> order;
    order.reserve(n);
    for (int i = 0; i < n; ++i) {
        order.emplace_back(values[i], i);
    }
    std::sort(order.begin(),
              order.end(),
              [](const std::pair<int, int>& a, const std::pair<int, int>& b) {
                  if (a.first != b.first) {
                      return a.first < b.first;
                  }
                  return a.second < b.second;
              });
#if CORENRN_BUILD
    int* sort_indices = new int[n];
#else
    std::vector<int> sort_indices(n);
#endif
    for (int i = 0; i < n; ++i) {
        sort_indices[i] = order[i].second;
    }
    return sort_indices;
}
463 
464 #if !CORENRN_BUILD
// Sort a mechanism's instances by increasing node index so memory access
// follows node ordering. The same ordering is applied to every parallel
// per-instance array so instance k keeps a consistent row across all of them.
void sort_ml(Memb_list* ml) {
    // isrt[k] is the original position of the k-th smallest node index
    // (inverse-permutation sense, see nrn_index_sort).
    auto isrt = nrn_index_sort(ml->nodeindices, ml->nodecount);
    forward_permute(ml->nodeindices, ml->nodecount, isrt);
    forward_permute(ml->nodelist, ml->nodecount, isrt);
    forward_permute(ml->prop, ml->nodecount, isrt);
    forward_permute(ml->pdata, ml->nodecount, isrt);
}
472 #endif // !CORENRN_BUILD
473 
474 #if CORENRN_BUILD
// Relabel a mechanism's node indices through the node permutation p, then
// build ml->_permute so the instances can be reordered by increasing
// (permuted) node index.
void permute_nodeindices(Memb_list* ml, int* p) {
    // nodeindices values are permuted according to p (that per se does
    // not affect vec).

    node_permute(ml->nodeindices, ml->nodecount, p);

    // Then the new node indices are sorted by
    // increasing index. Instances using the same node stay in same
    // original relative order so that their contributions to rhs, d (if any)
    // remain in same order (except for gpu parallelism).
    // That becomes ml->_permute

    // nrn_index_sort yields an inverse-sense permutation; invert it in place
    // so ml->_permute has the forward sense expected by permute_ptr.
    ml->_permute = nrn_index_sort(ml->nodeindices, ml->nodecount);
    invert_permute(ml->_permute, ml->nodecount);
    permute_ptr(ml->nodeindices, ml->nodecount, ml->_permute);
}
491 #endif // CORENRN_BUILD
492 
493 } // namespace coreneuron
auto & get_memb_func(size_t idx)
Definition: coreneuron.hpp:135
#define cnt
Definition: tqueue.hpp:44
#define i
Definition: md1redef.h:19
#define pdata
Definition: md1redef.h:37
#define assert(ex)
Definition: hocassrt.h:24
bool nrn_semantics_is_ion(int i)
Definition: ion_semantics.h:6
int nrn_semantics_ion_type(int i)
Definition: ion_semantics.h:12
printf
Definition: extdef.h:5
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout)
This function return the index in a flat array of a matrix coordinate (icnt, isz).
int * inverse_permute(int *p, int n)
CoreNeuron corenrn
Definition: multicore.cpp:53
int nrn_soa_padded_size(int cnt, int layout)
calculate size after padding for specific memory layout
static int permute(int i, NrnThread &nt)
Definition: prcellstate.cpp:28
In mechanism libraries, cannot use auto const token = nrn_ensure_model_data_are_sorted(); because the...
Definition: tnode.hpp:17
void permute_nodeindices(Memb_list *ml, int *permute)
void permute_ml(Memb_list *ml, int type, NrnThread &nt)
void permute_data(double *vec, int n, int *p)
static void invert_permute(int *p, int n)
std::vector< int > nrn_index_sort(int *values, int n)
static bool nrn_index_sort_cmp(const std::pair< int, int > &a, const std::pair< int, int > &b)
void update_parent_index(int *vec, int vec_size, const std::vector< int > &permute)
void sort_ml(Memb_list *ml)
void permute_ptr(int *vec, int n, int *p)
int type_of_ntdata(NrnThread &, int index, bool reset)
#define nrn_assert(x)
assert()-like macro, independent of NDEBUG status
Definition: nrn_assert.h:33
int const size_t const size_t n
Definition: nrngsl.h:10
size_t p
s
Definition: multisend.cpp:521
short type
Definition: cabvars.h:10
static void pr(N_Vector x)
void forward_permute(std::vector< T > &data, const std::vector< int > &perm)
void vi(double *p1, double *p2, double v1, double v2, double *out)
A view into a set of mechanism instances.
Definition: nrnoc_ml.h:34
int nodecount
Definition: nrnoc_ml.h:78
int * nodeindices
Definition: nrnoc_ml.h:74
Node ** nodelist
Definition: nrnoc_ml.h:68
Datum ** pdata
Definition: nrnoc_ml.h:75
std::vector< double * > data()
Get a vector of double* representing the model data.
Definition: memblist.cpp:64
Prop ** prop
Definition: nrnoc_ml.h:76
Represent main neuron object computed by single thread.
Definition: multicore.h:58
NrnThreadMembList * tml
Definition: multicore.h:62
int end
Definition: multicore.h:65
Memb_list ** _ml_list
Definition: multicore.h:63
struct NrnThreadMembList * next
Definition: multicore.h:34