NEURON
multicore.cpp
1 #include "multicore.h"
2 
3 #include <nrnmpi.h>
4 
5 #include "hoclist.h"
6 #include "section.h"
7 /*
8 Now that threads have taken over the actual_v, v_node, etc, it might
9 be a good time to regularize the method of freeing, allocating, and
10 updating those arrays. To recapitulate the history, Node used to
11 be the structure that held direct values for v, area, d, rhs, etc.
12 That continued to hold for the cray vectorization project which
13 introduced v_node, v_parent, memb_list. Cache efficiency introduced
14 actual_v, actual_area, actual_d, etc and the Node started pointing
15 into those arrays. Additional nodes after allocation required updating
16 pointers to v and area since those arrays were freed and reallocated.
17 Now, the threads hold all these arrays and we want to update them
18 properly under the circumstances of changing topology, changing
19 number of threads, and changing distribution of cells on threads.
20 Note there are no longer global versions of any of these arrays.
21 We do not want to update merely due to a change in area. Recently
22 we have dealt with diam, area, ri on a section basis. We generally
23 desire an update just before a simulation when the efficient
24 structures are necessary. This is reasonably well handled by the
25 v_structure_change flag which historically freed and reallocated
26 v_node and v_parent and, just before this comment,
27 ended up setting the NrnThread tml. This makes most of the old
28 memb_list vestigial, and we have now gotten rid of it except for
29 the artificial cells (and it is possibly not really necessary there).
30 Switching between the sparse and tree matrix just causes freeing and
31 reallocation of actual_rhs.
32 
33 If we can get the freeing, reallocation, and pointer update correct
34 for _actual_v, I am guessing everything else can be dragged along with
35 it. We have two major cases, call to pc.nthread and change in
36 model structure. We want to use Node* as much as possible and defer
37 the handling of v_structure_change as long as possible.
38 */
39 
40 #include "nmodlmutex.h"
41 
42 #include <cstdint>
43 #include <condition_variable>
44 #include <mutex>
45 #include <thread>
46 #include <utility>
47 #include <variant>
48 #include <vector>
49 #include <iostream>
50 #include <queue> // for breadth first cell traversal without recursion
51 
52 #define CACHELINE_ALLOC(name, type, size) \
53  name = (type*) nrn_cacheline_alloc((void**) &name, size * sizeof(type))
54 #define CACHELINE_CALLOC(name, type, size) \
55  name = (type*) nrn_cacheline_calloc((void**) &name, size, sizeof(type))
56 
57 int nrn_nthread;
58 NrnThread* nrn_threads;
59 
60 void (*nrn_mk_transfer_thread_data_)();
61 
62 static int busywait_;
63 static int busywait_main_;
64 extern void (*nrn_multisplit_setup_)();
65 extern int v_structure_change;
66 extern int diam_changed;
67 extern Section** secorder;
68 extern int section_count;
69 extern void spDestroy(char*);
70 
71 void nrn_mk_table_check();
72 static std::vector<std::pair<int, NrnThreadMembList*>> table_check_;
73 static int allow_busywait_;
74 
75 static void* nulljob(NrnThread* nt) {
76  return nullptr;
77 }
78 
79 int nrn_inthread_;
80 
81 namespace nrn {
82 std::unique_ptr<std::mutex> nmodlmutex;
83 }
84 
85 namespace {
86 bool interpreter_locked{false};
87 std::unique_ptr<std::mutex> interpreter_lock;
88 
89 enum struct worker_flag { execute_job, exit, wait };
90 
91 // With C++17 and alignment-aware allocators we could do something like
92 // alignas(std::hardware_destructive_interference_size) here and then use a
93 // regular vector. https://en.cppreference.com/w/cpp/compiler_support/17 shows
94 // that std::hardware_destructive_interference_size is not very well supported.
95 struct worker_conf_t {
96  /* for nrn_solve etc.*/
97  std::variant<std::monostate,
98  worker_job_t,
99  std::pair<worker_job_with_token_t, neuron::model_sorted_token const*>>
100  job{};
101  std::size_t thread_id{};
102  worker_flag flag{worker_flag::wait};
103  friend bool operator==(worker_conf_t const& lhs, worker_conf_t const& rhs) {
104  return lhs.flag == rhs.flag && lhs.thread_id == rhs.thread_id && lhs.job == rhs.job;
105  }
106 };
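// Hedged sketch (illustration only, not part of the original file): with a toolchain that
// reliably provides std::hardware_destructive_interference_size (from <new>; it is optional
// in practice), each worker_conf_t could be padded to its own cache line and kept in a plain
// std::vector instead of going through CACHELINE_ALLOC. The 64-byte fallback and the
// padded_worker_conf_t name are assumptions.
#if 0
#ifdef __cpp_lib_hardware_interference_size
inline constexpr std::size_t cache_line_bytes = std::hardware_destructive_interference_size;
#else
inline constexpr std::size_t cache_line_bytes = 64;  // assumed typical cache-line size
#endif
struct alignas(cache_line_bytes) padded_worker_conf_t: worker_conf_t {};
static_assert(sizeof(padded_worker_conf_t) % cache_line_bytes == 0,
              "each entry occupies whole cache lines, so adjacent entries cannot false-share");
// std::vector<padded_worker_conf_t> m_wc(nrn_nthread);  // would replace the raw cacheline array
#endif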
107 
108 struct worker_kernel {
109  worker_kernel(std::size_t thread_id)
110  : m_thread_id{thread_id} {}
111  void operator()(std::monostate const&) const {
112  throw std::runtime_error("worker_kernel");
113  }
114  void operator()(worker_job_t job) const {
115  job(nrn_threads + m_thread_id);
116  }
117  void operator()(
118  std::pair<worker_job_with_token_t, neuron::model_sorted_token const*> const& pair) const {
119  auto const& [job, token_ptr] = pair;
120  job(*token_ptr, nrn_threads[m_thread_id]);
121  }
122 
123  private:
124  std::size_t m_thread_id{};
125 };
126 
127 void worker_main(worker_conf_t* my_wc_ptr,
128  std::condition_variable* my_cond_ptr,
129  std::mutex* my_mut_ptr) {
130  assert(my_cond_ptr);
131  assert(my_mut_ptr);
132  assert(my_wc_ptr);
133  auto& cond{*my_cond_ptr};
134  auto& mut{*my_mut_ptr};
135  auto& wc{*my_wc_ptr};
136  for (;;) {
137  if (busywait_) {
138  // WARNING: this branch has not been extensively tested after the
139  // std::thread migration.
140  while (wc.flag == worker_flag::wait) {
141  ;
142  }
143  if (wc.flag == worker_flag::exit) {
144  return;
145  }
146  assert(wc.flag == worker_flag::execute_job);
147  std::visit(worker_kernel{wc.thread_id}, wc.job);
148  wc.flag = worker_flag::wait;
149  wc.job = std::monostate{};
150  cond.notify_one();
151  } else {
152  worker_conf_t conf{};
153  {
154  // Wait until we have a job to execute or we have been told to
155  // shut down.
156  std::unique_lock<std::mutex> lock{mut};
157  cond.wait(lock, [&wc] { return wc.flag != worker_flag::wait; });
158  // Received instructions from the coordinator thread.
159  assert(wc.flag == worker_flag::execute_job || wc.flag == worker_flag::exit);
160  if (wc.flag == worker_flag::exit) {
161  return;
162  }
163  assert(wc.flag == worker_flag::execute_job);
164  // Save the workload + argument to local variables before
165  // releasing the mutex.
166  conf = wc;
167  }
168  // Execute the workload without keeping the mutex
169  std::visit(worker_kernel{conf.thread_id}, conf.job);
170  // Signal that the work is completed and this thread is becoming
171  // idle
172  {
173  std::lock_guard<std::mutex> _{mut};
174  // Make sure we don't accidentally overwrite an exit signal from
175  // the coordinating thread.
176  if (wc.flag == worker_flag::execute_job) {
177  assert(wc == conf);
178  wc.flag = worker_flag::wait;
179  wc.job = std::monostate{};
180  }
181  }
182  // Notify the coordinating thread.
183  cond.notify_one();
184  }
185  }
186 }
187 
188 // Using an instance of a custom type allows us to manage the teardown process
189 // more easily. TODO: remove the pointless zeroth entry in the vectors/arrays.
190 struct worker_threads_t {
191  worker_threads_t()
192  : m_cond{std::make_unique<std::condition_variable[]>(nrn_nthread)}
193  , m_mut{std::make_unique<std::mutex[]>(nrn_nthread)} {
194  // Note that this does not call the worker_conf_t constructor.
195  CACHELINE_ALLOC(m_wc, worker_conf_t, nrn_nthread);
196  m_worker_threads.reserve(nrn_nthread);
197  // worker_threads[0] does not appear to be used
198  m_worker_threads.emplace_back();
199  for (std::size_t i = 1; i < nrn_nthread; ++i) {
200  new (m_wc + i) worker_conf_t{};
201  m_wc[i].thread_id = i;
202  m_worker_threads.emplace_back(worker_main, &(m_wc[i]), &(m_cond[i]), &(m_mut[i]));
203  }
204  if (!interpreter_lock) {
205  interpreter_locked = false;
206  interpreter_lock = std::make_unique<std::mutex>();
207  }
208  if (!nrn::nmodlmutex) {
209  nrn::nmodlmutex = std::make_unique<std::mutex>();
210  }
211  }
212 
213  ~worker_threads_t() {
214  assert(m_worker_threads.size() == nrn_nthread);
215  wait();
216  for (std::size_t i = 1; i < nrn_nthread; ++i) {
217  {
218  std::lock_guard<std::mutex> _{m_mut[i]};
219  m_wc[i].flag = worker_flag::exit;
220  }
221  m_cond[i].notify_one();
222  m_worker_threads[i].join();
223  }
224  if (interpreter_lock) {
225  interpreter_lock.reset();
226  interpreter_locked = 0;
227  }
228  nrn::nmodlmutex.reset();
229  free(std::exchange(m_wc, nullptr));
230  }
231 
232  void assign_job(std::size_t worker, worker_job_t job) {
233  assert(worker > 0);
234  auto& cond = m_cond[worker];
235  auto& wc = m_wc[worker];
236  {
237  std::unique_lock<std::mutex> lock{m_mut[worker]};
238  // Wait until the worker is idle.
239  cond.wait(lock, [&wc] { return wc.flag == worker_flag::wait; });
240  assert(std::holds_alternative<std::monostate>(wc.job));
241  assert(wc.thread_id == worker);
242  wc.job = job;
243  wc.flag = worker_flag::execute_job;
244  }
245  // Notify the worker that it has new work to do.
246  cond.notify_one();
247  }
248 
249  void assign_job(std::size_t worker,
250  neuron::model_sorted_token const& cache_token,
251  worker_job_with_token_t job) {
252  assert(worker > 0);
253  auto& cond = m_cond[worker];
254  auto& wc = m_wc[worker];
255  {
256  std::unique_lock<std::mutex> lock{m_mut[worker]};
257  // Wait until the worker is idle.
258  cond.wait(lock, [&wc] { return wc.flag == worker_flag::wait; });
259  assert(std::holds_alternative<std::monostate>(wc.job));
260  assert(wc.thread_id == worker);
261  wc.job = std::make_pair(job, &cache_token);
262  wc.flag = worker_flag::execute_job;
263  }
264  // Notify the worker that it has new work to do.
265  cond.notify_one();
266  }
267 
268  // Wait until all worker threads are waiting
269  void wait() const {
270  for (std::size_t i = 1; i < nrn_nthread; ++i) {
271  auto& wc{m_wc[i]};
272  if (busywait_main_) {
273  while (wc.flag != worker_flag::wait) {
274  ;
275  }
276  } else {
277  std::unique_lock<std::mutex> lock{m_mut[i]};
278  m_cond[i].wait(lock, [&wc] { return wc.flag == worker_flag::wait; });
279  }
280  }
281  }
282  std::size_t num_workers() const {
283  return m_worker_threads.size();
284  }
285 
286  private:
287  // Cannot easily use std::vector because std::condition_variable is not moveable.
288  std::unique_ptr<std::condition_variable[]> m_cond;
289  // Cannot easily use std::vector because std::mutex is not moveable.
290  std::unique_ptr<std::mutex[]> m_mut;
291  std::vector<std::thread> m_worker_threads;
292  worker_conf_t* m_wc{};
293 };
294 std::unique_ptr<worker_threads_t> worker_threads{};
295 } // namespace
296 
297 void nrn_thread_error(const char* s) {
298  if (nrn_nthread != 1) {
299  hoc_execerror(s, (char*) 0);
300  }
301 }
302 
303 void nrn_threads_create(int n, bool parallel) {
304  int i, j;
305  NrnThread* nt;
306  if (nrn_nthread != n) {
307  worker_threads.reset();
308  // If the number of threads changes then the node storage data is
309  // implicitly no longer sorted, as "sorted" includes being partitioned
310  // by NrnThread. Similarly for the mechanism data then "sorted" includes
311  // being partitioned by thread.
312  // TODO: consider if we can be smarter about how/when we call
313  // mark_as_unsorted() for different containers.
314  neuron::model().node_data().mark_as_unsorted();
315  neuron::model().apply_to_mechanisms([](auto& mech_data) { mech_data.mark_as_unsorted(); });
317  for (i = 0; i < nrn_nthread; ++i) {
318  nt = nrn_threads + i;
319  if (nt->userpart) {
320  hoc_obj_unref(nt->userpart);
321  }
322  }
323  free(std::exchange(nrn_threads, nullptr));
324  nrn_nthread = n;
325  if (n > 0) {
326  CACHELINE_ALLOC(nrn_threads, NrnThread, n);
327  for (i = 0; i < n; ++i) {
328  nt = nrn_threads + i;
329  nt->_t = 0.;
330  nt->_dt = -1e9;
331  nt->id = i;
332  nt->_stop_stepping = 0;
333  nt->tml = (NrnThreadMembList*) 0;
334  nt->_ml_list = NULL;
335  nt->roots = (hoc_List*) 0;
336  nt->userpart = 0;
337  nt->ncell = 0;
338  nt->end = 0;
339  for (j = 0; j < BEFORE_AFTER_SIZE; ++j) {
340  nt->tbl[j] = (NrnThreadBAList*) 0;
341  }
342  nt->_sp13_rhs = nullptr;
343  nt->_v_parent_index = 0;
344  nt->_v_node = 0;
345  nt->_v_parent = 0;
346  nt->_ecell_memb_list = 0;
347  nt->_ecell_child_cnt = 0;
348  nt->_ecell_children = NULL;
349  nt->_sp13mat = 0;
350  nt->_ctime = 0.0;
351  nt->_vcv = 0;
352  nt->_node_data_offset = 0;
353  }
354  }
355  v_structure_change = 1;
356  diam_changed = 1;
357  }
358 #if NRN_ENABLE_THREADS
359  // Check if we are enabling/disabling parallelisation over threads
360  if (parallel != static_cast<bool>(worker_threads)) {
361  worker_threads.reset();
362 #if NRNMPI
363  if (nrn_nthread > 1 && nrnmpi_numprocs > 1 && nrn_cannot_use_threads_and_mpi == 1) {
364  if (nrnmpi_myid == 0) {
365  printf("This MPI is not threadsafe so threads are disabled.\n");
366  }
367  return;
368  }
369 #endif
370  if (parallel && nrn_nthread > 1) {
371  worker_threads = std::make_unique<worker_threads_t>();
372  }
373  }
374 #endif
375 }
376 
377 void nrn_fast_imem_alloc() {
378  // Make sure that storage for the fast_imem calculation exists/is destroyed according to
379  // nrn_use_fast_imem
380  neuron::model()
381  .node_data()
382  .set_field_status<neuron::container::Node::field::FastIMemSavD,
383  neuron::container::Node::field::FastIMemSavRHS>(nrn_use_fast_imem);
384 }
385 
386 void nrn_threads_free() {
387  int it, i;
388  for (it = 0; it < nrn_nthread; ++it) {
389  NrnThread* nt = nrn_threads + it;
390  NrnThreadMembList *tml, *tml2;
391  for (tml = nt->tml; tml; tml = tml2) {
392  Memb_list* ml = tml->ml;
393  tml2 = tml->next;
394  free((char*) std::exchange(ml->nodelist, nullptr));
395  free((char*) std::exchange(ml->nodeindices, nullptr));
396  delete[] std::exchange(ml->prop, nullptr);
397  if (!memb_func[tml->index].hoc_mech) {
398  free((char*) std::exchange(ml->pdata, nullptr));
399  }
400  if (ml->_thread) {
401  if (memb_func[tml->index].thread_cleanup_) {
402  (*memb_func[tml->index].thread_cleanup_)(ml->_thread);
403  }
404  delete[] std::exchange(ml->_thread, nullptr);
405  }
406  delete std::exchange(ml, nullptr);
407  free((char*) std::exchange(tml, nullptr));
408  }
409  if (nt->_ml_list) {
410  free((char*) std::exchange(nt->_ml_list, nullptr));
411  }
412  for (i = 0; i < BEFORE_AFTER_SIZE; ++i) {
413  NrnThreadBAList *tbl, *tbl2;
414  for (tbl = nt->tbl[i]; tbl; tbl = tbl2) {
415  tbl2 = tbl->next;
416  free((char*) tbl);
417  }
418  nt->tbl[i] = (NrnThreadBAList*) 0;
419  }
420  nt->tml = (NrnThreadMembList*) 0;
421  if (nt->userpart == 0 && nt->roots) {
422  hoc_l_freelist(&nt->roots);
423  nt->ncell = 0;
424  }
425  if (nt->_v_parent_index) {
426  free((char*) nt->_v_parent_index);
427  nt->_v_parent_index = 0;
428  }
429  if (nt->_v_node) {
430  free((char*) nt->_v_node);
431  nt->_v_node = 0;
432  }
433  if (nt->_v_parent) {
434  free((char*) nt->_v_parent);
435  nt->_v_parent = 0;
436  }
437  nt->_ecell_memb_list = 0;
438  if (nt->_ecell_children) {
439  nt->_ecell_child_cnt = 0;
440  free(nt->_ecell_children);
441  nt->_ecell_children = NULL;
442  }
443  if (nt->_sp13mat) {
444  spDestroy(nt->_sp13mat);
445  nt->_sp13mat = 0;
446  }
447  nt->end = 0;
448  nt->ncell = 0;
449  nt->_vcv = 0;
450  }
451 }
452 
453 /* be careful to make the tml list in proper memb_order. This is important */
454 /* for correct finitialize where mechanisms that write concentrations must be */
455 /* after ions and before mechanisms that read concentrations. */
456 
457 static void thread_memblist_setup(NrnThread* _nt, int* mlcnt, void** vmap) {
458  int i, ii;
459  Node* nd;
460  Prop* p;
461  NrnThreadMembList *tml, **ptml;
462  Memb_list** mlmap = (Memb_list**) vmap;
463  BAMech** bamap = (BAMech**) vmap;
464 #if 0
465 printf("thread_memblist_setup %lx v_node_count=%d ncell=%d end=%d\n", (long)nth, v_node_count, nth->ncell, nth->end);
466 #endif
467  for (i = 0; i < n_memb_func; ++i) {
468  mlcnt[i] = 0;
469  }
470 
471  /* count */
472  for (i = 0; i < _nt->end; ++i) {
473  nd = _nt->_v_node[i];
474  for (p = nd->prop; p; p = p->next) {
475  if (memb_func[p->_type].current || memb_func[p->_type].state ||
476  memb_func[p->_type].has_initialize()) {
477  ++mlcnt[p->_type];
478  }
479  }
480  }
481  /* allocate */
482  ptml = &_nt->tml;
483  for (ii = 0; ii < n_memb_func; ++ii) {
484  i = memb_order_[ii];
485  if (mlcnt[i]) {
486  if (_nt->id > 0 && memb_func[i].vectorized == 0) {
487  hoc_execerror(memb_func[i].sym->name, "is not thread safe");
488  }
489  /*printf("thread_memblist_setup %lx type=%d cnt=%d\n", (long)nth, i, mlcnt[i]);*/
490  tml = (NrnThreadMembList*) emalloc(sizeof(NrnThreadMembList));
491  tml->index = i;
492  tml->next = (NrnThreadMembList*) 0;
493  *ptml = tml;
494  ptml = &tml->next;
495  tml->ml = new Memb_list{i};
496  if (i == EXTRACELL) {
497  _nt->_ecell_memb_list = tml->ml;
498  }
499  mlmap[i] = tml->ml;
500  CACHELINE_ALLOC(tml->ml->nodelist, Node*, mlcnt[i]);
501  CACHELINE_ALLOC(tml->ml->nodeindices, int, mlcnt[i]);
502  tml->ml->prop = new Prop*[mlcnt[i]]; // used for ode_map
503  if (!memb_func[i].hoc_mech) {
504  CACHELINE_ALLOC(tml->ml->pdata, Datum*, mlcnt[i]);
505  }
506  tml->ml->_thread = (Datum*) 0;
507  if (memb_func[i].thread_size_) {
508  tml->ml->_thread = new Datum[memb_func[i].thread_size_]{};
509  if (memb_func[tml->index].thread_mem_init_) {
510  (*memb_func[tml->index].thread_mem_init_)(tml->ml->_thread);
511  }
512  }
513  tml->ml->nodecount = 0; /* counted again below */
514  }
515  }
516  CACHELINE_CALLOC(_nt->_ml_list, Memb_list*, n_memb_func);
517  for (tml = _nt->tml; tml; tml = tml->next) {
518  _nt->_ml_list[tml->index] = tml->ml;
519  }
520 
521  /* fill */
522  for (i = 0; i < _nt->end; ++i) {
523  nd = _nt->_v_node[i];
524  for (p = nd->prop; p; p = p->next) {
525  if (memb_func[p->_type].current || memb_func[p->_type].state ||
526  memb_func[p->_type].has_initialize()) {
527  Memb_list* ml = mlmap[p->_type];
528  ml->nodelist[ml->nodecount] = nd;
529  ml->nodeindices[ml->nodecount] = nd->v_node_index;
530  ml->prop[ml->nodecount] = p;
531  if (!memb_func[p->_type].hoc_mech) {
532  // ml->_data[ml->nodecount] = p->param;
533  ml->pdata[ml->nodecount] = p->dparam;
534  }
535  ++ml->nodecount;
536  }
537  }
538  }
539  /* count and store any Node* with the property
540  nd->extnode == NULL && nd->pnd != NULL && nd->pnd->extcell != NULL
541  */
542  if (_nt->_ecell_memb_list) {
543  Node* pnd;
544  int cnt = 0;
545  for (i = 0; i < _nt->end; ++i) {
546  nd = _nt->_v_node[i];
547  pnd = _nt->_v_parent[i];
548  if (nd->extnode == NULL && pnd && pnd->extnode) {
549  ++cnt;
550  }
551  }
552  if (cnt) {
553  Node** p;
554  CACHELINE_ALLOC(_nt->_ecell_children, Node*, cnt);
555  _nt->_ecell_child_cnt = cnt;
556  p = _nt->_ecell_children;
557  cnt = 0;
558  for (i = 0; i < _nt->end; ++i) {
559  nd = _nt->_v_node[i];
560  pnd = _nt->_v_parent[i];
561  if (nd->extnode == NULL && pnd && pnd->extnode) {
562  p[cnt++] = nd;
563  }
564  }
565  }
566  }
567 #if 0
568  for (i=0; i < n_memb_func; ++i) {
569  if (mlcnt[i]) {assert(mlcnt[i] == mlmap[i]->nodecount);}
570  }
571  for (tml = _nt->tml; tml; tml = tml->next) {
572  assert(mlcnt[tml->index] == tml->ml->nodecount);
573  }
574 #endif
575  /* fill the ba lists */
576  /* need map from ml type to BA type. Reuse vmap */
577  for (i = 0; i < BEFORE_AFTER_SIZE; ++i) {
578  BAMech* bam;
579  NrnThreadBAList *tbl, **ptbl;
580  for (ii = 0; ii < n_memb_func; ++ii) {
581  bamap[ii] = (BAMech*) 0;
582  }
583  for (bam = bamech_[i]; bam; bam = bam->next) {
584  // Save first before-after block only. In case of multiple
585  // before-after blocks with the same mech type, we will get
586  // subsequent ones using linked list below.
587  if (!bamap[bam->type]) {
588  bamap[bam->type] = bam;
589  }
590  }
591  // necessary to keep in order wrt multiple BAMech with same mech type
592  ptbl = _nt->tbl + i;
593  for (tml = _nt->tml; tml; tml = tml->next) {
594  if (bamap[tml->index]) {
595  int mtype = tml->index;
596  Memb_list* ml = tml->ml;
597  for (bam = bamap[mtype]; bam && bam->type == mtype; bam = bam->next) {
598  tbl = (NrnThreadBAList*) emalloc(sizeof(NrnThreadBAList));
599  *ptbl = tbl;
600  tbl->next = (NrnThreadBAList*) 0;
601  tbl->bam = bam;
602  tbl->ml = ml;
603  ptbl = &(tbl->next);
604  }
605  }
606  }
607  }
608  /* fill in the Point_process._vnt value. */
609  /* artificial cells done in v_setup_vectors() */
610  for (tml = _nt->tml; tml; tml = tml->next) {
611  if (!memb_func[tml->index].is_point) {
612  continue;
613  }
614  if (memb_func[tml->index].hoc_mech) {
615  // I don't think hoc_mech works with multiple threads.
616  for (int i = 0; i < tml->ml->nodecount; ++i) {
617  auto* pnt = tml->ml->prop[i]->dparam[1].get<Point_process*>();
618  pnt->_vnt = _nt;
619  }
620  } else {
621  for (int i = 0; i < tml->ml->nodecount; ++i) {
622  auto* pnt = tml->ml->pdata[i][1].get<Point_process*>();
623  pnt->_vnt = _nt;
624  }
625  }
626  }
627 }
628 
629 void nrn_thread_memblist_setup() {
630  int* mlcnt = (int*) emalloc(n_memb_func * sizeof(int));
631  void** vmap = (void**) emalloc(n_memb_func * sizeof(void*));
632  for (int it = 0; it < nrn_nthread; ++it) {
633  thread_memblist_setup(nrn_threads + it, mlcnt, vmap);
634  }
635  // Right now the sorting method updates the storage offsets inside the
636  // Memb_list* structures owned by NrnThreads. This is a bit of a design
637  // failure, as those offsets should have a lifetime linked to the sorted
638  // status of the underlying storage, i.e. they should be part of a cache
639  // structure. In any case, because we have just created new Memb_list then
640  // their offsets are empty, so we need to trigger a re-sort before they are
641  // used.
642  neuron::model().apply_to_mechanisms([](auto& mech_data) { mech_data.mark_as_unsorted(); });
644  free((char*) vmap);
645  free((char*) mlcnt);
647  if (nrn_mk_transfer_thread_data_) {
648  (*nrn_mk_transfer_thread_data_)();
649  }
650 }
651 
652 /* secorder needs to correspond to cells in NrnThread with roots */
653 /* at the beginning of each thread region */
654 /* this differs from original secorder where all roots are at the beginning */
655 /* in passing, also set start and end indices. */
656 // Until 2025-01-03 I was under the misapprehension that (except for roots)
657 // this ordering kept cells contiguous. Lvardt sizes for CvMembList.ml disabused
658 // me of that. reorder_secorder now, in fact, keeps cells contiguous except
659 // for roots.
660 
661 void reorder_secorder() {
662  Section *sec, *ch;
663  Node* nd;
664  hoc_Item* qsec;
665  hoc_List* sl;
666  int order, inode;
667 
668  /* count and allocate */
669  // First pass: count nodes per thread to determine _nt->end for allocation
670  // of _v_node, _v_parent, and _v_parent_index.
671  ITERATE(qsec, section_list) {
672  Section* sec = hocSEC(qsec);
673  sec->order = -1;
674  }
675  for (NrnThread* _nt: for_threads(nrn_threads, nrn_nthread)) {
676  sl = _nt->roots;
677  inode = 0;
678  // Count root nodes
679  ITERATE(qsec, sl) {
680  sec = hocSEC(qsec);
681  assert(sec->order == -1);
682  inode += 1;
683  }
684  // Count nodes in all sections of each cell via breadth-first traversal
685  ITERATE(qsec, sl) {
686  std::queue<Section*> que;
687  que.push(hocSEC(qsec));
688  while (!que.empty()) {
689  sec = que.front();
690  que.pop();
691  for (ch = sec->child; ch; ch = ch->sibling) {
692  que.push(ch);
693  }
694  inode += sec->nnode;
695  }
696  }
697  _nt->end = inode;
698  CACHELINE_CALLOC(_nt->_v_node, Node*, inode);
699  CACHELINE_CALLOC(_nt->_v_parent, Node*, inode);
700  CACHELINE_CALLOC(_nt->_v_parent_index, int, inode);
701  }
702 
703  /* traverse and fill _v_node, _v_parent, and secorder */
704  order = 0;
705  for (NrnThread* _nt: for_threads(nrn_threads, nrn_nthread)) {
706  sl = _nt->roots;
707  inode = 0;
708  // Process root nodes
709  ITERATE(qsec, sl) {
710  sec = hocSEC(qsec);
711  assert(sec->order == -1);
712  nd = sec->parentnode;
713  nd->_nt = _nt;
714  _nt->_v_node[inode] = nd;
715  _nt->_v_parent[inode] = nullptr; // Root nodes have no parent
716  _nt->_v_node[inode]->v_node_index = inode;
717  ++inode;
718  }
719  // Breadth-first traversal to fill arrays and set section order
720  ITERATE(qsec, sl) {
721  std::queue<Section*> que;
722  que.push(hocSEC(qsec));
723  while (!que.empty()) {
724  sec = que.front();
725  que.pop();
726  for (ch = sec->child; ch; ch = ch->sibling) {
727  que.push(ch);
728  }
729  // Assign section order and thread
730  assert(sec->order == -1);
731  sec->prop->dparam[9] = {neuron::container::do_not_search, _nt};
732  secorder[order] = sec;
733  sec->order = order++;
734  // Process nodes
735  for (int j = 0; j < sec->nnode; ++j) {
736  nd = sec->pnode[j];
737  nd->_nt = _nt;
738  _nt->_v_node[inode] = nd;
739  if (j) {
740  _nt->_v_parent[inode] = sec->pnode[j - 1];
741  } else {
742  _nt->_v_parent[inode] = sec->parentnode;
743  }
744  _nt->_v_node[inode]->v_node_index = inode;
745  ++inode;
746  }
747  }
748  }
749  assert(_nt->end == inode);
750  }
752  /*assert(inode == v_node_count);*/
753 
754  /* not missing any */
755  ITERATE(qsec, section_list) {
756  Section* sec = hocSEC(qsec);
757  assert(sec->order != -1);
758  }
759 
760  /* here is where multisplit reorders the nodes. Afterwards
761  in either case, we can then point to v, d, rhs in proper
762  node order
763  */
764  for (const NrnThread* _nt: for_threads(nrn_threads, nrn_nthread)) {
765  for (inode = 0; inode < _nt->end; ++inode) {
766  _nt->_v_node[inode]->_classical_parent = _nt->_v_parent[inode];
767  }
768  }
769  if (nrn_multisplit_setup_) {
770  /* classical order abandoned */
771  (*nrn_multisplit_setup_)();
772  }
773  /* because the d,rhs changed, if multisplit is used we need to update
774  the reduced tree gather/scatter pointers
775  */
776  if (nrn_multisplit_setup_) {
777  nrn_multisplit_ptr_update();
778  }
779 }
780 
781 void nrn_mk_table_check() {
782  std::size_t table_check_cnt_{};
783  std::vector<int> ix(n_memb_func, -1);
784  for (int id = 0; id < nrn_nthread; ++id) {
785  NrnThread* nt = nrn_threads + id;
786  for (NrnThreadMembList* tml = nt->tml; tml; tml = tml->next) {
787  int index = tml->index;
788  if (memb_func[index].thread_table_check_ && ix[index] == -1) {
789  ix[index] = id;
790  ++table_check_cnt_;
791  }
792  }
793  }
794  table_check_.clear();
795  table_check_.reserve(table_check_cnt_);
796  for (int id = 0; id < nrn_nthread; ++id) {
797  NrnThread* nt = nrn_threads + id;
798  for (NrnThreadMembList* tml = nt->tml; tml; tml = tml->next) {
799  int index = tml->index;
800  if (memb_func[index].thread_table_check_ && ix[index] == id) {
801  table_check_.emplace_back(id, tml);
802  }
803  }
804  }
805 }
806 
807 void nrn_thread_table_check(neuron::model_sorted_token const& sorted_token) {
808  for (auto [id, tml]: table_check_) {
809  Memb_list* ml = tml->ml;
810  // here _globals cannot be guessed (missing _gth) so we give nullptr, and set the variable
811  // locally in _check_table_thread
812  memb_func[tml->index].thread_table_check_(
813  ml, 0, ml->pdata[0], ml->_thread, nullptr, nrn_threads + id, tml->index, sorted_token);
814  }
815 }
816 
817 /* if it is possible for more than one thread to get into the
818  interpreter, lock it. */
819 void nrn_hoc_lock() {
820 #if NRN_ENABLE_THREADS
821  if (nrn_inthread_) {
822  interpreter_lock->lock();
823  interpreter_locked = true;
824  }
825 #endif
826 }
827 void nrn_hoc_unlock() {
828 #if NRN_ENABLE_THREADS
829  if (interpreter_locked) {
830  interpreter_locked = false;
831  interpreter_lock->unlock();
832  }
833 #endif
834 }
835 
836 void nrn_multithread_job(worker_job_t job) {
837 #if NRN_ENABLE_THREADS
838  if (worker_threads) {
839  nrn_inthread_ = 1;
840  for (std::size_t i = 1; i < nrn_nthread; ++i) {
841  worker_threads->assign_job(i, job);
842  }
843  (*job)(nrn_threads);
844  worker_threads->wait();
845  nrn_inthread_ = 0;
846  return;
847  }
848 #endif
849  for (std::size_t i = 1; i < nrn_nthread; ++i) {
850  (*job)(nrn_threads + i);
851  }
852  (*job)(nrn_threads);
853 }
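// Hedged usage sketch (illustration only; example_zero_rhs_job is a hypothetical name): a
// worker_job_t is a plain function taking the per-thread NrnThread* and returning void*.
// nrn_multithread_job above runs it on thread 0 in the calling thread and on threads
// 1..nrn_nthread-1 either in the worker pool or serially when no pool exists.
#if 0
static void* example_zero_rhs_job(NrnThread* nt) {
    double* rhs = nt->node_rhs_storage();  // this thread's slice of the RHS column
    for (int i = 0; i < nt->end; ++i) {
        rhs[i] = 0.0;  // touch only nodes owned by this thread
    }
    return nullptr;
}
// ... at a call site: nrn_multithread_job(example_zero_rhs_job);
#endif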
854 
855 void nrn_multithread_job(neuron::model_sorted_token const& cache_token,
856  worker_job_with_token_t job) {
857 #if NRN_ENABLE_THREADS
858  if (worker_threads) {
859  nrn_inthread_ = 1;
860  for (std::size_t i = 1; i < nrn_nthread; ++i) {
861  worker_threads->assign_job(i, cache_token, job);
862  }
863  job(cache_token, nrn_threads[0]);
864  worker_threads->wait();
865  nrn_inthread_ = 0;
866  return;
867  }
868 #endif
869  for (std::size_t i = 1; i < nrn_nthread; ++i) {
870  job(cache_token, nrn_threads[i]);
871  }
872  job(cache_token, nrn_threads[0]);
873 }
874 
875 void nrn_onethread_job(int i, void* (*job)(NrnThread*) ) {
876  assert(i >= 0 && i < nrn_nthread);
877 #if NRN_ENABLE_THREADS
878  if (worker_threads) {
879  if (i > 0) {
880  worker_threads->assign_job(i, job);
881  worker_threads->wait();
882  } else {
883  (*job)(nrn_threads);
884  }
885  return;
886  }
887 #endif
888  (*job)(nrn_threads + i);
889 }
890 
891 void nrn_wait_for_threads() {
892 #if NRN_ENABLE_THREADS
893  if (worker_threads) {
894  worker_threads->wait();
895  }
896 #endif
897 }
898 
899 void nrn_thread_partition(int it, Object* sl) {
900  NrnThread* nt;
901  assert(it >= 0 && it < nrn_nthread);
902  nt = nrn_threads + it;
903  if (nt->userpart == nullptr && nt->roots) {
904  hoc_l_freelist(&nt->roots);
905  }
906  if (sl) {
907  hoc_obj_ref(sl);
908  }
909  if (nt->userpart) {
910  hoc_obj_unref(nt->userpart);
911  nt->userpart = nullptr;
912  nt->roots = (hoc_List*) 0;
913  }
914  if (sl) {
915  nt->userpart = sl; /* already reffed above */
916  nt->roots = (hoc_List*) sl->u.this_pointer;
917  }
918  v_structure_change = 1;
919 }
920 
921 Object** nrn_get_thread_partition(int it) {
922  assert(it >= 0 && it < nrn_nthread);
923  NrnThread* nt = nrn_threads + it;
924  if (!nt->roots) {
925  v_setup_vectors();
926  }
927  // nt->roots is a hoc_List of Section*. Create a new SectionList and copy
928  // those Section* into it and ref them.
929  hoc_List* sl = hoc_l_newlist();
930  Object** po = hoc_temp_objvar(hoc_lookup("SectionList"), sl);
931  hoc_Item* qsec;
932  ITERATE(qsec, nt->roots) {
933  Section* sec = hocSEC(qsec);
934  section_ref(sec);
935  hoc_l_lappendsec(sl, sec);
936  }
937  return po;
938 }
939 
940 int nrn_user_partition() {
941  int i, it, b, n;
942  hoc_Item* qsec;
943  hoc_List* sl;
944  char buf[256];
945  Section* sec;
946  /* all one or all the other*/
947  b = (nrn_threads[0].userpart != nullptr);
948  for (it = 1; it < nrn_nthread; ++it) {
949  if ((nrn_threads[it].userpart != nullptr) != b) {
950  hoc_execerror("some threads have a user defined partition", "and some do not");
951  }
952  }
953  if (!b) {
954  return 0;
955  }
956 
957  /* discard partition if any section mentioned has been deleted. The
958  model has changed */
959  for (const NrnThread* nt: for_threads(nrn_threads, nrn_nthread)) {
960  sl = nt->roots;
961  ITERATE(qsec, sl) {
962  sec = hocSEC(qsec);
963  if (!sec->prop) {
964  for (i = 0; i < nrn_nthread; ++i) {
965  nrn_thread_partition(i, nullptr);
966  }
967  return 0;
968  }
969  }
970  }
971 
972  // ForAllSections(sec)
973  ITERATE(qsec, section_list) {
974  Section* sec = hocSEC(qsec);
975  sec->volatile_mark = 0;
976  }
977  /* fill in ncell and verify consistency */
978  n = 0;
979  for (it = 0; it < nrn_nthread; ++it) {
980  NrnThread* nt = nrn_threads + it;
981  sl = nt->roots;
982  nt->ncell = 0;
983  ITERATE(qsec, sl) {
984  sec = hocSEC(qsec);
985  ++nt->ncell;
986  ++n;
987  if (sec->parentsec) {
988  Sprintf(buf, "in thread partition %d is not a root section", it);
989  hoc_execerror(secname(sec), buf);
990  }
991  if (sec->volatile_mark) {
992  Sprintf(buf, "appeared again in partition %d", it);
993  hoc_execerror(secname(sec), buf);
994  }
995  sec->volatile_mark = 1;
996  }
997  }
998  if (n != nrn_global_ncell) {
999  Sprintf(buf,
1000  "The total number of cells, %d, is different than the number of user partition "
1001  "cells, %d\n",
1002  nrn_global_ncell,
1003  n);
1004  hoc_execerror(buf, (char*) 0);
1005  }
1006  return 1;
1007 }
1008 
1009 void nrn_use_busywait(int b) {
1010 #if NRN_ENABLE_THREADS
1011  if (allow_busywait_ && worker_threads) {
1012  if (b == 0 && busywait_main_ == 1) {
1013  busywait_ = 0;
1014  nrn_multithread_job(nulljob);
1015  busywait_main_ = 0;
1016  } else if (b == 1 && busywait_main_ == 0) {
1017  busywait_main_ = 1;
1018  worker_threads->wait();
1019  busywait_ = 1;
1020  nrn_multithread_job(nulljob);
1021  }
1022  } else {
1023  if (busywait_main_ == 1) {
1024  busywait_ = 0;
1025  nrn_multithread_job(nulljob);
1026  busywait_main_ = 0;
1027  }
1028  }
1029 #endif
1030 }
1031 
1032 int nrn_allow_busywait(int b) {
1033  int old = allow_busywait_;
1034  allow_busywait_ = b;
1035  return old;
1036 }
1037 
1038 int nrn_how_many_processors() {
1039 #if NRN_ENABLE_THREADS
1040  // For machines with hyperthreading this probably returns the number of
1041  // logical, not physical, cores.
1042  return std::thread::hardware_concurrency();
1043 #else
1044  return 1;
1045 #endif
1046 }
1047 
1048 std::size_t nof_worker_threads() {
1049  return worker_threads.get() ? worker_threads->num_workers() : 0;
1050 }
1051 
1052 // Need to be able to use these methods while the model is frozen, so avoid calling the
1053 // zero-parameter get().
1054 double* NrnThread::node_a_storage() {
1055  auto& node_data = neuron::model().node_data();
1056  return &node_data.get<neuron::container::Node::field::AboveDiagonal>(_node_data_offset);
1057 }
1058 
1059 double* NrnThread::node_area_storage() {
1060  auto& node_data = neuron::model().node_data();
1061  return &node_data.get<neuron::container::Node::field::Area>(_node_data_offset);
1062 }
1063 
1064 double* NrnThread::node_b_storage() {
1065  auto& node_data = neuron::model().node_data();
1066  return &node_data.get<neuron::container::Node::field::BelowDiagonal>(_node_data_offset);
1067 }
1068 
1069 double* NrnThread::node_d_storage() {
1070  auto& node_data = neuron::model().node_data();
1071  return &node_data.get<neuron::container::Node::field::Diagonal>(_node_data_offset);
1072 }
1073 
1074 double* NrnThread::node_rhs_storage() {
1075  return &neuron::model().node_data().get<neuron::container::Node::field::RHS>(_node_data_offset);
1076 }
1077 
1078 double* NrnThread::node_sav_d_storage() {
1079  auto& node_data = neuron::model().node_data();
1080  using Tag = neuron::container::Node::field::FastIMemSavD;
1081  if (node_data.field_active<Tag>()) {
1082  return &node_data.get<Tag>(_node_data_offset);
1083  } else {
1084  return nullptr;
1085  }
1086 }
1087 
1088 double* NrnThread::node_sav_rhs_storage() {
1089  auto& node_data = neuron::model().node_data();
1090  using Tag = neuron::container::Node::field::FastIMemSavRHS;
1091  if (node_data.field_active<Tag>()) {
1092  return &node_data.get<Tag>(_node_data_offset);
1093  } else {
1094  return nullptr;
1095  }
1096 }
1097 
1098 double* NrnThread::node_voltage_storage() {
1099  auto& node_data = neuron::model().node_data();
1100  return &node_data.get<neuron::container::Node::field::Voltage>(_node_data_offset);
1101 }