NEURON
rxd_extracellular.cpp
1 #include <../../nrnconf.h>
2 #include <stdio.h>
3 #include <assert.h>
4 #include <string.h>
5 #include "grids.h"
6 #include "rxd.h"
7 #include "nrnwrap_Python.h"
8 #include <cmath>
9 #include <ocmatrix.h>
10 #include <cfloat>
11 
12 #define loc(x, y, z) ((z) + (y) *grid->size_z + (x) *grid->size_z * grid->size_y)
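/* Illustration of the flattened indexing above (example values only): with
 * grid->size_y == 3 and grid->size_z == 4, loc(1, 2, 3) = 3 + 2*4 + 1*4*3 = 23,
 * i.e. z varies fastest and x slowest within the states array. */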
13 
14 static void ecs_refresh_reactions(int);
15 /*
16  Globals
17 */
18 
19 /*Store the onset/offset for each thread's reaction tasks*/
20 ReactGridData* threaded_reactions_tasks = NULL;
21 
22 extern int NUM_THREADS;
23 extern TaskQueue* AllTasks;
24 extern double* t_ptr;
25 extern double* states;
26 
27 Reaction* ecs_reactions = NULL;
28 
29 int states_cvode_offset;
30 
31 /*Update the global array of reaction tasks when the number of reactions
32  *or threads changes.
33  *n - the old number of threads - used to free the old threaded_reactions_tasks*/
34 static void ecs_refresh_reactions(const int n) {
35  int k;
36  if (threaded_reactions_tasks != NULL) {
37  for (k = 0; k < NUM_THREADS; k++) {
38  free(threaded_reactions_tasks[k].onset);
39  free(threaded_reactions_tasks[k].offset);
40  }
41  free(threaded_reactions_tasks);
42  }
43  threaded_reactions_tasks = create_threaded_reactions(n);
44 }
45 
46 void set_num_threads_3D(const int n) {
47  Grid_node* grid;
48  if (n != NUM_THREADS) {
49  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
50  grid->set_num_threads(n);
51  }
52  }
53  ecs_refresh_reactions(n);
54 }
55 
56 
57 /*Remove all reactions*/
58 void clear_rates_ecs(void) {
59  Reaction *r, *tmp;
60  ECS_Grid_node* g;
61 
62  for (r = ecs_reactions; r != NULL; r = tmp) {
63  free(r->species_states);
64  if (r->subregion) {
65  free(r->subregion);
66  }
67 
68  tmp = r->next;
69  free(r);
70  }
71  ecs_reactions = NULL;
72 
73  ecs_refresh_reactions(NUM_THREADS);
74  for (Grid_node* grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
75  g = dynamic_cast<ECS_Grid_node*>(grid);
76  if (g)
77  g->clear_multicompartment_reaction();
78  }
79 }
80 
81 /*Create a reaction
82  * list_idx - the index for the linked list in the global array Parallel_grids
83  * grid_id - the grid id within the linked list
84  * ECSReactionRate - the reaction function
85  * subregion - either NULL or a boolean array the same size as the grid
86  * indicating if reaction occurs at a specific voxel
87  */
88 Reaction* ecs_create_reaction(int list_idx,
89  int num_species,
90  int num_params,
91  int* species_ids,
92  ECSReactionRate f,
93  unsigned char* subregion,
94  uint64_t* mc3d_start_indices,
95  int mc3d_region_size,
96  double* mc3d_mults) {
97  Grid_node* grid;
98  Reaction* r;
99  int i, j;
100 
101  r = (Reaction*) malloc(sizeof(Reaction));
102  assert(r);
103  r->reaction = f;
104  /*place reaction on the top of the stack of reactions*/
105  r->next = ecs_reactions;
106  ecs_reactions = r;
107 
108  for (grid = Parallel_grids[list_idx], i = 0; grid != NULL; grid = grid->next, i++) {
109  /* Assume all species have the same grid */
110  if (i == species_ids[0]) {
111  if (mc3d_region_size > 0) {
112  r->subregion = NULL;
113  r->region_size = mc3d_region_size;
114  r->mc3d_indices_offsets = (uint64_t*) malloc(sizeof(uint64_t) *
115  (num_species + num_params));
116  memcpy(r->mc3d_indices_offsets,
117  mc3d_start_indices,
118  sizeof(uint64_t) * (num_species + num_params));
119  r->mc3d_mults = (double**) malloc(sizeof(double*) * (num_species + num_params));
120  int mult_idx = 0;
121  for (int row = 0; row < num_species + num_params; row++) {
122  r->mc3d_mults[row] = (double*) malloc(sizeof(double) * mc3d_region_size);
123  for (int c = 0; c < mc3d_region_size; c++, mult_idx++) {
124  r->mc3d_mults[row][c] = mc3d_mults[mult_idx];
125  }
126  }
127 
128 
129  } else if (subregion == NULL) {
130  r->subregion = NULL;
131  r->region_size = grid->size_x * grid->size_y * grid->size_z;
132  r->mc3d_indices_offsets = NULL;
133  } else {
134  for (r->region_size = 0, j = 0; j < grid->size_x * grid->size_y * grid->size_z; j++)
135  r->region_size += subregion[j];
136  r->subregion = subregion;
137  r->mc3d_indices_offsets = NULL;
138  }
139  }
140  }
141 
142  r->num_species_involved = num_species;
143  r->num_params_involved = num_params;
144  r->species_states = (double**) malloc(sizeof(Grid_node*) * (num_species + num_params));
145  assert(r->species_states);
146 
147  for (i = 0; i < num_species + num_params; i++) {
148  /*Assumes grids are the same size (no interpolation)
149  *Assumes all the species will be in the same Parallel_grids list*/
150  for (grid = Parallel_grids[list_idx], j = 0; grid != NULL; grid = grid->next, j++) {
151  if (j == species_ids[i])
152  r->species_states[i] = grid->states;
153  }
154  }
155 
156  return r;
157 }
158 
159 /*ics_register_reaction is called from Python; it creates a reaction with
160  * list_idx - the index for the linked list in the global array Parallel_grids
161  * (currently this is always 0)
162  * grid_id - the grid id within the linked list - this corresponds to species
163  * ECSReactionRate - the reaction function
164  */
165 extern "C" NRN_EXPORT void ics_register_reaction(int list_idx,
166  int num_species,
167  int num_params,
168  int* species_id,
169  uint64_t* mc3d_start_indices,
170  int mc3d_region_size,
171  double* mc3d_mults,
172  ECSReactionRate f) {
173  ecs_create_reaction(list_idx,
174  num_species,
175  num_params,
176  species_id,
177  f,
178  NULL,
179  mc3d_start_indices,
180  mc3d_region_size,
181  mc3d_mults);
182  ecs_refresh_reactions(NUM_THREADS);
183 }
184 
185 /*ecs_register_reaction is called from Python; it creates a reaction with
186  * list_idx - the index for the linked list in the global array Parallel_grids
187  * (currently this is always 0)
188  * grid_id - the grid id within the linked list - this corresponds to species
189  * ECSReactionRate - the reaction function
190  */
191 extern "C" NRN_EXPORT void ecs_register_reaction(int list_idx,
192  int num_species,
193  int num_params,
194  int* species_id,
195  ECSReactionRate f) {
196  ecs_create_reaction(list_idx, num_species, num_params, species_id, f, NULL, NULL, 0, NULL);
197  ecs_refresh_reactions(NUM_THREADS);
198 }
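/* A minimal sketch (not part of this file) of a rate callback with the
 * ECSReactionRate signature used by the registration functions above; it models
 * first-order decay of a single species, and the function name and rate constant
 * are illustrative only. species[0] is the local concentration, params holds any
 * parameter species, rhs receives d[species]/dt, and mults (the multicompartment
 * multipliers) is NULL for plain ECS reactions. In practice the rxd Python module
 * compiles such a function and passes its pointer to ecs_register_reaction. */
static void example_decay_rate(double* species, double* params, double* rhs, double* mults) {
    (void) params; /* unused in this sketch */
    (void) mults;  /* NULL for non-multicompartment reactions */
    const double k_decay = 0.1; /* illustrative rate constant (1/ms) */
    rhs[0] = -k_decay * species[0];
}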
199 
200 
201 /*register_subregion_reaction is called from Python; it creates a reaction with
202  * list_idx - the index for the linked list in the global array Parallel_grids
203  * (currently this is always 0)
204  * grid_id - the grid id within the linked list - this corresponds to species
205  * my_subregion - a boolean array indicating the voxels where a reaction occurs
206  * ECSReactionRate - the reaction function
207  */
208 extern "C" void ecs_register_subregion_reaction_ecs(int list_idx,
209  int num_species,
210  int num_params,
211  int* species_id,
212  unsigned char* my_subregion,
213  ECSReactionRate f) {
214  ecs_create_reaction(
215  list_idx, num_species, num_params, species_id, f, my_subregion, NULL, 0, NULL);
216  ecs_refresh_reactions(NUM_THREADS);
217 }
218 
219 
220 /*create_threaded_reactions is used to generate an even distribution of reactions
221  * across NUM_THREADS
222  * returns ReactGridData* which can be used as the global
223  * threaded_reactions_tasks
224  */
225 ReactGridData* create_threaded_reactions(const int n) {
226  unsigned int i;
227  int load, k;
228  int react_count = 0;
229  Reaction* react;
230 
231  for (react = ecs_reactions; react != NULL; react = react->next)
232  react_count += react->region_size;
233 
234  if (react_count == 0)
235  return NULL;
236 
237  /*Divide the number of reactions between the threads*/
238  const int tasks_per_thread = (react_count) / n;
239  ReactGridData* tasks = (ReactGridData*) calloc(sizeof(ReactGridData), n);
240  const int extra = react_count % n;
241 
242  tasks[0].onset = (ReactSet*) malloc(sizeof(ReactSet));
243  tasks[0].onset->reaction = ecs_reactions;
244  tasks[0].onset->idx = 0;
245 
246  for (k = 0, load = 0, react = ecs_reactions; react != NULL; react = react->next) {
247  for (i = 0; i < react->region_size; i++) {
248  if (!react->subregion || react->subregion[i])
249  load++;
250  if (load >= tasks_per_thread + (extra > k)) {
251  tasks[k].offset = (ReactSet*) malloc(sizeof(ReactSet));
252  tasks[k].offset->reaction = react;
253  tasks[k].offset->idx = i;
254 
255  if (++k < n) {
256  tasks[k].onset = (ReactSet*) malloc(sizeof(ReactSet));
257  tasks[k].onset->reaction = react;
258  tasks[k].onset->idx = i + 1;
259  load = 0;
260  }
261  }
262  if (k == n - 1 && react->next == NULL) {
263  tasks[k].offset = (ReactSet*) malloc(sizeof(ReactSet));
264  tasks[k].offset->reaction = react;
265  tasks[k].offset->idx = i;
266  }
267  }
268  }
269  return tasks;
270 }
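/* Illustration of the split above (example numbers only): with react_count = 10
 * voxel-reactions and n = 3 threads, tasks_per_thread = 3 and extra = 1, so the
 * onset/offset pairs assign 4 entries to thread 0 and 3 each to threads 1 and 2. */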
271 
272 /*ecs_do_reactions takes ReactGridData which defines the set of reactions to be
273  * performed. It calculates the reactions based on grid->old_states and updates
274  * grid->states
275  */
276 void* ecs_do_reactions(void* dataptr) {
277  ReactGridData task = *(ReactGridData*) dataptr;
278  unsigned char started = FALSE, stop = FALSE;
279  unsigned int i, j, k, n, start_idx, stop_idx, offset_idx;
280  double temp, ge_value, val_to_set;
281  double dt = *dt_ptr;
282  Reaction* react;
283 
284  double* states_cache = NULL;
285  double* params_cache = NULL;
286  double* states_cache_dx = NULL;
287  double* results_array = NULL;
288  double* results_array_dx = NULL;
289  double* mc_mults_array = NULL;
290  double dx = FLT_EPSILON;
291  double pd;
292  std::vector<double> x{};
293  std::vector<double> b{};
294 
295  for (react = ecs_reactions; react != NULL; react = react->next) {
296  // TODO: This is bad. Need to refactor
297  if (react->mc3d_indices_offsets != NULL) {
298  /*find start location for this thread*/
299  if (started || react == task.onset->reaction) {
300  if (!started) {
301  started = TRUE;
302  start_idx = task.onset->idx;
303  } else {
304  start_idx = 0;
305  }
306 
307  if (task.offset->reaction == react) {
308  stop_idx = task.offset->idx;
309  stop = TRUE;
310  } else {
311  stop_idx = react->region_size - 1;
312  stop = FALSE;
313  }
314  if (react->num_species_involved == 0)
315  continue;
316  /*allocate data structures*/
317  OcFullMatrix jacobian(react->num_species_involved, react->num_species_involved);
318  b.resize(react->num_species_involved);
319  x.resize(react->num_species_involved);
320  states_cache = (double*) malloc(sizeof(double) * react->num_species_involved);
321  params_cache = (double*) malloc(sizeof(double) * react->num_params_involved);
322  states_cache_dx = (double*) malloc(sizeof(double) * react->num_species_involved);
323  results_array = (double*) malloc(react->num_species_involved * sizeof(double));
324  results_array_dx = (double*) malloc(react->num_species_involved * sizeof(double));
325  mc_mults_array = (double*) malloc(react->num_species_involved * sizeof(double));
326  for (i = start_idx; i <= stop_idx; i++) {
327  if (!react->subregion || react->subregion[i]) {
328  // TODO: this assumes grids are the same size/shape
329  // add interpolation in case they aren't
330  for (j = 0; j < react->num_species_involved; j++) {
331  offset_idx = i + react->mc3d_indices_offsets[j];
332  states_cache[j] = react->species_states[j][offset_idx];
333  states_cache_dx[j] = react->species_states[j][offset_idx];
334  mc_mults_array[j] = react->mc3d_mults[j][i];
335  }
336  memset(results_array, 0, react->num_species_involved * sizeof(double));
337  for (k = 0; j < react->num_species_involved + react->num_params_involved;
338  k++, j++) {
339  offset_idx = i + react->mc3d_indices_offsets[j];
340  params_cache[k] = react->species_states[j][offset_idx];
341  mc_mults_array[k] = react->mc3d_mults[j][i];
342  }
343  react->reaction(states_cache, params_cache, results_array, mc_mults_array);
344 
345  for (j = 0; j < react->num_species_involved; j++) {
346  states_cache_dx[j] += dx;
347  memset(results_array_dx,
348  0,
349  react->num_species_involved * sizeof(double));
350  react->reaction(states_cache_dx,
351  params_cache,
352  results_array_dx,
353  mc_mults_array);
354  b[j] = dt * results_array[j];
355 
356  for (k = 0; k < react->num_species_involved; k++) {
357  pd = (results_array_dx[k] - results_array[k]) / dx;
358  jacobian(k, j) = (j == k) - dt * pd;
359  }
360  states_cache_dx[j] -= dx;
361  }
362  // solve for x
363  if (react->num_species_involved == 1) {
364  react->species_states[0][i] += b[0] / jacobian(0, 0);
365  } else {
366  // find entry in leftmost column with largest absolute value
367  // Pivot
368  for (j = 0; j < react->num_species_involved; j++) {
369  for (k = j + 1; k < react->num_species_involved; k++) {
370  if (abs(jacobian(j, j)) < abs(jacobian(k, j))) {
371  for (n = 0; n < react->num_species_involved; n++) {
372  temp = jacobian(j, n);
373  jacobian(j, n) = jacobian(k, n);
374  jacobian(k, n) = temp;
375  }
376  }
377  }
378  }
379 
380  for (j = 0; j < react->num_species_involved - 1; j++) {
381  for (k = j + 1; k < react->num_species_involved; k++) {
382  ge_value = jacobian(k, j) / jacobian(j, j);
383  for (n = 0; n < react->num_species_involved; n++) {
384  val_to_set = jacobian(k, n) - ge_value * jacobian(j, n);
385  jacobian(k, n) = val_to_set;
386  }
387  b[k] = b[k] - ge_value * b[j];
388  }
389  }
390 
391  for (j = react->num_species_involved - 1; j + 1 > 0; j--) {
392  x[j] = b[j];
393  for (k = j + 1; k < react->num_species_involved; k++) {
394  if (k != j) {
395  x[j] = x[j] - jacobian(j, k) * x[k];
396  }
397  }
398  x[j] = x[j] / jacobian(j, j);
399  }
400  for (j = 0; j < react->num_species_involved; j++) {
401  // I think this should be something like
402  // react->species_states[j][mc3d_indices[i]] += v_get_val(x,j);
403  // Since the grid has a uniform discretization, the mc3d_indices
404  // should be the same length. So just need to access the correct
405  // mc3d_indices[i] maybe do two lines?: index =
406  // react->species_indices[j][i] react->species_states[j][index] +=
407  // v_get_val(x,j);
408  offset_idx = i + react->mc3d_indices_offsets[j];
409  react->species_states[j][offset_idx] += x[j];
410  }
411  }
412  }
413  }
414 
415  free(states_cache);
416  free(states_cache_dx);
417  free(params_cache);
418  free(results_array);
419  free(results_array_dx);
420  free(mc_mults_array);
421 
422  if (stop)
423  return NULL;
424  }
425  } else {
426  /*find start location for this thread*/
427  if (started || react == task.onset->reaction) {
428  if (!started) {
429  started = TRUE;
430  start_idx = task.onset->idx;
431  } else {
432  start_idx = 0;
433  }
434 
435  if (task.offset->reaction == react) {
436  stop_idx = task.offset->idx;
437  stop = TRUE;
438  } else {
439  stop_idx = react->region_size - 1;
440  stop = FALSE;
441  }
442  if (react->num_species_involved == 0)
443  continue;
444  /*allocate data structures*/
445  OcFullMatrix jacobian(react->num_species_involved, react->num_species_involved);
446  b.resize(react->num_species_involved);
447  x.resize(react->num_species_involved);
448  states_cache = (double*) malloc(sizeof(double) * react->num_species_involved);
449  params_cache = (double*) malloc(sizeof(double) * react->num_params_involved);
450  states_cache_dx = (double*) malloc(sizeof(double) * react->num_species_involved);
451  results_array = (double*) malloc(react->num_species_involved * sizeof(double));
452  results_array_dx = (double*) malloc(react->num_species_involved * sizeof(double));
453  for (i = start_idx; i <= stop_idx; i++) {
454  if (!react->subregion || react->subregion[i]) {
455  // TODO: this assumes grids are the same size/shape
456  // add interpolation in case they aren't
457  for (j = 0; j < react->num_species_involved; j++) {
458  states_cache[j] = react->species_states[j][i];
459  states_cache_dx[j] = react->species_states[j][i];
460  }
461  memset(results_array, 0, react->num_species_involved * sizeof(double));
462  for (k = 0; j < react->num_species_involved + react->num_params_involved;
463  k++, j++) {
464  params_cache[k] = react->species_states[j][i];
465  }
466  react->reaction(states_cache, params_cache, results_array, NULL);
467 
468  for (j = 0; j < react->num_species_involved; j++) {
469  states_cache_dx[j] += dx;
470  memset(results_array_dx,
471  0,
472  react->num_species_involved * sizeof(double));
473  react->reaction(states_cache_dx, params_cache, results_array_dx, NULL);
474  b[j] = dt * results_array[j];
475 
476  for (k = 0; k < react->num_species_involved; k++) {
477  pd = (results_array_dx[k] - results_array[k]) / dx;
478  jacobian(k, j) = (j == k) - dt * pd;
479  }
480  states_cache_dx[j] -= dx;
481  }
482  // solve for x
483  if (react->num_species_involved == 1) {
484  react->species_states[0][i] += b[0] / jacobian(0, 0);
485  } else {
486  // find entry in leftmost column with largest absolute value
487  // Pivot
488  for (j = 0; j < react->num_species_involved; j++) {
489  for (k = j + 1; k < react->num_species_involved; k++) {
490  if (abs(jacobian(j, j)) < abs(jacobian(k, j))) {
491  for (n = 0; n < react->num_species_involved; n++) {
492  temp = jacobian(j, n);
493  jacobian(j, n) = jacobian(k, n);
494  jacobian(k, n) = temp;
495  }
496  }
497  }
498  }
499 
500  for (j = 0; j < react->num_species_involved - 1; j++) {
501  for (k = j + 1; k < react->num_species_involved; k++) {
502  ge_value = jacobian(k, j) / jacobian(j, j);
503  for (n = 0; n < react->num_species_involved; n++) {
504  val_to_set = jacobian(k, n) - ge_value * jacobian(j, n);
505  jacobian(k, n) = val_to_set;
506  }
507  b[k] = b[k] - ge_value * b[j];
508  }
509  }
510 
511  for (j = react->num_species_involved - 1; j + 1 > 0; j--) {
512  x[j] = b[j];
513  for (k = j + 1; k < react->num_species_involved; k++) {
514  if (k != j) {
515  x[j] = x[j] - jacobian(j, k) * x[k];
516  }
517  }
518  x[j] = x[j] / jacobian(j, j);
519  }
520  for (j = 0; j < react->num_species_involved; j++) {
521  // I think this should be something like
522  // react->species_states[j][mc3d_indices[i]] += x[j];
523  // Since the grid has a uniform discretization, the mc3d_indices
524  // should be the same length. So just need to access the correct
525  // mc3d_indices[i] maybe do two lines?: index =
526  // react->species_indices[j][i] react->species_states[j][index] +=
527  // x[j];
528  react->species_states[j][i] += x[j];
529  }
530  }
531  }
532  }
533 
534  free(states_cache);
535  free(states_cache_dx);
536  free(params_cache);
537  free(results_array);
538  free(results_array_dx);
539 
540  if (stop)
541  return NULL;
542  }
543  }
544  }
545  return NULL;
546 }
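/* The per-voxel update above amounts to one linearized implicit Euler step
 * (a summary of the algebra, not additional source): with reaction rates f(s),
 * dx = FLT_EPSILON and the finite-difference Jacobian
 *     J[k][j] ~= (f_k(s + dx*e_j) - f_k(s)) / dx,
 * the loop assembles jacobian = I - dt*J and b = dt*f(s), solves
 *     (I - dt*J) x = dt*f(s)
 * by Gaussian elimination, and adds x to the stored species states. */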
547 
548 /* run_threaded_reactions
549  * takes an array of ReactGridData tasks of length NUM_THREADS and calls ecs_do_reactions
550  * to execute each task in a separate thread
551  */
552 static void run_threaded_reactions(ReactGridData* tasks) {
553  int k;
554  /* launch threads */
555  for (k = 0; k < NUM_THREADS - 1; k++) {
556  TaskQueue_add_task(AllTasks, &ecs_do_reactions, &tasks[k], NULL);
557  }
558  /* run one task in the main thread */
559  ecs_do_reactions(&tasks[NUM_THREADS - 1]);
560 
561  /* wait for them to finish */
562  TaskQueue_sync(AllTasks);
563 }
564 
565 void _fadvance_fixed_step_3D(void) {
566  Grid_node* grid;
567  ECS_Grid_node* g;
568  double dt = (*dt_ptr);
569 
570  /*Currents to broadcast via MPI*/
571  /*TODO: Handle multiple grids with one pass*/
572  /*Maybe TODO: Should check #currents << #voxels and not the other way round*/
573  int id;
574 
575  if (threaded_reactions_tasks != NULL)
576  run_threaded_reactions(threaded_reactions_tasks);
577 
578  for (id = 0, grid = Parallel_grids[0]; grid != NULL; grid = grid->next, id++) {
579  memset(grid->states_cur, 0, sizeof(double) * grid->size_x * grid->size_y * grid->size_z);
580  g = dynamic_cast<ECS_Grid_node*>(grid);
581  if (g)
583  grid->do_grid_currents(grid->states_cur, dt, id);
584  grid->apply_node_flux3D(dt, NULL);
585 
586  // grid->volume_setup();
587  if (grid->hybrid) {
588  grid->hybrid_connections();
589  }
590  grid->dg_adi();
591  }
592  /* transfer concentrations */
593  scatter_concentrations();
594 }
595 
596 extern "C" NRN_EXPORT void scatter_concentrations(void) {
597  /* transfer concentrations to classic NEURON */
598  Grid_node* grid;
599 
600  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
601  grid->scatter_grid_concentrations();
602  }
603 }
604 
605 /*****************************************************************************
606  *
607  * Begin variable step code
608  *
609  *****************************************************************************/
610 
611 /* count the total number of state variables AND store their offset (passed in) in the cvode vector
612  */
613 int ode_count(const int offset) {
614  int count = 0;
615  states_cvode_offset = offset;
616  Grid_node* grid;
617  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
618  count += grid->size_x * grid->size_y * grid->size_z;
619  }
620  return count;
621 }
622 
623 
624 void ecs_atolscale(double* y) {
625  Grid_node* grid;
626  ssize_t i;
627  int grid_size;
628  y += states_cvode_offset;
629  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
630  grid_size = grid->size_x * grid->size_y * grid->size_z;
631  for (i = 0; i < grid_size; i++) {
632  y[i] *= grid->atolscale;
633  }
634  y += grid_size;
635  }
636 }
637 
638 void _ecs_ode_reinit(double* y) {
639  Grid_node* grid;
640  ECS_Grid_node* g;
641 
642  ssize_t i;
643  int grid_size;
644  double* grid_states;
645  y += states_cvode_offset;
646  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
647  grid_states = grid->states;
648  grid_size = grid->size_x * grid->size_y * grid->size_z;
649 
650  for (i = 0; i < grid_size; i++) {
651  y[i] = grid_states[i];
652  }
653  y += grid_size;
654  g = dynamic_cast<ECS_Grid_node*>(grid);
655  if (g)
657  }
658 }
659 
660 
661 void _rhs_variable_step_ecs(const double* states, double* ydot) {
662  Grid_node* grid;
663  ECS_Grid_node* g;
664  ssize_t i;
665  int grid_size;
666  double dt = *dt_ptr;
667  double* grid_states;
668  double* const orig_ydot = ydot + states_cvode_offset;
669  double const* const orig_states = states + states_cvode_offset;
670  const unsigned char calculate_rhs = ydot == NULL ? 0 : 1;
671  states = orig_states;
672  ydot = orig_ydot;
673  /* prepare for advance by syncing data with local copy */
674  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
675  grid_states = grid->states;
676  grid_size = grid->size_x * grid->size_y * grid->size_z;
677 
678  /* copy the passed in states to local memory (needed to make reaction rates correct) */
679  for (i = 0; i < grid_size; i++) {
680  grid_states[i] = states[i];
681  }
682  states += grid_size;
683  }
684  /* transfer concentrations to classic NEURON states */
685  scatter_concentrations();
686  if (!calculate_rhs) {
687  return;
688  }
689 
690  states = orig_states;
691  ydot = orig_ydot;
692  /* TODO: reactions contribute to adaptive step-size*/
693  if (threaded_reactions_tasks != NULL) {
694  run_threaded_reactions(threaded_reactions_tasks);
695  }
696  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
697  grid_states = grid->states;
698  grid_size = grid->size_x * grid->size_y * grid->size_z;
699  for (i = 0; i < grid_size; i++) {
700  ydot[i] += (grid_states[i] - states[i]) / dt;
701  grid_states[i] = states[i];
702  }
703  states += grid_size;
704  ydot += grid_size;
705  }
706 
707  ydot = orig_ydot;
708  states = orig_states;
709  /* process currents */
710  for (i = 0, grid = Parallel_grids[0]; grid != NULL; grid = grid->next, i++) {
711  g = dynamic_cast<ECS_Grid_node*>(grid);
712  if (g)
714  grid->do_grid_currents(ydot, 1.0, i);
715 
716  /*Add node fluxes to the result*/
717  grid->apply_node_flux3D(1.0, ydot);
718 
719  ydot += grid_size;
720  }
721  ydot = orig_ydot;
722 
723  /* do the diffusion rates */
724  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
725  grid_size = grid->size_x * grid->size_y * grid->size_z;
726  grid->variable_step_diffusion(states, ydot);
727 
728  ydot += grid_size;
729  states += grid_size;
730  }
731 }
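/* Summary of the assembly order in _rhs_variable_step_ecs above (not additional source):
 * 1. copy the CVode state vector into each grid and scatter it to the NEURON sections,
 * 2. run the reaction update and convert the resulting state change into a rate,
 *    ydot[i] += (updated_state[i] - states[i]) / dt,
 * 3. add membrane currents and node fluxes as rates (scaled by 1.0 rather than dt),
 * 4. add the diffusion rates via grid->variable_step_diffusion(). */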
732 
733 
734 void _rhs_variable_step_helper(Grid_node* g, double const* const states, double* ydot) {
735  int num_states_x = g->size_x, num_states_y = g->size_y, num_states_z = g->size_z;
736  double dx = g->dx, dy = g->dy, dz = g->dz;
737  int i, j, k, stop_i, stop_j, stop_k;
738  double dc_x = g->dc_x, dc_y = g->dc_y, dc_z = g->dc_z;
739 
740  double rate_x = dc_x / (dx * dx);
741  double rate_y = dc_y / (dy * dy);
742  double rate_z = dc_z / (dz * dz);
743 
744  /*indices*/
745  int index, prev_i, prev_j, prev_k, next_i, next_j, next_k;
746  double div_x, div_y, div_z;
747 
748  /* Euler advance x, y, z (all points) */
749  stop_i = num_states_x - 1;
750  stop_j = num_states_y - 1;
751  stop_k = num_states_z - 1;
752  if (g->bc->type == NEUMANN) {
753  for (i = 0,
754  index = 0,
755  prev_i = num_states_y * num_states_z,
756  next_i = num_states_y * num_states_z;
757  i < num_states_x;
758  i++) {
759  /*Zero flux boundary conditions*/
760  div_x = (i == 0 || i == stop_i) ? 2. : 1.;
761 
762  for (j = 0, prev_j = index + num_states_z, next_j = index + num_states_z;
763  j < num_states_y;
764  j++) {
765  div_y = (j == 0 || j == stop_j) ? 2. : 1.;
766 
767  for (k = 0, prev_k = index + 1, next_k = index + 1; k < num_states_z;
768  k++, index++, prev_i++, next_i++, prev_j++, next_j++) {
769  div_z = (k == 0 || k == stop_k) ? 2. : 1.;
770 
771  if (stop_i > 0) {
772  ydot[index] += rate_x *
773  (states[prev_i] - 2.0 * states[index] + states[next_i]) /
774  div_x;
775  }
776  if (stop_j > 0) {
777  ydot[index] += rate_y *
778  (states[prev_j] - 2.0 * states[index] + states[next_j]) /
779  div_y;
780  }
781  if (stop_k > 0) {
782  ydot[index] += rate_z *
783  (states[prev_k] - 2.0 * states[index] + states[next_k]) /
784  div_z;
785  }
786  next_k = (k == stop_k - 1) ? index : index + 2;
787  prev_k = index;
788  }
789  prev_j = index - num_states_z;
790  next_j = (j == stop_j - 1) ? prev_j : index + num_states_z;
791  }
792  prev_i = index - num_states_y * num_states_z;
793  next_i = (i == stop_i - 1) ? prev_i : index + num_states_y * num_states_z;
794  }
795  } else {
796  for (i = 0, index = 0, prev_i = 0, next_i = num_states_y * num_states_z; i < num_states_x;
797  i++) {
798  for (j = 0, prev_j = index - num_states_z, next_j = index + num_states_z;
799  j < num_states_y;
800  j++) {
801  for (k = 0, prev_k = index - 1, next_k = index + 1; k < num_states_z;
802  k++, index++, prev_i++, next_i++, prev_j++, next_j++, next_k++, prev_k++) {
803  if (i == 0 || i == stop_i || j == 0 || j == stop_j || k == 0 || k == stop_k) {
804  // set to zero to prevent currents altering concentrations at the boundary
805  ydot[index] = 0;
806  } else {
807  ydot[index] += rate_x *
808  (states[prev_i] - 2.0 * states[index] + states[next_i]);
809 
810  ydot[index] += rate_y *
811  (states[prev_j] - 2.0 * states[index] + states[next_j]);
812 
813  ydot[index] += rate_z *
814  (states[prev_k] - 2.0 * states[index] + states[next_k]);
815  }
816  }
817  prev_j = index - num_states_z;
818  next_j = index + num_states_z;
819  }
820  prev_i = index - num_states_y * num_states_z;
821  next_i = index + num_states_y * num_states_z;
822  }
823  }
824  /*
825  for (i = 1, index = (num_states_y+1)*num_states_z + 1,
826  prev_i = num_states_z + 1, next_i = (2*num_states_y+1)*num_states_z + 1;
827  i < stop_i; i++) {
828  for (j = 1, prev_j = index - num_states_z, next_j = index + num_states_z; j <
829  stop_j; j++) {
830 
831  for(k = 1, prev_k = index - 1, next_k = index + 1; k < stop_k;
832  k++, index++, prev_i++, next_i++, prev_j++, next_j++, next_k++) {
833  if(index != i*num_states_y*num_states_z + j*num_states_z + k)
834  {
835  fprintf(stderr,"%i \t %i %i %i\n", index,i,j,k);
836  }
837  if(prev_i != (i-1)*num_states_y*num_states_z + j*num_states_z + k)
838  {
839  fprintf(stderr,"prev_i %i %i \t %i %i %i\n",
840  prev_i,(i-1)*num_states_y*num_states_z + j*num_states_z + k,i,j,k);
841  }
842  if(next_i != (i+1)*num_states_y*num_states_z + j*num_states_z + k)
843  {
844  fprintf(stderr,"next_i %i %i \t %i %i %i\n",
845  next_i,(i+1)*num_states_y*num_states_z + j*num_states_z + k,i,j,k);
846  }
847  if(prev_j != i*num_states_y*num_states_z + (j-1)*num_states_z + k)
848  {
849  fprintf(stderr,"prev_j %i %i \t %i %i %i\n",
850  prev_j,i*num_states_y*num_states_z + (j-1)*num_states_z + k,i,j,k);
851  }
852  if(next_j != i*num_states_y*num_states_z + (j+1)*num_states_z + k)
853  {
854  fprintf(stderr,"next_j %i %i \t %i %i %i\n",
855  next_j,i*num_states_y*num_states_z + (j+1)*num_states_z + k,i,j,k);
856  }
857  if(prev_k != i*num_states_y*num_states_z + j*num_states_z + k-1)
858  {
859  fprintf(stderr,"prev_k %i %i \t %i %i %i\n",
860  prev_k,i*num_states_y*num_states_z + j*num_states_z + k-1,i,j,k);
861  }
862  if(next_k != i*num_states_y*num_states_z + j*num_states_z + k+1)
863  {
864  fprintf(stderr,"next_k %i %i \t %i %i %i\n",
865  next_k,i*num_states_y*num_states_z + j*num_states_z + k+1,i,j,k);
866  }
867 
868  ydot[index] += rate_x * (states[prev_i] -
869  2.0 * states[index] + states[next_i]);
870 
871  ydot[index] += rate_y * (states[prev_j] -
872  2.0 * states[index] + states[next_j]);
873 
874  ydot[index] += rate_z * (states[prev_k] -
875  2.0 * states[index] + states[next_k]);
876 
877  }
878  index += 2; //skip z boundaries
879  //prev_j = index - num_states_z;
880  //next_j = index + num_states_z;
881  }
882  index += 2*num_states_z; //skip y boundaries
883  //prev_i = index - num_states_y*num_states_z;
884  //next_i = index + num_states_y*num_states_z;
885  }
886  }
887  */
888 }
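/* For reference, the interior update above is the standard second-order stencil
 *     ydot[i,j,k] += dc_x*(u[i-1,j,k] - 2*u[i,j,k] + u[i+1,j,k])/dx^2 + (same in y and z),
 * with the NEUMANN branch halving each boundary-face contribution via div_x/div_y/div_z
 * to impose zero flux, and the DIRICHLET branch pinning boundary voxels to ydot = 0. */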
889 
890 // p1 = b p2 = states
891 void ics_ode_solve(double dt, double* RHS, const double* states) {
892  Grid_node* grid;
893  ssize_t i;
894  int grid_size = 0;
895  double* grid_states;
896  double const* const orig_states = states + states_cvode_offset;
897  const unsigned char calculate_rhs = RHS == NULL ? 0 : 1;
898  double* const orig_RHS = RHS + states_cvode_offset;
899  states = orig_states;
900  RHS = orig_RHS;
901 
902  /* prepare for advance by syncing data with local copy */
903  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
904  grid_states = grid->states;
905  grid_size = grid->size_x * grid->size_y * grid->size_z;
906 
907  /* copy the passed in states to local memory (needed to make reaction rates correct) */
908  for (i = 0; i < grid_size; i++) {
909  grid_states[i] = states[i];
910  }
911  states += grid_size;
912  }
913  /* transfer concentrations to classic NEURON states */
914  scatter_concentrations();
915  if (!calculate_rhs) {
916  return;
917  }
918 
919  states = orig_states;
920  RHS = orig_RHS;
921 
922  /* TODO: reactions contribute to adaptive step-size*/
923  if (threaded_reactions_tasks != NULL) {
924  run_threaded_reactions(threaded_reactions_tasks);
925  }
926  /* do the diffusion rates */
927  for (grid = Parallel_grids[0]; grid != NULL; grid = grid->next) {
928  grid->variable_step_ode_solve(RHS, dt);
929  RHS += grid_size;
930  states += grid_size;
931  }
932 }
933 /*****************************************************************************
934  *
935  * End variable step code
936  *
937  *****************************************************************************/
938 
939 
940 /* solve_dd_clhs_tridiag uses the Thomas algorithm to solve a
941  * diagonally dominant tridiagonal matrix (Ax=b), where the
942  * triple (lower, diagonal and upper) are repeated to form A.
943  * N - length of the matrix
944  * l_diag - lower diagonal
945  * diag - diagonal
946  * u_diag - upper diagonal
947  * b - pointer to the RHS (length N)
948  * lbc_diag - the lower boundary diagonal (the first diagonal element)
949  * lbc_u_diag - the lower boundary upper diagonal (the first upper
950  * diagonal element)
951  * ubc_l_diag - the upper boundary lower diagonal (the last lower
952  * diagonal element)
953  * ubc_diag - the upper boundary diagonal (the last diagonal element)
954  * The solution (x) is stored in b.
955  * c - scratchpad array, N - 1 doubles long
956  */
957 static int solve_dd_clhs_tridiag(const int N,
958  const double l_diag,
959  const double diag,
960  const double u_diag,
961  const double lbc_diag,
962  const double lbc_u_diag,
963  const double ubc_l_diag,
964  const double ubc_diag,
965  double* const b,
966  double* const c) {
967  int i;
968 
969 
970  c[0] = lbc_u_diag / lbc_diag;
971  b[0] = b[0] / lbc_diag;
972 
973  for (i = 1; i < N - 1; i++) {
974  c[i] = u_diag / (diag - l_diag * c[i - 1]);
975  b[i] = (b[i] - l_diag * b[i - 1]) / (diag - l_diag * c[i - 1]);
976  }
977  b[N - 1] = (b[N - 1] - ubc_l_diag * b[N - 2]) / (ubc_diag - ubc_l_diag * c[N - 2]);
978 
979  /*back substitution*/
980  for (i = N - 2; i >= 0; i--) {
981  b[i] = b[i] - c[i] * b[i + 1];
982  }
983  return 0;
984 }
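/* A self-contained sketch (separate from this file, illustrative values only) of the
 * same forward-elimination / back-substitution sweep on a 3x3 constant tridiagonal
 * system, useful for checking the boundary-row handling by hand. */
#include <cstdio>

int main() {
    /* Solve A x = b for A = [[2,-1,0],[-1,2,-1],[0,-1,2]], b = [1,0,1]; x should be [1,1,1]. */
    const int N = 3;
    const double l = -1.0, d = 2.0, u = -1.0; /* repeated lower/main/upper diagonals */
    const double lbc_d = 2.0, lbc_u = -1.0;   /* first row */
    const double ubc_l = -1.0, ubc_d = 2.0;   /* last row */
    double b[3] = {1.0, 0.0, 1.0};
    double c[2];

    c[0] = lbc_u / lbc_d;
    b[0] = b[0] / lbc_d;
    for (int i = 1; i < N - 1; i++) {
        c[i] = u / (d - l * c[i - 1]);
        b[i] = (b[i] - l * b[i - 1]) / (d - l * c[i - 1]);
    }
    b[N - 1] = (b[N - 1] - ubc_l * b[N - 2]) / (ubc_d - ubc_l * c[N - 2]);
    /* back substitution */
    for (int i = N - 2; i >= 0; i--) {
        b[i] = b[i] - c[i] * b[i + 1];
    }
    printf("x = %g %g %g\n", b[0], b[1], b[2]); /* expect 1 1 1 */
    return 0;
}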
985 
986 /*
987 static int solve_dd_clhs_tridiag_rev(const int N, const double l_diag, const double diag,
988  const double u_diag, const double lbc_diag, const double lbc_u_diag,
989  const double ubc_l_diag, const double ubc_diag, double* const d, double* const a)
990 {
991  int i;
992 
993 
994  a[N-2] = ubc_l_diag/ubc_diag;
995  d[N-1] = d[N-1]/ubc_diag;
996 
997  for(i=N-2; i>0; i--)
998  {
999  a[i-1] = l_diag/(diag - u_diag*a[i]);
1000  d[i] = (d[i] - u_diag*d[i+1])/(diag - u_diag*a[i]);
1001  }
1002  d[0] = (d[0] - lbc_u_diag*d[1])/(lbc_diag - lbc_u_diag*a[0]);
1003 
1004  for(i=1; i<N; i++)
1005  {
1006  d[i] = d[i] - a[i-1]*d[i-1];
1007 
1008  }
1009  return 0;
1010 }
1011 unsigned char callcount = TRUE;
1012 static int solve_dd_clhs_tridiag(const int N, const double l_diag, const double diag,
1013  const double u_diag, const double lbc_diag, const double lbc_u_diag,
1014  const double ubc_l_diag, const double ubc_diag, double* const b, double* const c)
1015 {
1016  if(callcount)
1017  {
1018  callcount = FALSE;
1019  return solve_dd_clhs_tridiag_for(N, l_diag, diag, u_diag, lbc_diag, lbc_u_diag, ubc_l_diag,
1020 ubc_diag, b, c);
1021  }
1022  else
1023  {
1024  callcount = TRUE;
1025  return solve_dd_clhs_tridiag_rev(N, l_diag, diag, u_diag, lbc_diag, lbc_u_diag, ubc_l_diag,
1026 ubc_diag, b, c);
1027  }
1028 }
1029 */
1030 
1031 
1032 /* dg_adi_x performs the first of 3 steps in DG-ADI
1033  * g - the parameters and state of the grid
1034  * dt - the time step
1035  * y - the index for the y plane
1036  * z - the index for the z plane
1037  * state - where the output of this step is stored
1038  * scratch - scratchpad array of doubles, length g->size_x - 1
1039  */
1040 static void ecs_dg_adi_x(ECS_Grid_node* g,
1041  const double dt,
1042  const int y,
1043  const int z,
1044  double const* const state,
1045  double* const RHS,
1046  double* const scratch) {
1047  int yp, ym, zp, zm;
1048  int x;
1049  double r = g->dc_x * dt / SQ(g->dx);
1050  double div_y, div_z;
1051  /*TODO: Get rid of this by not calling dg_adi when on the boundary for DIRICHLET conditions*/
1052  if (g->bc->type == DIRICHLET &&
1053  (y == 0 || z == 0 || y == g->size_y - 1 || z == g->size_z - 1)) {
1054  for (x = 0; x < g->size_x; x++)
1055  RHS[x] = g->bc->value;
1056  return;
1057  }
1058 
1059  if (g->size_y > 1) {
1060  yp = (y == g->size_y - 1) ? y - 1 : y + 1;
1061  ym = (y == 0) ? y + 1 : y - 1;
1062  div_y = (y == 0 || y == g->size_y - 1) ? 2. : 1.;
1063  } else {
1064  yp = 0;
1065  ym = 0;
1066  div_y = 1;
1067  }
1068  if (g->size_z > 1) {
1069  zp = (z == g->size_z - 1) ? z - 1 : z + 1;
1070  zm = (z == 0) ? z + 1 : z - 1;
1071  div_z = (z == 0 || z == g->size_z - 1) ? 2. : 1.;
1072  } else {
1073  zp = 0;
1074  zm = 0;
1075  div_z = 1;
1076  }
1077 
1078  if (g->bc->type == NEUMANN) {
1079  /*zero flux boundary condition*/
1080  RHS[0] = state[IDX(0, y, z)] + g->states_cur[IDX(0, y, z)] +
1081  dt *
1082  ((g->dc_y / SQ(g->dy)) *
1083  (state[IDX(0, yp, z)] - 2. * state[IDX(0, y, z)] + state[IDX(0, ym, z)]) /
1084  div_y +
1085  (g->dc_z / SQ(g->dz)) *
1086  (state[IDX(0, y, zp)] - 2. * state[IDX(0, y, z)] + state[IDX(0, y, zm)]) /
1087  div_z);
1088  if (g->size_x > 1) {
1089  RHS[0] += dt * (g->dc_x / SQ(g->dx)) * (state[IDX(1, y, z)] - state[IDX(0, y, z)]);
1090  x = g->size_x - 1;
1091  RHS[x] =
1092  state[IDX(x, y, z)] + g->states_cur[IDX(x, y, z)] +
1093  dt * ((g->dc_x / SQ(g->dx)) * (state[IDX(x - 1, y, z)] - state[IDX(x, y, z)]) +
1094  (g->dc_y / SQ(g->dy)) *
1095  (state[IDX(x, yp, z)] - 2. * state[IDX(x, y, z)] + state[IDX(x, ym, z)]) /
1096  div_y +
1097  (g->dc_z / SQ(g->dz)) *
1098  (state[IDX(x, y, zp)] - 2. * state[IDX(x, y, z)] + state[IDX(x, y, zm)]) /
1099  div_z);
1100  }
1101  } else {
1102  RHS[0] = g->bc->value;
1103  RHS[g->size_x - 1] = g->bc->value;
1104  }
1105  for (x = 1; x < g->size_x - 1; x++) {
1106 #ifndef __PGI
1107  __builtin_prefetch(&(state[IDX(x + PREFETCH, y, z)]), 0, 1);
1108  __builtin_prefetch(&(state[IDX(x + PREFETCH, yp, z)]), 0, 0);
1109  __builtin_prefetch(&(state[IDX(x + PREFETCH, ym, z)]), 0, 0);
1110 #endif
1111  RHS[x] = state[IDX(x, y, z)] +
1112  dt *
1113  ((g->dc_x / SQ(g->dx)) *
1114  (state[IDX(x + 1, y, z)] - 2. * state[IDX(x, y, z)] +
1115  state[IDX(x - 1, y, z)]) /
1116  2. +
1117  (g->dc_y / SQ(g->dy)) *
1118  (state[IDX(x, yp, z)] - 2. * state[IDX(x, y, z)] + state[IDX(x, ym, z)]) /
1119  div_y +
1120  (g->dc_z / SQ(g->dz)) *
1121  (state[IDX(x, y, zp)] - 2. * state[IDX(x, y, z)] + state[IDX(x, y, zm)]) /
1122  div_z) +
1123  g->states_cur[IDX(x, y, z)];
1124  }
1125  if (g->size_x > 1) {
1126  if (g->bc->type == NEUMANN)
1127  solve_dd_clhs_tridiag(g->size_x,
1128  -r / 2.0,
1129  1.0 + r,
1130  -r / 2.0,
1131  1.0 + r / 2.0,
1132  -r / 2.0,
1133  -r / 2.0,
1134  1.0 + r / 2.0,
1135  RHS,
1136  scratch);
1137  else
1138  solve_dd_clhs_tridiag(
1139  g->size_x, -r / 2.0, 1.0 + r, -r / 2.0, 1.0, 0, 0, 1.0, RHS, scratch);
1140  }
1141 }
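/* In operator form (a sketch, with Lx u = dc_x*(u[x-1] - 2*u[x] + u[x+1])/dx^2 and
 * similarly for Ly, Lz), the x sweep above solves the first Douglas-Gunn step for
 * interior rows:
 *     (1 - (dt/2)*Lx) u* = (1 + (dt/2)*Lx + dt*Ly + dt*Lz) u^n + states_cur,
 * with r = dc_x*dt/dx^2 giving the (-r/2, 1+r, -r/2) rows passed to
 * solve_dd_clhs_tridiag. The y and z sweeps that follow then solve
 *     (1 - (dt/2)*Ly) u** = u* - (dt/2)*Ly u^n
 * and the analogous z step, completing the ADI update used by dg_adi(). */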
1142 
1143 
1144 /* dg_adi_y performs the second of 3 steps in DG-ADI
1145  * g - the parameters and state of the grid
1146  * dt - the time step
1147  * x - the index for the x plane
1148  * z - the index for the z plane
1149  * state - the values from the first step, which are
1150  * overwritten by the output of this step
1151  * scratch - scratchpad array of doubles, length g->size_y - 1
1152  */
1153 static void ecs_dg_adi_y(ECS_Grid_node* g,
1154  double const dt,
1155  int const x,
1156  int const z,
1157  double const* const state,
1158  double* const RHS,
1159  double* const scratch) {
1160  int y;
1161  double r = (g->dc_y * dt / SQ(g->dy));
1162  /*TODO: Get rid of this by not calling dg_adi when on the boundary for DIRICHLET conditions*/
1163  if (g->bc->type == DIRICHLET &&
1164  (x == 0 || z == 0 || x == g->size_x - 1 || z == g->size_z - 1)) {
1165  for (y = 0; y < g->size_y; y++)
1166  RHS[y] = g->bc->value;
1167  return;
1168  }
1169  if (g->size_y == 1) {
1170  if (g->bc->type == NEUMANN)
1171  RHS[0] = state[x + z * g->size_x];
1172  else
1173  RHS[0] = g->bc->value;
1174  return;
1175  }
1176  if (g->bc->type == NEUMANN) {
1177  /*zero flux boundary condition*/
1178  RHS[0] = state[x + z * g->size_x] -
1179  (g->dc_y * dt / SQ(g->dy)) *
1180  (g->states[IDX(x, 1, z)] - 2.0 * g->states[IDX(x, 0, z)] +
1181  g->states[IDX(x, 1, z)]) /
1182  4.0;
1183  y = g->size_y - 1;
1184  RHS[y] = state[x + (z + y * g->size_z) * g->size_x] -
1185  (g->dc_y * dt / SQ(g->dy)) *
1186  (g->states[IDX(x, y - 1, z)] - 2. * g->states[IDX(x, y, z)] +
1187  g->states[IDX(x, y - 1, z)]) /
1188  4.0;
1189  } else {
1190  RHS[0] = g->bc->value;
1191  RHS[g->size_y - 1] = g->bc->value;
1192  }
1193  for (y = 1; y < g->size_y - 1; y++) {
1194 #ifndef __PGI
1195  __builtin_prefetch(&state[x + (z + (y + PREFETCH) * g->size_z) * g->size_x], 0, 0);
1196  __builtin_prefetch(&(g->states[IDX(x, y + PREFETCH, z)]), 0, 1);
1197 #endif
1198  RHS[y] = state[x + (z + y * g->size_z) * g->size_x] -
1199  (g->dc_y * dt / SQ(g->dy)) *
1200  (g->states[IDX(x, y + 1, z)] - 2. * g->states[IDX(x, y, z)] +
1201  g->states[IDX(x, y - 1, z)]) /
1202  2.0;
1203  }
1204  if (g->bc->type == NEUMANN)
1205  solve_dd_clhs_tridiag(g->size_y,
1206  -r / 2.0,
1207  1.0 + r,
1208  -r / 2.0,
1209  1.0 + r / 2.0,
1210  -r / 2.0,
1211  -r / 2.0,
1212  1.0 + r / 2.0,
1213  RHS,
1214  scratch);
1215  else
1216  solve_dd_clhs_tridiag(g->size_y, -r / 2., 1. + r, -r / 2., 1.0, 0, 0, 1.0, RHS, scratch);
1217 }
1218 
1219 
1220 /* dg_adi_z performs the final step in DG-ADI
1221  * g - the parameters and state of the grid
1222  * dt - the time step
1223  * x - the index for the x plane
1224  * y - the index for the y plane
1225  * state - the values from the second step, which are
1226  * overwritten by the output of this step
1227  * scratch - scratchpad array of doubles, length g->size_z - 1
1228  */
1229 static void ecs_dg_adi_z(ECS_Grid_node* g,
1230  double const dt,
1231  int const x,
1232  int const y,
1233  double const* const state,
1234  double* const RHS,
1235  double* const scratch) {
1236  int z;
1237  double r = g->dc_z * dt / SQ(g->dz);
1238  /*TODO: Get rid of this by not calling dg_adi when on the boundary for DIRICHLET conditions*/
1239  if (g->bc->type == DIRICHLET &&
1240  (x == 0 || y == 0 || x == g->size_x - 1 || y == g->size_y - 1)) {
1241  for (z = 0; z < g->size_z; z++)
1242  RHS[z] = g->bc->value;
1243  return;
1244  }
1245 
1246  if (g->size_z == 1) {
1247  if (g->bc->type == NEUMANN)
1248  RHS[0] = state[y + g->size_y * x];
1249  else
1250  RHS[0] = g->bc->value;
1251  return;
1252  }
1253 
1254  if (g->bc->type == NEUMANN) {
1255  /*zero flux boundary condition*/
1256  RHS[0] = state[y + g->size_y * (x * g->size_z)] -
1257  (g->dc_z * dt / SQ(g->dz)) *
1258  (g->states[IDX(x, y, 1)] - 2.0 * g->states[IDX(x, y, 0)] +
1259  g->states[IDX(x, y, 1)]) /
1260  4.0;
1261  z = g->size_z - 1;
1262  RHS[z] = state[y + g->size_y * (x * g->size_z + z)] -
1263  (g->dc_z * dt / SQ(g->dz)) *
1264  (g->states[IDX(x, y, z - 1)] - 2.0 * g->states[IDX(x, y, z)] +
1265  g->states[IDX(x, y, z - 1)]) /
1266  4.0;
1267 
1268  } else {
1269  RHS[0] = g->bc->value;
1270  RHS[g->size_z - 1] = g->bc->value;
1271  }
1272  for (z = 1; z < g->size_z - 1; z++) {
1273  RHS[z] = state[y + g->size_y * (x * g->size_z + z)] -
1274  (g->dc_z * dt / SQ(g->dz)) *
1275  (g->states[IDX(x, y, z + 1)] - 2. * g->states[IDX(x, y, z)] +
1276  g->states[IDX(x, y, z - 1)]) /
1277  2.;
1278  }
1279 
1280  if (g->bc->type == NEUMANN)
1281  solve_dd_clhs_tridiag(g->size_z,
1282  -r / 2.0,
1283  1.0 + r,
1284  -r / 2.0,
1285  1.0 + r / 2.0,
1286  -r / 2.0,
1287  -r / 2.0,
1288  1.0 + r / 2.0,
1289  RHS,
1290  scratch);
1291  else
1292  solve_dd_clhs_tridiag(g->size_z, -r / 2., 1. + r, -r / 2., 1.0, 0, 0, 1.0, RHS, scratch);
1293 }
1294 
1295 static void* ecs_do_dg_adi(void* dataptr) {
1296  ECSAdiGridData* data = (ECSAdiGridData*) dataptr;
1297  int start = data->start;
1298  int stop = data->stop;
1299  int i, j, k;
1300  ECSAdiDirection* ecs_adi_dir = data->ecs_adi_dir;
1301  double dt = *dt_ptr;
1302  int sizej = data->sizej;
1303  ECS_Grid_node* g = data->g;
1304  double* state_in = ecs_adi_dir->states_in;
1305  double* state_out = ecs_adi_dir->states_out;
1306  int offset = ecs_adi_dir->line_size;
1307  double* scratchpad = data->scratchpad;
1308  void (*ecs_dg_adi_dir)(
1309  ECS_Grid_node*, double, int, int, double const* const, double* const, double* const) =
1310  ecs_adi_dir->ecs_dg_adi_dir;
1311  for (k = start; k < stop; k++) {
1312  i = k / sizej;
1313  j = k % sizej;
1314  ecs_dg_adi_dir(g, dt, i, j, state_in, &state_out[k * offset], scratchpad);
1315  }
1316 
1317  return NULL;
1318 }
1319 
1320 void ecs_run_threaded_dg_adi(const int i,
1321  const int j,
1322  ECS_Grid_node* g,
1323  ECSAdiDirection* ecs_adi_dir,
1324  const int n) {
1325  int k;
1326  /* when doing any given direction, the number of tasks is the product of the other two, so
1327  * multiply everything then divide out the current direction */
1328  const int tasks_per_thread = (g->size_x * g->size_y * g->size_z / n) / NUM_THREADS;
1329  const int extra = (g->size_x * g->size_y * g->size_z / n) % NUM_THREADS;
1330 
1331  g->ecs_tasks[0].start = 0;
1332  g->ecs_tasks[0].stop = tasks_per_thread + (extra > 0);
1333  g->ecs_tasks[0].sizej = j;
1334  g->ecs_tasks[0].ecs_adi_dir = ecs_adi_dir;
1335  for (k = 1; k < NUM_THREADS; k++) {
1336  g->ecs_tasks[k].start = g->ecs_tasks[k - 1].stop;
1337  g->ecs_tasks[k].stop = g->ecs_tasks[k].start + tasks_per_thread + (extra > k);
1338  g->ecs_tasks[k].sizej = j;
1339  g->ecs_tasks[k].ecs_adi_dir = ecs_adi_dir;
1340  }
1341  g->ecs_tasks[NUM_THREADS - 1].stop = i * j;
1342  /* launch threads */
1343  for (k = 0; k < NUM_THREADS - 1; k++) {
1344  TaskQueue_add_task(AllTasks, &ecs_do_dg_adi, &(g->ecs_tasks[k]), NULL);
1345  }
1346  /* run one task in the main thread */
1347  ecs_do_dg_adi(&(g->ecs_tasks[NUM_THREADS - 1]));
1348  /* wait for them to finish */
1349  TaskQueue_sync(AllTasks);
1350 }
1351 
1352 void ecs_set_adi_homogeneous(ECS_Grid_node* g) {
1353  g->ecs_adi_dir_x->ecs_dg_adi_dir = ecs_dg_adi_x;
1354  g->ecs_adi_dir_y->ecs_dg_adi_dir = ecs_dg_adi_y;
1355  g->ecs_adi_dir_z->ecs_dg_adi_dir = ecs_dg_adi_z;
1356 }