NEURON
test_multicore.cpp
Go to the documentation of this file.
1 #include "test_multicore.h"
2 
3 #include "code.h"
4 #include "hocdec.h"
5 #include "multicore.h"
6 #include "nrn_ansi.h"
7 #include "ocfunc.h"
8 
9 #include <catch2/generators/catch_generators_range.hpp>
10 #include <catch2/catch_test_macros.hpp>
11 
12 #include <algorithm>
13 #include <iostream>
14 #include <numeric>
15 #include <vector>
16 
17 /* @brief
18  * Test multicore implementation:
19  * * parallel mode (std::thread)
20  * * parallel mode with busywait
21  * * serial mode
22  * * performance
23  * * NOTE: GitHub runners don't have enough capabilities for performance KPIs
24  */
25 
26 
// Catch2 test case: creates a ParallelContext and the cell set-up through hoc_oc(), then times
// the hoc call "prun()" for every thread count in nof_threads_range in three configurations:
// nrn_threads_create(n, 1), nrn_threads_create(n, 0), and nrn_threads_create(n, 1) after
// pc.thread_busywait(1). Each configuration prints its timings (microseconds) and asserts a
// KPI based on the spread of those timings; the KPI is skipped with a WARN when the range
// holds two or fewer thread counts.
27 TEST_CASE("Multicore unit and performance testing", "[NEURON][multicore]") {
    // Thread counts to sweep. Function-local static, so the range is built only once.
28  static const auto nof_threads_range{nrn::test::make_available_threads_range()};
29  SECTION("Simulation set-up", "[NEURON][multicore][setup]") {
30  WHEN("We create a parallel context") {
31  THEN("we make sure we have a parallel context") {
    // hoc_oc() is expected to return 0 for the hoc statements to count as accepted.
32  REQUIRE(hoc_oc("objref pc\n"
33  "pc = new ParallelContext()") == 0);
34  }
35  THEN("we assert nrn_thread is equal to 1") {
36  REQUIRE(nrn_nthread == 1);
37  }
38  THEN("we check we have no worker threads") {
39  REQUIRE(nof_worker_threads() == 0);
40  }
41  }
42  WHEN("We setup the cells for the simulation") {
43  THEN("we create 1000 passive membrane cells for the simulation") {
    // pass_cell_template, prun and the _pas_cells literal suffix are defined outside this
    // file (presumably in test_multicore.h -- TODO confirm).
44  REQUIRE(hoc_oc(pass_cell_template) == 0);
45  REQUIRE(hoc_oc(prun) == 0);
46  std::string cells{1000_pas_cells};
47  REQUIRE(hoc_oc(cells.c_str()) == 0);
48  }
49  }
50  }
51 
52  SECTION("Test parallel mode", "[NEURON][multicore][parallel]") {
    // static: the recorded timings have to persist across the repeated executions of this
    // test case that GENERATE_COPY triggers (one per generated nof_threads value).
53  static std::vector<double> cache_sim_times;
54  GIVEN("we do prun() over each nof_threads{nof_threads_range}") {
55  auto nof_threads = GENERATE_COPY(from_range(nof_threads_range));
56  THEN("we run the simulation with " + std::to_string(nof_threads) + " threads") {
    // Second argument 1: presumably selects parallel execution -- TODO confirm against
    // nrn_threads_create(). The assertions below expect nof_threads workers when more than
    // one thread is requested and none otherwise.
57  nrn_threads_create(nof_threads, 1);
58  REQUIRE(nrn_nthread == nof_threads);
59  REQUIRE(nof_worker_threads() == (nof_threads > 1 ? nof_threads : 0));
    // Wall-clock time of the hoc prun() call, stored in microseconds.
60  auto start = std::chrono::high_resolution_clock::now();
61  REQUIRE(hoc_oc("prun()") == 0);
62  auto end = std::chrono::high_resolution_clock::now();
63  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
64  cache_sim_times.push_back(duration.count());
    // The worker-thread count must be unchanged after the run.
65  REQUIRE(nof_worker_threads() == (nof_threads > 1 ? nof_threads : 0));
66  }
67  }
68  THEN("we assert all simulations ran") {
69  REQUIRE(cache_sim_times.size() == nof_threads_range.size());
70  }
71  THEN("we print the results") {
72  std::cout << "[parallel][simulation times] : " << std::endl;
    // NOTE(review): the header names two columns ("cache=0" and "cache=1") but the loop
    // below prints a single timing column per thread count.
73  std::cout << "nt"
74  << "\t"
75  << "cache=0"
76  << "\t\t"
77  << "cache=1" << std::endl;
    // NOTE(review): `auto i = 0` deduces int and is compared against the unsigned size().
78  for (auto i = 0; i < cache_sim_times.size(); ++i) {
79  std::cout << nof_threads_range[i] << "\t" << cache_sim_times[i] << std::endl;
80  }
81  }
82  THEN("we check that the more threads we have the faster the simulation runs") {
83  if (nof_threads_range.size() > 2) {
    // is_sorted over the reverse iterators: passes when the timings are non-increasing in
    // the order they were recorded.
84  REQUIRE(std::is_sorted(cache_sim_times.rbegin(), cache_sim_times.rend()));
    // NOTE(review): the description below says 25% (and misspells "deviation") while the
    // assertion checks cache_std_dev / cache_mean > 0.2.
85  THEN(
86  "we check that the standard deviaton is above 25% from the mean for simulation "
87  "vectors") {
88  const auto cache_mean =
89  std::accumulate(cache_sim_times.begin(), cache_sim_times.end(), 0.0) /
90  cache_sim_times.size();
    // Population standard deviation: sqrt of the summed squared deviations divided by N.
91  const auto cache_std_dev = std::sqrt(
92  std::accumulate(cache_sim_times.begin(),
93  cache_sim_times.end(),
94  0.0,
95  [cache_mean](double a, double b) {
96  return a + (b - cache_mean) * (b - cache_mean);
97  }) /
98  cache_sim_times.size());
99  REQUIRE(cache_std_dev / cache_mean > 0.2);
100  // print the standard deviation of the recorded timings
101  std::cout << "[parallel][cache][standard deviation] : " << cache_std_dev
102  << std::endl;
103  }
104  } else {
105  WARN("Not enough threads to test parallel performance KPI");
106  }
107  }
108  }
109 
110  SECTION("Test serial mode", "[NEURON][multicore][serial]") {
    // static for the same reason as in the parallel section: timings accumulate across the
    // repeated executions driven by GENERATE_COPY.
111  static std::vector<double> sim_times;
112  GIVEN("we do prun() over each nof_threads{nof_threads_range} with serial mode on") {
113  auto nof_threads = GENERATE_COPY(from_range(nof_threads_range));
114  THEN("we run the serial simulation with " << nof_threads << " threads") {
    // Second argument 0: nrn_nthread still has to equal nof_threads, but no worker threads
    // are expected before or after the run.
115  nrn_threads_create(nof_threads, 0);
116  REQUIRE(nrn_nthread == nof_threads);
117  REQUIRE(nof_worker_threads() == 0);
118  auto start = std::chrono::high_resolution_clock::now();
119  REQUIRE(hoc_oc("prun()") == 0);
120  auto end = std::chrono::high_resolution_clock::now();
121  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
122  sim_times.push_back(duration.count());
123  REQUIRE(nof_worker_threads() == 0);
124  }
125  }
126  THEN("we assert all simulations ran") {
127  REQUIRE(sim_times.size() == nof_threads_range.size());
128  }
129  THEN("we print the results") {
130  std::cout << "[serial][simulation times] : " << std::endl;
131  std::cout << "nt"
132  << "\t"
133  << "cache=1" << std::endl;
    // NOTE(review): `auto i = 0` deduces int and is compared against the unsigned size().
134  for (auto i = 0; i < sim_times.size(); ++i) {
135  std::cout << nof_threads_range[i] << "\t" << sim_times[i] << std::endl;
136  }
137  }
138  THEN("we assert sim_times have under 10% standard deviation from the mean") {
139  if (nof_threads_range.size() > 2) {
    // Standard deviation computed as sqrt(mean of squares - square of mean).
    // NOTE(review): with nearly identical timings the radicand may round slightly below
    // zero, which would make stdev NaN and fail the REQUIRE -- confirm whether a clamp is
    // wanted.
140  const auto mean = std::accumulate(sim_times.begin(), sim_times.end(), 0.0) /
141  sim_times.size();
142  const auto sq_sum =
143  std::inner_product(sim_times.begin(), sim_times.end(), sim_times.begin(), 0.0);
144  const auto stdev = std::sqrt(sq_sum / sim_times.size() - mean * mean);
145 
146  std::cout << "[serial][standard deviation] : " << stdev << std::endl;
147  REQUIRE(stdev < 0.1 * mean);
148  } else {
149  WARN("Not enough threads to test serial performance KPI");
150  }
151  }
152  }
153 
154  SECTION("Test busywait parallel mode", "[NEURON][multicore][parallel][busywait]") {
155  WHEN("busywait is set to 1") {
156  THEN("set thread_busywait to 1") {
    // NOTE(review): nothing in this test case sets thread_busywait back to 0 afterwards.
157  REQUIRE(hoc_oc("pc.thread_busywait(1)") == 0);
158  }
    // static: timings accumulate across the repeated executions driven by GENERATE_COPY.
159  static std::vector<double> sim_times;
    // NOTE(review): the description says "with serial mode on", yet this section calls
    // nrn_threads_create(nof_threads, 1) exactly like the parallel section -- looks like a
    // copy-paste leftover.
160  GIVEN("we do prun() over each nof_threads{nof_threads_range} with serial mode on") {
161  auto nof_threads = GENERATE_COPY(from_range(nof_threads_range));
162  THEN("we run the parallel busywait simulation with " << nof_threads << " threads") {
163  nrn_threads_create(nof_threads, 1);
164  REQUIRE(nrn_nthread == nof_threads);
165  REQUIRE(nof_worker_threads() == (nof_threads > 1 ? nof_threads : 0));
    // Wall-clock time of the hoc prun() call, stored in microseconds.
166  auto start = std::chrono::high_resolution_clock::now();
167  REQUIRE(hoc_oc("prun()") == 0);
168  auto end = std::chrono::high_resolution_clock::now();
169  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end -
170  start);
171  sim_times.push_back(duration.count());
172  REQUIRE(nof_worker_threads() == (nof_threads > 1 ? nof_threads : 0));
173  }
174  }
175  THEN("we assert all simulations ran") {
176  REQUIRE(sim_times.size() == nof_threads_range.size());
177  }
178  THEN("we print the results") {
179  std::cout << "[parallel][busywait][simulation times] : " << std::endl;
180  std::cout << "nt"
181  << "\t"
182  << "cache=1" << std::endl;
    // NOTE(review): `auto i = 0` deduces int and is compared against the unsigned size().
183  for (auto i = 0; i < sim_times.size(); ++i) {
184  std::cout << nof_threads_range[i] << "\t" << sim_times[i] << std::endl;
185  }
186  }
187  THEN("we assert sim_times have over 20% standard deviation from the mean") {
188  if (nof_threads_range.size() > 2) {
    // Standard deviation computed as sqrt(mean of squares - square of mean).
189  const auto mean = std::accumulate(sim_times.begin(), sim_times.end(), 0.0) /
190  sim_times.size();
191  const auto sq_sum = std::inner_product(sim_times.begin(),
192  sim_times.end(),
193  sim_times.begin(),
194  0.0);
195  const auto stdev = std::sqrt(sq_sum / sim_times.size() - mean * mean);
196  std::cout << "[parallel][busywait][standard deviation] : " << stdev
197  << std::endl;
198  // the standard deviation is required to exceed 20% of the mean
199  REQUIRE(stdev > 0.2 * mean);
200  } else {
201  WARN("Not enough threads to test busywait+parallel performance KPI");
202  }
203  }
204  }
205  }
206 }
static double * duration
Definition: clamp.cpp:37
#define i
Definition: md1redef.h:19
int hoc_oc(const char *buf)
Definition: hoc.cpp:1314
sqrt
Definition: extdef.h:3
void nrn_threads_create(int n)
Definition: multicore.cpp:102
int nrn_nthread
Definition: multicore.cpp:55
std::string to_string(const T &obj)
auto make_available_threads_range()
std::size_t nof_worker_threads()
Definition: multicore.cpp:1048
TEST_CASE("Multicore unit and performance testing", "[NEURON][multicore]")
constexpr auto pass_cell_template
constexpr auto prun
prun requires a parallel context (pc) to be created beforehand.