9 #include <catch2/generators/catch_generators_range.hpp>
10 #include <catch2/catch_test_macros.hpp>
27 TEST_CASE(
"Multicore unit and performance testing",
"[NEURON][multicore]") {
29 SECTION(
"Simulation set-up",
"[NEURON][multicore][setup]") {
30 WHEN(
"We create a parallel context") {
31 THEN(
"we make sure we have a parallel context") {
32 REQUIRE(
hoc_oc(
"objref pc\n"
33 "pc = new ParallelContext()") == 0);
35 THEN(
"we assert nrn_thread is equal to 1") {
38 THEN(
"we check we have no worker threads") {
42 WHEN(
"We setup the cells for the simulation") {
43 THEN(
"we create 1000 passive membrane cells for the simulation") {
46 std::string cells{1000_pas_cells};
47 REQUIRE(
hoc_oc(cells.c_str()) == 0);
52 SECTION(
"Test parallel mode",
"[NEURON][multicore][parallel]") {
53 static std::vector<double> cache_sim_times;
54 GIVEN(
"we do prun() over each nof_threads{nof_threads_range}") {
55 auto nof_threads = GENERATE_COPY(from_range(nof_threads_range));
56 THEN(
"we run the simulation with " +
std::to_string(nof_threads) +
" threads") {
60 auto start = std::chrono::high_resolution_clock::now();
61 REQUIRE(
hoc_oc(
"prun()") == 0);
62 auto end = std::chrono::high_resolution_clock::now();
63 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
64 cache_sim_times.push_back(
duration.count());
68 THEN(
"we assert all simulations ran") {
69 REQUIRE(cache_sim_times.size() == nof_threads_range.size());
71 THEN(
"we print the results") {
72 std::cout <<
"[parallel][simulation times] : " << std::endl;
77 <<
"cache=1" << std::endl;
78 for (
auto i = 0;
i < cache_sim_times.size(); ++
i) {
79 std::cout << nof_threads_range[
i] <<
"\t" << cache_sim_times[
i] << std::endl;
82 THEN(
"we check that the more threads we have the faster the simulation runs") {
83 if (nof_threads_range.size() > 2) {
84 REQUIRE(std::is_sorted(cache_sim_times.rbegin(), cache_sim_times.rend()));
86 "we check that the standard deviaton is above 25% from the mean for simulation "
88 const auto cache_mean =
89 std::accumulate(cache_sim_times.begin(), cache_sim_times.end(), 0.0) /
90 cache_sim_times.size();
92 std::accumulate(cache_sim_times.begin(),
93 cache_sim_times.end(),
95 [cache_mean](
double a,
double b) {
96 return a + (b - cache_mean) * (b - cache_mean);
98 cache_sim_times.size());
99 REQUIRE(cache_std_dev / cache_mean > 0.2);
101 std::cout <<
"[parallel][cache][standard deviation] : " << cache_std_dev
105 WARN(
"Not enough threads to test parallel performance KPI");
110 SECTION(
"Test serial mode",
"[NEURON][multicore][serial]") {
111 static std::vector<double> sim_times;
112 GIVEN(
"we do prun() over each nof_threads{nof_threads_range} with serial mode on") {
113 auto nof_threads = GENERATE_COPY(from_range(nof_threads_range));
114 THEN(
"we run the serial simulation with " << nof_threads <<
" threads") {
118 auto start = std::chrono::high_resolution_clock::now();
119 REQUIRE(
hoc_oc(
"prun()") == 0);
120 auto end = std::chrono::high_resolution_clock::now();
121 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
122 sim_times.push_back(
duration.count());
126 THEN(
"we assert all simulations ran") {
127 REQUIRE(sim_times.size() == nof_threads_range.size());
129 THEN(
"we print the results") {
130 std::cout <<
"[serial][simulation times] : " << std::endl;
133 <<
"cache=1" << std::endl;
134 for (
auto i = 0;
i < sim_times.size(); ++
i) {
135 std::cout << nof_threads_range[
i] <<
"\t" << sim_times[
i] << std::endl;
138 THEN(
"we assert sim_times have under 10% standard deviation from the mean") {
139 if (nof_threads_range.size() > 2) {
140 const auto mean = std::accumulate(sim_times.begin(), sim_times.end(), 0.0) /
143 std::inner_product(sim_times.begin(), sim_times.end(), sim_times.begin(), 0.0);
144 const auto stdev =
std::sqrt(sq_sum / sim_times.size() - mean * mean);
146 std::cout <<
"[serial][standard deviation] : " << stdev << std::endl;
147 REQUIRE(stdev < 0.1 * mean);
149 WARN(
"Not enough threads to test serial performance KPI");
154 SECTION(
"Test busywait parallel mode",
"[NEURON][multicore][parallel][busywait]") {
155 WHEN(
"busywait is set to 1") {
156 THEN(
"set thread_busywait to 1") {
157 REQUIRE(
hoc_oc(
"pc.thread_busywait(1)") == 0);
159 static std::vector<double> sim_times;
160 GIVEN(
"we do prun() over each nof_threads{nof_threads_range} with serial mode on") {
161 auto nof_threads = GENERATE_COPY(from_range(nof_threads_range));
162 THEN(
"we run the parallel busywait simulation with " << nof_threads <<
" threads") {
166 auto start = std::chrono::high_resolution_clock::now();
167 REQUIRE(
hoc_oc(
"prun()") == 0);
168 auto end = std::chrono::high_resolution_clock::now();
169 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end -
171 sim_times.push_back(
duration.count());
175 THEN(
"we assert all simulations ran") {
176 REQUIRE(sim_times.size() == nof_threads_range.size());
178 THEN(
"we print the results") {
179 std::cout <<
"[parallel][busywait][simulation times] : " << std::endl;
182 <<
"cache=1" << std::endl;
183 for (
auto i = 0;
i < sim_times.size(); ++
i) {
184 std::cout << nof_threads_range[
i] <<
"\t" << sim_times[
i] << std::endl;
187 THEN(
"we assert sim_times have over 20% standard deviation from the mean") {
188 if (nof_threads_range.size() > 2) {
189 const auto mean = std::accumulate(sim_times.begin(), sim_times.end(), 0.0) /
191 const auto sq_sum = std::inner_product(sim_times.begin(),
195 const auto stdev =
std::sqrt(sq_sum / sim_times.size() - mean * mean);
196 std::cout <<
"[parallel][busywait][standard deviation] : " << stdev
199 REQUIRE(stdev > 0.2 * mean);
201 WARN(
"Not enough threads to test busywait+parallel performance KPI");
int hoc_oc(const char *buf)
void nrn_threads_create(int n)
std::string to_string(const T &obj)
auto make_available_threads_range()
std::size_t nof_worker_threads()
TEST_CASE("Multicore unit and performance testing", "[NEURON][multicore]")
constexpr auto pass_cell_template
constexpr auto prun
prun requires a parallel context (pc) to be created beforehand