NEURON
memory.h
/*
# =============================================================================
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================
*/

#pragma once

#include <cstdint>
#include <cstring>
#include <cstdlib>
#include <memory>

#if CORENRN_BUILD
#include "coreneuron/utils/nrn_assert.h"
#else
#include "oc/nrnassrt.h"
#endif

#if !defined(NRN_SOA_BYTE_ALIGN)
// for layout 0, every range variable array must be aligned to at least the
// width of the SIMD memory bus; the default below is 8 doubles (64 bytes),
// which also matches a typical cache line
#define NRN_SOA_BYTE_ALIGN (8 * sizeof(double))
#endif

#if CORENRN_BUILD
namespace coreneuron {
#else
namespace neuron {
#endif
/**
 * @brief Check if GPU support is enabled.
 *
 * This returns true if GPU support was enabled at compile time and at runtime
 * via coreneuron.gpu = True and/or --gpu, otherwise it returns false.
 */
bool gpu_enabled();

/** @brief Allocate unified memory if GPU support is compiled in and enabled,
 *  otherwise fall back to operator new.
 */
void* allocate_unified(std::size_t num_bytes);

/** @brief Deallocate memory allocated by `allocate_unified`.
 */
void deallocate_unified(void* ptr, std::size_t num_bytes);

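// Illustrative usage sketch (editor's example, not part of this header): the
// two raw routines are used as a matched pair, and the byte count passed to
// deallocate_unified should match the one given to allocate_unified.
//
//     double* buf = static_cast<double*>(allocate_unified(100 * sizeof(double)));
//     // ... use buf on the host (and on the device in GPU builds) ...
//     deallocate_unified(buf, 100 * sizeof(double));
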
/** @brief C++ allocator that uses [de]allocate_unified.
 */
template <typename T>
struct unified_allocator {
    using value_type = T;

    unified_allocator() = default;

    template <typename U>
    unified_allocator(unified_allocator<U> const&) noexcept {}

    value_type* allocate(std::size_t n) {
        return static_cast<value_type*>(allocate_unified(n * sizeof(value_type)));
    }

    void deallocate(value_type* p, std::size_t n) noexcept {
        deallocate_unified(p, n * sizeof(value_type));
    }
};
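
// Illustrative usage sketch (editor's example): unified_allocator satisfies
// the standard Allocator requirements, so it can back any allocator-aware
// container, placing its storage in unified memory in GPU builds.
//
//     std::vector<double, unified_allocator<double>> v(1024);
//     v[0] = 42.0;  // valid on the host; in GPU builds the same memory is
//                   // accessible from device code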

template <typename T, typename U>
bool operator==(unified_allocator<T> const&, unified_allocator<U> const&) noexcept {
    return true;
}

template <typename T, typename U>
bool operator!=(unified_allocator<T> const& x, unified_allocator<U> const& y) noexcept {
    return !(x == y);
}

/** @brief Allocator-aware deleter for use with std::unique_ptr.
 *
 * This is copied from https://stackoverflow.com/a/23132307. See also
 * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0316r0.html,
 * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0211r3.html, and
 * boost::allocate_unique<...>.
 * Hopefully std::allocate_unique will be included in C++23.
 */
template <typename Alloc>
struct alloc_deleter {
    alloc_deleter() = default;  // OL210813 addition
    alloc_deleter(const Alloc& a)
        : a(a) {}

    using pointer = typename std::allocator_traits<Alloc>::pointer;

    void operator()(pointer p) const {
        Alloc aa(a);
        std::allocator_traits<Alloc>::destroy(aa, std::addressof(*p));
        std::allocator_traits<Alloc>::deallocate(aa, p, 1);
    }

  private:
    Alloc a;
};

template <typename T, typename Alloc, typename... Args>
auto allocate_unique(const Alloc& alloc, Args&&... args) {
    using AT = std::allocator_traits<Alloc>;
    static_assert(std::is_same<typename AT::value_type, std::remove_cv_t<T>>{}(),
                  "Allocator has the wrong value_type");

    Alloc a(alloc);
    auto p = AT::allocate(a, 1);
    try {
        AT::construct(a, std::addressof(*p), std::forward<Args>(args)...);
        using D = alloc_deleter<Alloc>;
        return std::unique_ptr<T, D>(p, D(a));
    } catch (...) {
        AT::deallocate(a, p, 1);
        throw;
    }
}
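
// Illustrative usage sketch (editor's example): allocate_unique pairs an
// allocator with alloc_deleter so that the resulting unique_ptr both
// constructs and destroys through the allocator.
//
//     auto ptr = allocate_unique<double>(unified_allocator<double>{}, 3.14);
//     // *ptr lives in unified memory (in GPU builds) and is released via
//     // deallocate_unified when ptr goes out of scope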
}  // namespace coreneuron

/// for gpu builds with unified memory support
#ifdef CORENEURON_UNIFIED_MEMORY

#include <cuda_runtime_api.h>

// TODO: error handling for CUDA routines
inline void alloc_memory(void*& pointer, size_t num_bytes, size_t /*alignment*/) {
    cudaMallocManaged(&pointer, num_bytes);
}

inline void calloc_memory(void*& pointer, size_t num_bytes, size_t /*alignment*/) {
    alloc_memory(pointer, num_bytes, 64);
    cudaMemset(pointer, 0, num_bytes);
}

inline void free_memory(void* pointer) {
    cudaFree(pointer);
}
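
// Illustrative sketch (editor's example): the three helpers form a matched
// set; memory obtained from alloc_memory or calloc_memory must be released
// with free_memory.
//
//     void* p = nullptr;
//     calloc_memory(p, 256 * sizeof(double), 64);  // zero-initialised, managed
//     free_memory(p);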

/**
 * A base class providing overloaded new and delete operators for CUDA allocation
 *
 * Classes that should be allocated on the GPU should inherit from this class.
 * Additionally they may need to implement a special copy-constructor. This is
 * documented here: https://devblogs.nvidia.com/unified-memory-in-cuda-6/
 */
class MemoryManaged {
  public:
    void* operator new(size_t len) {
        void* ptr;
        cudaMallocManaged(&ptr, len);
        cudaDeviceSynchronize();
        return ptr;
    }

    void* operator new[](size_t len) {
        void* ptr;
        cudaMallocManaged(&ptr, len);
        cudaDeviceSynchronize();
        return ptr;
    }

    void operator delete(void* ptr) {
        cudaDeviceSynchronize();
        cudaFree(ptr);
    }

    void operator delete[](void* ptr) {
        cudaDeviceSynchronize();
        cudaFree(ptr);
    }
};
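
// Illustrative usage sketch (editor's example; DeviceVisible is a
// hypothetical name): a type that must be reachable from device code simply
// inherits the overloaded operators.
//
//     struct DeviceVisible: MemoryManaged {
//         double weight;
//     };
//     auto* s = new DeviceVisible;  // allocated with cudaMallocManaged
//     delete s;                     // freed with cudaFree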


/// for cpu builds use aligned allocation (std::aligned_alloc)
#else
class MemoryManaged {
    // does nothing by default
};

#include <cstdlib>

inline void alloc_memory(void*& pointer, size_t num_bytes, size_t alignment) {
    size_t fill = 0;
    if (alignment > 0) {
        // std::aligned_alloc requires the size to be a multiple of the
        // alignment, so round num_bytes up by `fill` bytes if needed
        if (num_bytes % alignment != 0) {
            size_t multiple = num_bytes / alignment;
            fill = alignment * (multiple + 1) - num_bytes;
        }
#ifndef _WIN32
        pointer = aligned_alloc(alignment, num_bytes + fill);
        nrn_assert(pointer != nullptr);
#else  // is _WIN32
        // Windows has _aligned_malloc, but that must be paired with
        // _aligned_free
        fprintf(stderr, "Windows has no std::aligned_alloc\n");
        nrn_assert((pointer = std::malloc(num_bytes)) != nullptr);
#endif  // is _WIN32
    } else {
        nrn_assert((pointer = std::malloc(num_bytes)) != nullptr);
    }
}
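
// Worked example (editor's note): with num_bytes = 100 and alignment = 64,
// multiple = 1 and fill = 64 * 2 - 100 = 28, so 128 bytes are requested,
// satisfying std::aligned_alloc's requirement that the size be a multiple
// of the alignment.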

inline void calloc_memory(void*& pointer, size_t num_bytes, size_t alignment) {
    alloc_memory(pointer, num_bytes, alignment);
    memset(pointer, 0, num_bytes);
}

inline void free_memory(void* pointer) {
    free(pointer);
}

#endif

#if CORENRN_BUILD
namespace coreneuron {
#else
namespace neuron {
#endif

/** Independent function to compute the needed padding: cnt is rounded up to
    the next multiple of chunk, where chunk is the number of doubles per chunk.
*/
template <int chunk>
inline int soa_padded_size(int cnt, int layout) {
#if CORENRN_BUILD
    if (layout == Layout::AoS) {
        return cnt;
    } else {
        return ((cnt + chunk - 1) / chunk) * chunk;
    }
#else
    return ((cnt + chunk - 1) / chunk) * chunk;
#endif
}
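
// Worked example (editor's note): with chunk = 8, soa_padded_size<8>(13, 0)
// returns ((13 + 7) / 8) * 8 = 16, padding a 13-element array to 16 doubles
// so that every column of an SoA layout keeps the same alignment.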

/** Check for the pointer alignment.
 */
inline bool is_aligned(void* pointer, std::size_t alignment) {
    return (reinterpret_cast<std::uintptr_t>(pointer) % alignment) == 0;
}

/**
 * Allocate aligned memory. This will be unified memory if the corresponding
 * CMake option is set. This must be freed with the free_memory method.
 *
 * \param size Size of buffer to allocate in bytes.
 * \param alignment Memory alignment, defaults to NRN_SOA_BYTE_ALIGN. Pass 0 for no alignment.
 */
inline void* emalloc_align(size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) {
    void* memptr;
    alloc_memory(memptr, size, alignment);
    if (alignment != 0) {
        nrn_assert(is_aligned(memptr, alignment));
    }
    return memptr;
}

/**
 * Allocate the aligned memory and set it to 0. This will be unified memory if
 * the corresponding CMake option is set. This must be freed with the
 * free_memory method.
 *
 * \param n Number of objects to allocate
 * \param size Size of buffer for each object to allocate in bytes.
 * \param alignment Memory alignment, defaults to NRN_SOA_BYTE_ALIGN. Pass 0 for no alignment.
 *
 * \note the allocated size will be \c n*size
 */
inline void* ecalloc_align(size_t n, size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) {
    void* p;
    if (n == 0) {
        return nullptr;
    }
    calloc_memory(p, n * size, alignment);
    if (alignment != 0) {
        nrn_assert(is_aligned(p, alignment));
    }
    return p;
}
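
// Illustrative usage sketch (editor's example): allocations from
// emalloc_align/ecalloc_align must be released with free_memory, never with
// plain free or delete, so CPU and unified-memory builds stay interchangeable.
//
//     double* data = static_cast<double*>(ecalloc_align(100, sizeof(double)));
//     // data is zeroed and aligned to NRN_SOA_BYTE_ALIGN (64 bytes)
//     free_memory(data);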
}  // namespace coreneuron