NEURON
memory.h
/*
# =============================================================================
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================
*/

#pragma once

#include <cstdint>
#include <cstring>
#include <cstdlib>
#include <memory>

#if CORENRN_BUILD
#include "coreneuron/utils/nrn_assert.h"
#else
#include "oc/nrnassrt.h"
#endif

#if !defined(NRN_SOA_BYTE_ALIGN)
// for layout 0, every range variable array must be aligned to at least the
// width of the SIMD memory bus; the default below is 8 doubles (64 bytes),
// which also matches a typical cache line
#define NRN_SOA_BYTE_ALIGN (8 * sizeof(double))
#endif

#if CORENRN_BUILD
namespace coreneuron {
#else
namespace neuron {
#endif
/**
 * @brief Check if GPU support is enabled.
 *
 * This returns true if GPU support was enabled at compile time and at runtime
 * via coreneuron.gpu = True and/or --gpu, otherwise it returns false.
 */
bool gpu_enabled();

/** @brief Allocate unified memory if GPU support is compiled in and enabled,
 *  otherwise fall back to operator new.
 */
void* allocate_unified(std::size_t num_bytes);

/** @brief Deallocate memory allocated by `allocate_unified`.
 */
void deallocate_unified(void* ptr, std::size_t num_bytes);

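// Illustrative usage sketch (editor's example, not part of this header): the
// two raw routines are used as a matched pair, and the byte count passed to
// deallocate_unified should match the one given to allocate_unified.
//
//     double* buf = static_cast<double*>(allocate_unified(100 * sizeof(double)));
//     // ... use buf on the host (and on the device in GPU builds) ...
//     deallocate_unified(buf, 100 * sizeof(double));
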
/** @brief C++ allocator that uses [de]allocate_unified.
 */
template <typename T>
struct unified_allocator {
    using value_type = T;

    unified_allocator() = default;

    template <typename U>
    unified_allocator(unified_allocator<U> const&) noexcept {}

    value_type* allocate(std::size_t n) {
        return static_cast<value_type*>(allocate_unified(n * sizeof(value_type)));
    }

    void deallocate(value_type* p, std::size_t n) noexcept {
        deallocate_unified(p, n * sizeof(value_type));
    }
};
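
// Illustrative usage sketch (editor's example): unified_allocator satisfies
// the standard Allocator requirements, so it can back any allocator-aware
// container, placing its storage in unified memory in GPU builds.
//
//     std::vector<double, unified_allocator<double>> v(1024);
//     v[0] = 42.0;  // valid on the host; in GPU builds the same memory is
//                   // accessible from device code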

template <typename T, typename U>
bool operator==(unified_allocator<T> const&, unified_allocator<U> const&) noexcept {
    return true;
}

template <typename T, typename U>
bool operator!=(unified_allocator<T> const& x, unified_allocator<U> const& y) noexcept {
    return !(x == y);
}

/** @brief Allocator-aware deleter for use with std::unique_ptr.
 *
 * This is copied from https://stackoverflow.com/a/23132307. See also
 * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0316r0.html,
 * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0211r3.html, and
 * boost::allocate_unique<...>.
 * Hopefully std::allocate_unique will be included in C++23.
 */
template <typename Alloc>
struct alloc_deleter {
    alloc_deleter() = default;  // OL210813 addition
    alloc_deleter(const Alloc& a)
        : a(a) {}

    using pointer = typename std::allocator_traits<Alloc>::pointer;

    void operator()(pointer p) const {
        Alloc aa(a);
        std::allocator_traits<Alloc>::destroy(aa, std::addressof(*p));
        std::allocator_traits<Alloc>::deallocate(aa, p, 1);
    }

  private:
    Alloc a;
};

template <typename T, typename Alloc, typename... Args>
auto allocate_unique(const Alloc& alloc, Args&&... args) {
    using AT = std::allocator_traits<Alloc>;
    static_assert(std::is_same<typename AT::value_type, std::remove_cv_t<T>>{}(),
                  "Allocator has the wrong value_type");

    Alloc a(alloc);
    auto p = AT::allocate(a, 1);
    try {
        AT::construct(a, std::addressof(*p), std::forward<Args>(args)...);
        using D = alloc_deleter<Alloc>;
        return std::unique_ptr<T, D>(p, D(a));
    } catch (...) {
        AT::deallocate(a, p, 1);
        throw;
    }
}
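
// Illustrative usage sketch (editor's example): allocate_unique pairs an
// allocator with alloc_deleter so that the resulting unique_ptr both
// constructs and destroys through the allocator.
//
//     auto ptr = allocate_unique<double>(unified_allocator<double>{}, 3.14);
//     // *ptr lives in unified memory (in GPU builds) and is released via
//     // deallocate_unified when ptr goes out of scope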
}  // namespace coreneuron

/// for gpu builds with unified memory support
#ifdef CORENEURON_UNIFIED_MEMORY

#include <cuda_runtime_api.h>

// TODO: error handling for CUDA routines
inline void alloc_memory(void*& pointer, size_t num_bytes, size_t /*alignment*/) {
    cudaMallocManaged(&pointer, num_bytes);
}

inline void calloc_memory(void*& pointer, size_t num_bytes, size_t /*alignment*/) {
    alloc_memory(pointer, num_bytes, 64);
    cudaMemset(pointer, 0, num_bytes);
}

inline void free_memory(void* pointer) {
    cudaFree(pointer);
}
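
// Illustrative sketch (editor's example): the three helpers form a matched
// set; memory obtained from alloc_memory or calloc_memory must be released
// with free_memory.
//
//     void* p = nullptr;
//     calloc_memory(p, 256 * sizeof(double), 64);  // zero-initialised, managed
//     free_memory(p);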

/**
 * A base class providing overloaded new and delete operators for CUDA allocation
 *
 * Classes that should be allocated on the GPU should inherit from this class.
 * Additionally they may need to implement a special copy-constructor. This is
 * documented here: https://devblogs.nvidia.com/unified-memory-in-cuda-6/
 */
class MemoryManaged {
  public:
    void* operator new(size_t len) {
        void* ptr;
        cudaMallocManaged(&ptr, len);
        cudaDeviceSynchronize();
        return ptr;
    }

    void* operator new[](size_t len) {
        void* ptr;
        cudaMallocManaged(&ptr, len);
        cudaDeviceSynchronize();
        return ptr;
    }

    void operator delete(void* ptr) {
        cudaDeviceSynchronize();
        cudaFree(ptr);
    }

    void operator delete[](void* ptr) {
        cudaDeviceSynchronize();
        cudaFree(ptr);
    }
};
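
// Illustrative usage sketch (editor's example; DeviceVisible is a
// hypothetical name): a type that must be reachable from device code simply
// inherits the overloaded operators.
//
//     struct DeviceVisible: MemoryManaged {
//         double weight;
//     };
//     auto* s = new DeviceVisible;  // allocated with cudaMallocManaged
//     delete s;                     // freed with cudaFree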


/// for cpu builds use aligned allocation (std::aligned_alloc)
#else
class MemoryManaged {
    // does nothing by default
};

#include <cstdlib>

inline void alloc_memory(void*& pointer, size_t num_bytes, size_t alignment) {
    size_t fill = 0;
    if (alignment > 0) {
        // std::aligned_alloc requires the size to be a multiple of the
        // alignment, so round num_bytes up by `fill` bytes if needed
        if (num_bytes % alignment != 0) {
            size_t multiple = num_bytes / alignment;
            fill = alignment * (multiple + 1) - num_bytes;
        }
#ifndef _WIN32
        pointer = aligned_alloc(alignment, num_bytes + fill);
        nrn_assert(pointer != nullptr);
#else  // is _WIN32
        // Windows has _aligned_malloc, but that must be paired with
        // _aligned_free
        fprintf(stderr, "Windows has no std::aligned_alloc\n");
        nrn_assert((pointer = std::malloc(num_bytes)) != nullptr);
#endif  // is _WIN32
    } else {
        nrn_assert((pointer = std::malloc(num_bytes)) != nullptr);
    }
}
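
// Worked example (editor's note): with num_bytes = 100 and alignment = 64,
// multiple = 1 and fill = 64 * 2 - 100 = 28, so 128 bytes are requested,
// satisfying std::aligned_alloc's requirement that the size be a multiple
// of the alignment.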

inline void calloc_memory(void*& pointer, size_t num_bytes, size_t alignment) {
    alloc_memory(pointer, num_bytes, alignment);
    memset(pointer, 0, num_bytes);
}

inline void free_memory(void* pointer) {
    free(pointer);
}

#endif

#if CORENRN_BUILD
namespace coreneuron {
#else
namespace neuron {
#endif

/** Independent function to compute the needed padding: cnt is rounded up to
    the next multiple of chunk, where chunk is the number of doubles per chunk.
*/
template <int chunk>
inline int soa_padded_size(int cnt, int layout) {
#if CORENRN_BUILD
    if (layout == Layout::AoS) {
        return cnt;
    } else {
        return ((cnt + chunk - 1) / chunk) * chunk;
    }
#else
    return ((cnt + chunk - 1) / chunk) * chunk;
#endif
}
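
// Worked example (editor's note): with chunk = 8, soa_padded_size<8>(13, 0)
// returns ((13 + 7) / 8) * 8 = 16, padding a 13-element array to 16 doubles
// so that every column of an SoA layout keeps the same alignment.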

/** Check for the pointer alignment.
 */
inline bool is_aligned(void* pointer, std::size_t alignment) {
    return (reinterpret_cast<std::uintptr_t>(pointer) % alignment) == 0;
}

/**
 * Allocate aligned memory. This will be unified memory if the corresponding
 * CMake option is set. This must be freed with the free_memory method.
 *
 * \param size Size of buffer to allocate in bytes.
 * \param alignment Memory alignment, defaults to NRN_SOA_BYTE_ALIGN. Pass 0 for no alignment.
 */
inline void* emalloc_align(size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) {
    void* memptr;
    alloc_memory(memptr, size, alignment);
    if (alignment != 0) {
        nrn_assert(is_aligned(memptr, alignment));
    }
    return memptr;
}

/**
 * Allocate the aligned memory and set it to 0. This will be unified memory if
 * the corresponding CMake option is set. This must be freed with the
 * free_memory method.
 *
 * \param n Number of objects to allocate
 * \param size Size of buffer for each object to allocate in bytes.
 * \param alignment Memory alignment, defaults to NRN_SOA_BYTE_ALIGN. Pass 0 for no alignment.
 *
 * \note the allocated size will be \c n*size
 */
inline void* ecalloc_align(size_t n, size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) {
    void* p;
    if (n == 0) {
        return nullptr;
    }
    calloc_memory(p, n * size, alignment);
    if (alignment != 0) {
        nrn_assert(is_aligned(p, alignment));
    }
    return p;
}
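
// Illustrative usage sketch (editor's example): allocations from
// emalloc_align/ecalloc_align must be released with free_memory, never with
// plain free or delete, so CPU and unified-memory builds stay interchangeable.
//
//     double* data = static_cast<double*>(ecalloc_align(100, sizeof(double)));
//     // data is zeroed and aligned to NRN_SOA_BYTE_ALIGN (64 bytes)
//     free_memory(data);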
}  // namespace coreneuron