14 #ifdef CORENEURON_USE_BOOST_POOL
15 #include <boost/pool/pool_alloc.hpp>
16 #include <unordered_map>
25 #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
26 defined(_OPENMP) && defined(__CUDACC__)
27 #define CORENRN_HOST_DEVICE __host__ __device__
29 #define CORENRN_HOST_DEVICE
33 #ifdef CORENEURON_USE_BOOST_POOL
// Allocation policy exposing static malloc()/free() plus the size_type and
// difference_type aliases; it is passed as the second template argument of
// boost::fast_pool_allocator in the random123_allocator alias further down.
// Every block handed out is recorded, together with its size in bytes, in a
// mutex-protected map so that free() can look the size up again.
// NOTE(review): several lines of this struct (the definition of `buffer`,
// the use of `size`, and the closing braces) are not visible in this listing.
44 struct random123_allocate_unified {
45 using size_type = std::size_t;
46 using difference_type = std::size_t;
// Records `bytes` as the size of `buffer` and returns `buffer` as a char*.
// NOTE(review): the statement that obtains `buffer` is elided from this
// listing -- presumably a unified-memory allocation of `bytes` bytes; confirm.
47 static char* malloc(
const size_type bytes) {
// Hold m_mutex for the rest of the scope while m_block_sizes is modified.
48 std::lock_guard<std::mutex>
const lock{m_mutex};
// Explicitly discard the otherwise unreferenced lock object (presumably to
// avoid an unused-variable diagnostic).
49 static_cast<void>(
lock);
51 m_block_sizes[buffer] = bytes;
52 return reinterpret_cast<char*
>(buffer);
// Looks up the size recorded for `block`, then removes its map entry.
// NOTE(review): the statement that consumes `size` is elided from this
// listing -- presumably the matching deallocation; confirm.
54 static void free(
char*
const block) {
// Hold m_mutex for the rest of the scope while m_block_sizes is read/modified.
55 std::lock_guard<std::mutex>
const lock{m_mutex};
56 static_cast<void>(
lock);
57 auto const iter = m_block_sizes.find(block);
// `block` must have been registered by malloc(); this is only checked when
// assert is enabled.
58 assert(iter != m_block_sizes.end());
// Copy the size out before erasing, since erase invalidates `iter`.
59 auto const size = iter->second;
60 m_block_sizes.erase(iter);
// Locked by both malloc() and free() before they touch m_block_sizes.
63 static std::mutex m_mutex;
// Maps each block pointer registered in malloc() to its size in bytes.
64 static std::unordered_map<void*, std::size_t> m_block_sizes;
67 std::mutex random123_allocate_unified::m_mutex{};
68 std::unordered_map<void*, std::size_t> random123_allocate_unified::m_block_sizes{};
70 using random123_allocator =
71 boost::fast_pool_allocator<coreneuron::nrnran123_State, random123_allocate_unified>;
73 using random123_allocator = coreneuron::unified_allocator<coreneuron::nrnran123_State>;
81 std::size_t g_instance_count{};
87 #define g_k_qualifiers __device__ __constant__
89 #define g_k_qualifiers
103 return philox4x32(
s->c, random123_global::global_state());
108 return g_instance_count;
113 return random123_global::global_state().v[0];
119 auto&
g_k = random123_global::global_state();
121 std::lock_guard<OMP_Mutex> _{g_instance_count_mutex};
124 <<
"nrnran123_set_globalindex(" << gix
125 <<
") called when a non-zero number of Random123 streams (" << g_instance_count
126 <<
") were active. This is not safe, some streams will remember the old value ("
127 <<
g_k.v[0] <<
')' << std::endl;
130 if (
g_k.v[0] != gix) {
135 auto const code = cudaMemcpyToSymbol(
g_k, &
g_k,
sizeof(
g_k));
136 assert(code == cudaSuccess);
139 auto const code = cudaDeviceSynchronize();
140 assert(code == cudaSuccess);
175 bool use_unified_memory) {
181 #ifndef CORENEURON_ENABLE_GPU
182 if (use_unified_memory) {
183 throw std::runtime_error(
"Tried to use CUDA unified memory in a non-GPU build.");
187 if (use_unified_memory) {
188 s = coreneuron::allocate_unique<nrnran123_State>(random123_allocator{}).release();
198 std::lock_guard<OMP_Mutex> _{g_instance_count_mutex};
206 #ifndef CORENEURON_ENABLE_GPU
207 if (use_unified_memory) {
208 throw std::runtime_error(
"Tried to use CUDA unified memory in a non-GPU build.");
212 std::lock_guard<OMP_Mutex> _{g_instance_count_mutex};
215 if (use_unified_memory) {
216 std::unique_ptr<nrnran123_State, coreneuron::alloc_deleter<random123_allocator>> _{
s};
#define CORENRN_HOST_DEVICE
CORENRN_HOST_DEVICE philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State *s)
nrn_pragma_acc(routine seq) nrn_pragma_omp(declare target) philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron nrn_pragma_omp(end declare target) namespace coreneuron
Provide a helper function in global namespace that is declared target for OpenMP offloading to functi...
void nrnran123_setseq(nrnran123_State *s, std::uint32_t seq, char which)
Set a Random123 sequence for a sequence ID and which selector.
THIS FILE IS AUTO GENERATED, DON'T MODIFY IT.
nrnran123_State * nrnran123_newstream3(uint32_t id1, uint32_t id2, uint32_t id3, bool use_unified_memory)
Allocate a new Random123 stream.
void nrnran123_set_globalindex(uint32_t gix)
void * allocate_unified(std::size_t num_bytes)
void update(NrnThread *_nt)
void deallocate_unified(void *ptr, std::size_t num_bytes)
void nrnran123_deletestream(nrnran123_State *s, bool use_unified_memory)
nrn_pragma_acc(routine seq) int vector_capacity(void *v)
void nrnran123_destroy_global_state_on_device()
std::size_t nrnran123_instance_count()
void nrnran123_initialise_global_state_on_device()
uint32_t nrnran123_get_globalindex()
__attribute__((noinline)) philox4x32_key_t &global_state()
g_k_qualifiers philox4x32_key_t g_k