/*
 * Copyright 2025 Huawei Technologies Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <mpi.h>
#include <set>
#include <cstring>
#include <hicr/core/definitions.hpp>
#include <hicr/core/communicationManager.hpp>
#include "localMemorySlot.hpp"
#include "globalMemorySlot.hpp"

namespace HiCR::backend::mpi
{

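/**
 * Implementation of the HiCR communication manager for the MPI backend.
 *
 * Exchanges memory slots across MPI processes and implements one-sided data transfers
 * (MPI_Get / MPI_Put) over MPI RMA windows.
 */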
class CommunicationManager : public HiCR::CommunicationManager
{
  public:

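  /**
   * Constructor for the MPI backend communication manager
   *
   * \param[in] comm The MPI communicator to use; defaults to MPI_COMM_WORLD
   */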
  CommunicationManager(MPI_Comm comm = MPI_COMM_WORLD)
    : HiCR::CommunicationManager(),
      _comm(comm)
  {
    MPI_Comm_size(_comm, &_size);
    MPI_Comm_rank(_comm, &_rank);
  }

  ~CommunicationManager() override = default;

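  /**
   * \return The MPI communicator used by this communication manager
   */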
  [[nodiscard]] const MPI_Comm getComm() const { return _comm; }

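  /**
   * \return The number of ranks in the communicator
   */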
  [[nodiscard]] const int getSize() const { return _size; }

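  /**
   * \return The MPI rank of the calling process within the communicator
   */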
  [[nodiscard]] const int getRank() const { return _rank; }

  protected:

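  /**
   * Backend-specific fence implementation: barriers on the communicator and then collectively
   * destroys any global memory slots marked for destruction under the given tag.
   *
   * \param[in] tag The tag to fence on
   */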
  virtual __INLINE__ void fenceImpl(HiCR::GlobalMemorySlot::tag_t tag) override
  {
    MPI_Barrier(_comm);

    // Call the slot destruction collective routine
    destroyGlobalMemorySlotsCollectiveImpl(tag);
  }

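  /**
   * Collectively promotes local memory slots to global memory slots under the given tag,
   * creating the MPI RMA windows (data, received-message count, sent-message count) that back
   * each global slot.
   *
   * \param[in] tag The tag under which the slots are exchanged
   * \param[in] memorySlots Key/slot pairs to promote from this process
   */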
  virtual __INLINE__ void exchangeGlobalMemorySlotsImpl(HiCR::GlobalMemorySlot::tag_t tag, const std::vector<globalKeyMemorySlotPair_t> &memorySlots) override
  {
    // Obtaining the number of local slots to exchange
    int localSlotCount = (int)memorySlots.size();

    // Obtaining the local slots to exchange per process in the communicator
    std::vector<int> perProcessSlotCount(_size);

    MPI_Allgather(&localSlotCount, 1, MPI_INT, perProcessSlotCount.data(), 1, MPI_INT, _comm);

    // Calculating respective offsets
    std::vector<int> perProcessSlotOffsets(_size);
    int currentOffset = 0;
    for (int i = 0; i < _size; i++)
    {
      perProcessSlotOffsets[i] += currentOffset;
      currentOffset += perProcessSlotCount[i];
    }

    // Calculating number of global slots
    int globalSlotCount = 0;
    for (const auto count : perProcessSlotCount) globalSlotCount += count;

    // Allocating storage for local and global memory slot sizes, keys and process ids
    std::vector<size_t> localSlotSizes(localSlotCount);
    std::vector<size_t> globalSlotSizes(globalSlotCount);
    std::vector<HiCR::GlobalMemorySlot::globalKey_t> localSlotKeys(localSlotCount);
    std::vector<HiCR::GlobalMemorySlot::globalKey_t> globalSlotKeys(globalSlotCount);
    std::vector<int> localSlotProcessId(localSlotCount);
    std::vector<int> globalSlotProcessId(globalSlotCount);

    // Filling in the local size and keys storage
    for (size_t i = 0; i < memorySlots.size(); i++)
    {
      const auto key = memorySlots[i].first;
      const auto memorySlot = std::dynamic_pointer_cast<HiCR::backend::mpi::LocalMemorySlot>(memorySlots[i].second);
      if (memorySlot.get() == nullptr) HICR_THROW_LOGIC("Trying to use MPI to promote a non-MPI local memory slot.");
      localSlotSizes[i] = memorySlot->getSize();
      localSlotKeys[i] = key;
      localSlotProcessId[i] = _rank;
    }

    // Exchanging global sizes, keys and process ids
    MPI_Allgatherv(
      localSlotSizes.data(), localSlotCount, MPI_UNSIGNED_LONG, globalSlotSizes.data(), perProcessSlotCount.data(), perProcessSlotOffsets.data(), MPI_UNSIGNED_LONG, _comm);
    MPI_Allgatherv(
      localSlotKeys.data(), localSlotCount, MPI_UNSIGNED_LONG, globalSlotKeys.data(), perProcessSlotCount.data(), perProcessSlotOffsets.data(), MPI_UNSIGNED_LONG, _comm);
    MPI_Allgatherv(localSlotProcessId.data(), localSlotCount, MPI_INT, globalSlotProcessId.data(), perProcessSlotCount.data(), perProcessSlotOffsets.data(), MPI_INT, _comm);

    // Also creating a pointer vector to remember the local pointers, needed later for memcpys
    std::vector<void **> globalSlotPointers(globalSlotCount);
    std::vector<std::shared_ptr<HiCR::LocalMemorySlot>> globalSourceSlots(globalSlotCount);
    size_t localPointerPos = 0;
    for (size_t i = 0; i < globalSlotPointers.size(); i++)
    {
      // If the rank associated with this slot is remote, don't store the pointer; otherwise, store it
      if (globalSlotProcessId[i] != _rank)
      {
        globalSlotPointers[i] = nullptr;
        globalSourceSlots[i] = nullptr;
      }
      else
      {
        const auto memorySlot = memorySlots[localPointerPos++].second;
        globalSlotPointers[i] = &memorySlot->getPointer();
        globalSourceSlots[i] = memorySlot;
      }
    }

    // Now creating global slots and their MPI windows
    for (size_t i = 0; i < globalSlotProcessId.size(); i++)
    {
      // Creating new memory slot object
      auto memorySlot = std::make_shared<mpi::GlobalMemorySlot>(globalSlotProcessId[i], tag, globalSlotKeys[i], globalSourceSlots[i]);

      // Allocating MPI windows
      memorySlot->getDataWindow() = std::make_unique<MPI_Win>();
      memorySlot->getRecvMessageCountWindow() = std::make_unique<MPI_Win>();
      memorySlot->getSentMessageCountWindow() = std::make_unique<MPI_Win>();

      // Temporary storage for the pointer returned by MPI_Win_allocate. We will assign it as the new internal storage of the local memory slot
      void *ptr = nullptr;

      // Creating MPI window for data transferring
      auto status = MPI_Win_allocate(globalSlotProcessId[i] == _rank ? (int)globalSlotSizes[i] : 0, 1, MPI_INFO_NULL, _comm, &ptr, memorySlot->getDataWindow().get());
      MPI_Win_set_errhandler(*memorySlot->getDataWindow(), MPI_ERRORS_RETURN);

      if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("Failed to create MPI data window on exchange global memory slots.");

      // Unfortunately, we need to do an effective duplication of the original local memory slot storage,
      // since no modern MPI library supports MPI_Win_create over user-allocated storage anymore
      if (globalSlotProcessId[i] == _rank)
      {
        // Copying existing data over to the new storage
        std::memcpy(ptr, *(globalSlotPointers[i]), globalSlotSizes[i]);

        // Freeing up the old memory
        MPI_Free_mem(*(globalSlotPointers[i]));

        // Swapping pointers
        *(globalSlotPointers[i]) = ptr;
      }

      // Creating MPI window for received message count transferring
      status = MPI_Win_allocate(globalSlotProcessId[i] == _rank ? sizeof(size_t) : 0, 1, MPI_INFO_NULL, _comm, &ptr, memorySlot->getRecvMessageCountWindow().get());
      MPI_Win_set_errhandler(*memorySlot->getRecvMessageCountWindow(), MPI_ERRORS_RETURN);

      if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("Failed to create MPI received message count window on exchange global memory slots.");

      // Creating MPI window for sent message count transferring
      status = MPI_Win_allocate(globalSlotProcessId[i] == _rank ? sizeof(size_t) : 0, 1, MPI_INFO_NULL, _comm, &ptr, memorySlot->getSentMessageCountWindow().get());
      MPI_Win_set_errhandler(*memorySlot->getSentMessageCountWindow(), MPI_ERRORS_RETURN);

      if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("Failed to create MPI sent message count window on exchange global memory slots.");

      // Registering global slot
      registerGlobalMemorySlot(memorySlot);
    }
  }

  private:

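  /**
   * Backend-specific implementation of global memory slot destruction: frees the MPI windows
   * (data, received-message count, sent-message count) associated with the slot.
   *
   * \param[in] memorySlotPtr The global memory slot to destroy; must belong to this backend
   */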
  __INLINE__ void destroyGlobalMemorySlotImpl(std::shared_ptr<HiCR::GlobalMemorySlot> memorySlotPtr) override
  {
    // Getting up-casted pointer for the memory slot
    auto memorySlot = dynamic_pointer_cast<mpi::GlobalMemorySlot>(memorySlotPtr);

    // Checking whether the memory slot passed is compatible with this backend
    if (memorySlot == nullptr) HICR_THROW_LOGIC("The memory slot is not supported by this backend\n");

    auto status = MPI_Win_free(memorySlot->getDataWindow().get());
    if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("On deregister global memory slot, could not free MPI data window");

    status = MPI_Win_free(memorySlot->getRecvMessageCountWindow().get());
    if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("On deregister global memory slot, could not free MPI recv message count window");

    status = MPI_Win_free(memorySlot->getSentMessageCountWindow().get());
    if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("On deregister global memory slot, could not free MPI sent message count window");
  }

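  /**
   * Acquires an exclusive lock on the data window of the given global memory slot.
   *
   * \param[in] memorySlot The global memory slot to lock; must belong to this backend
   * \return Always true, as this blocking MPI lock is assumed to succeed
   */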
  __INLINE__ bool acquireGlobalLockImpl(std::shared_ptr<HiCR::GlobalMemorySlot> memorySlot) override
  {
    // Getting up-casted pointer for the memory slot
    auto m = dynamic_pointer_cast<mpi::GlobalMemorySlot>(memorySlot);

    // Checking whether the memory slot passed is compatible with this backend
    if (m == nullptr) HICR_THROW_LOGIC("The passed memory slot is not supported by this backend\n");

    // Locking access to all relevant memory slot windows
    lockMPIWindow(m->getRank(), m->getDataWindow().get(), MPI_LOCK_EXCLUSIVE, 0);

    // Setting memory slot lock as acquired
    m->setLockAcquiredValue(true);

    // This function is assumed to always succeed
    return true;
  }

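  /**
   * Releases a previously acquired lock on the data window of the given global memory slot.
   *
   * \param[in] memorySlot The global memory slot to unlock; must belong to this backend
   */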
  __INLINE__ void releaseGlobalLockImpl(std::shared_ptr<HiCR::GlobalMemorySlot> memorySlot) override
  {
    // Getting up-casted pointer for the memory slot
    auto m = dynamic_pointer_cast<mpi::GlobalMemorySlot>(memorySlot);

    // Checking whether the memory slot passed is compatible with this backend
    if (m == nullptr) HICR_THROW_LOGIC("The passed memory slot is not supported by this backend\n");

    // Releasing access to all relevant memory slot windows
    unlockMPIWindow(m->getRank(), m->getDataWindow().get());

    // Setting memory slot lock as released
    m->setLockAcquiredValue(false);
  }

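  /**
   * Backend-specific implementation of getGlobalMemorySlot. Not supported by the MPI backend;
   * always returns nullptr.
   */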
  std::shared_ptr<HiCR::GlobalMemorySlot> getGlobalMemorySlotImpl(HiCR::GlobalMemorySlot::tag_t tag, HiCR::GlobalMemorySlot::globalKey_t globalKey) override { return nullptr; }

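  /**
   * Collectively destroys all global memory slots marked for destruction under the given tag.
   * Every process exchanges the keys it wants destroyed; the deduplicated union of keys is then
   * destroyed on all processes, since freeing MPI windows is a collective operation.
   *
   * \param[in] tag The tag whose pending slots are to be destroyed
   */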
  __INLINE__ void destroyGlobalMemorySlotsCollectiveImpl(HiCR::GlobalMemorySlot::tag_t tag)
  {
    // Destruction of global memory slots marked for destruction
    // Note: MPI_Allgather expects int (not size_t) counts, so we have to work with int here
    int localDestroySlotsCount = (int)getGlobalMemorySlotsToDestroyPerTag()[tag].size();
    std::vector<int> perProcessDestroySlotCount(_size);

    // Obtaining the number of slots to destroy per process in the communicator
    MPI_Allgather(&localDestroySlotsCount, 1, MPI_INT, perProcessDestroySlotCount.data(), 1, MPI_INT, _comm);

    // Calculating respective offsets; TODO fix offset types for both this method and exchangeGlobalMemorySlotsImpl
    std::vector<int> perProcessSlotOffsets(_size);
    int currentOffset = 0;
    for (int i = 0; i < _size; i++)
    {
      perProcessSlotOffsets[i] += currentOffset;
      currentOffset += perProcessDestroySlotCount[i];
    }

    // Calculating number of global slots to destroy
    int globalDestroySlotsCount = 0;
    for (const auto count : perProcessDestroySlotCount) globalDestroySlotsCount += count;

    // If there are no slots to destroy from any instance, return to avoid a second round of collectives
    if (globalDestroySlotsCount == 0) return;

    // Allocating storage for global memory slot keys
    std::vector<HiCR::GlobalMemorySlot::globalKey_t> localDestroySlotKeys(localDestroySlotsCount);
    std::vector<HiCR::GlobalMemorySlot::globalKey_t> globalDestroySlotKeys(globalDestroySlotsCount);

    // Filling in the local keys storage
    for (auto i = 0; i < localDestroySlotsCount; i++)
    {
      const auto memorySlot = getGlobalMemorySlotsToDestroyPerTag()[tag][i];
      const auto key = memorySlot->getGlobalKey();
      localDestroySlotKeys[i] = key;
    }

    // Exchanging global keys
    MPI_Allgatherv(localDestroySlotKeys.data(),
                   localDestroySlotsCount,
                   MPI_UNSIGNED_LONG,
                   globalDestroySlotKeys.data(),
                   perProcessDestroySlotCount.data(),
                   perProcessSlotOffsets.data(),
                   MPI_UNSIGNED_LONG,
                   _comm);

    // Deduplicating the global keys, as more than one process might want to destroy the same key
    std::set<HiCR::GlobalMemorySlot::globalKey_t> globalDestroySlotKeysSet(globalDestroySlotKeys.begin(), globalDestroySlotKeys.end());

    // Now we can iterate over the global slots to destroy one by one
    for (auto key : globalDestroySlotKeysSet)
    {
      std::shared_ptr<HiCR::GlobalMemorySlot> memorySlot = nullptr;
      // Getting the memory slot to destroy
      // First check the standard map
      if (getGlobalMemorySlotTagKeyMap()[tag].contains(key))
      {
        memorySlot = getGlobalMemorySlotTagKeyMap()[tag].at(key);
        // Deregister it here, so that a later destroy call does not try (and fail) to destroy it again
        getGlobalMemorySlotTagKeyMap()[tag].erase(key);
      }
      // If not found, check the deregistered map
      else if (_deregisteredGlobalMemorySlotsTagKeyMap[tag].contains(key))
      {
        memorySlot = _deregisteredGlobalMemorySlotsTagKeyMap[tag].at(key);
        _deregisteredGlobalMemorySlotsTagKeyMap[tag].erase(key);
      }
      else
        HICR_THROW_FATAL("Could not find memory slot to destroy in this backend. Tag: %lu, Key: %lu", tag, key);

      // Destroying the memory slot collectively; there might be a case where the slot is not found, due to double calls to destroy
      destroyGlobalMemorySlotImpl(memorySlot);
    }
  }

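  /**
   * Data transfer from a (possibly remote) global memory slot into a local memory slot,
   * implemented as an MPI_Get on the source slot's data window followed by a flush. Also updates
   * the source's sent-message counter and the destination's received-message counter.
   */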
  __INLINE__ void memcpyImpl(const std::shared_ptr<HiCR::LocalMemorySlot> &destinationSlot,
                             size_t dst_offset,
                             const std::shared_ptr<HiCR::GlobalMemorySlot> &sourceSlotPtr,
                             size_t sourceOffset,
                             size_t size) override
  {
    // Getting up-casted pointer for the source memory slot
    auto source = dynamic_pointer_cast<mpi::GlobalMemorySlot>(sourceSlotPtr);

    // Checking whether the memory slot passed is compatible with this backend
    if (source == nullptr) HICR_THROW_LOGIC("The passed source memory slot is not supported by this backend\n");

    // Getting rank of the source process
    const auto sourceRank = source->getRank();

    // Check if we already acquired a lock on the memory slot
    bool isSourceSlotLockAcquired = source->getLockAcquiredValue();

    // Calculating destination pointer
    auto destinationPointer = (void *)(static_cast<uint8_t *>(destinationSlot->getPointer()) + dst_offset);

    // Getting data window for the source process
    auto sourceDataWindow = source->getDataWindow().get();

    // Getting sent message count window for the source process
    auto sourceSentMessageWindow = source->getSentMessageCountWindow().get();

    // Locking the MPI window to ensure the message arrives before returning. This will not exclude other processes from accessing the data (MPI_LOCK_SHARED)
    if (isSourceSlotLockAcquired == false) lockMPIWindow(sourceRank, sourceDataWindow, MPI_LOCK_SHARED, MPI_MODE_NOCHECK);

    // Executing the get operation
    {
      auto status = MPI_Get(destinationPointer, (int)size, MPI_BYTE, sourceRank, (int)sourceOffset, (int)size, MPI_BYTE, *sourceDataWindow);

      if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("Failed to run MPI_Get");
    }

    // Making sure the operation finished
    {
      auto status = MPI_Win_flush(sourceRank, *sourceDataWindow);

      if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("Failed to run MPI_Win_flush");
    }

    // Unlocking window, if taken, after the copy is completed
    if (isSourceSlotLockAcquired == false) unlockMPIWindow(sourceRank, sourceDataWindow);

    // Increasing the remote sent message counter and local destination received message counter
    increaseWindowCounter(sourceRank, sourceSentMessageWindow);
    increaseMessageRecvCounter(*destinationSlot);
  }

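  /**
   * Data transfer from a local memory slot into a (possibly remote) global memory slot,
   * implemented as an MPI_Put on the destination slot's data window followed by a flush. Also
   * updates the local sent-message counter and the destination's received-message counter.
   */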
  __INLINE__ void memcpyImpl(const std::shared_ptr<HiCR::GlobalMemorySlot> &destinationSlotPtr,
                             size_t dst_offset,
                             const std::shared_ptr<HiCR::LocalMemorySlot> &sourceSlot,
                             size_t sourceOffset,
                             size_t size) override
  {
    // Getting up-casted pointer for the destination memory slot
    auto destination = dynamic_pointer_cast<mpi::GlobalMemorySlot>(destinationSlotPtr);

    // Checking whether the memory slot passed is compatible with this backend
    if (destination == nullptr) HICR_THROW_LOGIC("The passed destination memory slot is not supported by this backend\n");

    // Getting rank of the destination process
    const auto destinationRank = destination->getRank();

    // Check if we already acquired a lock on the memory slot
    bool isDestinationSlotLockAcquired = destination->getLockAcquiredValue();

    // Calculating source pointer
    auto sourcePointer = (void *)(static_cast<uint8_t *>(sourceSlot->getPointer()) + sourceOffset);

    // Getting data window for the destination process
    auto destinationDataWindow = destination->getDataWindow().get();

    // Getting recv message count window for the destination process
    auto destinationRecvMessageWindow = destination->getRecvMessageCountWindow().get();

    // Locking the MPI window to ensure the message arrives before returning. This will not exclude other processes from accessing the data (MPI_LOCK_SHARED)
    if (isDestinationSlotLockAcquired == false) lockMPIWindow(destinationRank, destinationDataWindow, MPI_LOCK_SHARED, MPI_MODE_NOCHECK);

    // Executing the put operation
    {
      auto status = MPI_Put(sourcePointer, (int)size, MPI_BYTE, destinationRank, (int)dst_offset, (int)size, MPI_BYTE, *destinationDataWindow);

      if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("Failed to run data MPI_Put");
    }

    // Making sure the operation finished
    {
      auto status = MPI_Win_flush(destinationRank, *destinationDataWindow);

      if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("Failed to run data MPI_Win_flush");
    }

    // Unlocking window, if taken, after the copy is completed
    if (isDestinationSlotLockAcquired == false) unlockMPIWindow(destinationRank, destinationDataWindow);

    // Increasing the remote received message counter and local sent message counter
    increaseMessageSentCounter(*sourceSlot);
    increaseWindowCounter(destinationRank, destinationRecvMessageWindow);
  }

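  /**
   * Backend-specific implementation of queryMemorySlotUpdates. This is a no-op for the MPI
   * backend, as message counters are updated directly by the memcpy implementations.
   */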
  __INLINE__ void queryMemorySlotUpdatesImpl(std::shared_ptr<HiCR::LocalMemorySlot> memorySlot) override {}

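  /**
   * Backend-specific deregistration of a global memory slot. The slot is kept in a separate map
   * so that its MPI windows can still be freed collectively during a later fence/destroy.
   *
   * \param[in] memorySlot The global memory slot to deregister; must belong to this backend
   */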
  __INLINE__ void deregisterGlobalMemorySlotImpl(const std::shared_ptr<HiCR::GlobalMemorySlot> &memorySlot) override
  {
    // Getting up-casted pointer for the slot
    auto slot = dynamic_pointer_cast<mpi::GlobalMemorySlot>(memorySlot);

    // Checking whether the slot passed is compatible with this backend
    if (slot == nullptr) HICR_THROW_LOGIC("The memory slot is not supported by this backend\n");

    // Getting the slot information
    const auto tag = slot->getGlobalTag();
    const auto key = slot->getGlobalKey();

    // Storing the deregistered slot; at this point its (MPI) type is guaranteed to be correct
    _deregisteredGlobalMemorySlotsTagKeyMap[tag][key] = slot;
  }

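  /**
   * The internal MPI communicator used by this communication manager
   */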
  const MPI_Comm _comm;

  int _size{};

  int _rank{};

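  /**
   * Tag/key map of global memory slots that have been deregistered but not yet destroyed, kept
   * so that their MPI windows can still be freed collectively
   */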
  HiCR::CommunicationManager::globalMemorySlotTagKeyMap_t _deregisteredGlobalMemorySlotsTagKeyMap{};

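  /**
   * Helper to lock an MPI window on a given rank, throwing a descriptive exception on failure
   *
   * \param[in] rank Target rank owning the window
   * \param[in] window Pointer to the MPI window to lock
   * \param[in] MPILockType Either MPI_LOCK_EXCLUSIVE or MPI_LOCK_SHARED
   * \param[in] MPIAssert Assertion flags passed to MPI_Win_lock (e.g., MPI_MODE_NOCHECK)
   */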
  __INLINE__ void lockMPIWindow(int rank, MPI_Win *window, int MPILockType, int MPIAssert)
  {
    // Locking the MPI window on the target rank
    int mpiStatus = MPI_Win_lock(MPILockType, rank, MPIAssert, *window);
    if (mpiStatus != MPI_SUCCESS)
    {
      char err_string[MPI_MAX_ERROR_STRING];
      int len;
      MPI_Error_string(mpiStatus, err_string, &len);
      HICR_THROW_LOGIC("MPI_Win_lock failed for rank %d: %s", rank, err_string);
    }
  }

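  /**
   * Helper to unlock a previously locked MPI window on a given rank, throwing on failure
   *
   * \param[in] rank Target rank owning the window
   * \param[in] window Pointer to the MPI window to unlock
   */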
  __INLINE__ void unlockMPIWindow(int rank, MPI_Win *window)
  {
    // Unlocking the MPI window on the target rank
    int mpiStatus = MPI_Win_unlock(rank, *window);
    if (mpiStatus != MPI_SUCCESS)
    {
      char err_string[MPI_MAX_ERROR_STRING];
      int len;
      MPI_Error_string(mpiStatus, err_string, &len);
      HICR_THROW_LOGIC("MPI_Win_unlock failed for rank %d: %s", rank, err_string);
    }
  }

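  /**
   * Atomically increments the message counter stored behind the given window on the given rank,
   * using a lock / MPI_Fetch_and_op / unlock sequence.
   *
   * \param[in] rank Target rank owning the counter window
   * \param[in] window Pointer to the MPI window holding the counter
   */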
  __INLINE__ void increaseWindowCounter(int rank, MPI_Win *window)
  {
    // This operation should be possible to do in one go with MPI_Accumulate or MPI_Fetch_and_op. However, the current implementation of OpenMPI deadlocks
    // on these operations, so we do the whole thing manually instead.

    // Locking the MPI window to ensure the update completes before returning
    lockMPIWindow(rank, window, MPI_LOCK_EXCLUSIVE, 0);

    // Use an atomic MPI operation to increment the counter
    const size_t one = 1;
    size_t value = 0;

    // There is no datatype in MPI for size_t (the counters), but
    // MPI_AINT is supposed to be large enough and portable
    auto status = MPI_Fetch_and_op(&one, &value, MPI_AINT, rank, 0, MPI_SUM, *window);

    // Checking execution status
    if (status != MPI_SUCCESS) HICR_THROW_RUNTIME("Failed to increase remote message counter (on operation: MPI_Fetch_and_op) for rank %d, MPI Window pointer %p", rank, window);

    // Unlocking window after the update is completed
    unlockMPIWindow(rank, window);
  }

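  /**
   * Local-to-local data transfer between two local memory slots, implemented as a plain
   * std::memcpy. Updates the received and sent message counters of the involved slots.
   */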
  __INLINE__ void memcpyImpl(const std::shared_ptr<HiCR::LocalMemorySlot> &destination,
                             const size_t dst_offset,
                             const std::shared_ptr<HiCR::LocalMemorySlot> &source,
                             const size_t src_offset,
                             const size_t size) override
  {
    // Getting slot pointers
    const auto srcPtr = source->getPointer();
    const auto dstPtr = destination->getPointer();

    // Calculating actual offsets
    const auto actualSrcPtr = (void *)(static_cast<uint8_t *>(srcPtr) + src_offset);
    const auto actualDstPtr = (void *)(static_cast<uint8_t *>(dstPtr) + dst_offset);

    // Running memcpy now
    std::memcpy(actualDstPtr, actualSrcPtr, size);

    // Increasing recv/send counters
    increaseMessageRecvCounter(*destination);
    increaseMessageSentCounter(*source);
  }
};

} // namespace HiCR::backend::mpi