// HiCR: computeResource.hpp source file (/home/runner/work/HiCR/HiCR/include/hicr/backends/hwloc/computeResource.hpp)
/*
 *   Copyright 2025 Huawei Technologies Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 
#pragma once
 
#include <unordered_set>
#include <hwloc.h>
#include <hicr/backends/hwloc/cache.hpp>
#include <hicr/core/definitions.hpp>
#include <hicr/core/exceptions.hpp>
#include <hicr/core/computeResource.hpp>
 
namespace HiCR::backend::hwloc
{
 
/**
 * Compute resource of the hwloc backend. An instance describes one hwloc processing
 * unit (HWLOC_OBJ_PU): its logical index, the logical index of its parent core, the
 * NUMA node it is attached to, and the caches found among its ancestors.
 *
 * Instances are either detected from an hwloc topology, built from explicit values,
 * or deserialized from JSON.
 */
class ComputeResource final : public HiCR::ComputeResource
{
  public:

  /**
   * Identifier of a processing unit: the logical index used to look up an
   * HWLOC_OBJ_PU object in the hwloc topology.
   */
  using logicalProcessorId_t = unsigned int;

  /**
   * Identifier of the core that hosts a processing unit: the logical index of the
   * PU's parent HWLOC_OBJ_CORE object.
   */
  using physicalProcessorId_t = unsigned int;

  /**
   * Identifier of the NUMA domain a processing unit belongs to: the logical index of
   * the matching hwloc memory object.
   */
  using numaAffinity_t = unsigned int;

  /**
   * Constructor that detects all the information of a processing unit from an hwloc topology.
   *
   * Raises a runtime exception (through HICR_THROW_RUNTIME) if the processing unit,
   * its core or its NUMA domain cannot be found.
   *
   * @param[in] topology           The hwloc topology to query
   * @param[in] logicalProcessorId Logical index of the processing unit within \p topology
   */
  ComputeResource(hwloc_topology_t topology, const logicalProcessorId_t logicalProcessorId)
    : HiCR::ComputeResource(),
      _logicalProcessorId(logicalProcessorId),
      _physicalProcessorId(detectPhysicalProcessorId(topology, logicalProcessorId)),
      _numaAffinity(detectCoreNUMAffinity(topology, logicalProcessorId)),
      _caches(detectCpuCaches(topology, logicalProcessorId))
  {}

  /**
   * Constructor that takes all the information explicitly; no hwloc query is made.
   *
   * @param[in] logicalProcessorId  Logical index of the processing unit
   * @param[in] physicalProcessorId Logical index of the core hosting the processing unit
   * @param[in] numaAffinity        Logical index of the NUMA domain of the processing unit
   * @param[in] caches              Set of caches associated to the processing unit
   */
  ComputeResource(const logicalProcessorId_t                                 logicalProcessorId,
                  const physicalProcessorId_t                                physicalProcessorId,
                  const numaAffinity_t                                       numaAffinity,
                  std::unordered_set<std::shared_ptr<backend::hwloc::Cache>> caches)
    : HiCR::ComputeResource(),
      _logicalProcessorId(logicalProcessorId),
      _physicalProcessorId(physicalProcessorId),
      _numaAffinity(numaAffinity),
      _caches(std::move(caches))
  {}

  /**
   * Default destructor
   */
  ~ComputeResource() override = default;

  /**
   * Default constructor: all identifiers are zero-initialized and the cache set is empty.
   */
  ComputeResource() = default;

  /**
   * @return The string "Processing Unit", which identifies this kind of compute resource
   */
  __INLINE__ std::string getType() const override { return "Processing Unit"; }

  /**
   * @return The logical processor identifier stored in this instance
   */
  __INLINE__ logicalProcessorId_t getProcessorId() const { return _logicalProcessorId; }

  /**
   * @return The physical processor (core) identifier stored in this instance
   */
  __INLINE__ physicalProcessorId_t getPhysicalProcessorId() const { return _physicalProcessorId; }

  /**
   * Deserializing constructor: forwards \p input to deserialize().
   *
   * Intentionally left non-explicit so that existing callers relying on the implicit
   * conversion keep compiling.
   *
   * @param[in] input JSON-encoded description of the compute resource
   */
  ComputeResource(const nlohmann::json &input) { deserialize(input); }

  /**
   * Recursively collects the logical index of every childless object (arity == 0)
   * found in the subtree rooted at \p obj.
   *
   * @param[in]  topology  The hwloc topology; only forwarded to the recursive calls
   * @param[in]  obj       Root of the subtree to explore; a null object is ignored
   * @param[in]  depth     Current recursion depth; only forwarded (incremented) to the recursive calls
   * @param[out] threadPUs Vector to which the detected logical indexes are appended
   */
  __INLINE__ static void detectThreadPUs(hwloc_topology_t topology, hwloc_obj_t obj, int depth, std::vector<logicalProcessorId_t> &threadPUs)
  {
    if (obj == nullptr) return;

    // A childless object is a leaf: record it and stop descending
    if (obj->arity == 0)
    {
      threadPUs.push_back(obj->logical_index);
      return;
    }

    for (unsigned int childIdx = 0; childIdx < obj->arity; childIdx++) detectThreadPUs(topology, obj->children[childIdx], depth + 1, threadPUs);
  }

  /**
   * Detects the core that hosts a given processing unit.
   *
   * Raises a runtime exception if the processing unit does not exist or if its parent
   * is missing or is not an HWLOC_OBJ_CORE object.
   *
   * @param[in] topology           The hwloc topology to query
   * @param[in] logicalProcessorId Logical index of the processing unit
   * @return The logical index of the parent core object
   */
  __INLINE__ static physicalProcessorId_t detectPhysicalProcessorId(hwloc_topology_t topology, const logicalProcessorId_t logicalProcessorId)
  {
    const hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, logicalProcessorId);
    if (pu == nullptr) HICR_THROW_RUNTIME("Attempting to access a compute resource that does not exist (%u) in this backend", logicalProcessorId);

    // Acquire the parent core object.
    // There is an assumption here that a HWLOC_OBJ_PU object always has a parent of type HWLOC_OBJ_CORE.
    // That assumption is verified below instead of being trusted.
    const hwloc_obj_t core = pu->parent;
    if (core == nullptr || core->type != HWLOC_OBJ_CORE) HICR_THROW_RUNTIME("Unexpected hwloc object type while trying to access Core/CPU (%u)", logicalProcessorId);

    return core->logical_index;
  }

  /**
   * Detects the NUMA domain of a given processing unit.
   *
   * Equivalent to getCpuNumaAffinity(), to which it delegates.
   *
   * @param[in] topology           The hwloc topology to query
   * @param[in] logicalProcessorId Logical index of the processing unit
   * @return The logical index of the NUMA domain the processing unit belongs to
   */
  __INLINE__ static numaAffinity_t detectCoreNUMAffinity(hwloc_topology_t topology, const logicalProcessorId_t logicalProcessorId)
  {
    return getCpuNumaAffinity(topology, logicalProcessorId);
  }

  /**
   * Detects the caches that serve a given processing unit by walking up its ancestors.
   *
   * Raises a runtime exception if the processing unit does not exist.
   *
   * @param[in] topology           The hwloc topology to query
   * @param[in] logicalProcessorId Logical index of the processing unit
   * @return One Cache object per cache-type ancestor of the processing unit
   */
  __INLINE__ static std::unordered_set<std::shared_ptr<backend::hwloc::Cache>> detectCpuCaches(hwloc_topology_t topology, const logicalProcessorId_t logicalProcessorId)
  {
    const hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, logicalProcessorId);
    if (pu == nullptr) HICR_THROW_RUNTIME("Attempting to access a compute resource that does not exist (%u) in this backend", logicalProcessorId);

    std::unordered_set<std::shared_ptr<backend::hwloc::Cache>> detectedCaches;

    // Start one level above the leaf/PU and climb up to the root
    for (hwloc_obj_t ancestor = pu->parent; ancestor != nullptr; ancestor = ancestor->parent)
    {
      // Deduce the cache level from the object type; any non-cache ancestor is skipped
      Cache::cacheLevel_t level = Cache::cacheLevel_t::L1;
      switch (ancestor->type)
      {
      case HWLOC_OBJ_L1CACHE:
      case HWLOC_OBJ_L1ICACHE: level = Cache::cacheLevel_t::L1; break;
      case HWLOC_OBJ_L2CACHE:
      case HWLOC_OBJ_L2ICACHE: level = Cache::cacheLevel_t::L2; break;
      case HWLOC_OBJ_L3CACHE:
      case HWLOC_OBJ_L3ICACHE: level = Cache::cacheLevel_t::L3; break;
      case HWLOC_OBJ_L4CACHE: level = Cache::cacheLevel_t::L4; break;
      case HWLOC_OBJ_L5CACHE: level = Cache::cacheLevel_t::L5; break;
      default: continue; // Not a cache object: move on to the next ancestor
      }

      // Discover the type: Instruction, Data or Unified
      std::string type = "Unknown";
      switch (ancestor->attr->cache.type)
      {
      case HWLOC_OBJ_CACHE_UNIFIED: type = "Unified"; break;
      case HWLOC_OBJ_CACHE_INSTRUCTION: type = "Instruction"; break;
      case HWLOC_OBJ_CACHE_DATA: type = "Data"; break;
      default: break;
      }

      // A cache with more than one child is reported as shared
      const bool shared   = ancestor->arity > 1;
      const auto size     = ancestor->attr->cache.size;
      const auto lineSize = ancestor->attr->cache.linesize;

      detectedCaches.insert(std::make_shared<backend::hwloc::Cache>(level, type, size, lineSize, shared));
    }

    return detectedCaches;
  }

  /**
   * Detects the NUMA domain of a given processing unit.
   *
   * The search climbs from the processing unit to the closest ancestor that has memory
   * children, then picks the memory child whose OS index is set in the processing
   * unit's nodeset.
   *
   * Raises a runtime exception if the processing unit does not exist or if no matching
   * NUMA domain is found (including the case where no ancestor has memory children).
   *
   * @param[in] topology           The hwloc topology to query
   * @param[in] logicalProcessorId Logical index of the processing unit
   * @return The logical index of the NUMA domain the processing unit belongs to
   */
  __INLINE__ static numaAffinity_t getCpuNumaAffinity(hwloc_topology_t topology, const logicalProcessorId_t logicalProcessorId)
  {
    const hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, logicalProcessorId);
    if (pu == nullptr) HICR_THROW_RUNTIME("Attempting to access a compute resource that does not exist (%u) in this backend", logicalProcessorId);

    // pu is a leaf; climb its ancestors until one with memory children is found (or the root is passed)
    hwloc_obj_t ancestor = pu->parent;
    while (ancestor != nullptr && ancestor->memory_arity == 0) ancestor = ancestor->parent;

    // Iterate over the memory children (the likely behavior though is to run only once).
    // Both the ancestor and every sibling pointer are checked before being dereferenced.
    hwloc_obj_t numaNode = nullptr;
    if (ancestor != nullptr)
    {
      hwloc_obj_t candidate = ancestor->memory_first_child;
      for (unsigned int memChild = 0; candidate != nullptr && memChild < ancestor->memory_arity; memChild++, candidate = candidate->next_sibling)
        if (hwloc_obj_type_is_memory(candidate->type) && hwloc_bitmap_isset(pu->nodeset, candidate->os_index))
        {
          numaNode = candidate;
          break;
        }
    }

    if (numaNode == nullptr) HICR_THROW_RUNTIME("NUMA Domain not detected for compute resource (%u)", logicalProcessorId);

    return static_cast<numaAffinity_t>(numaNode->logical_index);
  }

  protected:

  /**
   * Writes the identifiers and the caches of this compute resource into \p output.
   *
   * @param[out] output JSON object receiving the "Logical Processor Id", "Physical Processor Id",
   *                    "NUMA Affinity" and "Caches" entries
   */
  __INLINE__ void serializeImpl(nlohmann::json &output) const override
  {
    // Writing core's information into the serialized object
    output["Logical Processor Id"]  = _logicalProcessorId;
    output["Physical Processor Id"] = _physicalProcessorId;
    output["NUMA Affinity"]         = _numaAffinity;

    // Writing Cache information; the entry is always an array, even when there are no caches
    const std::string cachesKey = "Caches";
    output[cachesKey]           = std::vector<nlohmann::json>();
    for (const auto &cache : _caches) output[cachesKey] += cache->serialize();
  }

  /**
   * Reads the identifiers and the caches of this compute resource from \p input.
   *
   * Raises a logic exception (through HICR_THROW_LOGIC) if a required entry is missing
   * or has the wrong JSON type. Fields are assigned in order, so entries read before
   * the failing one have already been stored.
   *
   * @param[in] input JSON object holding the "Logical Processor Id", "Physical Processor Id",
   *                  "NUMA Affinity" (numbers) and "Caches" (array) entries
   */
  __INLINE__ void deserializeImpl(const nlohmann::json &input) override
  {
    _logicalProcessorId  = getRequiredNumber<logicalProcessorId_t>(input, "Logical Processor Id");
    _physicalProcessorId = getRequiredNumber<physicalProcessorId_t>(input, "Physical Processor Id");
    _numaAffinity        = getRequiredNumber<numaAffinity_t>(input, "NUMA Affinity");

    const std::string cachesKey = "Caches";
    if (input.contains(cachesKey) == false) HICR_THROW_LOGIC("The serialized object contains no '%s' key", cachesKey.c_str());
    if (input[cachesKey].is_array() == false) HICR_THROW_LOGIC("The '%s' entry is not an array", cachesKey.c_str());

    // Replace any previously held caches with the deserialized ones
    _caches.clear();
    for (const auto &serializedCache : input[cachesKey]) _caches.insert(std::make_shared<backend::hwloc::Cache>(serializedCache));
  }

  private:

  /**
   * Extracts a mandatory numeric entry from a JSON object.
   *
   * Raises a logic exception if the entry is missing or is not a number.
   *
   * @tparam T Type the entry is converted to
   * @param[in] input JSON object to read from
   * @param[in] key   Name of the entry
   * @return The value of the entry converted to \p T
   */
  template <typename T>
  __INLINE__ static T getRequiredNumber(const nlohmann::json &input, const std::string &key)
  {
    if (input.contains(key) == false) HICR_THROW_LOGIC("The serialized object contains no '%s' key", key.c_str());
    if (input[key].is_number() == false) HICR_THROW_LOGIC("The '%s' entry is not a number", key.c_str());
    return input[key].get<T>();
  }

  /**
   * Logical index of the processing unit this instance represents
   */
  logicalProcessorId_t _logicalProcessorId{};

  /**
   * Logical index of the core hosting the processing unit
   */
  physicalProcessorId_t _physicalProcessorId{};

  /**
   * Logical index of the NUMA domain the processing unit belongs to
   */
  numaAffinity_t _numaAffinity{};

  /**
   * Caches associated to the processing unit
   */
  std::unordered_set<std::shared_ptr<backend::hwloc::Cache>> _caches;
};
 
} // namespace HiCR::backend::hwloc