computationKernel.hpp Source File

HiCR: /home/runner/work/HiCR/HiCR/include/hicr/backends/ascend/computationKernel.hpp Source File
HiCR
/*
 *   Copyright 2025 Huawei Technologies Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 
#pragma once
 
#include <filesystem>
#include <fstream>
#include <iostream>
#include <regex>
#include <vector>
#include <acl/acl.h>
#include <hicr/core/localMemorySlot.hpp>
#include <hicr/core/exceptions.hpp>
#include <hicr/backends/ascend/localMemorySlot.hpp>
#include <hicr/backends/ascend/kernel.hpp>
 
namespace HiCR::backend::ascend
{
/**
 * Kernel that wraps a single ACL operator, identified by name, together with the
 * tensor descriptors, data buffers and attributes it is launched with.
 *
 * The class only stores the raw ACL pointers it receives; it never releases them.
 * They must therefore remain valid for as long as start() may be called.
 */
class ComputationKernel final : public Kernel
{
  public:

  /**
   * Data buffer and tensor descriptor describing one input or output of the operator.
   */
  struct tensorData_t
  {
    /// ACL data buffer holding the tensor contents
    const aclDataBuffer *dataBuffer;
    /// ACL descriptor associated with the tensor
    const aclTensorDesc *tensorDescriptor;
  };

  /**
   * Creates a kernel for an operator without loading any operator binary from disk.
   *
   * @param[in] kernelName  Operator name passed to the ACL runtime on execution. Must be a non-null, null-terminated string
   * @param[in] inputs      Tensor data of the operator inputs, in the order they are passed to the operator
   * @param[in] outputs     Tensor data of the operator outputs, in the order they are passed to the operator
   * @param[in] kernelAttrs Operator attributes forwarded to the ACL runtime on execution
   */
  ComputationKernel(const char *kernelName, const std::vector<tensorData_t> &inputs, const std::vector<tensorData_t> &outputs, const aclopAttr *kernelAttrs)
    : Kernel(),
      _kernelName(kernelName),
      _kernelAttrs(kernelAttrs)
  {
    // Split the tensor data into the parallel descriptor/buffer arrays handed to ACL on execution
    initializeDataBuffersAndDescriptors(inputs, _inputTensorDescriptors, _inputDataBuffers);
    initializeDataBuffersAndDescriptors(outputs, _outputTensorDescriptors, _outputDataBuffers);
  }

  /**
   * Creates a kernel for an operator and loads its binary from a file.
   *
   * @param[in] kernelPath  Path of the file containing the operator binary. Must be a non-null, null-terminated string
   * @param[in] kernelName  Operator name passed to the ACL runtime on execution. Must be a non-null, null-terminated string
   * @param[in] inputs      Tensor data of the operator inputs, in the order they are passed to the operator
   * @param[in] outputs     Tensor data of the operator outputs, in the order they are passed to the operator
   * @param[in] kernelAttrs Operator attributes forwarded to the ACL runtime on execution
   *
   * Throws a HiCR runtime exception if the file cannot be opened or fully read, or if the ACL
   * runtime rejects the binary. std::filesystem::file_size may additionally throw
   * std::filesystem::filesystem_error if the size of the file cannot be determined.
   */
  ComputationKernel(const char *kernelPath, const char *kernelName, const std::vector<tensorData_t> &inputs, const std::vector<tensorData_t> &outputs, const aclopAttr *kernelAttrs)
    : ComputationKernel(kernelName, inputs, outputs, kernelAttrs)
  {
    // Read the operator binary and hand it to the ACL runtime
    loadKernel(std::string(kernelPath));
  }

  ComputationKernel()  = delete;
  ~ComputationKernel() = default;

  /**
   * Builds the tensor data for an operator input/output out of a memory slot and a tensor descriptor.
   *
   * @param[in] memorySlot       Memory slot providing the data buffer. It must be an Ascend backend memory slot
   * @param[in] tensorDescriptor ACL descriptor to associate with the data buffer
   * @return The tensor data referencing the data buffer of the slot and the given descriptor
   *
   * Throws a HiCR logic exception if the memory slot does not belong to the Ascend backend.
   */
  static tensorData_t createTensorData(const std::shared_ptr<HiCR::LocalMemorySlot> &memorySlot, aclTensorDesc *tensorDescriptor)
  {
    // Down-cast to the backend-specific slot type; the result is null if the slot belongs to another backend
    const auto ascendSlot = std::dynamic_pointer_cast<ascend::LocalMemorySlot>(memorySlot);

    // Checking whether the memory slot passed is compatible with this backend
    if (ascendSlot == nullptr) HICR_THROW_LOGIC("Attempting to create Ascend tensor data with a memory slot that is not supported by this backend\n");

    // Creating and returning new tensor
    return tensorData_t{.dataBuffer = ascendSlot->getDataBuffer(), .tensorDescriptor = tensorDescriptor};
  }

  /**
   * Launches the operator on the given stream through aclopExecuteV2.
   *
   * @param[in] stream ACL stream on which the operator is launched
   *
   * Throws a HiCR runtime exception if the ACL call does not return ACL_SUCCESS.
   */
  __INLINE__ void start(const aclrtStream stream) override
  {
    // The descriptors, buffers and attributes are stored as pointers-to-const, while the ACL entry
    // point takes them as pointers-to-non-const; the const qualifier is dropped explicitly here
    const aclError err = aclopExecuteV2(_kernelName.c_str(),
                                        static_cast<int>(_inputTensorDescriptors.size()),
                                        const_cast<aclTensorDesc **>(_inputTensorDescriptors.data()),
                                        const_cast<aclDataBuffer **>(_inputDataBuffers.data()),
                                        static_cast<int>(_outputTensorDescriptors.size()),
                                        const_cast<aclTensorDesc **>(_outputTensorDescriptors.data()),
                                        const_cast<aclDataBuffer **>(_outputDataBuffers.data()),
                                        const_cast<aclopAttr *>(_kernelAttrs),
                                        stream);

    if (err != ACL_SUCCESS) HICR_THROW_RUNTIME("Failed to run the kernel. Error %d", err);
  }

  private:

  /// Operator name passed to the ACL runtime on execution
  const std::string _kernelName;

  /// Operator attributes passed to the ACL runtime on execution (not owned)
  const aclopAttr *_kernelAttrs;

  /// Descriptors of the input tensors (not owned)
  std::vector<const aclTensorDesc *> _inputTensorDescriptors;

  /// Descriptors of the output tensors (not owned)
  std::vector<const aclTensorDesc *> _outputTensorDescriptors;

  /// Data buffers of the input tensors (not owned)
  std::vector<const aclDataBuffer *> _inputDataBuffers;

  /// Data buffers of the output tensors (not owned)
  std::vector<const aclDataBuffer *> _outputDataBuffers;

  /// Raw bytes of the operator binary read by loadKernel(); empty if no file was loaded
  std::string _kernelPtr;

  /// Size in bytes of the operator binary read by loadKernel(); zero if no file was loaded
  size_t _kernelSize = 0;

  /**
   * Appends the descriptor and data buffer of every tensor to the given containers, preserving order.
   *
   * @param[in]  tensors     Tensor data to split
   * @param[out] descriptors Container receiving the tensor descriptors
   * @param[out] dataBuffers Container receiving the data buffers
   */
  __INLINE__ void initializeDataBuffersAndDescriptors(const std::vector<tensorData_t>    &tensors,
                                                      std::vector<const aclTensorDesc *> &descriptors,
                                                      std::vector<const aclDataBuffer *> &dataBuffers)
  {
    // Grow each container once instead of on every insertion
    descriptors.reserve(descriptors.size() + tensors.size());
    dataBuffers.reserve(dataBuffers.size() + tensors.size());

    for (const auto &tensor : tensors)
    {
      dataBuffers.push_back(tensor.dataBuffer);
      descriptors.push_back(tensor.tensorDescriptor);
    }
  }

  /**
   * Reads the operator binary stored in a file and passes it to the ACL runtime through aclopLoad.
   *
   * @param[in] kernelPath Path of the file containing the operator binary
   *
   * Throws a HiCR runtime exception if the file cannot be opened or fully read, or if aclopLoad does
   * not return ACL_SUCCESS. std::filesystem::file_size may throw std::filesystem::filesystem_error.
   */
  __INLINE__ void loadKernel(const std::string &kernelPath)
  {
    // get size of file to know how much memory to allocate
    _kernelSize = static_cast<size_t>(std::filesystem::file_size(kernelPath));

    // allocate buffer to hold file
    _kernelPtr.resize(_kernelSize);

    // open the file; the stream is closed by its destructor on every exit path
    std::ifstream fin(kernelPath, std::ios::binary);
    if (!fin.is_open()) HICR_THROW_RUNTIME("Could not open kernel file %s", kernelPath.c_str());

    // read file
    fin.read(_kernelPtr.data(), static_cast<std::streamsize>(_kernelSize));

    // gcount() is a std::streamsize; cast it so that it matches the conversion specifier
    if (!fin) HICR_THROW_RUNTIME("Error reading file could only read %lld bytes", static_cast<long long>(fin.gcount()));

    fin.close();

    // register the operator in the ACL runtime
    const aclError err = aclopLoad(_kernelPtr.data(), _kernelSize);

    if (err != ACL_SUCCESS) HICR_THROW_RUNTIME("Failed to load kernel into memory. Error %d", err);
  }
};
 
} // namespace HiCR::backend::ascend