测试用例
import mindspore
import numpy as np
import mindspore.nn as nn
from mindspore import context, Tensor
# Run the test case on the CPU backend in graph mode.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
# Dense(32, 16): weights initialised to ones, bias to 1.2.
# The optional activation='relu' argument is intentionally left out.
net = nn.Dense(32, 16, weight_init='ones', bias_init=1.2)
# All-ones float32 input of shape (48, 32).
ones_48x32 = np.ones([48, 32]).astype(np.float32)
input_data = Tensor(ones_48x32, mindspore.float32)
output = net(input_data)
print(output.asnumpy())
# NOTE(review): this looks like the XPU variant of the set_context call at the
# start of the test case, meant to be used in its place once the "XPU" target
# has been registered -- confirm against the original article.
context.set_context(device_target="XPU", mode=context.GRAPH_MODE)
添加新的device target参数选项
首先需要在前端ME的Python层添加新的valid_targets:https://gitee.com/mindspore/mindspore/blob/r1.1/mindspore/context.py
def set_device_target(self, target):
    """Validate ``target`` and store it as the context's device target.

    Args:
        target: Device name. Must be one of "CPU", "GPU", "Ascend",
            "Davinci" or "XPU". "Davinci" is stored as "Ascend".

    Raises:
        ValueError: If ``target`` is not one of the valid names.
    """
    # Add any new backend name to this list.
    valid_targets = ["CPU", "GPU", "Ascend", "Davinci", "XPU"]
    if target not in valid_targets:
        raise ValueError(f"Target device name {target} is invalid! It must be one of {valid_targets}")
    if target == "Davinci":
        # "Davinci" is accepted as an alias and recorded as "Ascend".
        target = "Ascend"
    self.set_param(ms_ctx_param.device_target, target)
    if self.enable_debug_runtime and target == "CPU":
        # With the debug runtime enabled, selecting CPU also switches the
        # backend policy to "vm".
        self.set_backend_policy("vm")
接着需要在C++的ms context组件中添加新的target:https://gitee.com/mindspore/mindspore/blob/r1.1/mindspore/core/utils/ms_context.h
// Execution mode identifiers.
const int kGraphMode = 0;
const int kPynativeMode = 1;
// Device target name strings.
const char kCPUDevice[] = "CPU";
const char kGPUDevice[] = "GPU";
const char kXPUDevice[] = "XPU"; // newly added hardware target
const char kAscendDevice[] = "Ascend";
const char kDavinciInferenceDevice[] = "AscendInference";
const char kDavinciDevice[] = "Davinci";
const char KNpuLog[] = "_npu_log";
const unsigned int MAX_CALL_DEPTH_DEFAULT = 1000;
// Add the new hardware target to the set below.
const std::set<std::string> kTargetSet = {kCPUDevice, kGPUDevice, kXPUDevice, kAscendDevice, kDavinciDevice};
添加新的runtime device
#include <string>
#include <vector>
#include "runtime/device/device_address.h"
#include "utils/shape_utils.h"
namespace mindspore {
namespace device {
namespace xpu {
// DeviceAddress specialisation that identifies itself as an XPU address.
class XPUDeviceAddress : public DeviceAddress {
 public:
  // Both constructors forward their arguments unchanged to DeviceAddress.
  XPUDeviceAddress(void *ptr, size_t size) : DeviceAddress(ptr, size) {}
  XPUDeviceAddress(void *ptr, size_t size, const string &format, TypeId type_id)
      : DeviceAddress(ptr, size, format, type_id) {}
  ~XPUDeviceAddress() override = default;

  // Synchronisation hooks between device and host memory; only declared here,
  // the definitions are not part of this header.
  bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override;
  bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type,
                        const void *host_ptr) const override;

  // Reports the address type as DeviceAddressType::kXPU.
  DeviceAddressType DeviceType() const override { return DeviceAddressType::kXPU; }
};
} // namespace xpu
} // namespace device
} // namespace mindspore
#include <vector>
#include <map>
#include "backend/session/kernel_graph.h"
#include "backend/session/session_basic.h"
#include "runtime/device/device_address.h"
#include "runtime/device/xpu/xpu_simple_mem_plan.h"
namespace mindspore {
namespace device {
namespace xpu {
// Memory bookkeeping helper used by the XPU kernel runtime.
//
// NOTE(review): the class holds a raw mem_ptr_ and declares its own destructor
// while the copy operations stay implicit -- confirm instances are never copied.
class XPUResourceManager {
 public:
  XPUResourceManager() = default;
  ~XPUResourceManager();  // user-provided; defined outside this header

  // Graph-level memory assignment and per-address reference counting.
  void AssignMemory(const session::KernelGraph *graph);
  void IncreaseAddressRefCount(const session::KernelGraph *graph);
  void DecreaseAddressRefCount(const AnfNodePtr &kernel);

  // Allocation entry points taking an explicit size / pointer.
  void *MemMalloc(size_t mem_size);
  void MemFree(void *ptr);

 private:
  // Private no-argument overload of MemFree.
  void MemFree();

  XPUSimpleMemPlan mem_plan_;
  size_t mem_size_ = 0;
  uint8_t *mem_ptr_ = nullptr;
  bool dynamic_malloc_ = false;
  std::map<void *, size_t> dynamic_mem_;
};
} // namespace xpu
} // namespace device
} // namespace mindspore
#include <memory>
#include <vector>
#include <string>
#include <map>
#include <set>
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "backend/session/kernel_graph.h"
#include "backend/session/session_basic.h"
#include "runtime/device/xpu/xpu_resource_manager.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/any.h"
namespace mindspore {
namespace device {
namespace xpu {
// KernelRuntime implementation for the XPU device target.
class XPUKernelRuntime : public KernelRuntime {
 public:
  XPUKernelRuntime() = default;
  ~XPUKernelRuntime() override = default;

  // KernelRuntime overrides; definitions are not part of this header.
  bool Init() override;
  void ReleaseDeviceRes() override;
  bool Run(session::KernelGraph *graph, bool is_task_sink) override;

  // XPU-specific helpers operating on a kernel graph and its input tensors.
  void AssignKernelAddress(session::KernelGraph *kernel_graph);
  void CreateOutputTensors(session::KernelGraph *kernel_graph,
                           const std::vector<tensor::TensorPtr> &inputs,
                           VectorRef *outputs);
  void BindInputOutput(session::KernelGraph *kernel_graph,
                       const std::vector<tensor::TensorPtr> &inputs,
                       VectorRef *outputs);

 protected:
  // Always reports success.
  bool SyncStream() override { return true; }
  DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
                                       TypeId type_id) override;

 private:
  XPUResourceManager resource_manager_;
  std::set<DeviceAddressPtr> bound_addresses_;
  std::map<AnfNodePtr, tensor::TensorPtr> input_param_tensor_map_;
};
// Registers XPUKernelRuntime under the kXPUDevice target name.
MS_REG_KERNEL_RUNTIME(kXPUDevice, XPUKernelRuntime);
} // namespace xpu
} // namespace device
} // namespace mindspore
添加新的target session
#include <string>
#include <memory>
#include <map>
#include <vector>
#include "backend/session/session_basic.h"
#include "backend/session/kernel_graph.h"
#include "runtime/device/xpu/xpu_kernel_runtime.h" // use the new xpu kernel runtime
#include "backend/session/session_factory.h"
namespace mindspore {
namespace session {
// Session implementation for the XPU device target.
class XPUSession : public SessionBasic {
 public:
  XPUSession() = default;
  ~XPUSession() override = default;

  // Initialises the executor for kXPUDevice with the given device id.
  void Init(uint32_t device_id) override { InitExecutor(kXPUDevice, device_id); }

  // Graph compilation and execution entry points; defined outside this header.
  GraphId CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) override;
  void RunGraphImpl(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,
                    VectorRef *outputs) override;
  void Optimize(const std::shared_ptr<KernelGraph> &kernel_graph);

 protected:
  // Intentionally a no-op for this session.
  void UnifyMindIR(const KernelGraphPtr &graph) override {}
  void CreateOutputTensors(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &input_tensors,
                           VectorRef *,
                           std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) override;

 private:
  void SetKernelInfo(const KernelGraph *kernel_graph);
  void BuildKernel(const KernelGraph *kernel_graph);

  // Runtime looked up from KernelRuntimeManager for kXPUDevice with device id 0.
  // The dynamic_cast result is not checked here.
  device::xpu::XPUKernelRuntime *runtime_ = dynamic_cast<device::xpu::XPUKernelRuntime *>(
      device::KernelRuntimeManager::Instance().GetKernelRuntime(kXPUDevice, 0));
};
// Registers XPUSession under the kXPUDevice target name.
MS_REG_SESSION(kXPUDevice, XPUSession);
} // namespace session
} // namespace mindspore
在图编译(CompileGraphImpl(..))的步骤中,主要是为神经网络数据流图中的每个节点op生成相对应的kernel(BuildKernel(..)),并把每个节点的kernel信息保存在图中(SetKernelInfo(..)),以供后面的图执行(RunGraphImpl(..))步骤调用。
添加针对新硬件的kernel
#include <string>
#include <vector>
#include <memory>
#include <numeric>
#include <functional>
#include "backend/kernel_compiler/kernel.h"
#include "ir/anf.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/ms_utils.h"
using mindspore::kernel::Address;
using mindspore::kernel::AddressPtr;
namespace mindspore {
namespace kernel {
// Abstract base class for XPU kernels.
//
// Derived kernels implement InitKernel() and the three-argument Launch().
class XPUKernel : public kernel::KernelMod {
 public:
  XPUKernel() = default;
  ~XPUKernel() override = default;

  void Init(const CNodePtr &kernel_node);
  virtual void InitKernel(const CNodePtr &kernel_node) = 0;

  // KernelMod entry point: forwards to the three-argument Launch below and
  // does not use stream_ptr.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    return Launch(inputs, workspace, outputs);
  }
  virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                      const std::vector<AddressPtr> &outputs) = 0;

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  void SetOpName(const std::string &op_name) { op_name_ = op_name; }
  const std::string GetOpName() const { return op_name_; }

 protected:
  virtual void InitInputOutputSize(const CNodePtr &kernel_node);

  std::vector<size_t> input_size_list_ = {};
  std::vector<size_t> output_size_list_ = {};
  std::vector<size_t> workspace_size_list_ = {};
  std::string bin_path_ = {};
  std::string tilingName_ = {};
  // Operator name stored by SetOpName() and returned by GetOpName().
  std::string op_name_ = {};
};
} // namespace kernel
} // namespace mindspore
#include "backend/kernel_compiler/xpu/xpu_kernel.h" // xpu kernel base class
#include "backend/kernel_compiler/xpu/xpu_kernel_factory.h"
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <algorithm>
#include <fstream>
#include <iostream>
namespace mindspore {
namespace kernel {
// Example XPU kernel with two inputs and one output.
class TwoInOneOutXPUKernel : public XPUKernel {
 public:
  TwoInOneOutXPUKernel() = default;
  ~TwoInOneOutXPUKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 private:
  bool NeedsFormatTransformation();

  // Transpose flags; both default to TRANSPOSE_NO.
  char trans_a_{TRANSPOSE_NO};
  char trans_b_{TRANSPOSE_NO};
  // Presumably the M / N / K dimensions of the matrix product -- confirm in the implementation.
  int32_t dim_m_{0};
  int32_t dim_n_{0};
  int32_t dim_k_{0};
  // Shapes of the two inputs and the output.
  std::vector<size_t> inputA_shape_;
  std::vector<size_t> inputB_shape_;
  std::vector<size_t> output_shape_;
  // Sizes of the two inputs and the output.
  size_t input_a_size_{0};
  size_t input_b_size_{0};
  size_t output_size_{0};
  // Data pointers for the two inputs and the output; null until assigned.
  void *inputA_data_{nullptr};
  void *inputB_data_{nullptr};
  void *output_data_{nullptr};
};
// Registers the kernel for two float32 inputs and one float32 output.
MS_REG_XPU_KERNEL(TwoInOneOutXPU,
                  mindspore::device::xpu::KernelAttr()
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddOutputAttr(kNumberTypeFloat32),
                  TwoInOneOutXPUKernel);
} // namespace kernel
} // namespace mindspore
// Store the device shapes of input 0, input 1 and output 0 of kernel_node
// (presumably inside InitKernel, which receives kernel_node -- confirm).
inputA_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
inputB_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
· 获取算子属性信息,e.g. MatMul的转置信息:
// Read the boolean transpose attributes from kernel_node; the attribute keys
// TRANSPOSE_A / TRANSPOSE_B are defined outside this excerpt.
bool trans_a = AnfAlgo::GetNodeAttr<bool>(kernel_node, TRANSPOSE_A);
bool trans_b = AnfAlgo::GetNodeAttr<bool>(kernel_node, TRANSPOSE_B);
· 在Launch里获得输入、输出memory的指针:
// View the raw buffer addresses as float*, in line with the float32
// input/output attributes used when the kernel is registered.
auto input_a = reinterpret_cast<float *>(inputs[0]->addr);
auto input_b = reinterpret_cast<float *>(inputs[1]->addr);
auto output = reinterpret_cast<float *>(outputs[0]->addr);
其他注意事项
# Both matmul primitives are created with transpose_b=True.
self.matmul = P.MatMul(transpose_b=True)
self.batch_matmul = P.BatchMatMul(transpose_b=True)
# A string activation is resolved through get_activation(); any other value is kept as given.
self.activation = get_activation(activation) if isinstance(activation, str) else activation
if activation is not None and not isinstance(self.activation, (Cell, Primitive)):
    raise TypeError("The activation must be str or Cell or Primitive,"" but got {}.".format(activation))
self.activation_flag = self.activation is not None
· 对于Debug,可以添加下面的环境变量来帮助输出信息:
# Environment variables that help print more information while debugging.
# NOTE(review): GLOG_v=1 presumably selects INFO-level logging -- confirm
# against the MindSpore logging documentation.
export GLOG_v=1
export SLOG_PRINT_TO_STDOUT=1
· 对于CMake文件的修改,可以在开始测试时把新添加的文件都添加在if (ENABLE_CPU)下。CPU对于MindSpore相当于一个基线平台,也就是说,无论你build的是GPU还是华为的D/Ascend target,CPU相关的文件都会被build。
总结
了解完MindSpore的关键技术是不是很心动呢!赶紧【点击链接】并【立即报名】,即可在 ModelArts 平台通过一个经典案例,掌握基于MindSpore的深度学习!