diff --git a/CMakeLists.txt b/CMakeLists.txt index f7c34fa22bf7..7c40a08b9be5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,7 @@ include(cmake/utils/Utils.cmake) include(cmake/utils/Summary.cmake) include(cmake/utils/Linker.cmake) include(cmake/utils/FindCUDA.cmake) +include(cmake/utils/FindNCCL.cmake) include(cmake/utils/FindOpenCL.cmake) include(cmake/utils/FindVulkan.cmake) include(cmake/utils/FindLLVM.cmake) @@ -25,6 +26,7 @@ endif() # and add set(OPTION VALUE) to override these build options. # Alernatively, use cmake -DOPTION=VALUE through command-line. tvm_option(USE_CUDA "Build with CUDA" OFF) +tvm_option(USE_NCCL "Build with NCCL" OFF) tvm_option(USE_OPENCL "Build with OpenCL" OFF) tvm_option(USE_OPENCL_ENABLE_HOST_PTR "Enable OpenCL memory object access to host" OFF) tvm_option(USE_OPENCL_GTEST "Path to OpenCL specific gtest version for runtime cpp tests." /path/to/opencl/gtest) @@ -350,6 +352,7 @@ list(APPEND COMPILER_SRCS "src/target/datatype/myfloat/myfloat.cc") tvm_file_glob(GLOB RUNTIME_SRCS src/runtime/*.cc src/runtime/vm/*.cc + src/runtime/disco/*.cc src/runtime/minrpc/*.cc src/runtime/relax_vm/*.cc ) @@ -434,6 +437,13 @@ if(USE_PROFILER) list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS}) endif(USE_PROFILER) +if(USE_CUDA AND USE_NCCL) + message(STATUS "Build with NCCL...") + find_nccl(${USE_NCCL}) + tvm_file_glob(GLOB RUNTIME_NCCL_SRC src/runtime/disco/nccl/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_NCCL_SRC}) +endif() + if(USE_AOT_EXECUTOR) message(STATUS "Build with AOT Executor support...") file(GLOB RUNTIME_AOT_EXECUTOR_SRCS src/runtime/aot_executor/*.cc) @@ -850,3 +860,8 @@ if(USE_CUDA AND USE_CUTLASS) target_link_libraries(tvm PRIVATE -Wl,--no-as-needed flash_attn) target_link_libraries(tvm_runtime PRIVATE -Wl,--no-as-needed flash_attn) endif() + +if(USE_CUDA AND USE_NCCL) + target_link_libraries(tvm_runtime PRIVATE nccl) + target_link_libraries(tvm PRIVATE nccl) +endif() diff --git a/cmake/config.cmake b/cmake/config.cmake 
index 4990e52d634f..1fa1765da5d6 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -48,6 +48,12 @@ # - /path/to/cuda: use specific path to cuda toolkit set(USE_CUDA OFF) +# Whether to enable NCCL support: +# - ON: enable NCCL with cmake's auto search +# - OFF: disable NCCL +# - /path/to/nccl: use specific path to nccl +set(USE_NCCL OFF) + # Whether enable ROCM runtime # # Possible values: diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake index 9e1f71c72938..bb283912af98 100644 --- a/cmake/modules/LibInfo.cmake +++ b/cmake/modules/LibInfo.cmake @@ -64,6 +64,7 @@ function(add_lib_info src_file) TVM_INFO_USE_CPP_RTVM="${USE_CPP_RTVM}" TVM_INFO_USE_CUBLAS="${USE_CUBLAS}" TVM_INFO_USE_CUDA="${USE_CUDA}" + TVM_INFO_USE_NCCL="${USE_NCCL}" TVM_INFO_USE_CUDNN="${USE_CUDNN}" TVM_INFO_USE_CUSTOM_LOGGING="${USE_CUSTOM_LOGGING}" TVM_INFO_USE_CUTLASS="${USE_CUTLASS}" diff --git a/cmake/utils/FindNCCL.cmake b/cmake/utils/FindNCCL.cmake new file mode 100644 index 000000000000..0cabaf74f879 --- /dev/null +++ b/cmake/utils/FindNCCL.cmake @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# NCCL_ROOT - When set, this path is inspected instead of standard library +# locations as the root of the NCCL installation. +# The environment variable NCCL_ROOT overrides this variable. +# +# This module defines +# Nccl_FOUND, whether nccl has been found +# NCCL_INCLUDE_DIR, directory containing header +# NCCL_LIBRARY, path to the nccl library file +# This module assumes that the user has already called find_package(CUDA) + +macro(find_nccl use_nccl) + # A macro is expanded in the caller's scope, so return() would leave the caller; guard the body instead. + if(NOT ${use_nccl} MATCHES ${IS_FALSE_PATTERN}) + if(${use_nccl} MATCHES ${IS_TRUE_PATTERN}) + find_path(NCCL_INCLUDE_DIR NAMES nccl.h) + find_library(NCCL_LIBRARY NAMES nccl) + else() + find_path(NCCL_INCLUDE_DIR NAMES nccl.h HINTS ${use_nccl} ${use_nccl}/include) + find_library(NCCL_LIBRARY NAMES nccl HINTS ${use_nccl} ${use_nccl}/lib) + endif() + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Nccl DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARY) + if (Nccl_FOUND) + message(STATUS "Found NCCL_LIBRARY: ${NCCL_LIBRARY}") + message(STATUS "Found NCCL_INCLUDE_DIR: ${NCCL_INCLUDE_DIR}") + add_library(nccl SHARED IMPORTED) + set_target_properties(nccl + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${NCCL_INCLUDE_DIR}" + IMPORTED_LOCATION "${NCCL_LIBRARY}") + else() + message(STATUS "NCCL not found") + endif() + mark_as_advanced(NCCL_INCLUDE_DIR NCCL_LIBRARY) + endif() +endmacro(find_nccl) diff --git a/include/tvm/relax/attrs/ccl.h b/include/tvm/relax/attrs/ccl.h new file mode 100644 index 000000000000..45de0e949c4c --- /dev/null +++ b/include/tvm/relax/attrs/ccl.h @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/relax/attrs/ccl.h + * \brief Attributes for ccl operators. + */ +#ifndef TVM_RELAX_ATTRS_CCL_H_ +#define TVM_RELAX_ATTRS_CCL_H_ + +#include + +namespace tvm { +namespace relax { + +/*! \brief Attributes used in allreduce operators */ +struct AllReduceAttrs : public tvm::AttrsNode { + String op_type; + + TVM_DECLARE_ATTRS(AllReduceAttrs, "relax.attrs.AllReduceAttrs") { + TVM_ATTR_FIELD(op_type).describe( + "The type of reduction operation to be applied to the input data. 
Now only sum is " + "supported."); + } +}; // struct AllReduceAttrs + +} // namespace relax +} // namespace tvm + +#endif // TVM_RELAX_ATTRS_CCL_H_ diff --git a/include/tvm/runtime/data_type.h b/include/tvm/runtime/data_type.h index 9fb113f56b2c..ac7e879a644d 100644 --- a/include/tvm/runtime/data_type.h +++ b/include/tvm/runtime/data_type.h @@ -339,6 +339,7 @@ inline const char* DLDataTypeCode2Str(DLDataTypeCode type_code) { default: LOG(FATAL) << "unknown type_code=" << static_cast(type_code); } + throw; } inline std::ostream& operator<<(std::ostream& os, DLDataType t) { // NOLINT(*) diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index 654018565716..cb0eb7c21f11 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -245,54 +245,6 @@ class TVM_DLL DeviceAPI { constexpr int kRPCSessMask = 128; static_assert(kRPCSessMask >= TVMDeviceExtType_End); -/*! - * \brief The name of Device API factory. - * \param type The device type. - * \return the device name. - */ -inline const char* DeviceName(int type) { - switch (type) { - case kDLCPU: - return "cpu"; - case kDLCUDA: - return "cuda"; - case kDLCUDAHost: - return "cuda_host"; - case kDLCUDAManaged: - return "cuda_managed"; - case kDLOpenCL: - return "opencl"; - case kDLSDAccel: - return "sdaccel"; - case kDLAOCL: - return "aocl"; - case kDLVulkan: - return "vulkan"; - case kDLMetal: - return "metal"; - case kDLVPI: - return "vpi"; - case kDLROCM: - return "rocm"; - case kDLROCMHost: - return "rocm_host"; - case kDLExtDev: - return "ext_dev"; - case kDLOneAPI: - return "oneapi"; - case kDLWebGPU: - return "webgpu"; - case kDLHexagon: - return "hexagon"; - case kOpenGL: - return "opengl"; - case kDLMicroDev: - return "microdev"; - default: - LOG(FATAL) << "unknown type =" << type; - } -} - /*! * \brief Return true if a Device is owned by an RPC session. 
*/ @@ -324,7 +276,7 @@ inline std::ostream& operator<<(std::ostream& os, DLDevice dev) { // NOLINT(*) os << "remote[" << tvm::runtime::GetRPCSessionIndex(dev) << "]-"; dev = tvm::runtime::RemoveRPCSessionMask(dev); } - os << tvm::runtime::DeviceName(static_cast(dev.device_type)) << "(" << dev.device_id << ")"; + os << tvm::runtime::DLDeviceType2Str(static_cast(dev.device_type)) << ":" << dev.device_id; return os; } diff --git a/include/tvm/runtime/disco/session.h b/include/tvm/runtime/disco/session.h new file mode 100644 index 000000000000..0346fc79fc64 --- /dev/null +++ b/include/tvm/runtime/disco/session.h @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/*! + * \file session.h + * \brief This file serves as the entry point of Disco and defines key data structures and + * interfaces. + * + * Disco is a distributed runtime that consists of a controller and a cluster of workers. The + * controller is responsible for managing the workers by broadcasting commands to all the workers + * together, and the workers are responsible for executing the commands. The controller and + * workers communicate with each other through a bi-directional channel. 
+ * + * Different from a generic system, Disco is designed as a "single-program-multiple-data" (SPMD) + * runtime, which means that all the workers execute the same instruction at the same time, but the + * data they are working on may be different. For example, in data parallelism, each worker may + * work on a different batch of the data, but they all execute the same set of instructions. + * Therefore, if we imagine a virtual machine that executes the program, the structures of + * workers' register files could be considered as "identical" (single program) although the values + * may differ (multiple data). + * + * **DRef.** Following the design above, consider the SPMD program as written in a virtual ISA; then each + * worker is a virtual machine instance that executes the ISA while maintaining its own register file. + * The controller denotes each register in the workers' register files with a unique integer "register id", + * and each worker uses this id to refer to the register that resides on itself. + * DRef is a controller-side object backed by such a register id. The data it contains is not assumed + * to be directly accessible by the controller, with an exception for worker-0, which is a special + * worker that is always co-located with the controller. + * + * **Worker-0.** Worker-0 is a special worker that is always co-located with the controller. + * It is assumed that the controller can synchronize with and access the registers of worker-0. + * The Disco session provides multiple APIs to interact specifically with the worker-0. + * To share data with other workers, a common paradigm in Disco is to copy data from the + * controller-side NDArray to the worker-0, and then copy it to other workers using primitives on + * the data plane, for example, `broadcast` and `send`. + * + * **Control plane.** The controller broadcasts commands to all the workers as control signals. + * For example, the controller may ask all workers to load a library or call a function respectively. 
+ * Common control signals include: shutdown, retrieve a global PackedFunc, call packed function, + * etc. The controller is assumed to keep a message channel to each worker to implement the broadcast + * behavior, and the message channel may vary depending on use cases. + * + * **Data plane.** The data channel is usually used to exchange data between workers, especially for + * tensor data which is usually large. For example, performing an allreduce operator for sharded + * matrix multiplication, or broadcasting for an input tensor. For efficiency, the data channel is + * usually backed by NCCL on NVIDIA GPUs, RCCL on AMD GPUs, or MPI on CPUs. + * + * **Session.** A Disco session is a primary interface to interact with the Disco runtime, serving + * as a global context that manages the controller and workers. It could be implemented as a + * multi-threaded session with a pool of workers for single-node multi-gpu scenarios, or TCP sockets for + * workloads that span over a cluster of nodes. + * + * **Channel.** Disco channel is a bi-directional communication channel between the controller and + * workers for exchanging control signals. It is no different from a generic RPC channel, but + * adopts TVM's PackedFunc calling convention to support polymorphic and variadic arguments. + */ +#ifndef TVM_RUNTIME_DISCO_SESSION_H_ +#define TVM_RUNTIME_DISCO_SESSION_H_ + +#include <tvm/runtime/object.h> +#include <tvm/runtime/packed_func.h> + +#include <string> +#include <utility> + +namespace tvm { +namespace runtime { + +/*! + * \brief All possible kinds of Disco commands. + */ +enum class DiscoAction : int32_t { + kShutDown = 0, + kKillReg = 1, + kGetGlobalFunc = 2, + kCallPacked = 3, + kSyncWorker = 4, + kCopyFromWorker0 = 5, + kCopyToWorker0 = 6, +}; + +/*! 
\brief Converts the enum class `DiscoAction` to the name of its enumerator; any other value is reported via LOG(FATAL) */ +inline std::string DiscoAction2String(DiscoAction action) { + switch (action) { + case DiscoAction::kShutDown: + return "kShutDown"; + case DiscoAction::kKillReg: + return "kKillReg"; + case DiscoAction::kGetGlobalFunc: + return "kGetGlobalFunc"; + case DiscoAction::kCallPacked: + return "kCallPacked"; + case DiscoAction::kSyncWorker: + return "kSyncWorker"; + case DiscoAction::kCopyFromWorker0: + return "kCopyFromWorker0"; + case DiscoAction::kCopyToWorker0: + return "kCopyToWorker0"; + } + LOG(FATAL) << "ValueError: Unknown DiscoAction: " << static_cast<int>(action); + throw; /* not expected to be reached after LOG(FATAL); mirrors the other *2Str helpers so no path falls off the end */ +} + +/*! + * \brief An object that exists on all workers. + * + * The controller assigns a unique "register id" to each object, and the worker uses this id to + * refer to the object residing on itself. + */ +class DRefObj : public Object { + public: + /*! \brief Send deallocation command for `reg_id` */ + inline ~DRefObj(); + /*! + * \brief Get the value of a DRef from a remote worker. + * \param worker_id The id of the worker to be fetched from. + * \return The value of the register. + */ + inline TVMRetValue DebugGetFromRemote(int worker_id); + /*! + * \brief Copy from the NDArray provided to a remote worker. + * \param worker_id The id of the worker to be copied to. + * \param source The NDArray to be copied. + */ + void DebugCopyFrom(int worker_id, NDArray source); + + static constexpr const char* _type_key = "runtime.disco.DRef"; + static constexpr const uint32_t _type_index = TypeIndex::kRuntimeDiscoDRef; + TVM_DECLARE_FINAL_OBJECT_INFO(DRefObj, Object); + + /*! \brief The id of the register */ + int64_t reg_id; + /*! \brief Back-pointer to the host controller session */ + ObjectRef session{nullptr}; +}; + +/*! + * \brief Managed reference to DRefObj. + * \sa DRefObj + * \note No public constructor is provided as it is not supposed to be directly created by users. 
+ */ +class DRef : public ObjectRef { + public: + TVM_DEFINE_MUTABLE_NOTNULLABLE_OBJECT_REF_METHODS(DRef, ObjectRef, DRefObj); +}; + +/*! + * \brief A Disco interactive session. It allows users to interact with the Disco command queue with + * various PackedFunc calling conventions. + */ +class SessionObj : public Object { + public: + virtual ~SessionObj() = default; + /*! + * \brief Call a PackedFunc on workers providing variadic arguments. + * \tparam Args In the variadic arguments, the supported types include: + * - integers and floating point numbers; + * - DataType; + * - Device; + * - std::string; + * - DRef. + * Examples of unsupported types: + * - NDArray, DLTensor; + * - TVM Objects, including PackedFunc, Module and String; + * \param func The function to be called. + * \param args The variadic arguments. + * \return The return value of function call + */ + template <typename... Args> + DRef TVM_ALWAYS_INLINE CallPacked(const DRef& func, Args&&... args); + /*! \brief Get a global function on workers. */ + virtual DRef GetGlobalFunc(const std::string& name) = 0; + /*! + * \brief Copy an NDArray from worker-0 to the controller-side NDArray + * \param host_array The controller-side NDArray to copy into + * \param remote_array The NDArray on worker-0 + */ + virtual void CopyFromWorker0(const NDArray& host_array, const DRef& remote_array) = 0; + /*! + * \brief Copy the controller-side NDArray to worker-0 + * \param host_array The array to be copied to worker-0 + * \param remote_array The NDArray on worker-0 + */ + virtual void CopyToWorker0(const NDArray& host_array, const DRef& remote_array) = 0; + /*! + * \brief Synchronize the controller with a worker, and it will wait until worker finishes + * executing this instruction. + * \param worker_id The id of the worker to be synced with. + * \note This function is usually used for worker-0, because it is the only worker that is + * assumed to collocate with the controller. Syncing with other workers may not be supported. 
+ */ + virtual void SyncWorker(int worker_id) = 0; + /*! \brief Signal all the workers to shutdown */ + virtual void Shutdown() = 0; + /*! + * \brief Get the value of a register from a remote worker. + * \param reg_id The id of the register to be fetched. + * \param worker_id The id of the worker to be fetched from. + * \return The value of the register. + */ + virtual TVMRetValue DebugGetFromRemote(int64_t reg_id, int worker_id) = 0; + + static constexpr const char* _type_key = "runtime.disco.Session"; + TVM_DECLARE_BASE_OBJECT_INFO(SessionObj, Object); + + struct FFI; + friend struct SessionObj::FFI; + friend class DRefObj; + + protected: + /*! \brief Deallocate a register id, kill it on all workers, and append it to `free_regs_`. NOTE(review): DRefObj::reg_id is int64_t but this parameter is int, so the id is narrowed at the call in ~DRefObj -- confirm ids always fit in int. */ + virtual void DeallocReg(int reg_id) = 0; + /*! \brief Call packed function on each worker using a packed sequence */ + virtual DRef CallWithPacked(const TVMArgs& args) = 0; +}; + +/*! + * \brief Managed reference to SessionObj + * \sa SessionObj + */ +class Session : public ObjectRef { + public: + /*! \brief Create a session backed by a thread pool of workers */ + static Session ThreadedSession(int num_workers); + TVM_DEFINE_MUTABLE_NOTNULLABLE_OBJECT_REF_METHODS(Session, ObjectRef, SessionObj); +}; + +/*! + * \brief A bi-directional channel for controller-worker communication. + * This channel is primarily used to transfer control messages but not data. + */ +class DiscoChannel { + public: + /*! \brief Virtual destructor so that implementations can be destroyed through a DiscoChannel pointer */ + virtual ~DiscoChannel() = default; + /*! \brief Send a packed sequence to the receiver */ + virtual void Send(const TVMArgs& args) = 0; + /*! \brief Receive a packed sequence from worker */ + virtual TVMArgs Recv() = 0; + /*! \brief Reply a packed sequence to the sender */ + virtual void Reply(const TVMArgs& args) = 0; + /*! 
\brief Receive a reply from the worker */ + virtual TVMArgs RecvReply() = 0; +}; + +// Implementation details + +DRefObj::~DRefObj() { + if (this->session.defined()) { + Downcast<Session>(this->session)->DeallocReg(reg_id); + } +} + +TVMRetValue DRefObj::DebugGetFromRemote(int worker_id) { + return Downcast<Session>(this->session)->DebugGetFromRemote(this->reg_id, worker_id); +} + +template <typename... Args> +DRef SessionObj::CallPacked(const DRef& func, Args&&... args) { + constexpr int offset = 3; // leading slots filled below: action, reg_id, func + constexpr int kNumArgs = offset + sizeof...(Args); + TVMValue values[kNumArgs]; + int type_codes[kNumArgs]; + PackArgs(values, type_codes, + /*.0=*/static_cast<int>(DiscoAction::kCallPacked), // action + /*.1=*/0, // reg_id, which will be updated by this->CallWithPacked + /*.2=*/func, // the function to be called + std::forward<Args>(args)...); + return this->CallWithPacked(TVMArgs(values, type_codes, kNumArgs)); +} + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_DISCO_SESSION_H_ diff --git a/include/tvm/runtime/object.h b/include/tvm/runtime/object.h index b10aff96a116..94644d797c1a 100644 --- a/include/tvm/runtime/object.h +++ b/include/tvm/runtime/object.h @@ -72,6 +72,8 @@ struct TypeIndex { kRuntimeShapeTuple = 6, /*! \brief runtime::PackedFunc. */ kRuntimePackedFunc = 7, + /*! \brief runtime::DRef */ + kRuntimeDiscoDRef = 8, // static assignments that may subject to change. kRuntimeClosure, kRuntimeADT, diff --git a/include/tvm/runtime/packed_func.h b/include/tvm/runtime/packed_func.h index 660c24284b8d..e63e92835cc5 100644 --- a/include/tvm/runtime/packed_func.h +++ b/include/tvm/runtime/packed_func.h @@ -418,6 +418,8 @@ class TVMArgs { */ inline const char* ArgTypeCode2Str(int type_code); +inline std::ostream& operator<<(std::ostream& os, DLDevice dev); // NOLINT(*) + // macro to check type code. 
#define TVM_CHECK_TYPE_CODE(CODE, T) \ ICHECK_EQ(CODE, T) << "expected " << ArgTypeCode2Str(T) << " but got " << ArgTypeCode2Str(CODE) @@ -1257,6 +1259,56 @@ inline const char* ArgTypeCode2Str(int type_code) { default: LOG(FATAL) << "unknown type_code=" << static_cast(type_code); } + throw; +} + +/*! + * \brief The name of DLDeviceType. + * \param type The device type. + * \return the device name. + */ +inline const char* DLDeviceType2Str(int type) { + switch (type) { + case kDLCPU: + return "cpu"; + case kDLCUDA: + return "cuda"; + case kDLCUDAHost: + return "cuda_host"; + case kDLCUDAManaged: + return "cuda_managed"; + case kDLOpenCL: + return "opencl"; + case kDLSDAccel: + return "sdaccel"; + case kDLAOCL: + return "aocl"; + case kDLVulkan: + return "vulkan"; + case kDLMetal: + return "metal"; + case kDLVPI: + return "vpi"; + case kDLROCM: + return "rocm"; + case kDLROCMHost: + return "rocm_host"; + case kDLExtDev: + return "ext_dev"; + case kDLOneAPI: + return "oneapi"; + case kDLWebGPU: + return "webgpu"; + case kDLHexagon: + return "hexagon"; + case kOpenGL: + return "opengl"; + case kDLMicroDev: + return "microdev"; + default: + LOG(FATAL) << "unknown type = " << type; + } + throw; } namespace detail { @@ -1284,13 +1336,27 @@ namespace parameter_pack { template struct EnumeratedParamPack { - struct Invoke { - template