From e2c8d7b33ea158a6775273431cb09aec776d311e Mon Sep 17 00:00:00 2001 From: Egor Churaev Date: Thu, 27 Jul 2023 04:15:02 +0300 Subject: [PATCH] [VM][OpenCL] Introduce textures allocation to VM memory manager (#15400) * [VM][OpenCL] Introduce textures allocation to VM memory manager VM memory manager is extended to support allocating memory with different memory scope. This functionality is fully implemented for naive allocator. For pooled allocator it should be implemented in the future. * Fix lint * Fix OpenCL tests --- include/tvm/runtime/vm/memory_manager.h | 18 +- src/runtime/vm/memory_manager.cc | 18 ++ src/runtime/vm/naive_allocator.h | 26 +++ src/runtime/vm/pooled_allocator.h | 10 + tests/cpp/runtime/vm/memory_manager_tests.cc | 204 +++++++++++++++++++ 5 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 tests/cpp/runtime/vm/memory_manager_tests.cc diff --git a/include/tvm/runtime/vm/memory_manager.h b/include/tvm/runtime/vm/memory_manager.h index fb2354bca4ec..feafc01f63d9 100644 --- a/include/tvm/runtime/vm/memory_manager.h +++ b/include/tvm/runtime/vm/memory_manager.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,8 @@ struct Buffer { void* data{nullptr}; /*! \brief The size of the block. */ size_t size{0}; + /*! \brief The shape of the tensor. */ + std::vector shape; /*! \brief The context of the allocated buffers. */ Device device; }; @@ -72,6 +75,15 @@ class Allocator { * \return A sized allocation in the form of a buffer. */ virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0; + /*! \brief Allocate a buffer given a shape and type. + * \param ndims The rank of the tensor. + * \param shape The shape of the tensor. + * \param type_hint A type hint to the allocator. + * \param mem_scope A memory scope of the buffer. + * \return A sized allocation in the form of a buffer.
+ */ + virtual Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope = "") = 0; /*! \brief Free a buffer allocated by the allocator. * \param buffer The buffer to free. */ @@ -81,6 +93,10 @@ class Allocator { */ virtual size_t UsedMemory() const = 0; + protected: + virtual Buffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope); + private: AllocatorType type_; }; @@ -105,7 +121,7 @@ class MemoryManager { private: MemoryManager() {} - private: + protected: std::mutex mu_; std::unordered_map> allocators_; }; diff --git a/src/runtime/vm/memory_manager.cc b/src/runtime/vm/memory_manager.cc index 22afcce6a01e..2855722a4cf4 100644 --- a/src/runtime/vm/memory_manager.cc +++ b/src/runtime/vm/memory_manager.cc @@ -170,6 +170,24 @@ NDArray Allocator::Empty(std::vector shape, DLDataType dtype, DLDevice return NDArray(GetObjectPtr(container)); } +Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) { + if (mem_scope.empty() || mem_scope == "global") { + // by default, we can always redirect to the flat memory allocations + std::vector s; + for (int i = 0; i < ndims; ++i) { + s.push_back(shape[i]); + } + NDArray::Container container(nullptr, s, type_hint, dev); + size_t size = GetDataSize(container.dl_tensor); + size_t alignment = GetDataAlignment(container.dl_tensor); + return Alloc(size, alignment, type_hint); + } + LOG(FATAL) << "Allocator cannot allocate data space with " + << "specified memory scope: " << mem_scope; + return {}; +} + } // namespace vm } // namespace runtime } // namespace tvm diff --git a/src/runtime/vm/naive_allocator.h b/src/runtime/vm/naive_allocator.h index 9fce66f60669..799f16ad60bc 100644 --- a/src/runtime/vm/naive_allocator.h +++ b/src/runtime/vm/naive_allocator.h @@ -27,6 +27,7 @@ #include #include +#include namespace tvm { namespace runtime { @@ -46,6 +47,31 @@ class NaiveAllocator final : public 
Allocator { return buf; } + Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) override { + Buffer buf; + size_t nbytes = 1; + for (int i = 0; i < ndims; ++i) { + buf.shape.push_back(shape[i]); + nbytes *= static_cast(shape[i]); + } + nbytes *= (type_hint.bits * type_hint.lanes + 7) / 8; + buf.device = device_; + if (mem_scope.empty() || mem_scope == "global") { + auto tmp_buf = Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope); + buf.size = tmp_buf.size; + buf.data = tmp_buf.data; + return buf; + } + + buf.size = nbytes; + buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, ndims, shape, type_hint, + String(mem_scope)); + used_memory_.fetch_add(nbytes, std::memory_order_relaxed); + DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B"; + return buf; + } + void Free(const Buffer& buffer) override { DeviceAPI::Get(device_)->FreeDataSpace(buffer.device, buffer.data); used_memory_.fetch_sub(buffer.size, std::memory_order_relaxed); diff --git a/src/runtime/vm/pooled_allocator.h b/src/runtime/vm/pooled_allocator.h index 9c11c783011e..ea6059e0c64c 100644 --- a/src/runtime/vm/pooled_allocator.h +++ b/src/runtime/vm/pooled_allocator.h @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -71,6 +72,15 @@ class PooledAllocator final : public Allocator { return buf; } + Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint, + const std::string& mem_scope) override { + if (mem_scope.empty() || mem_scope == "global") { + return Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope); + } + LOG(FATAL) << "This alloc should be implemented"; + return {}; + } + void Free(const Buffer& buffer) override { std::lock_guard lock(mu_); if (memory_pool_.find(buffer.size) == memory_pool_.end()) { diff --git a/tests/cpp/runtime/vm/memory_manager_tests.cc b/tests/cpp/runtime/vm/memory_manager_tests.cc new file mode 100644 index 000000000000..ac1ff201cf34 --- /dev/null +++ 
b/tests/cpp/runtime/vm/memory_manager_tests.cc @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +#include + +#include "../../../../src/runtime/vm/pooled_allocator.h" + +namespace tvm { +namespace runtime { +namespace vm { + +// MemoryManagerWrapper is necessary because in class MemoryManager we don't have access to its +// protected members. In this class we add a new method which allows us to clear internal state of +// the global memory manager.
+class MemoryManagerWrapper : public MemoryManager { + public: + static MemoryManagerWrapper* Global() { + return reinterpret_cast(MemoryManager::Global()); + } + void clear() { allocators_.clear(); } +}; + +class TvmVMMemoryManagerTest : public ::testing::Test { + protected: + void SetUp() override { + // Clear allocators from previous tests + MemoryManagerWrapper::Global()->clear(); + } +}; + +TEST_F(TvmVMMemoryManagerTest, NaiveAllocBasic) { + Device dev = {kDLCPU, 0}; + Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive); + EXPECT_EQ(allocator->UsedMemory(), 0); + auto buff = allocator->Alloc(64, 32, DataType::Float(32)); + EXPECT_EQ(allocator->UsedMemory(), 64); + allocator->Free(buff); + EXPECT_EQ(allocator->UsedMemory(), 0); +} + +TEST_F(TvmVMMemoryManagerTest, PooledAllocBasic) { + Device dev = {kDLCPU, 0}; + size_t nbytes = 64; + size_t page_size = PooledAllocator::kDefaultPageSize; + size_t size = ((nbytes + page_size - 1) / page_size) * page_size; + Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled); + EXPECT_EQ(allocator->UsedMemory(), 0); + auto buff = allocator->Alloc(nbytes, 32, DataType::Float(32)); + EXPECT_EQ(allocator->UsedMemory(), size); + allocator->Free(buff); + EXPECT_EQ(allocator->UsedMemory(), size); +} + +TEST_F(TvmVMMemoryManagerTest, NaiveEmptyBasic) { + Device dev = {kDLCPU, 0}; + Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive); + EXPECT_EQ(allocator->UsedMemory(), 0); + auto dt = DataType::Float(32); + size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); + std::vector shape = {1, 3, 6, 6}; + { + auto ndarray = allocator->Empty(shape, dt, dev); + EXPECT_EQ(allocator->UsedMemory(), nbytes); + } + EXPECT_EQ(allocator->UsedMemory(), 0); +} + +TEST_F(TvmVMMemoryManagerTest, PooledEmptyBasic) { + Device dev = {kDLCPU, 0}; + Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled); + EXPECT_EQ(allocator->UsedMemory(), 0); + auto dt = 
DataType::Float(32); + size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); + size_t page_size = PooledAllocator::kDefaultPageSize; + size_t size = ((nbytes + page_size - 1) / page_size) * page_size; + std::vector shape = {1, 3, 6, 6}; + { + auto ndarray = allocator->Empty(shape, dt, dev); + EXPECT_EQ(allocator->UsedMemory(), size); + } + EXPECT_EQ(allocator->UsedMemory(), size); +} + +TEST_F(TvmVMMemoryManagerTest, NaiveAllocWithShape) { + Device dev = {kDLCPU, 0}; + Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive); + EXPECT_EQ(allocator->UsedMemory(), 0); + auto dt = DataType::Float(32); + size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); + std::vector shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + EXPECT_EQ(allocator->UsedMemory(), nbytes); + allocator->Free(buff); + EXPECT_EQ(allocator->UsedMemory(), 0); + + try { + auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + FAIL(); + } catch (std::exception& e) { + std::string pattern = + "Device does not support allocate data space with specified memory scope: global.texture"; + std::string what = e.what(); + EXPECT_NE(what.find(pattern), std::string::npos) << what; + } +} + +TEST_F(TvmVMMemoryManagerTest, PooledAllocWithShape) { + Device dev = {kDLCPU, 0}; + Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled); + EXPECT_EQ(allocator->UsedMemory(), 0); + auto dt = DataType::Float(32); + size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); + size_t page_size = PooledAllocator::kDefaultPageSize; + size_t size = ((nbytes + page_size - 1) / page_size) * page_size; + std::vector shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + EXPECT_EQ(allocator->UsedMemory(), size); + allocator->Free(buff); + EXPECT_EQ(allocator->UsedMemory(), size); + + try { + auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + FAIL(); + } catch (std::exception& e) { + 
std::string pattern = "This alloc should be implemented"; + std::string what = e.what(); + EXPECT_NE(what.find(pattern), std::string::npos) << what; + } +} + +TEST_F(TvmVMMemoryManagerTest, NaiveAllocOpenCLTexture) { + bool enabled = tvm::runtime::RuntimeEnabled("opencl"); + if (!enabled) { + LOG(INFO) << "Skip OpenCL Texture alloc test because opencl runtime is disabled.\n"; + return; + } + Device dev = {kDLOpenCL, 0}; + Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive); + EXPECT_EQ(allocator->UsedMemory(), 0); + auto dt = DataType::Float(32); + size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); + std::vector shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + EXPECT_EQ(allocator->UsedMemory(), nbytes); + allocator->Free(buff); + EXPECT_EQ(allocator->UsedMemory(), 0); + + auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + EXPECT_EQ(allocator->UsedMemory(), nbytes); + allocator->Free(texture); + EXPECT_EQ(allocator->UsedMemory(), 0); +} + +TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) { + bool enabled = tvm::runtime::RuntimeEnabled("opencl"); + if (!enabled) { + LOG(INFO) << "Skip OpenCL Texture alloc test because opencl runtime is disabled.\n"; + return; + } + Device dev = {kDLOpenCL, 0}; + Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled); + EXPECT_EQ(allocator->UsedMemory(), 0); + auto dt = DataType::Float(32); + size_t nbytes = 1 * 3 * 6 * 6 * dt.bytes(); + size_t page_size = PooledAllocator::kDefaultPageSize; + size_t size = ((nbytes + page_size - 1) / page_size) * page_size; + std::vector shape = {1, 3, 6, 6}; + auto buff = allocator->Alloc(shape.size(), shape.data(), dt); + EXPECT_EQ(allocator->UsedMemory(), size); + allocator->Free(buff); + EXPECT_EQ(allocator->UsedMemory(), size); + + try { + auto texture = allocator->Alloc(shape.size(), shape.data(), dt, "global.texture"); + FAIL(); + } catch (std::exception& e) { + 
std::string pattern = "This alloc should be implemented"; + std::string what = e.what(); + EXPECT_NE(what.find(pattern), std::string::npos) << what; + } +} +} // namespace vm +} // namespace runtime +} // namespace tvm