Skip to content

Commit

Permalink
[VM][OpenCL] Introduce textures allocation to VM memory manager (#15400)
Browse files Browse the repository at this point in the history
* [VM][OpenCL] Introduce textures allocation to VM memory manager

The VM memory manager is extended to support allocating memory with
different memory scopes. This functionality is fully implemented for
the naive allocator. For the pooled allocator it should be implemented
in the future.

* Fix lint

* Fix OpenCL tests
  • Loading branch information
echuraev authored Jul 27, 2023
1 parent 3e00253 commit e2c8d7b
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 1 deletion.
18 changes: 17 additions & 1 deletion include/tvm/runtime/vm/memory_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <functional>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

Expand All @@ -43,6 +44,8 @@ struct Buffer {
void* data{nullptr};
/*! \brief The size of the block. */
size_t size{0};
/*! \brief The shape of the tensor. */
std::vector<int64_t> shape;
/*! \brief The context of the allocated buffers. */
Device device;
};
Expand Down Expand Up @@ -72,6 +75,15 @@ class Allocator {
* \return A sized allocation in the form of a buffer.
*/
virtual Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) = 0;
/*! \brief Allocate a buffer given a shape and type.
* \param ndims The rank of the tensor.
* \param shape The shape of the tensor.
* \param type_hint A type hint to the allocator.
* \param mem_scope A memory scope of the buffer.
* \return A sized allocation in the form of a buffer.
*/
virtual Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint,
const std::string& mem_scope = "") = 0;
/*! \brief Free a buffer allocated by the allocator.
* \param buffer The buffer to free.
*/
Expand All @@ -81,6 +93,10 @@ class Allocator {
*/
virtual size_t UsedMemory() const = 0;

protected:
virtual Buffer Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint,
const std::string& mem_scope);

private:
AllocatorType type_;
};
Expand All @@ -105,7 +121,7 @@ class MemoryManager {
private:
MemoryManager() {}

private:
protected:
std::mutex mu_;
std::unordered_map<Device, std::unique_ptr<Allocator>> allocators_;
};
Expand Down
18 changes: 18 additions & 0 deletions src/runtime/vm/memory_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,24 @@ NDArray Allocator::Empty(std::vector<int64_t> shape, DLDataType dtype, DLDevice
return NDArray(GetObjectPtr<Object>(container));
}

/*!
 * \brief Shape-based allocation fallback shared by concrete allocators.
 *
 * Handles the default ("" or "global") memory scope by redirecting to the
 * flat nbytes/alignment allocation entry point; any other scope is fatal
 * here and must be handled by a device-specific override.
 * \param dev The device to allocate on.
 * \param ndims The rank of the tensor.
 * \param shape The shape of the tensor.
 * \param type_hint A type hint to the allocator.
 * \param mem_scope A memory scope of the buffer.
 * \return A sized allocation in the form of a buffer.
 */
Buffer Allocator::Alloc(Device dev, int ndims, int64_t* shape, DLDataType type_hint,
                        const std::string& mem_scope) {
  const bool flat_scope = mem_scope.empty() || mem_scope == "global";
  if (!flat_scope) {
    LOG(FATAL) << "Allocator cannot allocate data space with "
               << "specified memory scope: " << mem_scope;
    return {};
  }
  // The default scope maps onto plain flat memory. Build a temporary tensor
  // container so the runtime helpers can derive byte size and alignment.
  std::vector<int64_t> dims(shape, shape + ndims);
  NDArray::Container container(nullptr, dims, type_hint, dev);
  size_t nbytes = GetDataSize(container.dl_tensor);
  size_t align = GetDataAlignment(container.dl_tensor);
  return Alloc(nbytes, align, type_hint);
}

} // namespace vm
} // namespace runtime
} // namespace tvm
26 changes: 26 additions & 0 deletions src/runtime/vm/naive_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <tvm/runtime/vm/memory_manager.h>

#include <atomic>
#include <string>

namespace tvm {
namespace runtime {
Expand All @@ -46,6 +47,31 @@ class NaiveAllocator final : public Allocator {
return buf;
}

Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint,
const std::string& mem_scope) override {
Buffer buf;
size_t nbytes = 1;
for (int i = 0; i < ndims; ++i) {
buf.shape.push_back(shape[i]);
nbytes *= static_cast<size_t>(shape[i]);
}
nbytes *= (type_hint.bits * type_hint.lanes + 7) / 8;
buf.device = device_;
if (mem_scope.empty() || mem_scope == "global") {
auto tmp_buf = Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope);
buf.size = tmp_buf.size;
buf.data = tmp_buf.data;
return buf;
}

buf.size = nbytes;
buf.data = DeviceAPI::Get(device_)->AllocDataSpace(device_, ndims, shape, type_hint,
String(mem_scope));
used_memory_.fetch_add(nbytes, std::memory_order_relaxed);
DLOG(INFO) << "allocate " << nbytes << " B, used memory " << used_memory_ << " B";
return buf;
}

void Free(const Buffer& buffer) override {
DeviceAPI::Get(device_)->FreeDataSpace(buffer.device, buffer.data);
used_memory_.fetch_sub(buffer.size, std::memory_order_relaxed);
Expand Down
10 changes: 10 additions & 0 deletions src/runtime/vm/pooled_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#include <atomic>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

Expand Down Expand Up @@ -71,6 +72,15 @@ class PooledAllocator final : public Allocator {
return buf;
}

Buffer Alloc(int ndims, int64_t* shape, DLDataType type_hint,
const std::string& mem_scope) override {
if (mem_scope.empty() || mem_scope == "global") {
return Allocator::Alloc(device_, ndims, shape, type_hint, mem_scope);
}
LOG(FATAL) << "This alloc should be implemented";
return {};
}

void Free(const Buffer& buffer) override {
std::lock_guard<std::recursive_mutex> lock(mu_);
if (memory_pool_.find(buffer.size) == memory_pool_.end()) {
Expand Down
204 changes: 204 additions & 0 deletions tests/cpp/runtime/vm/memory_manager_tests.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <tvm/runtime/vm/memory_manager.h>

#include <exception>

#include "../../../../src/runtime/vm/pooled_allocator.h"

namespace tvm {
namespace runtime {
namespace vm {

// MemoryManagerWrapper is necessary because in class MemoryManager we don't have access to its
// protected members. In this class we add a new method which allows us to clear the internal
// state of the global memory manager.
class MemoryManagerWrapper : public MemoryManager {
 public:
  /*!
   * \brief Get the global memory manager viewed through this test wrapper.
   * \note static_cast is the appropriate named cast for this downcast (a
   *       reinterpret_cast here obscures intent); the wrapper adds no data
   *       members, so the object layout matches the base class.
   */
  static MemoryManagerWrapper* Global() {
    return static_cast<MemoryManagerWrapper*>(MemoryManager::Global());
  }
  /*! \brief Drop all cached allocators so each test starts from a clean state. */
  void clear() { allocators_.clear(); }
};

class TvmVMMemoryManagerTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Clear allocators from previous tests. The memory manager is a process
    // global, so without this the UsedMemory() expectations below would
    // depend on test execution order.
    MemoryManagerWrapper::Global()->clear();
  }
};

TEST_F(TvmVMMemoryManagerTest, NaiveAllocBasic) {
  // The naive allocator accounts memory exactly: usage grows by the request
  // size on Alloc and returns to zero once the buffer is freed.
  Device dev = {kDLCPU, 0};
  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive);
  EXPECT_EQ(allocator->UsedMemory(), 0);
  auto buffer = allocator->Alloc(64, 32, DataType::Float(32));
  EXPECT_EQ(allocator->UsedMemory(), 64);
  allocator->Free(buffer);
  EXPECT_EQ(allocator->UsedMemory(), 0);
}

TEST_F(TvmVMMemoryManagerTest, PooledAllocBasic) {
  // The pooled allocator rounds every request up to a whole page and keeps
  // freed pages cached, so UsedMemory() stays at the rounded size after Free.
  Device dev = {kDLCPU, 0};
  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled);
  EXPECT_EQ(allocator->UsedMemory(), 0);
  size_t nbytes = 64;
  size_t page_size = PooledAllocator::kDefaultPageSize;
  size_t rounded_size = ((nbytes + page_size - 1) / page_size) * page_size;
  auto buffer = allocator->Alloc(nbytes, 32, DataType::Float(32));
  EXPECT_EQ(allocator->UsedMemory(), rounded_size);
  allocator->Free(buffer);
  EXPECT_EQ(allocator->UsedMemory(), rounded_size);
}

TEST_F(TvmVMMemoryManagerTest, NaiveEmptyBasic) {
  // Empty() hands out an NDArray whose storage is released when the array is
  // destroyed; the naive allocator's accounting must drop back to zero.
  Device dev = {kDLCPU, 0};
  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive);
  EXPECT_EQ(allocator->UsedMemory(), 0);
  auto dtype = DataType::Float(32);
  std::vector<int64_t> shape = {1, 3, 6, 6};
  size_t nbytes = 1 * 3 * 6 * 6 * dtype.bytes();
  {
    auto ndarray = allocator->Empty(shape, dtype, dev);
    EXPECT_EQ(allocator->UsedMemory(), nbytes);
  }
  EXPECT_EQ(allocator->UsedMemory(), 0);
}

TEST_F(TvmVMMemoryManagerTest, PooledEmptyBasic) {
  // Same as NaiveEmptyBasic, but the pooled allocator retains the freed page
  // in its pool, so usage stays at the page-rounded size after destruction.
  Device dev = {kDLCPU, 0};
  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled);
  EXPECT_EQ(allocator->UsedMemory(), 0);
  auto dtype = DataType::Float(32);
  std::vector<int64_t> shape = {1, 3, 6, 6};
  size_t nbytes = 1 * 3 * 6 * 6 * dtype.bytes();
  size_t page_size = PooledAllocator::kDefaultPageSize;
  size_t rounded_size = ((nbytes + page_size - 1) / page_size) * page_size;
  {
    auto ndarray = allocator->Empty(shape, dtype, dev);
    EXPECT_EQ(allocator->UsedMemory(), rounded_size);
  }
  EXPECT_EQ(allocator->UsedMemory(), rounded_size);
}

TEST_F(TvmVMMemoryManagerTest, NaiveAllocWithShape) {
  Device dev = {kDLCPU, 0};
  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive);
  EXPECT_EQ(allocator->UsedMemory(), 0);
  auto dtype = DataType::Float(32);
  std::vector<int64_t> shape = {1, 3, 6, 6};
  size_t nbytes = 1 * 3 * 6 * 6 * dtype.bytes();
  // Shape-based allocation in the default scope behaves like flat allocation.
  auto buffer = allocator->Alloc(shape.size(), shape.data(), dtype);
  EXPECT_EQ(allocator->UsedMemory(), nbytes);
  allocator->Free(buffer);
  EXPECT_EQ(allocator->UsedMemory(), 0);

  // The CPU device has no texture memory, so requesting the "global.texture"
  // scope must fail with a descriptive error message.
  try {
    auto texture = allocator->Alloc(shape.size(), shape.data(), dtype, "global.texture");
    FAIL();
  } catch (std::exception& e) {
    std::string pattern =
        "Device does not support allocate data space with specified memory scope: global.texture";
    std::string what = e.what();
    EXPECT_NE(what.find(pattern), std::string::npos) << what;
  }
}

TEST_F(TvmVMMemoryManagerTest, PooledAllocWithShape) {
  Device dev = {kDLCPU, 0};
  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled);
  EXPECT_EQ(allocator->UsedMemory(), 0);
  auto dtype = DataType::Float(32);
  std::vector<int64_t> shape = {1, 3, 6, 6};
  size_t nbytes = 1 * 3 * 6 * 6 * dtype.bytes();
  size_t page_size = PooledAllocator::kDefaultPageSize;
  size_t rounded_size = ((nbytes + page_size - 1) / page_size) * page_size;
  // Default-scope shape allocation goes through the pooled flat path.
  auto buffer = allocator->Alloc(shape.size(), shape.data(), dtype);
  EXPECT_EQ(allocator->UsedMemory(), rounded_size);
  allocator->Free(buffer);
  EXPECT_EQ(allocator->UsedMemory(), rounded_size);

  // Texture scope is not implemented for the pooled allocator yet.
  try {
    auto texture = allocator->Alloc(shape.size(), shape.data(), dtype, "global.texture");
    FAIL();
  } catch (std::exception& e) {
    std::string pattern = "This alloc should be implemented";
    std::string what = e.what();
    EXPECT_NE(what.find(pattern), std::string::npos) << what;
  }
}

TEST_F(TvmVMMemoryManagerTest, NaiveAllocOpenCLTexture) {
  bool enabled = tvm::runtime::RuntimeEnabled("opencl");
  if (!enabled) {
    LOG(INFO) << "Skip OpenCL Texture alloc test because opencl runtime is disabled.\n";
    return;
  }
  Device dev = {kDLOpenCL, 0};
  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kNaive);
  EXPECT_EQ(allocator->UsedMemory(), 0);
  auto dtype = DataType::Float(32);
  std::vector<int64_t> shape = {1, 3, 6, 6};
  size_t nbytes = 1 * 3 * 6 * 6 * dtype.bytes();
  // Default-scope shape allocation on OpenCL behaves like flat allocation.
  auto buffer = allocator->Alloc(shape.size(), shape.data(), dtype);
  EXPECT_EQ(allocator->UsedMemory(), nbytes);
  allocator->Free(buffer);
  EXPECT_EQ(allocator->UsedMemory(), 0);

  // OpenCL supports texture memory, so "global.texture" must succeed and be
  // accounted the same way as a flat buffer of the same logical size.
  auto texture = allocator->Alloc(shape.size(), shape.data(), dtype, "global.texture");
  EXPECT_EQ(allocator->UsedMemory(), nbytes);
  allocator->Free(texture);
  EXPECT_EQ(allocator->UsedMemory(), 0);
}

TEST_F(TvmVMMemoryManagerTest, PooledAllocOpenCLTexture) {
  bool enabled = tvm::runtime::RuntimeEnabled("opencl");
  if (!enabled) {
    LOG(INFO) << "Skip OpenCL Texture alloc test because opencl runtime is disabled.\n";
    return;
  }
  Device dev = {kDLOpenCL, 0};
  Allocator* allocator = MemoryManagerWrapper::GetOrCreateAllocator(dev, kPooled);
  EXPECT_EQ(allocator->UsedMemory(), 0);
  auto dtype = DataType::Float(32);
  std::vector<int64_t> shape = {1, 3, 6, 6};
  size_t nbytes = 1 * 3 * 6 * 6 * dtype.bytes();
  size_t page_size = PooledAllocator::kDefaultPageSize;
  size_t rounded_size = ((nbytes + page_size - 1) / page_size) * page_size;
  // Default-scope shape allocation goes through the pooled flat path.
  auto buffer = allocator->Alloc(shape.size(), shape.data(), dtype);
  EXPECT_EQ(allocator->UsedMemory(), rounded_size);
  allocator->Free(buffer);
  EXPECT_EQ(allocator->UsedMemory(), rounded_size);

  // Texture scope is not implemented for the pooled allocator even on OpenCL.
  try {
    auto texture = allocator->Alloc(shape.size(), shape.data(), dtype, "global.texture");
    FAIL();
  } catch (std::exception& e) {
    std::string pattern = "This alloc should be implemented";
    std::string what = e.what();
    EXPECT_NE(what.find(pattern), std::string::npos) << what;
  }
}
} // namespace vm
} // namespace runtime
} // namespace tvm

0 comments on commit e2c8d7b

Please sign in to comment.