Skip to content

Commit

Permalink
apacheGH-41126: [Python] Basic bindings for Device and MemoryManager …
Browse files Browse the repository at this point in the history
…classes (apache#41685)

### Rationale for this change

Add bindings for the C++ `arrow::Device` and `arrow::MemoryManager` classes.

### What changes are included in this PR?

Basic bindings by adding the `pyarrow.Device` and `pyarrow.MemoryManager` classes, and just tested for CPU.

Not included here are additional methods on the `MemoryManager` class (e.g. to allocate or copy buffers), and the bindings are not yet tested for CUDA. The plan is to do this in follow-ups; landing these basic bindings first should enable further enhancements to be done in parallel.

### Are these changes tested?

Yes, for the CPU device only.

* GitHub Issue: apache#41126

Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
  • Loading branch information
jorisvandenbossche authored May 31, 2024
1 parent 052c330 commit 31fe24d
Show file tree
Hide file tree
Showing 8 changed files with 301 additions and 0 deletions.
3 changes: 3 additions & 0 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ def print_entry(label, value):
RunEndEncodedScalar, ExtensionScalar)

# Buffers, allocation
from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
default_cpu_memory_manager)

from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
Codec, compress, decompress, allocate_buffer)

Expand Down
162 changes: 162 additions & 0 deletions python/pyarrow/device.pxi
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True


# Python-visible enum of device allocation types.
# Each member takes its value from the matching ``CDeviceAllocationType_k*``
# constant (declared in includes/libarrow.pxd), cast to ``char``.
cpdef enum DeviceAllocationType:
    CPU = <char> CDeviceAllocationType_kCPU
    CUDA = <char> CDeviceAllocationType_kCUDA
    CUDA_HOST = <char> CDeviceAllocationType_kCUDA_HOST
    OPENCL = <char> CDeviceAllocationType_kOPENCL
    VULKAN = <char> CDeviceAllocationType_kVULKAN
    METAL = <char> CDeviceAllocationType_kMETAL
    VPI = <char> CDeviceAllocationType_kVPI
    ROCM = <char> CDeviceAllocationType_kROCM
    ROCM_HOST = <char> CDeviceAllocationType_kROCM_HOST
    EXT_DEV = <char> CDeviceAllocationType_kEXT_DEV
    CUDA_MANAGED = <char> CDeviceAllocationType_kCUDA_MANAGED
    ONEAPI = <char> CDeviceAllocationType_kONEAPI
    WEBGPU = <char> CDeviceAllocationType_kWEBGPU
    HEXAGON = <char> CDeviceAllocationType_kHEXAGON


cdef object _wrap_device_allocation_type(CDeviceAllocationType device_type):
    """
    Convert a C-level ``CDeviceAllocationType`` value into the matching
    member of the Python-visible ``DeviceAllocationType`` enum.
    """
    # The Python enum members are defined from ``char`` casts of the C
    # constants, so the lookup goes through the same cast.
    cdef char raw_value = <char> device_type
    return DeviceAllocationType(raw_value)


cdef class Device(_Weakrefable):
    """
    Abstract interface for hardware devices.

    This object represents a device with access to some memory spaces.
    When handling a Buffer or raw memory address, it allows deciding in which
    context the raw memory address should be interpreted
    (e.g. CPU-accessible memory, or embedded memory on some particular GPU).
    """

    def __init__(self):
        # Instances are only created internally through ``Device.wrap``.
        raise TypeError("Do not call Device's constructor directly, "
                        "use the device attribute of the MemoryManager instead.")

    cdef void init(self, const shared_ptr[CDevice]& device):
        # Keep a reference to the wrapped C++ device.
        self.device = device

    @staticmethod
    cdef wrap(const shared_ptr[CDevice]& device):
        # ``__new__`` bypasses ``__init__``, which always raises.
        cdef Device self = Device.__new__(Device)
        self.init(device)
        return self

    def __eq__(self, other):
        """
        Compare two devices using the C++ ``Equals`` method.

        Returns ``NotImplemented`` when ``other`` is not a Device, so that
        Python can try the reflected comparison on ``other`` before falling
        back to its default (which still yields ``False`` for ``==``).
        """
        if not isinstance(other, Device):
            return NotImplemented
        return self.device.get().Equals(deref((<Device>other).device.get()))

    def __repr__(self):
        return "<pyarrow.Device: {}>".format(frombytes(self.device.get().ToString()))

    @property
    def type_name(self):
        """
        A shorthand for this device's type.
        """
        return frombytes(self.device.get().type_name())

    @property
    def device_id(self):
        """
        A device ID to identify this device if there are multiple of this type.

        If there is no "device_id" equivalent (such as for the main CPU device on
        non-NUMA systems) returns -1.
        """
        return self.device.get().device_id()

    @property
    def is_cpu(self):
        """
        Whether this device is the main CPU device.

        This shorthand method is very useful when deciding whether a memory address
        is CPU-accessible.
        """
        return self.device.get().is_cpu()

    @property
    def device_type(self):
        """
        Return the DeviceAllocationType of this device.
        """
        return _wrap_device_allocation_type(self.device.get().device_type())


cdef class MemoryManager(_Weakrefable):
    """
    An object that provides memory management primitives.

    Every MemoryManager is bound to one particular Device instance.
    It may additionally carry other parameters (such as a MemoryPool
    used to allocate CPU memory).
    """

    def __init__(self):
        # Direct construction is disallowed; wrappers come from ``wrap``.
        raise TypeError("Do not call MemoryManager's constructor directly, "
                        "use pyarrow.default_cpu_memory_manager() instead.")

    cdef void init(self, const shared_ptr[CMemoryManager]& mm):
        # Hold on to the wrapped C++ memory manager.
        self.memory_manager = mm

    @staticmethod
    cdef wrap(const shared_ptr[CMemoryManager]& mm):
        # ``__new__`` skips ``__init__``, which unconditionally raises.
        cdef MemoryManager wrapped = MemoryManager.__new__(MemoryManager)
        wrapped.init(mm)
        return wrapped

    def __repr__(self):
        cdef shared_ptr[CDevice] c_device = self.memory_manager.get().device()
        device_text = frombytes(c_device.get().ToString())
        return "<pyarrow.MemoryManager device: {}>".format(device_text)

    @property
    def device(self):
        """
        The Device instance this MemoryManager is bound to.
        """
        cdef shared_ptr[CDevice] c_device = self.memory_manager.get().device()
        return Device.wrap(c_device)

    @property
    def is_cpu(self):
        """
        Whether this MemoryManager is bound to the main CPU device.

        A convenient shortcut when deciding whether a memory address
        is CPU-accessible.
        """
        return self.memory_manager.get().is_cpu()


def default_cpu_memory_manager():
    """
    Return the default MemoryManager instance for the CPU.

    The instance returned is a singleton that uses the default MemoryPool.
    """
    cdef shared_ptr[CMemoryManager] c_manager = c_default_cpu_memory_manager()
    return MemoryManager.wrap(c_manager)
35 changes: 35 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,38 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CProxyMemoryPool" arrow::ProxyMemoryPool"(CMemoryPool):
CProxyMemoryPool(CMemoryPool*)

# Cython-side declaration of the C++ ``arrow::DeviceAllocationType`` enum.
# Each ``CDeviceAllocationType_k*`` name maps to the C++ enumerator given in
# the quoted string.
ctypedef enum CDeviceAllocationType "arrow::DeviceAllocationType":
    CDeviceAllocationType_kCPU "arrow::DeviceAllocationType::kCPU"
    CDeviceAllocationType_kCUDA "arrow::DeviceAllocationType::kCUDA"
    CDeviceAllocationType_kCUDA_HOST "arrow::DeviceAllocationType::kCUDA_HOST"
    CDeviceAllocationType_kOPENCL "arrow::DeviceAllocationType::kOPENCL"
    CDeviceAllocationType_kVULKAN "arrow::DeviceAllocationType::kVULKAN"
    CDeviceAllocationType_kMETAL "arrow::DeviceAllocationType::kMETAL"
    CDeviceAllocationType_kVPI "arrow::DeviceAllocationType::kVPI"
    CDeviceAllocationType_kROCM "arrow::DeviceAllocationType::kROCM"
    CDeviceAllocationType_kROCM_HOST "arrow::DeviceAllocationType::kROCM_HOST"
    CDeviceAllocationType_kEXT_DEV "arrow::DeviceAllocationType::kEXT_DEV"
    CDeviceAllocationType_kCUDA_MANAGED "arrow::DeviceAllocationType::kCUDA_MANAGED"
    CDeviceAllocationType_kONEAPI "arrow::DeviceAllocationType::kONEAPI"
    CDeviceAllocationType_kWEBGPU "arrow::DeviceAllocationType::kWEBGPU"
    CDeviceAllocationType_kHEXAGON "arrow::DeviceAllocationType::kHEXAGON"

# Cython declaration of the C++ ``arrow::Device`` class, listing the member
# functions made callable from the Cython bindings.
cdef cppclass CDevice" arrow::Device":
    const char* type_name()
    c_string ToString()
    c_bool Equals(const CDevice& other)
    int64_t device_id()
    c_bool is_cpu() const
    shared_ptr[CMemoryManager] default_memory_manager()
    CDeviceAllocationType device_type()

# Cython declaration of the C++ ``arrow::MemoryManager`` class; only the
# device accessor and the CPU check are declared here.
cdef cppclass CMemoryManager" arrow::MemoryManager":
    const shared_ptr[CDevice] device()
    c_bool is_cpu() const

# C++ free function ``arrow::default_cpu_memory_manager``, exposed to Cython
# under the name ``c_default_cpu_memory_manager``.
shared_ptr[CMemoryManager] c_default_cpu_memory_manager \
    " arrow::default_cpu_memory_manager"()

cdef cppclass CBuffer" arrow::Buffer":
CBuffer(const uint8_t* data, int64_t size)
const uint8_t* data()
Expand All @@ -328,6 +360,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_bool is_mutable() const
c_string ToHexString()
c_bool Equals(const CBuffer& other)
shared_ptr[CDevice] device()
const shared_ptr[CMemoryManager] memory_manager()
CDeviceAllocationType device_type()

CResult[shared_ptr[CBuffer]] SliceBufferSafe(
const shared_ptr[CBuffer]& buffer, int64_t offset)
Expand Down
33 changes: 33 additions & 0 deletions python/pyarrow/io.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1327,6 +1327,39 @@ cdef class Buffer(_Weakrefable):
"""
return self.buffer.get().is_cpu()

@property
def device(self):
    """
    The device where the buffer resides.

    Returns
    -------
    Device
    """
    cdef shared_ptr[CDevice] c_device = self.buffer.get().device()
    return Device.wrap(c_device)

@property
def memory_manager(self):
    """
    The memory manager associated with the buffer.

    Returns
    -------
    MemoryManager
    """
    cdef shared_ptr[CMemoryManager] c_manager = self.buffer.get().memory_manager()
    return MemoryManager.wrap(c_manager)

@property
def device_type(self):
    """
    The device type where the buffer resides.

    Returns
    -------
    DeviceAllocationType
    """
    cdef CDeviceAllocationType c_type = self.buffer.get().device_type()
    return _wrap_device_allocation_type(c_type)

@property
def parent(self):
cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent()
Expand Down
20 changes: 20 additions & 0 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,26 @@ cdef class RecordBatch(_Tabular):
cdef void init(self, const shared_ptr[CRecordBatch]& table)


# Declaration of the ``Device`` extension type: the Python wrapper that
# holds a shared pointer to a C++ ``CDevice``.
cdef class Device(_Weakrefable):
    cdef:
        # Shared pointer to the wrapped C++ device.
        shared_ptr[CDevice] device

    # Stores ``device`` on the instance (defined in device.pxi).
    cdef void init(self, const shared_ptr[CDevice]& device)

    # Creates a Device wrapper around an existing shared pointer.
    @staticmethod
    cdef wrap(const shared_ptr[CDevice]& device)


# Declaration of the ``MemoryManager`` extension type: the Python wrapper
# that holds a shared pointer to a C++ ``CMemoryManager``.
cdef class MemoryManager(_Weakrefable):
    cdef:
        # Shared pointer to the wrapped C++ memory manager.
        shared_ptr[CMemoryManager] memory_manager

    # Stores the pointer on the instance (defined in device.pxi).
    # NOTE(review): the definition in device.pxi names this parameter ``mm``.
    cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager)

    # Creates a MemoryManager wrapper around an existing shared pointer.
    @staticmethod
    cdef wrap(const shared_ptr[CMemoryManager]& mm)


cdef class Buffer(_Weakrefable):
cdef:
shared_ptr[CBuffer] buffer
Expand Down
3 changes: 3 additions & 0 deletions python/pyarrow/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ include "pandas-shim.pxi"
# Memory pools and allocation
include "memory.pxi"

# Device type and memory manager
include "device.pxi"

# DataType, Field, Schema
include "types.pxi"

Expand Down
43 changes: 43 additions & 0 deletions python/pyarrow/tests/test_device.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pyarrow as pa


def test_device_memory_manager():
    """Check the default CPU MemoryManager and the Device it exposes."""
    manager = pa.default_cpu_memory_manager()
    assert manager.is_cpu

    cpu_device = manager.device
    assert cpu_device.is_cpu
    assert cpu_device.device_id == -1
    assert cpu_device.device_type == pa.DeviceAllocationType.CPU
    assert cpu_device.type_name == "arrow::CPUDevice"
    # A device must compare equal to itself.
    assert cpu_device == cpu_device

    assert repr(cpu_device) == "<pyarrow.Device: CPUDevice()>"
    assert repr(manager) == "<pyarrow.MemoryManager device: CPUDevice()>"


def test_buffer_device():
    """Check the device-related attributes of a buffer taken from an array."""
    int_array = pa.array([0, 1, 2])
    array_buffer = int_array.buffers()[1]

    assert array_buffer.device_type == pa.DeviceAllocationType.CPU
    assert isinstance(array_buffer.device, pa.Device)
    assert isinstance(array_buffer.memory_manager, pa.MemoryManager)
    assert array_buffer.is_cpu
    assert array_buffer.device.is_cpu
    # The buffer's device must equal the default CPU manager's device.
    assert array_buffer.device == pa.default_cpu_memory_manager().device
    assert array_buffer.memory_manager.is_cpu
2 changes: 2 additions & 0 deletions python/pyarrow/tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ def test_set_timezone_db_path_non_windows():
pa.MemoryPool,
pa.LoggingMemoryPool,
pa.ProxyMemoryPool,
pa.Device,
pa.MemoryManager,
])
def test_extension_type_constructor_errors(klass):
# ARROW-2638: prevent calling extension class constructors directly
Expand Down

0 comments on commit 31fe24d

Please sign in to comment.