From 31fe24dd3345d387ba52d46c2915a909a5667813 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 31 May 2024 09:48:54 +0200
Subject: [PATCH] GH-41126: [Python] Basic bindings for Device and MemoryManager classes (#41685)

### Rationale for this change

Add bindings for the C++ `arrow::Device` and `arrow::MemoryManager` classes.

### What changes are included in this PR?

Basic bindings by adding the `pyarrow.Device` and `pyarrow.MemoryManager` classes, and just tested for CPU.

What is not included here are additional methods on the `MemoryManager` class (eg to allocate or copy buffers), and this is also not yet tested for CUDA. Planning to do this as follow-ups, and first doing those basic bindings should enable further enhancements to be done in parallel.

### Are these changes tested?

Yes, for the CPU device only.

* GitHub Issue: #41126

Authored-by: Joris Van den Bossche
Signed-off-by: Joris Van den Bossche
---
Reviewer note (this region is ignored by `git am`): this text was recovered
from a copy that had lost its line breaks, its indentation and every `<...>`
span. Indentation and blank context lines were reconstructed to match the
hunk headers. The Cython casts (`<char>`, `<Device>`), the two `__repr__`
templates and the expected reprs in test_device.py were restored by
inference; the author e-mail addresses were not restored. Confirm against
upstream commit 31fe24dd before applying.

 python/pyarrow/__init__.py           |   3 +
 python/pyarrow/device.pxi            | 162 +++++++++++++++++++++++++++
 python/pyarrow/includes/libarrow.pxd |  35 ++++++
 python/pyarrow/io.pxi                |  33 ++++++
 python/pyarrow/lib.pxd               |  20 ++++
 python/pyarrow/lib.pyx               |   3 +
 python/pyarrow/tests/test_device.py  |  43 +++++++
 python/pyarrow/tests/test_misc.py    |   2 +
 8 files changed, 301 insertions(+)
 create mode 100644 python/pyarrow/device.pxi
 create mode 100644 python/pyarrow/tests/test_device.py

diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 936f4736977c8..e52e0d242bee5 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -236,6 +236,9 @@ def print_entry(label, value):
                          RunEndEncodedScalar, ExtensionScalar)
 
 # Buffers, allocation
+from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
+                         default_cpu_memory_manager)
+
 from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
                          Codec, compress, decompress, allocate_buffer)
 
diff --git a/python/pyarrow/device.pxi b/python/pyarrow/device.pxi
new file mode 100644
index 0000000000000..6e6034752085a
--- /dev/null
+++ b/python/pyarrow/device.pxi
@@ -0,0 +1,162 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+
+cpdef enum DeviceAllocationType:
+    CPU = <char> CDeviceAllocationType_kCPU
+    CUDA = <char> CDeviceAllocationType_kCUDA
+    CUDA_HOST = <char> CDeviceAllocationType_kCUDA_HOST
+    OPENCL = <char> CDeviceAllocationType_kOPENCL
+    VULKAN = <char> CDeviceAllocationType_kVULKAN
+    METAL = <char> CDeviceAllocationType_kMETAL
+    VPI = <char> CDeviceAllocationType_kVPI
+    ROCM = <char> CDeviceAllocationType_kROCM
+    ROCM_HOST = <char> CDeviceAllocationType_kROCM_HOST
+    EXT_DEV = <char> CDeviceAllocationType_kEXT_DEV
+    CUDA_MANAGED = <char> CDeviceAllocationType_kCUDA_MANAGED
+    ONEAPI = <char> CDeviceAllocationType_kONEAPI
+    WEBGPU = <char> CDeviceAllocationType_kWEBGPU
+    HEXAGON = <char> CDeviceAllocationType_kHEXAGON
+
+
+cdef object _wrap_device_allocation_type(CDeviceAllocationType device_type):
+    return DeviceAllocationType(<char> device_type)
+
+
+cdef class Device(_Weakrefable):
+    """
+    Abstract interface for hardware devices
+
+    This object represents a device with access to some memory spaces.
+    When handling a Buffer or raw memory address, it allows deciding in which
+    context the raw memory address should be interpreted
+    (e.g. CPU-accessible memory, or embedded memory on some particular GPU).
+    """
+
+    def __init__(self):
+        raise TypeError("Do not call Device's constructor directly, "
+                        "use the device attribute of the MemoryManager instead.")
+
+    cdef void init(self, const shared_ptr[CDevice]& device):
+        self.device = device
+
+    @staticmethod
+    cdef wrap(const shared_ptr[CDevice]& device):
+        cdef Device self = Device.__new__(Device)
+        self.init(device)
+        return self
+
+    def __eq__(self, other):
+        if not isinstance(other, Device):
+            return False
+        return self.device.get().Equals(deref((<Device>other).device.get()))
+
+    def __repr__(self):
+        return "<pyarrow.Device: {}>".format(frombytes(self.device.get().ToString()))
+
+    @property
+    def type_name(self):
+        """
+        A shorthand for this device's type.
+        """
+        return frombytes(self.device.get().type_name())
+
+    @property
+    def device_id(self):
+        """
+        A device ID to identify this device if there are multiple of this type.
+
+        If there is no "device_id" equivalent (such as for the main CPU device on
+        non-numa systems) returns -1.
+        """
+        return self.device.get().device_id()
+
+    @property
+    def is_cpu(self):
+        """
+        Whether this device is the main CPU device.
+
+        This shorthand method is very useful when deciding whether a memory address
+        is CPU-accessible.
+        """
+        return self.device.get().is_cpu()
+
+    @property
+    def device_type(self):
+        """
+        Return the DeviceAllocationType of this device.
+        """
+        return _wrap_device_allocation_type(self.device.get().device_type())
+
+
+cdef class MemoryManager(_Weakrefable):
+    """
+    An object that provides memory management primitives.
+
+    A MemoryManager is always tied to a particular Device instance.
+    It can also have additional parameters (such as a MemoryPool to
+    allocate CPU memory).
+
+    """
+
+    def __init__(self):
+        raise TypeError("Do not call MemoryManager's constructor directly, "
+                        "use pyarrow.default_cpu_memory_manager() instead.")
+
+    cdef void init(self, const shared_ptr[CMemoryManager]& mm):
+        self.memory_manager = mm
+
+    @staticmethod
+    cdef wrap(const shared_ptr[CMemoryManager]& mm):
+        cdef MemoryManager self = MemoryManager.__new__(MemoryManager)
+        self.init(mm)
+        return self
+
+    def __repr__(self):
+        return "<pyarrow.MemoryManager device: {}>".format(
+            frombytes(self.memory_manager.get().device().get().ToString())
+        )
+
+    @property
+    def device(self):
+        """
+        The device this MemoryManager is tied to.
+        """
+        return Device.wrap(self.memory_manager.get().device())
+
+    @property
+    def is_cpu(self):
+        """
+        Whether this MemoryManager is tied to the main CPU device.
+
+        This shorthand method is very useful when deciding whether a memory
+        address is CPU-accessible.
+        """
+        return self.memory_manager.get().is_cpu()
+
+
+def default_cpu_memory_manager():
+    """
+    Return the default CPU MemoryManager instance.
+
+    The returned singleton instance uses the default MemoryPool.
+    """
+    return MemoryManager.wrap(c_default_cpu_memory_manager())
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8bfc31edc747d..a66f584b83f5b 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -316,6 +316,38 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
     cdef cppclass CProxyMemoryPool" arrow::ProxyMemoryPool"(CMemoryPool):
         CProxyMemoryPool(CMemoryPool*)
 
+    ctypedef enum CDeviceAllocationType "arrow::DeviceAllocationType":
+        CDeviceAllocationType_kCPU "arrow::DeviceAllocationType::kCPU"
+        CDeviceAllocationType_kCUDA "arrow::DeviceAllocationType::kCUDA"
+        CDeviceAllocationType_kCUDA_HOST "arrow::DeviceAllocationType::kCUDA_HOST"
+        CDeviceAllocationType_kOPENCL "arrow::DeviceAllocationType::kOPENCL"
+        CDeviceAllocationType_kVULKAN "arrow::DeviceAllocationType::kVULKAN"
+        CDeviceAllocationType_kMETAL "arrow::DeviceAllocationType::kMETAL"
+        CDeviceAllocationType_kVPI "arrow::DeviceAllocationType::kVPI"
+        CDeviceAllocationType_kROCM "arrow::DeviceAllocationType::kROCM"
+        CDeviceAllocationType_kROCM_HOST "arrow::DeviceAllocationType::kROCM_HOST"
+        CDeviceAllocationType_kEXT_DEV "arrow::DeviceAllocationType::kEXT_DEV"
+        CDeviceAllocationType_kCUDA_MANAGED "arrow::DeviceAllocationType::kCUDA_MANAGED"
+        CDeviceAllocationType_kONEAPI "arrow::DeviceAllocationType::kONEAPI"
+        CDeviceAllocationType_kWEBGPU "arrow::DeviceAllocationType::kWEBGPU"
+        CDeviceAllocationType_kHEXAGON "arrow::DeviceAllocationType::kHEXAGON"
+
+    cdef cppclass CDevice" arrow::Device":
+        const char* type_name()
+        c_string ToString()
+        c_bool Equals(const CDevice& other)
+        int64_t device_id()
+        c_bool is_cpu() const
+        shared_ptr[CMemoryManager] default_memory_manager()
+        CDeviceAllocationType device_type()
+
+    cdef cppclass CMemoryManager" arrow::MemoryManager":
+        const shared_ptr[CDevice] device()
+        c_bool is_cpu() const
+
+    shared_ptr[CMemoryManager] c_default_cpu_memory_manager \
+        " arrow::default_cpu_memory_manager"()
+
     cdef cppclass CBuffer" arrow::Buffer":
         CBuffer(const uint8_t* data, int64_t size)
         const uint8_t* data()
@@ -328,6 +360,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         c_bool is_mutable() const
         c_string ToHexString()
         c_bool Equals(const CBuffer& other)
+        shared_ptr[CDevice] device()
+        const shared_ptr[CMemoryManager] memory_manager()
+        CDeviceAllocationType device_type()
 
     CResult[shared_ptr[CBuffer]] SliceBufferSafe(
         const shared_ptr[CBuffer]& buffer, int64_t offset)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 9e8026deb435c..48b7934209c3a 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1327,6 +1327,39 @@ cdef class Buffer(_Weakrefable):
         """
         return self.buffer.get().is_cpu()
 
+    @property
+    def device(self):
+        """
+        The device where the buffer resides.
+
+        Returns
+        -------
+        Device
+        """
+        return Device.wrap(self.buffer.get().device())
+
+    @property
+    def memory_manager(self):
+        """
+        The memory manager associated with the buffer.
+
+        Returns
+        -------
+        MemoryManager
+        """
+        return MemoryManager.wrap(self.buffer.get().memory_manager())
+
+    @property
+    def device_type(self):
+        """
+        The device type where the buffer resides.
+
+        Returns
+        -------
+        DeviceAllocationType
+        """
+        return _wrap_device_allocation_type(self.buffer.get().device_type())
+
     @property
     def parent(self):
         cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent()
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index bfd266a807c40..1bc639cc8d2ba 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -524,6 +524,26 @@ cdef class RecordBatch(_Tabular):
     cdef void init(self, const shared_ptr[CRecordBatch]& table)
 
 
+cdef class Device(_Weakrefable):
+    cdef:
+        shared_ptr[CDevice] device
+
+    cdef void init(self, const shared_ptr[CDevice]& device)
+
+    @staticmethod
+    cdef wrap(const shared_ptr[CDevice]& device)
+
+
+cdef class MemoryManager(_Weakrefable):
+    cdef:
+        shared_ptr[CMemoryManager] memory_manager
+
+    cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager)
+
+    @staticmethod
+    cdef wrap(const shared_ptr[CMemoryManager]& mm)
+
+
 cdef class Buffer(_Weakrefable):
     cdef:
         shared_ptr[CBuffer] buffer
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 3245e50f0fe69..904e018ffddcc 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -162,6 +162,9 @@ include "pandas-shim.pxi"
 # Memory pools and allocation
 include "memory.pxi"
 
+# Device type and memory manager
+include "device.pxi"
+
 # DataType, Field, Schema
 include "types.pxi"
 
diff --git a/python/pyarrow/tests/test_device.py b/python/pyarrow/tests/test_device.py
new file mode 100644
index 0000000000000..6bdb015be1a95
--- /dev/null
+++ b/python/pyarrow/tests/test_device.py
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow as pa
+
+
+def test_device_memory_manager():
+    mm = pa.default_cpu_memory_manager()
+    assert mm.is_cpu
+    device = mm.device
+    assert device.is_cpu
+    assert device.device_id == -1
+    assert device.device_type == pa.DeviceAllocationType.CPU
+    assert device.type_name == "arrow::CPUDevice"
+    assert device == device
+    assert repr(device) == "<pyarrow.Device: CPUDevice()>"
+    assert repr(mm) == "<pyarrow.MemoryManager device: CPUDevice()>"
+
+
+def test_buffer_device():
+    arr = pa.array([0, 1, 2])
+    buf = arr.buffers()[1]
+    assert buf.device_type == pa.DeviceAllocationType.CPU
+    assert isinstance(buf.device, pa.Device)
+    assert isinstance(buf.memory_manager, pa.MemoryManager)
+    assert buf.is_cpu
+    assert buf.device.is_cpu
+    assert buf.device == pa.default_cpu_memory_manager().device
+    assert buf.memory_manager.is_cpu
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 39dac4eb81dfb..308c37fd0de1e 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -242,6 +242,8 @@ def test_set_timezone_db_path_non_windows():
     pa.MemoryPool,
     pa.LoggingMemoryPool,
     pa.ProxyMemoryPool,
+    pa.Device,
+    pa.MemoryManager,
 ])
 def test_extension_type_constructor_errors(klass):
     # ARROW-2638: prevent calling extension class constructors directly