Skip to content

Commit

Permalink
Several changes related to initial code review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
raulcd committed Jun 11, 2024
1 parent d74ca34 commit 03719ff
Show file tree
Hide file tree
Showing 11 changed files with 40 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
- name: conda-python-3.10-no-numpy
cache: conda-python-3.10
image: conda-python-no-numpy
title: AMD64 Conda Python 3.10 Testing without numpy
title: AMD64 Conda Python 3.10 without NumPy
python: "3.10"
env:
PYTHON: ${{ matrix.python || 3.8 }}
Expand Down
4 changes: 2 additions & 2 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -323,17 +323,17 @@ set(PYARROW_CPP_SRCS
${PYARROW_CPP_SOURCE_DIR}/gdb.cc
${PYARROW_CPP_SOURCE_DIR}/helpers.cc
${PYARROW_CPP_SOURCE_DIR}/inference.cc
${PYARROW_CPP_SOURCE_DIR}/init.cc
${PYARROW_CPP_SOURCE_DIR}/io.cc
${PYARROW_CPP_SOURCE_DIR}/ipc.cc
${PYARROW_CPP_SOURCE_DIR}/numpy_convert.cc
${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
${PYARROW_CPP_SOURCE_DIR}/numpy_to_arrow.cc
${PYARROW_CPP_SOURCE_DIR}/python_test.cc
${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc
${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
${PYARROW_CPP_SOURCE_DIR}/serialize.cc
${PYARROW_CPP_SOURCE_DIR}/udf.cc)
set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/init.cc
set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
PROPERTIES SKIP_PRECOMPILE_HEADERS ON
SKIP_UNITY_BUILD_INCLUSION ON)

Expand Down
14 changes: 8 additions & 6 deletions python/pyarrow/builder.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.

import math


cdef class StringBuilder(_Weakrefable):
"""
Expand Down Expand Up @@ -42,10 +44,10 @@ cdef class StringBuilder(_Weakrefable):
value : string/bytes or np.nan/None
The value to append to the string array builder.
"""
if value is None or ('numpy' in sys.modules and value is np.nan):
self.builder.get().AppendNull()
elif isinstance(value, (bytes, str)):
if isinstance(value, (bytes, str)):
self.builder.get().Append(tobytes(value))
elif value is None or math.isnan(value):
self.builder.get().AppendNull()
else:
raise TypeError('StringBuilder only accepts string objects')

Expand Down Expand Up @@ -108,10 +110,10 @@ cdef class StringViewBuilder(_Weakrefable):
value : string/bytes or np.nan/None
The value to append to the string array builder.
"""
if value is None or ('numpy' in sys.modules and value is np.nan):
self.builder.get().AppendNull()
elif isinstance(value, (bytes, str)):
if isinstance(value, (bytes, str)):
self.builder.get().Append(tobytes(value))
elif value is None or math.isnan(value):
self.builder.get().AppendNull()
else:
raise TypeError('StringViewBuilder only accepts string objects')

Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/includes/libarrow_python.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:
CResult[PyObject*] StringToTzinfo(c_string)


cdef extern from "arrow/python/init.h":
int arrow_init_numpy(c_bool import_numpy) except -1
cdef extern from "arrow/python/numpy_init.h" namespace "arrow::py":
int arrow_init_numpy() except -1


cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
Expand Down
11 changes: 3 additions & 8 deletions python/pyarrow/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import decimal as _pydecimal
try:
import numpy as np
except ImportError:
pass
np = None
import os
import sys

Expand All @@ -38,14 +38,9 @@ cimport cpython as cp
# Initialize NumPy C API only if numpy was able to be imported


def initialize_numpy():
if "numpy" in sys.modules:
arrow_init_numpy(True)
else:
arrow_init_numpy(False)
if np is not None:
arrow_init_numpy()


initialize_numpy()
# Initialize PyArrow C++ API
# (used from some of our C++ code, see e.g. ARROW-5260)
import_pyarrow()
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/src/arrow/python/inference.cc
Original file line number Diff line number Diff line change
Expand Up @@ -395,11 +395,11 @@ class TypeInferrer {
*keep_going = make_unions_;
} else if (arrow::py::is_scalar(obj)) {
RETURN_NOT_OK(VisitArrowScalar(obj, keep_going));
} else if (get_numpy_imported() && PyArray_CheckAnyScalarExact(obj)) {
} else if (has_numpy() && PyArray_CheckAnyScalarExact(obj)) {
RETURN_NOT_OK(VisitDType(PyArray_DescrFromScalar(obj), keep_going));
} else if (PySet_Check(obj) || (Py_TYPE(obj) == &PyDictValues_Type)) {
RETURN_NOT_OK(VisitSet(obj, keep_going));
} else if (get_numpy_imported() && PyArray_Check(obj)) {
} else if (has_numpy() && PyArray_Check(obj)) {
RETURN_NOT_OK(VisitNdarray(obj, keep_going));
} else if (PyDict_Check(obj)) {
RETURN_NOT_OK(VisitDict(obj));
Expand Down
6 changes: 3 additions & 3 deletions python/pyarrow/src/arrow/python/iterators.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include "arrow/array/array_primitive.h"

#include "arrow/python/common.h"
#include "arrow/python/init.h"
#include "arrow/python/numpy_init.h"
#include "arrow/python/numpy_internal.h"

namespace arrow {
Expand All @@ -45,7 +45,7 @@ inline Status VisitSequenceGeneric(PyObject* obj, int64_t offset, VisitorFunc&&
// VisitorFunc may set to false to terminate iteration
bool keep_going = true;

if (get_numpy_imported() && PyArray_Check(obj)) {
if (has_numpy() && PyArray_Check(obj)) {
PyArrayObject* arr_obj = reinterpret_cast<PyArrayObject*>(obj);
if (PyArray_NDIM(arr_obj) != 1) {
return Status::Invalid("Only 1D arrays accepted");
Expand Down Expand Up @@ -103,7 +103,7 @@ inline Status VisitSequence(PyObject* obj, int64_t offset, VisitorFunc&& func) {
template <class VisitorFunc>
inline Status VisitSequenceMasked(PyObject* obj, PyObject* mo, int64_t offset,
VisitorFunc&& func) {
if (get_numpy_imported() && PyArray_Check(mo)) {
if (has_numpy() && PyArray_Check(mo)) {
PyArrayObject* mask = reinterpret_cast<PyArrayObject*>(mo);
if (PyArray_NDIM(mask) != 1) {
return Status::Invalid("Mask must be 1D array");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,16 @@
// Trigger the array import (inversion of NO_IMPORT_ARRAY)
#define NUMPY_IMPORT_ARRAY

#include "arrow/python/init.h"
#include "arrow/python/numpy_init.h"
#include "arrow/python/numpy_interop.h"

namespace arrow::py {
bool numpy_imported = false;

int arrow_init_numpy(bool import_numpy) {
if (import_numpy) {
numpy_imported = true;
return arrow::py::import_numpy();
} else {
return 0;
}
int arrow_init_numpy() {
numpy_imported = true;
return arrow::py::import_numpy();
}

bool get_numpy_imported() { return numpy_imported; }
bool has_numpy() { return numpy_imported; }
} // namespace arrow::py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#include "arrow/python/platform.h"
#include "arrow/python/visibility.h"

extern "C" {
namespace arrow::py {
ARROW_PYTHON_EXPORT
int arrow_init_numpy(bool import_numpy);
bool get_numpy_imported();
}
int arrow_init_numpy();
bool has_numpy();
} // namespace arrow::py
8 changes: 4 additions & 4 deletions python/pyarrow/src/arrow/python/numpy_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#pragma once

#include "arrow/python/init.h"
#include "arrow/python/numpy_init.h"
#include "arrow/python/numpy_interop.h"

#include "arrow/status.h"
Expand Down Expand Up @@ -156,23 +156,23 @@ inline Status VisitNumpyArrayInline(PyArrayObject* arr, VISITOR* visitor) {
namespace internal {

inline bool PyFloatScalar_Check(PyObject* obj) {
if (get_numpy_imported()) {
if (has_numpy()) {
return PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating);
} else {
return PyFloat_Check(obj);
}
}

inline bool PyIntScalar_Check(PyObject* obj) {
if (get_numpy_imported()) {
if (has_numpy()) {
return PyLong_Check(obj) || PyArray_IsScalar(obj, Integer);
} else {
return PyLong_Check(obj);
}
}

inline bool PyBoolScalar_Check(PyObject* obj) {
if (get_numpy_imported()) {
if (has_numpy()) {
return PyBool_Check(obj) || PyArray_IsScalar(obj, Bool);
} else {
return PyBool_Check(obj);
Expand Down
8 changes: 4 additions & 4 deletions python/pyarrow/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ class PyValue {
return true;
} else if (obj == Py_False) {
return false;
} else if (get_numpy_imported() && PyArray_IsScalar(obj, Bool)) {
} else if (has_numpy() && PyArray_IsScalar(obj, Bool)) {
return reinterpret_cast<PyBoolScalarObject*>(obj)->obval == NPY_TRUE;
} else {
return internal::InvalidValue(obj, "tried to convert to boolean");
Expand Down Expand Up @@ -384,7 +384,7 @@ class PyValue {
default:
return Status::UnknownError("Invalid time unit");
}
} else if (get_numpy_imported() && PyArray_CheckAnyScalarExact(obj)) {
} else if (has_numpy() && PyArray_CheckAnyScalarExact(obj)) {
// validate that the numpy scalar has np.datetime64 dtype
ARROW_ASSIGN_OR_RAISE(auto numpy_type, NumPyScalarToArrowDataType(obj));
if (!numpy_type->Equals(*type)) {
Expand Down Expand Up @@ -663,7 +663,7 @@ class PyPrimitiveConverter<
ARROW_ASSIGN_OR_RAISE(
auto converted, PyValue::Convert(this->primitive_type_, this->options_, value));
// Numpy NaT sentinels can be checked after the conversion
if (get_numpy_imported() && PyArray_CheckAnyScalarExact(value) &&
if (has_numpy() && PyArray_CheckAnyScalarExact(value) &&
PyValue::IsNaT(this->primitive_type_, converted)) {
this->primitive_builder_->UnsafeAppendNull();
} else {
Expand Down Expand Up @@ -803,7 +803,7 @@ class PyListConverter : public ListConverter<T, PyConverter, PyConverterTrait> {
if (PyValue::IsNull(this->options_, value)) {
return this->list_builder_->AppendNull();
}
if (get_numpy_imported() && PyArray_Check(value)) {
if (has_numpy() && PyArray_Check(value)) {
RETURN_NOT_OK(AppendNdarray(value));
} else if (PySequence_Check(value)) {
RETURN_NOT_OK(AppendSequence(value));
Expand Down

0 comments on commit 03719ff

Please sign in to comment.