Typed and type-erased Tensor variants, impl Index/Mut for Tensor (#26)
* Rename Array to_tensor_impl -> as_tensor_impl

* Cpp accept &self instead of *self

* Move TensorInfo to program.rs

* Rename Tensor.tensor_ref() -> Tensor.as_cpp_tensor()

* Typed and type-erased Tensor variants

* Add runtime/core/exec_aten/util headers

* Implement Index/Mut for Tensor

* Return an immutable tensor in as_typed and as_type_erased
barakugav authored Aug 28, 2024
1 parent 5efedff commit 7e9aca6
Showing 14 changed files with 3,309 additions and 435 deletions.
11 changes: 5 additions & 6 deletions README.md
@@ -28,29 +28,28 @@ with open("model.pte", "wb") as file:
 ```
 Execute the model in Rust:
 ```rust
-use executorch::evalue::{EValue, Tag};
+use executorch::evalue::EValue;
 use executorch::module::Module;
 use executorch::tensor::{Array, Tensor};
 use ndarray::array;
 
 let mut module = Module::new("model.pte", None);
 
 let input_array1 = Array::new(array![1.0_f32]);
-let input_tensor1 = input_array1.to_tensor_impl();
+let input_tensor1 = input_array1.as_tensor_impl();
 let input_evalue1 = EValue::new(Tensor::new(&input_tensor1));
 
 let input_array2 = Array::new(array![1.0_f32]);
-let input_tensor2 = input_array2.to_tensor_impl();
+let input_tensor2 = input_array2.as_tensor_impl();
 let input_evalue2 = EValue::new(Tensor::new(&input_tensor2));
 
 let outputs = module.forward(&[input_evalue1, input_evalue2]).unwrap();
 assert_eq!(outputs.len(), 1);
 let output = outputs.into_iter().next().unwrap();
-assert_eq!(output.tag(), Some(Tag::Tensor));
-let output = output.as_tensor();
+let output = output.as_tensor().into_typed::<f32>();
 
 println!("Output tensor computed: {:?}", output);
-assert_eq!(array![2.0_f32], output.as_array());
+assert_eq!(array![2.0], output.as_array());
 ```
 See `example/hello_world_add` and `example/hello_world_add_no_std` for the complete examples.
11 changes: 5 additions & 6 deletions examples/hello_world_add/src/main.rs
@@ -2,7 +2,7 @@
 
 use std::path::PathBuf;
 
-use executorch::evalue::{EValue, Tag};
+use executorch::evalue::EValue;
 use executorch::module::Module;
 use executorch::tensor::{Array, Tensor};
 use ndarray::array;
@@ -20,19 +20,18 @@ fn main() {
 );
 
 let input_array1 = Array::new(array![1.0_f32]);
-let input_tensor1 = input_array1.to_tensor_impl();
+let input_tensor1 = input_array1.as_tensor_impl();
 let input_evalue1 = EValue::new(Tensor::new(&input_tensor1));
 
 let input_array2 = Array::new(array![1.0_f32]);
-let input_tensor2 = input_array2.to_tensor_impl();
+let input_tensor2 = input_array2.as_tensor_impl();
 let input_evalue2 = EValue::new(Tensor::new(&input_tensor2));
 
 let outputs = module.forward(&[input_evalue1, input_evalue2]).unwrap();
 assert_eq!(outputs.len(), 1);
 let output = outputs.into_iter().next().unwrap();
-assert_eq!(output.tag(), Some(Tag::Tensor));
-let output = output.as_tensor();
+let output = output.as_tensor().into_typed::<f32>();
 
 println!("Output tensor computed: {:?}", output);
-assert_eq!(array![2.0_f32], output.as_array());
+assert_eq!(array![2.0], output.as_array());
 }
15 changes: 7 additions & 8 deletions examples/hello_world_add_no_std/src/main.rs
@@ -3,7 +3,7 @@
 // #![no_main]
 
 use executorch::data_loader::FileDataLoader;
-use executorch::evalue::{EValue, Tag};
+use executorch::evalue::EValue;
 use executorch::memory::{HierarchicalAllocator, MemoryAllocator, MemoryManager};
 use executorch::program::{Program, ProgramVerification};
 use executorch::tensor::{Array, Tensor};
@@ -50,15 +50,15 @@ fn real_main() {
 .unwrap();
 
 let input_array1 = Array::new(ndarray::arr1(&[1.0_f32]));
-let input_tensor_impl1 = input_array1.to_tensor_impl();
-let storage = executorch::storage!(Tensor);
+let input_tensor_impl1 = input_array1.as_tensor_impl();
+let storage = executorch::storage!(Tensor<f32>);
 let input_tensor1 = storage.new(&input_tensor_impl1);
 let storage = executorch::storage!(EValue);
 let input_evalue1 = storage.new(input_tensor1);
 
 let input_array2 = Array::new(ndarray::arr1(&[1.0_f32]));
-let input_tensor_impl2 = input_array2.to_tensor_impl();
-let storage = executorch::storage!(Tensor);
+let input_tensor_impl2 = input_array2.as_tensor_impl();
+let storage = executorch::storage!(Tensor<f32>);
 let input_tensor2 = storage.new(&input_tensor_impl2);
 let storage = executorch::storage!(EValue);
 let input_evalue2 = storage.new(input_tensor2);
@@ -70,11 +70,10 @@
 
 let outputs = method_exe.execute().unwrap();
 let output = outputs.get(0);
-assert_eq!(output.tag(), Some(Tag::Tensor));
-let output = output.as_tensor();
+let output = output.as_tensor().into_typed::<f32>();
 
 libc_println!("Output tensor computed: {:?}", output);
-assert_eq!(ndarray::arr1(&[2.0_f32]), output.as_array());
+assert_eq!(ndarray::arr1(&[2.0]), output.as_array());
 }
 
 // FIXME: Unfortunately, no_std is WIP
255 additions & 0 deletions (new file: a dim-order utility header, part of the runtime/core/exec_aten/util headers added by this commit)
@@ -0,0 +1,255 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <cstdint>

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/platform/assert.h>
#include <executorch/runtime/platform/compiler.h>

namespace torch {
namespace executor {

namespace {
template <typename DimOrderType>
bool validate_dim_order(const DimOrderType* dim_order, const size_t dims) {
for (int32_t i = 0; i < dims; ++i) {
if (dim_order[i] >= dims) {
return false;
}
}
return true;
}
} // namespace

/**
* Check if a given dim_order array is equivalent to the contiguous dim order of
* {0, 1, 2, 3, ...}
*
* @param[in] dim_order pointer to dim_order array
* @param[in] dims length of the dim_order array
*/
template <typename DimOrderType>
inline bool is_contiguous_dim_order(
const DimOrderType* dim_order,
const size_t dims) {
for (int i = 0; i < dims; ++i) {
if (dim_order[i] != i) {
return false;
}
}
return true;
}
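
A minimal usage sketch (the include path is an assumption based on this commit's message; the array values are illustrative, not from the commit):

```cpp
#include <cstdint>

// Include path is an assumption based on the commit message.
#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>

// Sketch: the identity order {0, 1, 2} is contiguous; a permutation is not.
bool contiguous_demo() {
  const uint8_t identity[3] = {0, 1, 2};
  const uint8_t permuted[3] = {0, 2, 1};
  return torch::executor::is_contiguous_dim_order(identity, 3) &&
      !torch::executor::is_contiguous_dim_order(permuted, 3); // returns true
}
```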

/**
* Check if a given dim_order array is equivalent to a channels last dim order.
* Channels last dim order is only valid for 4-dim and 5-dim tensors.
*
* @param[in] dim_order pointer to dim_order array
* @param[in] dims length of the dim_order array
*/
template <typename DimOrderType>
bool is_channels_last_dim_order(
const DimOrderType* dim_order,
const size_t dims) {
if (dims != 4 && dims != 5) {
return false;
}
// 4-dim tensor is interpreted as NCHW, 5-dim tensor is interpreted as NCDHW
size_t channels_dim = 1;
// Last value in the dim order should be the channels dim
if (dim_order[dims - 1] != channels_dim) {
return false;
}

if (dim_order[0] != 0) {
return false;
}
int d = 1;
while (d < dims - 1) {
if (dim_order[d] != d + 1) {
return false;
}
d++;
}
return true;
}
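
With the NCHW interpretation, channels-last memory layout corresponds to dim order {0, 2, 3, 1}. A sketch of the 4-dim case (same assumed include path; values illustrative):

```cpp
#include <cstdint>

// Include path is an assumption based on the commit message.
#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>

// Sketch: {0, 2, 3, 1} lays an NCHW tensor out as N, H, W, C in memory,
// so the channels dim (1) moves fastest and the check passes.
bool channels_last_demo() {
  const uint8_t nhwc[4] = {0, 2, 3, 1};
  const uint8_t nchw[4] = {0, 1, 2, 3};
  return torch::executor::is_channels_last_dim_order(nhwc, 4) &&
      !torch::executor::is_channels_last_dim_order(nchw, 4); // returns true
}
```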

/*
 * This utility translates sizes to strides using dimension order
 * information. The dimension order specifies how the dimensions are laid out
 * in memory. For example, for sizes = [2, 3, 4, 5] with
 * dim_names = [N, C, H, W] and dim_order = [0, 2, 3, 1], the resulting
 * strides = [60, 1, 15, 3].
 * param[in]: sizes, pointer to the sizes array
 * param[in]: dim_order, pointer to the dimension order array
 * param[in]: dims, number of dims; sizes and dim_order must each have length dims
 * param[out]: strides, pointer to the strides array that is filled in
 *
 * NB: The reason for not using ArrayRef is the dependency on kernel_types.h.
 * That header cannot be included because of the circular dependency it would
 * cause: kernel_types depends on executorch_kernel_types in lean mode, which
 * compiles TensorImpl.cpp. executorch_kernel_types needs to depend on
 * dim_order_utils in order to use dim_order_to_stride in its resize impl. If
 * dim_order_utils depended on kernel_types, we would have circular deps. This
 * is also the reason for templatizing this function. Better ideas welcome!
 * TODO(T148342910)
 *
 * Note that this function does not check that the provided dim order is
 * valid. It should only be used when the validity of the dim order has been
 * checked beforehand. A safer version of this function is provided below as
 * dim_order_to_stride, which checks that the dim order is valid.
 */
template <typename SizesType, typename DimOrderType, typename StridesType>
inline void dim_order_to_stride_nocheck(
const SizesType* sizes,
const DimOrderType* dim_order,
const size_t dims,
StridesType* strides) {
// For 0 dim tensors, just return ok.
if (dims == 0) {
return;
}
// Fastest moving dim has stride of 1.
// For example:
// Size = [2, 3, 4, 5] dim_names = [N, C, H, W]
// dim_order = [0, 2, 3, 1]
// strides = [60, 1, 15, 3]
strides[dim_order[dims - 1]] = 1;
for (int32_t i = dims - 2; i >= 0; --i) {
if (sizes[dim_order[i + 1]] == 0) {
strides[dim_order[i]] = strides[dim_order[i + 1]];
} else {
strides[dim_order[i]] =
strides[dim_order[i + 1]] * sizes[dim_order[i + 1]];
}
}
}
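
A sketch reproducing the worked example from the comment above (assumed include path; sizes and dim order taken from the comment):

```cpp
#include <cstdint>

// Include path is an assumption based on the commit message.
#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>

// Sketch: sizes {2, 3, 4, 5} (NCHW) with dim_order {0, 2, 3, 1} give
// C stride 1, W stride 3, H stride 3*5 = 15, and N stride 15*4 = 60.
void stride_demo() {
  const int32_t sizes[4] = {2, 3, 4, 5};
  const uint8_t dim_order[4] = {0, 2, 3, 1};
  int32_t strides[4] = {0, 0, 0, 0};
  torch::executor::dim_order_to_stride_nocheck(sizes, dim_order, 4, strides);
  // strides is now {60, 1, 15, 3}.
}
```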

template <typename SizesType, typename DimOrderType, typename StridesType>
__ET_NODISCARD inline Error dim_order_to_stride(
const SizesType* sizes,
const DimOrderType* dim_order,
const size_t dims,
StridesType* strides) {
// For 0 dim tensors, just return ok.
if (dims == 0) {
return Error::Ok;
}
ET_CHECK_OR_RETURN_ERROR(
validate_dim_order(dim_order, dims),
InvalidArgument,
"Invalid dim order. One of the value is larger than the number of dims %zu",
dims);

dim_order_to_stride_nocheck(sizes, dim_order, dims, strides);
return Error::Ok;
}
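
A sketch of the failure path of the checked variant (assumed include path; values illustrative):

```cpp
#include <cstdint>

// Include path is an assumption based on the commit message.
#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>

// Sketch: the checked variant rejects an out-of-range dim_order entry and
// returns Error::InvalidArgument instead of writing bogus strides.
torch::executor::Error checked_stride_demo() {
  const int32_t sizes[2] = {2, 3};
  const uint8_t bad_order[2] = {0, 7}; // 7 >= dims (2), so this is invalid
  int32_t strides[2] = {0, 0};
  return torch::executor::dim_order_to_stride(sizes, bad_order, 2, strides);
}
```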

template <typename StridesType, typename DimOrderType>
struct StrideDimOrder {
StridesType stride;
DimOrderType dim_order;

StrideDimOrder(StridesType stride, DimOrderType dim_order)
: stride(stride), dim_order(dim_order) {}
StrideDimOrder() = default;
bool operator>(const StrideDimOrder& other) const {
// descending order
return stride < other.stride;
}
};

template <typename ValueType>
struct Sorter {
public:
void quick_sort(ValueType arr[], int32_t low, int32_t high) {
if (low < high) {
ValueType pivot = arr[high];
int32_t pos = partition(arr, low, high, pivot);

quick_sort(arr, low, pos - 1);
quick_sort(arr, pos + 1, high);
}
}

private:
void swap(ValueType arr[], int32_t pos1, int32_t pos2) noexcept {
ValueType temp = arr[pos1];
arr[pos1] = arr[pos2];
arr[pos2] = temp;
}

int32_t
partition(ValueType arr[], int32_t low, int32_t high, ValueType pivot) {
int32_t i = low;
int32_t j = low;
while (i <= high) {
if (arr[i] > pivot) {
i++;
} else {
swap(arr, i++, j++);
}
}
return j - 1;
}
};
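
Note that StrideDimOrder::operator> compares with `<`, so this quicksort orders entries by descending stride. A sketch (assumed include path; values illustrative):

```cpp
#include <cstdint>

// Include path is an assumption based on the commit message.
#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>

// Sketch: because operator> is inverted, quick_sort leaves the largest
// stride first; the dim_order fields then read off the dimension order.
void sort_demo() {
  using Entry = torch::executor::StrideDimOrder<int32_t, uint8_t>;
  Entry entries[3] = {Entry(5, 0), Entry(1, 1), Entry(15, 2)};
  torch::executor::Sorter<Entry> sorter;
  sorter.quick_sort(entries, 0, 2);
  // entries now hold strides {15, 5, 1} with dim_order fields {2, 0, 1}.
}
```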

/*
 * This utility translates strides to dimension order
 * information. The dimension order specifies how the dimensions are laid out
 * in memory. For example, for a tensor with sizes [3, 5, 2] and
 * strides [5, 1, 15], the dim order should be [2, 0, 1], obtained by sorting
 * the strides in descending order.
 * param[in]: strides, pointer to the strides array
 * param[in]: dims, number of dims; strides and dim_order must each have length dims
 * param[out]: dim_order, pointer to the dimension order array that is filled in
 *
 * NB: The reason for not using ArrayRef is the dependency on kernel_types.h.
 * That header cannot be included because of the circular dependency it would
 * cause: kernel_types depends on executorch_kernel_types in lean mode, which
 * compiles TensorImpl.cpp. executorch_kernel_types needs to depend on
 * dim_order_utils in order to use dim_order_to_stride in its resize impl. If
 * dim_order_utils depended on kernel_types, we would have circular deps. This
 * is also the reason for templatizing this function. Better ideas welcome!
 * TODO(T148342910)
 */
template <typename DimOrderType, typename StridesType>
__ET_NODISCARD inline Error stride_to_dim_order(
const StridesType* strides,
const size_t dims,
DimOrderType* dim_order) {
const size_t kMaxNumOfDimensions = 16;
ET_CHECK_OR_RETURN_ERROR(
dim_order != nullptr,
MemoryAllocationFailed,
"Need memory to get dim_order.");
ET_CHECK_OR_RETURN_ERROR(
dims <= kMaxNumOfDimensions,
NotSupported,
"dims %zu exceeds maximum allowed %zu",
dims,
kMaxNumOfDimensions);
StrideDimOrder<StridesType, DimOrderType> array[kMaxNumOfDimensions];
for (DimOrderType i = 0; i < dims; i++) {
array[i].dim_order = i;
array[i].stride = strides[i];
}

Sorter<StrideDimOrder<StridesType, DimOrderType>> sorter;

sorter.quick_sort(array, 0, dims - 1);

for (auto i = 0; i < dims; i++) {
dim_order[i] = array[i].dim_order;
}
return Error::Ok;
}
} // namespace executor
} // namespace torch
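
A sketch mirroring the sizes/strides example in the comment above (assumed include path):

```cpp
#include <cstdint>

// Include path is an assumption based on the commit message.
#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>

// Sketch: strides {5, 1, 15} sorted descending are {15, 5, 1},
// recovering dim_order {2, 0, 1}.
torch::executor::Error dim_order_demo() {
  const int32_t strides[3] = {5, 1, 15};
  uint8_t dim_order[3] = {0, 0, 0};
  // On Error::Ok, dim_order is {2, 0, 1}.
  return torch::executor::stride_to_dim_order(strides, 3, dim_order);
}
```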