From c657fe4c676eef10aca9faf4514536b46b37b784 Mon Sep 17 00:00:00 2001
From: Donghyeon Jeong <dhyeon.jeong@samsung.com>
Date: Tue, 27 Feb 2024 16:09:25 +0900
Subject: [PATCH] [TensorV2] Add utility member functions to TensorV2 class

This pull request adds several new utility member functions to the
TensorV2 class, enabling users to perform various tasks with their
tensors more easily and efficiently. These include saving and loading
tensors, updating batches, getting argmax and max absolute values, and
more. The implementation is based on the current Tensor class and aims
to improve the overall usability and flexibility of the TensorV2 class.

**Changes proposed in this PR:**
- Added save() and read() methods to allow saving and loading of tensor
data.
- Added Map() method to create a new Tensor object from a buffer.
- Added argmax() and max_abs() methods to retrieve the indices of max
value by batch and the value of the maximum absolute element in a
tensor.
- Added updateBatch() to update tensor batch size.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <dhyeon.jeong@samsung.com>
---
 nntrainer/tensor/float_tensor.cpp |  22 +++++++
 nntrainer/tensor/float_tensor.h   |  10 +++
 nntrainer/tensor/half_tensor.cpp  |  23 +++++++
 nntrainer/tensor/half_tensor.h    |  10 +++
 nntrainer/tensor/tensor_base.cpp  |  25 ++++++++
 nntrainer/tensor/tensor_base.h    |  32 ++++++++++
 nntrainer/tensor/tensor_v2.cpp    |  56 +++++++++++++++++
 nntrainer/tensor/tensor_v2.h      | 100 ++++++++++++++++++++++++++++++
 8 files changed, 278 insertions(+)

diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp
index 467c2e36f9..b0be11fbeb 100644
--- a/nntrainer/tensor/float_tensor.cpp
+++ b/nntrainer/tensor/float_tensor.cpp
@@ -783,6 +783,28 @@ void FloatTensor::copyData(const TensorV2 &from) {
   }
 }
 
+std::vector<unsigned int> FloatTensor::argmax() const {
+  std::vector<unsigned int> result;
+  const float *data = (float *)getData();
+  size_t batch_size = batch();
+  size_t feature_len = dim.getFeatureLen();
+
+  result.resize(batch_size);
+
+  for (unsigned int b = 0; b < batch_size; b++) {
+    auto max_iter =
+      std::max_element(data + b * feature_len, data + (b + 1) * feature_len);
+    result[b] = std::distance(data, max_iter) - (b * feature_len);
+  }
+  return result;
+}
+
+float FloatTensor::max_abs() const {
+  const float *data = (float *)getData();
+  unsigned int idx = isamax(size(), data, 1);
+  return *(data + idx);
+}
+
 TensorV2 &FloatTensor::transpose(const std::string &direction,
                                  TensorV2 &output) const {
   unsigned int SL, SI, SJ, SK;
diff --git a/nntrainer/tensor/float_tensor.h b/nntrainer/tensor/float_tensor.h
index c8f239ef26..31e1f52a32 100644
--- a/nntrainer/tensor/float_tensor.h
+++ b/nntrainer/tensor/float_tensor.h
@@ -312,6 +312,16 @@ class FloatTensor : public TensorBase {
    */
   void copyData(const TensorV2 &from);
 
+  /**
+   * @copydoc TensorV2::argmax()
+   */
+  std::vector<unsigned int> argmax() const override;
+
+  /**
+   * @copydoc TensorV2::max_abs()
+   */
+  float max_abs() const override;
+
   /**
    * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
    */
diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp
index 5902c8b6ac..30f339a802 100644
--- a/nntrainer/tensor/half_tensor.cpp
+++ b/nntrainer/tensor/half_tensor.cpp
@@ -858,6 +858,29 @@ void HalfTensor::copyData(const TensorV2 &from) {
   }
 }
 
+std::vector<unsigned int> HalfTensor::argmax() const {
+  std::vector<unsigned int> result;
+  const _FP16 *data = (_FP16 *)getData();
+  size_t batch_size = batch();
+  size_t feature_len = dim.getFeatureLen();
+
+  result.resize(batch_size);
+
+  for (unsigned int b = 0; b < batch_size; b++) {
+    auto max_iter =
+      std::max_element(data + b * feature_len, data + (b + 1) * feature_len);
+    result[b] = std::distance(data, max_iter) - (b * feature_len);
+  }
+
+  return result;
+}
+
+float HalfTensor::max_abs() const {
+  const _FP16 *data = (_FP16 *)getData();
+  unsigned int idx = isamax(size(), data, 1);
+  return (float)(*(data + idx));
+}
+
 TensorV2 &HalfTensor::transpose(const std::string &direction,
                                 TensorV2 &output) const {
   unsigned int SL, SI, SJ, SK;
diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h
index a905ba4bb8..6590e2198c 100644
--- a/nntrainer/tensor/half_tensor.h
+++ b/nntrainer/tensor/half_tensor.h
@@ -311,6 +311,16 @@ class HalfTensor : public TensorBase {
    */
   void copyData(const TensorV2 &from);
 
+  /**
+   * @copydoc TensorV2::argmax()
+   */
+  std::vector<unsigned int> argmax() const override;
+
+  /**
+   * @copydoc TensorV2::max_abs()
+   */
+  float max_abs() const override;
+
   /**
    * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
    */
diff --git a/nntrainer/tensor/tensor_base.cpp b/nntrainer/tensor/tensor_base.cpp
index e94c35a27f..d2aacde8e3 100644
--- a/nntrainer/tensor/tensor_base.cpp
+++ b/nntrainer/tensor/tensor_base.cpp
@@ -40,6 +40,15 @@ bool TensorBase::operator==(const TensorBase &rhs) const {
   return true;
 }
 
+void TensorBase::setTensorVar(TensorDim d, void *buf, size_t offset) {
+  dim = d;
+  strides = d.computeStrides();
+  /// Tensor does not own the memory
+  data = std::shared_ptr<MemoryData>(new MemoryData((void *)buf),
+                                     std::default_delete<MemoryData>());
+  this->offset = offset;
+}
+
 void TensorBase::putData() const {
   if (!data)
     return;
@@ -47,6 +56,12 @@ void TensorBase::putData() const {
   data->invalidate();
 }
 
+const std::shared_ptr<MemoryData> TensorBase::getMemoryData() const {
+  return data;
+}
+
+size_t TensorBase::getOffset() const { return offset; }
+
 void TensorBase::reshape(const TensorDim &d) {
   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     << getName() << " is not contiguous, cannot reshape.";
@@ -64,6 +79,16 @@ void TensorBase::reshape(const TensorDim &d) {
   strides = d.computeStrides();
 }
 
+void TensorBase::updateBatch(unsigned int batch) {
+  if (dim.batch() == batch) {
+    return;
+  }
+
+  if (isAllocated())
+    throw std::invalid_argument("Cannot update batch for an allocated tensor");
+  dim.batch(batch);
+}
+
 size_t TensorBase::getIndex(unsigned int b, unsigned int c, unsigned int h,
                             unsigned int w) const noexcept {
   if (getFormat() == Tformat::NCHW) {
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h
index 28484ed9e9..894f9fd8bd 100644
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -128,6 +128,11 @@ class TensorBase {
    */
   bool operator!=(const TensorBase &rhs) const { return !(*this == rhs); }
 
+  /**
+   * @copydoc TensorV2::setTensorVar(TensorDim d, void *buf, size_t offset)
+   */
+  void setTensorVar(TensorDim d, void *buf, size_t offset);
+
   /**
    * @brief Basic Destructor
    */
@@ -364,6 +369,16 @@ class TensorBase {
    */
   virtual void copyData(const TensorV2 &from) = 0;
 
+  /**
+   * @copydoc TensorV2::argmax()
+   */
+  virtual std::vector<unsigned int> argmax() const = 0;
+
+  /**
+   * @copydoc TensorV2::max_abs()
+   */
+  virtual float max_abs() const = 0;
+
   /**
    * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
    */
@@ -376,6 +391,17 @@ class TensorBase {
    */
   void putData() const;
 
+  /**
+   * @brief return Data pointer of Tensor
+   * @retval template T pointer (float pointer as default)
+   */
+  const std::shared_ptr<MemoryData> getMemoryData() const;
+
+  /**
+   * @brief return offset
+   */
+  size_t getOffset() const;
+
   /**
    * @brief set Tensor Dim
    * @param[in] d TensorDim
@@ -412,6 +438,12 @@ class TensorBase {
    */
   Tdatatype getDataType() const { return dim.getDataType(); }
 
+  /**
+   * @brief update batch size for this tensor
+   * @param batch size
+   */
+  void updateBatch(unsigned int batch);
+
   /**
    * @brief return whether tensor is contiguous or not.
    * @retval bool contiguous
diff --git a/nntrainer/tensor/tensor_v2.cpp b/nntrainer/tensor/tensor_v2.cpp
index ef3e2fee38..c2a9c5a185 100644
--- a/nntrainer/tensor/tensor_v2.cpp
+++ b/nntrainer/tensor/tensor_v2.cpp
@@ -613,6 +613,12 @@ void TensorV2::print(std::ostream &out) const { itensor->print(out); }
 
 void TensorV2::putData() const { itensor->putData(); }
 
+const std::shared_ptr<MemoryData> TensorV2::getMemoryData() const {
+  return itensor->getMemoryData();
+}
+
+size_t TensorV2::getOffset() const { return itensor->getOffset(); }
+
 void TensorV2::copy(const TensorV2 &from) {
   /// @todo enable copy to non-contiguous tensor
   if (!itensor->getContiguous()) {
@@ -684,6 +690,45 @@ TensorV2 TensorV2::clone() const {
   return output;
 }
 
+void TensorV2::save(std::ostream &file) {
+  NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
+    << getName() << " is not contiguous, cannot save.";
+
+  std::streamsize sz = static_cast<std::streamsize>(bytes());
+  NNTR_THROW_IF(sz < 0, std::invalid_argument)
+    << "save size: " << bytes()
+    << " is too big. It cannot be represented by std::streamsize";
+
+  checkedWrite(file, getData<char>(), sz, "[Tensor::save] operation failed");
+  putData();
+}
+
+void TensorV2::read(std::ifstream &file) {
+  NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
+    << getName() << " is not contiguous, cannot read.";
+
+  std::streamsize sz = static_cast<std::streamsize>(bytes());
+
+  NNTR_THROW_IF(sz < 0, std::invalid_argument)
+    << "read size: " << bytes()
+    << " is too big. It cannot be represented by std::streamsize";
+
+  checkedRead(file, getData<char>(), sz, "[Tensor::read] operation failed");
+  putData();
+}
+
+std::vector<unsigned int> TensorV2::argmax() const {
+  NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
+    << getName() << " is not contiguous, cannot get argmax.";
+  return itensor->argmax();
+}
+
+float TensorV2::max_abs() const {
+  NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
+    << getName() << " is not contiguous, cannot get max_abs.";
+  return itensor->max_abs();
+}
+
 TensorV2 TensorV2::transpose(const std::string &direction) const {
   TensorV2 output(getDim());
   transpose(direction, output);
@@ -721,6 +766,8 @@ TensorDim::Format TensorV2::getFormat() const { return itensor->getFormat(); }
 
 Tdatatype TensorV2::getDataType() const { return itensor->getDataType(); }
 
+void TensorV2::updateBatch(unsigned int batch) { itensor->updateBatch(batch); }
+
 const bool TensorV2::getContiguous() const noexcept {
   return itensor->getContiguous();
 }
@@ -797,4 +844,13 @@ TensorV2 TensorV2::getSharedDataTensor(const TensorDim dim_, size_t offset,
   return ret;
 }
 
+void TensorV2::setTensorVar(TensorDim d, void *buf, size_t offset) {
+  itensor->setTensorVar(d, buf, offset);
+}
+
+std::ostream &operator<<(std::ostream &out, TensorV2 const &input) {
+  input.print(out);
+  return out;
+}
+
 } // namespace nntrainer
diff --git a/nntrainer/tensor/tensor_v2.h b/nntrainer/tensor/tensor_v2.h
index 62988edb02..ac5de9e671 100644
--- a/nntrainer/tensor/tensor_v2.h
+++ b/nntrainer/tensor/tensor_v2.h
@@ -245,6 +245,35 @@ class TensorV2 {
    */
   bool operator!=(const TensorV2 &rhs) const { return !(*this == rhs); }
 
+  /**
+   * @brief Construct a new Tensor object from a buffer
+   *        This will not copy buffer to a new tensor but directly uses it
+   *
+   * @param[in] buf buffer
+   * @param[in] bytes buffer size in bytes
+   * @param[in] d tensor dim
+   * @param[in] offset offset to be used from current
+   * @return Tensor object
+   * @throws std::invalid_argument if buf is null
+   */
+  template <typename T = float>
+  static TensorV2 Map(T *buf, unsigned int bytes, const TensorDim &d,
+                      size_t offset = 0) {
+    if (d.getDataLen() == 0 || buf == nullptr) {
+      throw std::invalid_argument(
+        "[Tensor::Map] empty tensor dim is not allowed");
+    }
+
+    if (d.getDataLen() * sizeof(T) + offset > bytes) {
+      throw std::invalid_argument(
+        "Creating shared tensor of size bigger than tensor memory.");
+    }
+
+    TensorV2 output;
+    output.setTensorVar(d, buf, offset);
+    return output;
+  };
+
   /**
    * @brief Allocate memory for this tensor
    */
@@ -978,6 +1007,17 @@ class TensorV2 {
    */
   void putData() const;
 
+  /**
+   * @brief return Data pointer of Tensor
+   * @retval template T pointer (float pointer as default)
+   */
+  const std::shared_ptr<MemoryData> getMemoryData() const;
+
+  /**
+   * @brief return offset
+   */
+  size_t getOffset() const;
+
   /**
    * @brief Copy the Tensor
    * @param[in] from Tensor to be copied
@@ -1017,6 +1057,30 @@ class TensorV2 {
    */
   TensorV2 clone() const;
 
+  /**
+   * @brief Save the Tensor into file
+   * @param[in] file output file stream
+   */
+  void save(std::ostream &file);
+
+  /**
+   * @brief Read the Tensor from file
+   * @param[in] file input file stream
+   */
+  void read(std::ifstream &file);
+
+  /**
+   * @brief return argument index which value is max by batch
+   * @retval unsigned int argument indices
+   */
+  std::vector<unsigned int> argmax() const;
+
+  /**
+   * @brief return max of the absolute values of the tensor
+   * @retval maximum absolute value
+   */
+  float max_abs() const;
+
   /**
    * @brief Transpose Tensor
    * @param direction to transpose ex) 0:2:1
@@ -1070,6 +1134,28 @@ class TensorV2 {
    */
   Tdatatype getDataType() const;
 
+  /**
+   * @brief update batch size for this tensor
+   * @param batch size
+   * @note The batchsize of src_tensor need not be related with this
+   * tensor's batch size
+   *
+   * @note The memory for this tensor will re-allocated/re-assigned if the
+   * updated batch size is different than the current batch size.
+   *
+   * @note If this tensor is/was the src_tensor for some other, then
+   * reduction in batch size can make the dependent tensors allocate fail due to
+   * memory smaller. Caller must handle this in their own end.
+   *
+   * @note If this tensor is re-allocated, then the memory might not be
+   * immediately freed as the tensor already depending on this tensor also
+   * share the same memory. So, the peak memory consumption in worst case can
+   * reach the total memory requirements of a model with old batchsize and the
+   * new batch size. It is recommended to first deallocate all the tensors,
+   * updateBatch and then allocate again to avoid such issues.
+   */
+  void updateBatch(unsigned int batch);
+
   /**
    * @brief return whether tensor is contiguous or not.
    * @retval bool contiguous
@@ -1197,8 +1283,22 @@ class TensorV2 {
 private:
   std::shared_ptr<TensorBase> itensor;
+
+  /**
+   * @brief Set tensor variables
+   *
+   * @param[in] d TensorDim
+   * @param[in] buf buffer
+   * @param[in] offset offset to be used
+   */
+  void setTensorVar(TensorDim d, void *buf, size_t offset);
 };
 
+/**
+ * @brief Overriding output stream
+ */
+std::ostream &operator<<(std::ostream &out, TensorV2 const &input);
+
 } // namespace nntrainer
 
 #endif /* __cplusplus */