From c657fe4c676eef10aca9faf4514536b46b37b784 Mon Sep 17 00:00:00 2001
From: Donghyeon Jeong <dhyeon.jeong@samsung.com>
Date: Tue, 27 Feb 2024 16:09:25 +0900
Subject: [PATCH] [TensorV2] Add utility member functions to TensorV2 class

This pull request adds several new utility member functions to the
TensorV2 class, enabling users to perform various tasks with their
tensors more easily and efficiently. These include saving and loading
tensors, updating batches, getting argmax and max absolute values, and
more. The implementation is based on the current Tensor class and aims
to improve the overall usability and flexibility of the TensorV2 class.

**Changes proposed in this PR:**
- Added save() and read() methods to allow saving and loading of tensor
data.
- Added Map() method to create a new Tensor object from a buffer.
- Added argmax() and max_abs() methods to retrieve the indices of max
value by batch and the value of the maximum absolute element in a
tensor.
- Added updateBatch() to update tensor batch size.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <dhyeon.jeong@samsung.com>
---
 nntrainer/tensor/float_tensor.cpp |  22 +++++++
 nntrainer/tensor/float_tensor.h   |  10 +++
 nntrainer/tensor/half_tensor.cpp  |  23 +++++++
 nntrainer/tensor/half_tensor.h    |  10 +++
 nntrainer/tensor/tensor_base.cpp  |  25 ++++++++
 nntrainer/tensor/tensor_base.h    |  32 ++++++++++
 nntrainer/tensor/tensor_v2.cpp    |  56 +++++++++++++++++
 nntrainer/tensor/tensor_v2.h      | 100 ++++++++++++++++++++++++++++++
 8 files changed, 278 insertions(+)

diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp
index 467c2e36f9..b0be11fbeb 100644
--- a/nntrainer/tensor/float_tensor.cpp
+++ b/nntrainer/tensor/float_tensor.cpp
@@ -783,6 +783,28 @@ void FloatTensor::copyData(const TensorV2 &from) {
   }
 }
 
+std::vector<unsigned int> FloatTensor::argmax() const {
+  std::vector<unsigned int> result;
+  const float *data = (float *)getData();
+  size_t batch_size = batch();
+  size_t feature_len = dim.getFeatureLen();
+
+  result.resize(batch_size);
+
+  for (unsigned int b = 0; b < batch_size; b++) {
+    auto max_iter =
+      std::max_element(data + b * feature_len, data + (b + 1) * feature_len);
+    result[b] = std::distance(data, max_iter) - (b * feature_len);
+  }
+  return result;
+}
+
+float FloatTensor::max_abs() const {
+  const float *data = (float *)getData();
+  unsigned int idx = isamax(size(), data, 1);
+  return *(data + idx);
+}
+
 TensorV2 &FloatTensor::transpose(const std::string &direction,
                                  TensorV2 &output) const {
   unsigned int SL, SI, SJ, SK;
diff --git a/nntrainer/tensor/float_tensor.h b/nntrainer/tensor/float_tensor.h
index c8f239ef26..31e1f52a32 100644
--- a/nntrainer/tensor/float_tensor.h
+++ b/nntrainer/tensor/float_tensor.h
@@ -312,6 +312,16 @@ class FloatTensor : public TensorBase {
    */
   void copyData(const TensorV2 &from);
 
+  /**
+   * @copydoc TensorV2::argmax()
+   */
+  std::vector<unsigned int> argmax() const override;
+
+  /**
+   * @copydoc TensorV2::max_abs()
+   */
+  float max_abs() const override;
+
   /**
    * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
    */
diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp
index 5902c8b6ac..30f339a802 100644
--- a/nntrainer/tensor/half_tensor.cpp
+++ b/nntrainer/tensor/half_tensor.cpp
@@ -858,6 +858,29 @@ void HalfTensor::copyData(const TensorV2 &from) {
   }
 }
 
+std::vector<unsigned int> HalfTensor::argmax() const {
+  std::vector<unsigned int> result;
+  const _FP16 *data = (_FP16 *)getData();
+  size_t batch_size = batch();
+  size_t feature_len = dim.getFeatureLen();
+
+  result.resize(batch_size);
+
+  for (unsigned int b = 0; b < batch_size; b++) {
+    auto max_iter =
+      std::max_element(data + b * feature_len, data + (b + 1) * feature_len);
+    result[b] = std::distance(data, max_iter) - (b * feature_len);
+  }
+
+  return result;
+}
+
+float HalfTensor::max_abs() const {
+  const _FP16 *data = (_FP16 *)getData();
+  unsigned int idx = isamax(size(), data, 1);
+  return (float)(*(data + idx));
+}
+
 TensorV2 &HalfTensor::transpose(const std::string &direction,
                                 TensorV2 &output) const {
   unsigned int SL, SI, SJ, SK;
diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h
index a905ba4bb8..6590e2198c 100644
--- a/nntrainer/tensor/half_tensor.h
+++ b/nntrainer/tensor/half_tensor.h
@@ -311,6 +311,16 @@ class HalfTensor : public TensorBase {
    */
   void copyData(const TensorV2 &from);
 
+  /**
+   * @copydoc TensorV2::argmax()
+   */
+  std::vector<unsigned int> argmax() const override;
+
+  /**
+   * @copydoc TensorV2::max_abs()
+   */
+  float max_abs() const override;
+
   /**
    * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
    */
diff --git a/nntrainer/tensor/tensor_base.cpp b/nntrainer/tensor/tensor_base.cpp
index e94c35a27f..d2aacde8e3 100644
--- a/nntrainer/tensor/tensor_base.cpp
+++ b/nntrainer/tensor/tensor_base.cpp
@@ -40,6 +40,15 @@ bool TensorBase::operator==(const TensorBase &rhs) const {
   return true;
 }
 
+void TensorBase::setTensorVar(TensorDim d, void *buf, size_t offset) {
+  dim = d;
+  strides = d.computeStrides();
+  /// Tensor does not own the memory
+  data = std::shared_ptr<MemoryData>(new MemoryData((void *)buf),
+                                     std::default_delete<MemoryData>());
+  this->offset = offset;
+}
+
 void TensorBase::putData() const {
   if (!data)
     return;
@@ -47,6 +56,12 @@ void TensorBase::putData() const {
   data->invalidate();
 }
 
+const std::shared_ptr<MemoryData> TensorBase::getMemoryData() const {
+  return data;
+}
+
+size_t TensorBase::getOffset() const { return offset; }
+
 void TensorBase::reshape(const TensorDim &d) {
   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     << getName() << " is not contiguous, cannot reshape.";
@@ -64,6 +79,16 @@ void TensorBase::reshape(const TensorDim &d) {
   strides = d.computeStrides();
 }
 
+void TensorBase::updateBatch(unsigned int batch) {
+  if (dim.batch() == batch) {
+    return;
+  }
+
+  if (isAllocated())
+    throw std::invalid_argument("Cannot update batch for an allocated tensor");
+  dim.batch(batch);
+}
+
 size_t TensorBase::getIndex(unsigned int b, unsigned int c, unsigned int h,
                             unsigned int w) const noexcept {
   if (getFormat() == Tformat::NCHW) {
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h
index 28484ed9e9..894f9fd8bd 100644
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -128,6 +128,11 @@ class TensorBase {
    */
   bool operator!=(const TensorBase &rhs) const { return !(*this == rhs); }
 
+  /**
+   * @copydoc TensorV2::setTensorVar(TensorDim d, void *buf, size_t offset)
+   */
+  void setTensorVar(TensorDim d, void *buf, size_t offset);
+
   /**
    * @brief Basic Destructor
    */
@@ -364,6 +369,16 @@ class TensorBase {
    */
   virtual void copyData(const TensorV2 &from) = 0;
 
+  /**
+   * @copydoc TensorV2::argmax()
+   */
+  virtual std::vector<unsigned int> argmax() const = 0;
+
+  /**
+   * @copydoc TensorV2::max_abs()
+   */
+  virtual float max_abs() const = 0;
+
   /**
    * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
    */
@@ -376,6 +391,17 @@ class TensorBase {
    */
   void putData() const;
 
+  /**
+   * @brief return Data pointer of Tensor
+   * @retval template T pointer (float pointer as default)
+   */
+  const std::shared_ptr<MemoryData> getMemoryData() const;
+
+  /**
+   * @brief return offset
+   */
+  size_t getOffset() const;
+
   /**
    * @brief set Tensor Dim
    * @param[in] d TensorDim
@@ -412,6 +438,12 @@ class TensorBase {
    */
   Tdatatype getDataType() const { return dim.getDataType(); }
 
+  /**
+   * @brief update batch size for this tensor
+   * @param batch size
+   */
+  void updateBatch(unsigned int batch);
+
   /**
    * @brief return whether tensor is contiguous or not.
    * @retval bool contiguous
diff --git a/nntrainer/tensor/tensor_v2.cpp b/nntrainer/tensor/tensor_v2.cpp
index ef3e2fee38..c2a9c5a185 100644
--- a/nntrainer/tensor/tensor_v2.cpp
+++ b/nntrainer/tensor/tensor_v2.cpp
@@ -613,6 +613,12 @@ void TensorV2::print(std::ostream &out) const { itensor->print(out); }
 
 void TensorV2::putData() const { itensor->putData(); }
 
+const std::shared_ptr<MemoryData> TensorV2::getMemoryData() const {
+  return itensor->getMemoryData();
+}
+
+size_t TensorV2::getOffset() const { return itensor->getOffset(); }
+
 void TensorV2::copy(const TensorV2 &from) {
   /// @todo enable copy to non-contiguous tensor
   if (!itensor->getContiguous()) {
@@ -684,6 +690,45 @@ TensorV2 TensorV2::clone() const {
   return output;
 }
 
+void TensorV2::save(std::ostream &file) {
+  NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
+    << getName() << " is not contiguous, cannot save.";
+
+  std::streamsize sz = static_cast<std::streamsize>(bytes());
+  NNTR_THROW_IF(sz < 0, std::invalid_argument)
+    << "save size: " << bytes()
+    << " is too big. It cannot be represented by std::streamsize";
+
+  checkedWrite(file, getData<char>(), sz, "[Tensor::save] operation failed");
+  putData();
+}
+
+void TensorV2::read(std::ifstream &file) {
+  NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
+    << getName() << " is not contiguous, cannot read.";
+
+  std::streamsize sz = static_cast<std::streamsize>(bytes());
+
+  NNTR_THROW_IF(sz < 0, std::invalid_argument)
+    << "read size: " << bytes()
+    << " is too big. It cannot be represented by std::streamsize";
+
+  checkedRead(file, getData<char>(), sz, "[Tensor::read] operation failed");
+  putData();
+}
+
+std::vector<unsigned int> TensorV2::argmax() const {
+  NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
+    << getName() << " is not contiguous, cannot get argmax.";
+  return itensor->argmax();
+}
+
+float TensorV2::max_abs() const {
+  NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
+    << getName() << " is not contiguous, cannot get max_abs.";
+  return itensor->max_abs();
+}
+
 TensorV2 TensorV2::transpose(const std::string &direction) const {
   TensorV2 output(getDim());
   transpose(direction, output);
@@ -721,6 +766,8 @@ TensorDim::Format TensorV2::getFormat() const { return itensor->getFormat(); }
 
 Tdatatype TensorV2::getDataType() const { return itensor->getDataType(); }
 
+void TensorV2::updateBatch(unsigned int batch) { itensor->updateBatch(batch); }
+
 const bool TensorV2::getContiguous() const noexcept {
   return itensor->getContiguous();
 }
@@ -797,4 +844,13 @@ TensorV2 TensorV2::getSharedDataTensor(const TensorDim dim_, size_t offset,
   return ret;
 }
 
+void TensorV2::setTensorVar(TensorDim d, void *buf, size_t offset) {
+  itensor->setTensorVar(d, buf, offset);
+}
+
+std::ostream &operator<<(std::ostream &out, TensorV2 const &input) {
+  input.print(out);
+  return out;
+}
+
 } // namespace nntrainer
diff --git a/nntrainer/tensor/tensor_v2.h b/nntrainer/tensor/tensor_v2.h
index 62988edb02..ac5de9e671 100644
--- a/nntrainer/tensor/tensor_v2.h
+++ b/nntrainer/tensor/tensor_v2.h
@@ -245,6 +245,35 @@ class TensorV2 {
    */
   bool operator!=(const TensorV2 &rhs) const { return !(*this == rhs); }
 
+  /**
+   * @brief Construct a new Tensor object from a buffer
+   *        This will not copy buffer to a new tensor but directly uses it
+   *
+   * @param[in] buf buffer
+   * @param[in] bytes buffer size in bytes
+   * @param[in] d tensor dim
+   * @param[in] offset offset to be used from current
+   * @return Tensor object
+   * @throws std::invalid_argument if buf is null
+   */
+  template <typename T = float>
+  static TensorV2 Map(T *buf, unsigned int bytes, const TensorDim &d,
+                      size_t offset = 0) {
+    if (d.getDataLen() == 0 || buf == nullptr) {
+      throw std::invalid_argument(
+        "[Tensor::Map] empty tensor dim is not allowed");
+    }
+
+    if (d.getDataLen() * sizeof(T) + offset > bytes) {
+      throw std::invalid_argument(
+        "Creating shared tensor of size bigger than tensor memory.");
+    }
+
+    TensorV2 output;
+    output.setTensorVar(d, buf, offset);
+    return output;
+  };
+
   /**
    * @brief Allocate memory for this tensor
    */
@@ -978,6 +1007,17 @@ class TensorV2 {
    */
   void putData() const;
 
+  /**
+   * @brief return Data pointer of Tensor
+   * @retval template T pointer (float pointer as default)
+   */
+  const std::shared_ptr<MemoryData> getMemoryData() const;
+
+  /**
+   * @brief return offset
+   */
+  size_t getOffset() const;
+
   /**
    * @brief Copy the Tensor
    * @param[in] from Tensor to be copied
@@ -1017,6 +1057,30 @@ class TensorV2 {
    */
   TensorV2 clone() const;
 
+  /**
+   * @brief Save the Tensor into file
+   * @param[in] file output file stream
+   */
+  void save(std::ostream &file);
+
+  /**
+   * @brief Read the Tensor from file
+   * @param[in] file input file stream
+   */
+  void read(std::ifstream &file);
+
+  /**
+   * @brief return argument index which value is max by batch
+   * @retval unsigned int argument indices
+   */
+  std::vector<unsigned int> argmax() const;
+
+  /**
+   * @brief return max of the absolute values of the tensor
+   * @retval maximum absolute value
+   */
+  float max_abs() const;
+
   /**
    * @brief Transpose Tensor
    * @param direction to transpose ex) 0:2:1
@@ -1070,6 +1134,28 @@ class TensorV2 {
    */
   Tdatatype getDataType() const;
 
+  /**
+   * @brief update batch size for this tensor
+   * @param batch size
+   * @note The batchsize of src_tensor need not be related with this
+   * tensor's batch size
+   *
+   * @note The memory for this tensor will re-allocated/re-assigned if the
+   * updated batch size is different than the current batch size.
+   *
+   * @note If this tensor is/was the src_tensor for some other, then
+   * reduction in batch size can make the dependent tensors allocate fail due to
+   * memory smaller. Caller must handle this in their own end.
+   *
+   * @note If this tensor is re-allocated, then the memory might not be
+   * immediately freed as the tensor already depending on this tensor also
+   * share the same memory. So, the peak memory consumption in worst case can
+   * reach the total memory requirements of a model with old batchsize and the
+   * new batch size. It is recommended to first deallocate all the tensors,
+   * updateBatch and then allocate again to avoid such issues.
+   */
+  void updateBatch(unsigned int batch);
+
   /**
    * @brief return whether tensor is contiguous or not.
    * @retval bool contiguous
@@ -1197,8 +1283,22 @@ class TensorV2 {
 private:
   std::shared_ptr<TensorBase> itensor;
+
+  /**
+   * @brief Set tensor variables
+   *
+   * @param[in] d TensorDim
+   * @param[in] buf buffer
+   * @param[in] offset offset to be used
+   */
+  void setTensorVar(TensorDim d, void *buf, size_t offset);
 };
 
+/**
+ * @brief Overriding output stream
+ */
+std::ostream &operator<<(std::ostream &out, TensorV2 const &input);
+
 } // namespace nntrainer
 
 #endif /* __cplusplus */