diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp index 467c2e36f9..b0be11fbeb 100644 --- a/nntrainer/tensor/float_tensor.cpp +++ b/nntrainer/tensor/float_tensor.cpp @@ -783,6 +783,28 @@ void FloatTensor::copyData(const TensorV2 &from) { } } +std::vector<unsigned int> FloatTensor::argmax() const { + std::vector<unsigned int> result; + const float *data = (float *)getData(); + size_t batch_size = batch(); + size_t feature_len = dim.getFeatureLen(); + + result.resize(batch_size); + + for (unsigned int b = 0; b < batch_size; b++) { + auto max_iter = + std::max_element(data + b * feature_len, data + (b + 1) * feature_len); + result[b] = std::distance(data, max_iter) - (b * feature_len); + } + return result; +} + +float FloatTensor::max_abs() const { + const float *data = (float *)getData(); + unsigned int idx = isamax(size(), data, 1); + return *(data + idx); +} + TensorV2 &FloatTensor::transpose(const std::string &direction, TensorV2 &output) const { unsigned int SL, SI, SJ, SK; diff --git a/nntrainer/tensor/float_tensor.h b/nntrainer/tensor/float_tensor.h index c8f239ef26..31e1f52a32 100644 --- a/nntrainer/tensor/float_tensor.h +++ b/nntrainer/tensor/float_tensor.h @@ -312,6 +312,16 @@ class FloatTensor : public TensorBase { */ void copyData(const TensorV2 &from); + /** + * @copydoc TensorV2::argmax() + */ + std::vector<unsigned int> argmax() const override; + + /** + * @copydoc TensorV2::max_abs() + */ + float max_abs() const override; + /** * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out) */ diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp index 5902c8b6ac..30f339a802 100644 --- a/nntrainer/tensor/half_tensor.cpp +++ b/nntrainer/tensor/half_tensor.cpp @@ -858,6 +858,29 @@ void HalfTensor::copyData(const TensorV2 &from) { } } +std::vector<unsigned int> HalfTensor::argmax() const { + std::vector<unsigned int> result; + const _FP16 *data = (_FP16 *)getData(); + size_t batch_size = batch(); + size_t feature_len = dim.getFeatureLen(); + + 
result.resize(batch_size); + + for (unsigned int b = 0; b < batch_size; b++) { + auto max_iter = + std::max_element(data + b * feature_len, data + (b + 1) * feature_len); + result[b] = std::distance(data, max_iter) - (b * feature_len); + } + + return result; +} + +float HalfTensor::max_abs() const { + const _FP16 *data = (_FP16 *)getData(); + unsigned int idx = isamax(size(), data, 1); + return (float)(*(data + idx)); +} + TensorV2 &HalfTensor::transpose(const std::string &direction, TensorV2 &output) const { unsigned int SL, SI, SJ, SK; diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h index a905ba4bb8..6590e2198c 100644 --- a/nntrainer/tensor/half_tensor.h +++ b/nntrainer/tensor/half_tensor.h @@ -311,6 +311,16 @@ class HalfTensor : public TensorBase { */ void copyData(const TensorV2 &from); + /** + * @copydoc TensorV2::argmax() + */ + std::vector<unsigned int> argmax() const override; + + /** + * @copydoc TensorV2::max_abs() + */ + float max_abs() const override; + /** * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out) */ diff --git a/nntrainer/tensor/tensor_base.cpp b/nntrainer/tensor/tensor_base.cpp index e94c35a27f..d2aacde8e3 100644 --- a/nntrainer/tensor/tensor_base.cpp +++ b/nntrainer/tensor/tensor_base.cpp @@ -40,6 +40,15 @@ bool TensorBase::operator==(const TensorBase &rhs) const { return true; } +void TensorBase::setTensorVar(TensorDim d, void *buf, size_t offset) { + dim = d; + strides = d.computeStrides(); + /// Tensor does not own the memory + data = std::shared_ptr<MemoryData>(new MemoryData((void *)buf), + std::default_delete<MemoryData>()); + this->offset = offset; +} + void TensorBase::putData() const { if (!data) return; @@ -47,6 +56,12 @@ data->invalidate(); } +const std::shared_ptr<MemoryData> TensorBase::getMemoryData() const { + return data; +} + +size_t TensorBase::getOffset() const { return offset; } + void TensorBase::reshape(const TensorDim &d) { NNTR_THROW_IF(!contiguous, std::invalid_argument) << getName() 
<< " is not contiguous, cannot reshape."; @@ -64,6 +79,16 @@ strides = d.computeStrides(); } +void TensorBase::updateBatch(unsigned int batch) { + if (dim.batch() == batch) { + return; + } + + if (isAllocated()) + throw std::invalid_argument("Cannot update batch for an allocated tensor"); + dim.batch(batch); +} + size_t TensorBase::getIndex(unsigned int b, unsigned int c, unsigned int h, unsigned int w) const noexcept { if (getFormat() == Tformat::NCHW) { diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h index 28484ed9e9..894f9fd8bd 100644 --- a/nntrainer/tensor/tensor_base.h +++ b/nntrainer/tensor/tensor_base.h @@ -128,6 +128,11 @@ class TensorBase { */ bool operator!=(const TensorBase &rhs) const { return !(*this == rhs); } + /** + * @copydoc TensorV2::setTensorVar(TensorDim d, void *buf, size_t offset) + */ + void setTensorVar(TensorDim d, void *buf, size_t offset); + /** * @brief Basic Destructor */ @@ -364,6 +369,16 @@ */ virtual void copyData(const TensorV2 &from) = 0; + /** + * @copydoc TensorV2::argmax() + */ + virtual std::vector<unsigned int> argmax() const = 0; + + /** + * @copydoc TensorV2::max_abs() + */ + virtual float max_abs() const = 0; + /** * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out) */ @@ -376,6 +391,17 @@ */ void putData() const; + /** + * @brief return Data pointer of Tensor + * @retval template T pointer (float pointer as default) + */ + const std::shared_ptr<MemoryData> getMemoryData() const; + + /** + * @brief return offset + */ + size_t getOffset() const; + /** * @brief set Tensor Dim * @param[in] d TensorDim @@ -412,6 +438,12 @@ */ Tdatatype getDataType() const { return dim.getDataType(); } + /** + * @brief update batch size for this tensor + * @param batch size + */ + void updateBatch(unsigned int batch); + /** * @brief return whether tensor is contiguous or not. 
* @retval bool contiguous diff --git a/nntrainer/tensor/tensor_v2.cpp b/nntrainer/tensor/tensor_v2.cpp index d37c6b2371..203bac455b 100644 --- a/nntrainer/tensor/tensor_v2.cpp +++ b/nntrainer/tensor/tensor_v2.cpp @@ -673,6 +673,12 @@ void TensorV2::print(std::ostream &out) const { itensor->print(out); } void TensorV2::putData() const { itensor->putData(); } +const std::shared_ptr<MemoryData> TensorV2::getMemoryData() const { + return itensor->getMemoryData(); +} + +size_t TensorV2::getOffset() const { return itensor->getOffset(); } + void TensorV2::copy(const TensorV2 &from) { /// @todo enable copy to non-contiguous tensor if (!itensor->getContiguous()) { @@ -744,6 +750,45 @@ TensorV2 TensorV2::clone() const { return output; } +void TensorV2::save(std::ostream &file) { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot save."; + + std::streamsize sz = static_cast<std::streamsize>(bytes()); + NNTR_THROW_IF(sz < 0, std::invalid_argument) + << "save size: " << bytes() + << " is too big. It cannot be represented by std::streamsize"; + + checkedWrite(file, getData(), sz, "[Tensor::save] operation failed"); + putData(); +} + +void TensorV2::read(std::ifstream &file) { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot read."; + + std::streamsize sz = static_cast<std::streamsize>(bytes()); + + NNTR_THROW_IF(sz < 0, std::invalid_argument) + << "read size: " << bytes() + << " is too big. 
It cannot be represented by std::streamsize"; + + checkedRead(file, getData(), sz, "[Tensor::read] operation failed"); + putData(); +} + +std::vector<unsigned int> TensorV2::argmax() const { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot get argmax."; + return itensor->argmax(); +} + +float TensorV2::max_abs() const { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot get max_abs."; + return itensor->max_abs(); +} + TensorV2 TensorV2::transpose(const std::string &direction) const { TensorV2 output(getDim()); transpose(direction, output); @@ -781,6 +826,8 @@ TensorDim::Format TensorV2::getFormat() const { return itensor->getFormat(); } Tdatatype TensorV2::getDataType() const { return itensor->getDataType(); } +void TensorV2::updateBatch(unsigned int batch) { itensor->updateBatch(batch); } + const bool TensorV2::getContiguous() const noexcept { return itensor->getContiguous(); } @@ -857,4 +904,13 @@ TensorV2 TensorV2::getSharedDataTensor(const TensorDim dim_, size_t offset, return ret; } +void TensorV2::setTensorVar(TensorDim d, void *buf, size_t offset) { + itensor->setTensorVar(d, buf, offset); +} + +std::ostream &operator<<(std::ostream &out, TensorV2 const &input) { + input.print(out); + return out; +} + } // namespace nntrainer diff --git a/nntrainer/tensor/tensor_v2.h b/nntrainer/tensor/tensor_v2.h index ba0bf36a4d..683fd234eb 100644 --- a/nntrainer/tensor/tensor_v2.h +++ b/nntrainer/tensor/tensor_v2.h @@ -245,6 +245,35 @@ */ bool operator!=(const TensorV2 &rhs) const { return !(*this == rhs); } + /** + * @brief Construct a new Tensor object from a buffer + * This will not copy buffer to a new tensor but directly uses it + * + * @param[in] buf buffer + * @param[in] bytes buffer size in bytes + * @param[in] d tensor dim + * @param[in] offset offset to be used from current + * @return Tensor object + * @throws std::invalid_argument if buf is null + */ + 
template <typename T> + static TensorV2 Map(T *buf, unsigned int bytes, const TensorDim &d, + size_t offset = 0) { + if (d.getDataLen() == 0 || buf == nullptr) { + throw std::invalid_argument( + "[Tensor::Map] empty tensor dim is not allowed"); + } + + if (d.getDataLen() * sizeof(T) + offset > bytes) { + throw std::invalid_argument( + "Creating shared tensor of size bigger than tensor memory."); + } + + TensorV2 output; + output.setTensorVar(d, buf, offset); + return output; + }; + /** + * @brief Allocate memory for this tensor */ @@ -1022,6 +1051,17 @@ */ void putData() const; + /** + * @brief return Data pointer of Tensor + * @retval template T pointer (float pointer as default) + */ + const std::shared_ptr<MemoryData> getMemoryData() const; + + /** + * @brief return offset + */ + size_t getOffset() const; + /** * @brief Copy the Tensor * @param[in] from Tensor to be copied @@ -1061,6 +1101,30 @@ */ TensorV2 clone() const; + /** + * @brief Save the Tensor into file + * @param[in] file output file stream + */ + void save(std::ostream &file); + + /** + * @brief Read the Tensor from file + * @param[in] file input file stream + */ + void read(std::ifstream &file); + + /** + * @brief return argument index which value is max by batch + * @retval unsigned int argument indices + */ + std::vector<unsigned int> argmax() const; + + /** + * @brief return max of the absolute values of the tensor + * @retval maximum absolute value + */ + float max_abs() const; + /** * @brief Transpose Tensor * @param direction to transpose ex) 0:2:1 @@ -1114,6 +1178,28 @@ */ Tdatatype getDataType() const; + /** + * @brief update batch size for this tensor + * @param batch size + * @note The batchsize of src_tensor need not be related with this + * tensor's batch size + * + * @note The memory for this tensor will re-allocated/re-assigned if the + * updated batch size is different than the current batch size. 
+ * + * @note If this tensor is/was the src_tensor for some other, then + * reduction in batch size can make the dependent tensors allocate fail due to + * memory smaller. Caller must handle this in their own end. + * + * @note If this tensor is re-allocated, then the memory might not be + * immediately freed as the tensor already depending on this tensor also + * share the same memory. So, the peak memory consumption in worst case can + * reach the total memory requirements of a model with old batchsize and the + * new batch size. It is recommended to first deallocate all the tensors, + * updateBatch and then allocate again to avoid such issues. + */ + void updateBatch(unsigned int batch); + /** * @brief return whether tensor is contiguous or not. * @retval bool contiguous @@ -1241,8 +1327,22 @@ class TensorV2 { private: std::shared_ptr itensor; + + /** + * @brief Set tensor variables + * + * @param[in] d TensorDim + * @param[in] buf buffer + * @param[in] offset offset to be used + */ + void setTensorVar(TensorDim d, void *buf, size_t offset); }; +/** + * @brief Overriding output stream + */ +std::ostream &operator<<(std::ostream &out, TensorV2 const &input); + } // namespace nntrainer #endif /* __cplusplus */