Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Wait for #2724][Layer] add "mul layer" #2725

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions api/ccapi/include/layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ namespace train {
enum LayerType {
LAYER_IN = ML_TRAIN_LAYER_TYPE_INPUT, /**< Input Layer type */
LAYER_WEIGHT = ML_TRAIN_LAYER_TYPE_WEIGHT, /**< Weight Layer type */
LAYER_ADD = ML_TRAIN_LAYER_TYPE_ADD, /**< Add Layer type */
LAYER_SUB = ML_TRAIN_LAYER_TYPE_SUB, /**< Subtract Layer type */
LAYER_MUL = ML_TRAIN_LAYER_TYPE_MUL, /**< Multiply Layer type */
LAYER_FC = ML_TRAIN_LAYER_TYPE_FC, /**< Fully Connected Layer type */
LAYER_SWIGLU = ML_TRAIN_LAYER_TYPE_SWIGLU, /**< Swiglu Layer type */
LAYER_BN = ML_TRAIN_LAYER_TYPE_BN, /**< Batch Normalization Layer type */
Expand Down Expand Up @@ -299,6 +302,30 @@ WeightLayer(const std::vector<std::string> &properties = {}) {
return createLayer(LayerType::LAYER_WEIGHT, properties);
}

/**
 * @brief Helper function to create add layer
 *
 * @param[in] properties vector of "key=value" formatted layer properties
 * @return std::unique_ptr<Layer> created add layer
 */
inline std::unique_ptr<Layer>
AddLayer(const std::vector<std::string> &properties = {}) {
  return createLayer(LayerType::LAYER_ADD, properties);
}

/**
 * @brief Helper function to create sub layer
 *
 * @param[in] properties vector of "key=value" formatted layer properties
 * @return std::unique_ptr<Layer> created sub layer
 */
inline std::unique_ptr<Layer>
SubLayer(const std::vector<std::string> &properties = {}) {
  return createLayer(LayerType::LAYER_SUB, properties);
}

/**
 * @brief Helper function to create mul layer
 *
 * @param[in] properties vector of "key=value" formatted layer properties
 * @return std::unique_ptr<Layer> created mul layer
 */
inline std::unique_ptr<Layer>
MulLayer(const std::vector<std::string> &properties = {}) {
  return createLayer(LayerType::LAYER_MUL, properties);
}

/**
* @brief Helper function to create fully connected layer
*/
Expand Down
3 changes: 3 additions & 0 deletions api/nntrainer-api-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ typedef enum {
ML_TRAIN_LAYER_TYPE_IDENTITY = 29, /**< Identity Layer type (Since 8.0) */
ML_TRAIN_LAYER_TYPE_SWIGLU = 30, /**< Swiglu Layer type */
ML_TRAIN_LAYER_TYPE_WEIGHT = 31, /**< Weight Layer type (Since 9.0)*/
ML_TRAIN_LAYER_TYPE_ADD = 32, /**< Add Layer type (Since 9.0)*/
ML_TRAIN_LAYER_TYPE_SUB = 33, /**< Sub Layer type (Since 9.0)*/
ML_TRAIN_LAYER_TYPE_MUL = 34, /**< Mul Layer type (Since 9.0)*/
ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP =
300, /**< Preprocess flip Layer (Since 6.5) */
ML_TRAIN_LAYER_TYPE_PREPROCESS_TRANSLATE =
Expand Down
9 changes: 9 additions & 0 deletions nntrainer/app_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <sgd.h>

#include <activation_layer.h>
#include <add_layer.h>
#include <addition_layer.h>
#include <attention_layer.h>
#include <bn_layer.h>
Expand All @@ -57,6 +58,7 @@
#include <lstmcell.h>
#include <mol_attention_layer.h>
#include <mse_loss_layer.h>
#include <mul_layer.h>
#include <multi_head_attention_layer.h>
#include <multiout_layer.h>
#include <nntrainer_error.h>
Expand All @@ -72,6 +74,7 @@
#include <rnn.h>
#include <rnncell.h>
#include <split_layer.h>
#include <sub_layer.h>
#include <time_dist.h>
#include <upsample2d_layer.h>
#include <weight_layer.h>
Expand Down Expand Up @@ -248,6 +251,12 @@ static void add_default_object(AppContext &ac) {
LayerType::LAYER_IN);
ac.registerFactory(nntrainer::createLayer<WeightLayer>, WeightLayer::type,
LayerType::LAYER_WEIGHT);
ac.registerFactory(nntrainer::createLayer<AddLayer>, AddLayer::type,
LayerType::LAYER_ADD);
ac.registerFactory(nntrainer::createLayer<SubLayer>, SubLayer::type,
LayerType::LAYER_SUB);
ac.registerFactory(nntrainer::createLayer<MulLayer>, MulLayer::type,
LayerType::LAYER_MUL);
ac.registerFactory(nntrainer::createLayer<FullyConnectedLayer>,
FullyConnectedLayer::type, LayerType::LAYER_FC);
ac.registerFactory(nntrainer::createLayer<BatchNormalizationLayer>,
Expand Down
94 changes: 94 additions & 0 deletions nntrainer/layers/add_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 SeungBaek Hong <[email protected]>
*
* @file add_layer.cpp
* @date 2 August 2024
* @see https://github.com/nnstreamer/nntrainer
* @author SeungBaek Hong <[email protected]>
* @bug No known bugs except for NYI items
* @brief This is add layer class (operation layer)
*
*/

#include <add_layer.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
#include <util_func.h>

#include <layer_context.h>

namespace nntrainer {

static constexpr size_t SINGLE_INOUT_IDX = 0;

/**
 * @brief Validate the connection and set the output dimension.
 *
 * Element-wise add requires exactly two operands of identical shape;
 * the output shape equals the shared input shape. Previously any
 * mismatch went undetected until forwarding.
 */
void AddLayer::finalize(InitLayerContext &context) {
  auto const &in_dims = context.getInputDimensions();
  NNTR_THROW_IF(in_dims.size() != 2, std::invalid_argument)
    << "[AddLayer] add layer takes exactly two inputs, given: "
    << in_dims.size();
  NNTR_THROW_IF(in_dims[0] != in_dims[1], std::invalid_argument)
    << "[AddLayer] both input dimensions must match for element-wise add";
  context.setOutputDimensions({in_dims[0]});
}

/**
 * @brief Forward pass: output = input0 + input1 (element-wise).
 */
void AddLayer::forwarding(RunLayerContext &context, bool training) {
  const Tensor &lhs = context.getInput(0);
  const Tensor &rhs = context.getInput(1);
  Tensor &output = context.getOutput(SINGLE_INOUT_IDX);

  // add() writes the element-wise sum directly into the output tensor.
  lhs.add(rhs, output);
}

/**
 * @brief Incremental forward pass over the [from, to) height slice.
 *
 * Adds the corresponding slice of both inputs into the output slice,
 * batch by batch. Only a step size of 1 is supported once from > 0.
 *
 * Fix: the inputs and their (loop-invariant) step dimensions were
 * re-fetched and re-built on every batch iteration; they are now
 * resolved once before the loop.
 */
void AddLayer::incremental_forwarding(RunLayerContext &context,
                                      unsigned int from, unsigned int to,
                                      bool training) {
  Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
  TensorDim hidden_dim = hidden_.getDim();
  TensorDim hidden_step_dim = hidden_dim;

  if (from) {
    NNTR_THROW_IF(to - from != 1, std::invalid_argument)
      << "incremental step size is not 1";
    from = 0;
    to = 1;
  }

  hidden_step_dim.batch(1);
  hidden_step_dim.height(to - from);

  // Loop-invariant: same inputs and step shapes for every batch.
  const Tensor &input0 = context.getInput(0);
  const Tensor &input1 = context.getInput(1);

  TensorDim input_dim = input0.getDim();
  TensorDim input_step_dim = input_dim;
  input_step_dim.batch(1);
  input_step_dim.height(to - from);

  for (unsigned int b = 0; b < hidden_.batch(); ++b) {
    Tensor hidden_step = hidden_.getSharedDataTensor(
      hidden_step_dim, b * hidden_dim.getFeatureLen(), true);

    Tensor input0_step = input0.getSharedDataTensor(
      input_step_dim, b * input_dim.getFeatureLen(), true);

    Tensor input1_step = input1.getSharedDataTensor(
      input_step_dim, b * input_dim.getFeatureLen(), true);

    input0_step.add(input1_step, hidden_step);
  }
}

/**
 * @brief Backward pass: d(x + y)/dx = d(x + y)/dy = 1, so the incoming
 * derivative is propagated unchanged to both inputs.
 */
void AddLayer::calcDerivative(RunLayerContext &context) {
  const Tensor &incoming = context.getIncomingDerivative(SINGLE_INOUT_IDX);

  for (unsigned int idx = 0; idx < 2; ++idx) {
    context.getOutgoingDerivative(idx).copy(incoming);
  }
}

void AddLayer::setProperty(const std::vector<std::string> &values) {
auto remain_props = loadProperties(values, add_props);
if (!remain_props.empty()) {
std::string msg = "[AddLayer] Unknown Layer Properties count " +
std::to_string(values.size());
throw exception::not_supported(msg);
}
}
} /* namespace nntrainer */
103 changes: 103 additions & 0 deletions nntrainer/layers/add_layer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 SeungBaek Hong <[email protected]>
*
* @file add_layer.h
* @date 2 August 2024
* @see https://github.com/nnstreamer/nntrainer
* @author SeungBaek Hong <[email protected]>
* @bug No known bugs except for NYI items
* @brief This is add layer class (operation layer)
*
*/

#ifndef __ADD_LAYER_H__
#define __ADD_LAYER_H__
#ifdef __cplusplus

#include <common_properties.h>
#include <layer_devel.h>

namespace nntrainer {

/**
 * @class Add Layer
 * @brief Element-wise add layer which adds two input tensors
 */
class AddLayer : public Layer {
public:
  /**
   * @brief Constructor of Add Layer
   */
  AddLayer() : Layer(), add_props(props::Print()) {}

  /**
   * @brief Destructor of Add Layer
   */
  ~AddLayer() = default;

  /**
   * @brief Move constructor of Add Layer.
   * @param[in] rhs AddLayer to be moved.
   */
  AddLayer(AddLayer &&rhs) noexcept = default;

  /**
   * @brief Move assignment operator.
   * @param[in] rhs AddLayer to be moved.
   */
  AddLayer &operator=(AddLayer &&rhs) = default;

  /**
   * @copydoc Layer::finalize(InitLayerContext &context)
   */
  void finalize(InitLayerContext &context) override;

  /**
   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
   */
  void forwarding(RunLayerContext &context, bool training) override;

  /**
   * @copydoc Layer::incremental_forwarding(RunLayerContext &context, unsigned
   * int from, unsigned int to, bool training)
   */
  void incremental_forwarding(RunLayerContext &context, unsigned int from,
                              unsigned int to, bool training) override;

  /**
   * @copydoc Layer::calcDerivative(RunLayerContext &context)
   */
  void calcDerivative(RunLayerContext &context) override;

  /**
   * @copydoc bool supportBackwarding() const
   */
  bool supportBackwarding() const override { return true; }

  /**
   * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
   * method)
   */
  void exportTo(Exporter &exporter,
                const ml::train::ExportMethods &method) const override {}

  /**
   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
   */
  void setProperty(const std::vector<std::string> &values) override;

  /**
   * @copydoc Layer::getType()
   */
  const std::string getType() const override { return AddLayer::type; }

  std::tuple<props::Print> add_props; /**< layer properties */

  inline static const std::string type = "add"; /**< type key for the factory */
};

} // namespace nntrainer

#endif /* __cplusplus */
#endif /* __ADD_LAYER_H__ */
3 changes: 3 additions & 0 deletions nntrainer/layers/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ nntrainer_inc_abs += meson.current_source_dir() / 'loss'
layer_sources = [
'activation_layer.cpp',
'weight_layer.cpp',
'add_layer.cpp',
'sub_layer.cpp',
'mul_layer.cpp',
'addition_layer.cpp',
'attention_layer.cpp',
'mol_attention_layer.cpp',
Expand Down
96 changes: 96 additions & 0 deletions nntrainer/layers/mul_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 SeungBaek Hong <[email protected]>
*
* @file mul_layer.cpp
* @date 30 August 2024
* @see https://github.com/nnstreamer/nntrainer
* @author SeungBaek Hong <[email protected]>
* @bug No known bugs except for NYI items
* @brief This is mul layer class (operation layer)
*
*/

#include <mul_layer.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
#include <util_func.h>

#include <layer_context.h>

namespace nntrainer {

static constexpr size_t SINGLE_INOUT_IDX = 0;

/**
 * @brief Validate the connection and set the output dimension.
 *
 * Element-wise multiply requires exactly two operands of identical
 * shape; the output shape equals the shared input shape. Previously
 * any mismatch went undetected until forwarding.
 */
void MulLayer::finalize(InitLayerContext &context) {
  auto const &in_dims = context.getInputDimensions();
  NNTR_THROW_IF(in_dims.size() != 2, std::invalid_argument)
    << "[MulLayer] mul layer takes exactly two inputs, given: "
    << in_dims.size();
  NNTR_THROW_IF(in_dims[0] != in_dims[1], std::invalid_argument)
    << "[MulLayer] both input dimensions must match for element-wise mul";
  context.setOutputDimensions({in_dims[0]});
}

/**
 * @brief Forward pass: output = input0 * input1 (element-wise).
 */
void MulLayer::forwarding(RunLayerContext &context, bool training) {
  const Tensor &lhs = context.getInput(0);
  const Tensor &rhs = context.getInput(1);
  Tensor &output = context.getOutput(SINGLE_INOUT_IDX);

  // multiply() writes the element-wise product directly into the output.
  lhs.multiply(rhs, output);
}

/**
 * @brief Incremental forward pass over the [from, to) height slice.
 *
 * Multiplies the corresponding slice of both inputs into the output
 * slice, batch by batch. Only a step size of 1 is supported once
 * from > 0.
 *
 * Fix: the inputs and their (loop-invariant) step dimensions were
 * re-fetched and re-built on every batch iteration; they are now
 * resolved once before the loop.
 */
void MulLayer::incremental_forwarding(RunLayerContext &context,
                                      unsigned int from, unsigned int to,
                                      bool training) {
  Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
  TensorDim hidden_dim = hidden_.getDim();
  TensorDim hidden_step_dim = hidden_dim;

  if (from) {
    NNTR_THROW_IF(to - from != 1, std::invalid_argument)
      << "incremental step size is not 1";
    from = 0;
    to = 1;
  }

  hidden_step_dim.batch(1);
  hidden_step_dim.height(to - from);

  // Loop-invariant: same inputs and step shapes for every batch.
  const Tensor &input0 = context.getInput(0);
  const Tensor &input1 = context.getInput(1);

  TensorDim input_dim = input0.getDim();
  TensorDim input_step_dim = input_dim;
  input_step_dim.batch(1);
  input_step_dim.height(to - from);

  for (unsigned int b = 0; b < hidden_.batch(); ++b) {
    Tensor hidden_step = hidden_.getSharedDataTensor(
      hidden_step_dim, b * hidden_dim.getFeatureLen(), true);

    Tensor input0_step = input0.getSharedDataTensor(
      input_step_dim, b * input_dim.getFeatureLen(), true);

    Tensor input1_step = input1.getSharedDataTensor(
      input_step_dim, b * input_dim.getFeatureLen(), true);

    input0_step.multiply(input1_step, hidden_step);
  }
}

/**
 * @brief Backward pass for element-wise multiply.
 *
 * d(x * y)/dx = y and d(x * y)/dy = x, so each outgoing derivative is
 * the incoming derivative scaled by the other operand.
 *
 * Fix: the original formed a temporary tensor via multiply() and then
 * copy()-ed it into the outgoing derivative; the output-tensor overload
 * of multiply() (already used in forwarding) writes the product
 * directly, avoiding the temporary allocation and the extra copy.
 */
void MulLayer::calcDerivative(RunLayerContext &context) {
  const Tensor &incoming = context.getIncomingDerivative(SINGLE_INOUT_IDX);

  incoming.multiply(context.getInput(1), context.getOutgoingDerivative(0));
  incoming.multiply(context.getInput(0), context.getOutgoingDerivative(1));
}

void MulLayer::setProperty(const std::vector<std::string> &values) {
auto remain_props = loadProperties(values, mul_props);
if (!remain_props.empty()) {
std::string msg = "[MulLayer] Unknown Layer Properties count " +
std::to_string(values.size());
throw exception::not_supported(msg);
}
}
} /* namespace nntrainer */
Loading
Loading