From 474c7cd94c4d51224b6d16fdc0f58937068dc869 Mon Sep 17 00:00:00 2001 From: Weiyan Date: Mon, 24 Jul 2023 17:08:12 -0700 Subject: [PATCH] Gelu and leaky relu ops for mps (#245) * gelu backward * add leaky relu * format --- lib/nnc/cmd/ccv_nnc_cmd.inc | 1264 +++++++++-------- lib/nnc/cmd/gelu/mps/ccv_nnc_gelu_mps.m | 104 ++ .../leaky_relu/mps/ccv_nnc_leaky_relu_mps.m | 52 + test/int/nnc/gelu.tests.c | 132 ++ test/int/nnc/mpsdnn.tests.c | 60 + 5 files changed, 982 insertions(+), 630 deletions(-) diff --git a/lib/nnc/cmd/ccv_nnc_cmd.inc b/lib/nnc/cmd/ccv_nnc_cmd.inc index c3e83d4bb..b1528ba94 100644 --- a/lib/nnc/cmd/ccv_nnc_cmd.inc +++ b/lib/nnc/cmd/ccv_nnc_cmd.inc @@ -1,132 +1,132 @@ static ccv_nnc_cmd_init_t init_map[] = { - {.name = "CCV_NNC_TRANSPOSE_FORWARD", .cmd = 0xb4d506e0}, - {.name = "CCV_NNC_TRANSPOSE_BACKWARD", .cmd = 0xb4d506e1}, - {.name = "CCV_NNC_ARGMIN_FORWARD", .cmd = 0xeb8747f2}, - {.name = "CCV_NNC_ARGMIN_BACKWARD", .cmd = 0xeb8747f3}, - {.name = "CCV_NNC_LAYER_NORM_FORWARD", .cmd = 0xbed3c264}, - {.name = "CCV_NNC_LAYER_NORM_BACKWARD", .cmd = 0xbed3c265}, - {.name = "CCV_NNC_UPSAMPLE_FORWARD", .cmd = 0x73875556}, - {.name = "CCV_NNC_UPSAMPLE_BACKWARD", .cmd = 0x73875557}, + {.name = "CCV_NNC_RMSPROP_FORWARD", .cmd = 0x9c886b1c}, + {.name = "CCV_NNC_RMSPROP_BACKWARD", .cmd = 0x9c886b1d}, + {.name = "CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD", .cmd = 0x1eb327a2}, + {.name = "CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD", .cmd = 0x1eb327a3}, + {.name = "CCV_NNC_RANDOM_UNIFORM_FORWARD", .cmd = 0xa0cd1d5e}, + {.name = "CCV_NNC_RANDOM_UNIFORM_BACKWARD", .cmd = 0xa0cd1d5f}, + {.name = "CCV_NNC_EWEXP_FORWARD", .cmd = 0xd784b170}, + {.name = "CCV_NNC_EWEXP_BACKWARD", .cmd = 0xd784b171}, + {.name = "CCV_NNC_NMS_FORWARD", .cmd = 0xdba26106}, + {.name = "CCV_NNC_NMS_BACKWARD", .cmd = 0xdba26107}, {.name = "CCV_NNC_CONVOLUTION_FORWARD", .cmd = 0x254d05f4}, {.name = "CCV_NNC_CONVOLUTION_BACKWARD", .cmd = 0x254d05f5}, - {.name = "CCV_NNC_MAX_FORWARD", .cmd = 
0xdf6f014c}, - {.name = "CCV_NNC_MAX_BACKWARD", .cmd = 0xdf6f014d}, - {.name = "CCV_NNC_GROUP_NORM_FORWARD", .cmd = 0x17deb074}, - {.name = "CCV_NNC_GROUP_NORM_BACKWARD", .cmd = 0x17deb075}, - {.name = "CCV_NNC_ARGMAX_FORWARD", .cmd = 0x68af2804}, - {.name = "CCV_NNC_ARGMAX_BACKWARD", .cmd = 0x68af2805}, + {.name = "CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD", .cmd = 0xc26b7b5e}, + {.name = "CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD", .cmd = 0xc26b7b5f}, + {.name = "CCV_NNC_COMM_ALLREDUCE_FORWARD", .cmd = 0x75c8d340}, + {.name = "CCV_NNC_COMM_ALLREDUCE_BACKWARD", .cmd = 0x75c8d341}, + {.name = "CCV_NNC_REDUCE_SUM_FORWARD", .cmd = 0x52970f06}, + {.name = "CCV_NNC_REDUCE_SUM_BACKWARD", .cmd = 0x52970f07}, + {.name = "CCV_NNC_DATATYPE_CONVERSION_FORWARD", .cmd = 0xd873e38c}, + {.name = "CCV_NNC_DATATYPE_CONVERSION_BACKWARD", .cmd = 0xd873e38d}, + {.name = "CCV_NNC_BATCH_NORM_FORWARD", .cmd = 0x5419819c}, + {.name = "CCV_NNC_BATCH_NORM_BACKWARD", .cmd = 0x5419819d}, {.name = "CCV_NNC_LAMB_FORWARD", .cmd = 0x450edb1a}, {.name = "CCV_NNC_LAMB_BACKWARD", .cmd = 0x450edb1b}, + {.name = "CCV_NNC_REDUCE_MAX_FORWARD", .cmd = 0x80f1a506}, + {.name = "CCV_NNC_REDUCE_MAX_BACKWARD", .cmd = 0x80f1a507}, + {.name = "CCV_NNC_ADD_FORWARD", .cmd = 0x58fb3664}, + {.name = "CCV_NNC_ADD_BACKWARD", .cmd = 0x58fb3665}, + {.name = "CCV_NNC_SGD_FORWARD", .cmd = 0xe650ad26}, + {.name = "CCV_NNC_SGD_BACKWARD", .cmd = 0xe650ad27}, {.name = "CCV_NNC_LSTM_FORWARD", .cmd = 0xc5cb998c}, {.name = "CCV_NNC_LSTM_BACKWARD", .cmd = 0xc5cb998d}, - {.name = "CCV_NNC_GEMM_FORWARD", .cmd = 0x7e87d00c}, - {.name = "CCV_NNC_GEMM_BACKWARD", .cmd = 0x7e87d00d}, - {.name = "CCV_NNC_DROPOUT_FORWARD", .cmd = 0x7f2dc3e4}, - {.name = "CCV_NNC_DROPOUT_BACKWARD", .cmd = 0x7f2dc3e5}, - {.name = "CCV_NNC_AVERAGE_POOL_FORWARD", .cmd = 0x51267ab8}, - {.name = "CCV_NNC_AVERAGE_POOL_BACKWARD", .cmd = 0x51267ab9}, - {.name = "CCV_NNC_ADAMW_FORWARD", .cmd = 0x4f5d4870}, - {.name = "CCV_NNC_ADAMW_BACKWARD", .cmd = 0x4f5d4871}, - {.name 
= "CCV_NNC_REDUCE_NORM2_FORWARD", .cmd = 0xb3034e16}, - {.name = "CCV_NNC_REDUCE_NORM2_BACKWARD", .cmd = 0xb3034e17}, - {.name = "CCV_NNC_COMM_ALLREDUCE_FORWARD", .cmd = 0x75c8d340}, - {.name = "CCV_NNC_COMM_ALLREDUCE_BACKWARD", .cmd = 0x75c8d341}, - {.name = "CCV_NNC_EWEXP_FORWARD", .cmd = 0xd784b170}, - {.name = "CCV_NNC_EWEXP_BACKWARD", .cmd = 0xd784b171}, - {.name = "CCV_NNC_FORMAT_TRANSFORM_FORWARD", .cmd = 0xe4a2b192}, - {.name = "CCV_NNC_FORMAT_TRANSFORM_BACKWARD", .cmd = 0xe4a2b193}, - {.name = "CCV_NNC_SMOOTH_L1_FORWARD", .cmd = 0x4e428e}, - {.name = "CCV_NNC_SMOOTH_L1_BACKWARD", .cmd = 0x4e428f}, - {.name = "CCV_NNC_RMSPROP_FORWARD", .cmd = 0x9c886b1c}, - {.name = "CCV_NNC_RMSPROP_BACKWARD", .cmd = 0x9c886b1d}, - {.name = "CCV_NNC_MIN_FORWARD", .cmd = 0x972fbd26}, - {.name = "CCV_NNC_MIN_BACKWARD", .cmd = 0x972fbd27}, - {.name = "CCV_NNC_ROI_ALIGN_FORWARD", .cmd = 0xfef55168}, - {.name = "CCV_NNC_ROI_ALIGN_BACKWARD", .cmd = 0xfef55169}, - {.name = "CCV_NNC_ADAM_FORWARD", .cmd = 0xe30099dc}, - {.name = "CCV_NNC_ADAM_BACKWARD", .cmd = 0xe30099dd}, - {.name = "CCV_NNC_EWSQRT_FORWARD", .cmd = 0x8870a61e}, - {.name = "CCV_NNC_EWSQRT_BACKWARD", .cmd = 0x8870a61f}, - {.name = "CCV_NNC_SWISH_FORWARD", .cmd = 0x583d90c2}, - {.name = "CCV_NNC_SWISH_BACKWARD", .cmd = 0x583d90c3}, - {.name = "CCV_NNC_MASKED_FILL_FORWARD", .cmd = 0x7f992d84}, - {.name = "CCV_NNC_MASKED_FILL_BACKWARD", .cmd = 0x7f992d85}, - {.name = "CCV_NNC_NMS_FORWARD", .cmd = 0xdba26106}, - {.name = "CCV_NNC_NMS_BACKWARD", .cmd = 0xdba26107}, - {.name = "CCV_NNC_GELU_FORWARD", .cmd = 0xb1527ab8}, - {.name = "CCV_NNC_GELU_BACKWARD", .cmd = 0xb1527ab9}, - {.name = "CCV_NNC_CLAMP_FORWARD", .cmd = 0x2640d854}, - {.name = "CCV_NNC_CLAMP_BACKWARD", .cmd = 0x2640d855}, + {.name = "CCV_NNC_MSE_FORWARD", .cmd = 0x6904a9a2}, + {.name = "CCV_NNC_MSE_BACKWARD", .cmd = 0x6904a9a3}, {.name = "CCV_NNC_SOFTMAX_FORWARD", .cmd = 0xc969a252}, {.name = "CCV_NNC_SOFTMAX_BACKWARD", .cmd = 0xc969a253}, - {.name = 
"CCV_NNC_BINARY_CROSSENTROPY_FORWARD", .cmd = 0xcd2107ec}, - {.name = "CCV_NNC_BINARY_CROSSENTROPY_BACKWARD", .cmd = 0xcd2107ed}, {.name = "CCV_NNC_EWPROD_FORWARD", .cmd = 0xee07e8fe}, {.name = "CCV_NNC_EWPROD_BACKWARD", .cmd = 0xee07e8ff}, - {.name = "CCV_NNC_LEAKY_RELU_FORWARD", .cmd = 0x507144e0}, - {.name = "CCV_NNC_LEAKY_RELU_BACKWARD", .cmd = 0x507144e1}, - {.name = "CCV_NNC_TANH_FORWARD", .cmd = 0x6a62be30}, - {.name = "CCV_NNC_TANH_BACKWARD", .cmd = 0x6a62be31}, - {.name = "CCV_NNC_RANDOM_UNIFORM_FORWARD", .cmd = 0xa0cd1d5e}, - {.name = "CCV_NNC_RANDOM_UNIFORM_BACKWARD", .cmd = 0xa0cd1d5f}, - {.name = "CCV_NNC_MAX_POOL_FORWARD", .cmd = 0x7bec9360}, - {.name = "CCV_NNC_MAX_POOL_BACKWARD", .cmd = 0x7bec9361}, + {.name = "CCV_NNC_REDUCE_ISNAN_FORWARD", .cmd = 0xee0a4ade}, + {.name = "CCV_NNC_REDUCE_ISNAN_BACKWARD", .cmd = 0xee0a4adf}, + {.name = "CCV_NNC_COMM_REDUCE_FORWARD", .cmd = 0x3434ead8}, + {.name = "CCV_NNC_COMM_REDUCE_BACKWARD", .cmd = 0x3434ead9}, {.name = "CCV_NNC_SIGMOID_FORWARD", .cmd = 0xf2f69650}, {.name = "CCV_NNC_SIGMOID_BACKWARD", .cmd = 0xf2f69651}, - {.name = "CCV_NNC_ADD_FORWARD", .cmd = 0x58fb3664}, - {.name = "CCV_NNC_ADD_BACKWARD", .cmd = 0x58fb3665}, + {.name = "CCV_NNC_EWDIV_FORWARD", .cmd = 0x1cd2fa18}, + {.name = "CCV_NNC_EWDIV_BACKWARD", .cmd = 0x1cd2fa19}, + {.name = "CCV_NNC_TANH_FORWARD", .cmd = 0x6a62be30}, + {.name = "CCV_NNC_TANH_BACKWARD", .cmd = 0x6a62be31}, + {.name = "CCV_NNC_DATA_TRANSFER_FORWARD", .cmd = 0x12d21e1a}, + {.name = "CCV_NNC_DATA_TRANSFER_BACKWARD", .cmd = 0x12d21e1b}, + {.name = "CCV_NNC_CLAMP_FORWARD", .cmd = 0x2640d854}, + {.name = "CCV_NNC_CLAMP_BACKWARD", .cmd = 0x2640d855}, + {.name = "CCV_NNC_SET_FORWARD", .cmd = 0x2b070804}, + {.name = "CCV_NNC_SET_BACKWARD", .cmd = 0x2b070805}, + {.name = "CCV_NNC_MAX_FORWARD", .cmd = 0xdf6f014c}, + {.name = "CCV_NNC_MAX_BACKWARD", .cmd = 0xdf6f014d}, + {.name = "CCV_NNC_SWISH_FORWARD", .cmd = 0x583d90c2}, + {.name = "CCV_NNC_SWISH_BACKWARD", .cmd = 0x583d90c3}, + 
{.name = "CCV_NNC_FORMAT_TRANSFORM_FORWARD", .cmd = 0xe4a2b192}, + {.name = "CCV_NNC_FORMAT_TRANSFORM_BACKWARD", .cmd = 0xe4a2b193}, + {.name = "CCV_NNC_EWSQRT_FORWARD", .cmd = 0x8870a61e}, + {.name = "CCV_NNC_EWSQRT_BACKWARD", .cmd = 0x8870a61f}, + {.name = "CCV_NNC_TRANSPOSE_FORWARD", .cmd = 0xb4d506e0}, + {.name = "CCV_NNC_TRANSPOSE_BACKWARD", .cmd = 0xb4d506e1}, {.name = "CCV_NNC_COMPRESSION_LSSC_FORWARD", .cmd = 0x17ea8f72}, {.name = "CCV_NNC_COMPRESSION_LSSC_BACKWARD", .cmd = 0x17ea8f73}, - {.name = "CCV_NNC_EWSUM_FORWARD", .cmd = 0xe21a2c4c}, - {.name = "CCV_NNC_EWSUM_BACKWARD", .cmd = 0xe21a2c4d}, - {.name = "CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD", .cmd = 0xd9e0e4a}, - {.name = "CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD", .cmd = 0xd9e0e4b}, + {.name = "CCV_NNC_ROI_ALIGN_FORWARD", .cmd = 0xfef55168}, + {.name = "CCV_NNC_ROI_ALIGN_BACKWARD", .cmd = 0xfef55169}, + {.name = "CCV_NNC_MIN_FORWARD", .cmd = 0x972fbd26}, + {.name = "CCV_NNC_MIN_BACKWARD", .cmd = 0x972fbd27}, + {.name = "CCV_NNC_REDUCE_NORM2_FORWARD", .cmd = 0xb3034e16}, + {.name = "CCV_NNC_REDUCE_NORM2_BACKWARD", .cmd = 0xb3034e17}, + {.name = "CCV_NNC_DROPOUT_FORWARD", .cmd = 0x7f2dc3e4}, + {.name = "CCV_NNC_DROPOUT_BACKWARD", .cmd = 0x7f2dc3e5}, + {.name = "CCV_NNC_MASKED_FILL_FORWARD", .cmd = 0x7f992d84}, + {.name = "CCV_NNC_MASKED_FILL_BACKWARD", .cmd = 0x7f992d85}, + {.name = "CCV_NNC_SMOOTH_L1_FORWARD", .cmd = 0x4e428e}, + {.name = "CCV_NNC_SMOOTH_L1_BACKWARD", .cmd = 0x4e428f}, + {.name = "CCV_NNC_BINARY_CROSSENTROPY_FORWARD", .cmd = 0xcd2107ec}, + {.name = "CCV_NNC_BINARY_CROSSENTROPY_BACKWARD", .cmd = 0xcd2107ed}, + {.name = "CCV_NNC_UPSAMPLE_FORWARD", .cmd = 0x73875556}, + {.name = "CCV_NNC_UPSAMPLE_BACKWARD", .cmd = 0x73875557}, + {.name = "CCV_NNC_ADAM_FORWARD", .cmd = 0xe30099dc}, + {.name = "CCV_NNC_ADAM_BACKWARD", .cmd = 0xe30099dd}, {.name = "CCV_NNC_RELU_FORWARD", .cmd = 0xc51eaa80}, {.name = "CCV_NNC_RELU_BACKWARD", .cmd = 0xc51eaa81}, - {.name = 
"CCV_NNC_RANDOM_NORMAL_FORWARD", .cmd = 0x7062c8b4}, - {.name = "CCV_NNC_RANDOM_NORMAL_BACKWARD", .cmd = 0x7062c8b5}, - {.name = "CCV_NNC_REDUCE_MAX_FORWARD", .cmd = 0x80f1a506}, - {.name = "CCV_NNC_REDUCE_MAX_BACKWARD", .cmd = 0x80f1a507}, - {.name = "CCV_NNC_REDUCE_ISNAN_FORWARD", .cmd = 0xee0a4ade}, - {.name = "CCV_NNC_REDUCE_ISNAN_BACKWARD", .cmd = 0xee0a4adf}, - {.name = "CCV_NNC_EWLOG_FORWARD", .cmd = 0xf4191bf2}, - {.name = "CCV_NNC_EWLOG_BACKWARD", .cmd = 0xf4191bf3}, - {.name = "CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD", .cmd = 0x1eb327a2}, - {.name = "CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD", .cmd = 0x1eb327a3}, + {.name = "CCV_NNC_AVERAGE_POOL_FORWARD", .cmd = 0x51267ab8}, + {.name = "CCV_NNC_AVERAGE_POOL_BACKWARD", .cmd = 0x51267ab9}, + {.name = "CCV_NNC_REDUCE_MIN_FORWARD", .cmd = 0x6785ef96}, + {.name = "CCV_NNC_REDUCE_MIN_BACKWARD", .cmd = 0x6785ef97}, + {.name = "CCV_NNC_GELU_FORWARD", .cmd = 0xb1527ab8}, + {.name = "CCV_NNC_GELU_BACKWARD", .cmd = 0xb1527ab9}, {.name = "CCV_NNC_COMM_BROADCAST_FORWARD", .cmd = 0x830eee}, {.name = "CCV_NNC_COMM_BROADCAST_BACKWARD", .cmd = 0x830eef}, - {.name = "CCV_NNC_REDUCE_SUM_FORWARD", .cmd = 0x52970f06}, - {.name = "CCV_NNC_REDUCE_SUM_BACKWARD", .cmd = 0x52970f07}, - {.name = "CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD", .cmd = 0xc26b7b5e}, - {.name = "CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD", .cmd = 0xc26b7b5f}, - {.name = "CCV_NNC_SET_FORWARD", .cmd = 0x2b070804}, - {.name = "CCV_NNC_SET_BACKWARD", .cmd = 0x2b070805}, {.name = "CCV_NNC_MUL_FORWARD", .cmd = 0x24721a46}, {.name = "CCV_NNC_MUL_BACKWARD", .cmd = 0x24721a47}, + {.name = "CCV_NNC_GROUP_NORM_FORWARD", .cmd = 0x17deb074}, + {.name = "CCV_NNC_GROUP_NORM_BACKWARD", .cmd = 0x17deb075}, + {.name = "CCV_NNC_ARGMAX_FORWARD", .cmd = 0x68af2804}, + {.name = "CCV_NNC_ARGMAX_BACKWARD", .cmd = 0x68af2805}, + {.name = "CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD", .cmd = 0xd9e0e4a}, + {.name = "CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD", .cmd = 0xd9e0e4b}, + {.name = 
"CCV_NNC_ARGMIN_FORWARD", .cmd = 0xeb8747f2}, + {.name = "CCV_NNC_ARGMIN_BACKWARD", .cmd = 0xeb8747f3}, + {.name = "CCV_NNC_REDUCE_MEAN_FORWARD", .cmd = 0xf23556c6}, + {.name = "CCV_NNC_REDUCE_MEAN_BACKWARD", .cmd = 0xf23556c7}, + {.name = "CCV_NNC_GEMM_FORWARD", .cmd = 0x7e87d00c}, + {.name = "CCV_NNC_GEMM_BACKWARD", .cmd = 0x7e87d00d}, + {.name = "CCV_NNC_RANDOM_NORMAL_FORWARD", .cmd = 0x7062c8b4}, + {.name = "CCV_NNC_RANDOM_NORMAL_BACKWARD", .cmd = 0x7062c8b5}, + {.name = "CCV_NNC_ADAMW_FORWARD", .cmd = 0x4f5d4870}, + {.name = "CCV_NNC_ADAMW_BACKWARD", .cmd = 0x4f5d4871}, + {.name = "CCV_NNC_LEAKY_RELU_FORWARD", .cmd = 0x507144e0}, + {.name = "CCV_NNC_LEAKY_RELU_BACKWARD", .cmd = 0x507144e1}, {.name = "CCV_NNC_SCALAR_MUL_FORWARD", .cmd = 0x8b4d86aa}, {.name = "CCV_NNC_SCALAR_MUL_BACKWARD", .cmd = 0x8b4d86ab}, - {.name = "CCV_NNC_BATCH_NORM_FORWARD", .cmd = 0x5419819c}, - {.name = "CCV_NNC_BATCH_NORM_BACKWARD", .cmd = 0x5419819d}, - {.name = "CCV_NNC_DATATYPE_CONVERSION_FORWARD", .cmd = 0xd873e38c}, - {.name = "CCV_NNC_DATATYPE_CONVERSION_BACKWARD", .cmd = 0xd873e38d}, + {.name = "CCV_NNC_MAX_POOL_FORWARD", .cmd = 0x7bec9360}, + {.name = "CCV_NNC_MAX_POOL_BACKWARD", .cmd = 0x7bec9361}, + {.name = "CCV_NNC_EWSUM_FORWARD", .cmd = 0xe21a2c4c}, + {.name = "CCV_NNC_EWSUM_BACKWARD", .cmd = 0xe21a2c4d}, + {.name = "CCV_NNC_LAYER_NORM_FORWARD", .cmd = 0xbed3c264}, + {.name = "CCV_NNC_LAYER_NORM_BACKWARD", .cmd = 0xbed3c265}, {.name = "CCV_NNC_HISTOGRAM_FORWARD", .cmd = 0xc5473e44}, {.name = "CCV_NNC_HISTOGRAM_BACKWARD", .cmd = 0xc5473e45}, - {.name = "CCV_NNC_REDUCE_MIN_FORWARD", .cmd = 0x6785ef96}, - {.name = "CCV_NNC_REDUCE_MIN_BACKWARD", .cmd = 0x6785ef97}, - {.name = "CCV_NNC_EWDIV_FORWARD", .cmd = 0x1cd2fa18}, - {.name = "CCV_NNC_EWDIV_BACKWARD", .cmd = 0x1cd2fa19}, - {.name = "CCV_NNC_DATA_TRANSFER_FORWARD", .cmd = 0x12d21e1a}, - {.name = "CCV_NNC_DATA_TRANSFER_BACKWARD", .cmd = 0x12d21e1b}, - {.name = "CCV_NNC_COMM_REDUCE_FORWARD", .cmd = 0x3434ead8}, - {.name = 
"CCV_NNC_COMM_REDUCE_BACKWARD", .cmd = 0x3434ead9}, - {.name = "CCV_NNC_MSE_FORWARD", .cmd = 0x6904a9a2}, - {.name = "CCV_NNC_MSE_BACKWARD", .cmd = 0x6904a9a3}, - {.name = "CCV_NNC_REDUCE_MEAN_FORWARD", .cmd = 0xf23556c6}, - {.name = "CCV_NNC_REDUCE_MEAN_BACKWARD", .cmd = 0xf23556c7}, - {.name = "CCV_NNC_SGD_FORWARD", .cmd = 0xe650ad26}, - {.name = "CCV_NNC_SGD_BACKWARD", .cmd = 0xe650ad27}, {.name = "CCV_NNC_INDEX_SELECT_FORWARD", .cmd = 0x7ee7771e}, {.name = "CCV_NNC_INDEX_SELECT_BACKWARD", .cmd = 0x7ee7771f}, + {.name = "CCV_NNC_EWLOG_FORWARD", .cmd = 0xf4191bf2}, + {.name = "CCV_NNC_EWLOG_BACKWARD", .cmd = 0xf4191bf3}, }; static ccv_nnc_cmd_backend_init_t backend_init_map[] = { @@ -141,29 +141,29 @@ static ccv_nnc_cmd_backend_init_t backend_init_map[] = { static inline int _ccv_nnc_cmd_ph(const uint32_t cmd) { - switch ((cmd >> 19) % 10) + switch ((cmd >> 8) % 10) { case 0: - return ((((cmd >> 9) % 6) + 58) << 1) | (cmd & 1); + return ((((cmd >> 7) % 39) + 25) << 1) | (cmd & 1); case 1: - return ((((cmd >> 6) % 13) + 45) << 1) | (cmd & 1); + return ((((cmd >> 1) % 13) + 51) << 1) | (cmd & 1); case 2: - return ((((cmd >> 1) % 20) + 41) << 1) | (cmd & 1); + return ((((cmd >> 19) % 43) + 13) << 1) | (cmd & 1); case 3: - return ((((cmd >> 16) % 31) + 3) << 1) | (cmd & 1); + return ((((cmd >> 1) % 17) + 0) << 1) | (cmd & 1); case 4: - return ((((cmd >> 1) % 15) + 22) << 1) | (cmd & 1); + return ((((cmd >> 1) % 48) + 15) << 1) | (cmd & 1); case 5: - return ((((cmd >> 1) % 13) + 28) << 1) | (cmd & 1); + return ((((cmd >> 1) % 22) + 25) << 1) | (cmd & 1); case 6: - return ((((cmd >> 1) % 56) + 0) << 1) | (cmd & 1); + return ((((cmd >> 1) % 17) + 43) << 1) | (cmd & 1); case 7: - return ((((cmd >> 1) % 38) + 15) << 1) | (cmd & 1); + return ((((cmd >> 2) % 26) + 11) << 1) | (cmd & 1); case 8: - return ((((cmd >> 2) % 26) + 1) << 1) | (cmd & 1); + return ((((cmd >> 1) % 19) + 10) << 1) | (cmd & 1); case 9: default: - return ((((cmd >> 1) % 22) + 3) << 1) | (cmd & 1); + 
return ((((cmd >> 25) % 13) + 0) << 1) | (cmd & 1); } } @@ -177,134 +177,134 @@ static inline int _ccv_nnc_cmd_backend_ph(const uint32_t backend) } } -void _register_command_CCV_NNC_TRANSPOSE_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_TRANSPOSE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ARGMIN_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ARGMIN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_LAYER_NORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_LAYER_NORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_UPSAMPLE_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_UPSAMPLE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_RMSPROP_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_RMSPROP_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_RANDOM_UNIFORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWEXP_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWEXP_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_NMS_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_NMS_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_CONVOLUTION_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_CONVOLUTION_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void 
_register_command_CCV_NNC_MAX_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_MAX_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_GROUP_NORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_GROUP_NORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ARGMAX_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ARGMAX_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_COMM_ALLREDUCE_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_COMM_ALLREDUCE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_SUM_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_SUM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_BATCH_NORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_BATCH_NORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_LAMB_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_LAMB_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_MAX_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_MAX_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ADD_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void 
_register_command_CCV_NNC_ADD_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SGD_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SGD_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_LSTM_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_LSTM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_GEMM_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_GEMM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_DROPOUT_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_DROPOUT_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_AVERAGE_POOL_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_AVERAGE_POOL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ADAMW_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ADAMW_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_NORM2_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_NORM2_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_COMM_ALLREDUCE_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_COMM_ALLREDUCE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWEXP_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWEXP_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SMOOTH_L1_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void 
_register_command_CCV_NNC_SMOOTH_L1_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_RMSPROP_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_RMSPROP_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_MIN_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_MIN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ROI_ALIGN_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ROI_ALIGN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ADAM_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ADAM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWSQRT_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWSQRT_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SWISH_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SWISH_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_MASKED_FILL_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_MASKED_FILL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_NMS_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_NMS_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_GELU_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_GELU_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_CLAMP_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_CLAMP_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_MSE_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void 
_register_command_CCV_NNC_MSE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_SOFTMAX_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_SOFTMAX_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_BINARY_CROSSENTROPY_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_BINARY_CROSSENTROPY_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_EWPROD_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_EWPROD_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_LEAKY_RELU_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_LEAKY_RELU_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_TANH_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_TANH_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_RANDOM_UNIFORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_MAX_POOL_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_MAX_POOL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_ISNAN_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_ISNAN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_COMM_REDUCE_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_COMM_REDUCE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_SIGMOID_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_SIGMOID_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_ADD_FORWARD(ccv_nnc_cmd_registry_t* const 
registry); -void _register_command_CCV_NNC_ADD_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWDIV_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWDIV_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_TANH_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_TANH_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_DATA_TRANSFER_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_CLAMP_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_CLAMP_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SET_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SET_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_MAX_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_MAX_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SWISH_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SWISH_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWSQRT_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWSQRT_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_TRANSPOSE_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_TRANSPOSE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_COMPRESSION_LSSC_FORWARD(ccv_nnc_cmd_registry_t* const registry); void 
_register_command_CCV_NNC_COMPRESSION_LSSC_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWSUM_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWSUM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ROI_ALIGN_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ROI_ALIGN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_MIN_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_MIN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_NORM2_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_NORM2_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_DROPOUT_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_DROPOUT_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_MASKED_FILL_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_MASKED_FILL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SMOOTH_L1_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SMOOTH_L1_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_BINARY_CROSSENTROPY_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_BINARY_CROSSENTROPY_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_UPSAMPLE_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_UPSAMPLE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void 
_register_command_CCV_NNC_ADAM_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ADAM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_RELU_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_RELU_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_RANDOM_NORMAL_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_RANDOM_NORMAL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_MAX_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_MAX_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_ISNAN_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_ISNAN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWLOG_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWLOG_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_AVERAGE_POOL_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_AVERAGE_POOL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_MIN_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_MIN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_GELU_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_GELU_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_COMM_BROADCAST_FORWARD(ccv_nnc_cmd_registry_t* const registry); void 
_register_command_CCV_NNC_COMM_BROADCAST_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_SUM_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_SUM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SET_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SET_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_MUL_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_MUL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_GROUP_NORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_GROUP_NORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ARGMAX_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ARGMAX_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ARGMIN_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ARGMIN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_MEAN_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_REDUCE_MEAN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_GEMM_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_GEMM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void 
_register_command_CCV_NNC_RANDOM_NORMAL_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_RANDOM_NORMAL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ADAMW_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_ADAMW_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_LEAKY_RELU_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_LEAKY_RELU_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_SCALAR_MUL_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_SCALAR_MUL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_BATCH_NORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_BATCH_NORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_MAX_POOL_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_MAX_POOL_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWSUM_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWSUM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_LAYER_NORM_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_LAYER_NORM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_HISTOGRAM_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_HISTOGRAM_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_MIN_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void 
_register_command_CCV_NNC_REDUCE_MIN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWDIV_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_EWDIV_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_DATA_TRANSFER_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_COMM_REDUCE_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_COMM_REDUCE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_MSE_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_MSE_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_MEAN_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_REDUCE_MEAN_BACKWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SGD_FORWARD(ccv_nnc_cmd_registry_t* const registry); -void _register_command_CCV_NNC_SGD_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_INDEX_SELECT_FORWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_INDEX_SELECT_BACKWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWLOG_FORWARD(ccv_nnc_cmd_registry_t* const registry); +void _register_command_CCV_NNC_EWLOG_BACKWARD(ccv_nnc_cmd_registry_t* const registry); void _register_command_CCV_NNC_ADAM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry); void _register_command_CCV_NNC_ADAM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry); @@ -565,8 +565,10 @@ void _register_command_CCV_NNC_EWLOG_FORWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc void _register_command_CCV_NNC_EWSQRT_FORWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const 
registry); void _register_command_CCV_NNC_CLAMP_FORWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const registry); void _register_command_CCV_NNC_GELU_FORWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const registry); +void _register_command_CCV_NNC_GELU_BACKWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const registry); void _register_command_CCV_NNC_INDEX_SELECT_FORWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const registry); void _register_command_CCV_NNC_LEAKY_RELU_FORWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const registry); +void _register_command_CCV_NNC_LEAKY_RELU_BACKWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const registry); void _register_command_CCV_NNC_MSE_FORWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const registry); void _register_command_CCV_NNC_LAYER_NORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const registry); void _register_command_CCV_NNC_GROUP_NORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(ccv_nnc_cmd_backend_registry_t* const registry); @@ -600,424 +602,426 @@ void _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD_backend_CCV_NNC_BACK static inline void _ccv_nnc_cmd_init(void) { - _register_command_CCV_NNC_TRANSPOSE_FORWARD(&init_map[0].registry); - _register_command_CCV_NNC_TRANSPOSE_BACKWARD(&init_map[1].registry); - _register_command_CCV_NNC_ARGMIN_FORWARD(&init_map[2].registry); - _register_command_CCV_NNC_ARGMIN_BACKWARD(&init_map[3].registry); - _register_command_CCV_NNC_LAYER_NORM_FORWARD(&init_map[4].registry); - _register_command_CCV_NNC_LAYER_NORM_BACKWARD(&init_map[5].registry); - _register_command_CCV_NNC_UPSAMPLE_FORWARD(&init_map[6].registry); - _register_command_CCV_NNC_UPSAMPLE_BACKWARD(&init_map[7].registry); - _register_command_CCV_NNC_CONVOLUTION_FORWARD(&init_map[8].registry); - _register_command_CCV_NNC_CONVOLUTION_BACKWARD(&init_map[9].registry); - 
_register_command_CCV_NNC_MAX_FORWARD(&init_map[10].registry); - _register_command_CCV_NNC_MAX_BACKWARD(&init_map[11].registry); - _register_command_CCV_NNC_GROUP_NORM_FORWARD(&init_map[12].registry); - _register_command_CCV_NNC_GROUP_NORM_BACKWARD(&init_map[13].registry); - _register_command_CCV_NNC_ARGMAX_FORWARD(&init_map[14].registry); - _register_command_CCV_NNC_ARGMAX_BACKWARD(&init_map[15].registry); - _register_command_CCV_NNC_LAMB_FORWARD(&init_map[16].registry); - _register_command_CCV_NNC_LAMB_BACKWARD(&init_map[17].registry); - _register_command_CCV_NNC_LSTM_FORWARD(&init_map[18].registry); - _register_command_CCV_NNC_LSTM_BACKWARD(&init_map[19].registry); - _register_command_CCV_NNC_GEMM_FORWARD(&init_map[20].registry); - _register_command_CCV_NNC_GEMM_BACKWARD(&init_map[21].registry); - _register_command_CCV_NNC_DROPOUT_FORWARD(&init_map[22].registry); - _register_command_CCV_NNC_DROPOUT_BACKWARD(&init_map[23].registry); - _register_command_CCV_NNC_AVERAGE_POOL_FORWARD(&init_map[24].registry); - _register_command_CCV_NNC_AVERAGE_POOL_BACKWARD(&init_map[25].registry); - _register_command_CCV_NNC_ADAMW_FORWARD(&init_map[26].registry); - _register_command_CCV_NNC_ADAMW_BACKWARD(&init_map[27].registry); - _register_command_CCV_NNC_REDUCE_NORM2_FORWARD(&init_map[28].registry); - _register_command_CCV_NNC_REDUCE_NORM2_BACKWARD(&init_map[29].registry); - _register_command_CCV_NNC_COMM_ALLREDUCE_FORWARD(&init_map[30].registry); - _register_command_CCV_NNC_COMM_ALLREDUCE_BACKWARD(&init_map[31].registry); - _register_command_CCV_NNC_EWEXP_FORWARD(&init_map[32].registry); - _register_command_CCV_NNC_EWEXP_BACKWARD(&init_map[33].registry); - _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD(&init_map[34].registry); - _register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD(&init_map[35].registry); - _register_command_CCV_NNC_SMOOTH_L1_FORWARD(&init_map[36].registry); - _register_command_CCV_NNC_SMOOTH_L1_BACKWARD(&init_map[37].registry); - 
_register_command_CCV_NNC_RMSPROP_FORWARD(&init_map[38].registry); - _register_command_CCV_NNC_RMSPROP_BACKWARD(&init_map[39].registry); - _register_command_CCV_NNC_MIN_FORWARD(&init_map[40].registry); - _register_command_CCV_NNC_MIN_BACKWARD(&init_map[41].registry); - _register_command_CCV_NNC_ROI_ALIGN_FORWARD(&init_map[42].registry); - _register_command_CCV_NNC_ROI_ALIGN_BACKWARD(&init_map[43].registry); - _register_command_CCV_NNC_ADAM_FORWARD(&init_map[44].registry); - _register_command_CCV_NNC_ADAM_BACKWARD(&init_map[45].registry); - _register_command_CCV_NNC_EWSQRT_FORWARD(&init_map[46].registry); - _register_command_CCV_NNC_EWSQRT_BACKWARD(&init_map[47].registry); - _register_command_CCV_NNC_SWISH_FORWARD(&init_map[48].registry); - _register_command_CCV_NNC_SWISH_BACKWARD(&init_map[49].registry); - _register_command_CCV_NNC_MASKED_FILL_FORWARD(&init_map[50].registry); - _register_command_CCV_NNC_MASKED_FILL_BACKWARD(&init_map[51].registry); - _register_command_CCV_NNC_NMS_FORWARD(&init_map[52].registry); - _register_command_CCV_NNC_NMS_BACKWARD(&init_map[53].registry); - _register_command_CCV_NNC_GELU_FORWARD(&init_map[54].registry); - _register_command_CCV_NNC_GELU_BACKWARD(&init_map[55].registry); - _register_command_CCV_NNC_CLAMP_FORWARD(&init_map[56].registry); - _register_command_CCV_NNC_CLAMP_BACKWARD(&init_map[57].registry); - _register_command_CCV_NNC_SOFTMAX_FORWARD(&init_map[58].registry); - _register_command_CCV_NNC_SOFTMAX_BACKWARD(&init_map[59].registry); - _register_command_CCV_NNC_BINARY_CROSSENTROPY_FORWARD(&init_map[60].registry); - _register_command_CCV_NNC_BINARY_CROSSENTROPY_BACKWARD(&init_map[61].registry); - _register_command_CCV_NNC_EWPROD_FORWARD(&init_map[62].registry); - _register_command_CCV_NNC_EWPROD_BACKWARD(&init_map[63].registry); - _register_command_CCV_NNC_LEAKY_RELU_FORWARD(&init_map[64].registry); - _register_command_CCV_NNC_LEAKY_RELU_BACKWARD(&init_map[65].registry); - 
_register_command_CCV_NNC_TANH_FORWARD(&init_map[66].registry); - _register_command_CCV_NNC_TANH_BACKWARD(&init_map[67].registry); - _register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD(&init_map[68].registry); - _register_command_CCV_NNC_RANDOM_UNIFORM_BACKWARD(&init_map[69].registry); - _register_command_CCV_NNC_MAX_POOL_FORWARD(&init_map[70].registry); - _register_command_CCV_NNC_MAX_POOL_BACKWARD(&init_map[71].registry); - _register_command_CCV_NNC_SIGMOID_FORWARD(&init_map[72].registry); - _register_command_CCV_NNC_SIGMOID_BACKWARD(&init_map[73].registry); - _register_command_CCV_NNC_ADD_FORWARD(&init_map[74].registry); - _register_command_CCV_NNC_ADD_BACKWARD(&init_map[75].registry); - _register_command_CCV_NNC_COMPRESSION_LSSC_FORWARD(&init_map[76].registry); - _register_command_CCV_NNC_COMPRESSION_LSSC_BACKWARD(&init_map[77].registry); - _register_command_CCV_NNC_EWSUM_FORWARD(&init_map[78].registry); - _register_command_CCV_NNC_EWSUM_BACKWARD(&init_map[79].registry); - _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD(&init_map[80].registry); - _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(&init_map[81].registry); - _register_command_CCV_NNC_RELU_FORWARD(&init_map[82].registry); - _register_command_CCV_NNC_RELU_BACKWARD(&init_map[83].registry); - _register_command_CCV_NNC_RANDOM_NORMAL_FORWARD(&init_map[84].registry); - _register_command_CCV_NNC_RANDOM_NORMAL_BACKWARD(&init_map[85].registry); - _register_command_CCV_NNC_REDUCE_MAX_FORWARD(&init_map[86].registry); - _register_command_CCV_NNC_REDUCE_MAX_BACKWARD(&init_map[87].registry); - _register_command_CCV_NNC_REDUCE_ISNAN_FORWARD(&init_map[88].registry); - _register_command_CCV_NNC_REDUCE_ISNAN_BACKWARD(&init_map[89].registry); - _register_command_CCV_NNC_EWLOG_FORWARD(&init_map[90].registry); - _register_command_CCV_NNC_EWLOG_BACKWARD(&init_map[91].registry); - _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD(&init_map[92].registry); - 
_register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD(&init_map[93].registry); - _register_command_CCV_NNC_COMM_BROADCAST_FORWARD(&init_map[94].registry); - _register_command_CCV_NNC_COMM_BROADCAST_BACKWARD(&init_map[95].registry); - _register_command_CCV_NNC_REDUCE_SUM_FORWARD(&init_map[96].registry); - _register_command_CCV_NNC_REDUCE_SUM_BACKWARD(&init_map[97].registry); - _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD(&init_map[98].registry); - _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD(&init_map[99].registry); - _register_command_CCV_NNC_SET_FORWARD(&init_map[100].registry); - _register_command_CCV_NNC_SET_BACKWARD(&init_map[101].registry); - _register_command_CCV_NNC_MUL_FORWARD(&init_map[102].registry); - _register_command_CCV_NNC_MUL_BACKWARD(&init_map[103].registry); - _register_command_CCV_NNC_SCALAR_MUL_FORWARD(&init_map[104].registry); - _register_command_CCV_NNC_SCALAR_MUL_BACKWARD(&init_map[105].registry); - _register_command_CCV_NNC_BATCH_NORM_FORWARD(&init_map[106].registry); - _register_command_CCV_NNC_BATCH_NORM_BACKWARD(&init_map[107].registry); - _register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD(&init_map[108].registry); - _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD(&init_map[109].registry); - _register_command_CCV_NNC_HISTOGRAM_FORWARD(&init_map[110].registry); - _register_command_CCV_NNC_HISTOGRAM_BACKWARD(&init_map[111].registry); - _register_command_CCV_NNC_REDUCE_MIN_FORWARD(&init_map[112].registry); - _register_command_CCV_NNC_REDUCE_MIN_BACKWARD(&init_map[113].registry); - _register_command_CCV_NNC_EWDIV_FORWARD(&init_map[114].registry); - _register_command_CCV_NNC_EWDIV_BACKWARD(&init_map[115].registry); - _register_command_CCV_NNC_DATA_TRANSFER_FORWARD(&init_map[116].registry); - _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD(&init_map[117].registry); - _register_command_CCV_NNC_COMM_REDUCE_FORWARD(&init_map[118].registry); - 
_register_command_CCV_NNC_COMM_REDUCE_BACKWARD(&init_map[119].registry); - _register_command_CCV_NNC_MSE_FORWARD(&init_map[120].registry); - _register_command_CCV_NNC_MSE_BACKWARD(&init_map[121].registry); - _register_command_CCV_NNC_REDUCE_MEAN_FORWARD(&init_map[122].registry); - _register_command_CCV_NNC_REDUCE_MEAN_BACKWARD(&init_map[123].registry); - _register_command_CCV_NNC_SGD_FORWARD(&init_map[124].registry); - _register_command_CCV_NNC_SGD_BACKWARD(&init_map[125].registry); - _register_command_CCV_NNC_INDEX_SELECT_FORWARD(&init_map[126].registry); - _register_command_CCV_NNC_INDEX_SELECT_BACKWARD(&init_map[127].registry); + _register_command_CCV_NNC_RMSPROP_FORWARD(&init_map[0].registry); + _register_command_CCV_NNC_RMSPROP_BACKWARD(&init_map[1].registry); + _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD(&init_map[2].registry); + _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD(&init_map[3].registry); + _register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD(&init_map[4].registry); + _register_command_CCV_NNC_RANDOM_UNIFORM_BACKWARD(&init_map[5].registry); + _register_command_CCV_NNC_EWEXP_FORWARD(&init_map[6].registry); + _register_command_CCV_NNC_EWEXP_BACKWARD(&init_map[7].registry); + _register_command_CCV_NNC_NMS_FORWARD(&init_map[8].registry); + _register_command_CCV_NNC_NMS_BACKWARD(&init_map[9].registry); + _register_command_CCV_NNC_CONVOLUTION_FORWARD(&init_map[10].registry); + _register_command_CCV_NNC_CONVOLUTION_BACKWARD(&init_map[11].registry); + _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD(&init_map[12].registry); + _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD(&init_map[13].registry); + _register_command_CCV_NNC_COMM_ALLREDUCE_FORWARD(&init_map[14].registry); + _register_command_CCV_NNC_COMM_ALLREDUCE_BACKWARD(&init_map[15].registry); + _register_command_CCV_NNC_REDUCE_SUM_FORWARD(&init_map[16].registry); + _register_command_CCV_NNC_REDUCE_SUM_BACKWARD(&init_map[17].registry); + 
_register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD(&init_map[18].registry); + _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD(&init_map[19].registry); + _register_command_CCV_NNC_BATCH_NORM_FORWARD(&init_map[20].registry); + _register_command_CCV_NNC_BATCH_NORM_BACKWARD(&init_map[21].registry); + _register_command_CCV_NNC_LAMB_FORWARD(&init_map[22].registry); + _register_command_CCV_NNC_LAMB_BACKWARD(&init_map[23].registry); + _register_command_CCV_NNC_REDUCE_MAX_FORWARD(&init_map[24].registry); + _register_command_CCV_NNC_REDUCE_MAX_BACKWARD(&init_map[25].registry); + _register_command_CCV_NNC_ADD_FORWARD(&init_map[26].registry); + _register_command_CCV_NNC_ADD_BACKWARD(&init_map[27].registry); + _register_command_CCV_NNC_SGD_FORWARD(&init_map[28].registry); + _register_command_CCV_NNC_SGD_BACKWARD(&init_map[29].registry); + _register_command_CCV_NNC_LSTM_FORWARD(&init_map[30].registry); + _register_command_CCV_NNC_LSTM_BACKWARD(&init_map[31].registry); + _register_command_CCV_NNC_MSE_FORWARD(&init_map[32].registry); + _register_command_CCV_NNC_MSE_BACKWARD(&init_map[33].registry); + _register_command_CCV_NNC_SOFTMAX_FORWARD(&init_map[34].registry); + _register_command_CCV_NNC_SOFTMAX_BACKWARD(&init_map[35].registry); + _register_command_CCV_NNC_EWPROD_FORWARD(&init_map[36].registry); + _register_command_CCV_NNC_EWPROD_BACKWARD(&init_map[37].registry); + _register_command_CCV_NNC_REDUCE_ISNAN_FORWARD(&init_map[38].registry); + _register_command_CCV_NNC_REDUCE_ISNAN_BACKWARD(&init_map[39].registry); + _register_command_CCV_NNC_COMM_REDUCE_FORWARD(&init_map[40].registry); + _register_command_CCV_NNC_COMM_REDUCE_BACKWARD(&init_map[41].registry); + _register_command_CCV_NNC_SIGMOID_FORWARD(&init_map[42].registry); + _register_command_CCV_NNC_SIGMOID_BACKWARD(&init_map[43].registry); + _register_command_CCV_NNC_EWDIV_FORWARD(&init_map[44].registry); + _register_command_CCV_NNC_EWDIV_BACKWARD(&init_map[45].registry); + 
_register_command_CCV_NNC_TANH_FORWARD(&init_map[46].registry); + _register_command_CCV_NNC_TANH_BACKWARD(&init_map[47].registry); + _register_command_CCV_NNC_DATA_TRANSFER_FORWARD(&init_map[48].registry); + _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD(&init_map[49].registry); + _register_command_CCV_NNC_CLAMP_FORWARD(&init_map[50].registry); + _register_command_CCV_NNC_CLAMP_BACKWARD(&init_map[51].registry); + _register_command_CCV_NNC_SET_FORWARD(&init_map[52].registry); + _register_command_CCV_NNC_SET_BACKWARD(&init_map[53].registry); + _register_command_CCV_NNC_MAX_FORWARD(&init_map[54].registry); + _register_command_CCV_NNC_MAX_BACKWARD(&init_map[55].registry); + _register_command_CCV_NNC_SWISH_FORWARD(&init_map[56].registry); + _register_command_CCV_NNC_SWISH_BACKWARD(&init_map[57].registry); + _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD(&init_map[58].registry); + _register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD(&init_map[59].registry); + _register_command_CCV_NNC_EWSQRT_FORWARD(&init_map[60].registry); + _register_command_CCV_NNC_EWSQRT_BACKWARD(&init_map[61].registry); + _register_command_CCV_NNC_TRANSPOSE_FORWARD(&init_map[62].registry); + _register_command_CCV_NNC_TRANSPOSE_BACKWARD(&init_map[63].registry); + _register_command_CCV_NNC_COMPRESSION_LSSC_FORWARD(&init_map[64].registry); + _register_command_CCV_NNC_COMPRESSION_LSSC_BACKWARD(&init_map[65].registry); + _register_command_CCV_NNC_ROI_ALIGN_FORWARD(&init_map[66].registry); + _register_command_CCV_NNC_ROI_ALIGN_BACKWARD(&init_map[67].registry); + _register_command_CCV_NNC_MIN_FORWARD(&init_map[68].registry); + _register_command_CCV_NNC_MIN_BACKWARD(&init_map[69].registry); + _register_command_CCV_NNC_REDUCE_NORM2_FORWARD(&init_map[70].registry); + _register_command_CCV_NNC_REDUCE_NORM2_BACKWARD(&init_map[71].registry); + _register_command_CCV_NNC_DROPOUT_FORWARD(&init_map[72].registry); + _register_command_CCV_NNC_DROPOUT_BACKWARD(&init_map[73].registry); + 
_register_command_CCV_NNC_MASKED_FILL_FORWARD(&init_map[74].registry); + _register_command_CCV_NNC_MASKED_FILL_BACKWARD(&init_map[75].registry); + _register_command_CCV_NNC_SMOOTH_L1_FORWARD(&init_map[76].registry); + _register_command_CCV_NNC_SMOOTH_L1_BACKWARD(&init_map[77].registry); + _register_command_CCV_NNC_BINARY_CROSSENTROPY_FORWARD(&init_map[78].registry); + _register_command_CCV_NNC_BINARY_CROSSENTROPY_BACKWARD(&init_map[79].registry); + _register_command_CCV_NNC_UPSAMPLE_FORWARD(&init_map[80].registry); + _register_command_CCV_NNC_UPSAMPLE_BACKWARD(&init_map[81].registry); + _register_command_CCV_NNC_ADAM_FORWARD(&init_map[82].registry); + _register_command_CCV_NNC_ADAM_BACKWARD(&init_map[83].registry); + _register_command_CCV_NNC_RELU_FORWARD(&init_map[84].registry); + _register_command_CCV_NNC_RELU_BACKWARD(&init_map[85].registry); + _register_command_CCV_NNC_AVERAGE_POOL_FORWARD(&init_map[86].registry); + _register_command_CCV_NNC_AVERAGE_POOL_BACKWARD(&init_map[87].registry); + _register_command_CCV_NNC_REDUCE_MIN_FORWARD(&init_map[88].registry); + _register_command_CCV_NNC_REDUCE_MIN_BACKWARD(&init_map[89].registry); + _register_command_CCV_NNC_GELU_FORWARD(&init_map[90].registry); + _register_command_CCV_NNC_GELU_BACKWARD(&init_map[91].registry); + _register_command_CCV_NNC_COMM_BROADCAST_FORWARD(&init_map[92].registry); + _register_command_CCV_NNC_COMM_BROADCAST_BACKWARD(&init_map[93].registry); + _register_command_CCV_NNC_MUL_FORWARD(&init_map[94].registry); + _register_command_CCV_NNC_MUL_BACKWARD(&init_map[95].registry); + _register_command_CCV_NNC_GROUP_NORM_FORWARD(&init_map[96].registry); + _register_command_CCV_NNC_GROUP_NORM_BACKWARD(&init_map[97].registry); + _register_command_CCV_NNC_ARGMAX_FORWARD(&init_map[98].registry); + _register_command_CCV_NNC_ARGMAX_BACKWARD(&init_map[99].registry); + _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD(&init_map[100].registry); + 
_register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(&init_map[101].registry); + _register_command_CCV_NNC_ARGMIN_FORWARD(&init_map[102].registry); + _register_command_CCV_NNC_ARGMIN_BACKWARD(&init_map[103].registry); + _register_command_CCV_NNC_REDUCE_MEAN_FORWARD(&init_map[104].registry); + _register_command_CCV_NNC_REDUCE_MEAN_BACKWARD(&init_map[105].registry); + _register_command_CCV_NNC_GEMM_FORWARD(&init_map[106].registry); + _register_command_CCV_NNC_GEMM_BACKWARD(&init_map[107].registry); + _register_command_CCV_NNC_RANDOM_NORMAL_FORWARD(&init_map[108].registry); + _register_command_CCV_NNC_RANDOM_NORMAL_BACKWARD(&init_map[109].registry); + _register_command_CCV_NNC_ADAMW_FORWARD(&init_map[110].registry); + _register_command_CCV_NNC_ADAMW_BACKWARD(&init_map[111].registry); + _register_command_CCV_NNC_LEAKY_RELU_FORWARD(&init_map[112].registry); + _register_command_CCV_NNC_LEAKY_RELU_BACKWARD(&init_map[113].registry); + _register_command_CCV_NNC_SCALAR_MUL_FORWARD(&init_map[114].registry); + _register_command_CCV_NNC_SCALAR_MUL_BACKWARD(&init_map[115].registry); + _register_command_CCV_NNC_MAX_POOL_FORWARD(&init_map[116].registry); + _register_command_CCV_NNC_MAX_POOL_BACKWARD(&init_map[117].registry); + _register_command_CCV_NNC_EWSUM_FORWARD(&init_map[118].registry); + _register_command_CCV_NNC_EWSUM_BACKWARD(&init_map[119].registry); + _register_command_CCV_NNC_LAYER_NORM_FORWARD(&init_map[120].registry); + _register_command_CCV_NNC_LAYER_NORM_BACKWARD(&init_map[121].registry); + _register_command_CCV_NNC_HISTOGRAM_FORWARD(&init_map[122].registry); + _register_command_CCV_NNC_HISTOGRAM_BACKWARD(&init_map[123].registry); + _register_command_CCV_NNC_INDEX_SELECT_FORWARD(&init_map[124].registry); + _register_command_CCV_NNC_INDEX_SELECT_BACKWARD(&init_map[125].registry); + _register_command_CCV_NNC_EWLOG_FORWARD(&init_map[126].registry); + _register_command_CCV_NNC_EWLOG_BACKWARD(&init_map[127].registry); - 
_register_command_CCV_NNC_ADAM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[44].backends[2])); - _register_command_CCV_NNC_ADAM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[45].backends[2])); - _register_command_CCV_NNC_ADAMW_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[26].backends[2])); - _register_command_CCV_NNC_ADAMW_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[27].backends[2])); - _register_command_CCV_NNC_GEMM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[20].backends[2])); - _register_command_CCV_NNC_GEMM_FORWARD_backend_CCV_NNC_BACKEND_CPU_OPT(&(init_map[20].backends[4])); - _register_command_CCV_NNC_GEMM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[21].backends[2])); - _register_command_CCV_NNC_GEMM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_OPT(&(init_map[21].backends[4])); - _register_command_CCV_NNC_ADD_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[74].backends[2])); - _register_command_CCV_NNC_ADD_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[75].backends[2])); - _register_command_CCV_NNC_MUL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[102].backends[2])); - _register_command_CCV_NNC_MUL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[103].backends[2])); - _register_command_CCV_NNC_SCALAR_MUL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[104].backends[2])); - _register_command_CCV_NNC_SCALAR_MUL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[105].backends[2])); - _register_command_CCV_NNC_MIN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[40].backends[2])); - _register_command_CCV_NNC_MIN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[41].backends[2])); - _register_command_CCV_NNC_MAX_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[10].backends[2])); - _register_command_CCV_NNC_MAX_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[11].backends[2])); - _register_command_CCV_NNC_COMPRESSION_LSSC_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[76].backends[2])); - 
_register_command_CCV_NNC_COMPRESSION_LSSC_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[77].backends[2])); - _register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[8].backends[2])); - _register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_CPU_OPT(&(init_map[8].backends[4])); - _register_command_CCV_NNC_CONVOLUTION_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[9].backends[2])); - _register_command_CCV_NNC_DROPOUT_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[22].backends[2])); - _register_command_CCV_NNC_DROPOUT_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[23].backends[2])); - _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[78].backends[2])); - _register_command_CCV_NNC_EWSUM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[79].backends[2])); - _register_command_CCV_NNC_EWPROD_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[62].backends[2])); - _register_command_CCV_NNC_EWPROD_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[63].backends[2])); - _register_command_CCV_NNC_EWDIV_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[114].backends[2])); - _register_command_CCV_NNC_EWDIV_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[115].backends[2])); - _register_command_CCV_NNC_EWEXP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[32].backends[2])); - _register_command_CCV_NNC_EWEXP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[33].backends[2])); - _register_command_CCV_NNC_EWLOG_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[90].backends[2])); - _register_command_CCV_NNC_EWLOG_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[91].backends[2])); - _register_command_CCV_NNC_EWSQRT_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[46].backends[2])); - _register_command_CCV_NNC_EWSQRT_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[47].backends[2])); - 
_register_command_CCV_NNC_CLAMP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[56].backends[2])); - _register_command_CCV_NNC_CLAMP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[57].backends[2])); - _register_command_CCV_NNC_GELU_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[54].backends[2])); - _register_command_CCV_NNC_GELU_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[55].backends[2])); - _register_command_CCV_NNC_HISTOGRAM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[110].backends[2])); - _register_command_CCV_NNC_HISTOGRAM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[111].backends[2])); - _register_command_CCV_NNC_INDEX_SELECT_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[126].backends[2])); - _register_command_CCV_NNC_INDEX_SELECT_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[127].backends[2])); - _register_command_CCV_NNC_REDUCE_ISNAN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[88].backends[2])); - _register_command_CCV_NNC_REDUCE_ISNAN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[89].backends[2])); - _register_command_CCV_NNC_LAMB_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[16].backends[2])); - _register_command_CCV_NNC_LAMB_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[17].backends[2])); - _register_command_CCV_NNC_LEAKY_RELU_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[64].backends[2])); - _register_command_CCV_NNC_LEAKY_RELU_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[65].backends[2])); - _register_command_CCV_NNC_BINARY_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[60].backends[2])); - _register_command_CCV_NNC_BINARY_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[61].backends[2])); - _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[92].backends[2])); - _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[93].backends[2])); - 
_register_command_CCV_NNC_MSE_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[120].backends[2])); - _register_command_CCV_NNC_MSE_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[121].backends[2])); - _register_command_CCV_NNC_SMOOTH_L1_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[36].backends[2])); - _register_command_CCV_NNC_SMOOTH_L1_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[37].backends[2])); - _register_command_CCV_NNC_NMS_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[52].backends[2])); - _register_command_CCV_NNC_NMS_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[53].backends[2])); - _register_command_CCV_NNC_BATCH_NORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[106].backends[2])); - _register_command_CCV_NNC_BATCH_NORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[107].backends[2])); - _register_command_CCV_NNC_LAYER_NORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[4].backends[2])); - _register_command_CCV_NNC_LAYER_NORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[5].backends[2])); - _register_command_CCV_NNC_GROUP_NORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[12].backends[2])); - _register_command_CCV_NNC_GROUP_NORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[13].backends[2])); - _register_command_CCV_NNC_MAX_POOL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[70].backends[2])); - _register_command_CCV_NNC_MAX_POOL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[71].backends[2])); - _register_command_CCV_NNC_AVERAGE_POOL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[24].backends[2])); - _register_command_CCV_NNC_AVERAGE_POOL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[25].backends[2])); - _register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[68].backends[2])); - _register_command_CCV_NNC_RANDOM_UNIFORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[69].backends[2])); - 
_register_command_CCV_NNC_RANDOM_NORMAL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[84].backends[2])); - _register_command_CCV_NNC_RANDOM_NORMAL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[85].backends[2])); - _register_command_CCV_NNC_REDUCE_SUM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[96].backends[2])); - _register_command_CCV_NNC_REDUCE_SUM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[97].backends[2])); - _register_command_CCV_NNC_REDUCE_MEAN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[122].backends[2])); - _register_command_CCV_NNC_REDUCE_MEAN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[123].backends[2])); - _register_command_CCV_NNC_REDUCE_MAX_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[86].backends[2])); - _register_command_CCV_NNC_REDUCE_MAX_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[87].backends[2])); - _register_command_CCV_NNC_REDUCE_MIN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[112].backends[2])); - _register_command_CCV_NNC_REDUCE_MIN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[113].backends[2])); - _register_command_CCV_NNC_REDUCE_NORM2_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[28].backends[2])); - _register_command_CCV_NNC_REDUCE_NORM2_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[29].backends[2])); - _register_command_CCV_NNC_ARGMAX_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[14].backends[2])); - _register_command_CCV_NNC_ARGMAX_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[15].backends[2])); - _register_command_CCV_NNC_ARGMIN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[2].backends[2])); - _register_command_CCV_NNC_ARGMIN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[3].backends[2])); - _register_command_CCV_NNC_RELU_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[82].backends[2])); - _register_command_CCV_NNC_RELU_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[83].backends[2])); - 
_register_command_CCV_NNC_RMSPROP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[38].backends[2])); - _register_command_CCV_NNC_RMSPROP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[39].backends[2])); - _register_command_CCV_NNC_ROI_ALIGN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[42].backends[2])); - _register_command_CCV_NNC_ROI_ALIGN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[43].backends[2])); - _register_command_CCV_NNC_SGD_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[124].backends[2])); - _register_command_CCV_NNC_SGD_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[125].backends[2])); - _register_command_CCV_NNC_SIGMOID_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[72].backends[2])); - _register_command_CCV_NNC_SIGMOID_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[73].backends[2])); - _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[80].backends[2])); - _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[81].backends[2])); - _register_command_CCV_NNC_SOFTMAX_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[58].backends[2])); - _register_command_CCV_NNC_SOFTMAX_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[59].backends[2])); - _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[98].backends[2])); - _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[99].backends[2])); - _register_command_CCV_NNC_SWISH_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[48].backends[2])); - _register_command_CCV_NNC_SWISH_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[49].backends[2])); - _register_command_CCV_NNC_TANH_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[66].backends[2])); - _register_command_CCV_NNC_TANH_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[67].backends[2])); - 
_register_command_CCV_NNC_UPSAMPLE_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[6].backends[2])); - _register_command_CCV_NNC_UPSAMPLE_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[7].backends[2])); - _register_command_CCV_NNC_SET_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[100].backends[2])); - _register_command_CCV_NNC_SET_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[101].backends[2])); - _register_command_CCV_NNC_MASKED_FILL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[50].backends[2])); - _register_command_CCV_NNC_MASKED_FILL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[51].backends[2])); - _register_command_CCV_NNC_DATA_TRANSFER_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[116].backends[2])); - _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[117].backends[2])); - _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[34].backends[2])); - _register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[35].backends[2])); - _register_command_CCV_NNC_TRANSPOSE_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[0].backends[2])); - _register_command_CCV_NNC_TRANSPOSE_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[1].backends[2])); - _register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[108].backends[2])); - _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[109].backends[2])); + _register_command_CCV_NNC_ADAM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[82].backends[2])); + _register_command_CCV_NNC_ADAM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[83].backends[2])); + _register_command_CCV_NNC_ADAMW_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[110].backends[2])); + _register_command_CCV_NNC_ADAMW_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[111].backends[2])); + 
_register_command_CCV_NNC_GEMM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[106].backends[2])); + _register_command_CCV_NNC_GEMM_FORWARD_backend_CCV_NNC_BACKEND_CPU_OPT(&(init_map[106].backends[4])); + _register_command_CCV_NNC_GEMM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[107].backends[2])); + _register_command_CCV_NNC_GEMM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_OPT(&(init_map[107].backends[4])); + _register_command_CCV_NNC_ADD_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[26].backends[2])); + _register_command_CCV_NNC_ADD_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[27].backends[2])); + _register_command_CCV_NNC_MUL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[94].backends[2])); + _register_command_CCV_NNC_MUL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[95].backends[2])); + _register_command_CCV_NNC_SCALAR_MUL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[114].backends[2])); + _register_command_CCV_NNC_SCALAR_MUL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[115].backends[2])); + _register_command_CCV_NNC_MIN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[68].backends[2])); + _register_command_CCV_NNC_MIN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[69].backends[2])); + _register_command_CCV_NNC_MAX_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[54].backends[2])); + _register_command_CCV_NNC_MAX_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[55].backends[2])); + _register_command_CCV_NNC_COMPRESSION_LSSC_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[64].backends[2])); + _register_command_CCV_NNC_COMPRESSION_LSSC_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[65].backends[2])); + _register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[10].backends[2])); + _register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_CPU_OPT(&(init_map[10].backends[4])); + 
_register_command_CCV_NNC_CONVOLUTION_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[11].backends[2])); + _register_command_CCV_NNC_DROPOUT_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[72].backends[2])); + _register_command_CCV_NNC_DROPOUT_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[73].backends[2])); + _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[118].backends[2])); + _register_command_CCV_NNC_EWSUM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[119].backends[2])); + _register_command_CCV_NNC_EWPROD_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[36].backends[2])); + _register_command_CCV_NNC_EWPROD_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[37].backends[2])); + _register_command_CCV_NNC_EWDIV_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[44].backends[2])); + _register_command_CCV_NNC_EWDIV_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[45].backends[2])); + _register_command_CCV_NNC_EWEXP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[6].backends[2])); + _register_command_CCV_NNC_EWEXP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[7].backends[2])); + _register_command_CCV_NNC_EWLOG_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[126].backends[2])); + _register_command_CCV_NNC_EWLOG_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[127].backends[2])); + _register_command_CCV_NNC_EWSQRT_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[60].backends[2])); + _register_command_CCV_NNC_EWSQRT_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[61].backends[2])); + _register_command_CCV_NNC_CLAMP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[50].backends[2])); + _register_command_CCV_NNC_CLAMP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[51].backends[2])); + _register_command_CCV_NNC_GELU_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[90].backends[2])); + _register_command_CCV_NNC_GELU_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[91].backends[2])); + 
_register_command_CCV_NNC_HISTOGRAM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[122].backends[2])); + _register_command_CCV_NNC_HISTOGRAM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[123].backends[2])); + _register_command_CCV_NNC_INDEX_SELECT_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[124].backends[2])); + _register_command_CCV_NNC_INDEX_SELECT_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[125].backends[2])); + _register_command_CCV_NNC_REDUCE_ISNAN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[38].backends[2])); + _register_command_CCV_NNC_REDUCE_ISNAN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[39].backends[2])); + _register_command_CCV_NNC_LAMB_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[22].backends[2])); + _register_command_CCV_NNC_LAMB_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[23].backends[2])); + _register_command_CCV_NNC_LEAKY_RELU_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[112].backends[2])); + _register_command_CCV_NNC_LEAKY_RELU_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[113].backends[2])); + _register_command_CCV_NNC_BINARY_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[78].backends[2])); + _register_command_CCV_NNC_BINARY_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[79].backends[2])); + _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[2].backends[2])); + _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[3].backends[2])); + _register_command_CCV_NNC_MSE_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[32].backends[2])); + _register_command_CCV_NNC_MSE_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[33].backends[2])); + _register_command_CCV_NNC_SMOOTH_L1_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[76].backends[2])); + _register_command_CCV_NNC_SMOOTH_L1_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[77].backends[2])); + 
_register_command_CCV_NNC_NMS_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[8].backends[2])); + _register_command_CCV_NNC_NMS_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[9].backends[2])); + _register_command_CCV_NNC_BATCH_NORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[20].backends[2])); + _register_command_CCV_NNC_BATCH_NORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[21].backends[2])); + _register_command_CCV_NNC_LAYER_NORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[120].backends[2])); + _register_command_CCV_NNC_LAYER_NORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[121].backends[2])); + _register_command_CCV_NNC_GROUP_NORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[96].backends[2])); + _register_command_CCV_NNC_GROUP_NORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[97].backends[2])); + _register_command_CCV_NNC_MAX_POOL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[116].backends[2])); + _register_command_CCV_NNC_MAX_POOL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[117].backends[2])); + _register_command_CCV_NNC_AVERAGE_POOL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[86].backends[2])); + _register_command_CCV_NNC_AVERAGE_POOL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[87].backends[2])); + _register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[4].backends[2])); + _register_command_CCV_NNC_RANDOM_UNIFORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[5].backends[2])); + _register_command_CCV_NNC_RANDOM_NORMAL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[108].backends[2])); + _register_command_CCV_NNC_RANDOM_NORMAL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[109].backends[2])); + _register_command_CCV_NNC_REDUCE_SUM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[16].backends[2])); + _register_command_CCV_NNC_REDUCE_SUM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[17].backends[2])); + 
_register_command_CCV_NNC_REDUCE_MEAN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[104].backends[2])); + _register_command_CCV_NNC_REDUCE_MEAN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[105].backends[2])); + _register_command_CCV_NNC_REDUCE_MAX_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[24].backends[2])); + _register_command_CCV_NNC_REDUCE_MAX_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[25].backends[2])); + _register_command_CCV_NNC_REDUCE_MIN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[88].backends[2])); + _register_command_CCV_NNC_REDUCE_MIN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[89].backends[2])); + _register_command_CCV_NNC_REDUCE_NORM2_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[70].backends[2])); + _register_command_CCV_NNC_REDUCE_NORM2_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[71].backends[2])); + _register_command_CCV_NNC_ARGMAX_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[98].backends[2])); + _register_command_CCV_NNC_ARGMAX_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[99].backends[2])); + _register_command_CCV_NNC_ARGMIN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[102].backends[2])); + _register_command_CCV_NNC_ARGMIN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[103].backends[2])); + _register_command_CCV_NNC_RELU_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[84].backends[2])); + _register_command_CCV_NNC_RELU_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[85].backends[2])); + _register_command_CCV_NNC_RMSPROP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[0].backends[2])); + _register_command_CCV_NNC_RMSPROP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[1].backends[2])); + _register_command_CCV_NNC_ROI_ALIGN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[66].backends[2])); + _register_command_CCV_NNC_ROI_ALIGN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[67].backends[2])); + 
_register_command_CCV_NNC_SGD_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[28].backends[2])); + _register_command_CCV_NNC_SGD_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[29].backends[2])); + _register_command_CCV_NNC_SIGMOID_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[42].backends[2])); + _register_command_CCV_NNC_SIGMOID_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[43].backends[2])); + _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[100].backends[2])); + _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[101].backends[2])); + _register_command_CCV_NNC_SOFTMAX_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[34].backends[2])); + _register_command_CCV_NNC_SOFTMAX_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[35].backends[2])); + _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[12].backends[2])); + _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[13].backends[2])); + _register_command_CCV_NNC_SWISH_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[56].backends[2])); + _register_command_CCV_NNC_SWISH_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[57].backends[2])); + _register_command_CCV_NNC_TANH_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[46].backends[2])); + _register_command_CCV_NNC_TANH_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[47].backends[2])); + _register_command_CCV_NNC_UPSAMPLE_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[80].backends[2])); + _register_command_CCV_NNC_UPSAMPLE_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[81].backends[2])); + _register_command_CCV_NNC_SET_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[52].backends[2])); + _register_command_CCV_NNC_SET_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[53].backends[2])); + 
_register_command_CCV_NNC_MASKED_FILL_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[74].backends[2])); + _register_command_CCV_NNC_MASKED_FILL_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[75].backends[2])); + _register_command_CCV_NNC_DATA_TRANSFER_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[48].backends[2])); + _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[49].backends[2])); + _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[58].backends[2])); + _register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[59].backends[2])); + _register_command_CCV_NNC_TRANSPOSE_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[62].backends[2])); + _register_command_CCV_NNC_TRANSPOSE_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[63].backends[2])); + _register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[18].backends[2])); + _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(&(init_map[19].backends[2])); #ifdef HAVE_CUDA - _register_command_CCV_NNC_ADAM_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[44].backends[5])); - _register_command_CCV_NNC_ADAM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[45].backends[5])); - _register_command_CCV_NNC_ADAMW_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[26].backends[5])); - _register_command_CCV_NNC_ADAMW_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[27].backends[5])); - _register_command_CCV_NNC_GEMM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUBLAS(&(init_map[20].backends[0])); - _register_command_CCV_NNC_GEMM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUBLAS(&(init_map[21].backends[0])); - _register_command_CCV_NNC_ADD_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[74].backends[3])); - _register_command_CCV_NNC_ADD_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[75].backends[3])); - 
_register_command_CCV_NNC_MUL_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[102].backends[3])); - _register_command_CCV_NNC_MUL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[103].backends[3])); - _register_command_CCV_NNC_SCALAR_MUL_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[104].backends[3])); - _register_command_CCV_NNC_SCALAR_MUL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[105].backends[3])); - _register_command_CCV_NNC_COMM_ALLREDUCE_FORWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[30].backends[1])); - _register_command_CCV_NNC_COMM_ALLREDUCE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[31].backends[1])); - _register_command_CCV_NNC_COMM_BROADCAST_FORWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[94].backends[1])); - _register_command_CCV_NNC_COMM_BROADCAST_BACKWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[95].backends[1])); - _register_command_CCV_NNC_COMM_REDUCE_FORWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[118].backends[1])); - _register_command_CCV_NNC_COMM_REDUCE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[119].backends[1])); - _register_command_CCV_NNC_MIN_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[40].backends[5])); - _register_command_CCV_NNC_MIN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[41].backends[5])); - _register_command_CCV_NNC_MAX_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[10].backends[5])); - _register_command_CCV_NNC_MAX_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[11].backends[5])); - _register_command_CCV_NNC_COMPRESSION_LSSC_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[76].backends[5])); - _register_command_CCV_NNC_COMPRESSION_LSSC_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[77].backends[5])); - _register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[8].backends[3])); - _register_command_CCV_NNC_CONVOLUTION_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[9].backends[3])); - 
_register_command_CCV_NNC_DROPOUT_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[22].backends[3])); - _register_command_CCV_NNC_DROPOUT_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[23].backends[3])); - _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[78].backends[3])); - _register_command_CCV_NNC_EWSUM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[79].backends[3])); - _register_command_CCV_NNC_EWDIV_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[114].backends[5])); - _register_command_CCV_NNC_EWDIV_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[115].backends[5])); - _register_command_CCV_NNC_EWEXP_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[32].backends[5])); - _register_command_CCV_NNC_EWEXP_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[33].backends[5])); - _register_command_CCV_NNC_EWLOG_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[90].backends[5])); - _register_command_CCV_NNC_EWLOG_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[91].backends[5])); - _register_command_CCV_NNC_EWSQRT_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[46].backends[5])); - _register_command_CCV_NNC_EWSQRT_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[47].backends[5])); - _register_command_CCV_NNC_CLAMP_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[56].backends[5])); - _register_command_CCV_NNC_CLAMP_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[57].backends[5])); - _register_command_CCV_NNC_GELU_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[54].backends[5])); - _register_command_CCV_NNC_GELU_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[55].backends[5])); - _register_command_CCV_NNC_INDEX_SELECT_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[126].backends[5])); - _register_command_CCV_NNC_INDEX_SELECT_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[127].backends[5])); - 
_register_command_CCV_NNC_REDUCE_ISNAN_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[88].backends[3])); - _register_command_CCV_NNC_REDUCE_ISNAN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[89].backends[3])); - _register_command_CCV_NNC_LAMB_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[16].backends[5])); - _register_command_CCV_NNC_LAMB_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[17].backends[5])); - _register_command_CCV_NNC_LEAKY_RELU_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[64].backends[5])); - _register_command_CCV_NNC_LEAKY_RELU_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[65].backends[5])); - _register_command_CCV_NNC_BINARY_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[60].backends[5])); - _register_command_CCV_NNC_BINARY_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[61].backends[5])); - _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[92].backends[5])); - _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[93].backends[5])); - _register_command_CCV_NNC_MSE_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[120].backends[5])); - _register_command_CCV_NNC_MSE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[121].backends[5])); - _register_command_CCV_NNC_SMOOTH_L1_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[36].backends[5])); - _register_command_CCV_NNC_SMOOTH_L1_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[37].backends[5])); - _register_command_CCV_NNC_NMS_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[52].backends[5])); - _register_command_CCV_NNC_NMS_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[53].backends[5])); - _register_command_CCV_NNC_BATCH_NORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[106].backends[3])); - _register_command_CCV_NNC_BATCH_NORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[107].backends[3])); - 
_register_command_CCV_NNC_LAYER_NORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[4].backends[3])); - _register_command_CCV_NNC_LAYER_NORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[5].backends[3])); - _register_command_CCV_NNC_GROUP_NORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[12].backends[3])); - _register_command_CCV_NNC_GROUP_NORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[13].backends[3])); - _register_command_CCV_NNC_MAX_POOL_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[70].backends[3])); - _register_command_CCV_NNC_MAX_POOL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[71].backends[3])); - _register_command_CCV_NNC_AVERAGE_POOL_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[24].backends[3])); - _register_command_CCV_NNC_AVERAGE_POOL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[25].backends[3])); - _register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[68].backends[5])); - _register_command_CCV_NNC_RANDOM_UNIFORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[69].backends[5])); - _register_command_CCV_NNC_RANDOM_NORMAL_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[84].backends[5])); - _register_command_CCV_NNC_RANDOM_NORMAL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[85].backends[5])); - _register_command_CCV_NNC_REDUCE_SUM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[96].backends[3])); - _register_command_CCV_NNC_REDUCE_SUM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[97].backends[3])); - _register_command_CCV_NNC_REDUCE_MEAN_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[122].backends[3])); - _register_command_CCV_NNC_REDUCE_MEAN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[123].backends[3])); - _register_command_CCV_NNC_REDUCE_NORM2_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[28].backends[3])); - 
_register_command_CCV_NNC_REDUCE_NORM2_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[29].backends[3])); - _register_command_CCV_NNC_ARGMAX_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[14].backends[5])); - _register_command_CCV_NNC_ARGMAX_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[15].backends[5])); - _register_command_CCV_NNC_ARGMIN_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[2].backends[5])); - _register_command_CCV_NNC_ARGMIN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[3].backends[5])); - _register_command_CCV_NNC_RELU_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[82].backends[3])); - _register_command_CCV_NNC_RELU_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[83].backends[3])); - _register_command_CCV_NNC_RMSPROP_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[38].backends[5])); - _register_command_CCV_NNC_RMSPROP_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[39].backends[5])); - _register_command_CCV_NNC_LSTM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[18].backends[3])); - _register_command_CCV_NNC_LSTM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[19].backends[3])); - _register_command_CCV_NNC_ROI_ALIGN_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[42].backends[5])); - _register_command_CCV_NNC_ROI_ALIGN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[43].backends[5])); - _register_command_CCV_NNC_SGD_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[124].backends[5])); - _register_command_CCV_NNC_SGD_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[125].backends[5])); - _register_command_CCV_NNC_SIGMOID_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[72].backends[3])); - _register_command_CCV_NNC_SIGMOID_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[73].backends[3])); - _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[80].backends[5])); - 
_register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[81].backends[5])); - _register_command_CCV_NNC_SOFTMAX_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[58].backends[3])); - _register_command_CCV_NNC_SOFTMAX_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[59].backends[3])); - _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[98].backends[3])); - _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[99].backends[3])); - _register_command_CCV_NNC_SWISH_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[48].backends[5])); - _register_command_CCV_NNC_SWISH_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[49].backends[5])); - _register_command_CCV_NNC_TANH_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[66].backends[3])); - _register_command_CCV_NNC_TANH_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[67].backends[3])); - _register_command_CCV_NNC_UPSAMPLE_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[6].backends[5])); - _register_command_CCV_NNC_UPSAMPLE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[7].backends[5])); - _register_command_CCV_NNC_SET_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[100].backends[3])); - _register_command_CCV_NNC_SET_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[101].backends[3])); - _register_command_CCV_NNC_MASKED_FILL_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[50].backends[5])); - _register_command_CCV_NNC_MASKED_FILL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[51].backends[5])); - _register_command_CCV_NNC_DATA_TRANSFER_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[116].backends[5])); - _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[117].backends[5])); - _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[34].backends[3])); - 
_register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[35].backends[3])); - _register_command_CCV_NNC_TRANSPOSE_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[0].backends[3])); - _register_command_CCV_NNC_TRANSPOSE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[1].backends[3])); - _register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[108].backends[5])); - _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[109].backends[5])); + _register_command_CCV_NNC_ADAM_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[82].backends[5])); + _register_command_CCV_NNC_ADAM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[83].backends[5])); + _register_command_CCV_NNC_ADAMW_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[110].backends[5])); + _register_command_CCV_NNC_ADAMW_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[111].backends[5])); + _register_command_CCV_NNC_GEMM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUBLAS(&(init_map[106].backends[0])); + _register_command_CCV_NNC_GEMM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUBLAS(&(init_map[107].backends[0])); + _register_command_CCV_NNC_ADD_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[26].backends[3])); + _register_command_CCV_NNC_ADD_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[27].backends[3])); + _register_command_CCV_NNC_MUL_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[94].backends[3])); + _register_command_CCV_NNC_MUL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[95].backends[3])); + _register_command_CCV_NNC_SCALAR_MUL_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[114].backends[3])); + _register_command_CCV_NNC_SCALAR_MUL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[115].backends[3])); + _register_command_CCV_NNC_COMM_ALLREDUCE_FORWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[14].backends[1])); + 
_register_command_CCV_NNC_COMM_ALLREDUCE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[15].backends[1])); + _register_command_CCV_NNC_COMM_BROADCAST_FORWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[92].backends[1])); + _register_command_CCV_NNC_COMM_BROADCAST_BACKWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[93].backends[1])); + _register_command_CCV_NNC_COMM_REDUCE_FORWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[40].backends[1])); + _register_command_CCV_NNC_COMM_REDUCE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_NCCL(&(init_map[41].backends[1])); + _register_command_CCV_NNC_MIN_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[68].backends[5])); + _register_command_CCV_NNC_MIN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[69].backends[5])); + _register_command_CCV_NNC_MAX_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[54].backends[5])); + _register_command_CCV_NNC_MAX_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[55].backends[5])); + _register_command_CCV_NNC_COMPRESSION_LSSC_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[64].backends[5])); + _register_command_CCV_NNC_COMPRESSION_LSSC_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[65].backends[5])); + _register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[10].backends[3])); + _register_command_CCV_NNC_CONVOLUTION_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[11].backends[3])); + _register_command_CCV_NNC_DROPOUT_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[72].backends[3])); + _register_command_CCV_NNC_DROPOUT_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[73].backends[3])); + _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[118].backends[3])); + _register_command_CCV_NNC_EWSUM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[119].backends[3])); + _register_command_CCV_NNC_EWDIV_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[44].backends[5])); + 
_register_command_CCV_NNC_EWDIV_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[45].backends[5])); + _register_command_CCV_NNC_EWEXP_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[6].backends[5])); + _register_command_CCV_NNC_EWEXP_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[7].backends[5])); + _register_command_CCV_NNC_EWLOG_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[126].backends[5])); + _register_command_CCV_NNC_EWLOG_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[127].backends[5])); + _register_command_CCV_NNC_EWSQRT_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[60].backends[5])); + _register_command_CCV_NNC_EWSQRT_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[61].backends[5])); + _register_command_CCV_NNC_CLAMP_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[50].backends[5])); + _register_command_CCV_NNC_CLAMP_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[51].backends[5])); + _register_command_CCV_NNC_GELU_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[90].backends[5])); + _register_command_CCV_NNC_GELU_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[91].backends[5])); + _register_command_CCV_NNC_INDEX_SELECT_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[124].backends[5])); + _register_command_CCV_NNC_INDEX_SELECT_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[125].backends[5])); + _register_command_CCV_NNC_REDUCE_ISNAN_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[38].backends[3])); + _register_command_CCV_NNC_REDUCE_ISNAN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[39].backends[3])); + _register_command_CCV_NNC_LAMB_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[22].backends[5])); + _register_command_CCV_NNC_LAMB_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[23].backends[5])); + _register_command_CCV_NNC_LEAKY_RELU_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[112].backends[5])); + 
_register_command_CCV_NNC_LEAKY_RELU_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[113].backends[5])); + _register_command_CCV_NNC_BINARY_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[78].backends[5])); + _register_command_CCV_NNC_BINARY_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[79].backends[5])); + _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[2].backends[5])); + _register_command_CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[3].backends[5])); + _register_command_CCV_NNC_MSE_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[32].backends[5])); + _register_command_CCV_NNC_MSE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[33].backends[5])); + _register_command_CCV_NNC_SMOOTH_L1_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[76].backends[5])); + _register_command_CCV_NNC_SMOOTH_L1_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[77].backends[5])); + _register_command_CCV_NNC_NMS_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[8].backends[5])); + _register_command_CCV_NNC_NMS_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[9].backends[5])); + _register_command_CCV_NNC_BATCH_NORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[20].backends[3])); + _register_command_CCV_NNC_BATCH_NORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[21].backends[3])); + _register_command_CCV_NNC_LAYER_NORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[120].backends[3])); + _register_command_CCV_NNC_LAYER_NORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[121].backends[3])); + _register_command_CCV_NNC_GROUP_NORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[96].backends[3])); + _register_command_CCV_NNC_GROUP_NORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[97].backends[3])); + _register_command_CCV_NNC_MAX_POOL_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[116].backends[3])); + 
_register_command_CCV_NNC_MAX_POOL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[117].backends[3])); + _register_command_CCV_NNC_AVERAGE_POOL_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[86].backends[3])); + _register_command_CCV_NNC_AVERAGE_POOL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[87].backends[3])); + _register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[4].backends[5])); + _register_command_CCV_NNC_RANDOM_UNIFORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[5].backends[5])); + _register_command_CCV_NNC_RANDOM_NORMAL_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[108].backends[5])); + _register_command_CCV_NNC_RANDOM_NORMAL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[109].backends[5])); + _register_command_CCV_NNC_REDUCE_SUM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[16].backends[3])); + _register_command_CCV_NNC_REDUCE_SUM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[17].backends[3])); + _register_command_CCV_NNC_REDUCE_MEAN_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[104].backends[3])); + _register_command_CCV_NNC_REDUCE_MEAN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[105].backends[3])); + _register_command_CCV_NNC_REDUCE_NORM2_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[70].backends[3])); + _register_command_CCV_NNC_REDUCE_NORM2_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[71].backends[3])); + _register_command_CCV_NNC_ARGMAX_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[98].backends[5])); + _register_command_CCV_NNC_ARGMAX_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[99].backends[5])); + _register_command_CCV_NNC_ARGMIN_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[102].backends[5])); + _register_command_CCV_NNC_ARGMIN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[103].backends[5])); + _register_command_CCV_NNC_RELU_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[84].backends[3])); + 
_register_command_CCV_NNC_RELU_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[85].backends[3])); + _register_command_CCV_NNC_RMSPROP_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[0].backends[5])); + _register_command_CCV_NNC_RMSPROP_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[1].backends[5])); + _register_command_CCV_NNC_LSTM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[30].backends[3])); + _register_command_CCV_NNC_LSTM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[31].backends[3])); + _register_command_CCV_NNC_ROI_ALIGN_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[66].backends[5])); + _register_command_CCV_NNC_ROI_ALIGN_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[67].backends[5])); + _register_command_CCV_NNC_SGD_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[28].backends[5])); + _register_command_CCV_NNC_SGD_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[29].backends[5])); + _register_command_CCV_NNC_SIGMOID_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[42].backends[3])); + _register_command_CCV_NNC_SIGMOID_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[43].backends[3])); + _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[100].backends[5])); + _register_command_CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[101].backends[5])); + _register_command_CCV_NNC_SOFTMAX_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[34].backends[3])); + _register_command_CCV_NNC_SOFTMAX_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[35].backends[3])); + _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[12].backends[3])); + _register_command_CCV_NNC_SOFTMAX_CROSSENTROPY_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[13].backends[3])); + _register_command_CCV_NNC_SWISH_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[56].backends[5])); + 
_register_command_CCV_NNC_SWISH_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[57].backends[5])); + _register_command_CCV_NNC_TANH_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[46].backends[3])); + _register_command_CCV_NNC_TANH_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[47].backends[3])); + _register_command_CCV_NNC_UPSAMPLE_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[80].backends[5])); + _register_command_CCV_NNC_UPSAMPLE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[81].backends[5])); + _register_command_CCV_NNC_SET_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[52].backends[3])); + _register_command_CCV_NNC_SET_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[53].backends[3])); + _register_command_CCV_NNC_MASKED_FILL_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[74].backends[5])); + _register_command_CCV_NNC_MASKED_FILL_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[75].backends[5])); + _register_command_CCV_NNC_DATA_TRANSFER_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[48].backends[5])); + _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[49].backends[5])); + _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[58].backends[3])); + _register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[59].backends[3])); + _register_command_CCV_NNC_TRANSPOSE_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[62].backends[3])); + _register_command_CCV_NNC_TRANSPOSE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[63].backends[3])); + _register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[18].backends[5])); + _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[19].backends[5])); #endif #ifdef HAVE_MPS - _register_command_CCV_NNC_GEMM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[20].backends[6])); 
- _register_command_CCV_NNC_GEMM_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[21].backends[6])); - _register_command_CCV_NNC_ADD_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[74].backends[6])); - _register_command_CCV_NNC_MUL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[102].backends[6])); - _register_command_CCV_NNC_SCALAR_MUL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[104].backends[6])); - _register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[8].backends[6])); - _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[78].backends[6])); - _register_command_CCV_NNC_EWDIV_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[114].backends[6])); - _register_command_CCV_NNC_EWEXP_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[32].backends[6])); - _register_command_CCV_NNC_EWLOG_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[90].backends[6])); - _register_command_CCV_NNC_EWSQRT_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[46].backends[6])); - _register_command_CCV_NNC_CLAMP_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[56].backends[6])); - _register_command_CCV_NNC_GELU_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[54].backends[6])); - _register_command_CCV_NNC_INDEX_SELECT_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[126].backends[6])); - _register_command_CCV_NNC_LEAKY_RELU_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[64].backends[6])); - _register_command_CCV_NNC_MSE_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[120].backends[6])); - _register_command_CCV_NNC_LAYER_NORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[4].backends[6])); - _register_command_CCV_NNC_GROUP_NORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[12].backends[6])); - _register_command_CCV_NNC_MAX_POOL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[70].backends[6])); - _register_command_CCV_NNC_AVERAGE_POOL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[24].backends[6])); - 
_register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[68].backends[6])); - _register_command_CCV_NNC_RANDOM_NORMAL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[84].backends[6])); - _register_command_CCV_NNC_REDUCE_SUM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[96].backends[6])); - _register_command_CCV_NNC_REDUCE_MEAN_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[122].backends[6])); - _register_command_CCV_NNC_REDUCE_MAX_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[86].backends[6])); - _register_command_CCV_NNC_REDUCE_MIN_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[112].backends[6])); - _register_command_CCV_NNC_ARGMAX_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[14].backends[6])); - _register_command_CCV_NNC_ARGMIN_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[2].backends[6])); - _register_command_CCV_NNC_RELU_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[82].backends[6])); - _register_command_CCV_NNC_SIGMOID_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[72].backends[6])); - _register_command_CCV_NNC_SIGMOID_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[73].backends[6])); - _register_command_CCV_NNC_SOFTMAX_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[58].backends[6])); - _register_command_CCV_NNC_SWISH_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[48].backends[6])); - _register_command_CCV_NNC_UPSAMPLE_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[6].backends[6])); - _register_command_CCV_NNC_SET_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[100].backends[6])); - _register_command_CCV_NNC_SET_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[101].backends[6])); - _register_command_CCV_NNC_DATA_TRANSFER_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[116].backends[6])); - _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[117].backends[6])); - _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[34].backends[6])); - 
_register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[35].backends[6])); - _register_command_CCV_NNC_TRANSPOSE_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[0].backends[6])); - _register_command_CCV_NNC_TRANSPOSE_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[1].backends[6])); - _register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[108].backends[6])); - _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[109].backends[6])); + _register_command_CCV_NNC_GEMM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[106].backends[6])); + _register_command_CCV_NNC_GEMM_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[107].backends[6])); + _register_command_CCV_NNC_ADD_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[26].backends[6])); + _register_command_CCV_NNC_MUL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[94].backends[6])); + _register_command_CCV_NNC_SCALAR_MUL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[114].backends[6])); + _register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[10].backends[6])); + _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[118].backends[6])); + _register_command_CCV_NNC_EWDIV_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[44].backends[6])); + _register_command_CCV_NNC_EWEXP_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[6].backends[6])); + _register_command_CCV_NNC_EWLOG_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[126].backends[6])); + _register_command_CCV_NNC_EWSQRT_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[60].backends[6])); + _register_command_CCV_NNC_CLAMP_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[50].backends[6])); + _register_command_CCV_NNC_GELU_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[90].backends[6])); + _register_command_CCV_NNC_GELU_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[91].backends[6])); + 
_register_command_CCV_NNC_INDEX_SELECT_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[124].backends[6])); + _register_command_CCV_NNC_LEAKY_RELU_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[112].backends[6])); + _register_command_CCV_NNC_LEAKY_RELU_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[113].backends[6])); + _register_command_CCV_NNC_MSE_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[32].backends[6])); + _register_command_CCV_NNC_LAYER_NORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[120].backends[6])); + _register_command_CCV_NNC_GROUP_NORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[96].backends[6])); + _register_command_CCV_NNC_MAX_POOL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[116].backends[6])); + _register_command_CCV_NNC_AVERAGE_POOL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[86].backends[6])); + _register_command_CCV_NNC_RANDOM_UNIFORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[4].backends[6])); + _register_command_CCV_NNC_RANDOM_NORMAL_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[108].backends[6])); + _register_command_CCV_NNC_REDUCE_SUM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[16].backends[6])); + _register_command_CCV_NNC_REDUCE_MEAN_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[104].backends[6])); + _register_command_CCV_NNC_REDUCE_MAX_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[24].backends[6])); + _register_command_CCV_NNC_REDUCE_MIN_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[88].backends[6])); + _register_command_CCV_NNC_ARGMAX_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[98].backends[6])); + _register_command_CCV_NNC_ARGMIN_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[102].backends[6])); + _register_command_CCV_NNC_RELU_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[84].backends[6])); + _register_command_CCV_NNC_SIGMOID_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[42].backends[6])); + _register_command_CCV_NNC_SIGMOID_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[43].backends[6])); + 
_register_command_CCV_NNC_SOFTMAX_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[34].backends[6])); + _register_command_CCV_NNC_SWISH_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[56].backends[6])); + _register_command_CCV_NNC_UPSAMPLE_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[80].backends[6])); + _register_command_CCV_NNC_SET_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[52].backends[6])); + _register_command_CCV_NNC_SET_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[53].backends[6])); + _register_command_CCV_NNC_DATA_TRANSFER_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[48].backends[6])); + _register_command_CCV_NNC_DATA_TRANSFER_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[49].backends[6])); + _register_command_CCV_NNC_FORMAT_TRANSFORM_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[58].backends[6])); + _register_command_CCV_NNC_FORMAT_TRANSFORM_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[59].backends[6])); + _register_command_CCV_NNC_TRANSPOSE_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[62].backends[6])); + _register_command_CCV_NNC_TRANSPOSE_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[63].backends[6])); + _register_command_CCV_NNC_DATATYPE_CONVERSION_FORWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[18].backends[6])); + _register_command_CCV_NNC_DATATYPE_CONVERSION_BACKWARD_backend_CCV_NNC_BACKEND_MPS(&(init_map[19].backends[6])); #endif } diff --git a/lib/nnc/cmd/gelu/mps/ccv_nnc_gelu_mps.m b/lib/nnc/cmd/gelu/mps/ccv_nnc_gelu_mps.m index 73a22376b..b6b980c0d 100644 --- a/lib/nnc/cmd/gelu/mps/ccv_nnc_gelu_mps.m +++ b/lib/nnc/cmd/gelu/mps/ccv_nnc_gelu_mps.m @@ -53,6 +53,101 @@ static int _ccv_nnc_gelu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint return CCV_NNC_EXEC_SUCCESS; } +MPSGraphTensor* normcdf(MPSGraph* mpsGraph, MPSGraphTensor* inputTensor) { + /* Standard normal CDF: (1.0f + erf(x*SQRT1_2)) * 0.5f; the result is not multiplied by x here. */ + MPSDataType dataType = [inputTensor dataType]; + const float SQRT1_2 = 0.70710678118654752440; + MPSGraphTensor* sqrt1_2 = [mpsGraph
constantWithScalar:SQRT1_2 shape:@[ @1 ] dataType:dataType]; + MPSGraphTensor* onef = [mpsGraph constantWithScalar:1.0f shape:@[ @1 ] dataType:dataType]; + MPSGraphTensor* halff = [mpsGraph constantWithScalar:0.5f shape:@[ @1 ] dataType:dataType]; + + MPSGraphTensor* erf_tensor = [mpsGraph multiplicationWithPrimaryTensor:inputTensor secondaryTensor:sqrt1_2 name:nil]; + erf_tensor = [mpsGraph erfWithTensor:erf_tensor name:nil]; + erf_tensor = [mpsGraph additionWithPrimaryTensor:erf_tensor secondaryTensor:onef name:nil]; + erf_tensor = [mpsGraph multiplicationWithPrimaryTensor:erf_tensor secondaryTensor:halff name:nil]; + + return erf_tensor; +} + +static int _ccv_nnc_gelu_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) +{ /* GELU backward on MPS: h = g * dGELU(b)/db with g = inputs[0], b = inputs[1], h = outputs[0]; cmd.info.gelu.tanh selects the tanh-approximation derivative, otherwise the erf form is used. Returns CCV_NNC_EXEC_SUCCESS. */ + assert(input_size >= 2); + assert(output_size == 1); + const ccv_nnc_tensor_view_t* const g = (const ccv_nnc_tensor_view_t*)inputs[0]; + const ccv_nnc_tensor_view_t* const b = (const ccv_nnc_tensor_view_t*)inputs[1]; + ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0]; + @autoreleasepool { + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); + ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); + int indices[2]; /* two graph inputs (g, b) are added below and both indices[0] and indices[1] are read afterwards; a 1-element array was indexed out of bounds */ + MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { + MPSGraphTensor* mps_input_g; + MPSGraphTensor* mps_g = ccv_nnc_mps_graph_tensor_input(graph, g, g->info.dim, g->stride, &mps_input_g); + [inputTensors addObject:mps_input_g]; + MPSGraphShapedType* mps_g_shape = ccv_nnc_mps_graph_tensor_input_shape(g, g->info.dim, g->stride); + [inputShapedTypes
addObject:mps_g_shape]; + + MPSGraphTensor* mps_input_b; + MPSGraphTensor* mps_b = ccv_nnc_mps_graph_tensor_input(graph, b, b->info.dim, b->stride, &mps_input_b); + [inputTensors addObject:mps_input_b]; + MPSGraphShapedType* mps_b_shape = ccv_nnc_mps_graph_tensor_input_shape(b, b->info.dim, b->stride); + [inputShapedTypes addObject:mps_b_shape]; + MPSGraphTensor* inputTensor = mps_b; + MPSGraphTensor* gradTensor = mps_g; + MPSDataType dataType = mps_b.dataType; + MPSGraphTensor* mps_h; + if (cmd.info.gelu.tanh) { + float kBeta = 0.797884560802865355; /* sqrt(2/pi): scale inside tanh for 0.5*x*(1+tanh(sqrt(2/pi)*(x+0.044715*x^3))); the previous extra 0.5 factor halved the tanh argument */ + float kKappa = 0.044715f; + MPSGraphTensor* betaf = [graph constantWithScalar:kBeta shape:@[ @1 ] dataType:dataType]; + MPSGraphTensor* kappaf = [graph constantWithScalar:kKappa shape:@[ @1 ] dataType:dataType]; + MPSGraphTensor* halff = [graph constantWithScalar:0.5f shape:@[ @1 ] dataType:dataType]; + MPSGraphTensor* onef = [graph constantWithScalar:1.0f shape:@[ @1 ] dataType:dataType]; + MPSGraphTensor* threef = [graph constantWithScalar:3.0f shape:@[ @1 ] dataType:dataType]; + MPSGraphTensor* x_sq = [graph multiplicationWithPrimaryTensor:inputTensor secondaryTensor:inputTensor name:nil]; + MPSGraphTensor* x_cube = [graph multiplicationWithPrimaryTensor:x_sq secondaryTensor:inputTensor name:nil]; + MPSGraphTensor* inner = [graph multiplicationWithPrimaryTensor:kappaf secondaryTensor:x_cube name:nil]; + inner = [graph additionWithPrimaryTensor:inner secondaryTensor:inputTensor name:nil]; + inner = [graph multiplicationWithPrimaryTensor:betaf secondaryTensor:inner name:nil]; + MPSGraphTensor* tanhInner = [graph tanhWithTensor:inner name:nil]; + MPSGraphTensor* left = [graph multiplicationWithPrimaryTensor:halff secondaryTensor:inputTensor name:nil]; + MPSGraphTensor* right = [graph additionWithPrimaryTensor:onef secondaryTensor:tanhInner name:nil]; + MPSGraphTensor* left_derivative = [graph multiplicationWithPrimaryTensor:halff secondaryTensor:right name:nil]; + MPSGraphTensor* tanh_derivative = [graph
multiplicationWithPrimaryTensor:tanhInner secondaryTensor:tanhInner name:nil]; + tanh_derivative = [graph subtractionWithPrimaryTensor:onef secondaryTensor:tanh_derivative name:nil]; + MPSGraphTensor* inner_derivative = [graph multiplicationWithPrimaryTensor:threef secondaryTensor:kappaf name:nil]; + inner_derivative = [graph multiplicationWithPrimaryTensor:inner_derivative secondaryTensor:x_sq name:nil]; + inner_derivative = [graph additionWithPrimaryTensor:inner_derivative secondaryTensor:onef name:nil]; + inner_derivative = [graph multiplicationWithPrimaryTensor:betaf secondaryTensor:inner_derivative name:nil]; + MPSGraphTensor* right_derivative = [graph multiplicationWithPrimaryTensor:left secondaryTensor:tanh_derivative name:nil]; + right_derivative = [graph multiplicationWithPrimaryTensor:right_derivative secondaryTensor:inner_derivative name:nil]; + mps_h = [graph additionWithPrimaryTensor:left_derivative secondaryTensor:right_derivative name:nil]; + mps_h = [graph multiplicationWithPrimaryTensor:gradTensor secondaryTensor:mps_h name:nil]; + } else { + float kBeta = 0.797884560802865355 * (0.5f); /* 1/sqrt(2*pi): normal pdf scale, so that d/dx[x*cdf(x)] = cdf(x) + x*pdf(x); without the 0.5 the x*pdf term was doubled */ + MPSGraphTensor* halff = [graph constantWithScalar:-0.5f dataType:dataType]; + MPSGraphTensor* betaf = [graph constantWithScalar:kBeta dataType:dataType]; + MPSGraphTensor* cdf = normcdf(graph, inputTensor); + MPSGraphTensor* pdfMul = [graph squareWithTensor:inputTensor name:nil]; + pdfMul = [graph multiplicationWithPrimaryTensor:pdfMul secondaryTensor:halff name:nil]; + pdfMul = [graph exponentWithTensor:pdfMul name:nil]; + MPSGraphTensor* pdf = [graph multiplicationWithPrimaryTensor:pdfMul secondaryTensor:betaf name:nil]; + pdf = [graph multiplicationWithPrimaryTensor:inputTensor secondaryTensor:pdf name:nil]; + pdf = [graph additionWithPrimaryTensor:pdf secondaryTensor:cdf name:nil]; + mps_h = [graph multiplicationWithPrimaryTensor:gradTensor secondaryTensor:pdf name:nil]; + } + + [resultTensors addObject:mps_h]; + }); + MPSGraphTensorData* data_g =
ccv_nnc_mps_graph_tensor_data(g, g->info.dim, g->stride); + MPSGraphTensorData* data_b = ccv_nnc_mps_graph_tensor_data(b, b->info.dim, b->stride); + MPSGraphTensorData* data[] = {data_g, data_b}; + ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]]], &h, (int*[]){ h->info.dim }, (int*[]){ h->stride }, 1); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); + } + return CCV_NNC_EXEC_SUCCESS; +} + REGISTER_COMMAND_BACKEND(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS)(ccv_nnc_cmd_backend_registry_t* const registry) { registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN; @@ -61,3 +156,12 @@ static int _ccv_nnc_gelu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint registry->algorithms = 1; registry->exec = _ccv_nnc_gelu_forw; } + +REGISTER_COMMAND_BACKEND(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_MPS)(ccv_nnc_cmd_backend_registry_t* const registry) +{ + registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN; + registry->tensor_datatypes = CCV_32F | CCV_16F; + registry->tensor_memory = CCV_TENSOR_GPU_MEMORY; + registry->algorithms = 1; + registry->exec = _ccv_nnc_gelu_back; +} diff --git a/lib/nnc/cmd/leaky_relu/mps/ccv_nnc_leaky_relu_mps.m b/lib/nnc/cmd/leaky_relu/mps/ccv_nnc_leaky_relu_mps.m index 4bc621ef2..52db13b9e 100644 --- a/lib/nnc/cmd/leaky_relu/mps/ccv_nnc_leaky_relu_mps.m +++ b/lib/nnc/cmd/leaky_relu/mps/ccv_nnc_leaky_relu_mps.m @@ -32,6 +32,49 @@ static int _ccv_nnc_leaky_relu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ return CCV_NNC_EXEC_SUCCESS; } +static int _ccv_nnc_leaky_relu_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) +{ + assert(input_size == 3); + assert(output_size == 1);
+ const ccv_nnc_tensor_view_t* const g = (const ccv_nnc_tensor_view_t*)inputs[0]; /* incoming gradient */ + const ccv_nnc_tensor_view_t* const b = (const ccv_nnc_tensor_view_t*)inputs[2]; /* passed to the gradient op as sourceTensor; inputs[1] is not read in this function */ + ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0]; + const double alpha = (double)cmd.info.leaky_relu.negative_slope; + + @autoreleasepool { + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); + ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); + int indices[2]; /* two graph inputs (g, b) are added below and both indices[0] and indices[1] are read afterwards; a 1-element array was indexed out of bounds */ + MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { + MPSGraphTensor* mps_input_g; + MPSGraphTensor* mps_g = ccv_nnc_mps_graph_tensor_input(graph, g, g->info.dim, g->stride, &mps_input_g); + [inputTensors addObject:mps_input_g]; + MPSGraphShapedType* mps_g_shape = ccv_nnc_mps_graph_tensor_input_shape(g, g->info.dim, g->stride); + [inputShapedTypes addObject:mps_g_shape]; + + MPSGraphTensor* mps_input_b; + MPSGraphTensor* mps_b = ccv_nnc_mps_graph_tensor_input(graph, b, b->info.dim, b->stride, &mps_input_b); + [inputTensors addObject:mps_input_b]; + MPSGraphShapedType* mps_b_shape = ccv_nnc_mps_graph_tensor_input_shape(b, b->info.dim, b->stride); + [inputShapedTypes addObject:mps_b_shape]; + + MPSGraphTensor* alpha_tensor = [graph constantWithScalar:alpha dataType:[mps_b dataType]]; + MPSGraphTensor* mps_h = [graph leakyReLUGradientWithIncomingGradient:mps_g + sourceTensor:mps_b + alphaTensor:alpha_tensor + name:nil]; + + [resultTensors addObject:mps_h]; + }); + MPSGraphTensorData* data_g = ccv_nnc_mps_graph_tensor_data(g, g->info.dim, g->stride); + MPSGraphTensorData* data_b = ccv_nnc_mps_graph_tensor_data(b, b->info.dim, b->stride); + MPSGraphTensorData* data[] = {data_g, data_b}; + ccv_nnc_mps_graph_executable_result(executable, command_buffer,
@[data[indices[0]], data[indices[1]]], &h, (int*[]){ h->info.dim }, (int*[]){ h->stride }, 1); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); + } + return CCV_NNC_EXEC_SUCCESS; +} + REGISTER_COMMAND_BACKEND(CCV_NNC_LEAKY_RELU_FORWARD, CCV_NNC_BACKEND_MPS)(ccv_nnc_cmd_backend_registry_t* const registry) { registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN; @@ -40,3 +83,12 @@ static int _ccv_nnc_leaky_relu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ registry->algorithms = 1; registry->exec = _ccv_nnc_leaky_relu_forw; } + +REGISTER_COMMAND_BACKEND(CCV_NNC_LEAKY_RELU_BACKWARD, CCV_NNC_BACKEND_MPS)(ccv_nnc_cmd_backend_registry_t* const registry) +{ + registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN; + registry->tensor_datatypes = CCV_32F | CCV_16F; + registry->tensor_memory = CCV_TENSOR_GPU_MEMORY; + registry->algorithms = 1; + registry->exec = _ccv_nnc_leaky_relu_back; +} \ No newline at end of file diff --git a/test/int/nnc/gelu.tests.c b/test/int/nnc/gelu.tests.c index fee6ebc2b..6a6ccb710 100644 --- a/test/int/nnc/gelu.tests.c +++ b/test/int/nnc/gelu.tests.c @@ -153,6 +153,66 @@ TEST_CASE("gelu gradient in float") ccv_nnc_symbolic_graph_free(symbolic_graph); } +TEST_CASE("mps gelu gradient in float") +{ + GUARD_ELSE_RETURN((ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS) && + ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_MPS))); + ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new(); + ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "x"); + ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "y"); + ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(0), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "gelu"); + ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0,
0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); + ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph)); + ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); + SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH); + ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y); + ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x); + ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + dsfmt_t dsfmt; + dsfmt_init_gen_rand(&dsfmt, 0); /* fixed seed 0 */ + int i; + for (i = 0; i < 10 * 100; i++) + x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt); + ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + for (i = 0; i < 10 * 100; i++) + dy_tensor->data.f32[i] = 0; + for (i = 0; i < 10; i++) + dy_tensor->data.f32[i * 100 + i] = 1; /* incoming gradient: 1 at element [i][i] of the 10x100 tensor, 0 elsewhere */ + ccv_nnc_tensor_t* const dyt = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor), TENSOR_LIST(dyt), 0); + ccv_nnc_graph_t* graph = 0; + ccv_nnc_tensor_arena_t* tensor_arena = 0; + ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0; + ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, TENSOR_BIND_MAP(KV(dy, dyt)), TENSOR_SYMBOL_LIST(y), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena); + GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH); + ccv_nnc_tensor_t* const xt = ccv_nnc_tensor_from_symbol(tensor_arena, x); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(xt), 0); + ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0); + ccv_nnc_tensor_t* const 
dx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_tensor_t* const dxt = ccv_nnc_tensor_from_symbol(tensor_arena, dx); + ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_symbol(tensor_arena, y); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dxt), TENSOR_LIST(dx_tensor), 0); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(yt), TENSOR_LIST(y_tensor), 0); /* graph outputs y and dx are now copied into CPU_TENSOR buffers for comparison */ + ccv_nnc_tensor_t* const ty_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_cmd_exec(CMD_GELU_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty_tensor), 0); /* reference forward result computed directly on the CPU_TENSOR input */ + REQUIRE_TENSOR_EQ(ty_tensor, y_tensor, "forward pass should match"); + ccv_nnc_tensor_t* const tdx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_cmd_exec(CMD_GELU_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor, x_tensor, 0), TENSOR_LIST(tdx_tensor), 0); /* reference backward: gradient in slot 0, input x in slot 1, slot 2 left empty */ + REQUIRE_TENSOR_EQ(tdx_tensor, dx_tensor, "backward pass should match"); + ccv_nnc_tensor_free(x_tensor); + ccv_nnc_tensor_free(y_tensor); + ccv_nnc_tensor_free(dx_tensor); + ccv_nnc_tensor_free(dy_tensor); + ccv_nnc_tensor_free(ty_tensor); + ccv_nnc_tensor_free(tdx_tensor); + ccv_nnc_tensor_free(dyt); + ccv_nnc_graph_free(graph); + ccv_nnc_tensor_arena_free(tensor_arena); + ccv_nnc_graph_exec_arena_free(graph_exec_arena); + ccv_nnc_symbolic_graph_free(symbolic_graph); +} + TEST_CASE("gelu gradient in half precision") { GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) && @@ -225,6 +285,78 @@ TEST_CASE("gelu gradient in half precision") ccv_nnc_symbolic_graph_free(symbolic_graph); } +TEST_CASE("mps gelu gradient in half precision") +{ /* Builds a GELU forward+backward graph on 16F GPU tensors; inputs are generated as 32F on the CPU side and converted to 16F before transfer, and results are converted back to 32F and compared with a 1e-3 tolerance. Skipped unless both GELU commands are available on the MPS backend. */ + GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS) && + ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_MPS)); + ccv_nnc_symbolic_graph_t* const 
symbolic_graph = ccv_nnc_symbolic_graph_new(); + ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 16F, 10, 100), "x"); + ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 16F, 10, 100), "y"); + ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(0), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "gelu"); + ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); + ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph)); + ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); + SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH); + ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y); + ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x); + ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + dsfmt_t dsfmt; + dsfmt_init_gen_rand(&dsfmt, 0); /* fixed seed 0 */ + int i; + for (i = 0; i < 10 * 100; i++) + x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt); + ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + for (i = 0; i < 10 * 100; i++) + dy_tensor->data.f32[i] = 0; + for (i = 0; i < 10; i++) + dy_tensor->data.f32[i * 100 + i] = 1; /* incoming gradient: 1 at element [i][i] of the 10x100 tensor, 0 elsewhere */ + ccv_nnc_tensor_t* const dy16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0); + ccv_nnc_tensor_t* const dyt = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10, 100), 0); + ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor), TENSOR_LIST(dy16_tensor), 0); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy16_tensor), TENSOR_LIST(dyt), 0); + ccv_nnc_graph_t* 
graph = 0; + ccv_nnc_tensor_arena_t* tensor_arena = 0; + ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0; + ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, TENSOR_BIND_MAP(KV(dy, dyt)), TENSOR_SYMBOL_LIST(y), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena); + GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH); + ccv_nnc_tensor_t* const xt = ccv_nnc_tensor_from_symbol(tensor_arena, x); + ccv_nnc_tensor_t* const x16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0); + ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(x16_tensor), 0); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x16_tensor), TENSOR_LIST(xt), 0); + ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0); + ccv_nnc_tensor_t* const dx16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0); + ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_tensor_t* const dxt = ccv_nnc_tensor_from_symbol(tensor_arena, dx); + ccv_nnc_tensor_t* const y16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0); + ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_symbol(tensor_arena, y); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dxt), TENSOR_LIST(dx16_tensor), 0); + ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dx16_tensor), TENSOR_LIST(dx_tensor), 0); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(yt), TENSOR_LIST(y16_tensor), 0); + ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(y16_tensor), TENSOR_LIST(y_tensor), 0); /* 16F graph outputs are now available as 32F in y_tensor and dx_tensor */ + ccv_nnc_tensor_t* const ty_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + 
ccv_nnc_cmd_exec(CMD_GELU_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty_tensor), 0); /* reference is computed from the unconverted 32F input, hence the tolerance below */ + REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, ty_tensor->data.f32, y_tensor->data.f32, 10 * 100, 1e-3, "forward pass should match"); + ccv_nnc_tensor_t* const tdx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_cmd_exec(CMD_GELU_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor, x_tensor, 0), TENSOR_LIST(tdx_tensor), 0); + REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tdx_tensor->data.f32, dx_tensor->data.f32, 10 * 100, 1e-3, "backward pass should match"); + ccv_nnc_tensor_free(x_tensor); + ccv_nnc_tensor_free(x16_tensor); + ccv_nnc_tensor_free(y_tensor); + ccv_nnc_tensor_free(y16_tensor); + ccv_nnc_tensor_free(dx_tensor); + ccv_nnc_tensor_free(dx16_tensor); + ccv_nnc_tensor_free(dy_tensor); + ccv_nnc_tensor_free(dy16_tensor); + ccv_nnc_tensor_free(ty_tensor); + ccv_nnc_tensor_free(tdx_tensor); + ccv_nnc_tensor_free(dyt); + ccv_nnc_graph_free(graph); + ccv_nnc_tensor_arena_free(tensor_arena); + ccv_nnc_graph_exec_arena_free(graph_exec_arena); + ccv_nnc_symbolic_graph_free(symbolic_graph); +} + TEST_CASE("fast gelu in float") { GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS)); diff --git a/test/int/nnc/mpsdnn.tests.c b/test/int/nnc/mpsdnn.tests.c index 2dde7e26b..533e106a3 100644 --- a/test/int/nnc/mpsdnn.tests.c +++ b/test/int/nnc/mpsdnn.tests.c @@ -1384,4 +1384,64 @@ TEST_CASE("mps mse sum loss forward") ccv_nnc_tensor_free(tc); } +TEST_CASE("mps leaky relu gradient in float") +{ /* Builds a leaky ReLU (negative slope 0.2) forward+backward graph on 32F GPU tensors and compares y and dx against results computed directly on CPU_TENSOR buffers. Skipped unless both leaky ReLU commands are available on the MPS backend. */ + GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LEAKY_RELU_FORWARD, CCV_NNC_BACKEND_MPS) && + ccv_nnc_cmd_ok(CCV_NNC_LEAKY_RELU_BACKWARD, CCV_NNC_BACKEND_MPS)); + ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new(); + ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "x"); + 
ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "y"); + ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_LEAKY_RELU_FORWARD(0.2), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "leaky relu"); + ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); + ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph)); + ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); + SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH); + ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y); + ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x); + ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + dsfmt_t dsfmt; + dsfmt_init_gen_rand(&dsfmt, 0); /* fixed seed 0 */ + int i; + for (i = 0; i < 10 * 100; i++) + x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt); /* NOTE(review): dsfmt_genrand_open_close presumably yields values in (0, 1], so every input would be positive and the negative-slope path would never be exercised - confirm and consider shifting the range */ + ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + for (i = 0; i < 10 * 100; i++) + dy_tensor->data.f32[i] = 0; + for (i = 0; i < 10; i++) + dy_tensor->data.f32[i * 100 + i] = 1; /* incoming gradient: 1 at element [i][i] of the 10x100 tensor, 0 elsewhere */ + ccv_nnc_tensor_t* const dyt = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor), TENSOR_LIST(dyt), 0); + ccv_nnc_graph_t* graph = 0; + ccv_nnc_tensor_arena_t* tensor_arena = 0; + ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0; + ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, TENSOR_BIND_MAP(KV(dy, dyt)), TENSOR_SYMBOL_LIST(y), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena); + 
GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH); + ccv_nnc_tensor_t* const xt = ccv_nnc_tensor_from_symbol(tensor_arena, x); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(xt), 0); + ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0); + ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_tensor_t* const dxt = ccv_nnc_tensor_from_symbol(tensor_arena, dx); + ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_symbol(tensor_arena, y); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dxt), TENSOR_LIST(dx_tensor), 0); + ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(yt), TENSOR_LIST(y_tensor), 0); /* graph outputs y and dx are now copied into CPU_TENSOR buffers for comparison */ + ccv_nnc_tensor_t* const ty_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_cmd_exec(CMD_LEAKY_RELU_FORWARD(0.2), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty_tensor), 0); + REQUIRE_TENSOR_EQ(ty_tensor, y_tensor, "forward pass should match"); + ccv_nnc_tensor_t* const tdx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0); + ccv_nnc_cmd_exec(CMD_LEAKY_RELU_BACKWARD(0.2), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor, 0, y_tensor), TENSOR_LIST(tdx_tensor), 0); /* reference backward: gradient in slot 0, slot 1 left empty, forward output y in slot 2 */ + REQUIRE_TENSOR_EQ(tdx_tensor, dx_tensor, "backward pass should match"); + ccv_nnc_tensor_free(x_tensor); + ccv_nnc_tensor_free(y_tensor); + ccv_nnc_tensor_free(dx_tensor); + ccv_nnc_tensor_free(dy_tensor); + ccv_nnc_tensor_free(ty_tensor); + ccv_nnc_tensor_free(tdx_tensor); + ccv_nnc_tensor_free(dyt); + ccv_nnc_graph_free(graph); + ccv_nnc_tensor_arena_free(tensor_arena); + ccv_nnc_graph_exec_arena_free(graph_exec_arena); + ccv_nnc_symbolic_graph_free(symbolic_graph); +} + #include "case_main.h"