From 3f80cae33e3856239a3ba38ca1af5b399759d205 Mon Sep 17 00:00:00 2001 From: Liu Liu Date: Fri, 11 Aug 2023 13:05:14 -0400 Subject: [PATCH] Update to speed up exec_dep table maintenance. --- lib/ccv.h | 8 +++ lib/ccv_util.c | 76 ++++++++++++++++++++++++ lib/nnc/ccv_nnc_symbolic_graph_compile.c | 26 ++++---- 3 files changed, 99 insertions(+), 11 deletions(-) diff --git a/lib/ccv.h b/lib/ccv.h index 89ed0485d..b4b17e7b1 100644 --- a/lib/ccv.h +++ b/lib/ccv.h @@ -781,6 +781,14 @@ ccv_numeric_data_t ccv_get_sparse_matrix_cell(const ccv_sparse_matrix_t* mat, in * @param data The data pointer. */ void ccv_set_sparse_matrix_cell(ccv_sparse_matrix_t* mat, int row, int col, const void* data); +/** + * Set a cell directly on a given vector of a sparse matrix. + * @param mat The sparse matrix the vector belongs to. + * @param vector The vector to write into. + * @param vidx The index of the cell within the vector. + * @param data The data pointer; the bytes of one cell are copied from it. + */ +void ccv_set_sparse_matrix_cell_from_vector(ccv_sparse_matrix_t* mat, ccv_sparse_matrix_vector_t* const vector, int vidx, const void* data); /** * Transform a sparse matrix into compressed representation. * @param mat The sparse matrix. diff --git a/lib/ccv_util.c b/lib/ccv_util.c index 69da2fd76..8b379ac40 100644 --- a/lib/ccv_util.c +++ b/lib/ccv_util.c @@ -896,6 +896,82 @@ static inline void _ccv_move_sparse_matrix_cell(uint8_t* const index, uint32_t k } } +void ccv_set_sparse_matrix_cell_from_vector(ccv_sparse_matrix_t* mat, ccv_sparse_matrix_vector_t* const vector, int vidx, const void* data) +{ + const size_t cell_size = CCV_GET_DATA_TYPE_SIZE(mat->type) * CCV_GET_CHANNEL(mat->type); + if (mat->type & CCV_DENSE_VECTOR) + { + memcpy(vector->data.u8 + vidx * cell_size, data, cell_size); + return; + } + if ((vector->rnum + 1) * 10llu > vector->size * 9llu) // Expand first if storing one more entry would put the table above 90% full. + _ccv_sparse_matrix_vector_inc_size(mat, vector); + // Round the cell size up to a multiple of 4 bytes. 
+ const size_t cell_size_aligned = (CCV_GET_DATA_TYPE_SIZE(mat->type) * CCV_GET_CHANNEL(mat->type) + 3) & -4; + const size_t index_size = sizeof(ccv_sparse_matrix_index_t) + cell_size_aligned; + uint8_t* const index = (uint8_t*)vector->index; + const int prime_index = vector->prime_index; + const uint32_t size = vector->size; + uint32_t idx = _ccv_sparse_matrix_index_for_hash(vidx, prime_index); + uint32_t k = 2; + for (; k < 255; ++idx, ++k) + { + if (idx >= size) + idx = 0; + ccv_sparse_matrix_index_t* const index_idx = (ccv_sparse_matrix_index_t*)(index + index_size * idx); + uint32_t j = index_idx->ifbit; + if (k > j) + { + ++vector->rnum; + index_idx->ifbit = k; + if (!j) + { + index_idx->i = vidx; + // ifbit was 0, so nothing needs to move: take the slot and copy the value in. + memcpy(index_idx + 1, data, cell_size); + } else { + _ccv_move_sparse_matrix_cell(index, j /* This is j not k because we are replacing it. */, idx, vidx, size, prime_index, index_size, cell_size_aligned); + memcpy(index_idx + 1, data, cell_size); + } + return; + } + if (index_idx->i == vidx) + { + memcpy(index_idx + 1, data, cell_size); + return; + } + } + // From k = 255 on, a stored ifbit of 0xff is a saturated value, so the distance is recomputed from the stored key. + for (;; ++idx, ++k) + { + if (idx >= size) + idx = 0; + ccv_sparse_matrix_index_t* const index_idx = (ccv_sparse_matrix_index_t*)(index + index_size * idx); + uint32_t j = index_idx->ifbit; + if (j == 0xff) + j = _ccv_sparse_matrix_index_for_hash(index_idx->i + size - idx, prime_index) + 2; + if (k > j) + { + ++vector->rnum; + index_idx->ifbit = k > 0xff ? 0xff : k; + if (!j) + { + index_idx->i = vidx; + memcpy(index_idx + 1, data, cell_size); + } else { + _ccv_move_sparse_matrix_cell(index, j /* This is j not k because we are replacing it. 
*/, idx, vidx, size, prime_index, index_size, cell_size_aligned); + memcpy(index_idx + 1, data, cell_size); + } + return; + } + if (index_idx->i == vidx) + { + memcpy(index_idx + 1, data, cell_size); + return; + } + } +} + void ccv_set_sparse_matrix_cell(ccv_sparse_matrix_t* mat, int row, int col, const void* data) { assert(data); diff --git a/lib/nnc/ccv_nnc_symbolic_graph_compile.c b/lib/nnc/ccv_nnc_symbolic_graph_compile.c index 683196e7e..c2cedd66e 100644 --- a/lib/nnc/ccv_nnc_symbolic_graph_compile.c +++ b/lib/nnc/ccv_nnc_symbolic_graph_compile.c @@ -2017,7 +2017,7 @@ static void _ccv_nnc_exec_dep_and_tensor_blocks_prep(const ccv_nnc_symbolic_grap ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx); if (vector) CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block); - if (!node->outgoings) + if (!node->outgoings || term) continue; for (i = 0; i < node->outgoings->rnum; i++) { @@ -2026,18 +2026,22 @@ static void _ccv_nnc_exec_dep_and_tensor_blocks_prep(const ccv_nnc_symbolic_grap ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, idx); /* If not found, set, if the current node is the destination node, no need * set itself as parent of subsequent nodes because its terminal nature. 
*/ - if (!term && (!cell.i32 || cell.i32[0] == 0)) + if (!cell.i32 || cell.i32[0] == 0) ccv_set_sparse_matrix_cell(exec_dep, outgoing, idx, &one); - for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */ + if (buf_size > 0) { - ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2]); - /* If not found, set */ - if (!cell.i32 || cell.i32[0] == 0) - ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &buf[j * 2 + 1]); - else { - /* Otherwise, set to the longest one */ - int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1]); - ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &dep); + ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, outgoing); + for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */ + { + ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell_from_vector(exec_dep, vector, buf[j * 2]); + /* If not found, set */ + if (!cell.i32 || cell.i32[0] == 0) + ccv_set_sparse_matrix_cell_from_vector(exec_dep, vector, buf[j * 2], &buf[j * 2 + 1]); + else { + /* Otherwise, set to the longest one */ + int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1]); + ccv_set_sparse_matrix_cell_from_vector(exec_dep, vector, buf[j * 2], &dep); + } } } }