From 3c117b35f152bcc0e1e33f8bbcae1ed255de4374 Mon Sep 17 00:00:00 2001
From: Philippe Guibert
Date: Tue, 21 May 2024 21:07:35 +0200
Subject: [PATCH] zebra: separate nht notifications from dplane_result thread

There is a CPU usage issue in zebra when BGP installs and removes a
large number of routes at the same time: vtysh and the shell become
unreachable. This happens in BGP failover scenarios with two peers,
when one of the peers becomes unreachable.

For each route change, nexthop tracking is called to evaluate the
impact of the route becoming (un)available. Two observations are made:

- When a specific route changes and a covering route exists (for
  example the default route present in this setup), nexthop tracking
  is called. There is no need to call nexthop tracking again for the
  same covering prefix, since the dplane_result thread handles batches
  of routes at the same time.

- The first picture from the link below shows that nexthop tracking
  consumes time, and keeping this activity in the zebra main thread
  still results in STARVATION messages.

Separate the nht notifications from the dplane_result thread by
creating a queue list that stores the prefixes to evaluate against
nexthop tracking. Before enqueueing a prefix, check that the same
prefix has not already been queued. The processing is done in a
separate 'rib_process_nht_thread_loop' function call.

Link: https://github.com/FRRouting/frr/pull/16028
Signed-off-by: Philippe Guibert
---
 zebra/rib.h       |   4 +-
 zebra/zebra_rib.c | 112 +++++++++++++++++++++++++++++++++++++++++++---
 zebra/zebra_vty.c |  10 +++++
 3 files changed, 118 insertions(+), 8 deletions(-)

diff --git a/zebra/rib.h b/zebra/rib.h
index a721f4bac456..426a08658cd9 100644
--- a/zebra/rib.h
+++ b/zebra/rib.h
@@ -475,7 +475,8 @@ extern struct route_table *rib_tables_iter_next(rib_tables_iter_t *iter);
 extern uint8_t route_distance(int type);
 
 extern void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq,
-					   bool rt_delete);
+					   bool rt_delete, bool enqueue_to_list);
+extern void rib_process_nht_thread_loop(struct event *event);
 
 /*
  * rib_find_rn_from_ctx
@@ -628,6 +629,7 @@ extern int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
 			  uint8_t instance);
 
 extern void zebra_vty_init(void);
+extern void zebra_rnh_job_list_display(struct vty *vty);
 
 extern pid_t pid;
 
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 0902f391b35a..6ad7fabf508a 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -56,6 +56,7 @@ DEFINE_MTYPE(ZEBRA, RE, "Route Entry");
 DEFINE_MTYPE_STATIC(ZEBRA, RIB_DEST, "RIB destination");
 DEFINE_MTYPE_STATIC(ZEBRA, RIB_UPDATE_CTX, "Rib update context object");
 DEFINE_MTYPE_STATIC(ZEBRA, WQ_WRAPPER, "WQ wrapper");
+DEFINE_MTYPE_STATIC(ZEBRA, RNH_JOB_CTX, "Rnh Job context");
 
 /*
  * Event, list, and mutex for delivery of dataplane results
@@ -821,11 +822,80 @@ static int rib_can_delete_dest(rib_dest_t *dest)
 	return 1;
 }
 
+PREDECL_DLIST(zebra_rnh_job_list);
+struct zebra_rnh_job_list_head zebra_rnh_list;
+struct zebra_rnh_job_ctx {
+	vrf_id_t vrf_id;
+	struct prefix prefix;
+	safi_t safi;
+	/* Embedded list linkage */
+	struct zebra_rnh_job_list_item rnh_entries;
+};
+DECLARE_DLIST(zebra_rnh_job_list, struct zebra_rnh_job_ctx, rnh_entries);
+static uint32_t zebra_rnh_job_list_num;
+static uint32_t zebra_rnh_job_list_dup;
+static uint32_t zebra_rnh_job_list_processed;
+static uint32_t zebra_rnh_job_list_max_batch;
+static struct event *t_zebra_rnh_job_list;
+
+void zebra_rnh_job_list_display(struct vty *vty)
+{
+	vty_out(vty,
+		"RIB route evaluation count %u, dup %u, processed %u, max per batch %u\n",
+		zebra_rnh_job_list_num, zebra_rnh_job_list_dup,
+		zebra_rnh_job_list_processed, zebra_rnh_job_list_max_batch);
+}
+
+void rib_process_nht_thread_loop(struct event *event)
+{
+	struct zebra_rnh_job_list_head ctxlist;
+	struct zebra_rnh_job_ctx *ctx;
+	struct zebra_vrf *zvrf;
+	uint32_t count = 0;
+
+	do {
+		zebra_rnh_job_list_init(&ctxlist);
+
+		/* Dequeue list of context structs */
+		while ((ctx = zebra_rnh_job_list_pop(&zebra_rnh_list)) != NULL)
+			zebra_rnh_job_list_add_tail(&ctxlist, ctx);
+
+		/* Dequeue context block */
+		ctx = zebra_rnh_job_list_pop(&ctxlist);
+		/* If we've emptied the results queue, we're done */
+		if (ctx == NULL)
+			break;
+		while (ctx) {
+			zvrf = zebra_vrf_lookup_by_id(ctx->vrf_id);
+			if (zvrf) {
+				zebra_rnh_job_list_processed++;
+				count++;
+				zebra_evaluate_rnh(zvrf,
+						   family2afi(ctx->prefix.family),
+						   0, &ctx->prefix, ctx->safi);
+			}
+			XFREE(MTYPE_RNH_JOB_CTX, ctx);
+			ctx = zebra_rnh_job_list_pop(&ctxlist);
+		}
+	} while (1);
+
+	if (count > zebra_rnh_job_list_max_batch)
+		zebra_rnh_job_list_max_batch = count;
+}
+
+static void rib_process_nht(void)
+{
+	event_add_timer_msec(zrouter.master, rib_process_nht_thread_loop, NULL,
+			     5, &t_zebra_rnh_job_list);
+}
+
 void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq,
-				    bool rt_delete)
+				    bool rt_delete, bool enqueue_to_list)
 {
 	rib_dest_t *dest = rib_dest_from_rnode(rn);
 	struct rnh *rnh;
+	struct zebra_rnh_job_ctx *ctx;
+	bool found;
 
 	/*
 	 * We are storing the rnh's associated withb
@@ -892,8 +962,31 @@ void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq,
 			}
 
 			rnh->seqno = seq;
-			zebra_evaluate_rnh(zvrf, family2afi(p->family), 0, p,
-					   rnh->safi);
+			if (enqueue_to_list) {
+				zebra_rnh_job_list_num++;
+				found = false;
+				frr_each_safe (zebra_rnh_job_list,
+					       &zebra_rnh_list, ctx) {
+					if (rnh->safi == ctx->safi &&
+					    zvrf->vrf->vrf_id == ctx->vrf_id &&
+					    prefix_same(&ctx->prefix, p)) {
+						found = true;
+						zebra_rnh_job_list_dup++;
+						break;
+					}
+				}
+				if (!found) {
+					ctx = XCALLOC(MTYPE_RNH_JOB_CTX,
+						      sizeof(struct zebra_rnh_job_ctx));
+					ctx->vrf_id = zvrf->vrf->vrf_id;
+					ctx->safi = rnh->safi;
+					prefix_copy(&ctx->prefix, p);
+					zebra_rnh_job_list_add_tail(&zebra_rnh_list,
+								    ctx);
+				}
+			} else
+				zebra_evaluate_rnh(zvrf, family2afi(p->family),
+						   0, p, rnh->safi);
 		}
 
 		rn = rn->parent;
@@ -929,7 +1022,7 @@ int rib_gc_dest(struct route_node *rn)
 	}
 
 	zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence(),
-				       true);
+				       true, false);
 
 	dest->rnode = NULL;
 	rnh_list_fini(&dest->nht);
@@ -2000,7 +2093,7 @@ static void zebra_rib_evaluate_prefix_nhg(struct hash_bucket *b, void *data)
 			redistribute_update(rn, re, re);
 
 		zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence(),
-					       false);
+					       false, false);
 		zebra_rib_evaluate_mpls(rn);
 	}
 }
@@ -2331,7 +2424,7 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx)
 			zebra_rib_fixup_system(rn);
 	}
 
-	zebra_rib_evaluate_rn_nexthops(rn, seq, rt_delete);
+	zebra_rib_evaluate_rn_nexthops(rn, seq, rt_delete, true);
 	zebra_rib_evaluate_mpls(rn);
 
 done:
@@ -2591,7 +2684,7 @@ static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx)
 
 	/* Make any changes visible for lsp and nexthop-tracking processing */
 	zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence(),
-				       false);
+				       false, false);
 
 	zebra_rib_evaluate_mpls(rn);
 
@@ -5104,6 +5197,8 @@ static void rib_process_dplane_results(struct event *thread)
 
 	} while (1);
 
+	rib_process_nht();
+
 #ifdef HAVE_SCRIPTING
 	if (fs)
 		frrscript_delete(fs);
@@ -5156,6 +5251,8 @@ void zebra_rib_init(void)
 {
 	check_route_info();
 
+	zebra_rnh_job_list_init(&zebra_rnh_list);
+
 	rib_queue_init();
 
 	/* Init dataplane, and register for results */
@@ -5169,6 +5266,7 @@ void zebra_rib_terminate(void)
 	struct zebra_dplane_ctx *ctx;
 
 	EVENT_OFF(t_dplane);
+	EVENT_OFF(t_zebra_rnh_job_list);
 
 	ctx = dplane_ctx_dequeue(&rib_dplane_q);
 	while (ctx) {
diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c
index 44720754ba8b..95ecb5ce76c0 100644
--- a/zebra/zebra_vty.c
+++ b/zebra/zebra_vty.c
@@ -4086,6 +4086,15 @@ DEFUN (show_dataplane,
 	return dplane_show_helper(vty, detailed);
 }
 
+/* Display RIB nexthop tracking queue info */
+DEFUN(show_rib_info, show_rib_info_cmd, "show rib info",
+      SHOW_STR "RIB information\n"
+      "RIB information\n")
+{
+	zebra_rnh_job_list_display(vty);
+	return CMD_SUCCESS;
+}
+
 /* Display dataplane providers info */
 DEFUN (show_dataplane_providers,
        show_dataplane_providers_cmd,
@@ -4463,6 +4472,7 @@ void zebra_vty_init(void)
 	install_element(CONFIG_NODE, &zebra_dplane_queue_limit_cmd);
 	install_element(CONFIG_NODE, &no_zebra_dplane_queue_limit_cmd);
 
+	install_element(VIEW_NODE, &show_rib_info_cmd);
 #ifdef HAVE_NETLINK
 	install_element(CONFIG_NODE, &zebra_kernel_netlink_batch_tx_buf_cmd);
 	install_element(CONFIG_NODE, &no_zebra_kernel_netlink_batch_tx_buf_cmd);
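
Reviewer note, not part of the patch: below is a minimal, self-contained C sketch of the
queueing pattern this change implements, i.e. enqueue a (vrf, safi, prefix) job only if an
identical one is not already pending, then drain the whole queue in a deferred pass and
evaluate each entry once. The types and helpers here (struct pfx, struct nht_job,
evaluate_prefix()) are simplified, hypothetical stand-ins, not the FRR APIs
(zebra_rnh_job_list, zebra_evaluate_rnh(), event_add_timer_msec()) used by the patch.

/* Illustrative sketch only: not FRR code. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pfx {
	int family;
	unsigned int len;
	unsigned char addr[16];
};

struct nht_job {
	unsigned int vrf_id;
	int safi;
	struct pfx prefix;
	struct nht_job *next;
};

/* Single pending queue, drained later from a deferred (timer-like) context. */
static struct nht_job *job_head, *job_tail;
static unsigned int job_dup, job_processed;

static bool pfx_same(const struct pfx *a, const struct pfx *b)
{
	/* Compare the whole address storage for simplicity. */
	return a->family == b->family && a->len == b->len &&
	       memcmp(a->addr, b->addr, sizeof(a->addr)) == 0;
}

/* Producer side: enqueue unless an identical job is already pending. */
static void nht_job_enqueue(unsigned int vrf_id, int safi, const struct pfx *p)
{
	struct nht_job *job;

	for (job = job_head; job; job = job->next) {
		if (job->vrf_id == vrf_id && job->safi == safi &&
		    pfx_same(&job->prefix, p)) {
			job_dup++;	/* duplicate: skip the enqueue */
			return;
		}
	}

	job = calloc(1, sizeof(*job));
	if (!job)
		return;
	job->vrf_id = vrf_id;
	job->safi = safi;
	job->prefix = *p;
	if (job_tail)
		job_tail->next = job;
	else
		job_head = job;
	job_tail = job;
}

/* Stand-in for the real nexthop-tracking evaluation: just report the work. */
static void evaluate_prefix(const struct nht_job *job)
{
	printf("evaluate vrf %u safi %d family %d len %u\n", job->vrf_id,
	       job->safi, job->prefix.family, job->prefix.len);
}

/* Consumer side: detach the whole queue, then process each entry once. */
static void nht_job_drain(void)
{
	struct nht_job *job = job_head, *next;

	job_head = job_tail = NULL;
	for (; job; job = next) {
		next = job->next;
		evaluate_prefix(job);
		job_processed++;
		free(job);
	}
}

int main(void)
{
	struct pfx def = { .family = 2, .len = 0 };

	/* Many route changes under the same covering prefix collapse into a
	 * single queued evaluation. */
	nht_job_enqueue(0, 1, &def);
	nht_job_enqueue(0, 1, &def);
	nht_job_enqueue(0, 1, &def);
	nht_job_drain();

	printf("processed %u, duplicates skipped %u\n", job_processed, job_dup);
	return 0;
}

Detaching the whole list before processing, as rib_process_nht_thread_loop() does with its
local ctxlist, keeps the drain loop independent of any jobs enqueued while it runs.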