From 11ef862d88c1616e0ebad71ccd70fc6bf73906c2 Mon Sep 17 00:00:00 2001 From: Philippe Guibert Date: Wed, 22 May 2024 13:53:15 +0200 Subject: [PATCH] zebra: handle route owner notifications from dplane_result thread There is a CPU issue in ZEBRA when BGP installs and removes a lot of routes at the same time. The vtysh and shell become unreachable. This is the case of BGP failover scenarios with two peers, and one of the peers becoming unreachable. For each route change, some route notifications may be called. This taks is cpu-intensive, and will perform I/O operations when calling the ZAPI socket to notify daemon of the route (un)install success or failure. The second picture of the below link illustrates it. In order to reduce the time taken in the dplane_result thread, propose to separate the nht notifications from the dplane_result thread by creating a queue list that will store the routes and the client that requested to (un)install the route. The processing is done in a separate 'zebra_route_process_notify_thread_loop()' function call. Link: https://github.com/FRRouting/frr/pull/16028 Signed-off-by: Philippe Guibert --- zebra/rib.h | 6 +++ zebra/zapi_msg.c | 53 ++++++++++++-------- zebra/zapi_msg.h | 9 +++- zebra/zebra_rib.c | 122 +++++++++++++++++++++++++++++++++++++++++++--- zebra/zebra_vty.c | 1 + 5 files changed, 162 insertions(+), 29 deletions(-) diff --git a/zebra/rib.h b/zebra/rib.h index 426a08658cd9..77f0eda7b6d6 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -22,6 +22,7 @@ #include "mpls.h" #include "srcdest_table.h" #include "zebra/zebra_nhg.h" +#include "zclient.h" #ifdef __cplusplus extern "C" { @@ -630,6 +631,11 @@ extern int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto, extern void zebra_vty_init(void); extern void zebra_rnh_job_list_display(struct vty *vty); +extern void zebra_route_notify_job_owner_list_display(struct vty *vty); +extern void +zebra_route_notify_job_owner_list_enqueue(struct route_node *rn, + const struct zebra_dplane_ctx *ctx, + enum zapi_route_notify_owner note); extern pid_t pid; diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index 197d75d59400..1ca1eecd9ad4 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -723,7 +723,7 @@ int zsend_nhg_notify(uint16_t type, uint16_t instance, uint32_t session_id, * Common utility send route notification, called from a path using a * route_entry and from a path using a dataplane context. */ -static int route_notify_internal(const struct route_node *rn, int type, +int route_notify_internal_prefix(const struct prefix *p, int type, uint16_t instance, vrf_id_t vrf_id, uint32_t table_id, enum zapi_route_notify_owner note, afi_t afi, @@ -736,17 +736,15 @@ static int route_notify_internal(const struct route_node *rn, int type, client = zserv_find_client(type, instance); if (!client || !client->notify_owner) { if (IS_ZEBRA_DEBUG_PACKET) - zlog_debug( - "Not Notifying Owner: %s about prefix %pRN(%u) %d vrf: %u", - zebra_route_string(type), rn, table_id, note, - vrf_id); + zlog_debug("Not Notifying Owner: %s about prefix %pFX(%u) %d vrf: %u", + zebra_route_string(type), p, table_id, note, + vrf_id); return 0; } if (IS_ZEBRA_DEBUG_PACKET) - zlog_debug( - "Notifying Owner: %s about prefix %pRN(%u) %d vrf: %u", - zebra_route_string(type), rn, table_id, note, vrf_id); + zlog_debug("Notifying Owner: %s about prefix %pFX(%u) %d vrf: %u", + zebra_route_string(type), p, table_id, note, vrf_id); /* We're just allocating a small-ish buffer here, since we only * encode a small amount of data. @@ -759,11 +757,11 @@ static int route_notify_internal(const struct route_node *rn, int type, stream_put(s, ¬e, sizeof(note)); - stream_putc(s, rn->p.family); + stream_putc(s, p->family); - blen = prefix_blen(&rn->p); - stream_putc(s, rn->p.prefixlen); - stream_put(s, &rn->p.u.prefix, blen); + blen = prefix_blen(p); + stream_putc(s, p->prefixlen); + stream_put(s, &p->u.prefix, blen); stream_putl(s, table_id); @@ -776,6 +774,16 @@ static int route_notify_internal(const struct route_node *rn, int type, return zserv_send_message(client, s); } +static int route_notify_internal(const struct route_node *rn, int type, + uint16_t instance, vrf_id_t vrf_id, + uint32_t table_id, + enum zapi_route_notify_owner note, afi_t afi, + safi_t safi) +{ + return route_notify_internal_prefix(&rn->p, type, instance, vrf_id, + table_id, note, afi, safi); +} + int zsend_route_notify_owner(const struct route_node *rn, struct route_entry *re, enum zapi_route_notify_owner note, afi_t afi, @@ -789,16 +797,23 @@ int zsend_route_notify_owner(const struct route_node *rn, * Route-owner notification using info from dataplane update context. */ int zsend_route_notify_owner_ctx(const struct zebra_dplane_ctx *ctx, - enum zapi_route_notify_owner note) + enum zapi_route_notify_owner note, + bool enqueue_to_list) { - int result; + int result = 0; struct route_node *rn = rib_find_rn_from_ctx(ctx); - result = route_notify_internal( - rn, dplane_ctx_get_type(ctx), dplane_ctx_get_instance(ctx), - dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), note, - dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx)); - + if (enqueue_to_list) { + zebra_route_notify_job_owner_list_enqueue(rn, ctx, note); + } else { + rn = rib_find_rn_from_ctx(ctx); + result = route_notify_internal(rn, dplane_ctx_get_type(ctx), + dplane_ctx_get_instance(ctx), + dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), note, + dplane_ctx_get_afi(ctx), + dplane_ctx_get_safi(ctx)); + } route_unlock_node(rn); return result; diff --git a/zebra/zapi_msg.h b/zebra/zapi_msg.h index 43f734d26e6d..51e7ddb8a062 100644 --- a/zebra/zapi_msg.h +++ b/zebra/zapi_msg.h @@ -66,7 +66,8 @@ extern int zsend_route_notify_owner(const struct route_node *rn, enum zapi_route_notify_owner note, afi_t afi, safi_t safi); extern int zsend_route_notify_owner_ctx(const struct zebra_dplane_ctx *ctx, - enum zapi_route_notify_owner note); + enum zapi_route_notify_owner note, + bool enqueue_to_list); extern void zsend_rule_notify_owner(const struct zebra_dplane_ctx *ctx, enum zapi_rule_notify_owner note); @@ -109,7 +110,11 @@ extern int zsend_zebra_srv6_locator_delete(struct zserv *client, struct srv6_locator *loc); extern int zsend_srv6_manager_get_locator_chunk_response(struct zserv *client, vrf_id_t vrf_id, struct srv6_locator *loc); - +extern int route_notify_internal_prefix(const struct prefix *p, int type, + uint16_t instance, vrf_id_t vrf_id, + uint32_t table_id, + enum zapi_route_notify_owner note, + afi_t afi, safi_t safi); #ifdef __cplusplus } #endif diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 6ad7fabf508a..0c2890df4089 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -57,6 +57,7 @@ DEFINE_MTYPE_STATIC(ZEBRA, RIB_DEST, "RIB destination"); DEFINE_MTYPE_STATIC(ZEBRA, RIB_UPDATE_CTX, "Rib update context object"); DEFINE_MTYPE_STATIC(ZEBRA, WQ_WRAPPER, "WQ wrapper"); DEFINE_MTYPE_STATIC(ZEBRA, RNH_JOB_CTX, "Rnh Job context"); +DEFINE_MTYPE_STATIC(ZEBRA, ROUTE_NOTIFY_JOB_OWNER_CTX, "Route Notify Job Owner"); /* * Event, list, and mutex for delivery of dataplane results @@ -2329,20 +2330,24 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) /* Notify route owner */ if (zebra_router_notify_on_ack()) - zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_INSTALLED); + zsend_route_notify_owner_ctx(ctx, + ZAPI_ROUTE_INSTALLED, + true); else { if (re) { if (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOADED)) zsend_route_notify_owner_ctx( ctx, - ZAPI_ROUTE_INSTALLED); + ZAPI_ROUTE_INSTALLED, + true); if (CHECK_FLAG( re->flags, ZEBRA_FLAG_OFFLOAD_FAILED)) zsend_route_notify_owner_ctx( ctx, - ZAPI_ROUTE_FAIL_INSTALL); + ZAPI_ROUTE_FAIL_INSTALL, + true); } } } else { @@ -2397,15 +2402,16 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) p_nhg_p); } } - zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_REMOVED); + zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_REMOVED, + true); if (zvrf) zvrf->removals++; } else { if (re) SET_FLAG(re->status, ROUTE_ENTRY_FAILED); - zsend_route_notify_owner_ctx(ctx, - ZAPI_ROUTE_REMOVE_FAIL); + zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_REMOVE_FAIL, + true); zlog_warn("%s(%u:%u):%pRN: Route Deletion failure", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx), @@ -2676,10 +2682,12 @@ static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx) if (!zebra_router_notify_on_ack()) { if (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOADED)) - zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_INSTALLED); + zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_INSTALLED, + false); if (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOAD_FAILED)) zsend_route_notify_owner_ctx(ctx, - ZAPI_ROUTE_FAIL_INSTALL); + ZAPI_ROUTE_FAIL_INSTALL, + false); } /* Make any changes visible for lsp and nexthop-tracking processing */ @@ -4994,6 +5002,101 @@ void rib_close_table(struct route_table *table) } } +PREDECL_DLIST(zebra_route_notify_job_owner_list); +struct zebra_route_notify_job_owner_list_head zebra_route_notify_owner_list; +struct zebra_route_notify_job_owner_ctx { + vrf_id_t vrf_id; + struct prefix prefix; + int type; + uint16_t instance; + uint32_t table; + afi_t afi; + safi_t safi; + enum zapi_route_notify_owner note; + /* Embedded list linkage */ + struct zebra_route_notify_job_owner_list_item rno_entries; +}; +DECLARE_DLIST(zebra_route_notify_job_owner_list, + struct zebra_route_notify_job_owner_ctx, rno_entries); +static uint32_t zebra_route_notify_job_owner_list_num; +static uint32_t zebra_route_notify_job_owner_list_processed; +static uint32_t zebra_route_notify_job_owner_list_max_batch; +static struct event *t_zebra_route_notify_job_owner_list; + +void zebra_route_notify_job_owner_list_enqueue( + struct route_node *rn, const struct zebra_dplane_ctx *ctx, + enum zapi_route_notify_owner note) +{ + struct zebra_route_notify_job_owner_ctx *entry; + + entry = XCALLOC(MTYPE_ROUTE_NOTIFY_JOB_OWNER_CTX, + sizeof(struct zebra_route_notify_job_owner_ctx)); + entry->vrf_id = dplane_ctx_get_vrf(ctx); + entry->afi = dplane_ctx_get_afi(ctx); + entry->safi = dplane_ctx_get_safi(ctx); + entry->table = dplane_ctx_get_table(ctx); + entry->instance = dplane_ctx_get_instance(ctx); + entry->type = dplane_ctx_get_type(ctx); + entry->note = note; + prefix_copy(&entry->prefix, &rn->p); + zebra_route_notify_job_owner_list_num++; + zebra_route_notify_job_owner_list_add_tail(&zebra_route_notify_owner_list, + entry); +} + +void zebra_route_notify_job_owner_list_display(struct vty *vty) +{ + vty_out(vty, + "Route notify owner list count %u, processed %u, max per batch %u\n", + zebra_route_notify_job_owner_list_num, + zebra_route_notify_job_owner_list_processed, + zebra_route_notify_job_owner_list_max_batch); +} + +static void zebra_route_process_notify_thread_loop(struct event *event) +{ + struct zebra_route_notify_job_owner_list_head ctxlist; + struct zebra_route_notify_job_owner_ctx *ctx; + uint32_t count = 0; + + do { + zebra_route_notify_job_owner_list_init(&ctxlist); + + /* Dequeue list of context structs */ + while ((ctx = zebra_route_notify_job_owner_list_pop( + &zebra_route_notify_owner_list)) != NULL) + zebra_route_notify_job_owner_list_add_tail(&ctxlist, + ctx); + + /* Dequeue context block */ + ctx = zebra_route_notify_job_owner_list_pop(&ctxlist); + /* If we've emptied the results queue, we're done */ + if (ctx == NULL) + break; + while (ctx) { + zebra_route_notify_job_owner_list_processed++; + count++; + route_notify_internal_prefix(&ctx->prefix, ctx->type, + ctx->instance, ctx->vrf_id, + ctx->table, ctx->note, + ctx->afi, ctx->safi); + XFREE(MTYPE_ROUTE_NOTIFY_JOB_OWNER_CTX, ctx); + ctx = zebra_route_notify_job_owner_list_pop(&ctxlist); + } + } while (1); + + if (count > zebra_route_notify_job_owner_list_max_batch) { + zebra_route_notify_job_owner_list_max_batch = count; + } +} + +static void zebra_route_process_notify(void) +{ + event_add_timer_msec(zrouter.master, + zebra_route_process_notify_thread_loop, NULL, 5, + &t_zebra_route_notify_job_owner_list); +} + /* * Handler for async dataplane results after a pseudowire installation */ @@ -5197,6 +5300,7 @@ static void rib_process_dplane_results(struct event *thread) } while (1); + zebra_route_process_notify(); rib_process_nht(); #ifdef HAVE_SCRIPTING @@ -5252,6 +5356,7 @@ void zebra_rib_init(void) check_route_info(); zebra_rnh_job_list_init(&zebra_rnh_list); + zebra_route_notify_job_owner_list_init(&zebra_route_notify_owner_list); rib_queue_init(); @@ -5267,6 +5372,7 @@ void zebra_rib_terminate(void) EVENT_OFF(t_dplane); EVENT_OFF(t_zebra_rnh_job_list); + EVENT_OFF(t_zebra_route_notify_job_owner_list); ctx = dplane_ctx_dequeue(&rib_dplane_q); while (ctx) { diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index 95ecb5ce76c0..275ece0dbba3 100644 --- a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -4092,6 +4092,7 @@ DEFUN(show_rib_info, show_rib_info_cmd, "show rib info", "RIB information\n") { zebra_rnh_job_list_display(vty); + zebra_route_notify_job_owner_list_display(vty); return CMD_SUCCESS; }