From 29e9bf47160072c51fe9d1b0a429c5cbd901df3f Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Fri, 27 Sep 2024 20:11:39 +1200 Subject: [PATCH 01/13] update that pooling situation to use a fixed quantity of pools and stochastically assign them --- src/clj/web/lobby.clj | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/clj/web/lobby.clj b/src/clj/web/lobby.clj index 595178df33..c9af7e91a4 100644 --- a/src/clj/web/lobby.clj +++ b/src/clj/web/lobby.clj @@ -20,12 +20,32 @@ (read-write/print-time-literals-clj!) -(defonce lobby-pool (cp/threadpool 1 {:name "lobbies-thread"})) -(defn lobby-thread [& body] - (cp/future lobby-pool body)) +;; Oracle guidance for active threads is ~cores+2 +(defonce pool-size (+ 2 (.availableProcessors (Runtime/getRuntime)))) + +(defonce game-pools + (letfn [(new-pool [x] + {:pool (cp/threadpool 1 {:name (str "game-thread-" x)}) + :occupants (atom 0)})] ;; note that occupants is just for load balancing purposes + (vec (map new-pool (range pool-size))))) -(defn game-thread [lobby & body] - (cp/future (:pool lobby) body)) +(defn pool-occupants-info [] (map #(deref (:occupants %)) game-pools)) + +(defn join-pool! + "Returns one of the pools at random with the least occupants. Updates pool occupancy" + [] + (let [pool (first (sort-by #(deref (:occupants %)) (shuffle game-pools)))] + (swap! (:occupants pool) inc) + pool)) + +(defn leave-pool! + "Leaves a pool. This just decrements the occupants, so we can reassign it smartly" + [pool] + (swap! (:occupants pool) dec)) + +(defonce lobby-pool (cp/threadpool 1 {:name "lobbies-thread"})) +(defn lobby-thread [& body] (cp/future lobby-pool body)) +(defn game-thread [lobby & body] (cp/future (get-in lobby [:pool :pool]) body)) (defn validate-precon [format client-precon client-gateway-type] @@ -56,7 +76,7 @@ :corp-spectators [] :runner-spectators [] :messages [] - :pool (cp/threadpool 1 {:name (str "game-" gameid)}) ;; each lobby can have it's own thread + :pool (join-pool!) ;; options :precon (validate-precon format precon gateway-type) :allow-spectator allow-spectator @@ -333,7 +353,7 @@ (defn close-lobby! "Closes the given game lobby, booting all players and updating stats." ([db lobby] (close-lobby! db lobby nil)) - ([db {:keys [gameid players started on-close] :as lobby} skip-on-close] + ([db {:keys [gameid players pool started on-close] :as lobby} skip-on-close] (when started (stats/game-finished db lobby) (stats/update-deck-stats db lobby) @@ -342,7 +362,7 @@ (swap! app-state/app-state update :lobbies dissoc gameid) (doseq [uid (keep :uid (get-players-and-spectators lobby))] (clear-lobby-state uid)) - (cp/shutdown (:pool lobby)) + (leave-pool! pool) (when (and (not skip-on-close) on-close) (on-close lobby)))) From 3b51a3481abfe62917a0841aa3f19a2abf37f631 Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Fri, 27 Sep 2024 20:12:00 +1200 Subject: [PATCH 02/13] auto-thread ws messages so they don't block --- src/clj/web/system.clj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/clj/web/system.clj b/src/clj/web/system.clj index 0536b6fced..27b103ae7e 100644 --- a/src/clj/web/system.clj +++ b/src/clj/web/system.clj @@ -119,7 +119,8 @@ (defmethod ig/init-key :sente/router [_ _opts] (sente/start-server-chsk-router! ch-chsk - event-msg-handler)) + event-msg-handler + {:simple-auto-threading? true})) (defmethod ig/halt-key! :sente/router [_ stop-fn] (when (fn? stop-fn) From a03602d02a3632e0620d1676ec050d03f71d285d Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Fri, 27 Sep 2024 20:12:23 +1200 Subject: [PATCH 03/13] added telemetry for thread pool occupancy --- src/clj/web/telemetry.clj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/clj/web/telemetry.clj b/src/clj/web/telemetry.clj index 8cfc1367a5..f63dc5c657 100644 --- a/src/clj/web/telemetry.clj +++ b/src/clj/web/telemetry.clj @@ -6,7 +6,7 @@ [cljc.java-time.instant :as inst] [game.core.board :refer [all-active]] [web.app-state :refer [app-state]] - [web.lobby :refer [lobby-update-uids]] + [web.lobby :refer [lobby-update-uids pool-occupants-info]] [web.ws :refer [connected-sockets connections_]] [taoensso.encore :as enc] [taoensso.timbre :as timbre])) @@ -95,6 +95,7 @@ " uid: " ws-uid-count " conn: " ws-conn-total " }")) + (timbre/info (str "pool occupants: " (seq (pool-occupants-info)))) ;; TODO - once we've got this set up on the server, wrap it in a try/catch - only ever display ;; the warning once! (timbre/info (str "Active Cards (across all lobbies): " card-freqs)) From ab08f52268d1283ca91db77defbf55ae69621ddc Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Sat, 28 Sep 2024 16:45:23 +1200 Subject: [PATCH 04/13] attach timestamp to ws messages before processing --- src/clj/web/ws.clj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/clj/web/ws.clj b/src/clj/web/ws.clj index bfef021b98..8a1860066a 100644 --- a/src/clj/web/ws.clj +++ b/src/clj/web/ws.clj @@ -1,5 +1,6 @@ (ns web.ws (:require + [cljc.java-time.instant :as inst] [clojure.core.async :refer [! chan go timeout]] [web.app-state :refer [register-user! deregister-user!]] [web.user :refer [active-user?]] @@ -91,7 +92,7 @@ "Wraps `-msg-handler` with logging, error catching, etc." [event] (try - (-msg-handler event) + (-msg-handler (assoc event :timestamp (inst/now))) (catch Exception e (println "Caught an error in the message handler") (println (.printStackTrace e))))) From fcceeab0a5b0ac309c9d428db993a61397564922 Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Sat, 28 Sep 2024 16:48:03 +1200 Subject: [PATCH 05/13] log endpoint processing times and output them with our telemetry --- src/clj/web/chat.clj | 22 +++++--- src/clj/web/decks.clj | 7 ++- src/clj/web/game.clj | 77 +++++++++++++++++++-------- src/clj/web/lobby.clj | 106 +++++++++++++++++++++++++++++--------- src/clj/web/telemetry.clj | 10 +++- 5 files changed, 167 insertions(+), 55 deletions(-) diff --git a/src/clj/web/chat.clj b/src/clj/web/chat.clj index 5d6357a3a5..d3143e3e83 100644 --- a/src/clj/web/chat.clj +++ b/src/clj/web/chat.clj @@ -5,6 +5,7 @@ [monger.collection :as mc] [monger.query :as q] [web.app-state :as app-state] + [web.lobby :as lobby] [web.mongodb :refer [->object-id]] [web.user :refer [active-user? visible-to-user]] [web.utils :refer [response mongo-time-to-utc-string]] @@ -67,7 +68,9 @@ chat-settings :system/chat user :user} :ring-req uid :uid - {:keys [channel msg]} :?data}] + {:keys [channel msg]} :?data + id :id + timestamp :timestamp}] (when (and (active-user? user) (not (s/blank? msg))) (let [len-valid (<= (count msg) (chat-max-length chat-settings)) @@ -88,13 +91,16 @@ (visible-to-user user {:username uid} connected-users))] (ws/broadcast-to! [uid] :chat/message inserted))) (when uid - (ws/broadcast-to! [uid] :chat/blocked {:reason (if len-valid :rate-exceeded :length-exceeded)})))))) + (ws/broadcast-to! [uid] :chat/blocked {:reason (if len-valid :rate-exceeded :length-exceeded)}))))) + (lobby/log-delay! timestamp id)) (defmethod ws/-msg-handler :chat/delete-msg chat--delete-msg [{{db :system/db {:keys [username isadmin ismoderator] :as user} :user} :ring-req - {:keys [msg]} :?data}] + {:keys [msg]} :?data + id :id + timestamp :timestamp}] (when-let [id (:_id msg)] (when (or isadmin ismoderator) (println username "deleted message" msg "\n") @@ -105,13 +111,16 @@ :date (inst/now) :msg msg}) (doseq [uid (ws/connected-uids)] - (ws/broadcast-to! [uid] :chat/delete-msg msg))))) + (ws/broadcast-to! [uid] :chat/delete-msg msg)))) + (lobby/log-delay! timestamp id)) (defmethod ws/-msg-handler :chat/delete-all chat--delete-all [{{db :system/db {:keys [username isadmin ismoderator]} :user :as user} :ring-req - {:keys [sender]} :?data}] + {:keys [sender]} :?data + id :id + timestamp :timestamp}] (when (and sender (or isadmin ismoderator)) (println username "deleted all messages from user" sender "\n") @@ -122,4 +131,5 @@ :date (inst/now) :sender sender}) (doseq [uid (ws/connected-uids)] - (ws/broadcast-to! [uid] :chat/delete-all {:username sender})))) + (ws/broadcast-to! [uid] :chat/delete-all {:username sender}))) + (lobby/log-delay! timestamp id)) diff --git a/src/clj/web/decks.clj b/src/clj/web/decks.clj index b8cfd152db..f4ddbb6e71 100644 --- a/src/clj/web/decks.clj +++ b/src/clj/web/decks.clj @@ -101,7 +101,9 @@ [{{db :system/db {username :username} :user} :ring-req uid :uid - {:keys [input]} :?data}] + {:keys [input]} :?data + id :id + timestamp :timestamp}] (try (let [deck (nrdb/download-public-decklist db input)] (if (every? #(contains? deck %) [:name :identity :cards]) @@ -116,4 +118,5 @@ (ws/broadcast-to! [uid] :decks/import-success "Imported")) (ws/broadcast-to! [uid] :decks/import-failure "Failed to parse imported deck."))) (catch Exception _ - (ws/broadcast-to! [uid] :decks/import-failure "Failed to import deck.")))) + (ws/broadcast-to! [uid] :decks/import-failure "Failed to import deck."))) + (lobby/log-delay! timestamp id)) diff --git a/src/clj/web/game.clj b/src/clj/web/game.clj index 2949e43618..cf02f71522 100644 --- a/src/clj/web/game.clj +++ b/src/clj/web/game.clj @@ -173,7 +173,9 @@ game--start [{{db :system/db} :ring-req uid :uid - {gameid :gameid} :?data}] + {gameid :gameid} :?data + id :id + timestamp :timestamp}] (lobby/lobby-thread (let [{:keys [players started] :as lobby} (app-state/get-lobby gameid)] (when (and lobby (lobby/first-player? uid lobby) (not started)) @@ -186,14 +188,17 @@ (stats/game-started db lobby?) (lobby/send-lobby-state lobby?) (lobby/broadcast-lobby-list) - (send-state-to-participants :game/start lobby? (diffs/public-states (:state lobby?))))))))) + (send-state-to-participants :game/start lobby? (diffs/public-states (:state lobby?))))))) + (lobby/log-delay! timestamp id))) (defmethod ws/-msg-handler :game/leave game--leave [{{db :system/db user :user} :ring-req uid :uid {gameid :gameid} :?data - ?reply-fn :?reply-fn}] + ?reply-fn :?reply-fn + id :id + timestamp :timestamp}] (lobby/lobby-thread (let [{:keys [started state] :as lobby} (app-state/get-lobby gameid)] (when (and lobby (lobby/in-lobby? uid lobby) started state) @@ -203,7 +208,8 @@ lobby? nil nil (str (:username user) " has left the game."))) (lobby/send-lobby-list uid) (lobby/broadcast-lobby-list) - (when ?reply-fn (?reply-fn true)))))) + (when ?reply-fn (?reply-fn true)))) + (lobby/log-delay! timestamp id))) (defn uid-in-lobby-as-original-player? [uid] (find-first @@ -215,7 +221,9 @@ game--rejoin [{{user :user} :ring-req uid :uid - ?data :?data}] + ?data :?data + id :id + timestamp :timestamp}] (lobby/lobby-thread (let [{:keys [original-players started players] :as lobby} (uid-in-lobby-as-original-player? uid) original-player (find-first #(= uid (:uid %)) original-players)] @@ -226,24 +234,30 @@ lobby? (lobby/join-lobby! user uid ?data nil lobby)] (when lobby? (send-state-to-uid! uid :game/start lobby? (diffs/public-states (:state lobby?))) - (update-and-send-diffs! main/handle-rejoin lobby? user))))))) + (update-and-send-diffs! main/handle-rejoin lobby? user))))) + (lobby/log-delay! timestamp id))) (defmethod ws/-msg-handler :game/concede game--concede [{uid :uid - {gameid :gameid} :?data}] + {gameid :gameid} :?data + id :id + timestamp :timestamp}] (let [lobby (app-state/get-lobby gameid) player (lobby/player? uid lobby)] (lobby/game-thread lobby (when (and lobby player) (let [side (side-from-str (:side player))] - (update-and-send-diffs! main/handle-concede lobby side)))))) + (update-and-send-diffs! main/handle-concede lobby side))) + (lobby/log-delay! timestamp id)))) (defmethod ws/-msg-handler :game/action game--action [{uid :uid - {:keys [gameid command args]} :?data}] + {:keys [gameid command args]} :?data + id :id + timestamp :timestamp}] (try (let [{:keys [state] :as lobby} (app-state/get-lobby gameid) player (lobby/player? uid lobby) @@ -268,7 +282,8 @@ :players (map #(select-keys % [:uid :side]) (:players lobby)) :spectators (map #(select-keys % [:uid]) (:spectators lobby)) :command command - :args args}))))) + :args args}))) + (lobby/log-delay! timestamp id))) (catch Exception e (ws/chsk-send! uid [:game/error]) (println (str "Caught exception" @@ -278,7 +293,9 @@ (defmethod ws/-msg-handler :game/resync game--resync [{uid :uid - {gameid :gameid} :?data}] + {gameid :gameid} :?data + id :id + timestamp :timestamp}] (let [lobby (app-state/get-lobby gameid)] (lobby/game-thread lobby @@ -290,14 +307,17 @@ "\nGameID by ClientID:" gameid "\nClientID:" uid "\nPlayers:" (map #(select-keys % [:uid :side]) (:players lobby)) - "\nSpectators" (map #(select-keys % [:uid]) (:spectators lobby))))))))) + "\nSpectators" (map #(select-keys % [:uid]) (:spectators lobby)))))) + (lobby/log-delay! timestamp id)))) (defmethod ws/-msg-handler :game/watch game--watch [{{user :user} :ring-req uid :uid {:keys [gameid password request-side]} :?data - ?reply-fn :?reply-fn}] + ?reply-fn :?reply-fn + id :id + timestamp :timestamp}] (lobby/lobby-thread (let [lobby (app-state/get-lobby gameid)] (when (and lobby (lobby/allowed-in-lobby user lobby)) @@ -322,13 +342,16 @@ (false? correct-password?) (when ?reply-fn (?reply-fn 403)) :else - (when ?reply-fn (?reply-fn 404)))))))) + (when ?reply-fn (?reply-fn 404)))))) + (lobby/log-delay! timestamp id))) (defmethod ws/-msg-handler :game/mute-spectators game--mute-spectators [{{user :user} :ring-req uid :uid - {gameid :gameid} :?data}] + {gameid :gameid} :?data + id :id + timestamp :timestamp}] (let [new-app-state (swap! app-state/app-state update :lobbies #(-> % (lobby/handle-toggle-spectator-mute gameid uid) (lobby/handle-set-last-update gameid uid))) @@ -340,13 +363,16 @@ lobby? (handle-message-and-send-diffs! lobby? nil nil (str (:username user) " " message " spectators.")) ;; needed to update the status bar - (lobby/send-lobby-state lobby?))))) + (lobby/send-lobby-state lobby?) + (lobby/log-delay! timestamp id))))) (defmethod ws/-msg-handler :game/say game--say [{{user :user} :ring-req uid :uid - {:keys [gameid msg]} :?data}] + {:keys [gameid msg]} :?data + id :id + timestamp :timestamp}] (let [new-app-state (swap! app-state/app-state update :lobbies lobby/handle-set-last-update gameid uid) {:keys [state mute-spectators] :as lobby?} (get-in new-app-state [:lobbies gameid]) side (cond+ @@ -355,25 +381,31 @@ (when (and lobby? state side) (lobby/game-thread lobby? - (handle-message-and-send-diffs! lobby? side user msg))))) + (handle-message-and-send-diffs! lobby? side user msg) + (lobby/log-delay! timestamp id))))) (defmethod ws/-msg-handler :game/typing game--typing [{uid :uid - {:keys [gameid typing]} :?data}] + {:keys [gameid typing]} :?data + id :id + timestamp :timestamp}] (let [{:keys [state players] :as lobby} (app-state/get-lobby gameid)] (lobby/game-thread lobby (when (and state (lobby/player? uid lobby)) (doseq [{:keys [uid]} (remove #(= uid (:uid %)) players)] - (ws/chsk-send! uid [:game/typing typing])))))) + (ws/chsk-send! uid [:game/typing typing]))) + (lobby/log-delay! timestamp id)))) (defmethod ws/-msg-handler :chsk/uidport-close chsk--uidport-close [{{db :system/db user :user} :ring-req uid :uid - ?reply-fn :?reply-fn}] + ?reply-fn :?reply-fn + id :id + timestamp :timestamp}] (lobby/lobby-thread (let [{:keys [started state] :as lobby} (app-state/uid->lobby uid)] (when (and started state) @@ -384,4 +416,5 @@ (lobby/send-lobby-list uid) (lobby/broadcast-lobby-list) (app-state/deregister-user! uid) - (when ?reply-fn (?reply-fn true)))) + (when ?reply-fn (?reply-fn true)) + (lobby/log-delay! timestamp id))) diff --git a/src/clj/web/lobby.clj b/src/clj/web/lobby.clj index c9af7e91a4..6c2d06faee 100644 --- a/src/clj/web/lobby.clj +++ b/src/clj/web/lobby.clj @@ -1,6 +1,8 @@ (ns web.lobby (:require [cljc.java-time.instant :as inst] + [cljc.java-time.duration :as duration] + [cljc.java-time.temporal.chrono-unit :as chrono] [clojure.set :as set] [clojure.string :as str] [com.climate.claypoole :as cp] @@ -20,6 +22,26 @@ (read-write/print-time-literals-clj!) +(defonce telemetry-buckets (atom {})) +(defn log-delay! + [timestamp id] + (let [now (inst/now) + start timestamp + key (or id :unknown) + diff (duration/between start now) + total-ms (quot (duration/get diff chrono/nanos) 1000000) + create-or-update (fn [map] + (if (contains? map key) + (assoc map key (conj (key map) total-ms)) + (assoc map key (seq [total-ms]))))] + (swap! telemetry-buckets create-or-update))) +(defn fetch-delay-log! + [] + (let [res @telemetry-buckets] + (reset! telemetry-buckets {}) + res)) + + ;; Oracle guidance for active threads is ~cores+2 (defonce pool-size (+ 2 (.availableProcessors (Runtime/getRuntime)))) @@ -276,7 +298,9 @@ lobby--create [{{user :user} :ring-req uid :uid - ?data :?data}] + ?data :?data + id :id + timestamp :timestamp}] (lobby-thread (let [lobby (-> (create-new-lobby {:uid uid :user user :options ?data}) (send-message @@ -286,7 +310,8 @@ lobby? (get-in new-app-state [:lobbies (:gameid lobby)])] (when lobby? (send-lobby-state lobby?) - (broadcast-lobby-list))))) + (broadcast-lobby-list)) + (log-delay! timestamp id)))) (defn clear-lobby-state [uid] (when uid @@ -303,8 +328,11 @@ (defmethod ws/-msg-handler :lobby/list lobby--list - [{uid :uid}] - (lobby-thread (send-lobby-list uid))) + [{uid :uid + id :id + timestamp :timestamp}] + (lobby-thread (send-lobby-list uid) + (log-delay! timestamp id))) (defn player? "Returns player if the uid is a player in a given lobby" @@ -388,11 +416,14 @@ [{{db :system/db user :user} :ring-req uid :uid {gameid :gameid} :?data - ?reply-fn :?reply-fn}] + ?reply-fn :?reply-fn + id :id + timestamp :timestamp}] (lobby-thread (let [lobby (app-state/get-lobby gameid)] (when (and lobby (in-lobby? uid lobby)) - (leave-lobby! db user uid ?reply-fn lobby))))) + (leave-lobby! db user uid ?reply-fn lobby)) + (log-delay! timestamp id)))) (defn find-deck [db opts] @@ -440,7 +471,9 @@ [{{db :system/db user :user} :ring-req uid :uid {:keys [gameid deck-id]} :?data - ?reply-fn :?reply-fn}] + ?reply-fn :?reply-fn + id :id + timestamp :timestamp}] (lobby-thread (let [lobby (app-state/get-lobby gameid)] (if (and lobby (in-lobby? uid lobby)) @@ -455,7 +488,8 @@ (send-lobby-state lobby?) ;;(broadcast-lobby-list) (?reply-fn (some #(= processed-deck (:deck %)) (:players lobby?)))) - (?reply-fn false))))) + (?reply-fn false)) + (log-delay! timestamp id)))) (defn handle-send-message [lobbies gameid message] (if-let [lobby (get lobbies gameid)] @@ -468,7 +502,9 @@ lobby--say [{{user :user} :ring-req uid :uid - {:keys [gameid text]} :?data}] + {:keys [gameid text]} :?data + id :id + timestamp :timestamp}] (assert (string? text) "Message must be a string") (lobby-thread (let [lobby (app-state/get-lobby gameid)] @@ -479,7 +515,8 @@ (handle-send-message gameid message) (handle-set-last-update gameid uid))) lobby? (get-in new-app-state [:lobbies gameid])] - (send-lobby-state lobby?)))))) + (send-lobby-state lobby?)))) + (log-delay! timestamp id))) (defn check-password [lobby user password] (or (empty? (:password lobby)) @@ -560,10 +597,13 @@ [{{user :user} :ring-req uid :uid {gameid :gameid :as ?data} :?data - ?reply-fn :?reply-fn}] + ?reply-fn :?reply-fn + id :id + timestamp :timestamp}] (lobby-thread (when-let [lobby (app-state/get-lobby gameid)] - (join-lobby! user uid ?data ?reply-fn lobby)))) + (join-lobby! user uid ?data ?reply-fn lobby)) + (log-delay! timestamp id))) (defn swap-side "Returns a new player map with the player's :side switched" @@ -610,7 +650,9 @@ lobby--swap [{{user :user} :ring-req uid :uid - {:keys [gameid side]} :?data}] + {:keys [gameid side]} :?data + id :id + timestamp :timestamp}] (lobby-thread (let [lobby (app-state/get-lobby gameid)] (when (and lobby (first-player? uid lobby)) @@ -623,12 +665,15 @@ (handle-set-last-update gameid uid))) lobby? (get-in new-app-state [:lobbies gameid])] (send-lobby-state lobby?) - (broadcast-lobby-list)))))) + (broadcast-lobby-list)))) + (log-delay! timestamp id))) (defmethod ws/-msg-handler :lobby/rename-game lobby--rename-game [{{db :system/db user :user} :ring-req - {:keys [gameid]} :?data}] + {:keys [gameid]} :?data + id :id + timestamp :timestamp}] (lobby-thread (when-let [lobby (app-state/get-lobby gameid)] (when (superuser? user) @@ -642,12 +687,15 @@ :action :rename-game :game-name bad-name :first-player player-name - :date (inst/now)})))))) + :date (inst/now)})))) + (log-delay! timestamp id))) (defmethod ws/-msg-handler :lobby/delete-game lobby--delete-game [{{db :system/db user :user} :ring-req - {:keys [gameid]} :?data}] + {:keys [gameid]} :?data + id :id + timestamp :timestamp}] (lobby-thread (let [lobby (app-state/get-lobby gameid) player-name (-> lobby :original-players first :user :username) @@ -660,7 +708,8 @@ :action :delete-game :game-name bad-name :first-player player-name - :date (inst/now)}))))) + :date (inst/now)}))) + (log-delay! timestamp id))) (defn clear-inactive-lobbies "Called by a background thread to close lobbies that are inactive for some number of seconds." @@ -713,7 +762,9 @@ [{{user :user} :ring-req uid :uid {:keys [gameid password request-side]} :?data - ?reply-fn :?reply-fn}] + ?reply-fn :?reply-fn + id :id + timestamp :timestamp}] (lobby-thread (let [lobby (app-state/get-lobby gameid)] (when (and lobby (allowed-in-lobby user lobby)) @@ -733,7 +784,8 @@ (false? correct-password?) (when ?reply-fn (?reply-fn 403)) :else - (when ?reply-fn (?reply-fn 404)))))))) + (when ?reply-fn (?reply-fn 404)))))) + (log-delay! timestamp id))) (defn handle-toggle-spectator-mute [lobbies gameid uid] (let [lobby (get lobbies gameid)] @@ -744,13 +796,19 @@ (defmethod ws/-msg-handler :lobby/pause-updates lobby--pause-updates [{{user :user} :ring-req - uid :uid}] - (lobby-thread (app-state/pause-lobby-updates uid))) + uid :uid + id :id + timestamp :timestamp}] + (lobby-thread (app-state/pause-lobby-updates uid) + (log-delay! timestamp id))) (defmethod ws/-msg-handler :lobby/continue-updates lobby--continue-updates [{{user :user} :ring-req - uid :uid}] + uid :uid + id :id + timestamp :timestamp}] (lobby-thread (app-state/continue-lobby-updates uid) - (send-lobby-list uid))) + (send-lobby-list uid) + (log-delay! timestamp id))) diff --git a/src/clj/web/telemetry.clj b/src/clj/web/telemetry.clj index f63dc5c657..cef9e88454 100644 --- a/src/clj/web/telemetry.clj +++ b/src/clj/web/telemetry.clj @@ -6,13 +6,19 @@ [cljc.java-time.instant :as inst] [game.core.board :refer [all-active]] [web.app-state :refer [app-state]] - [web.lobby :refer [lobby-update-uids pool-occupants-info]] + [web.lobby :refer [lobby-update-uids pool-occupants-info fetch-delay-log!]] [web.ws :refer [connected-sockets connections_]] [taoensso.encore :as enc] [taoensso.timbre :as timbre])) (def log-stat-frequency (enc/ms :mins 5)) +(defn format-delay! [] + (let [delays (fetch-delay-log!) + av #(quot (reduce + 0 %) (count %)) + fmt #(str "Average: " (av %) "ms - Max: " (reduce max 0 %) " ms - Count: " (count %))] + (str (update-vals delays fmt)))) + (defn subscriber-time-metrics "average time | oldest" [subs] @@ -70,6 +76,7 @@ lobby-sub-count (count (filter identity (vals (:lobby-updates @app-state)))) lobby-update-uids (count (lobby-update-uids)) [average-sub-time oldest-sub-time] (subscriber-time-metrics (filter identity (vals (:lobby-updates @app-state)))) + latencies (format-delay!) ajax-uid-count (count (:ajax @connected-sockets)) ajax-conn-counts (seq (map count (:ajax @connections_))) ajax-conn-total (reduce + ajax-conn-counts) @@ -96,6 +103,7 @@ " conn: " ws-conn-total " }")) (timbre/info (str "pool occupants: " (seq (pool-occupants-info)))) + (timbre/info latencies) ;; TODO - once we've got this set up on the server, wrap it in a try/catch - only ever display ;; the warning once! (timbre/info (str "Active Cards (across all lobbies): " card-freqs)) From 916f28eb087beb2d69c6fa356758c7dc1393b34f Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Sat, 28 Sep 2024 16:50:32 +1200 Subject: [PATCH 06/13] include --- src/clj/web/decks.clj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/clj/web/decks.clj b/src/clj/web/decks.clj index f4ddbb6e71..5ce84fbb5d 100644 --- a/src/clj/web/decks.clj +++ b/src/clj/web/decks.clj @@ -8,6 +8,7 @@ [jinteki.validator :refer [calculate-deck-status]] [monger.collection :as mc] [monger.result :refer [acknowledged?]] + [web.lobby :as lobby] [web.mongodb :refer [->object-id ->object-id]] [web.nrdb :as nrdb] [web.utils :refer [response mongo-time-to-utc-string]] From 8e4007cd2a3796e892b72f13511b31f69e92beb5 Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Sun, 29 Sep 2024 10:17:44 +1300 Subject: [PATCH 07/13] add percentiles to log output --- src/clj/web/telemetry.clj | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/clj/web/telemetry.clj b/src/clj/web/telemetry.clj index cef9e88454..e541f345a9 100644 --- a/src/clj/web/telemetry.clj +++ b/src/clj/web/telemetry.clj @@ -13,10 +13,21 @@ (def log-stat-frequency (enc/ms :mins 5)) +(defn percentile [vector percentile] + ;; see: https://scicloj.github.io/stats-with-clojure/stats_with_clojure.basic_statistics.html + (let [sorted-vector (sort vector) + idx (int (* percentile (/ (count sorted-vector) 100)))] + (str (nth sorted-vector idx) "ms"))) + +(defn- format-percentiles [data percentiles] + (apply str (interpose "/" (map #(percentile (vec data) %) percentiles)))) + (defn format-delay! [] (let [delays (fetch-delay-log!) + percentiles [5 25 50 75 95] av #(quot (reduce + 0 %) (count %)) - fmt #(str "Average: " (av %) "ms - Max: " (reduce max 0 %) " ms - Count: " (count %))] + fmt #(str "Average: " (av %) "ms - Count: " (count %) " - Percentiles (5/25/50/75/95): " + (format-percentiles % percentiles))] (str (update-vals delays fmt)))) (defn subscriber-time-metrics From d4bc22a1ace8cbeda01b3874e5f14c250aad2a47 Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Sun, 29 Sep 2024 11:20:43 +1300 Subject: [PATCH 08/13] only sort and strip the lobby list once, instead of once per lobby sub --- src/clj/web/lobby.clj | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/clj/web/lobby.clj b/src/clj/web/lobby.clj index 6c2d06faee..e12b22a3a4 100644 --- a/src/clj/web/lobby.clj +++ b/src/clj/web/lobby.clj @@ -230,13 +230,12 @@ (not (or user-blocked-players? players-blocked-user?)))) lobbies))) -(defn summaries-for-lobbies [filtered-lobbies] - (->> filtered-lobbies - (map lobby-summary) +(defn sorted-lobbies [lobbies] + "ideally we only sort these once" + (->> (map lobby-summary lobbies) (sort-by :date) (reverse) - (sort-by :started) - (into []))) + (sort-by :started))) (comment (->> (for [x (range 5 10)] @@ -247,11 +246,11 @@ (defn prepare-lobby-list [lobbies users] - (for [user users - :let [uid (:uid user)]] - (let [filtered-lobbies (filter-lobby-list lobbies user) - lobby-summaries (summaries-for-lobbies filtered-lobbies)] - [uid [:lobby/list lobby-summaries]]))) + (let [in-order (sorted-lobbies lobbies)] + (for [user users + :let [uid (:uid user)]] + (let [filtered-lobbies (into [] (filter-lobby-list in-order user))] + [uid [:lobby/list filtered-lobbies]])))) (defn lobby-update-uids [] From ab69c2ff82c9f5a5cdf051453c1a9dcce65c12fb Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Sun, 29 Sep 2024 12:56:18 +1300 Subject: [PATCH 09/13] cleanup --- src/clj/web/lobby.clj | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/clj/web/lobby.clj b/src/clj/web/lobby.clj index e12b22a3a4..957e4b8935 100644 --- a/src/clj/web/lobby.clj +++ b/src/clj/web/lobby.clj @@ -245,12 +245,11 @@ (summaries-for-lobbies))) (defn prepare-lobby-list - [lobbies users] - (let [in-order (sorted-lobbies lobbies)] - (for [user users - :let [uid (:uid user)]] - (let [filtered-lobbies (into [] (filter-lobby-list in-order user))] - [uid [:lobby/list filtered-lobbies]])))) + [in-order-lobbies users] + (for [user users + :let [uid (:uid user)]] + (let [filtered-lobbies (into [] (filter-lobby-list in-order-lobbies user))] + [uid [:lobby/list filtered-lobbies]]))) (defn lobby-update-uids [] @@ -266,8 +265,9 @@ (broadcast-lobby-list users))) ([users] (assert (or (sequential? users) (nil? users)) (str "Users must be a sequence: " (pr-str users))) - (let [lobbies (app-state/get-lobbies)] - (doseq [[uid ev] (prepare-lobby-list lobbies users)] + (let [lobbies (app-state/get-lobbies) + in-order (sorted-lobbies lobbies)] + (doseq [[uid ev] (prepare-lobby-list in-order users)] (when uid (ws/chsk-send! uid ev)))))) From c1fb5470c05a98fe5dfc7d47a896fd8c16356839 Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Sun, 29 Sep 2024 12:56:58 +1300 Subject: [PATCH 10/13] set lobby-sub timeout to 1hr, prune them regularly --- src/clj/web/app_state.clj | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/clj/web/app_state.clj b/src/clj/web/app_state.clj index 983fd85c8c..5859b98bf1 100644 --- a/src/clj/web/app_state.clj +++ b/src/clj/web/app_state.clj @@ -1,7 +1,9 @@ (ns web.app-state (:require + [clojure.core.async :refer [ Date: Wed, 2 Oct 2024 15:45:30 +1300 Subject: [PATCH 11/13] simple auto-threading off, save that for later --- src/clj/web/system.clj | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/clj/web/system.clj b/src/clj/web/system.clj index 27b103ae7e..0536b6fced 100644 --- a/src/clj/web/system.clj +++ b/src/clj/web/system.clj @@ -119,8 +119,7 @@ (defmethod ig/init-key :sente/router [_ _opts] (sente/start-server-chsk-router! ch-chsk - event-msg-handler - {:simple-auto-threading? true})) + event-msg-handler)) (defmethod ig/halt-key! :sente/router [_ stop-fn] (when (fn? stop-fn) From 4b6bf3d6725d885a3f8e86ca035e8cd1e164a9ea Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Fri, 4 Oct 2024 15:29:08 +1300 Subject: [PATCH 12/13] threading fns to macros --- src/clj/web/lobby.clj | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/clj/web/lobby.clj b/src/clj/web/lobby.clj index 957e4b8935..28005909ec 100644 --- a/src/clj/web/lobby.clj +++ b/src/clj/web/lobby.clj @@ -41,7 +41,6 @@ (reset! telemetry-buckets {}) res)) - ;; Oracle guidance for active threads is ~cores+2 (defonce pool-size (+ 2 (.availableProcessors (Runtime/getRuntime)))) @@ -66,8 +65,10 @@ (swap! (:occupants pool) dec)) (defonce lobby-pool (cp/threadpool 1 {:name "lobbies-thread"})) -(defn lobby-thread [& body] (cp/future lobby-pool body)) -(defn game-thread [lobby & body] (cp/future (get-in lobby [:pool :pool]) body)) +(defmacro lobby-thread [& expr] `(cp/future lobby-pool ~@expr)) +(defmacro game-thread [lobby & expr] + "Note: if the lobby isn't actually real, or has been nulled somehow, executing on the lobby thread is safe" + `(cp/future (or (get-in ~lobby [:pool :pool]) lobby-pool) ~@expr)) (defn validate-precon [format client-precon client-gateway-type] From b01e061041c343169b5a098ba988b82258e84b30 Mon Sep 17 00:00:00 2001 From: NB Kelly Date: Fri, 4 Oct 2024 16:13:10 +1300 Subject: [PATCH 13/13] destructures properly --- src/clj/web/app_state.clj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/clj/web/app_state.clj b/src/clj/web/app_state.clj index 5859b98bf1..9e0337cc5a 100644 --- a/src/clj/web/app_state.clj +++ b/src/clj/web/app_state.clj @@ -67,6 +67,7 @@ (swap! app-state assoc-in [:lobby-updates uid] (inst/now))) (defn receive-lobby-updates? + "checks if a user receives lobby updates, and updates the state if they've timed out to amortize subsequent checks. Mutates" [uid] (if-let [last-ping (get-in @app-state [:lobby-updates uid])] (if (.isBefore (inst/now) (inst/plus last-ping lobby-subs-timeout-hours chrono/hours)) @@ -84,5 +85,5 @@ (defonce cleanup-lobby-subs (go (while true (