diff --git a/Makefile b/Makefile
index e228cbe..dd57a0c 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ version := $(shell git describe --tags)
 prepare_test:
 	curl https://raw.githubusercontent.com/OdyseeTeam/gody-cdn/master/db-init.sql -o init.sql
 	cp config.example.json config.json
-	docker-compose up -d mysql lbrynet
+	docker-compose up -d mysql
 	# rm init.sql
 
 .PHONY: test
@@ -13,7 +13,6 @@ test:
 
 .PHONY: test_ci
 test_ci:
-	scripts/wait_for_wallet.sh
 	go install golang.org/x/tools/cmd/cover@latest
 	go install github.com/mattn/goveralls@latest
 	go install github.com/jandelgado/gcov2lcov@latest
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
index d86e1c5..e5e15ae 100644
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@@ -23,6 +23,14 @@ const (
 
 	StreamOriginal   = "original"
 	StreamTranscoded = "transcoded"
+
+	ResolveSource = "source"
+	ResolveKind   = "kind"
+
+	ResolveSourceCache          = "cache"
+	ResolveSourceOApi           = "oapi"
+	ResolveFailureGeneral       = "general"
+	ResolveFailureClaimNotFound = "claim_not_found"
 )
 
 var (
@@ -85,18 +93,35 @@ var (
 		Name:      "evictions_total",
 		Help:      "Total number of items evicted from the cache",
 	})
-	ResolveFailures = promauto.NewCounter(prometheus.CounterOpts{
+
+	ResolveFailures = promauto.NewCounterVec(prometheus.CounterOpts{
 		Namespace: ns,
 		Subsystem: "resolve",
 		Name:      "failures",
 		Help:      "Total number of failed SDK resolves",
-	})
-	ResolveSuccesses = promauto.NewCounter(prometheus.CounterOpts{
+	}, []string{ResolveSource, ResolveKind})
+	// ResolveFailuresDuration tracks how long failed resolves took, in seconds (default buckets).
+	ResolveFailuresDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: ns,
+		Subsystem: "resolve",
+		Name:      "failures_duration",
+		Help:      "Failed resolves durations",
+	}, []string{ResolveSource, ResolveKind})
+
+	ResolveSuccesses = promauto.NewCounterVec(prometheus.CounterOpts{
 		Namespace: ns,
 		Subsystem: "resolve",
 		Name:      "successes",
-		Help:      "Total number of succeeded SDK resolves",
-	})
+		Help:      "Total number of succeeded resolves",
+	}, []string{ResolveSource})
+	// ResolveSuccessesDuration tracks how long successful resolves took, in seconds (default buckets).
+	ResolveSuccessesDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: ns,
+		Subsystem: "resolve",
+		Name:      "successes_duration",
+		Help:      "Successful resolves durations",
+	}, []string{ResolveSource})
+
 	ResolveTimeMS = promauto.NewHistogram(prometheus.HistogramOpts{
 		Namespace: ns,
 		Subsystem: "resolve",
diff --git a/player/http_handlers.go b/player/http_handlers.go
index af10f65..32b6330 100644
--- a/player/http_handlers.go
+++ b/player/http_handlers.go
@@ -247,7 +247,6 @@ func (h *RequestHandler) Handle(c *gin.Context) {
 	stream, err := h.player.ResolveStream(uri)
 	addBreadcrumb(c.Request, "sdk", fmt.Sprintf("resolve %v", uri))
 	if err != nil {
-		metrics.ResolveFailures.Inc()
 		processStreamError("resolve", uri, c.Writer, c.Request, err)
 		return
 	}
@@ -339,7 +338,6 @@ func (h *RequestHandler) HandleTranscodedFragment(c *gin.Context) {
 	stream, err := h.player.ResolveStream(uri)
 	addBreadcrumb(c.Request, "sdk", fmt.Sprintf("resolve %v", uri))
 	if err != nil {
-		metrics.ResolveFailures.Inc()
 		processStreamError("resolve", uri, c.Writer, c.Request, err)
 		return
 	}
diff --git a/player/player.go b/player/player.go
index 5c535a9..789df08 100644
--- a/player/player.go
+++ b/player/player.go
@@ -3,7 +3,6 @@ package player
 import (
 	"encoding/hex"
 	"errors"
-	"math/rand"
 	"regexp"
 	"strings"
 	"time"
@@ -11,6 +10,7 @@ import (
 	"github.com/OdyseeTeam/player-server/internal/metrics"
 	"github.com/OdyseeTeam/player-server/pkg/logger"
 	"github.com/OdyseeTeam/player-server/pkg/paid"
+	"github.com/prometheus/client_golang/prometheus"
 
 	tclient "github.com/OdyseeTeam/transcoder/client"
 	ljsonrpc "github.com/lbryio/lbry.go/v2/extras/jsonrpc"
@@ -20,8 +20,10 @@ import (
 )
 
 const (
-	edgeTokenHeader = "Authorization"
-	edgeTokenPrefix = "Token "
+	edgeTokenHeader      = "Authorization"
+	edgeTokenPrefix      = "Token "
+	resolveCacheDuration = 5 * time.Minute
+	defaultSdkAddress    = "https://api.na-backend.odysee.com/api/v1/proxy"
 )
 
 var (
@@ -74,7 +76,7 @@ func WithPrefetch(enabled bool) func(options *PlayerOptions) {
 // NewPlayer initializes an instance with optional BlobStore.
 func NewPlayer(hotCache *HotCache, optionFuncs ...func(*PlayerOptions)) *Player {
 	options := &PlayerOptions{
-		lbrynetAddress:   "http://localhost:5279",
+		lbrynetAddress:   defaultSdkAddress,
 		downloadsEnabled: true,
 	}
 
@@ -107,16 +109,15 @@ func (p *Player) Play(s *Stream, c *gin.Context) error {
 
 // ResolveStream resolves provided URI by calling the SDK.
 func (p *Player) ResolveStream(claimId string) (*Stream, error) {
+	start := time.Now()
 	defer func(t time.Time) {
 		metrics.ResolveTimeMS.Observe(float64(time.Since(t).Milliseconds()))
-	}(time.Now())
+	}(start)
 
 	var claim *ljsonrpc.Claim
 
-	cachedClaim, err := p.resolveCache.Get(claimId)
-	if err == nil {
-		claim = cachedClaim.(*ljsonrpc.Claim)
-	} else {
+	cachedClaim, cErr := p.resolveCache.Get(claimId)
+	if cErr != nil {
 		var err error
 		claim, err = p.resolve(claimId)
 		if err != nil {
@@ -142,8 +143,14 @@ func (p *Player) ResolveStream(claimId string) (*Stream, error) {
 				return nil, err
 			}
 		}
-		metrics.ResolveSuccesses.Inc()
-		_ = p.resolveCache.SetWithExpire(claimId, claim, time.Duration(rand.Intn(5)+5)*time.Minute) // random time between 5 and 10 min, to spread load on wallet servers
+		// Durations are observed in seconds to match the histograms' default buckets.
+		metrics.ResolveSuccessesDuration.WithLabelValues(metrics.ResolveSourceOApi).Observe(time.Since(start).Seconds())
+		metrics.ResolveSuccesses.WithLabelValues(metrics.ResolveSourceOApi).Inc()
+		_ = p.resolveCache.SetWithExpire(claimId, claim, resolveCacheDuration)
+	} else {
+		metrics.ResolveSuccessesDuration.WithLabelValues(metrics.ResolveSourceCache).Observe(time.Since(start).Seconds())
+		metrics.ResolveSuccesses.WithLabelValues(metrics.ResolveSourceCache).Inc()
+		claim = cachedClaim.(*ljsonrpc.Claim)
 	}
 
 	if claim.Value.GetStream() == nil {
@@ -158,24 +165,44 @@ func (p *Player) ResolveStream(claimId string) (*Stream, error) {
 
 // resolve the claim
 func (p *Player) resolve(claimID string) (*ljsonrpc.Claim, error) {
+	// Label sets shared by every failure exit below.
+	generalFailureLabels := prometheus.Labels{
+		metrics.ResolveSource: metrics.ResolveSourceOApi,
+		metrics.ResolveKind:   metrics.ResolveFailureGeneral,
+	}
+	notFoundFailureLabels := prometheus.Labels{
+		metrics.ResolveSource: metrics.ResolveSourceOApi,
+		metrics.ResolveKind:   metrics.ResolveFailureClaimNotFound,
+	}
+
+	start := time.Now()
+
 	// TODO: Get rid of the resolve call when ClaimSearchArgs acquires URI param
 	if !reClaim.MatchString(claimID) {
 		resolved, err := p.lbrynetClient.Resolve(claimID)
 		if err != nil {
+			metrics.ResolveFailuresDuration.With(generalFailureLabels).Observe(time.Since(start).Seconds())
+			metrics.ResolveFailures.With(generalFailureLabels).Inc()
 			return nil, err
 		}
 
 		claim := (*resolved)[claimID]
 		if claim.CanonicalURL == "" {
+			metrics.ResolveFailuresDuration.With(notFoundFailureLabels).Observe(time.Since(start).Seconds())
+			metrics.ResolveFailures.With(notFoundFailureLabels).Inc()
 			return nil, ErrClaimNotFound
 		}
 		return &claim, nil
 	}
 	resp, err := p.lbrynetClient.ClaimSearch(ljsonrpc.ClaimSearchArgs{ClaimID: &claimID, PageSize: 1, Page: 1})
 	if err != nil {
+		metrics.ResolveFailuresDuration.With(generalFailureLabels).Observe(time.Since(start).Seconds())
+		metrics.ResolveFailures.With(generalFailureLabels).Inc()
 		return nil, err
 	}
 	if len(resp.Claims) == 0 {
+		metrics.ResolveFailuresDuration.With(notFoundFailureLabels).Observe(time.Since(start).Seconds())
+		metrics.ResolveFailures.With(notFoundFailureLabels).Inc()
 		return nil, ErrClaimNotFound
 	}
 	return &resp.Claims[0], nil